def __init__(self, value_network, config, writer=None):
    """Build the trainer state around a value network.

    Reads the beta grids, the two loss functions and the device from
    ``config``, moves ``value_network`` to that device, optionally logs the
    network graph to ``writer``, creates the replay memory and finally
    calls ``self.reset()``.

    :param value_network: network to train; it must expose ``to``, a
        ``predict`` layer with ``out_features`` and a ``size_state``
        attribute, all of which are read here.
    :param config: mapping providing the keys ``"betas_for_duplication"``,
        ``"betas_for_discretisation"``, ``"loss_function"``,
        ``"loss_function_c"`` and ``"device"``; it is also handed to the
        replay memory.
    :param writer: optional object with an ``add_graph`` method
        (presumably a tensorboard ``SummaryWriter`` -- TODO confirm).
        Nothing is logged when it is falsy.
    """
    self.config = config
    settings = self.config

    # Load configs
    self.betas_for_duplication = parse(settings["betas_for_duplication"])
    self.betas_for_discretisation = parse(settings["betas_for_discretisation"])
    self.loss_function = loss_function_factory(settings["loss_function"])
    self.loss_function_c = loss_function_factory(settings["loss_function_c"])
    self.device = choose_device(settings["device"])

    # Load network
    self._value_network = value_network.to(self.device)
    # The action count is half the width of the ``predict`` layer
    # (presumably two outputs per action -- TODO confirm against the model).
    self.n_actions = self._value_network.predict.out_features // 2

    self.writer = writer
    if writer:
        # Trace the graph with an all-zero float32 input of shape
        # (1, 1, size_state + 1).
        input_shape = (1, 1, self._value_network.size_state + 1)
        dummy_input = torch.tensor(np.zeros(input_shape, dtype=np.float32))
        self.writer.add_graph(
            self._value_network,
            input_to_model=dummy_input.to(self.device),
        )

    self.memory = ReplayMemory(
        transition_type=TransitionBFTQ,
        config=self.config,
    )
    self.optimizer = None
    self.batch = 0
    self.epoch = 0
    self.reset()
def __init__(self, env, config=None):
    """Create the value/target networks, loss function and optimizer.

    Both networks are built from ``self.config["model"]``. The target
    network starts with a copy of the value network's weights and is put
    in evaluation mode. Both are then moved to the configured device.

    :param env: environment, forwarded to the parent constructor.
    :param config: optional configuration, forwarded to the parent
        constructor; settings are read back through ``self.config``.
    """
    super(DQNAgent, self).__init__(env, config)

    # Two networks built from the same model configuration.
    model_config = self.config["model"]
    self.value_net = model_factory(model_config)
    self.target_net = model_factory(model_config)

    # Start the target network as a copy of the value network, in eval mode.
    initial_weights = self.value_net.state_dict()
    self.target_net.load_state_dict(initial_weights)
    self.target_net.eval()

    self.device = choose_device(self.config["device"])
    for network in (self.value_net, self.target_net):
        network.to(self.device)

    self.loss_function = loss_function_factory(self.config["loss_function"])

    # The whole optimizer section (including its "type" entry) is also
    # forwarded as keyword arguments.
    optimizer_config = self.config["optimizer"]
    self.optimizer = optimizer_factory(
        optimizer_config["type"],
        self.value_net.parameters(),
        **optimizer_config
    )
    self.steps = 0
def __init__(self, env, config=None):
    """Create the value/target networks, loss function and optimizer.

    ``size_model_config`` is first called with the environment and the
    model configuration. Both networks are then built from that
    configuration. The target network starts with a copy of the value
    network's weights and is put in evaluation mode. The number of
    trainable parameters of the value network is logged at debug level,
    and both networks are moved to the configured device.

    :param env: environment, forwarded to the parent constructor and read
        back through ``self.env``.
    :param config: optional configuration, forwarded to the parent
        constructor; settings are read back through ``self.config``.
    """
    super(DQNAgent, self).__init__(env, config)

    # Let the helper see the environment before the model is built
    # (presumably it fills in input/output sizes -- TODO confirm).
    model_config = self.config["model"]
    size_model_config(self.env, model_config)

    self.value_net = model_factory(model_config)
    self.target_net = model_factory(model_config)

    # Start the target network as a copy of the value network, in eval mode.
    initial_weights = self.value_net.state_dict()
    self.target_net.load_state_dict(initial_weights)
    self.target_net.eval()

    parameter_count = trainable_parameters(self.value_net)
    logger.debug("Number of trainable parameters: {}".format(parameter_count))

    self.device = choose_device(self.config["device"])
    for network in (self.value_net, self.target_net):
        network.to(self.device)

    self.loss_function = loss_function_factory(self.config["loss_function"])

    # The whole optimizer section (including its "type" entry) is also
    # forwarded as keyword arguments.
    optimizer_config = self.config["optimizer"]
    self.optimizer = optimizer_factory(
        optimizer_config["type"],
        self.value_net.parameters(),
        **optimizer_config
    )
    self.steps = 0