def __init__(self, value_network, config, writer=None):
    self.config = config

    # Load configs
    self.betas_for_duplication = parse(self.config["betas_for_duplication"])
    self.betas_for_discretisation = parse(self.config["betas_for_discretisation"])
    self.loss_function = loss_function_factory(self.config["loss_function"])
    self.loss_function_c = loss_function_factory(self.config["loss_function_c"])
    self.device = choose_device(self.config["device"])

    # Load network
    self._value_network = value_network
    self._value_network = self._value_network.to(self.device)
    self.n_actions = self._value_network.predict.out_features // 2

    self.writer = writer
    if writer:
        self.writer.add_graph(self._value_network,
                              input_to_model=torch.tensor(np.zeros((1, 1, self._value_network.size_state + 1),
                                                                   dtype=np.float32)).to(self.device))

    self.memory = ReplayMemory(transition_type=TransitionBFTQ, config=self.config)
    self.optimizer = None
    self.batch = 0
    self.epoch = 0
    self.reset()
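
Note: these constructors lean on helper utilities such as loss_function_factory and choose_device that are not shown on this page. The sketch below is only an illustration of what such helpers might look like; the accepted loss names and the device fallback logic are assumptions, not the library's actual implementation.

import torch
import torch.nn.functional as F


def loss_function_factory(loss_function):
    # Map a config string to a torch loss callable; the accepted names are illustrative.
    losses = {
        "l2": F.mse_loss,
        "l1": F.l1_loss,
        "smooth_l1": F.smooth_l1_loss,
        "bce": F.binary_cross_entropy,
    }
    if loss_function not in losses:
        raise ValueError("Unknown loss function: {}".format(loss_function))
    return losses[loss_function]


def choose_device(preferred_device):
    # Fall back to CPU when a CUDA device is requested but unavailable (assumed behaviour).
    if preferred_device.startswith("cuda") and not torch.cuda.is_available():
        return torch.device("cpu")
    return torch.device(preferred_device)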
Example #2
def __init__(self, env, config=None):
    super(DQNAgent, self).__init__(env, config)
    self.value_net = model_factory(self.config["model"])
    self.target_net = model_factory(self.config["model"])
    self.target_net.load_state_dict(self.value_net.state_dict())
    self.target_net.eval()
    self.device = choose_device(self.config["device"])
    self.value_net.to(self.device)
    self.target_net.to(self.device)
    self.loss_function = loss_function_factory(
        self.config["loss_function"])
    self.optimizer = optimizer_factory(self.config["optimizer"]["type"],
                                       self.value_net.parameters(),
                                       **self.config["optimizer"])
    self.steps = 0
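
Note: the optimizer is built by passing config["optimizer"]["type"] positionally and then unpacking the whole optimizer config as keyword arguments, so the factory presumably has to discard the duplicated "type" key. A minimal sketch of such a factory, assuming ADAM and RMS_PROP as the supported type strings (both names are assumptions, not the library's actual code):

import torch.optim as optim


def optimizer_factory(optimizer_type, params, **kwargs):
    # The call site unpacks the whole optimizer config, so "type" arrives twice;
    # drop it before forwarding the remaining keyword arguments to the optimizer.
    kwargs.pop("type", None)
    if optimizer_type == "ADAM":
        return optim.Adam(params, **kwargs)
    elif optimizer_type == "RMS_PROP":
        return optim.RMSprop(params, **kwargs)
    else:
        raise ValueError("Unknown optimizer type: {}".format(optimizer_type))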
Example #3
def __init__(self, env, config=None):
    super(DQNAgent, self).__init__(env, config)
    size_model_config(self.env, self.config["model"])
    self.value_net = model_factory(self.config["model"])
    self.target_net = model_factory(self.config["model"])
    self.target_net.load_state_dict(self.value_net.state_dict())
    self.target_net.eval()
    logger.debug("Number of trainable parameters: {}".format(trainable_parameters(self.value_net)))
    self.device = choose_device(self.config["device"])
    self.value_net.to(self.device)
    self.target_net.to(self.device)
    self.loss_function = loss_function_factory(self.config["loss_function"])
    self.optimizer = optimizer_factory(self.config["optimizer"]["type"],
                                       self.value_net.parameters(),
                                       **self.config["optimizer"])
    self.steps = 0
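
Note: these constructors are driven by a nested config dict covering the keys they read ("model", "device", "loss_function", "optimizer"). A hypothetical usage sketch follows; the environment id, model type and hyperparameter values are placeholders, and it assumes a gym environment and a DQNAgent class as in the examples above.

import gym

config = {
    "model": {"type": "MultiLayerPerceptron"},  # assumed model name
    "device": "cpu",
    "loss_function": "l2",
    "optimizer": {"type": "ADAM", "lr": 5e-4},
}

env = gym.make("CartPole-v0")
agent = DQNAgent(env, config)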