Esempio n. 1
0
 def set_writer(self, writer):
     """Attach a summary writer and log the value network's graph and size.

     The writer is forwarded to the parent implementation; this method then
     uses ``self.writer`` (presumably set by the parent -- confirm) to record
     the graph of ``self.value_net`` and its number of trainable parameters.

     :param writer: the writer object passed on to ``super().set_writer``
     """
     super().set_writer(writer)
     observation_space = self.env.observation_space
     if isinstance(observation_space, spaces.Box):
         obs_shape = observation_space.shape
     else:
         # Any non-Box space is treated as a container of sub-spaces, and the
         # first sub-space provides the input shape.
         obs_shape = observation_space.spaces[0].shape
     # A batch holding one all-zero observation serves as the example input.
     model_input = torch.zeros((1, *obs_shape), dtype=torch.float, device=self.device)
     self.writer.add_graph(self.value_net, input_to_model=(model_input,))
     self.writer.add_scalar("agent/trainable_parameters", trainable_parameters(self.value_net), 0)
Esempio n. 2
0
 def set_writer(self, writer):
     """Register the writer, then log the value network graph and its size.

     :param writer: the writer object handed to the parent implementation
     """
     super().set_writer(writer)
     # One all-zero observation, with a leading batch dimension of 1, is used
     # as the example input for the graph.
     batch_shape = (1, *self.env.observation_space.shape)
     dummy_observation = torch.zeros(batch_shape, dtype=torch.float, device=self.device)
     self.writer.add_graph(self.value_net, input_to_model=dummy_observation)
     parameter_count = trainable_parameters(self.value_net)
     self.writer.add_scalar("agent/trainable_parameters", parameter_count, 0)
Esempio n. 3
0
 def __init__(self, env, config=None):
     """Build the value and target networks, the loss function and the optimizer.

     ``self.env`` and ``self.config`` are read right after the parent
     constructor runs (presumably it sets them -- confirm against the base class).

     :param env: the environment, passed on to the parent constructor
     :param config: optional configuration, passed on to the parent constructor
     """
     super(DQNAgent, self).__init__(env, config)

     # Both networks are created from the same model configuration.
     model_config = self.config["model"]
     size_model_config(self.env, model_config)
     self.value_net = model_factory(model_config)
     self.target_net = model_factory(model_config)

     # The target network starts with the value network's weights and is
     # switched to evaluation mode.
     self.target_net.load_state_dict(self.value_net.state_dict())
     self.target_net.eval()
     parameter_count = trainable_parameters(self.value_net)
     logger.debug("Number of trainable parameters: {}".format(parameter_count))

     # Move both networks to the selected device before the optimizer is
     # built from the value network's parameters.
     self.device = choose_device(self.config["device"])
     for network in (self.value_net, self.target_net):
         network.to(self.device)

     self.loss_function = loss_function_factory(self.config["loss_function"])
     optimizer_config = self.config["optimizer"]
     self.optimizer = optimizer_factory(
         optimizer_config["type"],
         self.value_net.parameters(),
         **optimizer_config
     )
     self.steps = 0