def set_writer(self, writer):
    """Attach a summary writer and log the value network to it.

    After delegating to the parent implementation, this traces
    ``self.value_net`` with an all-zeros dummy observation and records its
    number of trainable parameters under ``agent/trainable_parameters``
    at step 0.

    :param writer: the writer forwarded to the parent ``set_writer``; the
                   logging calls below go through ``self.writer``, which is
                   presumably set by the parent -- confirm against the base
                   class.
    """
    super().set_writer(writer)

    # A Box space exposes its shape directly; for any other space the shape
    # of the first sub-space in ``.spaces`` is used instead.
    obs_space = self.env.observation_space
    if isinstance(obs_space, spaces.Box):
        obs_shape = obs_space.shape
    else:
        obs_shape = obs_space.spaces[0].shape

    # Dummy input with a leading dimension of 1, used only to trace the graph.
    model_input = torch.zeros((1, *obs_shape), dtype=torch.float, device=self.device)

    # The previous version ended this call with a stray comma, turning the
    # statement into a discarded tuple expression.
    self.writer.add_graph(self.value_net, input_to_model=(model_input,))
    self.writer.add_scalar("agent/trainable_parameters", trainable_parameters(self.value_net), 0)
def set_writer(self, writer):
    """Attach a summary writer and log the value network to it.

    Delegates to the parent implementation, then traces ``self.value_net``
    with an all-zeros dummy observation and records its number of trainable
    parameters under ``agent/trainable_parameters`` at step 0.

    :param writer: the writer forwarded to the parent ``set_writer``; the
                   logging calls below go through ``self.writer``, which is
                   presumably set by the parent -- confirm against the base
                   class.
    """
    super().set_writer(writer)

    # Dummy input: the observation space shape with a leading dimension of 1.
    dummy_shape = (1, *self.env.observation_space.shape)
    dummy_input = torch.zeros(dummy_shape, dtype=torch.float, device=self.device)
    self.writer.add_graph(self.value_net, input_to_model=dummy_input)

    parameter_count = trainable_parameters(self.value_net)
    self.writer.add_scalar("agent/trainable_parameters", parameter_count, 0)
def __init__(self, env, config=None):
    """Build the value/target networks, loss function and optimizer.

    Reads the ``"model"``, ``"device"``, ``"loss_function"`` and
    ``"optimizer"`` entries of ``self.config`` (the latter must contain a
    ``"type"`` key).

    :param env: the environment, forwarded to the parent constructor
    :param config: the agent configuration, forwarded to the parent
                   constructor; ``self.config`` is presumably populated
                   there, including when this is None -- confirm against
                   the base class.
    """
    super(DQNAgent, self).__init__(env, config)

    # Size the model configuration from the environment, then build two
    # networks from the same configuration.
    model_config = self.config["model"]
    size_model_config(self.env, model_config)
    self.value_net = model_factory(model_config)
    self.target_net = model_factory(model_config)

    # The target network starts as a copy of the value network's weights
    # and is put in evaluation mode.
    initial_weights = self.value_net.state_dict()
    self.target_net.load_state_dict(initial_weights)
    self.target_net.eval()

    parameter_count = trainable_parameters(self.value_net)
    logger.debug("Number of trainable parameters: {}".format(parameter_count))

    # Move both networks to the selected device before creating the optimizer.
    self.device = choose_device(self.config["device"])
    for network in (self.value_net, self.target_net):
        network.to(self.device)

    self.loss_function = loss_function_factory(self.config["loss_function"])

    # The whole optimizer configuration (including "type") is also passed
    # through as keyword arguments, exactly as before.
    optimizer_config = self.config["optimizer"]
    self.optimizer = optimizer_factory(
        optimizer_config["type"],
        self.value_net.parameters(),
        **optimizer_config
    )

    self.steps = 0