def __init__(self, env, config=None):
    super(DQNAgent, self).__init__(env, config)
    # Resize the model's input/output layers to match the environment's spaces
    size_model_config(self.env, self.config["model"])
    # Online value network, plus a frozen copy used as the bootstrapping target
    self.value_net = model_factory(self.config["model"])
    self.target_net = model_factory(self.config["model"])
    self.target_net.load_state_dict(self.value_net.state_dict())
    self.target_net.eval()
    logger.debug("Number of trainable parameters: {}".format(trainable_parameters(self.value_net)))
    # Move both networks to the configured device
    self.device = choose_device(self.config["device"])
    self.value_net.to(self.device)
    self.target_net.to(self.device)
    self.loss_function = loss_function_factory(self.config["loss_function"])
    self.optimizer = optimizer_factory(self.config["optimizer"]["type"],
                                       self.value_net.parameters(),
                                       **self.config["optimizer"])
    self.steps = 0
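# A minimal usage sketch. The config key names ("model", "device",
# "loss_function", "optimizer", "optimizer"/"type") are exactly those read by
# the constructor above; the *values* shown here (e.g. "MultiLayerPerceptron",
# "cuda:best", "l2", "ADAM", the learning rate) are illustrative assumptions
# and must match whatever model_factory, choose_device, loss_function_factory
# and optimizer_factory actually accept in this codebase.
if __name__ == "__main__":
    import gym  # assumption: env follows the gym interface

    example_config = {
        "model": {"type": "MultiLayerPerceptron"},  # consumed by size_model_config / model_factory
        "device": "cuda:best",                      # consumed by choose_device
        "loss_function": "l2",                      # consumed by loss_function_factory
        "optimizer": {"type": "ADAM",               # "type" selects the optimizer class;
                      "lr": 5e-4},                  # remaining keys are forwarded as kwargs
    }
    agent = DQNAgent(gym.make("CartPole-v1"), example_config)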