def __init__(self, agent, env=None):
    """Set up episode bookkeeping, rollout batching and scalar logging.

    Args:
        agent: The single agent this object works with. Its ``args``
            provide ``max_pathlength``, ``timesteps_per_batch`` and
            ``log_path``; its ``session`` is passed to the logger.
        env: Accepted for interface compatibility; not used here.
    """
    self.lock = th.Lock()
    self.agent = agent  # the one and only agent

    # The farmer is built from this object's own parameter list.
    self.para_list = self.get_parameter_list()
    self.farmer = farmer_class(self.para_list)

    # Episode / step counters and reward history.
    self.ep_num = self.total_steps = 0
    self.history_reward = []
    self.ep_value = {}
    self.value_init()  # presumably fills in ep_value -- confirm in value_init
    self.relative_time = 0

    # Rollouts per batch, taking every episode to last max_pathlength steps.
    args = self.agent.args
    self.average_len_of_episode = args.max_pathlength
    self.num_rollouts = int(
        args.timesteps_per_batch / self.average_len_of_episode
    )
    self.rollout_count = 0
    self.rollout_paths = []
    self.iteration = 0

    # One float32 scalar per logged quantity.
    self.log_scalar_name_list = [
        'reward',
        'kl_div',
        'entropy',
        'surrogate_loss',
        'value_loss',
    ]
    self.log_scalar_type_list = [tf.float32] * len(self.log_scalar_name_list)
    self.logger = Logger(
        self.agent.session,
        args.log_path + 'train',
        self.log_scalar_name_list,
        self.log_scalar_type_list,
    )
    self.write_log = self.logger.create_scalar_log_method()

    # Taken last so it marks the end of construction.
    self.start_time = time.time()
def __init__(self, agent, env=None):
    """Set up the farmer and the episode bookkeeping for ``agent``.

    Args:
        agent: The single agent this object works with. Its
            ``args.max_pathlength`` seeds ``average_steps``.
        env: Accepted for interface compatibility; not used here.
    """
    self.lock = th.Lock()
    self.agent = agent  # the one and only agent

    # The farmer is built from this object's own parameter list.
    self.para_list = self.get_parameter_list()
    self.farmer = farmer_class(self.para_list)

    # Episode / step counters and reward history.
    self.ep_num = self.total_steps = 0
    self.history_reward = []
    self.ep_value = {}
    self.value_init()  # presumably fills in ep_value -- confirm in value_init
    self.relative_time = 0

    # Initial estimate: every episode runs for the maximum path length.
    self.average_steps = self.agent.args.max_pathlength

    # Taken last so it marks the end of construction.
    self.start_time = time.time()
def __init__(self, agent, env=None):
    """Set up the farmer, episode bookkeeping and scalar logging.

    Args:
        agent: The single agent this object works with. Its ``para_list``
            mapping provides ``"max_pathlength"`` and ``"log_path"`` and
            is also handed to the farmer; its ``session`` is passed to
            the logger.
        env: Accepted for interface compatibility; not used here.
    """
    self.lock = th.Lock()
    self.agent = agent  # the one and only agent

    params = self.agent.para_list
    self.farmer = farmer_class(params)

    # Episode / step counters and reward history.
    self.ep_num = self.total_steps = 0
    self.history_reward = []
    self.ep_value = {}
    self.value_init()  # presumably fills in ep_value -- confirm in value_init
    self.relative_time = 0

    # Initial estimate: every episode runs for the maximum path length.
    self.average_steps = params["max_pathlength"]

    # One float32 scalar per logged quantity.
    self.log_scalar_name_list = ['mean_reward', 'actor_loss', 'critic_loss']
    self.log_scalar_type_list = [tf.float32] * len(self.log_scalar_name_list)
    self.logger = Logger(
        self.agent.session,
        params["log_path"] + 'train',
        self.log_scalar_name_list,
        self.log_scalar_type_list,
    )
    self.write_log = self.logger.create_scalar_log_method()

    # Taken last so it marks the end of construction.
    self.start_time = time.time()
def refarm(self):
    """Drop the current farmer and install a newly constructed one.

    Rarely needed in practice.
    """
    # NOTE(review): the constructors visible in this file pass a parameter
    # list to farmer_class; confirm the argument-free call is intended.
    # The old farmer is removed before the replacement is built.
    delattr(self, "farmer")
    self.farmer = farmer_class()