def __init__(self, args, observation_size, action_size, task_q, result_q,
             actor_id, monitor, name_scope='policy-actor',
             init_filter_parameters=None):
    # initialize the base agent
    super(rollout_agent, self).__init__(
        args=args, observation_size=observation_size,
        action_size=action_size, task_q=task_q, result_q=result_q,
        name_scope=name_scope)
    self.allow_monitor = monitor
    self.actor_id = actor_id

    # seed each actor's rng differently so rollouts are decorrelated
    self._npr = np.random.RandomState(args.seed + actor_id)

    # resume the observation filter from saved statistics if provided
    if init_filter_parameters is not None:
        self.ob_normalizer = ob_normalizer.normalizer(
            mean=init_filter_parameters['mean'],
            variance=init_filter_parameters['variance'],
            num_steps=init_filter_parameters['step'])
    else:
        self.ob_normalizer = ob_normalizer.normalizer()
    logger.info('The sampler {} is online'.format(self.actor_id))
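# Usage sketch (illustrative, not from the repo): spawning a sampler that
# resumes its observation filter from previously saved statistics. The
# Namespace config, the queue choices, and the 17/6 observation/action sizes
# are assumptions for illustration; `rollout_agent` is the class defined
# above and `init_filter_parameters` follows the keys it reads.
import multiprocessing
from argparse import Namespace

import numpy as np

if __name__ == '__main__':
    args = Namespace(seed=1234)  # assumed minimal config
    task_q = multiprocessing.JoinableQueue()
    result_q = multiprocessing.Queue()

    # filter statistics, e.g. restored from a checkpoint
    saved_filter = {'mean': np.zeros(17),     # running mean of observations
                    'variance': np.ones(17),  # running variance
                    'step': 10000}            # steps accumulated so far

    agent = rollout_agent(args=args, observation_size=17, action_size=6,
                          task_q=task_q, result_q=result_q, actor_id=0,
                          monitor=False,
                          init_filter_parameters=saved_filter)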
def __init__(self, args, task_q, result_q, agent_id,
             name_scope='evolutionary_agent'):
    # initialize the base agent
    self.agent_id = agent_id
    base_agent.__init__(self, args=args, observation_size=-1,
                        action_size=-1, task_q=task_q, result_q=result_q,
                        name_scope=name_scope)
    self.reset_running_mean_info()
    # the evolutionary agent never loads from ckpt_name
    # self.load_running_means()
    self.ob_normalizer = ob_normalizer.normalizer()

    # variables and networks to be used; initialize them before use
    self.baseline_network = None
    self.env_info = None
    self.error_count = 0

    # bookkeeping used when saving the checkpoint files
    self.best_reward = -np.inf
    self.timesteps_so_far = 0
    self._npr = np.random.RandomState(args.seed + self.agent_id)
    self.debug = 0
    # self.last_save_iteration = 0
    self.last_average_reward = np.nan
    self.end_average_reward = np.nan
    self.start_time = None
    self.is_dead_species = False
    self.brute_search_reward = []

    # nervenetplus requires the nervenet policy; without fc_pruning the
    # gnn must additionally act as the policy network
    if self.args.nervenetplus:
        if self.args.fc_pruning:
            assert self.args.use_nervenet
        else:
            assert self.args.use_nervenet and self.args.use_gnn_as_policy
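# Sketch of the flag combinations accepted by the asserts above. The
# Namespace fields mirror the argparse flags referenced in the constructor;
# the concrete values are assumptions chosen to satisfy each branch.
from argparse import Namespace

# nervenetplus + fc_pruning: only the nervenet policy is required
cfg_pruned = Namespace(nervenetplus=True, fc_pruning=True,
                       use_nervenet=True, use_gnn_as_policy=False)

# nervenetplus without fc_pruning: the gnn must also serve as the policy
cfg_full = Namespace(nervenetplus=True, fc_pruning=False,
                     use_nervenet=True, use_gnn_as_policy=True)

for cfg in (cfg_pruned, cfg_full):
    if cfg.nervenetplus:
        if cfg.fc_pruning:
            assert cfg.use_nervenet
        else:
            assert cfg.use_nervenet and cfg.use_gnn_as_policy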