Example #1
0
    def __init__(self,
                 args,
                 observation_size,
                 action_size,
                 task_q,
                 result_q,
                 actor_id,
                 monitor,
                 name_scope='policy-actor',
                 init_filter_parameters=None):
        """Sampler worker that produces rollouts for the policy.

        Initializes the base agent, seeds a per-actor RNG, and restores the
        observation normalizer from ``init_filter_parameters`` when one is
        supplied (otherwise a fresh normalizer is created).
        """
        # hand the shared queue/config plumbing to the base agent
        super(rollout_agent, self).__init__(
            args=args, observation_size=observation_size,
            action_size=action_size, task_q=task_q,
            result_q=result_q, name_scope=name_scope)

        self.allow_monitor = monitor
        self.actor_id = actor_id
        # offset the seed by actor id so each sampler draws an
        # independent random stream
        self._npr = np.random.RandomState(args.seed + actor_id)

        # resume observation normalization from saved filter state, if any
        if init_filter_parameters is None:
            self.ob_normalizer = ob_normalizer.normalizer()
        else:
            self.ob_normalizer = ob_normalizer.normalizer(
                mean=init_filter_parameters['mean'],
                variance=init_filter_parameters['variance'],
                num_steps=init_filter_parameters['step'])

        logger.info('The sampler {} is online'.format(self.actor_id))
    def __init__(self,
                 args,
                 task_q,
                 result_q,
                 agent_id,
                 name_scope='evolutionary_agent'):
        """Worker agent used by the evolutionary search loop.

        The environment is not known at construction time, so observation
        and action sizes are passed to the base agent as -1 placeholders.
        Running-mean state and the observation normalizer always start
        fresh; checkpoints are deliberately never loaded here.
        """
        # record the id before base init in case the base class relies on it
        self.agent_id = agent_id
        base_agent.__init__(self,
                            args=args,
                            observation_size=-1,
                            action_size=-1,
                            task_q=task_q,
                            result_q=result_q,
                            name_scope=name_scope)

        self.reset_running_mean_info()
        self.ob_normalizer = ob_normalizer.normalizer()

        # lazily-initialized members — set them up before first use
        self.baseline_network = None
        self.env_info = None
        self.error_count = 0

        # bookkeeping for checkpointing and progress tracking
        self.best_reward = -np.inf
        self.timesteps_so_far = 0
        self.debug = 0
        # per-agent RNG stream, seeded by agent id offset
        self._npr = np.random.RandomState(args.seed + self.agent_id)

        self.last_average_reward = np.nan
        self.end_average_reward = np.nan
        self.start_time = None
        self.is_dead_species = False
        self.brute_search_reward = []

        # sanity-check the flag combination required by nervenet-plus
        if self.args.nervenetplus:
            if not self.args.fc_pruning:
                assert self.args.use_nervenet and self.args.use_gnn_as_policy
            else:
                assert self.args.use_nervenet