Code example #1
    def run(self,
            agent,
            min_n_samples=None,
            max_n_rollouts=None,
            force_cpu=False,
            with_animation=False):

        # default keywords
        kwargs = {
            'min_n_samples': min_n_samples,
            'max_n_rollouts': max_n_rollouts,
            'force_cpu': force_cpu,
            'with_animation': with_animation
        }

        if self._n_processes > 1:  # parallel data collection
            if not hasattr(self, '_job_runner'):  # start the process
                workers = [
                    Worker(method=self._gen_ro)
                    for _ in range(self._n_processes)
                ]
                self._job_runner = JobRunner(workers,
                                             max_run_calls=self._max_run_calls)
            # determine rollout configs
            N = self._n_processes  # number of jobs
            if max_n_rollouts is not None:
                N = int(np.ceil(max_n_rollouts / self._min_ro_per_process))
                max_n_rollouts = self._min_ro_per_process
            if min_n_samples is not None:
                min_n_samples = int(min_n_samples / N)
            kwargs['min_n_samples'] = min_n_samples
            kwargs['max_n_rollouts'] = max_n_rollouts
            kwargs['min_n_rollouts'] = self._min_ro_per_process
            # start data collection
            job = ((agent, ), kwargs)
            res = self._job_runner.run([job] * N)
            ros, agents = [r[0] for r in res], [r[1] for r in res]
        else:
            ro, agent = self._gen_ro(agent, **kwargs)
            ros, agents = [ro], [agent]
        return ros, agents
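
The parallel branch above splits the rollout budget before dispatching jobs: when `max_n_rollouts` is given, the number of jobs becomes ceil(max_n_rollouts / min_ro_per_process) and each job collects at most `min_ro_per_process` rollouts; when `min_n_samples` is given, the sample budget is divided evenly across those jobs. A minimal standalone sketch of that arithmetic (not the original code):

import math

def split_budget(min_n_samples, max_n_rollouts, n_processes, min_ro_per_process):
    # Mirrors the budget split done in `run` above (sketch only).
    N = n_processes  # number of jobs
    if max_n_rollouts is not None:
        N = int(math.ceil(max_n_rollouts / min_ro_per_process))
        max_n_rollouts = min_ro_per_process  # per-job rollout cap
    if min_n_samples is not None:
        min_n_samples = int(min_n_samples / N)  # per-job sample budget
    return N, min_n_samples, max_n_rollouts

# 10 rollouts with min_ro_per_process=2 -> 5 jobs of 2 rollouts each,
# and a 10000-sample budget becomes 2000 samples per job.
print(split_budget(10000, 10, n_processes=4, min_ro_per_process=2))  # (5, 2000, 2)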
Code example #2
def main(script_name, range_names, n_processes=-1, config_name=None):
    """ Run the `main` function in script_name.py in parallel with
        `n_processes` with different configurations given by `range_names`.

        Each configuration is jointly specificed by `config_name` and
        `range_names`. If `config_name` is None, it defaults to use the
        `CONFIG` dict in the script file. A valid config is a dict and must
        contains a key 'exp_name' whose value will be used to create the
        indentifier string to log the experiments.

        `range_names` is a list of string, which correspond to a range that
        specifies a set of parameters in the config dict one wish to experiment
        with. For example, if a string "name" is in `range_names`, the dict
        named `range_name` in script_name_ranges.py will be loaded. If
        script_name_ranges.py does not exist, it loads ranges.py.  The values
        of these experimental parameters will be used, jointly with `exp_name`,
        to create the identifier in logging.
    """
    # `n_processes` sets the number of worker processes;
    # -1 defaults to the machine's CPU count.
    if n_processes == -1:
        n_processes = mp.cpu_count()
    script = importlib.import_module('scripts.'+script_name)
    template = load_config(script_name, config_name)

    try:
        script_ranges = importlib.import_module('scripts.'+script_name+'_ranges')
    except ImportError:
        script_ranges = importlib.import_module('scripts.ranges')

    # Create the configs for all the experiments.
    tps = []
    for range_name in range_names:
        r = getattr(script_ranges, 'range_'+range_name)
        combs, keys = get_combs_and_keys(r)

        # Save the range file
        last_keys = [key[-1] for key in keys]
        if 'top_log_dir' in last_keys:
            ind = last_keys.index('top_log_dir')
            assert all(combs[0][ind]==comb[ind] for comb in combs), 'multiple top_log_dir found'
            top_log_dir = combs[0][ind]
        else:
            top_log_dir = template['top_log_dir']
        save_range(r, top_log_dir)

        print('Total number of combinations: {}'.format(len(combs)))
        for comb in combs:
            tp = copy.deepcopy(template)
            # Generate a unique exp name based on the provided ranges.
            # The description string starts from the exp name.
            value_strs = [tp['exp_name']]
            for (value, key) in zip(comb, keys):
                entry = tp
                for k in key[:-1]:  # walk down the template tree
                    entry = entry[k]
                # Make sure the key is indeed included in the template,
                # so that we set the desired flag.
                assert key[-1] in entry, 'missing {} in the config'.format(key[-1])
                entry[key[-1]] = value
                if key[-1] in ('seed', 'top_log_dir'):
                    continue  # do not include the seed number or the log directory
                if value is True:
                    value = 'T'
                elif value is False:
                    value = 'F'
                value_strs.append(str(value).replace('/', '-'))

            tp['exp_name'] = '-'.join(value_strs)
            tps.append(tp)

    # Launch the experiments.
    n_processes = min(n_processes, len(tps))  # no more workers than jobs
    print('# of CPU (threads): {}. Running {} processes'.format(mp.cpu_count(), n_processes))

    # with mp.Pool(processes=n_processes, maxtasksperchild=1) as p:
    #     p.map(script.main, tps, chunksize=1)
    #     # p.map(func, tps, chunksize=1)

    # workers = [Worker(method=script.main) for _ in range(n_processes)]
    # job_runner = JobRunner(workers)
    # jobs = [((tp,),{}) for tp in tps]
    # job_runner.run(jobs)

    workers = [Worker() for _ in range(n_processes)]
    job_runner = JobRunner(workers)
    jobs = [(partial(run_script, script.main, tp),(), {}) for tp in tps]
    job_runner.run(jobs)
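
For concreteness, here is a hypothetical `scripts/my_script.py` and its range file, showing only the pieces the launcher reads (`CONFIG`, `main`, and a `range_<name>` dict). The key-path/values layout of the range dict is an assumption inferred from how `get_combs_and_keys` and the key-walking loop are used; the actual format is fixed by the project's ranges.py:

# scripts/my_script.py (hypothetical)
CONFIG = {
    'exp_name': 'cartpole',
    'top_log_dir': 'log',      # used when the range does not override it
    'algorithm': {'lr': 1e-3, 'seed': 0},
}

def main(config):
    ...  # train with `config`; called once per generated configuration

# scripts/my_script_ranges.py (hypothetical)
range_lr = {
    ('algorithm', 'lr'): [1e-2, 1e-3, 1e-4],   # key path in the config tree -> values to sweep
    ('algorithm', 'seed'): [0, 1, 2],
}

Calling `main('my_script', ['lr'])` would then enumerate the combinations and run them across the worker pool, each under an identifier built from `exp_name` and the swept values (seed and top_log_dir excluded).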
Code example #3
def run_script(main, config):
    w = Worker(method=main)
    j = ((config,),{})
    return JobRunner([w]).run([j])
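
This wrapper gives each `main(config)` call its own `Worker` process (the launcher above submits it via `partial(run_script, script.main, tp)`), so per-experiment state stays confined to a short-lived subprocess. A usage sketch, reusing the hypothetical names from above:

# run a single configuration in a fresh subprocess
result = run_script(script.main, tps[0])   # `script` and `tps` as built by the launcher above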
Code example #4
class MDP:
    """ A wrapper for gym env. """
    def __init__(self,
                 env,
                 gamma=1.0,
                 horizon=None,
                 use_time_info=True,
                 v_end=None,
                 rw_scale=1.0,
                 n_processes=1,
                 min_ro_per_process=1,
                 max_run_calls=None):
        self.env = env  # a gym-like env
        self.gamma = gamma
        horizon = float('Inf') if horizon is None else horizon
        self.horizon = horizon
        self.use_time_info = use_time_info
        self.rw_scale = rw_scale

        # configs for rollouts
        t_state = partial(linear_t_state,
                          horizon=self.horizon) if use_time_info else None
        rw_shaping = partial(rw_scaling, scale=self.rw_scale)
        self._gen_ro = partial(self.generate_rollout,
                               env=self.env,
                               v_end=v_end,
                               rw_shaping=rw_shaping,
                               t_state=t_state,
                               max_rollout_len=horizon)
        self._n_processes = n_processes
        self._min_ro_per_process = int(max(1, min_ro_per_process))
        self._max_run_calls = max_run_calls  # for freeing memory

    def initialize(self):
        try:  # reset the env, if it supports explicit initialization
            self.env.initialize()
        except Exception:  # not all envs provide `initialize`
            pass

    @property
    def ob_shape(self):
        return self.env.observation_space.shape

    @property
    def ac_shape(self):
        return self.env.action_space.shape

    def run(self,
            agent,
            min_n_samples=None,
            max_n_rollouts=None,
            force_cpu=False,
            with_animation=False):

        # default keywords
        kwargs = {
            'min_n_samples': min_n_samples,
            'max_n_rollouts': max_n_rollouts,
            'force_cpu': force_cpu,
            'with_animation': with_animation
        }

        if self._n_processes > 1:  # parallel data collection
            if not hasattr(self, '_job_runner'):  # start the process
                workers = [
                    Worker(method=self._gen_ro)
                    for _ in range(self._n_processes)
                ]
                self._job_runner = JobRunner(workers,
                                             max_run_calls=self._max_run_calls)
            # determine rollout configs
            N = self._n_processes  # number of jobs
            if max_n_rollouts is not None:
                N = int(np.ceil(max_n_rollouts / self._min_ro_per_process))
                max_n_rollouts = self._min_ro_per_process
            if min_n_samples is not None:
                min_n_samples = int(min_n_samples / N)
            kwargs['min_n_samples'] = min_n_samples
            kwargs['max_n_rollouts'] = max_n_rollouts
            kwargs['min_n_rollouts'] = self._min_ro_per_process
            # start data collection
            job = ((agent, ), kwargs)
            res = self._job_runner.run([job] * N)
            ros, agents = [r[0] for r in res], [r[1] for r in res]
        else:
            ro, agent = self._gen_ro(agent, **kwargs)
            ros, agents = [ro], [agent]
        return ros, agents

    @staticmethod
    def generate_rollout(agent, *args, force_cpu=False, **kwargs):  # a wrapper
        if force_cpu:  # requires tensorflow
            import tensorflow as tf
            with tf.device('/device:CPU:0'):
                agent = copy.deepcopy(agent)
                ro = generate_rollout(agent.pi,
                                      agent.logp,
                                      *args,
                                      callback=agent.callback,
                                      **kwargs)
        else:
            ro = generate_rollout(agent.pi,
                                  agent.logp,
                                  *args,
                                  callback=agent.callback,
                                  **kwargs)
        return ro, agent
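
A minimal usage sketch for this wrapper, assuming a standard gym environment; `agent` stands in for the user's policy object, which must expose the `pi`, `logp`, and `callback` attributes read by `generate_rollout`:

import gym

env = gym.make('CartPole-v1')
mdp = MDP(env,
          gamma=0.99,
          horizon=200,
          n_processes=4,           # collect rollouts in 4 worker processes
          min_ro_per_process=2,
          max_run_calls=10)        # for freeing memory (passed to JobRunner)
mdp.initialize()

agent = ...  # your agent; must provide pi, logp, and callback
ros, agents = mdp.run(agent,
                      min_n_samples=20000,   # split across the parallel jobs
                      max_n_rollouts=100)
print(mdp.ob_shape, mdp.ac_shape, len(ros))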
Code example #5
class MDP:
    """ A wrapper for gym env. """
    def __init__(self,
                 env,
                 gamma=1.0,
                 horizon=None,
                 use_time_info=True,
                 v_end=None,
                 n_processes=1,
                 min_ro_per_process=1):
        self.env = env  # a gym-like env
        self.gamma = gamma
        horizon = float('Inf') if horizon is None else horizon
        self.horizon = horizon
        self.use_time_info = use_time_info

        # configs for rollouts
        t_state = partial(self.t_state,
                          horizon=horizon) if use_time_info else None
        self._gen_ro = partial(self.generate_rollout,
                               env=self.env,
                               v_end=v_end,
                               t_state=t_state,
                               max_rollout_len=horizon)
        self._n_processes = n_processes
        self._min_ro_per_process = int(max(1, min_ro_per_process))

    def initialize(self):
        try:  # reset the env, if it supports explicit initialization
            self.env.initialize()
        except Exception:  # not all envs provide `initialize`
            pass

    @staticmethod
    def t_state(t, horizon):
        return t / horizon

    @property
    def ob_shape(self):
        return self.env.observation_space.shape

    @property
    def ac_shape(self):
        return self.env.action_space.shape

    def run(self,
            agent,
            min_n_samples=None,
            max_n_rollouts=None,
            with_animation=False):
        if self._n_processes > 1:  # parallel data collection
            if not hasattr(self, '_job_runner'):  # start the process
                workers = [
                    Worker(method=self._gen_ro)
                    for _ in range(self._n_processes)
                ]
                self._job_runner = JobRunner(workers)
            # determine rollout configs
            N = self._n_processes  # number of jobs
            if max_n_rollouts is not None:
                N = int(np.ceil(max_n_rollouts / self._min_ro_per_process))
                max_n_rollouts = self._min_ro_per_process
            if min_n_samples is not None:
                min_n_samples = int(min_n_samples / N)
            kwargs = {
                'min_n_samples': min_n_samples,
                'max_n_rollouts': max_n_rollouts,
                'with_animation': False
            }
            # start data collection
            job = ((agent, ), kwargs)
            res = self._job_runner.run([job] * N)
            ros, agents = [r[0] for r in res], [r[1] for r in res]
        else:
            kwargs = {
                'min_n_samples': min_n_samples,
                'max_n_rollouts': max_n_rollouts,
                'with_animation': with_animation
            }
            ro, agent = self._gen_ro(agent, **kwargs)
            ros, agents = [ro], [agent]
        return ros, agents

    @staticmethod
    def generate_rollout(agent, *args, **kwargs):  # a wrapper
        ro = generate_rollout(agent.pi,
                              agent.logp,
                              *args,
                              callback=agent.callback,
                              **kwargs)
        return ro, agent
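
Both versions ultimately delegate to a module-level `generate_rollout(pi, logp, ..., callback=...)`, so the agent object only needs to expose those three attributes. Below is a hypothetical placeholder agent illustrating that interface; the call signatures of `pi`, `logp`, and `callback` are assumptions, since the real ones are fixed by the project's `generate_rollout`:

import numpy as np

class RandomAgent:
    # A placeholder exposing the attributes the MDP wrapper reads from an agent.
    # The signatures below are illustrative assumptions, not the project's API.

    def __init__(self, action_space):
        self.action_space = action_space

    def pi(self, ob):                # policy: observation -> action (assumed signature)
        return self.action_space.sample()

    def logp(self, obs, acs):        # log-probabilities of taken actions (assumed signature)
        return np.zeros(len(acs))

    def callback(self, ro):          # per-rollout hook (assumed signature)
        pass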