def main(script_name, range_names, n_processes=-1, config_name=None):
    """ Run the `main` function in script_name.py in parallel with
        `n_processes`, using different configurations given by `range_names`.

        Each configuration is jointly specified by `config_name` and
        `range_names`. If `config_name` is None, it defaults to the `CONFIG`
        dict in the script file. A valid config is a dict and must contain a
        key 'exp_name', whose value is used to create the identifier string
        for logging the experiments.

        `range_names` is a list of strings, each corresponding to a range that
        specifies a set of parameters in the config dict one wishes to
        experiment with. For example, if the string "name" is in
        `range_names`, the dict `range_name` in script_name_ranges.py will be
        loaded. If script_name_ranges.py does not exist, ranges.py is loaded
        instead. The values of these experimental parameters are used, jointly
        with `exp_name`, to create the identifier used in logging.
    """
    # Set to the number of workers.
    # It defaults to the cpu count of your machine.
    if n_processes == -1:
        n_processes = mp.cpu_count()

    script = importlib.import_module('scripts.' + script_name)
    template = load_config(script_name, config_name)
    try:
        script_ranges = importlib.import_module('scripts.' + script_name + '_ranges')
    except ImportError:
        script_ranges = importlib.import_module('scripts.ranges')

    # Create the configs for all the experiments.
    tps = []
    for range_name in range_names:
        r = getattr(script_ranges, 'range_' + range_name)
        combs, keys = get_combs_and_keys(r)

        # Save the range file.
        last_keys = [key[-1] for key in keys]
        if 'top_log_dir' in last_keys:
            ind = last_keys.index('top_log_dir')
            assert all(combs[0][ind] == comb[ind] for comb in combs), 'multiple top_log_dir found'
            top_log_dir = combs[0][ind]
        else:
            top_log_dir = template['top_log_dir']
        save_range(r, top_log_dir)

        print('Total number of combinations: {}'.format(len(combs)))
        for comb in combs:
            tp = copy.deepcopy(template)
            # Generate a unique exp name based on the provided ranges.
            # The description string starts from the exp name.
            value_strs = [tp['exp_name']]
            for (value, key) in zip(comb, keys):
                entry = tp
                for k in key[:-1]:  # walk down the template tree
                    entry = entry[k]
                # Make sure the key is indeed included in the template,
                # so that we set the desired flag.
                assert key[-1] in entry, 'missing {} in the config'.format(key[-1])
                entry[key[-1]] = value

                if key[-1] == 'seed' or key[-1] == 'top_log_dir':
                    continue  # do not include the seed number or the log directory
                else:
                    if value is True:
                        value = 'T'
                    if value is False:
                        value = 'F'
                    value = str(value).replace('/', '-')
                    value_strs.append(value)
            tp['exp_name'] = '-'.join(value_strs)
            tps.append(tp)

    # Launch the experiments.
    n_processes = min(n_processes, len(tps))  # cap the workers at the number of configs
    print('# of CPU (threads): {}. Running {} processes'.format(mp.cpu_count(), n_processes))
    # Each config is dispatched to `run_script`, which executes the script's
    # `main` inside its own Worker process.
    workers = [Worker() for _ in range(n_processes)]
    job_runner = JobRunner(workers)
    jobs = [(partial(run_script, script.main, tp), (), {}) for tp in tps]
    job_runner.run(jobs)
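# Usage sketch (an assumption, not part of this module): given a hypothetical
# scripts/cartpole.py exposing `main` and a `CONFIG` dict, and a `range_lr`
# dict defined in scripts/cartpole_ranges.py (or scripts/ranges.py), the
# experiments could be launched as
#
#     main('cartpole', ['lr'], n_processes=4)
#
# which expands `range_lr` into one config per parameter combination and runs
# each config's `main` in a separate process via `run_script` below.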
def run_script(main, config):
    # Run `main(config)` in its own Worker through a one-off JobRunner, so
    # that each experiment executes in a separate process.
    w = Worker(method=main)
    j = ((config,), {})
    return JobRunner([w]).run([j])
class MDP:
    """ A wrapper for gym env. """

    def __init__(self, env, gamma=1.0, horizon=None, use_time_info=True,
                 v_end=None, rw_scale=1.0,
                 n_processes=1, min_ro_per_process=1, max_run_calls=None):
        self.env = env  # a gym-like env
        self.gamma = gamma
        horizon = float('Inf') if horizon is None else horizon
        self.horizon = horizon
        self.use_time_info = use_time_info
        self.rw_scale = rw_scale
        # configs for rollouts
        t_state = partial(linear_t_state, horizon=self.horizon) if use_time_info else None
        rw_shaping = partial(rw_scaling, scale=self.rw_scale)
        self._gen_ro = partial(self.generate_rollout,
                               env=self.env,
                               v_end=v_end,
                               rw_shaping=rw_shaping,
                               t_state=t_state,
                               max_rollout_len=horizon)
        self._n_processes = n_processes
        self._min_ro_per_process = int(max(1, min_ro_per_process))
        self._max_run_calls = max_run_calls  # for freeing memory

    def initialize(self):
        try:  # try to reset the env
            self.env.initialize()
        except:
            pass

    @property
    def ob_shape(self):
        return self.env.observation_space.shape

    @property
    def ac_shape(self):
        return self.env.action_space.shape

    def run(self, agent, min_n_samples=None, max_n_rollouts=None,
            force_cpu=False, with_animation=False):
        # default keywords
        kwargs = {'min_n_samples': min_n_samples,
                  'max_n_rollouts': max_n_rollouts,
                  'force_cpu': force_cpu,
                  'with_animation': with_animation}
        if self._n_processes > 1:  # parallel data collection
            if not hasattr(self, '_job_runner'):
                # start the process workers
                workers = [Worker(method=self._gen_ro)
                           for _ in range(self._n_processes)]
                self._job_runner = JobRunner(workers, max_run_calls=self._max_run_calls)
            # determine rollout configs
            N = self._n_processes  # number of jobs
            if max_n_rollouts is not None:
                N = int(np.ceil(max_n_rollouts / self._min_ro_per_process))
                max_n_rollouts = self._min_ro_per_process
            if min_n_samples is not None:
                min_n_samples = int(min_n_samples / N)
            kwargs['min_n_samples'] = min_n_samples
            kwargs['max_n_rollouts'] = max_n_rollouts
            kwargs['min_n_rollouts'] = self._min_ro_per_process
            # start data collection
            job = ((agent,), kwargs)
            res = self._job_runner.run([job] * N)
            ros, agents = [r[0] for r in res], [r[1] for r in res]
        else:
            ro, agent = self._gen_ro(agent, **kwargs)
            ros, agents = [ro], [agent]
        return ros, agents

    @staticmethod
    def generate_rollout(agent, *args, force_cpu=False, **kwargs):
        # a wrapper
        if force_cpu:  # requires tensorflow
            import tensorflow as tf
            with tf.device('/device:CPU:0'):
                agent = copy.deepcopy(agent)
                ro = generate_rollout(agent.pi, agent.logp, *args,
                                      callback=agent.callback, **kwargs)
        else:
            ro = generate_rollout(agent.pi, agent.logp, *args,
                                  callback=agent.callback, **kwargs)
        return ro, agent
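# A hypothetical usage sketch (not part of this module): it assumes `gym` is
# installed and that `linear_t_state` / `rw_scaling` used by MDP.__init__ are
# defined in this file; the env id and hyperparameters are placeholders.
def _example_mdp_usage():
    import gym
    env = gym.make('CartPole-v1')
    mdp = MDP(env, gamma=0.99, horizon=200, n_processes=1)
    print(mdp.ob_shape, mdp.ac_shape)
    # `mdp.run(agent, min_n_samples=2000)` additionally expects an `agent`
    # exposing `pi`, `logp`, and `callback`, as used by MDP.generate_rollout;
    # it returns a list of rollouts and a list of (possibly copied) agents.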
class MDP:
    """ A wrapper for gym env. """

    def __init__(self, env, gamma=1.0, horizon=None, use_time_info=True,
                 v_end=None, n_processes=1, min_ro_per_process=1):
        self.env = env  # a gym-like env
        self.gamma = gamma
        horizon = float('Inf') if horizon is None else horizon
        self.horizon = horizon
        self.use_time_info = use_time_info
        # configs for rollouts
        t_state = partial(self.t_state, horizon=horizon) if use_time_info else None
        self._gen_ro = partial(self.generate_rollout,
                               env=self.env,
                               v_end=v_end,
                               t_state=t_state,
                               max_rollout_len=horizon)
        self._n_processes = n_processes
        self._min_ro_per_process = int(max(1, min_ro_per_process))

    def initialize(self):
        try:  # try to reset the env
            self.env.initialize()
        except:
            pass

    @staticmethod
    def t_state(t, horizon):
        return t / horizon

    @property
    def ob_shape(self):
        return self.env.observation_space.shape

    @property
    def ac_shape(self):
        return self.env.action_space.shape

    def run(self, agent, min_n_samples=None, max_n_rollouts=None, with_animation=False):
        if self._n_processes > 1:  # parallel data collection
            if not hasattr(self, '_job_runner'):
                # start the process workers
                workers = [Worker(method=self._gen_ro)
                           for _ in range(self._n_processes)]
                self._job_runner = JobRunner(workers)
            # determine rollout configs
            N = self._n_processes  # number of jobs
            if max_n_rollouts is not None:
                N = int(np.ceil(max_n_rollouts / self._min_ro_per_process))
                max_n_rollouts = self._min_ro_per_process
            if min_n_samples is not None:
                min_n_samples = int(min_n_samples / N)
            kwargs = {'min_n_samples': min_n_samples,
                      'max_n_rollouts': max_n_rollouts,
                      'with_animation': False}
            # start data collection
            job = ((agent,), kwargs)
            res = self._job_runner.run([job] * N)
            ros, agents = [r[0] for r in res], [r[1] for r in res]
        else:
            kwargs = {'min_n_samples': min_n_samples,
                      'max_n_rollouts': max_n_rollouts,
                      'with_animation': with_animation}
            ro, agent = self._gen_ro(agent, **kwargs)
            ros, agents = [ro], [agent]
        return ros, agents

    @staticmethod
    def generate_rollout(agent, *args, **kwargs):
        # a wrapper
        ro = generate_rollout(agent.pi, agent.logp, *args,
                              callback=agent.callback, **kwargs)
        return ro, agent
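# A hypothetical helper (an assumption, not part of this module) that mirrors
# the arithmetic used by MDP.run above to split the sampling budget across
# parallel jobs, shown here in isolation for clarity.
import math

def split_budget(n_processes, min_ro_per_process, min_n_samples=None, max_n_rollouts=None):
    """Return (n_jobs, per_job_min_n_samples, per_job_max_n_rollouts)."""
    n_jobs = n_processes  # by default, one job per worker process
    if max_n_rollouts is not None:
        # Use just enough jobs of size `min_ro_per_process` to cover the cap.
        n_jobs = int(math.ceil(max_n_rollouts / min_ro_per_process))
        max_n_rollouts = min_ro_per_process
    if min_n_samples is not None:
        min_n_samples = int(min_n_samples / n_jobs)  # split the sample budget evenly
    return n_jobs, min_n_samples, max_n_rollouts

# e.g. split_budget(8, 3, min_n_samples=2000, max_n_rollouts=10) == (4, 500, 3)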