def create_tasks(alg, number_of_runs, number_of_cores, env_id, log_dir):
    ts = list()
    alg_parse = {'trpo': 'trpo_mpi', 'ppo': 'ppo2'}
    assert alg in alg_parse.keys()
    runs = range(number_of_runs)

    if alg == 'ppo':
        task_number = 0
        learning_rates = [0.00025, 0.0005, 0.001, 0.002]
        # train_iters = [5, 25, 50, 100]
        noptepochs = [1, 2, 4, 8]
        for _, lr, num_opt_epochs in product(runs, learning_rates, noptepochs):
            task_number += 1
            experiment_path = os.path.join('baselines', env_id, alg)
            logger_kwargs = setup_logger_kwargs(
                base_dir=log_dir,
                exp_name=experiment_path,
                seed=task_number,
                use_tensor_board=True,
                verbose=(number_of_cores == 1))
            kwargs = dict(
                # total_timesteps=3*3200,  # todo: used for debugging
                alg=alg_parse[alg],
                env_id=env_id,
                logger_kwargs=logger_kwargs,
                seed=task_number,
                lr=lr,
                noptepochs=num_opt_epochs,
                nsteps=32000,
                nminibatches=32)
            target_fn = run_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)

    elif alg == 'trpo':
        task_number = 0
        target_kls = [0.01, 0.02, 0.03, 0.05, 0.005]
        for _, target_kl in product(runs, target_kls):
            task_number += 1
            experiment_path = os.path.join('baselines', env_id, alg)
            logger_kwargs = setup_logger_kwargs(
                base_dir=log_dir,
                exp_name=experiment_path,
                seed=task_number,
                use_tensor_board=True,
                verbose=(number_of_cores == 1))
            kwargs = dict(
                alg=alg_parse[alg],
                env_id=env_id,
                logger_kwargs=logger_kwargs,
                seed=task_number,
                max_kl=target_kl)
            target_fn = run_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)

    else:
        raise NotImplementedError(f'Alg={alg}; only ppo and trpo supported')

    return ts
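
# Hypothetical usage sketch: dispatching the tasks returned above. Only
# `Scheduler(num_cores=...)` and `fill(tasks=...)` are confirmed by the
# scheduler code further down in this file; treating `run()` as the entry
# point is an assumption, since that method is not shown in this excerpt.
#
#   tasks = create_tasks(alg='ppo', number_of_runs=4, number_of_cores=8,
#                        env_id='HalfCheetahBulletEnv-v0', log_dir='/var/tmp/')
#   scheduler = mp_utils.Scheduler(num_cores=8)
#   scheduler.fill(tasks=tasks)
#   scheduler.run()  # assumption: method name not shown in this excerpt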
def create_tasks(number_of_runs, log_dir, env_id):
    ts = list()
    alg = 'iwpg'
    defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
    task_number = 0
    gen = hyper_parameter_generator(number_of_runs=number_of_runs)
    hms_time = time.strftime("%Y-%m-%d__%H-%M-%S")
    try:
        while True:
            generated_params = next(gen)
            task_number += 1
            kwargs = defaults.copy()
            experiment_path = os.path.join('experiments_a', env_id)
            logger_kwargs = setup_logger_kwargs(
                base_dir=log_dir,
                exp_name=experiment_path,
                seed=task_number,
                hms_time=hms_time,
                use_tensor_board=False,
                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=env_id,
                          alg=alg,
                          **generated_params)
            target_fn = run_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)
    except StopIteration:
        print(f'Created {task_number} tasks.')
    return ts
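
# A minimal sketch (an assumption, not the project's implementation) of the
# contract `hyper_parameter_generator` must satisfy for the loop above: yield
# one dict of algorithm kwargs per task and raise StopIteration once the run
# budget is exhausted. The parameter names and value ranges are illustrative.
import random

def _example_hyper_parameter_generator(number_of_runs):
    for _ in range(number_of_runs):
        # sample one configuration per run
        yield dict(pi_lr=random.choice([1e-4, 3e-4, 1e-3]),
                   gamma=random.choice([0.99, 0.995]))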
def create_tasks(number_of_runs, log_dir, env_id):
    ts = list()
    alg = 'iwpg'
    task_number = 0
    gen = hyper_parameter_generator(number_of_runs)
    try:
        while True:
            generated_params = next(gen)
            task_number += 1
            kwargs = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
            experiment_path = os.path.join('experiments_c', env_id)
            logger_kwargs = setup_logger_kwargs(
                base_dir=log_dir,
                exp_name=experiment_path,
                seed=task_number,
                use_tensor_board=False,
                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=env_id,
                          alg=alg,
                          **generated_params)
            # deactivate reward scaling for manipulation tasks
            env_type, _ = utils.get_env_type(env_id=env_id)
            if env_type == 'gym_manipulator_envs' or env_type == 'bullet':
                kwargs['use_reward_scaling'] = False
            target_fn = run_iwpg_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)
    except StopIteration:
        print(f'Created {task_number} tasks.')
    return ts
def _fill_scheduler(self, target_fn):
    """Create tasks for multi-process execution.

    Will be called if model.compile(multi_thread=True) is enabled.
    """
    ts = list()
    task_number = 1
    # for param_set in self.param_grid:
    for param_set in product(*self.param_grid.values()):
        grid_kwargs = dict(zip(self.param_grid.keys(), param_set))
        for i in range(self.num_runs):
            kwargs = self.kwargs.copy()
            _seed = task_number + self.seed
            logger_kwargs = setup_logger_kwargs(
                base_dir=self.log_dir,
                exp_name=self.exp_name,
                seed=_seed,
                use_tensor_board=True,
                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=_seed,
                          alg=self.alg,
                          env_id=self.env_id)
            # now pass the grid search parameters...
            kwargs.update(**grid_kwargs)
            t = mp_utils.Task(id=_seed, target_function=target_fn,
                              kwargs=kwargs)
            ts.append(t)
            task_number += 1
    self.scheduler.fill(tasks=ts)
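
# Worked example of the grid expansion above: product(*param_grid.values())
# enumerates the Cartesian product of all value lists, and zipping each
# combination with the keys turns it back into a kwargs dict. The grid below
# is illustrative.
#
#   >>> from itertools import product
#   >>> param_grid = {'lr': [1e-4, 1e-3], 'gamma': [0.99, 0.995]}
#   >>> [dict(zip(param_grid.keys(), ps))
#   ...  for ps in product(*param_grid.values())]
#   [{'lr': 0.0001, 'gamma': 0.99}, {'lr': 0.0001, 'gamma': 0.995},
#    {'lr': 0.001, 'gamma': 0.99}, {'lr': 0.001, 'gamma': 0.995}]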
def create_tasks(number_of_runs, log_dir, env_id):
    ts = list()
    task_number = 1000  # === seed number
    alg = 'iwpg'
    if env_id == 'all':
        env_ids = [
            'HalfCheetahBulletEnv-v0',
            'AntBulletEnv-v0',
            'HopperBulletEnv-v0',
            'Walker2DBulletEnv-v0',
            'HumanoidBulletEnv-v0',
            'ReacherBulletEnv-v0',
            'PusherBulletEnv-v0',
            'KukaBulletEnv-v0',
        ]
    else:
        env_ids = [env_id, ]
    for env_id in env_ids:
        defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
        gen = hyper_parameter_generator(number_of_runs)
        try:
            while True:
                generated_params = next(gen)
                task_number += 1
                kwargs = defaults.copy()
                experiment_path = os.path.join('experiments_d', env_id)
                logger_kwargs = setup_logger_kwargs(
                    base_dir=log_dir,
                    exp_name=experiment_path,
                    seed=task_number,
                    use_tensor_board=False,
                    verbose=False)
                kwargs.update(logger_kwargs=logger_kwargs,
                              seed=task_number,
                              env_id=env_id,
                              alg=alg,
                              **generated_params)
                # deactivate reward scaling for manipulation tasks
                env_type, _ = utils.get_env_type(env_id=env_id)
                if env_type == 'gym_manipulator_envs' or env_type == 'bullet':
                    kwargs['use_reward_scaling'] = False
                target_fn = run_training
                t = Task(id=task_number, target_function=target_fn,
                         kwargs=kwargs)
                ts.append(t)
        except StopIteration:
            pass
    # seed numbering starts at 1000, so report the actual task count
    print(f'Created {len(ts)} tasks.')
    return ts
def check_alg(alg_name, env_id):
    """Run one epoch update with algorithm."""
    print(f'Run {alg_name}.')
    defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
    defaults['epochs'] = 1
    defaults['num_mini_batches'] = 4
    defaults['steps_per_epoch'] = 1000
    defaults['verbose'] = False
    learn_fn = U.get_learn_function(alg_name)
    defaults['logger_kwargs'] = setup_logger_kwargs(
        exp_name='unittest',
        seed=None,
        base_dir='/var/tmp/',
        datestamp=True,
        use_tensor_board=True,
        verbose=False)
    return learn_fn(env_id, **defaults)
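
# Hypothetical smoke-test loop built on check_alg: run a single one-epoch
# update per algorithm to verify the training pipeline end to end. The
# algorithm names listed here are assumptions; use whatever
# U.get_learn_function actually supports.
#
#   for alg_name in ('iwpg', 'ppo', 'trpo'):
#       check_alg(alg_name, env_id='HalfCheetahBulletEnv-v0')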
def compile(self,
            num_runs=1,
            num_cores=os.cpu_count(),
            target='_run_mp_training',
            **kwargs_update):
    if num_runs > 1:
        self.num_runs = num_runs
        self.multi_thread = True
        self.scheduler = mp_utils.Scheduler(num_cores=num_cores)
        target_fn = getattr(self, target)
        self._fill_scheduler(target_fn)
    self.kwargs.update(kwargs_update)
    _seed = self.kwargs.get('seed', None)
    self.logger_kwargs = setup_logger_kwargs(base_dir=self.log_dir,
                                             exp_name=self.exp_name,
                                             seed=_seed)
    self.compiled = True
    return self
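
# Hypothetical usage of compile(): the surrounding Model-like class is not
# shown in this excerpt, so the constructor arguments below are assumptions.
# With num_runs > 1, compile() switches to multi-process mode and pre-fills
# the scheduler via _fill_scheduler().
#
#   model = Model(alg='iwpg', env_id='HopperBulletEnv-v0', log_dir='/var/tmp/')
#   model.compile(num_runs=4, num_cores=4)
#   model.fit()  # assumption: training entry point not shown in this excerpt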
def _fill_scheduler(self, target_fn):
    """Create tasks for multi-process execution.

    This method is called when model.compile(multi_thread=True) is enabled.
    """
    ts = list()
    for task_number in range(1, self.num_runs + 1):
        kwargs = self.kwargs.copy()
        _seed = task_number + self.seed
        logger_kwargs = setup_logger_kwargs(base_dir=self.log_dir,
                                            exp_name=self.exp_name,
                                            seed=_seed,
                                            use_tensor_board=True,
                                            verbose=False)
        kwargs.update(logger_kwargs=logger_kwargs,
                      seed=_seed,
                      alg=self.alg,
                      env_id=self.env_id)
        t = mp_utils.Task(id=_seed, target_function=target_fn, kwargs=kwargs)
        ts.append(t)
    self.scheduler.fill(tasks=ts)