Exemple #1
0
def create_tasks(alg, number_of_runs, number_of_cores, env_id, log_dir):
    """Build the grid-search task list for a baselines algorithm.

    One ``Task`` is created for every (run, hyper-parameter combination)
    pair. Tasks are numbered from 1 in creation order and the task number
    is also used as the seed of the run.

    Args:
        alg: Short algorithm name; must be ``'ppo'`` or ``'trpo'``.
        number_of_runs: How often each hyper-parameter combination is
            repeated.
        number_of_cores: The ``verbose`` flag passed to
            ``setup_logger_kwargs`` is True only when this equals 1.
        env_id: Environment identifier forwarded to every task.
        log_dir: Passed to ``setup_logger_kwargs`` as ``base_dir``.

    Returns:
        A list of ``Task`` objects targeting ``run_training``.

    Raises:
        NotImplementedError: If ``alg`` is neither ``'ppo'`` nor ``'trpo'``.
    """
    alg_parse = {'trpo': 'trpo_mpi', 'ppo': 'ppo2'}
    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # previously made the NotImplementedError branch unreachable.
    if alg not in alg_parse:
        raise NotImplementedError(f'Alg={alg}; only ppo and trpo supported')

    runs = range(number_of_runs)

    # Collect the algorithm-specific hyper-parameters of every task first;
    # the task construction itself is identical for both algorithms.
    if alg == 'ppo':
        learning_rates = [0.00025, 0.0005, 0.001, 0.002]
        noptepochs = [1, 2, 4, 8]
        hyper_params = [
            dict(lr=lr,
                 noptepochs=num_opt_epochs,
                 nsteps=32000,
                 nminibatches=32)
            for _, lr, num_opt_epochs in product(runs, learning_rates,
                                                 noptepochs)
        ]
    else:  # alg == 'trpo'
        target_kls = [0.01, 0.02, 0.03, 0.05, 0.005]
        hyper_params = [
            dict(max_kl=target_kl)
            for _, target_kl in product(runs, target_kls)
        ]

    experiment_path = os.path.join('baselines', env_id, alg)
    ts = []
    for task_number, params in enumerate(hyper_params, start=1):
        logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                            exp_name=experiment_path,
                                            seed=task_number,
                                            use_tensor_board=True,
                                            verbose=(number_of_cores == 1))
        kwargs = dict(alg=alg_parse[alg],
                      env_id=env_id,
                      logger_kwargs=logger_kwargs,
                      seed=task_number,
                      **params)
        ts.append(Task(id=task_number,
                       target_function=run_training,
                       kwargs=kwargs))

    return ts
Exemple #2
0
def create_tasks(number_of_runs, log_dir, env_id):
    """Create one ``iwpg`` training task per generated hyper-parameter set.

    Every parameter set yielded by ``hyper_parameter_generator`` is merged
    on top of a copy of the algorithm defaults. Tasks are numbered from 1
    and the task number is also used as the seed.

    Args:
        number_of_runs: Forwarded to ``hyper_parameter_generator``.
        log_dir: Passed to ``setup_logger_kwargs`` as ``base_dir``.
        env_id: Environment identifier; also the last component of the
            experiment path.

    Returns:
        A list of ``Task`` objects targeting ``run_training``.
    """
    alg = 'iwpg'
    defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
    gen = hyper_parameter_generator(number_of_runs=number_of_runs)
    # A single time stamp is shared by all tasks of this call.
    hms_time = time.strftime("%Y-%m-%d__%H-%M-%S")
    experiment_path = os.path.join('experiments_a', env_id)

    ts = []
    # Iterate the generator directly instead of catching StopIteration
    # around the whole body, which would also swallow a StopIteration
    # raised by any of the helpers called below.
    for task_number, generated_params in enumerate(gen, start=1):
        kwargs = defaults.copy()
        logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                            exp_name=experiment_path,
                                            seed=task_number,
                                            hms_time=hms_time,
                                            use_tensor_board=False,
                                            verbose=False)
        kwargs.update(logger_kwargs=logger_kwargs,
                      seed=task_number,
                      env_id=env_id,
                      alg=alg,
                      **generated_params)
        ts.append(Task(id=task_number,
                       target_function=run_training,
                       kwargs=kwargs))

    print(f'Created {len(ts)} tasks.')

    return ts
Exemple #3
0
def create_tasks(number_of_runs, log_dir, env_id):
    """Create one ``iwpg`` training task per generated hyper-parameter set.

    For every parameter set yielded by ``hyper_parameter_generator`` a new
    defaults dictionary is fetched and updated with the generated values.
    Tasks are numbered from 1 and the task number is also used as the seed.

    Args:
        number_of_runs: Forwarded to ``hyper_parameter_generator``.
        log_dir: Passed to ``setup_logger_kwargs`` as ``base_dir``.
        env_id: Environment identifier; also the last component of the
            experiment path.

    Returns:
        A list of ``Task`` objects targeting ``run_iwpg_training``.
    """
    alg = 'iwpg'
    gen = hyper_parameter_generator(number_of_runs)
    experiment_path = os.path.join('experiments_c', env_id)

    ts = []
    # Iterate the generator directly instead of catching StopIteration
    # around the whole body, which would also swallow a StopIteration
    # raised by any of the helpers called below.
    for task_number, generated_params in enumerate(gen, start=1):
        kwargs = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
        logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                            exp_name=experiment_path,
                                            seed=task_number,
                                            use_tensor_board=False,
                                            verbose=False)
        kwargs.update(logger_kwargs=logger_kwargs,
                      seed=task_number,
                      env_id=env_id,
                      alg=alg,
                      **generated_params)
        # deactivate reward scaling for manipulation tasks
        env_type, _ = utils.get_env_type(env_id=env_id)
        if env_type in ('gym_manipulator_envs', 'bullet'):
            kwargs['use_reward_scaling'] = False
        ts.append(Task(id=task_number,
                       target_function=run_iwpg_training,
                       kwargs=kwargs))

    print(f'Created {len(ts)} tasks.')

    return ts
Exemple #4
0
    def _fill_scheduler(self, target_fn):
        """Populate the scheduler with one task per grid point and run.

        Every combination of the values in ``self.param_grid`` is repeated
        ``self.num_runs`` times. Each task receives a copy of
        ``self.kwargs`` updated with logger settings, seed, algorithm and
        environment, and finally with the grid values of its combination.
        Seeds are ``self.seed`` plus a counter that starts at 1.
        """
        grid_names = tuple(self.param_grid.keys())
        tasks = []
        counter = 1

        for combination in product(*self.param_grid.values()):
            overrides = dict(zip(grid_names, combination))

            for _ in range(self.num_runs):
                task_kwargs = self.kwargs.copy()
                run_seed = counter + self.seed
                task_kwargs.update(
                    logger_kwargs=setup_logger_kwargs(
                        base_dir=self.log_dir,
                        exp_name=self.exp_name,
                        seed=run_seed,
                        use_tensor_board=True,
                        verbose=False),
                    seed=run_seed,
                    alg=self.alg,
                    env_id=self.env_id)
                # grid-search values are applied last so they take precedence
                task_kwargs.update(**overrides)
                tasks.append(mp_utils.Task(id=run_seed,
                                           target_function=target_fn,
                                           kwargs=task_kwargs))
                counter += 1

        self.scheduler.fill(tasks=tasks)
Exemple #5
0
def create_tasks(number_of_runs, log_dir, env_id):
    """Create ``iwpg`` training tasks for one environment or a fixed set.

    For each environment, every parameter set yielded by
    ``hyper_parameter_generator`` is merged on top of a copy of that
    environment's defaults. Seeds (and task ids) continue counting across
    environments; the first task gets 1001.

    Args:
        number_of_runs: Forwarded to ``hyper_parameter_generator``.
        log_dir: Passed to ``setup_logger_kwargs`` as ``base_dir``.
        env_id: A single environment identifier, or ``'all'`` to use the
            built-in list of Bullet environments below.

    Returns:
        A list of ``Task`` objects targeting ``run_training``.
    """
    ts = []
    task_number = 1000  # === seed number
    alg = 'iwpg'

    if env_id == 'all':
        env_ids = [
            'HalfCheetahBulletEnv-v0',
            'AntBulletEnv-v0',
            'HopperBulletEnv-v0',
            'Walker2DBulletEnv-v0',
            'HumanoidBulletEnv-v0',
            'ReacherBulletEnv-v0',
            'PusherBulletEnv-v0',
            'KukaBulletEnv-v0',
        ]
    else:
        env_ids = [
            env_id,
        ]

    # A separate loop variable keeps the ``env_id`` argument intact.
    for current_env_id in env_ids:
        defaults = utils.get_defaults_kwargs(alg=alg, env_id=current_env_id)
        experiment_path = os.path.join('experiments_d', current_env_id)

        # Iterate the generator directly instead of catching StopIteration
        # around the whole body, which would also swallow a StopIteration
        # raised by any of the helpers called below.
        for generated_params in hyper_parameter_generator(number_of_runs):
            task_number += 1
            kwargs = defaults.copy()
            logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                exp_name=experiment_path,
                                                seed=task_number,
                                                use_tensor_board=False,
                                                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=current_env_id,
                          alg=alg,
                          **generated_params)
            # deactivate reward scaling for manipulation tasks
            env_type, _ = utils.get_env_type(env_id=current_env_id)
            if env_type in ('gym_manipulator_envs', 'bullet'):
                kwargs['use_reward_scaling'] = False
            ts.append(Task(id=task_number,
                           target_function=run_training,
                           kwargs=kwargs))

    # ``task_number`` starts at 1000, so report the real number of tasks.
    print(f'Created {len(ts)} tasks.')

    return ts
 def check_alg(alg_name, env_id):
     """Run one epoch update with the given algorithm.

     The algorithm defaults are overridden with a one-epoch configuration
     and the result of the algorithm's learn function is returned.
     """
     print(f'Run {alg_name}.')
     config = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
     # keep the run short: one epoch of 1000 steps, quiet output
     config.update(epochs=1,
                   num_mini_batches=4,
                   steps_per_epoch=1000,
                   verbose=False)
     train = U.get_learn_function(alg_name)
     config['logger_kwargs'] = setup_logger_kwargs(
         exp_name='unittest',
         seed=None,
         base_dir='/var/tmp/',
         datestamp=True,
         use_tensor_board=True,
         verbose=False,
     )
     return train(env_id, **config)
Exemple #7
0
    def compile(self,
                num_runs=1,
                num_cores=os.cpu_count(),
                target='_run_mp_training',
                **kwargs_update
                ):
        """Finalize the configuration and optionally prepare parallel runs.

        Args:
            num_runs: When greater than 1, a scheduler is created and
                filled with tasks through ``self._fill_scheduler``.
            num_cores: Passed to ``mp_utils.Scheduler``. The default is
                evaluated once, when the method is defined.
            target: Name of the attribute of ``self`` that is handed to
                ``_fill_scheduler`` as the target function.
            **kwargs_update: Overrides merged into ``self.kwargs``.

        Returns:
            ``self``, so that calls can be chained.
        """
        # Merge the overrides first: _fill_scheduler is handed this object
        # and must see the updated self.kwargs. Previously the overrides
        # were applied only after the scheduler had been filled.
        self.kwargs.update(kwargs_update)

        if num_runs > 1:
            self.num_runs = num_runs
            self.multi_thread = True
            self.scheduler = mp_utils.Scheduler(num_cores=num_cores)
            target_fn = getattr(self, target)
            self._fill_scheduler(target_fn)

        _seed = self.kwargs.get('seed', None)
        self.logger_kwargs = setup_logger_kwargs(base_dir=self.log_dir,
                                                 exp_name=self.exp_name,
                                                 seed=_seed)

        self.compiled = True

        return self
Exemple #8
0
    def _fill_scheduler(self, target_fn):
        """Populate the scheduler with ``self.num_runs`` tasks.

        Each task gets a copy of ``self.kwargs`` updated with logger
        settings, seed, algorithm and environment. Seeds run from
        ``self.seed + 1`` to ``self.seed + self.num_runs`` and are also
        used as task ids.
        """
        def build_task(run_index):
            # One independent kwargs copy per task.
            task_kwargs = self.kwargs.copy()
            run_seed = run_index + self.seed
            task_kwargs.update(
                logger_kwargs=setup_logger_kwargs(base_dir=self.log_dir,
                                                  exp_name=self.exp_name,
                                                  seed=run_seed,
                                                  use_tensor_board=True,
                                                  verbose=False),
                seed=run_seed,
                alg=self.alg,
                env_id=self.env_id)
            return mp_utils.Task(id=run_seed,
                                 target_function=target_fn,
                                 kwargs=task_kwargs)

        self.scheduler.fill(
            tasks=[build_task(index)
                   for index in range(1, self.num_runs + 1)])