Example #1
def create_tasks(number_of_runs, log_dir, env_id):
    ts = list()
    alg = 'iwpg'
    defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
    task_number = 0
    gen = hyper_parameter_generator(number_of_runs=number_of_runs)
    hms_time = time.strftime("%Y-%m-%d__%H-%M-%S")

    try:
        while True:
            generated_params = next(gen)
            task_number += 1
            kwargs = defaults.copy()
            experiment_path = os.path.join('experiments_a', env_id)
            logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                exp_name=experiment_path,
                                                seed=task_number,
                                                hms_time=hms_time,
                                                use_tensor_board=False,
                                                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=env_id,
                          alg=alg,
                          **generated_params)
            target_fn = run_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)

    except StopIteration:
        print(f'Created {task_number} tasks.')

    return ts
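hyper_parameter_generator is not shown in these examples; the while/next loop above only requires that it yield one dict of hyper-parameters per run and then raise StopIteration when exhausted. A minimal sketch under that assumption (the parameter names and ranges here are invented for illustration):

import numpy as np

def hyper_parameter_generator(number_of_runs):
    """Yield one randomly drawn hyper-parameter dict per run, then stop."""
    for _ in range(number_of_runs):
        yield {
            'pi_lr': float(10 ** np.random.uniform(-5, -3)),  # log-uniform learning rate
            'gamma': float(np.random.uniform(0.95, 0.999)),   # discount factor
        }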
Example #2
def create_tasks(number_of_runs, log_dir, env_id):
    ts = list()
    alg = 'iwpg'
    task_number = 0
    gen = hyper_parameter_generator(number_of_runs)

    try:
        while True:
            generated_params = next(gen)
            task_number += 1
            kwargs = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
            experiment_path = os.path.join('experiments_c', env_id)
            logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                exp_name=experiment_path,
                                                seed=task_number,
                                                use_tensor_board=False,
                                                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=env_id,
                          alg=alg,
                          **generated_params)
            # deactivate reward scaling for manipulation and bullet tasks
            env_type, _ = utils.get_env_type(env_id=env_id)
            if env_type in ('gym_manipulator_envs', 'bullet'):
                kwargs['use_reward_scaling'] = False
            target_fn = run_iwpg_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)

    except StopIteration:
        print(f'Created {task_number} tasks.')

    return ts
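The Task objects returned by create_tasks carry everything needed to start a run. A minimal sequential consumer only has to call each target function with its stored kwargs; this sketch assumes Task exposes the id, target_function, and kwargs it was constructed with:

def run_tasks(tasks):
    """Execute tasks one after another; a parallel scheduler could do the same per worker."""
    for task in tasks:
        print(f'Starting task {task.id}...')
        task.target_function(**task.kwargs)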
Example #3
    def __init__(self,
                 alg: str,
                 env_id: str,
                 log_dir: str,
                 seed: int,
                 unparsed_args: tuple = ()
                 ) -> None:
        """Class constructor."""
        self.alg = alg
        self.env_id = env_id
        self.log_dir = log_dir
        self.seed = seed

        self.multi_thread = False
        self.num_runs = 1
        self.training = False
        self.compiled = False
        self.trained = False

        self.default_kwargs = utils.get_defaults_kwargs(alg=alg,
                                                        env_id=env_id)
        self.kwargs = self.default_kwargs.copy()
        # update algorithm kwargs with unparsed arguments from command line
        # interpret flag/value pairs, e.g. ['--epochs', '100'] -> {'epochs': 100}
        keys = [k[2:] for k in unparsed_args[0::2]]  # strip the leading '--'
        values = [eval(v) for v in unparsed_args[1::2]]  # parse values as Python literals
        unparsed_dict = {k: v for k, v in zip(keys, values)}
        self.kwargs.update(**unparsed_dict)
        self.logger_kwargs = None  # defined by compile (a specific seed might be passed)
        self.exp_name = os.path.join(self.env_id, self.alg)

        # assigned by class methods
        self.model = None
        self.env = None
        self.scheduler = None
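The flag/value parsing above is easy to verify in isolation. The following standalone sketch reproduces it with a hypothetical argument list; ast.literal_eval is used here as a safer stand-in for eval:

import ast

# Hypothetical leftovers from argparse.parse_known_args(), e.g.:
unparsed_args = ['--epochs', '100', '--gamma', '0.99', '--use_tensor_board', 'False']

keys = [k[2:] for k in unparsed_args[0::2]]                  # strip the leading '--'
values = [ast.literal_eval(v) for v in unparsed_args[1::2]]  # '100' -> 100, 'False' -> False
unparsed_dict = dict(zip(keys, values))
print(unparsed_dict)  # {'epochs': 100, 'gamma': 0.99, 'use_tensor_board': False}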
Example #4
def learn(env_id, **kwargs) -> tuple:
    defaults = utils.get_defaults_kwargs(alg='npg', env_id=env_id)
    defaults.update(**kwargs)
    # pass the merged defaults, not the raw kwargs, to the algorithm
    alg = NaturalPolicyGradientAlgorithm(env_id=env_id, **defaults)
    ac, env = alg.learn()

    return ac, env
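A hypothetical call; any keyword argument overrides the corresponding entry in the 'npg' defaults (epochs and seed are assumed here to be valid keys in that defaults dict):

# Hypothetical usage: override a few defaults for a short run.
ac, env = learn('HalfCheetahBulletEnv-v0', epochs=10, seed=42)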
Example #5
def create_tasks(number_of_runs, log_dir, env_id):
    ts = list()
    task_number = 1000  # task id doubles as the random seed
    alg = 'iwpg'

    if env_id == 'all':
        env_ids = [
            'HalfCheetahBulletEnv-v0',
            'AntBulletEnv-v0',
            'HopperBulletEnv-v0',
            'Walker2DBulletEnv-v0',
            'HumanoidBulletEnv-v0',
            'ReacherBulletEnv-v0',
            'PusherBulletEnv-v0',
            'KukaBulletEnv-v0',
        ]
    else:
        env_ids = [
            env_id,
        ]

    for env_id in env_ids:
        defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
        gen = hyper_parameter_generator(number_of_runs)

        try:
            while True:
                generated_params = next(gen)
                task_number += 1
                kwargs = defaults.copy()
                experiment_path = os.path.join('experiments_d', env_id)
                logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                    exp_name=experiment_path,
                                                    seed=task_number,
                                                    use_tensor_board=False,
                                                    verbose=False)
                kwargs.update(logger_kwargs=logger_kwargs,
                              seed=task_number,
                              env_id=env_id,
                              alg=alg,
                              **generated_params)
                # deactivate reward scaling for manipulation and bullet tasks
                env_type, _ = utils.get_env_type(env_id=env_id)
                if env_type in ('gym_manipulator_envs', 'bullet'):
                    kwargs['use_reward_scaling'] = False
                target_fn = run_training
                t = Task(id=task_number,
                         target_function=target_fn,
                         kwargs=kwargs)
                ts.append(t)

        except StopIteration:
            pass
    print(f'Created {len(ts)} tasks.')

    return ts
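A hypothetical invocation; with env_id='all' the eight Bullet environments are expanded, and task ids (which double as seeds) start at 1001:

tasks = create_tasks(number_of_runs=4, log_dir='/var/tmp/experiments', env_id='all')
print(f'Prepared {len(tasks)} tasks.')  # 8 environments x 4 runs = 32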
Example #6
def check_alg(alg_name, env_id):
    """Run one epoch update with algorithm."""
    print(f'Run {alg_name}.')
    defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
    defaults['epochs'] = 1
    defaults['num_mini_batches'] = 4
    defaults['steps_per_epoch'] = 1000
    defaults['verbose'] = False
    learn_fn = U.get_learn_function(alg_name)
    defaults['logger_kwargs'] = setup_logger_kwargs(exp_name='unittest',
                                                    seed=None,
                                                    base_dir='/var/tmp/',
                                                    datestamp=True,
                                                    use_tensor_board=True,
                                                    verbose=False)
    return learn_fn(env_id, **defaults)
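A hypothetical smoke test built on check_alg; the algorithm names that U.get_learn_function accepts are project-specific, and 'iwpg' and 'npg' are taken from the examples above:

for alg_name in ('iwpg', 'npg'):
    check_alg(alg_name, env_id='HalfCheetahBulletEnv-v0')  # one cheap epoch each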