Code example #1
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = deep_update(ALGORITHM_PARAMS_BASE,
                                   ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {}))
    algorithm_params = deep_update(
        algorithm_params, ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))
    variant_spec = {
        'domain':
        domain,
        'task':
        task,
        'universe':
        universe,
        'git_sha':
        get_git_rev(),
        'env_params':
        ENV_PARAMS.get(domain, {}).get(task, {}),
        'policy_params':
        deep_update(POLICY_PARAMS_BASE[policy],
                    POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params':
        algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': 1e6,
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length':
                MAX_PATH_LENGTH_PER_DOMAIN.get(domain,
                                               DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size':
                MAX_PATH_LENGTH_PER_DOMAIN.get(domain,
                                               DEFAULT_MAX_PATH_LENGTH),
                'batch_size':
                256,
            }
        },
        'run_params': {
            'seed':
            tune.sample_from(lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end':
            True,
            'checkpoint_frequency':
            NUM_EPOCHS_PER_DOMAIN.get(domain, DEFAULT_NUM_EPOCHS) //
            NUM_CHECKPOINTS,
            'checkpoint_replay_pool':
            False,
        },
    }

    return variant_spec
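
All of these variants lean on deep_update to merge the base parameter dicts with domain- and algorithm-specific overrides. A minimal sketch of such a helper, assuming it recursively merges nested dicts (the actual implementation in softlearning may differ in detail):

import copy


def deep_update(base, *updates):
    """Recursively merge one or more dicts into a copy of `base`.

    Nested dicts are merged key by key; any other value in a later
    argument overwrites the earlier one.
    """
    result = copy.deepcopy(base)
    for update in updates:
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(result.get(key), dict):
                result[key] = deep_update(result[key], value)
            else:
                result[key] = copy.deepcopy(value)
    return result


# Example: algorithm-specific kwargs override the base without touching
# unrelated keys.
merged = deep_update({'kwargs': {'lr': 3e-4, 'tau': 5e-3}},
                     {'kwargs': {'lr': 1e-3}})
assert merged == {'kwargs': {'lr': 1e-3, 'tau': 5e-3}}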
Code example #2
File: variants.py  Project: YunchuZhang/softlearning
def get_variant_spec_base(universe, domain, task, policy, algorithm, sampler,
                          replay_pool):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain,
                                        DEFAULT_ALGORITHM_DOMAIN_PARAMS))
    algorithm_params = deep_update(
        algorithm_params, ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))
    variant_spec = {
        'git_sha':
        get_git_rev(),
        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation':
            tune.sample_from(lambda spec: (spec.get('config', spec)[
                'environment_params']['training'])),
        },
        'policy_params':
        deep_update(POLICY_PARAMS_BASE[policy],
                    POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params':
        algorithm_params,
        'replay_pool_params':
        deep_update(REPLAY_POOL_PARAMS_BASE[replay_pool]),
        'sampler_params':
        deep_update(
            SAMPLER_PARAMS_BASE[sampler],
            SAMPLER_PARAMS_PER_DOMAIN.get(domain,
                                          DEFAULT_SAMPLER_DOMAIN_PARAMS)),
        'run_params': {
            'seed':
            tune.sample_from(lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end':
            True,
            'checkpoint_frequency':
            NUM_EPOCHS_PER_DOMAIN.get(domain, DEFAULT_NUM_EPOCHS) //
            NUM_CHECKPOINTS,
            'checkpoint_replay_pool':
            False,
        },
    }

    return variant_spec
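
The 'evaluation' entry here simply mirrors the training environment params through tune.sample_from. The spec.get('config', spec) idiom covers both the case where Ray Tune hands the lambda the full trial spec (variant nested under 'config') and the case where it passes the variant dict directly. A small illustration with made-up values:

resolve_evaluation = lambda spec: (
    spec.get('config', spec)['environment_params']['training'])

training_params = {'domain': 'HalfCheetah', 'task': 'v3', 'universe': 'gym'}
nested = {'config': {'environment_params': {'training': training_params}}}
flat = {'environment_params': {'training': training_params}}

# Both layouts resolve to the same training params object.
assert resolve_evaluation(nested) is training_params
assert resolve_evaluation(flat) is training_params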
Code example #3
def get_variant_spec(args):
    algorithm = args.algorithm

    layer_size = 128
    variant_spec = {
        'seed': 1,

        'layer_size': layer_size,
        'policy_params': {
            'type': 'GaussianPolicy',
            'kwargs': {
                'hidden_layer_sizes': (layer_size, layer_size),
                'squash': True,
            },
        },
        'algorithm_params': deep_update(
            ALGORITHM_PARAMS_BASE,
            ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (layer_size, layer_size),
            }
        },
        'run_params': {}
    }

    return variant_spec
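
The returned dict is plain data; in the projects above it typically becomes the Ray Tune trial config. A hypothetical usage sketch (the stand-in args object and dummy trainable below are placeholders, not names from this example, and the exact Tune API may vary by Ray version):

from types import SimpleNamespace

from ray import tune

# Stand-in for the parsed command-line arguments.
args = SimpleNamespace(algorithm='SAC')
variant_spec = get_variant_spec(args)


def dummy_trainable(config):
    # Placeholder for the project's real training loop; it only shows that
    # the variant spec arrives as the trial config.
    print('algorithm params:', config['algorithm_params'])


tune.run(dummy_trainable, config=variant_spec, num_samples=1)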
Code example #4
File: variants.py  Project: nflu/softlearning
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        get_algorithm_params(universe, domain, task),
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}),
    )
    variant_spec = {
        'git_sha': get_git_rev(__file__),
        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': get_environment_params(universe, domain, task),
            },
            'evaluation':
            tune.sample_from(lambda spec: (spec.get('config', spec)[
                'environment_params']['training'])),
        },
        'policy_params': get_policy_params(universe, domain, task),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys':
                tune.sample_from(lambda spec: (spec.get('config', spec)[
                    'policy_params']['kwargs'].get('observation_keys')))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': None,
                'observation_preprocessors_params': {}
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': int(1e6)
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': get_max_path_length(universe, domain, task),
                'min_pool_size': get_max_path_length(universe, domain, task),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': tune.sample_from(get_checkpoint_frequency),
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
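
Here checkpoint_frequency is deferred to a get_checkpoint_frequency helper rather than computed inline. By analogy with the other examples, which use NUM_EPOCHS_PER_DOMAIN.get(domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS, a plausible sketch (the 'n_epochs' key and the constant below are assumptions, not taken from this repository):

NUM_CHECKPOINTS = 10  # assumed value


def get_checkpoint_frequency(spec):
    """Checkpoint roughly NUM_CHECKPOINTS times over the whole run."""
    config = spec.get('config', spec)
    # Assumes the algorithm kwargs carry the epoch count under 'n_epochs'.
    num_epochs = config['algorithm_params']['kwargs']['n_epochs']
    return max(1, num_epochs // NUM_CHECKPOINTS)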
Code example #5
File: base.py  Project: xionghuichen/mopo
def get_variant_spec_base(universe, domain, task, policy, algorithm, env_params):
    algorithm_params = deep_update(
        env_params,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    )
    variant_spec = {
        # 'git_sha': get_git_rev(),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': lambda spec: (
                spec['environment_params']['training']),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': lambda spec: (
                    {
                        'SimpleReplayPool': int(1e6),
                        'TrajectoryReplayPool': int(1e4),
                    }.get(spec['replay_pool_params']['type'], int(1e6))
                ),
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': 88,
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
            'info': ''
        },
    }

    return variant_spec
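
Unlike the other examples, this variant uses bare lambdas instead of tune.sample_from, presumably resolved by the project's own spec-processing step. Resolving the max_size lambda above by hand:

max_size_fn = lambda spec: (
    {
        'SimpleReplayPool': int(1e6),
        'TrajectoryReplayPool': int(1e4),
    }.get(spec['replay_pool_params']['type'], int(1e6)))

assert max_size_fn({'replay_pool_params': {'type': 'SimpleReplayPool'}}) == 10 ** 6
assert max_size_fn({'replay_pool_params': {'type': 'TrajectoryReplayPool'}}) == 10 ** 4
# Unknown pool types fall back to the SimpleReplayPool size.
assert max_size_fn({'replay_pool_params': {'type': 'SomethingElse'}}) == 10 ** 6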
Code example #6
def get_variant_spec_base(universe, domain, task, task_eval, policy, algorithm,
                          from_vision):
    algorithm_params = ALGORITHM_PARAMS_BASE
    algorithm_params = deep_update(
        algorithm_params, ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))

    variant_spec = {
        'git_sha':
        get_git_rev(),
        'environment_params': {
            'training': {
                'domain':
                domain,
                'task':
                task,
                'universe':
                universe,
                'kwargs':
                get_environment_params(universe, domain, task, from_vision),
            },
            'evaluation': {
                'domain':
                domain,
                'task':
                task_eval,
                'universe':
                universe,
                'kwargs':
                (tune.sample_from(lambda spec: (spec.get('config', spec)[
                    'environment_params']['training'].get('kwargs')))
                 if task == task_eval else get_environment_params(
                     universe, domain, task_eval, from_vision)),
            },
        },
        'policy_params':
        deep_update(POLICY_PARAMS_BASE[policy],
                    POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys':
                tune.sample_from(lambda spec: (spec.get('config', spec)[
                    'policy_params']['kwargs'].get('observation_keys')))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys':
                tune.sample_from(lambda spec: (spec.get('config', spec)[
                    'policy_params']['kwargs'].get('observation_keys'))),
                'observation_preprocessors_params': {}
            }
        },
        'distance_fn_params':
        get_distance_fn_params(universe, domain, task),
        'algorithm_params':
        algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                # 'max_size': int(5e5),
                'max_size': tune.grid_search([int(5e4)]),
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': get_max_path_length(universe, domain, task),
                'min_pool_size': 50,
                'batch_size': 256,  # tune.grid_search([128, 256]),
                'store_last_n_paths': 20,
            }
        },
        'run_params': {
            'seed': tune.sample_from(lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': False,
            'checkpoint_frequency': tune.sample_from(get_checkpoint_frequency),
            'checkpoint_replay_pool': False,
        },
    }

    # Filter out parts of the state relating to the object when training from pixels
    env_kwargs = variant_spec['environment_params']['training']['kwargs']
    if from_vision and "device_path" not in env_kwargs.keys():
        env_obs_keys = env_kwargs.get('observation_keys', tuple())

        non_image_obs_keys = tuple(key for key in env_obs_keys
                                   if key != 'pixels')
        variant_spec['replay_pool_params']['kwargs'][
            'obs_save_keys'] = non_image_obs_keys

        non_object_obs_keys = tuple(key for key in env_obs_keys
                                    if 'object' not in key)
        variant_spec['policy_params']['kwargs'][
            'observation_keys'] = variant_spec['exploration_policy_params'][
                'kwargs']['observation_keys'] = variant_spec['Q_params'][
                    'kwargs']['observation_keys'] = variant_spec[
                        'distance_fn_params']['kwargs'][
                            'observation_keys'] = non_object_obs_keys

    return variant_spec
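
The pixel-training branch at the end rewires the observation keys: the replay pool saves the non-image keys, while the policy, exploration policy, Q-function, and distance function all share the keys with 'object' filtered out. A self-contained illustration of that filtering, with made-up key names:

env_obs_keys = ('pixels', 'object_position', 'object_orientation', 'claw_qpos')

non_image_obs_keys = tuple(key for key in env_obs_keys if key != 'pixels')
non_object_obs_keys = tuple(key for key in env_obs_keys if 'object' not in key)

assert non_image_obs_keys == ('object_position', 'object_orientation', 'claw_qpos')
assert non_object_obs_keys == ('pixels', 'claw_qpos')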
Code example #7
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    )
    variant_spec = {
        'git_sha': get_git_rev(__file__),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']
                ['training']
            )),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']
                    ['kwargs']
                    .get('observation_keys')
                ))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': None,
                'observation_preprocessors_params': {}
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': tune.sample_from(lambda spec: (
                    {
                        'SimpleReplayPool': int(1e6),
                        'TrajectoryReplayPool': int(1e4),
                    }.get(
                        spec.get('config', spec)
                        ['replay_pool_params']
                        ['type'],
                        int(1e6))
                )),
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
Code example #8
File: variants.1.py  Project: justinvyu/softlearning
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = ALGORITHM_PARAMS_BASE
    algorithm_params = deep_update(
            algorithm_params,
            ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}),
            ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
        )

    variant_spec = {
        'git_sha': get_git_rev(__file__),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']
                ['training']
            )),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']
                    ['kwargs']
                    .get('observation_keys')
                ))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': None,
                'observation_preprocessors_params': {
                    'observations': None,
                }
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': tune.sample_from(lambda spec: (
                    {
                        'SimpleReplayPool': int(5e5),
                        'TrajectoryReplayPool': int(1e4),
                    }.get(
                        spec.get('config', spec)
                        ['replay_pool_params']
                        ['type'],
                        int(1e6))
                )),
            }
        },
        'sampler_params': deep_update({
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': 1000,
                'batch_size': 256,
            }
        }, SAMPLER_PARAMS_PER_DOMAIN.get(domain, {})),
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': tune.sample_from(lambda spec: (
                25000 // (spec.get('config', spec)
                          ['algorithm_params']
                          ['kwargs']
                          ['epoch_length'])
            )),
        },
    }

    if task == 'InfoScrewV2-v0':
        variant_spec['replay_pool_params']['kwargs']['include_images'] = True
    if task == 'ImageScrewV2-v0' and ENVIRONMENT_PARAMS['DClaw3']['ImageScrewV2-v0']['state_reward']:
        variant_spec['replay_pool_params']['kwargs']['super_observation_space_shape'] = (9+9+2+1+2,)
    if domain == 'HardwareDClaw3':
        variant_spec['sampler_params']['type'] = 'RemoteSampler'
        variant_spec['algorithm_params']['kwargs']['max_train_repeat_per_timestep'] = 1

    return variant_spec
Code example #9
def get_variant_spec_base(universe, domain, task, task_evaluation, policy,
                          algorithm):
    # algorithm_params = deep_update(
    #     ALGORITHM_PARAMS_BASE,
    #     ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    # )
    # algorithm_params = deep_update(
    #     algorithm_params,
    #     ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    # )
    algorithm_params = ALGORITHM_PARAMS_BASE
    algorithm_params = deep_update(
        algorithm_params, ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))

    variant_spec = {
        'domain':
        domain,
        'task':
        task,
        'task_evaluation':
        task_evaluation,
        'universe':
        universe,
        'git_sha':
        get_git_rev(),
        'env_params':
        ENV_PARAMS.get(domain, {}).get(task, {}),
        'policy_params':
        deep_update(POLICY_PARAMS_BASE[policy],
                    POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys':
                tune.sample_from(lambda spec: (spec.get('config', spec)[
                    'policy_params']['kwargs'].get('observation_keys')))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params':
        algorithm_params,
        'replay_pool_params': {
            # 'type': 'SimpleReplayPool',
            # 'type': 'RelabelReplayPool',
            'type': 'HindsightExperienceReplayPool',
            'kwargs': {
                'max_size': 200000,
                # implement this
                'update_batch_fn': tune.function(REPLACE_FLAT_OBSERVATION),
                #'reward_fn': tune.function(SACClassifier._reward_relabeler),
                'reward_fn': None,
                'terminal_fn': None,
                'her_strategy': {
                    'resampling_probability':
                    0.,  # tune.grid_search([.5, 0.8]),
                    'type': 'future',
                }
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length':
                MAX_PATH_LENGTH_PER_DOMAIN.get(domain,
                                               DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size':
                MAX_PATH_LENGTH_PER_DOMAIN.get(domain,
                                               DEFAULT_MAX_PATH_LENGTH),
                'batch_size':
                256,
                'store_last_n_paths':
                20,
            }
        },
        'run_params': {
            'seed': tune.sample_from(lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': DEFAULT_NUM_EPOCHS // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
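
This variant swaps in a HindsightExperienceReplayPool, but with resampling_probability set to 0.0 the 'future' relabeling strategy is effectively disabled (the commented-out grid_search shows 0.5 and 0.8 were also tried). A plausible reading of that knob, as an illustration only and not the pool's actual internals:

import numpy as np

her_strategy = {'resampling_probability': 0., 'type': 'future'}


def should_relabel(strategy, rng=np.random):
    # Relabel a sampled transition's goal with the given probability;
    # at 0.0 this never fires and the pool acts like a plain replay pool.
    return rng.uniform() < strategy['resampling_probability']


assert not any(should_relabel(her_strategy) for _ in range(1000))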
Code example #10
File: base.py  Project: anyboby/ConstrainedMBPO
def get_variant_spec_base(universe, domain, task, policy, algorithm, env_params):
    algorithm_params = deep_update(
        env_params,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    )
    variant_spec = {
        'git_sha': get_git_rev(),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']
                ['training']
            )),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {}),
            {'log_dir':env_params['log_dir']},
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': REPLAY_POOL_PARAMS_PER_ALGO.get(algorithm, REPLAY_POOL_PARAMS_PER_ALGO['default']),
        'sampler_params': {
            'type': SAMPLER_TYPES_PER_ALGO.get(algorithm, SAMPLER_TYPES_PER_ALGO['default']),
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
                'preprocess_type': 'default',  # alternatives: 'default', 'pointgoal0'
            },
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(    #@anyboby uncomment
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            # 'checkpoint_frequency': 1,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec