Code example #1
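# Builds a gym environment from experiment params, wraps it in a multi-camera
# ImageEnv (64x64 image observations with depth), and passes the wrapped env
# on to get_environment.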
def get_environment_from_params_custom(environment_params):
    universe = environment_params['universe']
    task = environment_params['task']
    domain = environment_params['domain']
    environment_kwargs_gym = environment_params.get('kwargs', {}).copy()
    if "map3D" in environment_kwargs_gym:
        environment_kwargs_gym.pop("map3D")
    if "observation_keys" in environment_kwargs_gym:
        environment_kwargs_gym.pop("observation_keys")
    env = gym.make(f"{domain}-{task}", **environment_kwargs_gym)

    camera_space = {
        'dist_low': 0.7, 'dist_high': 1.5,
        'angle_low': 0, 'angle_high': 180,
        'elev_low': -180, 'elev_high': -90,
    }

    env_n = ImageEnv(
            wrapped_env=env,
            imsize=64,
            normalize=True,
            camera_space=camera_space,
            init_camera=(lambda x: init_multiple_cameras(x, camera_space)),
            num_cameras=4,  # 4 for training
            depth=True,
            cam_info=True,
            reward_type='wrapped_env',
            flatten=False
        )

    environment_kwargs = environment_params.get('kwargs', {}).copy()
    environment_kwargs["env"] = env_n
    return get_environment(universe, domain, task, environment_kwargs)
Code example #2
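    # Test fixture: Swimmer-v3 environment, a UniformPolicy built via
    # get_policy_from_params, a SimpleReplayPool, and a RemoteSampler.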
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.policy = get_policy_from_params(
            {'type': 'UniformPolicy'}, env=self.env)
        self.pool = SimpleReplayPool(max_size=100, environment=self.env)
        self.remote_sampler = RemoteSampler(
            max_path_length=10, min_pool_size=10, batch_size=10)
Code example #3
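# Full experiment setup on the MultiGoal environment: replay pool, sampler,
# Q-functions, policy, Q-function/policy plotter, and a SAC instance trained
# with reporter callbacks.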
def run_experiment(variant, reporter):
    env = get_environment('gym', 'MultiGoal', 'Default', {
        'actuation_cost_coeff': 1,
        'distance_cost_coeff': 0.1,
        'goal_reward': 1,
        'init_sigma': 0.1,
    })

    pool = SimpleReplayPool(
        observation_space=env.observation_space,
        action_space=env.action_space,
        max_size=1e6)

    sampler = SimpleSampler(
        max_path_length=30, min_pool_size=100, batch_size=64)

    Qs = get_Q_function_from_variant(variant, env)
    policy = get_policy_from_variant(variant, env, Qs)
    plotter = QFPolicyPlotter(
        Q=Qs[0],
        policy=policy,
        obs_lst=np.array(((-2.5, 0.0),
                          (0.0, 0.0),
                          (2.5, 2.5),
                          (-2.5, -2.5))),
        default_action=(np.nan, np.nan),
        n_samples=100)

    algorithm = SAC(
        sampler=sampler,
        reparameterize=True,
        epoch_length=100,
        n_epochs=1000,
        n_train_repeat=1,
        eval_render_mode=None,
        eval_n_episodes=10,
        eval_deterministic=False,

        env=env,
        policy=policy,
        initial_exploration_policy=None,
        pool=pool,
        Qs=Qs,
        plotter=plotter,

        lr=3e-4,
        target_entropy=-2.0,
        discount=0.99,
        tau=1e-4,

        save_full_state=True,
    )

    initialize_tf_variables(algorithm._session, only_uninitialized=True)

    for train_result in algorithm.train():
        reporter(**train_result)
Code example #4
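    # Test fixture: Swimmer-v3 environment and a FeedforwardGaussianPolicy
    # with two hidden layers of 128 units.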
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.hidden_layer_sizes = (128, 128)

        self.policy = FeedforwardGaussianPolicy(
            input_shapes=self.env.observation_shape,
            output_shape=self.env.action_space.shape,
            hidden_layer_sizes=self.hidden_layer_sizes,
            observation_keys=self.env.observation_keys)
Code example #5
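    # Test fixture: Swimmer-v3 environment and a ContinuousUniformPolicy
    # bounded by the environment's action space.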
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.policy = ContinuousUniformPolicy(
            action_range=(
                self.env.action_space.low,
                self.env.action_space.high,
            ),
            input_shapes=self.env.observation_shape,
            output_shape=self.env.action_shape,
            observation_keys=self.env.observation_keys)
Code example #6
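# MultiGoal experiment that fills the Q-function, policy, and algorithm
# configs in the variant dict and instantiates them through the
# value_functions, policies, and algorithms registries.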
def run_experiment(variant, reporter):
    training_environment = (get_environment(
        'gym', 'MultiGoal', 'Default-v0', {
            'actuation_cost_coeff': 30,
            'distance_cost_coeff': 1,
            'goal_reward': 10,
            'init_sigma': 0.1,
        }))
    evaluation_environment = training_environment.copy()

    pool = SimpleReplayPool(environment=training_environment, max_size=1e6)

    sampler = SimpleSampler(max_path_length=30)

    variant['Q_params']['config'].update({
        'input_shapes': (
            training_environment.observation_shape,
            training_environment.action_shape,
        )
    })
    Qs = value_functions.get(variant['Q_params'])

    variant['policy_params']['config'].update({
        'action_range': (training_environment.action_space.low,
                         training_environment.action_space.high),
        'input_shapes': training_environment.observation_shape,
        'output_shape': training_environment.action_shape,
    })
    policy = policies.get(variant['policy_params'])

    plotter = QFPolicyPlotter(Q=Qs[0],
                              policy=policy,
                              obs_lst=np.array(((-2.5, 0.0), (0.0, 0.0),
                                                (2.5, 2.5), (-2.5, -2.5))),
                              default_action=(np.nan, np.nan),
                              n_samples=100)

    variant['algorithm_params']['config'].update({
        'training_environment': training_environment,
        'evaluation_environment': evaluation_environment,
        'policy': policy,
        'Qs': Qs,
        'pool': pool,
        'sampler': sampler,
        'min_pool_size': 100,
        'batch_size': 64,
        'plotter': plotter,
    })
    algorithm = algorithms.get(variant['algorithm_params'])

    for train_result in algorithm.train():
        reporter(**train_result)
Code example #7
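    # Test fixture: Swimmer-v3 environment, a ContinuousUniformPolicy, a
    # SimpleReplayPool, and a RemoteSampler.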
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.policy = policies.ContinuousUniformPolicy(
            action_range=(
                self.env.action_space.low,
                self.env.action_space.high,
            ),
            input_shapes=self.env.observation_shape,
            output_shape=self.env.action_shape,
            observation_keys=self.env.observation_keys)
        self.pool = SimpleReplayPool(max_size=100, environment=self.env)
        self.remote_sampler = RemoteSampler(max_path_length=10)
Code example #8
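    # Test fixture: Swimmer-v3 environment and a feedforward Q-function that
    # takes (observations, actions) as input.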
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.hidden_layer_sizes = (8, 8)

        observation_shapes = OrderedDict(
            ((key, value)
             for key, value in self.env.observation_shape.items()))
        action_shape = self.env.action_shape
        input_shapes = (observation_shapes, action_shape)
        self.value_function = feedforward_Q_function(
            input_shapes=input_shapes,
            hidden_layer_sizes=self.hidden_layer_sizes,
        )
Code example #9
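    # Test fixture: Swimmer-v3 environment and a RealNVPPolicy with two
    # coupling layers.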
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.hidden_layer_sizes = (16, 16)
        self.num_coupling_layers = 2

        self.policy = RealNVPPolicy(
            input_shapes=self.env.observation_shape,
            output_shape=self.env.action_shape,
            action_range=(
                self.env.action_space.low,
                self.env.action_space.high,
            ),
            hidden_layer_sizes=self.hidden_layer_sizes,
            num_coupling_layers=self.num_coupling_layers,
            observation_keys=self.env.observation_keys,
        )
Code example #10
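# MultiGoal experiment built entirely from a variant via the
# get_*_from_variant helpers, then trained with reporter callbacks.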
def run_experiment(variant, reporter):
    training_environment = (
        get_environment('gym', 'MultiGoal', 'Default-v0', {
            'actuation_cost_coeff': 30,
            'distance_cost_coeff': 1,
            'goal_reward': 10,
            'init_sigma': 0.1,
        }))
    evaluation_environment = training_environment.copy()

    pool = SimpleReplayPool(
        environment=training_environment,
        max_size=1e6)

    sampler = SimpleSampler(max_path_length=30)

    Qs = get_Q_function_from_variant(variant, training_environment)
    policy = get_policy_from_variant(variant, training_environment)
    plotter = QFPolicyPlotter(
        Q=Qs[0],
        policy=policy,
        obs_lst=np.array(((-2.5, 0.0),
                          (0.0, 0.0),
                          (2.5, 2.5),
                          (-2.5, -2.5))),
        default_action=(np.nan, np.nan),
        n_samples=100)

    algorithm = get_algorithm_from_variant(
        variant=variant,
        training_environment=training_environment,
        evaluation_environment=evaluation_environment,
        policy=policy,
        Qs=Qs,
        pool=pool,
        sampler=sampler,
        min_pool_size=100,
        batch_size=46,
        plotter=plotter,
    )

    initialize_tf_variables(algorithm._session, only_uninitialized=True)

    for train_result in algorithm.train():
        reporter(**train_result)
Code example #11
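    # Fills half the pool with a path from a dict-observation environment and
    # checks that last_n_batch returns the same samples.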
    def test_add_samples_dict_observation(self):
        env = get_environment('gym', 'Swimmer', 'v3', {})
        pool = create_pool(env=env, max_size=100)

        env.reset()

        num_samples = pool._max_size // 2

        samples = {
            'observations': {
                name: np.empty((num_samples, *space.shape), dtype=space.dtype)
                for name, space in env.observation_space.spaces.items()
            },
            'next_observations': {
                name: np.empty((num_samples, *space.shape), dtype=space.dtype)
                for name, space in env.observation_space.spaces.items()
            },
            'actions': np.empty((num_samples, *env.action_space.shape)),
            'rewards': np.empty((num_samples, 1), dtype=np.float32),
            'terminals': np.empty((num_samples, 1), dtype=bool),
        }

        for i in range(num_samples):
            action = env.action_space.sample()
            observation, reward, terminal, info = env.step(action)
            for name, value in observation.items():
                samples['observations'][name][i, :] = value
                samples['next_observations'][name][i, :] = value
            samples['actions'][i] = action
            samples['rewards'][i] = reward
            samples['terminals'][i] = terminal

        pool.add_path(samples)
        last_n_batch = pool.last_n_batch(num_samples)
        np.testing.assert_equal(
            {
                key: value
                for key, value in last_n_batch.items()
                if key not in ('episode_index_backwards',
                               'episode_index_forwards')
            }, samples)
Code example #12
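    # Verifies that create_pool builds correctly named, typed, and shaped
    # fields for both Dict and Box observation spaces.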
    def test_create_pool(self):
        ENVIRONMENTS = (
            get_environment('gym', 'Swimmer', 'v3', {}),
            gym.make('Swimmer-v3'),
            gym.make('HandManipulateBlock-v0'),
        )
        for environment in ENVIRONMENTS:
            pool = create_pool(env=environment, max_size=100)

            def verify_field(field, expected_name, expected_dtype,
                             expected_shape):
                self.assertIsInstance(field, Field)
                self.assertEqual(field.name, expected_name)
                self.assertEqual(field.dtype, expected_dtype)
                self.assertEqual(field.shape, expected_shape)
                self.assertEqual(field.initializer, np.zeros)
                self.assertEqual(field.default_value, 0.0)

            if isinstance(environment.observation_space, gym.spaces.Dict):
                self.assertIsInstance(pool.fields['observations'], dict)
                for name, space in environment.observation_space.spaces.items():
                    self.assertIn(name, pool.fields['observations'])
                    field = pool.fields['observations'][name]
                    verify_field(field, name, space.dtype, space.shape)

            elif isinstance(environment.observation_space, gym.spaces.Box):
                self.assertIsInstance(pool.fields['observations'], Field)
                verify_field(pool.fields['observations'], 'observations',
                             environment.observation_space.dtype,
                             environment.observation_space.shape)
            else:
                raise ValueError(environment.observation_space)

            verify_field(pool.fields['actions'], 'actions',
                         environment.action_space.dtype,
                         environment.action_space.shape)

            verify_field(pool.fields['rewards'], 'rewards', 'float32', (1, ))
            verify_field(pool.fields['terminals'], 'terminals', 'bool', (1, ))
Code example #13
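    # Verifies the pool fields created for a Dict observation space, including
    # one field per observation key.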
    def test_create_pool(self):
        env = get_environment('gym', 'Swimmer', 'v3', {})
        self.assertIsInstance(env.observation_space, gym.spaces.Dict)
        pool = create_pool(env=env, max_size=100)

        def verify_field(field, expected_name, expected_dtype, expected_shape):
            self.assertIsInstance(field, Field)
            self.assertEqual(field.name, expected_name)
            self.assertEqual(field.dtype, expected_dtype)
            self.assertEqual(field.shape, expected_shape)
            self.assertEqual(field.initializer, np.zeros)
            self.assertEqual(field.default_value, 0.0)

        self.assertIsInstance(pool.fields['observations'], dict)
        for name, space in env.observation_space.spaces.items():
            self.assertIn(name, pool.fields['observations'])
            field = pool.fields['observations'][name]
            verify_field(field, name, space.dtype, space.shape)

        verify_field(pool.fields['actions'], 'actions', env.action_space.dtype,
                     env.action_space.shape)

        verify_field(pool.fields['rewards'], 'rewards', 'float32', (1, ))
        verify_field(pool.fields['terminals'], 'terminals', 'bool', (1, ))
Code example #14
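    # Fills the pool with random-length HandReach-v0 episodes and checks that
    # hindsight (HER) resampling matches the configured strategy.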
    def test_resampling(self, strategy_type, resampling_probability):
        env = get_environment('gym', 'HandReach', 'v0', {
            'observation_keys': ('observation', ),
            'goal_keys': ('desired_goal', ),
        })
        assert isinstance(env.observation_space, gym.spaces.Dict)

        max_size = 1000
        episode_length = 50

        her_strategy = {
            'type': strategy_type,
            'resampling_probability': resampling_probability,
        }

        pool = create_pool(
            env=env,
            max_size=max_size,
            her_strategy=her_strategy,
        )

        strategy_validator = {
            'random': RandomStrategyValidator,
            'final': FinalStrategyValidator,
            'episode': EpisodeStrategyValidator,
            'future': FutureStrategyValidator,
        }[strategy_type](her_strategy=her_strategy)

        episode_lengths = []
        while pool.size < pool._max_size:
            episode_length = np.random.randint(5, 50)
            episode_lengths.append(episode_length)

            samples = {
                'observations': {
                    name: np.empty(
                        (episode_length, *space.shape), dtype=space.dtype)
                    for name, space in env.observation_space.spaces.items()
                },
                'next_observations': {
                    name: np.empty(
                        (episode_length, *space.shape), dtype=space.dtype)
                    for name, space in env.observation_space.spaces.items()
                },
                'actions': np.empty((episode_length, *env.action_space.shape)),
                'rewards': np.empty((episode_length, 1), dtype=np.float32),
                'terminals': np.empty((episode_length, 1), dtype=bool),
            }

            observation = env.reset()
            for i in range(episode_length):
                action = env.action_space.sample()
                next_observation, reward, terminal, info = env.step(action)
                for name, value in observation.items():
                    samples['observations'][name][i, :] = value
                    samples['next_observations'][name][i, :] = next_observation[name]
                samples['actions'][i] = action
                samples['rewards'][i] = reward
                samples['terminals'][i] = terminal
                observation = next_observation

            pool.add_path(samples)

        for i in range(100):
            random_batch = pool.random_batch(256)
            strategy_validator.verify_batch(random_batch)

        assert strategy_validator.statistics_match()
Code example #15
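    # Test fixture: Swimmer-v3 environment and a UniformPolicy.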
    def setUp(self):
        self.env = get_environment('gym', 'Swimmer', 'v3', {})
        self.policy = UniformPolicy(
            input_shapes=self.env.observation_shape,
            output_shape=self.env.action_space.shape,
            observation_keys=self.env.observation_keys)