コード例 #1
0
    def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
        """Check that every action-sampling entry point yields valid actions.

        Actions are sampled from a latent, from a task vector and from a
        task-augmented observation, first one at a time and then in batches
        of three. Every sampled action must be contained in the action space
        of the environment.

        Args:
            obs_dim: Observation dimension forwarded to DummyBoxEnv.
            task_num: Length of the task vector.
            latent_dim: Length of the latent vector.
            action_dim: Action dimension forwarded to DummyBoxEnv.

        """
        env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
        latent_space = akro.Box(low=np.zeros(latent_dim),
                                high=np.ones(latent_dim))
        encoder = GaussianMLPEncoder(
            InOutSpec(input_space=task_space, output_space=latent_space))
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        env.reset()
        obs = env.step(1).observation
        latent = np.random.random((latent_dim, ))
        # One-hot vector selecting task 0.
        task = np.zeros(task_num)
        task[0] = 1

        from_latent, _ = policy.get_action_given_latent(obs, latent)
        from_task, _ = policy.get_action_given_task(obs, task)
        from_aug_obs, _ = policy.get_action(
            np.concatenate([obs.flatten(), task]))

        for single_action in (from_latent, from_task, from_aug_obs):
            assert env.action_space.contains(single_action)

        batch_size = 3
        obses = [obs] * batch_size
        latents = [latent] * batch_size
        tasks = [task] * batch_size
        aug_obses = [np.concatenate([obs.flatten(), task])] * batch_size
        batch_from_latent, _ = policy.get_actions_given_latents(
            obses, latents)
        batch_from_task, _ = policy.get_actions_given_tasks(obses, tasks)
        batch_from_aug_obs, _ = policy.get_actions(aug_obses)

        batched_actions = chain(batch_from_latent, batch_from_task,
                                batch_from_aug_obs)
        for batched_action in batched_actions:
            assert env.action_space.contains(batched_action)
コード例 #2
0
    def test_get_vars(self):
        """Check the policy's variable lists and that it still yields actions.

        The trainable and global variable lists must be identical and contain
        the expected number of variables. After assigning ones, and then
        zeros, to every variable, the sampled action is checked for non-zero
        entries.
        """
        obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
        env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec, hidden_sizes=[32, 32, 32])
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder,
                                                hidden_sizes=[32, 32, 32])

        vars1 = sorted(policy.get_trainable_vars(), key=lambda v: v.name)
        vars2 = sorted(policy.get_global_vars(), key=lambda v: v.name)

        assert vars1 == vars2
        # Two networks, each with 4 layers * (1 weight + 1 bias) + 1 log_std.
        assert len(vars1) == 2 * (4 * 2 + 1)

        obs = np.random.random(obs_dim)
        latent = np.random.random((latent_dim, ))

        # get_action_given_latent returns an (action, info) pair. Comparing
        # the pair itself with 0 is always "not equal", so the action has to
        # be unpacked before it is inspected.
        for var in vars1:
            var.assign(np.ones(var.shape))
        action, _ = policy.get_action_given_latent(obs, latent)
        assert np.any(action != 0)

        for var in vars1:
            var.assign(np.zeros(var.shape))
        action, _ = policy.get_action_given_latent(obs, latent)
        assert not np.all(action == 0)
コード例 #3
0
ファイル: kant.py プロジェクト: fangqyi/garage
    def get_env_spec(cls, env_spec, latent_dim, num_skills, module):
        """Build the input/output spec for one of the supported modules.

        Args:
            env_spec: Environment spec; the flattened sizes of its
                observation and action space shapes drive the input size.
            latent_dim (int): Latent dimension.
            num_skills (int): Number of skills.
            module (str): Module to build the spec for. One of 'encoder',
                'vf', 'controller_policy' or 'qf'.

        Returns:
            InOutSpec or EnvSpec: An InOutSpec for 'encoder', an EnvSpec for
                every other module. Both wrap float32 Box spaces with bounds
                [-1, 1].

        Raises:
            ValueError: If `module` is not one of the supported names.

        """
        obs_dim = int(np.prod(env_spec.observation_space.shape))
        action_dim = int(np.prod(env_spec.action_space.shape))
        if module == 'encoder':
            in_dim = obs_dim + action_dim + num_skills + 1
            out_dim = latent_dim * 2
        elif module == 'vf':
            in_dim = obs_dim
            out_dim = latent_dim
        elif module in ('controller_policy', 'qf'):
            in_dim = obs_dim + latent_dim
            out_dim = num_skills
        else:
            # Previously an unknown name fell through and failed later with
            # an unbound-local error; fail early with a clear message.
            raise ValueError(
                "Unknown module {!r}; expected 'encoder', 'vf', "
                "'controller_policy' or 'qf'.".format(module))

        in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
        out_space = akro.Box(low=-1,
                             high=1,
                             shape=(out_dim, ),
                             dtype=np.float32)

        # Only the encoder is described by an InOutSpec.
        spec_type = InOutSpec if module == 'encoder' else EnvSpec
        return spec_type(in_space, out_space)
コード例 #4
0
    def __init__(self,
                 goal=np.array((1., 1.), dtype=np.float32),
                 arena_size=5.,
                 done_bonus=0.,
                 never_done=False,
                 max_episode_length=math.inf):
        """Store the configuration and build the spaces and the env spec.

        Args:
            goal: Goal coordinates; copied into a float32 array. Every
                coordinate must lie within [-arena_size, arena_size].
            arena_size: Bound used to validate `goal`; stored as-is.
            done_bonus: Stored as-is.
            never_done: Stored as-is.
            max_episode_length: Stored and forwarded to the EnvSpec.

        """
        self._goal = np.array(goal, dtype=np.float32)
        self._done_bonus = done_bonus
        self._never_done = never_done
        self._arena_size = arena_size

        goal_in_arena = np.logical_and(self._goal >= -arena_size,
                                       self._goal <= arena_size)
        assert goal_in_arena.all()

        self._step_cnt = None
        self._max_episode_length = max_episode_length
        self._visualize = False

        self._point = np.zeros_like(self._goal)
        self._task = {'goal': self._goal}

        unbounded = np.inf
        self._observation_space = akro.Box(low=-unbounded,
                                           high=unbounded,
                                           shape=(3, ),
                                           dtype=np.float32)
        max_action = 0.1
        self._action_space = akro.Box(low=-max_action,
                                      high=max_action,
                                      shape=(2, ),
                                      dtype=np.float32)
        self._spec = EnvSpec(action_space=self.action_space,
                             observation_space=self.observation_space,
                             max_episode_length=max_episode_length)
コード例 #5
0
def test_new_time_step(sample_data):
    """Check that TimeStep exposes the very objects it was constructed from.

    The checks use identity (`is`), first on the fixture data and then on a
    second TimeStep built with a new env spec and freshly sampled
    observation, next observation and action.

    Args:
        sample_data (dict): Keyword arguments for TimeStep.

    """
    fixture_fields = ('env_spec', 'observation', 'action', 'reward',
                      'step_type', 'env_info', 'agent_info')
    step = TimeStep(**sample_data)
    for field in fixture_fields:
        assert getattr(step, field) is sample_data[field]
    del step

    spec_obs_space = akro.Box(low=-1,
                              high=10,
                              shape=(4, 3, 2),
                              dtype=np.float32)
    spec_act_space = akro.Box(low=-1, high=10, shape=(4, 2), dtype=np.float32)
    sample_data['env_spec'] = EnvSpec(spec_obs_space, spec_act_space)

    # The samples are drawn from spaces with the same shapes as the spec's
    # spaces but with bounds of [-1000, 1000].
    sampling_obs_space = akro.Box(low=-1000,
                                  high=1000,
                                  shape=(4, 3, 2),
                                  dtype=np.float32)
    sampling_act_space = akro.Box(low=-1000,
                                  high=1000,
                                  shape=(4, 2),
                                  dtype=np.float32)
    sample_data['observation'] = sampling_obs_space.sample()
    sample_data['next_observation'] = sampling_obs_space.sample()
    sample_data['action'] = sampling_act_space.sample()
    step = TimeStep(**sample_data)

    for field in ('observation', 'next_observation', 'action'):
        assert getattr(step, field) is sample_data[field]
コード例 #6
0
ファイル: pearl.py プロジェクト: liuying1905/garage
    def get_env_spec(cls, env_spec, latent_dim, module):
        """Get the spec of the encoder or value function for a latent size.

        Args:
            env_spec (EnvSpec): Environment specification.
            latent_dim (int): Latent dimension.
            module (str): Module to get the spec for, either 'encoder' or
                'vf'.

        Returns:
            InOutSpec or EnvSpec: An InOutSpec for 'encoder' and an EnvSpec
                for 'vf'. Both wrap float32 Box spaces with bounds [-1, 1].

        Raises:
            ValueError: If `module` is neither 'encoder' nor 'vf'.

        """
        obs_dim = int(np.prod(env_spec.observation_space.shape))
        action_dim = int(np.prod(env_spec.action_space.shape))
        if module == 'encoder':
            in_dim = obs_dim + action_dim + 1
            out_dim = latent_dim * 2
            spec_type = InOutSpec
        elif module == 'vf':
            in_dim = obs_dim
            out_dim = latent_dim
            spec_type = EnvSpec
        else:
            # Previously an unknown name fell through and failed later with
            # an unbound-local error; fail early with a clear message.
            raise ValueError(
                "Unknown module {!r}; expected 'encoder' or 'vf'.".format(
                    module))

        in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
        out_space = akro.Box(low=-1,
                             high=1,
                             shape=(out_dim, ),
                             dtype=np.float32)
        return spec_type(in_space, out_space)
コード例 #7
0
    def load_model(self, model_path=None):
        """Create the OsimModel and the spec/space attributes derived from it.

        Args:
            model_path: Optional path that replaces `self.model_path` when it
                is truthy; otherwise the existing `self.model_path` is used.

        """
        if model_path:
            self.model_path = model_path

        self.osim_model = OsimModel(
            self.model_path,
            self.visualize,
            integrator_accuracy=self.integrator_accuracy)

        # Create spec, action space and observation space mocks for
        # compatibility with OpenAI gym.
        self.spec = Spec()
        self.spec.timestep_limit = self.time_limit
        self.spec.action_space = akro.Box(
            low=0, high=1, shape=(self.osim_model.get_action_space_size(), ))
        self.spec.observation_space = akro.Box(
            low=-math.pi * 100,
            high=math.pi * 100,
            shape=(self.get_observation_space_size(), ))

        # (low, high) pair of per-dimension bounds for the action space.
        self.action_space = ([0.0] * self.osim_model.get_action_space_size(),
                             [1.0] * self.osim_model.get_action_space_size())
        # NOTE(review): both observation bounds below are all zeros, unlike
        # spec.observation_space above, which uses +/- 100 * pi. An earlier
        # (now removed) variant used +/- 100 * pi here too; confirm intent.
        self.observation_space = ([0] * self.get_observation_space_size(),
                                  [0] * self.get_observation_space_size())

        # Replace the plain (low, high) pairs by whatever convert_to_gym
        # returns for them.
        self.action_space = convert_to_gym(self.action_space)
        self.observation_space = convert_to_gym(self.observation_space)
コード例 #8
0
def test_env_spec_pickleable():
    """Check that an EnvSpec survives a cloudpickle round trip."""
    original = EnvSpec(akro.Box(-1, 1, (1, )), akro.Box(-2, 2, (2, )), 500)
    restored = cloudpickle.loads(cloudpickle.dumps(original))
    assert restored
    compared_attrs = ('action_space', 'observation_space',
                      'max_episode_length')
    for attr in compared_attrs:
        assert getattr(restored, attr) == getattr(original, attr)
コード例 #9
0
    def test_auxiliary(self):
        """Check the built distributions and the policy's auxiliary spaces.

        The policy is built on rank-3 placeholders; the last dimension of the
        two resulting distribution means and the flat sizes of the latent,
        task and augmented observation spaces are verified.
        """
        obs_dim = (2, )
        action_dim = (2, )
        task_num = 2
        latent_dim = 2
        env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
        latent_space = akro.Box(low=np.zeros(latent_dim),
                                high=np.ones(latent_dim))
        encoder = GaussianMLPEncoder(
            InOutSpec(input_space=task_space, output_space=latent_space))
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        placeholder_shape = (None, None, 2)
        obs_input = tf.compat.v1.placeholder(tf.float32,
                                             shape=placeholder_shape)
        task_input = tf.compat.v1.placeholder(tf.float32,
                                              shape=placeholder_shape)
        networks = policy.build(obs_input, task_input)
        policy_dist = networks[0].dist
        encoder_dist = networks[1].dist

        policy_loc_shape = policy_dist.loc.get_shape().as_list()
        assert policy_loc_shape[-1] == env.action_space.flat_dim
        assert policy.encoder == encoder
        assert policy.latent_space.flat_dim == latent_dim
        assert policy.task_space.flat_dim == task_num
        expected_aug_dim = env.observation_space.flat_dim + task_num
        assert policy.augmented_observation_space.flat_dim == expected_aug_dim
        encoder_loc_shape = encoder_dist.loc.get_shape().as_list()
        assert encoder_loc_shape[-1] == latent_dim
コード例 #10
0
ファイル: test_functions.py プロジェクト: fangqyi/garage
def test_log_multitask_performance_task_id():
    """Check the per-task rows written by log_multitask_performance.

    A batch of four trajectories tagged with task ids 1, 3, 1 and 4 is logged
    against a name map that also contains an unused task id (5). The CSV row
    is then checked for iteration, trajectory count and success rate per
    task, and for NaN statistics on the task without trajectories.
    """
    lengths = np.array([10, 5, 1, 1])
    batch = TrajectoryBatch(
        EnvSpec(akro.Box(np.array([0., 0., 0.]), np.array([1., 1., 1.])),
                akro.Box(np.array([-1., -1.]), np.array([0., 0.]))),
        observations=np.ones((sum(lengths), 3), dtype=np.float32),
        last_observations=np.ones((len(lengths), 3), dtype=np.float32),
        actions=np.zeros((sum(lengths), 2), dtype=np.float32),
        rewards=np.array([
            0.34026529, 0.58263177, 0.84307509, 0.97651095, 0.81723901,
            0.22631398, 0.03421301, 0.97515046, 0.64311832, 0.65068933,
            0.17657714, 0.04783857, 0.73904013, 0.41364329, 0.52235551,
            0.24203526, 0.43328910
        ]),
        terminals=np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1],
                           dtype=bool),
        env_infos={
            'success':
            np.array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
                     dtype=bool),
            'task_id':
            np.array([1] * 10 + [3] * 5 + [1] + [4])
        },
        agent_infos={},
        lengths=lengths)

    # The context manager closes (and thereby removes) the temporary file
    # even when logging or reading fails, instead of leaving that to garbage
    # collection.
    with tempfile.NamedTemporaryFile() as log_file:
        csv_output = dowel.CsvOutput(log_file.name)
        logger.add_output(csv_output)
        log_multitask_performance(7, batch, 0.8, {
            1: 'env1',
            3: 'env2',
            4: 'env3',
            5: 'env4'
        })
        logger.log(tabular)
        logger.dump_output_type(dowel.CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))
    res = {k: float(r) for (k, r) in rows[0].items()}
    assert res['env1/Iteration'] == 7
    assert res['env2/Iteration'] == 7
    assert res['env3/Iteration'] == 7
    assert res['env4/Iteration'] == 7
    assert res['env1/NumTrajs'] == 2
    assert res['env2/NumTrajs'] == 1
    assert res['env3/NumTrajs'] == 1
    assert res['env4/NumTrajs'] == 0
    assert math.isclose(res['env1/SuccessRate'], 0.5)
    assert math.isclose(res['env2/SuccessRate'], 1.0)
    assert math.isclose(res['env3/SuccessRate'], 1.0)
    # No trajectory carries task id 5, so its statistics are NaN.
    assert math.isnan(res['env4/SuccessRate'])
    assert math.isnan(res['env4/AverageReturn'])
コード例 #11
0
    def test_auxiliary(self):
        """Check variable counts, tensor shapes and parameter round trips.

        The encoder is built on a rank-3 placeholder. The test then verifies
        the number of variables, the shapes of the exposed tensors, the
        input/output dimensions and that parameter values can be set, read
        back and unflattened.
        """
        input_space = akro.Box(np.array([-1, -1]), np.array([1, 1]))
        latent_space = akro.Box(np.array([-2, -2, -2]), np.array([2, 2, 2]))
        embedding_spec = InOutSpec(input_space=input_space,
                                   output_space=latent_space)
        embedding = GaussianMLPEncoder(embedding_spec,
                                       hidden_sizes=[32, 32, 32])
        placeholder_shape = (None, None, embedding.input_dim)
        task_input = tf.compat.v1.placeholder(tf.float32,
                                              shape=placeholder_shape)
        embedding.build(task_input)

        # 9 variables: (3 hidden + 1 output) * (1 weight + 1 bias) + 1 log_std
        expected_num_vars = 9
        assert len(embedding.get_params()) == expected_num_vars
        assert len(embedding.get_global_vars()) == expected_num_vars

        loc_shape = embedding.distribution.loc.get_shape().as_list()
        assert loc_shape[-1] == latent_space.shape[0]
        input_shape = embedding.input.shape.as_list()
        assert input_shape == [None, None, input_space.shape[0]]
        mean_shape = embedding.latent_mean.shape.as_list()
        assert mean_shape == [None, None, latent_space.shape[0]]
        std_param_shape = embedding.latent_std_param.shape.as_list()
        assert std_param_shape == [None, None, latent_space.shape[0]]

        # To increase coverage in embeddings/base.py
        embedding.reset()
        assert embedding.input_dim == embedding_spec.input_space.flat_dim
        assert embedding.output_dim == embedding_spec.output_space.flat_dim

        var_shapes = [
            (2, 32),  # hidden layer 1 weight
            (32, ),  # hidden layer 1 bias
            (32, 32),  # hidden layer 2 weight
            (32, ),  # hidden layer 2 bias
            (32, 32),  # hidden layer 3 weight
            (32, ),  # hidden layer 3 bias
            (32, 3),  # output layer weight
            (3, ),  # output layer bias
            (3, ),  # log_std
        ]
        assert sorted(embedding.get_param_shapes()) == sorted(var_shapes)

        var_count = sum(np.prod(shape) for shape in var_shapes)
        embedding.set_param_values(np.ones(var_count))
        assert (embedding.get_param_values() == np.ones(var_count)).all()

        unflattened = embedding.flat_to_params(np.ones(var_count))
        unflattened_shapes = sorted(np.shape(param) for param in unflattened)
        assert unflattened_shapes == sorted(var_shapes)
コード例 #12
0
    def observation_space(self):
        """Return the observation space.

        Returns:
            akro.Dict: Space with float32 Box entries bounded by [-200, 200]:
                'achieved_goal' and 'desired_goal' of shape (3,) and
                'observation' of shape (25,).
        """
        entry_sizes = (
            ('achieved_goal', 3),
            ('desired_goal', 3),
            ('observation', 25),
        )
        return akro.Dict({
            key: akro.Box(low=-200.,
                          high=200.,
                          shape=(size, ),
                          dtype=np.float32)
            for key, size in entry_sizes
        })
コード例 #13
0
def test_set_output_size(kernel_sizes, hidden_channels, strides, pool_shape,
                         pool_stride):
    """Check that CNNModule maps a batch of images to the spec's output size.

    Args:
        kernel_sizes: Forwarded to CNNModule.
        hidden_channels: Forwarded to CNNModule.
        strides: Forwarded to CNNModule.
        pool_shape: Side length used for the square pooling window.
        pool_stride: Stride used in both pooling directions.

    """
    batch_size = 10
    output_size = 200
    image_shape = [3, 19, 15]
    input_space = akro.Box(shape=image_shape, high=np.inf, low=-np.inf)
    output_space = akro.Box(shape=[output_size], high=np.inf, low=-np.inf)
    model = CNNModule(InOutSpec(input_space, output_space),
                      image_format='NCHW',
                      hidden_channels=hidden_channels,
                      kernel_sizes=kernel_sizes,
                      strides=strides,
                      pool_shape=[(pool_shape, pool_shape)],
                      pool_stride=[(pool_stride, pool_stride)],
                      layer_normalization=True)
    images = torch.ones(batch_size, *image_shape)
    output = model(images)
    assert output.shape == (batch_size, output_size)
コード例 #14
0
    def get_infer_spec(cls, env_spec, latent_dim, inference_window_size):
        """Get the embedding spec of the inference.

        The input space stacks the flattened observation bounds
        `inference_window_size` times, i.e. one row per timestep of the
        inference window. The output space is the latent space.

        Args:
            env_spec (garage.envs.EnvSpec): Environment spec.
            latent_dim (int): Latent dimension.
            inference_window_size (int): Length of inference window.

        Returns:
            garage.InOutSpec: Inference spec.

        """
        latent_space = cls._get_latent_space(latent_dim)

        obs_space = env_spec.observation_space
        lower, upper = obs_space.bounds
        flat_lower = obs_space.flatten(lower)
        flat_upper = obs_space.flatten(upper)
        window_lower = np.stack(
            [flat_lower for _ in range(inference_window_size)])
        window_upper = np.stack(
            [flat_upper for _ in range(inference_window_size)])

        return InOutSpec(akro.Box(window_lower, window_upper), latent_space)
コード例 #15
0
def test_act_box_env_spec_mismatch_eps(eps_data):
    """Expect EpisodeBatch to reject data after the action space is swapped.

    The env spec is replaced by one whose action space is a (4, 3, 2) Box;
    constructing the batch must raise a ValueError matching
    'Each action has shape'.

    Args:
        eps_data (dict): Keyword arguments for EpisodeBatch.

    """
    with pytest.raises(ValueError, match='Each action has shape'):
        obs_space = eps_data['env_spec'].observation_space
        swapped_act_space = akro.Box(low=1,
                                     high=np.inf,
                                     shape=(4, 3, 2),
                                     dtype=np.float32)
        eps_data['env_spec'] = EnvSpec(obs_space, swapped_act_space)
        EpisodeBatch(**eps_data)
コード例 #16
0
def test_act_box_env_spec_mismatch_batch(batch_data):
    """Expect TimeStepBatch to reject data after the action space is swapped.

    The env spec is replaced by one whose action space is a (4, 3, 2) Box;
    constructing the batch must raise a ValueError matching
    'Each action has'.

    Args:
        batch_data (dict): Keyword arguments for TimeStepBatch.

    """
    with pytest.raises(ValueError, match='Each action has'):
        obs_space = batch_data['env_spec'].observation_space
        swapped_act_space = akro.Box(low=1,
                                     high=np.inf,
                                     shape=(4, 3, 2),
                                     dtype=np.float32)
        batch_data['env_spec'] = EnvSpec(obs_space, swapped_act_space)
        TimeStepBatch(**batch_data)
コード例 #17
0
def test_log_performance():
    """Check the summary row written by log_performance.

    A batch of four episodes (lengths 10, 5, 1 and 1) is logged with the
    prefix 'test_log_performance', and the resulting CSV row is compared
    against the expected iteration, episode count, rates and return
    statistics.
    """
    lengths = np.array([10, 5, 1, 1])
    batch = EpisodeBatch(
        EnvSpec(akro.Box(np.array([0., 0., 0.]), np.array([1., 1., 1.])),
                akro.Box(np.array([-1., -1.]), np.array([0., 0.]))),
        observations=np.ones((sum(lengths), 3), dtype=np.float32),
        last_observations=np.ones((len(lengths), 3), dtype=np.float32),
        actions=np.zeros((sum(lengths), 2), dtype=np.float32),
        rewards=np.array([
            0.34026529, 0.58263177, 0.84307509, 0.97651095, 0.81723901,
            0.22631398, 0.03421301, 0.97515046, 0.64311832, 0.65068933,
            0.17657714, 0.04783857, 0.73904013, 0.41364329, 0.52235551,
            0.24203526, 0.43328910
        ]),
        # Two multi-step episodes that end in TERMINAL, followed by two
        # single-step episodes that consist of a FIRST step only.
        step_types=np.array(
            [StepType.FIRST] + [StepType.MID] * (lengths[0] - 2) +
            [StepType.TERMINAL] + [StepType.FIRST] + [StepType.MID] *
            (lengths[1] - 2) + [StepType.TERMINAL] + [StepType.FIRST] +
            [StepType.FIRST],
            dtype=StepType),
        env_infos={
            'success':
            np.array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
                     dtype=bool)
        },
        agent_infos={},
        lengths=lengths)

    # The context manager closes (and thereby removes) the temporary file
    # even when logging or reading fails, instead of leaving that to garbage
    # collection.
    with tempfile.NamedTemporaryFile() as log_file:
        csv_output = dowel.CsvOutput(log_file.name)
        logger.add_output(csv_output)
        log_performance(7, batch, 0.8, prefix='test_log_performance')
        logger.log(tabular)
        logger.dump_output_type(dowel.CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))
    res = {k: float(r) for (k, r) in rows[0].items()}
    assert res['test_log_performance/Iteration'] == 7
    assert res['test_log_performance/NumEpisodes'] == 4
    assert math.isclose(res['test_log_performance/SuccessRate'], 0.75)
    assert math.isclose(res['test_log_performance/TerminationRate'], 0.5)
    assert math.isclose(res['test_log_performance/AverageDiscountedReturn'],
                        1.1131040640673113)
    assert math.isclose(res['test_log_performance/AverageReturn'],
                        2.1659965525)
    assert math.isclose(res['test_log_performance/StdReturn'],
                        2.354067152038576)
コード例 #18
0
def test_act_box_env_spec_mismatch_eps(eps_data):
    """Expect EpisodeBatch to reject data after the action space is replaced.

    The action space of the existing env spec is overwritten with a
    (4, 3, 2) Box; constructing the batch must raise a ValueError matching
    'actions should have'.

    Args:
        eps_data (dict): Keyword arguments for EpisodeBatch.

    """
    with pytest.raises(ValueError, match='actions should have'):
        replaced_space = akro.Box(low=1,
                                  high=np.inf,
                                  shape=(4, 3, 2),
                                  dtype=np.float32)
        eps_data['env_spec'].action_space = replaced_space
        EpisodeBatch(**eps_data)
コード例 #19
0
def concat_spaces(top, bottom):
    """Concatenate the bounds of two Box spaces into a single Box.

    Args:
        top (akro.Box): Space whose bounds come first.
        bottom (akro.Box): Space whose bounds are appended.

    Returns:
        akro.Box: Box built from the concatenated lower and upper bounds.

    """
    for space in (top, bottom):
        assert isinstance(space, akro.Box)

    (top_low, top_high), (bottom_low, bottom_high) = top.bounds, bottom.bounds
    joined_low = np.concatenate([top_low, bottom_low])
    joined_high = np.concatenate([top_high, bottom_high])
    return akro.Box(joined_low, joined_high)
コード例 #20
0
    def action_space(self):
        """Return the action space.

        Returns:
            akro.Box: float32 Box of shape (1,) bounded by [-5, 5].

        """
        bound = 5.0
        return akro.Box(low=-bound, high=bound, shape=(1, ), dtype=np.float32)
コード例 #21
0
    def __init__(self, env, task_id, task_name, pad=False):
        """Wrap `env` and record its task identity.

        Args:
            env: Environment passed to the parent constructor.
            task_id: Stored as `self.task_id`.
            task_name: Stored as `self.task_name`.
            pad: Stored as `self.pad`. When truthy and the flattened
                observation size of `env` is below 9, the observation space
                is replaced by a Box of shape (9,) bounded by [-1, 1].

        """
        super().__init__(env)
        self.task_id = task_id
        self.task_name = task_name
        self.pad = pad

        padded_size = 9
        needs_padding = pad and np.prod(
            env.observation_space.shape) < padded_size
        if needs_padding:
            self.observation_space = akro.Box(low=-1,
                                              high=1,
                                              shape=(padded_size, ))
コード例 #22
0
ファイル: box_env.py プロジェクト: keiohta/torchrl
    def observation_space(self):
        """Return the observation space.

        Returns:
            akro.Box: float32 Box of shape `self._obs_dim` bounded by
                [-1, 1].

        """
        box_kwargs = dict(low=-1,
                          high=1,
                          shape=self._obs_dim,
                          dtype=np.float32)
        return akro.Box(**box_kwargs)
コード例 #23
0
    def test_pickling(self):
        """Check that an unpickled policy has its two compiled functions.

        The policy is pickled and restored inside a fresh variable scope; the
        restored object must expose `_f_dist_obs_latent` and
        `_f_dist_obs_task`.
        """
        obs_dim = (2, )
        action_dim = (2, )
        task_num = 5
        latent_dim = 2
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
        latent_space = akro.Box(low=np.zeros(latent_dim),
                                high=np.ones(latent_dim))
        encoder = GaussianMLPEncoder(
            InOutSpec(input_space=task_space, output_space=latent_space))
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        serialized = pickle.dumps(policy)
        with tf.compat.v1.variable_scope('resumed'):
            restored = pickle.loads(serialized)
            for attr in ('_f_dist_obs_latent', '_f_dist_obs_task'):
                assert hasattr(restored, attr)
コード例 #24
0
ファイル: test_dtypes.py プロジェクト: songanz/garage
def test_act_box_env_spec_mismatch_batch(batch_data):
    """Expect TimeStepBatch to reject data after the action space is replaced.

    The action space of the existing env spec is overwritten with a
    (4, 3, 2) Box; constructing the batch must raise a ValueError matching
    'actions should have'.

    Args:
        batch_data (dict): Keyword arguments for TimeStepBatch.

    """
    with pytest.raises(ValueError, match='actions should have'):
        replaced_space = akro.Box(low=1,
                                  high=np.inf,
                                  shape=(4, 3, 2),
                                  dtype=np.float32)
        batch_data['env_spec'].action_space = replaced_space
        TimeStepBatch(**batch_data)
コード例 #25
0
ファイル: test_dtypes.py プロジェクト: songanz/garage
def test_act_box_env_spec_mismatch_traj(traj_data):
    """Expect TrajectoryBatch to reject data after the action space changes.

    The action space of the existing env spec is overwritten with a
    (4, 3, 2) Box; constructing the batch must raise a ValueError matching
    'actions should have'.

    Args:
        traj_data (dict): Keyword arguments for TrajectoryBatch.

    """
    with pytest.raises(ValueError, match='actions should have'):
        replaced_space = akro.Box(low=1,
                                  high=np.inf,
                                  shape=(4, 3, 2),
                                  dtype=np.float32)
        traj_data['env_spec'].action_space = replaced_space
        TrajectoryBatch(**traj_data)
コード例 #26
0
def test_output_values(output_dim, kernel_sizes, hidden_channels, strides,
                       paddings):
    """Check DiscreteCNNModule against a CNNModule followed by an MLPModule.

    All three modules use `nn.init.ones_` for their weights. The output of
    the combined module must equal the output of feeding the flattened CNN
    features through the MLP.

    Args:
        output_dim: Size of the output space and of the MLP output.
        kernel_sizes: Forwarded to both CNN-based modules.
        hidden_channels: Forwarded to both CNN-based modules; also used as
            the hidden sizes of the MLP parts.
        strides: Forwarded to both CNN-based modules.
        paddings: Forwarded to both CNN-based modules.

    """
    in_channel, input_height, input_width = 3, 32, 32
    input_shape = (in_channel, input_height, input_width)

    def unbounded_box(shape):
        """Return a Box of `shape` with infinite bounds."""
        return akro.Box(shape=shape, low=-np.inf, high=np.inf)

    spec = InOutSpec(unbounded_box(input_shape),
                     unbounded_box((output_dim, )))
    obs = torch.rand(input_shape)

    # Arguments shared by the combined module and the stand-alone CNN.
    cnn_kwargs = dict(image_format='NCHW',
                      hidden_channels=hidden_channels,
                      kernel_sizes=kernel_sizes,
                      strides=strides,
                      paddings=paddings,
                      padding_mode='zeros',
                      hidden_w_init=nn.init.ones_)

    module = DiscreteCNNModule(spec=spec,
                               hidden_sizes=hidden_channels,
                               output_w_init=nn.init.ones_,
                               **cnn_kwargs)

    cnn = CNNModule(spec=InOutSpec(unbounded_box(input_shape), None),
                    **cnn_kwargs)
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]

    mlp = MLPModule(
        flat_dim,
        output_dim,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )

    cnn_features = cnn(obs)
    expected = mlp(torch.flatten(cnn_features, start_dim=1))
    actual = module(obs)

    assert torch.all(torch.eq(expected.detach(), actual.detach()))
コード例 #27
0
    def observation_space(self):
        """Observation space, adjusted according to the wrapper mode.

        Returns:
            akro.Box: The wrapped env's observation space in 'vanilla' mode;
                the stored observation space with the task space bounds
                appended in 'add-onehot' mode; otherwise the stored
                observation space with its last `self._num_tasks` entries
                removed.

        """
        mode = self._mode
        if mode == 'vanilla':
            return self.env.observation_space

        if mode == 'add-onehot':
            task_low, task_high = self.task_space.bounds
            env_low, env_high = self._observation_space.bounds
            low = np.concatenate([env_low, task_low])
            high = np.concatenate([env_high, task_high])
            return akro.Box(low, high)

        # Remaining mode: 'del-onehot'.
        env_low, env_high = self._observation_space.bounds
        trailing = self._num_tasks
        return akro.Box(env_low[:-trailing], env_high[:-trailing])
コード例 #28
0
 def __init__(self, env, task_number, num_tasks, max_env_shape):
     """Wrap `env` and precompute the one-hot vector of its task.

     Args:
         env: Environment passed to the parent constructor.
         task_number: Index of the entry set in the one-hot vector.
         num_tasks: Length of the one-hot vector.
         max_env_shape: Stored as `self.max_env_shape`.

     """
     super().__init__(env)
     self._task_number = task_number
     self._num_tasks = num_tasks

     upper = np.ones(self._num_tasks)
     lower = np.zeros(self._num_tasks)
     task_space = akro.Box(lower, upper)

     # All zeros except the task's own entry, which takes the upper bound.
     one_hot = np.zeros(task_space.shape)
     one_hot[task_number] = task_space.high[task_number]
     self.one_hot = one_hot
     self.max_env_shape = max_env_shape
コード例 #29
0
    def task_space(self):
        """Task Space.

        Returns:
            akro.Box: Box with `self.num_tasks` entries bounded by [0, 1].

        """
        size = self.num_tasks
        upper = np.ones(size)
        lower = np.zeros(size)
        return akro.Box(lower, upper)
コード例 #30
0
    def test_get_latent(self):
        """Check the shapes of the latent and its info for a one-hot task."""
        obs_dim = (2, )
        action_dim = (2, )
        task_num = 5
        latent_dim = 2
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
        latent_space = akro.Box(low=np.zeros(latent_dim),
                                high=np.ones(latent_dim))
        encoder = GaussianMLPEncoder(
            InOutSpec(input_space=task_space, output_space=latent_space))
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        # One-hot encoding of task 3.
        task_id = 3
        task_onehot = np.zeros(task_num)
        task_onehot[task_id] = 1

        latent, latent_info = policy.get_latent(task_onehot)
        expected_shape = (latent_dim, )
        assert latent.shape == expected_shape
        for key in ('mean', 'log_std'):
            assert latent_info[key].shape == expected_shape