Ejemplo n.º 1
0
    def test_param_network(self, batch_size=1):
        """Check ParamNetwork output shapes in single and parallel modes."""
        input_spec = TensorSpec((3, 32, 32), torch.float32)
        last_layer_size = 10
        net = ParamNetwork(
            input_spec,
            conv_layer_params=((16, (2, 2), 1, (1, 0)), (15, 2, (1, 2), 1, 2)),
            fc_layer_params=((128, True), ),
            last_layer_param=(last_layer_size, True),
            last_activation=math_ops.identity)
        self.assertLen(net._fc_layers, 2)

        # Non-parallel forward: output is (batch_size, last_layer_size).
        img = input_spec.zeros(outer_dims=(batch_size, ))
        out, _ = net(img)
        expected = (batch_size, last_layer_size)
        self.assertEqual(expected[1:], net.output_spec.shape)
        self.assertEqual(expected, tuple(out.size()))

        # Parallel forward: after loading per-replica parameters, a shared
        # input and a per-replica input should yield the same output shape.
        replica = 2
        img = input_spec.zeros(outer_dims=(batch_size, ))
        replica_img = input_spec.zeros(outer_dims=(batch_size, replica))
        net.set_parameters(torch.randn(replica, net.param_length))
        out, _ = net(img)
        replica_out, _ = net(replica_img)
        self.assertEqual(out.shape, replica_out.shape)

        expected = (batch_size, replica, last_layer_size)
        self.assertEqual(expected[1:], net.output_spec.shape)
        self.assertEqual(expected, tuple(out.size()))
Ejemplo n.º 2
0
    def test_value_distribution(self, lstm_hidden_size):
        """Value net with an image-embedding preprocessor and concat combiner."""
        img_spec = TensorSpec((3, 20, 20))
        vec_spec = TensorSpec((100, ))

        img = img_spec.zeros(outer_dims=(1, ))
        vec = vec_spec.zeros(outer_dims=(1, ))

        network_ctor, state = self._init(lstm_hidden_size)

        value_net = network_ctor(
            input_tensor_spec=[img_spec, vec_spec],
            input_preprocessors=[
                EmbeddingPreprocessor(
                    img_spec,
                    embedding_dim=100,
                    conv_layer_params=((8, 3, 1), (16, 3, 2, 1))), None
            ],
            preprocessing_combiner=NestConcat())

        value, state = value_net([img, vec], state)

        # Embedded image (100) concatenated with the raw vector (100).
        self.assertEqual(value_net._processed_input_tensor_spec.shape[0], 200)
        # The value head outputs a scalar per batch entry.
        self.assertEqual(value_net.output_spec, TensorSpec(()))
        # (batch_size,)
        self.assertEqual(value.shape, (1, ))
Ejemplo n.º 3
0
    def test_parallel_image_encoding_network(self, same_padding,
                                             flatten_output):
        """ParallelImageEncodingNetwork shapes for shared/per-replica input."""
        input_spec = TensorSpec((3, 80, 80), torch.float32)
        replica = 2
        batch_size = 3

        network = ParallelImageEncodingNetwork(
            input_channels=input_spec.shape[0],
            input_size=input_spec.shape[1:3],
            n=replica,
            conv_layer_params=((16, (5, 3), 2, (1, 1)), (15, 3, (2, 2), 0)),
            same_padding=same_padding,
            flatten_output=flatten_output)

        # Two conv layers, each contributing a weight and a bias.
        self.assertLen(list(network.parameters()), 4)

        if same_padding:
            full_shape = (batch_size, replica, 15, 20, 20)
        else:
            full_shape = (batch_size, replica, 15, 19, 19)

        def _check_shapes(output):
            # output_spec omits the batch dim; flattening merges (C, H, W).
            if flatten_output:
                self.assertEqual(
                    (*full_shape[1:2], np.prod(full_shape[2:])),
                    network.output_spec.shape)
                self.assertEqual(
                    (*full_shape[0:2], np.prod(full_shape[2:])),
                    tuple(output.size()))
            else:
                self.assertEqual(full_shape[1:], network.output_spec.shape)
                self.assertEqual(full_shape, tuple(output.size()))

        # 1) shared input: one image broadcast to all replicas
        img = input_spec.zeros(outer_dims=(batch_size, ))
        output, _ = network(img)
        _check_shapes(output)

        # 2) non-shared input: one image per replica
        img = input_spec.zeros(outer_dims=(batch_size, replica))
        output, _ = network(img)
        _check_shapes(output)
Ejemplo n.º 4
0
class ICMAlgorithmTest(alf.test.TestCase):
    """Tests ICMAlgorithm's inverse loss for discrete/continuous actions."""

    def setUp(self):
        self._input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=StepType.MID,
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100

    def _run_train_step(self, action_spec):
        # Build an ICMAlgorithm and run one train_step with a zero action.
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))
        time_step = self._time_step._replace(
            prev_action=action_spec.zeros(outer_dims=(1, )))
        return alg.train_step(time_step, state)

    def test_discrete_action(self):
        action_spec = BoundedTensorSpec((),
                                        dtype=torch.int64,
                                        minimum=0,
                                        maximum=3)
        alg_step = self._run_train_step(action_spec)

        # The inverse net should predict a uniform distribution, so the
        # cross-entropy loss equals log(num_actions).
        num_actions = action_spec.maximum - action_spec.minimum + 1
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(math.log(num_actions)),
            epsilon=1e-4)

    def test_continuous_action(self):
        action_spec = TensorSpec((4, ))
        alg_step = self._run_train_step(action_spec)

        # The inverse net should predict a zero action vector.
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
Ejemplo n.º 5
0
    def test_param_convnet(self,
                           batch_size=1,
                           same_padding=False,
                           use_bias=True,
                           flatten_output=False):
        """Check ParamConvNet output shapes in single and parallel modes."""
        input_spec = TensorSpec((3, 32, 32), torch.float32)
        net = ParamConvNet(input_channels=input_spec.shape[0],
                           input_size=input_spec.shape[1:],
                           conv_layer_params=((16, (2, 2), 1, (1, 0)),
                                              (15, 2, (1, 2), 1, 2)),
                           same_padding=same_padding,
                           activation=torch.tanh,
                           flatten_output=flatten_output)
        self.assertLen(net._conv_layers, 2)

        # Non-parallel forward.
        img = input_spec.zeros(outer_dims=(batch_size, ))
        out, _ = net(img)
        expected = ((batch_size, 15, 15, 7)
                    if same_padding else (batch_size, 15, 17, 8))
        if flatten_output:
            expected = (batch_size, np.prod(expected[1:]))
        self.assertEqual(expected[1:], net.output_spec.shape)
        self.assertEqual(expected, tuple(out.size()))

        # Parallel forward: after loading per-replica parameters, shared and
        # per-replica inputs should yield the same output shape.
        replica = 2
        img = input_spec.zeros(outer_dims=(batch_size, ))
        replica_img = input_spec.zeros(outer_dims=(batch_size, replica))
        net.set_parameters(torch.randn(replica, net.param_length))
        out, _ = net(img)
        replica_out, _ = net(replica_img)
        self.assertEqual(out.shape, replica_out.shape)

        expected = ((batch_size, replica, 15, 15, 7)
                    if same_padding else (batch_size, replica, 15, 17, 8))
        if flatten_output:
            expected = (*expected[0:2], np.prod(expected[2:]))
        self.assertEqual(expected[1:], net.output_spec.shape)
        self.assertEqual(expected, tuple(out.size()))
Ejemplo n.º 6
0
 def setUp(self):
     """Build a fixed MID-step TimeStep and an encoding net for the tests."""
     input_tensor_spec = TensorSpec((10, ))
     self._time_step = TimeStep(
         step_type=torch.tensor(StepType.MID, dtype=torch.int32),
         reward=0,
         discount=1,
         observation=input_tensor_spec.zeros(outer_dims=(1, )),
         prev_action=None,
         env_id=None)
     self._encoding_net = EncodingNetwork(
         input_tensor_spec=input_tensor_spec)
Ejemplo n.º 7
0
    def test_parallel_image_decoding_network(self, preprocessing_fc_layers,
                                             same_padding):
        """ParallelImageDecodingNetwork shapes for shared/per-replica input."""
        input_spec = TensorSpec((100, ), torch.float32)
        replica = 2
        batch_size = 3

        network = ParallelImageDecodingNetwork(
            input_size=input_spec.shape[0],
            n=replica,
            transconv_layer_params=((16, (2, 2), 1, (1, 0)), (64, 3, (1, 2),
                                                              0)),
            start_decoding_size=(20, 31),
            start_decoding_channels=8,
            same_padding=same_padding,
            preprocess_fc_layer_params=preprocessing_fc_layers)

        # Each fc / transconv layer contributes a weight and a bias.
        num_layers = 3 if preprocessing_fc_layers is None else 5
        self.assertLen(list(network.parameters()), num_layers * 2)

        expected = ((batch_size, replica, 64, 21, 63)
                    if same_padding else (batch_size, replica, 64, 21, 65))

        # 1) shared input: one embedding broadcast to all replicas
        embedding = input_spec.zeros(outer_dims=(batch_size, ))
        output, _ = network(embedding)
        self.assertEqual(expected[1:], network.output_spec.shape)
        self.assertEqual(expected, tuple(output.size()))

        # 2) non-shared input: one embedding per replica
        embedding = input_spec.zeros(outer_dims=(batch_size, replica))
        output, _ = network(embedding)
        self.assertEqual(expected[1:], network.output_spec.shape)
        self.assertEqual(expected, tuple(output.size()))
Ejemplo n.º 8
0
    def test_encoding_network_img(self):
        """EncodingNetwork flattens its internal image-encoder's output."""
        input_spec = TensorSpec((3, 80, 80), torch.float32)
        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  conv_layer_params=((16, (5, 3), 2, (1, 1)),
                                                     (15, 3, (2, 2), 0)))

        # Two conv layers, each with a weight and a bias.
        self.assertLen(list(network.parameters()), 4)

        img = input_spec.zeros(outer_dims=(1, ))
        output, _ = network(img)
        inner_spec = network._img_encoding_net.output_spec
        self.assertEqual(output.shape[-1], np.prod(inner_spec.shape))
Ejemplo n.º 9
0
    def test_continuous_skill_loss(self):
        """DIAYN with a continuous skill: a zero skill gives zero loss."""
        skill_spec = TensorSpec((4, ))
        alg = DIAYNAlgorithm(skill_spec=skill_spec,
                             encoding_net=self._encoding_net)
        skill = state = skill_spec.zeros(outer_dims=(1, ))

        observation = [self._time_step.observation, skill]
        alg_step = alg.train_step(
            self._time_step._replace(observation=observation), state)

        # The discriminator should predict a zero skill vector.
        self.assertTensorClose(torch.sum(alg_step.info.loss),
                               torch.as_tensor(0))
Ejemplo n.º 10
0
    def test_critic(self, lstm_hidden_size):
        """Check CriticNetwork (feed-forward or LSTM) and its parallel form.

        Args:
            lstm_hidden_size: LSTM hidden sizes, or ``None`` for the purely
                feed-forward critic.
        """
        obs_spec = TensorSpec((3, 20, 20), torch.float32)
        action_spec = TensorSpec((5, ), torch.float32)
        input_spec = (obs_spec, action_spec)

        observation_conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
        action_fc_layer_params = (10, 8)
        joint_fc_layer_params = (6, 4)

        image = obs_spec.zeros(outer_dims=(1, ))
        action = action_spec.randn(outer_dims=(1, ))

        network_input = (image, action)

        # _init is defined by the enclosing test class; it returns the
        # network constructor and an initial state for the given LSTM size.
        network_ctor, state = self._init(lstm_hidden_size)

        critic_net = network_ctor(
            input_spec,
            observation_conv_layer_params=observation_conv_layer_params,
            action_fc_layer_params=action_fc_layer_params,
            joint_fc_layer_params=joint_fc_layer_params)

        value, state = critic_net._test_forward()
        self.assertEqual(value.shape, (1, ))
        if lstm_hidden_size is None:
            # A stateless critic should report an empty state.
            self.assertEqual(state, ())

        value, state = critic_net(network_input, state)

        # The critic outputs a scalar value per batch entry.
        self.assertEqual(critic_net.output_spec, TensorSpec(()))
        # (batch_size,)
        self.assertEqual(value.shape, (1, ))

        # test make_parallel
        pnet = critic_net.make_parallel(6)

        if lstm_hidden_size is not None:
            # A non-replicated state must be rejected: the parallel network
            # expects state shaped [B, n, ...].
            self.assertRaises(AssertionError, pnet, network_input, state)

        # Replicate the state along a new dim: [B, ...] -> [B, 6, ...].
        state = alf.nest.map_structure(
            lambda x: x.unsqueeze(1).expand(x.shape[0], 6, x.shape[1]), state)

        if lstm_hidden_size is None:
            # NOTE(review): presumably a natively parallel critic is only
            # possible without RNN state — confirm against make_parallel.
            self.assertTrue(isinstance(pnet, ParallelCriticNetwork))
        else:
            self.assertTrue(isinstance(pnet, NaiveParallelNetwork))

        value, state = pnet(network_input, state)
        self.assertEqual(pnet.output_spec, TensorSpec((6, )))
        self.assertEqual(value.shape, (1, 6))
Ejemplo n.º 11
0
    def test_parallel_q_network(self):
        """make_parallel(n) yields Q-values of shape (B, n, num_actions)."""
        input_spec = TensorSpec([10])
        inputs = input_spec.zeros(outer_dims=(1, ))

        network_ctor, state = self._init(None)
        q_net = network_ctor(input_spec, self._action_spec)

        n = 5
        parallel_q_net = q_net.make_parallel(n)
        q_value, _ = parallel_q_net(inputs, state)

        # (batch_size, n, num_actions)
        self.assertEqual(q_value.shape, (1, n, self._num_actions))
Ejemplo n.º 12
0
    def test_continuous_action(self):
        """With a zero continuous action, the ICM inverse loss should be zero."""
        action_spec = TensorSpec((4, ))
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        time_step = self._time_step._replace(
            prev_action=action_spec.zeros(outer_dims=(1, )))
        alg_step = alg.train_step(time_step, state)

        # The inverse net should predict a zero action vector.
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
Ejemplo n.º 13
0
    def test_encoding_network_nonimg(self, last_layer_size, last_activation,
                                     output_tensor_spec):
        """EncodingNetwork on vector input: last-layer options, output spec."""
        input_spec = TensorSpec((100, ), torch.float32)
        embedding = input_spec.zeros(outer_dims=(1, ))

        # last_layer_size and last_activation must be given together;
        # supplying exactly one of them should be rejected.
        if (last_layer_size is None) != (last_activation is None):
            with self.assertRaises(AssertionError):
                network = EncodingNetwork(
                    input_tensor_spec=input_spec,
                    output_tensor_spec=output_tensor_spec,
                    fc_layer_params=(30, 40, 50),
                    activation=torch.tanh,
                    last_layer_size=last_layer_size,
                    last_activation=last_activation)
            return

        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  output_tensor_spec=output_tensor_spec,
                                  fc_layer_params=(30, 40, 50),
                                  activation=torch.tanh,
                                  last_layer_size=last_layer_size,
                                  last_activation=last_activation)

        # Each fc layer contributes a weight and a bias.
        num_layers = 3 if last_layer_size is None else 4
        self.assertLen(list(network.parameters()), num_layers * 2)

        # Without an extra last layer the final fc keeps the base activation.
        expected_activation = (torch.tanh
                               if last_activation is None else last_activation)
        self.assertEqual(network._fc_layers[-1]._activation,
                         expected_activation)

        output, _ = network(embedding)

        if output_tensor_spec is None:
            expected_dim = 50 if last_layer_size is None else last_layer_size
            self.assertEqual(output.size()[1], expected_dim)
            self.assertEqual(network.output_spec.shape,
                             tuple(output.size()[1:]))
        else:
            self.assertEqual(tuple(output.size()[1:]),
                             output_tensor_spec.shape)
            self.assertEqual(network.output_spec.shape,
                             output_tensor_spec.shape)
Ejemplo n.º 14
0
    def test_non_rnn(self):
        """NaiveParallelNetwork replicates a feed-forward net's parameters."""
        input_spec = TensorSpec((100, ), torch.float32)
        embedding = input_spec.zeros(outer_dims=(6, ))

        base_net = EncodingNetwork(input_tensor_spec=input_spec,
                                   fc_layer_params=(30, 40, 50),
                                   activation=torch.tanh)
        replicas = 4
        num_layers = 3

        pnet = NaiveParallelNetwork(base_net, replicas)

        # Each replica owns its own copy of every (weight, bias) pair.
        self.assertEqual(len(list(pnet.parameters())),
                         num_layers * 2 * replicas)

        output, _ = pnet(embedding)
        self.assertEqual(output.shape, (6, replicas, 50))
        self.assertEqual(pnet.output_spec.shape, (replicas, 50))
Ejemplo n.º 15
0
    def test_actor_networks(self, lstm_hidden_size):
        """Actor network maps an image observation to a (B, 5) action."""
        obs_spec = TensorSpec((3, 20, 20), torch.float32)
        action_spec = BoundedTensorSpec((5, ), torch.float32, 2., 3.)

        image = obs_spec.zeros(outer_dims=(1, ))
        network_ctor, state = self._init(lstm_hidden_size)

        actor_net = network_ctor(obs_spec,
                                 action_spec,
                                 conv_layer_params=((8, 3, 1), (16, 3, 2, 1)),
                                 fc_layer_params=(10, 8))

        action, state = actor_net(image, state)

        # (batch_size, num_actions)
        self.assertEqual(action.shape, (1, 5))
Ejemplo n.º 16
0
    def test_agent_steps(self):
        """Run predict/rollout/train steps of an Agent with an ICM module."""
        batch_size = 1
        observation_spec = TensorSpec((10, ))
        action_spec = BoundedTensorSpec((), dtype='int64')
        time_step = TimeStep(
            observation=observation_spec.zeros(outer_dims=(batch_size, )),
            prev_action=action_spec.zeros(outer_dims=(batch_size, )))

        actor_net = functools.partial(ActorDistributionNetwork,
                                      fc_layer_params=(100, ))
        value_net = functools.partial(ValueNetwork, fc_layer_params=(100, ))

        # TODO: add a goal generator and an entropy target algorithm once they
        # are implemented.
        agent = Agent(observation_spec=observation_spec,
                      action_spec=action_spec,
                      rl_algorithm_cls=functools.partial(
                          ActorCriticAlgorithm,
                          actor_network_ctor=actor_net,
                          value_network_ctor=value_net),
                      intrinsic_reward_module=ICMAlgorithm(
                          action_spec=action_spec,
                          observation_spec=observation_spec))

        predict_state = agent.get_initial_predict_state(batch_size)
        rollout_state = agent.get_initial_rollout_state(batch_size)
        train_state = agent.get_initial_train_state(batch_size)

        pred_step = agent.predict_step(time_step,
                                       predict_state,
                                       epsilon_greedy=0.1)
        # The intrinsic reward module (irm) state stays empty in prediction.
        self.assertEqual(pred_step.state.irm, ())

        rollout_step = agent.rollout_step(time_step, rollout_state)
        # ...but is populated during rollout.
        self.assertNotEqual(rollout_step.state.irm, ())

        exp = make_experience(time_step, rollout_step, rollout_state)

        train_step = agent.train_step(exp, train_state)
        self.assertNotEqual(train_step.state.irm, ())

        # Rollout and train on the same input should agree on the irm state.
        self.assertTensorEqual(rollout_step.state.irm, train_step.state.irm)
Ejemplo n.º 17
0
    def test_rnn(self):
        """NaiveParallelNetwork around an LSTM: state spec gains a replica dim."""
        input_spec = TensorSpec((100, ), torch.float32)
        embedding = input_spec.zeros(outer_dims=(6, ))

        base_net = LSTMEncodingNetwork(input_tensor_spec=input_spec,
                                       hidden_size=(30, 40))
        replicas = 4
        pnet = NaiveParallelNetwork(base_net, replicas)

        # One (h, c) pair per LSTM layer, each carrying the replica dim.
        expected_state_spec = [(TensorSpec((4, 30)), TensorSpec((4, 30))),
                               (TensorSpec((4, 40)), TensorSpec((4, 40)))]
        self.assertEqual(pnet.state_spec, expected_state_spec)

        state = alf.utils.common.zero_tensor_from_nested_spec(
            pnet.state_spec, 6)
        output, state = pnet(embedding, state)
        self.assertEqual(output.shape, (6, replicas, 40))
        self.assertEqual(pnet.output_spec.shape, (replicas, 40))
        self.assertEqual(alf.utils.dist_utils.extract_spec(state),
                         expected_state_spec)
Ejemplo n.º 18
0
    def test_image_encoding_network(self, flatten_output, same_padding):
        """ImageEncodingNetwork output shape under padding/flatten options."""
        input_spec = TensorSpec((3, 32, 32), torch.float32)
        network = ImageEncodingNetwork(input_channels=input_spec.shape[0],
                                       input_size=input_spec.shape[1:],
                                       conv_layer_params=((16, (2, 2), 1, (1,
                                                                           0)),
                                                          (15, 2, (1, 2), 1)),
                                       same_padding=same_padding,
                                       activation=torch.tanh,
                                       flatten_output=flatten_output)

        self.assertLen(list(network.parameters()), 4)  # two conv2d layers

        img = input_spec.zeros(outer_dims=(1, ))
        output, _ = network(img)

        expected = (15, 30, 15) if same_padding else (15, 34, 16)
        if flatten_output:
            expected = (np.prod(expected), )
        self.assertEqual(expected, network.output_spec.shape)
        self.assertEqual(expected, tuple(output.size()[1:]))
Ejemplo n.º 19
0
    def test_image_decoding_network(self, preprocessing_fc_layers,
                                    same_padding):
        """ImageDecodingNetwork output shape and parameter count."""
        input_spec = TensorSpec((100, ), torch.float32)
        embedding = input_spec.zeros(outer_dims=(1, ))
        network = ImageDecodingNetwork(
            input_size=input_spec.shape[0],
            transconv_layer_params=((16, (2, 2), 1, (1, 0)), (64, 3, (1, 2),
                                                              0)),
            start_decoding_size=(20, 31),
            start_decoding_channels=8,
            same_padding=same_padding,
            preprocess_fc_layer_params=preprocessing_fc_layers)

        # Each fc / transconv layer contributes a weight and a bias.
        num_layers = 3 if preprocessing_fc_layers is None else 5
        self.assertLen(list(network.parameters()), num_layers * 2)

        output, _ = network(embedding)
        expected = (64, 21, 63) if same_padding else (64, 21, 65)
        self.assertEqual(expected, network.output_spec.shape)
        self.assertEqual(expected, tuple(output.size()[1:]))
Ejemplo n.º 20
0
class EntropyTargetAlgorithmTest(parameterized.TestCase, alf.test.TestCase):
    """Smoke tests for EntropyTargetAlgorithm across projection networks."""

    def setUp(self):
        self._input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=torch.as_tensor(StepType.MID),
            reward=0,
            discount=1,
            observation=self._input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._hidden_size = 100

    @parameterized.parameters((NormalProjectionNetwork, False),
                              (NormalProjectionNetwork, True),
                              (StableNormalProjectionNetwork, False),
                              (StableNormalProjectionNetwork, True))
    def test_run_entropy_target_algorithm(self, network_ctor, scaled):
        action_spec = BoundedTensorSpec((1, ), minimum=0, maximum=3)
        alg = EntropyTargetAlgorithm(action_spec=action_spec)
        net = network_ctor(self._input_tensor_spec.shape[0],
                           action_spec,
                           projection_output_init_gain=1.0,
                           squash_mean=True,
                           scale_distribution=scaled)

        # A batch of 100 random embeddings in [0, 10).
        batch_shape = (100, ) + self._input_tensor_spec.shape
        embedding = 10 * torch.rand(batch_shape, dtype=torch.float32)

        dist, _ = net(embedding)
        alg_step = alg.train_step(dist, self._time_step.step_type)

        # Exercise calc_loss at every internal adjustment stage.
        info = EntropyTargetInfo(loss=alg_step.info.loss)
        for stage in range(-3, 1):
            alg._stage = torch.tensor(stage, dtype=torch.int32)
            alg.calc_loss(self._time_step, info)
Ejemplo n.º 21
0
 def testTensorSpecZero(self, dtype):
     """spec.zeros(outer_dims) prepends outer_dims and fills with zeros."""
     spec = TensorSpec(self._shape, dtype)
     sample = spec.zeros(outer_dims=(3, 10))
     self.assertEqual(sample.shape, (3, 10) + self._shape)
     self.assertTrue(torch.all(sample == 0))