Example #1
    def test_value_distribution(self, lstm_hidden_size):
        input_spec1 = TensorSpec((3, 20, 20))
        input_spec2 = TensorSpec((100, ))
        conv_layer_params = ((8, 3, 1), (16, 3, 2, 1))
        embedding_dim = 100

        image = input_spec1.zeros(outer_dims=(1, ))
        vector = input_spec2.zeros(outer_dims=(1, ))

        network_ctor, state = self._init(lstm_hidden_size)

        value_net = network_ctor(
            input_tensor_spec=[input_spec1, input_spec2],
            input_preprocessors=[
                EmbeddingPreprocessor(
                    input_spec1,
                    embedding_dim=embedding_dim,
                    conv_layer_params=conv_layer_params), None
            ],
            preprocessing_combiner=NestConcat())

        value, state = value_net([image, vector], state)

        self.assertEqual(value_net._processed_input_tensor_spec.shape[0], 200)
        self.assertEqual(value_net.output_spec, TensorSpec(()))
        # (batch_size,)
        self.assertEqual(value.shape, (1, ))
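
For reference, a standalone sketch of the image branch used above (the batch
size of 4 is arbitrary and not taken from the test): the conv layers plus a
final projection embed the (3, 20, 20) image into 100 dims, which NestConcat
then joins with the untouched 100-dim vector, giving the 200-dim processed
input spec asserted by the test.

img_spec = TensorSpec((3, 20, 20))
img_preproc = EmbeddingPreprocessor(
    img_spec,
    embedding_dim=100,
    conv_layer_params=((8, 3, 1), (16, 3, 2, 1)))

# Preprocess a small zero batch of images; the resulting embedding has last
# dimension 100 and is what gets concatenated with the plain 100-dim vector.
img_preproc(img_spec.zeros(outer_dims=(4, )))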
Example #2
    def test_discrete_action(self, net_ctor):
        obs_spec = TensorSpec((20, ))
        action_spec = BoundedTensorSpec((), dtype='int64')

        # doesn't support discrete action spec ...
        self.assertRaises(AssertionError, net_ctor, (obs_spec, action_spec))

        # ... unless a preprocessor is specified
        net_ctor(
            (obs_spec, action_spec),
            action_input_processors=EmbeddingPreprocessor(action_spec,
                                                          embedding_dim=10))
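
To make the comment above concrete, here is a small sketch (the maximum=4
bound and the batch size are illustrative, not taken from the test; unpacking
the call result as (output, state) assumes the preprocessor follows the usual
ALF network call convention, which the state argument in Example #5 suggests).
An EmbeddingPreprocessor maps each integer action to a dense 10-dim embedding,
so the downstream network only ever sees continuous features.

discrete_spec = BoundedTensorSpec((), dtype='int64', maximum=4)
action_preproc = EmbeddingPreprocessor(discrete_spec, embedding_dim=10)

# A batch of 6 (all-zero) integer actions becomes a (6, 10) float tensor
# that can be combined with the observation features.
actions = discrete_spec.zeros(outer_dims=(6, ))
embedded, _ = action_preproc(actions)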
Example #3
    def test_encoding_network_nested_input(self, lstm):
        input_spec = dict(a=TensorSpec((3, 80, 80)),
                          b=[
                              TensorSpec((80, )),
                              BoundedTensorSpec((), dtype="int64"),
                              dict(x=TensorSpec((100, )),
                                   y=TensorSpec((200, )))
                          ])
        imgs = common.zero_tensor_from_nested_spec(input_spec, batch_size=1)
        input_preprocessors = dict(
            a=EmbeddingPreprocessor(input_spec["a"],
                                    conv_layer_params=((1, 2, 2, 0), ),
                                    embedding_dim=100),
            b=[
                EmbeddingPreprocessor(input_spec["b"][0], embedding_dim=50),
                EmbeddingPreprocessor(input_spec["b"][1], embedding_dim=50),
                dict(x=None, y=torch.relu)
            ])

        if lstm:
            network_ctor = functools.partial(LSTMEncodingNetwork,
                                             hidden_size=(100, ))
        else:
            network_ctor = EncodingNetwork

        network = network_ctor(input_tensor_spec=input_spec,
                               input_preprocessors=input_preprocessors,
                               preprocessing_combiner=NestConcat())
        output, _ = network(imgs, state=[(torch.zeros((1, 100)), ) * 2])

        if lstm:
            self.assertEqual(network.output_spec, TensorSpec((100, )))
            self.assertEqual(output.size()[-1], 100)
        else:
            self.assertEqual(len(list(network.parameters())), 4 + 2 + 1)
            self.assertEqual(network.output_spec, TensorSpec((500, )))
            self.assertEqual(output.size()[-1], 500)
Example #4
    def test_mixed_actions(self, net_ctor):
        obs_spec = TensorSpec((20, ))
        action_spec = dict(x=BoundedTensorSpec((), dtype='int64'),
                           y=BoundedTensorSpec((3, )))

        input_preprocessors = dict(x=EmbeddingPreprocessor(action_spec['x'],
                                                           embedding_dim=10),
                                   y=None)

        net_ctor = functools.partial(
            net_ctor, action_input_processors=input_preprocessors)

        # doesn't support mixed actions
        self.assertRaises(AssertionError, net_ctor, (obs_spec, action_spec))

        # ... unless a combiner is specified
        net_ctor((obs_spec, action_spec),
                 action_preprocessing_combiner=NestConcat())
Example #5
class TestInputpreprocessor(parameterized.TestCase, alf.test.TestCase):
    input_spec = TensorSpec((10, ))
    preproc = EmbeddingPreprocessor(input_tensor_spec=input_spec,
                                    embedding_dim=10)

    shared_preproc = preproc.copy().singleton()

    @parameterized.parameters((False, preproc), (True, preproc),
                              (False, shared_preproc), (True, shared_preproc))
    def test_input_preprocessor(self, lstm, preproc):
        def _check_with_shared_param(net1, net2, shared_subnet=None):
            net1_params = set(net1.parameters())
            net2_params = set(net2.parameters())
            # check that net1 and net2 share parameters with shared_subnet
            if shared_subnet is not None:
                shared_params = set(shared_subnet.parameters())
                for p in shared_params:
                    self.assertTrue((p in net1_params) and (p in net2_params))

            # apart from those, net1 and net2 should not share any parameters
            for p1, p2 in zip(net1_params, net2_params):
                if shared_subnet is None or p1 not in shared_params:
                    self.assertTrue(p1 is not p2)

        # 1) test that copying an input_preprocessor gives each copy its own
        # parameters
        input_preprocessor = preproc
        input_preprocessor_copy = input_preprocessor.copy()

        if not preproc._singleton_instance:
            _check_with_shared_param(input_preprocessor,
                                     input_preprocessor_copy)
        else:
            _check_with_shared_param(input_preprocessor,
                                     input_preprocessor_copy,
                                     input_preprocessor)

        if lstm:
            network_ctor = functools.partial(LSTMEncodingNetwork,
                                             hidden_size=(1, ),
                                             post_fc_layer_params=(2, 2))
        else:
            network_ctor = functools.partial(EncodingNetwork,
                                             fc_layer_params=(10, 10))

        net = network_ctor(
            input_tensor_spec=[
                TestInputpreprocessor.input_spec,
                TestInputpreprocessor.input_spec
            ],
            input_preprocessors=[input_preprocessor, torch.relu],
            preprocessing_combiner=NestConcat(dim=1))

        # 2) test that the copied network has its own parameters, including
        # those of its input preprocessors
        copied_net = net.copy()
        if not preproc._singleton_instance:
            _check_with_shared_param(net, copied_net)
        else:
            _check_with_shared_param(net, copied_net, input_preprocessor)

        # 3) test that each replica of the NaiveParallelNetwork has its own
        # parameters, including those of its input preprocessors
        replicas = 2
        p_net = alf.networks.network.NaiveParallelNetwork(net, replicas)
        if not preproc._singleton_instance:
            _check_with_shared_param(p_net._networks[0], p_net._networks[1])
        else:
            _check_with_shared_param(p_net._networks[0], p_net._networks[1],
                                     input_preprocessor)

        # 4) test network forward
        batch_size = 6
        batch = TestInputpreprocessor.input_spec.zeros(
            outer_dims=(batch_size, ))

        if lstm:
            state = [(torch.zeros((batch_size, 1)), ) * 2]
            p_state = [(torch.zeros((batch_size, replicas, 1)), ) * 2]
        else:
            state = ()
            p_state = ()

        net([batch, batch], state)
        p_net([batch, batch], p_state)

    @parameterized.parameters(preproc, shared_preproc)
    def test_input_preprocessor_state(self, input_preprocessor):
        batch_size = 6
        batch = TestInputpreprocessor.input_spec.zeros(
            outer_dims=(batch_size, ))

        input_preprocessor(batch)
        self.assertRaises(AssertionError,
                          input_preprocessor,
                          inputs=batch,
                          state=batch)
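
In short, the sharing semantics exercised by this test (a sketch reusing the
10-dim spec from the class above; the subset check mirrors what
_check_with_shared_param verifies): copy() of a regular preprocessor creates
fresh parameters, while copy() of a .singleton() preprocessor keeps exposing
the original parameters, which is why copied networks and parallel replicas
end up sharing it.

spec = TensorSpec((10, ))
regular = EmbeddingPreprocessor(input_tensor_spec=spec, embedding_dim=10)
shared = EmbeddingPreprocessor(input_tensor_spec=spec,
                               embedding_dim=10).singleton()

# A regular copy owns freshly created parameters ...
assert set(regular.copy().parameters()).isdisjoint(set(regular.parameters()))
# ... while a singleton "copy" still exposes the original parameters.
assert set(shared.parameters()) <= set(shared.copy().parameters())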
Example #6
    def test_parallel_network_state_dict_and_params(self, lstm):
        input_spec = TensorSpec((10, ))

        input_preprocessors = EmbeddingPreprocessor(
            input_spec, embedding_dim=10)

        if lstm:
            network_ctor = functools.partial(
                LSTMEncodingNetwork,
                hidden_size=(1, ),
                post_fc_layer_params=(2, 2))
        else:
            network_ctor = functools.partial(
                EncodingNetwork, fc_layer_params=(10, 10))

        network_wo_preprocessor = network_ctor(input_tensor_spec=input_spec)
        network_w_preprocessor = network_ctor(
            input_tensor_spec=input_spec,
            input_preprocessors=input_preprocessors)

        # 1) test parameter copy for the corresponding parallel net
        def _check_parallel_param(p_net_source):
            p_net_target = p_net_source.copy()
            p_net_target.load_state_dict(p_net_source.state_dict())
            for ws, wt in zip(p_net_source.parameters(),
                              p_net_target.parameters()):
                self.assertTensorEqual(ws, wt)

        replicas = 2
        for network in [network_wo_preprocessor, network_w_preprocessor]:
            p_net = network.make_parallel(replicas)
            _check_parallel_param(p_net)
            n_net = alf.networks.network.NaiveParallelNetwork(
                network, replicas)
            _check_parallel_param(n_net)

        # 2) test the parameter count when using a non-shared preprocessor
        p_net_wo_preprocessor = alf.networks.network.NaiveParallelNetwork(
            network_wo_preprocessor, replicas)
        p_net_w_preprocessor = network_w_preprocessor.make_parallel(replicas)

        # the number of parameters of the parallel network with an
        # input_preprocessor should equal that of the naive parallel network
        # without a preprocessor, plus the preprocessor's parameter count
        # times the number of replicas
        self.assertEqual(
            len(p_net_w_preprocessor.state_dict()),
            len(p_net_wo_preprocessor.state_dict()) +
            replicas * len(input_preprocessors.state_dict()))

        self.assertEqual(
            len(p_net_w_preprocessor.state_dict()),
            len(list(p_net_w_preprocessor.parameters())))

        # 3) test the parameter count when using a shared preprocessor

        network_w_shared_preprocessor = network_ctor(
            input_tensor_spec=input_spec,
            input_preprocessors=EmbeddingPreprocessor(
                input_spec, embedding_dim=10).singleton())

        p_net_w_shared_preprocessor = network_w_shared_preprocessor.make_parallel(
            replicas)

        # the number of parameters of the parallel network with a shared
        # input_preprocessor should equal that of the naive parallel network
        # without a preprocessor, plus the preprocessor's parameter count
        # (counted only once, since the preprocessor is shared)

        self.assertEqual(
            len(p_net_w_shared_preprocessor.state_dict()),
            len(p_net_wo_preprocessor.state_dict()) + len(
                input_preprocessors.state_dict()))

        self.assertEqual(
            len(p_net_w_shared_preprocessor.state_dict()),
            len(list(p_net_w_shared_preprocessor.parameters())))
Example #7
    def __init__(self,
                 observation_spec,
                 action_spec,
                 skill_spec,
                 config: TrainerConfig,
                 skill_discriminator_ctor=EncodingNetwork,
                 skill_encoder_ctor=None,
                 observation_transformer=math_ops.identity,
                 optimizer=None,
                 sparse_reward=False,
                 debug_summaries=False,
                 num_steps_per_skill=3,
                 skill_type="state_difference",
                 name="Discriminator"):
        """If ``sparse_reward=True``, then the discriminator will only predict
        at the skill switching steps.
        """
        if skill_spec.is_discrete:
            assert isinstance(skill_spec, BoundedTensorSpec)
            skill_dim = skill_spec.maximum - skill_spec.minimum + 1
        else:
            assert len(
                skill_spec.shape) == 1, "Only 1D skill vector is supported"
            skill_dim = skill_spec.shape[0]

        supported_skill_types = [
            "state_concatenation",
            "state_difference",
            "state",
            "action",
            "action_difference",
            "state_action",
            "action_concatenation",
        ]
        assert skill_type in supported_skill_types, (
            "Skill type must be in: %s" % supported_skill_types)

        self._skill_type = skill_type

        subtrajectory_spec = get_subtrajectory_spec(num_steps_per_skill,
                                                    observation_spec,
                                                    action_spec)

        if skill_type == "state_concatenation":
            discriminator_spec = flatten(subtrajectory_spec.observation)
        elif skill_type == "action_concatenation":
            discriminator_spec = flatten(subtrajectory_spec.prev_action)
        else:
            discriminator_spec = get_discriminator_spec(
                skill_type, observation_spec, action_spec)

        input_preprocessors, preprocessing_combiner = None, None
        if is_action_skill(skill_type):
            # first project
            input_preprocessors = (None, None)
            preprocessing_combiner = NestConcat()
            discriminator_spec = (observation_spec, discriminator_spec)

        skill_encoder = None
        if skill_encoder_ctor is not None:
            step_spec = BoundedTensorSpec((),
                                          maximum=num_steps_per_skill,
                                          dtype='int64')
            skill_encoder = skill_encoder_ctor(
                input_preprocessors=(None,
                                     EmbeddingPreprocessor(
                                         input_tensor_spec=step_spec,
                                         embedding_dim=skill_dim)),
                preprocessing_combiner=NestConcat(),
                input_tensor_spec=(skill_spec, step_spec))
            if input_preprocessors is None:
                input_preprocessors = (None, )
                discriminator_spec = (discriminator_spec, )
            input_preprocessors = input_preprocessors + (EmbeddingPreprocessor(
                input_tensor_spec=step_spec, embedding_dim=skill_dim), )
            discriminator_spec = discriminator_spec + (step_spec, )
            skill_dim = skill_encoder.output_spec.shape[0]

        skill_disc_inputs = dict(input_preprocessors=input_preprocessors,
                                 preprocessing_combiner=preprocessing_combiner,
                                 input_tensor_spec=discriminator_spec)

        if skill_discriminator_ctor.__name__ == "EncodingNetwork":
            skill_disc_inputs["last_layer_size"] = skill_dim
        else:  # ActorDistributionNetwork
            skill_disc_inputs["action_spec"] = skill_spec

        skill_discriminator = skill_discriminator_ctor(**skill_disc_inputs)

        train_state_spec = DiscriminatorState(
            first_observation=observation_spec,
            # previous untransformed observation; its diff is used for prediction
            untrans_observation=observation_spec,
            subtrajectory=subtrajectory_spec)

        super().__init__(train_state_spec=train_state_spec,
                         predict_state_spec=DiscriminatorState(
                             subtrajectory=subtrajectory_spec,
                             first_observation=observation_spec),
                         config=config,
                         optimizer=optimizer,
                         debug_summaries=debug_summaries,
                         name=name)

        self._skill_discriminator = skill_discriminator
        self._skill_encoder = skill_encoder
        # The experience observation won't be automatically transformed when it
        # is sampled from the replay buffer, so we apply the transformation
        # manually.
        self._observation_transformer = observation_transformer
        self._num_steps_per_skill = num_steps_per_skill
        self._sparse_reward = sparse_reward
        self._skill_dim = skill_dim
        self._high_rl = None
Example #8
def get_low_rl_input_preprocessors(low_rl_input_specs, embedding_dim):
    return alf.nest.map_structure(
        lambda spec: EmbeddingPreprocessor(input_tensor_spec=spec,
                                           embedding_dim=embedding_dim),
        low_rl_input_specs)
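
A brief usage sketch for the helper above (the nested spec is purely
illustrative): it returns a nest with the same structure as
low_rl_input_specs, holding one EmbeddingPreprocessor per leaf spec, which can
then be passed as input_preprocessors to an encoding network together with a
NestConcat combiner, as in the earlier examples.

low_rl_input_specs = dict(
    observation=TensorSpec((20, )),
    skill=BoundedTensorSpec((), dtype='int64'))

# One 10-dim EmbeddingPreprocessor per leaf spec, preserving the dict structure.
preprocessors = get_low_rl_input_preprocessors(low_rl_input_specs,
                                               embedding_dim=10)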