Example #1
class SACValueNetwork(ValueFunction):
    """
    Value network for SAC which must be able to merge different input types.
    """
    def __init__(self, scope="sac-value-network", **kwargs):
        super(SACValueNetwork, self).__init__(scope=scope, **kwargs)

        # Add all sub-components to this one.
        if self.image_stack is not None:
            self.add_components(self.image_stack)
        self.concat_layer = ConcatLayer()
        self.add_components(self.concat_layer, self.dense_stack)

    def build_value_function(self):
        """
        Builds a dense stack and optionally an image stack.
        """
        if self.use_image_stack:
            image_components = []
            dense_components = []
            for layer_spec in self.network_spec:
                if layer_spec["type"] in ["conv2d", "reshape"]:
                    image_components.append(Layer.from_spec(layer_spec))

            self.image_stack = Stack(image_components, scope="image-stack")

            # Remaining layers should be dense.
            for layer_spec in self.network_spec[len(image_components):]:
                assert layer_spec["type"] == "dense", "Only expecting dense layers after image " \
                                                      "stack but found spec: {}.".format(layer_spec)
                dense_components.append(layer_spec)

            dense_components.append(self.value_layer)
            self.dense_stack = Stack(dense_components, scope="dense-stack")
        else:
            # Assume a dense network otherwise -> only a single stack.
            dense_components = []
            for layer_spec in self.network_spec:
                assert layer_spec["type"] == "dense", "Only dense layers allowed if not using" \
                                                      " image stack in this network."
                dense_components.append(Layer.from_spec(layer_spec))
            dense_components.append(self.value_layer)
            self.dense_stack = Stack(dense_components, scope="dense-stack")

    @rlgraph_api
    def state_action_value(self, states, actions, internal_states=None):
        """
        Computes Q(s,a) by passing states and actions through one or multiple processing stacks.
        """
        if self.use_image_stack:
            image_processing_output = self.image_stack.call(states)
            state_actions = self.concat_layer.call(image_processing_output,
                                                   actions)
            dense_output = self.dense_stack.call(state_actions)
        else:
            # Concat states and actions, then pass through.
            state_actions = self.concat_layer.call(states, actions)
            dense_output = self.dense_stack.call(state_actions)
        return dense_output
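
For reference, here is a minimal NumPy sketch (shapes and layer sizes are assumed; this is not RLgraph code) of what the dense-only branch of `state_action_value` computes: states and actions are concatenated along the last axis and pushed through the dense stack to produce one Q(s, a) value per batch item.

import numpy as np

batch_size, state_dim, action_dim = 32, 8, 2
states = np.random.random((batch_size, state_dim)).astype(np.float32)
actions = np.random.random((batch_size, action_dim)).astype(np.float32)

# The ConcatLayer step: merge state features and actions along the last axis.
state_actions = np.concatenate([states, actions], axis=-1)

# A hypothetical two-layer dense stack ending in a single-unit value layer.
w1 = np.random.randn(state_dim + action_dim, 64).astype(np.float32)
w2 = np.random.randn(64, 1).astype(np.float32)
hidden = np.tanh(state_actions @ w1)
q_values = hidden @ w2  # shape (32, 1): one Q(s, a) per batch item

print(q_values.shape)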
Example #2
    def build_image_processing_stack():
        """
        Constructs a ReShape preprocessor to fold the time rank into the batch rank.

        Then builds the 3 sequential Conv2D blocks that process the image information.
        Each of these 3 blocks consists of:
        - 1 Conv2D layer followed by a MaxPool2D
        - 2 residual blocks, each of which looks like:
            - ReLU + Conv2D + ReLU + Conv2D + element-wise add with original input

        Then adds: ReLU + fc(256) + ReLU.
        """
        # Collect components for image stack before unfolding time-rank going into main LSTM.
        sub_components = list()

        # Divide by 255
        sub_components.append(Divide(divisor=255, scope="divide-255"))

        for i, num_filters in enumerate([16, 32, 32]):
            # Conv2D plus MaxPool2D.
            conv2d_plus_maxpool = Stack(
                Conv2DLayer(filters=num_filters, kernel_size=3, strides=1, padding="same"),
                MaxPool2DLayer(pool_size=3, strides=2, padding="same"),
                scope="conv-max"
            )

            # Single unit for the residual layers (ReLU + Conv2D 3x3 stride=1).
            residual_unit = Stack(
                NNLayer(activation="relu"),  # single ReLU
                Conv2DLayer(filters=num_filters, kernel_size=3, strides=1, padding="same"),
                scope="relu-conv"
            )
            # Residual Layer.
            residual_layer = ResidualLayer(residual_unit=residual_unit, repeats=2)
            # Repeat same residual layer 2x.
            residual_repeater = RepeaterStack(sub_component=residual_layer, repeats=2)

            sub_components.append(Stack(conv2d_plus_maxpool, residual_repeater, scope="conv-unit-{}".format(i)))

        # A Flatten preprocessor, then an fc block surrounded by ReLUs.
        sub_components.extend([
            ReShape(flatten=True, scope="flatten"),  # Flattener (to flatten Conv2D output for the fc layer).
            NNLayer(activation="relu", scope="relu-1"),  # ReLU 1
            DenseLayer(units=256),  # Dense layer.
            NNLayer(activation="relu", scope="relu-2"),  # ReLU 2
        ])

        image_stack = Stack(sub_components, scope="image-stack")

        return image_stack
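
As a rough illustration of the architecture the docstring describes, here is a minimal Keras sketch (plain TensorFlow rather than RLgraph components; the input image shape is an assumption) of one conv unit followed by the flatten/fc head:

import tensorflow as tf

def conv_unit(x, num_filters):
    # Conv2D plus MaxPool2D, as in the "conv-max" Stack above.
    x = tf.keras.layers.Conv2D(num_filters, 3, strides=1, padding="same")(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same")(x)
    # Two residual blocks: (ReLU + Conv2D + ReLU + Conv2D) plus a skip connection.
    for _ in range(2):
        skip = x
        y = tf.keras.layers.ReLU()(x)
        y = tf.keras.layers.Conv2D(num_filters, 3, strides=1, padding="same")(y)
        y = tf.keras.layers.ReLU()(y)
        y = tf.keras.layers.Conv2D(num_filters, 3, strides=1, padding="same")(y)
        x = tf.keras.layers.Add()([skip, y])
    return x

inputs = tf.keras.Input(shape=(84, 84, 3))  # assumed image shape
x = tf.keras.layers.Lambda(lambda t: t / 255.0)(inputs)  # the Divide(divisor=255) step
for num_filters in [16, 32, 32]:
    x = conv_unit(x, num_filters)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Dense(256)(x)
outputs = tf.keras.layers.ReLU()(x)
image_stack_sketch = tf.keras.Model(inputs, outputs)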
Example #3
    def build_image_processing_stack():
        """
        Constructs a ReShape preprocessor to fold the time rank into the batch rank.

        Then builds the 2 Conv2D Layers followed by ReLUs.

        Then adds: fc(256) + ReLU.
        """
        # Collect components for image stack before unfolding time-rank going into main LSTM.
        sub_components = list()

        # Divide by 255
        sub_components.append(Divide(divisor=255, scope="divide-255"))

        for i, (num_filters, kernel_size, stride) in enumerate(zip([16, 32], [8, 4], [4, 2])):
            # Conv2D plus ReLU activation function.
            conv2d = Conv2DLayer(
                filters=num_filters, kernel_size=kernel_size, strides=stride, padding="same",
                activation="relu", scope="conv2d-{}".format(i)
            )
            sub_components.append(conv2d)

        # A Flatten preprocessor, then an fc layer followed by a ReLU.
        sub_components.extend([
            ReShape(flatten=True, scope="flatten"),  # Flattener (to flatten Conv2D output for the fc layer).
            DenseLayer(units=256),  # Dense layer.
            NNLayer(activation="relu", scope="relu-before-lstm"),
        ])

        # This stack was formerly named "stack_before_unfold".
        image_stack = Stack(sub_components, scope="image-stack")

        return image_stack
Example #4
    def test_reshape_with_batch_and_time_ranks_with_folding_and_unfolding_0D_shape(self):
        # Flip time and batch rank via folding, then unfolding.
        in_space = FloatBox(shape=(),
                            add_batch_rank=True,
                            add_time_rank=True,
                            time_major=True)
        reshape_fold = ReShape(fold_time_rank=True, scope="fold-time-rank")
        reshape_unfold = ReShape(unfold_time_rank=True,
                                 scope="unfold-time-rank",
                                 time_major=True)

        def custom_apply(self_, inputs):
            folded = reshape_fold.apply(inputs)
            unfolded = reshape_unfold.apply(folded, inputs)
            return unfolded

        stack = Stack(reshape_fold,
                      reshape_unfold,
                      api_methods={("apply", custom_apply)})

        test = ComponentTest(component=stack,
                             input_spaces=dict(inputs=in_space))

        # seq-len=16, batch-size=8
        inputs = in_space.sample(size=(16, 8))

        test.test(("apply", inputs), expected_outputs=inputs)
Example #5
    def test_reshape_with_batch_and_time_ranks_with_folding_and_explicit_unfolding(self):
        time_rank = 8
        in_space = FloatBox(shape=(2, 3),
                            add_batch_rank=True,
                            add_time_rank=True,
                            time_major=True)
        reshape_fold = ReShape(fold_time_rank=True, scope="fold-time-rank")
        reshape_unfold = ReShape(unfold_time_rank=time_rank,
                                 scope="unfold-time-rank",
                                 time_major=True)

        def custom_call(self_, inputs):
            folded = reshape_fold.call(inputs)
            # No need to pass the original input here, since the unfolding is explicit.
            unfolded = reshape_unfold.call(folded)
            return unfolded

        stack = Stack(reshape_fold,
                      reshape_unfold,
                      api_methods={("call", custom_call)})

        test = ComponentTest(component=stack,
                             input_spaces=dict(inputs=in_space))

        # seq-len=time_rank, batch-size=n
        inputs = in_space.sample(size=(time_rank, 12))

        test.test(("call", inputs), expected_outputs=inputs)
Example #6
    def test_reshape_with_batch_and_time_ranks_and_with_folding_and_unfolding(self):
        # Flip time and batch rank via folding, then unfolding.
        in_space = FloatBox(shape=(3, 2),
                            add_batch_rank=True,
                            add_time_rank=True,
                            time_major=False)
        reshape_fold = ReShape(fold_time_rank=True)
        reshape_unfold = ReShape(unfold_time_rank=True, time_major=False)

        def custom_call(self_, inputs):
            folded = reshape_fold.call(inputs)
            unfolded = reshape_unfold.call(folded, inputs)
            return unfolded

        stack = Stack(reshape_fold,
                      reshape_unfold,
                      api_methods={("call", custom_call)})

        test = ComponentTest(component=stack,
                             input_spaces=dict(inputs=in_space))

        # batch-size=4, seq-len=2
        inputs = in_space.sample(size=(4, 2))

        test.test(("call", inputs), expected_outputs=inputs)
Example #7
    def test_reshape_with_batch_vs_time_flipping_with_folding_and_unfolding(self):
        # Flip time and batch rank via folding, then unfolding.
        in_space = FloatBox(shape=(3, 2),
                            add_batch_rank=True,
                            add_time_rank=True,
                            time_major=False)
        reshape_fold = ReShape(fold_time_rank=True, scope="fold-time-rank")
        reshape_unfold = ReShape(unfold_time_rank=True,
                                 flip_batch_and_time_rank=True,
                                 scope="unfold-time-rank-with-flip",
                                 time_major=True)

        def custom_apply(self, inputs):
            folded = reshape_fold.apply(inputs)
            unfolded = reshape_unfold.apply(folded, inputs)
            return unfolded

        stack = Stack(reshape_fold,
                      reshape_unfold,
                      api_methods={("apply", custom_apply)})

        test = ComponentTest(component=stack,
                             input_spaces=dict(inputs=in_space))

        #test.test("reset")
        # batch-size=4, seq-len=2
        inputs = in_space.sample(size=(4, 2))
        # Flip the first two dimensions.
        expected = np.transpose(inputs, axes=(1, 0, 2, 3))

        test.test(("apply", inputs), expected_outputs=expected)
Example #8
    def build_text_processing_stack():
        """
        Helper function to build the text processing pipeline for both the large and small architectures, consisting of:
        - ReShape preprocessor to fold the incoming time rank into the batch rank.
        - StringToHashBucket Layer taking a batch of sentences and converting them to an indices table with:
          cols=length of the longest sentence in the input
          rows=number of items in the batch
          The cols dimension can be interpreted as the time rank for a subsequent LSTM. The StringToHashBucket
          Component returns the sequence length of each batch item for exactly that purpose.
        - EmbeddingLookup Layer of embedding size 20 with number of rows == num_hash_buckets (see previous layer).
        - LSTM processing the batched word sequences coming from the embedding layer.
        """
        num_hash_buckets = 1000

        # Create a hash bucket from the sentences and use that bucket to do an embedding lookup (instead of
        # a vocabulary).
        string_to_hash_bucket = StringToHashBucket(
            num_hash_buckets=num_hash_buckets)
        embedding = EmbeddingLookup(embed_dim=20,
                                    vocab_size=num_hash_buckets,
                                    pad_empty=True)
        # The time rank for the LSTM is now the sequence of words in a sentence, NOT the original env time rank.
        # We will only use the last output of the LSTM-64 for further processing as that is the output after having
        # seen all words in the sentence.
        # The original env stepping time rank is currently folded into the batch rank and must be unfolded again before
        # passing it into the main LSTM.
        lstm64 = LSTMLayer(units=64, scope="lstm-64", time_major=False)

        tuple_splitter = ContainerSplitter(tuple_length=2,
                                           scope="tuple-splitter")

        def custom_apply(self, inputs):
            hash_bucket, lengths = self.sub_components[
                "string-to-hash-bucket"].apply(inputs)

            embedding_output = self.sub_components["embedding-lookup"].apply(
                hash_bucket)

            # Return only the last output; we are not interested in intermediate results
            # for which the LSTM has not yet seen the entire sentence.
            # The last output is the final internal h-state (slot 1 in the returned LSTM tuple; slot 0 is the final c-state).
            lstm_output = self.sub_components["lstm-64"].apply(
                embedding_output, sequence_length=lengths)
            lstm_final_internals = lstm_output["last_internal_states"]

            # Need to split once more because the LSTM state is always a tuple of final c- and h-states.
            _, lstm_final_h_state = self.sub_components[
                "tuple-splitter"].split(lstm_final_internals)

            return lstm_final_h_state

        text_processing_stack = Stack(string_to_hash_bucket,
                                      embedding,
                                      lstm64,
                                      tuple_splitter,
                                      api_methods={("apply", custom_apply)},
                                      scope="text-stack")

        return text_processing_stack
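
Here is a rough TensorFlow sketch of the same pipeline idea (hash-bucket lookup instead of a vocabulary, then an embedding and an LSTM whose final h-state is kept). Names, shapes, and the padding handling are assumptions; this is not RLgraph's API.

import tensorflow as tf

num_hash_buckets = 1000
sentences = tf.constant(["hello world", "text processing with hash buckets"])

# Split into words, pad to a dense [batch, max_len] matrix, hash each word into a bucket.
words = tf.strings.split(sentences).to_tensor(default_value="")
bucket_ids = tf.strings.to_hash_bucket_fast(words, num_hash_buckets)

# Embedding lookup: num_hash_buckets rows, embedding size 20.
embedding_table = tf.Variable(tf.random.normal([num_hash_buckets, 20]))
embedded = tf.nn.embedding_lookup(embedding_table, bucket_ids)  # [batch, max_len, 20]

# LSTM over the word sequence; with return_state=True Keras returns
# (last_output, final_h, final_c) and we keep only the final h-state.
# (The real component also masks padding via the sequence lengths; omitted here.)
_, final_h, _ = tf.keras.layers.LSTM(units=64, return_state=True)(embedded)
print(final_h.shape)  # (batch, 64)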