class DummyNNWithDictInput(NeuralNetwork):
    """
    Dummy NN with dict input: takes a dict with keys "a" and "b", passes each value through its own (parallel, not
    otherwise connected) dense layer, and concatenates the two outputs to yield the final output (see the NumPy
    sketch after this class).
    """

    def __init__(self, num_units_a=3, num_units_b=2, scope="dummy-nn-with-dict-input", **kwargs):
        super(DummyNNWithDictInput, self).__init__(scope=scope, **kwargs)

        self.num_units_a = num_units_a
        self.num_units_b = num_units_b

        # Splits the input into two streams.
        self.splitter = ContainerSplitter("a", "b")
        self.stack_a = DenseLayer(units=self.num_units_a, scope="dense-a")
        self.stack_b = DenseLayer(units=self.num_units_b, scope="dense-b")
        self.concat_layer = ConcatLayer()

        # Add all sub-components to this one.
        self.add_components(self.splitter, self.stack_a, self.stack_b, self.concat_layer)

    @rlgraph_api
    def call(self, input_dict):
        # Split the input dict into two streams.
        input_a, input_b = self.splitter.call(input_dict)

        # Get the two stack outputs.
        output_a = self.stack_a.call(input_a)
        output_b = self.stack_b.call(input_b)

        # Concat everything together, that's the output.
        concatenated_data = self.concat_layer.call(output_a, output_b)

        return concatenated_data
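A purely illustrative NumPy sketch of the data flow described in the docstring above (split the dict, run two parallel dense layers, concatenate). The weight matrices, shapes, and the absence of biases/activations are assumptions for illustration; this is not RLgraph code.

import numpy as np

# Mimic DummyNNWithDictInput.call() with plain NumPy.
rng = np.random.default_rng(0)
batch_size, dim_a, dim_b = 4, 5, 7
num_units_a, num_units_b = 3, 2

input_dict = {"a": rng.normal(size=(batch_size, dim_a)),
              "b": rng.normal(size=(batch_size, dim_b))}

# "Splitter": pull the two streams out of the dict.
input_a, input_b = input_dict["a"], input_dict["b"]

# Two parallel dense layers (random stand-in weights, no bias/activation).
w_a = rng.normal(size=(dim_a, num_units_a))
w_b = rng.normal(size=(dim_b, num_units_b))
output_a = input_a @ w_a
output_b = input_b @ w_b

# Concatenate along the last axis -> final output of shape (batch, 3 + 2).
concatenated = np.concatenate([output_a, output_b], axis=-1)
assert concatenated.shape == (batch_size, num_units_a + num_units_b)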
class IMPALANetwork(NeuralNetwork):
    """
    The base class for both "large and small architecture" versions of the networks used in [1].

    [1] IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures - Espeholt, Soyer,
        Munos et al. - 2018 (https://arxiv.org/abs/1802.01561)
    """
    def __init__(self,
                 worker_sample_size=100,
                 scope="impala-network",
                 **kwargs):
        """
        Args:
            worker_sample_size (int): How many time-steps an IMPALA actor will have performed in one rollout.
        """
        super(IMPALANetwork, self).__init__(scope=scope, **kwargs)

        self.worker_sample_size = worker_sample_size

        # Create all needed sub-components.

        # ContainerSplitter for the Env signal (a dict with 4 keys: env image, env text, previous action, and previous reward).
        self.splitter = ContainerSplitter("RGB_INTERLEAVED",
                                          "INSTR",
                                          "previous_action",
                                          "previous_reward",
                                          scope="input-splitter")

        # Fold the time rank into the batch rank.
        self.time_rank_fold_before_lstm = ReShape(
            fold_time_rank=True, scope="time-rank-fold-before-lstm")
        self.time_rank_unfold_before_lstm = ReShape(
            unfold_time_rank=True,
            time_major=True,
            scope="time-rank-unfold-before-lstm")

        # The Image Processing Stack (left side of "Large Architecture" Figure 3 in [1]).
        # Conv2D column + ReLU + fc(256) + ReLU.
        self.image_processing_stack = self.build_image_processing_stack()

        # The text processing pipeline: Takes a batch of string tensors as input, creates a hash-bucket thereof,
        # and passes the output of the hash bucket through an embedding-lookup(20) layer. The output of the embedding
        # lookup is then passed through an LSTM(64).
        self.text_processing_stack = self.build_text_processing_stack()

        #self.debug_slicer = Slice(scope="internal-states-slicer", squeeze=True)

        # The concatenation layer (concatenates outputs from image/text processing stacks, previous action/reward).
        self.concat_layer = ConcatLayer()

        # The main LSTM (feeding into the ActionAdapter of the Policy Component that uses this NN Component).
        # Use time-major as it's faster (according to the tf docs).
        self.main_lstm = LSTMLayer(units=256,
                                   scope="lstm-256",
                                   time_major=True,
                                   static_loop=self.worker_sample_size)

        # Add all sub-components to this one.
        self.add_components(
            self.splitter,
            self.image_processing_stack,
            self.text_processing_stack,
            self.concat_layer,
            self.main_lstm,
            self.time_rank_fold_before_lstm,
            self.time_rank_unfold_before_lstm,
            #self.debug_slicer
        )

    @staticmethod
    def build_image_processing_stack():
        """
        Builds the image processing pipeline for IMPALA and returns it.
        """
        raise NotImplementedError

    @staticmethod
    def build_text_processing_stack():
        """
        Helper function to build the text processing pipeline for both the large and small architectures, consisting of:
        - ReShape preprocessor to fold the incoming time rank into the batch rank.
        - StringToHashBucket layer taking a batch of sentences and converting them into an index table of dimensions:
          cols = length of the longest sentence in the input
          rows = number of items in the batch
          The cols dimension can be interpreted as the time rank for the subsequent LSTM. The StringToHashBucket
          Component returns the sequence length of each batch item for exactly that purpose.
        - Embedding Lookup Layer of embedding size 20 and number of rows == num_hash_buckets (see previous layer).
        - LSTM processing the batched sequences of words coming from the embedding layer as batches of rows.
          (A NumPy sketch of this pipeline follows after this class.)
        """
        num_hash_buckets = 1000

        # Create a hash bucket from the sentences and use that bucket to do an embedding lookup (instead of
        # a vocabulary).
        string_to_hash_bucket = StringToHashBucket(
            num_hash_buckets=num_hash_buckets)
        embedding = EmbeddingLookup(embed_dim=20,
                                    vocab_size=num_hash_buckets,
                                    pad_empty=True)
        # The time rank for the LSTM is now the sequence of words in a sentence, NOT the original env time rank.
        # We will only use the last output of the LSTM-64 for further processing as that is the output after having
        # seen all words in the sentence.
        # The original env stepping time rank is currently folded into the batch rank and must be unfolded again before
        # passing it into the main LSTM.
        lstm64 = LSTMLayer(units=64, scope="lstm-64", time_major=False)

        tuple_splitter = ContainerSplitter(tuple_length=2,
                                           scope="tuple-splitter")

        def custom_apply(self, inputs):
            hash_bucket, lengths = self.sub_components[
                "string-to-hash-bucket"].apply(inputs)

            embedding_output = self.sub_components["embedding-lookup"].apply(
                hash_bucket)

            # Return only the last output of the LSTM (we are not interested in intermediate outputs produced
            # before the LSTM has seen the entire sentence).
            # Last output is the final internal h-state (slot 1 in the returned LSTM tuple; slot 0 is final c-state).
            lstm_output = self.sub_components["lstm-64"].apply(
                embedding_output, sequence_length=lengths)
            lstm_final_internals = lstm_output["last_internal_states"]

            # Need to split once more because the LSTM state is always a tuple of final c- and h-states.
            _, lstm_final_h_state = self.sub_components[
                "tuple-splitter"].split(lstm_final_internals)

            return lstm_final_h_state

        text_processing_stack = Stack(string_to_hash_bucket,
                                      embedding,
                                      lstm64,
                                      tuple_splitter,
                                      api_methods={("apply", custom_apply)},
                                      scope="text-stack")

        return text_processing_stack

    @rlgraph_api
    def apply(self, input_dict, internal_states=None):
        # Split the input dict coming directly from the Env.
        _, _, _, orig_previous_reward = self.splitter.split(input_dict)

        folded_input = self.time_rank_fold_before_lstm.apply(input_dict)
        image, text, previous_action, previous_reward = self.splitter.split(
            folded_input)

        # Get the left-stack (image) and right-stack (text) output (see [1] for details).
        text_processing_output = self.text_processing_stack.apply(text)
        image_processing_output = self.image_processing_stack.apply(image)

        # Concat everything together.
        concatenated_data = self.concat_layer.apply(image_processing_output,
                                                    text_processing_output,
                                                    previous_action,
                                                    previous_reward)

        unfolded_concatenated_data = self.time_rank_unfold_before_lstm.apply(
            concatenated_data, orig_previous_reward)

        # Feed concat'd input into main LSTM(256).
        lstm_output = self.main_lstm.apply(unfolded_concatenated_data,
                                           internal_states)

        return lstm_output
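The text-processing pipeline described in build_text_processing_stack can be sketched with plain NumPy as below. This only illustrates the shapes and the "keep the final h-state" idea; the example sentences, the Python hash() stand-in for the hash bucket, and the random embedding table are assumptions, and this is not RLgraph code.

import numpy as np

num_hash_buckets, embed_dim = 1000, 20
sentences = ["go to the red door", "pick up the key"]

# Hash every word into one of `num_hash_buckets` buckets; pad to the longest sentence.
tokenized = [s.split() for s in sentences]
lengths = [len(t) for t in tokenized]
max_len = max(lengths)
hash_bucket = np.zeros((len(sentences), max_len), dtype=np.int64)  # rows = batch, cols = longest sentence
for row, words in enumerate(tokenized):
    for col, word in enumerate(words):
        hash_bucket[row, col] = hash(word) % num_hash_buckets

# Embedding lookup: one 20-dim vector per bucket index.
embedding_table = np.random.default_rng(0).normal(size=(num_hash_buckets, embed_dim))
embedded = embedding_table[hash_bucket]  # shape: (batch, max_len, embed_dim)
assert embedded.shape == (len(sentences), max_len, embed_dim)

# An LSTM-64 would consume `embedded` word by word (cols act as its time rank) and, using
# `lengths`, only its final h-state per sentence would be kept, giving text features of shape (batch, 64).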
    def __init__(self,
                 num_lstms,
                 units,
                 use_peepholes=False,
                 cell_clip=None,
                 static_loop=False,
                 forget_bias=1.0,
                 parallel_iterations=32,
                 return_sequences=True,
                 swap_memory=False,
                 skip_connections=None,
                 **kwargs):
        """
        Args:
            num_lstms (int): The number of LSTMs to stack deep.
            units (Union[List[int],int]): The number of units in the different LSTMLayers' cells.
            use_peepholes (Union[List[bool],bool]): True to enable diagonal/peephole connections from the c-state into
                each of the layers. Default: False.
            cell_clip (Optional[Union[List[float],float]]): If provided, the cell state is clipped by this value prior
                to the cell output activation. Default: None.
            static_loop (Union[bool,int]): If an int, will perform a static RNN loop (with fixed sequence lengths
                of size `static_loop`) instead of a dynamic one (where the lengths for each input can be different).
                In this case, time_major must be set to True (as transposing for this case has not been automated yet).
                Default: False.
            forget_bias (float): The forget gate bias to use. Default: 1.0.
            parallel_iterations (int): The number of iterations to run in parallel.
                Default: 32.
            return_sequences (bool): Whether to return one output for each input or only the last output.
                Default: True.
            swap_memory (bool): Transparently swap the tensors produced in forward inference but needed for back
                prop from GPU to CPU. This allows training RNNs which would typically not fit on a single GPU,
                with very minimal (or no) performance penalty.
                Default: False.
            skip_connections (Optional[List[List[bool]]]): An optional list of lists (2D) of bools indicating the skip
                connections for the input as well as outputs of each layer and whether these should be concatenated
                with the "regular" input for each layer. "Regular" here means the output from the previous layer.
                Example:
                A 4-layer LSTM:
                skip_connections=[
                    #   x    out0   out1   out2   out3    <- outputs (or x)
                                                          # layer 0 (never specified, only takes x as input)
                    [ True,  True, False, False, False],  # layer 1
                    True,                                 # layer 2 (plain True: concat x and all previous outputs)
                    [ False, False, False, True, False],  # layer 3
                    ...
                ]
                0) Layer0 does not need to be specified (only takes x, obviously).
                1) Layer1 takes x concatenated with the output of layer0.
                2) Layer2 takes x and both out0 and out1, all concatenated.
                3) Layer3 takes only out2 as input.
                4) A missing sub-list in the main `skip_connections` list means that this layer only takes the previous
                    layer's output (no further skip connections for that layer).
                A pure-Python sketch of how such a spec can be interpreted follows after this constructor.
        """
        super(MultiLSTMLayer, self).__init__(
            graph_fn_num_outputs=dict(
                _graph_fn_apply=2
            ),  # 2 outputs: the unrolled LSTM output and the final internal states (c- and h-state).
            scope=kwargs.pop("scope", "multi-lstm-layer"),
            activation=kwargs.pop("activation", "tanh"),
            **kwargs)

        self.num_lstms = num_lstms
        assert self.num_lstms > 1, "ERROR: Must have more than 1 LSTM layer for MultiLSTMLayer Component!"
        self.units = units
        self.use_peepholes = use_peepholes
        self.cell_clip = cell_clip
        self.static_loop = static_loop
        assert self.static_loop is False or (self.static_loop > 0 and self.static_loop is not True), \
            "ERROR: `static_loop` in LSTMLayer must either be False or an int value (is {})!".format(self.static_loop)
        self.forget_bias = forget_bias

        self.parallel_iterations = parallel_iterations
        self.return_sequences = return_sequences
        self.swap_memory = swap_memory
        self.skip_connections = skip_connections or [[] for _ in range(num_lstms + 1)]

        self.in_space = None

        # tf RNNCell
        # torch lstm and hidden state placeholder
        self.lstms = []
        # The concat layers to concat together the different skip_connection outputs.
        self.concat_layers = []
        self.hidden_states = None

        for i in range(self.num_lstms):
            # Per layer or global settings?
            units = self.units[i] if isinstance(self.units,
                                                (list, tuple)) else self.units
            use_peepholes = self.use_peepholes[i] if isinstance(self.use_peepholes, (list, tuple)) else \
                self.use_peepholes
            cell_clip = self.cell_clip[i] if isinstance(
                self.cell_clip, (list, tuple)) else self.cell_clip
            forget_bias = self.forget_bias[i] if isinstance(
                self.forget_bias, (list, tuple)) else self.forget_bias
            activation = self.activation[i] if isinstance(
                self.activation, (list, tuple)) else self.activation

            # Generate the single layers.
            self.lstms.append(
                LSTMLayer(
                    units=units,
                    use_peepholes=use_peepholes,
                    cell_clip=cell_clip,
                    static_loop=self.static_loop,
                    parallel_iterations=self.parallel_iterations,
                    forget_bias=forget_bias,
                    # Always return sequences except for last layer (there, return whatever the user wants).
                    return_sequences=True
                    if i < self.num_lstms - 1 else self.return_sequences,
                    scope="lstm-layer-{}".format(i),
                    swap_memory=self.swap_memory,
                    activation=activation))
            self.concat_layers.append(
                ConcatLayer(scope="concat-layer-{}".format(i)))

        self.add_components(*self.lstms)
        self.add_components(*self.concat_layers)
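The skip_connections format documented above can be read as follows. This pure-Python sketch only illustrates the documented semantics (missing entry = previous layer's output only, plain True = concat x with all previous outputs, a list of bools selects from [x, out0, out1, ...]); it is not RLgraph's actual implementation.

import numpy as np

def build_layer_input(skip_spec, x, prev_outputs):
    """Return the (concatenated) input one LSTM layer would receive under a given skip spec."""
    candidates = [x] + prev_outputs                  # [x, out0, out1, ...]
    if skip_spec is True:                            # plain True: concat x and all previous outputs
        return np.concatenate(candidates, axis=-1)
    if not skip_spec:                                # missing/empty entry: previous layer's output only
        return prev_outputs[-1]
    picked = [t for flag, t in zip(skip_spec, candidates) if flag]   # list of bools over candidates
    return np.concatenate(picked, axis=-1)

# Tiny check mirroring the 4-layer example in the docstring above.
x = np.ones((2, 3))
outs = [np.full((2, 4), float(i)) for i in range(3)]                  # out0, out1, out2
layer1_in = build_layer_input([True, True, False, False, False], x, outs[:1])
assert layer1_in.shape == (2, 3 + 4)                                  # x concatenated with out0
layer3_in = build_layer_input([False, False, False, True, False], x, outs)
assert layer3_in.shape == (2, 4)                                      # only out2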
    def __init__(
            self, action_space, world_option_model_network, encoder_network, num_features, num_mixtures, beta=0.2,
            post_phi_concat_network=None,
            reward_clipping=1.0,
            intrinsic_rewards_weight=0.1,
            concat_with_command_vector=False,
            optimizer=None, deterministic=False, scope="intrinsic-curiosity-world-option-model",
            **kwargs
    ):
        """
        Args:
            action_space (Space): The action Space to be fed into the model together with the latent feature vector
                for the states. Will be flattened automatically and then concatenated by this component.

            world_option_model_network (Union[NeuralNetwork,dict]): A specification dict (or NN object directly) to
                construct the world-option-model's neural network.

            encoder_network (Union[NeuralNetwork,dict]): A specification dict (or NN object directly) to
                construct the inverse dynamics model's encoder network leading from s to phi (feature vector).

            num_features (int): The size of the feature vectors phi.

            num_mixtures (int): The number of mixture Normals to use for the next-state distribution output.

            beta (float): The weight for the phi' loss (the action loss is then weighted by 1.0 - beta). A one-line
                illustration of this weighting follows after this constructor.

            post_phi_concat_network (Union[NeuralNetwork,dict]): A specification dict (or NN object directly) for the
                network that maps the concatenated phi and phi' to the action prediction.

            reward_clipping (float): 0.0 for no clipping; any other value clips rewards to +/- that value.
                Default: 1.0.

            concat_with_command_vector (bool): If True, this model needs an additional command vector (coming from the
                policy above) to concat it together with the latent state vector.

            optimizer (Optional[Optimizer]): The optimizer to use for supervised learning of the two networks
                (ICM and WOM).
        """
        self.num_features = num_features
        self.num_mixtures = num_mixtures
        self.deterministic = deterministic
        self.beta = beta
        assert 0.0 < self.beta < 1.0, "ERROR: `beta` must be between 0 and 1!"
        self.reward_clipping = reward_clipping
        self.intrinsic_rewards_weight = intrinsic_rewards_weight

        # Create the encoder network inside a SupervisedPredictor (so we get the adapter + distribution with it).
        self.state_encoder = SupervisedPredictor(
            network_spec=encoder_network, output_space=FloatBox(shape=(num_features,), add_batch_rank=True),
            scope="state-encoder"
        )

        # Create the container loss function for the two prediction tasks:
        # a) Action prediction and b) next-state prediction, each of them using a simple neg log likelihood loss
        # comparing the actual action and s' with their log-likelihood value vs the respective distributions.
        self.loss_functions = dict(
            # Action prediction loss (neg log likelihood of observed action vs the parameterized distribution).
            predicted_actions=NegativeLogLikelihoodLoss(
                distribution_spec=get_default_distribution_from_space(action_space),
                scope="action-loss"
            ),
            # s' prediction loss (neg log likelihood of observed s' vs the parameterized mixed normal distribution).
            predicted_phi_=NegativeLogLikelihoodLoss(distribution_spec=dict(type="mixture", _args=[
                "multi-variate-normal" for _ in range(num_mixtures)
            ]), scope="phi-loss")
        )

        # TODO: Support for command vector concatenation.
        #self.concat_with_command_vector = concat_with_command_vector

        # Define the Model's network's custom call method.
        def custom_call(self, inputs):
            phi = inputs["phi"]
            actions = inputs["actions"]
            phi_ = inputs["phi_"]
            actions_flat = self.get_sub_component_by_name("action-flattener").call(actions)
            concat_phis = self.get_sub_component_by_name("concat-phis").call(phi, phi_)
            # Predict the action that led from s to s'.
            predicted_actions = self.get_sub_component_by_name("post-phi-concat-nn").call(concat_phis)

            # Concat phi with flattened actions.
            phi_and_actions = self.get_sub_component_by_name("concat-states-and-actions").call(
                phi, actions_flat
            )
            # Add stop-gradient to phi here before predicting phi'
            # (the phis should only be trained by the inverse dynamics model, not by the world option model).
            # NOT DONE IN ORIGINAL PAPER's CODE AND ALSO NOT IN MLAGENTS EQUIVALENT.
            # phi_and_actions = self.get_sub_component_by_name("stop-gradient").stop(phi_and_actions)
            # Predict phi' (through a mixture gaussian distribution).
            predicted_phi_ = self.get_sub_component_by_name("wom-nn").call(phi_and_actions)

            return dict(
                # Predictions (actions and next-state-features (mixture distribution)).
                predicted_actions=predicted_actions,
                predicted_phi_=predicted_phi_
                ## Also return the two feature vectors for s and s'.
                #phi=phi, phi_=phi_
            )

        # Create the SupervisedPredictor's neural network.
        predictor_network = NeuralNetwork(
            # The world option model network taking action-cat-phi and mapping them to the predicted phi'.
            NeuralNetwork.from_spec(world_option_model_network, scope="wom-nn"),
            # The concat component concatenating both latent state vectors (phi and phi').
            ConcatLayer(scope="concat-phis"),
            # The NN mapping from phi-cat-phi' to the action prediction.
            NeuralNetwork.from_spec(post_phi_concat_network, scope="post-phi-concat-nn"),
            # The ReShape component for flattening all actions in arbitrary action spaces.
            ReShape(flatten=True, flatten_categories=True, flatten_containers=True, scope="action-flattener"),
            # The concat component concatenating latent state feature vector and incoming (flattened) actions.
            ConcatLayer(scope="concat-states-and-actions"),
            # Set the `call` method.
            api_methods={("call", custom_call)}
        )

        if optimizer is None:
            optimizer = dict(type="adam", learning_rate=3e-4)

        super(IntrinsicCuriosityWorldOptionModel, self).__init__(
            predictor=dict(
                network_spec=predictor_network,
                output_space=Dict({
                    "predicted_actions": action_space,
                    "predicted_phi_": FloatBox(shape=(self.num_features,))
                }, add_batch_rank=action_space.has_batch_rank, add_time_rank=action_space.has_time_rank),
                distribution_adapter_spec=dict(
                    # for `predicted_actions`: use default adapter
                    # for predicted_phi': use normal-mixture adapter & distribution.
                    predicted_phi_={"type": "normal-mixture-adapter", "num_mixtures": num_mixtures}
                ),
                deterministic=deterministic
            ),
            loss_function=self.loss_functions["predicted_actions"],
            optimizer=optimizer, scope=scope, **kwargs
        )

        self.add_components(self.state_encoder, self.loss_functions["predicted_phi_"])
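The beta weighting described in this constructor's docstring (beta for the phi' loss, 1 - beta for the action loss) can be written out as a one-liner. This is only a restatement of the documented convention, not the component's actual loss wiring, which lives in the two NegativeLogLikelihoodLoss sub-components.

def combined_icm_wom_loss(action_nll, phi_prime_nll, beta=0.2):
    # `beta` weights the next-state (phi') loss, `1 - beta` the action-prediction loss.
    assert 0.0 < beta < 1.0
    return beta * phi_prime_nll + (1.0 - beta) * action_nll

# Example: with beta=0.2 the action-prediction loss dominates.
print(combined_icm_wom_loss(action_nll=1.5, phi_prime_nll=0.8))  # 0.2 * 0.8 + 0.8 * 1.5 = 1.36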
class MultiInputStreamNeuralNetwork(NeuralNetwork):
    """
    A NeuralNetwork that takes n separate input-streams and feeds each of them separately through a different NN.
    The final outputs of these NNs are then all concatenated and fed further through an (optional) post-network.
    """
    def __init__(self, input_network_specs, post_network_spec=None, **kwargs):
        """
        Args:
            input_network_specs (Union[Dict[str,dict],Tuple[dict]]): A specification dict or tuple with values being
                the spec dicts for the single streams. The `call` method expects a dict input or a single tuple input
                (not as *args) in its first parameter.

            post_network_spec (Optional[Union[NeuralNetwork,dict]]): The specification dict of the post-concat
                network or the post-concat network object itself.
        """
        super(MultiInputStreamNeuralNetwork,
              self).__init__(scope="multi-input-stream-nn", **kwargs)

        # Create all streams' networks.
        if isinstance(input_network_specs, dict):
            self.input_stream_nns = {}
            for i, (flat_key, nn_spec) in enumerate(
                    flatten_op(input_network_specs).items()):
                self.input_stream_nns[flat_key] = NeuralNetwork.from_spec(
                    nn_spec, scope="input-stream-nn-{}".format(i))
            # Create the concat layer to merge all streams.
            self.concat_layer = ConcatLayer(dict_keys=list(
                self.input_stream_nns.keys()),
                                            axis=-1)
        else:
            assert isinstance(input_network_specs, (list, tuple)),\
                "ERROR: `input_network_specs` must be dict or tuple/list!"
            self.input_stream_nns = []
            for i, nn_spec in enumerate(input_network_specs):
                self.input_stream_nns.append(
                    NeuralNetwork.from_spec(
                        nn_spec, scope="input-stream-nn-{}".format(i)))
            # Create the concat layer to merge all streams.
            self.concat_layer = ConcatLayer(axis=-1)

        # Create the post-network (after the concat).
        self.post_nn = NeuralNetwork.from_spec(
            post_network_spec, scope="post-concat-nn")  # type: NeuralNetwork

        # Add all sub-Components.
        self.add_components(
            self.post_nn, self.concat_layer,
            *list(self.input_stream_nns.values() if isinstance(
                input_network_specs, dict) else self.input_stream_nns))

    @rlgraph_api
    def call(self, inputs):
        """
        Feeds each input through its respective input-stream network, concatenates the streams' outputs, and sends
        the concatenated result through the post-network (see the NumPy sketch after this class).
        """
        # Feed all inputs through their respective NNs.
        if isinstance(self.input_stream_nns, dict):
            outputs = {}
            # TODO: Support last-timestep returning LSTMs in input-stream-networks.
            for input_stream_flat_key, input_stream_nn in self.input_stream_nns.items():
                outputs[input_stream_flat_key] = input_stream_nn.call(inputs[input_stream_flat_key])
            # Concat everything.
            concat_output = self.concat_layer.call(outputs)
        else:
            outputs = []
            # TODO: Support last-timestep returning LSTMs in input-stream-networks.
            for i, input_stream_nn in enumerate(self.input_stream_nns):
                outputs.append(input_stream_nn.call(inputs[i]))
            # Concat everything.
            concat_output = self.concat_layer.call(*outputs)

        # Send everything through post-network.
        post_nn_out = self.post_nn.call(concat_output)

        return post_nn_out

    def add_layer(self, layer_component):
        """
        Overrides the default behavior: any newly added layer goes to the post-network (the most natural place for it).
        """
        return self.post_nn.add_layer(layer_component)
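As with the dict-input example at the top of this page, the stream-then-concat flow of `call` can be illustrated with plain NumPy. The stream names, feature sizes, and stand-in linear layers are assumptions for illustration; this is not RLgraph code.

import numpy as np

rng = np.random.default_rng(1)
batch = 4

# Two input streams, each with its own stand-in "network" (a random linear map).
inputs = {"image": rng.normal(size=(batch, 32)), "proprio": rng.normal(size=(batch, 8))}
stream_nets = {key: rng.normal(size=(value.shape[-1], 16)) for key, value in inputs.items()}

# Feed each stream through its own network, then concatenate along the last axis.
stream_outs = {key: inputs[key] @ stream_nets[key] for key in inputs}
concat_out = np.concatenate([stream_outs[key] for key in sorted(stream_outs)], axis=-1)

# The (optional) post-network then processes the concatenated features.
post_w = rng.normal(size=(concat_out.shape[-1], 10))
post_out = concat_out @ post_w
assert post_out.shape == (batch, 10)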