Ejemplo n.º 1
0
    def __init__(self, config: Config, dataset):
        """Example for convolutional network with convLSTM and convolutional output layer; Plots cell states, hidden
        states, X, y_, and an argmax over the convLSTM units outputs;
        
        The graph is unrolled in a Python for-loop: one step per sequence position of X, followed by
        config.tickersteps extra steps that feed the last frame of X again with tickerstep nodes enabled; the
        output of the last computed step is published as self.output;
        
        Command-line usage:
        >>> python3 samples/main_convlstm.py --config=samples/config_convlstm.json
        
        Example input shapes: [n_samples, n_sequence_positions, x_dim, y_dim, n_features]
        Example output shapes: [n_samples, 1, x_dim, y_dim, n_features] (with return_states=True),
        [n_samples, 1, n_features] (with return_states=False)
        
        Parameters
        -------
        config : Config
            Settings object; attributes read here: n_lstm, kernel_lstm_fwd, kernel_lstm_bwd, reduced_rec_lstm,
            lstm_act, store_states, kernel_conv, tickersteps, kernel_conv_out (only if reduced_rec_lstm is set) and
            optionally forgetgate (True is used if the attribute is missing)
        dataset
            Object providing X_shape and y_shape, which define the shapes of the input and label placeholders
        
        Sets self.X, self.y_ and self.output, and stores the layer list, the convLSTM layer and the plotting
        setup in private attributes;
        """
        #
        # Some convenience objects
        #
        # We will use a list to store all layers for regularization etc. (this is optional)
        layers = []
        # We will use xavier initialization later
        conv_W_initializer = weight_xavier_conv2d
        
        #
        # Create placeholders for feeding an input frame and a label at each sequence position
        #
        n_seq_pos = dataset.X_shape[1]  # dataset.X_shape is [sample, seq_pos, x, y, features]
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)
        y_ = tf.placeholder(tf.float32, shape=dataset.y_shape)  # dataset.y_shape is [sample, seq_pos, features]
        
        # ----------------------------------------------------------------------------------------------------------
        # Define network architecture
        # ----------------------------------------------------------------------------------------------------------
        
        #
        # Initialize input to network of shape [sample, 1, x, y, features] with zero tensor of size of a frame
        #
        input_shape = dataset.X_shape[:1] + (1,) + dataset.X_shape[2:]
        rnn_input_layer = RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32))
        layers.append(rnn_input_layer)
        
        #
        # ConvLSTM Layer
        #
        n_lstm = config.n_lstm  # number of output feature channels
        lstm_x_fwd = config.kernel_lstm_fwd  # x/y size of kernel for forward connections
        lstm_y_fwd = config.kernel_lstm_fwd  # x/y size of kernel for forward connections
        lstm_x_bwd = config.kernel_lstm_bwd  # x/y size of kernel for recurrent connections
        lstm_y_bwd = config.kernel_lstm_bwd  # x/y size of kernel for recurrent connections
        lstm_input_channels_fwd = rnn_input_layer.get_output_shape()[-1]  # number of input channels
        if config.reduced_rec_lstm:
            lstm_input_channels_bwd = config.reduced_rec_lstm  # number of recurrent connections (after squashing)
        else:
            lstm_input_channels_bwd = n_lstm  # number of recurrent connections
        
        # Here we create our kernels and biases for the convLSTM; See ConvLSTMLayer() documentation for more info;
        # Each W_* entry is a pair [forward kernel, recurrent kernel]
        lstm_init = dict(W_ci=[conv_W_initializer([lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                               conv_W_initializer([lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])],
                         W_ig=[conv_W_initializer([lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                               conv_W_initializer([lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])],
                         W_og=[conv_W_initializer([lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                               conv_W_initializer([lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])],
                         W_fg=[conv_W_initializer([lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                               conv_W_initializer([lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])],
                         b_ci=constant([n_lstm]),
                         b_ig=constant([n_lstm]),
                         b_og=constant([n_lstm]),
                         b_fg=constant([n_lstm], 1))
        
        print("\tConvLSTM...")
        conv_lstm = ConvLSTMLayer(incoming=rnn_input_layer, n_units=n_lstm, **lstm_init,
                                  a_out=get_rec_attr(tf, config.lstm_act),
                                  forgetgate=getattr(config, 'forgetgate', True), store_states=config.store_states,
                                  return_states=False, precomp_fwds=False, tickerstep_biases=tf.zeros)
        layers.append(conv_lstm)
        
        #
        # Optional feature squashing of recurrent convLSTM connections
        #  We can use an additional convolutional layer to squash the number of LSTM output features and use its output
        #  to replace the convLSTM recurrent connections
        #
        if config.reduced_rec_lstm:
            print("\tFeatureSquashing...")
            # Define a weight kernel and create the convolutional layer for squashing
            kernel = conv_W_initializer([config.kernel_conv_out, config.kernel_conv_out,
                                         conv_lstm.get_output_shape()[-1], config.reduced_rec_lstm])
            squashed_recurrences = ConvLayer(incoming=conv_lstm, W=kernel, padding='SAME',
                                             name='ConvLayerFeatureSquashing', a=tf.nn.elu)
            layers.append(squashed_recurrences)
            
            print("\tConvLSTMRecurrence...")
            # Overwrite the existing ConvLSTM recurrences with the output of the feature squashing layer
            conv_lstm.add_external_recurrence(squashed_recurrences)
        
        #
        # Conventional ConvLayer after convLSTM as output layer (tf.identity output function because of cross-entropy)
        #
        print("\tConvLayer...")
        output_layer = ConvLayer(incoming=conv_lstm,
                                 W=conv_W_initializer([config.kernel_conv, config.kernel_conv,
                                                       conv_lstm.get_output_shape()[-1], dataset.y_shape[-1]]),
                                 padding='SAME', name='ConvLayerSemanticSegmentation', a=tf.identity)
        layers.append(output_layer)
        
        # ----------------------------------------------------------------------------------------------------------
        #  Create graph through sequence positions and ticker steps
        # ----------------------------------------------------------------------------------------------------------
        
        #
        # Loop through sequence positions
        #
        print("\tRNN Loop...")
        for seq_pos in range(n_seq_pos):
            with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
                print("\t  seq. pos. {}...".format(seq_pos))
                # Set rnn input layer to current frame
                rnn_input_layer.update(X[:, seq_pos:seq_pos + 1, :])
                
                # Calculate new network state at new frame (this updates the network's hidden activations, cell states,
                # and dependencies automatically); we keep the result so that 'output' is also defined when
                # config.tickersteps is 0 (otherwise publishing self.output below would fail with an unbound local)
                output = output_layer.get_output()
        
        #
        # Loop through tickersteps
        #
        # Use last frame as input during ticker steps
        tickerstep_input = X[:, -1:, :]
        
        for tickerstep in range(config.tickersteps):
            with tf.name_scope("Tickerstep_{}".format(tickerstep)):
                print("\t  tickerstep {}...".format(tickerstep))
                
                # Set rnn input layer to tickerstep input
                rnn_input_layer.update(tickerstep_input)

                # Calculate new network state at new frame and activate tickerstep biases; this replaces the output
                # of the previous step, so the published output is the one of the last tickerstep (if any)
                output = output_layer.get_output(tickerstep_nodes=True)
                
        print("\tDone!")
        
        #
        # Publish
        #
        self.X = X
        self.y_ = y_
        self.output = output
        # We will use this list of layers for regularization in the main file
        self.__layers = layers
        # We will plot some parts of the convLSTM, so we will store it and set up some plotting functions
        self.__lstm_layer = conv_lstm
        self.__plot_dict, self.__plot_range_dict, self.__plotsink = self.__setup_plotting()
Ejemplo n.º 2
0
    def __init__(self, config: Config, dataset):
        """LSTM layer with a dense read-out layer, unrolled position by position in a Python for-loop; feeding the
        input one sequence position at a time is the most flexible design (see e.g. ArchitectureLSTM3);
        
        Command-line usage:
        Change entry
        "architecture": "sample_architectures.ArchitectureLSTM"
        to
        "architecture": "sample_architectures.ArchitectureLSTMFlexible" in samples/config_lstm.json. Then run
        >>> python3 samples/main_lstm.py --config=samples/config_lstm.json
        
        Example input shapes: [n_samples, n_sequence_positions, n_features]
        Example output shapes: [n_samples, n_sequence_positions, n_features] (with return_states=True),
        [n_samples, 1, n_features] (with return_states=False)
        
        Reads config.n_lstm and config.tickersteps as well as dataset.X_shape and dataset.y_shape; publishes the
        placeholders as self.X and self.y_, the network output as self.output, and keeps the list of layers for
        regularization in the main file;
        """
        # Xavier initializer shared by all weight matrices of this network
        xavier = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)
        
        # Placeholders for the input data and the labels (shape: [n_samples, n_sequence_positions, n_features])
        x_shape = dataset.X_shape
        y_shape = dataset.y_shape
        inputs = tf.placeholder(tf.float32, shape=x_shape)
        targets = tf.placeholder(tf.float32, shape=y_shape)
        
        # Shape of the slice fed per step: [sample, 1, features]
        step_shape = x_shape[:1] + (1,) + x_shape[2:]
        
        # ----------------------------------------------------------------------------------------------------------
        # Network architecture: input layer -> LSTM -> dense output layer
        # ----------------------------------------------------------------------------------------------------------
        
        # The RNNInputLayer holds the input of the current step; it starts out as a zeros-tensor
        input_layer = RNNInputLayer(tf.zeros(step_shape, dtype=tf.float32))
        
        print("\tLSTM...")
        lstm_settings = dict(
            W_ci=xavier, W_ig=xavier, W_og=xavier, W_fg=xavier,
            b_ci=tf.zeros, b_ig=tf.zeros, b_og=tf.zeros, b_fg=tf.zeros,
            a_ci=tf.tanh, a_ig=tf.sigmoid, a_og=tf.sigmoid, a_fg=tf.sigmoid, a_out=tf.nn.elu,
            c_init=tf.zeros, h_init=tf.zeros,
            forgetgate=True, precomp_fwds=True, return_states=True)
        recurrent_layer = LSTMLayer(incoming=input_layer, n_units=config.n_lstm, name='LSTM', **lstm_settings)
        
        print("\tOutput layer...")
        # The number of output units is the number of classes, i.e. the size of the last label dimension
        readout_layer = DenseLayer(incoming=recurrent_layer, n_units=y_shape[-1], name='DenseLayerOut',
                                   W=xavier, b=tf.zeros, a=tf.sigmoid)
        
        # ----------------------------------------------------------------------------------------------------------
        # Unroll the network over sequence positions and tickersteps
        # ----------------------------------------------------------------------------------------------------------
        
        print("\tRNN Loop...")
        for pos in range(x_shape[1]):
            with tf.name_scope("Sequence_pos_{}".format(pos)):
                print("\t  seq. pos. {}...".format(pos))
                # Feed the slice at the current sequence position and advance the LSTM by one step (this updates
                # hidden activations, cell states, and dependencies)
                input_layer.update(inputs[:, pos:pos + 1, :])
                recurrent_layer.get_output()
        
        # During tickersteps the network is fed zeros instead of data
        zero_step = tf.zeros(step_shape, dtype=tf.float32, name="tickerstep_input")
        for tick in range(config.tickersteps):
            with tf.name_scope("Tickerstep_{}".format(tick)):
                print("\t  tickerstep {}...".format(tick))
                input_layer.update(zero_step)
                recurrent_layer.get_output(tickerstep_nodes=True)
        
        # The LSTM is passed as prev_layers because it is already computed (i.e. its cell states must not be
        # modified any further when the output is calculated)
        prediction = readout_layer.get_output(prev_layers=[recurrent_layer])
        
        print("\tDone!")
        
        #
        # Publish
        #
        self.X = inputs
        self.y_ = targets
        self.output = prediction
        # All layers in creation order, for regularization in the main file
        self.__layers = [input_layer, recurrent_layer, readout_layer]
Ejemplo n.º 3
0
    def __init__(self, config: Config, dataset):
        """Architecture with LSTM layer followed by 2 dense layers and a dense output layer; The outputs of the 2 dense
        layers are used as additional recurrent connections for the LSTM; Inputs are fed to LSTM layer sequence
        position by sequence position in RNN loop; This is an advanced example, see ArchitectureLSTM to get started;
        
        Command-line usage:
        >>> python3 samples/main_lstm.py --config=samples/config_lstm3.json
        
        Example input shapes: [n_samples, n_sequence_positions, n_features]
        Example output shapes: [n_samples, n_sequence_positions, n_features] (with return_states=True),
        [n_samples, 1, n_features] (with return_states=False)
        
        Parameters
        -------
        config : Config
            Settings object; attributes read here: n_lstm, n_dense_units (iterable with one number of units per
            dense layer; must not be empty since the output layer is attached to the last dense layer) and
            tickersteps
        dataset
            Object providing X_shape and y_shape, which define the shapes of the input and label placeholders
        
        Sets self.X, self.y_ and self.output and stores the list of all layers for regularization;
        """
        #
        # Some convenience objects
        #
        # We will use a list to store all layers for regularization etc. (this is optional)
        layers = []
        # Prepare xavier initialization for weights
        w_init = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)

        #
        # Create placeholders for input data (shape: [n_samples, n_sequence_positions, n_features])
        #
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)
        y_ = tf.placeholder(tf.float32, shape=dataset.y_shape)
        n_output_units = dataset.y_shape[-1]  # nr of output features is number of classes
        n_seq_pos = dataset.X_shape[1]  # dataset.X_shape is [sample, seq_pos, features]
        
        # ----------------------------------------------------------------------------------------------------------
        # Define network architecture
        # ----------------------------------------------------------------------------------------------------------
        
        #
        # Input Layer
        #  RNNInputLayer will hold the input to network at each sequence position. We will initialize it with zeros-
        #  tensor of shape [sample, 1, features]
        #
        input_shape = dataset.X_shape[:1] + (1,) + dataset.X_shape[2:]
        rnn_input_layer = RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32))
        layers.append(rnn_input_layer)
        
        #
        # LSTM Layer
        #
        print("\tLSTM...")
        # We want to modify the number of recurrent connections -> we have to specify the shape of the recurrent weights
        # (the recurrent input is the concatenation of the LSTM output and all dense layer outputs, see below)
        rec_w_shape = (sum(config.n_dense_units) + config.n_lstm, config.n_lstm)
        # The forward weights can be initialized automatically, for the recurrent ones we will use our rec_w_shape
        lstm_w = [w_init, w_init(rec_w_shape)]
        lstm_layer = LSTMLayer(incoming=rnn_input_layer, n_units=config.n_lstm, name='LSTM',
                               W_ci=lstm_w, W_ig=lstm_w, W_og=lstm_w, W_fg=lstm_w,
                               b_ci=tf.zeros, b_ig=tf.zeros, b_og=tf.zeros, b_fg=tf.zeros,
                               a_ci=tf.tanh, a_ig=tf.sigmoid, a_og=tf.sigmoid, a_fg=tf.sigmoid, a_out=tf.nn.elu,
                               c_init=tf.zeros, h_init=tf.zeros, forgetgate=True, precomp_fwds=True, return_states=True)
        layers.append(lstm_layer)
        
        #
        # Dense Layers
        #
        print("\tDense layers...")
        dense_layers = list()
        for n_units in config.n_dense_units:
            dense_layers.append(DenseLayer(incoming=layers[-1], n_units=n_units, name='DenseLayer', W=w_init,
                                           b=tf.zeros, a=tf.nn.elu))
            # Register the new dense layer itself (not the previous layer again): this makes the next dense layer
            # stack on top of this one via layers[-1] and includes the dense layers in the regularization list
            layers.append(dense_layers[-1])
        
        #
        # Use dense layers as additional recurrent input to LSTM
        #
        full_lstm_input = ConcatLayer([lstm_layer] + dense_layers, name='LSTMRecurrence')
        lstm_layer.add_external_recurrence(full_lstm_input)
        
        #
        # Output Layer
        #
        print("\tOutput layer...")
        output_layer = DenseLayer(incoming=dense_layers[-1], n_units=n_output_units, name='DenseLayerOut', W=w_init,
                                  b=tf.zeros, a=tf.sigmoid)
        layers.append(output_layer)
        
        # ----------------------------------------------------------------------------------------------------------
        # Loop through sequence positions and create graph
        # ----------------------------------------------------------------------------------------------------------
        
        #
        # Loop through sequence positions
        #
        print("\tRNN Loop...")
        for seq_pos in range(n_seq_pos):
            with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
                print("\t  seq. pos. {}...".format(seq_pos))
                # Set rnn input layer to input at current sequence position
                rnn_input_layer.update(X[:, seq_pos:seq_pos + 1, :])
                
                # Calculate new lstm state (this automatically computes all dependencies, including rec. connections)
                _ = lstm_layer.get_output()
        
        #
        # Loop through tickersteps
        #
        # Use zeros as input during ticker steps
        tickerstep_input = tf.zeros(dataset.X_shape[:1] + (1,) + dataset.X_shape[2:], dtype=tf.float32,
                                    name="tickerstep_input")
        
        for tickerstep in range(config.tickersteps):
            with tf.name_scope("Tickerstep_{}".format(tickerstep)):
                print("\t  tickerstep {}...".format(tickerstep))
                # Set rnn input layer to the (all-zero) tickerstep input
                rnn_input_layer.update(tickerstep_input)

                # Calculate new lstm state (this automatically computes all dependencies, including rec. connections)
                _ = lstm_layer.get_output(tickerstep_nodes=True)
        
        #
        # Calculate output but consider that the lstm_layer is already computed
        #
        output = output_layer.get_output(prev_layers=[lstm_layer])
        
        print("\tDone!")
        
        #
        # Publish
        #
        self.X = X
        self.y_ = y_
        self.output = output
        # Store layers in list for regularization in main file
        self.__layers = layers
Ejemplo n.º 4
0
    def __init__(self, config: Config, dataset):
        """Example for a convolutional encoder network with optional convLSTM, convolutional output layer and
        upscaling of the output to the label frame size;
        
        Layer stack built here, in order: RNNInputLayer -> 'enc_dec_depth' times (ConvLayer + MaxPoolingLayer) ->
        ConvLSTMLayer (or a substitute ConvLayer if config.n_lstm is falsy) -> optional feature squashing ConvLayer
        used as external convLSTM recurrence (if config.reduced_rec_lstm is set) -> ConvLayer with n_classes output
        channels -> ScalingLayer;
        
        The graph is unrolled over all sequence positions of X and then over config.tickersteps tickersteps (which
        feed the last frame again); the outputs of all these steps are concatenated along axis 1 into self.output;
        
        Command-line usage:
        >>> python3 samples/main_convlstm_mnist.py --config=samples/config_convlstm_mnist.json
        
        Parameters
        -------
        config : Config
            Settings object; read via config.get_value(): enc_dec_depth, enc_dec_conv_maps_base, conv_W_initializer
            (called with 2, 16 and "weight_xavier_conv2d" as second argument, presumably the default values - TODO
            confirm); read as attributes: kernel_conv, kernel_conv_out, n_lstm, kernel_lstm_fwd, kernel_lstm_bwd,
            reduced_rec_lstm, lstm_act, forgetgate, store_states, lstm_output_dropout, tickersteps (some of them
            only in the branch that needs them)
        dataset
            Object providing mb_info; mb_info['X'][0] and mb_info['y'][0] are used as shapes of the input and label
            placeholders
        
        Sets self.X, self.y_, self.output, self.pixel_weights and self.lstm_layer and stores the list of layers;
        """
        import TeLL

        #
        # Some convenience objects
        #
        # We will use a list to store all layers for regularization etc. (this is optional)
        layers = []

        # Number of encoder stages, base number of feature maps, and name of the kernel initializer function
        depth = config.get_value("enc_dec_depth", 2)
        basenr_convs = config.get_value("enc_dec_conv_maps_base", 16)
        init_name = config.get_value("conv_W_initializer",
                                     "weight_xavier_conv2d")
        # The initializer is looked up by name in TeLL.initializations
        conv_W_initializer = getattr(TeLL.initializations, init_name)

        #
        # Create placeholders for feeding an input frame and a label at each sequence position
        #
        X_shape = dataset.mb_info['X'][
            0]  # X_shape is [sample, seq_pos, x, y, features]
        y_shape = dataset.mb_info['y'][0]
        # Shape of a single frame: same as X_shape but with only 1 sequence position
        frame_input_shape = X_shape[:1] + (1, ) + X_shape[2:]
        # NOTE(review): number of classes is hard-coded here - confirm that it matches the dataset
        n_classes = 11
        # Shape of the output for a single frame: label frame shape with an additional class dimension
        frame_output_shape = y_shape[:1] + (1, ) + y_shape[2:] + (n_classes, )
        n_seq_pos = X_shape[1]
        X = tf.placeholder(tf.float32, shape=X_shape)
        y_ = tf.placeholder(
            tf.int32,
            shape=y_shape)  # presumably y_shape is [sample, seq_pos, x, y] (integer class labels) - TODO confirm

        # ----------------------------------------------------------------------------------------------------------
        # Define network architecture
        # ----------------------------------------------------------------------------------------------------------

        #
        # Initialize input to network of shape [sample, 1, x, y, features] with zero tensor of size of a frame
        #
        rnn_input_layer = RNNInputLayer(
            tf.zeros(frame_input_shape, dtype=tf.float32))
        layers.append(rnn_input_layer)

        #
        # Encoder and maxpooling layers
        #  Each stage d appends a ConvLayer with basenr_convs * 2**d output channels and a MaxPoolingLayer
        #
        # NOTE(review): the encoder layers are collected in this list but the list is not used again in this method
        encoders = list()
        for d in range(1, depth + 1):
            print("\tConvLayerEncoder{}...".format(d))
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              basenr_convs * (2**d)
                          ]),
                          padding='SAME',
                          name='ConvLayerEncoder{}'.format(d),
                          a=tf.nn.elu))
            encoders.append(layers[-1])
            print("\tMaxpoolingLayer{}...".format(d))
            layers.append(
                MaxPoolingLayer(incoming=layers[-1],
                                ksize=(1, 3, 3, 1),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='MaxpoolingLayer{}'.format(d)))

        #
        # ConvLSTM Layer (only if config.n_lstm is set, otherwise a plain ConvLayer is used as substitute)
        #
        if config.n_lstm:
            n_lstm = config.n_lstm
            # The same kernel size is used in x and y direction, for forward (fwd) and recurrent (bwd) connections
            lstm_x_fwd = config.kernel_lstm_fwd
            lstm_y_fwd = config.kernel_lstm_fwd
            lstm_x_bwd = config.kernel_lstm_bwd
            lstm_y_bwd = config.kernel_lstm_bwd

            # Number of input channels of the forward kernels is the number of output channels of the last layer
            lstm_input_channels_fwd = layers[-1].get_output_shape()[-1]
            # Number of input channels of the recurrent kernels: the squashed number if feature squashing is used
            if config.reduced_rec_lstm:
                lstm_input_channels_bwd = config.reduced_rec_lstm
            else:
                lstm_input_channels_bwd = n_lstm

            # Kernels and biases for the convLSTM; each W_* entry is a pair [forward kernel, recurrent kernel]
            lstm_init = dict(W_ci=[
                conv_W_initializer(
                    [lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                conv_W_initializer(
                    [lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])
            ],
                             W_ig=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_og=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_fg=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             b_ci=constant([n_lstm]),
                             b_ig=constant([n_lstm]),
                             b_og=constant([n_lstm]),
                             b_fg=constant([n_lstm], 1))

            print("\tConvLSTM...")
            layers.append(
                ConvLSTMLayer(incoming=layers[-1],
                              n_units=n_lstm,
                              **lstm_init,
                              a_out=get_rec_attr(tf, config.lstm_act),
                              forgetgate=config.forgetgate,
                              store_states=config.store_states,
                              tickerstep_biases=tf.zeros,
                              output_dropout=config.lstm_output_dropout,
                              precomp_fwds=False))
            lstm_layer = layers[-1]

            #
            # Optional feature squashing
            #  An additional ConvLayer reduces the number of convLSTM output features to config.reduced_rec_lstm;
            #  its output is registered as external recurrence of the convLSTM
            #
            ext_lstm_recurrence = None

            if config.reduced_rec_lstm:
                print("\tFeatureSquashing...")
                layers.append(
                    ConvLayer(incoming=layers[-1],
                              W=conv_W_initializer([
                                  config.kernel_conv_out,
                                  config.kernel_conv_out,
                                  layers[-1].get_output_shape()[-1],
                                  config.reduced_rec_lstm
                              ]),
                              padding='SAME',
                              name='ConvLayerFeatureSquashing',
                              a=tf.nn.elu))
                print("\tConvLSTMRecurrence...")
                ext_lstm_recurrence = layers[-1]

            if ext_lstm_recurrence is not None:
                lstm_layer.add_external_recurrence(ext_lstm_recurrence)
        else:
            # No convLSTM requested: use a ConvLayer with int(basenr_convs * 2**depth * 4.5) feature maps instead
            print("\tSubstituteConvLayer...")
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              int(basenr_convs * (2**depth) * 4.5)
                          ]),
                          padding='SAME',
                          name='SubstituteConvLayer',
                          a=tf.nn.elu))
            # The substitute layer is published as self.lstm_layer below, although it is not an LSTM
            lstm_layer = layers[-1]

        #
        # ConvLayer for semantic segmentation (n_classes output channels, identity activation)
        #  Its input is the last layer in the list, i.e. the feature squashing layer if that one was created
        #
        print("\tConvLayerSemanticSegmentation...")
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv_out, config.kernel_conv_out,
                          layers[-1].get_output_shape()[-1], n_classes
                      ]),
                      padding='SAME',
                      name='ConvLayerSemanticSegmentation',
                      a=tf.identity))

        #
        # Upscaling layer (scales the output to the x/y size of a label frame)
        #
        # NOTE(review): this replaces the last list entry instead of appending, so the semantic segmentation
        # ConvLayer is no longer contained in 'layers' (it stays part of the network as input of the ScalingLayer);
        # confirm that excluding it from the layer list is intended
        print("\tUpscalingLayer...")
        layers[-1] = ScalingLayer(incoming=layers[-1],
                                  size=frame_output_shape[-3:-1],
                                  name='UpscalingLayergLayer')

        output_layer = layers[-1]

        # ----------------------------------------------------------------------------------------------------------
        #  Create graph through sequence positions and ticker steps
        # ----------------------------------------------------------------------------------------------------------
        # Collects the output of every sequence position and every tickerstep
        outputs_all_timesteps = []

        #
        # Loop through sequence positions
        #
        print("\tRNN Loop...")
        for seq_pos in range(n_seq_pos):
            with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
                print("\t  seq. pos. {}...".format(seq_pos))
                # Set rnn input layer to current frame
                rnn_input_layer.update(X[:, seq_pos:seq_pos + 1, :])

                # Calculate new network state at new frame (this updates the network's hidden activations, cell states,
                # and dependencies automatically)
                output = output_layer.get_output()
                outputs_all_timesteps.append(output)

        #
        # Loop through tickersteps
        #
        # Use last frame as input during ticker steps
        tickerstep_input = X[:, -1:, :]

        for tickerstep in range(config.tickersteps):
            with tf.name_scope("Tickerstep_{}".format(tickerstep)):
                print("\t  tickerstep {}...".format(tickerstep))

                # Set rnn input layer to tickerstep input
                rnn_input_layer.update(tickerstep_input)

                # Calculate new network state at new frame and activate tickerstep biases
                output = output_layer.get_output(tickerstep_nodes=True)
                outputs_all_timesteps.append(output)

        print("\tDone!")

        #
        # Publish
        #
        self.X = X
        self.y_ = y_
        # Outputs of all sequence positions and tickersteps, concatenated along axis 1
        self.output = tf.concat(outputs_all_timesteps,
                                axis=1,
                                name='outputs_all_timesteps')
        # Weights of 1 for every element of y_ (the line below, which would modify them, is disabled)
        pixel_weights = tf.ones_like(y_, dtype=tf.float32)
        # pixel_weights -= tf.cast(y_ == 0, dtype=tf.float32) * tf.constant(1.-0.2)
        self.pixel_weights = pixel_weights
        # We will use this list of layers for regularization in the main file
        self.__layers = layers
        # We will plot some parts of the lstm, so we make it accessible as attribute
        self.lstm_layer = lstm_layer
Ejemplo n.º 5
0
    def __init__(self, config: Config, dataset):
        """LSTM layer followed by a dense output layer, unrolled with tf.while_loop() instead of a Python for-loop.

        The input is handed to the LSTM layer one sequence position at a time inside tf.while_loop(). Compared to a
        for-loop this is less flexible and harder to use, but it can be faster and is optimized differently. Only the
        LSTM hidden and cell states are carried through the loop, so LSTM return_states is not available unless it is
        implemented manually in the loop; this example therefore only uses the prediction at the last sequence
        position. This is an advanced example, see ArchitectureLSTM to get started.

        Command-line usage:
        Change entry
        "architecture": "sample_architectures.ArchitectureLSTM"
        to
        "architecture": "sample_architectures.ArchitectureLSTMOptimized" in samples/config_lstm.json. Then run
        >>> python3 samples/main_lstm.py --config=samples/config_lstm.json

        Example input shapes: [n_samples, n_sequence_positions, n_features]
        Example output shapes: [n_samples, 1, n_features]
        """
        # Every layer is collected in this list so the main file can use it, e.g. for regularization (optional)
        layers = []
        # Xavier initializer (normal distribution) used for all weights below
        w_init = tf.contrib.layers.xavier_initializer(uniform=False, seed=None, dtype=tf.float32)

        # Placeholders for input and target data; the input has shape [n_samples, n_sequence_positions, n_features]
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)
        y_ = tf.placeholder(tf.float32, shape=dataset.y_shape)
        # The number of output units is the size of the last label dimension (number of classes)
        n_output_units = dataset.y_shape[-1]
        # dataset.X_shape is [sample, seq_pos, features]
        n_seq_pos = dataset.X_shape[1]

        # ----------------------------------------------------------------------------------------------------------
        # Network architecture
        # ----------------------------------------------------------------------------------------------------------

        # Input layer: holds the network input of the current sequence position; starts out as a zeros tensor of
        # shape [sample, 1, features]
        single_position_shape = dataset.X_shape[:1] + (1,) + dataset.X_shape[2:]
        rnn_input_layer = RNNInputLayer(tf.zeros(single_position_shape, dtype=tf.float32))
        layers.append(rnn_input_layer)

        # LSTM layer: same initializer for all weights, zero biases, zero initial states
        print("\tLSTM...")
        lstm_settings = dict.fromkeys(('W_ci', 'W_ig', 'W_og', 'W_fg'), w_init)
        lstm_settings.update(dict.fromkeys(('b_ci', 'b_ig', 'b_og', 'b_fg'), tf.zeros))
        lstm_settings.update(a_ci=tf.tanh, a_ig=tf.sigmoid, a_og=tf.sigmoid, a_fg=tf.sigmoid, a_out=tf.nn.elu,
                             c_init=tf.zeros, h_init=tf.zeros, forgetgate=True, precomp_fwds=True)
        lstm_layer = LSTMLayer(incoming=rnn_input_layer, n_units=config.n_lstm, name='LSTM', **lstm_settings)
        layers.append(lstm_layer)

        # Dense output layer on top of the LSTM
        print("\tOutput layer...")
        output_layer = DenseLayer(incoming=lstm_layer, n_units=n_output_units, name='DenseLayerOut', W=w_init,
                                  b=tf.zeros, a=tf.sigmoid)
        layers.append(output_layer)

        # ----------------------------------------------------------------------------------------------------------
        # Graph creation: unroll the LSTM with tf.while_loop
        # ----------------------------------------------------------------------------------------------------------

        def unroll(n_steps, select_input, scope_name, message, **output_kwargs):
            """Run the LSTM for `n_steps` iterations in a tf.while_loop and write the final states back to the layer.

            `select_input` maps the loop counter tensor to the slice of X that is fed at that iteration;
            `output_kwargs` are forwarded to lstm_layer.get_output().
            """

            def keep_going(step, *_):
                return step < n_steps

            def advance(step, h_prev, c_prev):
                # Feed the selected slice of X into the input layer
                rnn_input_layer.update(select_input(step))
                # Put the loop-carried states into the layer before computing the next state
                lstm_layer.h[-1], lstm_layer.c[-1] = h_prev, c_prev
                # Computing the output updates the hidden activations and cell states of the layer
                lstm_layer.get_output(**output_kwargs)
                return tf.add(step, 1), lstm_layer.h[-1], lstm_layer.c[-1]

            with tf.name_scope(scope_name):
                print(message)
                final_vars = tf.while_loop(cond=keep_going,
                                           body=advance,
                                           loop_vars=(tf.constant(0), lstm_layer.h[-1], lstm_layer.c[-1]),
                                           parallel_iterations=10,
                                           back_prop=True,
                                           swap_memory=True)
                lstm_layer.h[-1], lstm_layer.c[-1] = final_vars[-2], final_vars[-1]

        # Sequence positions: feed X position by position
        if n_seq_pos:
            unroll(n_seq_pos, lambda step: X[:, step:step + 1, :], "Sequence_pos", "\t  seq. pos. ...")

        # Tickersteps: keep feeding the last sequence position of X and activate the tickerstep nodes
        if config.tickersteps:
            unroll(config.tickersteps, lambda step: X[:, -1:, :], "Tickersteps", "\t  tickersteps ...",
                   tickerstep_nodes=True)

        # The LSTM state is already computed by the loops above, so the output layer is told not to recompute it
        output = output_layer.get_output(prev_layers=[lstm_layer])

        print("\tDone!")

        # Publish placeholders, output and the layer list (the latter for regularization in the main file)
        self.X = X
        self.y_ = y_
        self.output = output
        self.__layers = layers
Ejemplo n.º 6
0
    def __init__(self, config: Config, dataset):
        """Multi-scale convolutional network built from parallel dilated ConvLayers.

        Six ConvLayers with dilation rates 11, 9, 7, 5, 3 and 1 all read the input layer. Their outputs are
        concatenated, reduced to `enc_dec_conv_maps_base` feature maps by a 1x1 ConvLayer, passed through three more
        ConvLayers and finally mapped to 11 output feature maps by a linear ConvLayer named
        'ConvLayerSemanticSegmentation'.

        Values read from `config`:
            enc_dec_conv_maps_base (default 32): number of feature maps of the hidden ConvLayers
            include_org_label (default False): also create a placeholder of shape dataset.y_org_shape
            conv_W_initializer (default "weight_xavier_conv2d"): name of a function in TeLL.initializations
            kernel_conv, kernel_conv_out: kernel sizes of the hidden ConvLayers and of the output ConvLayer

        Published attributes: X, y_, output and, if include_org_label is set, y_org.
        """
        basenr_convs = config.get_value("enc_dec_conv_maps_base", 32)
        include_org_label = config.get_value("include_org_label", False)
        init_name = config.get_value("conv_W_initializer", "weight_xavier_conv2d")
        conv_W_initializer = getattr(TeLL.initializations, init_name)
        # If True, all dilated ConvLayers use one common weight tensor; otherwise each layer gets its own tensor
        shared = False
        # Dilation rates of the parallel ConvLayers, listed in the order in which the layers are created
        dilation_rates = (11, 9, 7, 5, 3, 1)

        #
        # Layer list
        #
        layers = list()

        #
        # Create placeholders for the input data and the labels
        #
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)  # dataset.X_shape is [sample, seq_pos, x, y, features]
        y_ = tf.placeholder(tf.int32, shape=dataset.y_shape)

        y_org = None
        if include_org_label:
            y_org = tf.placeholder(tf.int32, shape=dataset.y_org_shape)

        #
        # Input Layer, initialized with a zeros tensor of shape [sample, 1, x, y, features]
        #
        # NOTE(review): this method never calls update() on the input layer, so the output built below is computed
        # from the zeros tensor and not from X -- confirm which frame(s) of X are supposed to be fed here.
        input_shape = dataset.X_shape[:1] + (1, ) + dataset.X_shape[2:]
        layers.append(RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32)))

        #
        # Scaler Structure: one dilated ConvLayer per dilation rate, all reading the input layer
        #
        conv_weights_shape = [
            config.kernel_conv, config.kernel_conv,
            layers[-1].get_output_shape()[-1], basenr_convs
        ]
        shared_input_conv_weights = conv_W_initializer(conv_weights_shape) if shared else None
        for rate in dilation_rates:
            if shared:
                scale_weights = shared_input_conv_weights
            else:
                scale_weights = conv_W_initializer(conv_weights_shape)
            layers.append(
                ConvLayer(incoming=layers[0],
                          W=scale_weights,
                          a=tf.nn.elu,
                          dilation_rate=[rate, rate]))

        # concat feature maps of all scale levels and reduce the number of features with a 1x1 conv
        layers.append(ConcatLayer(incomings=layers[1:]))
        conv_weights_shape = [
            1, 1, layers[-1].get_output_shape()[-1], basenr_convs
        ]
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer(conv_weights_shape)))

        # add 3 more conv layers to have some depth; each one reads the layer created just before it
        conv_weights_shape = [
            config.kernel_conv, config.kernel_conv,
            layers[-1].get_output_shape()[-1], basenr_convs
        ]
        for _ in range(3):
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer(conv_weights_shape)))

        #
        # Output Layer: linear ConvLayer with 11 output feature maps
        #
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv_out, config.kernel_conv_out,
                          layers[-1].get_output_shape()[-1], 11
                      ]),
                      padding='SAME',
                      name='ConvLayerSemanticSegmentation',
                      a=tf.identity))
        sem_seg_layer = layers[-1]

        #
        # Publish
        #
        self.X = X
        self.y_ = y_
        self.output = sem_seg_layer.get_output()
        # Make the optional original-label placeholder reachable from outside so that it can be fed
        if include_org_label:
            self.y_org = y_org
Ejemplo n.º 7
0
    def __init__(self, config: Config, dataset):
        """Architecture for semantic segmentation as described in presentation using standard for loop.

        Layers are created in this order: an RNNInputLayer holding a single frame, `enc_dec_depth` encoder stages
        (ConvLayer + MaxPoolingLayer), a ConvLSTMLayer (or a plain ConvLayer substitute if config.n_lstm is falsy),
        the matching decoder stages (ScalingLayer + ConcatLayer with the encoder output + ConvLayer) and a linear
        ConvLayer with 11 output feature maps. The graph is then unrolled over all sequence positions in a Python
        for-loop.

        Published attributes: X, y_feed (the full label placeholder), y_ (labels from sequence position 10 onwards),
        output (network outputs from sequence position 10 onwards, concatenated along axis 1), lstm_layer and, if the
        config value "include_org_label" is set, y_org.
        """
        # Optional settings with defaults; the weight initializer is looked up by name in TeLL.initializations
        depth = config.get_value("enc_dec_depth", 2)
        basenr_convs = config.get_value("enc_dec_conv_maps_base", 16)
        include_org_label = config.get_value("include_org_label", False)
        init_name = config.get_value("conv_W_initializer",
                                     "weight_xavier_conv2d")
        conv_W_initializer = getattr(TeLL.initializations, init_name)

        #
        # Layer list
        #
        layers = list()

        #
        # Create placeholders for the input sequence and the labels
        #
        n_seq_pos = dataset.X_shape[
            1]  # dataset.X_shape is [sample, seq_pos, x, y, features)
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)
        y_ = tf.placeholder(tf.int32, shape=dataset.y_shape)

        # Optional additional label placeholder; it is published as self.y_org at the end of this method
        if include_org_label:
            y_org = tf.placeholder(tf.int32, shape=dataset.y_org_shape)

        # ----------------------------------------------------------------------------------------------------------
        # Define network architecture
        # ----------------------------------------------------------------------------------------------------------
        # initializer for weight values of kernels
        # conv_W_initializer = weight_xavier_conv2d

        #
        # Initialize input to network of shape [sample, 1, x, y, features] with zero tensor of size of a frame
        #
        input_shape = dataset.X_shape[:1] + (1, ) + dataset.X_shape[2:]
        layers.append(RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32)))
        # NOTE: this name is not referenced again in this method; the RNN loop below updates layers[0], which is
        # the same object
        rnn_input_layer = layers[-1]

        #
        # Encoder and maxpooling layers
        #
        # Encoder stage d has basenr_convs * 2**d output feature maps; the ConvLayer of every stage is remembered in
        # `encoders` because the decoder concatenates it with the upscaled features of the same stage
        encoders = list()
        for d in range(1, depth + 1):
            print("\tConvLayerEncoder{}...".format(d))
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              basenr_convs * (2**d)
                          ]),
                          padding='SAME',
                          name='ConvLayerEncoder{}'.format(d),
                          a=tf.nn.elu))
            encoders.append(layers[-1])
            print("\tMaxpoolingLayer{}...".format(d))
            layers.append(
                MaxPoolingLayer(incoming=layers[-1],
                                ksize=(1, 3, 3, 1),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='MaxpoolingLayer{}'.format(d)))

        #
        # ConvLSTM Layer
        #
        if config.n_lstm:
            n_lstm = config.n_lstm
            # Kernel sizes; the same value is used for both kernel dimensions
            lstm_x_fwd = config.kernel_lstm_fwd
            lstm_y_fwd = config.kernel_lstm_fwd
            lstm_x_bwd = config.kernel_lstm_bwd
            lstm_y_bwd = config.kernel_lstm_bwd

            # Input channels of the forward kernels: feature maps of the last layer created so far.
            # Input channels of the second kernel of each pair: config.reduced_rec_lstm if set, otherwise n_lstm
            lstm_input_channels_fwd = layers[-1].get_output_shape()[-1]
            if config.reduced_rec_lstm:
                lstm_input_channels_bwd = config.reduced_rec_lstm
            else:
                lstm_input_channels_bwd = n_lstm

            # Every W_* entry is a pair [kernel with fwd shape, kernel with bwd shape]; all biases are created with
            # constant([n_lstm]) except the forget gate bias, which is created with constant([n_lstm], 1)
            lstm_init = dict(W_ci=[
                conv_W_initializer(
                    [lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                conv_W_initializer(
                    [lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])
            ],
                             W_ig=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_og=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_fg=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             b_ci=constant([n_lstm]),
                             b_ig=constant([n_lstm]),
                             b_og=constant([n_lstm]),
                             b_fg=constant([n_lstm], 1))

            print("\tConvLSTM...")
            layers.append(
                ConvLSTMLayer(incoming=layers[-1],
                              n_units=n_lstm,
                              **lstm_init,
                              a_out=get_rec_attr(tf, config.lstm_act),
                              forgetgate=config.forgetgate,
                              comb=config.lstm_comb,
                              store_states=config.store_states,
                              tickerstep_biases=tf.zeros,
                              output_dropout=config.lstm_output_dropout,
                              precomp_fwds=False))
            lstm_layer = layers[-1]

            #
            # Optional maxpooling and upscaling of rec LSTM connections combined with/or optional feature squashing
            #
            # ext_lstm_recurrence ends up as the last layer created by the optional blocks below (the DeConvLayer,
            # or the feature squashing ConvLayer if that one is enabled as well); None if neither block is enabled
            ext_lstm_recurrence = None
            if config.lstm_rec_maxpooling:
                print("\tMaxpoolingDeconv...")
                layers.append(
                    MaxPoolingLayer(incoming=layers[-1],
                                    ksize=(1, 3, 3, 1),
                                    strides=(1, 2, 2, 1),
                                    padding='SAME',
                                    name='MaxPoolingLayer'))
                layers.append(
                    DeConvLayer(incoming=layers[-1],
                                a=tf.nn.elu,
                                W=conv_W_initializer([
                                    3, 3, layers[-1].get_output_shape()[-1],
                                    layers[-1].get_output_shape()[-1]
                                ]),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='DeConvLayer'))
                print("\tConvLSTMRecurrence...")
                ext_lstm_recurrence = layers[-1]

            if config.reduced_rec_lstm:
                print("\tFeatureSquashing...")
                layers.append(
                    ConvLayer(incoming=layers[-1],
                              W=conv_W_initializer([
                                  config.kernel_conv_out,
                                  config.kernel_conv_out,
                                  layers[-1].get_output_shape()[-1],
                                  config.reduced_rec_lstm
                              ]),
                              padding='SAME',
                              name='ConvLayerFeatureSquashing',
                              a=tf.nn.elu))
                print("\tConvLSTMRecurrence...")
                ext_lstm_recurrence = layers[-1]

            # Register the layer selected above as external recurrence of the ConvLSTM
            if ext_lstm_recurrence is not None:
                lstm_layer.add_external_recurrence(ext_lstm_recurrence)
        else:
            # Without LSTM a plain ConvLayer takes the place of the ConvLSTM layer
            print("\tSubstituteConvLayer...")
            # NOTE(review): n_lstm is set with factor 4 while the substitute layer below is created with
            # int(basenr_convs * (2**depth) * 4.5) output feature maps -- confirm that this mismatch is intended
            n_lstm = basenr_convs * (2**depth) * 4
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              int(basenr_convs * (2**depth) * 4.5)
                          ]),
                          padding='SAME',
                          name='SubstituteConvLayer',
                          a=tf.nn.elu))
            lstm_layer = layers[-1]

        #
        # Decoder and upscaling layers
        #
        # Stages are visited from the deepest (d = depth) back to the first (d = 1)
        for d in list(range(1, depth + 1))[::-1]:
            print("\tUpscalingLayer{}...".format(d))
            # The scaling target size is taken from dimensions [-3:-1] of the encoder output shape of this stage
            # (presumably the spatial x/y size -- TODO confirm).
            # NOTE(review): this assignment overwrites the last entry of `layers` instead of appending to it, so the
            # layer that feeds the ScalingLayer is no longer contained in `layers` (and hence not in self.__layers)
            # -- confirm that this is intended
            layers[-1] = ScalingLayer(
                incoming=layers[-1],
                size=encoders[d - 1].get_output_shape()[-3:-1],
                name='UpscalingLayergLayer{}'.format(d))

            print("\tConcatLayer{}...".format(d))
            layers.append(
                ConcatLayer([encoders[d - 1], layers[-1]],
                            name='ConcatLayer{}'.format(d)))

            print("\tConvLayerDecoder{}...".format(d))
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              basenr_convs * (2**d)
                          ]),
                          padding='SAME',
                          name='ConvLayerDecoder{}'.format(d),
                          a=tf.nn.elu))

        #
        # ConvLayer for semantic segmentation
        #
        # Linear (tf.identity) output layer with 11 output feature maps
        print("\tConvLayerSemanticSegmentation...")
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv_out, config.kernel_conv_out,
                          layers[-1].get_output_shape()[-1], 11
                      ]),
                      padding='SAME',
                      name='ConvLayerSemanticSegmentation',
                      a=tf.identity))
        sem_seg_layer = layers[-1]

        # ----------------------------------------------------------------------------------------------------------
        # Loop through sequence positions and create graph
        # ----------------------------------------------------------------------------------------------------------

        #
        # Loop through sequence positions
        #
        print("\tRNN Loop...")
        # One output tensor of the segmentation layer per sequence position
        sem_seg_out = list()
        for seq_pos in range(n_seq_pos):
            with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
                print("\t  seq. pos. {}...".format(seq_pos))
                # Set input layer to X at frame (t) and outputs of upper layers at (t-1)
                layers[0].update(X[:, seq_pos:seq_pos + 1, :])

                # Calculate new network output at (t), including new hidden states
                _ = lstm_layer.get_output()
                # The encoders and the lstm layer are passed as prev_layers (presumably so that their outputs at
                # this sequence position are reused instead of being computed again -- TODO confirm)
                sem_seg_out.append(
                    sem_seg_layer.get_output(prev_layers=encoders +
                                             [lstm_layer]))

        #
        # Loop through tickersteps
        #
        # NOTE: tickersteps are disabled in this architecture; the code below is kept commented out
        # # Use empty frame as X during ticker steps (did not work so good)
        # tickerstep_input = tf.zeros(dataset.X_shape[:1] + (1,) + dataset.X_shape[2:], dtype=tf.float32,
        #                             name="tickerframe")

        # Use last frame as X during ticker steps
        #tickerstep_input = X[:, -1:, :]
        #layers[0].update(tickerstep_input)

        #for tickerstep in range(config.tickersteps):
        #    with tf.name_scope("Tickerstep_{}".format(tickerstep)):
        #        print("\t  tickerstep {}...".format(tickerstep))
        #
        #        # Calculate new network output at (t), including new hidden states
        #        _ = lstm_layer.get_output(tickerstep_nodes=True)

        #sem_seg_out = sem_seg_layer.get_output(prev_layers=encoders + [lstm_layer])

        print("\tDone!")

        #
        # Publish
        #
        self.X = X
        # y_feed is the full label placeholder; y_ and output only cover sequence positions 10 and later
        # (hard-coded offset; presumably requires n_seq_pos > 10 -- TODO confirm)
        self.y_feed = y_
        self.y_ = y_[:, 10:]
        self.output = tf.concat(sem_seg_out[10:], 1)
        self.__layers = layers
        self.__n_lstm = n_lstm
        self.__lstm_layer = lstm_layer
        self.lstm_layer = lstm_layer
        # __setup_plotting is defined elsewhere in the class and returns three values that are stored here
        self.__plot_dict, self.__plot_range_dict, self.__plotsink = self.__setup_plotting(
            config)
        if include_org_label:
            self.y_org = y_org
Ejemplo n.º 8
0
# Shapes of the state and action inputs for a single timestep: the second dimension is fixed to 1
# (n_mb is presumably the minibatch size -- defined outside this excerpt, TODO confirm)
state_shape_per_ts = (n_mb, 1, n_features)
action_shape_per_ts = (n_mb, 1, n_actions)
# This will encode the in-game time in multiple nodes
timestep_encoder = TriangularValueEncoding(max_value=max_timestep,
                                           triangle_span=int(max_timestep /
                                                             10))

# Placeholders for states, actions and rewards; the second dimension is left unspecified (None)
states_placeholder = tf.placeholder(shape=(n_mb, None, n_features),
                                    dtype=tf.float32)
actions_placeholder = tf.placeholder(shape=(n_mb, None, n_actions),
                                     dtype=tf.float32)
rewards_placeholder = tf.placeholder(shape=(n_mb, None, 1), dtype=tf.float32)
# Size of the second dimension of the rewards tensor minus one, obtained at run time via tf.shape
n_timesteps = tf.shape(rewards_placeholder)[1] - 1

with tf.variable_scope('reward_redistribution_model', reuse=tf.AUTO_REUSE):
    state_input_layer = RNNInputLayer(
        tf.zeros(state_shape_per_ts, dtype=tf.float32))
    action_input_layer = RNNInputLayer(
        tf.zeros(action_shape_per_ts, dtype=tf.float32))
    time_input_layer = RNNInputLayer(
        timestep_encoder.encode_value(tf.constant(0, dtype=tf.int32)))
    time_input = ReshapeLayer(incoming=time_input_layer,
                              shape=(n_mb, 1, timestep_encoder.n_nodes_python))
    reward_redistibution_input = ConcatLayer(
        incomings=[state_input_layer, action_input_layer, time_input],
        name='RewardRedistributionInput')
    n_lstm_cells = 8
    truncated_normal_init = lambda mean, stddev: \
        lambda *args, **kwargs: tf.truncated_normal(mean=mean, stddev=stddev, *args, **kwargs)
    w_init = truncated_normal_init(mean=0, stddev=1)
    og_bias = truncated_normal_init(mean=0, stddev=1)
    ig_bias = truncated_normal_init(mean=-1, stddev=1)