def create_critic_network(self):
        """Build the recurrent critic network Q(s, a).

        Returns:
            batch_state_x:  state Input placeholder, shape [None, episode_length, s_dim].
            batch_action_x: action Input placeholder, shape [None, episode_length, a_dim].
            batch_y:        per-timestep Q-value estimates, shape [None, episode_length, 1].
        """
        ep_length = self.episode_length
        batch_state_x = Input(batch_shape=[None, ep_length, self.s_dim])
        batch_action_x = Input(batch_shape=[None, ep_length, self.a_dim])

        # State branch: per-timestep Dense -> BatchNorm -> ReLU.
        state_net = TimeDistributed(Dense(400))(batch_state_x)
        state_net = BatchNormalization()(state_net)
        state_net = Activation('relu')(state_net)

        # Action branch: same structure as the state branch.
        action_net = TimeDistributed(Dense(400))(batch_action_x)
        action_net = BatchNormalization()(action_net)
        action_net = Activation('relu')(action_net)

        # Merge the branches by summing two linear projections.
        # FIX: the previous code re-applied the projection weights via
        # tf.matmul(..., layer.get_weights()[0]); get_weights() returns numpy
        # snapshots, so those weights entered the graph as constants and were
        # never trained.  Summing the layer outputs computes the identical
        # quantity (s.W1 + b1 + a.W2 + b2) using the trainable variables, and
        # no flatten/reshape round-trip is needed.
        t1_layer_out = TimeDistributed(Dense(400))(state_net)
        t2_layer_out = TimeDistributed(Dense(400))(action_net)
        merged_net = Activation('relu')(t1_layer_out + t2_layer_out)

        # LSTM over the merged per-timestep features.
        rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                           state_is_tuple=True)
        lstm_outputs, _final_state = tf.nn.dynamic_rnn(rnn_cell, merged_net,
                                                       dtype=tf.float32)

        # Final dense layer.  FIX: w_init was created but never used; a small
        # uniform init keeps initial Q estimates near zero (standard DDPG
        # practice).
        w_init = RandomUniform(minval=-0.003, maxval=0.003)
        batch_y = Dense(1, kernel_initializer=w_init)(lstm_outputs)
        return batch_state_x, batch_action_x, batch_y
    def create_world_modeler_network(self):
        """Build the recurrent world-model network predicting next states.

        Returns:
            batchStateX:  state Input placeholder, shape [None, episode_length, s_dim].
            batchActionX: action Input placeholder, shape [None, episode_length, a_dim].
            batchStateY:  raw per-timestep state predictions, shape
                          [None, episode_length, s_dim].
            batchStateY_scaled_out: predictions scaled by self.state_bound.
        """
        ep_length = self.episode_length
        batchStateX = Input(batch_shape=[None, ep_length, self.s_dim])
        batchActionX = Input(batch_shape=[None, ep_length, self.a_dim])

        # State branch: per-timestep Dense with ReLU.
        state_net = TimeDistributed(Dense(400, activation='relu'))(batchStateX)

        # Action branch: same structure as the state branch.
        action_net = TimeDistributed(Dense(400,
                                           activation='relu'))(batchActionX)

        # Merge the branches by summing two linear projections.
        # FIX: the previous code re-applied the projection weights via
        # tf.matmul(..., layer.get_weights()[0]); get_weights() returns numpy
        # snapshots, so those weights entered the graph as constants and were
        # never trained.  Summing the layer outputs computes the identical
        # quantity (s.W1 + b1 + a.W2 + b2) using the trainable variables, and
        # no flatten/reshape round-trip is needed.
        t1_layer_out = TimeDistributed(Dense(400))(state_net)
        t2_layer_out = TimeDistributed(Dense(400))(action_net)
        merged_net = Activation('relu')(t1_layer_out + t2_layer_out)

        # LSTM over the merged per-timestep features.
        rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                           state_is_tuple=True)
        lstm_outputs, _final_state = tf.nn.dynamic_rnn(rnn_cell, merged_net,
                                                       dtype=tf.float32)

        # Final dense layer.  FIX: w_init was created but never used; wire the
        # small uniform init into the output layer as intended.
        w_init = RandomUniform(minval=-0.005, maxval=0.005)
        batchStateY = Dense(self.s_dim, kernel_initializer=w_init)(lstm_outputs)
        # Scale raw predictions into the environment's state range.
        batchStateY_scaled_out = tf.multiply(batchStateY, self.state_bound)

        return batchStateX, batchActionX, batchStateY, batchStateY_scaled_out