def build_model(embedding_matrix, word_index, max_len, lstm_units,
                verbose=False, compile=True, multi=True, gpu_num=4):
    """Build a two-layer bidirectional CuDNNLSTM binary classifier.

    Token ids pass through a frozen embedding initialised from
    ``embedding_matrix``, spatial dropout (rate 0.3) and two stacked
    bidirectional CuDNNLSTM layers that return full sequences.  That sequence
    is summarised three ways (the externally defined ``Attention`` layer,
    global average pooling and global max pooling); the summaries are
    concatenated and fed to a single sigmoid unit.

    Args:
        embedding_matrix: Array whose ``shape`` is unpacked into the
            ``Embedding`` constructor and whose values are the initial weights.
        word_index: Not used by this function.
        max_len: Length of the input sequences; also passed to ``Attention``.
        lstm_units: Units per direction in each CuDNNLSTM layer.
        verbose: When true, print ``model.summary()``.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``acc`` metric.
        multi: When true, wrap the model in ``ModelMGPU``.
        gpu_num: Value forwarded as ``gpus`` to ``ModelMGPU``.

    Returns:
        The model, wrapped and/or compiled according to the flags.
    """
    tokens = L.Input(shape=(max_len,), dtype='int32')
    embedder = L.Embedding(*embedding_matrix.shape,
                           weights=[embedding_matrix],
                           trainable=False)
    hidden = embedder(tokens)
    hidden = L.SpatialDropout1D(0.3)(hidden)
    # Two identical recurrent layers stacked on top of each other.
    for _ in range(2):
        hidden = L.Bidirectional(
            L.CuDNNLSTM(lstm_units, return_sequences=True))(hidden)

    summaries = [
        Attention(max_len)(hidden),
        L.GlobalAveragePooling1D()(hidden),
        L.GlobalMaxPooling1D()(hidden),
    ]
    merged = L.concatenate(summaries)
    preds = L.Dense(1, activation='sigmoid')(merged)

    model = Model(tokens, preds)
    if multi:
        print('use multi gpus')
        model = ModelMGPU(model, gpus=gpu_num)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['acc'])
    return model
Ejemplo n.º 2
0
 def HorisontalySweepLayer(input, filter):
     """Run a forward and a backward CuDNNLSTM over a rotated feature map.

     The input tensor is passed through ``Model.rotateMatrix``, flattened to a
     sequence of ``height * width`` steps, processed by two CuDNNLSTM layers of
     ``int(filter / 2)`` units each (one with ``go_backwards=True``), reshaped
     back to ``(height, width, int(filter / 2))``, concatenated on the last
     axis and passed through ``Model.rotateMatrix`` again.

     Args:
         input: Keras tensor; its ``_keras_shape`` entries 1, 2 and 3 are read
             as height, width and channel count.
         filter: Total number of output channels (split across both LSTMs).

     Returns:
         The concatenated, rotated tensor.
     """
     rows, cols, depth = (input._keras_shape[i] for i in (1, 2, 3))
     steps = int(cols * rows)
     half = int(filter / 2)

     rotated = layers.Lambda(Model.rotateMatrix)(input)
     sequence = layers.Reshape((steps, int(depth)), name='')(rotated)

     # Forward sweep first, then the reversed sweep.
     sweeps = [
         layers.CuDNNLSTM(half, unit_forget_bias=True,
                          return_sequences=True)(sequence),
         layers.CuDNNLSTM(half, unit_forget_bias=True, go_backwards=True,
                          return_sequences=True)(sequence),
     ]
     grid = (int(rows), int(cols), half)
     sweeps = [layers.Reshape(grid, name='')(sweep) for sweep in sweeps]

     merged = layers.concatenate(inputs=sweeps, axis=-1)
     return layers.Lambda(Model.rotateMatrix)(merged)
def LYRICS_RNN(num_classes, emb_size):
    """Build a sequential embedding + 3-layer CuDNNLSTM model emitting logits.

    Args:
        num_classes: Width of the final ``Dense`` layer (no activation).
        emb_size: First positional argument of the ``Embedding`` layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    stack = [
        layers.Embedding(emb_size, output_dim=200, input_length=None,
                         name='embedding'),
        layers.CuDNNLSTM(units=256, return_sequences=True, name='rnn_layer_1'),
        layers.CuDNNLSTM(units=512, return_sequences=True, name='rnn_layer_2'),
        # Last recurrent layer keeps only its final output.
        layers.CuDNNLSTM(units=1024, return_sequences=False, name='rnn_out'),
        layers.Dense(num_classes, name='logits'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model
Ejemplo n.º 4
0
    def trainmodel(self, X=None, y=None, fit_args=None, use_generator=False, generator=None):
        """Build, compile and fit the next-token model, then store it on ``self``.

        The network is TimeDistributed(Dense(28)) -> CuDNNLSTM(64) ->
        Dropout(0.2) -> Dense(softmax) over ``len(self.tokens_unique)`` classes,
        trained with categorical cross-entropy and Adam (lr=0.01).

        Args:
            X: Training inputs, used when ``use_generator`` is false.
            y: Training targets, used when ``use_generator`` is false.
            fit_args: Optional dict of extra keyword arguments forwarded to
                ``fit`` / ``fit_generator``.  ``None`` means no extra arguments.
            use_generator: When true, train with ``fit_generator``.
            generator: Generator to train from; falls back to
                ``self.generator`` when falsy.

        Side effects:
            Sets ``self.model`` to the trained model.
        """
        # ``**None`` raises TypeError, so the default must become an empty dict.
        fit_kwargs = {} if fit_args is None else fit_args
        window_shape = (self.lookback, len(self.tokens_unique))

        model = keras.models.Sequential()
        model.add(layers.TimeDistributed(layers.Dense(28),
                                         input_shape=window_shape))
        model.add(layers.CuDNNLSTM(64, input_shape=window_shape))
        model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
        model.add(layers.Dense(len(self.tokens_unique), activation='softmax'))

        optimizer = keras.optimizers.Adam(lr=0.01)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)

        if use_generator:
            if not generator:
                generator = self.generator

            model.fit_generator(generator, **fit_kwargs)

        else:
            model.fit(x=X, y=y, **fit_kwargs)

        self.model = model
Ejemplo n.º 5
0
 def FirstVerticalysweepLayer(*args):
     """Halve the spatial size and sweep the result with two CuDNNLSTMs.

     Positional arguments, in order: image height, image width, channel count,
     input tensor.  Height and width are multiplied by 0.5; the input is
     reshaped to ``(steps, channels * 4)`` with ``steps = int(h/2 * w/2)``,
     processed by a forward and a ``go_backwards`` CuDNNLSTM of ``steps`` units
     each, reshaped to ``(h/2, w/2, steps)`` and concatenated on the last axis.

     Returns:
         ``[tensor, halved_height, halved_width, steps]`` -- the halved sizes
         are returned as the (float) products, and the reported channel count
         is ``steps``.
     """
     half_height = args[0] * 0.5
     half_width = args[1] * 0.5
     depth = args[2]
     tensor = args[3]

     steps = int(half_height * half_width)
     sequence = layers.Reshape((steps, depth * 4), name='')(tensor)

     # Forward sweep, then the reversed sweep.
     sweeps = [
         layers.CuDNNLSTM(steps, unit_forget_bias=True,
                          return_sequences=True)(sequence),
         layers.CuDNNLSTM(steps, unit_forget_bias=True, go_backwards=True,
                          return_sequences=True)(sequence),
     ]
     grid = (int(half_height), int(half_width), steps)
     sweeps = [layers.Reshape(grid, name='')(sweep) for sweep in sweeps]

     merged = layers.concatenate(inputs=sweeps, axis=-1)
     return [merged, half_height, half_width, steps]
Ejemplo n.º 6
0
 def HorisontalySweepLayer(*args):
     """Sweep a rotated feature map with a forward and a backward CuDNNLSTM.

     Positional arguments, in order: image height, image width, channel count,
     input tensor.  The tensor goes through ``Model.rotateMatrix``, is reshaped
     to ``(height * width * 2, channels)``, processed by two CuDNNLSTM layers
     of ``int(channels / 2)`` units (one with ``go_backwards=True``), reshaped
     to ``(height, width, channels)``, concatenated on the last axis and passed
     through ``Model.rotateMatrix`` again.

     Returns:
         ``[tensor, height, width, channels]`` with the sizes unchanged.
     """
     rows, cols, depth, tensor = args[0], args[1], args[2], args[3]

     steps = int(rows * cols * 2)
     half = int(depth / 2)

     rotated = layers.Lambda(Model.rotateMatrix)(tensor)
     sequence = layers.Reshape((steps, depth), name='')(rotated)

     sweeps = [
         layers.CuDNNLSTM(half, unit_forget_bias=True,
                          return_sequences=True)(sequence),
         layers.CuDNNLSTM(half, unit_forget_bias=True, go_backwards=True,
                          return_sequences=True)(sequence),
     ]
     grid = (int(rows), int(cols), depth)
     sweeps = [layers.Reshape(grid, name='')(sweep) for sweep in sweeps]

     merged = layers.concatenate(inputs=sweeps, axis=-1)
     merged = layers.Lambda(Model.rotateMatrix)(merged)
     return [merged, rows, cols, depth]
Ejemplo n.º 7
0
    def build_controller(self):
        """
        Builds controller computational graph

        For each of ``N_SUBPOL`` sub-policies a CuDNNLSTM reads the most recent
        input; ``N_OPS`` operations each get three softmax heads (type,
        probability, magnitude) on top of that LSTM.  The heads of one
        sub-policy are concatenated and passed through ``expand_dims`` to form
        the input of the next sub-policy's LSTM.

        On first use the architecture is also saved as JSON to
        ``LOG_DIR/controller_model/model.json``; an existing file is kept.

        Returns:
            A Keras model mapping the single input to the flat list of all
            softmax heads (sub-policy major, then operation, then
            type/prob/magn).
        """
        with tf.variable_scope("Controller"):
            input_layer = layers.Input(shape = INPUT_SHAPE)
            initializer = initializers.RandomUniform(minval=-0.1, maxval=0.1, seed=None)

            input_layers   = [input_layer]
            hidden_layers  = []
            output_softmaxes = []

            for i in range(N_SUBPOL):
                hidden_layers.append(layers.CuDNNLSTM(units = N_UNITS, kernel_initializer = initializer)(input_layers[-1]))
                output_layer = []
                for j in range(N_OPS):
                    name = "subpol_{}_operation_{}".format(i + 1, j + 1)
                    output_layer.extend([
                        layers.Dense(N_TYPES, activation ='softmax', name = name + '_type', kernel_initializer = initializer)(hidden_layers[-1]),
                        layers.Dense(N_PROBS, activation ='softmax', name = name + '_prob', kernel_initializer = initializer)(hidden_layers[-1]),
                        layers.Dense(N_MAG, activation ='softmax', name = name + '_magn', kernel_initializer = initializer)(hidden_layers[-1])
                    ])

                output_softmaxes.append(output_layer)
                # The concatenated heads become the next LSTM's input.
                input_layers.append(layers.Lambda(expand_dims)(layers.Concatenate()(output_layer)))
            output_list = [item for sublist in output_softmaxes for item in sublist]
            model = models.Model(input_layer, output_list)

        model_dir = os.path.join(LOG_DIR, "controller_model")
        json_path = os.path.join(model_dir, "model.json")
        if not os.path.isfile(json_path):
            # open() does not create parent directories; make sure it exists.
            os.makedirs(model_dir, exist_ok=True)
            with open(json_path, "w") as json_file:
                json_file.write(model.to_json())

        return model
Ejemplo n.º 8
0
def build_model(verbose=False, compile=True):
    """Build a single-layer bidirectional CuDNNLSTM binary classifier.

    Reads ``maxlen``, ``word_index`` and ``embedding_matrix`` from the
    enclosing scope (they are not parameters).  Token ids are embedded with a
    frozen 300-dimensional embedding, regularised with spatial dropout (0.2)
    and encoded by one bidirectional CuDNNLSTM (64 units per direction).  The
    sequence is summarised by ``Attention``, global average pooling and global
    max pooling, concatenated, and fed to one sigmoid unit.

    Args:
        verbose: When true, print ``model.summary()``.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``acc`` metric.

    Returns:
        The (optionally compiled) model.
    """
    tokens = L.Input(shape=(maxlen,), dtype='int32')
    embedder = L.Embedding(len(word_index) + 1,
                           300,
                           weights=[embedding_matrix],
                           input_length=maxlen,
                           trainable=False)
    encoded = embedder(tokens)
    encoded = L.SpatialDropout1D(0.2)(encoded)
    encoded = L.Bidirectional(L.CuDNNLSTM(64, return_sequences=True))(encoded)

    summaries = [
        Attention(maxlen)(encoded),
        L.GlobalAveragePooling1D()(encoded),
        L.GlobalMaxPooling1D()(encoded),
    ]
    merged = L.concatenate(summaries)
    preds = L.Dense(1, activation='sigmoid')(merged)

    model = Model(tokens, preds)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['acc'])
    return model
Ejemplo n.º 9
0
 def VerticalysweepUpscaleLayer(*args):
     """Sweep with two CuDNNLSTMs and reshape to twice the spatial size.

     Positional arguments, in order: image height, image width, channel count,
     input tensor.  The tensor is reshaped to
     ``(height * width, channels * 2)``, processed by a forward and a
     ``go_backwards`` CuDNNLSTM of ``channels`` units each, reshaped to
     ``(2 * height, 2 * width, channels / 4)`` and concatenated on the last
     axis.

     Returns:
         ``[tensor, 2 * height, 2 * width, int(channels / 2)]``.
     """
     rows, cols, depth, tensor = args[0], args[1], args[2], args[3]

     steps = int(rows * cols)
     sequence = layers.Reshape((steps, int(depth * 2)), name='')(tensor)

     sweeps = [
         layers.CuDNNLSTM(int(depth), unit_forget_bias=True,
                          return_sequences=True)(sequence),
         layers.CuDNNLSTM(int(depth), unit_forget_bias=True, go_backwards=True,
                          return_sequences=True)(sequence),
     ]
     # Doubling both spatial axes quarters the per-pixel channel count.
     grid = (int(rows * 2), int(cols * 2), int(depth / 4))
     sweeps = [layers.Reshape(grid, name='')(sweep) for sweep in sweeps]

     merged = layers.concatenate(inputs=sweeps, axis=-1)
     return [merged, rows * 2, cols * 2, int(depth / 2)]
Ejemplo n.º 10
0
 def get_lstm(self, size, return_sequences=True, name='monkeys'):
     """Return an LSTM layer, using the CuDNN variant when a GPU is available.

     Args:
         size: Number of units.
         return_sequences: Forwarded to the layer constructor.
         name: Layer name.

     Returns:
         ``layers.CuDNNLSTM`` if ``tf.test.is_gpu_available()`` is true,
         otherwise ``layers.LSTM``, built with the same arguments.
     """
     lstm_cls = layers.CuDNNLSTM if tf.test.is_gpu_available() else layers.LSTM
     return lstm_cls(size, return_sequences=return_sequences, name=name)
Ejemplo n.º 11
0
 def verticalSweepLayer(input, filter, Scale, Down):
     """Sweep a feature map with two CuDNNLSTMs while rescaling it.

     ``Scale`` is doubled internally.  The input is reshaped to
     ``(width * height / scale, channels * scale)`` and processed by a forward
     and a ``go_backwards`` CuDNNLSTM.  When ``Down`` is truthy each LSTM has
     ``int(filter / 2)`` units and the spatial axes are multiplied by
     ``2 / scale``; otherwise each LSTM has ``int(filter * scale * 2)`` units
     and the spatial axes are multiplied by ``scale / 2``.  Both outputs are
     reshaped to ``int(filter / 2)`` channels and concatenated on the last
     axis.

     Args:
         input: Keras tensor; ``_keras_shape`` entries 1, 2, 3 are read as
             height, width and channels.
         filter: Target number of output channels.
         Scale: Scale factor (doubled before use).
         Down: Selects the downscaling branch when truthy.

     Returns:
         The concatenated tensor.
     """
     rows, cols, depth = (input._keras_shape[i] for i in (1, 2, 3))
     factor = Scale * 2
     steps = int(cols * rows * (1 / factor))
     sequence = layers.Reshape((int(steps), int(depth * factor)),
                               name='')(input)

     if Down:
         units = int(filter / 2)
         ratio = 2 / factor
     else:
         units = int(filter * factor * 2)
         ratio = factor / 2

     sweeps = [
         layers.CuDNNLSTM(units, unit_forget_bias=True,
                          return_sequences=True)(sequence),
         layers.CuDNNLSTM(units, unit_forget_bias=True, go_backwards=True,
                          return_sequences=True)(sequence),
     ]
     grid = (int(rows * ratio), int(cols * ratio), int(filter / 2))
     sweeps = [layers.Reshape(grid, name='')(sweep) for sweep in sweeps]

     return layers.concatenate(inputs=sweeps, axis=-1)
Ejemplo n.º 12
0
    def build_model(self):
        """Build an actor (policy) model that maps states -> actions.

        The flat state vector is reshaped to ``(9, 3)`` (so ``state_size`` is
        expected to hold 27 values), run through two CuDNNLSTM layers and two
        Dense/BatchNorm/LeakyReLU blocks, and mapped by a sigmoid layer to
        ``action_size`` raw actions, which are then scaled by
        ``self.action_range`` and shifted by ``self.action_low``.

        Side effects:
            Sets ``self.model`` (states -> actions) and ``self.train_fn``, a
            backend function taking ``[states, action_gradients,
            learning_phase]`` that applies one Adam update minimising
            ``mean(-action_gradients * actions)``.
        """
        # Input: flat state vector.
        states = layers.Input(shape=(self.state_size,), name='states')

        # Turn the flat vector into a 9-step sequence for the recurrent layers.
        net = layers.Reshape((9, 3))(states)
        net = layers.CuDNNLSTM(units=16, return_sequences=True)(net)
        net = layers.CuDNNLSTM(units=32)(net)

        # Two L2-regularised dense blocks of increasing width.
        for width in (32, 64):
            net = layers.Dense(units=width,
                               kernel_regularizer=regularizers.l2(0.01))(net)
            net = layers.BatchNormalization()(net)
            net = layers.LeakyReLU(alpha=0.1)(net)

        # Sigmoid output, then rescale into the action range.
        raw_actions = layers.Dense(units=self.action_size,
                                   activation='sigmoid',
                                   name='raw_actions')(net)
        actions = layers.Lambda(
            lambda x: (x * self.action_range) + self.action_low,
            name='actions')(raw_actions)

        self.model = models.Model(inputs=states, outputs=actions)

        # Loss driven by action-value (Q value) gradients supplied at train time.
        action_gradients = layers.Input(shape=(self.action_size,))
        loss = K.mean(-action_gradients * actions)

        # Optimizer and the callable that applies its updates.
        adam = optimizers.Adam(lr=self.lr)
        updates_op = adam.get_updates(params=self.model.trainable_weights,
                                      loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients, K.learning_phase()],
            outputs=[],
            updates=updates_op)
Ejemplo n.º 13
0
def RNN_LARGE(input_shape, test_type, num_classes):
    """Build a three-layer CuDNNLSTM model with a task-dependent output.

    Args:
        input_shape: Shape passed to the ``Input`` layer (without batch axis).
        test_type: Selects the output activation: ``'sgc'`` -> softmax,
            ``'mgc'`` -> sigmoid, ``'cos'`` or ``'mse'`` -> linear.
        num_classes: Width of the ``logits`` Dense layer.

    Returns:
        An uncompiled Keras ``Model``.

    Raises:
        ValueError: If ``test_type`` is not one of the supported values.
    """
    # Validate before building any layers so a bad argument fails fast with a
    # clear message instead of an unbound-variable error at the very end.
    if test_type == 'sgc':
        output_activation = 'softmax'
    elif test_type == 'mgc':
        output_activation = 'sigmoid'
    elif test_type in ['cos', 'mse']:
        output_activation = 'linear'
    else:
        raise ValueError(
            "Unsupported test_type {!r}; expected 'sgc', 'mgc', 'cos' or "
            "'mse'".format(test_type))

    inputs = layers.Input(shape=input_shape)

    x = layers.CuDNNLSTM(units=256, return_sequences=True, name='rnn_layer_1')(inputs)

    x = layers.CuDNNLSTM(units=512, return_sequences=True, name='rnn_layer_2')(x)

    # Final recurrent layer keeps only its last output.
    x = layers.CuDNNLSTM(units=1024, return_sequences=False, name='rnn_out')(x)

    x = layers.Dense(num_classes, name='logits')(x)

    # The activation layer is named after the activation it applies.
    pred = layers.Activation(output_activation, name=output_activation)(x)

    return Model(inputs=inputs, outputs=pred)
Ejemplo n.º 14
0
    def build_model(self):
        """Build a critic (value) network that maps (state, action) pairs -> Q-values.

        State pathway: reshape to ``(9, 3)`` (so ``state_size`` is expected to
        hold 27 values), CuDNNLSTM(16), then two Dense/BatchNorm/LeakyReLU
        blocks (32 and 64 units).  Action pathway: two
        Dense/BatchNorm/LeakyReLU blocks (32 and 64 units).  The pathways are
        added, passed through ReLU and a Dense(32)/BatchNorm/ReLU block, and
        reduced to a single Q value.

        Side effects:
            Sets ``self.model`` (compiled with Adam and MSE loss) and
            ``self.get_action_gradients``, a backend function taking
            ``[states, actions, learning_phase]`` and returning the gradient of
            the Q values with respect to the actions.
        """
        # Define input layers
        states = layers.Input(shape=(self.state_size,), name='states')
        actions = layers.Input(shape=(self.action_size,), name='actions')

        # Add hidden layers for state pathway
        reshape = layers.Reshape((9, 3))(states)
        net_states = layers.CuDNNLSTM(units=16)(reshape)
        # Feed the LSTM output forward; wiring this Dense to ``states`` would
        # leave the recurrent layer disconnected from the model.
        net_states = layers.Dense(units=32)(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(alpha=0.3)(net_states)
        net_states = layers.Dense(units=64)(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(alpha=0.3)(net_states)

        # Add hidden layers for action pathway
        net_actions = layers.Dense(units=32)(actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(alpha=0.3)(net_actions)
        net_actions = layers.Dense(units=64)(net_actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(alpha=0.3)(net_actions)

        # Combine state and action pathways
        net = layers.Add()([net_states, net_actions])
        net = layers.Activation('relu')(net)

        # One more block on the combined representation
        net = layers.Dense(units=32)(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation('relu')(net)

        # Add final output layer to produce action values (Q values)
        Q_values = layers.Dense(units=1, name='q_values')(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Define optimizer and compile model for training with built-in loss function
        optimizer = optimizers.Adam(lr=self.lr)
        self.model.compile(optimizer=optimizer, loss='mse')

        # Compute action gradients (derivative of Q values w.r.t actions)
        action_gradients = K.gradients(Q_values, actions)

        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Ejemplo n.º 15
0
    def create_model(self):
        """Build the controller network inside the ``Controller`` variable scope.

        Five chained CuDNNLSTM steps (one per sub-policy) each feed two
        operations' worth of softmax heads (type, probability, magnitude).  The
        heads of one step are concatenated and passed through ``expand_dims``
        to become the next step's LSTM input.

        Returns:
            A Keras model mapping the single input to the flat list of all
            softmax heads, ordered by sub-policy, then operation, then
            type/prob/magn.
        """
        # Implementation note (from the original author): Keras requires an
        # input, so an input is created and zeros are fed to the network. Ugly,
        # but it is the same as disabling those weights. Furthermore, Keras
        # LSTM input=output, so we cannot produce more than SUBPOLICIES outputs.
        # This is not desirable, since the paper produces 25 subpolicies in the
        # end.
        head_specs = (
            (OP_TYPES, '_type'),
            (OP_PROBS, '_prob'),
            (OP_MAGNITUDES, '_magn'),
        )
        with tf.variable_scope("Controller"):
            root_input = layers.Input(shape=INPUT_SHAPE)
            init = initializers.RandomUniform(minval=-0.1,
                                              maxval=0.1,
                                              seed=None)

            step_input = root_input
            all_heads = []
            for subpol in range(1, 6):
                hidden = layers.CuDNNLSTM(
                    units=100, kernel_initializer=init)(step_input)

                step_heads = []
                for op in range(1, 3):
                    prefix = "subpol_{}_operation_{}".format(subpol, op)
                    for size, suffix in head_specs:
                        head = layers.Dense(size,
                                            activation='softmax',
                                            name=prefix + suffix,
                                            kernel_initializer=init)(hidden)
                        step_heads.append(head)

                all_heads.extend(step_heads)
                # This step's concatenated heads drive the next LSTM.
                step_input = layers.Lambda(expand_dims)(
                    layers.Concatenate()(step_heads))

            model = models.Model(root_input, all_heads)
        return model
        ''' 
    def __init__(self, use_cudnn_lstm=True, plot_model_architecture=False):
        """Build and compile a siamese LSTM similarity model.

        One LSTM layer (50 units) is shared by two variable-length inputs of
        300-dimensional vectors.  The two LSTM outputs are merged as
        ``1 - |left - right|`` and fed to a single sigmoid unit.

        Args:
            use_cudnn_lstm: Use ``layers.CuDNNLSTM`` (faster, per the original
                author) instead of ``layers.LSTM``.
            plot_model_architecture: When true, write the architecture diagram
                to ``siamese_architecture.png``.

        Side effects:
            Sets ``self.model``, calls ``self.__compile()`` and prints the
            model summary.
        """
        n_hidden = 50
        input_dim = 300

        # unit_forget_bias: Boolean. If True, add 1 to the bias of the forget
        # gate at initialization (recommended in Jozefowicz et al., per the
        # original author's note).
        # he_normal: Gaussian initialization scaled by fan_in (He et al., 2014)
        lstm_cls = layers.CuDNNLSTM if use_cudnn_lstm else layers.LSTM
        shared_lstm = lstm_cls(n_hidden,
                               unit_forget_bias=True,
                               kernel_initializer='he_normal',
                               kernel_regularizer='l2',
                               name='lstm_layer')

        # Left branch: variable-length sequences of ``input_dim``-sized vectors.
        left_input = Input(shape=(None, input_dim), name='input_1')
        left_output = shared_lstm(left_input)

        # Right branch: calling the same layer instance again reuses its weights.
        right_input = Input(shape=(None, input_dim), name='input_2')
        right_output = shared_lstm(right_input)

        # Classifier on top of the element-wise similarity of both encodings.
        l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
        merged = layers.merge([left_output, right_output],
                              mode=l1_norm,
                              output_shape=lambda x: x[0],
                              name='L1_distance')
        predictions = layers.Dense(1,
                                   activation='sigmoid',
                                   name='Similarity_layer')(merged)

        # Training this model updates the shared LSTM from both inputs.
        self.model = Model([left_input, right_input], predictions)

        self.__compile()
        print(self.model.summary())

        if plot_model_architecture:
            from keras.utils import plot_model
            plot_model(self.model, to_file='siamese_architecture.png')
Ejemplo n.º 17
0
def build_model(sentenceLength, word_index, verbose=False, compile=True):
    """Build a two-input (sentence + topic) bidirectional CuDNNLSTM classifier.

    Both inputs are embedded with separate frozen 300-dimensional embeddings
    initialised from ``embedding_matrix`` (read from the enclosing scope).
    ``Attention`` combines the two embedded sequences; its output is
    concatenated with a ``cosine_distance`` feature computed against the
    ``topic_mean`` of the topic embedding, encoded by a bidirectional
    CuDNNLSTM (128 units per direction), pooled (average and max) and mapped to
    three sigmoid outputs.  Several intermediate objects are printed.

    Args:
        sentenceLength: Length of both input sequences.
        word_index: Vocabulary mapping; only its length is used.
        verbose: When true, print ``model.summary()``.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``accuracy`` metric.

    Returns:
        The (optionally compiled) model.
    """
    def frozen_embedding():
        # Same configuration for the sentence and the topic embedding.
        return L.Embedding(len(word_index) + 1,
                           300,
                           weights=[embedding_matrix],
                           input_length=sentenceLength,
                           trainable=False)

    sentence_in = L.Input(shape=(sentenceLength, ), dtype='int32')
    print(sentence_in[0])
    topic_in = L.Input(shape=(sentenceLength, ), dtype='int32')
    print(topic_in.shape)

    sentence_embedder = frozen_embedding()
    print(sentence_embedder)
    topic_embedder = frozen_embedding()

    sentence_vecs = sentence_embedder(sentence_in)
    topic_vecs = topic_embedder(topic_in)
    attended = Attention(sentenceLength)([sentence_vecs, topic_vecs])

    topic_centre = Lambda(topic_mean,
                          output_shape=topic_mean_output_shape)(topic_vecs)
    distance = Lambda(cosine_distance,
                      output_shape=cos_dist_output_shape)(
                          [attended, topic_centre])

    features = concatenate([attended, distance])
    features = L.Bidirectional(
        L.CuDNNLSTM(128, return_sequences=True))(features)

    pooled = L.concatenate([
        L.GlobalAveragePooling1D()(features),
        L.GlobalMaxPooling1D()(features),
    ])
    preds = L.Dense(3, activation='sigmoid')(pooled)

    model = Model(inputs=[sentence_in, topic_in], outputs=preds)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['accuracy'])
    return model
Ejemplo n.º 18
0
def build_model(sentenceLength, word_index):
    """Build and compile a two-input BiLSTM + Conv1D classifier.

    Both inputs (sentence and topic token ids) are embedded with separate
    frozen 300-dimensional embeddings initialised from ``embedding_matrix``
    (read from the enclosing scope).  A ``cosine_distance`` feature between
    the sentence embedding and the ``topic_mean`` of the topic embedding is
    concatenated to the sentence embedding, which is then encoded by a
    bidirectional CuDNNLSTM (96 units per direction) and a Conv1D (64 filters,
    kernel 3, valid padding), pooled (average and max) and mapped to three
    sigmoid outputs.  Several intermediate objects are printed.

    Args:
        sentenceLength: Length of both input sequences.
        word_index: Vocabulary mapping; only its length is used.

    Returns:
        The model compiled with binary cross-entropy, ``Adam(lr=1e-3)`` and
        the ``accuracy`` metric.
    """
    sequence_input = L.Input(shape=(sentenceLength,), dtype='int32')
    print(sequence_input[0])
    topic_sequence_input = L.Input(shape=(sentenceLength,), dtype='int32')
    print(topic_sequence_input.shape)

    embedding_layer = L.Embedding(len(word_index) + 1,
                                  300,
                                  weights=[embedding_matrix],
                                  input_length=sentenceLength,
                                  trainable=False)
    print(embedding_layer)
    topic_embedding_layer = L.Embedding(len(word_index) + 1,
                                        300,
                                        weights=[embedding_matrix],
                                        input_length=sentenceLength,
                                        trainable=False)
    x = embedding_layer(sequence_input)
    topic_x = topic_embedding_layer(topic_sequence_input)

    topic_mean_x = Lambda(topic_mean,
                          output_shape=topic_mean_output_shape)(topic_x)
    distance = Lambda(cosine_distance,
                      output_shape=cos_dist_output_shape)([x, topic_mean_x])

    x = concatenate([x, distance])

    x = Bidirectional(L.CuDNNLSTM(96, return_sequences=True))(x)
    x = Conv1D(64,
               kernel_size=3,
               padding="valid",
               kernel_initializer="glorot_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    x = concatenate([avg_pool, max_pool])

    preds = Dense(3, activation="sigmoid")(x)
    print(preds.shape)

    model = Model(inputs=[sequence_input, topic_sequence_input], outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
Ejemplo n.º 19
0
def build_model():
    """Build and compile a BiLSTM + Conv1D classifier with a trainable embedding.

    Inputs are sequences of 150 token ids drawn from a vocabulary of 100000,
    embedded into 300 dimensions, encoded by a bidirectional CuDNNLSTM (96
    units per direction) and a Conv1D (64 filters, kernel 3, valid padding),
    pooled (average and max) and mapped to three sigmoid outputs.

    Returns:
        The model compiled with binary cross-entropy, ``Adam(lr=1e-3)`` and
        the ``accuracy`` metric.
    """
    maxlen = 150
    embed_size = 300
    max_features = 100000

    tokens = Input(shape=(maxlen, ))
    features = Embedding(max_features, embed_size)(tokens)
    features = Bidirectional(L.CuDNNLSTM(96, return_sequences=True))(features)
    features = Conv1D(64,
                      kernel_size=3,
                      padding="valid",
                      kernel_initializer="glorot_uniform")(features)

    pooled = concatenate([
        GlobalAveragePooling1D()(features),
        GlobalMaxPooling1D()(features),
    ])
    preds = Dense(3, activation="sigmoid")(pooled)

    model = Model(tokens, preds)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
def build_model(embedding_matrix, word_index, verbose=False, compile=True):
    """Build a single-layer bidirectional CuDNNLSTM binary classifier.

    Sequence length and LSTM width come from ``MAX_LEN`` and ``LSTM_UNITS`` in
    the enclosing scope.  Token ids pass through a frozen embedding
    initialised from ``embedding_matrix``, spatial dropout (0.2) and one
    bidirectional CuDNNLSTM; the sequence is summarised by ``Attention``,
    global average pooling and global max pooling, concatenated, and fed to
    one sigmoid unit.

    Args:
        embedding_matrix: Array whose ``shape`` is unpacked into the
            ``Embedding`` constructor and whose values are the initial weights.
        word_index: Not used by this function.
        verbose: When true, print ``model.summary()``.
        compile: When true, compile with binary cross-entropy, ``Adam(0.005)``
            and the ``acc`` metric.

    Returns:
        The (optionally compiled) model.
    """
    logger.info('Build model')
    tokens = L.Input(shape=(MAX_LEN, ), dtype='int32')
    embedder = L.Embedding(*embedding_matrix.shape,
                           weights=[embedding_matrix],
                           trainable=False)
    encoded = embedder(tokens)
    encoded = L.SpatialDropout1D(0.2)(encoded)
    encoded = L.Bidirectional(
        L.CuDNNLSTM(LSTM_UNITS, return_sequences=True))(encoded)

    summaries = [
        Attention(MAX_LEN)(encoded),
        L.GlobalAveragePooling1D()(encoded),
        L.GlobalMaxPooling1D()(encoded),
    ]
    merged = L.concatenate(summaries)
    preds = L.Dense(1, activation='sigmoid')(merged)

    model = Model(tokens, preds)
    if verbose:
        model.summary()
    if compile:
        model.compile(loss='binary_crossentropy',
                      optimizer=Adam(0.005),
                      metrics=['acc'])
    return model
Ejemplo n.º 21
0
                                dtype='int64',
                                name='prehist_tracks_input')
# Encode the "prehist" track ids with the shared embed -> batch-norm ->
# transformer layers (all three are defined outside this excerpt).
x2 = track_embed(prehist_tracks_input)
x2 = track_bn(x2)
x2 = track_transformer(x2)

# Second track-id input ("topred"); it reuses the same three layer instances,
# so it shares their weights with the branch above.
topred_tracks_input = kl.Input(shape=(None, ),
                               dtype='int64',
                               name='topred_tracks_input')
x3 = track_embed(topred_tracks_input)
x3 = track_bn(x3)
x3 = track_transformer(x3)

# Join x1 (defined outside this excerpt) and x2 along the last axis and
# summarise the result with a bidirectional LSTM that returns one vector.
x = kl.concatenate([x1, x2], axis=-1)
lstm1 = kl.Bidirectional(
    kl.CuDNNLSTM(64, return_sequences=False, return_state=False, name='lstm1'))
prehist_sc_1 = lstm1(x)

# Join x2 and x3 along axis 1 and summarise with a second bidirectional LSTM.
# NOTE(review): axis 1 is presumably the time axis, i.e. the two track
# sequences are laid end to end -- confirm against the input layout.
x = kl.concatenate([x2, x3], axis=1)
lstm2 = kl.Bidirectional(
    kl.CuDNNLSTM(64, return_sequences=False, return_state=False, name='lstm2'))
prehist_sc_2 = lstm2(x)

# Combined summary of both LSTM outputs.
prehist_sc = kl.concatenate([prehist_sc_1, prehist_sc_2])


def repeat_vector(args):
    """Repeat a tensor once per step of a reference sequence.

    Args:
        args: Indexable pair ``(layer_to_repeat, sequence_layer)``.

    Returns:
        ``layer_to_repeat`` passed through ``kl.RepeatVector`` with a repeat
        count equal to the size of axis 1 of ``sequence_layer`` (read via
        ``K.shape``).
    """
    to_repeat, reference = args[0], args[1]
    n_steps = K.shape(reference)[1]
    return kl.RepeatVector(n_steps)(to_repeat)
Ejemplo n.º 22
0
# Add a trailing axis of size 1 so each image is (img_height, img_width, 1),
# matching the shape declared for ``input2`` below.
written_train0 = written_train.reshape(written_train.shape[0], img_height,
                                       img_width, 1)
written_test0 = written_test.reshape(written_test.shape[0], img_height,
                                     img_width, 1)

# ### Model Building:
# A multi-branch approach is used: an LSTM-based branch for the speech frames
# and a CNN-based branch for the images. The outputs of both branches are
# concatenated and trained with a binary cross-entropy loss.

# In[5]:

# Speech branch input: one sequence of frame feature vectors per sample.
input1 = Input(shape=(max_len_speak_frames, speak_frame_feature))
# x1 =layers.LSTM(40, activation="relu", dropout=0.25, recurrent_dropout=0.25)(input1)

# LSTM -> BatchNorm -> ReLU -> Dropout
x1 = layers.CuDNNLSTM(50)(input1)
x1 = layers.BatchNormalization()(x1)
x1 = layers.Activation('relu')(x1)
x1 = layers.Dropout(0.2)(x1)

# Dense(256) -> BatchNorm -> ReLU -> Dropout, then a Dense(128) ReLU layer
x1 = layers.Dense(256)(x1)
x1 = layers.BatchNormalization()(x1)
x1 = layers.Activation('relu')(x1)
x1 = layers.Dropout(0.2)(x1)
x1 = layers.Dense(128, activation="relu")(x1)

# Image branch input and its first Conv2D -> BatchNorm -> ReLU -> Dropout block.
input2 = Input(shape=(img_height, img_width, 1))
x2 = layers.Conv2D(32, kernel_size=(3, 3))(input2)
x2 = layers.BatchNormalization()(x2)
x2 = layers.Activation('relu')(x2)
x2 = layers.Dropout(0.1)(x2)
# In[49]:

plot_train_validation_loss(history, 'Dense')

# ## CuDNNLSTM

# ### Training (CuDNNLSTM)

# In[51]:

start_time = time.time()

# The recurrent layer has to receive the (timesteps, features) sequence
# directly. A preceding Flatten would collapse it to a 2-D tensor, which an
# LSTM layer rejects, so the LSTM is the first layer of the model.
model = Sequential()
model.add(layers.CuDNNLSTM(32, input_shape=(None, weather_df_value.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
                              steps_per_epoch=400,
                              epochs=20,
                              validation_data=val_gen,
                              validation_steps=val_steps)

# Wall-clock time for building, compiling and training the model.
print("--- %s seconds ---" % (time.time() - start_time))

# ### Train Loss, Validation Loss graph

# In[ ]:
Ejemplo n.º 24
0
def build_keras_model(word_embedding_dims, num_words_name, emb_matrix_name,
                      max_seq_len_name, num_words_item_desc,
                      emb_matrix_item_desc, max_seq_len_item_desc,
                      cat_embedding_dims, num_categories, num_brands):
    """Assemble the (uncompiled) six-input fusion network.

    The two text inputs (item description and name) each pass through a
    trainable embedding initialised from the supplied matrix, spatial dropout,
    two stacked CuDNNLSTM layers and a final dropout.  The category and brand
    inputs each pass through their own embedding, dropout and a reshape to a
    flat ``cat_embedding_dims`` vector.  The four branch outputs are
    concatenated with the raw condition and shipping inputs and fed through
    three ReLU dense layers (400 -> 200 -> 1).

    Args:
        word_embedding_dims: Output dimension of both word embeddings.
        num_words_name: Vocabulary size of the name embedding.
        emb_matrix_name: Initial weights for the name embedding.
        max_seq_len_name: Length of the name input sequence.
        num_words_item_desc: Vocabulary size of the description embedding.
        emb_matrix_item_desc: Initial weights for the description embedding.
        max_seq_len_item_desc: Length of the description input sequence.
        cat_embedding_dims: Output dimension of the category/brand embeddings.
        num_categories: Input dimension of the category embedding.
        num_brands: Input dimension of the brand embedding.

    Returns:
        A ``km.Model`` whose inputs are, in order, condition, shipping,
        category, brand, name and item description, and whose output is the
        single-unit result of ``fusion_dense_3``.
    """

    def through(tensor, *stack):
        # Feed `tensor` through each layer of `stack`, in order.
        for layer in stack:
            tensor = layer(tensor)
        return tensor

    # --- inputs -----------------------------------------------------------
    cond_input = kl.Input(shape=(1, ), name='cond_input')
    ship_input = kl.Input(shape=(1, ), name='ship_input')
    category_input = kl.Input(shape=(1, ), name='category_input')
    brand_input = kl.Input(shape=(1, ), name='brand_input')
    item_desc_input = kl.Input(shape=(max_seq_len_item_desc, ),
                               name='item_desc_input')
    name_input = kl.Input(shape=(max_seq_len_name, ), name='name_input')

    # --- item-description branch -------------------------------------------
    item_desc_output = through(
        item_desc_input,
        kl.Embedding(num_words_item_desc,
                     word_embedding_dims,
                     weights=[emb_matrix_item_desc],
                     trainable=True,
                     name='item_desc_embedding'),
        kl.SpatialDropout1D(0.5, name='item_desc_embedding_dropout'),
        kl.CuDNNLSTM(units=200,
                     name='item_desc_lstm_1',
                     return_sequences=True),
        kl.CuDNNLSTM(units=200, name='item_desc_lstm_2'),
        kl.Dropout(0.5, name='item_desc_lstm_dropout'),
    )

    # --- name branch -------------------------------------------------------
    name_output = through(
        name_input,
        kl.Embedding(num_words_name,
                     word_embedding_dims,
                     weights=[emb_matrix_name],
                     trainable=True,
                     name='name_embedding'),
        kl.SpatialDropout1D(0.5, name='name_embedding_dropout'),
        kl.CuDNNLSTM(units=100, name='name_lstm_1', return_sequences=True),
        kl.CuDNNLSTM(units=100, name='name_lstm_2'),
        kl.Dropout(0.5, name='name_lstm_dropout'),
    )

    # --- categorical branches ----------------------------------------------
    category_output = through(
        category_input,
        kl.Embedding(num_categories,
                     cat_embedding_dims,
                     name='category_embedding'),
        kl.Dropout(0.5, name='category_embedding_dropout'),
        kl.Reshape(target_shape=(cat_embedding_dims, ),
                   name='category_reshape'),
    )
    brand_output = through(
        brand_input,
        kl.Embedding(num_brands, cat_embedding_dims, name='brand_embedding'),
        kl.Dropout(0.5, name='brand_embedding_dropout'),
        kl.Reshape(target_shape=(cat_embedding_dims, ), name='brand_reshape'),
    )

    # --- fusion head ---------------------------------------------------------
    fused = kl.Concatenate(axis=1, name='input_fusion')([
        cond_input, ship_input, name_output, item_desc_output,
        category_output, brand_output
    ])
    prediction = through(
        fused,
        kl.Dense(400, activation='relu', name='fusion_dense_1'),
        kl.Dense(200, activation='relu', name='fusion_dense_2'),
        kl.Dense(1, activation='relu', name='fusion_dense_3'),
    )

    model_inputs = [
        cond_input, ship_input, category_input, brand_input, name_input,
        item_desc_input
    ]
    return km.Model(inputs=model_inputs, outputs=prediction)
Ejemplo n.º 25
0
# Hold-out split: rows 467..583 of `all_data`. Along the second axis the first
# 396900 positions are used as the input signal and positions 396900..396903
# as the 4 label values.
# NOTE(review): the indexing suggests `all_data` is 3-D with a trailing
# channel axis -- confirm against where it is loaded.
test_data = all_data[467:584, 0:396900]
test_labels = all_data[467:584, 396900:396904, 0]

cnn = keras.models.Sequential()

# Two Conv1D + MaxPooling1D stages applied to the (396900, 1) input.
cnn.add(
    layers.Conv1D(2,
                  kernel_size=(1),
                  strides=(1),
                  activation='relu',
                  input_shape=(396900, 1)))
cnn.add(layers.MaxPooling1D(pool_size=(2), strides=(2)))
cnn.add(layers.Conv1D(8, (1), activation='relu'))
cnn.add(layers.MaxPooling1D(pool_size=(2)))

# No `input_shape` here: only the first layer of a Sequential model defines
# the model input; this layer takes whatever the pooling layer above emits.
cnn.add(layers.CuDNNLSTM(12))
#cnn.add(layers.CuDNNGRU(12))
cnn.add(layers.Dense(4, activation='softmax'))
cnn.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['acc'])
history = cnn.fit(train_data,
                  train_labels,
                  epochs=5,
                  batch_size=4,
                  validation_data=(test_data, test_labels))

#results = models.evaluate(test_data, test_targets)
history_dict = history.history

train_loss = history_dict['loss']

# One entry per epoch actually recorded, so `epochs` always lines up with
# `train_loss` regardless of the `epochs=` value passed to `fit` above.
epochs = range(1, len(train_loss) + 1)
Ejemplo n.º 26
0
    def compile_elmo(self):
        """
        Builds and compiles the bidirectional language-model network.

        Depending on ``self.parameters['token_encoding']`` the tokens are
        encoded either with a trainable word embedding (``'word'``) or with
        ``self.char_level_token_encoder()`` (``'char'``).  The encoded
        sequence goes through a forward stack of residual LSTM blocks, and a
        reversed copy goes through a backward stack; the backward result is
        reversed again.  Both streams are then passed, together with the
        ``next_ids`` / ``previous_ids`` inputs, to one shared ``SampleSoftmax``
        layer.

        The resulting model is stored in ``self._model`` and compiled with
        Adagrad and ``loss=None`` (presumably the loss is contributed by the
        ``SampleSoftmax`` layer itself -- confirm against its implementation).

        Raises:
            ValueError: if ``token_encoding`` is neither ``'word'`` nor
                ``'char'``.
        """
        params = self.parameters
        token_encoding = params.get('token_encoding')

        embedding = None
        if token_encoding == 'word':
            # Train word embeddings from scratch
            word_inputs = layers.Input(shape=(None, ),
                                       name='word_indices',
                                       dtype='int32')
            embedding = layers.Embedding(
                input_dim=params.get('vocab_size'),
                output_dim=params.get('hidden_units_size'),
                trainable=True,
                name='token_encoding')
            inputs = embedding(word_inputs)
        elif token_encoding == 'char':
            # Train character-level representation
            word_inputs = layers.Input(
                shape=(None, params.get('token_maxlen')),
                dtype='int32',
                name='char_indices')
            inputs = self.char_level_token_encoder()(word_inputs)
        else:
            raise ValueError(
                "Unsupported token_encoding {!r}; expected 'word' or "
                "'char'".format(token_encoding))

        # Token embeddings for Input.  Both encodings apply the time-step
        # dropout on top of the spatially-dropped embeddings, so the LSTM
        # input and the tensor used for masking stay consistent.
        drop_inputs = layers.SpatialDropout1D(
            params.get('dropout_rate'))(inputs)
        lstm_inputs = TimeStepDropout(
            params.get('word_dropout_rate'))(drop_inputs)

        # Pass outputs as inputs to apply sampled softmax
        next_ids = layers.Input(shape=(None, 1),
                                name='next_ids',
                                dtype='float32')
        previous_ids = layers.Input(shape=(None, 1),
                                    name='previous_ids',
                                    dtype='float32')

        # Reversed input for backward LSTMs
        re_lstm_inputs = layers.Lambda(function=ELMo.reverse)(lstm_inputs)
        mask = layers.Lambda(function=ELMo.reverse)(drop_inputs)

        def clip(key):
            # MinMaxNorm constraint bounded by +/- the configured clip value.
            bound = params.get(key)
            return constraints.MinMaxNorm(-1 * bound, bound)

        def lstm_stack(stream, mask_tensor, block_prefix):
            # Run `stream` through n_lstm_layers residual LSTM blocks.
            for i in range(params.get('n_lstm_layers')):
                if params['cuDNN']:
                    recurrent = layers.CuDNNLSTM(
                        units=params.get('lstm_units_size'),
                        return_sequences=True,
                        kernel_constraint=clip('cell_clip'),
                        recurrent_constraint=clip('cell_clip'))
                else:
                    recurrent = layers.LSTM(
                        units=params.get('lstm_units_size'),
                        return_sequences=True,
                        activation='tanh',
                        recurrent_activation='sigmoid',
                        kernel_constraint=clip('cell_clip'),
                        recurrent_constraint=clip('cell_clip'))
                hidden = recurrent(stream)
                hidden = Camouflage(mask_value=0)(
                    inputs=[hidden, mask_tensor])

                # Projection to hidden_units_size
                proj = layers.TimeDistributed(
                    layers.Dense(params.get('hidden_units_size'),
                                 activation='linear',
                                 kernel_constraint=clip('proj_clip')))(hidden)

                # Merge the projected features with the block's own input
                # (residual connection)
                stream = layers.add(
                    [proj, stream],
                    name='{}_block_{}'.format(block_prefix, i + 1))
                # Apply variational drop-out between LSTM layers
                stream = layers.SpatialDropout1D(
                    params.get('dropout_rate'))(stream)
            return stream

        # Forward LSTMs, then backward LSTMs (on the reversed sequence)
        lstm_inputs = lstm_stack(lstm_inputs, drop_inputs, 'f')
        re_lstm_inputs = lstm_stack(re_lstm_inputs, mask, 'b')

        # Reverse backward LSTMs' outputs = Make it forward again
        re_lstm_inputs = layers.Lambda(function=ELMo.reverse,
                                       name='reverse')(re_lstm_inputs)

        # Project to Vocabulary with Sampled Softmax; the softmax weights are
        # tied to the word embedding only when requested and available.
        tie_weights = (params.get('weight_tying')
                       and token_encoding == 'word')
        sampled_softmax = SampleSoftmax(
            num_classes=params.get('vocab_size'),
            num_sampled=int(params.get('num_sampled')),
            tied_to=embedding if tie_weights else None)
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

        self._model = models.Model(
            inputs=[word_inputs, next_ids, previous_ids],
            outputs=[outputs, re_outputs])
        self._model.compile(optimizer=optimizers.Adagrad(
            lr=params.get('lr'),
            clipvalue=params.get('clip_value')),
                            loss=None)
        # summary() prints the table itself and returns None.
        self._model.summary()
Ejemplo n.º 27
0
    def gentext_diag(self, dirname='./sacredtexts', epochs=30, steps_per_epoch=200, genlength=30, maxbatch=1000,
                     epoch_keep=1, temp_keep=1):
        """Train a small LSTM text model and print sampled text after every epoch.

        The network is TimeDistributed(Dense(28)) -> CuDNNLSTM(64) ->
        Dropout(0.2) -> softmax over ``self.tokens_unique``.  The loop runs
        for ``range(1, epochs)``, i.e. ``epochs - 1`` iterations.  Each
        iteration fits for one Keras epoch on ``self.generator(...)``, picks a
        random seed window of ``self.lookback`` tokens from ``self.tokens``
        and then writes ``genlength`` sampled tokens to stdout for each of the
        temperatures 0.5 and 1.0.

        The sliding window is not reset between the two temperatures: the
        1.0 pass continues from the window left behind by the 0.5 pass.

        Args:
            dirname: Forwarded to ``self.generator``.
            epochs: Upper bound (exclusive) of the epoch counter.
            steps_per_epoch: Generator steps per fit call.
            genlength: Number of tokens sampled per temperature.
            maxbatch: Forwarded to ``self.generator`` as ``batch_size``.
            epoch_keep: Epoch number at which the model may be kept.
            temp_keep: Temperature at which the model may be kept; the model
                is stored on ``self.model`` when both values match.
        """
        window_shape = (self.lookback, len(self.tokens_unique))
        model = keras.models.Sequential([
            layers.TimeDistributed(layers.Dense(28), input_shape=window_shape),
            layers.CuDNNLSTM(64, input_shape=window_shape),
            layers.Dropout(0.2, noise_shape=None, seed=None),
            layers.Dense(len(self.tokens_unique), activation='softmax'),
        ])
        model.compile(loss='categorical_crossentropy',
                      optimizer=keras.optimizers.Adam(lr=0.01))

        def draw_index(probabilities, temperature):
            # Re-weight the distribution by `temperature` and draw one index.
            # When generating, temperature = 0.5 seems to work best.
            scaled = np.log(np.asarray(probabilities).astype('float64')) / temperature
            weights = np.exp(scaled)
            weights = weights / np.sum(weights)
            return np.argmax(np.random.multinomial(1, weights, 1))

        for epoch in range(1, epochs):
            print('epoch', epoch)

            # One Keras epoch on the streamed training data.
            model.fit_generator(self.generator(dirname=dirname, batch_size=maxbatch),
                                steps_per_epoch=steps_per_epoch, epochs=1)

            # Random seed window taken from the token stream.
            start_index = random.randint(0, len(self.tokens) - self.lookback - 1)
            generated_text = self.tokens[start_index: start_index + self.lookback]

            # Characters are glued together, any other token kind is space-separated.
            joiner = '' if self.kind == 'char' else ' '
            print('--- Generating with seed: "' + joiner.join(generated_text) + '"')

            for temperature in [0.5, 1.0]:
                print('------ temperature:', temperature)
                sys.stdout.write(joiner.join(generated_text))

                for _ in range(genlength):
                    # One-hot encode the current window.
                    one_hot = np.zeros((1, self.lookback, len(self.tokens_unique)))
                    for position, token in enumerate(generated_text):
                        one_hot[0, position, self.token_indices[token]] = 1.

                    distribution = model.predict(one_hot, verbose=0)[0]
                    next_token = self.index_tokens[draw_index(distribution, temperature)]

                    # Slide the window forward by one token.
                    generated_text.append(next_token)
                    generated_text = generated_text[1:]

                    sys.stdout.write(joiner + next_token)
                    sys.stdout.flush()

                if epoch == epoch_keep and temperature == temp_keep:
                    self.model = model

                print()
    def retain(ARGS):
        '''Build the two-level attention model configured by ARGS.

        Code embeddings are summed per visit, optionally concatenated with
        numeric features and a time feature, and fed to two bidirectional
        recurrent layers named "alpha" and "beta".  The softmax-normalised
        alpha output, the beta output and the visit embeddings are multiplied
        element-wise and summed over axis 1 into a context vector, which a
        sigmoid dense layer (wrapped in TimeDistributed) turns into the
        prediction.

        Returns the uncompiled Model; its inputs are the code input followed
        by the numeric and time inputs when those are enabled.
        '''

        # Width of the context vector: embedding size plus numeric features
        context_width = ARGS.emb_size + ARGS.numeric_size

        # Constraints/activation depend on whether negative weights are allowed
        if ARGS.allow_negative:
            embeddings_constraint, beta_activation, output_constraint = (
                FreezePadding(), 'tanh', None)
        else:
            embeddings_constraint, beta_activation, output_constraint = (
                FreezePadding_Non_Negative(), 'sigmoid', non_neg())

        # Empty list when no GPU is available
        gpus = get_available_gpus()

        def reshape(data):
            '''Reshape the context vectors to 3D vector'''
            return K.reshape(x=data, shape=(K.shape(data)[0], 1, context_width))

        def attention_rnn(layer_name):
            '''Bidirectional LSTM; the CuDNN variant is used when a GPU exists'''
            if gpus:
                cell = L.CuDNNLSTM(ARGS.recurrent_size, return_sequences=True)
            else:
                cell = L.LSTM(ARGS.recurrent_size,
                              return_sequences=True,
                              implementation=2)
            return L.Bidirectional(cell, name=layer_name)

        # Code input: embed every code, then sum the codes of each visit
        codes = L.Input((None, None), name='codes_input')
        inputs_list = [codes]
        codes_embs_total = L.Embedding(
            ARGS.num_codes + 1,
            ARGS.emb_size,
            name='embedding',
            embeddings_constraint=embeddings_constraint)(codes)
        full_embs = L.Lambda(lambda x: K.sum(x, axis=2))(codes_embs_total)

        # Optional numeric input, appended to the visit embeddings
        if ARGS.numeric_size:
            numerics = L.Input((None, ARGS.numeric_size), name='numeric_input')
            inputs_list.append(numerics)
            full_embs = L.concatenate([full_embs, numerics], name='catInp')

        # Dropout on the inputs
        full_embs = L.Dropout(ARGS.dropout_input)(full_embs)

        # Optional time input: seen by the recurrent layers only
        time_embs = full_embs
        if ARGS.use_time:
            time_input = L.Input((None, 1), name='time_input')
            inputs_list.append(time_input)
            time_embs = L.concatenate([full_embs, time_input], name='catInp2')

        # This implementation uses Bidirectional LSTM instead of reverse order
        #    (see https://github.com/mp2893/retain/issues/3 for more details)
        alpha = attention_rnn('alpha')
        beta = attention_rnn('beta')

        alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
        beta_dense = L.Dense(context_width,
                             activation=beta_activation,
                             kernel_regularizer=l2(ARGS.l2))

        # Visit-level attention (alpha), normalised over axis 1
        alpha_out = L.TimeDistributed(alpha_dense,
                                      name='alpha_dense_0')(alpha(time_embs))
        alpha_out = L.Softmax(axis=1)(alpha_out)

        # Code-level attention (beta)
        beta_out = L.TimeDistributed(beta_dense,
                                     name='beta_dense_0')(beta(time_embs))

        # Context vector from both attentions and the embeddings
        weighted = L.Multiply()([alpha_out, beta_out, full_embs])
        c_t = L.Lambda(lambda x: K.sum(x, axis=1))(weighted)
        # 3D shape for consistency between Many to Many and Many to One
        contexts = L.Lambda(reshape)(c_t)

        # Prediction head
        contexts = L.Dropout(ARGS.dropout_context)(contexts)
        output_layer = L.Dense(1,
                               activation='sigmoid',
                               name='dOut',
                               kernel_regularizer=l2(ARGS.l2),
                               kernel_constraint=output_constraint)

        # TimeDistributed is used for consistency
        # between Many to Many and Many to One implementations
        output = L.TimeDistributed(output_layer,
                                   name='time_distributed_out')(contexts)

        return Model(inputs=inputs_list, outputs=[output])
Ejemplo n.º 29
0
def create_sample_rnn(input_shape: Tuple[int], num_classes):
    """Return an uncompiled two-layer classifier: CuDNNLSTM(32) -> softmax.

    Args:
        input_shape: Per-sample input shape handed to the recurrent layer.
        num_classes: Number of units in the softmax output layer.
    """
    recurrent = layers.CuDNNLSTM(32, input_shape=input_shape)
    classifier = layers.Dense(num_classes, activation="softmax")
    return models.Sequential([recurrent, classifier])
Ejemplo n.º 30
0
	def build(self, mode, config):
		"""Build the two-stream LSTM bounding-box model.

		Both inputs (box coordinates and feature maps) go through their own
		CuDNNLSTM followed by Dense(2048) and Dense(1024); the two streams are
		concatenated and a tanh Dense layer predicts a delta that is added to
		the input coordinates.

		In 'training' mode the LSTMs return full sequences, the dense layers
		are applied per time step and the model is compiled (Adam +
		smooth_l1_loss) and plotted to 'RRCNN.png'.  In 'inference' mode only
		the last LSTM output is used and the delta is added to the coordinates
		of the last time step; the model is returned uncompiled.

		Args:
			mode: Either 'training' or 'inference'.
			config: Object providing BATCH_SIZE, TIME_STEPS, MRCNNBBOX_SIZE,
				FEATURE_SIZE, L1_CELL_SIZE, OUTPUT_SIZE and LEARNING_RATE.

		Returns:
			The Keras model named 'RRCNN'.

		Raises:
			ValueError: if ``mode`` is not 'training' or 'inference'.
		"""
		# Fail early: previously an unknown mode only surfaced as a NameError
		# on `out_bbox` after part of the graph had been built.
		if mode not in ('training', 'inference'):
			raise ValueError(
				"mode must be 'training' or 'inference', got %r" % (mode,))
		training = mode == 'training'

		print("# Building LSTM %s model --------------- #\n" % (mode))

		def per_step(layer):
			# Training works on whole sequences, so dense layers are applied
			# to every time step; inference only sees the last LSTM output.
			return KL.TimeDistributed(layer) if training else layer

		# Model graph
		# Bbox coord: [Batch, TS, 4]
		input_coord = KL.Input(batch_shape=(config.BATCH_SIZE, config.TIME_STEPS, config.MRCNNBBOX_SIZE))
		# Feature map: [Batch, TS, 1024]
		input_feat = KL.Input(batch_shape=(config.BATCH_SIZE, config.TIME_STEPS, config.FEATURE_SIZE))

		# LSTM, one per stream
		x_coord = KL.CuDNNLSTM(
				units=config.L1_CELL_SIZE,
				return_sequences=training,
				stateful=False
				)(input_coord)
		x_feat = KL.CuDNNLSTM(
				units=config.L1_CELL_SIZE,
				return_sequences=training,
				stateful=False
				)(input_feat)

		# Dense heads, then fuse both streams
		x_coord = per_step(KL.Dense(2048, activation='relu'))(x_coord)
		x_coord = per_step(KL.Dense(1024, activation='relu'))(x_coord)
		x_feat = per_step(KL.Dense(2048, activation='relu'))(x_feat)
		x_feat = per_step(KL.Dense(1024, activation='relu'))(x_feat)
		x = KL.Concatenate()([x_coord, x_feat])
		delta = per_step(KL.Dense(config.OUTPUT_SIZE, activation='tanh'))(x)

		if training:
			# The delta is added to the coordinates of every time step
			base_coord = input_coord
		else:
			# The delta is added to the coordinates of the last time step only
			base_coord = KL.Lambda(lambda x: x[:, config.TIME_STEPS-1, :], output_shape=(4, ))(input_coord)
		out_bbox = KL.Add()([base_coord, delta])

		# Create model
		model = KM.Model(inputs=[input_coord, input_feat],
						 outputs=out_bbox,
						 name='RRCNN')
		model.summary()
		if training:
			# Initial loss function and optimizer
			adam = KO.Adam(config.LEARNING_RATE)
			model.compile(optimizer=adam, loss=smooth_l1_loss)
			plot_model(model, to_file='RRCNN.png', show_shapes=True, show_layer_names=False)

		return model