Beispiel #1
0
batch_size = 32 # batch size
vocab_size = 1000 # number of words in the vocabulary
embedding_dim = 100 # dimensionality of the word vectors
seq_length1 = 20 # length of the question
seq_length2 = 10 # length of the answer
lstm_units = 200 # dimensionality of the LSTM hidden state vector
hidden_dim = lstm_units * 2 # dimensionality of the final output vector

def abs_sub(x):
    """Return the element-wise absolute difference ``|x[0] - x[1]|``.

    ``x`` is indexed at positions 0 and 1; the subtraction result is passed
    to the backend ``K.abs``.
    """
    left = x[0]
    right = x[1]
    difference = left - right
    return K.abs(difference)

# A single embedding layer instance is applied to both inputs, so the
# question and the answer share the same word vectors.
embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim)

# Question branch: token ids -> embeddings -> BiLSTM (one output per step,
# forward/backward states concatenated) -> dropout.
input1 = Input(shape=(seq_length1,))
embed1 = embedding(input1)
bilstm1 = Bidirectional(LSTM(lstm_units, return_sequences=True), merge_mode='concat')(embed1)
h1 = Dropout(0.2)(bilstm1)

# Answer branch: same structure, but with its own BiLSTM instance.
input2 = Input(shape=(seq_length2,))
embed2 = embedding(input2)
bilstm2 = Bidirectional(LSTM(lstm_units, return_sequences=True), merge_mode='concat')(embed2)
h2 = Dropout(0.2)(bilstm2)

# Dot product between every answer step and every question step, contracting
# the feature axis (this is a dot product, not an element-wise product).
product = dot([h2, h1], axes=2) # size: [batch size, answer length, question length]
# NOTE(review): presumably normalises over the last (question) axis, which is
# the Keras softmax default - confirm.
a = Activation('softmax')(product)
# Weighted sum of the question states for each answer step.
c = dot([a, h1], axes=[2, 1])
# Append the attended question summary to the answer states along the feature axis.
c_h2 = concatenate([c, h2], axis=2)
h = Dense(hidden_dim, activation='tanh')(c_h2)

# The pooling window equals the full question length, so the question states
# are averaged over all time steps.
mean_pooled_1 = AveragePooling1D(pool_size=seq_length1, strides=1, padding='valid')(h1)
Beispiel #2
0
    def train(params, checkpoint_directory, queue):
        """Build, train and evaluate a stacked bidirectional-LSTM sequence tagger.

        Args:
            params: hyper-parameter mapping. Reads ``embedding_neuron``,
                ``lstm`` (which holds ``layer``, ``neuron`` and ``dropout``),
                ``optimizer`` and ``batch_size``.
            checkpoint_directory: directory created by this call. It receives
                ``model.json`` and ``model_config.txt`` and is handed to the
                callbacks together with a ``tensorboard`` sub-directory.
            queue: object whose ``put`` method receives the final accuracy.

        ``num_step``, ``x_train``, ``y_train``, ``x_test``, ``y_test``,
        ``epochs`` and ``shuffle`` are not defined here and must be provided
        by an enclosing or module scope.
        """
        # Read every hyper-parameter first so a missing key fails before any
        # directory is created.
        embedding_size = params['embedding_neuron']
        lstm_config = params['lstm']
        layer_count = lstm_config['layer']
        chosen_optimizer = params['optimizer']
        minibatch_size = params['batch_size']

        print("[Params]", params)

        # os.makedirs is called without exist_ok, so both directories must be new.
        tb_directory = os.path.join(checkpoint_directory, "tensorboard")
        os.makedirs(checkpoint_directory)
        os.makedirs(tb_directory)

        model = Sequential()
        model.add(
            Embedding(constant.NUM_CHARS, embedding_size, input_length=num_step))

        # One Bidirectional(LSTM) + Dropout pair per configured layer; the same
        # rate is used for input, recurrent and post-layer dropout.
        for layer_index in range(layer_count):
            units = lstm_config['neuron'][layer_index]
            rate = lstm_config['dropout'][layer_index]
            recurrent_cell = LSTM(units, return_sequences=True, unroll=True,
                                  dropout=rate, recurrent_dropout=rate)
            model.add(Bidirectional(recurrent_cell))
            model.add(Dropout(rate))

        # Per-time-step softmax over the tag set.
        step_classifier = Dense(constant.NUM_TAGS, activation="softmax")
        model.add(
            TimeDistributed(step_classifier,
                            input_shape=(num_step, lstm_config['neuron'][-1])))

        model.compile(loss="categorical_crossentropy",
                      optimizer=chosen_optimizer,
                      metrics=["categorical_accuracy"])

        # Persist the architecture and the full layer configuration.
        architecture_path = os.path.join(checkpoint_directory, "model.json")
        with open(architecture_path, "w") as architecture_file:
            architecture_file.write(model.to_json())

        config_path = os.path.join(checkpoint_directory, "model_config.txt")
        with open(config_path, "w") as config_file:
            pprint(model.get_config(), stream=config_file)

        model.summary()

        # Early stopping is disabled; the callbacks only get the output paths.
        callback_params = DottableDict({
            "es_enable": False,
            "es_min_delta": 0,
            "es_patience": 0
        })
        callback_paths = DottableDict({
            "checkpoint": checkpoint_directory,
            "tensorboard": tb_directory,
            "loss_log": os.path.join(checkpoint_directory, "loss.csv"),
            "score_log": os.path.join(checkpoint_directory, "score.csv")
        })
        callbacks = CustomCallback(callback_params, callback_paths).callbacks

        model.fit(x_train, y_train,
                  validation_data=(x_test, y_test),
                  epochs=epochs,
                  batch_size=minibatch_size,
                  verbose=2,
                  callbacks=callbacks,
                  shuffle=shuffle)

        # evaluate returns (loss, categorical_accuracy); only the metric is reported.
        _loss, accuracy = model.evaluate(x_test, y_test, verbose=0)

        print("[Validation] categorical_accuracy:", accuracy)
        print("")

        # Hand the result back to the caller through the queue.
        queue.put(accuracy)
                    images = []
                    count = 0

# Width of the image projection and of the word embeddings.
embedding_size = 300
# Image branch: project a 2048-dimensional feature vector to embedding_size
# and repeat it max_len times so it can be merged with the caption sequence.
image_model = Sequential([
        Dense(embedding_size, input_shape=(2048,), activation='relu'),
        RepeatVector(max_len)
    ])
# Caption branch: token ids -> embeddings -> LSTM (one output per step) ->
# per-step projection to 300 dimensions.
caption_model = Sequential([
        Embedding(vocab_size, embedding_size, input_length=max_len),
        LSTM(256, return_sequences=True),
        TimeDistributed(Dense(300))
    ])
# Both branches are concatenated along axis 1, summarised by a bidirectional
# LSTM, and mapped to a softmax over the vocabulary.
# NOTE(review): `Merge(..., mode='concat', concat_axis=1)` looks like the
# Keras 1 merge API - confirm which Keras version this file targets.
final_model = Sequential([
        Merge([image_model, caption_model], mode='concat', concat_axis=1),
        Bidirectional(LSTM(256, return_sequences=False)),
        Dense(vocab_size),
        Activation('softmax')
    ])
final_model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
# NOTE(review): summary() presumably prints the table itself, in which case
# this print() additionally emits its return value - confirm.
print(final_model.summary())


# NOTE(review): hard-coded absolute Windows path; loading fails on any machine
# that does not have this exact directory layout.
final_model.load_weights('E:\\PycharmProjects\\image captioningg\\Image-Captioning-master11\\weights\\time_inceptionV3_1.5987_loss.h5')


def predict_captions(image):
    start_word = ["<start>"]
    while True:
        par_caps = [word2idx[i] for i in start_word]
        par_caps = sequence.pad_sequences([par_caps], maxlen=max_len, padding='post')
Beispiel #4
0
def create_model(params, computed_params):
    """Build and compile a two-class network over a question, premises and a word.

    One input tensor is created for the question, one per premise and one for
    a single word vector. Every sentence input is encoded according to
    ``params['net_arch']``; the encodings and the word vector are concatenated
    and fed through optional ReLU layers and a 2-way softmax.

    Args:
        params: mapping read for ``net_arch`` ('lstm', 'lstm(cnn)' or 'cnn'),
            ``units1``, ``optimizer`` and, depending on the architecture,
            ``rnn_size``, ``nb_filters``, ``max_kernel_size`` and ``pooling``
            ('max' or 'average'). ``units2`` is only read when ``units1 > 0``
            and ``units3`` only when ``units2 > 0``.
        computed_params: mapping read for ``max_inputseq_len``, ``word_dims``
            and ``max_nb_premises``.

    Returns:
        The compiled model. Its inputs are the question, the premises and the
        word input, in that order.

    Raises:
        NotImplementedError: for an unknown ``net_arch`` or ``pooling`` value.
    """
    net_arch = params['net_arch']
    logging.info('Constructing neural net: {}...'.format(net_arch))

    max_inputseq_len = computed_params['max_inputseq_len']
    word_dims = computed_params['word_dims']
    max_nb_premises = computed_params['max_nb_premises']

    def new_pooling(max_cls, average_cls):
        """Instantiate the pooling layer selected by ``params['pooling']``."""
        if params['pooling'] == 'max':
            return max_cls()
        if params['pooling'] == 'average':
            return average_cls()
        raise NotImplementedError()

    def new_shared_conv(nb_filters, kernel_size):
        """Create the convolution that is shared by all sentence inputs."""
        return Conv1D(filters=nb_filters,
                      kernel_size=kernel_size,
                      padding='valid',
                      activation='relu',
                      strides=1,
                      name='shared_conv_{}'.format(kernel_size))

    # Sentence inputs: the question first, then the premises.
    inputs = []
    input_question = Input(shape=(max_inputseq_len, word_dims,), dtype='float32', name='question')
    inputs.append(input_question)

    for ipremise in range(max_nb_premises):
        input_premise = Input(shape=(max_inputseq_len, word_dims,), dtype='float32', name='premise{}'.format(ipremise))
        inputs.append(input_premise)

    input_word = Input(shape=(word_dims,), dtype='float32', name='word')

    layers = []

    if net_arch == 'lstm':
        rnn_size = params['rnn_size']

        # LSTM-based encoder that packs the words of a sentence into a single
        # vector. This layer is shared by all input sentences.
        shared_words_rnn = Bidirectional(recurrent.LSTM(rnn_size,
                                                        input_shape=(max_inputseq_len, word_dims),
                                                        return_sequences=False))

        for sentence_input in inputs:
            layers.append(shared_words_rnn(sentence_input))
    elif net_arch == 'lstm(cnn)':
        rnn_size = params['rnn_size']
        nb_filters = params['nb_filters']
        max_kernel_size = params['max_kernel_size']

        for kernel_size in range(1, max_kernel_size+1):
            # Convolutional layers come first; they act as detectors of
            # phrases and syntactic constructions.
            conv = new_shared_conv(nb_filters, kernel_size)

            # One LSTM per kernel size, shared by all sentence inputs.
            lstm = recurrent.LSTM(rnn_size, return_sequences=False)

            for sentence_input in inputs:
                features = conv(sentence_input)
                # A fresh (weightless) pooling layer is created per input.
                pooling = new_pooling(keras.layers.MaxPooling1D,
                                      keras.layers.AveragePooling1D)
                features = pooling(features)
                layers.append(lstm(features))
    elif net_arch == 'cnn':
        nb_filters = params['nb_filters']
        max_kernel_size = params['max_kernel_size']

        for kernel_size in range(1, max_kernel_size+1):
            conv = new_shared_conv(nb_filters, kernel_size)

            for sentence_input in inputs:
                features = conv(sentence_input)
                # Global pooling collapses the time axis.
                pooling = new_pooling(keras.layers.GlobalMaxPooling1D,
                                      keras.layers.GlobalAveragePooling1D)
                layers.append(pooling(features))
    else:
        raise NotImplementedError()

    # The raw word vector is concatenated as-is with the sentence encodings.
    layers.append(input_word)

    encoder_merged = keras.layers.concatenate(inputs=list(layers))
    decoder = encoder_merged

    # Optional dense stack: each further layer is only added when the
    # previous one is enabled.
    if params['units1'] > 0:
        decoder = Dense(params['units1'], activation='relu')(decoder)

        if params['units2'] > 0:
            decoder = Dense(params['units2'], activation='relu')(decoder)

            if params['units3'] > 0:
                decoder = Dense(params['units3'], activation='relu')(decoder)

    output_dims = 2
    decoder = Dense(output_dims, activation='softmax', name='output')(decoder)

    model = Model(inputs=inputs + [input_word], outputs=decoder)
    model.compile(loss='categorical_crossentropy', optimizer=params['optimizer'], metrics=['accuracy'])

    return model
Beispiel #5
0
    feats[:, :, dim_counter] = (feats[:, :, dim_counter] - np.mean(
        feats[:, :, dim_counter])) / np.std(feats[:, :, dim_counter])

# Split the sample indices (not the data itself) into train and test parts.
# NOTE(review): presumably sklearn's train_test_split, i.e. a 5% hold-out - confirm.
train_ind, test_ind = train_test_split(range(len(labels_ind)), test_size=0.05)

# Select the train rows. Despite its name, labels_ind_train is taken from
# labels_1hot, not from labels_ind.
feats_train = feats[train_ind, :, :]
labels_ind_train = labels_1hot[train_ind, :]

# Select the test rows the same way.
feats_test = feats[test_ind, :, :]
labels_ind_test = labels_1hot[test_ind, :]

# Two stacked bidirectional LSTMs: the first emits one output per step, the
# second emits a single vector that feeds the softmax classifier.
model = Sequential()
model.add(
    Bidirectional(LSTM(LAYER_SIZE1,
                       dropout=0.2,
                       recurrent_dropout=0.2,
                       return_sequences=True),
                  input_shape=(frame_dim, vec_dim)))
model.add(
    Bidirectional(
        LSTM(LAYER_SIZE2,
             dropout=0.2,
             recurrent_dropout=0.2,
             return_sequences=False)))
model.add(Dense(out_dict_size, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])
Beispiel #6
0

# Convert questions and entities to the two model inputs and the targets.
trainx1, trainx2, trainy = GetXY(train_questions,
                                 train_entitys)  # (num_sample, max_len)
testx1, testx2, testy = GetXY(test_questions, test_entitys)
print(trainx1.shape)

# Build the model
bert_model = load_trained_model_from_checkpoint(
    config_path, checkpoint_path, seq_len=None)  # the pre-trained BERT model is treated here as a single Keras layer
# Mark every BERT layer as trainable so the whole encoder is fine-tuned.
for l in bert_model.layers:
    l.trainable = True
# Both inputs have a variable sequence length.
x1_in = Input(shape=(None, ))
x2_in = Input(shape=(None, ))
x = bert_model([x1_in, x2_in])  # (batch, step, feature)
x = Bidirectional(LSTM(512, return_sequences=True, recurrent_dropout=0.2))(x)
# The LSTM returns one output per step, so this sigmoid is applied per step.
p = Dense(1, activation='sigmoid')(x)
model = Model([x1_in, x2_in], p)
model.compile(loss='binary_crossentropy',
              optimizer=Adam(1e-5),
              metrics=['accuracy'])
model.summary()

# Train the model
maxf = 0.0


def computeF(gold_entity, pre_entity):
    '''
    根据标注的实体位置和预测的实体位置,计算prf,完全匹配
    输入: Python-list  3D,值为每个实体的起始位置列表[begin,end]
Beispiel #7
0
 def build(input_shape=(32, None, 1),
           rnn_unit=256,
           num_classes=5991,
           max_string_len=10):
     """Build a convolutional-recurrent recogniser and its CTC training wrapper.

     Args:
         input_shape: shape of the image input (without the batch axis).
         rnn_unit: number of units in each direction of the two GRU layers.
         num_classes: size of the per-step softmax output.
         max_string_len: length of the ``label`` input.

     Returns:
         ``(base_model, model)``: ``base_model`` maps the image to per-step
         class probabilities; ``model`` additionally takes ``label``,
         ``seq_length`` and ``label_length`` and outputs the value computed by
         ``ctc_lambda_func`` (defined elsewhere).
     """
     # Named image_input locally to avoid shadowing the builtin `input`; the
     # layer name 'the_input' is unchanged.
     image_input = Input(shape=input_shape, name='the_input')
     m = Conv2D(64,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv1')(image_input)
     m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(m)
     m = Conv2D(128,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv2')(m)
     m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(m)
     m = Conv2D(256,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv3')(m)
     m = Conv2D(256,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv4')(m)
     # pool3 and pool4 use a (2, 1) window, so only the first spatial axis is
     # halved.
     m = MaxPooling2D(pool_size=(2, 1),
                      strides=(2, 1),
                      padding='valid',
                      name='pool3')(m)
     m = Conv2D(512,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv5')(m)
     m = BatchNormalization(axis=3)(m)
     m = Conv2D(512,
                kernel_size=(3, 3),
                activation='relu',
                padding='same',
                name='conv6')(m)
     m = BatchNormalization(axis=3)(m)
     m = MaxPooling2D(pool_size=(2, 1),
                      strides=(2, 1),
                      padding='valid',
                      name='pool4')(m)
     m = Conv2D(512,
                kernel_size=(2, 2),
                activation='relu',
                padding='valid',
                name='conv7')(m)
     # Swap the two spatial axes so the second one becomes the time axis, then
     # flatten everything else into one feature vector per step.
     m = Permute((2, 1, 3), name='permute')(m)
     m = TimeDistributed(Flatten(), name='timedistrib')(m)
     # The layers are GRUs although they are named 'blstm*'; the names are kept
     # unchanged (NOTE(review): they may be relied on when loading weights by
     # name - confirm before renaming).
     m = Bidirectional(GRU(rnn_unit,
                           return_sequences=True,
                           implementation=2),
                       name='blstm1')(m)
     m = Bidirectional(GRU(rnn_unit,
                           return_sequences=True,
                           implementation=2),
                       name='blstm2')(m)
     y_pred = Dense(num_classes, name='blstm2_out', activation='softmax')(m)
     base_model = Model(inputs=image_input, outputs=y_pred)

     # Extra inputs that are only needed to compute the CTC loss.
     label = Input(name='label', shape=[max_string_len], dtype='int64')
     seq_length = Input(name='seq_length', shape=[1], dtype='int64')
     label_length = Input(name='label_length', shape=[1], dtype='int64')
     loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
         [label, y_pred, seq_length, label_length])
     # Use the same `inputs=`/`outputs=` keywords as base_model above instead
     # of the legacy `input=`/`output=` spelling.
     model = Model(inputs=[image_input, label, seq_length, label_length],
                   outputs=[loss_out])
     model.summary()
     return base_model, model
    states_train = list(f_train.keys())[0]
    projed_rep_Ru_train = list(f_train[states_train])
    projed_rep_Ru_train = np.array(projed_rep_Ru_train)
    

    # Load Russian test projected data
    f_test = h5py.File("./output_adv_NoMT/projected_rep_%s_test_1k.hdf5"%lang, 'r')
    states_test = list(f_test.keys())[0]
    projed_rep_Ru_test = list(f_test[states_test])
    projed_rep_Ru_val = np.array(projed_rep_Ru_test)


    
    # Building model 
    myInput = Input(shape=(150,200))
    LSTM_Russian = Bidirectional(LSTM(100,return_sequences=False))(myInput)
    #LSTM_Russian=Bidirectional(LSTM(32, return_sequences=False))(LSTM_Russian)
    predictions = Dense(1, activation='sigmoid')(LSTM_Russian)
    model_Ru = Model(inputs=myInput, outputs=predictions)
    model_Ru.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    print(model_Ru.summary())

    print(len(projed_rep_Ru_train))
    print(len(y_train_Ru))
    class_weight = {0: 1.,1: 1}
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    model_Ru.fit(projed_rep_Ru_train, y_train_Ru, epochs=10, batch_size=32,
        validation_data=[projed_rep_Ru_val, y_val_Ru],
        callbacks=[early_stopping],class_weight=class_weight)
    
    # Load Russian test projected data
Beispiel #9
0
def create_model(params, computed_params):
    """Assemble and compile a two-class classifier over one word sequence.

    Args:
        params: mapping read for ``net_arch`` ('rnn', 'rnn(cnn)' or 'cnn'),
            ``units1``, ``optimizer`` and, depending on the architecture,
            ``rnn_size``, ``nb_filters``, ``max_kernel_size``, ``pooling``
            ('max' or 'average') and ``activation1`` (only when
            ``units1 > 0``).
        computed_params: mapping read for ``max_wordseq_len`` and
            ``word_dims``.

    Returns:
        The compiled model; its summary is printed before returning.

    Raises:
        NotImplementedError: for an unknown ``net_arch`` or ``pooling`` value.
    """
    arch = params['net_arch']
    logging.info('Constructing the NN model arch={}...'.format(arch))

    seq_len = computed_params['max_wordseq_len']
    vec_size = computed_params['word_dims']

    input_words = Input(shape=(seq_len, vec_size,),
                        dtype='float32',
                        name='input_words')

    def make_conv(filters, width):
        # One convolution per kernel width; the name encodes the width.
        return Conv1D(filters=filters,
                      kernel_size=width,
                      padding='valid',
                      activation='relu',
                      strides=1,
                      name='shared_conv_{}'.format(width))

    def make_pooling(max_cls, average_cls):
        # Pick the pooling flavour requested in params['pooling'].
        if params['pooling'] == 'max':
            return max_cls()
        if params['pooling'] == 'average':
            return average_cls()
        raise NotImplementedError()

    # Encoder outputs that are merged before the classifier.
    encodings = []
    if arch == 'rnn':
        # LSTM-based encoder that packs the words of the sentence into one vector.
        hidden = params['rnn_size']
        encoder = Bidirectional(
            recurrent.LSTM(hidden,
                           input_shape=(seq_len, vec_size),
                           return_sequences=False))
        encodings.append(encoder(input_words))
    elif arch == 'rnn(cnn)':
        hidden = params['rnn_size']
        filters = params['nb_filters']
        widest = params['max_kernel_size']

        for width in range(1, widest + 1):
            # Convolutions come first and act as detectors of phrases and
            # syntactic constructions; an LSTM then summarises the pooled maps.
            conv = make_conv(filters, width)
            summariser = recurrent.LSTM(hidden, return_sequences=False)

            feature_maps = conv(input_words)
            pooled = make_pooling(keras.layers.MaxPooling1D,
                                  keras.layers.AveragePooling1D)(feature_maps)
            encodings.append(summariser(pooled))
    elif arch == 'cnn':
        filters = params['nb_filters']
        widest = params['max_kernel_size']

        for width in range(1, widest + 1):
            conv = make_conv(filters, width)
            feature_maps = conv(input_words)
            # Global pooling collapses the time axis.
            pooled = make_pooling(keras.layers.GlobalMaxPooling1D,
                                  keras.layers.GlobalAveragePooling1D)(feature_maps)
            encodings.append(pooled)
    else:
        raise NotImplementedError()

    # A single encoding is used directly; several are concatenated.
    if len(encodings) == 1:
        classif = encodings[0]
    else:
        classif = keras.layers.concatenate(inputs=encodings)

    # Optional hidden layer before the softmax.
    if params['units1'] > 0:
        hidden_layer = Dense(units=params['units1'],
                             activation=params['activation1'])
        classif = hidden_layer(classif)

    classif = Dense(units=2, activation='softmax', name='output')(classif)

    model = Model(inputs=input_words, outputs=classif)
    model.compile(loss='categorical_crossentropy',
                  optimizer=params['optimizer'],
                  metrics=['accuracy'])
    model.summary()
    return model
def trainging(storage, exp, sampleweights,
              char_x, pos_x, unicate_x,
              trainy_interval, trainy_operator_ex, trainy_operator_im,
              char_x_cv, pos_x_cv, unicate_x_cv,
              cv_y_interval, cv_y_operator_ex, cv_y_operator_im,
              batchsize, epoch_size,
              n_char, n_pos, n_unicate, n_vocab,
              reload=False, modelpath=None,
              embedding_size_char=64, embedding_size_pos=48,
              embedding_size_unicate=32, embedding_size_vocab=32,
              gru_size1=128, gru_size2=160):
    """Build, train and save a three-headed sequence tagger.

    Character, POS and "unicate" id sequences are embedded, concatenated and
    fed to three independent heads (Bidirectional GRU -> GRU -> softmax), one
    per target: interval, explicit operator and implicit operator.

    Args:
        storage: output directory (created when missing) for the per-epoch
            weights, the final model and the training history.
        exp: experiment tag used in the CSV log file name.
        sampleweights: passed to ``fit`` as ``sample_weight``.
        char_x, pos_x, unicate_x: training inputs; the sequence length is
            taken from ``char_x.shape[1]``.
        trainy_interval, trainy_operator_ex, trainy_operator_im: training
            targets; the class count of each head is the size of its last axis.
        char_x_cv, pos_x_cv, unicate_x_cv, cv_y_interval, cv_y_operator_ex,
            cv_y_operator_im: validation inputs and targets.
        batchsize, epoch_size: passed to ``fit`` as ``batch_size``/``epochs``.
        n_char, n_pos, n_unicate: input sizes of the three embeddings.
        embedding_size_char, embedding_size_pos, embedding_size_unicate:
            output sizes of the three embeddings.
        gru_size1, gru_size2: units of the bidirectional and the plain GRU.
        n_vocab, reload, modelpath, embedding_size_vocab: accepted but not
            used in this function body.
    """
    seq_length = char_x.shape[1]
    n_interval_types = trainy_interval.shape[-1]
    n_explicit_types = trainy_operator_ex.shape[-1]
    n_implicit_types = trainy_operator_im.shape[-1]

    if not os.path.exists(storage):
        os.makedirs(storage)

    def make_embedding(n_symbols, width):
        # L2-regularised embedding; id 0 is masked.
        return Embedding(output_dim=width,
                         input_dim=n_symbols,
                         input_length=seq_length,
                         embeddings_regularizer=l2(.01),
                         mask_zero=True)

    def make_head(n_types, dense_name, **first_gru_kwargs):
        # Layers of one head, created in the order: Bidirectional GRU, GRU, Dense.
        bi_gru = Bidirectional(
            GRU(gru_size1, return_sequences=True, **first_gru_kwargs))
        top_gru = GRU(gru_size2, return_sequences=True)
        classifier = Dense(n_types,
                           activation='softmax',
                           kernel_regularizer=l2(.01),
                           name=dense_name)
        return bi_gru, top_gru, classifier

    def apply_head(head, features):
        bi_gru, top_gru, classifier = head
        return classifier(top_gru(bi_gru(features)))

    # All layers are created before any of them is called.
    char_embedding = make_embedding(n_char, embedding_size_char)
    pos_embedding = make_embedding(n_pos, embedding_size_pos)
    unicate_embedding = make_embedding(n_unicate, embedding_size_unicate)

    merged_width = (embedding_size_char + embedding_size_pos +
                    embedding_size_unicate)
    interval_head = make_head(n_interval_types, 'dense_1',
                              input_shape=(seq_length, merged_width))
    explicit_head = make_head(n_explicit_types, 'dense_2')
    implicit_head = make_head(n_implicit_types, 'dense_3')

    # The input names are the keys of the data dictionaries given to fit().
    char_input = Input(shape=(seq_length, ), dtype='int8', name='character')
    pos_input = Input(shape=(seq_length, ), dtype='int8', name='pos')
    unicate_input = Input(shape=(seq_length, ), dtype='int8', name='unicate')

    char_em = Dropout(0.25)(char_embedding(char_input))
    pos_em = Dropout(0.15)(pos_embedding(pos_input))
    unicate_em = Dropout(0.15)(unicate_embedding(unicate_input))

    merged = keras.layers.concatenate([char_em, pos_em, unicate_em], axis=-1)

    interval_output = apply_head(interval_head, merged)
    explicit_operator = apply_head(explicit_head, merged)
    implicit_operator = apply_head(implicit_head, merged)

    model = Model(
        inputs=[char_input, pos_input, unicate_input],
        outputs=[interval_output, explicit_operator, implicit_operator])

    # The losses are keyed by the names of the three output Dense layers.
    head_losses = {
        'dense_1': 'categorical_crossentropy',
        'dense_2': 'categorical_crossentropy',
        'dense_3': 'categorical_crossentropy'
    }
    head_weights = {'dense_1': 1.0, 'dense_2': 0.75, 'dense_3': 0.5}
    model.compile(optimizer='sgd',
                  loss=head_losses,
                  loss_weights=head_weights,
                  metrics=['categorical_accuracy'],
                  sample_weight_mode="temporal")

    print(model.summary())

    # Weights are written after every epoch (save_best_only is off).
    filepath = storage + "/weights-improvement-{epoch:02d}.hdf5"
    callbacks_list = [
        ModelCheckpoint(filepath, verbose=1, save_best_only=False),
        CSVLogger('training_%s.csv' % exp),
    ]

    train_inputs = {
        'character': char_x,
        'pos': pos_x,
        'unicate': unicate_x
    }
    train_targets = {
        'dense_1': trainy_interval,
        'dense_2': trainy_operator_ex,
        'dense_3': trainy_operator_im
    }
    validation = ({
        'character': char_x_cv,
        'pos': pos_x_cv,
        'unicate': unicate_x_cv
    }, {
        'dense_1': cv_y_interval,
        'dense_2': cv_y_operator_ex,
        'dense_3': cv_y_operator_im
    })

    history = model.fit(x=train_inputs,
                        y=train_targets,
                        epochs=epoch_size,
                        batch_size=batchsize,
                        callbacks=callbacks_list,
                        validation_data=validation,
                        sample_weight=sampleweights)

    model.save(storage + '/model_result.hdf5')
    np.save(storage + '/epoch_history.npy', history.history)
Beispiel #11
0
# Collect the run settings and encode them in the model name, so every
# configuration gets its own output directory.
fd = {'shifted': shifted, 'lr': learning_rate, 'emdim': chord_embedding_dim, 'opt': optimizer,
'bi': bidirectional, 'lstms': lstm_size, 'trainsize': train_set_size, 'testsize': test_set_size}
model_name = 'Shifted_%(shifted)s_Lr_%(lr)s_EmDim_%(emdim)s_opt_%(opt)s_bi_%(bi)s_lstmsize_%(lstms)s_trainsize_%(trainsize)s_testsize_%(testsize)s' % fd

# model_path is extended in place; plain string concatenation is used, so the
# incoming value must already end with a path separator.
model_path = model_path + model_name + '/'
if not os.path.exists(model_path):
    os.makedirs(model_path) 



print('loading data...')
train_set, test_set = data_class.get_chord_train_and_test_set(train_set_size, test_set_size)
print('creating model...')
# Stateful model fed one chord id at a time (batch_size=1, input_length=1).
model = Sequential()
model.add(Embedding(num_chords, chord_embedding_dim, batch_size=1, input_length=1))
if bidirectional: model.add(Bidirectional(LSTM(lstm_size, stateful=True)))
else: model.add(LSTM(lstm_size, stateful=True))
model.add(Dense(num_chords))
model.add(Activation('softmax'))
# `optimizer` is rebound from its name to an optimizer instance; any value
# other than 'Adam' or 'RMS' is passed to compile() unchanged.
if optimizer == 'Adam':
    optimizer = Adam(lr=learning_rate)
elif optimizer == 'RMS':
    optimizer = RMSprop(lr=learning_rate)
loss = 'categorical_crossentropy'
model.compile(optimizer, loss)


# Accumulators for the loss bookkeeping.
total_test_loss_array = [] 
total_train_loss_array = []
total_test_loss = 0
Beispiel #12
0
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    """Build the (uncompiled) sequence-scoring network described by ``args``.

    The graph is stacked in this order: embedding -> optional masked 1-D
    convolution -> one to three (optionally bidirectional) recurrent layers ->
    aggregation over time (mean-over-time or attention) -> optional extra
    feature inputs -> optional dense layer -> final prediction layer.

    Args:
        args: Options object. Attributes read here: ``recurrent_unit``,
            ``dropout_w``, ``dropout_u``, ``dropout_prob``, ``model_type``,
            ``emb_path``, ``emb_dim``, ``loss``, ``normalize``,
            ``embd_train``, ``cnn_dim``, ``cnn_window_size``, ``rnn_dim``,
            ``rnn_opt``, ``bi``, ``rnn_2l``, ``rnn_3l``, ``aggregation``,
            ``tfidf``, ``features`` and ``dense``.
        initial_mean_value: For ``model_type == 'reg'`` a NumPy array whose
            length is the number of outputs (a 0-d array is promoted to
            length 1). Otherwise it is used directly as the number of
            outputs.
        overal_maxlen: Length of the integer input sequence.
        vocab: Vocabulary; ``len(vocab)`` sizes the embedding table and the
            object is passed to the pre-trained embedding reader when
            ``args.emb_path`` is set.

    Returns:
        A Keras ``Model`` whose inputs are the token sequence plus, when
        enabled, the ``args.tfidf``-wide vector and the 13-wide feature
        vector, and whose output is the activated prediction layer.

    Raises:
        NotImplementedError: If ``args.model_type`` is neither ``'cls'`` nor
            ``'reg'``, or ``args.aggregation`` is neither ``'mot'`` nor
            ``'att'``.
    """

    ###############################################################################################################################
    ## Recurrence unit type
    #

    # NOTE(review): an unrecognised recurrent_unit leaves RNN unbound; that
    # only surfaces later if a recurrent layer is actually requested.
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    # Input (W) and recurrent (U) dropout each fall back to the shared
    # dropout probability when no dedicated positive value is given.
    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            # A 0-d array only has axis 0 available; axis=1 is rejected with
            # AxisError by current NumPy releases.
            initial_mean_value = np.expand_dims(initial_mean_value, axis=0)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:

        def my_init(shape, name=None):
            """Return an embedding variable seeded from the pre-trained file.

            Starts from a random matrix of the requested shape and lets the
            embedding reader fill it for the given vocabulary.
            """
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, emb_matrix)
            return K.variable(emb_matrix, name=name)

        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################################################################
    ## Model Stacking
    #

    # Pick activations/initialisers for the dense and final layers.
    # NOTE(review): for 'cls' with a loss other than 'cnp'/'hng' the final
    # activation/initialiser stay undefined - confirm the allowed losses.
    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        if args.loss == 'cnp':
            final_activation = 'softmax'
            final_init = 'glorot_uniform'
        elif args.loss == 'hng':
            final_activation = 'linear'
            final_init = 'glorot_uniform'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'he_normal'
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab),
                  args.emb_dim,
                  mask_zero=True,
                  init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        rnn_layer = RNN(args.rnn_dim,
                        return_sequences=True,
                        consume_less=args.rnn_opt,
                        dropout_W=dropout_W,
                        dropout_U=dropout_U)
        if args.bi:
            rnn_layer = Bidirectional(rnn_layer)
        x = rnn_layer(x)
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            rnn_layer2 = RNN(args.rnn_dim,
                             return_sequences=True,
                             consume_less=args.rnn_opt,
                             dropout_W=dropout_W,
                             dropout_U=dropout_U)
            if args.bi:
                rnn_layer2 = Bidirectional(rnn_layer2)
            x = rnn_layer2(x)
            if args.dropout_prob > 0:
                x = Dropout(args.dropout_prob)(x)
            # Stack 3 Layers
            if args.rnn_3l:
                rnn_layer3 = RNN(args.rnn_dim,
                                 return_sequences=True,
                                 consume_less=args.rnn_opt,
                                 dropout_W=dropout_W,
                                 dropout_U=dropout_U)
                if args.bi:
                    rnn_layer3 = Bidirectional(rnn_layer3)
                x = rnn_layer3(x)
                if args.dropout_prob > 0:
                    x = Dropout(args.dropout_prob)(x)

    # Aggregate over time: masked mean, or an attention-wrapped RNN that
    # returns a single vector per sequence.
    if args.aggregation == 'mot':
        x = MeanOverTime(mask_zero=True)(x)
    elif args.aggregation == 'att':
        attention_rnn = RNN(args.rnn_dim,
                            return_sequences=False,
                            consume_less=args.rnn_opt,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U)
        attention_rnn = Attention(attention_rnn)
        x = attention_rnn(x)
    else:
        raise NotImplementedError

    # Augmented TF/IDF Layer
    if args.tfidf > 0:
        pca_input = Input(shape=(args.tfidf, ), dtype='float32')
        merged = merge([x, pca_input], mode='concat')
    else:
        merged = x

    # Augmented Numerical Features
    if args.features:
        ftr_input = Input(shape=(13, ), dtype='float32')
        merged = merge([merged, ftr_input], mode='concat')

    # Optional Dense Layer
    # NOTE(review): args.dense only switches this layer on; its width is
    # num_outputs - confirm that is intended.
    if args.dense > 0:
        if args.loss == 'hng':
            merged = DenseWithMasking(num_outputs,
                                      init=dense_init,
                                      W_regularizer=l2(0.001),
                                      activity_regularizer=l2(0.001))(merged)
        else:
            merged = DenseWithMasking(num_outputs, init=dense_init)(merged)
        if final_activation == 'relu' or final_activation == 'linear':
            merged = BatchNormalization()(merged)
        merged = Activation(dense_activation)(merged)
        if args.dropout_prob > 0:
            merged = Dropout(args.dropout_prob)(merged)

    # Final Prediction Layer
    if args.loss == 'hng':
        merged = DenseWithMasking(num_outputs,
                                  init=final_init,
                                  W_regularizer=l2(0.001),
                                  activity_regularizer=l2(0.001))(merged)
    else:
        merged = DenseWithMasking(num_outputs, init=final_init)(merged)
    if final_activation == 'relu' or final_activation == 'linear':
        merged = BatchNormalization()(merged)
    predictions = Activation(final_activation)(merged)

    # Model Input/Output: the token sequence always comes first, followed by
    # whichever auxiliary inputs were created above.
    model_input = [
        sequence,
    ]
    if args.tfidf > 0:
        model_input.append(pca_input)
    if args.features:
        model_input.append(ftr_input)

    model = Model(input=model_input, output=predictions)

    logger.info('  Model Done')
    return model
Beispiel #13
0
    def build(self):
        """Assemble the network graph and store the result on ``self.model``.

        Three zero-padded 3-D convolution stages (conv -> batch norm -> ReLU
        -> spatial dropout -> max pooling) feed two stacked bidirectional
        LSTMs and a softmax over ``self.output_size`` classes. The softmax
        output, together with label and length inputs, is passed to ``CTC``,
        whose result is the output of ``self.model``. Every intermediate
        tensor is kept as an attribute on ``self``.
        """
        # The position of the channel axis depends on the backend setting.
        channels_first = K.image_data_format() == 'channels_first'
        input_shape = (
            (self.img_c, self.frames_n, self.img_w, self.img_h)
            if channels_first else
            (self.frames_n, self.img_w, self.img_h, self.img_c))

        # Keyword arguments shared by all pooling layers and by both LSTMs.
        pool_args = dict(pool_size=(1, 2, 2), strides=(1, 2, 2))
        lstm_args = dict(return_sequences=True, kernel_initializer='Orthogonal')

        self.input_data = Input(
            name='the_input', shape=input_shape, dtype='float32')

        # --- convolution stage 1 ---
        self.zero1 = ZeroPadding3D(
            padding=(1, 2, 2), name='zero1')(self.input_data)
        self.conv1 = Conv3D(
            32, (3, 5, 5), strides=(1, 2, 2),
            kernel_initializer='he_normal', name='conv1')(self.zero1)
        self.batc1 = BatchNormalization(name='batc1')(self.conv1)
        self.actv1 = Activation('relu', name='actv1')(self.batc1)
        self.drop1 = SpatialDropout3D(0.5)(self.actv1)
        self.maxp1 = MaxPooling3D(name='max1', **pool_args)(self.drop1)

        # --- convolution stage 2 ---
        self.zero2 = ZeroPadding3D(
            padding=(1, 2, 2), name='zero2')(self.maxp1)
        self.conv2 = Conv3D(
            64, (3, 5, 5), strides=(1, 1, 1),
            kernel_initializer='he_normal', name='conv2')(self.zero2)
        self.batc2 = BatchNormalization(name='batc2')(self.conv2)
        self.actv2 = Activation('relu', name='actv2')(self.batc2)
        self.drop2 = SpatialDropout3D(0.5)(self.actv2)
        self.maxp2 = MaxPooling3D(name='max2', **pool_args)(self.drop2)

        # --- convolution stage 3 ---
        self.zero3 = ZeroPadding3D(
            padding=(1, 1, 1), name='zero3')(self.maxp2)
        self.conv3 = Conv3D(
            96, (3, 3, 3), strides=(1, 1, 1),
            kernel_initializer='he_normal', name='conv3')(self.zero3)
        self.batc3 = BatchNormalization(name='batc3')(self.conv3)
        self.actv3 = Activation('relu', name='actv3')(self.batc3)
        self.drop3 = SpatialDropout3D(0.5)(self.actv3)
        self.maxp3 = MaxPooling3D(name='max3', **pool_args)(self.drop3)

        # Flatten each step of the second axis into a single feature vector
        # so the result can be consumed by the recurrent layers.
        self.resh1 = TimeDistributed(Flatten())(self.maxp3)

        self.lstm_1 = Bidirectional(
            LSTM(256, name='lstm1', **lstm_args),
            merge_mode='concat')(self.resh1)
        self.lstm_2 = Bidirectional(
            LSTM(256, name='lstm2', **lstm_args),
            merge_mode='concat')(self.lstm_1)

        # Project the recurrent output onto the character classes.
        self.dense1 = Dense(
            self.output_size,
            kernel_initializer='he_normal',
            name='dense1')(self.lstm_2)
        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        # Auxiliary inputs that are only consumed by the CTC computation.
        self.labels = Input(
            name='the_labels',
            shape=[self.absolute_max_string_len],
            dtype='float32')
        self.input_length = Input(
            name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(
            name='label_length', shape=[1], dtype='int64')

        ctc_inputs = [
            self.y_pred, self.labels, self.input_length, self.label_length
        ]
        self.loss_out = CTC('ctc', ctc_inputs)

        model_inputs = [
            self.input_data, self.labels, self.input_length, self.label_length
        ]
        self.model = Model(inputs=model_inputs, outputs=self.loss_out)
Beispiel #14
0
def build_model(embedding_layer):
    """Build a question/answer matching model with a single sigmoid output.

    Both inputs are int32 sequences of length ``MAX_SEQUENCE_LENGTH``. They
    are embedded with the shared ``embedding_layer``, passed through two
    shared highway layers and a shared bidirectional GRU encoder, combined
    through attention in both directions, re-encoded by a second
    bidirectional GRU, max-reduced over the last axis and mapped to one
    sigmoid unit.

    The tensor shapes mentioned in the comments below are the original
    author's notes; they are not checked here.

    Args:
        embedding_layer: Layer applied to both the question and the answer
            token sequences.

    Returns:
        A Keras model taking ``[question_input, answer_input]`` and
        producing the sigmoid prediction.
    """
    # Token-id inputs; both sides use the same fixed sequence length.
    question_input = layers.Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    answer_input = layers.Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

    question_repr = embedding_layer(question_input)
    answer_repr = embedding_layer(answer_input)

    # Highway layers keep the tensor shape. Each highway layer is shared by
    # both sides but wrapped in its own TimeDistributed instance, because a
    # single TimeDistributed wrapper cannot be applied to two inputs with
    # different numbers of time steps.
    num_highway = 2
    for depth in range(num_highway):
        shared_highway = highway.Highway(activation='relu',
                                         name='highway_{}'.format(depth))
        question_td = layers.TimeDistributed(
            shared_highway,
            name=shared_highway.name + "_qtd",
            trainable=False)
        question_repr = question_td(question_repr)
        answer_td = layers.TimeDistributed(
            shared_highway,
            name=shared_highway.name + "_ptd",
            trainable=False)
        answer_repr = answer_td(answer_repr)

    # Shared sequence encoder: adds phrase-level context to every position.
    encoder_cell = layers.GRU(return_sequences=True,
                              units=500,
                              activation='relu',
                              recurrent_dropout=0.2,
                              dropout=0.3,
                              trainable=False)
    phrase_encoder = Bidirectional(encoder_cell)
    # Shape: (batch_size, num_question_words, encoder_dim * 2)
    encoded_question = phrase_encoder(question_repr)
    # Shape: (batch_size, num_answer_words, encoder_dim * 2)
    encoded_answer = phrase_encoder(answer_repr)

    # PART 2: similarity between every answer word and every question word,
    # normalised in two different ways.
    similarity_layer = MatrixAttention(
        similarity_function={
            'type': 'linear',
            'combination': 'x,y,x*y'
        },
        name='passage_question_similarity',
        trainable=False)
    # Shape: (batch_size, num_answer_words, num_question_words)
    similarity = similarity_layer([encoded_answer, encoded_question])

    # For each answer word: attention over the question words, then the
    # attention-weighted sum of the question encodings.
    per_answer_attention = MaskedSoftmax()(similarity)
    attend_question = WeightedSum(name="answer_question_vectors",
                                  use_masking=False,
                                  trainable=False)
    attended_question = attend_question(
        [encoded_question, per_answer_attention])

    # For each answer word: its best similarity to any question word. These
    # maxima are normalised into one attention over the answer, which yields
    # a single summary vector.
    best_match = Max(axis=-1)(similarity)
    answer_attention = MaskedSoftmax()(best_match)
    summarise_answer = WeightedSum(name="question_passage_vector",
                                   use_masking=False,
                                   trainable=False)
    answer_summary = summarise_answer([encoded_answer, answer_attention])

    # Repeat the summary vector once per answer word so it can be merged
    # position-wise with the other representations.
    tile_layer = RepeatLike(axis=1, copy_from_axis=1)
    tiled_summary = tile_layer([answer_summary, encoded_answer])

    merge_layer = complex_concat.ComplexConcat(
        combination='1,2,1*2,1*3', name='final_merged_passage')
    merged_answer = merge_layer(
        [encoded_answer, attended_question, tiled_summary])

    # PART 3: one further bidirectional encoder on the merged representation
    # (the "modeling layer").
    modeling_cell = layers.GRU(return_sequences=True,
                               units=300,
                               activation='relu',
                               recurrent_dropout=0.2,
                               dropout=0.3)
    modeled_answer = Bidirectional(modeling_cell)(merged_answer)

    # PART 4: reduce with a max over the last axis, then predict.
    max_answer = Max(axis=-1)(modeled_answer)
    print("max answer shape", max_answer.shape)
    print("modeled_answer shape", modeled_answer.shape)

    output_layer = layers.Dense(1, activation='sigmoid', name='prediction')
    preds = output_layer(max_answer)
    print("pred shape", preds.shape)

    return models.Model(inputs=[question_input, answer_input], outputs=preds)
Beispiel #15
0
 model.add(
     Embedding(
         numwords + 1,
         embedding,
         input_length=seq_len,
         mask_zero=True,
         embeddings_regularizer=regularizers.l2(l2_regularizer_embeddings),
         # embeddings_initializer=he_normal(seed=42)
     ))
 model.add(Dropout(0.2))
 # model.add(SpatialDropout1D(0.2))
 if nlayers == 1:
     model.add(
         Bidirectional(
             RNN(neurons,
                 implementation=impl,
                 recurrent_dropout=rdrop,
                 dropout=drop,
                 kernel_regularizer=regularizers.l2(l2_regularizer))))
 else:
     model.add(
         Bidirectional(
             RNN(neurons,
                 implementation=impl,
                 recurrent_dropout=rdrop,
                 dropout=drop,
                 return_sequences=True,
                 kernel_regularizer=regularizers.l2(l2_regularizer))))
     for i in range(1, nlayers - 1):
         model.add(
             Bidirectional(
                 RNN(neurons,
    # NOTE(review): this is the interior of a function whose header is not
    # visible here; maxlen, vocab_size and embedding_dim come from the
    # enclosing scope.
    batch_size = 250
    feat_dim = 512  # presumably the image feature depth (matches the 512 below) - TODO confirm
    w = 7  # presumably the spatial size of the image feature map - TODO confirm

    # build model
    print("Build model...")

    # Text branch: integer token ids of length maxlen.
    tweet = Input(shape=(maxlen, ), dtype='int32')  # input_1

    embedding = Embedding(input_dim=vocab_size,
                          output_dim=embedding_dim,
                          input_length=maxlen,
                          mask_zero=False)(tweet)

    # Bidirectional LSTM that keeps one output per time step.
    lstm = Bidirectional(
        LSTM(embedding_dim,
             return_sequences=True,
             input_shape=(maxlen, embedding_dim)))(embedding)
    # Unidirectional alternative, kept for reference:
    # lstm = LSTM(embedding_dim, return_sequences=True, input_shape=(maxlen, embedding_dim))(embedding)
    dropout = Dropout(0.5)(lstm)
    # Earlier 14x14 variant of the image input, kept for reference:
    # img = Input(shape=(1, 14, 14,512))

    # Image branch: a 7x7 grid of 512-dimensional features.
    img = Input(shape=(7, 7, 512))  # input_2

    # -------Image Bcnn start
    cnn_out_a = img
    cnn_out_shape = img.shape
    # Merge the two spatial axes into one: (7, 7, 512) -> (49, 512).
    cnn_out_a = Reshape(
        [cnn_out_shape[1] * cnn_out_shape[2], cnn_out_shape[-1]])(cnn_out_a)
    print("cnn_out_a.shape is:---------", cnn_out_a.shape)  # (None, 49, 512) for the 7x7 input; 196 applied to the 14x14 variant
    cnn_out_b = cnn_out_a
def embedding_cnn_glove(training_list, validation_list, test_list):
    """Train a 1-D CNN regressor on GloVe-embedded tweets and print scores.

    The tweets of all three splits are tokenised together, padded or
    truncated to 50 tokens, and each token is replaced by its 100-dimensional
    GloVe vector (all zeros for padding and for words missing from GloVe).
    A small Conv1D network is fitted on the training split, and the Pearson
    and Spearman correlations between predictions and gold intensities are
    printed for the train, validation and test splits.

    Args:
        training_list: Iterable of tweet objects exposing ``text`` and
            ``intensity``; used for fitting.
        validation_list: Same kind of objects; passed as validation data and
            scored.
        test_list: Same kind of objects; only scored.

    Returns:
        None. All results are printed.
    """

    def texts_and_scores(tweets):
        """Split tweet objects into parallel lists of texts and scores."""
        texts, scores = [], []
        for tweet in tweets:
            texts.append(tweet.text)
            scores.append(float(tweet.intensity))
        return texts, scores

    tweets_train, score_train = texts_and_scores(training_list)
    tweets_val, score_val = texts_and_scores(validation_list)
    tweets_test, score_test = texts_and_scores(test_list)
    # The tokenizer vocabulary is built over all three splits.
    total_dataset = tweets_train + tweets_val + tweets_test

    t = Tokenizer()
    t.fit_on_texts(total_dataset)
    word_index = t.word_index
    print(t.document_count)
    vocab_size = len(t.word_counts)
    print(vocab_size)
    print(len(word_index))
    max_len = 50

    sequences_train = t.texts_to_sequences(tweets_train)
    sequences_val = t.texts_to_sequences(tweets_val)
    sequences_test = t.texts_to_sequences(tweets_test)

    padded_train = pad_sequences(sequences_train,
                                 maxlen=max_len,
                                 padding='post')
    padded_val = pad_sequences(sequences_val, maxlen=max_len, padding='post')
    padded_test = pad_sequences(sequences_test, maxlen=max_len, padding='post')

    EMBEDDING_DIM = 100
    y = np.array(score_train)
    y_val = np.array(score_val)
    y_test = np.array(score_test)
    # The feature arrays built below have one row per padded sequence, so
    # these are the same sizes the arrays will have.
    print(len(y_val))
    print(len(padded_val))
    print(len(y_test))
    print(len(padded_test))

    GLOVE_DIR = "./Data/glove.twitter.27B/glove.twitter.27B.100d.txt"
    embeddings_index = {}
    # The context manager closes the file even if a line fails to parse.
    with open(GLOVE_DIR, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Read Glove and Made Dict')

    number_found = 0
    number_not_found = 0
    # Row 0 stays all-zero and doubles as the vector for padding tokens.
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
            number_found += 1
        else:
            # Words not found in the embedding index keep their all-zero row.
            print(word)
            number_not_found += 1

    print(number_found)
    print(number_not_found)

    # Replace every token id by its embedding vector in one indexing step.
    # The result is float32 with shape (samples, max_len, EMBEDDING_DIM):
    # an integer array would truncate the GloVe coefficients, and Conv1D
    # expects exactly (steps, channels) per sample.
    X = embedding_matrix[padded_train].astype(np.float32)
    X_val = embedding_matrix[padded_val].astype(np.float32)
    X_test = embedding_matrix[padded_test].astype(np.float32)

    # Convolution layers
    conv_1 = Conv1D(64,
                    5,
                    activation='relu',
                    name='conv1',
                    input_shape=(max_len, EMBEDDING_DIM))
    conv_2 = Conv1D(32, 3, activation='relu', name='conv2')

    # Pooling layers: one instance per position in the stack, so that every
    # layer in the Sequential model has a unique name.
    pool_3 = MaxPooling1D(pool_size=3, strides=2, name='pool3')
    pool_4 = MaxPooling1D(pool_size=3, strides=2, name='pool4')

    # Dense layers
    dense_1 = Dense(200, activation='relu', name='dense1')
    dense_2 = Dense(1, activation='sigmoid', name='dense2')

    def get_model():
        """Assemble and compile the conv -> pool -> conv -> pool -> dense net."""
        model = Sequential()
        model.add(conv_1)
        model.add(Dropout(0.3))
        model.add(pool_3)
        model.add(conv_2)
        model.add(Dropout(0.3))
        model.add(pool_4)

        model.add(Flatten())
        model.add(dense_1)
        model.add(dense_2)
        # compile the model
        model.compile(optimizer='adam', loss='mean_squared_error')
        # summarize the model
        print(model.summary())
        return model

    estimator = KerasRegressor(build_fn=get_model,
                               epochs=50,
                               batch_size=32,
                               verbose=1)
    estimator.fit(X, y, validation_data=(X_val, y_val))

    train_prediction = estimator.predict(X)
    print(pearsonr(train_prediction, y))
    print(spearmanr(train_prediction, y))

    val_prediction = estimator.predict(X_val)
    print(pearsonr(val_prediction, y_val))
    print(spearmanr(val_prediction, y_val))

    test_prediction = estimator.predict(X_test)
    print(pearsonr(test_prediction, y_test))
    print(spearmanr(test_prediction, y_test))
Beispiel #18
0
def CRNN(input_shape,
         num_classes,
         prediction_only=False,
         gru=True,
         alpha=0.75,
         gamma=0.5):
    """CRNN architecture.

    # Arguments
        input_shape: Shape of the input image, (256, 32, 1).
        num_classes: Number of characters in alphabet, including CTC blank.
        prediction_only: If true, only the prediction model is built and
            returned.
        gru: Selects bidirectional GRU layers for the recurrent part;
            otherwise bidirectional LSTM layers are used.
        alpha: Scale factor of the focal CTC loss.
        gamma: Exponent applied to ``(1 - p)`` in the focal CTC loss.

    # Returns
        The prediction model alone when `prediction_only` is true, otherwise
        the tuple `(model_train, model_pred)`, where the output of
        `model_train` is the focal CTC loss.

    # References
        https://arxiv.org/abs/1507.05717
    """
    # One LeakyReLU instance serves as the activation of every convolution.
    act = LeakyReLU(alpha=0.05)

    def conv(filters, name, kernel=(3, 3), padding='same'):
        """Create a stride-1 convolution layer with the shared activation."""
        return Conv2D(filters, kernel,
                      strides=(1, 1),
                      activation=act,
                      padding=padding,
                      name=name)

    def pool(name, strides, padding='same'):
        """Create a 2x2 max-pooling layer with the given strides."""
        return MaxPool2D(pool_size=(2, 2),
                         strides=strides,
                         name=name,
                         padding=padding)

    # Convolutional feature extractor.
    image_input = Input(shape=input_shape, name='image_input')
    x = conv(64, 'conv1_1')(image_input)
    x = pool('pool1', (2, 2))(x)
    x = conv(128, 'conv2_1')(x)
    x = pool('pool2', (2, 2))(x)
    x = conv(256, 'conv3_1')(x)
    x = conv(256, 'conv3_2')(x)
    x = pool('pool3', (1, 2))(x)
    x = conv(512, 'conv4_1')(x)
    x = BatchNormalization(name='batchnorm1')(x)
    x = conv(512, 'conv5_1')(x)
    x = BatchNormalization(name='batchnorm2')(x)
    x = pool('pool5', (1, 2), padding='valid')(x)
    x = conv(512, 'conv6_1', kernel=(2, 2), padding='valid')(x)

    # Collapse the feature map into a sequence of 512-dimensional vectors.
    x = Reshape((-1, 512))(x)

    # Two stacked bidirectional recurrent layers.
    if gru:
        for _ in range(2):
            x = Bidirectional(
                GRU(256,
                    dropout=0.1,
                    recurrent_dropout=0.1,
                    return_sequences=True))(x)
    else:
        for lstm_name in ('lstm1', 'lstm2'):
            x = Bidirectional(
                LSTM(256,
                     return_sequences=True,
                     dropout=0.1,
                     recurrent_dropout=0.1,
                     name=lstm_name))(x)

    # Per-step class scores followed by a softmax.
    x = Dense(num_classes, name='dense1')(x)
    y_pred = Activation('softmax', name='softmax')(x)

    model_pred = Model(image_input, y_pred)
    if prediction_only:
        return model_pred

    # The label input is as long as the prediction sequence.
    max_string_len = int(y_pred.shape[1])

    def focal_ctc_lambda_func(args):
        """Compute the CTC batch cost and apply the focal weighting."""
        labels, y_pred, input_length, label_length = args
        ctc_loss = K.ctc_batch_cost(labels, y_pred, input_length, label_length)
        p = tf.exp(-ctc_loss)
        return alpha * tf.pow((1 - p), gamma) * ctc_loss

    labels = Input(name='label_input', shape=[max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    loss_layer = Lambda(focal_ctc_lambda_func,
                        output_shape=(1, ),
                        name='focal_ctc_loss')
    focal_ctc_loss = loss_layer([labels, y_pred, input_length, label_length])

    model_train = Model(
        inputs=[image_input, labels, input_length, label_length],
        outputs=focal_ctc_loss)

    return model_train, model_pred
Beispiel #19
0
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len,
                 n_phonetic_features, y1, n1, y2, n2, y3, n3, y4, n4, y5, n5,
                 y6, n6, hidden_size, num_layers):
    """Build the multi-output model: six softmax heads plus an attention decoder.

    The graph has two branches:

    * CNN branch -- the current word and its four right / four left
      neighbours go through one shared embedding, dropout and Gaussian
      noise, then Conv1D (kernel sizes 4 and 5) with max and average
      pooling.  The pooled tensors are merged, fed to a bidirectional RNN,
      concatenated with the phonetic features and passed through two dense
      layers into the heads ``output1`` .. ``output6``.
    * seq2seq branch (Luong et al. 2015 attention) -- a GRU encoder over the
      current word and a GRU decoder over ``decoder_input``, with
      dot-product attention and two time-distributed dense layers.

    Reads the module-level names ``EMBEDDING_DIM``, ``HIDDEN_DIM``,
    ``no_filters``, ``RNN`` and ``rnn_output_size``.

    Args:
        X_vocab_len: vocabulary size for both embeddings and the width of
            the decoder softmax.
        X_max_len: length of every word-level input sequence.
        n_phonetic_features: width of the phonetic feature input.
        n1 .. n6: number of units of ``output1`` .. ``output6``.
        y_vocab_len, y_max_len, y1 .. y6, hidden_size, num_layers: accepted
            for interface compatibility; not used when building the graph.

    Returns:
        An uncompiled ``Model`` whose inputs are ``[current_word,
        decoder_input, right_word1..4, left_word1..4, phonetic_input]`` and
        whose outputs are ``[decoder_softmax, output1, ..., output6]``.
    """

    def word_input(name):
        # Every word-level input has the same shape and dtype.
        return Input(shape=(X_max_len, ), dtype='float32', name=name)

    current_word = word_input('input1')  # for encoder (shared)
    decoder_input = word_input('input3')  # for decoder -- attention
    right_words = [word_input('input' + str(n)) for n in range(4, 8)]
    left_words = [word_input('input' + str(n)) for n in range(8, 12)]
    phonetic_input = Input(shape=(n_phonetic_features, ),
                           dtype='float32',
                           name='input12')

    emb_layer1 = Embedding(X_vocab_len,
                           EMBEDDING_DIM,
                           input_length=X_max_len,
                           mask_zero=False,
                           name='Embedding')

    # Order matters: index 0 is the current word, 1-4 the right context,
    # 5-8 the left context.  Layer names below are derived from this index.
    list_of_inputs = [current_word] + right_words + left_words
    list_of_embeddings1 = [emb_layer1(word) for word in list_of_inputs]

    print("Type:: ", type(list_of_embeddings1[0]))

    list_of_embeddings = [
        Dropout(0.50, name='drop1_' + str(j))(emb)
        for j, emb in enumerate(list_of_embeddings1)
    ]
    list_of_embeddings = [
        GaussianNoise(0.05, name='noise1_' + str(j))(emb)
        for j, emb in enumerate(list_of_embeddings)
    ]

    def conv_pool_branch(kernel_size):
        # One Conv1D per embedded input, then max- and average-pooling of
        # each conv output, merged pairwise.
        tag = str(kernel_size)
        convs = [
            Conv1D(filters=no_filters,
                   kernel_size=kernel_size,
                   padding='valid',
                   activation='relu',
                   strides=1,
                   name='conv' + tag + '_' + str(j))(emb)
            for j, emb in enumerate(list_of_embeddings)
        ]
        max_pooled = [
            MaxPooling1D(name='max' + tag + '_' + str(j))(conv)
            for j, conv in enumerate(convs)
        ]
        avg_pooled = [
            AveragePooling1D(name='avg' + tag + '_' + str(j))(conv)
            for j, conv in enumerate(convs)
        ]
        # NOTE(review): `merge` is called without `mode`, so the pairwise
        # combination relies on the legacy default -- confirm it is intended.
        return [
            merge([mx, av], name='merge_conv' + tag + '_' + str(j))
            for j, (mx, av) in enumerate(zip(max_pooled, avg_pooled))
        ]

    # Kernel size 4 first, then 5, so the concatenation order is unchanged.
    maxPools = conv_pool_branch(4) + conv_pool_branch(5)

    concat = merge(maxPools, mode='concat', name='main_merge')

    x = Dropout(0.15, name='drop_single1')(concat)
    x = Bidirectional(RNN(rnn_output_size), name='bidirec1')(x)

    total_features = [x, phonetic_input]
    concat2 = merge(total_features, mode='concat', name='phonetic_merging')

    x = Dense(HIDDEN_DIM,
              activation='relu',
              kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense1')(concat2)
    x = Dropout(0.15, name='drop_single2')(x)
    x = Dense(HIDDEN_DIM,
              kernel_initializer='he_normal',
              activation='tanh',
              kernel_constraint=maxnorm(3),
              bias_constraint=maxnorm(3),
              name='dense2')(x)
    x = Dropout(0.15, name='drop_single3')(x)

    # Six classification heads sharing the same hidden representation.
    heads = [
        Dense(width,
              kernel_initializer='he_normal',
              activation='softmax',
              name='output' + str(k))(x)
        for k, width in enumerate((n1, n2, n3, n4, n5, n6), 1)
    ]

    # Luong et al. 2015 attention model
    emb_layer = Embedding(X_vocab_len,
                          EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True,
                          name='Embedding_for_seq2seq')

    # Only the current word feeds the encoder.
    current_word_embedding = emb_layer(current_word)

    # The returned final state is not used; the decoder is seeded with the
    # last timestep of the encoder output sequence instead.
    encoder, _ = GRU(rnn_output_size,
                     return_sequences=True,
                     unroll=True,
                     return_state=True,
                     name='encoder')(current_word_embedding)
    encoder_last = encoder[:, -1, :]

    decoder = emb_layer(decoder_input)
    decoder = GRU(rnn_output_size,
                  return_sequences=True,
                  unroll=True,
                  name='decoder')(decoder, initial_state=[encoder_last])

    # Dot-product scores between decoder and encoder states, normalised by
    # softmax, then used to weight the encoder states into a context tensor.
    attention = dot([decoder, encoder], axes=[2, 2], name='dot')
    attention = Activation('softmax', name='attention')(attention)

    context = dot([attention, encoder], axes=[2, 1], name='dot2')
    decoder_combined_context = concatenate([context, decoder],
                                           name='concatenate')

    outputs = TimeDistributed(Dense(64, activation='tanh'),
                              name='td1')(decoder_combined_context)
    outputs = TimeDistributed(Dense(X_vocab_len, activation='softmax'),
                              name='td2')(outputs)

    all_inputs = ([current_word, decoder_input] + right_words + left_words +
                  [phonetic_input])
    all_outputs = [outputs] + heads

    return Model(inputs=all_inputs, outputs=all_outputs)
Beispiel #20
0
    def __init__(self,
                 dim,
                 batch_norm,
                 dropout,
                 rec_dropout,
                 task,
                 target_repl=False,
                 deep_supervision=False,
                 num_classes=1,
                 depth=1,
                 input_dim=69,
                 **kwargs):
        """Build the stacked (bi)LSTM + attention network for one task.

        The input has shape ``(48, input_dim)``.  It passes through
        ``depth - 1`` bidirectional LSTM layers (``dim // 2`` units per
        direction), one LSTM with ``dim`` units returning the full sequence,
        an ``AttentionLayer``, optional dropout and a final dense layer.

        Args:
            dim: number of units of the output LSTM.
            batch_norm: stored on the instance; not applied to any layer here.
            dropout: input dropout of every LSTM and rate of the dropout
                applied after attention (skipped when not positive).
            rec_dropout: recurrent dropout of every LSTM.
            task: ``'ihm'`` (sigmoid output) or ``'los'`` (relu output when
                ``num_classes == 1``, softmax otherwise).
            target_repl, deep_supervision: accepted but not used here.
            num_classes: number of units of the final dense layer.
            depth: total number of recurrent layers.
            input_dim: number of features per timestep.
            **kwargs: ignored; their names are printed.

        Raises:
            ValueError: if ``task`` is neither ``'ihm'`` nor ``'los'``.
        """

        print("==> not used params in network class:", kwargs.keys())

        self.dim = dim
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.depth = depth

        if task in ['ihm']:
            # Fixed typo: the activation name was spelled 'sigmod', which is
            # not a valid Keras activation identifier.
            final_activation = 'sigmoid'
        elif task in ['los']:
            if num_classes == 1:
                final_activation = 'relu'
            else:
                final_activation = 'softmax'
        else:
            raise ValueError("Wrong value for task")

        # Input layer; `inputs` keeps the Input tensor because X is rebound
        # to each layer's output below.
        X = Input(shape=(48, input_dim), name='X')
        inputs = [X]

        # Configurations
        is_bidirectional = True

        # Main part of the network
        for i in range(depth - 1):
            num_units = dim
            if is_bidirectional:
                # Halve the width so the concatenated output has `dim` units.
                num_units = num_units // 2

            lstm = LSTM(units=num_units,
                        activation='tanh',
                        return_sequences=True,
                        recurrent_dropout=rec_dropout,
                        kernel_regularizer=regularizers.l2(0.01),
                        dropout=dropout)

            if is_bidirectional:
                X = Bidirectional(lstm)(X)
            else:
                X = lstm(X)

        # Output module of the network.  The full sequence is returned
        # because it is passed to the attention layer.
        # Fixed: recurrent_dropout was the literal True (a rate of 1.0)
        # instead of the configured rec_dropout.
        L = LSTM(units=dim,
                 activation='tanh',
                 return_sequences=True,
                 dropout=dropout,
                 kernel_regularizer=regularizers.l2(0.01),
                 recurrent_dropout=rec_dropout)(X)

        A_L = AttentionLayer()(L)
        if dropout > 0:
            A_L = Dropout(dropout)(A_L)

        y = Dense(num_classes, activation=final_activation)(A_L)
        outputs = [y]

        super(Network, self).__init__(inputs=inputs, outputs=outputs)
Beispiel #21
0
def createHierarchicalAttentionModel(
        maxSeq,
        embWeights=None,
        embeddingSize=None,
        vocabSize=None,  # embedding
        recursiveClass=GRU,
        wordRnnSize=100,
        sentenceRnnSize=100,  # rnn
        dropWordEmb=0.2,
        dropWordRnnOut=0.2,
        dropSentenceRnnOut=0.5):
    """Create a Hierarchical Attention model (https://arxiv.org/abs/1606.02393).

    A sentence-level sub-model (embedding -> bidirectional RNN -> attention
    -> weighted sum over words) is applied to every sentence of a document
    with ``TimeDistributed``.  The sentence vectors then go through a second
    bidirectional RNN with attention and a single sigmoid unit.

    Args:
        maxSeq: max size for sentences (number of word ids per sentence).
        embWeights: numpy matrix with embedding values; when given, the
            embedding is initialised from it and frozen.
        embeddingSize: embedding size (used only if ``embWeights`` is None).
        vocabSize: vocabulary size (used only if ``embWeights`` is None).
        recursiveClass: class of the recurrent layer. Default is GRU.
        wordRnnSize: RNN size for the word sequence.
        sentenceRnnSize: RNN size for the sentence sequence.
        dropWordEmb: dropout rate after the embedding (0.0 disables it).
        dropWordRnnOut: dropout rate after the word RNN (applied if > 0).
        dropSentenceRnnOut: dropout rate after the sentence RNN (if > 0).

    Returns:
        Two compiled models sharing the same layers.  The first outputs the
        document prediction only; the second additionally outputs the word
        and sentence attention tensors so attention can be analysed.
    """

    ##
    ## Sentence level logic
    wordsInputs = Input(shape=(maxSeq, ), dtype='int32', name='words_input')
    if embWeights is None:
        emb = Embedding(vocabSize, embeddingSize)(wordsInputs)
    else:
        emb = Embedding(embWeights.shape[0],
                        embWeights.shape[1],
                        weights=[embWeights],
                        trainable=False)(wordsInputs)
    if dropWordEmb != 0.0:
        emb = Dropout(dropWordEmb)(emb)
    wordRnn = Bidirectional(recursiveClass(wordRnnSize, return_sequences=True),
                            merge_mode='concat')(emb)
    if dropWordRnnOut > 0.0:
        wordRnn = Dropout(dropWordRnnOut)(wordRnn)
    attention = AttentionLayer()(wordRnn)
    # Weight every word state by its attention value, then sum over the
    # word axis to obtain one vector per sentence.
    sentenceEmb = Lambda(lambda x: x[1] * x[0],
                         output_shape=lambda x: x[0])([wordRnn, attention])
    sentenceEmb = Lambda(lambda x: K.sum(x, axis=1),
                         output_shape=lambda x: (x[0], x[2]))(sentenceEmb)
    modelSentence = Model(wordsInputs, sentenceEmb)
    modelSentAttention = Model(wordsInputs, attention)

    ##
    ## Document level logic
    documentInputs = Input(shape=(None, maxSeq),
                           dtype='int32',
                           name='document_input')
    sentenceEmbbeding = TimeDistributed(modelSentence)(documentInputs)
    sentenceAttention = TimeDistributed(modelSentAttention)(documentInputs)
    # Fixed: the sentence-level RNN was sized with wordRnnSize, which left
    # the sentenceRnnSize argument without any effect.
    sentenceRnn = Bidirectional(recursiveClass(sentenceRnnSize,
                                               return_sequences=True),
                                merge_mode='concat')(sentenceEmbbeding)
    if dropSentenceRnnOut > 0.0:
        sentenceRnn = Dropout(dropSentenceRnnOut)(sentenceRnn)
    attentionSent = AttentionLayer()(sentenceRnn)

    documentEmb = multiply(inputs=[sentenceRnn, attentionSent])
    documentEmb = Lambda(lambda x: K.sum(x, axis=1),
                         output_shape=lambda x: (x[0], x[2]),
                         name="att2")(documentEmb)
    documentOut = Dense(1, activation="sigmoid",
                        name="documentOut")(documentEmb)

    # `inputs` / `outputs` replace the deprecated `input` / `output` keywords.
    model = Model(inputs=[documentInputs], outputs=[documentOut])
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    modelAttentionEv = Model(
        inputs=[documentInputs],
        outputs=[documentOut, sentenceAttention, attentionSent])
    modelAttentionEv.compile(loss='binary_crossentropy',
                             optimizer='rmsprop',
                             metrics=['accuracy'])
    return model, modelAttentionEv
    def __init__(self,
                 inputs=None,
                 outputs=None,
                 N=None,
                 M=None,
                 C=None,
                 word2vec_dim=None,
                 label_size=None,
                 embedding_matrix=None,
                 hdim=None,
                 dropout_rate=None,
                 output_type=None,
                 unroll=False,
                 **kwargs):
        """Build the R-Net graph, or wrap an existing one when loading.

        When both ``inputs`` and ``outputs`` are given (model loaded from a
        config) they are forwarded to the parent constructor unchanged and
        nothing else is built.

        Args:
            inputs, outputs: pre-built tensors for the load-from-config path.
            N: length of the ``P_vecs`` input.
            M: length of the ``Q_vecs`` input.
            C: accepted but not used here.
            word2vec_dim: output dimension of the two frozen embeddings.
            label_size: units of the softmax layer for ``output_type="multi"``.
            embedding_matrix: initial embedding weights; its length is the
                embedding input dimension.
            hdim: hidden size H of the recurrent layers and shared weights.
            dropout_rate: rate for the GRU input dropout and every
                ``VariationalDropout``.
            output_type: ``"bi"`` (per-timestep sigmoid) or ``"multi"``
                (softmax over ``label_size`` classes).
            unroll: forwarded to the attention GRUs and the output GRU; the
                two input encoders always use ``unroll=False``.
            **kwargs: forwarded to the parent constructor.

        Raises:
            ValueError: if a new graph is built and ``output_type`` is
                neither ``"bi"`` nor ``"multi"``.
        """
        # Load model from config
        if inputs is not None and outputs is not None:
            super(RNet, self).__init__(inputs=inputs,
                                       outputs=outputs,
                                       **kwargs)
            return

        # Fail before any layer is created; previously an unknown value left
        # `preds` unbound and surfaced as an UnboundLocalError at the end.
        if output_type not in ("bi", "multi"):
            raise ValueError(
                'output_type must be "bi" or "multi", got %r' %
                (output_type, ))

        # Dimensions
        H = hdim
        W = word2vec_dim

        v = SharedWeight(size=(H, 1), name='v')
        WQ_u = SharedWeight(size=(2 * H, H), name='WQ_u')
        WP_u = SharedWeight(size=(2 * H, H), name='WP_u')
        WP_v = SharedWeight(size=(H, H), name='WP_v')
        W_g1 = SharedWeight(size=(4 * H, 4 * H), name='W_g1')
        W_g2 = SharedWeight(size=(2 * H, 2 * H), name='W_g2')
        WP_h = SharedWeight(size=(2 * H, H), name='WP_h')
        Wa_h = SharedWeight(size=(2 * H, H), name='Wa_h')
        WQ_v = SharedWeight(size=(2 * H, H), name='WQ_v')
        WPP_v = SharedWeight(size=(H, H), name='WPP_v')
        VQ_r = SharedWeight(size=(H, H), name='VQ_r')
        # Every shared weight is registered as a model input, including the
        # ones no layer below consumes (WP_h, Wa_h, WQ_v, VQ_r).
        shared_weights = [
            v, WQ_u, WP_u, WP_v, W_g1, W_g2, WP_h, Wa_h, WQ_v, WPP_v, VQ_r
        ]

        P_vecs = Input(shape=(N, ), name='P_vecs')
        P = Embedding(len(embedding_matrix),
                      W,
                      weights=[embedding_matrix],
                      trainable=False,
                      input_length=N)(P_vecs)

        Q_vecs = Input(shape=(M, ), name='Q_vecs')
        Q = Embedding(len(embedding_matrix),
                      W,
                      weights=[embedding_matrix],
                      trainable=False,
                      input_length=M)(Q_vecs)

        input_placeholders = [P_vecs, Q_vecs]

        # Encoder for P: masking, one bidirectional GRU, variational dropout.
        uP = Masking()(P)
        uP = Bidirectional(
            GRU(units=H,
                return_sequences=True,
                dropout=dropout_rate,
                unroll=False))(uP)
        uP = VariationalDropout(rate=dropout_rate,
                                noise_shape=(None, 1, 2 * H),
                                name='uP')(uP)

        # Encoder for Q, same structure as for P.
        uQ = Masking()(Q)
        uQ = Bidirectional(
            GRU(units=H,
                return_sequences=True,
                dropout=dropout_rate,
                unroll=False))(uQ)
        uQ = VariationalDropout(rate=dropout_rate,
                                noise_shape=(None, 1, 2 * H),
                                name='uQ')(uQ)

        vP = QuestionAttnGRU(units=H, return_sequences=True, unroll=unroll)(
            [uP, uQ, WQ_u, WP_v, WP_u, v, W_g1])
        vP = VariationalDropout(rate=dropout_rate,
                                noise_shape=(None, 1, H),
                                name='vP')(vP)

        hP = Bidirectional(
            SelfAttnGRU(units=H, return_sequences=True,
                        unroll=unroll))([vP, vP, WP_v, WPP_v, v, W_g2])

        hP = VariationalDropout(rate=dropout_rate,
                                noise_shape=(None, 1, 2 * H),
                                name='hP')(hP)

        # Question pooling (rQ) is disabled:
        #         rQ = QuestionPooling() ([uQ, WQ_u, WQ_v, v, VQ_r])
        #         rQ = Dropout(rate=dropout_rate, name='rQ') (rQ)

        if output_type == "bi":
            gP = Bidirectional(
                GRU(units=H, return_sequences=True, unroll=unroll))(hP)
            preds = TimeDistributed(Dense(1, activation='sigmoid'))(gP)
        else:  # "multi", validated above
            gP = Bidirectional(
                GRU(units=H, return_sequences=False, unroll=unroll))(hP)
            preds = Dense(label_size, activation='softmax')(gP)

        inputs = input_placeholders + shared_weights
        outputs = preds

        super(RNet, self).__init__(inputs=inputs, outputs=outputs, **kwargs)
Beispiel #23
0
    def __init__(self, params, mask_zero=True):
        """Build the two-output graph (Y0, Y1), its loss and its train op.

        Words and POS inputs are each embedded and encoded by a
        bidirectional GRU.  ``Y0`` is predicted from the POS encoding.  A
        softmax over the POS encoding is multiplied element-wise with the
        word encoding; that product is concatenated with the ``Y0``
        prediction and passed through a hidden layer to predict ``Y1``.

        Args:
            params: nested configuration dict.  Keys read here:
                ``words`` / ``pos`` (``dim`` and ``RNN`` with ``cell``,
                ``drop_out``, ``rnn_drop_out``, optional ``batch_norm``),
                ``embed_size``, ``voc_size``, ``pos_size``, ``Y0`` / ``Y1``
                (``dim``, ``activate_func``, optional ``kwargs``,
                ``batch_norm``, ``activity_reg``), ``H`` (``dim``, optional
                ``batch_norm``, ``drop_out``) and ``learning_rate``.
                The dict is not modified.
            mask_zero: forwarded to both embedding layers.

        Sets ``self.wds``, ``self.pos``, ``self.Y0``, ``self.Y1``
        (placeholders), ``self.Y0_probs``, ``self.Y1_probs``, ``self.loss``
        and ``self.train_op``.
        """
        # input words
        self.wds = tf.placeholder(tf.float32, [None, params['words']['dim']],
                                  name='words')
        # input pos
        self.pos = tf.placeholder(tf.float32, [None, params['pos']['dim']],
                                  name='pos')
        # output Y0
        self.Y0 = tf.placeholder(tf.float32, [None, params['Y0']['dim']],
                                 name='Y0')
        # output Y1
        self.Y1 = tf.placeholder(tf.float32, [None, params['Y1']['dim']],
                                 name='Y1')

        # 1.base layers: embedding
        wd_embedding = Embedding(output_dim=params['embed_size'],
                                 input_dim=params['voc_size'],
                                 input_length=params['words']['dim'],
                                 mask_zero=mask_zero,
                                 name='wd_embedding')(self.wds)

        pos_embedding = Embedding(output_dim=params['embed_size'],
                                  input_dim=params['pos_size'],
                                  input_length=params['pos']['dim'],
                                  mask_zero=mask_zero,
                                  name='pos_embedding')(self.pos)

        # 2. semantic layers: Bidirectional GRU
        wd_Bi_GRU = Bidirectional(GRU(
            params['words']['RNN']['cell'],
            dropout=params['words']['RNN']['drop_out'],
            recurrent_dropout=params['words']['RNN']['rnn_drop_out']),
                                  merge_mode='concat',
                                  name='word_Bi_GRU')(wd_embedding)
        if 'batch_norm' in params['words']['RNN']:
            wd_Bi_GRU = BatchNormalization(
                momentum=params['words']['RNN']['batch_norm'],
                name='word_Bi_GRU_BN')(wd_Bi_GRU)

        # NOTE(review): this layer reuses the name 'word_Bi_GRU' of the word
        # encoder above, which looks like a copy-paste slip.  Left unchanged
        # because renaming would alter the variable scope names -- confirm.
        pos_Bi_GRU = Bidirectional(GRU(
            params['pos']['RNN']['cell'],
            dropout=params['pos']['RNN']['drop_out'],
            recurrent_dropout=params['pos']['RNN']['rnn_drop_out']),
                                   merge_mode='concat',
                                   name='word_Bi_GRU')(pos_embedding)
        if 'batch_norm' in params['pos']['RNN']:
            pos_Bi_GRU = BatchNormalization(
                momentum=params['pos']['RNN']['batch_norm'],
                name='pos_Bi_GRU_BN')(pos_Bi_GRU)

        # use pos as attention
        # NOTE(review): the element-wise multiply presumably requires the
        # words and pos RNN cell sizes to be equal -- confirm with callers.
        attention_probs = Dense(2 * params['pos']['RNN']['cell'],
                                activation='softmax',
                                name='attention_vec')(pos_Bi_GRU)
        attention_mul = multiply([wd_Bi_GRU, attention_probs],
                                 name='attention_mul')
        # ATTENTION PART FINISHES HERE

        # 3. middle layer for predict Y0
        # Work on a copy: the regularizer object is written into the kwargs,
        # and doing so on the caller's dict would break a second construction
        # from the same `params`.
        kwargs = dict(params['Y0']['kwargs']) if 'kwargs' in params['Y0'] else {}
        if 'W_regularizer' in kwargs:
            kwargs['W_regularizer'] = l2(kwargs['W_regularizer'])
        self.Y0_probs = Dense(
            params['Y0']['dim'],
            name='Y0_probs',
            bias_regularizer=l2(0.01),
            **kwargs)(pos_Bi_GRU)
        # batch_norm
        if 'batch_norm' in params['Y0']:
            self.Y0_probs = BatchNormalization(**params['Y0']['batch_norm'])(
                self.Y0_probs)
        # The activation is applied after the optional batch normalisation.
        self.Y0_probs = Activation(params['Y0']['activate_func'])(
            self.Y0_probs)

        if 'activity_reg' in params['Y0']:
            self.Y0_probs = ActivityRegularization(
                name='Y0_activity_reg',
                **params['Y0']['activity_reg'])(self.Y0_probs)

        # 4. upper hidden layers
        # Firstly, learn a hidden layer from Bi_GRU
        # Secondly, consider Y0_preds as middle feature and combine it with hidden layer

        combine_layer = concatenate([self.Y0_probs, attention_mul],
                                    axis=-1,
                                    name='combine_layer')

        hidden_layer = Dense(params['H']['dim'],
                             name='hidden_layer')(combine_layer)
        if 'batch_norm' in params['H']:
            hidden_layer = BatchNormalization(
                momentum=0.9, name='hidden_layer_BN')(hidden_layer)
        hidden_layer = Activation('relu')(hidden_layer)
        if 'drop_out' in params['H']:
            hidden_layer = Dropout(params['H']['drop_out'],
                                   name='hidden_layer_dropout')(hidden_layer)

        # 5. layer for predict Y1
        # Same copy-before-modify rule as for Y0.
        kwargs = dict(params['Y1']['kwargs']) if 'kwargs' in params['Y1'] else {}
        if 'W_regularizer' in kwargs:
            kwargs['W_regularizer'] = l2(kwargs['W_regularizer'])
        self.Y1_probs = Dense(
            params['Y1']['dim'],
            name='Y1_probs',
            bias_regularizer=l2(0.01),
            **kwargs)(hidden_layer)
        # batch_norm
        if 'batch_norm' in params['Y1']:
            self.Y1_probs = BatchNormalization(**params['Y1']['batch_norm'])(
                self.Y1_probs)
        self.Y1_probs = Activation(params['Y1']['activate_func'])(
            self.Y1_probs)

        if 'activity_reg' in params['Y1']:
            self.Y1_probs = ActivityRegularization(
                name='Y1_activity_reg',
                **params['Y1']['activity_reg'])(self.Y1_probs)

        # 6. Calculate loss: sum of the mean binary cross-entropies.
        with tf.name_scope('loss'):
            Y0_loss = tf.reduce_mean(binary_crossentropy(
                self.Y0, self.Y0_probs),
                                     name='Y0_loss')
            Y1_loss = tf.reduce_mean(binary_crossentropy(
                self.Y1, self.Y1_probs),
                                     name='Y1_loss')
            self.loss = tf.add_n([Y0_loss, Y1_loss], name='loss')

        self.train_op = tf.train.RMSPropOptimizer(
            params['learning_rate']).minimize(self.loss)
Beispiel #24
0
def create_model(params, computed_params):
    """Build and compile the classifier that predicts the answer length.

    The question and each premise are encoded by shared layers; the
    encodings are concatenated and passed through up to three optional
    dense layers into a softmax over ``max_outputseq_len`` classes.

    Args:
        params: hyper-parameters.  ``net_arch`` selects the encoder
            (``'lstm'`` or ``'lstm(cnn)'``); ``rnn_size`` is always read and
            ``nb_filters`` for ``'lstm(cnn)'``; ``units1`` .. ``units3`` add
            a relu dense layer each when present and positive;
            ``optimizer`` is passed to ``compile``.
        computed_params: must provide ``max_inputseq_len``, ``word_dims``,
            ``max_outputseq_len`` and ``max_nb_premises``.

    Returns:
        The compiled ``Model`` (categorical cross-entropy, accuracy metric).
        Its inputs are the question followed by ``premise0`` ..
        ``premise{max_nb_premises - 1}``.

    Raises:
        ValueError: if ``params['net_arch']`` is not a supported value.
    """
    logging.info('Constructing the NN model...')

    max_inputseq_len = computed_params['max_inputseq_len']
    word_dims = computed_params['word_dims']
    max_outputseq_len = computed_params['max_outputseq_len']
    max_nb_premises = computed_params['max_nb_premises']

    sentence_shape = (max_inputseq_len, word_dims)

    inputs = [Input(shape=sentence_shape, dtype='float32', name='question')]
    for ipremise in range(max_nb_premises):
        inputs.append(
            Input(shape=sentence_shape,
                  dtype='float32',
                  name='premise{}'.format(ipremise)))

    layers = []

    net_arch = params['net_arch']

    if net_arch == 'lstm':
        # LSTM-based encoder whose output is a vector packing the words of a
        # sentence. This layer is shared by all input sentences.
        rnn_size = params['rnn_size']
        shared_words_rnn = Bidirectional(
            recurrent.LSTM(rnn_size,
                           input_shape=(max_inputseq_len, word_dims),
                           return_sequences=False))

        for sentence in inputs:
            layers.append(shared_words_rnn(sentence))
    elif net_arch == 'lstm(cnn)':
        nb_filters = params['nb_filters']
        rnn_size = params['rnn_size']
        for kernel_size in range(1, 4):
            # Convolutional layers come first; they act as detectors of
            # phrases and syntactic constructions.
            conv = Conv1D(filters=nb_filters,
                          kernel_size=kernel_size,
                          padding='valid',
                          activation='relu',
                          strides=1,
                          name='shared_conv_{}'.format(kernel_size))

            # One conv and one LSTM per kernel size, shared by all inputs.
            lstm = recurrent.LSTM(rnn_size, return_sequences=False)

            for sentence in inputs:
                conv_layer1 = conv(sentence)
                conv_layer1 = keras.layers.MaxPooling1D(
                    pool_size=kernel_size, strides=None,
                    padding='valid')(conv_layer1)
                conv_layer1 = lstm(conv_layer1)
                layers.append(conv_layer1)
    else:
        # Previously an unknown value fell through with no encoders and
        # failed later inside `concatenate` with an unrelated message.
        raise ValueError('Unknown net_arch: {!r}'.format(net_arch))

    if len(layers) == 1:
        # Only the question was encoded (no premises); there is nothing to
        # concatenate.
        encoder_merged = layers[0]
    else:
        encoder_merged = keras.layers.concatenate(inputs=list(layers))

    # The final classifier determines the length of the answer.
    output_dims = max_outputseq_len
    decoder = encoder_merged

    for units_key in ('units1', 'units2', 'units3'):
        if units_key in params and params[units_key] > 0:
            decoder = Dense(units=params[units_key],
                            activation='relu')(decoder)

    decoder = Dense(output_dims, activation='softmax', name='output')(decoder)

    model = Model(inputs=inputs, outputs=decoder)
    model.compile(loss='categorical_crossentropy',
                  optimizer=params['optimizer'],
                  metrics=['accuracy'])

    return model
Beispiel #25
0
            For Keras internal compatability checking
        """
        if self.return_probabilities:
            return (None, self.timesteps, self.timesteps)
        else:
            return (None, self.timesteps, self.output_dim)

    def get_config(self):
        """Return the configuration needed to rebuild this layer on load.

        The parent configuration is extended with ``output_dim``, ``units``
        and ``return_probabilities``; these entries take precedence over
        parent entries with the same key.
        """
        own_settings = {
            'output_dim': self.output_dim,
            'units': self.units,
            'return_probabilities': self.return_probabilities
        }
        merged = dict(super(AttentionDecoder, self).get_config().items())
        merged.update(own_settings)
        return merged


# Smoke test: build a tiny encoder/decoder model and print its summary.
if __name__ == '__main__':
    from keras.layers import Input, LSTM
    from keras.models import Model
    from keras.layers.wrappers import Bidirectional

    features = Input(shape=(100, 104), dtype='float32')
    # Bidirectional LSTM encoder returning the whole sequence.
    recurrent_cell = LSTM(64, return_sequences=True)
    encoded = Bidirectional(recurrent_cell, merge_mode='concat')(features)
    decoded = AttentionDecoder(32, 4)(encoded)
    smoke_model = Model(inputs=features, outputs=decoded)
    smoke_model.summary()
Beispiel #26
0
            conv = Conv1D(filters=nb_filters,
                          kernel_size=kernel_size,
                          padding='valid',
                          activation='relu',
                          strides=1)

            conv_layer = conv(words_input)
            conv_layer = GlobalMaxPooling1D()(conv_layer)
            convs.append(conv_layer)
            repr_size += nb_filters

    elif NET_ARCH == 'lstm+cnn':
        # энкодер на базе LSTM, на выходе которого получаем вектор с упаковкой слов
        # предложения.
        encoder_rnn = Bidirectional(
            recurrent.LSTM(rnn_size,
                           input_shape=(max_inputseq_len, word_dims),
                           return_sequences=False))(words_input)

        convs.append(encoder_rnn)
        repr_size += rnn_size * 2

        # добавляем входы со сверточными слоями
        for kernel_size in range(2, 4):
            conv = Conv1D(filters=nb_filters,
                          kernel_size=kernel_size,
                          padding='valid',
                          activation='relu',
                          strides=1)

            conv_layer = conv(words_input)
            conv_layer = GlobalMaxPooling1D()(conv_layer)
Beispiel #27
0
# Embed the two answer inputs with frozen (trainable=False) embeddings
# initialised from word_embedding_matrix.  Each call creates its own
# Embedding layer; both expect sequences of length 100.
A = Embedding(nb_words + 1,
              EMBEDDING_DIM,
              weights=[word_embedding_matrix],
              input_length=100,
              trainable=False)(Answer)
A2 = Embedding(nb_words + 1,
               EMBEDDING_DIM,
               weights=[word_embedding_matrix],
               input_length=100,
               trainable=False)(Answer2)

#e1_aligned, e2_aligned = align(q1, q2)
#q1 = concatenate([q1,e2_aligned])
#q2 = concatenate([q2,e1_aligned])

# One bidirectional LSTM instance is applied to both q1 and q2, so the two
# questions share encoder weights; dropout is applied to each encoding.
Encoder = Bidirectional(LSTM(units=300, return_sequences=True))
q1_encoded = Dropout(DROPOUT)(Encoder(q1))
q2_encoded = Dropout(DROPOUT)(Encoder(q2))

# A second, separate encoder instance is shared by the two answers.
Encoder_A = Bidirectional(LSTM(units=300, return_sequences=True))
A_encoded = Dropout(DROPOUT)(Encoder_A(A))
A2_encoded = Dropout(DROPOUT)(Encoder_A(A2))

# Pairwise alignments; align, align2, align_A and align_B are defined
# outside this fragment.
q1_aligned, q2_aligned = align(q1_encoded, q2_encoded)
A1_aligned, A2_aligned = align2(A_encoded, A2_encoded)
q1_A_aligned, A_q1_aligned = align_A(A_encoded, q1_encoded)
# NOTE(review): the result names suggest q2, but q1_encoded is passed here;
# possibly q2_encoded was intended -- confirm before changing.
q2_A_aligned, A_q2_aligned = align_B(A2_encoded, q1_encoded)

#q1_combined = concatenate([q1_encoded, q2_aligned, subtract(q1_encoded, q2_aligned), multiply([q1_encoded, q2_aligned]),q1_A_aligned])
#q2_combined = concatenate([q2_encoded, q1_aligned, subtract(q2_encoded, q1_aligned), multiply([q2_encoded, q1_aligned]),q2_A_aligned])
#A1_combined = concatenate([A_encoded, A2_aligned, subtract(A_encoded, A2_aligned), multiply([A_encoded, A2_aligned]),A_q1_aligned])
Beispiel #28
0
    def build(self):
        """Build and compile the joint (multi-task) training model.

        Architecture, as described by the original author: embedding +
        BiLSTM semantic representation + fully connected compare layer
        (self-attention and attention) + BiLSTM align + Dense layer + softmax.

        The embedding, the first BiLSTM and the compare layer are shared and
        their parameters are frozen. After the compare stage the network is
        meant to split into three BiLSTMs: one private to the source data,
        one shared by source and target data, and one private to the target
        data.

        The loss combines a task loss, an adversarial loss and a
        covariance-based ("diff") loss.

        Returns:
            The compiled joint-training Keras ``Model``.
        """
        senA = Input(shape=(self.senMaxLen, ), name='senA')
        senB = Input(shape=(self.senMaxLen, ), name='senB')
        CharA = Input(shape=(self.senMaxLen, ), name='CharA')
        CharB = Input(shape=(self.senMaxLen, ), name='CharB')

        senA1 = Input(shape=(self.senMaxLen, ), name='senA1')
        senB1 = Input(shape=(self.senMaxLen, ), name='senB1')
        CharA1 = Input(shape=(self.senMaxLen, ), name='CharA1')
        CharB1 = Input(shape=(self.senMaxLen, ), name='CharB1')
        # NOTE(review): these Input tensors are only referenced again in the
        # Model(inputs=...) call below; every intermediate tensor is derived
        # from the outputs of basemodelA / basemodelB layers. Confirm that
        # the graph is actually connected to these inputs.

        # Freeze the weights of the layers that precede the matching layer
        # (the first 26 layer pairs of the two base models).
        for index, (layerA, layerB) in enumerate(
                zip(self.basemodelA.layers, self.basemodelB.layers)):
            if index < 26:
                layerA.trainable = False
                layerB.trainable = False
            print(layerA.name)

        mergedVectorA = self.basemodelA.get_layer('mergedVectorA').output
        mergedVectorB = self.basemodelA.get_layer('mergedVectorB').output

        _mergedVectorA = self.basemodelB.get_layer('mergedVectorA').output
        # Fixed: this previously fetched 'mergedVectorA' a second time, so
        # the target-task sentence-B branch silently reused sentence A.
        _mergedVectorB = self.basemodelB.get_layer('mergedVectorB').output

        # The compare ("cross") layer comes from basemodelA, is frozen, and
        # is applied to the tensors of both base models.
        cross = self.basemodelA.get_layer('cross')
        cross.trainable = False

        mergedVectorA = TimeDistributed(cross)(mergedVectorA)
        mergedVectorA = TimeDistributed(BatchNormalization())(mergedVectorA)

        mergedVectorB = TimeDistributed(cross)(mergedVectorB)
        mergedVectorB = TimeDistributed(BatchNormalization())(mergedVectorB)

        # NOTE(review): for the basemodelB tensors batch normalisation is
        # applied *before* the cross layer, the opposite order from the
        # basemodelA tensors above. Left as-is; confirm this is intended.
        _mergedVectorA = TimeDistributed(BatchNormalization())(_mergedVectorA)
        _mergedVectorA = TimeDistributed(cross)(_mergedVectorA)

        _mergedVectorB = TimeDistributed(BatchNormalization())(_mergedVectorB)
        _mergedVectorB = TimeDistributed(cross)(_mergedVectorB)

        # Build the shared BiLSTM context layer.
        # NOTE(review): special1BiLSTM resolves to the very same layer
        # ('bidirectional_2') as sharedBiLSTM, so the "source-private"
        # encoder currently shares weights with the shared one. Confirm
        # whether a different layer name was intended.
        sharedBiLSTM = self.basemodelA.get_layer('bidirectional_2')
        special1BiLSTM = self.basemodelA.get_layer('bidirectional_2')

        # Target-private encoder: a freshly created BiLSTM.
        special2BiLSTM = Bidirectional(
            LSTM(units=jointTaskParamSetting.SharedTaskLSTMUnits,
                 return_sequences=False,
                 dropout=self.dropout,
                 recurrent_dropout=self.dropout))

        # Shared encoder applied to all four sentence tensors.
        sharedLSTMSenA = sharedBiLSTM(mergedVectorA)
        sharedLSTMSenB = sharedBiLSTM(mergedVectorB)
        _sharedLSTMSenA = sharedBiLSTM(_mergedVectorA)
        _sharedLSTMSenB = sharedBiLSTM(_mergedVectorB)

        # Source-private encoder. Sentence B previously went through
        # sharedBiLSTM; it now uses special1BiLSTM like sentence A.
        specialLSTMSenA = special1BiLSTM(mergedVectorA)
        specialLSTMSenB = special1BiLSTM(mergedVectorB)

        # Target-private encoder.
        _specialLSTMSenA = special2BiLSTM(_mergedVectorA)
        _specialLSTMSenB = special2BiLSTM(_mergedVectorB)

        # Concatenate private and shared encodings into each task's input.
        task1Input = concatenate(
            [specialLSTMSenA, specialLSTMSenB, sharedLSTMSenA, sharedLSTMSenB],
            axis=-1,
            name="taskInput1")

        task2Input = concatenate(
            [_specialLSTMSenA, _specialLSTMSenB, _sharedLSTMSenA,
             _sharedLSTMSenB],
            axis=-1,
            name="taskInput2")

        # Task-specific dense heads.
        SpecialTask1 = Dense(jointTaskParamSetting.SpecialTaskAUnits,
                             activation="relu")(task1Input)
        SpecialTask1 = Dropout(self.dropout)(SpecialTask1)
        SpecialTask1 = BatchNormalization()(SpecialTask1)

        SpecialTask2 = Dense(jointTaskParamSetting.SpecialTaskBUnits,
                             activation="relu")(task2Input)
        SpecialTask2 = Dropout(self.dropout)(SpecialTask2)
        SpecialTask2 = BatchNormalization()(SpecialTask2)

        # Build a classifier that decides whether the data it receives came
        # from the source data or the target data. See the adversarial-loss
        # section of the paper "Adversarial Multi-task Learning for Text
        # Classification".
        sharedDenseLayer = Dense(units=jointTaskParamSetting.SharedTaskUnits,
                                 activation='relu')

        SharedTask1 = sharedDenseLayer(
            concatenate([sharedLSTMSenA, sharedLSTMSenB],
                        axis=-1,
                        name='share1Input'))
        SharedTask1 = Dropout(self.dropout)(SharedTask1)
        SharedTask1 = BatchNormalization()(SharedTask1)

        SharedTask2 = sharedDenseLayer(
            concatenate([_sharedLSTMSenA, _sharedLSTMSenB],
                        axis=-1,
                        name='share2Input'))
        SharedTask2 = Dropout(self.dropout)(SharedTask2)
        SharedTask2 = BatchNormalization()(SharedTask2)

        # feature1 feeds the task-1 loss; task 1 is the source task.
        feature1 = concatenate([SpecialTask1, SharedTask1], axis=-1)
        logits1 = Dense(2, activation="softmax", name="taskAloss")(feature1)

        # feature2 feeds the task-2 loss; task 2 is the target task.
        feature2 = concatenate([SpecialTask2, SharedTask2], axis=-1)
        logits2 = Dense(2, activation="softmax", name="taskBloss")(feature2)

        # GAN (adversarial) loss head: one classifier layer applied to the
        # shared features of both tasks.
        ganLayer = Dense(2, activation='softmax', name='GAN')
        tasklabel1 = ganLayer(SharedTask1)
        tasklabel2 = ganLayer(SharedTask2)

        # Covariance-matrix-based loss, computed by self.diff_loss on each
        # task's concatenated input.
        diff_loss = Lambda(self.diff_loss, name="diff_loss")

        dif1 = diff_loss(task1Input)
        dif2 = diff_loss(task2Input)

        # Name glossary:
        #   SpecialTask1 / SpecialTask2: private-head outputs of task 1 / 2
        #   SharedTask1 / SharedTask2:   shared-head outputs of task 1 / 2
        #   logits1 / logits2:           class predictions of task 1 / 2
        myModel = Model(
            inputs=[senA, senB, CharA, CharB, senA1, senB1, CharA1, CharB1],
            outputs=[logits1, logits2, tasklabel1, tasklabel2, dif1, dif2])

        myModel.compile(optimizer="adam",
                        loss={
                            "taskAloss": "mse",
                            "taskBloss": "mse",
                            "GAN": "mse",
                            "diff_loss": self.sumloss
                        },
                        loss_weights={
                            "taskAloss": 1.,
                            "taskBloss": .2,
                            "GAN": .5,
                            "diff_loss": 1
                        },
                        metrics=["accuracy"])

        # summary() prints the table itself and returns None, so wrapping it
        # in print() only added a stray "None" line.
        myModel.summary()
        return myModel
Beispiel #29
0
    def _build_model(self,
                     input_size,
                     stacked_sizes=None,
                     fully_connected_sizes=None,
                     optimizer_name=None,
                     learning_rate=None,
                     decay=None,
                     gpus=0,
                     custom_batch_size=None):
        """
        Assemble and compile the Keras Sequential Bi-LSTM tagger.

        The network is one Bi-LSTM layer (with dropout), optional further
        Bi-LSTM layers, optional time-distributed sigmoid Dense layers, and a
        final time-distributed single-unit sigmoid Dense layer.

        :param input_size: Dimensionality of input vector (number of features)
        :param stacked_sizes: Sizes of additional Bi-LSTM layers added after the first one (list, default none)
        :param fully_connected_sizes: Sizes of additional fully connected layers added after the Bi-LSTM layers (list, default none)
        :param optimizer_name: Name of Keras optimizer, default 'adam'
        :param learning_rate: Optimizer learning rate; only supported together with 'adam'
        :param decay: Optimizer decay; only supported together with 'adam'
        :param gpus: Number of gpus to train on (values above 1 raise NotImplementedError)
        :param custom_batch_size: Use different batch size than self.batch_size
        :return: Compiled Keras Sequential model
        :raises NotImplementedError: if ``gpus`` is greater than 1
        :raises ValueError: if learning_rate/decay are given for an optimizer other than 'adam'
        """
        from keras.layers.core import Dense
        from keras.layers.recurrent import LSTM
        from keras.layers.wrappers import TimeDistributed, Bidirectional
        from keras.models import Sequential
        from keras import optimizers

        extra_recurrent = [] if stacked_sizes is None else stacked_sizes
        extra_dense = ([] if fully_connected_sizes is None
                       else fully_connected_sizes)

        model = Sequential()

        # First recurrent layer: the only one that carries dropout and the
        # (batch, time, features) input specification.
        first_lstm = LSTM(units=self.hidden_size,
                          return_sequences=True,
                          dropout=0.2,
                          recurrent_dropout=0.2,
                          stateful=self.stateful)
        input_spec = (custom_batch_size or self.batch_size, None, input_size)
        model.add(Bidirectional(layer=first_lstm,
                                batch_input_shape=input_spec))

        # Optional stacked Bi-LSTM layers.
        for units in extra_recurrent:
            stacked_lstm = LSTM(units=units,
                                return_sequences=True,
                                stateful=self.stateful)
            model.add(Bidirectional(layer=stacked_lstm))

        # Optional hidden Dense layers followed by the single-unit output
        # layer; all of them are per-timestep sigmoid layers.
        for width in list(extra_dense) + [1]:
            model.add(TimeDistributed(Dense(width, activation='sigmoid')))

        # Multi-GPU wrapping (multi_gpu_model) is deliberately disabled.
        if gpus > 1:
            raise NotImplementedError(
                "Multi GPU model not implemented due to input size mismatch.")

        optimizer_name = "adam" if optimizer_name is None else optimizer_name

        # Only pass through the hyper-parameters the caller actually set.
        optimizer_args = {
            key: value
            for key, value in (('lr', learning_rate), ('decay', decay))
            if value is not None
        }

        if optimizer_name == 'adam':
            optimizer = optimizers.Adam(**optimizer_args)
        elif not optimizer_args:
            # No custom parameters: let Keras resolve the optimizer by name.
            optimizer = optimizer_name
        else:
            raise ValueError(
                'Optimizer {} not implemented for custom params yet'.format(
                    optimizer_name))

        print('Using optimizer', optimizer_name, optimizer_args)
        model.compile(loss=self.loss,
                      optimizer=optimizer,
                      sample_weight_mode='temporal',
                      metrics=["accuracy", precision, recall, auc_roc])
        return model
    def __init__(self,
                 dim,
                 batch_norm,
                 dropout,
                 rec_dropout,
                 header,
                 task,
                 mask_demographics,
                 target_repl=False,
                 deep_supervision=False,
                 num_classes=1,
                 depth=1,
                 input_dim=94,
                 size_coef=4,
                 **kwargs):
        """Assemble the channel-wise recurrent network and register it with Keras.

        Every channel found in ``header`` is sliced out of the masked input
        and run through its own stack of ``depth`` LSTM layers; the per-channel
        results are concatenated, passed through ``depth - 1`` wider LSTM
        layers, a final LSTM, optional dropout and a task-specific Dense head.

        :param dim: Number of output units of each per-channel recurrent layer
        :param batch_norm: Stored on the instance; not otherwise used here
        :param dropout: Input dropout of the LSTMs; also applied after the final LSTM when > 0
        :param rec_dropout: Recurrent dropout of the LSTMs
        :param header: Column names of the input; the text before "->" is the channel name
        :param task: One of 'decomp', 'ihm', 'ph', 'los'; selects the final activation
        :param mask_demographics: Demographic groups to exclude ('Gender', 'Ethnicity', 'Insurance'); each shrinks ``input_dim``
        :param target_repl: If true, emit a per-timestep output plus its last timestep
        :param deep_supervision: If true, add the mask input 'M', emit per-timestep output, and use unidirectional LSTMs
        :param num_classes: Units of the output Dense layer
        :param depth: Number of recurrent layers per channel (and ``depth - 1`` in the main part)
        :param input_dim: Feature dimension of the input before demographic masking
        :param size_coef: Width multiplier of the main-part and output LSTMs relative to ``dim``
        :param kwargs: Ignored; their keys are printed
        :raises ValueError: if ``task`` is not one of the supported values
        """
        self.dim = dim
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.depth = depth
        self.size_coef = size_coef

        # (0) demographics: shrink the input width for every masked group and
        # remember which groups are still present.
        masked_groups = {
            'Gender': (5, 'GEN'),
            'Ethnicity': (6, 'ETH'),
            'Insurance': (7, 'INS'),
        }
        included = ['GEN', 'ETH', 'INS']
        for dem in mask_demographics:
            if dem in masked_groups:
                width, tag = masked_groups[dem]
                input_dim -= width
                included.remove(tag)
        if not included:
            included.append("NONE")
        self._included = included

        # (1) task-specific activation of the output layer
        if task in ('decomp', 'ihm', 'ph'):
            final_activation = 'sigmoid'
        elif task == 'los':
            final_activation = 'relu' if num_classes == 1 else 'softmax'
        else:
            raise ValueError("Wrong value for task")

        print("==> not used params in network class:", kwargs.keys())

        # (2) channel discovery: drop "mask->" columns, strip everything from
        # "->" onwards (left over from one-hot encoding), deduplicate, sort.
        channel_names = sorted({
            column.partition("->")[0]
            for column in header
            if "mask->" not in column
        })
        self.channel_names = channel_names
        print("==> excluded demographics:", mask_demographics)
        print("==> found {} channels: {}".format(len(channel_names),
                                                 channel_names))

        # A channel is the list of header positions whose column name
        # contains the channel name.
        channels = [
            [position for position, column in enumerate(header)
             if name in column]
            for name in channel_names
        ]

        # (3) input layer and masking of skipped timesteps
        X = Input(shape=(None, input_dim), name='X')
        inputs = [X]
        mX = Masking()(X)

        # (4) deep supervision adds a mask input and disables bidirectionality
        if deep_supervision:
            M = Input(shape=(None, ), name='M')
            inputs.append(M)
        is_bidirectional = not deep_supervision

        def recurrent_layer(tensor, width):
            # One sequence-returning LSTM; in bidirectional mode each
            # direction gets half of `width` units.
            units = width // 2 if is_bidirectional else width
            cell = LSTM(units=units,
                        activation='tanh',
                        return_sequences=True,
                        dropout=dropout,
                        recurrent_dropout=rec_dropout)
            if is_bidirectional:
                return Bidirectional(cell)(tensor)
            return cell(tensor)

        # (5) per-channel preprocessing: slice first (mX[:, :, ch]), then run
        # each slice through its own stack of recurrent layers.
        sliced = [Slice(columns)(mX) for columns in channels]
        processed = []
        for branch in sliced:
            for _ in range(depth):
                branch = recurrent_layer(branch, dim)
            processed.append(branch)

        # (6) merge the channels along the feature axis
        Z = Concatenate(axis=2)(processed)

        # (7) main part of the network
        for _ in range(depth - 1):
            Z = recurrent_layer(Z, int(size_coef * dim))

        # (8) output LSTM; keeps the time axis only when per-step outputs
        # are required
        L = LSTM(units=int(size_coef * dim),
                 activation='tanh',
                 return_sequences=(target_repl or deep_supervision),
                 dropout=dropout,
                 recurrent_dropout=rec_dropout)(Z)

        # (9) optional dropout on the LSTM output
        if dropout > 0:
            L = Dropout(dropout)(L)

        # (10) output head
        if target_repl:
            y = TimeDistributed(Dense(num_classes,
                                      activation=final_activation),
                                name='seq')(L)
            outputs = [LastTimestep(name='single')(y), y]
        elif deep_supervision:
            y = TimeDistributed(Dense(num_classes,
                                      activation=final_activation))(L)
            # extend the mask of y to M
            outputs = [ExtendMask()([y, M])]
        else:
            outputs = [Dense(num_classes, activation=final_activation)(L)]

        # (11) let Keras build the model from the assembled graph
        super(Network, self).__init__(inputs=inputs, outputs=outputs)