def get_test_model_gru_stateful_optional(stateful):
    """Returns a test model for Gated Recurrent Unit (GRU) layers."""
    input_shapes = [
        (17, 4),
        (1, 10)
    ]
    stateful_batch_size = 1
    inputs = [Input(batch_shape=(stateful_batch_size,) + s) for s in input_shapes]
    outputs = []

    for inp in inputs:
        gru_sequences = GRU(
            stateful=stateful,
            units=8,
            recurrent_activation='relu',
            reset_after=True,
            return_sequences=True,
            use_bias=True
        )(inp)
        gru_regular = GRU(
            stateful=stateful,
            units=3,
            recurrent_activation='sigmoid',
            reset_after=True,
            return_sequences=False,
            use_bias=False
        )(gru_sequences)
        outputs.append(gru_regular)

        gru_bidi_sequences = Bidirectional(
            GRU(
                stateful=stateful,
                units=4,
                recurrent_activation='hard_sigmoid',
                reset_after=False,
                return_sequences=True,
                use_bias=True
            )
        )(inp)
        gru_bidi = Bidirectional(
            GRU(
                stateful=stateful,
                units=6,
                recurrent_activation='sigmoid',
                reset_after=True,
                return_sequences=False,
                use_bias=False
            )
        )(gru_bidi_sequences)
        outputs.append(gru_bidi)

        gru_gpu_regular = GRU(
            stateful=stateful,
            units=3,
            activation='tanh',
            recurrent_activation='sigmoid',
            reset_after=True,
            use_bias=True
        )(inp)

        gru_gpu_bidi = Bidirectional(
            GRU(
                stateful=stateful,
                units=3,
                activation='tanh',
                recurrent_activation='sigmoid',
                reset_after=True,
                use_bias=True
            )
        )(inp)
        outputs.append(gru_gpu_regular)
        outputs.append(gru_gpu_bidi)

    model = Model(inputs=inputs, outputs=outputs, name='test_model_gru')
    model.compile(loss='mse', optimizer='nadam')
    # fit to dummy data
    training_data_size = 2
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, batch_size=stateful_batch_size, epochs=10)
    return model
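
# Both stateful test-model builders in this collection (this GRU one and
# get_test_model_lstm_stateful further below) rely on generate_input_data and
# generate_output_data helpers that are not shown. A minimal sketch of what such helpers
# might look like, assuming they only produce random NumPy arrays matching the declared
# shapes; these implementations are illustrative guesses, not the originals.
import numpy as np

def generate_input_data(training_data_size, input_shapes):
    # one random array per declared input shape; None dimensions are assumed to collapse to 1
    return [
        np.random.random(
            (training_data_size,) + tuple(d if d is not None else 1 for d in shape))
        for shape in input_shapes
    ]

def generate_output_data(training_data_size, initial_data_out):
    # random targets shaped like the model's own initial predictions
    outs = initial_data_out if isinstance(initial_data_out, list) else [initial_data_out]
    return [np.random.random((training_data_size,) + o.shape[1:]) for o in outs]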
# Model Building params for multiple models
LAYERS = [2, 3]
SIZE = [64, 128]
LEARNER = [tf.keras.optimizers.RMSprop()]

# Build each model and run it for 5 epochs to get a good idea of the possibilities
for layer in LAYERS:
    for s in SIZE:
        for l in LEARNER:
            print(f"Building new model: layers:{layer} size:{s} learner:{l}")
            # Build Model
            model = Sequential()
            model.add(
                Bidirectional(
                    GRU(s,
                        input_shape=(trainX.shape[1:]),
                        return_sequences=True)))
            model.add(Dropout(0.2))
            model.add(BatchNormalization())

            for eachLayer in range(layer - 2):
                model.add(Bidirectional(GRU(s, return_sequences=True)))
                model.add(Dropout(0.2))
                model.add(BatchNormalization())

            model.add(Bidirectional(GRU(s, return_sequences=False)))
            model.add(Dropout(0.2))
            model.add(BatchNormalization())

            model.add(Dense(32, activation='relu'))
            model.add(Dropout(0.2))
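            # NOTE: the original snippet is cut off here. A hedged, hypothetical completion of
            # the loop body follows; the sigmoid output head, the loss, and the trainX/trainY
            # arrays are assumptions based on the surrounding code, not the original script.
            model.add(Dense(1, activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer=l, metrics=['accuracy'])
            model.fit(trainX, trainY, epochs=5, batch_size=64, validation_split=0.1)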
Example #3
def train(data,batch_size,epoch,maxlen,output_dir,selected_layer):

    # Keep only Korean, English, and digits in the text, then run morphological analysis
    cleaned = preprocess(data['document'].values)

    if selected_layer=='bert':
        # Load the multi_cased model's vocab file and create the tokenizer
        FullTokenizer = bert.bert_tokenization.FullTokenizer
        tokenizer = FullTokenizer(vocab_file=vocab_file, do_lower_case=False)

        # Convert the preprocessed text into the input format BERT expects
        train_tokens = [["[CLS]"] + tokenizer.tokenize(sentence) + ["[SEP]"] for sentence in cleaned]
        train_tokens_ids = [tokenizer.convert_tokens_to_ids(token) for token in train_tokens]
        train_data = pad_sequences(train_tokens_ids, maxlen=maxlen, dtype="long", truncating="post",
                                         padding="post")
        # Specify the input shape fed into bert_layer
        input_1 = Input(shape=(maxlen,), dtype=tf.int32, name="input_word_ids")

        # Load the pre-trained BERT model as a Keras layer
        bert_params = bert.params_from_pretrained_ckpt('./multi_cased_L-12_H-768_A-12')
        bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")
        # Pass the input layer through bert_layer, then flatten the output to 2-D while preserving the learned weights
        bert_l = bert_layer(input_1)
        flatten = Flatten()(bert_l)
        layer=flatten

    else:
        # For cnn/bilstm, set the maximum vocabulary size (voca_size) and create the tokenizer
        voca_size = 1000000
        tokenizer = Tokenizer(num_words=voca_size)
        tokenizer.fit_on_texts(cleaned)
        # Save the tokenizer as a JSON file so eval/predict can load it quickly
        tokenizer_json=tokenizer.to_json()
        with open('tokenizer.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(tokenizer_json, ensure_ascii=False))

        # Convert the preprocessed text to integer sequences and pad them to maxlen
        train_data = tokenizer.texts_to_sequences(cleaned)
        train_data = pad_sequences(train_data, padding='post', maxlen=maxlen)

        # Specify the input shape fed into the cnn/bilstm layers
        input_1 = Input(shape=(maxlen,))

        # Load the pre-trained GloVe embeddings
        vocab_size = len(tokenizer.word_index) + 1
        embedding_dim = 100
        embedding_matrix = pretrained_embedding_load(
            'glove.txt', vocab_size=vocab_size,
            num_demension=embedding_dim, tokenizer=tokenizer)
        embedding_layer = Embedding(output_dim=embedding_dim, input_dim=vocab_size, weights=[embedding_matrix],
                                    input_length=maxlen, trainable=False)(input_1)

        if selected_layer == 'bilstm':
            # Feed the embedding_layer created above into the Bi-LSTM layer
            bilstm1 = Bidirectional(LSTM(256, dropout=0.3, recurrent_dropout=0.3))(embedding_layer)
            layer = bilstm1

        elif selected_layer == 'cnn':
            filter_sizes = 3  # convolutional filter size: look at 3 words at a time
            num_filters = 512  # number of filters

            # Add one dimension (reshape) to match the rank Conv2D expects
            reshape = Reshape((maxlen, embedding_dim, 1))(embedding_layer)
            # Build the CNN with a convolution layer followed by a pooling layer
            conv1 = Conv2D(num_filters, kernel_size=(filter_sizes, embedding_dim), padding='valid',
                           kernel_initializer='normal',
                           activation='relu')(reshape)
            maxpool1 = MaxPool2D(pool_size=(maxlen - filter_sizes + 1, 1), strides=(1, 1), padding='valid')(conv1)
            # Flatten the layer to 2-D for binary classification
            flatten = Flatten()(maxpool1)
            layer = flatten

    # Convert the data labels to a NumPy array
    label=np.array(data['label'])

    # Dense hidden layer with ReLU to reduce the number of output neurons
    dense_layer = Dense(16, activation='relu')(layer)
    # Apply dropout to prevent overfitting
    drop = Dropout(rate=0.1)(dense_layer)
    # Binary classification: a single output neuron with a sigmoid activation
    output = Dense(1, activation='sigmoid')(drop)
    model = Model(inputs=input_1, outputs=output)
    # Binary classification: use binary_crossentropy as the loss and Adam as the optimizer
    model.compile(loss='binary_crossentropy', optimizer=tf.optimizers.Adam(learning_rate=0.00001), metrics=['accuracy'])
    print(model.summary())

    if selected_layer=='bert':
        # Create a callback to save the model weights
        checkpointName = os.path.join(output_dir, "bert_model.ckpt")
        cp_callback = ModelCheckpoint(filepath=checkpointName, save_weights_only=True, verbose=1)
        model.fit(x=train_data, y=label, batch_size=batch_size, epochs=epoch, verbose=1, validation_split=0.2,callbacks=[cp_callback])
    else:
        model.fit(x=train_data, y=label, batch_size=batch_size, epochs=epoch, verbose=1, validation_split=0.2)

    model.save(output_dir)
Example #4
    limit = n_timesteps / 4.0

    y = np.array([0 if x < limit else 1 for x in np.cumsum(X)])

    X = X.reshape(1, n_timesteps, 1)
    y = y.reshape(1, n_timesteps, 1)
    return X, y


n_units = 20
n_timesteps = 4

model = Sequential()
model.add(
    Bidirectional(
        LSTM(n_units, return_sequences=True, input_shape=(n_timesteps, 1))))
model.add(TimeDistributed(Dense(1, activation='sigmoid')))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

for epoch in range(1000):
    X, y = get_sequence(n_timesteps)
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

X, y = get_sequence(n_timesteps)
yhat = (model.predict(X, verbose=0) > 0.5).astype('int32')  # predict_classes was removed in recent tf.keras
for i in range(n_timesteps):
    print('Actual: ', y[0, i], 'Predicted: ', yhat[0, i])
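
# Example #4 starts partway through get_sequence, so its imports and function header are
# missing. A sketch of the assumed missing pieces; the uniform-random input sequence is an
# assumption consistent with the body shown above.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Bidirectional

def get_sequence(n_timesteps):
    # random values in [0, 1); the cumulative sum is thresholded at n_timesteps / 4
    X = np.random.rand(n_timesteps)
    limit = n_timesteps / 4.0
    y = np.array([0 if x < limit else 1 for x in np.cumsum(X)])
    X = X.reshape(1, n_timesteps, 1)
    y = y.reshape(1, n_timesteps, 1)
    return X, y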
Example #5
		n_gram_sequence = token_list[:i+1]
		input_sequences.append(n_gram_sequence)


# pad sequences 
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# create predictors and label
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

label = ku.to_categorical(label, num_classes=total_words)

model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))  #(# Your Embedding Layer)
model.add(Bidirectional(LSTM(150, return_sequences=True)))  #(# An LSTM Layer)
model.add(Dropout(0.2))  #(# A dropout layer)
model.add(LSTM(100))  #(# Another LSTM Layer)
model.add(Dense(total_words//2, activation='relu'))  #(# A Dense Layer including regularizers)
model.add(Dense(total_words, activation='softmax'))  #(# A Dense Layer)
# Pick an optimizer
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])  #(# Pick a loss function and an optimizer)
print(model.summary())

history = model.fit(predictors, label, epochs=100, verbose=1)

import matplotlib.pyplot as plt
acc = history.history['accuracy']
loss = history.history['loss']

epochs = range(len(acc))
Example #6
    callbackslist = [
        TensorBoard(log_dir='logs'),
        ModelCheckpoint(filepath='Model.h5',
                        monitor='val_loss',
                        save_best_only=True)
    ]

    input1 = Input(shape=(2048, ))
    imodel1 = Dropout(0.5)(input1)
    imodel2 = Dense(512, activation='relu')(imodel1)

    input2 = Input(shape=(34, ))
    tmodel1 = Embedding(vocabulary_size, 50, mask_zero=True,
                        trainable=False)(input2)
    tmodel2 = Dropout(0.4)(tmodel1)
    tmodel3 = Bidirectional(LSTM(256, return_sequences=True))(tmodel2)
    tmodel4 = Dropout(0.4)(tmodel3)
    tmodel5 = Bidirectional(LSTM(256, return_sequences=False))(tmodel4)

    decoder1 = Add()([imodel2, tmodel5])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocabulary_size, activation='softmax')(decoder2)

    model = Model(inputs=[input1, input2], outputs=outputs)
    model.summary()
    model.layers[1].set_weights([emb_matrix])
    model.compile(loss='categorical_crossentropy', optimizer='adam')  # softmax over the vocabulary calls for categorical cross-entropy

    model.fit([TX2, TX1],
              TY,
              epochs=10,
Example #7
def build_model_hpconfig(args):
    """
    Description:
        Building models for hyperparameter Tuning

    Args:
        args: input arguments

    Returns:
        model (keras model)
    """

    #parsing and assigning hyperparameter variables from argparse
    conv1_filters = int(args.conv1_filters)
    conv2_filters = int(args.conv2_filters)
    conv3_filters = int(args.conv3_filters)
    window_size = int(args.window_size)
    kernel_regularizer = args.kernel_regularizer
    max_pool_size = int(args.pool_size)
    conv_dropout = float(args.conv_dropout)
    conv1d_initializer = args.conv_weight_initializer
    recurrent_layer1 = int(args.recurrent_layer1)
    recurrent_layer2 = int(args.recurrent_layer2)
    recurrent_dropout = float(args.recurrent_dropout)
    after_recurrent_dropout = float(args.after_recurrent_dropout)
    recurrent_recurrent_dropout = float(args.recurrent_recurrent_dropout)
    recurrent_initalizer = args.recurrent_weight_initializer
    optimizer = args.optimizer
    learning_rate = float(args.learning_rate)
    bidirection = args.bidirection
    recurrent_layer = str(args.recurrent_layer)
    dense_dropout = float(args.dense_dropout)
    dense_1 = int(args.dense_1)
    dense_initializer = args.dense_weight_initializer
    train_data = str(args.train_input_data)

    #main input is the length of the amino acid in the protein sequence (700,)
    main_input = Input(shape=(700, ), dtype='float32', name='main_input')

    #Embedding Layer used as input to the neural network
    embed = Embedding(output_dim=21, input_dim=21,
                      input_length=700)(main_input)

    #secondary input is the protein profile features
    auxiliary_input = Input(shape=(700, 21), name='aux_input')

    #get shape of input layers
    print("Protein Sequence shape: ", main_input.get_shape())
    print("Protein Profile shape: ", auxiliary_input.get_shape())

    #concatenate input layers
    concat = Concatenate(axis=-1)([embed, auxiliary_input])

    #3x1D Convolutional Hidden Layers with BatchNormalization, Dropout and MaxPooling
    conv_layer1 = Conv1D(conv1_filters,
                         window_size,
                         kernel_regularizer=kernel_regularizer,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer1)
    conv_act = activations.relu(batch_norm)
    # keep the layer output distinct from the conv_dropout rate so later blocks still get a float rate
    conv_dropout1 = Dropout(conv_dropout)(conv_act)
    max_pool_1D_1 = MaxPooling1D(pool_size=max_pool_size,
                                 strides=1,
                                 padding='same')(conv_dropout1)

    conv_layer2 = Conv1D(conv2_filters,
                         window_size,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer2)
    conv_act = activations.relu(batch_norm)
    conv_dropout2 = Dropout(conv_dropout)(conv_act)
    max_pool_1D_2 = MaxPooling1D(pool_size=max_pool_size,
                                 strides=1,
                                 padding='same')(conv_dropout2)

    conv_layer3 = Conv1D(conv3_filters,
                         window_size,
                         kernel_regularizer=kernel_regularizer,
                         padding='same',
                         kernel_initializer=conv1d_initializer)(concat)
    batch_norm = BatchNormalization()(conv_layer3)
    conv_act = activations.relu(batch_norm)
    conv_dropout3 = Dropout(conv_dropout)(conv_act)
    max_pool_1D_3 = MaxPooling1D(pool_size=max_pool_size,
                                 strides=1,
                                 padding='same')(conv_dropout3)

    #concat pooling layers
    conv_features = Concatenate(axis=-1)(
        [max_pool_1D_1, max_pool_1D_2, max_pool_1D_3])
    print("Shape of convolutional output: ", conv_features.get_shape())

    conv_features = Dense(600, activation='relu')(conv_features)

    ######## Recurrent Layers ########
    if (recurrent_layer == 'lstm'):
        if (bidirection):
            print('Entering LSTM Layers')
            #Creating Bidirectional LSTM layers
            lstm_f1 = Bidirectional(
                LSTM(recurrent_layer1,
                     return_sequences=True,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     dropout=recurrent_dropout,
                     recurrent_dropout=recurrent_recurrent_dropout,
                     kernel_initializer=recurrent_initalizer))(conv_features)
            lstm_f2 = Bidirectional(
                LSTM(recurrent_layer2,
                     return_sequences=True,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     dropout=recurrent_dropout,
                     recurrent_dropout=recurrent_recurrent_dropout,
                     kernel_initializer=recurrent_initalizer))(lstm_f1)

            #concatenate LSTM with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [lstm_f1, lstm_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
            print('Concatenated LSTM layers')

        else:
            #Creating unidirectional LSTM Layers
            lstm_f1 = LSTM(
                recurrent_layer1,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(conv_features)

            lstm_f2 = LSTM(recurrent_layer2,
                           return_sequences=True,
                           activation='tanh',
                           recurrent_activation='sigmoid',
                           dropout=recurrent_dropout,
                           recurrent_dropout=recurrent_recurrent_dropout,
                           kernel_initializer=recurrent_initalizer)(lstm_f1)

            #concatenate LSTM with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [lstm_f1, lstm_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)

    elif (recurrent_layer == 'gru'):
        if (bidirection):

            #Creating Bidirectional GRU layers
            gru_f1 = Bidirectional(
                GRU(recurrent_layer1,
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    dropout=recurrent_dropout,
                    recurrent_dropout=recurrent_recurrent_dropout,
                    kernel_initializer=recurrent_initalizer))(conv_features)

            gru_f2 = Bidirectional(
                GRU(recurrent_layer2,
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    dropout=recurrent_dropout,
                    recurrent_dropout=recurrent_recurrent_dropout,
                    kernel_initializer=recurrent_initalizer))(gru_f1)

            #concatenate GRU with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [gru_f1, gru_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)

        else:
            #Creating unidirectional GRU Layers
            gru_f1 = GRU(
                recurrent_layer1,
                return_sequences=True,
                activation='tanh',
                recurrent_activation='sigmoid',
                dropout=recurrent_dropout,
                recurrent_dropout=recurrent_recurrent_dropout,
                kernel_initializer=recurrent_initalizer)(conv_features)

            gru_f2 = GRU(recurrent_layer2,
                         return_sequences=True,
                         activation='tanh',
                         recurrent_activation='sigmoid',
                         dropout=recurrent_dropout,
                         recurrent_dropout=recurrent_recurrent_dropout,
                         kernel_initializer=recurrent_initalizer)(gru_f1)

            #concatenate GRU with convolutional layers
            concat_features = Concatenate(axis=-1)(
                [gru_f1, gru_f2, conv_features])
            concat_features = Dropout(after_recurrent_dropout)(concat_features)
    else:

        print('Only LSTM and GRU recurrent layers are used in this model')
        return

    #Dense Fully-Connected DNN layers
    fc_dense1 = Dense(dense_1,
                      activation='relu',
                      kernel_initializer=dense_initializer)(concat_features)
    fc_dense1_dropout = Dropout(dense_dropout)(fc_dense1)

    #Final Output layer with 8 nodes for the 8 output classifications
    main_output = Dense(8, activation='softmax',
                        name='main_output')(fc_dense1_dropout)

    #create model from inputs and outputs
    model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])

    #Set optimizer to be used with the model, default is Adam
    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate, name='adam')
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.0, nesterov=False, name='SGD')
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate,
                            centered=True,
                            name='RMSprop')
    elif optimizer == 'adagrad':
        optimizer = Adagrad(learning_rate=learning_rate, name='Adagrad')
    elif optimizer == 'adamax':
        optimizer = Adamax(learning_rate=learning_rate, name='Adamax')
    else:
        optimizer = Adam(learning_rate=learning_rate, name='adam')

    #compile model using optimizer and the categorical crossentropy loss function
    model.compile(optimizer=optimizer,
                  loss={'main_output': 'categorical_crossentropy'},
                  metrics=[
                      'accuracy',
                      MeanSquaredError(),
                      FalseNegatives(),
                      FalsePositives(),
                      TrueNegatives(),
                      TruePositives(),
                      MeanAbsoluteError(),
                      Recall(),
                      Precision()
                  ])

    #get summary of model including its layers and num parameters
    model.summary()

    return model
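
# A hedged usage sketch for build_model_hpconfig: the hyperparameter values below are
# placeholders passed through an argparse.Namespace, not values from the original project.
from argparse import Namespace

args = Namespace(
    conv1_filters=16, conv2_filters=32, conv3_filters=64,
    window_size=7, kernel_regularizer=None, pool_size=2,
    conv_dropout=0.2, conv_weight_initializer='glorot_uniform',
    recurrent_layer1=200, recurrent_layer2=200,
    recurrent_dropout=0.5, after_recurrent_dropout=0.4,
    recurrent_recurrent_dropout=0.1,
    recurrent_weight_initializer='glorot_uniform',
    optimizer='adam', learning_rate=0.0003,
    bidirection=True, recurrent_layer='lstm',
    dense_dropout=0.3, dense_1=600,
    dense_weight_initializer='glorot_uniform',
    train_input_data='train_data.npz')  # placeholder path

model = build_model_hpconfig(args)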
Example #8
    def doaV0(self):
        inputs = tf.keras.Input(self.input_shape)
        drop_rate = 1. - self.params['dropout_keep_prob_cnn']

        x = Conv2D(name='conv1', filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
        x = BatchNormalization(name='bn1', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)

        x = Conv2D(name='conv2', filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn2', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool2', pool_size=(5, 2), strides=(5, 2), padding='same')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv3', filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn3', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool3', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv4', filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn4', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool4', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv5', filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn5', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool5', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv6', filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn6', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool6', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = tf.reshape(x, [-1, self.out_shape_doa[0], 2 * 256])

        x = Bidirectional(GRU(units=self.params['rnn_hidden_size'], return_sequences=True),
                                 name='bidirecrtionalGRU')(x)

        x = SelfAttention(attention_size=self.params['attention_size'])(x)

        x = tf.reshape(x, [-1, 2 * self.params['rnn_hidden_size']])

        drop_rate_dnn = 1. - self.params['dropout_keep_prob_dnn']
        # -------------DOA----------------
        x = Dense(self.params['dnn_size'], activation='relu', name='dense_relu_doa1')(x)
        x = Dropout(rate=drop_rate_dnn)(x)
        x = Dense(self.params['dnn_size'], activation='relu', name='dense_relu_doa2')(x)
        x = Dropout(rate=drop_rate_dnn)(x)
        x = Dense(self.out_shape_doa[-1], name='dense_doa3')(x)
        x = tf.keras.activations.tanh(x)
        x = tf.reshape(x, shape=[-1, self.out_shape_doa[0], self.out_shape_doa[1]], name='output_doa')

        model = tf.keras.Model(
            inputs=inputs,
            outputs=x,
            name="Doa_net_v0")
        return model
Example #9
    def sedV0(self, *args, **kwargs):
        out_shape_sed = self.out_shape_sed
        params = self.params
        inputs = tf.keras.Input(self.input_shape)
        drop_rate = 1. - params['dropout_keep_prob_cnn']

        x = Conv2D(name='conv1', filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
        x = BatchNormalization(name='bn1', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)

        x = Conv2D(name='conv2', filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn2', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool2', pool_size=(5, 2), strides=(5, 2), padding='same')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv3', filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn3', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool3', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv4', filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn4', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool4', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv5', filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn5', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool5', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = Conv2D(name='conv6', filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(name='bn6', center=True, scale=True, trainable=True)(x)
        x = tf.keras.activations.relu(x)
        x = MaxPool2D(name='maxpool6', pool_size=(1, 2), strides=(1, 2), padding='valid')(x)
        x = Dropout(rate=drop_rate)(x)

        x = tf.reshape(x, [-1, out_shape_sed[0], 2 * 256])

        x = Bidirectional(GRU(units=params['rnn_hidden_size'], return_sequences=True),
                                 name='bidirecrtionalGRU')(x)

        x = SelfAttention(attention_size=params['attention_size'])(x)

        x = tf.reshape(x, [-1, 2 * params['rnn_hidden_size']])

        drop_rate_dnn = 1. - params['dropout_keep_prob_dnn']
        # -------------SED----------------
        x_sed = Dense(params['dnn_size'], activation='relu', name='dense_relu_sed1')(x)
        x_sed = Dropout(rate=drop_rate_dnn)(x_sed)
        x_sed = Dense(params['dnn_size'], activation='relu', name='dense_relu_sed2')(x_sed)
        x_sed = Dropout(rate=drop_rate_dnn)(x_sed)
        x_sed = Dense(out_shape_sed[-1], name='dense_sed3')(x_sed)
        x_sed = tf.keras.activations.sigmoid(x_sed)
        x_sed = tf.reshape(x_sed, shape=[-1, out_shape_sed[0], out_shape_sed[1]], name='output_sed')

        model = tf.keras.Model(
            inputs=inputs,
            outputs=x_sed,
            name="Sed_net_v0")
        return model
    def train(self, dataset='all'):
        self.device_calibration()

        X_train, X_test, y_train, y_test, X, y = self._get_data(
            dataset, 'tensor')

        trn, val, preproc = ktrain.text.texts_from_array(X_train,
                                                         y_train,
                                                         X_test,
                                                         y_test,
                                                         maxlen=26)

        model = tf.keras.Sequential([
            Embedding(30000, 15),
            Dropout(0.2),
            Bidirectional(LSTM(15)),
            Dense(1, activation='sigmoid')
        ])

        print(model.summary())

        model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                      optimizer=tf.keras.optimizers.Adam(1e-4),
                      metrics=[
                          f1,
                          Recall(name='recall'),
                          Precision(name='precision'), 'accuracy'
                      ])

        print(model.summary())

        class validate(tf.keras.callbacks.Callback):
            def on_epoch_end(self, epoch, logs=None):
                learner.validate(print_report=False,
                                 save_path='logs/rnn/e' + str(epoch + 1) +
                                 '.csv',
                                 class_names=preproc.get_classes())

        learner = ktrain.get_learner(model,
                                     train_data=trn,
                                     val_data=val,
                                     batch_size=100)

        learner.model.compile(metrics=['accuracy'],
                              loss=tf.keras.losses.BinaryCrossentropy(),
                              optimizer='adam')

        learner.set_weight_decay(0.01)

        learner.fit_onecycle(1e-4,
                             20,
                             callbacks=[
                                 tf.keras.callbacks.EarlyStopping(
                                     patience=5,
                                     monitor='val_loss',
                                     mode='min',
                                     restore_best_weights=True),
                                 validate()
                             ])

        self.y_train = y_train
        self.y_test = y_test
        self.X_test = X_test

        return model
Example #11
    def fit( self,
             X_train,
             y_train,
             maxlen        = 100,
             learning_rate = 1e-3,
             batch_size    = 8,
             dropout       = 0.3,
             units         = 124,
             wdir          = 'checkpoints/',
             ):
        '''
            Train a new model
        '''
        # create directory to save model files to
        cwd  = os.getcwd()
        wdir = os.path.join( cwd, wdir )
        if not os.path.exists( wdir ):
            os.makedirs( wdir )

        print('Preprocessing text')
        X_train = [ self.preprocess(t) for t in X_train ]
        new_maxlen = max( [len(i.split()) for i in X_train] )
        maxlen     = max(maxlen, new_maxlen)
        self.max_len = maxlen

        # KERAS TOKENIZER
        print('Tokenizing text')
        self.tokenizer = Tokenizer( num_words=6500,
                                    lower=True,
                                    oov_token='oov',
                                    filters='"#$%&()*+,-./:;<=>@[\\]^_`{|}~\t\n',        # removed '!' and '?'
                                  )
        self.tokenizer.fit_on_texts(X_train)
        X_train = self.tokenizer.texts_to_sequences(X_train)
        self.vocab_size = len(self.tokenizer.word_index) + 1
        print('Vocabulary size:', self.vocab_size)

        X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)

        switch = 2
        embedding_matrix, EMBED_SIZE = self.get_embeddings(switch)

        optimizer   = Adam
        emb         = self.embeddings_switch[ switch ]
        time_stamp = time.strftime("%Y%m%dT%H%M")
        optimizer_name = optimizer.__module__.split('.')[-1].capitalize()
        params = f'\nEmbeddings={emb}, LR={learning_rate}, batch_size={batch_size}, dropout={dropout}, units={units}, optimizer={optimizer_name}'
        print( 'Classifier parameters:', params )
        print( 'Timestamp:', time_stamp)

        deep_inputs     = Input(shape=(maxlen,))
        embedding_layer = Embedding(self.vocab_size, EMBED_SIZE, weights=[embedding_matrix], trainable=False)(deep_inputs)
        LSTM_1          = Bidirectional(LSTM( units, dropout=dropout, return_sequences=True ))(embedding_layer)
        gmp1d           = GlobalMaxPool1D()(LSTM_1)
        dense_layer     = Dense(1, activation='sigmoid')(gmp1d)
        self.model           = Model(inputs=deep_inputs, outputs=dense_layer)


        self.model.compile( loss='binary_crossentropy', optimizer=optimizer(learning_rate=learning_rate), metrics=['accuracy'] )

        early_stop = tf.keras.callbacks.EarlyStopping(
                                                       monitor='val_accuracy',
                                                       patience=5,
                                                       restore_best_weights=True,
                                                       verbose=2,
                                                     )

        reduce_lr  = tf.keras.callbacks.ReduceLROnPlateau(
                                                           monitor="val_loss",
                                                           patience=2,
                                                           factor=0.2,
                                                           min_lr=5e-5,
                                                           verbose=2,
                                                         )

        filepath   = wdir + time_stamp + '-epoch{epoch:02d}-val_accu_{val_accuracy:.2f}-val_loss_{val_loss:.2f}.hdf5'
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
                                                            filepath,
                                                            verbose=0,
                                                          )

        history = self.model.fit( X_train,
                             y_train,
                             batch_size=batch_size,
                             epochs=21,
                             verbose=2,
                             validation_split=0.2,
                             callbacks=[ early_stop, reduce_lr, checkpoint ]
                            )

        plt.plot(history.history['accuracy'])
        plt.plot(history.history['val_accuracy'])

        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train','test'], loc='upper left')
        plt.show()

        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])

        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train','test'], loc='upper left')
        plt.show()

        with open(f'{wdir}/{time_stamp}_tokenizer.pkl', 'wb') as f:
            pickle.dump( self.tokenizer, f, protocol=pickle.HIGHEST_PROTOCOL )
Example #12
def execute(weeks):
    np.random.seed(RANDOM_SEED)

    # Parsing data
    contributions = []
    for week in weeks:
        for day in week['contributionDays']:
            contributions.append(day['contributionCount'])

    days = np.arange(0, len(contributions), 1)
    df = pd.DataFrame(dict(contributions=contributions),
                      index=days,
                      columns=['contributions'])
    # df = df.sort_values('Date')

    # logger.info(df.shape)

    # Normalization
    scaler = MinMaxScaler()
    contributions = df.contributions.values.reshape(-1, 1)
    scaled_contributions = scaler.fit_transform(contributions)

    # logger.info(np.isnan(scaled_contributions).any())

    # scaled_contributions = scaled_contributions[~np.isnan(scaled_contributions)]
    # scaled_contributions = scaled_contributions.reshape(-1, 1)

    # logger.info(np.isnan(scaled_contributions).any())

    # Preprocessing
    X_train, y_train, X_test, y_test = preprocess(scaled_contributions,
                                                  SEQ_LEN,
                                                  train_split=0.98)

    # logger.info(y_test)
    # logger.info(X_train.shape)
    # logger.info(X_test.shape)

    # Model
    model = keras.Sequential()

    model.add(
        Bidirectional(LSTM(WINDOW_SIZE,
                           return_sequences=True,
                           activation='tanh',
                           recurrent_activation='sigmoid'),
                      input_shape=(WINDOW_SIZE, X_train.shape[-1])))
    model.add(Dropout(rate=DROPOUT))

    model.add(
        Bidirectional(
            LSTM((WINDOW_SIZE * 2),
                 return_sequences=True,
                 activation='tanh',
                 recurrent_activation='sigmoid')))
    model.add(Dropout(rate=DROPOUT))

    model.add(Bidirectional(LSTM(WINDOW_SIZE, return_sequences=False)))

    model.add(Dense(units=1))

    model.add(Activation('linear'))

    # Training
    model.compile(loss='mean_squared_error', optimizer='adam')

    model.fit(X_train,
              y_train,
              epochs=EPOCHS,
              batch_size=BATCH_SIZE,
              shuffle=False,
              validation_split=0.1)

    model.evaluate(X_test, y_test)

    # Prediction
    y_hat = model.predict(X_test)
    y_hat_inverse = scaler.inverse_transform(y_hat)

    return y_hat_inverse.tolist()
Example #13
    mnist = datasets.mnist
    (x_train, t_train), (x_test, t_test) = mnist.load_data()

    x_train = (x_train.reshape(-1, 28, 28) / 255).astype(np.float32)
    x_test = (x_test.reshape(-1, 28, 28) / 255).astype(np.float32)

    x_train, x_val, t_train, t_val = \
        train_test_split(x_train, t_train, test_size=0.2)
    '''
    2. Build the model
    '''
    model = Sequential()
    model.add(
        Bidirectional(LSTM(25,
                           activation='tanh',
                           recurrent_activation='sigmoid',
                           kernel_initializer='glorot_normal',
                           recurrent_initializer='orthogonal'),
                      merge_mode='concat'))
    model.add(
        Dense(10, kernel_initializer='glorot_normal', activation='softmax'))
    '''
    3. Train the model
    '''
    optimizer = optimizers.Adam(learning_rate=0.001,
                                beta_1=0.9,
                                beta_2=0.999,
                                amsgrad=True)

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
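    # The snippet ends at compile; a minimal, assumed training/evaluation step using the
    # splits prepared above (epoch count and batch size are placeholders, not the original).
    model.fit(x_train, t_train,
              epochs=30,
              batch_size=100,
              validation_data=(x_val, t_val),
              verbose=2)
    loss, acc = model.evaluate(x_test, t_test, verbose=0)
    print('test_loss: {:.3f}, test_acc: {:.3f}'.format(loss, acc))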
def get_test_model_lstm_stateful():
    stateful_batch_size = 1
    input_shapes = [
        (17, 4),
        (1, 10),
        (None, 4),
        (12,),
        (12,)
    ]

    inputs = [Input(batch_shape=(stateful_batch_size,) + s) for s in input_shapes]
    outputs = []
    for in_num, inp in enumerate(inputs[:2]):
        stateful = bool((in_num + 1) % 2)
        lstm_sequences = LSTM(
            stateful=stateful,
            units=8,
            recurrent_activation='relu',
            return_sequences=True,
            name='lstm_sequences_' + str(in_num) + '_st-' + str(stateful)
        )(inp)
        stateful = bool((in_num) % 2)
        lstm_regular = LSTM(
            stateful=stateful,
            units=3,
            recurrent_activation='sigmoid',
            return_sequences=False,
            name='lstm_regular_' + str(in_num) + '_st-' + str(stateful)
        )(lstm_sequences)
        outputs.append(lstm_regular)
        stateful = bool((in_num + 1) % 2)
        lstm_state, state_h, state_c = LSTM(
            stateful=stateful,
            units=3,
            recurrent_activation='sigmoid',
            return_state=True,
            name='lstm_state_return_' + str(in_num) + '_st-' + str(stateful)
        )(inp)
        outputs.append(lstm_state)
        outputs.append(state_h)
        outputs.append(state_c)
        stateful = bool((in_num + 1) % 2)
        lstm_bidi_sequences = Bidirectional(
            LSTM(
                stateful=stateful,
                units=4,
                recurrent_activation='hard_sigmoid',
                return_sequences=True,
                name='bi-lstm1_' + str(in_num) + '_st-' + str(stateful)
            )
        )(inp)
        stateful = bool((in_num) % 2)
        lstm_bidi = Bidirectional(
            LSTM(
                stateful=stateful,
                units=6,
                recurrent_activation='linear',
                return_sequences=False,
                name='bi-lstm2_' + str(in_num) + '_st-' + str(stateful)
            )
        )(lstm_bidi_sequences)
        outputs.append(lstm_bidi)

    initial_state_stateful = LSTM(units=12, return_sequences=True, stateful=True, return_state=True,
                                  name='initial_state_stateful')(inputs[2], initial_state=[inputs[3], inputs[4]])
    outputs.extend(initial_state_stateful)
    initial_state_not_stateful = LSTM(units=12, return_sequences=False, stateful=False, return_state=True,
                                      name='initial_state_not_stateful')(inputs[2],
                                                                         initial_state=[inputs[3], inputs[4]])
    outputs.extend(initial_state_not_stateful)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mean_squared_error', optimizer='nadam')

    # fit to dummy data
    training_data_size = stateful_batch_size
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)

    model.fit(data_in, data_out, batch_size=stateful_batch_size, epochs=10)
    return model
Example #15
y = [[tag2idx[w[2]] for w in s] for s in sentences]
y = pad_sequences(maxlen=max_len,
                  sequences=y,
                  padding="post",
                  value=tag2idx["O"])

x_train, x_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

input_word = Input(shape=(max_len, ))
model = Embedding(input_dim=num_words, output_dim=50,
                  input_length=max_len)(input_word)
model = SpatialDropout1D(0.1)(model)
model = Bidirectional(
    GRU(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
model = TimeDistributed(Dense(num_tags))(model)
out = Activation("softmax", dtype="float32", name="predictions")(model)
model = Model(input_word, out)
model.summary()

model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

early_stopping = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0,
    patience=1,
    verbose=0,
    mode="max",
Example #16
                  kernel_initializer='glorot_normal',
                  strides=1)(in_sequence)
embedded = Dropout(0.1)(embedded)
embedded = MaxPooling1D(pool_size=pool_length)(embedded)
for i in range(1, len(nb_filter)):
    embedded = Conv1D(filters=nb_filter[i],
                      kernel_size=filter_length[i],
                      padding='valid',
                      activation='relu',
                      kernel_initializer='glorot_normal',
                      strides=1)(embedded)
    embedded = Dropout(0.1)(embedded)
    embedded = MaxPooling1D(pool_size=pool_length)(embedded)

bi_lstm_seq = \
    Bidirectional(LSTM(64, return_sequences=False, dropout=0.15, recurrent_dropout=0.15, implementation=0))(embedded)

label = Dropout(0.3)(bi_lstm_seq)
label = Dense(64, activation='relu')(label)
label = Dense(2, activation='sigmoid')(label)
# sentence encoder
labeler = Model(inputs=in_sequence, outputs=label)
labeler.summary()

positives = []
pos_labels = []
for line in open(
        '/Users/emzodls/Dropbox/Lab/Warwick/RiPP_nnets/final_train_sets/positives_all.fa'
):
    if not line.startswith('>'):
        positives.append(line.strip().lower())
Example #17
    # x_train: shape (train_size, max_length), random word ids in [0, word_num)
    x_train = np.random.randint(word_num, size=(train_size, max_length))
    # y_train: one-hot labels, shape (train_size, max_length, class_num)
    y_train = to_categorical(np.random.randint(class_num, size=(train_size, max_length)),class_num)
    
    x_val = np.random.randint(word_num, size=(val_size, max_length))
    y_val = to_categorical(np.random.randint(class_num, size=(val_size, max_length)),class_num)
    # print("x_train.shape={}".format(x_train.shape))
    # print("y_train.shape={}".format(y_train.shape))

    S_inputs = Input(shape=(max_length,), dtype='int32')
    # print(K.int_shape(S_inputs))
    embeddings = Embedding(word_num, emb_size)(S_inputs)
    # print(K.int_shape(embeddings))
    lstm_seq = Bidirectional(LSTM(128,return_sequences = True))(embeddings)
    lstm_seq = Position_Embedding()(lstm_seq)
    # print(K.int_shape(lstm_seq))
    O_seq = Attention(8, 16)([lstm_seq, lstm_seq, lstm_seq])
    O_seq = Attention(8, 16)([O_seq, O_seq, O_seq])
    # print(K.int_shape(O_seq))
    # O_seq = GlobalAveragePooling1D()(O_seq)
    # print(K.int_shape(O_seq))
    # O_seq = Dropout(0.5)(O_seq)
    # outputs = Dense(1, activation='sigmoid')(O_seq)
    # print(K.int_shape(outputs))
    outputs = TimeDistributed(Dense(class_num, activation='sigmoid'))(O_seq)
    # print(K.int_shape(outputs))
    model = Model(inputs=S_inputs, outputs=outputs)
    print(model.summary())
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
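    # The example stops at compile; a short, assumed training call on the random data
    # generated above (batch size and epoch count are placeholders).
    model.fit(x_train, y_train,
              batch_size=32,
              epochs=5,
              validation_data=(x_val, y_val))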
Example #18
# Building an RNN to train our text generation model will be very similar to the
# sentiment models you've built previously. The only real change necessary is to
# make sure to use Categorical instead of Binary Cross Entropy as the loss
# function - we could use Binary before since the sentiment was only 0 or 1, but
# now there are hundreds of categories.

# From there, we should also consider using *more* epochs than before, as text
# generation can take a little longer to converge than sentiment analysis, *and*
# we aren't working with all that much data yet. I'll set it at 200 epochs here
# since we're only using part of the dataset, and training will tail off quite a
# bit over that many epochs.

model = Sequential()
model.add(Embedding(total_words, 64, input_length=max_sequence_len - 1))
model.add(Bidirectional(LSTM(20)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model, history = savefit(model,
                         input_sequences,
                         one_hot_labels,
                         epochs=200,
                         verbose=0)

### View the Training Graph

import matplotlib.pyplot as plt
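
# The plotting code that the heading above refers to is not included; a minimal sketch,
# assuming TF 2.x history keys ('accuracy' and 'loss').
acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(len(acc))

plt.plot(epochs, acc, label='accuracy')
plt.plot(epochs, loss, label='loss')
plt.xlabel('epoch')
plt.legend()
plt.show()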

Example #19
    def build(self,
              word_length,
              target_label_dims,
              word_vocab_size,
              char_vocab_size,
              word_embedding_dims=100,
              char_embedding_dims=16,
              word_lstm_dims=20,
              tagger_lstm_dims=200,
              dropout=0.5,
              crf_mode='pad'):
        """
        Build a NERCRF model

        Args:
            word_length (int): max word length in characters
            target_label_dims (int): number of entity labels (for classification)
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_embedding_dims (int): word embedding dimensions
            char_embedding_dims (int): character embedding dimensions
            word_lstm_dims (int): character LSTM feature extractor output dimensions
            tagger_lstm_dims (int): word tagger LSTM output dimensions
            dropout (float): dropout rate
            crf_mode (string): CRF operation mode, select 'pad'/'reg' for supplied sequences in
                input or full sequence tagging. ('reg' is forced when use_cudnn=True)
        """
        self.word_length = word_length
        self.target_label_dims = target_label_dims
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_embedding_dims = word_embedding_dims
        self.char_embedding_dims = char_embedding_dims
        self.word_lstm_dims = word_lstm_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout
        self.crf_mode = crf_mode

        assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid'

        # build word input
        words_input = Input(shape=(None, ), name='words_input')
        embedding_layer = Embedding(self.word_vocab_size,
                                    self.word_embedding_dims,
                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)

        # create word character embeddings
        word_chars_input = Input(shape=(None, self.word_length),
                                 name='word_chars_input')
        char_embedding_layer = Embedding(
            self.char_vocab_size,
            self.char_embedding_dims,
            name='char_embedding')(word_chars_input)
        char_embeddings = TimeDistributed(
            Conv1D(128, 3, padding='same',
                   activation='relu'))(char_embedding_layer)
        char_embeddings = TimeDistributed(
            GlobalMaxPooling1D())(char_embeddings)

        # create the final feature vectors
        features = concatenate([word_embeddings, char_embeddings], axis=-1)

        # encode using a bi-LSTM
        features = Dropout(self.dropout)(features)
        bilstm = Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(features)
        bilstm = Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims,
                           return_sequences=True))(bilstm)
        bilstm = Dropout(self.dropout)(bilstm)
        bilstm = Dense(self.target_label_dims)(bilstm)

        inputs = [words_input, word_chars_input]

        if self.use_cudnn:
            self.crf_mode = 'reg'
        with tf.device('/cpu:0'):
            crf = CRF(self.target_label_dims,
                      mode=self.crf_mode,
                      name='ner_crf')
            if self.crf_mode == 'pad':
                sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
                predictions = crf([bilstm, sequence_lengths])
                inputs.append(sequence_lengths)
            else:
                predictions = crf(bilstm)

        # compile the model
        model = tf.keras.Model(inputs=inputs, outputs=predictions)
        model.compile(loss={'ner_crf': crf.loss},
                      optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
                      metrics=[crf.viterbi_accuracy])
        self.model = model
Example #20
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

x_train = sequence.pad_sequences(x_train, maxlen=max_length)
x_test = sequence.pad_sequences(x_test, maxlen=max_length)

# skipgram model load
model_loaded = load_model('skipgram_model.h5')

# earlystopping callback
earlystopping = EarlyStopping(patience=10, monitor='val_accuracy')

# LSTM model
input_x_LSTM = Input(batch_shape=(None, max_length))
Embedding_LSTM = model_loaded.layers[2](
    input_x_LSTM)  # reuse the pre-trained embedding layer from model1 as-is
biLSTM_LSTM = Bidirectional(LSTM(64))(Embedding_LSTM)
Output_LSTM = Dense(1, activation='sigmoid')(biLSTM_LSTM)

model_LSTM = Model(input_x_LSTM, Output_LSTM)
model_LSTM.layers[1].trainable = False

model_LSTM.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
hist_LSTM = model_LSTM.fit(x_train,
                           y_train,
                           validation_data=(x_test, y_test),
                           batch_size=100,
                           epochs=100,
                           callbacks=[earlystopping])
Example #21
trainX, valX, trainY, valY = train_test_split( sentences, targets_final, test_size=0.2,  shuffle=True )


#prepare model for generating embedding
inp = Input( shape=( MAX_SEQ_LEN,) )
emb = Embedding( input_dim=VOCAB_SIZE, output_dim=50, weights=[embedding_matrix], 
                trainable=False, input_length=MAX_SEQ_LEN)(inp)
embedding = Model( inp, emb )
save_model( embedding, "embedding_model.h5" )

#Model for identifying tags of each word
inp = Input( shape=(MAX_SEQ_LEN, 50) )
drop = Dropout(0.1)(inp)
#two bidirectinal LSTM layers
lstm1 = LSTM( 50, return_sequences=True, recurrent_dropout=0.1)
seq1 = Bidirectional(lstm1)( drop )
lstm2 = LSTM( 50, return_sequences=True, recurrent_dropout=0.1)
seq2 = Bidirectional(lstm2)( seq1 )
# TIME_DISTRIBUTED -> ( MAX_SEQ_LEN, 50 ) -> (MAX_SEQ_LEN, POS_SIZE)
tags = TimeDistributed( Dense(POS_SIZE, activation="softmax") )(seq2)
model = Model( inp, tags )
model.compile( optimizer="rmsprop", loss="categorical_crossentropy", metrics=[ "accuracy" ] )

#batch generator for model training
def getBatch(sentences, targets, batch_size=128):
  n = (len(sentences) + batch_size - 1) // batch_size  # ceiling division so the last partial batch is kept and no empty batch is yielded
  for i in range( n ):
    x = sentences[ i*batch_size : (i+1)*batch_size ]
    x = embedding.predict(x)
    y = targets[ i*batch_size : (i+1)*batch_size ]
    yield x,y
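
# A hedged sketch of how the generator above might be consumed; the epoch count and the
# per-epoch recreation of the generator are assumptions, not part of the original snippet.
import math

EPOCHS = 5  # placeholder
steps = math.ceil(len(trainX) / 128)
val_x = embedding.predict(valX)  # pre-embed the validation split once
for _ in range(EPOCHS):
    # getBatch is exhausted after one pass, so recreate it every epoch
    model.fit(getBatch(trainX, trainY, batch_size=128),
              steps_per_epoch=steps,
              epochs=1,
              validation_data=(val_x, valY),
              verbose=1)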
Example #22
def convolution_model(num_speakers=2):

	# == Audio convolution layers ==
	
	model = Sequential()
	
	# # Implicit input layer
	# inputs = Input(shape=(298, 257, 2))
	# model.add(inputs)
	
	# Convolution layers
	conv1 = Conv2D(96, kernel_size=(1,7), padding='same', dilation_rate=(1,1), input_shape=(298, 257, 2), name="input_layer")
	model.add(conv1)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv2 = Conv2D(96, kernel_size=(7,1), padding='same', dilation_rate=(1,1))
	model.add(conv2)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv3 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(1,1))
	model.add(conv3)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv4 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(2,1))
	model.add(conv4)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv5 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(4,1))
	model.add(conv5)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv6 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(8,1))
	model.add(conv6)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv7 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(16,1))
	model.add(conv7)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv8 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(32,1))
	model.add(conv8)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv9 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(1,1))
	model.add(conv9)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv10 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(2,2))
	model.add(conv10)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv11 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(4,4))
	model.add(conv11)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv12 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(8,8))
	model.add(conv12)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv13 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(16,16))
	model.add(conv13)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv14 = Conv2D(96, kernel_size=(5,5), padding='same', dilation_rate=(32,32))
	model.add(conv14)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	conv15 = Conv2D(8, kernel_size=(1,1), padding='same', dilation_rate=(1,1))
	model.add(conv15)
	model.add(BatchNormalization())
	model.add(Activation("relu"))
	
	# == AV fused neural network ==
	
	# AV fusion step(s)
	model.add(TimeDistributed(Flatten()))
	
	# BLSTM
	new_matrix_length = 400
	model.add(Bidirectional(LSTM(new_matrix_length//2, return_sequences=True, input_shape=(298, 257*8))))
	
	# Fully connected layers
	model.add(Dense(600, activation="relu"))
	model.add(Dense(600, activation="relu"))
	model.add(Dense(600, activation="relu"))
	
	# Output layer (i.e. complex masks)
	# outputs = Dense(257*2*num_speakers, activation="relu")
	outputs = Dense(257*2*num_speakers, activation="sigmoid")				# TODO: check if this is more correct (based on the paper)
	model.add(outputs)
	outputs_complex_masks = Reshape((298, 257, 2, num_speakers), name="output_layer")
	model.add(outputs_complex_masks)
	
	# Print the output shapes of each model layer
	for layer in model.layers:
		name = layer.get_config()["name"]
		if "batch_normal" in name or "activation" in name:
			continue
		print(layer.output_shape, "\t", name)
	
	# Alternatively, print the default keras model summary
	print(model.summary())
	
	# Compile the model before training
	# model.compile(optimizer='adam', loss='mse')
	model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
	
	return model
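# Usage sketch (assumption): build the audio-only model and fit it briefly on random
# spectrogram-shaped dummy data to confirm that the layer shapes line up.
import numpy as np

audio_model = convolution_model(num_speakers=2)
dummy_x = np.random.rand(4, 298, 257, 2).astype('float32')     # (batch, time, freq, re/im)
dummy_y = np.random.rand(4, 298, 257, 2, 2).astype('float32')  # (batch, time, freq, re/im, speaker)
audio_model.fit(dummy_x, dummy_y, batch_size=2, epochs=1)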
    def train(self, vocab_size=None, split_ratio=0.9, num_epochs=5):
        if self.classes is None:
            print(
                'Classes list is none, did you use parse method before calling train?'
            )
            return

        if self.train_data is None:
            print(
                'Train dataframe is none, did you use parse method before calling train?'
            )
            return

        # ----------- convert train df to numpy array for X and Y -----------
        train_test_data = []

        self.features_ordered = [
            'unsubscribe', 'extensions', 'sender', 'subject', 'text'
        ]
        for i, row in self.train_data.iterrows():

            x = ''
            for col in self.features_ordered:
                if row[col] is not None and len(row[col]) > 0:
                    x += str(row[col]).lower() + ' '
            x = x.strip()

            # list.index raises ValueError rather than returning -1, so guard explicitly
            if row['type'] not in self.classes:
                continue
            idx = self.classes.index(row['type'])

            y = np.zeros(len(self.classes))
            y[idx] = 1

            train_test_data.append((x, y))

        train_test_data = np.array(train_test_data, dtype=object)  # (text, one-hot) pairs of mixed types

        # ----------- split to train x, y; test x, y-----------

        idx = int(len(train_test_data) * split_ratio)

        np.random.shuffle(train_test_data)
        train = train_test_data[:idx]
        test = train_test_data[idx:]

        train_x = np.array([i[0] for i in train])
        train_y = np.array([i[1] for i in train])

        test_x = np.array([i[0] for i in test])
        test_y = np.array([i[1] for i in test])

        # -------- build model --------
        encoder = TextVectorization(max_tokens=vocab_size)
        encoder.adapt(train_x)

        self.model = Sequential([
            encoder,
            Embedding(input_dim=len(encoder.get_vocabulary()),
                      output_dim=64,
                      mask_zero=True),
            Bidirectional(LSTM(64)),
            Dense(64, activation='relu'),
            Dense(len(self.classes), activation='softmax')
        ])
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])

        # -------- train model --------

        self.model.fit(x=train_x,
                       y=train_y,
                       batch_size=64,
                       epochs=num_epochs,
                       validation_data=(test_x, test_y))
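    # Hypothetical helper (not part of the original class): once train() has run, the
    # model accepts raw strings because TextVectorization is its first layer; argmax
    # over the softmax output maps back into self.classes.
    def predict_class(self, email_text):
        probs = self.model.predict(np.array([str(email_text).lower()]))[0]
        return self.classes[int(np.argmax(probs))]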
Example #24
print(tokenizer.word_index['athy'])
print(tokenizer.word_index['one'])
print(tokenizer.word_index['jeremy'])
print(tokenizer.word_index['lanigan'])

print(xs[6])
print(ys[6])

print(xs[5])
print(ys[5])

print(tokenizer.word_index)

model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len - 1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
#earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
history = model.fit(xs, ys, epochs=100, verbose=1)
model.summary()

import matplotlib.pyplot as plt


def plot_graphs(history, string):
    plt.plot(history.history[string])
    plt.xlabel('Epochs')
    plt.ylabel(string)
    plt.show()
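# Usage sketch: visualise the training curves recorded by model.fit above.
plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')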
def line_lstm_ctc(input_shape,
                  output_shape,
                  window_width=28,
                  window_stride=14):
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(
            f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})'
        )

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length, ), name='y_true')
    input_length = Input(shape=(1, ), name='input_length')
    label_length = Input(shape=(1, ), name='label_length')

    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that LSTMs expect an input of shape (num_batch_size, num_timesteps, feature_length).

    ##### Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(slide_window,
                           arguments={
                               'window_width': window_width,
                               'window_stride': window_stride
                           })(image_reshaped)
    # (num_windows, image_height, window_width, 1)

    # Make a LeNet and get rid of the last two layers (softmax and dropout)
    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs,
                         outputs=convnet.layers[-2].output)
    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    lstm_output = Bidirectional(lstm_fn(
        256, return_sequences=True))(convnet_outputs)
    # add additional layer
    lstm_output = Bidirectional(lstm_fn(128,
                                        return_sequences=True))(lstm_output)
    # (num_windows, 128)

    softmax_output = Dense(num_classes,
                           activation='softmax',
                           name='softmax_output')(lstm_output)
    # (num_windows, num_classes)
    ##### Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss')(
            [y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded')([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output])
    return model
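# Compile sketch (assumption): the 'ctc_loss' output already contains the per-sample CTC
# loss, so a common pattern is to train with an identity-style loss on that output and
# ignore 'ctc_decoded'; the input/output shapes below are placeholders.
ctc_model = line_lstm_ctc(input_shape=(28, 952), output_shape=(34, 64))
ctc_model.compile(loss={'ctc_loss': lambda y_true, y_pred: y_pred}, optimizer='adam')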
Example #26
encoder_inputs = Input(shape=(max_qc_len, ), dtype='int32')

emb_matrix = np.zeros((vocab_len, emb_dim))
for word, index in word_to_index.items():
    if index != 0:
        emb_matrix[index, :] = word_to_vec_map[word]
embedding_layer = Embedding(vocab_len,
                            emb_dim,
                            trainable=False,
                            mask_zero=True)
embedding_layer.build((None, ))
embedding_layer.set_weights([emb_matrix])

encoder_embeddings = embedding_layer(encoder_inputs)

encoder = Bidirectional(LSTM(state_dim, return_state=True),
                        merge_mode='concat')(encoder_embeddings)

encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(max_ans_len, ))
decoder_embeddings = embedding_layer(decoder_inputs)
decoder_lstm = LSTM(state_dim * 2, return_sequences=True, return_state=True)

decoder_outputs, _, _ = decoder_lstm(decoder_embeddings,
                                     initial_state=encoder_states)
outputs = TimeDistributed(Dense(vocab_len,
                                activation='softmax'))(decoder_outputs)
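# Completion sketch (assumption): wire the encoder/decoder tensors above into a trainable
# seq2seq model; the original snippet stops at the TimeDistributed softmax layer.
seq2seq_model = Model([encoder_inputs, decoder_inputs], outputs)
seq2seq_model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                      metrics=['accuracy'])
seq2seq_model.summary()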
Example #27
def create_test_model(logdir, time):
    args = sys.argv[1:]
    pickle_in = open(
        "pickles/classifier_" + args[0] + "_network_input_" + args[1] +
        "_normalized.pickle", "rb")
    X = pickle.load(pickle_in)

    print(X[:3])
    pickle_in = open(
        "pickles/classifier_" + args[0] + "_network_" + args[1] +
        "_labels.pickle", "rb")
    y = pickle.load(pickle_in)
    print(y[:500])

    pickle_in = open(
        "pickles/classifier_" + args[0] + "_network_input_" + args[1] +
        "_normalized_EVALUATION.pickle", "rb")
    X_test = pickle.load(pickle_in)

    print(X_test[:3])
    pickle_in = open(
        "pickles/classifier_" + args[0] + "_network_" + args[1] +
        "_labels_EVALUATION.pickle", "rb")
    y_test = pickle.load(pickle_in)

    y = np.array(y)
    X = np.array(X)

    y_test = np.array(y_test)
    X_test = np.array(X_test)

    model = Sequential()

    model.add(LSTM(32, input_shape=(X.shape[1:]), return_sequences=True))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())

    model.add(Dense(32))

    model.add(Bidirectional(LSTM(32)))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())

    model.add(Dense(1, activation='sigmoid'))  # binary output; use softmax + categorical labels for multi-class

    # opt = tf.keras.optimizers.Adam(lr=0.002, decay=1e-6)

    # y = tf.keras.utils.to_categorical(y)

    # Compile model
    model.compile(
        loss='binary_crossentropy',  # or 'categorical_crossentropy' for multi-class labels
        optimizer='rmsprop',
        metrics=['accuracy'],
    )

    model.fit(
        X,
        y,
        epochs=int(args[2]),
        batch_size=128,
    )

    val_loss, val_acc = model.evaluate(X_test, y_test)
    print(val_loss)
    print(val_acc)
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    model.save(logdir + "/" + str(val_acc) + "_trained_model_" + time + ".h5")
    return val_acc
Example #28
    def CNN_BiLSTM(self, max_label_len):

        # Model architecture
        input_ = Input(shape=(32, 128, 1))
        # CNN
        conv2d_1 = Conv2D(filters=64,
                          kernel_size=(3, 3),
                          activation='relu',
                          padding='same')(input_)
        maxpool_2d_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv2d_1)

        conv2d_2 = Conv2D(filters=128,
                          kernel_size=(3, 3),
                          activation='relu',
                          padding='same')(maxpool_2d_1)
        maxpool_2d_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv2d_2)

        conv2d_3 = Conv2D(filters=256,
                          kernel_size=(3, 3),
                          activation='relu',
                          padding='same')(maxpool_2d_2)
        conv2d_4 = Conv2D(filters=256,
                          kernel_size=(3, 3),
                          activation='relu',
                          padding='same')(conv2d_3)

        maxpool_2d_3 = MaxPool2D(pool_size=(2, 1))(conv2d_4)

        conv2d_5 = Conv2D(filters=512,
                          kernel_size=(3, 3),
                          activation='relu',
                          padding='same')(maxpool_2d_3)
        batch_norm_5 = BatchNormalization()(conv2d_5)

        conv2d_6 = Conv2D(filters=512,
                          kernel_size=(3, 3),
                          activation='relu',
                          padding='same')(batch_norm_5)
        batch_norm_6 = BatchNormalization()(conv2d_6)

        maxpool_2d_4 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

        conv2d_7 = Conv2D(filters=512, kernel_size=(2, 2),
                          activation='relu')(maxpool_2d_4)

        squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv2d_7)

        blstm1 = Bidirectional(LSTM(256, return_sequences=True,
                                    dropout=0.2))(squeezed)

        # = BatchNormalization()(blstm1)

        blstm2 = Bidirectional(LSTM(256, return_sequences=True,
                                    dropout=0.2))(blstm1)

        #blstm3 = Bidirectional(LSTM(256,return_sequences=True,dropout=0.2))(blstm2)

        outputs = Dense(len(self.char_list) + 1,
                        activation='softmax')(blstm2)  #(31,63)
        cnn_lstm_ = Model(input_, outputs)

        # CTC loss inputs: ground-truth labels and sequence lengths
        labels = Input(name='the_labels',
                       shape=[max_label_len],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')

        loss_out = Lambda(self.CTC_LOSS, output_shape=(1, ), name='ctc')(
            [outputs, labels, input_length, label_length])

        training_model = Model(
            inputs=[input_, labels, input_length, label_length],
            outputs=loss_out)
        return training_model, cnn_lstm_
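    # Sketch of a typical CTC_LOSS implementation (assumption -- the class's actual method
    # is not shown here). K.ctc_batch_cost expects (labels, y_pred, input_length,
    # label_length), while the Lambda above passes [outputs, labels, input_length, label_length].
    def CTC_LOSS(self, args):
        y_pred, labels, input_length, label_length = args
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)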
Example #29
def AV_model(people_num=2):
    def UpSampling2DBilinear(size):
        return Lambda(lambda x: tf.image.resize_bilinear(x, size, align_corners=True))

    def sliced(x, index):
        return x[:, :, :, index]

    # --------------------------- AS start ---------------------------
    audio_input = Input(shape=(298, 257, 2))
    print('as_0:', audio_input.shape)
    as_conv1 = Convolution2D(96, kernel_size=(1, 7), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv1')(audio_input)
    as_conv1 = BatchNormalization()(as_conv1)
    as_conv1 = ReLU()(as_conv1)
    print('as_1:', as_conv1.shape)

    as_conv2 = Convolution2D(96, kernel_size=(7, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv2')(as_conv1)
    as_conv2 = BatchNormalization()(as_conv2)
    as_conv2 = ReLU()(as_conv2)
    print('as_2:', as_conv2.shape)

    as_conv3 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv3')(as_conv2)
    as_conv3 = BatchNormalization()(as_conv3)
    as_conv3 = ReLU()(as_conv3)
    print('as_3:', as_conv3.shape)

    as_conv4 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(2, 1), name='as_conv4')(as_conv3)
    as_conv4 = BatchNormalization()(as_conv4)
    as_conv4 = ReLU()(as_conv4)
    print('as_4:', as_conv4.shape)

    as_conv5 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(4, 1), name='as_conv5')(as_conv4)
    as_conv5 = BatchNormalization()(as_conv5)
    as_conv5 = ReLU()(as_conv5)
    print('as_5:', as_conv5.shape)

    as_conv6 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(8, 1), name='as_conv6')(as_conv5)
    as_conv6 = BatchNormalization()(as_conv6)
    as_conv6 = ReLU()(as_conv6)
    print('as_6:', as_conv6.shape)

    as_conv7 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(16, 1), name='as_conv7')(as_conv6)
    as_conv7 = BatchNormalization()(as_conv7)
    as_conv7 = ReLU()(as_conv7)
    print('as_7:', as_conv7.shape)

    as_conv8 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(32, 1), name='as_conv8')(as_conv7)
    as_conv8 = BatchNormalization()(as_conv8)
    as_conv8 = ReLU()(as_conv8)
    print('as_8:', as_conv8.shape)

    as_conv9 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv9')(as_conv8)
    as_conv9 = BatchNormalization()(as_conv9)
    as_conv9 = ReLU()(as_conv9)
    print('as_9:', as_conv9.shape)

    as_conv10 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(2, 2), name='as_conv10')(as_conv9)
    as_conv10 = BatchNormalization()(as_conv10)
    as_conv10 = ReLU()(as_conv10)
    print('as_10:', as_conv10.shape)

    as_conv11 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(4, 4), name='as_conv11')(as_conv10)
    as_conv11 = BatchNormalization()(as_conv11)
    as_conv11 = ReLU()(as_conv11)
    print('as_11:', as_conv11.shape)

    as_conv12 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(8, 8), name='as_conv12')(as_conv11)
    as_conv12 = BatchNormalization()(as_conv12)
    as_conv12 = ReLU()(as_conv12)
    print('as_12:', as_conv12.shape)

    as_conv13 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(16, 16), name='as_conv13')(as_conv12)
    as_conv13 = BatchNormalization()(as_conv13)
    as_conv13 = ReLU()(as_conv13)
    print('as_13:', as_conv13.shape)

    as_conv14 = Convolution2D(96, kernel_size=(5, 5), strides=(1, 1), padding='same', dilation_rate=(32, 32), name='as_conv14')(as_conv13)
    as_conv14 = BatchNormalization()(as_conv14)
    as_conv14 = ReLU()(as_conv14)
    print('as_14:', as_conv14.shape)

    as_conv15 = Convolution2D(8, kernel_size=(1, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='as_conv15')(as_conv14)
    as_conv15 = BatchNormalization()(as_conv15)
    as_conv15 = ReLU()(as_conv15)
    print('as_15:', as_conv15.shape)

    AS_out = Reshape((298, 8 * 257))(as_conv15)
    print('AS_out:', AS_out.shape)
    # --------------------------- AS end ---------------------------

    # --------------------------- VS_model start ---------------------------
    VS_model = Sequential()
    VS_model.add(Convolution2D(256, kernel_size=(7, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='vs_conv1'))
    VS_model.add(BatchNormalization())
    VS_model.add(ReLU())
    VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(1, 1), name='vs_conv2'))
    VS_model.add(BatchNormalization())
    VS_model.add(ReLU())
    VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(2, 1), name='vs_conv3'))
    VS_model.add(BatchNormalization())
    VS_model.add(ReLU())
    VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(4, 1), name='vs_conv4'))
    VS_model.add(BatchNormalization())
    VS_model.add(ReLU())
    VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(8, 1), name='vs_conv5'))
    VS_model.add(BatchNormalization())
    VS_model.add(ReLU())
    VS_model.add(Convolution2D(256, kernel_size=(5, 1), strides=(1, 1), padding='same', dilation_rate=(16, 1), name='vs_conv6'))
    VS_model.add(BatchNormalization())
    VS_model.add(ReLU())
    VS_model.add(Reshape((75, 256, 1)))
    VS_model.add(UpSampling2DBilinear((298, 256)))
    VS_model.add(Reshape((298, 256)))
    # --------------------------- VS_model end ---------------------------

    video_input = Input(shape=(75, 1, 1792, people_num))
    AVfusion_list = [AS_out]
    for i in range(people_num):
        single_input = Lambda(sliced, arguments={'index': i})(video_input)
        VS_out = VS_model(single_input)
        AVfusion_list.append(VS_out)

    AVfusion = concatenate(AVfusion_list, axis=2)
    AVfusion = TimeDistributed(Flatten())(AVfusion)
    print('AVfusion:', AVfusion.shape)

    lstm = Bidirectional(LSTM(400, input_shape=(298, 8 * 257), return_sequences=True), merge_mode='sum')(AVfusion)
    print('lstm:', lstm.shape)

    fc1 = Dense(600, name="fc1", activation='relu', kernel_initializer=he_normal(seed=27))(lstm)
    print('fc1:', fc1.shape)
    fc2 = Dense(600, name="fc2", activation='relu', kernel_initializer=he_normal(seed=42))(fc1)
    print('fc2:', fc2.shape)
    fc3 = Dense(600, name="fc3", activation='relu', kernel_initializer=he_normal(seed=65))(fc2)
    print('fc3:', fc3.shape)

    complex_mask = Dense(257 * 2 * people_num, name="complex_mask", kernel_initializer=glorot_uniform(seed=87))(fc3)
    print('complex_mask:', complex_mask.shape)

    complex_mask_out = Reshape((298, 257, 2, people_num))(complex_mask)
    print('complex_mask_out:', complex_mask_out.shape)

    AV_model = Model(inputs=[audio_input, video_input], outputs=complex_mask_out)

    # # compile AV_model
    # AV_model.compile(optimizer='adam', loss='mse')

    return AV_model
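# Usage sketch (assumption): instantiate the model and check that randomly generated inputs
# with the expected shapes flow through to the complex-mask output.
import numpy as np

av_net = AV_model(people_num=2)
av_net.compile(optimizer='adam', loss='mse')
dummy_audio = np.random.rand(1, 298, 257, 2).astype('float32')
dummy_video = np.random.rand(1, 75, 1, 1792, 2).astype('float32')
masks = av_net.predict([dummy_audio, dummy_video])
print(masks.shape)  # expected: (1, 298, 257, 2, 2)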
def get_test_model_lstm():
    """Returns a test model for Long Short-Term Memory (LSTM) layers."""

    input_shapes = [
        (17, 4),
        (1, 10),
        (None, 4),
        (12,),
        (12,)
    ]
    inputs = [Input(shape=s) for s in input_shapes]
    outputs = []

    for inp in inputs[:2]:
        lstm_sequences = LSTM(
            units=8,
            recurrent_activation='relu',
            return_sequences=True
        )(inp)
        lstm_regular = LSTM(
            units=3,
            recurrent_activation='sigmoid',
            return_sequences=False
        )(lstm_sequences)
        outputs.append(lstm_regular)
        lstm_state, state_h, state_c = LSTM(
            units=3,
            recurrent_activation='sigmoid',
            return_state=True
        )(inp)
        outputs.append(lstm_state)
        outputs.append(state_h)
        outputs.append(state_c)

        lstm_bidi_sequences = Bidirectional(
            LSTM(
                units=4,
                recurrent_activation='hard_sigmoid',
                return_sequences=True
            )
        )(inp)
        lstm_bidi = Bidirectional(
            LSTM(
                units=6,
                recurrent_activation='linear',
                return_sequences=False
            )
        )(lstm_bidi_sequences)
        outputs.append(lstm_bidi)

        lstm_gpu_regular = LSTM(
            units=3,
            activation='tanh',
            recurrent_activation='sigmoid',
            use_bias=True
        )(inp)

        lstm_gpu_bidi = Bidirectional(
            LSTM(
                units=3,
                activation='tanh',
                recurrent_activation='sigmoid',
                use_bias=True
            )
        )(inp)
        outputs.append(lstm_gpu_regular)
        outputs.append(lstm_gpu_bidi)

    outputs.extend(LSTM(units=12, return_sequences=True,
                        return_state=True)(inputs[2], initial_state=[inputs[3], inputs[4]]))

    model = Model(inputs=inputs, outputs=outputs, name='test_model_lstm')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data
    training_data_size = 2
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, epochs=10)
    return model
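# The returned model can be saved to HDF5 like the other test models; the file name below
# is a placeholder.
test_model = get_test_model_lstm()
test_model.save('test_model_lstm.h5')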