def test_model(modelName, datasetName, x_test, y_test, sequenceInput=None):
    # note: the x_test/y_test arguments are immediately overwritten by the
    # dataset reload below
    x_train, y_train, x_val, y_val, x_test, y_test = dataset_load(datasetName)
    word_index = dict_load(datasetName + '_word_index')
    embedding_layer = None
    if ModelParams['use_pretrained'] == 1:
        embedding_matrix = embedding_matrix_load(word_index)
        embedding_layer = Embedding(len(word_index) + 1,
                                    EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=MAX_SEQUENCE_LENGTH,
                                    trainable=True)
    else:
        embedding_layer = Embedding(len(word_index) + 1,
                                    EMBEDDING_DIM,
                                    input_length=MAX_SEQUENCE_LENGTH,
                                    trainable=True)
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    l_sdrop1 = SpatialDropout1D(ModelParams['dropout'])(embedded_sequences)
    l_conv1 = None
    l_sdrop2 = None
    l_act1 = None
    l_pool1 = None
    preds = None
    if ModelParams['activation'] == 'relu':
        l_conv1 = Conv1D(ModelParams['conv_filters'],
                         ModelParams['conv_size'],
                         padding=ModelParams['padding'],
                         activation='relu')(l_sdrop1)
        l_sdrop2 = SpatialDropout1D(ModelParams['dropout'])(l_conv1)
    else:
        acts = getModelParamData('activation')
        l_conv1 = Conv1D(ModelParams['conv_filters'],
                         ModelParams['conv_size'],
                         padding=ModelParams['padding'])(l_sdrop1)
        if len(acts) == 1:
            if acts[0].lower() == 'leakyrelu':
                l_act1 = LeakyReLU(0.3)(l_conv1)
        if len(acts) > 1:
            if acts[0].lower() == 'leakyrelu':
                l_act1 = LeakyReLU(acts[1])(l_conv1)
        if l_act1 is None:
            print('Cannot parse activation parameter. A LeakyReLU layer with '
                  'the default parameter (0.3) will be used in the model')
            l_act1 = LeakyReLU(0.3)(l_conv1)
        l_sdrop2 = SpatialDropout1D(ModelParams['dropout'])(l_act1)
    if ModelParams['pooling'] == 'global':
        l_pool1 = GlobalMaxPool1D()(l_sdrop2)
        preds = Dense(y_val.shape[1], activation='softmax')(l_pool1)
    else:
        if ModelParams['padding'] == 'same':
            l_pool1 = MaxPooling1D(MAX_SEQUENCE_LENGTH)(l_sdrop2)
        else:
            l_pool1 = MaxPooling1D(MAX_SEQUENCE_LENGTH -
                                   ModelParams['conv_size'] + 1)(l_sdrop2)
        l_flat = Flatten()(l_pool1)
        preds = Dense(y_val.shape[1], activation='softmax')(l_flat)

    print('training model ' + modelName + ' on dataset ' + datasetName + ' start')
    model = Model(sequence_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer=ModelParams['optimizer'],
                  metrics=['acc'])
    model.summary()
    try:
        model.fit(x_train,
                  y_train,
                  validation_data=(x_val, y_val),
                  epochs=ModelParams['epochs'],
                  batch_size=ModelParams['batch_size'],
                  callbacks=[
                      keras.callbacks.EarlyStopping(monitor='val_loss',
                                                    min_delta=0.01,
                                                    patience=1,
                                                    verbose=1,
                                                    mode='auto')
                  ])
    except Exception as exc:
        print(exc)
        model_save(model, modelName + '_temp')
    print('trainModels finished')
    sfName = modelName + '_' + datasetName
    for key, value in ModelParams.items():
        sfName += ('_' + str(value))
    model_save(model, sfName)
    scores = model.evaluate(x_test, y_test)
    print(scores)
    print('training model finished')
    del model
    reloadSession()
    return scores[1]
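# reloadSession() above is not defined in this snippet. A minimal sketch of
# what such a helper plausibly does (an assumption, not the author's code):
# drop the current Keras/TensorFlow graph between runs so repeated training
# in one process does not leak memory or layer names.
from keras import backend as K


def reloadSession():
    # A fresh graph/session is created lazily the next time a model is built.
    K.clear_session()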
def get_model():
    CONC = []
    IGLOO_V = []
    inin = Input(shape=input_shape, name='input')
    # inin = Lambda(lambda q: q[:, 1:, :])(inin)

    a = Conv1D(40, 2, padding="causal")(inin)
    b = Conv1D(40, 4, padding="causal")(inin)
    c = Conv1D(40, 8, padding="causal")(inin)
    x = Concatenate(axis=-1)([a, b, c])
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)

    a = Conv1D(40, 2, padding="causal")(x)
    b = Conv1D(40, 4, padding="causal")(x)
    c = Conv1D(40, 8, padding="causal")(x)
    x = Concatenate(axis=-1)([a, b, c])
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)

    a = Conv1D(40, 2, padding="causal")(x)
    b = Conv1D(40, 4, padding="causal")(x)
    c = Conv1D(40, 8, padding="causal")(x)
    x = Concatenate(axis=-1)([a, b, c])
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)

    x = Lambda(lambda q: q[:, 1:, :])(x)
    x = Conv1D(64, 1, strides=1, padding=padding)(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation("relu")(x)
    x = SpatialDropout1D(mDR)(x)
    IGLOO_V.append(
        IGLOO_RETURNFULLSEQ(x,
                            nb_patches_FULL,
                            Conv1D_dim_full_seq,
                            patch_size=patch_size,
                            padding_style=padding,
                            stretch_factor=stretch_factor,
                            l2reg=igloo_l2reg,
                            add_residual=add_residual,
                            nb_stacks=nb_stacks_full,
                            build_backbone=build_backbone))
    CONC.append(IGLOO_V[0])

    for kk in range(5):
        x = Conv1D(C1D_K, 1, strides=1, padding=padding)(CONC[kk])
        x = BatchNormalization(axis=-1)(x)
        x = Activation("relu")(x)
        x = SpatialDropout1D(mDR)(x)
        IGLOO_V.append(
            IGLOO_RETURNFULLSEQ(x,
                                nb_patches_FULL,
                                Conv1D_dim_full_seq,
                                patch_size=patch_size,
                                padding_style=padding,
                                stretch_factor=stretch_factor,
                                l2reg=igloo_l2reg,
                                add_residual=add_residual,
                                nb_stacks=nb_stacks_full,
                                build_backbone=build_backbone))
        # second residual connection
        co = Add()([IGLOO_V[kk + 1], CONC[kk]])
        CONC.append(Activation("relu")(co))

    x = Conv1D(C1D_K, 1, strides=1, padding=padding)(CONC[-1])
    x = BatchNormalization(axis=-1)(x)
    x = Activation("relu")(x)
    x = SpatialDropout1D(mDR)(x)
    y = IGLOO(x,
              nb_patches,
              CONV1D_dim,
              patch_size=patch_size,
              return_sequences=False,
              l2reg=igloo_l2reg,
              padding_style=padding,
              nb_stacks=nb_stacks,
              DR=mDR,
              max_pooling_kernel=MAXPOOL_size)
    y = Dense(64, activation='relu')(y)
    y = Dropout(0.4)(y)
    # softmax over a single unit always outputs 1.0, which makes a
    # binary_crossentropy head untrainable; sigmoid is the correct activation
    output_1 = Dense(1, activation='sigmoid')(y)

    word_input = Input(shape=(9, ), name='decoder_input')
    # pretrained weights from `balloony`; fine-tuned here (trainable=True)
    embedded_word = Embedding(input_dim=1149,
                              output_dim=500,
                              name='word_embedding',
                              input_length=9,
                              trainable=True,
                              weights=[balloony])(word_input)
    input_ = embedded_word
    # input_ = BatchNormalization(axis=-1)(input_)
    gru_out = GRU(700,
                  activation='tanh',
                  recurrent_activation='sigmoid',
                  dropout=0.22,
                  return_sequences=True,
                  return_state=False,
                  unroll=False,
                  reset_after=True)(input_)
    input_ = gru_out
    input_ = BatchNormalization(axis=-1)(input_)
    gru_out = GRU(700,
                  activation='tanh',
                  recurrent_activation='sigmoid',
                  dropout=0.22,
                  return_sequences=True,
                  return_state=False,
                  unroll=False,
                  reset_after=True)(input_)
    input_ = gru_out

    # additive (Bahdanau-style) attention over the encoder features
    features = Permute((2, 1))(x)
    part1 = Dense(700)(features)
    gru_out = Permute((2, 1))(gru_out)
    shape = K.int_shape(part1)  # int_shape of part1 (not features) sizes the Dense below
    part2 = Dense(shape[1])(gru_out)
    part2 = Permute((2, 1))(part2)
    part3 = Add()([part1, part2])
    score = Activation("tanh")(part3)
    part4 = Dense(1)(score)
    attention_weights = Lambda(lambda x: softmax(x, axis=1))(part4)
    context_vector = multiply([attention_weights, features])
    context_vector = Lambda(lambda x: K.sum(x, axis=1))(context_vector)
    context_vector_mod = Dense(600)(context_vector)
    context_vector_mod = Lambda(lambda x: K.expand_dims(x, -1))(context_vector_mod)
    context_vector_mod = Permute((2, 1))(context_vector_mod)
    gru_out_mod = Dense(600)(gru_out)
    input_ = Concatenate(axis=1)([context_vector_mod, gru_out_mod])
    input_ = Activation("tanh")(input_)
    input_ = BatchNormalization(axis=-1)(input_)
    gru_out = GRU(9,
                  activation='tanh',
                  recurrent_activation='sigmoid',
                  dropout=0.22,
                  return_sequences=True,
                  return_state=False,
                  unroll=False,
                  reset_after=True)(input_)
    # gru_out = LSTM(units=9, return_sequences=True,
    #                dropout=0.22, recurrent_dropout=0.22)(input_)
    gru_out = Permute((2, 1))(gru_out)
    # gru_out = GRU(700, activation='tanh', recurrent_activation='sigmoid',
    #               dropout=0.22, return_sequences=True, return_state=False,
    #               unroll=False, reset_after=True)(input_)
    # input_ = gru_out
    # gru_out = Flatten()(input_)
    gru_out = Activation("tanh")(gru_out)
    sequence_output = TimeDistributed(Dense(units=vocab_size))(gru_out)

    opt = optimizers.Adam(lr=0.0005, clipnorm=1.0, decay=0.001)
    model = Model(inputs=[inin, word_input],
                  outputs=[output_1, sequence_output])
    model.compile(loss=['binary_crossentropy', cross_entropy2],
                  optimizer=opt,
                  metrics=['accuracy'],
                  loss_weights=[100000, 1])
    # return model
    model.fit_generator(Mygenerator(2), epochs=70)
    model.save('eegv2.h5')
    test_file()
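# The Permute/Dense/Add/tanh/softmax arithmetic above hand-rolls additive
# (Bahdanau-style) attention. A compact, self-contained sketch of the same
# mechanism as a reusable function; the names, the fixed feature length, and
# the 700-unit width are assumptions carried over from the snippet, not the
# author's code:
from keras import backend as K
from keras.activations import softmax
from keras.layers import Activation, Add, Dense, Lambda, multiply


def additive_attention(features, query_vec, units=700):
    # features: (batch, T, d) with T known; query_vec: (batch, d_q)
    T = K.int_shape(features)[1]
    part1 = Dense(units)(features)                      # (batch, T, units)
    q = Dense(units)(query_vec)                         # (batch, units)
    q = Lambda(lambda t: K.expand_dims(t, 1))(q)        # (batch, 1, units)
    q = Lambda(lambda t: K.tile(t, [1, T, 1]))(q)       # (batch, T, units)
    # score_i = v^T tanh(W1 f_i + W2 q), softmax over the time axis
    score = Activation('tanh')(Add()([part1, q]))
    weights = Lambda(lambda s: softmax(s, axis=1))(Dense(1)(score))
    # weighted sum of the features: one context vector per sample
    context = Lambda(lambda t: K.sum(t, axis=1))(multiply([weights, features]))
    return context, weights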
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

lr = 1e-3
lr_d = 1e-10
units = 64
spatial_dr = 0.3
kernel_size1 = 3
kernel_size2 = 2
dense_units = 32
dr = 0.1
conv_size = 32

inp = Input(shape=(max_len, ))
x = Embedding(16530, embed_size, weights=[embedding_matrix],
              trainable=False)(inp)
x1 = SpatialDropout1D(spatial_dr)(x)

x_lstm = Bidirectional(CuDNNLSTM(units, return_sequences=True))(x1)
x1 = Conv1D(conv_size, kernel_size=kernel_size1, padding="valid",
            kernel_initializer="he_uniform")(x_lstm)
avg_pool1_lstm = GlobalAveragePooling1D()(x1)
max_pool1_lstm = GlobalMaxPooling1D()(x1)
x2 = Conv1D(conv_size, kernel_size=kernel_size2, padding="valid",
            kernel_initializer="he_uniform")(x_lstm)
avg_pool2_lstm = GlobalAveragePooling1D()(x2)
max_pool2_lstm = GlobalMaxPooling1D()(x2)

# note: avg_pool1_lstm and max_pool1_lstm appear twice in this concatenation
x = concatenate([avg_pool1_lstm, max_pool1_lstm,
                 avg_pool2_lstm, max_pool2_lstm,
                 avg_pool1_lstm, max_pool1_lstm])
x = Dense(5, activation="softmax")(x)

model = Model(inputs=inp, outputs=x)
model.compile(loss="categorical_crossentropy",
              optimizer=Adam(lr=lr, decay=lr_d),
              metrics=["accuracy"])
history = model.fit(X_train, y_ohe, batch_size=16, epochs=16,
                    validation_split=0.1, verbose=1)

preds = model.predict(X_test, batch_size=1024, verbose=1)
# argmax already yields integer class ids, so np.round is unnecessary
predictions = np.argmax(preds, axis=1).astype(int)
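# The two lines at the top of this snippet are the tail of the standard
# pretrained-embedding setup. For context, a minimal sketch of the whole
# pattern (nb_words, embed_size, word_index and embeddings_index are assumed
# to exist, as in the other snippets in this file):
import numpy as np

embedding_matrix = np.zeros((nb_words, embed_size))
for word, i in word_index.items():
    if i >= nb_words:
        continue
    embedding_vector = embeddings_index.get(word)  # dict: word -> vector
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector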
def classifier(model, emb_mean, emb_std, embeddings_index):
    train = pd.read_csv('./input/TIL_NLP_train1_dataset.csv')
    test = pd.read_csv('./input/TIL_NLP_unseen_dataset.csv')
    print('running classifier')
    max_features = 4248
    print(max_features)
    maxlen = 200
    embed_size = 100

    X_train = train["word_representation"].fillna("fillna").values
    y_train = train[["outwear", "top", "trousers",
                     "women dresses", "women skirts"]].values
    X_test = test["word_representation"].fillna("fillna").values
    y_test = test[["outwear", "top", "trousers",
                   "women dresses", "women skirts"]].values

    print('preprocessing start')
    tokenizer = text.Tokenizer(num_words=max_features)
    tokenizer.fit_on_texts(list(X_train) + list(X_test))
    X_train = tokenizer.texts_to_sequences(X_train)
    X_test = tokenizer.texts_to_sequences(X_test)
    x_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    del X_train, X_test, train, test
    gc.collect()

    word_index = tokenizer.word_index
    nb_words = min(max_features, len(word_index))
    embedding_matrix = np.random.normal(emb_mean, emb_std,
                                        (nb_words, embed_size))
    for word, i in word_index.items():
        if i >= max_features:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # row i must match the token id the Embedding layer looks up;
            # writing to row i - 1 would shift every vector by one position
            embedding_matrix[i] = embedding_vector
    print('preprocessing done')

    # session_conf = tf.ConfigProto(intra_op_parallelism_threads=4,
    #                               inter_op_parallelism_threads=4)
    # K.set_session(tf.Session(graph=tf.get_default_graph(), config=session_conf))

    # model
    # wrote out all the blocks instead of looping for simplicity
    # (a loop-based refactor is sketched after this function)
    filter_nr = 64
    filter_size = 3
    max_pool_size = 3
    max_pool_strides = 2
    dense_nr = 256
    spatial_dropout = 0.2
    dense_dropout = 0.5
    train_embed = False
    conv_kern_reg = regularizers.l2(0.00001)
    conv_bias_reg = regularizers.l2(0.00001)

    comment = Input(shape=(maxlen, ))
    emb_comment = Embedding(max_features, embed_size,
                            weights=[embedding_matrix],
                            trainable=train_embed)(comment)
    emb_comment = SpatialDropout1D(spatial_dropout)(emb_comment)

    block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(emb_comment)
    block1 = BatchNormalization()(block1)
    block1 = PReLU()(block1)
    block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block1)
    block1 = BatchNormalization()(block1)
    block1 = PReLU()(block1)

    # we pass the embedded comment through a kernel-size-1 Conv1D because it
    # needs the same shape as the block output; with filter_nr = embed_size
    # (300 in that case) this resize step can be dropped and emb_comment
    # added directly to block1_output
    resize_emb = Conv1D(filter_nr, kernel_size=1, padding='same',
                        activation='linear', kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(emb_comment)
    resize_emb = PReLU()(resize_emb)

    block1_output = add([block1, resize_emb])
    block1_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block1_output)

    block2 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block1_output)
    block2 = BatchNormalization()(block2)
    block2 = PReLU()(block2)
    block2 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block2)
    block2 = BatchNormalization()(block2)
    block2 = PReLU()(block2)

    block2_output = add([block2, block1_output])
    block2_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block2_output)

    block3 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block2_output)
    block3 = BatchNormalization()(block3)
    block3 = PReLU()(block3)
    block3 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block3)
    block3 = BatchNormalization()(block3)
    block3 = PReLU()(block3)

    block3_output = add([block3, block2_output])
    block3_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block3_output)

    block4 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block3_output)
    block4 = BatchNormalization()(block4)
    block4 = PReLU()(block4)
    block4 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block4)
    block4 = BatchNormalization()(block4)
    block4 = PReLU()(block4)

    block4_output = add([block4, block3_output])
    block4_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block4_output)

    block5 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block4_output)
    block5 = BatchNormalization()(block5)
    block5 = PReLU()(block5)
    block5 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block5)
    block5 = BatchNormalization()(block5)
    block5 = PReLU()(block5)

    block5_output = add([block5, block4_output])
    block5_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block5_output)

    block6 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block5_output)
    block6 = BatchNormalization()(block6)
    block6 = PReLU()(block6)
    block6 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block6)
    block6 = BatchNormalization()(block6)
    block6 = PReLU()(block6)

    block6_output = add([block6, block5_output])
    block6_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block6_output)

    block7 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block6_output)
    block7 = BatchNormalization()(block7)
    block7 = PReLU()(block7)
    block7 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                    activation='linear', kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block7)
    block7 = BatchNormalization()(block7)
    block7 = PReLU()(block7)

    block7_output = add([block7, block6_output])
    output = GlobalMaxPooling1D()(block7_output)
    output = Dense(dense_nr, activation='linear')(output)
    output = BatchNormalization()(output)
    output = PReLU()(output)
    output = Dropout(dense_dropout)(output)
    output = Dense(5, activation='sigmoid')(output)

    # this assignment must be live: otherwise the graph built above is
    # discarded and the `model` passed in as an argument is compiled instead
    model = Model(comment, output)
    # print("Correct model: ", type(model))
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'])

    batch_size = 128
    epochs = 4
    Xtrain, Xval, ytrain, yval = train_test_split(x_train, y_train,
                                                  train_size=0.95,
                                                  random_state=233)
    lr = callbacks.LearningRateScheduler(schedule)
    ra_val = RocAucEvaluation(validation_data=(Xval, yval), interval=1)
    model.fit(Xtrain, ytrain, batch_size=batch_size, epochs=epochs,
              validation_data=(Xval, yval), callbacks=[lr, ra_val],
              verbose=0)

    y_pred = model.predict(x_test)
    y_pred = [[1 if i > 0.5 else 0 for i in r] for r in y_pred]
    y_test = y_test.tolist()
    accuracy = sum([y_pred[i] == y_test[i]
                    for i in range(len(y_pred))]) / len(y_pred) * 100
    print([y_pred[i] == y_test[i] for i in range(len(y_pred))])
    print(accuracy, "%")
    """
    submission = pd.read_csv('../input/jigsaw-toxic-comment-classification-challenge/sample_submission.csv')
    submission[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]] = y_pred
    submission.to_csv('dpcnn_test_preds.csv', index=False)
    """
    return model
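# The seven hand-written blocks in classifier() differ only in their input
# tensor. A behaviour-preserving loop sketch under the same hyper-parameters;
# `dpcnn_block` is a name introduced here, not the author's:
def dpcnn_block(inputs, filter_nr, filter_size, kern_reg, bias_reg,
                pool_size, pool_strides, pool=True):
    x = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
               activation='linear', kernel_regularizer=kern_reg,
               bias_regularizer=bias_reg)(inputs)
    x = BatchNormalization()(x)
    x = PReLU()(x)
    x = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
               activation='linear', kernel_regularizer=kern_reg,
               bias_regularizer=bias_reg)(x)
    x = BatchNormalization()(x)
    x = PReLU()(x)
    x = add([x, inputs])  # residual connection
    if pool:
        x = MaxPooling1D(pool_size=pool_size, strides=pool_strides)(x)
    return x

# blocks 2-6 then collapse to a loop, and block 7 skips the strided pooling:
# out = block1_output
# for _ in range(5):
#     out = dpcnn_block(out, filter_nr, filter_size, conv_kern_reg,
#                       conv_bias_reg, max_pool_size, max_pool_strides)
# out = dpcnn_block(out, filter_nr, filter_size, conv_kern_reg,
#                   conv_bias_reg, max_pool_size, max_pool_strides, pool=False)
# output = GlobalMaxPooling1D()(out)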
embeddedc = Embedding(len(words) + 1,
                      actors_size,
                      embeddings_initializer=Constant(networkcore_emb),
                      input_length=seqlen,
                      mask_zero=False,
                      trainable=True)(seqsc)
# concat = concatenate([embedded, embeddedc])
dropout = Dropout(rate=Dropoutrate)(seqsa)
middle = Dense(Hidden,
               activation='relu',
               kernel_regularizer=regularizers.l2(Regularization))(dropout)
batchNorm = BatchNormalization()(middle)
dropoutb = SpatialDropout1D(rate=Dropoutrate)(embedded)
blstm = Bidirectional(CuDNNLSTM(Hidden, return_sequences=False),
                      merge_mode='sum')(dropoutb)
batchNormb = BatchNormalization()(blstm)
dropoutc = SpatialDropout1D(rate=Dropoutrate)(embeddedc)
blstmc = Bidirectional(CuDNNLSTM(Hidden, return_sequences=False),
                       merge_mode='sum')(dropoutc)
batchNormc = BatchNormalization()(blstmc)
concat = concatenate([batchNorm, batchNormb, batchNormc])
dense = Dense(nState,
              activation='softmax',
              kernel_regularizer=regularizers.l2(Regularization))(concat)
# `input=`/`output=` are long-deprecated Model keywords; use inputs/outputs
model = Model(inputs=sequence, outputs=dense)
def get_model(embedding_matrix, channels, wordnet_emb, id_to_index):
    """
    :param embedding_matrix:
    :param channels:
    :param wordnet_emb:
    :param id_to_index:
    :return:
    """
    inputs = []
    pool_layers = []

    if 'words' in channels:
        words_input_left = Input(shape=(max_sentence_length, ),
                                 name='left_words')
        words_input_right = Input(shape=(max_sentence_length, ),
                                  name='right_words')
        inputs += [words_input_left, words_input_right]
        words_pool = get_words_channel_conc(
            (words_input_left, words_input_right), embedding_matrix)
        pool_layers += [words_pool]

    if 'wordnet' in channels:
        wordnet_left = Input(shape=(max_sentence_length, ),
                             name='left_wordnet')
        wordnet_right = Input(shape=(max_sentence_length, ),
                              name='right_wordnet')
        inputs += [wordnet_left, wordnet_right]
        e_wn_left = Embedding(len(wordnet_emb),
                              wordnet_embedding_size,
                              input_length=max_sentence_length,
                              trainable=True,
                              name='wn_emb_left')
        e_wn_left.build((None, ))
        e_wn_left = e_wn_left(wordnet_left)
        e_wn_left = SpatialDropout1D(0.5)(e_wn_left)
        e_wn_right = Embedding(len(wordnet_emb),
                               wordnet_embedding_size,
                               input_length=max_sentence_length,
                               trainable=True,
                               name='wn_emb_right')
        e_wn_right.build((None, ))
        e_wn_right = e_wn_right(wordnet_right)
        e_wn_right = SpatialDropout1D(0.5)(e_wn_right)
        wn_lstm_left = LSTM(LSTM_units,
                            input_shape=(max_sentence_length,
                                         wordnet_embedding_size),
                            name='wn_lstm_left',
                            return_sequences=True)(e_wn_left)
        wn_lstm_right = LSTM(LSTM_units,
                             input_shape=(max_sentence_length,
                                          wordnet_embedding_size),
                             name='wn_lstm_right',
                             return_sequences=True)(e_wn_right)
        wn_pool_left = GlobalMaxPooling1D()(wn_lstm_left)
        wn_pool_right = GlobalMaxPooling1D()(wn_lstm_right)
        pool_layers += [wn_pool_left, wn_pool_right]

    if 'concatenation_ancestors' in channels:
        # Uses just one chain without LSTM (order is always the same)
        ancestors_input_left = Input(shape=(max_ancestors_length, ),
                                     name='left_ancestors')
        ancestors_input_right = Input(shape=(max_ancestors_length, ),
                                      name='right_ancestors')
        inputs += [ancestors_input_left, ancestors_input_right]
        ancestors_pool_left, ancestors_pool_right = get_ontology_concat_channel(
            (ancestors_input_left, ancestors_input_right), id_to_index)
        pool_layers += [ancestors_pool_left, ancestors_pool_right]

    if 'common_ancestors' in channels:
        ancestors_common = Input(shape=(max_ancestors_length * 2, ),
                                 name='common_ancestors')
        inputs += [ancestors_common]
        ancestors_pool = get_ontology_common_channel(ancestors_common,
                                                     id_to_index)
        pool_layers.append(ancestors_pool)

    # avoid shadowing keras.layers.concatenate with a local of the same name
    if len(pool_layers) > 1:
        concatenated = keras.layers.concatenate(pool_layers, axis=-1)
    else:
        concatenated = pool_layers[0]

    final_hidden = Dense(sigmoid_units,
                         activation='sigmoid',
                         name='hidden_layer')(concatenated)
    output = Dense(n_classes, activation='softmax',
                   name='output')(final_hidden)
    model = Model(inputs=inputs, outputs=[output])
    model.compile(
        loss='categorical_crossentropy',  # options: categorical | binary_crossentropy
        optimizer=RMSprop(0.0001),        # options: Adam(0.001) | SGD(0.1)
        # sample_weight_mode=None,        # optional
        # weighted_metrics=[recall],      # optional
        metrics=['accuracy', precision, recall, f1])
    print(model.summary())
    return model
word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.zeros((nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

print_step('Build model...')
comment = Input(shape=(maxlen, ))
emb_comment = Embedding(max_features, embed_size,
                        weights=[embedding_matrix],
                        trainable=train_embed)(comment)
emb_comment = SpatialDropout1D(spatial_dropout)(emb_comment)

block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                activation='linear')(emb_comment)
block1 = BatchNormalization()(block1)
block1 = PReLU()(block1)
block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same',
                activation='linear')(block1)
block1 = BatchNormalization()(block1)
block1 = PReLU()(block1)
# we pass the embedded comment through a kernel-size-1 Conv1D because it
# needs to have the same shape as the block output
def build_model(embedding_matrix):
    words = Input(shape=(None, ))
    x = Embedding(*embedding_matrix.shape,
                  weights=[embedding_matrix],
                  trainable=False)(words)
    x = SpatialDropout1D(0.3)(x)
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    hidden = concatenate([
        GlobalMaxPooling1D()(x),
        GlobalAveragePooling1D()(x),
    ])
    hidden = Dense(512, activation='relu')(hidden)
    result = Dense(5, activation='softmax')(hidden)
    model = Model(inputs=words, outputs=result)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# %%time
CHARS_TO_REMOVE = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n“”’\'8?÷a•à-ßسp‘?´°£€\×™v²—'
tokenizer = text.Tokenizer(filters=CHARS_TO_REMOVE)
tokenizer.fit_on_texts(list(x_train) + list(x_test))
# raw string: the backslashes in a Windows path would otherwise be treated
# as escape sequences (and \U is a syntax error in Python 3)
embedding_matrix = build_matrix(
    tokenizer.word_index,
    r'C:\Users\NEHA GUPTA\Desktop\input\fasttext-crawl-300d-2m\crawl-300d-2M.vec')

maxlen = 512
x_train = tokenizer.texts_to_sequences(x_train)
x_test = tokenizer.texts_to_sequences(x_test)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

model = build_model(embedding_matrix)
model.summary()

# monitor must match the logged metric name ('val_accuracy' in tf.keras,
# which is also the key plotted below)
checkpoint = ModelCheckpoint('model.h5',
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             mode='auto')
history = model.fit(x_train,
                    y_train,
                    batch_size=512,
                    callbacks=[checkpoint],
                    epochs=10,
                    validation_split=0.1)
print(history)

# Plot training & validation accuracy values
plt.plot(history.history['val_accuracy'])
plt.plot(history.history['accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['val_loss'])
plt.plot(history.history['loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.show()
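# build_matrix() is called above but not defined in this snippet. A plausible
# minimal implementation for fastText .vec files (an assumption, not the
# author's code):
import numpy as np


def build_matrix(word_index, path, embed_dim=300):
    # parse "word v1 v2 ... v300" lines into a lookup table
    embeddings_index = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            values = line.rstrip().split(' ')
            if len(values) <= embed_dim:  # skip the "count dim" header line
                continue
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
    # row i holds the vector for token id i; unknown words stay all-zero
    matrix = np.zeros((len(word_index) + 1, embed_dim))
    for word, i in word_index.items():
        vec = embeddings_index.get(word)
        if vec is not None:
            matrix[i] = vec
    return matrix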
def inference(self):
    """main computation graph here:
    1. embedding layer, 2. Bi-LSTM layer, 3. concat, 4. FC layer, 5. softmax
    """
    # 1. get embedding of words in the sentence
    self.embedded_words = tf.nn.embedding_lookup(
        self.Embedding, self.input_x)  # shape: [None, sentence_length, embed_size]
    self.embedded_words = SpatialDropout1D(0.2)(self.embedded_words)

    # 2. Bi-LSTM layer
    # define the cells and get their outputs
    lstm_fw_cell = rnn.GRUCell(self.hidden_size)  # forward direction cell
    lstm_bw_cell = rnn.GRUCell(self.hidden_size)  # backward direction cell
    if self.dropout_keep_prob is not None:
        lstm_fw_cell = rnn.DropoutWrapper(
            lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
        lstm_bw_cell = rnn.DropoutWrapper(
            lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)
    # bidirectional_dynamic_rnn: input: [batch_size, max_time, input_size]
    # output: a tuple (outputs, output_states), where outputs is a tuple
    # (output_fw, output_bw) holding the forward and backward rnn output Tensors
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, self.embedded_words,
        dtype=tf.float32)  # [batch_size, sequence_length, hidden_size]
    print("outputs:===>", outputs)
    # outputs: (<tf.Tensor 'bidirectional_rnn/fw/fw/transpose:0' shape=(?, 5, 100) dtype=float32>,
    #           <tf.Tensor 'ReverseV2:0' shape=(?, 5, 100) dtype=float32>)

    # 3. concat output
    output_rnn_last = tf.concat(
        outputs, axis=2)  # [batch_size, sequence_length, hidden_size*2]
    # output_rnn_last = output_rnn_last[:, -1, :]
    '''
    output_fw = outputs[0]
    output_bw = outputs[1]
    output_rnn_fw_max = tf.reduce_max(output_fw, axis=2)
    output_rnn_bw_max = tf.reduce_max(output_bw, axis=2)
    output_rnn_fw_avg = tf.reduce_mean(output_fw, axis=2)
    output_rnn_bw_avg = tf.reduce_mean(output_bw, axis=2)
    self.output_rnn_last = tf.concat([output_rnn_fw_max, output_rnn_bw_max,
                                      output_rnn_fw_avg, output_rnn_bw_avg,
                                      output_rnn_last], axis=1)
    '''
    output_rnn_avg = GlobalAveragePooling1D()(output_rnn_last)
    output_rnn_max = GlobalMaxPooling1D()(output_rnn_last)
    self.output_rnn_last = tf.concat([output_rnn_avg, output_rnn_max], axis=1)
    # self.output_rnn_last = tf.concat(outputs, axis=2)  # [batch_size, sequence_length, hidden_size*2]
    # self.output_rnn_last = self.output_rnn_last[:, -1, :]
    # self.output_rnn_last = tf.reduce_max(output_rnn, axis=2)  # [batch_size, hidden_size*2]
    print("output_rnn_last:", self.output_rnn_last)
    # <tf.Tensor 'strided_slice:0' shape=(?, 200) dtype=float32>

    # 4. logits (use a linear layer)
    with tf.name_scope("output"):
        # self.output_rnn_last = tf.nn.dropout(self.output_rnn_last, self.dropout_keep_prob)
        # temp = tf.nn.relu(tf.matmul(self.output_rnn_last, self.W_relu) + self.b_relu)
        # temp_drop = tf.nn.dropout(temp, self.dropout_keep_prob)
        # logits = tf.matmul(temp, self.W_projection) + self.b_projection
        logits = tf.matmul(self.output_rnn_last,
                           self.W_projection) + self.b_projection
    return logits
def train_RNN(data, features, name):
    # read files
    data = preprocess_data(data)
    tokenizer = get_tokenizer(data, features)
    # transforms each text in texts to a sequence of integers
    X = tokenizer.texts_to_sequences(data['text'].values)
    # ensures all sequences in a list have the same length by padding with 0s
    X = pad_sequences(X)

    # create the LSTM network
    # embed_dim, lstm_out, batch_size and the dropout rates are
    # hyperparameters, i.e. they need to be tweaked manually
    embed_dim = 128
    lstm_out = 196

    # initialize the model
    model = Sequential()
    # Embedding layer
    # first argument: number of distinct words in the training set
    # second argument: size of the embedding vectors
    # input_length: size of each input sequence
    model.add(Embedding(features, embed_dim, input_length=X.shape[1]))
    # SpatialDropout1D layer
    # arg: rate = fraction of the input units to drop,
    # i.e. the probability of setting each input to the layer to zero;
    # added between existing layers as a regularization method
    # that helps prevent overfitting
    model.add(SpatialDropout1D(0.4))
    # LSTM recurrent layer
    # arg1: dimensionality of the output space
    # dropout: fraction of the units to drop for the linear transformation
    # of the inputs
    # recurrent_dropout: fraction of the units to drop for the linear
    # transformation of the recurrent state
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    # Dense layer: a regular densely connected layer that computes
    # activation(dot(input, kernel) + bias); softmax is used because the
    # network trains with categorical crossentropy, which pairs with it
    # arg1: number of units, which determines the output shape
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # print(model.summary())

    # converts the categorical variable into indicator variables:
    # a matrix whose columns are the classes and whose rows are samples
    Y = pd.get_dummies(data['sentiment']).values
    # split training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                        test_size=0.33,
                                                        random_state=42)
    # print(X_train.shape, Y_train.shape)
    # print(X_test.shape, Y_test.shape)

    # number of samples that will be propagated through the network
    batch_size = 32
    # verbose controls the information displayed while training; 0 for no output
    model.fit(X_train, Y_train, epochs=7, batch_size=batch_size, verbose=2)
    model.save("api//model//" + name)

    # the lower the loss, the better the model;
    # accuracy is the fraction of correctly classified samples
    print("Evaluating model")
    print(X_test.shape)
    print(Y_test.shape)
    loss, acc = model.evaluate(X_test, Y_test,
                               verbose=2, batch_size=batch_size)
    print("loss: %.2f" % (loss))
    print("acc: %.2f" % (acc))
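# get_tokenizer() and preprocess_data() are used above but not shown in this
# snippet. A minimal sketch of what get_tokenizer plausibly does, assuming
# `features` caps the vocabulary size (an assumption, not the author's code):
from keras.preprocessing.text import Tokenizer


def get_tokenizer(data, features):
    # keep only the `features` most frequent tokens, splitting on whitespace
    tokenizer = Tokenizer(num_words=features, split=' ')
    tokenizer.fit_on_texts(data['text'].values)
    return tokenizer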
                          vocab=w_vocab,
                          tokenizer="word",
                          pad=o.w_maxlen)
w_dev, _ = doc_to_numseq(np.array(data.docs)[dev_idx],
                         vocab=w_vocab,
                         tokenizer="word",
                         pad=o.w_maxlen)

c_inp = Input(shape=(o.c_maxlen, ), name='char_input')
w_inp = Input(shape=(o.w_maxlen, ), name='word_input')
c_emb = Embedding(len(c_vocab) + 4,
                  o.c_embdim,
                  mask_zero=True,
                  name='char_embedding')(c_inp)
c_emb = SpatialDropout1D(o.c_embdrop)(c_emb)
w_emb = Embedding(len(w_vocab) + 4,
                  o.w_embdim,
                  mask_zero=True,
                  name='word_embedding')(w_inp)
w_emb = SpatialDropout1D(o.w_embdrop)(w_emb)

if o.rnn == 'LSTM':
    rnn = LSTM
else:
    rnn = GRU
c_fw = rnn(o.c_featdim, dropout=o.c_featdrop, name='char_fw_rnn')(c_emb)
c_bw = rnn(o.c_featdim,
           dropout=o.c_featdrop,
           go_backwards=True,
embeddedb = Embedding(len(chars) + 1,
                      word_size,
                      embeddings_initializer=Constant(zhwiki_emb),
                      input_length=maxlen,
                      mask_zero=False)(seqsb)
embeddedc = Embedding(len(chars) + 1,
                      word_size,
                      embeddings_initializer=Constant(zhwiki_emb),
                      input_length=maxlen,
                      mask_zero=False)(seqsc)
maximuma = Maximum()([embeddeda, embeddedb])
maximumb = Maximum()([embeddedc, embeddedb])
# zhwiki_biemb = numpy.load("model/zhwiki_biembedding.npy")
zhwiki_biemb = sparse.load_npz("model/zhwiki_biembedding.npz").todense()
embeddedd = Embedding(len(bigrams) + 1,
                      word_size,
                      input_length=maxlen,
                      embeddings_initializer=Constant(zhwiki_biemb),
                      mask_zero=False)(seqsd)
embeddede = Embedding(len(bigrams) + 1,
                      word_size,
                      input_length=maxlen,
                      embeddings_initializer=Constant(zhwiki_biemb),
                      mask_zero=False)(seqse)
concat = concatenate([embeddeda, maximuma, maximumb, embeddedd, embeddede])
dropout = SpatialDropout1D(rate=Dropoutrate)(concat)
blstm = Bidirectional(CuDNNLSTM(Hidden,
                                batch_input_shape=(maxlen, nFeatures),
                                return_sequences=True),
                      merge_mode='sum')(dropout)
# dropout = Dropout(rate=Dropoutrate)(blstm)
batchNorm = BatchNormalization()(blstm)
dense = Dense(nState,
              activation='softmax',
              kernel_regularizer=regularizers.l2(Regularization))(batchNorm)
# crf = CRF(nState, activation='softmax',
#           kernel_regularizer=regularizers.l2(Regularization))(dropout)
# `input=`/`output=` are long-deprecated Model keywords; use inputs/outputs
model = Model(inputs=sequence, outputs=dense)
# model.compile(loss='categorical_crossentropy', optimizer=adagrad, metrics=["accuracy"])
# optimizer = Adagrad(lr=learningrate)
model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
model.summary()

with codecs.open('model/f5/msr_train_crffeatures.pkl', 'rb') as fx:
    with codecs.open('model/f5/msr_train_crfstates.pkl', 'rb') as fy:
def main():
    # sqlite3 = sqlite3.connect('quit.db')
    # Establish connection
    with sqlite3.connect('quit.db') as db:
        c = db.cursor()
        c.execute('CREATE TABLE IF NOT EXISTS user '
                  '(username TEXT NOT NULL, password TEXT NOT NULL);')
        db.commit()
        # db.close()

    class MyFrame1(wx.Frame):
        global path

        def __init__(self, parent):
            wx.Frame.__init__(self, parent, id=wx.ID_ANY,
                              title=wx.EmptyString, pos=wx.DefaultPosition,
                              size=wx.Size(500, 300),
                              style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL)
            self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)
            bSizer1 = wx.BoxSizer(wx.VERTICAL)
            self.m_staticText1 = wx.StaticText(self, wx.ID_ANY,
                                               u"Select CSV File",
                                               wx.DefaultPosition,
                                               wx.DefaultSize, 0)
            self.m_staticText1.Wrap(-1)
            bSizer1.Add(self.m_staticText1, 0, wx.ALL, 5)
            self.m_filePicker1 = wx.FilePickerCtrl(self, wx.ID_ANY,
                                                   wx.EmptyString,
                                                   u"Select a file", u"*.csv",
                                                   wx.DefaultPosition,
                                                   wx.Size(500, -1),
                                                   wx.FLP_DEFAULT_STYLE)
            bSizer1.Add(self.m_filePicker1, 0, wx.ALL, 5)
            self.m_button1 = wx.Button(self, wx.ID_ANY, u"Submit",
                                       wx.DefaultPosition, wx.DefaultSize, 0)
            bSizer1.Add(self.m_button1, 0, wx.ALL, 5)
            self.SetSizer(bSizer1)
            self.Layout()
            self.Centre(wx.BOTH)
            # Connect events
            self.m_button1.Bind(wx.EVT_BUTTON, self.submit)

        def __del__(self):
            pass

        # Virtual event handlers; override them in your derived class
        def submit(self, event):
            self.path = self.m_filePicker1.Path
            self.Close()

    app = wx.App(False)
    frame = MyFrame1(None)
    frame.Show(True)
    app.MainLoop()

    data = pd.read_csv(frame.path, encoding='unicode_escape')
    # Keeping only the necessary columns
    data = data[['text', 'sentiment']]
    data = data[data.sentiment != "Neutral"]
    data['text'] = data['text'].apply(lambda x: x.lower())
    # 'a-zA-Z', not 'a-zA-z': the ASCII range A-z also matches [\]^_` and backslash
    data['text'] = data['text'].apply(
        lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))
    print(data[data['sentiment'] == 'Positive'].size)
    print(data[data['sentiment'] == 'Negative'].size)

    # note: mutating the row returned by iterrows() does not write back to `data`
    for idx, row in data.iterrows():
        row[0] = row[0].replace('rt', ' ')

    # POS tagging
    for j in data["text"]:
        tokenized = sent_tokenize(j)
        for i in tokenized:
            print(i)
            wordsList = nltk.word_tokenize(i)
            wordsList = [w for w in wordsList if w not in stop_words]
            tagged = nltk.pos_tag(wordsList)
            print(tagged)

    ps = PorterStemmer()
    for j in data["text"]:
        print(j)
        words = word_tokenize(j)
        for w in words:
            print(ps.stem(w))

    max_features = 2000
    tokenizer = Tokenizer(num_words=max_features, split=' ')
    tokenizer.fit_on_texts(data['text'].values)
    X = tokenizer.texts_to_sequences(data['text'].values)
    X = pad_sequences(X)

    embed_dim = 128
    lstm_out = 196
    model = Sequential()
    model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
    model.add(SpatialDropout1D(0.4))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())

    Y = pd.get_dummies(data['sentiment']).values
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                        test_size=0.33,
                                                        random_state=42)
    print(X_train.shape, Y_train.shape)
    print(X_test.shape, Y_test.shape)

    batch_size = 32
    model.fit(X_train, Y_train, epochs=7, batch_size=batch_size, verbose=2)

    validation_size = 1500
    X_validate = X_test[-validation_size:]
    Y_validate = Y_test[-validation_size:]
    X_test = X_test[:-validation_size]
    Y_test = Y_test[:-validation_size]
    # score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
    # print("score: %.2f" % (score))
    # print("acc: %.2f" % (acc))

    pos_cnt, neg_cnt, pos_correct, neg_correct = 0, 0, 0, 0
    for x in range(len(X_validate)):
        result = model.predict(X_validate[x].reshape(1, X_test.shape[1]),
                               batch_size=1,
                               verbose=2)[0]
        if np.argmax(result) == np.argmax(Y_validate[x]):
            if np.argmax(Y_validate[x]) == 0:
                neg_correct += 1
            else:
                pos_correct += 1
        if np.argmax(Y_validate[x]) == 0:
            neg_cnt += 1
        else:
            pos_cnt += 1

    pos_acc = (pos_correct / pos_cnt * 100)
    neg_acc = (neg_correct / neg_cnt * 100)
    print("pos_acc", pos_correct / pos_cnt * 100, "%")
    print("neg_acc", neg_correct / neg_cnt * 100, "%")

    labels = ['Positive', 'Negative']
    # sizes = [5, neg_per, neu_per]
    sizes = [pos_acc, neg_acc]
    colors = ['yellowgreen', 'gold']
    patches, texts = plt.pie(sizes, colors=colors, shadow=True,
                             labels=labels, startangle=90)
    plt.legend(patches, labels, loc="best")
    plt.axis('equal')
    plt.tight_layout()
    plt.show()

    class MyFrame4(wx.Frame):

        def __init__(self, parent):
            wx.Frame.__init__(self, parent, id=wx.ID_ANY,
                              title=wx.EmptyString, pos=wx.DefaultPosition,
                              size=wx.Size(603, 397),
                              style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL)
            self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)
            bSizer3 = wx.BoxSizer(wx.VERTICAL)
            # self.m_staticText4 = wx.StaticText(self, wx.ID_ANY, u"Enter Text",
            #                                    wx.DefaultPosition, wx.DefaultSize, 0)
            # self.m_staticText4.Wrap(-1)
            # bSizer3.Add(self.m_staticText4, 0, wx.ALL, 4)
            # self.m_textCtrl3 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString,
            #                                wx.DefaultPosition, wx.Size(500, 100), 0)
            # bSizer3.Add(self.m_textCtrl3, 0, wx.ALL, 4)
            self.m_staticText6 = wx.StaticText(self, wx.ID_ANY,
                                               u"Search tweet",
                                               wx.DefaultPosition,
                                               wx.DefaultSize, 0)
            self.m_staticText6.Wrap(-1)
            bSizer3.Add(self.m_staticText6, 0, wx.ALL, 4)
            self.m_textCtrl5 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString,
                                           wx.DefaultPosition,
                                           wx.Size(500, 100), 0)
            bSizer3.Add(self.m_textCtrl5, 0, wx.ALL, 4)
            self.m_button3 = wx.Button(self, wx.ID_ANY, u"Submit",
                                       wx.DefaultPosition, wx.DefaultSize, 0)
            bSizer3.Add(self.m_button3, 0, wx.ALL, 4)
            # the polarity widgets must exist for click() below to show a result
            self.m_staticText5 = wx.StaticText(self, wx.ID_ANY,
                                               u"View Polarity",
                                               wx.DefaultPosition,
                                               wx.DefaultSize, 0)
            self.m_staticText5.Wrap(-1)
            bSizer3.Add(self.m_staticText5, 0, wx.ALL, 4)
            self.m_textCtrl4 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString,
                                           wx.DefaultPosition,
                                           wx.Size(300, -1), 0)
            bSizer3.Add(self.m_textCtrl4, 0, wx.ALL, 4)
            self.SetSizer(bSizer3)
            self.Layout()
            self.Centre(wx.BOTH)
            # Connect events
            self.m_button3.Bind(wx.EVT_BUTTON, self.click)

        def __del__(self):
            pass

        # Virtual event handlers; override them in your derived class
        def click(self, event):
            txt1 = self.m_textCtrl5.Value
            global txt_search
            txt_search = self.m_textCtrl5.Value
            # twt = 'Meetings: Because none of us is as dumb as all of us.'
            # vectorizing the tweet with the pre-fitted tokenizer instance;
            # texts_to_sequences expects a list of texts, so wrap the string
            # (a bare string would be tokenized character by character)
            twt = tokenizer.texts_to_sequences([txt1])
            # padding the tweet to exactly the shape of the embedding input
            # (maxlen must match X.shape[1] used at training time)
            twt = pad_sequences(twt, maxlen=28, dtype='int32', value=0)
            # print(twt)
            sentiment = model.predict(twt, batch_size=1, verbose=2)[0]
            if np.argmax(sentiment) == 0:
                self.m_textCtrl4.SetValue(str("negative"))
            elif np.argmax(sentiment) == 1:
                self.m_textCtrl4.SetValue(str("positive"))
            event.Skip()

    app4 = wx.App(False)
    frame = MyFrame4(None)
    frame.Show(True)
    app4.MainLoop()

    class MyFrame5(wx.Frame):

        def __init__(self, parent):
            wx.Frame.__init__(self, parent, id=wx.ID_ANY,
                              title=wx.EmptyString, pos=wx.DefaultPosition,
                              size=wx.Size(603, 397),
                              style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL)
            self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)
            bSizer3 = wx.BoxSizer(wx.VERTICAL)
            self.m_staticText4 = wx.StaticText(self, wx.ID_ANY, u"Enter Text",
                                               wx.DefaultPosition,
                                               wx.DefaultSize, 0)
            self.m_staticText4.Wrap(-1)
            bSizer3.Add(self.m_staticText4, 0, wx.ALL, 5)
            self.m_textCtrl3 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString,
                                           wx.DefaultPosition,
                                           wx.Size(500, 100), 0)
            bSizer3.Add(self.m_textCtrl3, 0, wx.ALL, 5)
            # self.m_staticText6 = wx.StaticText(self, wx.ID_ANY, u"Search tweet",
            #                                    wx.DefaultPosition, wx.DefaultSize, 0)
            # self.m_staticText6.Wrap(-1)
            # bSizer3.Add(self.m_staticText6, 0, wx.ALL, 5)
            # self.m_textCtrl5 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString,
            #                                wx.DefaultPosition, wx.Size(500, 100), 0)
            # bSizer3.Add(self.m_textCtrl5, 0, wx.ALL, 5)
            self.m_button3 = wx.Button(self, wx.ID_ANY, u"Submit",
                                       wx.DefaultPosition, wx.DefaultSize, 0)
            bSizer3.Add(self.m_button3, 0, wx.ALL, 5)
            self.m_staticText5 = wx.StaticText(self, wx.ID_ANY,
                                               u"View Polarity",
                                               wx.DefaultPosition,
                                               wx.DefaultSize, 0)
            self.m_staticText5.Wrap(-1)
            bSizer3.Add(self.m_staticText5, 0, wx.ALL, 5)
            self.m_textCtrl4 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString,
                                           wx.DefaultPosition,
                                           wx.Size(300, -1), 0)
            bSizer3.Add(self.m_textCtrl4, 0, wx.ALL, 5)
            self.SetSizer(bSizer3)
            self.Layout()
            self.Centre(wx.BOTH)
            # Connect events
            self.m_button3.Bind(wx.EVT_BUTTON, self.click)

        def __del__(self):
            pass

        # Virtual event handlers; override them in your derived class
        def click(self, event):
            txt1 = self.m_textCtrl3.Value
            global txt_search
            txt_search = self.m_textCtrl3.Value
            # twt = 'Meetings: Because none of us is as dumb as all of us.'
            # vectorizing the tweet with the pre-fitted tokenizer instance
            # (wrap the string in a list; a bare string is split into characters)
            twt = tokenizer.texts_to_sequences([txt1])
            # padding the tweet to exactly the shape of the embedding input
            twt = pad_sequences(twt, maxlen=28, dtype='int32', value=0)
            # print(twt)
            sentiment = model.predict(twt, batch_size=1, verbose=2)[0]
            if np.argmax(sentiment) == 0:
                self.m_textCtrl4.SetValue(str("negative"))
            elif np.argmax(sentiment) == 1:
                self.m_textCtrl4.SetValue(str("positive"))
            event.Skip()

    app5 = wx.App(False)
    frame = MyFrame5(None)
    frame.Show(True)
    app5.MainLoop()

    tweets = []
    n_cnt = 0
    p_cnt = 0
    try:
        fetched_tweets = api.search(q=txt_search, count=200)
        for tweet in fetched_tweets:
            parsed_tweet = {}
            parsed_tweet['text'] = tweet.text
            twt = tokenizer.texts_to_sequences([tweet.text])
            twt = pad_sequences(twt, maxlen=28, dtype='int32', value=0)
            # print(twt)
            sentiment = model.predict(twt, batch_size=1, verbose=2)[0]
            # argmax returns an integer class id, so compare against class ids
            if np.argmax(sentiment) == 0:
                n_cnt += 1
            elif np.argmax(sentiment) == 1:
                p_cnt += 1
    except tweepy.TweepError as e:
        print("Error : " + str(e))

    print("Final_Result_Positive--->" + str(p_cnt))
    print("Final_Result_negative--->" + str(n_cnt))

    labels = ['Positive', 'Negative']
    # sizes = [5, neg_per, neu_per]
    sizes = [p_cnt, n_cnt]
    colors = ['yellowgreen', 'gold']
    patches, texts = plt.pie(sizes, colors=colors, shadow=True,
                             labels=labels, startangle=90)
    plt.legend(patches, labels, loc="best")
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
def run(self):
    hyperparams = self._parent.hyperparams
    output_dir = hyperparams['output_dir']
    epochs = int(hyperparams['epochs'])
    batch_size = int(hyperparams['batch_size'])
    # vector-space embedding:
    n_dim = int(hyperparams['n_dim'])
    embedding_1_input_dim = int(hyperparams['embedding_1_input_dim'])
    max_review_length = int(hyperparams['max_review_length'])
    pad_type = hyperparams['pad_type']
    trunc_type = hyperparams['trunc_type']
    drop_embed = float(hyperparams['drop_embed'])
    # convolutional layer architecture:
    n_conv = int(hyperparams['n_conv'])
    k_conv = int(hyperparams['k_conv'])
    n_dense = int(hyperparams['n_dense'])
    dropout_1_rate = float(hyperparams['dropout_1_rate'])

    (x_train, y_train), (x_valid, y_valid) = imdb.load_data(
        num_words=embedding_1_input_dim)
    x_train = pad_sequences(x_train, maxlen=max_review_length,
                            padding=pad_type, truncating=trunc_type, value=0)
    x_valid = pad_sequences(x_valid, maxlen=max_review_length,
                            padding=pad_type, truncating=trunc_type, value=0)

    model = Sequential()
    model.add(Embedding(embedding_1_input_dim, n_dim,
                        input_length=max_review_length))
    model.add(SpatialDropout1D(drop_embed))
    model.add(Conv1D(n_conv, k_conv, activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(n_dense, activation='relu'))
    model.add(Dropout(dropout_1_rate))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    modelcheckpoint = ModelCheckpoint(filepath=output_dir +
                                      "/weights.{epoch:02d}.hdf5")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    self._parent.history = model.fit(x_train, y_train,
                                     batch_size=batch_size,
                                     epochs=epochs,
                                     verbose=1,
                                     validation_data=(x_valid, y_valid),
                                     callbacks=[modelcheckpoint])

    weights_filepath = output_dir + "/weights.{:02d}.hdf5".format(
        best_epoch(self._parent.history) + 1)
    print("Using " + weights_filepath)
    model.load_weights(weights_filepath)
    y_hat = model.predict_proba(x_valid)
    for cfg in model.get_config():
        print(cfg)
    print("epochs={}, batch_size={} roc_auc_score {:0.2f}".format(
        epochs, batch_size, roc_auc_score(y_valid, y_hat) * 100.0))
    evt = TrainingDoneEvent(TrainingDoneEvent.EVT_WORK_DONE_TYPE, -1, y_hat)
    wx.PostEvent(self._parent, evt)
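# best_epoch() is used above but not defined in this snippet. A plausible
# one-liner matching how it is called (picks the epoch with the lowest
# validation loss; an assumption, not the author's code):
import numpy as np


def best_epoch(history):
    return int(np.argmin(history.history['val_loss']))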
def build(self, depth=[4, 4, 10, 10], pooling_type='maxpool',
          use_shortcut=False):
    input_text = Input(shape=(self.max_len, ))
    embedding_layer = Embedding(
        self.word_embeddings.shape[0],
        self.word_embeddings.shape[1],
        weights=[self.word_embeddings],
        trainable=self.config.word_embed_trainable)(input_text)
    text_embed = SpatialDropout1D(0.2)(embedding_layer)

    # first temporal conv layer
    conv_out = Conv1D(filters=64, kernel_size=3,
                      kernel_initializer='he_uniform',
                      padding='same')(text_embed)
    shortcut = conv_out

    # temporal conv block: 64
    for i in range(depth[0]):
        if i < depth[0] - 1:
            shortcut = conv_out
            conv_out = self.conv_block(inputs=conv_out, filters=64,
                                       use_shortcut=use_shortcut,
                                       shortcut=shortcut)
        else:
            # shortcut is not used at the last conv block
            conv_out = self.conv_block(inputs=conv_out, filters=64,
                                       use_shortcut=use_shortcut,
                                       shortcut=None)

    # down-sampling; shortcut is the second-to-last conv block output
    conv_out = self.dowm_sampling(inputs=conv_out,
                                  pooling_type=pooling_type,
                                  use_shortcut=use_shortcut,
                                  shortcut=shortcut)
    shortcut = conv_out

    # temporal conv block: 128
    for i in range(depth[1]):
        if i < depth[1] - 1:
            shortcut = conv_out
            conv_out = self.conv_block(inputs=conv_out, filters=128,
                                       use_shortcut=use_shortcut,
                                       shortcut=shortcut)
        else:
            # shortcut is not used at the last conv block
            conv_out = self.conv_block(inputs=conv_out, filters=128,
                                       use_shortcut=use_shortcut,
                                       shortcut=None)

    # down-sampling
    conv_out = self.dowm_sampling(inputs=conv_out,
                                  pooling_type=pooling_type,
                                  use_shortcut=use_shortcut,
                                  shortcut=shortcut)
    shortcut = conv_out

    # temporal conv block: 256 (depth[2] blocks of 256 filters)
    for i in range(depth[2]):
        if i < depth[2] - 1:
            shortcut = conv_out
            conv_out = self.conv_block(inputs=conv_out, filters=256,
                                       use_shortcut=use_shortcut,
                                       shortcut=shortcut)
        else:
            # shortcut is not used at the last conv block
            conv_out = self.conv_block(inputs=conv_out, filters=256,
                                       use_shortcut=use_shortcut,
                                       shortcut=None)

    # down-sampling
    conv_out = self.dowm_sampling(inputs=conv_out,
                                  pooling_type=pooling_type,
                                  use_shortcut=use_shortcut,
                                  shortcut=shortcut)

    # temporal conv block: 512 (depth[3] blocks of 512 filters)
    for i in range(depth[3]):
        if i < depth[3] - 1:
            shortcut = conv_out
            conv_out = self.conv_block(inputs=conv_out, filters=512,
                                       use_shortcut=use_shortcut,
                                       shortcut=shortcut)
        else:
            # shortcut is not used at the last conv block
            conv_out = self.conv_block(inputs=conv_out, filters=512,
                                       use_shortcut=use_shortcut,
                                       shortcut=None)

    # 8-max pooling
    conv_out = KMaxPooling(k=8)(conv_out)
    flatten = Flatten()(conv_out)
    fc1 = Dense(2048, activation='relu')(flatten)
    sentence_embed = Dense(2048, activation='relu')(fc1)
    dense_layer = Dense(256, activation='relu')(sentence_embed)
    if self.config.loss_function == 'binary_crossentropy':
        output = Dense(1, activation='sigmoid')(dense_layer)
    else:
        output = Dense(self.n_class, activation='softmax')(dense_layer)

    model = Model(input_text, output)
    model.compile(loss=self.config.loss_function,
                  metrics=['acc'],
                  optimizer=self.config.optimizer)
    return model
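# conv_block() and dowm_sampling() are methods of this class that the snippet
# does not show. A minimal sketch of a VDCNN-style conv_block consistent with
# how it is called above (an assumption, not the author's implementation;
# shapes must already match for the residual add to apply):
def conv_block(self, inputs, filters, use_shortcut, shortcut=None):
    x = Conv1D(filters=filters, kernel_size=3, padding='same',
               kernel_initializer='he_uniform')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv1D(filters=filters, kernel_size=3, padding='same',
               kernel_initializer='he_uniform')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    if use_shortcut and shortcut is not None:
        # residual connection, used everywhere except the stage's last block
        x = Add()([x, shortcut])
    return x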
def make_model():
    input_models = []
    output_embeddings = []
    numerics = ['float16', 'float32', 'float64']
    categoricals = ['int8', 'int16', 'int32', 'int64']

    for categorical_var in X_train.select_dtypes(include=categoricals):
        # Name of the categorical variable used in the Keras Embedding layer
        cat_emb_name = categorical_var.replace(" ", "") + '_Embedding'
        # Define the embedding size
        no_of_unique_cat = X_train[categorical_var].nunique()
        embedding_size = int(min(np.ceil(no_of_unique_cat / 2), 50))
        # One Embedding layer for each categorical variable
        input_model = Input(shape=(1, ))
        output_model = Embedding(no_of_unique_cat,
                                 embedding_size,
                                 name=cat_emb_name)(input_model)
        output_model = SpatialDropout1D(0.3)(output_model)
        output_model = Reshape(target_shape=(embedding_size, ))(output_model)
        # Append all the categorical inputs
        input_models.append(input_model)
        # Append all the embeddings
        output_embeddings.append(output_model)

    shape_numeric = len(
        X_train.select_dtypes(include=numerics).columns.tolist())
    # The remaining, non-categorical (numerical) columns.
    # Define a single extra network for them and add it to the model list.
    input_numeric = Input(shape=(shape_numeric, ))
    embedding_numeric = BatchNormalization()(input_numeric)
    input_models.append(input_numeric)
    output_embeddings.append(embedding_numeric)

    # Finally, concatenate everything and add further Dense layers
    output = Concatenate()(output_embeddings)
    output = BatchNormalization()(output)
    output = Dense(317, activation='relu',
                   kernel_initializer="uniform")(output)
    output = Dropout(0.3)(output)  # to reduce overfitting
    output = BatchNormalization()(output)
    output = Dense(150, activation='relu',
                   kernel_initializer="uniform")(output)
    output = Dropout(0.2)(output)  # to reduce overfitting
    output = BatchNormalization()(output)
    output = Dense(30, activation='relu')(output)
    output = Dropout(0.1)(output)  # to reduce overfitting
    output = BatchNormalization()(output)
    output = Dense(1, activation='sigmoid')(output)

    model = Model(inputs=input_models, outputs=output)
    return model
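# A model built this way takes one array per input, in the same order the
# inputs were appended. A hedged usage sketch (y_train and the compile
# settings are assumptions, not the author's code):
categoricals = ['int8', 'int16', 'int32', 'int64']
numerics = ['float16', 'float32', 'float64']

cat_cols = X_train.select_dtypes(include=categoricals).columns.tolist()
num_cols = X_train.select_dtypes(include=numerics).columns.tolist()
# one (n_samples,) array per categorical input, then the numeric block
train_inputs = [X_train[c].values for c in cat_cols] + \
               [X_train[num_cols].values]

model = make_model()
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(train_inputs, y_train, epochs=10, batch_size=1024,
          validation_split=0.1)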
def MTL_model(cnn_filter_num=64):
    # Input layers
    char_input = Input(shape=(16, ), dtype='int32', name='input10')
    entity_1_loc_input = Input(shape=(16, ), dtype='int32', name='input11')
    entity_2_loc_input = Input(shape=(16, ), dtype='int32', name='input12')
    tag_input = Input(shape=(16, ), dtype='int32', name='input15')

    # Embedding layers
    char_embedding = Embedding(
        len(tokenizer_api.word_index) + 1,
        200,
        input_length=16,
        # weights=[char_embedding_matrix],
        trainable=True)
    location_embedding = Embedding(150 + 1, 50,
                                   input_length=16,
                                   trainable=True,
                                   name='location_embedding')
    tag_embedding = Embedding(133, 50,
                              input_length=16,
                              trainable=True,
                              name='tag_embedding')
    emb_char = char_embedding(char_input)
    emb_entity_1_loc = location_embedding(entity_1_loc_input)
    emb_entity_2_loc = location_embedding(entity_2_loc_input)
    emb_tag = tag_embedding(tag_input)
    emb_char = SpatialDropout1D(0.2)(emb_char)
    emb_entity_1_loc = SpatialDropout1D(0.2)(emb_entity_1_loc)
    emb_entity_2_loc = SpatialDropout1D(0.2)(emb_entity_2_loc)
    emb_tag = SpatialDropout1D(0.2)(emb_tag)
    merge_embedding = concatenate(
        [emb_char, emb_entity_1_loc, emb_entity_2_loc, emb_tag])

    # multi-size CNN over the merged embeddings
    kernel_sizes = [1, 2, 3]
    pooled_char = []
    pooled_char_mean = []
    for kernel in kernel_sizes:
        conv_char = Conv1D(filters=cnn_filter_num,
                           kernel_size=kernel,
                           padding='same',
                           strides=1,
                           kernel_initializer='he_uniform',
                           activation='relu')(merge_embedding)
        pool_char = MaxPooling1D(pool_size=16)(conv_char)
        pool_char_2 = AvgPool1D(pool_size=16)(conv_char)
        pooled_char.append(pool_char)
        pooled_char_mean.append(pool_char_2)
    merged_pooled_char = Concatenate(axis=-1)(pooled_char)
    flatten_pooled_char = Flatten()(merged_pooled_char)
    merged_pooled_char2 = Concatenate(axis=-1)(pooled_char_mean)
    flatten_pooled_char2 = Flatten()(merged_pooled_char2)
    merge_all = concatenate([flatten_pooled_char, flatten_pooled_char2])

    # shared dense trunk feeding both task heads
    merge_all = BatchNormalization()(merge_all)
    merge_all = Dropout(0.5)(merge_all)
    merge_all = Dense(128, activation='relu')(merge_all)
    merge_all = BatchNormalization()(merge_all)
    merge_all = Dropout(0.2)(merge_all)
    pred1 = Dense(6, name='loss_1')(merge_all)
    pred2 = Dense(2, activation='softmax', name='loss_2')(merge_all)
    model = Model(
        inputs=[char_input, entity_1_loc_input, entity_2_loc_input, tag_input],
        outputs=[pred1, pred2])
    return model
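# MTL_model returns two heads: a 6-unit linear head ('loss_1') and a 2-unit
# softmax head ('loss_2'). A hedged compile/fit sketch; the loss choices,
# weights and input names are assumptions, not the author's settings:
model = MTL_model(cnn_filter_num=64)
model.compile(optimizer='adam',
              loss={'loss_1': 'mse',  # regression-style head (no activation)
                    'loss_2': 'categorical_crossentropy'},  # 2-class head
              loss_weights={'loss_1': 1.0, 'loss_2': 1.0})
# model.fit([chars, e1_loc, e2_loc, tags], [y_regression, y_class], ...)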
embedding_matrix = np.zeros((num_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# Build Model
inp = Input(shape=(maxlen, ))
x = Embedding(max_features, embed_size,
              weights=[embedding_matrix], trainable=True)(inp)
x = SpatialDropout1D(0.35)(x)
x = Bidirectional(LSTM(128, return_sequences=True,
                       dropout=0.15, recurrent_dropout=0.15))(x)
x = Conv1D(64, kernel_size=3, padding='valid',
           kernel_initializer='glorot_uniform')(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
x = concatenate([avg_pool, max_pool])
out = Dense(6, activation='sigmoid')(x)

model = Model(inp, out)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
        embedding_matrix[index] = embedding_vector

#----------------------------------------------------------------------------
#### Create the CNN model
#----------------------------------------------------------------------------
batch_size = 128
epochs = 10

model = Sequential()
embedding_layer = Embedding(vocab_size, 100,
                            weights=[embedding_matrix],
                            input_length=maxLength,
                            trainable=True)
model.add(embedding_layer)
model.add(SpatialDropout1D(0.5))
model.add(Conv1D(64, 20, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['acc', f1_m, precision_m, recall_m])
print(model.summary())

#----------------------------------------------------------------------------
#### Fit and evaluate the model
#----------------------------------------------------------------------------
history = model.fit(X_train, y_train, batch_size=batch_size,
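# f1_m, precision_m and recall_m above are custom metrics not shown in this
# snippet. A common Keras-backend implementation that matches those names
# (an assumption, though this exact pattern is widely used):
from keras import backend as K


def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())


def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())


def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * (precision * recall) / (precision + recall + K.epsilon())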
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler
from keras.callbacks import BaseLogger
import matplotlib.pyplot as plt
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten  # Flatten is used below, so the import must be live
from keras.layers.embeddings import Embedding
from keras.callbacks import ModelCheckpoint
from keras.layers import TimeDistributed
from keras.regularizers import L1L2

model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
print(X.shape[1])
model.add(SpatialDropout1D(0.80))
model.add(LSTM(200, dropout=0.80, recurrent_dropout=0.80,
               return_sequences=True,
               recurrent_regularizer=L1L2(l1=0.8, l2=0.8)))
model.add(SpatialDropout1D(0.80))
model.add(LSTM(200, dropout=0.80, return_sequences=True,
               recurrent_dropout=0.80,
               recurrent_regularizer=L1L2(l1=0.8, l2=0.8)))
model.add(SpatialDropout1D(0.80))
model.add(TimeDistributed(Dense(200)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='Adam',
              metrics=['accuracy'])
    for col in cont_cols:
        X[col] = dataset[col].values
    return X


# Dictionary of inputs
emb_n = 40
dense_n = 1000

# Build the inputs and embeddings for each column, then concatenate them all
emb_inputs = dict((col, Input(shape=[1], name=col)) for col in embids)
cont_inputs = dict((col, Input(shape=[1], name=col)) for col in cont_cols)
emb_model = dict((col, Embedding(embmaxs[col], emb_n)(emb_inputs[col]))
                 for col in embids)
fe = concatenate([(emb_) for emb_ in emb_model.values()])

# Rest of the model
s_dout = SpatialDropout1D(0.2)(fe)
fl1 = Flatten()(s_dout)
conv = Conv1D(100, kernel_size=4, strides=1, padding='same')(s_dout)
fl2 = Flatten()(conv)
concat = concatenate([(fl1), (fl2)] +
                     [(c_inp) for c_inp in cont_inputs.values()])
x = Dropout(0.4)(Dense(dense_n, activation='relu')(concat))
x = Dropout(0.4)(Dense(dense_n, activation='relu')(x))
outp = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[inp for inp in emb_inputs.values()] +
                     [(c_inp) for c_inp in cont_inputs.values()],
              outputs=outp)

batch_size = 200000
epochs = 4
exp_decay = lambda init, fin, steps: (init / fin)**(1 / (steps - 1)) - 1
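# exp_decay computes a per-update decay factor intended to move the learning
# rate from `init` toward `fin` over `steps` optimizer updates via Keras'
# multiplicative `decay` schedule. A hedged usage sketch (the lr values and
# `train_df` are assumptions, not the author's code):
steps = int(len(train_df) / batch_size) * epochs  # total optimizer updates
lr_init, lr_fin = 0.001, 0.0001
lr_decay = exp_decay(lr_init, lr_fin, steps)
optimizer_adam = Adam(lr=lr_init, decay=lr_decay)
model.compile(loss='binary_crossentropy',
              optimizer=optimizer_adam,
              metrics=['accuracy'])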
def DPCNN(num_block=6, ngram=4, drop_ratio=0.15, last_drop_ratio=0.5):
    main_input = Input(shape=(maxlen, ))
    embedded_sequences = Embedding(max_features, embed_size,
                                   weights=[embedding_matrix],
                                   trainable=False)(main_input)
    embedded_sequences = SpatialDropout1D(0.22)(embedded_sequences)
    assert num_block > 1

    X_shortcut1 = embedded_sequences
    x = Conv1D(filters=hidden_dim, padding='same',
               kernel_size=ngram)(embedded_sequences)
    x = BatchNormalization()(x)
    x = Dropout(drop_ratio)(x)
    x = PReLU()(x)
    x = Conv1D(filters=hidden_dim, padding='same', kernel_size=ngram)(x)
    x = BatchNormalization()(x)
    x = Dropout(drop_ratio)(x)
    x = PReLU()(x)

    # connect the shortcut to the main path; note `filters=`, since the
    # Keras 1 keyword `nb_filter=` is no longer accepted
    embedding_reshape = Conv1D(filters=hidden_dim, kernel_size=1,
                               padding='same',
                               activation='linear')(X_shortcut1)
    embedding_reshape = PReLU()(embedding_reshape)  # pre-activation
    x = Add()([embedding_reshape, x])
    x = MaxPool1D(pool_size=4, strides=2, padding='valid')(x)

    for i in range(2, num_block):
        X_shortcut = x
        x = Conv1D(filters=hidden_dim, padding='same', kernel_size=ngram)(x)
        x = BatchNormalization()(x)
        x = Dropout(drop_ratio)(x)
        x = PReLU()(x)
        x = Conv1D(filters=hidden_dim, padding='same', kernel_size=ngram)(x)
        x = BatchNormalization()(x)
        x = Dropout(drop_ratio)(x)
        x = PReLU()(x)
        x = Add()([X_shortcut, x])
        x = MaxPool1D(pool_size=4, strides=2, padding='valid')(x)

    X_shortcut_final = x
    x = Conv1D(filters=hidden_dim, padding='same', kernel_size=ngram)(x)
    x = BatchNormalization()(x)
    x = Dropout(drop_ratio)(x)
    x = PReLU()(x)
    x = Conv1D(filters=hidden_dim, padding='same', kernel_size=ngram)(x)
    x = BatchNormalization()(x)
    x = Dropout(drop_ratio)(x)
    x = PReLU()(x)
    x = Add()([X_shortcut_final, x])

    # single classification head; the model is already pooled to 2D here,
    # so no further residual add or pooling can follow
    x = GlobalMaxPool1D()(x)
    x = Dense(dense_filter, activation='linear')(x)
    x = BatchNormalization()(x)
    x = PReLU()(x)
    x = Dense(256, activation='linear')(x)
    x = BatchNormalization()(x)
    x = PReLU()(x)
    x = Dropout(last_drop_ratio)(x)
    x = Dense(6, activation="sigmoid",
              kernel_regularizer=regularizers.l2(1e-8))(x)

    model = Model(inputs=main_input, outputs=x)
    nadam = Nadam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                  schedule_decay=0.0022)
    model.compile(loss='binary_crossentropy',
                  optimizer=nadam,
                  metrics=['accuracy', f1_score, auc])
    print(model.summary())
    return model
def create_model():
    # Inputs: the sequence data has shape (n_samples, timesteps, features)
    main_input = Input(shape=(MAX_MAXWIND_SEQ_LEN,), name='main_input')
    main_input_rev = Input(shape=(MAX_MAXWIND_SEQ_LEN,), name='main_input_rev')
    main_input_lstm = Input(shape=(MAX_MAXWIND_SEQ_LEN, 1), name='main_input_lstm')
    aux_month_input = Input(shape=(1,), name='aux_month_input')
    aux_time_input = Input(shape=(1,), name='aux_time_input')
    aux_lalo_input = Input(shape=(2,), name='aux_lalo_input')
    aux_len_input = Input(shape=(1,), name='aux_len_input')
    aux_stat_input = Input(shape=(1,), name='aux_stat_input')

    # NOTE: `masked` is built here but never consumed below; the convolutional
    # stack reads main_input_lstm directly.
    if EMBEDDING_DIM == -1:
        masked = Masking(mask_value=0)(main_input_lstm)
    else:
        masked = Embedding(input_dim=int(max_maxWind),
                           output_dim=EMBEDDING_DIM,
                           input_length=MAX_MAXWIND_SEQ_LEN,
                           mask_zero=True)(main_input)

    # TCN-style residual blocks with exponentially growing dilation.
    # Each block: two dilated causal convolutions, plus a 1x1 conv on the
    # shortcut to match the channel dimension.
    prev_x = main_input_lstm
    for dilation in (1, 2, 4, 8, 16):
        x = Conv1D(filters=NUM_FILTERS, kernel_size=KERNEL_SIZE,
                   dilation_rate=dilation, padding='causal')(prev_x)
        # x = BatchNormalization()(x)  # TODO should be WeightNorm here.
        x = Activation('relu')(x)
        x = SpatialDropout1D(rate=DROPOUT_RATE)(x)
        x = Conv1D(filters=NUM_FILTERS, kernel_size=KERNEL_SIZE,
                   dilation_rate=dilation, padding='causal')(x)
        # x = BatchNormalization()(x)  # TODO should be WeightNorm here.
        x = Activation('relu')(x)
        x = SpatialDropout1D(rate=DROPOUT_RATE)(x)
        # 1x1 conv to match the shapes (channel dimension).
        prev_x = Conv1D(NUM_FILTERS, 1, padding='same')(prev_x)
        res_x = Add()([prev_x, x])
        prev_x = res_x

    res_x = Flatten()(res_x)
    x = Dense(MAX_MAXWIND_SEQ_LEN, activation='relu')(res_x)
    main_output = Dense(1, activation='sigmoid', name='finalDense')(x)

    model = Model(inputs=[main_input, main_input_rev, main_input_lstm,
                          aux_month_input, aux_time_input, aux_lalo_input,
                          aux_len_input, aux_stat_input],
                  outputs=main_output)
    model.compile(loss=LOSS_FUNCTION, optimizer=OPTIMIZER)
    return model
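# With two causal convolutions per residual block and dilations 1, 2, 4, 8, 16,
# the receptive field of the stack is 1 + 2*(KERNEL_SIZE - 1)*(1+2+4+8+16)
# = 1 + 62*(KERNEL_SIZE - 1). A quick check of how far back the last output
# can see; the helper name is an assumption.
def tcn_receptive_field(kernel_size, dilations=(1, 2, 4, 8, 16), convs_per_block=2):
    return 1 + convs_per_block * (kernel_size - 1) * sum(dilations)

print(tcn_receptive_field(3))  # 125 timesteps for KERNEL_SIZE=3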
y_pred = self.model.predict(self.X_val, verbose=0)
scores = [roc_auc_score(self.y_val[:, i], y_pred[:, i]) for i in range(1)]
print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch + 1, scores[0]))

##############################################################################
# Model architecture
inp = Input(shape=(maxlen,))
x = Embedding(max_features, embed_size)(inp)  # embed the word ids
x = SpatialDropout1D(0.2)(x)  # drops whole embedding channels rather than single units
# Bidirectional RNN (cell: GRU)
x = Bidirectional(GRU(rnn_size, return_sequences=True))(x)
# Pool the outputs of every timestep (average & max).
# (This is slightly unusual; a dense head on the final hidden state is the
# more standard choice.)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
# Concatenate the average & max pools above.
conc = concatenate([avg_pool, max_pool])
# single-unit sigmoid output (a six-label variant would use Dense(6))
outp = Dense(1, activation="sigmoid")(conc)
model = Model(inputs=inp, outputs=outp)
# loss: binary cross-entropy
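# The ROC-AUC fragment above reads like the body of a Callback's on_epoch_end.
# A minimal sketch of the class it would live in; the class name and the
# constructor signature are assumptions.
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class RocAucEvaluation(Callback):
    def __init__(self, validation_data):
        super(RocAucEvaluation, self).__init__()
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        y_pred = self.model.predict(self.X_val, verbose=0)
        score = roc_auc_score(self.y_val, y_pred)
        print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch + 1, score))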
def deepmoji_architecture(nb_classes, nb_tokens, maxlen, feature_output=False,
                          embed_dropout_rate=0, final_dropout_rate=0,
                          embed_l2=1E-6, return_attention=False):
    """
    Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of a sequence, in tokens.
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        return_attention: If True, also return the attention weights
                          over the input sequence.

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors;
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    embed = Embedding(input_dim=nb_tokens, output_dim=256, mask_zero=True,
                      input_length=maxlen, embeddings_regularizer=embed_reg,
                      name='embedding')
    x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the normal Keras
    # embedding dropout, which drops all channels for entire words;
    # many of the datasets contain so few words that losing one or more
    # words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient flow and allows
    # access to lower-level features; the ordering of the merge is important
    # for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional
    # tensor representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)
        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
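# A sketch of instantiating the architecture with attention weights exposed;
# the vocabulary size and maxlen values here are placeholders, not values
# from the original.
model = deepmoji_architecture(nb_classes=2, nb_tokens=50000, maxlen=30,
                              return_attention=True)
# model.predict(...) now returns [class probabilities, attention weights]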
if not use_char_embedding:
    x1_tr, x2_tr, y_tr = x1_p[tr_index], x2_p[tr_index], label[tr_index]
    x1_te, x2_te, y_te = x1_p[te_index], x2_p[te_index], label[te_index]
else:
    x1_tr, x2_tr, y_tr = x1_p_c[tr_index], x2_p_c[tr_index], label[tr_index]
    x1_te, x2_te, y_te = x1_p_c[te_index], x2_p_c[te_index], label[te_index]

input1c = Input(shape=input_shape_c)
input2c = Input(shape=input_shape_c)
embed1c = Embedding(embedding_matrix_c.shape[0], embedding_matrix_c.shape[1],
                    weights=[embedding_matrix_c], trainable=False,
                    input_shape=input_shape_c)
dropout_layer = SpatialDropout1D(dropout_embedding)
lstm0 = Bidirectional(CuDNNLSTM(128, return_sequences=True))
# shared embedding, dropout and first BiLSTM over both sentences
v1c = dropout_layer(embed1c(input1c))
v2c = dropout_layer(embed1c(input2c))
v1h = lstm0(v1c)
v2h = lstm0(v2c)
# when structure_shuffle is on, every third epoch swaps the second BiLSTM
# for a convolutional layer
if e % 3 != 0 or not structure_shuffle:
    lstm1 = Bidirectional(CuDNNLSTM(128, return_sequences=True))
    v1hh = lstm1(v1h)
    v2hh = lstm1(v2h)
else:
    # the source is cut off inside this call; the closing parenthesis and the
    # two applications below are assumptions that mirror the LSTM branch
    conv = Conv1D(128, kernel_size=2, padding='same',
                  kernel_initializer='he_uniform')
    v1hh = conv(v1h)
    v2hh = conv(v2h)
def nn(x1, x2, y1, y2):
    y1 = np.log(y1 + 1)
    y2 = np.log(y2 + 1)
    print(x1.shape)
    features = sorted(x1.keys())
    inputs = []
    flatten_layers = []
    max_dict = {}
    for f in x1.keys():
        if f == "main":
            continue
        max_dict[f] = int(max(x1[f].max().max(), x2[f].max().max()))
    for f in features:
        if f == "main":
            # the "main" feature is already a sequence: (samples, steps, channels)
            dim_1 = x1[f].shape[1]
            dim_2 = x1[f].shape[2]
            x = Input(shape=(dim_1, dim_2), dtype="float32", name=f)
            inputs.append(x)
            flatten_layers.append(x)
        else:
            dim_1 = x1[f].shape[1]
            x = Input(shape=(dim_1,), dtype="float32", name=f)
            inputs.append(x)
            inp_dim = max_dict[f] + 1
            out_dim = int(emb_dim(inp_dim))
            x = Embedding(inp_dim, out_dim, input_length=dim_1,
                          embeddings_regularizer=keras.regularizers.l2(0.01))(x)
            x = SpatialDropout1D(0.2)(x)
            flatten_layers.append(x)
    x = Concatenate()(flatten_layers)

    feature_size = K.int_shape(x)[2]
    x = Reshape([18, feature_size, 1])(x)  # assumes 18 timesteps in the concatenated sequence

    layer_num = 3
    layer_size = 512
    bn_axis = -1
    momentum = 0.0
    dropout_rate = 0.2
    activation = "relu"
    x = Conv2D(layer_size, [1, feature_size], padding="valid",
               kernel_regularizer=keras.regularizers.l2(0.01))(x)
    x = Activation(activation)(x)
    x = BatchNormalization(axis=bn_axis, momentum=momentum)(x)
    x = Dropout(dropout_rate)(x)
    for i in range(layer_num):
        x = Conv2D(layer_size, [1, 1], padding="valid",
                   kernel_regularizer=keras.regularizers.l2(0.01))(x)
        x = Activation(activation)(x)
        x = BatchNormalization(axis=bn_axis, momentum=momentum)(x)
        x = Dropout(dropout_rate)(x)
    x = Conv2D(1, [1, 1], padding="valid",
               kernel_regularizer=keras.regularizers.l2(0.01))(x)
    x = Flatten()(x)

    model = Model(inputs=inputs, outputs=x)
    # note: with metrics=[rank_accuracy] the validation metric is named
    # 'val_rank_accuracy', so monitor='val_acc' will not find a value to watch
    early_stopping = EarlyStopping(monitor='val_acc', patience=50, verbose=0)
    check_point = ModelCheckpoint("./models/pair_nn.hd", period=1, verbose=0,
                                  save_best_only=True)
    model.compile(loss='mse', optimizer='adam', metrics=[rank_accuracy])
    model.fit(x1, y1, epochs=50, batch_size=32, validation_data=(x2, y2),
              callbacks=[early_stopping, check_point])
    print(model.predict(x2)[0])
    return model
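# emb_dim() is referenced above but not defined in this snippet. A common
# heuristic for choosing embedding width from cardinality, offered as an
# assumption about what it computes:
def emb_dim(cardinality):
    # fourth-root rule of thumb, capped at 50 channels
    return min(50, int(6 * cardinality ** 0.25))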
def build_model1(lr=0.0, lr_d=0.0, units=0, spatial_dr=0.0, kernel_size1=3,
                 kernel_size2=2, dense_units=128, dr=0.1, conv_size=32):
    file_path = "best_model.hdf5"
    check_point = ModelCheckpoint(file_path, monitor="val_loss", verbose=1,
                                  save_best_only=True, mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    inp = Input(shape=(MAXLEN,))
    x = Embedding(vocab_size, EMBEDDING_DIM, weights=[embedding_matrix],
                  trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)

    # GRU branch: two convolution widths, each average- and max-pooled
    x_gru = Bidirectional(GRU(units, return_sequences=True))(x1)
    x1 = Conv1D(conv_size, kernel_size=kernel_size1, padding='valid',
                kernel_initializer='he_uniform')(x_gru)
    avg_pool1_gru = GlobalAveragePooling1D()(x1)
    max_pool1_gru = GlobalMaxPooling1D()(x1)
    x3 = Conv1D(conv_size, kernel_size=kernel_size2, padding='valid',
                kernel_initializer='he_uniform')(x_gru)
    avg_pool3_gru = GlobalAveragePooling1D()(x3)
    max_pool3_gru = GlobalMaxPooling1D()(x3)

    # LSTM branch. Beware: x1 was reassigned to the GRU-branch conv output
    # above, so the LSTM consumes that tensor rather than the embedding
    # dropout; if the dropout output was intended, keep it under its own name.
    x_lstm = Bidirectional(LSTM(units, return_sequences=True))(x1)
    x1 = Conv1D(conv_size, kernel_size=kernel_size1, padding='valid',
                kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x1)
    max_pool1_lstm = GlobalMaxPooling1D()(x1)
    x3 = Conv1D(conv_size, kernel_size=kernel_size2, padding='valid',
                kernel_initializer='he_uniform')(x_lstm)
    avg_pool3_lstm = GlobalAveragePooling1D()(x3)
    max_pool3_lstm = GlobalMaxPooling1D()(x3)

    x = concatenate([avg_pool1_gru, max_pool1_gru, avg_pool3_gru, max_pool3_gru,
                     avg_pool1_lstm, max_pool1_lstm, avg_pool3_lstm, max_pool3_lstm])
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(dense_units, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(dr)(Dense(int(dense_units / 2), activation='relu')(x))
    x = Dense(3, activation="softmax")(x)

    model = Model(inputs=inp, outputs=x)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    history = model.fit(x_train, y_train, batch_size=64, epochs=100,
                        validation_data=(x_val, y_val), verbose=1, shuffle=True,
                        callbacks=[check_point, early_stop])
    model = load_model(file_path)
    return model
trn_x, trn_y = X_t, y

def get_coefs(word, *arr):
    return word.split('/')[-1], np.asarray(arr, dtype='float32')

embeddings_index = dict(get_coefs(*o.strip().split()) for o in open(EMBEDDING_FILE))

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.zeros([nb_words, embed_size])
for word, i in word_index.items():
    if i >= nb_words:  # skip indices beyond the matrix bounds
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

inp = Input(shape=(maxlen,))
# the Embedding input_dim must match the weight-matrix rows, hence nb_words
x = Embedding(nb_words, embed_size, weights=[embedding_matrix], trainable=True)(inp)
x = SpatialDropout1D(0.25)(x)
x = Bidirectional(GRU(rnn_units, return_sequences=True))(x)
# x = PReLU()(x)
# x = SpatialDropout1D(0.3)(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPool1D()(x)
x = concatenate([avg_pool, max_pool])
x = Dense(6, activation="sigmoid")(x)
print('Timer before Model: ', timer() - t0)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_acc', min_delta=1e-5, patience=1)
if kfold != 0:
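    # (the k-fold branch is truncated in the source)
    pass

# A minimal sketch of the single-split training call the non-k-fold path
# would typically make; batch size, epochs and the validation split are
# assumptions, not values from the original.
history = model.fit(trn_x, trn_y, batch_size=512, epochs=3,
                    validation_split=0.1, callbacks=[early_stop])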
def get_convo_nn2(no_word=200, n_gram=21, no_char=178):
    input1 = Input(shape=(n_gram,))
    input2 = Input(shape=(n_gram,))

    # character embedding
    a = Embedding(no_char, 32, input_length=n_gram)(input1)
    a = SpatialDropout1D(0.2)(a)

    # convolutions of width 2..12 (200 filters up to width 8, 150 for 9-11,
    # 100 for width 12), each projected to 5 channels per timestep and
    # zero-padded back to n_gram, then merged with an element-wise maximum
    conv_branches = []
    for width in range(2, 13):
        if width <= 8:
            filters = no_word
        elif width <= 11:
            filters = no_word - 50
        else:
            filters = no_word - 100
        branch = Conv1D(filters, width, strides=1, padding='valid',
                        activation='relu')(a)
        branch = TimeDistributed(Dense(5))(branch)
        branch = ZeroPadding1D(padding=(0, width - 1))(branch)
        conv_branches.append(branch)
    a_max = Maximum()(conv_branches)  # element-wise maximum, not a sum

    # character-type embedding
    b = Embedding(12, 12, input_length=n_gram)(input2)
    b = SpatialDropout1D(0.2)(b)

    x = Concatenate(axis=-1)([a, a_max, b])
    x = BatchNormalization()(x)
    x = Flatten()(x)
    x = Dense(100, activation='relu')(x)
    out = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[input1, input2], outputs=out)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['acc'])
    return model
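# A sketch of feeding the model one batch; the array shapes follow the two
# Input layers above, and the data here is random placeholder data, not the
# original character features.
import numpy as np
model = get_convo_nn2()
chars = np.random.randint(0, 178, size=(32, 21))      # character ids
char_types = np.random.randint(0, 12, size=(32, 21))  # character-type ids
labels = np.random.randint(0, 2, size=(32, 1))        # word-boundary labels
model.train_on_batch([chars, char_types], labels)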