                                         tag2idx, n_words, n_tags)
X_val, y_val, n_words_val, n_tags_val = data_process(df_data_val, word2idx,
                                                     tag2idx, n_words, n_tags)

BATCH_SIZE = 32
EPOCHS = 20
EMBEDDING = 50

model = Sequential()
model.add(
    Embedding(input_dim=n_words + 2,
              output_dim=EMBEDDING,
              input_length=MAX_LEN,
              mask_zero=True,
              input_shape=(MAX_LEN, )))
model.add(Bidirectional(LSTM(units=100, return_sequences=True)))
model.add(TimeDistributed(Dense(50, activation="relu")))
crf = CRF(n_tags + 1)  # CRF layer, n_tags + 1 to account for the PAD tag
model.add(crf)  # add the same CRF instance whose loss and accuracy are used in compile()

rmsprop = optimizers.RMSprop(lr=0.001)
model.compile(optimizer=rmsprop,
              loss=crf.loss_function,
              metrics=[crf.accuracy])
model.summary()

print('Train...')
labels = [
    'B-ADR', 'I-ADR', 'B-Drug', 'B-Disease', 'B-Symptom', 'I-Symptom',
    'I-Disease', 'I-Drug', 'B-Finding', 'I-Finding'
]
history = model.fit(X_tr,
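# A hedged sketch of how the truncated fit call above is typically completed and
# how the CRF predictions are mapped back to tag names. y_tr, the validation
# tuple, and the inverted idx2tag dictionary are assumptions, not part of the
# original snippet.
history = model.fit(X_tr, y_tr,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(X_val, y_val),
                    verbose=1)

idx2tag = {i: t for t, i in tag2idx.items()}      # assumed inverse of tag2idx
pred = model.predict(X_val)                       # shape (samples, MAX_LEN, n_tags + 1)
pred_tags = [[idx2tag.get(i, 'PAD') for i in seq.argmax(axis=-1)] for seq in pred]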
def elsa_architecture(nb_classes, nb_tokens, maxlen, feature_output=False, embed_dropout_rate=0, final_dropout_rate=0, embed_dim=300, embed_l2=1E-6, return_attention=False, load_embedding=False, pre_embedding=None, high=False, LSTM_hidden=512, LSTM_drop=0.5): """ Returns the DeepMoji architecture uninitialized and without using the pretrained model weights. # Arguments: nb_classes: Number of classes in the dataset. nb_tokens: Number of tokens in the dataset (i.e. vocabulary size). maxlen: Maximum length of a token. feature_output: If True the model returns the penultimate feature vector rather than Softmax probabilities (defaults to False). embed_dropout_rate: Dropout rate for the embedding layer. final_dropout_rate: Dropout rate for the final Softmax layer. embed_l2: L2 regularization for the embedding layerl. high: use or not the highway network # Returns: Model with the given parameters. """ class NonMasking(Layer): def __init__(self, **kwargs): self.supports_masking = True super(NonMasking, self).__init__(**kwargs) def build(self, input_shape): input_shape = input_shape def compute_mask(self, input, input_mask=None): # do not pass the mask to the next layers return None def call(self, x, mask=None): return x def get_output_shape_for(self, input_shape): return input_shape # define embedding layer that turns word tokens into vectors # an activation function is used to bound the values of the embedding model_input = Input(shape=(maxlen,), dtype='int32') embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None if not load_embedding and pre_embedding is None: embed = Embedding(input_dim=nb_tokens, output_dim=embed_dim, mask_zero=True,input_length=maxlen,embeddings_regularizer=embed_reg, name='embedding') else: embed = Embedding(input_dim=nb_tokens, output_dim=embed_dim, mask_zero=True,input_length=maxlen, weights=[pre_embedding], embeddings_regularizer=embed_reg,trainable=True, name='embedding') if high: x = NonMasking()(embed(model_input)) else: x = embed(model_input) x = Activation('tanh')(x) # entire embedding channels are dropped out instead of the # normal Keras embedding dropout, which drops all channels for entire words # many of the datasets contain so few words that losing one or more words can alter the emotions completely if embed_dropout_rate != 0: embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop') x = embed_drop(x) # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features # ordering of the way the merge is done is important for consistency with the pretrained model lstm_0_output = Bidirectional(LSTM(LSTM_hidden, return_sequences=True, dropout=LSTM_drop), name="bi_lstm_0" )(x) lstm_1_output = Bidirectional(LSTM(LSTM_hidden, return_sequences=True, dropout=LSTM_drop), name="bi_lstm_1" )(lstm_0_output) x = concatenate([lstm_1_output, lstm_0_output, x]) if high: x = TimeDistributed(Highway(activation='tanh', name="high"))(x) # if return_attention is True in AttentionWeightedAverage, an additional tensor # representing the weight at each timestep is returned weights = None x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x) #x = MaskAverage(name='attlayer', return_attention=return_attention)(x) if return_attention: x, weights = x if not feature_output: # output class probabilities if final_dropout_rate != 0: x = Dropout(final_dropout_rate)(x) if nb_classes > 2: outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)] else: outputs = [Dense(1, activation='sigmoid', 
name='softmax')(x)] else: # output penultimate feature vector outputs = [x] if return_attention: # add the attention weights to the outputs if required outputs.append(weights) return Model(inputs=[model_input], outputs=outputs)
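# A hedged usage sketch for elsa_architecture. The vocabulary size, sequence
# length, and the pretrained embedding matrix are placeholder assumptions; only
# the keyword names come from the signature above. With nb_classes=2 the model
# ends in a single sigmoid unit, so binary cross-entropy is the matching loss.
embedding_matrix = None  # assumed (nb_tokens, embed_dim) numpy array when load_embedding=True
model = elsa_architecture(nb_classes=2,
                          nb_tokens=50000,
                          maxlen=100,
                          embed_dim=300,
                          embed_dropout_rate=0.25,
                          final_dropout_rate=0.5,
                          load_embedding=embedding_matrix is not None,
                          pre_embedding=embedding_matrix,
                          high=False,
                          LSTM_hidden=512,
                          LSTM_drop=0.5)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])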
def build_rnn(self, embedding_size=128, is_bidirectional=False, depth=3, cell='GRU', cell_size=128, dense_size=20, dr=0.4): ''' << summary >> build keras model << inputs >> embedding_size: dimension of the embedding layer is_bidirectional: whether the model is bidirectional depth: depth of the RNN neural network cell: cell of the RNN neuron, 'SimpleRNN'/'GRU'/'LSTM' cell_size: number of neurons of each cell dense_size: size of the final fully-connected layer dr: dropout rate for RNN and the final fully-connected layer << outputs >> [file]: self.wkdir+'/output/model.h5': the model file [var]: model: the keras model object ''' print('\n\n>>>>>>>>>> build RNN model <<<<<<<<<<') # load token_dict_size and padding_size token_dict_size, padding_size = \ pickle.load(open(self.wkdir+'/output/model_preprocessing_tmp.pkl','rb')) # define layer wrapper layer_wrap = [] for n in range(depth): if n == depth - 1: return_sequences = False else: return_sequences = True if cell == 'Simple': layer_tmp = SimpleRNN(cell_size, dropout=dr, recurrent_dropout=dr, return_sequences=return_sequences) elif cell == 'LSTM': layer_tmp = LSTM(cell_size, dropout=dr, recurrent_dropout=dr, return_sequences=return_sequences) elif cell == 'GRU': layer_tmp = GRU(cell_size, dropout=dr, recurrent_dropout=dr, return_sequences=return_sequences) if is_bidirectional: layer_tmp = Bidirectional(layer_tmp) layer_wrap.append(layer_tmp) # construct model model = Sequential() model.add( Embedding(token_dict_size + 1, embedding_size, input_length=padding_size)) [model.add(layer_wrap[n]) for n in range(depth)] model.add(Dense(dense_size, activation='relu')) if self.tot_class == 2: model.add(Dense(1, activation='sigmoid')) else: model.add(Dense(self.tot_class, activation='softmax')) print(model.summary()) try: import pydot except: print( '\n ==> plot_model is not available, model will not output in png format\n' ) else: print('\n ==> model has been output to ' + self.wkdir + 'output/model.png\n') plot_model(model, self.wkdir + 'output/model.png', show_shapes=True) model.save(self.wkdir + '/output/model.h5') return model
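# A hedged usage sketch for build_rnn. `trainer` stands in for an instance of the
# surrounding class whose wkdir already contains output/model_preprocessing_tmp.pkl;
# the hyperparameter values are illustrative. Note that the docstring mentions
# 'SimpleRNN' while the branch above actually checks cell == 'Simple'.
model = trainer.build_rnn(embedding_size=128,
                          is_bidirectional=True,
                          depth=2,
                          cell='LSTM',
                          cell_size=128,
                          dense_size=20,
                          dr=0.4)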
    # gensim's wv[word] raises KeyError for out-of-vocabulary words rather than
    # returning None, so guard with a membership test
    vector_i = word2vec_model.wv[word] if word in word2vec_model.wv else None
    if vector_i is not None:
        embedding_l_weights[idx] = vector_i

# In[55]:

# BiLSTM RNN
model = Sequential()
model.add(
    Embedding(input_dim=len(tokenizer.word_index) + 1,
              output_dim=embedding_vector_size,
              input_length=avg_doc_len,
              weights=[embedding_l_weights]))
model.add(Bidirectional(LSTM(128, dropout=0.25, recurrent_dropout=0.1)))
model.add(Dense(10))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

# In[56]:

# Keras: reduce the learning rate when a monitored metric has stopped improving.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=2,
                                            factor=0.5,
                                            min_lr=0.0001,
                                            verbose=1)

# In[57]:
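# A minimal compile-and-fit sketch for the BiLSTM above. The padded sequence
# array X and the binary label vector y are assumptions, as are the batch size
# and epoch count; validation data is needed so that val_acc exists for the
# ReduceLROnPlateau callback.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.fit(X, y,
          epochs=10,
          batch_size=64,
          validation_split=0.2,
          callbacks=[learning_rate_reduction])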
embed_dim = 128
lstm_out = 196

from keras.layers import Bidirectional, GRU

model = Sequential()
model.add(Embedding(max_fatures, embed_dim, input_length=X.shape[1]))
model.add(LSTM(128, dropout=0.4, recurrent_dropout=0.4, return_sequences=True))
model.add(LSTM(128, dropout=0.5, recurrent_dropout=0.5, return_sequences=True))
model.add(LSTM(64, dropout=0.5, recurrent_dropout=0.5, return_sequences=True))
model.add(LSTM(64, dropout=0.5, recurrent_dropout=0.5, return_sequences=True))
model.add(Bidirectional(GRU(lstm_out, recurrent_dropout=0.2, dropout=0.2, return_sequences=True)))
model.add(Bidirectional(LSTM(lstm_out, recurrent_dropout=0.2, dropout=0.2, return_sequences=True)))
model.add(Bidirectional(GRU(lstm_out, recurrent_dropout=0.2, dropout=0.2, return_sequences=True)))
model.add(Bidirectional(LSTM(lstm_out, recurrent_dropout=0.2, dropout=0.2, return_sequences=False)))
# softmax matches the one-hot labels and the categorical_crossentropy loss below
model.add(Dense(2, activation='softmax', kernel_initializer='TruncatedNormal'))
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
print(model.summary())

# In[ ]:

Y = pd.get_dummies(df['Label']).values
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)
print(X_train.shape, Y_train.shape)
return (x + y) / 2 def data_normal(x): min_max_scaler = preprocessing.MinMaxScaler() x = min_max_scaler.fit_transform(x) return x # Audio branch frame_input = Input(shape=(513, 64)) mask_frame_input = Masking(mask_value=0.)(frame_input) print('mask_frame_input shape: ', mask_frame_input.shape) frame_l1 = Bidirectional( LSTM(100, return_sequences=True, recurrent_dropout=0.25, name='LSTM_audio_1'))(mask_frame_input) frame_l1 = BatchNormalization()(frame_l1) print('frame_l1 shape: ', frame_l1.shape) frame_weight = AttentionLayer()(frame_l1) frame_weight = BatchNormalization()(frame_weight) print('frame_att shape: ', frame_weight.shape) frame_weight_exp = Lambda(weight_expand)(frame_weight) frame_att = Lambda(weight_dot)([frame_l1, frame_weight_exp]) frame_att = Lambda(lambda x: backend.sum(x, axis=1))(frame_att) print('frame_att shape: ', frame_att.shape) dropout_frame = Dropout(0.5)(frame_att) model_frame = Model(frame_input, dropout_frame) word_input = Input(shape=(98, 513, 64))
testY = test[(2) * Step - 1:len(test), 2]

trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
print(trainX[1])
print(trainY[1])
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# Design and fit the LSTM network for this problem: a single bidirectional
# hidden layer with 28 ReLU-activated LSTM units and an output layer that makes
# a single value prediction. The network is trained for 500 epochs with a batch
# size of 1.
model = Sequential()
model.add(
    Bidirectional(LSTM(28, activation='relu'), input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=500, batch_size=1, verbose=2)

# make predictions and report the error on the train and test sets
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
trainScore_mse = mean_squared_error(trainY, trainPredict)
print('Train Score: %.2f RMSE' % (trainScore))
print('Train Score: %.2f MSE' % (trainScore_mse))
testScore = math.sqrt(mean_squared_error(testY, testPredict))
testScore_mse = mean_squared_error(testY, testPredict)
def test_load_layers(): from keras.layers import ConvLSTM2D, TimeDistributed, Bidirectional, Conv2D, Input from keras.models import Model if K.backend() == 'tensorflow' or K.backend() == 'cntk': inputs = Input(shape=(10, 20, 20, 1)) else: inputs = Input(shape=(10, 1, 20, 20)) td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs) bi_convlstm2d = Bidirectional(ConvLSTM2D(10, (3, 3)), merge_mode='concat')(td_conv) model = Model(inputs=inputs, outputs=bi_convlstm2d) weight_value_tuples = [] # TimeDistributed Conv2D layer # use 'channels_first' data format to check that the function is being called correctly for Conv2D # old: (filters, stack_size, kernel_rows, kernel_cols) # new: (kernel_rows, kernel_cols, stack_size, filters) weight_tensor_td_conv_old = list() weight_tensor_td_conv_old.append(np.zeros((15, 1, 5, 5))) weight_tensor_td_conv_old.append(np.zeros((15, ))) td_conv_layer = model.layers[1] td_conv_layer.layer.data_format = 'channels_first' weight_tensor_td_conv_new = topology.preprocess_weights_for_loading( td_conv_layer, weight_tensor_td_conv_old, original_keras_version='1') symbolic_weights = td_conv_layer.weights assert (len(symbolic_weights) == len(weight_tensor_td_conv_new)) weight_value_tuples += zip(symbolic_weights, weight_tensor_td_conv_new) # Bidirectional ConvLSTM2D layer # old ConvLSTM2D took a list of 12 weight tensors, returns a list of 3 concatenated larger tensors. weight_tensor_bi_convlstm_old = [] for j in range(2): # bidirectional for i in range(4): weight_tensor_bi_convlstm_old.append(np.zeros( (3, 3, 15, 10))) # kernel weight_tensor_bi_convlstm_old.append(np.zeros( (3, 3, 10, 10))) # recurrent kernel weight_tensor_bi_convlstm_old.append(np.zeros((10, ))) # bias bi_convlstm_layer = model.layers[2] weight_tensor_bi_convlstm_new = topology.preprocess_weights_for_loading( bi_convlstm_layer, weight_tensor_bi_convlstm_old, original_keras_version='1') symbolic_weights = bi_convlstm_layer.weights assert (len(symbolic_weights) == len(weight_tensor_bi_convlstm_new)) weight_value_tuples += zip(symbolic_weights, weight_tensor_bi_convlstm_new) K.batch_set_value(weight_value_tuples) assert np.all( K.eval(model.layers[1].weights[0]) == weight_tensor_td_conv_new[0]) assert np.all( K.eval(model.layers[1].weights[1]) == weight_tensor_td_conv_new[1]) assert np.all( K.eval(model.layers[2].weights[0]) == weight_tensor_bi_convlstm_new[0]) assert np.all( K.eval(model.layers[2].weights[1]) == weight_tensor_bi_convlstm_new[1]) assert np.all( K.eval(model.layers[2].weights[2]) == weight_tensor_bi_convlstm_new[2]) assert np.all( K.eval(model.layers[2].weights[3]) == weight_tensor_bi_convlstm_new[3]) assert np.all( K.eval(model.layers[2].weights[4]) == weight_tensor_bi_convlstm_new[4]) assert np.all( K.eval(model.layers[2].weights[5]) == weight_tensor_bi_convlstm_new[5])
def train_model(self, sentences_pair, is_similar, embedding_meta_data, model_save_directory='./'): """ Train Siamese network to find similarity between sentences in `sentences_pair` Steps Involved: 1. Pass the each from sentences_pairs to bidirectional LSTM encoder. 2. Merge the vectors from LSTM encodes and passed to dense layer. 3. Pass the dense layer vectors to sigmoid output layer. 4. Use cross entropy loss to train weights Args: sentences_pair (list): list of tuple of sentence pairs is_similar (list): target value 1 if same sentences pair are similar otherwise 0 embedding_meta_data (dict): dict containing tokenizer and word embedding matrix model_save_directory (str): working directory for where to save models Returns: return (best_model_path): path of best model """ tokenizer, embedding_matrix = embedding_meta_data[ 'tokenizer'], embedding_meta_data['embedding_matrix'] train_data_x1, train_data_x2, train_labels, leaks_train, \ val_data_x1, val_data_x2, val_labels, leaks_val = create_train_dev_set(tokenizer, sentences_pair, is_similar, self.max_sequence_length, self.validation_split_ratio) if train_data_x1 is None: print("++++ !! Failure: Unable to train model ++++") return None nb_words = len(tokenizer.word_index) + 1 # Creating word embedding layer embedding_layer = Embedding(nb_words, self.embedding_dim, weights=[embedding_matrix], input_length=self.max_sequence_length, trainable=False) # Creating LSTM Encoder lstm_layer = Bidirectional( LSTM(self.number_lstm_units, dropout=self.rate_drop_lstm, recurrent_dropout=self.rate_drop_lstm)) # Creating LSTM Encoder layer for First Sentence sequence_1_input = Input(shape=(self.max_sequence_length, ), dtype='int32') embedded_sequences_1 = embedding_layer(sequence_1_input) x1 = lstm_layer(embedded_sequences_1) # Creating LSTM Encoder layer for Second Sentence sequence_2_input = Input(shape=(self.max_sequence_length, ), dtype='int32') embedded_sequences_2 = embedding_layer(sequence_2_input) x2 = lstm_layer(embedded_sequences_2) # Creating leaks input leaks_input = Input(shape=(leaks_train.shape[1], )) leaks_dense = Dense(self.number_dense_units / 2, activation=self.activation_function)(leaks_input) # Merging two LSTM encodes vectors from sentences to # pass it to dense layer applying dropout and batch normalisation merged = concatenate([x1, x2, leaks_dense]) merged = BatchNormalization()(merged) merged = Dropout(self.rate_drop_dense)(merged) merged = Dense(self.number_dense_units, activation=self.activation_function)(merged) merged = BatchNormalization()(merged) merged = Dropout(self.rate_drop_dense)(merged) preds = Dense(1, activation='sigmoid')(merged) model = Model(inputs=[sequence_1_input, sequence_2_input, leaks_input], outputs=preds) model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc']) early_stopping = EarlyStopping(monitor='val_loss', patience=3) STAMP = 'lstm_%d_%d_%.2f_%.2f' % ( self.number_lstm_units, self.number_dense_units, self.rate_drop_lstm, self.rate_drop_dense) checkpoint_dir = model_save_directory + 'checkpoints/' + str( int(time.time())) + '/' if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) bst_model_path = checkpoint_dir + STAMP + '.h5' model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=False) tensorboard = TensorBoard(log_dir=checkpoint_dir + "logs/{}".format(time.time())) model.fit([train_data_x1, train_data_x2, leaks_train], train_labels, validation_data=([val_data_x1, val_data_x2, leaks_val], val_labels), epochs=1, batch_size=64, 
shuffle=True, callbacks=[model_checkpoint, tensorboard]) return bst_model_path
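# A hedged usage sketch for train_model. The Siamese wrapper instance
# (`siamese`), the sentence pairs, and the tokenizer/embedding_matrix objects
# are placeholders; the embedding_meta_data keys match the ones read inside the
# method, and the returned path points to the best checkpointed model.
sentences_pair = [('How are you?', 'How do you do?'),
                  ('Good morning', 'See you later')]
is_similar = [1, 0]
embedding_meta_data = {'tokenizer': tokenizer, 'embedding_matrix': embedding_matrix}
best_model_path = siamese.train_model(sentences_pair,
                                      is_similar,
                                      embedding_meta_data,
                                      model_save_directory='./')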
        ai = K.exp(eij)
        weights = ai / K.sum(ai, axis=1).dimshuffle(0, 'x')
        weighted_input = x * weights.dimshuffle(0, 1, 'x')
        return weighted_input.sum(axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], input_shape[-1])


# word-level encoder: BiGRU + attention over the words of one sentence
sentence_input = Input(shape=(MAX_SENT_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
l_lstm = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
l_dense = TimeDistributed(Dense(200))(l_lstm)
l_att = AttLayer()(l_dense)
sentEncoder = Model(sentence_input, l_att)

# lyric-level encoder: BiGRU + attention over the sentences of one lyric
Lyrics_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
Lyrics_encoder = TimeDistributed(sentEncoder)(Lyrics_input)
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True))(Lyrics_encoder)
l_dense_sent = TimeDistributed(Dense(200))(l_lstm_sent)  # must stay active: it is used on the next line
l_att_sent = AttLayer()(l_dense_sent)
preds = Dense(3, activation='softmax')(l_att_sent)  # change to the number of classes
model = Model(Lyrics_input, preds)

print("Predicting")
model.load_weights('nlpOutput2.h5')
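# A hedged prediction sketch for the hierarchical model above. It assumes the
# lyrics have already been tokenised and padded into an array of shape
# (n_samples, MAX_SENTS, MAX_SENT_LENGTH); `lyrics_data` is a placeholder name.
probs = model.predict(lyrics_data)      # shape (n_samples, 3)
pred_classes = probs.argmax(axis=-1)    # integer class index per lyric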
                           ), dtype='int32')
embedded_sequences_c = embedding_layer_c(sequence_input_c)
dropout_c = Dropout(rate=drop)(embedded_sequences_c)
rone = Lambda(reshape_one)(dropout_c)

merge_m = 'concat'  # raw_input('Enter merge mode for the character GRU: ')
merge_m_c = merge_m
dropout_gru = 0.5  # input('Enter dropout for GRU: ')
rec_dropout = dropout_gru

gru_karakter = Bidirectional(GRU(CHAR_EMBEDDING_DIM,
                                 return_sequences=False,
                                 dropout=dropout_gru,
                                 recurrent_dropout=rec_dropout,
                                 trainable=gtrainable),
                             merge_mode=merge_m,
                             weights=None)(rone)
rtwo = Lambda(reshape_two)(gru_karakter)
"""
Combine word + char model
"""
print("Model Choice:")
model_choice = 3  # input('Enter 1 for WE only, 2 for CE only, 3 for both: ')
merge_m = 'concat'  # raw_input('Enter merge mode for the word GRU: ')
# dropout = input('Enter character GRU dropout: ')
# rec_dropout = input('Enter character GRU recurrent dropout: ')
combine = 0
for word, i in word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words missing from the pretrained vocabulary keep an all-zero row;
        # otherwise use that word's pretrained vector
        embedding_matrix[i] = embedding_vector

# build models
rnn_model = Sequential()
rnn_model.add(
    Embedding(num_words,
              EMBEDDING_DIM,
              weights=[embedding_matrix],
              input_length=MAX_SEQUENCE_LENGTH,
              trainable=False))
rnn_model.add(Bidirectional(LSTM(128, implementation=2)))
rnn_model.add(Dropout(0.5))
rnn_model.add(Dense(len(labels_index), activation='softmax'))
rnn_model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['acc'])  # optimizer=RMSprop
# reduce epochs for a shorter training run
rnn_model.fit(x_train,
              y_train,
              batch_size=128,
              epochs=20,
              validation_data=(x_val, y_val))
res = rnn_model.evaluate(x_val, y_val)
print(res)
return metrics.accuracy_score(predictions, ytest), predictions ''' lx = len(embedding_matrix) inp = Input(shape=(maxlen, )) x = Embedding(lx, embed_size, weights=[embedding_matrix])(inp) x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(x) x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(x) x = MaxPooling1D(pool_size=2)(x) x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(x) x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(x) x = MaxPooling1D(pool_size=2)(x) x = Bidirectional( LSTM(300, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x) x = GlobalMaxPool1D()(x) x = Dense(300, activation="relu")(x) x = Dense(100, activation="relu")(x) x = Dropout(0.1)(x) #ylayer=numpy.asarray(ylayer) x = Dense(3, activation="sigmoid")(x) model = Model(inputs=inp, outputs=x) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1_m, precision_m, recall_m]) X_t[X_t == lx] = lx - 1 X_te[X_te == lx] = lx - 1 model.fit(X_t, ytrain, batch_size=32, epochs=2, validation_split=0.1)
def create_base_network(input_shape):
    input = Input(shape=input_shape)
    x = Bidirectional(LSTM(64))(input)
    m = Model(input, x)
    print(m.summary())
    return m
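# A hedged sketch of how such a shared encoder is typically wired into a
# Siamese model: both inputs pass through the same base network and a distance
# (here Euclidean, an assumption) is computed on the two encodings. The
# `timesteps`/`features` shape values are placeholders.
from keras.layers import Lambda
import keras.backend as K

base = create_base_network((timesteps, features))
in_a = Input(shape=(timesteps, features))
in_b = Input(shape=(timesteps, features))
emb_a, emb_b = base(in_a), base(in_b)
distance = Lambda(lambda t: K.sqrt(
    K.maximum(K.sum(K.square(t[0] - t[1]), axis=1, keepdims=True),
              K.epsilon())))([emb_a, emb_b])
siamese = Model([in_a, in_b], distance)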
print('Pad sequences (samples x time)') x_train = sequence.pad_sequences(x_train, maxlen=maxlen) #returns numpy array x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) y_train = np.array(y_train) #keras only accepts numpy arrays, not python lists y_test = np.array(y_test) x_train = np.array(x_train) x_test = np.array(x_test) #instantiate a model by using Input and Output tensors, rather than Sequential() inputs = Input(shape=(maxlen, )) #maxlen number of words per training example #tensors are passed through the layers x = Embedding(max_features, 128)(inputs) #embeddings are 128 dim vectors x = Bidirectional(LSTM(64))(x) #LSTM layer has 64 units x = Dropout(0.5)(x) #what proportion of inputs to set to 0 predictions = Dense(1, activation='sigmoid')( x ) #single sigmoidal output, predicting either 0 or 1, negative or positive sentiment #create the model that includes the various layers, including the input layer model = Model(inputs=inputs, outputs=predictions) model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) #train the model print('Train...') hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
def create_model(self): if self.config.model_type == 'LSTM': # model = Sequential() # model.add(Embedding(len(self.word_dict), self.config.max_words ,input_length = self.X.shape[1])) # model.add(LSTM(self.config.dim, return_sequences=True , recurrent_dropout=self.config.dropout)) # model.add(Dropout(self.config.dropout)) # model.add(LSTM(self.config.dim, return_sequences=True , recurrent_dropout=self.config.dropout )) # model.add(Dropout(self.config.dropout)) # model.add(LSTM(self.config.dim , recurrent_dropout=self.config.dropout)) # model.add(Dense(self.config.dim,activation='relu')) # model.add(Dense(3,activation='softmax')) # model = Sequential() # model.add(Embedding(len(self.word_dict), self.config.max_words ,input_length = self.X.shape[1])) # model.add(LSTM(self.config.dim, dropout=self.config.dropout , recurrent_dropout=self.config.dropout)) # model.add(Dropout(self.config.dropout)) # model.add(Dense(3,activation='softmax')) inp = Input(shape=(self.config.max_len,)) x = Embedding(self.config.max_words, self.config.max_len ,input_length = self.X.shape[1])(inp) x = Bidirectional(LSTM(self.config.dim, return_sequences=True, dropout=self.config.dropout, recurrent_dropout=self.config.dropout , kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01)))(x) x = GlobalMaxPool1D()(x) x = Dense(self.config.dim, activation="sigmoid")(x) x = Dropout(self.config.dropout)(x) x = Dense(3, activation='softmax')(x) model = Model(inputs=inp, outputs=x) self.model = model if self.config.model_type == 'GRU': model = Sequential() model.add(Embedding(len(self.word_dict), self.config.max_words ,input_length = self.X.shape[1])) model.add(GRU(self.config.dim, return_sequences=True , recurrent_dropout=self.config.dropout)) model.add(Dropout(self.config.dropout)) model.add(GRU(self.config.dim, return_sequences=True , recurrent_dropout=self.config.dropout )) model.add(Dropout(self.config.dropout)) model.add(GRU(self.config.dim , recurrent_dropout=self.config.dropout)) model.add(Dense(self.config.dim,activation='relu')) model.add(Dense(3,activation='softmax')) self.model = model if self.config.model_type == 'MLP': model = Sequential() model.add(Dense(len(self.word_dict), input_shape=(self.X.shape[1],) , activation="relu")) model.add(Dropout(self.config.dropout)) model.add(Dense(self.config.dim,activation='relu')) model.add(Dropout(self.config.dropout)) model.add(Dense(self.config.dim,activation="relu")) model.add(Dropout(self.config.dropout)) model.add(Dense(self.config.dim , activation="relu")) model.add(Dense(3,activation='softmax')) self.model = model if self.config.debug: print(self.model.summary())
binary=True) embedding_matrix = get_embedding_matrix(word_vec, word_index) print(embedding_matrix.shape) model = Sequential() model.add( Embedding(len(vocab) + 1, DW, weights=[embedding_matrix], trainable=False)) model.add( Bidirectional(SimpleRNN(DH, dropout=0.2, recurrent_dropout=0.1, return_sequences=True), merge_mode='concat')) model.add( Bidirectional(SimpleRNN(int(DH / 2), dropout=0.2, recurrent_dropout=0.1), merge_mode='concat')) model.add(Flatten()) model.add(Dense(4, activation='softmax')) model.summary() model.compile( loss='categorical_crossentropy',
print('y_train shape', y_train.shape)
print('x_val shape', x_val.shape)
print('y_val shape', y_val.shape)
print('x_test shape', x_test.shape)
print('y_test shape', y_test.shape)

##########################################################
# DEFINING THE NEURAL NETWORK
##########################################################
inp = Input(batch_shape=(batch_size, ) + x_train.shape[1:])
inp_resh = Reshape((x_train.shape[1] // 4, 4))(inp)  # integer division so Reshape receives an int
inp_drop = Dropout(0.15)(inp_resh)
bi = Bidirectional(LSTM(N_LSTM,
                        recurrent_dropout=0.25,
                        return_sequences=True,
                        kernel_regularizer='l2',
                        recurrent_regularizer='l2'),
                   name="BLSTM_layer")(inp_drop)
flat = Flatten()(bi)
out = Dense(1, activation='sigmoid')(flat)

##########################################################
# Model
##########################################################
lstm_model = Model(inp, out)
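# A minimal compile step for the model above, assuming a binary target to match
# the single sigmoid output; the optimizer choice is illustrative only.
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
lstm_model.summary()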
def rnn(embedding_matrix, config): if config['rnn'] == 'gru' and config['gpu']: encode = Bidirectional( CuDNNGRU(config['rnn_output_size'], return_sequences=True)) encode2 = Bidirectional( CuDNNGRU(config['rnn_output_size'], return_sequences=True)) encode3 = Bidirectional( CuDNNGRU(config['rnn_output_size'], return_sequences=True)) else: encode = Bidirectional( CuDNNLSTM(config['rnn_output_size'], return_sequences=True)) encode2 = Bidirectional( CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True)) encode3 = Bidirectional( CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True)) q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input') q2 = Input((config['max_length'], ), dtype='int32', name='q2_input') embedding_layer = Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1], trainable=config['embed_trainable'], weights=[embedding_matrix] # mask_zero=True ) q1_embed = embedding_layer(q1) q2_embed = embedding_layer(q2) # bsz, 1, emb_dims q1_embed = BatchNormalization(axis=2)(q1_embed) q2_embed = BatchNormalization(axis=2)(q2_embed) q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed) q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed) q1_encoded = encode(q1_embed) q2_encoded = encode(q2_embed) q1_encoded = Dropout(0.2)(q1_encoded) q2_encoded = Dropout(0.2)(q2_encoded) # 双向 # q1_encoded = encode2(q1_encoded) # q2_encoded = encode2(q2_encoded) # resnet rnn_layer2_input1 = concatenate([q1_embed, q1_encoded]) rnn_layer2_input2 = concatenate([q2_embed, q2_encoded]) q1_encoded2 = encode2(rnn_layer2_input1) q2_encoded2 = encode2(rnn_layer2_input2) # add res shortcut res_block1 = add([q1_encoded, q1_encoded2]) res_block2 = add([q2_encoded, q2_encoded2]) rnn_layer3_input1 = concatenate([q1_embed, res_block1]) rnn_layer3_input2 = concatenate([q2_embed, res_block2]) # rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2]) # rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2]) q1_encoded3 = encode3(rnn_layer3_input1) q2_encoded3 = encode3(rnn_layer3_input2) # merged1 = GlobalMaxPool1D()(q1_encoded3) # merged2 = GlobalMaxPool1D()(q2_encoded3) # q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1) # q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1) # merged1 = concatenate([q1_encoded2, q1_embed], axis=-1) # merged2 = concatenate([q2_encoded2, q2_embed], axis=-1) # # TODO add attention rep , maxpooling rep q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3]) q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3]) merged1 = GlobalMaxPool1D()(q1_encoded3) merged2 = GlobalMaxPool1D()(q2_encoded3) # avg1 = GlobalAvgPool1D()(q1_encoded3) # avg2 = GlobalAvgPool1D()(q2_encoded3) # merged1 = concatenate([max1,avg1]) # merged2 = concatenate([max2,avg2]) sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2]) mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2]) # jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)- # K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2]) # merged = Concatenate()([merged1, merged2, mul_rep, sub_rep,jaccard_rep]) feature_input = Input(shape=(config['feature_length'], )) feature_dense = BatchNormalization()(feature_input) feature_dense = Dense(config['dense_dim'], activation='relu')(feature_dense) merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense]) # Classifier dense = Dropout(config['dense_dropout'])(merged) dense = 
BatchNormalization()(dense) dense = Dense(config['dense_dim'], activation='relu')(dense) dense = Dropout(config['dense_dropout'])(dense) dense = BatchNormalization()(dense) predictions = Dense(1, activation='sigmoid')(dense) model = Model(inputs=[q1, q2, feature_input], outputs=predictions) opt = optimizers.get(config['optimizer']) K.set_value(opt.lr, config['learning_rate']) model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1]) return model
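# A hedged example of the config dictionary consumed by rnn(). Only keys that
# are actually read in the function body appear here, and all values are
# illustrative; `embedding_matrix` is assumed to be a (vocab_size, embed_dim)
# numpy array.
config = {
    'rnn': 'gru',
    'gpu': True,
    'rnn_output_size': 128,
    'max_length': 30,
    'embed_trainable': False,
    'spatial_dropout_rate': 0.2,
    'feature_length': 20,
    'dense_dim': 256,
    'dense_dropout': 0.3,
    'optimizer': 'adam',
    'learning_rate': 0.001,
}
model = rnn(embedding_matrix, config)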
x = [] y = [] for lyrics in non_taylor_lyrics: for i in range(0, len(lyrics) - sequence_len, step): x.append(lyrics[i:i + sequence_len]) y.append(lyrics[i + sequence_len]) train_x, test_x, train_y, test_y = train_test_split(x, y, train_size=0.8, random_state=2) model = Sequential() model.add(Bidirectional(LSTM(128), input_shape=(sequence_len, total_words))) model.add(Dropout(0.2)) model.add(Dense(total_words, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam') def generator(sentence_list, next_word_list, batch_size): index = 0 while True: X = np.zeros((batch_size, sequence_len, total_words), dtype=np.bool) Y = np.zeros((batch_size, total_words), dtype=np.bool) for i in range(batch_size): for t, w in enumerate(sentence_list[index]): X[i, t, word_to_indices[w]] = 1 Y[i, word_to_indices[next_word_list[index]]] = 1
print('Pad sequences (samples x time)') x_train = sequence.pad_sequences(x_train, maxlen=maxlen) #returns numpy array x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) y_train = np.array(y_train) #keras only accepts numpy arrays, not python lists y_test = np.array(y_test) x_train = np.array(x_train) x_test = np.array(x_test) #instantiate sequential model model = Sequential() #add layers to model (in order! because you are using a sequential model) model.add(Embedding(max_features, 128)) #embeddings are 128 dim vectors model.add(Bidirectional(LSTM(64))) #LSTM layer has 64 units model.add(Dropout(0.5)) #what proportion of inputs to set to 0 model.add( Dense(1, activation='sigmoid') ) #single sigmoidal output, predicting either 0 or 1, negative or positive sentiment #compile the model model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) #train the model print('Train...') hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=1, validation_data=[x_test, y_test])
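# A short follow-up sketch: after the single training epoch above, the model is
# usually evaluated on the held-out set; the score/acc names are illustrative.
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test loss:', score)
print('Test accuracy:', acc)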
print(sequences.shape) X, y = sequences[:,:-1], sequences[:,-1] #sequences = [to_categorical(x, num_classes=vocab_size) for x in X] #X = array(sequences) y = to_categorical(y, num_classes=vocab_size) ''' print(X) print(y) exit() ''' #X = X[:100,:] #y = y[:100,:] model = Sequential() model.add(Embedding(vocab_size, 64, input_length=maxlen)) model.add(Bidirectional(LSTM(64))) model.add(Dropout(0.5)) #model.add(Dense(1, activation='sigmoid')) model.add(Dense(vocab_size, activation='sigmoid')) # try using different optimizers and different optimizer configs model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) print('Train...') model.fit(X, y, batch_size=batch_size, epochs=5) # get embeddings embeddings = model.layers[0].get_weights()[0]
num_features = W.shape[1] # 400 logging.info("dimension num of word vector [num_features]: %d" % num_features) Routings = 20 Num_capsule = 60 Dim_capsule = 120 sequence_input = Input(shape=(maxlen, ), dtype='int32') embedded_sequences = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False)(sequence_input) embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences) x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedded_sequences) x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x) capsule = Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule, routings=Routings, share_weights=True)(x) # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule) capsule = Flatten()(capsule) capsule = Dropout(0.1)(capsule) output = Dense(4, activation='softmax')(capsule) model = Model(inputs=[sequence_input], outputs=output) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', f1]) checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc',
x[j, 0] = 1 return x except: print(text) def decoder_data(x): x = x.argmax(axis=-1) return "".join(alphabet[i] for i in x) print(encoder_data("Tôi tên là việt hoàng").shape) print(decoder_data(encoder_data("Tôi tên là Việt Hoàng"))) encoder = LSTM(256, input_shape=(MAXLEN, len(alphabet)), return_sequences=True) decoder = Bidirectional(LSTM(256, return_sequences=True, dropout=0.2)) model = Sequential() model.add(encoder) model.add(decoder) model.add(TimeDistributed(Dense(256))) model.add(Activation("relu")) model.add(TimeDistributed(Dense(len(alphabet)))) model.add(Activation("softmax")) model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=["accuracy"]) model.summary() from keras.utils import plot_model plot_model(model,
                                                    test_size=0.2,
                                                    random_state=43)

batch_size = 32
epochs = 1
hash_bits = 128


def custom_activation(x):
    # scaled sigmoid used for the hash-code layer
    return (K.sigmoid(x) * 10)


visible = Input(shape=(X.shape[1], X.shape[2]))
blstm_1 = Bidirectional(
    LSTM(1024,
         dropout=0.1,
         recurrent_dropout=0.5,
         input_shape=(X.shape[1], X.shape[2]),
         return_sequences=True))(visible)
blstm_2 = Bidirectional(
    LSTM(1024,
         dropout=0.1,
         recurrent_dropout=0.5,
         input_shape=(X.shape[1], X.shape[2]),
         return_sequences=False))(blstm_1)
Dense_2 = Dense(hash_bits, activation=custom_activation)(blstm_2)
batchNorm = BatchNormalization()(Dense_2)
enver = Dense(128, activation='sigmoid')(batchNorm)
batchNorm2 = BatchNormalization()(enver)
Dense_3 = Dense(4, activation='sigmoid')(batchNorm2)
model = Model(inputs=visible, outputs=Dense_3)  # inputs=/outputs=; the Keras 1 input=/output= names were removed
print(model.summary())
# 000001 00000001 [ TRASH - UNSTABLE ] # 000001 0000001 [ TRASH - UNSTABLE ] # 0000001 000000001 [ TRASH - UNSTABLE ] # 00000001 0000000001 [BEST 32.32 + 21.58] # 00000001 000000001 [MEDIUM 35.54 + 13.77] # 0000001 0000000001 [MEDIUM 31.94 + 21.13] # 000000001 00000000001 [ TRASH ] # 000000001 0000000001 [ TRASH ] # recurrent_dropout=0.1 (recurrent_dropout: 10% possibility to drop of the connections that simulate LSTM memory cells) # units = 100 / 0.55 = 182 neurons (to account for 0.55 dropout) model = Bidirectional( LSTM(units=100, return_sequences=True, activity_regularizer=l1(0.0000000001), recurrent_constraint=max_norm(2)))( model) # input_shape=(1, MAX_LEN, VECT_SIZE) # model = Dropout(0.3)(model) # 0.5 # model = TimeDistributed(Dense(number_labels, activation="relu"))(model) # a dense layer as suggested by neuralNer model = Dense(number_labels, activation=None)( model) # activation='linear' (they are the same) crf = CRF() # CRF layer { SHOULD I SET -> number_labels+1 (+1 -> PAD) } out = crf(model) # output model = Model(inputs=inpt, outputs=out) # set learning rate #lr_rate = InverseTimeDecay(initial_learning_rate=0.05, decay_rate=4, decay_steps=steps_per_epoch) # lr_rate = ExponentialDecay(initial_learning_rate=0.01, decay_rate=0.5, decay_steps=10000)
hash_embedding = pd.read_csv('../preprocessing/chunk-auto-encoder-2/auto-encoder-embeddings.txt', delimiter=' ', header=None) hash_embedding = hash_embedding.values hash_embedding = np.concatenate([np.zeros((1,hash_length)),hash_embedding, np.random.rand(1,hash_length)]) embed_index_input = Input(shape=(step_length,)) embedding = Embedding(emb_vocab+2, emb_length, weights=[word_embedding], mask_zero=True, input_length=step_length)(embed_index_input) hash_index_input = Input(shape=(step_length,)) encoder_embedding = Embedding(hash_vocab+2, hash_length, weights=[hash_embedding], mask_zero=True, input_length=step_length)(hash_index_input) pos_input = Input(shape=(step_length, pos_length)) senna_hash_pos_merge = merge([embedding, encoder_embedding, pos_input], mode='concat') input_mask = Masking(mask_value=0)(senna_hash_pos_merge) dp_1 = Dropout(0.6)(input_mask) hidden_1 = Bidirectional(LSTM(128, return_sequences=True))(dp_1) hidden_2 = Bidirectional(LSTM(128, return_sequences=True))(hidden_1) dp_2 = Dropout(0.6)(hidden_2) output = TimeDistributed(Dense(output_length, activation='softmax'))(dp_2) model = Model(input=[embed_index_input,hash_index_input,pos_input], output=output) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) print(model.summary()) number_of_train_batches = int(math.ceil(float(train_samples)/batch_size)) number_of_dev_batches = int(math.ceil(float(dev_samples)/batch_size))
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4) conv_5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool_4) # Batch normalization layer batch_norm_5 = BatchNormalization()(conv_5) conv_6 = Conv2D(512, (3, 3), activation='relu', padding='same')(batch_norm_5) batch_norm_6 = BatchNormalization()(conv_6) pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6) conv_7 = Conv2D(512, (2, 2), activation='relu')(pool_6) squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7) # bidirectional LSTM layers with units=128 blstm_1 = Bidirectional(LSTM(500, return_sequences=True, dropout=0.2))(squeezed) blstm_2 = Bidirectional(LSTM(500, return_sequences=True, dropout=0.2))(blstm_1) outputs = Dense(len(char_list) + 1, activation='softmax')(blstm_2) # model to be used at test time act_model = Model(inputs, outputs) labels = Input(name='the_labels', shape=[max_label_len], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') def ctc_lambda_func(args): y_pred, labels, input_length, label_length = args # the 2 is critical here since the first couple outputs of the RNN
print('data preprocessing finished.') #model1 start Inputshape = (len(PPG_train[0]), len(PPG_train[0][0])) X_input = Input(Inputshape) X_CNN = Conv1D(64, 3, border_mode='same', activation='relu')(X_input) X_CNN = Dropout(0.25)(X_CNN) X_CNN = Conv1D(64, 3, border_mode='same', activation='relu')(X_CNN) X_CNN = Dropout(0.25)(X_CNN) X_CNN = Conv1D(128, 3, border_mode='same', activation='relu')(X_CNN) X_CNN = Dropout(0.25)(X_CNN) X_CNN = Conv1D(128, 3, border_mode='same', activation='relu')(X_CNN) X_CNN = Dropout(0.25)(X_CNN) LayerUnits = 256 X0 = Bidirectional(GRU(LayerUnits, return_sequences=True), merge_mode='concat')(X_CNN) X0 = Dropout(0.2)(X0) X0 = Activation('relu')(X0) X = GRU(LayerUnits, return_sequences=True)(X0) X = Dropout(0.2)(X) X = Activation('relu')(X) X1 = GRU(LayerUnits, return_sequences=True)(X) X1 = Dropout(0.2)(X1) X1 = Activation('relu')(X1) #X2 = concatenate([X, X1]) X2 = GRU(LayerUnits, return_sequences=True)(X1) X2 = Dropout(0.2)(X2) X2 = Activation('relu')(X2)
ret = pad_sequences(ret, maxlen=MAX_LENGTH) return ret emb = pickle.load(open('emb.pickle', 'rb')) model = None if len(sys.argv) == 2: print("load " + sys.argv[1] + " to keep training") model = load_model(sys.argv[1]) else: model = Sequential() model.add(emb) model.add( Bidirectional( GRU(units=64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))) model.add( Bidirectional( GRU(units=32, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))) model.add( Bidirectional( GRU(units=16, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))) model.add(