def make_embedding(vocab_size, wv_size, init=None, fixed=False,
                   constraint=ConstNorm(3.0, True), **kwargs):
    '''
    Takes parameters and makes a word vector embedding.

    Args:
    -----
    vocab_size: integer -- how many words are in your vocabulary
    wv_size: integer -- how large the word vectors should be
    init: initial word vectors -- defaults to None. If you specify initial
        word vectors, they need to be an np.array of shape (vocab_size, wv_size)
    fixed: boolean -- whether the word vectors should be kept fixed

    Returns:
    --------
    a Keras Embedding layer
    '''
    if (init is not None) and len(init.shape) == 2:
        # Keras expects a list of arrays for the initial weights
        emb = Embedding(vocab_size, wv_size, weights=[init], W_constraint=constraint)
    else:
        emb = Embedding(vocab_size, wv_size, W_constraint=constraint)
    if fixed:
        emb.trainable = False
        # emb.params = []
    return emb
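For reference, a minimal sketch of the same helper against tf.keras 2.x is shown below. It assumes the caller passes a NumPy `init` matrix; `W_constraint` is replaced by `embeddings_constraint` (with `MaxNorm(3.0)` standing in for the project's `ConstNorm`), and freezing is done with `trainable=False` at construction time.

import numpy as np
from tensorflow.keras.layers import Embedding
from tensorflow.keras.constraints import MaxNorm

def make_embedding_tf2(vocab_size, wv_size, init=None, fixed=False,
                       constraint=MaxNorm(3.0)):
    # Sketch only: MaxNorm stands in for the original ConstNorm constraint.
    kwargs = {}
    if init is not None and init.shape == (vocab_size, wv_size):
        kwargs['weights'] = [init]  # tf.keras 2.x still accepts an initial weight list here
    return Embedding(vocab_size, wv_size,
                     embeddings_constraint=constraint,
                     trainable=not fixed, **kwargs)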
def pretrained_word_emb(vocab, emb_dim):
    word2emb = vocab['word'].load_word2emb()
    word_emb = Embedding(len(vocab['word']), emb_dim)
    # copy the layer's weight matrix and overwrite rows with pretrained vectors;
    # row i corresponds to the i-th key returned by word2emb
    W = word_emb.get_weights()[0]
    for i, word in enumerate(word2emb.keys()):
        W[i] = word2emb[word]
    word_emb.set_weights([W])
    return word_emb
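Depending on the Keras version, get_weights() can return an empty list until the layer has actually been built, so an alternative (a sketch, assuming the same vocab and word2emb objects) is to assemble the matrix first and hand it to the constructor:

import numpy as np
from keras.layers import Embedding

def pretrained_word_emb_from_matrix(vocab, emb_dim):
    word2emb = vocab['word'].load_word2emb()
    vocab_size = len(vocab['word'])
    # start from small random values, then overwrite the rows we have vectors for
    W = np.random.uniform(-0.05, 0.05, (vocab_size, emb_dim))
    for i, word in enumerate(word2emb.keys()):
        if i < vocab_size:
            W[i] = word2emb[word]
    return Embedding(vocab_size, emb_dim, weights=[W])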
def build_graph(graph, embedding_size=100, embedding_path=None, token2idx=None,
                input_dropout_rate=0.25, dropout_rate=0.5, l1=None, l2=None,
                convolutional_kernels=16, filter_extensions=[3, 4, 5],
                fix_embeddings=False, max_features=100000, max_len=100,
                output_dim=80):
    '''
    Builds a Keras Graph model that, given a query (as a list of token indices),
    returns a vector of output_dim non-negative weights that sum to 1.

    The convolutional architecture is inspired by:
    Yoon Kim - Convolutional Neural Networks for Sentence Classification - arXiv:1408.5882v2
    '''
    regularizer = utils.get_regularizer(l1, l2)
    graph.add_input(name='input_query', input_shape=(None,), dtype='int32')

    E = None
    if embedding_path is not None:
        E = utils.read_embeddings(embedding_path, token2idx=token2idx,
                                  max_features=max_features)
    embedding_layer = Embedding(input_dim=max_features, output_dim=embedding_size,
                                input_length=max_len, weights=E)
    if fix_embeddings is True:
        embedding_layer.params = []
        embedding_layer.updates = []
    graph.add_node(embedding_layer, name='embedding', input='input_query')
    graph.add_node(Dropout(input_dropout_rate), name='embedding_dropout',
                   input='embedding')

    flatten_layer_names = []
    for w_size in filter_extensions:
        convolutional_layer = Convolution1D(input_dim=embedding_size,
                                            nb_filter=convolutional_kernels,
                                            filter_length=w_size,
                                            border_mode='valid',
                                            activation='relu',
                                            W_regularizer=regularizer,
                                            subsample_length=1)
        convolutional_layer_name = 'convolutional' + str(w_size)
        graph.add_node(convolutional_layer, name=convolutional_layer_name,
                       input='embedding_dropout')

        pool_length = convolutional_layer.output_shape[1]
        pooling_layer = MaxPooling1D(pool_length=pool_length)
        pooling_layer_name = 'pooling' + str(w_size)
        graph.add_node(pooling_layer, name=pooling_layer_name,
                       input=convolutional_layer_name)

        flatten_layer_name = 'flatten' + str(w_size)
        flatten_layer = Flatten()
        graph.add_node(flatten_layer, name=flatten_layer_name,
                       input=pooling_layer_name)
        flatten_layer_names += [flatten_layer_name]

    graph.add_node(Dropout(dropout_rate), name='dropout',
                   inputs=flatten_layer_names, merge_mode='concat')
    dense_layer = Dense(output_dim=output_dim, W_regularizer=regularizer)
    graph.add_node(dense_layer, name='dense', input='dropout')
    softmax_layer = Activation('softmax')
    graph.add_node(softmax_layer, name='softmax', input='dense')
    return graph
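The Graph container and the Keras 1 argument names used above (nb_filter, filter_length, border_mode, W_regularizer) were removed in Keras 2. A minimal functional-API sketch of the same Kim-style multi-width CNN, using the default hyperparameters from the signature above (regularizers and embedding loading omitted), might look like this:

from tensorflow.keras import layers, Model

def build_kim_cnn(max_features=100000, embedding_size=100, max_len=100,
                  kernels=16, widths=(3, 4, 5), output_dim=80):
    inp = layers.Input(shape=(max_len,), dtype='int32')
    emb = layers.Embedding(max_features, embedding_size)(inp)
    emb = layers.Dropout(0.25)(emb)
    pooled = []
    for w in widths:
        conv = layers.Conv1D(kernels, w, padding='valid', activation='relu')(emb)
        pooled.append(layers.GlobalMaxPooling1D()(conv))  # max over time, one vector per width
    x = layers.Concatenate()(pooled)
    x = layers.Dropout(0.5)(x)
    out = layers.Dense(output_dim, activation='softmax')(x)
    return Model(inp, out)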
def lstm():
    data, targets, filenames, embedding_matrix, word_index = preprocess_embedding()
    EMBEDDING_DIM = 300
    MAX_SEQUENCE_LENGTH = 50
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False,
                                name='layer_embedding')  # mask_zero=True,
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)

    x1 = LSTM(150, return_sequences=True, name='lstm_1')(embedded_sequences)
    # x2 = LSTM(75, return_sequences=True, name='lstm_2')(x1)
    encoded = LSTM(30, name='lstm_3')(x1)
    x3 = RepeatVector(MAX_SEQUENCE_LENGTH, name='layer_repeat')(encoded)
    # x4 = LSTM(75, return_sequences=True, name='lstm_4')(x3)
    x5 = LSTM(150, return_sequences=True, name='lstm_5')(x3)
    decoded = LSTM(300, return_sequences=True, activation='linear', name='lstm_6')(x5)

    sequence_autoencoder = Model(sequence_input, decoded)
    # print(sequence_autoencoder.get_layer('lstm_6').output)
    encoder = Model(sequence_input, encoded)
    sequence_autoencoder.compile(loss='cosine_proximity', optimizer='sgd')  # , metrics=['acc'])

    embedding_layer = Model(inputs=sequence_autoencoder.input,
                            outputs=sequence_autoencoder.get_layer('layer_embedding').output)
    sequence_autoencoder.fit(data, embedding_layer.predict(data), epochs=5)

    # for i in sequence_autoencoder.layers[3].get_weights()[0]:
    #     print(i)
    # print(sequence_autoencoder.layers[3].get_weights()[0][1])
    # print(sequence_autoencoder.layers[1].get_weights()[0][1].shape)
    # print(sequence_autoencoder.layers[2].get_weights()[0][1].shape)
    # print(sequence_autoencoder.layers[3].get_weights()[0][1].shape)
    # print(sequence_autoencoder.layers[4].get_weights()[0][1].shape)
    # print(sequence_autoencoder.layers[5].get_weights()[0][1].shape)
    # print(sequence_autoencoder.layers[6].get_weights()[0][1].shape)
    # print(sequence_autoencoder.layers[7].get_weights()[0][1].shape)

    csvname = 'lstm_autoencoder_weight'
    write_vec_to_csv(sequence_autoencoder.layers[3].get_weights()[0],
                     targets, filenames, csvname)
# In[32]:
maxword = 400
x_train = sequence.pad_sequences(x_train, maxlen=maxword)
x_test = sequence.pad_sequences(x_test, maxlen=maxword)
vocab_size = np.max([np.max(x_train[i]) for i in range(x_train.shape[0])]) + 1
print(vocab_size)

# Build the network

# In[33]:
model = Sequential()
# embedding layer
model.add(Embedding(vocab_size, 64, input_length=maxword))
# vectorization
model.add(Flatten())
# fully connected layers
model.add(Dense(2048, activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
def run_model_varyembed(dataset, numhidden, hiddendim, idx2word, idx2label, w2v,
                        basedir, embedding_dim=400, validate=True, num_epochs=30):
    train_toks, valid_toks, test_toks, \
        train_lex, valid_lex, test_lex, \
        train_y, valid_y, test_y = dataset

    maxlen = max([len(l) for l in train_lex])
    if len(valid_lex) > 0:
        maxlen = max(maxlen, max([len(l) for l in valid_lex]))
    maxlen = max(maxlen, max([len(l) for l in test_lex]))

    vocsize = max(idx2word.keys()) + 1
    nclasses = max(idx2label.keys()) + 1

    # Pad inputs to max sequence length and turn labels into one-hot vectors
    train_lex = sequence.pad_sequences(train_lex, maxlen=maxlen)
    valid_lex = sequence.pad_sequences(valid_lex, maxlen=maxlen)
    test_lex = sequence.pad_sequences(test_lex, maxlen=maxlen)
    train_y = sequence.pad_sequences(train_y, maxlen=maxlen)
    valid_y = sequence.pad_sequences(valid_y, maxlen=maxlen)
    test_y = sequence.pad_sequences(test_y, maxlen=maxlen)
    train_y = vectorize_set(train_y, maxlen, nclasses)
    valid_y = vectorize_set(valid_y, maxlen, nclasses)
    test_y = vectorize_set(test_y, maxlen, nclasses)

    # Build the model
    ## BI-DIRECTIONAL
    print('Building the model...')
    H = numhidden
    model = Graph()
    model.add_input(name='input', input_shape=[maxlen], dtype='int')

    # Add embedding layer
    if w2v is None:
        model.add_node(Embedding(vocsize, embedding_dim, init='lecun_uniform',
                                 input_length=maxlen),
                       name='embed', input='input')
    else:
        embeds = init_embedding_weights(idx2word, w2v)
        embed_dim = w2v.syn0norm.shape[1]
        model.add_node(Embedding(vocsize, embed_dim, input_length=maxlen,
                                 weights=[embeds], mask_zero=True),
                       name='embed', input='input')

    # Build first hidden layer
    model.add_node(LSTM(hiddendim, return_sequences=True, activation='tanh'),
                   name='forward0', input='embed')
    model.add_node(Dropout(0.1), name='dropout0f', input='forward0')
    model.add_node(LSTM(hiddendim, return_sequences=True, go_backwards=True,
                        activation='tanh'),
                   name='backwards0', input='embed')
    model.add_node(Dropout(0.1), name='dropout0b', input='backwards0')

    # Build subsequent hidden layers
    if H > 1:
        for i in range(1, H):
            model.add_node(LSTM(hiddendim, return_sequences=True, activation='tanh'),
                           name='forward%d' % i, input='dropout%df' % (i - 1))
            model.add_node(Dropout(0.1), name='dropout%df' % i, input='forward%d' % i)
            model.add_node(LSTM(hiddendim, return_sequences=True, go_backwards=True,
                                activation='tanh'),
                           name='backwards%d' % i, input='dropout%db' % (i - 1))
            model.add_node(Dropout(0.1), name='dropout%db' % i, input='backwards%d' % i)

    # Finish up the network
    model.add_node(TimeDistributedDense(nclasses), name='tdd',
                   inputs=['dropout%df' % (H - 1), 'dropout%db' % (H - 1)],
                   merge_mode='ave')
    model.add_node(Activation('softmax'), name='softmax', input='tdd')
    model.add_output(name='output', input='softmax')
    model.compile(optimizer='rmsprop', loss={'output': 'categorical_crossentropy'})

    # Set up callbacks
    fileprefix = 'embed_varied_'
    am = approximateMatch.ApproximateMatch_SEQ(valid_toks, valid_y, valid_lex, idx2label,
                                               pred_dir=os.path.join(basedir, 'predictions'),
                                               fileprefix=fileprefix)
    mc = callbacks.ModelCheckpoint(
        os.path.join(basedir, 'models', 'embedding.model.weights.{epoch:02d}.hdf5'))
    cbs = [am, mc]
    if validate:
        early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=3)
        cbs.append(early_stopping)

    # Train the model
    print('Training...')
    hist = model.fit({'input': train_lex, 'output': train_y},
                     nb_epoch=num_epochs, batch_size=1,
                     validation_data={'input': valid_lex, 'output': valid_y},
                     callbacks=cbs)

    if validate:
        val_f1, best_model = learning_curve(
            hist,
            preddir=os.path.join(basedir, 'predictions'),
            pltname=os.path.join(basedir, 'charts',
                                 'hist_varyembed%d_nhidden%d.pdf' % (hiddendim, numhidden)),
            fileprefix=fileprefix)
    else:
        best_model = num_epochs - 1
        val_f1 = 0.0

    # Save model
    json_string = model.to_json()
    open(os.path.join(basedir, 'models', 'embedding_model_architecture.json'),
         'w').write(json_string)

    # Test
    bestmodelfile = os.path.join(basedir, 'models',
                                 'embedding.model.weights.%02d.hdf5' % best_model)
    shutil.copyfile(bestmodelfile, bestmodelfile.replace('.hdf5', '.best.hdf5'))
    if validate:
        model = model_from_json(
            open(os.path.join(basedir, 'models',
                              'embedding_model_architecture.json')).read())
        model.load_weights(bestmodelfile)

    scores = predict_score(model, test_lex, test_toks, test_y,
                           os.path.join(basedir, 'predictions'), idx2label, maxlen,
                           fileprefix=fileprefix)
    scores['val_f1'] = val_f1
    return scores, hist.history, best_model
from keras.datasets import imdb
from keras.layers import Flatten, Dense
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.preprocessing import sequence

# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)
print(X_test.shape)

# create the model
model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=2, batch_size=128, verbose=2)
numpy.random.seed(7)
top_words = 5000  # only keep the 5000 most frequent words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

embedding_vector_length = 32  # each word is represented by a 32-dimensional vector
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
# model.add(Dropout(0.2))
# model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train,
tokenizer.fit_on_texts(x_train)
word_index = tokenizer.word_index
print(word_index)
num_words = len(word_index) + 1

X_train = tokenizer.texts_to_sequences(x_train)
print('example:', X[0])

max_length = 2083
X_train = pad_sequences(X_train, maxlen=max_length, padding='pre')
X_train = X_train.astype(float)
Y_train = Y_train.astype(float)

model = Sequential()
model.add(Embedding(num_words, 64, input_length=max_length))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(128))
model.add(Dense(20, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, Y_train, epochs=20, batch_size=128)

x_train = X[0:729]
Y_train = Y[0:729], Y[1458:]
for i in range(1458, len(X)):
    x_train.append(X[i])
print('train_y shape:', train_y.shape)

print('\nConfiguring session...')
sess = tf.Session()
sess.as_default()
set_session(sess)  # set this TensorFlow session as the default session for Keras

print('\nSetting up model...')
g = tf.get_default_graph()
with g.device('/device:GPU:0'):
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=emdedding_size,
                        weights=[pretrained_weights]))
    model.add(LSTM(units=emdedding_size))
    model.add(Dense(units=vocab_size))
    model.add(Activation('softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

print("Vocab size: " + str(vocab_size))
print("Embed size: " + str(emdedding_size))

# Initializing the variables
init = tf.global_variables_initializer()
fileExists = os.path.isfile('model.h5')
if False:
    model = load_model("model.h5")
    sess = get_session()
batch_size = 64
max_len = 500
print "max_len ", max_len
print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

max_features = 5000
print('Build model...')
embedding_vector_length = 32
model = Sequential()
model.add(Embedding(max_features, embedding_vector_length, input_length=max_len))
model.add(Dropout(0.2))
model.add(LSTM(100))
model.add(Dropout(0.2))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train,
          batch_size=batch_size,
          nb_epoch=50,
train_amount = int(train_ratio * len(labels))  # len(labels) is the total number of samples
train_x = padded_docs[:train_amount]
train_y = labels[:train_amount]
test_x = padded_docs[train_amount:]
test_y = labels[train_amount:]

# Build the model
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers.core import Flatten, Dense
from keras.layers import Conv1D, LSTM, Dropout, Bidirectional
from keras.layers.convolutional import MaxPooling1D
from keras import regularizers

model = Sequential()
model.add(Embedding(vocab_size, vector_size, input_length=max_length))
model.add(Conv1D(filters=30, kernel_size=3, padding='same', activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Bidirectional(LSTM(50, return_sequences=True)))
# model.add(Bidirectional(LSTM(50)))
model.add(Dropout(0.5))
model.add(Flatten())
# model.add(Dense(1, activation='softmax', kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l2(0.01)))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print(model.summary())  # print model information

# Fit the model
sampling_table = sequence.make_sampling_table(vocab_size)
couples, labels = skipgrams(data, vocab_size, window_size=window_size,
                            sampling_table=sampling_table)
word_target, word_context = zip(*couples)
word_target = np.array(word_target, dtype="int32")
word_context = np.array(word_context, dtype="int32")
print(couples[:10], labels[:10])

# create some input variables
input_target = Input((1,))
input_context = Input((1,))

embedding = Embedding(vocab_size, vector_dim, input_length=1, name='embedding')
target = embedding(input_target)
target = Reshape((vector_dim, 1))(target)
context = embedding(input_context)
context = Reshape((vector_dim, 1))(context)

# setup a cosine similarity operation which will be output in a secondary model
similarity = merge([target, context], mode='cos', dot_axes=0)

# now perform the dot product operation to get a similarity measure
dot_product = merge([target, context], mode='dot', dot_axes=1)
dot_product = Reshape((1,))(dot_product)

# add the sigmoid output layer
output = Dense(1, activation='sigmoid')(dot_product)

# create the primary training model
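The merge(..., mode='cos') and mode='dot' helpers used above were removed in Keras 2. A sketch of the same target/context similarity wiring with the Dot layer, assuming vocab_size and vector_dim are defined as above:

from tensorflow.keras.layers import Input, Embedding, Reshape, Dot, Dense
from tensorflow.keras.models import Model

input_target = Input((1,))
input_context = Input((1,))
embedding = Embedding(vocab_size, vector_dim, input_length=1, name='embedding')
target = Reshape((vector_dim,))(embedding(input_target))
context = Reshape((vector_dim,))(embedding(input_context))

similarity = Dot(axes=1, normalize=True)([target, context])  # cosine similarity
dot_product = Dot(axes=1)([target, context])                  # raw dot product
output = Dense(1, activation='sigmoid')(dot_product)

train_model = Model([input_target, input_context], output)
validation_model = Model([input_target, input_context], similarity)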
X_test = np.load(path + "aclImdb/X_val.npy")
y_test = np.load(path + "aclImdb/y_val.npy")
y_test = np.reshape(y_test, (-1, 1))
print(X_train[0])

# Pad the sequences to the same length
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Using the embedding layer from Keras
embedding_vector_length = 300
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))

# Convolutional model (3x conv, flatten, 2x dense)
model.add(Convolution1D(64, 3, padding='same'))
model.add(Convolution1D(32, 3, padding='same'))
model.add(Convolution1D(16, 3, padding='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180, activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Log to TensorBoard
tensorBoardCallback = TensorBoard(log_dir='./logs', write_graph=True)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
def _build_network(self, vocab_size, maxlen, emb_weights=None, hidden_units=256,
                   trainable=False):
    print('Build model...')
    model = Sequential()
    if emb_weights is None:
        model.add(Embedding(vocab_size, 128, input_length=maxlen,
                            embeddings_initializer='glorot_normal'))
    else:
        model.add(Embedding(vocab_size, emb_weights.shape[1], input_length=maxlen,
                            weights=[emb_weights], trainable=trainable))
    print(model.output_shape)

    model.add(Reshape((30, 128, 1)))
    model.add(BatchNormalization(momentum=0.9))

    # CNN
    model.add(Convolution2D(64, (3, 5), kernel_initializer='he_normal',
                            padding='valid', activation='relu'))
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.5))
    model.add(Convolution2D(128, (3, 5), kernel_initializer='he_normal',
                            padding='valid', activation='relu'))
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.5))
    model.add(Flatten())

    # DNN model
    model.add(Dense(hidden_units, kernel_initializer='he_normal', activation='relu'))
    model.add(BatchNormalization(momentum=0.9))
    model.add(Dense(2))
    model.add(Activation('softmax'))

    adam = Adam(lr=0.0001)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    print('No. of parameters:', model.count_params())
    print(model.summary())
    return model
def SEExecute(self):
    # Batch size
    bSize = 128
    # Max length of a sentence
    actStep = 10
    # Total samples
    vsamp = self.WT1.shape[0]

    # Converting the labels from categories to integers
    self.lab[self.lab == 'others'] = 0
    self.lab[self.lab == 'angry'] = 1
    self.lab[self.lab == 'happy'] = 2
    self.lab[self.lab == 'sad'] = 3
    YTemp = self.lab

    # Sampling to get the correct class ratio
    Y0idx = np.where(YTemp == 0)[0]
    Y1idx = np.where(YTemp == 1)[0]
    Y2idx = np.where(YTemp == 2)[0]
    Y3idx = np.where(YTemp == 3)[0]

    # Collecting the testing data
    XTest_1 = self.WT1D
    XTest_2 = self.WT2D
    XTest_3 = self.WT3D
    XTest_F = self.WTFD

    # Populating X and Y for the train/test split
    X = list(range(vsamp))
    Y = self.lab
    Idx = X
    XTr, XVa, YTr, YVa, IdxTr, IdxVa = train_test_split(X, Y, Idx, stratify=Y,
                                                        test_size=0.20)

    # Collecting the data based on the split - training
    XTrain_1 = self.WT1[IdxTr]
    XTrain_2 = self.WT2[IdxTr]
    XTrain_3 = self.WT3[IdxTr]
    XTrain_F = self.WTF[IdxTr]
    YTrain = Y[IdxTr]
    YTr = Y[IdxTr]
    YTr = np.transpose(list(itertools.chain(*YTr)))
    YTr = YTr[0:24064]
    XTrain_1 = XTrain_1[0:24064].astype(int)
    XTrain_2 = XTrain_2[0:24064].astype(int)
    XTrain_3 = XTrain_3[0:24064].astype(int)
    XTrain_F = XTrain_F[0:24064].astype(int)
    YTrain = to_categorical(YTrain[0:24064], num_classes=4)

    # Collecting the data based on the split - validation
    XValid_1 = self.WT1[IdxVa]
    XValid_2 = self.WT2[IdxVa]
    XValid_3 = self.WT3[IdxVa]
    XValid_F = self.WTF[IdxVa]
    YValid = Y[IdxVa]
    YVa = Y[IdxVa]
    YVa = np.transpose(list(itertools.chain(*YVa)))

    # Sub-sampling the validation dataset to get the correct class ratio
    Y0idx = np.where(YVa == 0)[0]
    Y1idx = np.where(YVa == 1)[0]
    Y2idx = np.where(YVa == 2)[0]
    Y3idx = np.where(YVa == 3)[0]
    Yidx = np.append(np.append(np.append(Y0idx[0:2358], Y1idx[0:110]),
                               Y2idx[0:110]), Y3idx[0:110])
    YVa = YVa[Yidx]
    XValid_1 = XValid_1[Yidx].astype(int)
    XValid_2 = XValid_2[Yidx].astype(int)
    XValid_3 = XValid_3[Yidx].astype(int)
    XValid_F = XValid_F[Yidx].astype(int)
    YValid = to_categorical(YValid[Yidx], num_classes=4)

    print(np.sum(YTr == 0) / len(YTr))
    print(np.sum(YTr == 1) / len(YTr))
    print(np.sum(YTr == 2) / len(YTr))
    print(np.sum(YTr == 3) / len(YTr))
    print(np.sum(YVa == 0) / len(YVa))
    print(np.sum(YVa == 1) / len(YVa))
    print(np.sum(YVa == 2) / len(YVa))
    print(np.sum(YVa == 3) / len(YVa))
    print(len(YTr))
    print(len(YVa))

    # Flipping the train and valid data
    # XTrain_1 = np.fliplr(XTrain_1)
    # XTrain_2 = np.fliplr(XTrain_2)
    # XTrain_3 = np.fliplr(XTrain_3)
    # XValid_1 = np.fliplr(XValid_1)
    # XValid_2 = np.fliplr(XValid_2)
    # XValid_3 = np.fliplr(XValid_3)

    print(XTrain_3.shape)
    print(XValid_3.shape)
    count = 0

    ##########################################################
    # Word vectors based on pre-trained GloVe
    ##########################################################
    # Reading the 50-dimensional word vectors from GloVe
    embedding_matrix = pd.read_csv('./GloVe/glove_Embed_50d.csv',
                                   header=None).as_matrix()

    ##########################################################
    # Defining the network
    ##########################################################
    # Input dimension (vocabulary)
    iDim = 20000
    # Embedding dimensions
    Edim = 50

    # Input layers - encoder
    Seqin_1 = Input(batch_shape=(bSize, actStep))
    Seqin_2 = Input(batch_shape=(bSize, actStep))
    Seqin_3 = Input(batch_shape=(bSize, actStep))

    # Embedding layers - encoder
    Embed_1 = Embedding(input_dim=iDim, output_dim=Edim, input_length=actStep,
                        mask_zero=False, weights=[embedding_matrix], trainable=True,
                        embeddings_constraint=unit_norm())(Seqin_1)
    Embed_2 = Embedding(input_dim=iDim, output_dim=Edim, input_length=actStep,
                        mask_zero=False, weights=[embedding_matrix], trainable=True,
                        embeddings_constraint=unit_norm())(Seqin_2)
    Embed_3 = Embedding(input_dim=iDim, output_dim=Edim, input_length=actStep,
                        mask_zero=False, weights=[embedding_matrix], trainable=True,
                        embeddings_constraint=unit_norm())(Seqin_3)
    # ELayer = Embedding(input_dim=iDim, output_dim=Edim, input_length=actStep,
    #                    mask_zero=False, weights=[embedding_matrix], trainable=True,
    #                    embeddings_constraint=unit_norm())

    XMask = Embed_1  # ELayer(Seqin_1)
    XTemp = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XMask)
    XDrp = Dropout(0.3)(XTemp)
    XTemp = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XDrp)
    XDrp = Dropout(0.3)(XTemp)
    XEnc1 = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XDrp)
    XAtt1 = AttLayer2(64)(XEnc1)

    XMask = Embed_2  # ELayer(Seqin_2)
    XTemp = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XMask)
    XDrp = Dropout(0.3)(XTemp)
    XTemp = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XDrp)
    XDrp = Dropout(0.3)(XTemp)
    XEnc2 = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XDrp)
    XAtt2 = AttLayer2(64)(XEnc2)

    XMask = Embed_3  # ELayer(Seqin_3)
    XTemp = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XMask)
    XDrp = Dropout(0.3)(XTemp)
    XTemp = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XDrp)
    XDrp = Dropout(0.3)(XTemp)
    XEnc3 = Bidirectional(GRU(32, kernel_initializer='he_normal', return_sequences=True))(XDrp)
    XAtt3 = AttLayer2(64)(XEnc3)

    # Input layer for the complete conversation
    Seqin_F = Input(batch_shape=(bSize, actStep + 5))
    # Embedding layer for the complete conversation
    Embed_F = Embedding(input_dim=iDim, output_dim=Edim, input_length=actStep + 5,
                        mask_zero=False, weights=[embedding_matrix], trainable=True,
                        embeddings_constraint=unit_norm())(Seqin_F)

    Xcon = []
    fSize = [3, 4, 5]
    for fil in fSize:
        cTemp = Conv1D(nb_filter=512, filter_length=fil)(Embed_F)
        bTemp = BatchNormalization()(cTemp)
        aTemp = Activation('relu')(bTemp)
        pTemp = MaxPooling1D(4)(aTemp)
        Xcon.append(pTemp)
    Xmer = Concatenate(axis=1)(Xcon)
    XFlat = Flatten()(Xmer)
    Xcnn = Dense(64)(XFlat)
    bTemp = BatchNormalization()(Xcnn)
    Xcnn = Activation('relu')(bTemp)

    XDec = Concatenate(axis=1)([XAtt1, XAtt2, XAtt3, Xcnn])

    # Fully connected output
    Xout = Dense(4, activation='softmax')(XDec)

    model = Model(inputs=[Seqin_1, Seqin_2, Seqin_3, Seqin_F], outputs=Xout)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(lr=0.0001), metrics=['accuracy'])
    print(model.summary())

    count = 0
    maxf1 = 1
    while count < 10:
        # Fitting the model on the sequential data
        Mod = model.fit([XTrain_1, XTrain_2, XTrain_3, XTrain_F], YTrain,
                        validation_data=([XValid_1, XValid_2, XValid_3, XValid_F], YValid),
                        epochs=1, batch_size=bSize, verbose=2)
        count = count + 1
        loss = Mod.history['val_loss'][0]
        print(loss)
        if maxf1 > loss:
            maxf1 = loss
            print('Saving Start')
            model.save('SemMod_CL.h5')
            print('Saving Stop')
        val2 = model.predict([XValid_1, XValid_2, XValid_3, XValid_F],
                             batch_size=bSize, verbose=2)
        res = val2.argmax(axis=-1)
        print(roc_auc_score(YValid, val2))
        f1 = f1_score(YVa, res, labels=[1, 2, 3], average='micro')
        print(f1)

    model = load_model('SemMod_CL.h5', custom_objects={'AttLayer2': AttLayer2})

    # Evaluation and prediction
    scores1 = model.evaluate([XTrain_1, XTrain_2, XTrain_3, XTrain_F], YTrain,
                             batch_size=bSize, verbose=2)
    val1 = model.predict([XTrain_1, XTrain_2, XTrain_3, XTrain_F],
                         batch_size=bSize, verbose=2)
    scores2 = model.evaluate([XValid_1, XValid_2, XValid_3, XValid_F], YValid,
                             batch_size=bSize, verbose=2)
    val2 = model.predict([XValid_1, XValid_2, XValid_3, XValid_F],
                         batch_size=bSize, verbose=2)

    print("****************************************************************************")
    res = val1.argmax(axis=-1)
    print(scores1[1])
    print(roc_auc_score(YTrain, val1))
    print(f1_score(YTr, res, labels=[1, 2, 3], average='micro'))
    print("****************************************************************************")
    res = val2.argmax(axis=-1)
    print(scores2[1])
    print(roc_auc_score(YValid, val2))
    print(f1_score(YVa, res, labels=[1, 2, 3], average='micro'))
    print(np.sum(res == 1))
    print(np.sum(res == 2))
    print(np.sum(res == 3))
    print(np.sum(res == 0))
    print("****************************************************************************")

    # Prediction - development set
    valD = model.predict([XTest_1, XTest_2, XTest_3, XTest_F],
                         batch_size=bSize, verbose=2)
    resD = valD.argmax(axis=-1)
    print(np.sum(resD == 1))
    print(np.sum(resD == 2))
    print(np.sum(resD == 3))
    print(np.sum(resD == 0))
    print("****************************************************************************")
def lstm_model(vocab_size, embedding_index):
    max_len = 50
    embedding_dim = 300
    embedding_weights = embedding_index
    hidden_units = 128
    lr = 0.02

    input = Input(shape=(max_len,))
    embeddings = Embedding(vocab_size, embedding_dim, input_length=max_len,
                           weights=[embedding_weights])(input)

    print('-' * 100)
    print("LSTM Model selected")
    print('-' * 100)
    lstm_output = LSTM(hidden_units)(embeddings)
    lstm_output = Dense(256, activation='relu', kernel_initializer='he_normal',
                        kernel_regularizer=l2(0.001))(lstm_output)
    lstm_output = Dropout(0.3)(lstm_output)
    lstm_output = Dense(128, activation='relu', kernel_initializer='he_normal',
                        kernel_regularizer=l2(0.001))(lstm_output)
    lstm_output = Dropout(0.3)(lstm_output)
    final_output = Dense(1, activation='sigmoid')(lstm_output)

    # print('-' * 100)
    # print("Model Selected: Bidirectional LSTM without attention")
    # print('-' * 100)
    # lstm_output = Bidirectional(LSTM(hidden_units))(embeddings)
    # lstm_output = Dense(256, activation='relu', kernel_initializer='he_normal',
    #                     kernel_regularizer=l2(0.001))(lstm_output)
    # lstm_output = Dropout(0.3)(lstm_output)
    # lstm_output = Dense(128, activation='relu', kernel_initializer='he_normal',
    #                     kernel_regularizer=l2(0.001))(lstm_output)
    # lstm_output = Dropout(0.3)(lstm_output)
    # final_output = Dense(1, activation='sigmoid')(lstm_output)

    # print('-' * 100)
    # print("Model Selected: Bidirectional LSTM with attention")
    # print('-' * 100)
    # lstm_output = Bidirectional(LSTM(hidden_units, return_sequences=True),
    #                             merge_mode='ave')(embeddings)
    # # calculating the attention coefficient for each hidden state
    # attention_vector = Dense(1, activation='tanh')(lstm_output)
    # attention_vector = Flatten()(attention_vector)
    # attention_vector = Activation('softmax')(attention_vector)
    # attention_vector = RepeatVector(hidden_units)(attention_vector)
    # attention_vector = Permute([2, 1])(attention_vector)
    # # Multiplying the hidden states with the attention coefficients and
    # # finding the weighted average
    # final_output = multiply([lstm_output, attention_vector])
    # final_output = Lambda(lambda xin: K.sum(xin, axis=-2),
    #                       output_shape=(hidden_units,))(final_output)
    # # passing the above weighted vector representation through a single Dense
    # # layer for classification
    # final_output = Dropout(0.5)(final_output)
    # final_output = Dense(256, activation='relu', kernel_initializer='he_normal',
    #                      kernel_regularizer=l2(0.001))(final_output)
    # lstm_output = Dropout(0.3)(final_output)
    # final_output = Dense(128, activation='relu', kernel_initializer='he_normal',
    #                      kernel_regularizer=l2(0.001))(final_output)
    # final_output = Dense(1, activation='sigmoid')(final_output)

    print('-' * 100)
    print("Model Selected: CNN-Bidirectional LSTM with attention")
    print('-' * 100)
    # Hyperparameters for the 1D conv layer
    filters = 100
    kernel_size = 5
    embeddings = Dropout(0.3)(embeddings)
    conv_output = Conv1D(filters, kernel_size, activation='relu')(embeddings)
    lstm_output = Bidirectional(LSTM(hidden_units, return_sequences=True),
                                merge_mode='ave')(conv_output)
    # calculating the attention coefficient for each hidden state
    attention_vector = Dense(1, activation='tanh')(lstm_output)
    attention_vector = Flatten()(attention_vector)
    attention_vector = Activation('softmax')(attention_vector)
    attention_vector = RepeatVector(hidden_units)(attention_vector)
    attention_vector = Permute([2, 1])(attention_vector)
    # Multiplying the hidden states with the attention coefficients and
    # finding the weighted average
    final_output = multiply([lstm_output, attention_vector])
    final_output = Lambda(lambda xin: K.sum(xin, axis=-2),
                          output_shape=(hidden_units,))(final_output)
    # passing the above weighted vector representation through a single Dense
    # layer for classification
    final_output = Dropout(0.5)(final_output)
    final_output = Dense(128, activation='relu', kernel_initializer='he_normal',
                         kernel_regularizer=l2(0.001))(final_output)
    lstm_output = Dropout(0.3)(final_output)
    final_output = Dense(128, activation='relu', kernel_initializer='he_normal',
                         kernel_regularizer=l2(0.001))(final_output)
    final_output = Dense(1, activation='sigmoid')(final_output)

    model = Model(inputs=input, outputs=final_output)
    opt = SGD(lr=lr)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])

    # print model summary
    print(model.summary())
    return model
print('inputs_train shape:', inputs_train.shape)
print('inputs_test shape:', inputs_test.shape)
print('-')
print('queries: integer tensor of shape (samples, max_length)')
print('queries_train shape:', queries_train.shape)
print('queries_test shape:', queries_test.shape)
print('-')
print('answers: binary (1 or 0) tensor of shape (samples, vocab_size)')
print('answers_train shape:', answers_train.shape)
print('answers_test shape:', answers_test.shape)
print('-')
print('Compiling...')

input_sequence = Input((story_maxlen,))
question = Input((query_maxlen,))

input_encoder_m = Sequential()
input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64))
input_encoder_m.add(Dropout(0.3))

input_encoder_c = Sequential()
input_encoder_c.add(Embedding(input_dim=vocab_size, output_dim=query_maxlen))
input_encoder_c.add(Dropout(0.3))

question_encoder = Sequential()
question_encoder.add(Embedding(input_dim=vocab_size, output_dim=64,
                               input_length=query_maxlen))
question_encoder.add(Dropout(0.3))

input_encoded_m = input_encoder_m(input_sequence)
input_encoded_c = input_encoder_c(input_sequence)
question_encoded = question_encoder(question)

match = dot([input_encoded_m, question_encoded], axes=(2, 2))
match = Activation('softmax')(match)
response = add([match, input_encoded_c])
response = Permute((2, 1))(response)
answer = concatenate([response, question_encoded])
train_x_right_pad = sequence.pad_sequences(trainingRight, maxlen=maxRight)
train_x_np = sequence.pad_sequences(trainingAspects, maxlen=maxAspect)
tune_x_left_pad = sequence.pad_sequences(tuningLeft, maxlen=maxLeft)
tune_x_right_pad = sequence.pad_sequences(tuningRight, maxlen=maxRight)
tune_x_np = sequence.pad_sequences(tuningAspects, maxlen=maxAspect)
test_x_left_pad = sequence.pad_sequences(testingLeft, maxlen=maxLeft)
test_x_right_pad = sequence.pad_sequences(testingRight, maxlen=maxRight)
test_x_np = sequence.pad_sequences(testingAspects, maxlen=maxAspect)

leftInput = Input(shape=(maxLeft,), dtype='int32')
rightInput = Input(shape=(maxRight,), dtype='int32')
npInput = Input(shape=(maxAspect,), dtype='int32')

shared_embedding = Embedding(len(wordEmbeddings), embeddingsDim)
embLeft = shared_embedding(leftInput)
embRight = shared_embedding(rightInput)
embNP = shared_embedding(npInput)

npLSTMf = LSTM(hiddenSize)(embNP)
npLSTMf = Dropout(0.5)(npLSTMf)
embNPRepeatLeft = RepeatVector(maxLeft)(npLSTMf)
embNPRepeatRight = RepeatVector(maxRight)(npLSTMf)

embLeft = merge([embLeft, embNPRepeatLeft], mode='concat', concat_axis=-1)
embRight = merge([embRight, embNPRepeatRight], mode='concat', concat_axis=-1)
print(report)

# Accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print('SGD Classifier Accuracy of the model: {:.2f}%'.format(accuracy * 100))

# # CNN

# In[153]:
model = Sequential()
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix],
                            input_length=maxlen, trainable=False)
model.add(embedding_layer)
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# In[154]:
print(model.summary())

# In[155]:
seed = 8
X = np.array(list(prepared_data['w2v']))
Y = np.array(list(prepared_data["c2id"]))

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25,
                                                    random_state=seed)

from keras.utils.np_utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
print(y_train.shape)

# Create the model and start training
from keras.models import Sequential
model = Sequential()

from keras.layers.embeddings import Embedding
model.add(Embedding(len(word_dict) + 1, 256))

from keras.layers.recurrent import LSTM
model.add(LSTM(256))

from keras.layers.core import Dense, Dropout, Activation
model.add(Dropout(0.5))
model.add(Dense(y_train.shape[1]))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train
model.fit(x_train, y_train, batch_size=128, epochs=20)

# Evaluate
print(model.evaluate(x=x_test, y=y_test))
token = Tokenizer(num_words=100)
token.fit_on_texts(train_x)

c = 0
for t, i in token.word_index.items():
    # print("\t'{}'\t{}".format(t, i))
    c += 1
    if c == 10:
        break

x_train_seq = token.texts_to_sequences(train_x)
x_train = sequence.pad_sequences(x_train_seq, maxlen=MAX_LEN_OF_TOKEN)
y_train = np.array(train_y)
y_train = to_categorical(y_train)

model = Sequential()
model.add(Embedding(10000, 128, input_length=MAX_LEN_OF_TOKEN))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
model.summary()

# train the model
train_history = model.fit(x_train, y_train, batch_size=32, epochs=10, verbose=2)

# save the trained model file
model.save(modelname)
labels = np.array(labels)[index]
TRAIN_SIZE = int(0.8 * len(data))
X_train, X_test = data[0:TRAIN_SIZE], data[TRAIN_SIZE:]
Y_train, Y_test = labels[0:TRAIN_SIZE], labels[TRAIN_SIZE:]

session = tf.Session()
K.set_session(session)

DROPOUT_RATE = 0.3
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, EMBEDDING_DIM,
                    input_length=MAX_LENGTH))
model.add(Bidirectional(LSTM(64, return_sequences=False, dropout=DROPOUT_RATE,
                             recurrent_dropout=DROPOUT_RATE)))
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(1))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
model.summary()
max_length = 120
trunc_type = 'post'
oov_tok = '<OOV>'

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, maxlen=max_length, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length)

# model with a single LSTM layer
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(padded, training_labels_final, epochs=10,
          validation_data=(testing_padded, testing_labels_final))

# model with two stacked LSTM layers
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(6, activation='relu'))
test_labels = labelencoder_test_labels.fit_transform(test_labels)
onehotencoder = OneHotEncoder(categorical_features=[0])
test_labels = test_labels.reshape((-1, 1))
test_labels = onehotencoder.fit_transform(test_labels).toarray()

# CNN
# ===============================================================================================
print("Method = CNN for Arabic Sentiment Analysis")
model_variation = 'CNN-non-static'
np.random.seed(0)
nb_filter = embeddings_dim

main_input = Input(shape=(max_sent_len,))
embedding = Embedding(max_features, embeddings_dim, input_length=max_sent_len,
                      mask_zero=False, weights=[embedding_weights])(main_input)
Drop1 = Dropout(dropout_prob[0])(embedding)

i = 0
conv_name = ["" for x in range(len(filter_sizes))]
pool_name = ["" for x in range(len(filter_sizes))]
flat_name = ["" for x in range(len(filter_sizes))]
for n_gram in filter_sizes:
    conv_name[i] = str('conv_' + str(n_gram))
    conv_name[i] = Convolution1D(nb_filter=nb_filter,
                                 filter_length=n_gram,
                                 border_mode='valid',
                                 activation='relu',
                                 subsample_length=1,
                                 input_dim=embeddings_dim,
from keras.callbacks import ModelCheckpoint
from sklearn.utils import shuffle, class_weight
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import pylab
import itertools

lr = 0.001       # Learning rate
pl = 5
l2value = 0.001  # L2 regularization value
stride_ = 1
stride_max = 1
# border = 'same'

main_input = Input(shape=(800,), dtype='int32', name='main_input')
x = Embedding(output_dim=50, input_dim=22, input_length=800)(main_input)

a = Convolution1D(64, 2, activation='relu', border_mode='same',
                  W_regularizer=l2(l2value))(x)
apool = MaxPooling1D(pool_length=pl, stride=stride_max, border_mode='same')(a)

b = Convolution1D(64, 3, activation='relu', border_mode='same',
                  W_regularizer=l2(l2value))(x)
bpool = MaxPooling1D(pool_length=pl, stride=stride_max, border_mode='same')(b)

c = Convolution1D(64, 8, activation='relu',
for line in glove_file:
    records = line.split()
    word = records[0]
    vector_dimensions = asarray(records[1:], dtype='float32')
    embeddings_dictionary[word] = vector_dimensions
glove_file.close()

embedding_matrix = zeros((vocab_size, 100))
for word, index in tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector

deep_inputs = Input(shape=(maxlen,))
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix],
                            trainable=False)(deep_inputs)
LSTM_Layer_1 = LSTM(256)(embedding_layer)
dense_layer_1 = Dense(71, activation='sigmoid')(LSTM_Layer_1)
model = Model(inputs=deep_inputs, outputs=dense_layer_1)
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=[tf.keras.metrics.AUC(), 'acc'])
print(model.summary())

history = model.fit(X_train, y_train, batch_size=128, epochs=1, verbose=1,
                    validation_split=0.2)
score = model.evaluate(X_test, y_test, verbose=1)
print("Test Score:", score[0])
print("Test Accuracy:", score[2])
model.save('my_model_1.h5')
print(len(X_train[0]))
print(X_train[0][:10])
print(X_train[0][-10:])

# truncate and pad input sequences
X_train = sequence.pad_sequences(X_train, maxlen=max_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_length)
print('X_train shape: ', X_train.shape)
print(len(X_train[0]))
print(X_train[0][:10])
print(X_train[0][-10:])

# create the model
model = Sequential()
model.add(Embedding(n_words, embedding_length, input_length=max_length))
model.add(Dropout(0.1))
model.add(LSTM(100))
model.add(Dropout(0.2))
model.add(Dense(nb_classes, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train, epochs=nb_epoch, batch_size=batch_size,
model.add_input(name='left', input_shape=(window,), dtype=np.int32)
model.add_input(name='right', input_shape=(window,), dtype=np.int32)
# For future
# model.add_input(name='left_s', input_shape=(window, ))
# model.add_input(name='right_s', input_shape=(window, ))

vec_size = 128

# Hack the embeddings in
from keras.layers.embeddings import Embedding
# Oh dear! What a hacky way to do this!
left_emb = Embedding(len(vocab), vec_size, input_length=window)
right_emb = Embedding(len(vocab), vec_size, input_length=window)
# right_emb.params = left_emb.params
right_emb.W = left_emb.W
model.add_node(left_emb, name='emb_left', input='left')
model.add_node(right_emb, name='emb_right', input='right')

# Left & right LSTMs
# model.add_node(LSTM(128, return_sequences=False, dropout_W=0.1, dropout_U=0.1,
#                     input_shape=(window, vec_size)), name='left_lstm', input='emb_left')
# model.add_node(LSTM(128, return_sequences=False, dropout_W=0.1, dropout_U=0.1,
#                     input_shape=(window, vec_size)), name='right_lstm', input='emb_right')
model.add_node(LSTM(128, return_sequences=False, input_shape=(window, vec_size)),
               name='left_lstm', input='emb_left')
model.add_node(LSTM(128, return_sequences=False, input_shape=(window, vec_size)),
               name='right_lstm', input='emb_right')

# Time to predict
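With the Keras 2 functional API there is no need to alias weight attributes to tie the two embeddings: calling a single Embedding instance on both inputs shares its weights. A minimal sketch, assuming vocab, window, and vec_size as above:

from keras.layers import Input, Embedding, LSTM, concatenate
from keras.models import Model

left_in = Input(shape=(window,), dtype='int32')
right_in = Input(shape=(window,), dtype='int32')

shared_emb = Embedding(len(vocab), vec_size, input_length=window)  # one set of weights
left_lstm = LSTM(128)(shared_emb(left_in))
right_lstm = LSTM(128)(shared_emb(right_in))

merged = concatenate([left_lstm, right_lstm])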
print('inputs_test shape:', inputs_test.shape)
print('-')
print('queries: integer tensor of shape (samples, max_length)')
print('queries_train shape:', queries_train.shape)
print('queries_test shape:', queries_test.shape)
print('-')
print('answers: binary (1 or 0) tensor of shape (samples, vocab_size)')
print('answers_train shape:', answers_train.shape)
print('answers_test shape:', answers_test.shape)
print('-')
print('Compiling...')

# embed the input sequence into a sequence of vectors
input_encoder_m = Sequential()
input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64,
                              input_length=story_maxlen))
# output: (samples, story_maxlen, embedding_dim)

# embed the question into a single vector
question_encoder = Sequential()
question_encoder.add(Embedding(input_dim=vocab_size, output_dim=64,
                               input_length=query_maxlen))
# output: (samples, query_maxlen, embedding_dim)

# compute a 'match' between input sequence elements (which are vectors)
# and the question vector
match = Sequential()
match.add(Merge([input_encoder_m, question_encoder], mode='dot',
                dot_axes=[(2,), (2,)]))
# output: (samples, story_maxlen, query_maxlen)

# embed the input into a single vector with size = story_maxlen:
logger.debug('Loaded ' + str(len(embeddings_index)))
logger.debug("Create word matrix")

# create a weight matrix for words in training docs
embedding_matrix = np.zeros((vocab_size, OUTPUT_DIM))
for word, i in tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# define the model
logger.debug("Model definition")
model = Sequential()
e = Embedding(vocab_size, OUTPUT_DIM, weights=[embedding_matrix],
              input_length=SEQUENCE_LENGTH, trainable=False)
# model.add(Embedding(vocab_size, OUTPUT_DIM, input_length=SEQUENCE_LENGTH))
model.add(e)
model.add(Flatten())
model.add(Dropout(dropout, seed=random_state))
for i in range(KERAS_LAYERS):
    model.add(Dense(nodes, activation='relu',
                    kernel_constraint=keras.constraints.maxnorm(KERAS_MAXNORM)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout, seed=random_state))
    dropout = dropout - 0.1
    if dropout < 0.1:
print 'val_X.shape = {}'.format(val_X.shape)
print 'val_Y.shape = {}'.format(val_Y.shape)

# Let's define and compile a CNN model with Keras

# Number of feature maps (outputs of the convolutional layer)
N_fm = 300
# kernel size of the convolutional layer
kernel_size = 8

model = Sequential()
# Embedding layer (lookup table of trainable word vectors)
model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                    input_length=conv_input_height, weights=[W],
                    W_constraint=unitnorm()))
# Reshape word vectors from Embedding into a tensor format suitable for the convolutional layer
model.add(Reshape((1, conv_input_height, conv_input_width)))
# first convolutional layer
model.add(Convolution2D(N_fm, kernel_size, conv_input_width, border_mode='valid',
                        W_regularizer=l2(0.0001)))
# ReLU activation
model.add(Activation('relu'))
y_train = np_utils.to_categorical(imdb_train['sentiment'][0:4])

# load pre-trained word embeddings
embedding_vectors = loadGloveWordEmbeddings(glove_file)
print(len(embedding_vectors))

# get the embedding layer weight matrix
embedding_weight_matrix = getEmbeddingWeightMatrix(embedding_vectors, tokenizer.word_index)
print(embedding_weight_matrix.shape)

# build the model
input = Input(shape=(X_train.shape[1],))
inner = Embedding(input_dim=vocab_size, output_dim=word_embed_size,
                  input_length=seq_maxlen, weights=[embedding_weight_matrix],
                  trainable=False)(input)
inner = Conv1D(64, 5, padding='valid', activation='relu', strides=1)(inner)
inner = MaxPooling1D(pool_size=4)(inner)
inner = Bidirectional(LSTM(100, return_sequences=False))(inner)
inner = Dropout(0.3)(inner)
inner = Dense(50, activation='relu')(inner)
output = Dense(2, activation='softmax')(inner)
model = Model(inputs=input, outputs=output)
model.compile(Adam(lr=0.01), 'categorical_crossentropy', metrics=['accuracy'])

save_weights = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)
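The ModelCheckpoint above only takes effect once it is passed to fit; a brief usage sketch, with a hypothetical epoch count and validation split (needed so that val_loss exists for the callback to monitor):

model.fit(X_train, y_train,
          validation_split=0.1,   # hypothetical split; provides the monitored val_loss
          epochs=10, batch_size=64,
          callbacks=[save_weights])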
# hash_embedding = hash_embedding.values
# hash_embedding = np.concatenate([np.zeros((1, hash_length)), hash_embedding,
#                                  np.random.rand(1, hash_length)])
random_embedding = pd.read_csv('../preprocessing/random/ner_embeddings.txt',
                               delimiter=' ', header=None)
random_embedding = random_embedding.values
random_embedding = np.concatenate([
    np.zeros((1, hash_length)),
    random_embedding,
    np.random.rand(1, hash_length)
])

embed_index_input = Input(shape=(step_length,))
embedding = Embedding(emb_vocab + 2, emb_length, weights=[word_embedding],
                      mask_zero=True, input_length=step_length)(embed_index_input)

hash_index_input = Input(shape=(step_length,))
encoder_embedding = Embedding(hash_vocab + 2, hash_length, weights=[random_embedding],
                              mask_zero=True, input_length=step_length)(hash_index_input)

pos_input = Input(shape=(step_length, pos_length))
# chunk_input = Input(shape=(step_length, chunk_length))
gazetteer_input = Input(shape=(step_length, gazetteer_length))

senna_hash_pos_chunk_gazetteer_merge = merge(
print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
                                                      test_split=0.2)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen, dropout=0.5))
model.add(LSTM(128, dropout_W=0.5, dropout_U=0.1))  # try using a GRU instead, for fun
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam')

print('Train...')
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test), show_accuracy=True)
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size,
                            show_accuracy=True)