def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(
            K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
            K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].eval())))
        return args.ortho_reg * reg

    # Vocabulary size
    vocab_size = len(vocab)

    ##### Inputs #####
    # Positive examples: batch_size * maxlen; each element is an index into the
    # vocabulary, one index per word in the sentence (this is w).
    # Negative examples: batch_size * args.neg_size * maxlen, ditto.
    sentence_input = Input(batch_shape=(None, maxlen), dtype='int32', name='sentence_input')
    neg_input = Input(batch_shape=(None, args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    # The embedding layer maps positive integers (indices) to fixed-size dense
    # vectors, e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]].
    # keras.layers.embeddings.Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None)
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    # Compute the sentence embedding; this touches quite a few Keras details.
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    ### Word embedding and aspect embedding initialization ######
    # If emb_path is given, initialize the word embedding matrix E from the
    # file; the aspect matrix T is initialized with k-means.
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        model.get_layer('word_emb').set_weights(
            emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(
            K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
            K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    # y_s = LSTM(args.emb_dim, return_sequences=False)(e_w)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    # z_n = TimeDistributed(LSTM(args.emb_dim, return_sequences=False))(e_neg)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    ### Word embedding and aspect embedding initialization ######
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        # model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()[0]))
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(model.get_layer('aspect_emb').W, emb_reader.get_aspect_matrix(args.aspect_size))

    return model
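
# Usage sketch (illustrative, not part of the original file): ABAE-style models
# output the max-margin loss tensor directly, so training typically compiles
# with an identity-style loss that just averages the model output. `args`,
# `maxlen`, and `vocab` are assumed to come from the surrounding training script.
import keras.backend as K
from keras.optimizers import Adam

def max_margin_loss(y_true, y_pred):
    # the model output already is the per-example loss
    return K.mean(y_pred)

model = create_model(args, maxlen, vocab)
model.compile(optimizer=Adam(lr=0.001), loss=max_margin_loss, metrics=[max_margin_loss])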
def create_model(args, vocab, num_outputs):

    ###############################################################################################################################
    ## Create Model
    #
    dropout = 0.5
    recurrent_dropout = 0.1
    vocab_size = len(vocab)

    ##### Inputs #####
    sentence_input = Input(shape=(None,), dtype='int32', name='sentence_input')
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')
    output = word_emb(sentence_input)

    print('use a rnn layer')
    output = LSTM(args.rnn_dim, return_sequences=False, dropout=dropout,
                  recurrent_dropout=recurrent_dropout, name='lstm')(output)

    print('use 0.5 dropout layer')
    output = Dropout(0.5)(output)

    densed = Dense(num_outputs, name='dense')(output)
    probs = Activation('softmax')(densed)
    model = Model(inputs=[sentence_input], outputs=probs)

    ##### Initialization #####
    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt' % (args.domain)
    emb_reader = EmbReader(emb_path, emb_dim=args.emb_dim)
    model.get_layer('word_emb').set_weights(
        emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
    if vocab_path:
        with open(vocab_path, 'rb') as vocab_file:
            vocab = pk.load(vocab_file)
    else:
        vocab = create_vocab(train_df['text'].values, tokenize_text, to_lower, min_word_freq, emb_words)

    vocab_size = len(vocab)
    logger.info('  Vocab size: %i' % (vocab_size))

    pd.options.mode.chained_assignment = None
    train_df.loc[:, 'text'] = tokenize_dataset(train_df['text'].values, vocab, tokenize_text, to_lower)
    dev_df.loc[:, 'text'] = tokenize_dataset(dev_df['text'].values, vocab, tokenize_text, to_lower)
    test_df.loc[:, 'text'] = tokenize_dataset(test_df['text'].values, vocab, tokenize_text, to_lower)

    train_maxlen = train_df['text'].map(len).max()
    dev_maxlen = dev_df['text'].map(len).max()
    test_maxlen = test_df['text'].map(len).max()
    overal_maxlen = max(train_maxlen, dev_maxlen, test_maxlen)

    return train_df, dev_df, test_df, vocab, overal_maxlen


if __name__ == '__main__':
    from w2vEmbReader import W2VEmbReader as EmbReader
    emb_reader = EmbReader('/home/david/data/embed/glove.6B.50d.txt', emb_dim=50)
    emb_words = emb_reader.load_words()
    train_df, dev_df, test_df, vocab, overal_maxlen = get_data('/home/david/data/ats/ets/54147', emb_words=emb_words)
    print('Done.')
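
# Hypothetical follow-up sketch (not in the original script): `overal_maxlen`
# is the natural pad length for batching the tokenized index sequences, e.g.
# with Keras' pad_sequences.
from keras.preprocessing import sequence

train_x = sequence.pad_sequences(train_df['text'].values, maxlen=overal_maxlen)
dev_x = sequence.pad_sequences(dev_df['text'].values, maxlen=overal_maxlen)
test_x = sequence.pad_sequences(test_df['text'].values, maxlen=overal_maxlen)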
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.get_shape().as_list()[0])))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(os.path.join("..", "preprocessed_data", args.domain), args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True,
                         name='word_emb')  # , embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    # W_constraint=MaxNorm(10),
    # b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            # W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(embs, emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
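
# A small numeric sanity check (illustrative only) of what ortho_reg measures:
# after L2-normalizing the rows, W.W^T has ones on the diagonal, so the penalty
# is the squared Frobenius norm of the off-diagonal cosine similarities. It is
# zero exactly when the aspect vectors are pairwise orthogonal.
import numpy as np

W = np.random.randn(14, 200)                          # aspect_size x emb_dim
w_n = W / np.linalg.norm(W, axis=-1, keepdims=True)   # row-normalize
penalty = np.sum(np.square(w_n.dot(w_n.T) - np.eye(w_n.shape[0])))
print('penalty for random aspects:', penalty)

Q, _ = np.linalg.qr(W.T)                              # orthonormal columns
w_orth = Q.T                                          # rows are orthonormal
assert np.isclose(np.sum(np.square(w_orth.dot(w_orth.T) - np.eye(14))), 0.0)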
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    import keras.backend as K
    from keras import layers
    from keras.layers import *
    from deepats.my_layers import Attention, Conv1DWithMasking, MeanOverTime, TemporalMeanPooling, MeanPool, GlobalMeanPooling
    from keras.models import Sequential, Model
    from keras.initializers import Constant

    ###############################################################################################################################
    ## Create Model
    #
    vocab_size = len(vocab)
    dropout_W = 0.5   # default=0.5
    dropout_U = 0.1   # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.model_type == 'cls':
        raise NotImplementedError

    elif args.model_type == 'rwa':
        logger.info('Building a RWA model')
        from deepats.rwa import RWA
        # from deepats.RWACell import RWACell as RWA
        model = Sequential()
        model.add(Embedding(vocab_size, args.emb_dim))
        for i in range(args.stack - 1):
            model.add(LSTM(args.rnn_dim, return_sequences=True, dropout=dropout_W, recurrent_dropout=dropout_U))
            model.add(Dropout(args.dropout_prob))
        model.add(RWA(args.rnn_dim))
        # model.add(Bidirectional(RWA(args.rnn_dim), merge_mode='ave'))  # {'sum', 'mul', 'concat', 'ave'***, None}
        model.add(Dropout(args.dropout_prob))
        bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
        model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
        # model.add(Activation('sigmoid'))
        model.add(Activation('tanh'))
        model.emb_index = 0

    elif args.model_type == 'regp':
        logger.info('Building an LSTM REGRESSION model with POOLING')
        POOL = 2
        if POOL == 1:
            mask_zero = False
        else:
            mask_zero = True
        model = Sequential()
        model.add(Embedding(vocab_size, args.emb_dim, mask_zero=mask_zero))
        for i in range(args.stack):
            model.add(LSTM(args.rnn_dim, return_sequences=True, dropout=dropout_W, recurrent_dropout=dropout_U))
            model.add(Dropout(args.dropout_prob))

        ## MEAN POOLING.
        if POOL == 1:
            model.add(GlobalAveragePooling1D())
        elif POOL == 2:
            model.add(MeanOverTime())  # A/B
        elif POOL == 3:
            model.add(TemporalMeanPooling())
        elif POOL == 4:
            model.add(MeanPool())
        elif POOL == 5:
            model.add(GlobalMeanPooling())

        bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
        model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
        model.add(Activation('sigmoid'))
        # model.add(Activation('tanh'))
        model.emb_index = 0

    elif args.model_type == 'regp_ORIG':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode, subsample_length=1))
        if args.rnn_dim > 0:
            model.add(LSTM(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_reader.load_embeddings(vocab)
        emb_wts = emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].get_weights()[0])
        wts = model.layers[model.emb_index].get_weights()
        wts[0] = emb_wts
        model.layers[model.emb_index].set_weights(wts)
        logger.info('  Done')

    return model
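
# The POOL options above select among several mask-aware mean-pooling layers
# from deepats.my_layers. A minimal sketch of what such a layer does (assumed
# implementation; the real MeanOverTime/TemporalMeanPooling may differ):
import keras.backend as K
from keras.engine.topology import Layer

class MaskedMeanOverTime(Layer):
    # Averages over the time axis while ignoring masked (padding) steps.
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MaskedMeanOverTime, self).__init__(**kwargs)

    def call(self, x, mask=None):
        if mask is not None:
            mask = K.cast(mask, K.floatx())                     # (batch, time)
            summed = K.sum(x * K.expand_dims(mask, -1), axis=1)
            counts = K.sum(mask, axis=1, keepdims=True) + K.epsilon()
            return summed / counts
        return K.mean(x, axis=1)

    def compute_mask(self, inputs, mask=None):
        return None                                             # mask is consumed here

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2])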
def create_model(args, kstep, node_size):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(
            K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
            K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].eval())))
        return args.ortho_reg * reg

    ##### Inputs #####
    sentence_input = Input(shape=(kstep, node_size), dtype='float32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, kstep, node_size), dtype='float32', name='neg_input')
    print("sentence_input.ndim:", sentence_input.ndim)
    print("neg_input.ndim:", neg_input.ndim)

    ##### Compute sentence representation #####
    # Inputs are already dense feature vectors, so no embedding lookup is needed.
    e_w = sentence_input
    y_s = Average()(sentence_input)
    print(K.int_shape(e_w), K.int_shape(y_s))
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])
    print("z_s.ndim:", z_s.ndim)

    ##### Compute representations of negative instances #####
    # e_neg = word_emb(neg_input)
    e_neg = neg_input
    z_n = Average()(e_neg)
    print("e_neg.ndim:", e_neg.ndim)
    print("z_n.ndim:", z_n.ndim)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, 2405, name='aspect_emb', W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    print(K.int_shape(z_s), K.int_shape(r_s))
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    ### Aspect embedding initialization ######
    # This variant has no word_emb layer (inputs are pre-embedded node
    # features), so only the aspect matrix is initialized.
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)  # K is the Keras backend
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        # Gram matrix of the normalized rows minus the identity: the smaller the
        # off-diagonal entries, the closer the rows are to orthogonal.
        return args.ortho_reg * reg  # smaller is better: it keeps the aspect vectors well separated

    vocab_size = len(vocab)

    if args.emb_name:  # load pre-trained embeddings saved on disk
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join("/content/drive/My Drive/Attention-Based-Aspect-Extraction-master",
                         "preprocessed_data", args.domain),
            args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True,
                         name='word_emb', embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation: combine the words according to attention #####
    e_w = word_emb(sentence_input)  # map word indices to embeddings
    y_s = Average()(e_w)  # averaging layer
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w, y_s])  # attention layer
    z_s = WeightedSum()([e_w, att_weights])  # encoding layer

    ##### Compute representations of negative instances (contrastive samples) #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction: a dense layer decodes the attended sentence into aspect weights #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)  # softmax keeps the dimensionality, normalizes the weights
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)  # weights capped by MaxNorm(10), regularized by the custom ortho_reg

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])  # custom max-margin loss layer
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])  # negative inputs are sampled separately

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(embs, emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        # aspect matrix initialized from k-means centroids; see the sketch below
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)  # weights behind r_s

    return model
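
# Sketch of how get_aspect_matrix can produce the k-means initialization used
# above (assumed implementation; the real w2vEmbReader may differ in details):
# cluster the word embedding matrix and L2-normalize the centroids.
import numpy as np
from sklearn.cluster import KMeans

def get_aspect_matrix_sketch(emb_matrix, n_clusters, seed=1234):
    km = KMeans(n_clusters=n_clusters, random_state=seed)
    km.fit(emb_matrix)
    centroids = km.cluster_centers_
    # L2-normalize each centroid so the aspects live on the unit sphere,
    # matching the row normalization inside ortho_reg
    norms = np.linalg.norm(centroids, axis=-1, keepdims=True)
    return (centroids / norms).astype('float32')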
                   concat_axis=-1)
    densed = Dense(1)(merged)
    score = Activation('sigmoid')(densed)
    model = Model(inputs=[sequenceQn, sequenceAns], outputs=score)

    # get the WordEmbedding layer index
    model.emb_index = 0
    model_layer_index = 0
    for test in model.layers:
        if test.name == 'QnEmbedding' or test.name == 'AnsEmbedding':
            model.emb_index = model_layer_index
            # Initialize embeddings if requested
            if emb_path:
                logger.info('Initializing lookup table')
                emb_reader = EmbReader(emb_path, emb_dim=emb_dim)
                model.layers[model.emb_index].W.set_value(
                    emb_reader.get_emb_matrix_given_vocab(
                        vocab, model.layers[model.emb_index].W.get_value()))
        model_layer_index += 1

if model_type == 'cnnwang2016':
    logger.info('Building a CNN model (Zhiguo Wang, 2016) with S+,S-,T+,T- as input, and MaxPooling')
    from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge, pooling
    assert cnn_dim > 0
    cnn_border_mode = 'same'
vocab, train_x, test_x, overall_maxlen = dataset.get_data(args.domain, vocab_size=args.vocab_size, maxlen=args.maxlen)
vocab_inv = {ind: w for w, ind in vocab.items()}

model = Model(args, overall_maxlen, vocab)
sen_gen = sentence_batch_generator(train_x, batch_size)
neg_gen = negative_batch_generator(train_x, batch_size, args.neg_size)
batches_per_epoch = 1000

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        word_emb = sess.run(model.word_emb)
        sess.run(tf.assign(model.word_emb, emb_reader.get_emb_matrix_given_vocab(vocab, word_emb)))
        sess.run(tf.assign(model.aspect_emb, emb_reader.get_aspect_matrix(args.aspect_size)))

    checkpoint_dir = './ckpt/'
    min_loss = float('inf')
    for ii in range(args.epochs):
        loss, max_margin_loss = 0., 0.
        for b in range(batches_per_epoch):
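
# The two generators used above are not shown in this fragment. A sketch of the
# usual ABAE-style implementations (assumed; the originals may differ): the
# sentence generator cycles shuffled mini-batches, while the negative generator
# draws random sentences as contrastive samples.
import numpy as np

def sentence_batch_generator(data, batch_size):
    n_batch = len(data) // batch_size
    batch_count = 0
    np.random.shuffle(data)
    while True:
        if batch_count == n_batch:
            np.random.shuffle(data)
            batch_count = 0
        batch = data[batch_count * batch_size: (batch_count + 1) * batch_size]
        batch_count += 1
        yield batch

def negative_batch_generator(data, batch_size, neg_size):
    data_len, dim = data.shape
    while True:
        indices = np.random.choice(data_len, batch_size * neg_size)
        yield data[indices].reshape(batch_size, neg_size, dim)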
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #
    dropout_W = 0.5   # default=0.5
    dropout_U = 0.1   # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.model_type == 'cls':
        raise NotImplementedError

    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode, subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
            K.set_value(model.layers[-1].bias, bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode, subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
            K.set_value(model.layers[-1].bias, bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode, subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(inputs=sequence, outputs=score)
        model.emb_index = 1

    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode, subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(inputs=sequence, outputs=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].set_weights(
            emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].get_weights()))
        logger.info('  Done')

    return model
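
# Why the logit transform for the initial bias (illustrative numeric check):
# with bias b = log(p) - log(1 - p) and a zero input to the final Dense layer,
# sigmoid(b) = p, so the model starts out predicting the training-set mean score.
import numpy as np

p = 0.62                                   # example mean of normalized scores
b = np.log(p) - np.log(1 - p)
assert np.isclose(1.0 / (1.0 + np.exp(-b)), p)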
def create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect):

    ###############################################################################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #
    dropout = args.dropout_W
    recurrent_dropout = args.dropout_U
    vocab_size = len(vocab)

    logger.info('Building a LSTM attention model to predict term/aspect sentiment')
    print('\n\n')

    ##### Inputs #####
    sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
    aspect_input = Input(shape=(maxlen_aspect,), dtype='int32', name='aspect_input')
    pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

    ##### construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ### represent aspect as averaged word embedding ###
    print('use average term embs as aspect embedding')
    aspect_term_embs = word_emb(aspect_input)
    aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

    ### sentence representation ###
    sentence_output = word_emb(sentence_input)
    pretrain_output = word_emb(pretrain_input)

    print('use a rnn layer')
    rnn = RNN(args.rnn_dim, return_sequences=True, dropout=dropout,
              recurrent_dropout=recurrent_dropout, name='lstm')
    sentence_output = rnn(sentence_output)
    pretrain_output = rnn(pretrain_output)

    print('use content attention to get term weights')
    att_weights = Attention(name='att_weights')([sentence_output, aspect_embs])
    sentence_output = WeightedSum()([sentence_output, att_weights])
    pretrain_output = Average(mask_zero=True)(pretrain_output)

    if args.dropout_prob > 0:
        print('use dropout layer')
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        pretrain_output = Dropout(args.dropout_prob)(pretrain_output)

    sentence_output = Dense(num_outputs, name='dense_1')(sentence_output)
    pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_output)
    aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
    doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

    model = Model(inputs=[sentence_input, aspect_input, pretrain_input],
                  outputs=[aspect_probs, doc_probs])
    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #
    if args.is_pretrain:
        import pickle
        print('Set embedding, lstm, and dense weights from pre-trained models')
        if args.domain == 'lt':
            f_1 = open('../pretrained_weights/lstm_weights_lt%.1f.pkl' % (args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_lt%.1f.pkl' % (args.percetage), 'rb')
        else:
            f_1 = open('../pretrained_weights/lstm_weights_res%.1f.pkl' % (args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_res%.1f.pkl' % (args.percetage), 'rb')
        lstm_weights = pickle.load(f_1)
        dense_weights = pickle.load(f_2)
        model.get_layer('lstm').set_weights(lstm_weights)
        model.get_layer('dense_1').set_weights(dense_weights)
        model.get_layer('dense_2').set_weights(dense_weights)

    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt' % (args.domain)
    emb_reader = EmbReader(args, emb_path)
    model.get_layer('word_emb').set_weights(
        emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
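
# Sketch of how the pickled weight files loaded above could be produced from a
# pre-trained document-level model (hypothetical helper; the file naming
# mirrors the loading code, with the domain token being 'lt' or 'res'):
import pickle

def save_pretrained_weights(lstm_weights, dense_weights, domain, percentage):
    with open('../pretrained_weights/lstm_weights_%s%.1f.pkl' % (domain, percentage), 'wb') as f:
        pickle.dump(lstm_weights, f)
    with open('../pretrained_weights/dense_weights_%s%.1f.pkl' % (domain, percentage), 'wb') as f:
        pickle.dump(dense_weights, f)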
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #
    dropout_W = 0.5   # default=0.5
    dropout_U = 0.1   # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:  # expand the dims
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)  # number of score outputs to predict

    if args.model_type == 'cls':
        raise NotImplementedError

    # embedding --> cnn --> rnn(return_sequences=False) --> dropout --> dense --> sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero decides whether input value 0 is treated as padding to be
        # ignored; if True, all downstream layers must support masking or an
        # exception is raised. If True, index 0 cannot be used by the
        # vocabulary, and input_dim should be |vocabulary| + 1.
        # The Input layer is omitted here because input_length has a default.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            # border_mode==padding?? subsample_length==pooling?? where is the activation??
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode, subsample_length=1))
        if args.rnn_dim > 0:
            # return_sequences=False: only the last state is returned
            model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            # initialize the bias of the last layer
            bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))  # output range is (0, 1)
        # record the index of the embedding layer for later initialization with
        # pre-trained vectors; all layers live in model.layers
        model.emb_index = 0

    # embedding --> cnn --> rnn(return_sequences=True) --> dropout --> MeanOverTime or Attention(mean or sum) --> dense --> sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode, subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    # embedding --> cnn --> birnn(return_sequences=False) --> dropout --> merge(concat forward & backward) --> dense --> sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # this line is redundant
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode, subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(inputs=sequence, outputs=score)
        model.emb_index = 1

    # embedding --> cnn --> birnn(return_sequences=True) --> dropout --> MeanOverTime --> merge(concat) --> dense --> sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # redundant
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode, subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(inputs=sequence, outputs=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model