def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye((w_n.shape[0]).eval())))
        return args.ortho_reg * reg

    # Vocabulary size
    vocab_size = len(vocab)

    ##### Inputs #####
    # Positive examples: batch_size * maxlen; each element is an index into the
    # vocabulary, one index per word in the sentence.
    # Negative examples: batch_size * args.neg_size * maxlen, ditto.
    # This yields w.
    sentence_input = Input(batch_shape=(None, maxlen), dtype='int32', name='sentence_input')
    neg_input = Input(batch_shape=(None, args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    # The embedding layer maps positive integers (indices) to dense vectors of
    # fixed size, e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]].
    # keras.layers.embeddings.Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None)
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    # Compute the sentence embedding; this involves many Keras details worth studying further.
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    # Compute z_n for the negative examples.
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    # Reconstruction step
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb', W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    # Loss function
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(input=[sentence_input, neg_input], output=loss)

    ### Word embedding and aspect embedding initialization ######
    # If emb_path is given, initialize the word embedding matrix E from the file;
    # the aspect matrix T is initialized with K-means centroids.
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        # set_weights expects a list of arrays, so wrap the initialized matrix.
        model.get_layer('word_emb').set_weights(
            [emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()[0])])
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
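# For reference, the orthogonal regularizer above penalizes the off-diagonal
# entries of the Gram matrix of the row-normalized aspect matrix. A standalone
# NumPy sketch of the same quantity (illustrative only, not the Keras code path):
import numpy as np

def ortho_penalty(W, ortho_reg=0.1, eps=1e-7):
    # Row-normalize the aspect matrix, then compute ||W_n W_n^T - I||^2,
    # which is zero exactly when the aspect vectors are mutually orthogonal.
    w_n = W / (eps + np.sqrt(np.square(W).sum(axis=-1, keepdims=True)))
    gram = w_n.dot(w_n.T)
    return ortho_reg * np.square(gram - np.eye(W.shape[0])).sum()

# Example: 14 aspects embedded in 200 dimensions.
# print(ortho_penalty(np.random.randn(14, 200)))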
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    # y_s = LSTM(args.emb_dim, return_sequences=False)(e_w)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    # z_n = TimeDistributed(LSTM(args.emb_dim, return_sequences=False))(e_neg)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    ### Word embedding and aspect embedding initialization ######
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        # model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()[0]))
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))

    return model
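# Hedged usage sketch for the model above: the single output is already the
# max-margin loss tensor, so training code in this style typically compiles
# with a loss that simply averages the prediction and feeds dummy targets.
# The names `sen_batch` and `neg_batch`, and the optimizer choice, are
# assumptions for illustration, not part of the original code.
import numpy as np
import keras.backend as K

def max_margin_loss(y_true, y_pred):
    # The model output is the loss value itself; y_true is a dummy array.
    return K.mean(y_pred)

# model = create_model(args, maxlen, vocab)
# model.compile(optimizer='adam', loss=max_margin_loss, metrics=[max_margin_loss])
# model.train_on_batch([sen_batch, neg_batch], np.ones((len(sen_batch), 1)))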
###############################################################################################################################
## Building model

sentence_input = Input(shape=(args.kstep, node_size), dtype='float32', name='sentence_inputt')
neg_input = Input(shape=(args.neg_size, args.kstep, node_size), dtype='float32', name='neg_inputt')
predict = Input(shape=(17,), dtype='int32', name='predictt')

e_w = sentence_input
y_s = Average()(sentence_input)
att_weights = Attention(name='att_weights')([e_w, y_s])
z_s = WeightedSum()([e_w, att_weights])

##### Compute representations of negative instances #####
e_neg = neg_input
z_n = Average()(e_neg)

##### Reconstruction2 #####
dense1 = Dense(512, activation='relu')(z_s)
# dense1 = noise.GaussianNoise(0.09)(dense1)
p_t = Dense(128, activation='relu')(dense1)
dense2 = Dense(512, activation='relu')(p_t)
new_p_t = Activation('softmax', name='p_t')(dense2)
r_s = Dense(node_size, init='uniform')(new_p_t)

##### Loss1 #####
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.get_shape().as_list()[0])))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join("..", "preprocessed_data", args.domain),
            args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True,
                         name='word_emb')  # , embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    # W_constraint=MaxNorm(10),
    # b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(
        args.aspect_size, args.emb_dim, name='aspect_emb',
        # W_constraint=MaxNorm(10),
        W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
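# `get_aspect_matrix` is provided by W2VEmbReader and its implementation is not
# shown here. A plausible sketch, assuming it clusters the word-embedding rows
# with K-means and returns normalized centroids as the initial aspect matrix;
# the real reader may differ in details (normalization, seeding).
import numpy as np
from sklearn.cluster import KMeans

def kmeans_aspect_matrix(emb_matrix, n_aspects, seed=1234):
    # Cluster word vectors and use the centroids as initial aspect embeddings.
    km = KMeans(n_clusters=n_aspects, random_state=seed)
    km.fit(emb_matrix)
    centroids = km.cluster_centers_
    # L2-normalize each centroid, mirroring the normalization in ortho_reg.
    norms = np.linalg.norm(centroids, axis=-1, keepdims=True) + 1e-7
    return (centroids / norms).astype('float32')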
def create_model(args, kstep, node_size):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].eval())))
        return args.ortho_reg * reg

    ##### Inputs #####
    # Inputs are dense node-feature sequences rather than token indices.
    sentence_input = Input(shape=(kstep, node_size), dtype='float32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, kstep, node_size), dtype='float32', name='neg_input')
    print("sentence_input.ndim", sentence_input.ndim)
    print("neg_input.ndim", neg_input.ndim)

    e_w = sentence_input
    y_s = Average()(sentence_input)
    print("y_s.ndim", y_s.ndim)
    print("e_w.ndim", e_w.ndim)
    print(K.int_shape(e_w), K.int_shape(y_s))
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])
    print("z_s.ndim", z_s.ndim)

    ##### Compute representations of negative instances #####
    # e_neg = word_emb(neg_input)
    e_neg = neg_input
    z_n = Average()(e_neg)
    print("e_neg.ndim", e_neg.ndim)
    print("z_n.ndim", z_n.ndim)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    # NOTE: 2405 is a hard-coded output dimension (presumably node_size for this dataset).
    r_s = WeightedAspectEmb(args.aspect_size, 2405, name='aspect_emb', W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    print("loss input shapes:", K.int_shape(z_s), K.int_shape(r_s))
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(input=[sentence_input, neg_input], output=loss)

    ### Word embedding and aspect embedding initialization ######
    # NOTE: this branch refers to a 'word_emb' layer and a `vocab`, which this
    # float-input variant does not define; it only applies when the model is
    # built with an embedding layer and args.emb_path is set.
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        model.get_layer('word_emb').W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').W.get_value()))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)  # K is the Keras backend module
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.shape[0].value)))  # Gram matrix of the normalized aspect vectors minus the identity: the smaller, the closer the cross-terms are to zero
        return args.ortho_reg * reg  # smaller is better: it keeps the individual aspects well separated

    vocab_size = len(vocab)

    if args.emb_name:  # load the previously saved embeddings(?)
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join(
                "/content/drive/My Drive/Attention-Based-Aspect-Extraction-master",
                "preprocessed_data", args.domain),
            args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True,
                         name='word_emb', embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation ##### pre-processing: combine the sentence according to attention
    e_w = word_emb(sentence_input)  # map the input indices to embeddings
    y_s = Average()(e_w)  # averaging layer (default pooling)
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w, y_s])  # attention layer
    z_s = WeightedSum()([e_w, att_weights])  # encoding layer

    ##### Compute representations of negative instances ##### a trick to improve accuracy
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction ##### dense layers intended to decode the features of the attended sentences
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)  # softmax: the number of nodes is unchanged, the values are normalized into a distribution
    r_s = WeightedAspectEmb(args.aspect_size,
                            args.emb_dim,
                            name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)  # weights constrained to max-norm 10, with the custom ortho_reg regularizer

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])  # custom loss layer; what exactly is this doing?
    model = Model(inputs=[sentence_input, neg_input],
                  outputs=[loss])  # does the negative input need to be prepared as a separate dataset?

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )  # why is K-means used for the initialization?
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)  # r_s

    return model
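# `MaxMargin` is a custom layer from this code base whose definition is not
# included above. The sketch below is an assumption of what such a layer
# computes (a hinge loss pulling the reconstruction r_s towards the attended
# sentence vector z_s and away from the averaged negative samples z_n); it is
# not the repository's exact implementation.
import keras.backend as K
from keras.engine.topology import Layer  # keras.layers.Layer in newer Keras versions

class MaxMarginSketch(Layer):
    def call(self, inputs, mask=None):
        z_s, z_n, r_s = inputs
        z_s = K.l2_normalize(z_s, axis=-1)   # (batch, dim)
        z_n = K.l2_normalize(z_n, axis=-1)   # (batch, neg_size, dim)
        r_s = K.l2_normalize(r_s, axis=-1)   # (batch, dim)
        pos = K.sum(z_s * r_s, axis=-1, keepdims=True)            # (batch, 1)
        neg = K.sum(z_n * K.expand_dims(r_s, axis=-2), axis=-1)   # (batch, neg_size)
        # Hinge loss, summed over the negative samples.
        return K.sum(K.maximum(0., 1. - pos + neg), axis=-1, keepdims=True)

    def compute_mask(self, inputs, mask=None):
        return None

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], 1)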
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(
            K.square(K.dot(w_n, K.transpose(w_n)) -
                     K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        if args.emb_technique == "w2v":
            logger.info("Load {} w2v embedding for {}".format(
                args.lang, config.word_emb_training_type))
            if args.lang == 'en':
                emb_reader = EmbReader(
                    config.emb_dir_en["w2v"].format(config.word_emb_training_type),
                    args.emb_name)
            elif args.lang == 'de':
                # emb_reader = EmbReader(config.emb_dir_de["w2v"].format(config.word_emb_training_type),
                #                        args.emb_name)
                emb_reader = FineTuneEmbed_cca(
                    '../preprocessed_data/german/w2v/fine_tuned', 'w2v_emb',
                    '../preprocessed_data/german/w2v/full_trained', 'w2v_embedding_300')
        elif args.emb_technique == 'fasttext':
            if args.lang == 'en':
                # emb_reader = FastTextEmbReader(config.emb_dir_en["fasttext"].format(config.word_emb_training_type),
                #                                args.emb_name, config.fine_tuned_enabled)
                emb_reader = FineTuneEmbed_ortho_procrustes(
                    '../preprocessed_data/fasttext/fine_tuned', 'fasttext_pre_trained',
                    '../preprocessed_data/fasttext/full_trained', 'w2v_embedding_skipgram_300')
            elif args.lang == 'de':
                emb_reader = FastTextEmbReader(
                    config.emb_dir_de["fasttext"].format(config.word_emb_training_type),
                    args.emb_name, config.fine_tuned_enabled)
                # emb_reader = FineTuneEmbed_ortho_procrustes('../preprocessed_data/fasttext/fine_tuned', 'fasttext_pre_trained',
                #                                             '../preprocessed_data/fasttext/full_trained', 'w2v_embedding_skipgram_300')
        elif args.emb_technique == "glove":
            if args.lang == 'de':
                logger.info('Load german glove embedding')
                emb_reader = GloveEmbedding(config.emb_dir_de["glove"], args.emb_name)
            else:
                logger.info("Load en glove embedding for {}".format(
                    config.word_emb_training_type))
                emb_reader = GloveEmbedding(
                    config.emb_dir_en["glove"].format(config.word_emb_training_type),
                    args.emb_name)
        elif args.emb_technique == "MUSE_supervised":
            emb_reader = MUSEEmbedding(config.emb_dir_biling['supervised'], args.emb_name)
        elif args.emb_technique == "MUSE_unsupervised":
            emb_reader = MUSEEmbedding(config.emb_dir_biling['unsupervised'], args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True,
                         name='word_emb', embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
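# The `neg_input` tensor expects, for every sentence in a batch, `args.neg_size`
# sentences drawn at random from the training set as negative examples. A
# minimal sampling sketch (array names are assumptions for illustration):
import numpy as np

def negative_batch(train_x, batch_size, neg_size):
    """Sample a (batch_size, neg_size, maxlen) block of random training
    sentences to pair with a positive batch of the same size."""
    idx = np.random.randint(0, len(train_x), size=(batch_size, neg_size))
    return train_x[idx]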
def create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect):

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout = args.dropout_W
    recurrent_dropout = args.dropout_U

    vocab_size = len(vocab)

    logger.info('Building a LSTM attention model to predict term/aspect sentiment')
    print '\n\n'

    ##### Inputs #####
    sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
    aspect_input = Input(shape=(maxlen_aspect,), dtype='int32', name='aspect_input')
    pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

    ##### construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ### represent aspect as averaged word embedding ###
    print 'use average term embs as aspect embedding'
    aspect_term_embs = word_emb(aspect_input)
    aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

    ### sentence representation ###
    sentence_output = word_emb(sentence_input)
    pretrain_output = word_emb(pretrain_input)

    print 'use a rnn layer'
    rnn = RNN(args.rnn_dim, return_sequences=True, dropout=dropout,
              recurrent_dropout=recurrent_dropout, name='lstm')
    sentence_output = rnn(sentence_output)
    pretrain_output = rnn(pretrain_output)

    print 'use content attention to get term weights'
    att_weights = Attention(name='att_weights')([sentence_output, aspect_embs])
    sentence_output = WeightedSum()([sentence_output, att_weights])

    pretrain_output = Average(mask_zero=True)(pretrain_output)

    if args.dropout_prob > 0:
        print 'use dropout layer'
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        pretrain_output = Dropout(args.dropout_prob)(pretrain_output)

    sentence_output = Dense(num_outputs, name='dense_1')(sentence_output)
    pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_output)

    aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
    doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

    model = Model(inputs=[sentence_input, aspect_input, pretrain_input],
                  outputs=[aspect_probs, doc_probs])

    logger.info(' Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.is_pretrain:
        import pickle
        print 'Set embedding, lstm, and dense weights from pre-trained models'
        if args.domain == 'lt':
            f_1 = open('../pretrained_weights/lstm_weights_lt%.1f.pkl' % (args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_lt%.1f.pkl' % (args.percetage), 'rb')
        else:
            f_1 = open('../pretrained_weights/lstm_weights_res%.1f.pkl' % (args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_res%.1f.pkl' % (args.percetage), 'rb')

        lstm_weights = pickle.load(f_1)
        dense_weights = pickle.load(f_2)
        model.get_layer('lstm').set_weights(lstm_weights)
        model.get_layer('dense_1').set_weights(dense_weights)
        model.get_layer('dense_2').set_weights(dense_weights)

    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt' % (args.domain)
    emb_reader = EmbReader(args, emb_path)
    model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info(' Done')

    return model
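# Hedged usage sketch for the model above: both heads end in a softmax, so a
# typical setup compiles with categorical cross-entropy on each output. The
# optimizer and loss weights below are illustrative assumptions, not the
# repository's exact training configuration.
from keras.optimizers import Adam

def compile_aspect_sentiment_model(model, lr=0.001, pretrain_weight=0.1):
    model.compile(optimizer=Adam(lr=lr),
                  loss={'aspect_model': 'categorical_crossentropy',
                        'pretrain_model': 'categorical_crossentropy'},
                  loss_weights={'aspect_model': 1.0,
                                'pretrain_model': pretrain_weight},
                  metrics=['accuracy'])
    return model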
def create_model(args, vocab, nb_class, overall_maxlen, doc_maxlen_1, doc_maxlen_2): # Funtion that initializes word embeddings def init_emb(emb_matrix, vocab, emb_file_gen, emb_file_domain): print 'Loading pretrained general word embeddings and domain word embeddings ...' counter_gen = 0. pretrained_emb = open(emb_file_gen) for line in pretrained_emb: tokens = line.split() if len(tokens) != 301: continue word = tokens[0] vec = tokens[1:] try: emb_matrix[0][vocab[word]][:300] = vec counter_gen += 1 except KeyError: pass if args.use_domain_emb: counter_domain = 0. pretrained_emb = open(emb_file_domain) for line in pretrained_emb: tokens = line.split() if len(tokens) != 101: continue word = tokens[0] vec = tokens[1:] try: emb_matrix[0][vocab[word]][300:] = vec counter_domain += 1 except KeyError: pass pretrained_emb.close() logger.info( '%i/%i word vectors initialized by general embeddings (hit rate: %.2f%%)' % (counter_gen, len(vocab), 100 * counter_gen / len(vocab))) if args.use_domain_emb: logger.info( '%i/%i word vectors initialized by domain embeddings (hit rate: %.2f%%)' % (counter_domain, len(vocab), 100 * counter_domain / len(vocab))) return emb_matrix # Build model logger.info('Building model ...') print 'Building model ...' print '\n\n' vocab_size = len(vocab) ################################### # Inputs ################################### print 'Input layer' # sequence of token indices for aspect-level data sentence_input = Input(shape=(overall_maxlen, ), dtype='int32', name='sentence_input') # gold opinion label for aspect-level data. op_label_input = Input(shape=(overall_maxlen, 3), dtype=K.floatx(), name='op_label_input') # probability of sending gold opinion labels at opinion transmission step p_gold_op = Input(shape=(overall_maxlen, ), dtype=K.floatx(), name='p_gold_op') if args.use_doc: # doc_input_1 denotes the data for sentiment classification # doc_input_2 denotes the data for domain classification doc_input_1 = Input(shape=(doc_maxlen_1, ), dtype='int32', name='doc_input_1') doc_input_2 = Input(shape=(doc_maxlen_2, ), dtype='int32', name='doc_input_2') ######################################### # Shared word embedding layer ######################################### print 'Word embedding layer' word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb') # aspect-level inputs word_embeddings = word_emb(sentence_input) sentence_output = word_embeddings # doc-level inputs if args.use_doc: doc_output_1 = word_emb(doc_input_1) # we only use general embedding for domain classification doc_output_2 = word_emb(doc_input_2) if args.use_domain_emb: # mask out the domain embeddings doc_output_2 = Remove_domain_emb()(doc_output_2) ###################################### # Shared CNN layers ###################################### for i in xrange(args.shared_layers): print 'Shared CNN layer %s' % i sentence_output = Dropout(args.dropout_prob)(sentence_output) if args.use_doc: doc_output_1 = Dropout(args.dropout_prob)(doc_output_1) doc_output_2 = Dropout(args.dropout_prob)(doc_output_2) if i == 0: conv_1 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=3, \ activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_1') conv_2 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_2') sentence_output_1 = conv_1(sentence_output) sentence_output_2 = conv_2(sentence_output) sentence_output = Concatenate()( [sentence_output_1, sentence_output_2]) if args.use_doc: 
doc_output_1_1 = conv_1(doc_output_1) doc_output_1_2 = conv_2(doc_output_1) doc_output_1 = Concatenate()([doc_output_1_1, doc_output_1_2]) doc_output_2_1 = conv_1(doc_output_2) doc_output_2_2 = conv_2(doc_output_2) doc_output_2 = Concatenate()([doc_output_2_1, doc_output_2_2]) else: conv = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='cnn_%s'%i) sentence_output = conv(sentence_output) if args.use_doc: doc_output_1 = conv(doc_output_1) doc_output_2 = conv(doc_output_2) word_embeddings = Concatenate()([word_embeddings, sentence_output]) init_shared_features = sentence_output ####################################### # Define task-specific layers ####################################### # AE specific layers aspect_cnn = Sequential() for a in xrange(args.aspect_layers): print 'Aspect extraction layer %s' % a aspect_cnn.add(Dropout(args.dropout_prob)) aspect_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='aspect_cnn_%s'%a)) aspect_dense = Dense(nb_class, activation='softmax', name='aspect_dense') # AS specific layers sentiment_cnn = Sequential() for b in xrange(args.senti_layers): print 'Sentiment classification layer %s' % b sentiment_cnn.add(Dropout(args.dropout_prob)) sentiment_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='sentiment_cnn_%s'%b)) sentiment_att = Self_attention(args.use_opinion, name='sentiment_att') sentiment_dense = Dense(3, activation='softmax', name='sentiment_dense') if args.use_doc: # DS specific layers doc_senti_cnn = Sequential() for c in xrange(args.doc_senti_layers): print 'Document-level sentiment layers %s' % c doc_senti_cnn.add(Dropout(args.dropout_prob)) doc_senti_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='doc_sentiment_cnn_%s'%c)) doc_senti_att = Attention(name='doc_senti_att') doc_senti_dense = Dense(3, name='doc_senti_dense') # The reason not to use the default softmax is that it reports errors when input_dims=2 due to # compatibility issues between the tf and keras versions used. 
softmax = Lambda(lambda x: K.tf.nn.softmax(x), name='doc_senti_softmax') # DD specific layers doc_domain_cnn = Sequential() for d in xrange(args.doc_domain_layers): print 'Document-level domain layers %s' % d doc_domain_cnn.add(Dropout(args.dropout_prob)) doc_domain_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='doc_domain_cnn_%s'%d)) doc_domain_att = Attention(name='doc_domain_att') doc_domain_dense = Dense(1, activation='sigmoid', name='doc_domain_dense') # re-encoding layer enc = Dense(args.cnn_dim, activation='relu', name='enc') #################################################### # aspect-level operations involving message passing #################################################### for i in xrange(args.interactions + 1): print 'Interaction number ', i aspect_output = sentence_output sentiment_output = sentence_output # note that the aspet-level data will also go through the doc-level models doc_senti_output = sentence_output doc_domain_output = sentence_output ### AE ### if args.aspect_layers > 0: aspect_output = aspect_cnn(aspect_output) # concate word embeddings and task-specific output for prediction aspect_output = Concatenate()([word_embeddings, aspect_output]) aspect_output = Dropout(args.dropout_prob)(aspect_output) aspect_probs = aspect_dense(aspect_output) ### AS ### if args.senti_layers > 0: sentiment_output = sentiment_cnn(sentiment_output) sentiment_output = sentiment_att( [sentiment_output, op_label_input, aspect_probs, p_gold_op]) sentiment_output = Concatenate()( [init_shared_features, sentiment_output]) sentiment_output = Dropout(args.dropout_prob)(sentiment_output) sentiment_probs = sentiment_dense(sentiment_output) if args.use_doc: ### DS ### if args.doc_senti_layers > 0: doc_senti_output = doc_senti_cnn(doc_senti_output) # output attention weights with two activation functions senti_att_weights_softmax, senti_att_weights_sigmoid = doc_senti_att( doc_senti_output) # reshape the sigmoid attention weights, will be used in message passing senti_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))( senti_att_weights_sigmoid) doc_senti_output = WeightedSum()( [doc_senti_output, senti_att_weights_softmax]) doc_senti_output = Dropout(args.dropout_prob)(doc_senti_output) doc_senti_output = doc_senti_dense(doc_senti_output) doc_senti_probs = softmax(doc_senti_output) # reshape the doc-level sentiment predictions, will be used in message passing doc_senti_probs = Lambda(lambda x: K.expand_dims(x, axis=-2))( doc_senti_probs) doc_senti_probs = Lambda(lambda x: K.repeat_elements( x, overall_maxlen, axis=1))(doc_senti_probs) ### DD ### if args.doc_domain_layers > 0: doc_domain_output = doc_domain_cnn(doc_domain_output) domain_att_weights_softmax, domain_att_weights_sigmoid = doc_domain_att( doc_domain_output) domain_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))( domain_att_weights_sigmoid) doc_domain_output = WeightedSum()( [doc_domain_output, domain_att_weights_softmax]) doc_domain_output = Dropout(args.dropout_prob)(doc_domain_output) doc_domain_probs = doc_domain_dense(doc_domain_output) # update sentence_output for the next iteration sentence_output = Concatenate()([ sentence_output, aspect_probs, sentiment_probs, doc_senti_probs, senti_weights, domain_weights ]) else: # update sentence_output for the next iteration sentence_output = Concatenate()( [sentence_output, aspect_probs, sentiment_probs]) sentence_output = enc(sentence_output) aspect_model = Model(inputs=[sentence_input, 
op_label_input, p_gold_op], outputs=[aspect_probs, sentiment_probs]) #################################################### # doc-level operations without message passing #################################################### if args.use_doc: if args.doc_senti_layers > 0: doc_output_1 = doc_senti_cnn(doc_output_1) att_1, _ = doc_senti_att(doc_output_1) doc_output_1 = WeightedSum()([doc_output_1, att_1]) doc_output_1 = Dropout(args.dropout_prob)(doc_output_1) doc_output_1 = doc_senti_dense(doc_output_1) doc_prob_1 = softmax(doc_output_1) if args.doc_domain_layers > 0: doc_output_2 = doc_domain_cnn(doc_output_2) att_2, _ = doc_domain_att(doc_output_2) doc_output_2 = WeightedSum()([doc_output_2, att_2]) doc_output_2 = Dropout(args.dropout_prob)(doc_output_2) doc_prob_2 = doc_domain_dense(doc_output_2) doc_model = Model(inputs=[doc_input_1, doc_input_2], outputs=[doc_prob_1, doc_prob_2]) else: doc_model = None #################################################### # initialize word embeddings #################################################### logger.info('Initializing lookup table') # Load pre-trained word vectors. # To save the loading time, here we load from the extracted subsets of the original embeddings, # which only contains the embeddings of words in the vocab. if args.use_doc: emb_path_gen = '../glove/%s_.txt' % (args.domain) emb_path_domain = '../domain_specific_emb/%s_.txt' % (args.domain) else: emb_path_gen = '../glove/%s.txt' % (args.domain) emb_path_domain = '../domain_specific_emb/%s.txt' % (args.domain) # Load pre-trained word vectors from the orginal large files # If you are loading from ssd, the process would only take 1-2 mins # If you are loading from hhd, the process would take a few hours at first try, # and would take 1-2 mins in subsequent repeating runs (due to cache performance). # emb_path_gen = '../glove.840B.300d.txt' # if args.domain == 'lt': # emb_path_domain = '../laptop_emb.vec' # else: # emb_path_domain = '../restaurant_emb.vec' aspect_model.get_layer('word_emb').set_weights( init_emb( aspect_model.get_layer('word_emb').get_weights(), vocab, emb_path_gen, emb_path_domain)) logger.info(' Done') return aspect_model, doc_model
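# This variant returns `aspect_model` and `doc_model` uncompiled. A hedged
# sketch of an external compile step that mirrors the losses used by the GCN
# variant further below (optimizer choice assumed):
def compile_imn_models(aspect_model, doc_model, optimizer='adam'):
    # Aspect model: token-level aspect tags and sentiment, both categorical.
    aspect_model.compile(optimizer=optimizer,
                         loss=['categorical_crossentropy', 'categorical_crossentropy'],
                         loss_weights=[1., 1.])
    # Doc model: 3-way sentiment plus binary domain classification.
    if doc_model is not None:
        doc_model.compile(optimizer=optimizer,
                          loss=['categorical_crossentropy', 'binary_crossentropy'],
                          loss_weights=[1., 1.],
                          metrics=['categorical_accuracy', 'accuracy'])
    return aspect_model, doc_model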
def create_model(args, vocab, nb_class, overall_maxlen, doc_maxlen_1, doc_maxlen_2, num_relations): # Funtion that initializes word embeddings def init_emb(emb_matrix, vocab, emb_file_gen, emb_file_domain): print 'Loading pretrained general word embeddings and domain word embeddings ...' counter_gen = 0. pretrained_emb = open(emb_file_gen) for line in pretrained_emb: tokens = line.split() if len(tokens) != 301: continue word = tokens[0] vec = tokens[1:] try: emb_matrix[0][vocab[word]][:300] = vec counter_gen += 1 except KeyError: pass if args.use_domain_emb: counter_domain = 0. pretrained_emb = open(emb_file_domain) for line in pretrained_emb: tokens = line.split() if len(tokens) != 101: continue word = tokens[0] vec = tokens[1:] try: emb_matrix[0][vocab[word]][300:] = vec counter_domain += 1 except KeyError: pass pretrained_emb.close() logger.info( '%i/%i word vectors initialized by general embeddings (hit rate: %.2f%%)' % (counter_gen, len(vocab), 100 * counter_gen / len(vocab))) if args.use_domain_emb: logger.info( '%i/%i word vectors initialized by domain embeddings (hit rate: %.2f%%)' % (counter_domain, len(vocab), 100 * counter_domain / len(vocab))) return emb_matrix # Build model logger.info('Building model ...') print 'Building model ...' print '\n\n' vocab_size = len(vocab) ################################### # Inputs ################################### print 'Input layer' # sequence of token indices for aspect-level data sentence_input = Input(shape=(overall_maxlen, ), dtype='int32', name='sentence_input') A_in = [ Input(shape=(overall_maxlen, overall_maxlen), name='A_in%s' % i) for i in range(num_relations) ] # gold opinion label for aspect-level data. op_label_input = Input(shape=(overall_maxlen, 3), dtype=K.floatx(), name='op_label_input') # probability of sending gold opinion labels at opinion transmission step p_gold_op = Input(shape=(overall_maxlen, ), dtype=K.floatx(), name='p_gold_op') mask = K.not_equal(sentence_input, 0) if args.use_doc: # doc_input_1 denotes the data for sentiment classification # doc_input_2 denotes the data for domain classification doc_input_1 = Input(shape=(doc_maxlen_1, ), dtype='int32', name='doc_input_1') doc_input_2 = Input(shape=(doc_maxlen_2, ), dtype='int32', name='doc_input_2') if args.use_bert: if args.bert_type == 'base': hs = 768 bert_input = Input(shape=(overall_maxlen + 1, hs), dtype=K.floatx(), name='bert_input') # +1 denote +cls ######################################### # Shared word embedding layer ######################################### print 'Word embedding layer' word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb') # aspect-level inputs word_embeddings = word_emb(sentence_input) sentence_output = word_embeddings # doc-level inputs if args.use_doc: doc_output_1 = word_emb(doc_input_1) # we only use general embedding for domain classification doc_output_2 = word_emb(doc_input_2) if args.use_domain_emb: # mask out the domain embeddings doc_output_2 = Remove_domain_emb()(doc_output_2) def slice(x, index): return x[:, index, :] def slice1(x, index): return x[:, index:, :] expand_dim = Lambda(lambda x: K.expand_dims(x, axis=1)) if args.use_bert: #code.interact(local=locals()) bert_inp = Lambda(slice1, arguments={'index': 1})(bert_input) bert_cls = Lambda(slice, arguments={'index': 0})(bert_input) sentence_output = Concatenate()([sentence_output, bert_inp]) # if args.use_bert_cls: #code.interact(local=locals()) #bert_cls = bert_input[:,0,:] node_num = sentence_output.shape.as_list()[1] bert_cls1 = 
expand_dim(bert_cls) bert_cls = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(bert_cls1) if args.use_bert_cls == 0 and args.use_bert: sentence_output = Concatenate()([sentence_output, bert_cls]) ###################################### # Shared GCN + CNN layers ###################################### # iter_gcn = Sequential() # iter_gcn.add(SpectralGraphConvolution(150, activation='relu',name='GCN')) expand_dim = Lambda(lambda x: K.expand_dims(x, axis=1)) share_gcn_dense = Dense(300, activation='relu', name='share_gcn_dense') for i in xrange(args.shared_layers): print 'Shared GCN layer %s' % i sentence_output = Dropout(args.dropout_prob)(sentence_output) if args.use_doc: doc_output_1 = Dropout(args.dropout_prob)(doc_output_1) doc_output_2 = Dropout(args.dropout_prob)(doc_output_2) if i == 0: gcn_0 = SpectralGraphConvolution(args.gcn_dim, args.relation_dim, activation='relu', name='GCN_0') # conv_2 = SpectralGraphConvolution(args.cnn_dim, activation='relu',name='GCN_0_2') #expand_dim = Lambda(lambda x: K.expand_dims(x, axis = 1)) sentence_output_0 = gcn_0([sentence_output] + A_in) H = Dropout(args.dropout_prob)(sentence_output_0) Global_graph = MyMeanPool(axis=1, smask=mask)(H) node_num = H.shape.as_list()[1] Gg = expand_dim(Global_graph) GG = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(Gg) # code.interact(local=locals()) # HG = Concatenate()([H, GG]) # sentence_output = HG if args.use_cnn: conv_1 = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=3, \ activation='relu', padding='same', kernel_initializer=my_init, name='CNN_0_1') conv_2 = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='CNN_0_2') sentence_output_1 = conv_1(sentence_output) sentence_output_2 = conv_2(sentence_output) if args.use_meanpool: sentence_output = Concatenate()( [sentence_output_1, sentence_output_2, GG, H]) else: sentence_output = Concatenate()( [sentence_output_1, sentence_output_2, H]) else: if args.use_meanpool: sentence_output = Concatenate()([GG, H]) else: sentence_output = H if args.use_bert_cls == 1 and args.use_bert: sentence_output = Concatenate()([sentence_output, bert_cls]) if args.use_doc: doc_output_1_1 = conv_1(doc_output_1) doc_output_1_2 = conv_2(doc_output_1) doc_output_1 = Concatenate()([doc_output_1_1, doc_output_1_2]) doc_output_2_1 = conv_1(doc_output_2) doc_output_2_2 = conv_2(doc_output_2) doc_output_2 = Concatenate()([doc_output_2_1, doc_output_2_2]) if args.shared_layers == 1: sentence_output = share_gcn_dense(sentence_output) else: # conv = Conv1DWithMasking(filters=args.cnn_dim/3, kernel_size=5, \ # activation='relu', padding='same', kernel_initializer=my_init, name='CNN_%s'%i) gcn = SpectralGraphConvolution(args.gcn_dim, args.relation_dim, activation='relu', name='GCN_%s' % i) #expand_dim = Lambda(lambda x: K.expand_dims(x, axis = 1)) # sentence_output1 = conv(sentence_output) sentence_output2 = gcn([sentence_output] + A_in) H = Dropout(args.dropout_prob)(sentence_output2) Global_graph = MyMeanPool(axis=1, smask=mask)(H) node_num = H.shape.as_list()[1] Gg = expand_dim(Global_graph) GG = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(Gg) # code.interact(local=locals()) if args.use_cnn: conv = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='CNN_%s'%i) sentence_output1 = conv(sentence_output) # sentence_output = Concatenate()([sentence_output_1, GG, H]) if args.use_meanpool: sentence_output = Concatenate()([sentence_output_1, GG, H]) 
else: sentence_output = Concatenate()([sentence_output_1, H]) else: if args.use_meanpool: sentence_output = Concatenate()([GG, H]) else: sentence_output = H if args.use_bert_cls == 2 and args.use_bert: sentence_output = Concatenate()([sentence_output, bert_cls]) sentence_output = share_gcn_dense(sentence_output) if args.use_doc: doc_output_1 = conv(doc_output_1) doc_output_2 = conv(doc_output_2) word_embeddings = Concatenate()([word_embeddings, sentence_output]) init_shared_features = sentence_output ####################################### # Define task-specific layers ####################################### # AE specific layers aspect_cnn = Sequential() for a in xrange(args.aspect_layers): print 'Aspect extraction layer %s' % a aspect_cnn.add(Dropout(args.dropout_prob)) aspect_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='aspect_cnn_%s'%a)) aspect_dense = Dense(nb_class, activation='softmax', name='aspect_dense') aspect_dense_ = Dense(nb_class, name='aspect_dense_') # AS specific layers sentiment_cnn = Sequential() for b in xrange(args.senti_layers): print 'Sentiment classification layer %s' % b sentiment_cnn.add(Dropout(args.dropout_prob)) sentiment_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='sentiment_cnn_%s'%b)) sentiment_att = Self_attention(args.use_opinion, name='sentiment_att') sentiment_dense = Dense(3, activation='softmax', name='sentiment_dense') # sentiment_dense_ = Dense(3, name='sentiment_dense_') if args.use_doc: # DS specific layers doc_senti_cnn = Sequential() for c in xrange(args.doc_senti_layers): print 'Document-level sentiment layers %s' % c doc_senti_cnn.add(Dropout(args.dropout_prob)) doc_senti_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='doc_sentiment_cnn_%s'%c)) doc_senti_att = Attention(name='doc_senti_att') doc_senti_dense = Dense(3, name='doc_senti_dense') # The reason not to use the default softmax is that it reports errors when input_dims=2 due to # compatibility issues between the tf and keras versions used. 
softmax = Lambda(lambda x: K.tf.nn.softmax(x), name='doc_senti_softmax') # DD specific layers doc_domain_cnn = Sequential() for d in xrange(args.doc_domain_layers): print 'Document-level domain layers %s' % d doc_domain_cnn.add(Dropout(args.dropout_prob)) doc_domain_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='doc_domain_cnn_%s'%d)) doc_domain_att = Attention(name='doc_domain_att') doc_domain_dense = Dense(1, activation='sigmoid', name='doc_domain_dense') # re-encoding layer enc = Dense(300, activation='relu', name='enc') #################################################### # aspect-level operations involving message passing #################################################### for i in xrange(args.interactions + 1): print 'Interaction number ', i aspect_output = sentence_output sentiment_output = sentence_output # note that the aspet-level data will also go through the doc-level models doc_senti_output = sentence_output doc_domain_output = sentence_output ### AE ### if args.aspect_layers > 0: aspect_output = aspect_cnn(aspect_output) # concate word embeddings and task-specific output for prediction aspect_output = Concatenate()([word_embeddings, aspect_output]) if args.use_bert_cls == 3 and args.use_bert: aspect_output = Concatenate()([aspect_output, bert_cls]) aspect_output = Dropout(args.dropout_prob)(aspect_output) aspect_probs = aspect_dense(aspect_output) use_crf = 0 if use_crf: aspect_prob = aspect_dense_(aspect_output) aspect_crf = CRF(nb_class, sparse_target=True) # False aspect_crf_output = aspect_crf(aspect_prob) ### AS ### if args.senti_layers > 0: sentiment_output = sentiment_cnn(sentiment_output) sentiment_output = sentiment_att( [sentiment_output, op_label_input, aspect_probs, p_gold_op]) sentiment_output = Concatenate()( [init_shared_features, sentiment_output]) if args.use_bert_cls == 3 and args.use_bert: sentiment_output = Concatenate()([sentiment_output, bert_cls]) sentiment_output = Dropout(args.dropout_prob)(sentiment_output) sentiment_probs = sentiment_dense(sentiment_output) # use_crf = 0 # if use_crf: # sentiment_prob = sentiment_dense_(sentiment_output) # senti_crf = CRF(3, sparse_target=False) # False # senti_crf_output = senti_crf(sentiment_prob) if args.use_doc: ### DS ### if args.doc_senti_layers > 0: doc_senti_output = doc_senti_cnn(doc_senti_output) # output attention weights with two activation functions senti_att_weights_softmax, senti_att_weights_sigmoid = doc_senti_att( doc_senti_output) # reshape the sigmoid attention weights, will be used in message passing senti_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))( senti_att_weights_sigmoid) doc_senti_output = WeightedSum()( [doc_senti_output, senti_att_weights_softmax]) doc_senti_output = Dropout(args.dropout_prob)(doc_senti_output) doc_senti_output = doc_senti_dense(doc_senti_output) doc_senti_probs = softmax(doc_senti_output) # reshape the doc-level sentiment predictions, will be used in message passing doc_senti_probs = Lambda(lambda x: K.expand_dims(x, axis=-2))( doc_senti_probs) doc_senti_probs = Lambda(lambda x: K.repeat_elements( x, overall_maxlen, axis=1))(doc_senti_probs) ### DD ### if args.doc_domain_layers > 0: doc_domain_output = doc_domain_cnn(doc_domain_output) domain_att_weights_softmax, domain_att_weights_sigmoid = doc_domain_att( doc_domain_output) domain_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))( domain_att_weights_sigmoid) doc_domain_output = WeightedSum()( [doc_domain_output, 
domain_att_weights_softmax]) doc_domain_output = Dropout(args.dropout_prob)(doc_domain_output) doc_domain_probs = doc_domain_dense(doc_domain_output) # update sentence_output for the next iteration sentence_output = Concatenate()([ sentence_output, aspect_probs, sentiment_probs, doc_senti_probs, senti_weights, domain_weights ]) else: # update sentence_output for the next iteration #sentence_output = Concatenate()([sentence_output, aspect_probs, sentiment_probs]) if args.use_prob: sentence_output = Concatenate()( [sentence_output, aspect_probs, sentiment_probs]) else: sentence_output = Concatenate()( [sentence_output, aspect_output, sentiment_output]) if args.use_bert_cls == 4 and args.use_bert: sentence_output = Concatenate()([sentence_output, bert_cls]) sentence_output = enc(sentence_output) use_crf = 0 if use_crf: aspect_prob = aspect_dense_(aspect_output) aspect_crf = CRF(nb_class, sparse_target=True) # False aspect_crf_output = aspect_crf(aspect_prob) aspect_model = Model(inputs=[sentence_input] + A_in + [op_label_input] + [p_gold_op], outputs=[aspect_crf_output, sentiment_probs]) else: if args.use_bert: aspect_model = Model(inputs=[sentence_input] + A_in + [op_label_input] + [p_gold_op] + [bert_input], outputs=[aspect_probs, sentiment_probs]) else: aspect_model = Model(inputs=[sentence_input] + A_in + [op_label_input] + [p_gold_op], outputs=[aspect_probs, sentiment_probs]) #################################################### # doc-level operations without message passing #################################################### if args.use_doc: if args.doc_senti_layers > 0: doc_output_1 = doc_senti_cnn(doc_output_1) att_1, _ = doc_senti_att(doc_output_1) doc_output_1 = WeightedSum()([doc_output_1, att_1]) doc_output_1 = Dropout(args.dropout_prob)(doc_output_1) doc_output_1 = doc_senti_dense(doc_output_1) doc_prob_1 = softmax(doc_output_1) if args.doc_domain_layers > 0: doc_output_2 = doc_domain_cnn(doc_output_2) att_2, _ = doc_domain_att(doc_output_2) doc_output_2 = WeightedSum()([doc_output_2, att_2]) doc_output_2 = Dropout(args.dropout_prob)(doc_output_2) doc_prob_2 = doc_domain_dense(doc_output_2) doc_model = Model(inputs=[doc_input_1, doc_input_2], outputs=[doc_prob_1, doc_prob_2]) else: doc_model = None #################################################### # initialize word embeddings #################################################### logger.info('Initializing lookup table') # Load pre-trained word vectors. # To save the loading time, here we load from the extracted subsets of the original embeddings, # which only contains the embeddings of words in the vocab. if args.use_doc: emb_path_gen = '../glove/%s_.txt' % (args.domain) emb_path_domain = '../domain_specific_emb/%s_.txt' % (args.domain) else: emb_path_gen = '../glove/%s.txt' % (args.domain) emb_path_domain = '../domain_specific_emb/%s.txt' % (args.domain) # Load pre-trained word vectors from the orginal large files # If you are loading from ssd, the process would only take 1-2 mins # If you are loading from hhd, the process would take a few hours at first try, # and would take 1-2 mins in subsequent repeating runs (due to cache performance). 
# emb_path_gen = '../glove.840B.300d.txt' # if args.domain == 'lt': # emb_path_domain = '../laptop_emb.vec' # else: # emb_path_domain = '../restaurant_emb.vec' aspect_model.get_layer('word_emb').set_weights( init_emb( aspect_model.get_layer('word_emb').get_weights(), vocab, emb_path_gen, emb_path_domain)) logger.info(' Done') ## Optimizaer algorithm # from optimizers import get_optimizer optimizer = get_optimizer(args) if args.use_doc == 1 and args.interactions > 0: # fix the document-specific parameters when updating aspect model aspect_model.get_layer('doc_senti_att').trainable = False aspect_model.get_layer('doc_senti_dense').trainable = False aspect_model.get_layer('doc_domain_att').trainable = False if use_crf: aspect_model.compile( optimizer=optimizer, loss=[aspect_crf.loss_function, 'categorical_crossentropy'], loss_weights=[1., 1.]) else: aspect_model.compile( optimizer=optimizer, loss=['categorical_crossentropy', 'categorical_crossentropy'], loss_weights=[1., 1.]) #aspect_model.summary() if args.use_doc == 1: doc_model.get_layer('doc_senti_att').trainable = True doc_model.get_layer('doc_senti_dense').trainable = True doc_model.get_layer('doc_domain_att').trainable = True doc_model.get_layer('doc_domain_dense').trainable = True if args.use_doc: doc_model.compile( optimizer=optimizer, loss=['categorical_crossentropy', 'binary_crossentropy'], loss_weights=[1., 1.], metrics=['categorical_accuracy', 'accuracy']) return aspect_model, doc_model
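# Hedged sketch of a single aspect-level training step for the compiled GCN
# variant above; the input ordering follows the Model(inputs=...) definition,
# and all batch arrays are assumed to be prepared elsewhere.
def aspect_train_step(aspect_model, sen_batch, A_batches, op_labels, p_gold,
                      aspect_y, senti_y, bert_batch=None):
    inputs = [sen_batch] + list(A_batches) + [op_labels, p_gold]
    if bert_batch is not None:
        inputs.append(bert_batch)
    # Returns the losses for the two outputs as compiled above.
    return aspect_model.train_on_batch(inputs, [aspect_y, senti_y])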
def create_model(args, vocab, nb_class, overall_maxlen, doc_maxlen_1, doc_maxlen_2): # Funtion that initializes word embeddings def init_emb(emb_matrix, vocab, emb_file_gen, emb_file_domain): print 'Loading pretrained general word embeddings and domain word embeddings ...' counter_gen = 0. pretrained_emb = open(emb_file_gen) for line in pretrained_emb: tokens = line.split() if len(tokens) != 301: continue word = tokens[0] vec = tokens[1:] try: emb_matrix[0][vocab[word]][:300] = vec counter_gen += 1 except KeyError: pass if args.use_domain_emb: counter_domain = 0. pretrained_emb = open(emb_file_domain) for line in pretrained_emb: tokens = line.split() if len(tokens) != 101: continue word = tokens[0] vec = tokens[1:] try: emb_matrix[0][vocab[word]][300:] = vec counter_domain += 1 except KeyError: pass pretrained_emb.close() logger.info( '%i/%i word vectors initialized by general embeddings (hit rate: %.2f%%)' % (counter_gen, len(vocab), 100 * counter_gen / len(vocab))) if args.use_domain_emb: logger.info( '%i/%i word vectors initialized by domain embeddings (hit rate: %.2f%%)' % (counter_domain, len(vocab), 100 * counter_domain / len(vocab))) return emb_matrix # Build model logger.info('Building model ...') print 'Building model ...' print '\n\n' vocab_size = len(vocab) ################################### # Inputs ################################### print 'Input layer' # sequence of token indices for aspect-level data sentence_input = Input(shape=(overall_maxlen, ), dtype='int32', name='sentence_input') # gold opinion label for aspect-level data. op_label_input = Input(shape=(overall_maxlen, 3), dtype=K.floatx(), name='op_label_input') # probability of sending gold opinion labels at opinion transmission step p_gold_op = Input(shape=(overall_maxlen, ), dtype=K.floatx(), name='p_gold_op') A_in = Input(shape=(overall_maxlen, overall_maxlen), dtype=K.floatx(), name='A_input') if args.use_doc: # doc_input_1 denotes the data for sentiment classification # doc_input_2 denotes the data for domain classification doc_input_1 = Input(shape=(doc_maxlen_1, ), dtype='int32', name='doc_input_1') doc_input_2 = Input(shape=(doc_maxlen_2, ), dtype='int32', name='doc_input_2') if args.use_bert: bert_input = Input(shape=(overall_maxlen + 1, 768), dtype=K.floatx(), name='bert_input') # +1 denote +cls ######################################### # Shared word embedding layer ######################################### print 'Word embedding layer' word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb') # aspect-level inputs word_embeddings = word_emb(sentence_input) sentence_output = word_embeddings # doc-level inputs if args.use_doc: doc_output_1 = word_emb(doc_input_1) # we only use general embedding for domain classification doc_output_2 = word_emb(doc_input_2) if args.use_domain_emb: # mask out the domain embeddings doc_output_2 = Remove_domain_emb()(doc_output_2) def slice(x, index): return x[:, index, :] def slice1(x, index): return x[:, index:, :] expand_dim = Lambda(lambda x: K.expand_dims(x, axis=1)) if args.use_bert: #code.interact(local=locals()) bert_inp = Lambda(slice1, arguments={'index': 1})(bert_input) bert_cls = Lambda(slice, arguments={'index': 0})(bert_input) #sentence_output = Concatenate()([sentence_output, bert_inp]) # if args.use_bert_cls: #code.interact(local=locals()) #bert_cls = bert_input[:,0,:] node_num = sentence_output.shape.as_list()[1] bert_cls1 = expand_dim(bert_cls) bert_cls = Lambda(lambda x: K.tile(x, [1, node_num, 1]))(bert_cls1) from my_layers_algo import 
DigiCaps, Length, Capsule if args.use_bert_cls == 0 and args.use_bert: sentence_output = Concatenate()([sentence_output, bert_cls]) ###################################### # Shared CNN layers ###################################### for i in xrange(args.shared_layers): print 'Shared CNN layer %s' % i sentence_output = Dropout(args.dropout_prob)(sentence_output) if args.use_doc: doc_output_1 = Dropout(args.dropout_prob)(doc_output_1) doc_output_2 = Dropout(args.dropout_prob)(doc_output_2) if i == 0: #conv_0 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=2, \ # activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_0') conv_1 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=3, \ activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_1') #conv_2 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=4, \ # activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_2') conv_3 = Conv1DWithMasking(filters=args.cnn_dim/2, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='cnn_0_3') #sentence_output_0 = conv_0(sentence_output) sentence_output_1 = conv_1(sentence_output) #sentence_output_2 = conv_2(sentence_output) sentence_output_3 = conv_3(sentence_output) #sentence_output = Concatenate()([sentence_output_0, sentence_output_1, sentence_output_2, sentence_output_3]) sentence_output = Concatenate()( [sentence_output_1, sentence_output_3]) if args.use_doc: #doc_output_1_0 = conv_0(doc_output_1) doc_output_1_1 = conv_1(doc_output_1) #doc_output_1_2 = conv_2(doc_output_1) doc_output_1_3 = conv_3(doc_output_1) #doc_output_1 = Concatenate()([doc_output_1_0, doc_output_1_1, doc_output_1_2, doc_output_1_3]) doc_output_1 = Concatenate()([doc_output_1_1, doc_output_1_3]) #doc_output_2_0 = conv_0(doc_output_2) doc_output_2_1 = conv_1(doc_output_2) #doc_output_2_2 = conv_2(doc_output_2) doc_output_2_3 = conv_3(doc_output_2) #doc_output_2 = Concatenate()([doc_output_2_0, doc_output_2_1, doc_output_2_2, doc_output_2_3]) doc_output_2 = Concatenate()([doc_output_2_1, doc_output_2_3]) else: #conv = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=3, \ # activation='relu', padding='same', kernel_initializer=my_init, name='cnn_3_%s'%i) conv_ = Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='cnn_5_%s'%i) #sentence_output1 = conv(sentence_output) sentence_output = conv_(sentence_output) #sentence_output = Concatenate()([sentence_output1, sentence_output2]) if args.use_doc: doc_output_1 = conv_(doc_output_1) doc_output_2 = conv_(doc_output_2) word_embeddings = Concatenate()([word_embeddings, sentence_output]) init_shared_features = sentence_output ####################################### # Define task-specific layers ####################################### #if args.which_dual == 'dual': # from my_layers import Conv1DWithMasking, Remove_domain_emb, Self_attention, Attention, WeightedSum, Dual_attention #else: # from my_layers_algo import Conv1DWithMasking, Remove_domain_emb, Self_attention, Attention, WeightedSum, Dual_attention # AE specific layers aspect_cnn = Sequential() for a in xrange(args.aspect_layers): print 'Aspect extraction layer %s' % a aspect_cnn.add(Dropout(args.dropout_prob)) aspect_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='aspect_cnn_%s'%a)) aspect_dense = Dense(nb_class, activation='softmax', name='aspect_dense') # OE specific 
layers opinion_cnn = Sequential() for a in xrange(args.opinion_layers): print 'Opinion extraction layer %s' % a opinion_cnn.add(Dropout(args.dropout_prob)) opinion_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='opinion_cnn_%s'%a)) opinion_dense = Dense(nb_class, activation='softmax', name='opinion_dense') # AS specific layers sentiment_cnn = Sequential() for b in xrange(args.senti_layers): print 'Sentiment classification layer %s' % b sentiment_cnn.add(Dropout(args.dropout_prob)) sentiment_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='sentiment_cnn_%s'%b)) sentiment_att = Self_attention(args.use_opinion, name='sentiment_att') sentiment_dense = Dense(3, activation='softmax', name='sentiment_dense') aspect_dual_att = Dual_attention(name='aspect_dualatt') opinion_dual_att = Dual_attention(name='opinion_dualatt') sentiment_dual_att = Dual_attention(name='sentiment_dualatt') asp_caps = Capsule(num_capsule=overall_maxlen, A=A_in, dim_capsule=args.capsule_dim, routings=3, name='asp_caps') senti_caps = Capsule(num_capsule=overall_maxlen, A=A_in, dim_capsule=args.capsule_dim, routings=3, name='senti_caps') opin_caps = Capsule(num_capsule=overall_maxlen, A=A_in, dim_capsule=args.capsule_dim, routings=3, name='opin_caps') #probs = Length(name='out_caps') if args.use_doc: # DS specific layers doc_senti_cnn = Sequential() for c in xrange(args.doc_senti_layers): print 'Document-level sentiment layers %s' % c doc_senti_cnn.add(Dropout(args.dropout_prob)) doc_senti_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \ activation='relu', padding='same', kernel_initializer=my_init, name='doc_sentiment_cnn_%s'%c)) doc_senti_att = Attention(name='doc_senti_att') doc_senti_dense = Dense(3, name='doc_senti_dense') # The reason not to use the default softmax is that it reports errors when input_dims=2 due to # compatibility issues between the tf and keras versions used. 
        softmax = Lambda(lambda x: K.tf.nn.softmax(x), name='doc_senti_softmax')

        # DD specific layers
        doc_domain_cnn = Sequential()
        for d in xrange(args.doc_domain_layers):
            print 'Document-level domain layer %s' % d
            doc_domain_cnn.add(Dropout(args.dropout_prob))
            doc_domain_cnn.add(Conv1DWithMasking(filters=args.cnn_dim, kernel_size=5, \
                                                 activation='relu', padding='same', kernel_initializer=my_init, name='doc_domain_cnn_%s'%d))
        doc_domain_att = Attention(name='doc_domain_att')
        doc_domain_dense = Dense(1, activation='sigmoid', name='doc_domain_dense')

    # re-encoding layer
    enc = Dense(args.cnn_dim, activation='relu', name='enc')
    enc_a = Dense(args.cnn_dim, activation='relu', name='enc_a')
    enc_o = Dense(args.cnn_dim, activation='relu', name='enc_o')
    enc_s = Dense(args.cnn_dim, activation='relu', name='enc_s')
    enc_d = Dense(args.cnn_dim, activation='relu', name='enc_d')

    ####################################################
    # aspect-level operations involving message passing
    ####################################################
    print(sentence_output)
    # sentence_output = enc(sentence_output)
    aspect_output = sentence_output
    opinion_output = sentence_output
    sentiment_output = sentence_output
    doc_senti_output = sentence_output
    doc_domain_output = sentence_output

    for i in xrange(args.interactions + 1):
        print 'Interaction number ', i

        if args.use_doc:
            ### DS ###
            if args.doc_senti_layers > 0:
                doc_senti_output = doc_senti_cnn(doc_senti_output)
            # output attention weights with two activation functions
            senti_att_weights_softmax, senti_att_weights_sigmoid = doc_senti_att(doc_senti_output)
            # reshape the sigmoid attention weights, will be used in message passing
            senti_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(senti_att_weights_sigmoid)

            doc_senti_output1 = WeightedSum()([doc_senti_output, senti_att_weights_softmax])
            doc_senti_output1 = Dropout(args.dropout_prob)(doc_senti_output1)
            doc_senti_output1 = doc_senti_dense(doc_senti_output1)
            doc_senti_probs = softmax(doc_senti_output1)
            # reshape the doc-level sentiment predictions, will be used in message passing
            doc_senti_probs = Lambda(lambda x: K.expand_dims(x, axis=-2))(doc_senti_probs)
            doc_senti_probs = Lambda(lambda x: K.repeat_elements(x, overall_maxlen, axis=1))(doc_senti_probs)

            ### DD ###
            if args.doc_domain_layers > 0:
                doc_domain_output = doc_domain_cnn(doc_domain_output)
            domain_att_weights_softmax, domain_att_weights_sigmoid = doc_domain_att(doc_domain_output)
            domain_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(domain_att_weights_sigmoid)
            #code.interact(local=locals())

            doc_domain_output1 = WeightedSum()([doc_domain_output, domain_att_weights_softmax])
            doc_domain_output1 = Dropout(args.dropout_prob)(doc_domain_output1)
            doc_domain_probs = doc_domain_dense(doc_domain_output1)

        if args.use_bert:
            aspect_output = Concatenate()([aspect_output, bert_inp])
            opinion_output = Concatenate()([opinion_output, bert_inp])
            sentiment_output = Concatenate()([sentiment_output, bert_inp])

        aspect_output = Dropout(args.dropout_prob)(aspect_output)
        opinion_output = Dropout(args.dropout_prob)(opinion_output)
        sentiment_output = Dropout(args.dropout_prob)(sentiment_output)

        ### AE ###
        if args.aspect_layers > 0:
            aspect_output = aspect_cnn(aspect_output)
        # concatenate word embeddings and task-specific output for prediction

        ### OE ###
        if args.opinion_layers > 0:
            opinion_output = opinion_cnn(opinion_output)

        ### AS ###
        if args.senti_layers > 0:
            sentiment_output = sentiment_cnn(sentiment_output)

        opin2asp = asp_caps([aspect_output, opinion_output])
        senti2asp = asp_caps([aspect_output, sentiment_output])
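        # Note (an interpretation based on the variable naming, not stated in the
        # original code): each xxx2yyy tensor is the capsule-routed message from
        # task xxx's features into task yyy's token representation, e.g. opin2asp
        # and senti2asp carry opinion and sentiment information into the aspect
        # view. The Concatenate calls below merge the two incoming messages per
        # task before they are fused with the shared features and word embeddings.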
        asp = Concatenate()([opin2asp, senti2asp])

        asp2opin = opin_caps([opinion_output, aspect_output])
        senti2opin = opin_caps([opinion_output, sentiment_output])
        opin = Concatenate()([asp2opin, senti2opin])

        asp2senti = senti_caps([sentiment_output, aspect_output])
        opin2senti = senti_caps([sentiment_output, opinion_output])
        senti = Concatenate()([asp2senti, opin2senti])

        #sentiment_output = sentiment_att([sentiment_output, op_label_input, opinion_probs, p_gold_op])
        # aspect_output += asp
        # opinion_output += opin
        # sentiment_output += senti

        if args.use_doc:
            aspect_output = Concatenate()([word_embeddings, aspect_output, asp, domain_weights])
            opinion_output = Concatenate()([word_embeddings, opinion_output, opin, domain_weights])
            sentiment_output = Concatenate()([init_shared_features, sentiment_output, senti, doc_senti_probs, senti_weights])
        else:
            aspect_output = Concatenate()([word_embeddings, aspect_output, asp])
            opinion_output = Concatenate()([word_embeddings, opinion_output, opin])
            sentiment_output = Concatenate()([init_shared_features, sentiment_output, senti])

        #aspect_output = Concatenate()([init_shared_features, aspect_output])
        aspect_output = Dropout(args.dropout_prob)(aspect_output)
        aspect_probs = aspect_dense(aspect_output)

        #opinion_output = Concatenate()([init_shared_features, opinion_output])
        opinion_output = Dropout(args.dropout_prob)(opinion_output)
        opinion_probs = opinion_dense(opinion_output)

        #sentiment_output = Concatenate()([word_embeddings, sentiment_output])
        sentiment_output = Dropout(args.dropout_prob)(sentiment_output)
        sentiment_probs = sentiment_dense(sentiment_output)

        # update sentence_output for the next iteration
        opinion_output = Concatenate()([opinion_output, aspect_probs, opinion_probs, sentiment_probs, domain_weights])
        aspect_output = Concatenate()([aspect_output, aspect_probs, opinion_probs, sentiment_probs, domain_weights])
        sentiment_output = Concatenate()([sentiment_output, aspect_probs, opinion_probs, sentiment_probs, doc_senti_probs, senti_weights])
        sentence_output_ = Concatenate()([sentence_output, aspect_probs, opinion_probs, sentiment_probs, doc_senti_probs, senti_weights, domain_weights])
        #code.interact(local=locals())

        aspect_output = enc_a(aspect_output)
        opinion_output = enc_o(opinion_output)
        sentiment_output = enc_s(sentiment_output)
        if args.use_doc:
            doc_senti_output = enc_d(sentence_output_)
            doc_domain_output = enc_d(sentence_output_)

    if args.use_bert:
        aspect_model = Model(
            inputs=[sentence_input, A_in, op_label_input, p_gold_op, bert_input],
            outputs=[aspect_probs, opinion_probs, sentiment_probs])
    else:
        aspect_model = Model(
            inputs=[sentence_input, A_in, op_label_input, p_gold_op],
            outputs=[aspect_probs, opinion_probs, sentiment_probs])

    ####################################################
    # doc-level operations without message passing
    ####################################################
    if args.use_doc:
        if args.doc_senti_layers > 0:
            doc_output_1 = doc_senti_cnn(doc_output_1)
        att_1, _ = doc_senti_att(doc_output_1)
        doc_output_1 = WeightedSum()([doc_output_1, att_1])
        doc_output_1 = Dropout(args.dropout_prob)(doc_output_1)
        doc_output_1 = doc_senti_dense(doc_output_1)
        doc_prob_1 = softmax(doc_output_1)

        if args.doc_domain_layers > 0:
            doc_output_2 = doc_domain_cnn(doc_output_2)
        att_2, _ = doc_domain_att(doc_output_2)
        doc_output_2 = WeightedSum()([doc_output_2, att_2])
        doc_output_2 = Dropout(args.dropout_prob)(doc_output_2)
        doc_prob_2 = doc_domain_dense(doc_output_2)

        doc_model = Model(inputs=[doc_input_1, doc_input_2], outputs=[doc_prob_1, doc_prob_2])
    else:
        doc_model = None

    ####################################################
    # initialize word embeddings
    ####################################################
    logger.info('Initializing lookup table')

    # Load pre-trained word vectors.
    # To save loading time, we load from pre-extracted subsets of the original
    # embedding files, which only contain the embeddings of words in the vocab.
    if args.use_doc:
        emb_path_gen = '../glove/%s_.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s_.txt' % (args.domain)
    else:
        emb_path_gen = '../glove/%s.txt' % (args.domain)
        emb_path_domain = '../domain_specific_emb/%s.txt' % (args.domain)

    # Alternatively, load the pre-trained word vectors from the original large files.
    # Loading from an SSD only takes 1-2 minutes; loading from an HDD can take a few
    # hours on the first run, but drops to 1-2 minutes on subsequent runs thanks to
    # the file system cache.
    # emb_path_gen = '../glove.840B.300d.txt'
    # if args.domain == 'lt':
    #     emb_path_domain = '../laptop_emb.vec'
    # else:
    #     emb_path_domain = '../restaurant_emb.vec'

    aspect_model.get_layer('word_emb').set_weights(
        init_emb(aspect_model.get_layer('word_emb').get_weights(),
                 vocab, emb_path_gen, emb_path_domain))
    logger.info(' Done')

    return aspect_model, doc_model
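
########################################################################
# Illustrative aside: init_emb is imported from elsewhere and its code is not
# shown in this file. The sketch below is only an assumption about what such a
# helper could look like, based on the call above: it fills rows of the
# existing embedding matrix by concatenating a general-purpose vector and a
# domain-specific vector for every vocab word found in both files (assuming
# emb_dim = general_dim + domain_dim), and returns a weights list suitable for
# set_weights(). It is not the repository's implementation.
########################################################################
import codecs
import numpy as np

def init_emb_sketch(weights, vocab, emb_path_gen, emb_path_domain):
    def load_vectors(path):
        # Read a GloVe/word2vec-style text file: one word per line followed by
        # its vector components.
        vectors = {}
        with codecs.open(path, 'r', 'utf-8') as f:
            for line in f:
                parts = line.rstrip().split()
                if len(parts) < 3:
                    continue  # skip empty lines or a possible header line
                vectors[parts[0]] = np.asarray(parts[1:], dtype='float32')
        return vectors

    gen_vecs = load_vectors(emb_path_gen)
    dom_vecs = load_vectors(emb_path_domain)
    emb_matrix = weights[0]  # an Embedding layer holds a single weight matrix
    for word, idx in vocab.items():
        if word in gen_vecs and word in dom_vecs:
            emb_matrix[idx] = np.concatenate([gen_vecs[word], dom_vecs[word]])
    return [emb_matrix]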