def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye((w_n.shape[0]).eval())))
        return args.ortho_reg*reg

    # Vocabulary size
    vocab_size = len(vocab)

    ##### Inputs #####
    # Positive input shape: batch_size x maxlen; each element is a word's index in the
    # vocabulary, one index per word in the sentence.
    # Negative input shape: batch_size x args.neg_size x maxlen, likewise.
    # These indices are later looked up to obtain the word vectors w.
    sentence_input = Input(batch_shape=(None, maxlen), dtype='int32', name='sentence_input')
    neg_input = Input(batch_shape=(None, args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    # The embedding layer maps positive integer indices to dense vectors of fixed size,
    # e.g. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]].
    # keras.layers.embeddings.Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None)
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    # Compute the sentence embedding; this relies on several Keras internals worth studying further.
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    # Compute z_n for the negative instances
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    # Reconstruction step
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    # Loss function
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(input=[sentence_input, neg_input], output=loss)

    ### Word embedding and aspect embedding initialization ######
    # If emb_path is set, initialize the word embedding matrix E from the file and the aspect matrix T with k-means centroids.
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
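The ortho_reg term defined above penalizes overlap between the rows of the aspect embedding matrix. A standalone NumPy sketch of the same computation (illustrative values; the regularization weight 0.1 stands in for args.ortho_reg):

import numpy as np

def ortho_penalty(T, weight=0.1):
    # Row-normalize the aspect matrix, then measure how far its Gram matrix
    # is from the identity: the penalty is zero only for orthonormal rows.
    T_n = T / (1e-7 + np.sqrt((T ** 2).sum(axis=-1, keepdims=True)))
    gram = T_n.dot(T_n.T)
    return weight * np.sum((gram - np.eye(T.shape[0])) ** 2)

T_random = np.random.rand(14, 200)        # e.g. 14 aspects, 200-dim embeddings
print(ortho_penalty(T_random))            # large: random rows overlap heavily
print(ortho_penalty(np.eye(14, 200)))     # 0.0: rows already orthonormal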
Example #2
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg*reg

    vocab_size = len(vocab)

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    #y_s = LSTM(args.emb_dim, return_sequences=False)(e_w)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    #z_n = TimeDistributed(LSTM(args.emb_dim, return_sequences=False))(e_neg)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    ### Word embedding and aspect embedding initialization ######
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        # model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()[0]))
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))
    return model
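Since the model's output is already the per-sample max-margin loss, training typically compiles it with a pass-through loss and feeds paired positive/negative batches. A minimal training sketch, assuming create_model above plus batch generators like the sentence_batch_generator / negative_batch_generator helpers that appear in a later example (args, vocab, maxlen, train_x and batch_size are placeholders):

import numpy as np
import keras.backend as K
from keras.optimizers import Adam

def max_margin_loss(y_true, y_pred):
    # The MaxMargin layer already computes the hinge loss, so the Keras
    # loss simply averages the model output and ignores y_true.
    return K.mean(y_pred)

model = create_model(args, maxlen, vocab)
model.compile(optimizer=Adam(lr=0.001), loss=max_margin_loss)

sen_gen = sentence_batch_generator(train_x, batch_size)
neg_gen = negative_batch_generator(train_x, batch_size, args.neg_size)

for b in range(1000):                       # batches per epoch
    sen_batch = next(sen_gen)
    neg_batch = next(neg_gen)
    dummy_y = np.ones((len(sen_batch), 1))  # ignored by max_margin_loss
    batch_loss = model.train_on_batch([sen_batch, neg_batch], dummy_y)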
Example #3
def create_model(args, vocab, num_outputs):

    ###############################################################################################################################
    ## Create Model
    #

    dropout = 0.5
    recurrent_dropout = 0.1
    vocab_size = len(vocab)

    ##### Inputs #####
    sentence_input = Input(shape=(None, ),
                           dtype='int32',
                           name='sentence_input')

    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb')
    output = word_emb(sentence_input)

    print('use a rnn layer')
    output = LSTM(args.rnn_dim,
                  return_sequences=False,
                  dropout=dropout,
                  recurrent_dropout=recurrent_dropout,
                  name='lstm')(output)

    print('use 0.5 dropout layer')
    output = Dropout(0.5)(output)

    densed = Dense(num_outputs, name='dense')(output)
    probs = Activation('softmax')(densed)
    model = Model(inputs=[sentence_input], outputs=probs)

    ##### Initialization #####
    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt' % (args.domain)
    emb_reader = EmbReader(emb_path, emb_dim=args.emb_dim)
    model.get_layer('word_emb').set_weights(
        emb_reader.get_emb_matrix_given_vocab(
            vocab,
            model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
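This variant is an ordinary softmax classifier, so it can be compiled and trained directly. A minimal sketch, assuming padded index sequences and integer labels (train_sequences, train_labels, maxlen and num_outputs are placeholders):

from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

model = create_model(args, vocab, num_outputs)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

train_x = pad_sequences(train_sequences, maxlen=maxlen)          # lists of word indices
train_y = to_categorical(train_labels, num_classes=num_outputs)  # one-hot labels
model.fit(train_x, train_y, batch_size=32, epochs=10, validation_split=0.1)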
Example #4
	
	if vocab_path:
		with open(vocab_path, 'rb') as vocab_file:
			vocab = pk.load(vocab_file)
	else:
		vocab = create_vocab(train_df['text'].values, tokenize_text, to_lower, min_word_freq, emb_words)	
	vocab_size = len(vocab)
	logger.info('  Vocab size: %i' % (vocab_size))
	
	pd.options.mode.chained_assignment = None
	train_df.loc[:,'text'] = tokenize_dataset(train_df['text'].values, vocab, tokenize_text, to_lower)
	dev_df.loc[:,'text'] = tokenize_dataset(dev_df['text'].values, vocab, tokenize_text, to_lower)
	test_df.loc[:,'text'] = tokenize_dataset(test_df['text'].values, vocab, tokenize_text, to_lower)
	
	train_maxlen = train_df['text'].map(len).max()
	dev_maxlen = dev_df['text'].map(len).max()
	test_maxlen = test_df['text'].map(len).max()
	overal_maxlen = max(train_maxlen, dev_maxlen, test_maxlen)
	
	return train_df, dev_df, test_df, vocab, overal_maxlen

if __name__ == '__main__':
	from w2vEmbReader import W2VEmbReader as EmbReader
	emb_reader = EmbReader('/home/david/data/embed/glove.6B.50d.txt', emb_dim=50)
	emb_words = emb_reader.load_words()
	
	train_df, dev_df, test_df, vocab, overal_maxlen, qwks = get_data('/home/david/data/ats/ets/54147', emb_words=emb_words)
	print(qwks)
	
	print('Done.')
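The tokenized dataframes hold variable-length index sequences; before they are fed to a Keras model they are usually padded to overal_maxlen. A small follow-on sketch of that step (assuming the dataframes and overal_maxlen returned above):

from keras.preprocessing.sequence import pad_sequences

train_x = pad_sequences(train_df['text'].values, maxlen=overal_maxlen)
dev_x = pad_sequences(dev_df['text'].values, maxlen=overal_maxlen)
test_x = pad_sequences(test_df['text'].values, maxlen=overal_maxlen)
print(train_x.shape)   # (num_train_examples, overal_maxlen)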
	
Example #5
def create_model(args, maxlen, vocab):
    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.get_shape().as_list()[0])))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join("..", "preprocessed_data", args.domain),
            args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen),
                      dtype='int32',
                      name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb')
    #, embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    #W_constraint=MaxNorm(10),
    #b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(
        args.aspect_size,
        args.emb_dim,
        name='aspect_emb',
        #W_constraint=MaxNorm(10),
        W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
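After training, the learned aspects are usually interpreted by listing the vocabulary words closest to each row of the aspect matrix. A rough post-hoc sketch using cosine similarity in NumPy (vocab_inv, an index-to-word map such as {ind: w for w, ind in vocab.items()}, is assumed):

import numpy as np
import keras.backend as K

word_mat = K.get_value(model.get_layer('word_emb').embeddings)   # (vocab_size, emb_dim)
aspect_mat = K.get_value(model.get_layer('aspect_emb').W)        # (aspect_size, emb_dim)

w = word_mat / (1e-8 + np.linalg.norm(word_mat, axis=-1, keepdims=True))
a = aspect_mat / (1e-8 + np.linalg.norm(aspect_mat, axis=-1, keepdims=True))
sims = a.dot(w.T)                                                # cosine similarities

for k in range(sims.shape[0]):
    top_words = [vocab_inv[i] for i in sims[k].argsort()[::-1][:10]]
    print('Aspect %d: %s' % (k, ' '.join(top_words)))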
Example #6
def create_model(args, initial_mean_value, overal_maxlen, vocab):
	
	import keras.backend as K
	
	from keras import layers
	from keras.layers import *
	
	from deepats.my_layers import Attention, Conv1DWithMasking, MeanOverTime, TemporalMeanPooling, MeanPool, GlobalMeanPooling
	
	from keras.models import Sequential, Model
	from keras.initializers import Constant


	###############################################################################################################################
	## Create Model
	#
	
	vocab_size = len(vocab)
	
	dropout_W = 0.5		# default=0.5
	dropout_U = 0.1		# default=0.1
	
	cnn_border_mode='same'
	if initial_mean_value.ndim == 0:
		initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
	num_outputs = len(initial_mean_value)
	
	if args.model_type == 'cls':
		raise NotImplementedError
	
	elif args.model_type == 'rwa':
		logger.info('Building a RWA model')
		
		from deepats.rwa import RWA
# 		from deepats.RWACell import RWACell as RWA
		
		model = Sequential()
		model.add(Embedding(vocab_size, args.emb_dim))
		
		for i in range(args.stack-1):
			model.add(LSTM(args.rnn_dim, return_sequences=True, dropout=dropout_W, recurrent_dropout=dropout_U))
			model.add(Dropout(args.dropout_prob))
			
		model.add(RWA(args.rnn_dim))
		#model.add(Bidirectional(RWA(args.rnn_dim), merge_mode='ave'))# {'sum', 'mul', 'concat', 'ave'***, None}
		
		model.add(Dropout(args.dropout_prob))
		
		bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
		model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
		
		#model.add(Activation('sigmoid'))
		model.add(Activation('tanh'))
		model.emb_index = 0
	
	elif args.model_type == 'regp':
		logger.info('Building an LSTM REGRESSION model with POOLING')
		
		POOL=2 #2
		
		if POOL==1:
			mask_zero=False
		else:
			mask_zero=True
		model = Sequential()
		model.add(Embedding(vocab_size, args.emb_dim, mask_zero=mask_zero))
		
		for i in range(args.stack):
			model.add(LSTM(args.rnn_dim, return_sequences=True, dropout=dropout_W, recurrent_dropout=dropout_U))
			model.add(Dropout(args.dropout_prob))
		
		## MEAN POOLING.
		if POOL==1:
			model.add(GlobalAveragePooling1D())
		elif POOL==2:
			model.add(MeanOverTime())#A/B
		elif POOL==3:
			model.add(TemporalMeanPooling())
		elif POOL==4:
			model.add(MeanPool())
		elif POOL==5:
			model.add(GlobalMeanPooling())
		
		bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
		model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
		
		model.add(Activation('sigmoid'))
		#model.add(Activation('tanh'))
		model.emb_index = 0
		
	elif args.model_type == 'regp_ORIG':
		logger.info('Building a REGRESSION model with POOLING')
		model = Sequential()
		model.add(Embedding(vocab_size, args.emb_dim, mask_zero=True))
		if args.cnn_dim > 0:
			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
		if args.rnn_dim > 0:
			model.add(LSTM(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
		if args.dropout_prob > 0:
			model.add(Dropout(args.dropout_prob))
		if args.aggregation == 'mot':
			model.add(MeanOverTime(mask_zero=True))
		elif args.aggregation.startswith('att'):
			model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
		model.add(Dense(num_outputs))
		if not args.skip_init_bias:
			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
			model.layers[-1].b.set_value(bias_value)
		model.add(Activation('sigmoid'))
		model.emb_index = 0
		
	
	logger.info('  Done')
	
	###############################################################################################################################
	## Initialize embeddings if requested
	#

	if args.emb_path:
		from w2vEmbReader import W2VEmbReader as EmbReader
		logger.info('Initializing lookup table')
		emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
		emb_reader.load_embeddings(vocab)
		emb_wts = emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].get_weights()[0])
		wts = model.layers[model.emb_index].get_weights()
		wts[0] = emb_wts
		model.layers[model.emb_index].set_weights(wts)
		logger.info('  Done')
	
	return model
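The bias initialization used in the regression branches above sets the final Dense bias to the logit of the initial mean score, so the untrained sigmoid output already equals that mean. A quick numeric check (0.3 is a hypothetical mean):

import numpy as np

initial_mean_value = np.array([0.3])
bias_value = np.log(initial_mean_value) - np.log(1 - initial_mean_value)  # logit(0.3)
print(bias_value)                          # ~[-0.8473]
print(1.0 / (1.0 + np.exp(-bias_value)))   # sigmoid(bias) recovers [0.3]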
Example #7
def create_model(args, kstep, node_size):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = weight_matrix / K.cast(K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)), K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].eval())))
        return args.ortho_reg*reg


    ##### Inputs #####
    sentence_input = Input(shape=(kstep, node_size), dtype='float32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, kstep, node_size), dtype='float32', name='neg_input')

    print("sentence_input.ndim", sentence_input.ndim)
    print("neg_input.ndim", neg_input.ndim)

    e_w = sentence_input
    y_s = Average()(sentence_input)

    print(y_s.ndim)
    print(e_w.ndim)
    print(K.int_shape(e_w), K.int_shape(y_s))

    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    print("z_s----------- %d", (z_s.ndim))

    ##### Compute representations of negative instances #####
    #e_neg = word_emb(neg_input)
    e_neg = neg_input
    z_n = Average()(e_neg)

    print("e_neg.ndim", e_neg.ndim)
    print("z_n.ndim", z_n.ndim)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, 2405, name='aspect_emb',
            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####

    print("z_s shape:", K.int_shape(z_s), "r_s shape:", K.int_shape(r_s))

    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(input=[sentence_input, neg_input], output=loss)

    ### Word embedding and aspect embedding initialization ######
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        model.get_layer('word_emb').W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').W.get_value()))
        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        model.get_layer('aspect_emb').W.set_value(emb_reader.get_aspect_matrix(args.aspect_size))

    return model
Example #8
def create_model(args, maxlen, vocab):
    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)  # K is the Keras backend
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.shape[0].value)))  # squared distance between the Gram matrix of the normalized rows and the identity; smaller means the rows are closer to orthogonal
        return args.ortho_reg * reg  # the smaller this term, the further apart the aspect vectors are pushed

    vocab_size = len(vocab)

    if args.emb_name:  # load the previously saved embeddings
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join(
                "/content/drive/My Drive/Attention-Based-Aspect-Extraction-master",
                "preprocessed_data", args.domain), args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen, ),
                           dtype='int32',
                           name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen),
                      dtype='int32',
                      name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size,
                         args.emb_dim,
                         mask_zero=True,
                         name='word_emb',
                         embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation: combine the word embeddings via attention #####
    e_w = word_emb(sentence_input)  # look up the embeddings for the input indices
    y_s = Average()(e_w)  # average the word embeddings
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w,
                                                       y_s])  # attention layer
    z_s = WeightedSum()([e_w, att_weights])  # encoding layer

    ##### Compute representations of negative instances (contrastive samples for the max-margin loss) #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction: a dense layer decodes the attended sentence vector into aspect weights #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax',
                     name='p_t')(p_t)  # softmax over aspects; same size, values normalized to a distribution
    r_s = WeightedAspectEmb(args.aspect_size,
                            args.emb_dim,
                            name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(
                                p_t)  # max-norm constraint of 10 on the rows, plus the custom ortho_reg regularizer

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n,
                                         r_s])  # custom max-margin (hinge) loss layer
    model = Model(inputs=[sentence_input, neg_input],
                  outputs=[loss])  # the negative inputs are sampled separately from the dataset

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters'
        )  # the aspect matrix is initialized with k-means centroids of the word embeddings
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)  # weights used to reconstruct r_s

    return model
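To see which words the trained model attends to, the attention weights can be read out through a sub-model that reuses the sentence input. A sketch of the usual Keras pattern (padded_sentences is a placeholder array of word-index rows; this works because att_weights depends only on sentence_input):

from keras.models import Model

att_model = Model(inputs=model.inputs[0],
                  outputs=model.get_layer('att_weights').output)
att = att_model.predict(padded_sentences)   # one weight per word position in each sentence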
Example #9
                   concat_axis=-1)

    densed = Dense(1)(merged)
    score = Activation('sigmoid')(densed)
    model = Model(input=[sequenceQn, sequenceAns], output=score)

    # get the WordEmbedding layer index
    model.emb_index = 0
    model_layer_index = 0
    for test in model.layers:
        if (test.name == 'QnEmbedding' or test.name == 'AnsEmbedding'):
            model.emb_index = model_layer_index
            # Initialize embeddings if requested
            if emb_path:
                logger.info('Initializing lookup table')
                emb_reader = EmbReader(emb_path, emb_dim=emb_dim)
                model.layers[model.emb_index].W.set_value(
                    emb_reader.get_emb_matrix_given_vocab(
                        vocab, model.layers[model.emb_index].W.get_value()))

        model_layer_index += 1

if model_type == 'cnnwang2016':
    logger.info(
        'Building a CNN model (Zhiguo Wang, 2016) with S+,S-,T+,T- as input, and MaxPooling)'
    )
    from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge, pooling

    assert cnn_dim > 0

    cnn_border_mode = 'same'
Example #10

	vocab, train_x, test_x, overall_maxlen = dataset.get_data(args.domain, vocab_size=args.vocab_size, maxlen=args.maxlen)
	vocab_inv = {ind:w for w,ind in vocab.items()}

	model = Model(args, maxlen, vocab)
	sen_gen = sentence_batch_generator(train_x, batch_size)
	neg_gen = negative_batch_generator(train_x, batch_size, args.neg_size)

	batches_per_epoch = 1000
	with tf.Session() as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver(tf.global_variables())
		if args.emb_path:
			from w2vEmbReader import W2VEmbReader as EmbReader 
			emb_reader = EmbReader(args.emb_path, emb_dim = args.emb_dim)
			word_emb = sess.run(model.word_emb)
			sess.run(tf.assign(model.word_emb, emb_reader.get_emb_matrix_given_vocab(vocab, word_emb)))
			sess.run(tf.assign(model.aspect_emb, emb_reader.get_aspect_matrix(args.aspect_size)))
		checkpoint_dir = './ckpt/'
		min_loss = float('inf')
		
		for ii in range(args.epochs):
			loss, max_margin_loss = 0., 0.

			for b in range(batches_per_epoch):
Example #11
def create_model(args, initial_mean_value, overal_maxlen, vocab):
	
	import keras.backend as K
	from keras.layers.embeddings import Embedding
	from keras.models import Sequential, Model
	from keras.layers.core import Dense, Dropout, Activation
	from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking
	
	###############################################################################################################################
	## Recurrence unit type
	#

	if args.recurrent_unit == 'lstm':
		from keras.layers.recurrent import LSTM as RNN
	elif args.recurrent_unit == 'gru':
		from keras.layers.recurrent import GRU as RNN
	elif args.recurrent_unit == 'simple':
		from keras.layers.recurrent import SimpleRNN as RNN

	###############################################################################################################################
	## Create Model
	#
	
	dropout_W = 0.5		# default=0.5
	dropout_U = 0.1		# default=0.1
	cnn_border_mode='same'
	if initial_mean_value.ndim == 0:
		initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
	num_outputs = len(initial_mean_value)
	
	if args.model_type == 'cls':
		raise NotImplementedError
	
	elif args.model_type == 'reg':
		logger.info('Building a REGRESSION model')
		model = Sequential()
		model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
		if args.cnn_dim > 0:
			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
		if args.rnn_dim > 0:
			model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
		if args.dropout_prob > 0:
			model.add(Dropout(args.dropout_prob))
		model.add(Dense(num_outputs))
		if not args.skip_init_bias:
			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
			model.layers[-1].bias = bias_value
		model.add(Activation('sigmoid'))
		model.emb_index = 0
	
	elif args.model_type == 'regp':
		logger.info('Building a REGRESSION model with POOLING')
		model = Sequential()
		model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
		if args.cnn_dim > 0:
			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
		if args.rnn_dim > 0:
			model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
		if args.dropout_prob > 0:
			model.add(Dropout(args.dropout_prob))
		if args.aggregation == 'mot':
			model.add(MeanOverTime(mask_zero=True))
		elif args.aggregation.startswith('att'):
			model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
		model.add(Dense(num_outputs))
		if not args.skip_init_bias:
			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
			model.layers[-1].bias = bias_value
		model.add(Activation('sigmoid'))
		model.emb_index = 0

	elif args.model_type == 'breg':
		logger.info('Building a BIDIRECTIONAL REGRESSION model')
		from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
		model = Sequential()
		sequence = Input(shape=(overal_maxlen,), dtype='int32')
		output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
		if args.cnn_dim > 0:
			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
		if args.rnn_dim > 0:
			forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
			backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
		if args.dropout_prob > 0:
			forwards = Dropout(args.dropout_prob)(forwards)
			backwards = Dropout(args.dropout_prob)(backwards)
		merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
		densed = Dense(num_outputs)(merged)
		if not args.skip_init_bias:
			raise NotImplementedError
		score = Activation('sigmoid')(densed)
		model = Model(input=sequence, output=score)
		model.emb_index = 1
	
	elif args.model_type == 'bregp':
		logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
		from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
		model = Sequential()
		sequence = Input(shape=(overal_maxlen,), dtype='int32')
		output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
		if args.cnn_dim > 0:
			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
		if args.rnn_dim > 0:
			forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
			backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
		if args.dropout_prob > 0:
			forwards = Dropout(args.dropout_prob)(forwards)
			backwards = Dropout(args.dropout_prob)(backwards)
		forwards_mean = MeanOverTime(mask_zero=True)(forwards)
		backwards_mean = MeanOverTime(mask_zero=True)(backwards)
		merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
		densed = Dense(num_outputs)(merged)
		if not args.skip_init_bias:
			raise NotImplementedError
		score = Activation('sigmoid')(densed)
		model = Model(input=sequence, output=score)
		model.emb_index = 1
	
	logger.info('  Done')
	
	###############################################################################################################################
	## Initialize embeddings if requested
	#

	if args.emb_path:
		from w2vEmbReader import W2VEmbReader as EmbReader
		logger.info('Initializing lookup table')
		emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
		model.layers[model.emb_index].set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].get_weights()))
		logger.info('  Done')
	
	return model
Example #12
def create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect):
    
    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout = args.dropout_W       
    recurrent_dropout = args.dropout_U  
    vocab_size = len(vocab)

    logger.info('Building a LSTM attention model to predict term/aspect sentiment')
    print('\n\n')

    ##### Inputs #####
    sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
    aspect_input = Input(shape=(maxlen_aspect,), dtype='int32', name='aspect_input')
    pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

    ##### construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ### represent aspect as averaged word embedding ###
    print('use average term embs as aspect embedding')
    aspect_term_embs = word_emb(aspect_input)
    aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

    ### sentence representation ###
    sentence_output = word_emb(sentence_input)
    pretrain_output = word_emb(pretrain_input)


    print('use a rnn layer')
    rnn = RNN(args.rnn_dim, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout, name='lstm')
    sentence_output = rnn(sentence_output)
    pretrain_output = rnn(pretrain_output)

    print('use content attention to get term weights')
    att_weights = Attention(name='att_weights')([sentence_output, aspect_embs])
    sentence_output = WeightedSum()([sentence_output, att_weights])

    pretrain_output = Average(mask_zero=True)(pretrain_output)
  
    if args.dropout_prob > 0:
        print('use dropout layer')
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        pretrain_output = Dropout(args.dropout_prob)(pretrain_output)


    sentence_output = Dense(num_outputs, name='dense_1')(sentence_output)
    pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_output)

    aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
    doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

    model = Model(inputs=[sentence_input, aspect_input, pretrain_input], outputs=[aspect_probs, doc_probs])


    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.is_pretrain:

        import pickle

        print('Set embedding, lstm, and dense weights from pre-trained models')
        if args.domain == 'lt':
            f_1 = open('../pretrained_weights/lstm_weights_lt%.1f.pkl'%(args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_lt%.1f.pkl'%(args.percetage), 'rb')
        else:
            f_1 = open('../pretrained_weights/lstm_weights_res%.1f.pkl'%(args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_res%.1f.pkl'%(args.percetage), 'rb')

        lstm_weights = pickle.load(f_1)
        dense_weights = pickle.load(f_2)
      
        model.get_layer('lstm').set_weights(lstm_weights)
        model.get_layer('dense_1').set_weights(dense_weights)
        model.get_layer('dense_2').set_weights(dense_weights)


    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt'%(args.domain)
    emb_reader = EmbReader(args, emb_path)
    model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
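Because this model has two softmax heads (the aspect-level output and the document-level pre-training output), it is typically compiled with one loss per named output and per-output loss weights. A minimal sketch (the optimizer and weight values are placeholders, not taken from the snippet):

from keras.optimizers import Adam

model = create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect)
model.compile(optimizer=Adam(lr=0.001),
              loss={'aspect_model': 'categorical_crossentropy',
                    'pretrain_model': 'categorical_crossentropy'},
              loss_weights={'aspect_model': 1.0, 'pretrain_model': 0.1},
              metrics=['accuracy'])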
Example #13
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout_W = 0.5  # default=0.5
    dropout_U = 0.1  # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:  #expand the dims
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)  # number of score outputs to predict

    if args.model_type == 'cls':
        raise NotImplementedError

    #embedding-->cnn-->rnn(return_sequence=false)-->dropout-->dense-->sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero decides whether index 0 in the input is treated as padding to be ignored; if set to True,
        # every downstream layer must support masking or an exception is raised.
        # When True, index 0 is no longer available for real words, so input_dim should be |vocabulary| + 1.
        # The Input layer is omitted here because input_length has a default value.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:  # border_mode is the padding mode, subsample_length the stride; no activation is applied here
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:  # return_sequences=False: only the final state is returned
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=False,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:  # initialize the bias of the last layer
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))  # output range is (0, 1)
        # record the index of the embedding layer (all layers live in model.layers) so it can later be initialized with pre-trained vectors
        model.emb_index = 0

    #embedding-->cnn-->rnn(return_sequence=true)-->dropout-->MeanoverTime or Attention(mean or sum)-->Dense-->sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=True,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0
    #embedding-->cnn-->birnn(return_sequence=false)-->dropout-->merge(concat the forRnn&backRnn)-->dense-->sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # this line is redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=False,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=False,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1
    #embedding-->cnn-->biRnn(return_sequence=true)-->dropout-->meanOverTime-->merge(concat)-->dense-->sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean],
                       mode='concat',
                       concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model
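These regression variants emit a single sigmoid-squashed score, so they are usually trained against gold scores rescaled into (0, 1) with a mean-squared-error loss. A minimal sketch (the optimizer choice, train_x/train_y and the score range are assumptions, not taken from the snippet):

model = create_model(args, initial_mean_value, overal_maxlen, vocab)
model.compile(optimizer='rmsprop', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Rescale gold scores into (0, 1) to match the sigmoid output range.
train_y_scaled = (train_y - min_score) / float(max_score - min_score)
model.fit(train_x, train_y_scaled, batch_size=32, epochs=50)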