def create_model(args, initial_mean_value, overal_maxlen, vocab):

    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #
    dropout_W = 0.5       # default=0.5
    dropout_U = 0.1       # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        # expand the dims
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)  # number of score dimensions to predict

    if args.model_type == 'cls':
        raise NotImplementedError

    # embedding --> cnn --> rnn(return_sequences=False) --> dropout --> dense --> sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero=True treats index 0 in the input as padding to be ignored; every later layer
        # must then support masking, otherwise an exception is raised.
        # With mask_zero=True, index 0 is unavailable as a word index, so input_dim should be |vocabulary| + 1.
        # The Input layer is omitted here because input_length has a default value.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            # border_mode is the padding mode ('same' keeps the sequence length), subsample_length is
            # the stride; no activation is applied here (linear by default).
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            # return_sequences=False: only the last hidden state is returned
            model.add(RNN(args.rnn_dim, return_sequences=False,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            # initialize the bias of the final Dense layer to the logit of the mean score
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))  # output range is (0, 1)
        # Record the index of the embedding layer so it can later be initialized with pre-trained
        # word vectors; all of the model's layers are stored in model.layers.
        model.emb_index = 0

    # embedding --> cnn --> rnn(return_sequences=True) --> dropout --> MeanOverTime or Attention(mean or sum) --> dense --> sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    # embedding --> cnn --> birnn(return_sequences=False) --> dropout --> merge(concat forward & backward RNN) --> dense --> sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # probably redundant: the functional-API Model built below is what gets returned
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    # embedding --> cnn --> birnn(return_sequences=True) --> dropout --> MeanOverTime --> merge(concat) --> dense --> sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # probably redundant (see note above)
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info(' Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info(' Done')

    return model
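# --- Usage sketch (illustrative; the argument values below are assumptions, not from the original code) ---
# Shows how this Keras-1-style create_model is typically driven: an argparse-like namespace carries the
# hyperparameters, initial_mean_value is the mean normalized score used to seed the output bias, and the
# returned model is compiled for regression. Assumes the NEA package layout (nea.my_layers, w2vEmbReader)
# and module-level np/logger, as the function itself does.
def _example_build_nea_model(create_model_fn):
    import argparse
    import numpy as np
    args = argparse.Namespace(
        recurrent_unit='lstm',          # 'lstm' | 'gru' | 'simple'
        model_type='regp',              # 'reg' | 'regp' | 'breg' | 'bregp'
        vocab_size=4000, emb_dim=50,
        cnn_dim=0, cnn_window_size=3,   # cnn_dim=0 skips the convolution layer
        rnn_dim=300, dropout_prob=0.5,
        aggregation='mot',              # or 'attsum' / 'attmean'
        skip_init_bias=False,
        emb_path=None)                  # set to a word2vec file to initialize the lookup table
    vocab = {'<pad>': 0, '<unk>': 1}    # toy vocabulary; only consulted when emb_path is given
    initial_mean_value = np.array([0.6])     # mean of the normalized gold scores
    model = create_model_fn(args, initial_mean_value, overal_maxlen=500, vocab=vocab)
    model.compile(loss='mse', optimizer='rmsprop')
    return model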
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        ### orthogonal regularization for aspect embedding matrix ###
        w_n = K.l2_normalize(weight_matrix, axis=-1)
        reg = K.sum(
            K.square(
                K.dot(w_n, K.transpose(w_n)) -
                K.eye(w_n.get_shape().as_list()[0])))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        emb_reader = EmbReader(
            os.path.join("..", "preprocessed_data", args.domain),
            args.emb_name)
        aspect_matrix = emb_reader.get_aspect_matrix(args.aspect_size)
        args.aspect_size = emb_reader.aspect_size
        args.emb_dim = emb_reader.emb_dim

    ##### Inputs #####
    sentence_input = Input(shape=(maxlen, ), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    ##### Construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True,
                         name='word_emb', embeddings_constraint=MaxNorm(10))

    ##### Compute sentence representation #####
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights',
                            W_constraint=MaxNorm(10),
                            b_constraint=MaxNorm(10))([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    ##### Compute representations of negative instances #####
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    ##### Reconstruction #####
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_constraint=MaxNorm(10),
                            W_regularizer=ortho_reg)(p_t)

    ##### Loss #####
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=[loss])

    ### Word embedding and aspect embedding initialization ######
    if args.emb_name:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing word embedding matrix')
        embs = model.get_layer('word_emb').embeddings
        K.set_value(
            embs,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(embs)))
        logger.info(
            'Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(model.get_layer('aspect_emb').W, aspect_matrix)

    return model
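# --- Training sketch for the aspect model above (illustrative; the optimizer choice and the dummy-target
# convention are assumptions, not taken from the original code) ---
# The model's single output already is the max-margin loss produced by the MaxMargin layer, so a common
# way to train it is a loss function that simply averages the model output while the targets are ignored.
def _example_train_step_aspect_model(model, args, maxlen, batch_size=32):
    import numpy as np
    import keras.backend as K
    from keras.optimizers import Adam

    def max_margin_loss(y_true, y_pred):
        # y_pred is the per-sample hinge loss emitted by the MaxMargin layer; y_true is a dummy target
        return K.mean(y_pred)

    model.compile(optimizer=Adam(lr=0.001), loss=max_margin_loss)
    # Toy random word-index batches (in practice the indices must be < vocab_size);
    # shapes follow sentence_input (batch, maxlen) and neg_input (batch, neg_size, maxlen).
    sentence_batch = np.random.randint(1, 100, size=(batch_size, maxlen))
    neg_batch = np.random.randint(1, 100, size=(batch_size, args.neg_size, maxlen))
    dummy_targets = np.ones((batch_size, 1))
    return model.train_on_batch([sentence_batch, neg_batch], dummy_targets)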
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation, Flatten
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #
    dropout_W = 0.5       # default=0.5
    dropout_U = 0.1       # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=0)
    num_outputs = len(initial_mean_value)

    if args.model_type == 'cls':
        raise NotImplementedError

    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=False,
                          dropout=dropout_W, recurrent_dropout=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            # Keras 2: the Dense bias variable is `.bias`; assign its value instead of the old `.b.set_value`
            K.set_value(model.layers[-1].bias, bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True,
                          dropout=dropout_W, recurrent_dropout=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            # assign the weight value; rebinding `model.layers[-1].bias` would not update the layer
            K.set_value(model.layers[-1].bias, bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, concatenate
        model = Sequential()  # unused; the functional-API Model built below is what gets returned
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False,
                           dropout=dropout_W, recurrent_dropout=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False,
                            dropout=dropout_W, recurrent_dropout=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        # Keras 2 functional concatenate replaces the old merge(..., mode='concat', concat_axis=-1)
        merged = concatenate([forwards, backwards], axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(inputs=sequence, outputs=score)
        model.emb_index = 1

    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, concatenate
        model = Sequential()  # unused; see note above
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout=dropout_W, recurrent_dropout=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout=dropout_W, recurrent_dropout=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = concatenate([forwards_mean, backwards_mean], axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(inputs=sequence, outputs=score)
        model.emb_index = 1

    logger.info(' Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # get_weights()/set_weights() work with lists of arrays; the embedding matrix is the only weight here
        emb_layer = model.layers[model.emb_index]
        emb_matrix = emb_reader.get_emb_matrix_given_vocab(vocab, emb_layer.get_weights()[0])
        emb_layer.set_weights([emb_matrix])
        logger.info(' Done')

    return model
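# --- Why the output bias is seeded with log(p) - log(1 - p) (illustrative check) ---
# Both NEA-style create_model variants above initialize the final Dense bias to the logit of the mean
# normalized score, so that the untrained sigmoid output already equals that mean. A quick numeric check:
def _example_bias_logit_check():
    import numpy as np
    p = np.array([0.6])                       # stands in for initial_mean_value
    bias = np.log(p) - np.log(1 - p)          # same formula as bias_value above
    sigmoid = 1.0 / (1.0 + np.exp(-bias))
    assert np.allclose(sigmoid, p)            # sigmoid(logit(p)) == p
    return bias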