def getAttentionModel(model, foldname, lr, lr_decay):
    # Load the weights from the checkpoint following the one recorded for this fold
    load_path = Result(foldname, find=True).df['model_path']
    load_path = load_path.replace(
        load_path[-16:-12],
        str(int(load_path[-16:-12]) + 1).rjust(4, '0'))
    model.load_weights(load_path)

    # Keep handles to the last five layers (GRL, domain/class heads) before popping
    layers = {x.name: x for x in model.layers[-5:]}

    # Strip layers back down to the flatten layer
    while 'flatten' not in model.layers[-1].name:
        model.layers.pop()

    # Insert the attention layer and rebuild the two heads on top of it
    merged = Attention(name='att')(model.layers[-1].output)
    dann_in = layers['grl'](merged)
    dsc = layers['domain_dense'](dann_in)
    dsc = layers['domain'](dsc)
    merged = layers['class_dense'](merged)
    merged = layers['class'](merged)
    model = Model(inputs=model.layers[0].input, outputs=[merged, dsc])

    # Freeze every layer up to (but not including) the flatten layer
    for layer in model.layers:
        if 'flatten' in layer.name:
            break
        else:
            layer.trainable = False

    opt = SGD(lr=lr, decay=lr_decay)
    model.compile(optimizer=opt,
                  loss={'class': 'categorical_crossentropy',
                        'domain': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
def simgnn(parser):
    # Node features and adjacency matrices for the two input graphs
    inputA = Input(shape=(None, 16))
    GinputA = Input(shape=(None, None))
    inputB = Input(shape=(None, 16))
    GinputB = Input(shape=(None, None))

    # GCN and attention layers are shared between the two branches (Siamese setup)
    shared_gcn1 = GraphConv(units=parser.filters_1, step_num=3, activation="relu")
    shared_gcn2 = GraphConv(units=parser.filters_2, step_num=3, activation="relu")
    shared_gcn3 = GraphConv(units=parser.filters_3, step_num=3, activation="relu")
    shared_attention = Attention(parser)

    x = shared_gcn1([inputA, GinputA])
    x = shared_gcn2([x, GinputA])
    x = shared_gcn3([x, GinputA])
    x = shared_attention(x[0])

    y = shared_gcn1([inputB, GinputB])
    y = shared_gcn2([y, GinputB])
    y = shared_gcn3([y, GinputB])
    y = shared_attention(y[0])

    # Neural tensor network compares the two graph embeddings, followed by an MLP
    z = NeuralTensorLayer(output_dim=16, input_dim=16)([x, y])
    z = keras.layers.Dense(16, activation="relu")(z)
    z = keras.layers.Dense(8, activation="relu")(z)
    z = keras.layers.Dense(4, activation="relu")(z)
    z = keras.layers.Dense(1)(z)
    z = keras.activations.sigmoid(z)
    return Model(inputs=[inputA, GinputA, inputB, GinputB], outputs=z)
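# Hypothetical usage sketch (not part of the original code): scoring the similarity
# of a pair of small random graphs. Assumes GraphConv, Attention and
# NeuralTensorLayer from this repo are importable, and that `parser` exposes
# filters_1/2/3; the concrete values below are illustrative only.
import numpy as np
from argparse import Namespace

parser = Namespace(filters_1=64, filters_2=32, filters_3=16)
model = simgnn(parser)

n_a, n_b = 10, 12  # node counts of the two graphs
feat_a = np.random.rand(1, n_a, 16).astype('float32')                   # node features, batch of 1
adj_a = np.random.randint(0, 2, (1, n_a, n_a)).astype('float32')        # adjacency matrix
feat_b = np.random.rand(1, n_b, 16).astype('float32')
adj_b = np.random.randint(0, 2, (1, n_b, n_b)).astype('float32')

similarity = model.predict([feat_a, adj_a, feat_b, adj_b])  # scalar in (0, 1)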
def build_discriminator_att(shape, gpus=2):
    ch = 64
    layer_num = 5
    input_tensor = Input(shape)
    x = Conv2D(ch, (3, 3), strides=(2, 2), padding='same')(input_tensor)  # 112x112
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = residual_block(x, output_channels=ch)
    x = residual_block(x, output_channels=ch * 2, stride=2)
    x = Attention(ch * 2)(x)
    ch = ch * 2
    for i in range(layer_num):
        if i == layer_num - 1:
            x = residual_block(x, output_channels=ch * 2)
        else:
            x = residual_block(x, output_channels=ch * 2, stride=2)
        ch = ch * 2
    x = LeakyReLU(alpha=0.2)(x)
    x = Flatten()(x)
    output = Dense(1, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    return model
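# Hypothetical usage sketch (not part of the original code): scoring a random
# batch with the attention discriminator. The 224x224 input size is inferred from
# the "112x112" comment after the first stride-2 convolution; residual_block and
# the self-attention layer are assumed to come from this repo.
import numpy as np

disc = build_discriminator_att((224, 224, 3))
fake_batch = np.random.rand(2, 224, 224, 3).astype('float32')
scores = disc.predict(fake_batch)  # one real/fake probability per image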
def at_lstm(self):
    input_text = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,),)
    if self.use_elmo:
        elmo_embedding = ELMoEmbedding(output_mode=self.config.elmo_output_mode,
                                       idx2word=self.config.idx2token,
                                       mask_zero=True,
                                       hub_url=self.config.elmo_hub_url,
                                       elmo_trainable=self.config.elmo_trainable)
        if self.config.use_elmo_alone:
            text_embed = SpatialDropout1D(0.2)(elmo_embedding(input_text))
        else:
            word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                       output_dim=self.config.word_embed_dim,
                                       weights=[self.text_embeddings],
                                       trainable=self.config.word_embed_trainable,
                                       mask_zero=True)
            text_embed = SpatialDropout1D(0.2)(concatenate(
                [word_embedding(input_text), elmo_embedding(input_text)]))
    else:
        word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                   output_dim=self.config.word_embed_dim,
                                   weights=[self.text_embeddings],
                                   trainable=self.config.word_embed_trainable,
                                   mask_zero=True)
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect,
                                  output_dim=self.config.aspect_embed_dim)
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d
    repeat_aspect = RepeatVector(self.max_len)(aspect_embed)  # repeat aspect for every word in sequence

    hidden_vecs = LSTM(self.config.lstm_units, return_sequences=True)(text_embed)  # hidden vectors output by lstm
    concat = concatenate([hidden_vecs, repeat_aspect],
                         axis=-1)  # mask after concatenate will be same as hidden_out's mask

    # apply attention mechanism
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)
    return Model([input_text, input_aspect], attend_hidden)
def memnet(self):
    n_hop = 9
    input_text = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,))
    inputs = [input_text, input_aspect]

    # if self.use_elmo:
    #     elmo_embedding = ELMoEmbedding(output_mode=self.config.elmo_output_mode, idx2word=self.config.idx2token,
    #                                    mask_zero=True, hub_url=self.config.elmo_hub_url,
    #                                    elmo_trainable=self.config.elmo_trainable)
    #     if self.config.use_elmo_alone:
    #         text_embed = SpatialDropout1D(0.2)(elmo_embedding(input_text))
    #     else:
    #         word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
    #                                    output_dim=self.config.word_embed_dim,
    #                                    weights=[self.text_embeddings], trainable=self.config.word_embed_trainable,
    #                                    mask_zero=True)
    #         text_embed = SpatialDropout1D(0.2)(concatenate([word_embedding(input_text), elmo_embedding(input_text)]))
    # else:
    word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                               output_dim=self.config.word_embed_dim,
                               weights=[self.text_embeddings],
                               trainable=self.config.word_embed_trainable,
                               mask_zero=True)
    text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

    if self.config.use_loc_input:  # location attention
        input_loc = Input(shape=(self.max_len,))
        inputs.append(input_loc)
        input_loc_expand = Lambda(lambda x: K.expand_dims(x))(input_loc)
        text_embed = multiply([text_embed, input_loc_expand])

    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect,
                                  output_dim=self.config.aspect_embed_dim)
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d

    # the parameters of the attention and linear layers are shared across hops
    attention_layer = Attention(use_W=False, use_bias=True)
    linear_layer = Dense(self.config.word_embed_dim)
    # output of each computation layer, representing the text at different levels of abstraction
    computation_layers_out = [aspect_embed]

    for h in range(n_hop):
        # content attention layer
        repeat_out = RepeatVector(self.max_len)(computation_layers_out[-1])
        concat = concatenate([text_embed, repeat_out], axis=-1)
        attend_weight = attention_layer(concat)
        attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
        content_attend = multiply([text_embed, attend_weight_expand])
        content_attend = Lambda(lambda x: K.sum(x, axis=1))(content_attend)
        # linear layer
        out_linear = linear_layer(computation_layers_out[-1])
        computation_layers_out.append(add([content_attend, out_linear]))

    return Model(inputs, computation_layers_out[-1])
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        # orthogonal regularization for the aspect embedding matrix
        w_n = weight_matrix / K.cast(
            K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
            K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    # Inputs
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    # Construct word embedding layer
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    # Compute sentence representation
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    # Compute representations of negative instances
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    # Reconstruction
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    # Loss
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    # Word embedding and aspect embedding initialization
    if args.emb_path:
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(
                vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of kmeans clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))

    return model
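# Hypothetical usage sketch (not part of the original code): since the MaxMargin
# layer outputs the loss value itself, the model can be trained against a dummy
# zero target with a pass-through loss. Assumes the custom layers used above
# (Average, Attention, WeightedSum, WeightedAspectEmb, MaxMargin) are importable;
# the args values below are illustrative only.
import numpy as np
from argparse import Namespace

args = Namespace(ortho_reg=0.1, neg_size=5, emb_dim=100, aspect_size=14, emb_path=None)
vocab = {'<pad>': 0, 'the': 1, 'food': 2, 'was': 3, 'great': 4}
maxlen = 20

model = create_model(args, maxlen, vocab)
model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)

sent = np.random.randint(0, len(vocab), (8, maxlen))             # positive sentences
neg = np.random.randint(0, len(vocab), (8, args.neg_size, maxlen))  # negative samples
model.fit([sent, neg], np.zeros((8, 1)), epochs=1)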
def patch_discriminator(shape, gpus=2):
    init = RandomNormal(stddev=0.02)
    in_image = Input(shape=shape)
    d = Conv2D(64, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(in_image)
    d = LeakyReLU(alpha=0.2)(d)
    d = Conv2D(128, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)
    d = Conv2D(256, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)
    d = Attention(256)(d)
    d = Conv2D(512, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)
    d = Conv2D(512, (4, 4), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)  # was assigned to an unused `x`, dropping this activation from the graph
    d = Attention(512)(d)        # likewise: the second attention block was previously discarded
    output = Conv2D(1, (4, 4), padding='same', activation='sigmoid', kernel_initializer=init)(d)
    with tf.device('/cpu:0'):
        model = Model(in_image, output)
    model = multi_gpu_model(model, gpus=gpus)  # use the `gpus` argument instead of hard-coding 2
    return model
def model_lstm_atten(embedding_matrix, maxlen, max_features, embed_size):
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
    x = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
    x = Attention(maxlen)(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
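# Hypothetical usage sketch (not part of the original code): fitting
# model_lstm_atten on random token-id sequences. Assumes the custom Attention
# layer above is importable and a CUDA GPU is available, since CuDNNLSTM only
# runs on GPU.
import numpy as np

maxlen, max_features, embed_size = 70, 50000, 300
embedding_matrix = np.random.rand(max_features, embed_size)

model = model_lstm_atten(embedding_matrix, maxlen, max_features, embed_size)
X = np.random.randint(0, max_features, (32, maxlen))  # 32 padded token-id sequences
y = np.random.randint(0, 2, (32, 1))                  # binary labels
model.fit(X, y, batch_size=8, epochs=1)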
def atae_lstm(self):
    input_text = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,),)
    if self.use_elmo:
        elmo_embedding = ELMoEmbedding(output_mode=self.config.elmo_output_mode,
                                       idx2word=self.config.idx2token,
                                       mask_zero=True,
                                       hub_url=self.config.elmo_hub_url,
                                       elmo_trainable=self.config.elmo_trainable)
        if self.config.use_elmo_alone:
            text_embed = SpatialDropout1D(0.2)(elmo_embedding(input_text))
        else:
            word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                       output_dim=self.config.word_embed_dim,
                                       weights=[self.text_embeddings],
                                       trainable=self.config.word_embed_trainable,
                                       mask_zero=True)
            text_embed = SpatialDropout1D(0.2)(concatenate(
                [word_embedding(input_text), elmo_embedding(input_text)]))
    else:
        word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                   output_dim=self.config.word_embed_dim,
                                   weights=[self.text_embeddings],
                                   trainable=self.config.word_embed_trainable,
                                   mask_zero=True)
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect,
                                  output_dim=self.config.aspect_embed_dim)
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d
    repeat_aspect = RepeatVector(self.max_len)(aspect_embed)  # repeat aspect for every word in sequence

    input_concat = concatenate([text_embed, repeat_aspect], axis=-1)
    hidden_vecs, state_h, _ = LSTM(self.config.lstm_units, return_sequences=True,
                                   return_state=True)(input_concat)
    concat = concatenate([hidden_vecs, repeat_aspect], axis=-1)

    # apply attention mechanism
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)

    attend_hidden_dense = Dense(self.config.lstm_units)(attend_hidden)
    last_hidden_dense = Dense(self.config.lstm_units)(state_h)
    final_output = Activation('tanh')(add([attend_hidden_dense, last_hidden_dense]))
    return Model([input_text, input_aspect], final_output)
def atae_lstm_new(self):
    input_content = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(self.aspect_max_len,))
    # Embed each character first, then embed the aspect; repeat the aspect embedding
    # once per character in the content, and concatenate the repeated aspect
    # embedding with each character embedding
    content_embed = Embedding(input_dim=self.max_content_vocab_size,
                              output_dim=self.content_embed_dim)
    aspect_embed = Embedding(input_dim=self.max_content_vocab_size,
                             output_dim=self.aspect_embed_dim)
    content_embedding = content_embed(input_content)
    content_embedding = SpatialDropout1D(0.2)(content_embedding)
    aspect_embedding = aspect_embed(input_aspect)

    # Pool the aspect character vectors: 60x128 => 1x128
    aspect_embedding = AveragePooling1D(pool_size=self.aspect_max_len)(aspect_embedding)
    aspect_flatten = Flatten()(aspect_embedding)
    repeat_aspect_embedding = RepeatVector(self.max_len)(aspect_flatten)

    # Concatenate the repeated aspect with the content characters
    input_concat = concatenate([content_embedding, repeat_aspect_embedding], axis=-1)

    # Run an LSTM on top
    if self.is_cudnn:
        hidden_vecs, state_h, _ = CuDNNLSTM(self.lstm_units,
                                            return_sequences=True,
                                            return_state=True)(input_concat)
    else:
        hidden_vecs, state_h, _ = LSTM(self.lstm_units,
                                       return_sequences=True,
                                       return_state=True)(input_concat)
    concat = concatenate([hidden_vecs, repeat_aspect_embedding], axis=-1)

    # apply attention mechanism
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)

    attend_hidden_dense = Dense(self.lstm_units)(attend_hidden)
    last_hidden_dense = Dense(self.lstm_units)(state_h)
    final_output = Activation('tanh')(add([attend_hidden_dense, last_hidden_dense]))

    dense_layer = Dense(self.dense_units, activation='relu')(final_output)
    output_layer = Dense(self.n_classes, activation='softmax')(dense_layer)
    return Model([input_content, input_aspect], output_layer)
def build_model(args, embeddings, emb_dim, vocab_size, max_len, words):
    if args.wordratings:
        dense_layer = get_word_classification(args.wordratings)
    else:
        dense_layer = Dense(120, activation="relu")

    # Embedding layer
    # embeddings_matrix = np.zeros((vocab_size+1, emb_dim))
    embeddings_matrix = np.random.rand(vocab_size + 1, emb_dim)
    embeddings_matrix[0] *= 0  # index 0 is reserved for padding
    for index, word in enumerate(words, start=1):
        try:
            embedding_vector = embeddings[word]
            embeddings_matrix[index] = embedding_vector
        except KeyError:  # was a bare except; only a missing key should be tolerated here
            print("Not found embedding for: <{0}>".format(word))

    input_layer = Input(shape=(max_len,))
    embedding_layer = Embedding(embeddings_matrix.shape[0],
                                embeddings_matrix.shape[1],
                                weights=[embeddings_matrix],
                                mask_zero=True,
                                trainable=False)(input_layer)
    layer1 = TimeDistributed(dense_layer)(embedding_layer)

    # Recurrent layer. Either LSTM or GRU
    if args.rnn == "LSTM":
        rnn_layer = LSTM(units=64, return_sequences=(args.attention or args.maxpooling))
    else:
        rnn_layer = GRU(units=64, return_sequences=(args.attention or args.maxpooling))
    rnn = Bidirectional(rnn_layer)(layer1)

    # Max Pooling and attention
    if args.maxpooling and args.attention:
        max_pooling = GlobalMaxPooling1DMasked()(rnn)
        con = TimeDistributed(Dense(100))(rnn)
        attention = Attention()(con)
        connection = concatenate([max_pooling, attention])
    elif args.maxpooling:
        max_pooling = GlobalMaxPooling1DMasked()
        connection = max_pooling(rnn)
    elif args.attention:
        con = TimeDistributed(Dense(100))(rnn)
        attention = Attention()
        connection = attention(con)
    else:
        connection = rnn

    connection = Dropout(0.2)(connection)
    valence_output = Dense(1, activation="sigmoid", name="valence_output")(connection)
    arousal_output = Dense(1, activation="sigmoid", name="arousal_output")(connection)

    # Build Model
    model = Model(inputs=[input_layer], outputs=[valence_output, arousal_output])
    return model
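# Hypothetical usage sketch (not part of the original code): building the
# valence/arousal model over a toy vocabulary. The Namespace fields mirror the
# args consumed above; maxpooling is disabled so only the repo's custom Attention
# layer is needed. Losses and optimizer are illustrative choices.
import numpy as np
from argparse import Namespace

words = ["good", "bad", "happy"]
emb_dim, max_len = 50, 30
embeddings = {w: np.random.rand(emb_dim) for w in words}

args = Namespace(wordratings=None, rnn="LSTM", attention=True, maxpooling=False)
model = build_model(args, embeddings, emb_dim, len(words), max_len, words)
model.compile(optimizer="adam",
              loss={"valence_output": "mse", "arousal_output": "mse"})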
class AT_LSTM(nn.Module):
    # define all the layers used in the model
    def __init__(self, embedding_dim, aspect_embedding_dim, hidden_dim, output_dim,
                 n_layers, embed_weights, at=True, ae=False, dropout=0):
        # Constructor
        super().__init__()
        # ATAE ?
        self.ae = ae
        self.at = at
        self.embedding_dim = embedding_dim

        # Embedding layer using GloVe or fastText
        self.embedding = custom_word_embedding(embed_weights)
        # Embedding layer using GloVe for aspects
        self.aspects_embedding = custom_word_embedding(embed_weights)
        # Embedding layer without GloVe
        # self.embedding = nn.Embedding(emb_mat.shape[0], emb_mat.shape[1])

        # LSTM layer and initialization
        if self.ae:
            self.lstm = nn.LSTM(embedding_dim * 2, hidden_dim, num_layers=n_layers,
                                bidirectional=False, dropout=dropout, batch_first=True)
        else:
            self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                                bidirectional=False, dropout=dropout, batch_first=True)
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.xavier_normal_(param)

        # Attention layer with initialization
        if self.at:
            self.attention = Attention(aspect_embedding_dim, hidden_dim)
            self.attention.xavier_init()

        # Final dense layer with initialization
        self.fc = nn.Linear(embedding_dim, output_dim)
        nn.init.xavier_normal_(self.fc.weight)

        # activation function
        # self.act = nn.Sigmoid()
        self.act = nn.Softmax(dim=1)

    def forward(self, inp, text_lengths=None):
        text = inp[0].view(inp[0].size()[1], -1)  # Remove the useless 1st axis
        # text = [batch_size, sent_length]
        categories = inp[1].view(inp[1].size()[1]).long()  # categories = [batch_size]

        embedded = self.embedding(text.long())

        # ATAE: concatenate the aspect embedding to every word embedding
        if self.ae:
            embedded_input_aspect = self.aspects_embedding(categories)
            embedded_input_aspect = embedded_input_aspect.view(
                embedded_input_aspect.size()[0], 1, self.embedding_dim)
            embedded_input_aspect = embedded_input_aspect.repeat(1, embedded.size()[1], 1)
            embedded = torch.cat((embedded, embedded_input_aspect), -1)

        # packed sequence
        # packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
        # si = embedded.size()
        # embedded = embedded.view(si[1], si[2], si[3])
        embedded = embedded.float().cuda()
        packed_output, (hidden, cell) = self.lstm(embedded)
        # packed_output = [batch_size, sent_length, hid_dim]
        # hidden = [batch size, num layers * num directions, hid dim]
        # cell = [batch size, num layers * num directions, hid dim]

        embedded_aspects = self.aspects_embedding(categories)
        embedded_aspects = embedded_aspects.float().cuda()
        # embedded_aspects = [batch_size, aspect_embedding_dim]

        if self.at:
            final_hidden = self.attention(embedded, embedded_aspects, packed_output)
        else:
            final_hidden = hidden
        # hidden = [batch size, hid dim * num directions]

        dense_outputs = self.fc(final_hidden)

        # Final activation function
        outputs = self.act(dense_outputs)
        return outputs
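# Hypothetical usage sketch (not part of the original code): instantiating AT_LSTM
# and running one forward pass. Assumes custom_word_embedding accepts a
# (vocab, dim) weight matrix and that the repo's Attention module is importable;
# inputs carry a leading singleton axis, matching the view() calls in forward(),
# and CUDA is required because of the hard-coded .cuda() calls above.
import numpy as np
import torch

vocab_size, embedding_dim, hidden_dim = 1000, 300, 128
embed_weights = np.random.rand(vocab_size, embedding_dim)

model = AT_LSTM(embedding_dim, aspect_embedding_dim=300, hidden_dim=hidden_dim,
                output_dim=3, n_layers=1, embed_weights=embed_weights).cuda()

batch_size, sent_len = 4, 20
text = torch.randint(0, vocab_size, (1, batch_size, sent_len)).cuda()  # [1, batch, sent_len]
aspects = torch.randint(0, vocab_size, (1, batch_size)).cuda()         # [1, batch]
probs = model([text, aspects])  # [batch_size, output_dim] class probabilities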