class Dense_Self_Attention(nn.Module):
    def __init__(self, wordEmbedding, hidden_size, self_att_hidden, n_hops,
                 paddingId, updateEmbedding, dropout=None):
        super(Dense_Self_Attention, self).__init__()
        embeddingSize = wordEmbedding.getEmbeddingSize()
        self.embedding = nn.Embedding(
            wordEmbedding.getNumberOfVectors(), embeddingSize,
            padding_idx=wordEmbedding.getPaddingIdx())
        self.embedding.weight.data.copy_(
            torch.from_numpy(wordEmbedding.getEmbeddingMatrix()))
        self.embedding.weight.requires_grad = updateEmbedding

        self.dense = nn.Linear(embeddingSize, embeddingSize) if hidden_size else None
        self.self_attention = SelfAttention(embeddingSize, self_att_hidden, n_hops)
        self.paddingId = paddingId
        self.output_size = self.self_attention.getOutputSize()
        self.dropout = nn.Dropout(dropout) if dropout else None

    def forward(self, x):
        mask = (x != self.paddingId).float()
        x = self.embedding(x)

        if self.dense is not None:
            # Residual dense transformation, zeroed at padding positions
            res = F.relu(self.dense(x)) * mask.unsqueeze(2)
            x = x + res

        x, att = self.self_attention(x, mask)
        x = x.view(x.size(0), self.output_size)

        if self.dropout:
            # Bug fix: the original discarded the dropout result
            x = self.dropout(x)

        return x

    def getOutputSize(self):
        return self.self_attention.getOutputSize()
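# The SelfAttention module used above is not defined in this file. Below is a
# minimal sketch consistent with its call sites: constructor
# (embeddingSize, self_att_hidden, n_hops), forward(x, mask) returning
# (output, attention), and getOutputSize(). It is modeled on the structured
# self-attention of Lin et al. (2017); treat the internals as assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    def __init__(self, input_size, att_hidden, n_hops):
        super().__init__()
        self.ws1 = nn.Linear(input_size, att_hidden, bias=False)
        self.ws2 = nn.Linear(att_hidden, n_hops, bias=False)
        self.output_size = n_hops * input_size

    def forward(self, x, mask):
        # x: [batch, seq_len, input_size]; mask: [batch, seq_len], 1 = real token
        scores = self.ws2(torch.tanh(self.ws1(x)))            # [batch, seq_len, n_hops]
        scores = scores.masked_fill(mask.unsqueeze(2) == 0, float('-inf'))
        att = F.softmax(scores, dim=1)                        # normalize over time
        out = torch.bmm(att.transpose(1, 2), x)               # [batch, n_hops, input_size]
        return out, att

    def getOutputSize(self):
        return self.output_size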
def create_model():
    token_ids = tf.placeholder(tf.int32, shape=(None, None), name="token_ids")
    score_matrix = tf.placeholder(tf.float32, shape=(None, None, None, n_type),
                                  name="score_matrix")
    max_seq_len = tf.placeholder(tf.int32, name="max_seq_len")

    # Mask out padding tokens (id 0)
    mask = tf.cast(tf.greater(tf.expand_dims(token_ids, 2), 0), dtype=tf.float32)

    embedding = tf.get_variable(name="embedding", initializer=tf.cast(vec, tf.float32))
    # embedding = tf.get_variable(name="embedding", dtype=tf.float32,
    #                             shape=[char_size, 100],
    #                             initializer=tf.random_normal_initializer)
    inp_embed = tf.nn.embedding_lookup(embedding, token_ids)
    inp = inp_embed * mask

    # Use CuDNNLSTM for speed on GPU; use LSTM on CPU
    inp = Bidirectional(LSTM(units=256, return_sequences=True))(inp)
    inp = Bidirectional(LSTM(units=256, return_sequences=True))(inp)

    # Attention + batch normalization
    self_attention = SelfAttention(d_model=512)
    lstm_output = self_attention(token_ids, inp)
    lstm_output = tf.nn.batch_normalization(lstm_output, mean=0.0, variance=3.0,
                                            offset=None, scale=1.0,
                                            variance_epsilon=0.01, name="BN1")
    # lstm_output = inp

    left = Conv1D(filters=64, kernel_size=3, activation="relu", padding="same")(lstm_output)
    right = Conv1D(filters=64, kernel_size=3, activation="relu", padding="same")(lstm_output)
    out = broadcasting(left, right)
    out = tf.nn.batch_normalization(out, mean=0.0, variance=3.0, offset=None,
                                    scale=1.0, variance_epsilon=0.01, name="BN2")

    # Bug fix: softmax_cross_entropy_with_logits_v2 expects raw logits, so the
    # output layer must not apply softmax itself, and its width must match the
    # label depth (n_type) of score_matrix.
    pred_score = Dense(units=n_type, activation=None)(out)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=pred_score, labels=score_matrix))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(loss)

    return token_ids, score_matrix, max_seq_len, pred_score, loss, train_op
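# A hypothetical TF 1.x training-step invocation of create_model(). The globals
# vec and n_type and the helpers SelfAttention and broadcasting are assumed to
# be defined elsewhere in the module, so this is a sketch, not a verified run.
import numpy as np
import tensorflow as tf

token_ids, score_matrix, max_seq_len, pred_score, loss, train_op = create_model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_tokens = np.random.randint(1, 100, size=(2, 16))           # 2 sentences, 16 tokens
    batch_scores = np.zeros((2, 16, 16, n_type), dtype=np.float32)   # one-hot link types
    _, batch_loss = sess.run([train_op, loss],
                             feed_dict={token_ids: batch_tokens,
                                        score_matrix: batch_scores})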
def __call__(self, x):
    for depth in range(4):
        filters = 64 * (depth + 1)
        # add 2 convolutional layers
        for cv in range(2):
            x = Conv2D(filters=filters,
                       kernel_size=3,
                       padding='same',
                       data_format='channels_last',
                       kernel_initializer='glorot_uniform')(x)
            x = BatchNormalization(axis=-1)(x)
            x = Activation('relu')(x)
        x = MaxPooling2D(pool_size=(2, 2), data_format='channels_last')(x)
        x = Dropout(rate=self.dropout)(x)

    if self.attention:
        x = SelfAttention(x, output_filters=256)
        # x = MultiHeadAttention(head_num)(x)
        # x = BatchNormalization(axis=-1)(x)
        # x = GaussianNoise(0.3)(x)

    # global average pooling 2D
    x = GlobalAveragePooling2D(data_format='channels_last')(x)

    # fully connected output layer
    output = Dense(units=ASC_CLASS,
                   kernel_initializer='uniform',
                   activation='softmax')(x)
    return output
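# Assuming this __call__ lives on a builder class (hypothetically named
# CNNClassifier here) holding dropout and attention attributes, it can be wired
# into a Keras model as below. The constructor and input shape are illustrative.
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

builder = CNNClassifier(dropout=0.3, attention=False)  # hypothetical constructor
inputs = Input(shape=(64, 64, 1))                      # illustrative spectrogram shape
model = Model(inputs, builder(inputs))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])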
def __init__(self, interface):
    super().__init__(interface)
    self.num_select_actions = self.interface.num_unit_selection_actions
    self.rnn_size = 256
    self.self_attention = SelfAttention(hidden_size=128, num_heads=2,
                                        attention_dropout=0, train=True)
    self.lstm = tf.contrib.rnn.LSTMCell(self.rnn_size)

    self.memory_input = tf.placeholder(tf.float32, [2, None, self.rnn_size],
                                       name="memory_input")
    self.unit_embeddings_input = tf.placeholder(
        tf.float32, [None, None, self.interface.unit_embedding_size],
        name="unit_embeddings_input")
    self.unit_selection_input = tf.placeholder(tf.int32, [None],
                                               name="unit_selection_input")
    # TODO: Add in previous action index as an input
    self.prev_action_input = tf.placeholder(tf.int32, [None],
                                            name='prev_action_input')

    self.features = self.features()  # Shape [batch_size, num_features]
    lstm_output, self.next_lstm_state = self._lstm_step()
    self.train_output = self._lstm_step_train()
    self.all_values = parts.value_head(self.train_output)
    self.nonspacial_probs, self.spacial_probs_x, self.spacial_probs_y = \
        self._probs_from_features(lstm_output)
    self.nonspacial_train, self.spacial_train_x, self.spacial_train_y = \
        self._probs_from_features(self.train_output[:-1])
    self.unit_selection_probs = self._selection_probs_from_features(
        lstm_output, self.unit_embeddings_input)
    self._f1 = lstm_output
    self.unit_selection_probs_train = self._selection_probs_from_features(
        self.train_output[:-1], self.unit_embeddings_input[:-1])
def __init__(self, embed_size, heads, forward_expansion, dropout):
    super().__init__()
    self.attention = SelfAttention(embed_size, heads)
    self.norm = nn.LayerNorm(embed_size)
    self.transformer_block = TransformerBlock(
        embed_size, heads, dropout, forward_expansion
    )
    self.dropout = nn.Dropout(dropout)
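# Only the constructor of this decoder block is shown. A plausible forward pass,
# following the common masked self-attention -> add & norm -> encoder-decoder
# transformer-block pattern; the argument names are assumptions.
def forward(self, x, value, key, src_mask, trg_mask):
    # Masked self-attention over the target sequence
    attention = self.attention(x, x, x, trg_mask)
    # Residual connection, layer norm, dropout
    query = self.dropout(self.norm(attention + x))
    # Encoder-decoder attention plus feed-forward inside the transformer block
    out = self.transformer_block(value, key, query, src_mask)
    return out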
def build_baseline0_newatt(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    if not dataset.bert:
        q_att = SelfAttention(q_emb.num_hid, num_hid)
        v_att = NewAttention(dataset.v_dim + 2, q_emb.num_hid, num_hid)
        q_net = FCNet([q_emb.num_hid, num_hid])
    else:
        q_att = SelfAttention(768, num_hid)
        q_emb = FCNet([768, 768])
        v_att = NewAttention(dataset.v_dim, 768, num_hid)
        q_net = FCNet([768, num_hid])
    v_net = FCNet([dataset.v_dim + 2, num_hid])
    classifier = SimpleClassifier(num_hid, num_hid * 2,
                                  dataset.num_ans_candidates, 0.5)
    return BaseModel(w_emb, q_emb, q_att, v_att, q_net, v_net, classifier,
                     dataset.bert)
def train_lstm(p_n_symbols, p_embedding_weights, p_X_train, p_y_train,
               p_X_test, p_y_test, vocab_dim, input_length, batch_size, n_epoch):
    """Build and train the model."""
    print('build model...')
    model = Sequential()
    model.add(Embedding(output_dim=vocab_dim,
                        input_dim=p_n_symbols,
                        mask_zero=True,
                        weights=[p_embedding_weights],
                        input_length=input_length))
    model.add(Dropout(0.5))
    model.add(LSTM(units=50))
    model.add(Dropout(0.5))
    model.add(SelfAttention())
    model.add(Dense(32, activation='tanh'))
    model.add(Dense(3, activation='softmax'))

    print('processing...')
    # Tune the learning rate
    adam = keras.optimizers.Adam(lr=0.00003)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', precision, recall, f1_score])

    print("training...")
    history = model.fit(p_X_train, p_y_train,
                        batch_size=batch_size,
                        epochs=n_epoch,
                        validation_data=(p_X_test, p_y_test))

    # Plot the training curves with matplotlib
    print("counting...")
    score, acc, prec, re, f1 = model.evaluate(p_X_test, p_y_test,
                                              batch_size=batch_size)
    print('Test score:', score)
    print('Test accuracy:', acc)
    print('Test precision:', prec)
    print('Test recall:', re)
    print('Test f1-score:', f1)

    plt.subplot(211)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')

    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')

    # Save before show(): otherwise an empty canvas may be written to disk
    plt.savefig('lstm')
    plt.show()

    # Save the model
    model.save('lstm.h5')
    print('model is saved')
def __init__(self, h=8, d_model=512, d_ff=2048, drop_rate=0.1):
    super(DecoderLayer, self).__init__()
    # Self-attention layer:
    # query, key and value all come from the previous decoder layer.
    self.self_attn = SelfAttention(h, d_model, drop_rate)
    # Source-target attention layer:
    # query comes from the preceding self-attention layer;
    # key and value come from the encoded source space.
    self.st_attn = SourceTargetAttention(h, d_model, drop_rate)
    self.ff = FFN(d_model, d_ff)
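# The forward pass is not shown. A hedged sketch of how these three sublayers
# are usually chained in a Transformer decoder layer; the attention modules'
# call signatures and argument names are assumptions.
def forward(self, x, memory, source_mask, target_mask):
    # Masked self-attention over the decoder input
    x = self.self_attn(x, mask=target_mask)
    # Source-target attention over the encoder output (memory)
    x = self.st_attn(memory, x, mask=source_mask)
    # Position-wise feed-forward network
    return self.ff(x)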
def aggregate_feature(model_part, d_model, seq_len):
    if model_part == "Attention":
        attn = SelfAttention(d_model, d_model)
        output_size = d_model
    elif model_part == "Flatten":
        attn = None
        output_size = d_model * seq_len
    else:
        raise ValueError("Unsupported model choice: " + model_part +
                         ". Please use \"Attention\" or \"Flatten\".")
    return attn, output_size
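# Example calls, assuming SelfAttention(d_model, d_model) pools a
# [batch, seq_len, d_model] tensor down to [batch, d_model]:
# "Attention" keeps the model dimension; "Flatten" multiplies it by the length.
attn, size = aggregate_feature("Attention", d_model=256, seq_len=128)  # size == 256
attn, size = aggregate_feature("Flatten", d_model=256, seq_len=128)    # size == 32768, attn is None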
def __init__(self, embed_size, heads, dropout, forward_expansion):
    super().__init__()
    self.attention = SelfAttention(embed_size, heads)
    self.norm1 = nn.LayerNorm(embed_size)
    self.norm2 = nn.LayerNorm(embed_size)
    self.feed_forward = nn.Sequential(
        nn.Linear(embed_size, forward_expansion * embed_size),
        nn.ReLU(),
        nn.Linear(forward_expansion * embed_size, embed_size),
    )
    self.dropout = nn.Dropout(dropout)
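# As with the decoder block above, only the constructor is shown. A plausible
# forward pass consistent with these submodules; any mask handling inside
# SelfAttention is an assumption.
def forward(self, value, key, query, mask):
    attention = self.attention(value, key, query, mask)
    # First residual connection and layer norm
    x = self.dropout(self.norm1(attention + query))
    forward = self.feed_forward(x)
    # Second residual connection and layer norm
    out = self.dropout(self.norm2(forward + x))
    return out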
def __init__(self, config, lossfct=None, CEL_type='mean', quick_return=False):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.lossfct = lossfct
    self.roberta = RobertaModel(config)
    self.attn = SelfAttention(config.hidden_size)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.CEL_type = CEL_type
    self.quick_return = quick_return
    self.init_weights()
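# A hedged sketch of the matching forward: pool the token-level hidden states
# with the SelfAttention module, then classify. The SelfAttention call
# signature and the exact use of lossfct, CEL_type and quick_return are
# assumptions.
def forward(self, input_ids, attention_mask=None, labels=None):
    outputs = self.roberta(input_ids, attention_mask=attention_mask)
    pooled = self.attn(outputs[0], attention_mask)        # [batch, hidden_size]
    logits = self.classifier(self.dropout(pooled))
    if self.quick_return or labels is None:
        return logits
    lossfct = self.lossfct or nn.CrossEntropyLoss(reduction=self.CEL_type)
    loss = lossfct(logits.view(-1, self.num_labels), labels.view(-1))
    return loss, logits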
def __init__(self, config, lossfct=None, CEL_type='mean', quick_return=False):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.lossfct = lossfct
    self.transformer = XLNetModel(config)
    self.attn = SelfAttention(config.hidden_size)
    # self.sequence_summary = SequenceSummary(config)
    # self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.CEL_type = CEL_type
    self.quick_return = quick_return
    self.init_weights()
def build_generator(self):
    def create_conv_transp(filters):
        return Conv2DTranspose(filters, 5, padding="SAME", activation=None,
                               use_bias=False, strides=2)

    model = Sequential()
    model.add(keras.layers.Dense(self.gf_dim * self.gf_dim * self.gfc_dim,
                                 input_dim=self.latent_dim))
    model.add(Reshape((self.gf_dim, self.gf_dim, self.gfc_dim)))
    model.add(create_conv_transp(self.gfc_dim // 2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(ReLU())
    model.add(create_conv_transp(self.gfc_dim // 4))
    model.add(BatchNormalization(momentum=0.8))
    model.add(ReLU())
    model.add(SelfAttention())
    model.add(create_conv_transp(self.gfc_dim // 8))
    model.add(BatchNormalization(momentum=0.8))
    model.add(ReLU())
    model.add(create_conv_transp(self.gfc_dim // 16))
    model.add(BatchNormalization(momentum=0.8))
    model.add(ReLU())
    model.add(Conv2D(self.channels, 3, strides=1, padding='SAME',
                     activation=None))
    model.add(Activation('tanh'))
    model.summary()

    noise = Input(shape=(self.latent_dim,))
    img = model(noise)
    return Model(noise, img)
def build_discriminator(self):
    def create_conv(filters):
        return Conv2D(filters, 5, padding="SAME", activation=None,
                      use_bias=False, strides=2)

    model = Sequential()
    model.add(Conv2D(self.dfc_dim, 5, padding="SAME", activation=None,
                     use_bias=False, strides=2, input_shape=self.img_shape))
    model.add(create_conv(self.dfc_dim))
    model.add(BatchNormalization(momentum=0.8))
    model.add(LeakyReLU(alpha=self.alpha))
    model.add(create_conv(self.dfc_dim * 2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(LeakyReLU(alpha=self.alpha))
    model.add(SelfAttention())
    model.add(create_conv(self.dfc_dim * 4))
    model.add(BatchNormalization(momentum=0.8))
    model.add(LeakyReLU(alpha=self.alpha))
    model.add(create_conv(self.dfc_dim * 8))
    model.add(BatchNormalization(momentum=0.8))
    model.add(LeakyReLU(alpha=self.alpha))
    model.add(Flatten())
    model.add(Dense(units=1, activation=None))
    model.summary()

    img = Input(shape=self.img_shape)
    validity = model(img)
    return Model(img, validity)
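# Both networks call a SelfAttention() layer whose definition is not shown.
# Below is a minimal sketch in the style of SAGAN (Zhang et al., 2018), with
# 1x1-convolution query/key/value projections and a learned residual scale
# gamma; the internals are assumptions, not the authors' implementation.
import tensorflow as tf
from tensorflow.keras.layers import Layer, Conv2D

class SelfAttention(Layer):
    def build(self, input_shape):
        channels = int(input_shape[-1])
        self.f = Conv2D(channels // 8, 1)   # query projection (1x1 conv)
        self.g = Conv2D(channels // 8, 1)   # key projection
        self.h = Conv2D(channels, 1)        # value projection
        self.gamma = self.add_weight(name='gamma', shape=(),
                                     initializer='zeros')
        super().build(input_shape)

    def call(self, x):
        shape = tf.shape(x)
        n = shape[1] * shape[2]                                   # h * w locations
        f = tf.reshape(self.f(x), (shape[0], n, -1))              # queries
        g = tf.reshape(self.g(x), (shape[0], n, -1))              # keys
        h = tf.reshape(self.h(x), (shape[0], n, -1))              # values
        beta = tf.nn.softmax(tf.matmul(f, g, transpose_b=True))   # [b, n, n] attention map
        o = tf.reshape(tf.matmul(beta, h), shape)
        return self.gamma * o + x                                 # residual with learned scale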
def __init__(self, word_vocab_size, embedding_size, word_padding_idx,
             num_layers, hidden_size, dropout, bidirectional=True,
             encode_multi_key=True):
    # Bug fix: the bidirectional argument was previously hard-coded to True
    # in the super() call, silently ignoring the parameter.
    super(FAEncoder, self).__init__(word_vocab_size, embedding_size,
                                    word_padding_idx, num_layers, hidden_size,
                                    dropout, bidirectional=bidirectional)
    self.attn = SelfAttention(hidden_size)
    self.encode_multi_key = encode_multi_key
def __init__(self, params):
    super(EncoderStack, self).__init__()
    self.layers = []
    for _ in range(params.num_hidden_layers):
        # Create sublayers for each layer.
        self_attention_layer = SelfAttention(params.hidden_size,
                                             params.num_heads,
                                             params.attention_dropout)
        feed_forward_network = FeedFowardNetwork(params.hidden_size,
                                                 params.filter_size,
                                                 params.relu_dropout)
        self.layers.append([
            PrePostProcessingWrapper(self_attention_layer, params),
            PrePostProcessingWrapper(feed_forward_network, params)
        ])

    # Create final layer normalization layer.
    self.output_normalization = LayerNormalization(params.hidden_size)
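# Only the constructor is shown. A hedged sketch of the corresponding forward
# pass, in the style of the TF official Transformer's EncoderStack.call;
# argument names are assumptions.
def call(self, encoder_inputs, attention_bias, inputs_padding):
    for self_attention_layer, feed_forward_network in self.layers:
        # Each wrapper applies layer norm before and dropout + residual after.
        encoder_inputs = self_attention_layer(encoder_inputs, attention_bias)
        encoder_inputs = feed_forward_network(encoder_inputs, inputs_padding)
    return self.output_normalization(encoder_inputs)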
def __init__(self, vocab_size, hopping_num, head_num, hidden_dim,
             dropout_rate, max_length, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.hopping_num = hopping_num
    self.head_num = head_num
    self.hidden_dim = hidden_dim
    self.dropout_rate = dropout_rate

    self.token_embedding = TokenEmbedding(vocab_size, hidden_dim)
    self.add_position_embedding = AddPositionalEncoding()
    self.input_dropout_layer = tf.keras.layers.Dropout(dropout_rate)

    self.attention_block_list = []  # List[List[tf.keras.models.Model]]
    for _ in range(hopping_num):
        attention_layer = SelfAttention(hidden_dim, head_num, dropout_rate,
                                        name='self_attention')
        ffn_layer = FeedForwardNetwork(hidden_dim, dropout_rate, name='ffn')
        self.attention_block_list.append([
            ResidualNormalizationWrapper(attention_layer, dropout_rate,
                                         name='self_attention_wrapper'),
            ResidualNormalizationWrapper(ffn_layer, dropout_rate,
                                         name='ffn_wrapper'),
        ])
    self.output_normalization = LayerNormalization()
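# A hedged sketch of the forward pass this constructor implies: embed, add
# positional encodings, then run the hopping_num attention/FFN blocks. The
# mask and training-flag plumbing are assumptions.
def call(self, input, self_attention_mask, training):
    embedded_input = self.token_embedding(input)
    embedded_input = self.add_position_embedding(embedded_input)
    query = self.input_dropout_layer(embedded_input, training=training)
    for attention_layer, ffn_layer in self.attention_block_list:
        query = attention_layer(query, attention_mask=self_attention_mask,
                                training=training)
        query = ffn_layer(query, training=training)
    return self.output_normalization(query)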
def __call__(self, x):
    for depth in range(4):
        x = Conv2D(filters=64 * (depth + 1),  # 64, 128, 192, 256
                   kernel_size=5,
                   padding='same',
                   data_format='channels_last',
                   kernel_initializer='glorot_uniform')(x)
        x = BatchNormalization(axis=-1)(x)
        x = Activation('relu')(x)
        # Max pooling halves each axis:
        # (40, 500) -> (20, 250) -> (10, 125) -> (5, 62) -> (2, 31)
        x = MaxPooling2D(pool_size=(2, 2), data_format='channels_last')(x)
        x = Dropout(rate=self.dropout)(x)

    # Output shape: (40//16, 500//16, 256) = (2, 31, 256)
    if self.attention:
        x = SelfAttention(x, output_filters=256)
        # x = MultiHeadAttention(head_num=64)(x)
        # x = BatchNormalization(axis=-1)(x)
        # x = GaussianNoise(0.3)(x)

    # Average pooling over the feature and time axes
    x = GlobalAveragePooling2D(data_format='channels_last')(x)

    # (256)-dim -> (10)-dim
    output = Dense(units=ASC_CLASS,
                   kernel_initializer='uniform',
                   activation='softmax')(x)
    return output  # vector of the 10 class probabilities
def processDescriptionParam(descOpts, bugReportDatabase, inputHandlers,
                            preprocessors, encoders, databasePath, cacheFolder,
                            logger, paddingSym):
    # Use summary and description (concatenated) to address this problem
    logger.info("Using Description information.")

    # Loading word embedding
    lexicon, embedding = load_embedding(descOpts, paddingSym)
    logger.info("Lexicon size: %d" % (lexicon.getLen()))
    logger.info("Word Embedding size: %d" % (embedding.getEmbeddingSize()))
    paddingId = lexicon.getLexiconIndex(paddingSym)

    # Loading Filters
    filters = loadFilters(descOpts['filters'])

    # Tokenizer
    if descOpts['tokenizer'] == 'default':
        logger.info("Use default tokenizer to tokenize summary information")
        tokenizer = MultiLineTokenizer()
    elif descOpts['tokenizer'] == 'white_space':
        logger.info("Use white space tokenizer to tokenize summary information")
        tokenizer = WhitespaceTokenizer()
    else:
        raise ArgumentError(
            "Tokenizer value %s is invalid. You should choose one of these: "
            "default and white_space" % descOpts['tokenizer'])

    arguments = (databasePath, descOpts['word_embedding'],
                 str(descOpts['lexicon']),
                 ' '.join(sorted([fil.__class__.__name__ for fil in filters])),
                 descOpts['tokenizer'], "description")

    descCache = PreprocessingCache(cacheFolder, arguments)
    descPreprocessor = DescriptionPreprocessor(lexicon, bugReportDatabase,
                                               filters, tokenizer, paddingId,
                                               descCache)
    preprocessors.append(descPreprocessor)

    if descOpts['encoder_type'] == 'rnn':
        rnnType = descOpts.get('rnn_type')
        hiddenSize = descOpts.get('hidden_size')
        bidirectional = descOpts.get('bidirectional', False)
        numLayers = descOpts.get('num_layers', 1)
        dropout = descOpts.get('dropout', 0.0)
        updateEmb = descOpts.get('update_embedding', False)
        fixedOpt = descOpts.get('fixed_opt', False)

        descRNN = SortedRNNEncoder(rnnType, embedding, hiddenSize, numLayers,
                                   bidirectional, updateEmb, dropout)

        if fixedOpt == 'self_att':
            att = SelfAttention(descRNN.getOutputSize(),
                                descOpts['self_att_hidden'],
                                descOpts['n_hops'])
            descEncoder = RNN_Self_Attention(descRNN, att, paddingId, dropout)
        else:
            descEncoder = RNNFixedOuput(descRNN, fixedOpt, dropout)

        encoders.append(descEncoder)
        inputHandlers.append(RNNInputHandler(paddingId))
    elif descOpts['encoder_type'] == 'cnn':
        windowSizes = descOpts.get('window_sizes', [3])
        nFilters = descOpts.get('nfilters', 100)
        updateEmb = descOpts.get('update_embedding', False)
        actFunc = loadActivationFunction(descOpts.get('activation', 'relu'))
        batchNorm = descOpts.get('batch_normalization', False)
        dropout = descOpts.get('dropout', 0.0)

        descEncoder = TextCNN(windowSizes, nFilters, embedding, updateEmb,
                              actFunc, batchNorm, dropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, max(windowSizes)))
    elif descOpts['encoder_type'] == 'cnn+dense':
        windowSizes = descOpts.get('window_sizes', [3])
        nFilters = descOpts.get('nfilters', 100)
        updateEmb = descOpts.get('update_embedding', False)
        actFunc = loadActivationFunction(descOpts.get('activation', 'relu'))
        batchNorm = descOpts.get('batch_normalization', False)
        dropout = descOpts.get('dropout', 0.0)
        hiddenSizes = descOpts.get('hidden_sizes')
        hiddenAct = loadActivationClass(descOpts.get('hidden_act'))
        hiddenDropout = descOpts.get('hidden_dropout')
        batchLast = descOpts.get("bn_last_layer", False)

        cnnEnc = TextCNN(windowSizes, nFilters, embedding, updateEmb, actFunc,
                         batchNorm, dropout)
        descEncoder = MultilayerDense(cnnEnc, hiddenSizes, hiddenAct,
                                      batchNorm, batchLast, hiddenDropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, max(windowSizes)))
    elif descOpts['encoder_type'] == 'dense+self_att':
        dropout = descOpts.get('dropout', 0.0)
        hiddenSize = descOpts.get('hidden_size')
        self_att_hidden = descOpts['self_att_hidden']
        n_hops = descOpts['n_hops']
        updateEmb = descOpts.get('update_embedding', False)

        descEncoder = Dense_Self_Attention(embedding, hiddenSize,
                                           self_att_hidden, n_hops, paddingId,
                                           updateEmb, dropout=dropout)
        encoders.append(descEncoder)
        inputHandlers.append(TextCNNInputHandler(paddingId, -1))
    elif descOpts['encoder_type'] == 'word_mean':
        standardization = descOpts.get('standardization', False)
        dropout = descOpts.get('dropout', 0.0)
        updateEmb = descOpts.get('update_embedding', False)
        # Bug fix: this flag previously read 'update_embedding' by mistake
        batch_normalization = descOpts.get('batch_normalization', False)
        hiddenSize = descOpts.get('hidden_size')

        descEncoder = WordMean(embedding, updateEmb, hiddenSize,
                               standardization, dropout, batch_normalization)
        encoders.append(descEncoder)
        inputHandlers.append(RNNInputHandler(paddingId))
    else:
        raise ArgumentError(
            "Encoder type of summary and description is invalid (%s). You "
            "should choose one of these: rnn, cnn, cnn+dense, dense+self_att "
            "or word_mean" % descOpts['encoder_type'])
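# A hypothetical descOpts dictionary exercising the 'dense+self_att' branch
# above. The key names mirror the lookups in the function; the values (paths,
# sizes) are illustrative only.
descOpts = {
    'word_embedding': 'glove_300d.txt',   # illustrative path
    'lexicon': 'lexicon.json',
    'filters': [],
    'tokenizer': 'white_space',
    'encoder_type': 'dense+self_att',
    'hidden_size': 300,
    'self_att_hidden': 128,
    'n_hops': 20,
    'update_embedding': False,
    'dropout': 0.1,
}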
def train_Lstm_BiLstm_e(p_n_symbols, p_embedding_weights, p_X_train, p_y_train,
                        p_X_test, p_y_test, p_X_train_e, p_X_test_e, vocab_dim,
                        input_length, input_length_e, batch_size, n_epoch):
    """Build and train the model."""
    print('build model...')

    # text branch (LSTM)
    model_t = Sequential()
    model_t.add(Embedding(output_dim=vocab_dim,
                          input_dim=p_n_symbols,
                          mask_zero=True,
                          weights=[p_embedding_weights],
                          input_length=input_length))
    model_t.add(LSTM(units=50))
    model_t.add(Dropout(0.5))
    model_t.add(SelfAttention())

    # emotion branch (BiLSTM)
    model_e = Sequential()
    model_e.add(Embedding(output_dim=vocab_dim,
                          input_dim=p_n_symbols,
                          mask_zero=True,
                          weights=[p_embedding_weights],
                          input_length=input_length_e))
    model_e.add(Bidirectional(LSTM(units=50)))
    model_e.add(Dropout(0.5))
    model_e.add(SelfAttention())

    # Merge the text and emotion branches
    mergedOut = keras.layers.Add()([model_t.output, model_e.output])
    mergedOut = Dense(3, activation='softmax')(mergedOut)
    model = Model([model_t.input, model_e.input], mergedOut)

    print('processing...')
    # Tune the learning rate
    adam = keras.optimizers.Adam(lr=0.0001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', precision, recall, f1_score])

    print("training...")
    history = model.fit([p_X_train, p_X_train_e], p_y_train,
                        batch_size=batch_size,
                        epochs=n_epoch,
                        validation_data=([p_X_test, p_X_test_e], p_y_test))

    # Plot the training curves with matplotlib
    print("counting...")
    # Bug fix: the merged model takes two inputs, so evaluation must feed both
    score, acc, prec, re, f1 = model.evaluate([p_X_test, p_X_test_e], p_y_test,
                                              batch_size=batch_size)
    print('Test score:', score)
    print('Test accuracy:', acc)
    print('Test precision:', prec)
    print('Test recall:', re)
    print('Test f1-score:', f1)

    plt.subplot(211)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')

    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='best')

    # Save before show(): otherwise an empty canvas may be written to disk
    plt.savefig('Lstm_BiLstm_e')
    plt.show()

    # Save the model
    model.save('Lstm_BiLstm_e.h5')
    print('model is saved')