def __init__(self, input_shape):
    self.model = Sequential()
    # Stack the sub-model layers in order: encoder -> attention -> state -> decoder.
    for layer in Encoder(input_shape).model.layers:
        self.model.add(layer)
    for layer in Attention(self.model.layers[-1].output_shape).model.layers:
        self.model.add(layer)
    for layer in State(self.model.layers[-1].output_shape).model.layers:
        self.model.add(layer)
    for layer in Decoder(self.model.layers[-1].output_shape).model.layers:
        self.model.add(layer)
    self.model.add(Activation('softmax'))
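# Usage sketch (not from the original source): the enclosing class name `SpeechModel`
# and the input shape below are hypothetical placeholders, chosen only to illustrate
# how the assembled Sequential model might be compiled and inspected.
net = SpeechModel(input_shape=(100, 64))
net.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
net.model.summary()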
def __init__(self,
             data_format='channels_last',
             groups=8,
             reduction=2,
             l2_scale=1e-5,
             dropout=0.2,
             downsampling='conv',
             upsampling='conv',
             base_filters=16,
             depth=4,
             in_ch=2,
             out_ch=3):
    """Initializes the model, a cross between the 3D U-Net and the 2018 BraTS Challenge
    top model with VAE regularization.

    References:
        - [3D U-Net: Learning Dense Volumetric Segmentation from Sparse Annotation](https://arxiv.org/pdf/1606.06650.pdf)
        - [3D MRI brain tumor segmentation using autoencoder regularization](https://arxiv.org/pdf/1810.11654.pdf)
    """
    super(Model, self).__init__()
    self.epoch = tf.Variable(0, name='epoch', trainable=False)
    self.encoder = Encoder(data_format=data_format,
                           groups=groups,
                           reduction=reduction,
                           l2_scale=l2_scale,
                           dropout=dropout,
                           downsampling=downsampling,
                           base_filters=base_filters,
                           depth=depth)
    self.decoder = Decoder(data_format=data_format,
                           groups=groups,
                           reduction=reduction,
                           l2_scale=l2_scale,
                           upsampling=upsampling,
                           base_filters=base_filters,
                           depth=depth,
                           out_ch=out_ch)
    self.vae = VariationalAutoencoder(data_format=data_format,
                                      groups=groups,
                                      reduction=reduction,
                                      l2_scale=l2_scale,
                                      upsampling=upsampling,
                                      base_filters=base_filters,
                                      depth=depth,
                                      out_ch=in_ch)
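# Hedged usage sketch: instantiating the segmentation model with its documented defaults
# (two input channels, three output channels). Only the constructor's own arguments are
# used; no training or inference API is assumed beyond construction.
model = Model(data_format='channels_last', base_filters=16, depth=4, in_ch=2, out_ch=3)
# The three sub-networks built in __init__ are then available as:
#   model.encoder, model.decoder (segmentation branch), model.vae (VAE regularization branch)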
def __init__(self, word_vocab: Vocab, bio_vocab: Vocab, feat_vocab: Vocab,
             word_embed_size, bio_embed_size, feat_embed_size, hidden_size,
             enc_bidir, dropout=0.2):
    """ Init NMT Model.

    @param word_vocab (Vocab): word vocabulary (see vocab.py for documentation)
    @param bio_vocab (Vocab): BIO tag vocabulary
    @param feat_vocab (Vocab): lexical feature vocabulary
    @param word_embed_size (int): word embedding size (dimensionality)
    @param bio_embed_size (int): BIO tag embedding size (dimensionality)
    @param feat_embed_size (int): feature embedding size (dimensionality)
    @param hidden_size (int): hidden size (dimensionality)
    @param enc_bidir (bool): whether the encoder is bidirectional
    @param dropout (float): dropout probability
    """
    super(NMT, self).__init__()
    self.word_vocab = word_vocab
    self.bio_vocab = bio_vocab
    self.feat_vocab = feat_vocab
    self.args = {
        'word_embed_size': word_embed_size,
        'bio_embed_size': bio_embed_size,
        'feat_embed_size': feat_embed_size,
        'hidden_size': hidden_size,
        'enc_bidir': enc_bidir,
        'dropout': dropout
    }
    self.embedding = FeatureRichEmbedding(len(word_vocab), word_embed_size,
                                          len(bio_vocab), bio_embed_size,
                                          len(feat_vocab), feat_embed_size)
    self.encoder = Encoder(word_embed_size + bio_embed_size + feat_embed_size * 3,
                           hidden_size, dropout, enc_bidir)
    self.decoder_init_hidden_proj = nn.Linear(self.encoder.hidden_size, hidden_size)
    self.decoder = Decoder(word_embed_size, hidden_size, hidden_size,
                           len(word_vocab), dropout)
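# Hedged instantiation sketch for the NMT model above. The three Vocab objects are assumed
# to be loaded as in vocab.py; the embedding and hidden sizes are illustrative values only.
model = NMT(word_vocab, bio_vocab, feat_vocab,
            word_embed_size=300, bio_embed_size=16, feat_embed_size=16,
            hidden_size=512, enc_bidir=True, dropout=0.3)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)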
def __init__(self, word_vocab: Vocab, bio_vocab: Vocab, feat_vocab: Vocab,
             albert: bool, word_embed_size, bio_embed_size, feat_embed_size,
             hidden_size, dropout, enc_bidir, n_head, max_out_cpy: bool, **kwargs):
    super(QGModel, self).__init__()
    self.word_vocab = word_vocab
    self.bio_vocab = bio_vocab
    self.feat_vocab = feat_vocab
    self.args = {
        'albert': albert,
        'word_embed_size': word_embed_size,
        'bio_embed_size': bio_embed_size,
        'feat_embed_size': feat_embed_size,
        'hidden_size': hidden_size,
        'dropout': dropout,
        'enc_bidir': enc_bidir,
        'n_head': n_head,
        'max_out_cpy': max_out_cpy
    }
    self.args.update(kwargs)
    if albert:
        self.embedding = AlbertFeatureRichEmbedding(
            kwargs['albert_model_name'],
            len(bio_vocab), bio_embed_size,
            len(feat_vocab), feat_embed_size,
            kwargs['albert_cache_dir'])
        decoder_word_embed_size = kwargs['albert_word_embed_size']
    else:
        self.embedding = FeatureRichEmbedding(len(word_vocab), word_embed_size,
                                              len(bio_vocab), bio_embed_size,
                                              len(feat_vocab), feat_embed_size)
        decoder_word_embed_size = word_embed_size
    self.encoder = Encoder(word_embed_size + bio_embed_size + feat_embed_size * 3,
                           word_embed_size, hidden_size, dropout, enc_bidir, n_head)
    self.decoder = Decoder(decoder_word_embed_size, hidden_size, hidden_size,
                           len(word_vocab), dropout, max_out_cpy)
def __init__(self, vocab, embed_size, hidden_size, enc_bidir, attn_size, dropout=0.2):
    super(QGModel, self).__init__()
    self.vocab = vocab
    self.args = {
        'embed_size': embed_size,
        'hidden_size': hidden_size,
        'dropout': dropout,
        'enc_bidir': enc_bidir,
        'attn_size': attn_size
    }
    self.embeddings = ModelEmbeddings(embed_size, vocab)
    self.encoder = Encoder(embed_size, hidden_size, dropout, enc_bidir)
    self.decoder_init_hidden_proj = nn.Linear(self.encoder.hidden_size, hidden_size)
    self.decoder = Decoder(embed_size, hidden_size, attn_size, len(vocab.tgt), dropout)
def output():
    conf = Config()
    # checkpoint contents:
    # - 'Encoder': encoder parameters
    # - 'Decoder': decoder parameters
    # - 'Wordseq_src': ws_x, the word sequence of the training source corpus
    # - 'Wordseq_tar': ws_y, the word sequence of the training target corpus
    model = torch.load(conf.save_path + conf.save_version + 'checkpoint.tar')
    ws_x = model['Wordseq_src']
    ws_y = model['Wordseq_tar']
    x, y, ws_x, ws_y = test_preprocess(ws_x, ws_y)
    encoder = Encoder(hiddenDim=conf.encHiddenDim,
                      encVocSize=len(ws_x),
                      encEmbDim=conf.srcEmbDim,
                      PADID=conf.PadId,
                      layerNum=conf.encLayer,
                      isBidirectional=conf.enc_isBid,
                      dropout=conf.dropout)
    decoder = LuongDecoder(encHiddenDim=conf.encHiddenDim,
                           decHiddenDim=conf.decHiddenDim,
                           decVocSize=len(ws_y),
                           decEmbDim=conf.srcEmbDim,
                           PADID=conf.PadId,
                           layerNum=conf.decLayer,
                           enc_isBid=conf.enc_isBid,
                           # attentionMethod='general' if conf.enc_isBid else 'dot',
                           attentionMethod='multi',
                           dropout=conf.dropout)
    flow = batch_flow_seq2seq([x, y], [ws_x, ws_y], batch_size=conf.output_batchSize)
    encoder.load_state_dict(model['Encoder'])
    decoder.load_state_dict(model['Decoder'])
    encoder.eval()
    decoder.eval()
    print('model has been loaded.')
    x, xl, y, yl = next(flow)
    x = torch.LongTensor(x)
    xl = torch.LongTensor(xl)
    y = torch.LongTensor(y)
    yl = torch.LongTensor(yl)
    x, xl, y, yl = batch_sort(x, xl, y, yl)
    enc_output, enc_hidden = encoder(x, xl)
    # Decode at most decode_max_len tokens; decoding ends when the end token is produced.
    max_tar_len = conf.decode_max_len
    dec_lasthidden = torch.zeros(decoder.layerNum, conf.output_batchSize, conf.decHiddenDim)
    all_decoder_outputs = torch.zeros(conf.output_batchSize, max_tar_len)
    dec_y = torch.LongTensor([WordSequence.START] * conf.output_batchSize)
    # [max_tar_len, batchSize, sequenceLen - att_Window_Size + 1]
    att_set = []
    for t in range(max_tar_len):
        dec_output, dec_lasthidden, dec_att = decoder(dec_y, dec_lasthidden, enc_output)
        att_set.append(dec_att)
        # Pick the output for the current time step from the probability distribution
        # (sampling / greedy / beam search); greedy decoding is used here.
        prob, idx = torch.max(dec_output, 1)
        dec_y = idx
        all_decoder_outputs[:, t] = idx
    # att_set_tenor = torch.cat(att_set)
    for sen in range(conf.output_batchSize):
        print('decode_sen ', sen, ': ', ws_y.inverse_transform(all_decoder_outputs[sen]))
        print('origin_sen ', sen, ': ', ws_y.inverse_transform(y[sen]))
        print(' ')
    # print('att_set_tensor: ', att_set_tenor)
    # print('att_set_tensor_size', att_set_tenor.size())
    print('end.')
def __init__(self, options):
    super(Reader_LSTM2, self).__init__()
    self.Encoder = Encoder(options['Encoder'])
def main():
    # preprocess
    x, y, ws_x, ws_y = train_preprocess()
    # model
    encoder = Encoder(hiddenDim=conf.encHiddenDim,
                      encVocSize=len(ws_x),
                      encEmbDim=conf.srcEmbDim,
                      PADID=conf.PadId,
                      layerNum=conf.encLayer,
                      isBidirectional=conf.enc_isBid,
                      dropout=conf.dropout)
    decoder = LuongDecoder(encHiddenDim=conf.encHiddenDim,
                           decHiddenDim=conf.decHiddenDim,
                           decVocSize=len(ws_y),
                           decEmbDim=conf.srcEmbDim,
                           PADID=conf.PadId,
                           layerNum=conf.decLayer,
                           enc_isBid=conf.enc_isBid,
                           # attentionMethod='general' if conf.enc_isBid else 'dot',
                           attentionMethod='multi',
                           dropout=conf.dropout)
    encoder.train()
    decoder.train()
    print('attention method: ', decoder.attentionMethod)
    if conf.USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # optimizer
    enc_optimizer = optim.Adam(encoder.parameters())
    dec_optimizer = optim.Adam(decoder.parameters())
    # data flow
    flow = batch_flow_seq2seq([x, y], [ws_x, ws_y], batch_size=conf.train_batchSize)
    epoch = 1
    print('epoch: ', epoch)
    loss_recoder = {}
    for step in range(1, conf.max_steps + 1):
        # Zero the gradients. This step matters, especially for complex models:
        # without it the model may fail to converge, or converge to all-<unk> outputs.
        enc_optimizer.zero_grad()
        dec_optimizer.zero_grad()
        # One step trains on one batch.
        x, xl, y, yl = next(flow)
        x = torch.LongTensor(x)
        xl = torch.LongTensor(xl)
        y = torch.LongTensor(y)
        yl = torch.LongTensor(yl)
        if conf.USE_CUDA:
            x = x.cuda()
            xl = xl.cuda()
            y = y.cuda()
            yl = yl.cuda()
        x, xl, y, yl = batch_sort(x, xl, y, yl)
        # print(x)
        # print(y)
        # Inputs must be sorted by length in descending order.
        enc_output, enc_hidden = encoder(x, xl)
        max_tar_len = max(yl.tolist())
        # dec_lasthidden at the first decoding time step is the same for every
        # sentence, since all sentences start from <s>.
        dec_lasthidden = torch.zeros(decoder.layerNum, conf.train_batchSize, conf.decHiddenDim)
        all_decoder_outputs = torch.zeros(conf.train_batchSize, max_tar_len, decoder.decVocSize)
        dec_y = torch.LongTensor([WordSequence.START] * conf.train_batchSize)
        if conf.USE_CUDA:
            enc_output = enc_output.cuda()
            enc_hidden = enc_hidden.cuda()
            dec_lasthidden = dec_lasthidden.cuda()
            all_decoder_outputs = all_decoder_outputs.cuda()
            dec_y = dec_y.cuda()
        for t in range(max_tar_len):
            # dec_y and dec_lasthidden are updated at every step, reflecting the
            # sequential nature of decoding.
            dec_output, dec_lasthidden, dec_att = decoder(dec_y, dec_lasthidden, enc_output)
            dec_y = y[:, t]
            all_decoder_outputs[:, t, :] = dec_output
        loss = pad_cross_entropy(all_decoder_outputs, y, 0)
        loss.backward()
        if step % conf.display_step == 0 or step == 1:
            print('step: ', step, 'batch_loss: ', loss)
            loss_recoder[step] = loss.item()
        ec = clip_grad_norm(encoder.parameters(), 5.0)
        dc = clip_grad_norm(decoder.parameters(), 5.0)
        enc_optimizer.step()
        dec_optimizer.step()
        # Save one checkpoint per epoch: save_path + save_version + str(epoch) + 'checkpoint.tar'
        if step % (int(conf.data_size / conf.train_batchSize)) == 0:
            loss_recoder_file = open('./lossrecoder/' + conf.save_version + str(epoch) + '.txt',
                                     'w', encoding='utf8')
            for key, value in loss_recoder.items():
                loss_recoder_file.writelines(str(key) + ' ' + str(value) + '\n')
            loss_recoder_file.close()
            loss_recoder = {}
            torch.save(
                {
                    'Encoder': encoder.state_dict(),
                    'Decoder': decoder.state_dict(),
                    'Wordseq_src': ws_x,
                    'Wordseq_tar': ws_y
                },
                conf.save_path + conf.save_version + str(epoch) + 'checkpoint.tar')
            epoch += 1
            if epoch > conf.max_epochs:
                break
            print('epoch: ', epoch)
    print('training is ended !')
    # After training, save once more. The parameters are identical to the last checkpoint;
    # this is purely for the convenience of output(), which by default reads
    # save_path + save_version + 'checkpoint.tar'.
    torch.save(
        {
            'Encoder': encoder.state_dict(),
            'Decoder': decoder.state_dict(),
            'Wordseq_src': ws_x,
            'Wordseq_tar': ws_y
        },
        conf.save_path + conf.save_version + 'checkpoint.tar')
    print('model has been saved !')
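# A minimal entry point, assuming this training script is run directly; the original
# file may organise this differently.
if __name__ == '__main__':
    main()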
        ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
        ffn_output = self.dropout3(ffn_output, training=training)
        out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)
        return out3, attn_weights_block1, attn_weights_block2


if __name__ == '__main__':
    from layers.encoder import Encoder

    x = tf.random.uniform((32, 1000, 26))
    y = tf.random.uniform((32, 100))
    sample_encoder_layer = Encoder(2, 512, 8, 5048, 5000, rate=0.1)
    enc_output = sample_encoder_layer(x, mask=None, training=True)
    sample_decoder = Decoder(num_layers=2,
                             d_model=512,
                             num_heads=8,
                             dff=2048,
                             target_vocab_size=4337,
                             maximum_position_encoding=5000)
    output, attn = sample_decoder(tf.random.uniform((32, 100)),
                                  enc_output=enc_output,
                                  look_ahead_mask=None,
                                  padding_mask=None,
                                  training=False)
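    # Per the shape comments above, the decoder output is expected to be
    # (batch_size, target_seq_len, d_model) = (32, 100, 512) for this toy setup.
    print(output.shape)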
class Model(object):
    def __init__(self, is_train=True):
        self.is_train = is_train
        self.encoder = Encoder(FLAGS.encoder)
        self.selector = Selector(FLAGS.attention)
        # Placeholders for input.
        # Each time we input 'batch_size' bags; every bag has several sentences of
        # length num_steps, so we call the total number of sentences across the bags
        # 'total_sentences'.
        self.input_word = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.num_steps], name='input_word')
        self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.num_steps], name='input_pos1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.num_steps], name='input_pos2')
        self.input_type = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.num_steps], name='input_type')
        self.input_lens = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='input_lens')
        self.input_mask = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.num_steps], name='input_mask')
        self.input_scope = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size + 1], name='input_scope')
        self.label = tf.placeholder(dtype=tf.int32, shape=[None], name='label')
        self.label_for_select = tf.placeholder(dtype=tf.int32, shape=[None], name='label_for_select')
        self.input_weights = tf.placeholder(dtype=tf.float32, shape=[FLAGS.batch_size], name='input_weights')
        self.dropout_keep = tf.placeholder(tf.float32, name='dropout_keep_prob')

    def __assign_word2vec__(self, wordvec):
        self.word_vec = wordvec

    def build_model(self):
        self.global_step = tf.Variable(0, trainable=False)
        """embedding"""
        embedding_output = self.embedding_layer(use_type=FLAGS.use_type)
        """encoding"""
        encoder_out = self.encoder_layer(encoder_input=embedding_output)
        """attention"""
        self.logits, _ = self.attention_layer(encoder_out=encoder_out)
        """loss"""
        self.loss_layer(project_logits=self.logits)
        """model initialization"""
        with tf.variable_scope("optimizer"):
            optimizer = FLAGS.optimizer
            if optimizer == "sgd":
                self.opt = tf.train.GradientDescentOptimizer(FLAGS.lr)
            elif optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(FLAGS.lr)
            elif optimizer == "adgrad":
                self.opt = tf.train.AdagradOptimizer(FLAGS.lr)
            else:
                raise KeyError
            # Define training procedure
            grads_vars = self.opt.compute_gradients(self.final_loss)
            """
            capped_grads_vars = [[tf.clip_by_value(g, -FLAGS.clip, FLAGS.clip), v]
                                 for g, v in grads_vars]
            """
            self.train_op = self.opt.apply_gradients(grads_vars, self.global_step)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

    def embedding_layer(self, use_type):
        """
        :return: [total_sentences, num_steps, emb_size]
        """
        with tf.name_scope('embedding'):
            temp_word_embedding = tf.get_variable(initializer=self.word_vec,
                                                  name='temp_word_embedding',
                                                  dtype=tf.float32)
            unk_word_embedding = tf.get_variable(
                'unk_embedding', [FLAGS.embedding_dim], dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.word_embedding = tf.concat([
                temp_word_embedding,
                tf.reshape(unk_word_embedding, [1, FLAGS.embedding_dim]),
                tf.reshape(tf.constant(np.zeros([FLAGS.embedding_dim], dtype=np.float32)),
                           [1, FLAGS.embedding_dim])
            ], axis=0)
            self.pos1_embedding = tf.concat([
                tf.get_variable('pos1_embedding',
                                shape=[FLAGS.pos_num, FLAGS.pos_size],
                                dtype=tf.float32,
                                initializer=tf.contrib.layers.xavier_initializer()),
                tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                           [1, FLAGS.pos_size])
            ], axis=0)
            self.pos2_embedding = tf.concat([
                tf.get_variable('pos2_embedding',
                                shape=[FLAGS.pos_num, FLAGS.pos_size],
                                dtype=tf.float32,
                                initializer=tf.contrib.layers.xavier_initializer()),
                tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                           [1, FLAGS.pos_size])
            ], axis=0)
            if use_type:
                self.type_embedding = tf.get_variable('type_embedding',
                                                      [FLAGS.type_num, FLAGS.type_size])
                embedded_chars = tf.concat([
                    tf.nn.embedding_lookup(self.word_embedding, self.input_word),
                    tf.nn.embedding_lookup(self.pos1_embedding, self.input_pos1),
                    tf.nn.embedding_lookup(self.pos2_embedding, self.input_pos2),
                    tf.nn.embedding_lookup(self.type_embedding, self.input_type)
                ], 2)
            else:
                embedded_chars = tf.concat([
                    tf.nn.embedding_lookup(self.word_embedding, self.input_word),
                    tf.nn.embedding_lookup(self.pos1_embedding, self.input_pos1),
                    tf.nn.embedding_lookup(self.pos2_embedding, self.input_pos2)
                ], 2)
            return embedded_chars

    def encoder_layer(self, encoder_input):
        """Encode instances."""
        with tf.name_scope('encoder'):
            return self.encoder.encode(self.is_train, self.input_mask, encoder_input)

    def attention_layer(self, encoder_out):
        """Attention mechanism for bag-level prediction."""
        with tf.name_scope('attention'):
            return self.selector.select(self.is_train, encoder_out, self.input_scope,
                                        self.label_for_select)

    def loss_layer(self, project_logits, name=None):
        """
        Explanation: during training, the relation label of a bag (with bag_id) is a
        one-hot vector, but during testing it should be an N-hot vector. Since we
        don't care about the loss during testing, we don't need to use
        sigmoid_cross_entropy.
        """
        with tf.name_scope('loss'):
            onehot_label = tf.one_hot(indices=self.label, depth=FLAGS.classes_num, dtype=tf.int32)
            # losses = tf.reduce_mean(tf.losses.sigmoid_cross_entropy(
            #     logits=project_scores, multi_class_labels=onehot_label))
            self.final_loss = tf.losses.softmax_cross_entropy(logits=project_logits,
                                                              onehot_labels=onehot_label,
                                                              weights=self.input_weights)
        with tf.name_scope('accuracy'):
            self.prediction = tf.argmax(project_logits, axis=1, name='prediction')
            correct_predictions = tf.equal(self.prediction, tf.cast(self.label, dtype=tf.int64))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')

    def run_step(self, session, is_train, feed_dict, summary_op=None):
        if is_train:
            global_step, _, summaries, loss, accuracy = session.run([
                self.global_step, self.train_op, summary_op, self.final_loss, self.accuracy
            ], feed_dict)
            return global_step, summaries, loss, accuracy
        else:
            scores = session.run([self.logits], feed_dict)
            return scores
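# Hedged usage sketch for the relation-extraction Model above. The pretrained word-vector
# matrix shape and the session setup are illustrative assumptions, not code from the
# original project.
vocab_size = 100000  # placeholder vocabulary size
word_vec = np.random.normal(size=(vocab_size, FLAGS.embedding_dim)).astype(np.float32)
model = Model(is_train=True)
model.__assign_word2vec__(word_vec)
model.build_model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # feed_dict would map the placeholders defined in __init__ to one batch of bags, e.g.
    # global_step, summaries, loss, accuracy = model.run_step(sess, True, feed_dict, summary_op)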