def _make_model(self):
    # embedding: load pretrained vectors and freeze them
    embedding = nn.Embedding(num_embeddings=self._config.vocab_size,
                             embedding_dim=self._config.embed_size)
    embedding.weight.data.copy_(
        torch.from_numpy(np.load(self._config.embedding_file_name)))
    embedding.weight.requires_grad = False
    # encoder
    encoder = Encoder(rnn_type=self._config.rnn_type,
                      embed_size=self._config.embed_size,
                      hidden_size=self._config.hidden_size,
                      num_layers=self._config.num_layers,
                      bidirectional=self._config.bidirectional,
                      dropout=self._config.dropout)
    # bridge
    bridge = Bridge(rnn_type=self._config.rnn_type,
                    hidden_size=self._config.hidden_size,
                    bidirectional=self._config.bidirectional)
    # decoder rnn cell
    if self._config.rnn_type == 'LSTM':
        rnn_cell = MultiLayerLSTMCells(
            input_size=2 * self._config.embed_size,
            hidden_size=self._config.hidden_size,
            num_layers=self._config.num_layers,
            dropout=self._config.dropout)
    else:
        rnn_cell = MultiLayerGRUCells(
            input_size=2 * self._config.embed_size,
            hidden_size=self._config.hidden_size,
            num_layers=self._config.num_layers,
            dropout=self._config.dropout)
    # attention
    if self._config.attention_type == 'Dot':
        attention = DotAttention()
    elif self._config.attention_type == 'ScaledDot':
        attention = ScaledDotAttention()
    elif self._config.attention_type == 'Additive':
        attention = AdditiveAttention(query_size=self._config.hidden_size,
                                      key_size=self._config.hidden_size)
    elif self._config.attention_type == 'Multiplicative':
        attention = MultiplicativeAttention(
            query_size=self._config.hidden_size,
            key_size=self._config.hidden_size)
    elif self._config.attention_type == 'MLP':
        attention = MultiLayerPerceptronAttention(
            query_size=self._config.hidden_size,
            key_size=self._config.hidden_size,
            out_size=1)
    else:
        raise ValueError(
            'Unsupported attention type: %s' % self._config.attention_type)
    # decoder
    decoder = Decoder(embedding, rnn_cell, attention,
                      self._config.hidden_size)
    # model
    model = Seq2Seq(embedding, encoder, bridge, decoder)
    return model
def make_model(self):
    model = Seq2Seq(vocab_size=self._config.vocab_size,
                    embed_size=self._config.embed_size,
                    hidden_size=self._config.hidden_size,
                    rnn_type=self._config.rnn_type,
                    num_layers=self._config.num_layers,
                    bidirectional=self._config.bidirectional,
                    attention_type=self._config.attention_type,
                    dropout=self._config.dropout)
    model.load_pretrained_embeddings(self._config.embedding_file_name)
    return model
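# `load_pretrained_embeddings` is not defined in this snippet. A minimal
# sketch of what it might do, assuming (as in the first snippet above) that
# the embedding file is a NumPy .npy matrix of shape (vocab_size, embed_size)
# and that the model exposes its embedding layer as `self.embedding` -- both
# are assumptions, not the project's confirmed API:
import numpy as np
import torch

def load_pretrained_embeddings(self, embedding_file_name):
    # Copy the saved matrix into the embedding layer and freeze it.
    weights = torch.from_numpy(np.load(embedding_file_name))
    self.embedding.weight.data.copy_(weights)
    self.embedding.weight.requires_grad = False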
def __init__(self, vocab_size, config):
    super(OCR, self).__init__()
    self.cnn = CNN()
    self.config = config
    self.transformer = Seq2Seq(
        vocab_size,
        encoder_hidden=config['seq_parameters']['encoder_hidden'],
        decoder_hidden=config['seq_parameters']['decoder_hidden'],
        img_channel=config['seq_parameters']['img_channel'],
        decoder_embedded=config['seq_parameters']['decoder_embedded'],
        dropout=config['seq_parameters']['dropout'])
def load_model(self, weights, device):
    INPUT_DIM = len(self.SRC.vocab)
    OUTPUT_DIM = len(self.TRG.vocab)
    enc = Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS,
                  ENC_PF_DIM, ENC_DROPOUT, device)
    dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS,
                  DEC_PF_DIM, DEC_DROPOUT, device)
    SRC_PAD_IDX = self.SRC.vocab.stoi[self.SRC.pad_token]
    TRG_PAD_IDX = self.TRG.vocab.stoi[self.TRG.pad_token]
    model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)
    # map_location keeps this working when the checkpoint was saved on a
    # different device (e.g. GPU weights loaded on CPU).
    model.load_state_dict(torch.load(weights, map_location=device))
    return model
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    # assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)"
          % (len(train_iter), len(train_iter.dataset),
             len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [EN_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size,
                      n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size,
                      n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, criterion, optimizer, train_iter,
              en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, criterion, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f"
              % (e, val_loss, math.exp(val_loss)))
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, criterion, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss)
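# The `train` and `evaluate` helpers are referenced above but not shown. A
# minimal sketch of what `evaluate` could look like, assuming the model
# returns logits of shape (trg_len, batch, vocab_size) and accepts a
# teacher_forcing_ratio argument -- both assumptions about code not shown:
import torch

def evaluate(model, criterion, data_iter, vocab_size, DE, EN):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in data_iter:
            src, trg = batch.src, batch.trg
            output = model(src, trg, teacher_forcing_ratio=0.0)
            # Skip the initial <sos> position in each target sequence.
            loss = criterion(output[1:].reshape(-1, vocab_size),
                             trg[1:].reshape(-1))
            total_loss += loss.item()
    return total_loss / len(data_iter)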
def main(args):
    checkpoint_path = os.path.join("saved/", args.name, args.checkpoint)
    checkpoint = torch.load(checkpoint_path)
    config = checkpoint['config']
    # if args.task.lower() == 'caption':
    embedder = eval(config['embedder']['type'])
    embedder_path = os.path.join("saved/", args.name, "embedder.pkl")
    data_loader = CaptionDataLoader(config, embedder, mode='test',
                                    path=args.data_dir,
                                    embedder_path=embedder_path)
    model = Seq2Seq(config, embedder=data_loader.embedder)
    model.load_state_dict(checkpoint['state_dict'])
    if not args.no_cuda:
        model.cuda()
    model.eval()
    model.summary()

    result = []
    for batch_idx, (in_seq, id) in enumerate(data_loader):
        in_seq = torch.FloatTensor(in_seq)
        in_seq = Variable(in_seq)
        if not args.no_cuda:
            in_seq = in_seq.cuda()
        if args.beam_size == 1:
            out_seq = model(in_seq, 24)
            out_seq = np.array([seq.data.cpu().numpy() for seq in out_seq])
            out_seq = np.transpose(out_seq, (1, 0, 2))
            out_seq = data_loader.embedder.decode_lines(out_seq)
        else:
            out_seq = beam_search(model, data_loader.embedder, in_seq,
                                  seq_len=24, beam_size=args.beam_size)
            out_seq = data_loader.embedder.decode_lines(out_seq)
        out_seq = [(str(id[0]), out_seq)]
        result.extend(out_seq)

    with open(args.output, 'w') as f:
        for video_id, caption in result:
            caption = postprocess(caption)
            f.write(video_id + ',' + caption + '\n')
def init_model_from_ckpt():
    _, _, _, train_data, valid_data, test_data = get_dataloaders_and_data()
    # Source and target share the same DOC field, so the pad indices coincide.
    SRC_PAD_IDX = DOC.vocab.stoi[DOC.pad_token]
    TRG_PAD_IDX = DOC.vocab.stoi[DOC.pad_token]
    INPUT_DIM = len(DOC.vocab)
    OUTPUT_DIM = len(DOC.vocab)
    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                  ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                  DEC_DROPOUT, attn)
    model = Seq2Seq(enc, dec, SRC_PAD_IDX, device).to(device)
    most_recent_ckpt = get_most_recent_ckpt('ckpts')
    model.load_state_dict(torch.load(most_recent_ckpt))
    return model, train_data, valid_data, test_data
def create_model(vocab_size):
    # NOTE: as written, this shared embedding is built but never passed to
    # the encoder/decoder below, so it has no effect on the model.
    embedding = nn.Embedding(vocab_size, config.hidden_size) \
        if config.single_embedding else None
    encoder = Encoder(vocab_size, config.hidden_size,
                      n_layers=config.n_encoder_layers,
                      dropout=config.dropout)
    decoder = Decoder(config.hidden_size, vocab_size,
                      n_layers=config.n_decoder_layers,
                      dropout=config.dropout)
    model = Seq2Seq(encoder=encoder, decoder=decoder,
                    max_length=config.max_length)
    if torch.cuda.is_available() and config.use_cuda:
        model.cuda()
    return model
def _make_model(self):
    embedding = nn.Embedding(self._config.vocab_size,
                             self._config.embed_size)
    embedding.weight.data.copy_(
        torch.from_numpy(np.load(self._config.embedding_file_name)))
    embedding.weight.requires_grad = False
    encoder = Encoder(self._config.embed_size, self._config.hidden_size,
                      self._config.num_layers, self._config.bidirectional,
                      self._config.dropout)
    bridge = Bridge(self._config.hidden_size, self._config.bidirectional)
    lstm_cell = MultiLayerLSTMCells(2 * self._config.embed_size,
                                    self._config.hidden_size,
                                    self._config.num_layers,
                                    dropout=self._config.dropout)
    # attention = MultiplicativeAttention(self._config.hidden_size,
    #                                     self._config.hidden_size)
    attention = AdditiveAttention(self._config.hidden_size,
                                  self._config.hidden_size)
    decoder = Decoder(embedding, lstm_cell, attention,
                      self._config.hidden_size)
    model = Seq2Seq(embedding, encoder, bridge, decoder)
    return model
# parser = argparse.ArgumentParser()
# parser.add_argument('--batch-size', type=int, default=32)
# parser.add_argument('--enc-emb-dim', type=int, default=64)
# parser.add_argument('--dec-emb-dim', type=int, default=64)
# parser.add_argument('--hid-dim', type=int, default=216)
# parser.add_argument('--n-layers', type=int, default=2)
# parser.add_argument('--enc-dropout', type=float, default=0.5)
# parser.add_argument('--dec-dropout', type=float, default=0.5)
# args = parser.parse_args()

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec, None)
optimizer = Adam(model.parameters())
criterion = CrossEntropyLoss()

train_loader = loader(train_data, BATCH_SIZE)
valid_loader = loader(valid_data, BATCH_SIZE)

N_EPOCHS = 10
CLIP = 1
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()
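    # The snippet breaks off at the top of the epoch loop. A hypothetical
    # continuation of the loop body, assuming tutorial-style train/evaluate
    # helpers that return average losses (neither appears in the original):
    train_loss = train(model, train_loader, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_loader, criterion)
    elapsed = time.time() - start_time
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'best-model.pt')
    print(f'Epoch {epoch + 1:02} | {elapsed:.0f}s | '
          f'train loss {train_loss:.3f} | valid loss {valid_loss:.3f}')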
    }
    json.dump(data, f)

## start main
args = get_args()
train_data, test_data = data_loader(args, "data/processed_data.json",
                                    first=True)
test_data = train_data if args.use_train_data else test_data
device_kind = "cuda:{}".format(
    args.cuda_number) if torch.cuda.is_available() else "cpu"
args.device = torch.device(device_kind)
model = Seq2Seq(args) if args.model_version == 1 else \
    Seq2Seq2(args) if args.model_version == 2 else \
    Transformer(args)
model.to(args.device)
if args.model_name != "":
    param = torch.load("model_data/{}".format(args.model_name))
    model.load_state_dict(param)
# If start_epoch is at least 1, resume from the checkpoint saved after the
# previous epoch; otherwise start training from scratch.
elif args.start_epoch >= 1:
    param = torch.load(
        "model_data/epoch_{}_model.pth".format(args.start_epoch - 1))
    model.load_state_dict(param)
else:
    args.start_epoch = 0
DEC_HEADS = config['DEC_HEADS']
ENC_PF_DIM = config['ENC_PF_DIM']
DEC_PF_DIM = config['DEC_PF_DIM']
ENC_DROPOUT = config['ENC_DROPOUT']
DEC_DROPOUT = config['DEC_DROPOUT']

enc = Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS,
              ENC_PF_DIM, ENC_DROPOUT, device)
dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS,
              DEC_PF_DIM, DEC_DROPOUT, device)

SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)
print(f'The model has {count_parameters(model):,} trainable parameters')
model.apply(initialize_weights)

if config['train_embeddings']:
    model.decoder.tok1_embedding.load_state_dict(glovemodel.wi.state_dict())
    model.decoder.tok2_embedding.load_state_dict(glovemodel.wj.state_dict())

LEARNING_RATE = config['LEARNING_RATE']
N_EPOCHS = config['N_EPOCHS']
CLIP = config['CLIP']

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
from model.seq2seq import Seq2Seq
import tensorflow as tf

sess = tf.Session()

# python3 -u train_s2s.py -e 10 -i 100 -u 512 -g 5.0 -n 2 -em 500 -l 1.0 -d 0.3 -b 32 -o output/ -s 2000
s2s = Seq2Seq(sess, 512, [10], [10], 2, tensorboard_id=1, cell_type='GRU',
              mode='train', learning_rate=0.1, dropout=0.3,
              gradient_clip=5.0)

vars = tf.trainable_variables()
for v in vars:
    print(v)
print(sess.run(s2s.emb_test))
# s2s.restore('/Users/cem/Desktop/output/seq2seq')
        else:
            yid.append(_yid[k])
            scores.append(_socres[k])
    yid = np.array(yid)
    return data.id2str(yid[np.argmax(scores)])


s1 = u'夏天来临,皮肤在强烈紫外线的照射下,晒伤不可避免,因此,晒后及时修复显得尤为重要,否则可能会造成长期伤害。专家表示,选择晒后护肤品要慎重,芦荟凝胶是最安全,有效的一种选择,晒伤严重者,还请及时就医。'
s2 = u'8月28日,网络爆料称,华住集团旗下连锁酒店用户数据疑似发生泄露。从卖家发布的内容看,数据包含华住旗下汉庭、禧玥、桔子、宜必思等10余个品牌酒店的住客信息。泄露的信息包括华住官网注册资料、酒店入住登记的身份信息及酒店开房记录,住客姓名、手机号、邮箱、身份证号、登录账号密码等。卖家对这个约5亿条数据打包出售。第三方安全平台威胁猎人对信息出售者提供的三万条数据进行验证,认为数据真实性非常高。当天下午,华住集团发声明称,已在内部迅速开展核查,并第一时间报警。当晚,上海警方消息称,接到华住集团报案,警方已经介入调查。'


class Evaluate(Callback):
    def __init__(self):
        self.lowest = 1e10

    def on_epoch_end(self, epoch, logs=None):
        print(gen_titles(s1))
        print(gen_titles(s2))
        if logs['loss'] <= self.lowest:
            self.lowest = logs['loss']
            model.save_weights('./model/best_model.weights')


evaluator = Evaluate()
model = Seq2Seq(config, chars).run()
model.compile(optimizer=Adam(1e-3))  # lr
model.fit_generator(data.get_data(),
                    steps_per_epoch=1000,
                    epochs=config.epochs,
                    callbacks=[evaluator])
                                          shuffle=False,
                                          collate_fn=collate_fn,
                                          drop_last=True)

INPUT_DIM = len(src_vocab)
OUTPUT_DIM = len(tag_vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec)


# init weights
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)


model.apply(init_weights)


# calculate the number of trainable parameters in the model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
train_loader = torchdata.DataLoader(dataset=data_loaded,
                                    collate_fn=data_loaded.custom_collate_fn,
                                    batch_size=batch_size)
trg_max_seq_len = next(iter(train_loader))[1].size(1) - 1  # <s> is not included

epochs = 1
interval = 1
learning_rate = 5e-2

model = Seq2Seq(hidden_size=hidden_size,
                vocab_len=vocab_len,
                embedding_size=embedding_size,
                batch_size=batch_size,
                pad_idx=pad_idx,
                trg_max_seq_len=trg_max_seq_len,
                device=device)
model.to(device)

optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
criterion = nn.NLLLoss(ignore_index=pad_idx)


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f'Model parameters : {count_parameters(model):,}')
def decode_answer(self, answers):
    decode_answers = []
    for answer in answers:
        decode_answer = []
        for token_id in answer:
            if token_id == 0:
                token = "<pad>"
            else:
                token = self.data_provider.decode_vocab[token_id]
            if token == "<end>" or token == "<pad>":
                break
            decode_answer.append(token)
        decode_answers.append(decode_answer)
    return decode_answers


if __name__ == "__main__":
    data_processor = DataProcessor("./data/QA_data/varicocele/",
                                   "./data/QA_data/varicocele/varicocele.json",
                                   word2vec="./data/word2vec/varicocele")
    model = Seq2Seq(data_processor.start_token,
                    data_processor.vocab_embedding)
    trainer = Trainer(model, data_processor, learning_rate=5e-3, batch_size=8)
    trainer.train(train_epoch=100, save_epoch=10, display_step=100,
                  restore=True)
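# A standalone illustration of the same decoding logic, using a tiny
# hypothetical vocabulary (the real one comes from DataProcessor): decoding
# stops at the first <end> or <pad> token.
decode_vocab = {1: "varicocele", 2: "is", 3: "treatable", 4: "<end>"}

def decode_tokens(answer):
    decoded = []
    for token_id in answer:
        token = "<pad>" if token_id == 0 else decode_vocab[token_id]
        if token in ("<end>", "<pad>"):
            break
        decoded.append(token)
    return decoded

print(decode_tokens([1, 2, 3, 4, 0]))  # ['varicocele', 'is', 'treatable']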
def main(_):
    """
    Main function, loads and vectorizes data, builds model, then proceeds
    to start the training process.
    """
    # Load data (paths to the vocab, and tokens)
    fr_vocab, en_vocab, fr_train, en_train, _ = load_data()

    # Bucket train data
    train_set = bucket(fr_train, en_train)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
    train_total_size = float(sum(train_bucket_sizes))
    print "Total Number of Training Examples", train_total_size

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll
    # use to select a bucket. Length of [scale[i], scale[i+1]] is
    # proportional to the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # Get size of vocabularies
    french_vocab, _ = init_vocab(fr_vocab)
    english_vocab, _ = init_vocab(en_vocab)

    # Start Tensorflow Session
    with tf.Session() as sess:
        model = Seq2Seq(len(french_vocab), len(english_vocab), buckets,
                        FLAGS.size, FLAGS.num_layers, forward_only=False)
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print "Reading model parameters from %s" % ckpt.model_checkpoint_path
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print "Created model with fresh parameters."
            sess.run(tf.initialize_all_variables())

        # Start Training Loop
        step_time, loss, current_step = 0.0, 0.0, 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a
            # random number in [0, 1] and use the corresponding interval
            # in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = \
                model.get_batch(train_set, bucket_id)
            step_loss, embeddings, _ = model.step(sess, encoder_inputs,
                                                  decoder_inputs,
                                                  target_weights,
                                                  bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1
            progress(current_step % FLAGS.steps_per_checkpoint,
                     FLAGS.steps_per_checkpoint,
                     "Step %s" % (current_step / FLAGS.steps_per_checkpoint))

            # Once in a while, we save checkpoint, and print statistics.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                print ""
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print ("Global step %d, Learning rate %.4f, Step-time %.2f, "
                       "Perplexity %.2f" % (model.global_step.eval(),
                                            model.learning_rate.eval(),
                                            step_time, perplexity))

                # Decrease learning rate if no improvement was seen over
                # last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.log_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
def main(_):
    """
    Main function, instantiates model, loads and vectorizes source data,
    translates and outputs English translations.
    """
    # Load vocabularies
    fr_vocab_path = "data/vocabulary/fr.vocab"
    en_vocab_path = "data/vocabulary/en.vocab"
    fr2idx, idx2fr = init_vocab(fr_vocab_path)
    en2idx, idx2en = init_vocab(en_vocab_path)

    with tf.Session() as sess:
        # Create Model by Loading Parameters
        model = Seq2Seq(len(fr2idx), len(en2idx), buckets, FLAGS.size,
                        FLAGS.num_layers, forward_only=True)
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print "Reading model parameters from %s" % ckpt.model_checkpoint_path
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print "No model checkpoints found!"
            sys.exit(0)

        # Reset batch_size to 1
        model.batch_size = 1

        translations = []
        with tf.gfile.GFile(SOURCE_PATH, 'rb') as f:
            sentence = f.readline()
            while sentence:
                # Source file is already tokenized, just need to split at spaces
                token_ids = sentence.split()
                if len(token_ids) >= 50:
                    translations.append("")
                    sentence = f.readline()
                    continue

                # Pick which bucket it belongs to.
                bucket_id = min([b for b in xrange(len(buckets))
                                 if buckets[b][0] > len(token_ids)])

                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)

                # Get output logits for the sentence.
                _, embedding, output_logits = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, True)

                # This is a greedy decoder - outputs are just argmaxes of
                # output_logits.
                outputs = [int(np.argmax(logit, axis=1))
                           for logit in output_logits]

                # If there is an EOS symbol in outputs, cut them at that point.
                if EOS_ID in outputs:
                    outputs = outputs[:outputs.index(EOS_ID)]

                # Print out English sentence corresponding to outputs.
                translation = " ".join(
                    [tf.compat.as_str(idx2en[output]) for output in outputs])
                print translation
                translations.append(translation)
                sentence = f.readline()

        with tf.gfile.GFile(TARGET_PATH, 'wb') as f:
            for t in translations:
                f.write(t + "\n")
    # NOTE: the format string below has no placeholders, so the .format()
    # arguments are ignored and the file is always model_data/model.pth.
    torch.save(model.state_dict(), "model_data/model.pth"
               .format(args.start_time, round(predict_rate, 3), epoch))
    logger(args, "save model")

## start main
args = get_args()
train_data, test_data = data_loader(args, "data/processed_data.json",
                                    first=True)
device_kind = "cuda:{}".format(
    args.cuda_number) if torch.cuda.is_available() else "cpu"
args.device = torch.device(device_kind)
model = Seq2Seq(args)
model.to(args.device)
# If start_epoch is at least 1, resume from the checkpoint saved after the
# previous epoch; otherwise start training from scratch.
if args.start_epoch >= 1:
    param = torch.load(
        "model_data/epoch_{}_model.pth".format(args.start_epoch - 1))
    model.load_state_dict(param)
else:
    args.start_epoch = 0
optimizer = optim.Adam(model.parameters(), lr=args.lr)
logger(args, "use {}".format(device_kind))
for epoch in range(args.start_epoch, args.epoch_num):