def main():
    parser = argparse.ArgumentParser(description='learning main')
    parser.add_argument('--loop', '-l', default=0, type=int,
                        help='Set the number of steps to resume learning')
    parser.add_argument('--resume', '-r', type=str, default="",
                        help='Set whether to resume learning')
    args = parser.parse_args()

    # build training data
    word_list = get_word_lists("./aozora_text/files/files_all_rnp.txt")
    stop_word_list = get_word_lists(
        "./aozora_text/files/stop_files_all_rnp.txt")

    ds = DataShaping()
    if args.resume == "resume":
        seq2seq = Seq2Seq("resume")
    else:
        seq2seq = Seq2Seq("train")

    for i in range(args.loop, len(word_list)):
        train = ds.make_data_train(stop_word_list, i)
        teach, target = ds.make_data_teach_target(word_list, i)
        seq2seq.train(train, teach, target)
        seq2seq.save_model()
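# get_word_lists is not defined in these snippets; a minimal sketch under
# the assumption that the corpus file stores one pre-tokenized sentence per
# line with tokens separated by single spaces (the empty-token cleanup in
# the generation script below suggests split() artifacts). The file format
# is an assumption, not confirmed by the code.
def get_word_lists(path):
    with open(path, encoding="utf-8") as f:
        return [line.rstrip("\n").split(" ") for line in f]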
def main():
    # build training data
    word_list = get_word_lists("./aozora_text/files/files_all_tmp.txt")
    # stop_word_list = get_word_lists(
    #     "./aozora_text/files/stop_files_all_tmp.txt")

    ds = DataShaping()
    seq2seq = Seq2Seq("make")
    st = StringOperation()
    start_token = np.array([st.sentens_array_to_vec(["BOS"])])

    # pick a random seed sentence and drop empty tokens left by splitting
    sentens = word_list[rand.randint(0, len(word_list) - 1)][1:]
    while '' in sentens:
        sentens.remove('')
    # join the tokens back into a plain string; str(list) would keep the
    # brackets and quotes of the list repr
    sentens = "".join(sentens)

    w = WakachiMethod(Wakachi)
    for _ in range(3):
        print("sentens:", sentens)
        sentens_rm_stop_word = w.remove_stopword(sentens)
        print("rm stop word sentens", sentens_rm_stop_word)
        sentens_vec = np.array([st.sentens_array_to_vec(sentens_rm_stop_word)])
        sentens_vec = seq2seq.make_sentens_vec(sentens_vec, start_token)
        sentens_vec = np.array(sentens_vec).reshape(len(sentens_vec), 128)
        sentens_arr = st.sentens_vec_to_sentens_arr_prob(sentens_vec)
        sentens = "".join(sentens_arr)
        print(sentens)
def test():
    print('building model...')
    voc = Voc()
    seq2seq = Seq2Seq(voc.num_words).to(args.device)
    param_optimizer = args.optimiser(seq2seq.parameters(),
                                     lr=args.learning_rate)
    decoder_optimizer = args.optimiser(seq2seq.decoder.parameters(),
                                       lr=args.learning_rate * args.decoder_ratio)
    print('done')

    if args.param_file is None:
        print('please specify the saved param file.')
        exit(-1)
    else:
        print('loading saved parameters from ' + args.param_file + '...')
        checkpoint = torch.load(args.param_file)
        seq2seq.load_state_dict(checkpoint['model'])
        param_optimizer.load_state_dict(checkpoint['opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
        voc = checkpoint['voc']
        print('done')

    print('loading test data...')
    test_set = FruitSeqDataset(voc, dataset_file_path=args.test_file)
    print('done')

    test_seq_acc, test_tok_acc, test_loss = eval_model(seq2seq, test_set)
    print("[TEST]Loss: {:.4f}; Seq-level Accuracy: {:.4f}; Tok-level Accuracy: {:.4f}".format(
        test_loss, test_seq_acc * 100, test_tok_acc * 100))
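# eval_model is used here and in the training script but is not shown; a
# minimal sketch of its likely shape, assuming each batch can be scored by
# a hypothetical compute_metrics helper that returns the same
# (seq_acc, tok_acc, loss) triple train_epoch reports.
def eval_model(seq2seq, dataset):
    seq2seq.eval()
    total_seq_acc = total_tok_acc = total_loss = 0.
    with torch.no_grad():
        for data_batch in dataset:
            # compute_metrics is hypothetical, shown only for illustration
            seq_acc, tok_acc, loss = compute_metrics(seq2seq, data_batch)
            total_seq_acc += seq_acc
            total_tok_acc += tok_acc
            total_loss += loss
    seq2seq.train()
    n = len(dataset)
    return total_seq_acc / n, total_tok_acc / n, total_loss / n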
def test(args, vocab_size):
    device = t.device('cuda') if args.use_gpu else t.device('cpu')
    beam_size = args.beam_size
    topk = args.topk
    rev_model = args.load_model_path

    model = Seq2Seq(embed_size=args.embed_size,
                    enc_dec_output_size=args.enc_dec_output_size,
                    attn_size=args.attn_size,
                    num_layers=args.num_layers,
                    bidirectional=args.bidirectional,
                    use_gpu=args.use_gpu,
                    vocab_size=vocab_size).to(device)
    assert rev_model is not None

    # load the previously saved model
    rev_path = os.path.join(model_dir, rev_model)
    if os.path.exists(rev_path):
        print('read in model from', rev_path)
        model.load(load_path=rev_path)

    batch_size = args.batch_size
    test_set = Set(read_data(args.test_data_root))
    test_loader = Loader(test_set, batch_size, shuffle=False,
                         use_gpu=args.use_gpu,
                         num_workers=args.num_workers).loader

    model.eval()
    with t.no_grad():
        recorder.epoch_start(0, 'test', len(test_set))
        for batch_id, batch in enumerate(test_loader):
            encoder_inputs, seq_len, decoder_inputs, weights = batch
            encoder_inputs = encoder_inputs.to(device)
            seq_len = seq_len.to(device)
            decoder_inputs = decoder_inputs.to(device)
            weights = weights.to(device)

            logits, output_symbols = model(
                encoder_inputs, seq_len, decoder_inputs[:, :-1],
                mode='test',
                max_len=args.max_len,
                beam_search=args.beam_size > 1,
                beam_size=args.beam_size,
                topk=args.topk)

            nll_loss = compute_loss(logits, decoder_inputs[:, 1:], weights)
            ppl = perplexity(nll_loss)
            recorder.batch_end(batch_id, batch_size, nll_loss, ppl)
            recorder.log_text(encoder_inputs.tolist(),
                              decoder_inputs[:, 1:].tolist(),
                              output_symbols.tolist())
        recorder.epoch_end()
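# compute_loss and perplexity are external to these snippets; a sketch of
# what they plausibly compute, given how they are called: a padding-masked
# negative log-likelihood over (batch, seq_len, vocab) logits, and its
# exponential. This is a reimplementation guess, not the project's code.
import torch
import torch.nn.functional as F

def compute_loss(logits, targets, weights):
    # per-token NLL, masked and normalised by the number of real tokens
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(-1, targets.unsqueeze(-1)).squeeze(-1)
    return (nll * weights).sum() / weights.sum()

def perplexity(nll_loss):
    # perplexity is the exponential of the mean per-token NLL
    return torch.exp(nll_loss)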
                          shuffle=True,
                          num_workers=16,
                          pin_memory=True)
test_loader = DataLoader(test_set,
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers=16,
                         pin_memory=True)

# Create model
encoder = Encoder(lstm_hidden_size=enc_hid_dim, arch="resnet18").to(device)
decoder = Decoder(output_dim=vocab_size,
                  emb_dim=emb_dim,
                  enc_hid_dim=enc_hid_dim,
                  dec_hid_dim=dec_hid_dim,
                  dropout=dropout).to(device)
model = Seq2Seq(encoder=encoder, decoder=decoder, device=device).to(device)

# Resume model
if checkpoint is not None:
    start_epoch, best_wer = resume_model(model, checkpoint)

# Run the model in parallel across GPUs
if torch.cuda.device_count() > 1:
    print("Using {} GPUs".format(torch.cuda.device_count()))
    model = nn.DataParallel(model)

# Create loss criterion & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                       weight_decay=weight_decay)

# Start evaluation
print("Evaluation Started".center(60, '#'))
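# resume_model is called above but not defined in the snippet; a minimal
# sketch matching its call site, assuming the checkpoint is a dict with
# 'model', 'epoch' and 'best_wer' keys (the key names are assumptions).
import torch

def resume_model(model, checkpoint_path):
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(ckpt['model'])
    print("Resumed from epoch {}".format(ckpt['epoch']))
    return ckpt['epoch'] + 1, ckpt['best_wer']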
if __name__ == "__main__":
    config = Config()
    device = config.device

    attn = Attention(config.s2s_enc_hid, config.s2s_dec_hid)
    enc = Encoder(config.s2s_emb_dim, config.s2s_enc_hid,
                  config.s2s_dec_hid, config.s2s_enc_dropout)
    dec = Decoder(len(config.class_char), config.s2s_emb_dim,
                  config.s2s_enc_hid, config.s2s_dec_hid,
                  config.s2s_enc_dropout, attn)
    model = Seq2Seq(enc, dec, device).to(device)
    model.apply(init_weights)
    model.load_state_dict(torch.load('weight/s2s.pt'))
    model.eval()

    data = gen(["data/test/96.json"], 1, config.max_box_num, device)
    with torch.no_grad():
        src, trg = next(data)
        output = model(src)
        output = output.permute(1, 0, 2).contiguous().view(-1, len(config.class_char))
        output = torch.max(F.softmax(output, dim=1), 1)
        possible, label = output.values, output.indices
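    # A possible final decoding step, assuming config.class_char is an
    # index-ordered sequence of output characters; this join is an
    # illustration, not part of the original script.
    predicted = ''.join(config.class_char[i] for i in label.tolist())
    print('prediction:', predicted)
    print('confidence:', possible.tolist())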
def train(args, vocab_size):
    print('enter train func')
    device = t.device('cuda') if args.use_gpu else t.device('cpu')

    model = Seq2Seq(embed_size=args.embed_size,
                    enc_dec_output_size=args.enc_dec_output_size,
                    attn_size=args.attn_size,
                    num_layers=args.num_layers,
                    bidirectional=args.bidirectional,
                    use_gpu=args.use_gpu,
                    vocab_size=vocab_size).to(device)
    print('Model structure')
    print(model)
    print('The model has %d parameters' % count_parameters(model))

    # default to a fresh run; overridden below when a checkpoint is loaded
    start_epoch = 1
    last_epoch = -1
    if args.load_model_path is not None:
        rev_path = os.path.join(model_dir, args.load_model_path)
        if os.path.exists(rev_path):
            print('read in model from', rev_path)
            last_epoch = model.load(load_path=rev_path,
                                    return_list=['epoch'])[0]
            start_epoch = last_epoch + 1

    optimizer = Adam(model.parameters(), lr=args.lr)
    if args.scheduler_type == 'exponential':
        scheduler = lr_scheduler.ExponentialLR(optimizer,
                                               gamma=args.exponential_lr_decay,
                                               last_epoch=last_epoch)
    elif args.scheduler_type == 'step':
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.step_size,
                                        gamma=args.step_lr_decay)

    # read in the data
    print('read in data')
    batch_size = args.batch_size
    train_set = Set(read_data(args.train_data_root))
    valid_set = Set(read_data(args.valid_data_root))

    # build the dataloaders
    train_loader = Loader(train_set, batch_size, shuffle=True,
                          use_gpu=args.use_gpu,
                          num_workers=args.num_workers).loader
    valid_loader = Loader(valid_set, batch_size, shuffle=False,
                          use_gpu=args.use_gpu,
                          num_workers=args.num_workers).loader

    # report the data scale
    print('data scale:')
    print('train data:', len(train_set), "batch_nums:", len(train_loader))
    print('valid data:', len(valid_set), "batch_nums:", len(valid_loader))

    # train
    print('start training...')
    epochs = args.max_epoch
    for epoch in range(start_epoch, epochs + 1):
        model.train()
        # record state before the epoch starts
        recorder.epoch_start(epoch, 'train', len(train_set))
        if args.scheduler_type is not None:
            print(epoch, 'lr={:.10f}'.format(scheduler.get_lr()[0]))

        for batch_id, batch in enumerate(train_loader):
            encoder_inputs, seq_len, decoder_inputs, weights = batch
            encoder_inputs = encoder_inputs.to(device)
            seq_len = seq_len.to(device)
            decoder_inputs = decoder_inputs.to(device)
            weights = weights.to(device)

            optimizer.zero_grad()
            # third argument: the last token of the longest sentence is
            # EOS_I, which never needs to be fed as input, so dropping it
            # saves a little computation
            logits, output_symbols = model(
                encoder_inputs, seq_len, decoder_inputs[:, :-1],
                mode='train',
                max_len=None,
                teacher_forcing_ratio=args.teacher_forcing_ratio)

            # compute the loss and the perplexity
            nll_loss = compute_loss(logits, decoder_inputs[:, 1:], weights)
            ppl = perplexity(nll_loss)

            # backpropagate and update the parameters
            nll_loss.backward()
            # clip gradients to soften exploding gradients (small trick)
            nn.utils.clip_grad_norm_(model.parameters(),
                                     args.max_gradient_norm)
            optimizer.step()
            recorder.batch_end(batch_id, batch_size, nll_loss, ppl)

        if args.scheduler_type is not None:
            scheduler.step()
        recorder.epoch_end()

        # save the model
        if epoch % 5 == 0:
            model.save(os.path.join(
                model_dir,
                f'{args.project}_{datetime.datetime.now().strftime("%y_%m_%d_%H:%M:%S")}_{nll_loss.item()}_{ppl.item()}'
            ), epoch=epoch)

        # after each training epoch, compute loss and ppl on the validation set
        model.eval()
        with t.no_grad():
            recorder.epoch_start(epoch, 'eval', len(valid_set))
            for batch_id, batch in enumerate(valid_loader):
                encoder_inputs, seq_len, decoder_inputs, weights = batch
                encoder_inputs = encoder_inputs.to(device)
                seq_len = seq_len.to(device)
                decoder_inputs = decoder_inputs.to(device)
                weights = weights.to(device)

                logits, output_symbols = model(
                    encoder_inputs, seq_len, decoder_inputs[:, :-1],
                    mode='eval',
                    max_len=args.max_len,
                    beam_search=args.beam_size > 1,
                    beam_size=args.beam_size,
                    topk=args.topk)

                nll_loss = compute_loss(logits, decoder_inputs[:, 1:], weights)
                ppl = perplexity(nll_loss)
                recorder.batch_end(batch_id, batch_size, nll_loss, ppl)
                recorder.log_text(encoder_inputs.tolist(),
                                  decoder_inputs[:, 1:].tolist(),
                                  output_symbols.tolist())
            recorder.epoch_end()
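# The recorder object used by the loops above is external to these
# snippets; a minimal sketch that only mirrors the interface the code
# calls (epoch_start, batch_end, log_text, epoch_end). The real class
# presumably logs far more.
class Recorder:
    def epoch_start(self, epoch, phase, num_examples):
        self.epoch, self.phase = epoch, phase
        self.total_loss, self.batches = 0., 0

    def batch_end(self, batch_id, batch_size, nll_loss, ppl):
        self.total_loss += nll_loss.item()
        self.batches += 1

    def log_text(self, sources, targets, outputs):
        pass  # hook for dumping input/target/output id sequences

    def epoch_end(self):
        print('[{}] epoch {}: avg loss {:.4f}'.format(
            self.phase, self.epoch, self.total_loss / max(self.batches, 1)))

recorder = Recorder()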
def train():
    print('building vocabulary...')
    voc = Voc()
    print('done')

    print('loading data and building batches...')
    train_set = FruitSeqDataset(voc, dataset_file_path=args.train_file)
    dev_set = FruitSeqDataset(voc, dataset_file_path=args.dev_file)
    # test_set = FruitSeqDataset(voc, dataset_file_path=TEST_FILE_PATH)
    print('done')

    print('building model...')
    seq2seq = Seq2Seq(voc.num_words).to(args.device)
    param_optimizer = args.optimiser(seq2seq.parameters(),
                                     lr=args.learning_rate)
    decoder_optimizer = args.optimiser(seq2seq.decoder.parameters(),
                                       lr=args.learning_rate * args.speaker_ratio)
    if args.param_file is not None:
        print('\tloading saved parameters from ' + args.param_file + '...')
        checkpoint = torch.load(args.param_file)
        seq2seq.load_state_dict(checkpoint['model'])
        param_optimizer.load_state_dict(checkpoint['opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
        voc = checkpoint['voc']
        print('\tdone')
    print('done')

    print('initialising...')
    start_iteration = 1
    print_loss = 0.
    print_seq_acc = 0.
    print_tok_acc = 0.
    max_dev_seq_acc = 0.
    training_losses = []
    training_tok_acc = []
    training_seq_acc = []
    training_sim = []
    eval_tok_acc = []
    eval_seq_acc = []
    print('done')

    print('training...')
    for iter in range(start_iteration, args.iter_num + 1):
        for idx, data_batch in enumerate(train_set):
            seq_acc, tok_acc, loss = train_epoch(seq2seq, data_batch,
                                                 param_optimizer,
                                                 decoder_optimizer)
            print_loss += loss
            print_seq_acc += seq_acc
            print_tok_acc += tok_acc

        if iter % args.print_freq == 0:
            print_loss_avg = print_loss / (args.print_freq * len(train_set))
            print_seq_acc_avg = print_seq_acc / (args.print_freq * len(train_set))
            print_tok_acc_avg = print_tok_acc / (args.print_freq * len(train_set))
            print("Iteration: {}; Percent complete: {:.1f}%; Avg loss: {:.4f}; Avg seq acc: {:.4f}; Avg tok acc: {:.4f}".format(
                iter, iter / args.iter_num * 100, print_loss_avg,
                print_seq_acc_avg, print_tok_acc_avg))
            training_seq_acc.append(print_seq_acc_avg)
            training_tok_acc.append(print_tok_acc_avg)
            training_losses.append(print_loss_avg)
            print_seq_acc = 0.
            print_tok_acc = 0.
            print_loss = 0.

        if iter % args.eval_freq == 0:
            dev_seq_acc, dev_tok_acc, dev_loss = eval_model(seq2seq, dev_set)
            if dev_seq_acc > max_dev_seq_acc:
                max_dev_seq_acc = dev_seq_acc
            eval_seq_acc.append(dev_seq_acc)
            eval_tok_acc.append(dev_tok_acc)
            print("[EVAL]Iteration: {}; Loss: {:.4f}; Avg Seq Acc: {:.4f}; Avg Tok Acc: {:.4f}; Best Seq Acc: {:.4f}".format(
                iter, dev_loss, dev_seq_acc, dev_tok_acc, max_dev_seq_acc))

        if iter % args.save_freq == 0:
            directory = os.path.join(args.save_dir, 'seq2seq')
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'iteration': iter,
                    'model': seq2seq.state_dict(),
                    'opt': param_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'voc': voc,
                    'args': args,
                    'records': {
                        'training_loss': training_losses,
                        'training_tok_acc': training_tok_acc,
                        'training_seq_acc': training_seq_acc,
                        'training_sim': training_sim,
                        'eval_tok_acc': eval_tok_acc,
                        'eval_seq_acc': eval_seq_acc
                    }
                },
                os.path.join(directory,
                             '{}_{}_{}.tar'.format(args.seed, iter, 'checkpoint')))
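# train_epoch is the one piece of the update step not shown; a sketch of a
# single-batch update consistent with its call site, assuming the model's
# forward pass returns (loss, seq_acc, tok_acc) for a batch. That forward
# signature, and stepping both optimizers on the same loss so the decoder
# gets an extra scaled update, are assumptions.
def train_epoch(seq2seq, data_batch, param_optimizer, decoder_optimizer):
    param_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss, seq_acc, tok_acc = seq2seq(data_batch)  # assumed forward signature
    loss.backward()
    param_optimizer.step()    # updates all parameters
    decoder_optimizer.step()  # extra step on decoder params at a scaled lr
    return seq_acc, tok_acc, loss.item()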
                       shuffle=False,
                       batch_size=args.batch_size,
                       device=args.device,
                       is_train=False)

###############################
# get models
###############################
encoder = Encoder(train_loader.train_inputs_vocab.word_counts,
                  args.encoder_embedded_size,
                  args.encoder_hidden_size).to(args.device)
decoder = Decoder(train_loader.train_targets_vocab.word_counts,
                  args.decoder_embedded_size,
                  args.decoder_hidden_size,
                  train_loader.SOS_IDX,
                  train_loader.EOS_IDX,
                  args.teacher_forcing_ratio,
                  args.device).to(args.device)
seq2seq = Seq2Seq(encoder, decoder, args.device)

###############################
# get optimizer
###############################
optimizer = torch.optim.Adam(seq2seq.parameters(), lr=args.learning_rate)

###############################
# check directories exist
###############################
os.makedirs(args.save_dir_path, exist_ok=True)


def main():
    global seq2seq
    if args.load_model:
        ...
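# A minimal sketch of a resume step for the truncated branch above,
# assuming checkpoints are saved as plain state_dict files; the helper name
# and path handling are hypothetical.
def load_checkpoint(path):
    state = torch.load(path, map_location=args.device)
    seq2seq.load_state_dict(state)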
                    default=True,
                    help='Whether to save the model plot'
                         ' after training')
parser.add_argument('--batch_size', type=int,
                    default=constants.DEFAULT_BATCH_SIZE)
args = parser.parse_args()

model_props = {
    'hidden_units': args.hidden_units,
    'embedding_size': args.embedding_size,
    'trainable_embedding': args.train_emb,
    'save_model': args.save_model,
    'save_model_plot': args.save_plot_model,
    'epochs': args.epochs,
}

inp, output = load_preprocessed_data()
X_train, X_test, y_train, y_test = train_test_split(inp, output,
                                                    test_size=0.2,
                                                    random_state=42)
config = load_config()
seq2seq = Seq2Seq(config, model_props)
seq2seq.build_model()
seq2seq.fit(X_train, y_train, X_test, y_test, args.batch_size)
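# load_preprocessed_data and load_config are defined elsewhere; a
# hypothetical stand-in for the former, shown only to make explicit the
# contract the snippet relies on: two aligned sequences that
# train_test_split can split. The .npy paths are illustrative.
import numpy as np

def load_preprocessed_data():
    inp = np.load('data/encoder_inputs.npy')      # path is illustrative
    output = np.load('data/decoder_targets.npy')  # path is illustrative
    return inp, output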