import json
import pickle
import os

import torch

from config import get_train_args
from training import Training
from general_utils import get_logger

args = get_train_args()
if not os.path.exists(args.output_path):
    os.makedirs(args.output_path)

logger = get_logger(args.log_path)
logger.info(json.dumps(args.__dict__, indent=4))

# Reading the int indexed text dataset
train_data = torch.load(os.path.join(args.input, args.save_data + ".train.pth"))
dev_data = torch.load(os.path.join(args.input, args.save_data + ".valid.pth"))
test_data = torch.load(os.path.join(args.input, args.save_data + ".test.pth"))
unlabel_data = torch.load(
    os.path.join(args.input, args.save_data + ".unlabel.pth"))

# Reading the word vocab file
with open(os.path.join(args.input, args.save_data + '.vocab.pickle'),
          'rb') as f:
    id2w = pickle.load(f)

# Reading the label vocab file
with open(os.path.join(args.input, args.save_data + '.label.pickle'),
          'rb') as f:
    id2label = pickle.load(f)  # assumed name; the original is truncated here
# Full training script (logger-based variant).
# NOTE: project-local helpers (get_train_args, get_logger, utils, data,
# NoamAdamTrainer, Yogi, FP16_Module, FP16_Optimizer,
# ExponentialMovingAverage, init_weights, tally_parameters, dp,
# save_checkpoint, save_output, report_func, batch_size_fn, CalculateBleu)
# are assumed to be imported from the surrounding repository.
import itertools
import json
import os
import pickle
import random
from time import time

import numpy as np
import torch


def main():
    best_score = 0
    args = get_train_args()
    logger = get_logger(args.log_path)
    logger.info(json.dumps(args.__dict__, indent=4))

    # Set seed value
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.gpu >= 0:  # gpu ids start at 0; a bare truthiness test skips gpu 0
        torch.cuda.manual_seed_all(args.seed)

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"),
                         allow_pickle=True)
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"),
                       allow_pickle=True)
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"),
                        allow_pickle=True)
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'),
              'rb') as f:
        id2w = pickle.load(f)
    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = eval(args.model)(args)  # resolve the model class by its name
    model.apply(init_weights)
    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    logger.info(model)

    if args.optimizer == 'Noam':
        optimizer = NoamAdamTrainer(model, args)
    elif args.optimizer == 'Adam':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params,
                                     lr=args.learning_rate,
                                     betas=(args.optimizer_adam_beta1,
                                            args.optimizer_adam_beta2),
                                     eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.7, patience=7, verbose=True)
    elif args.optimizer == 'Yogi':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = Yogi(params,
                         lr=args.learning_rate,
                         betas=(args.optimizer_adam_beta1,
                                args.optimizer_adam_beta2),
                         eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.7, patience=7, verbose=True)

    if args.fp16:
        model = FP16_Module(model)
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   dynamic_loss_args={'init_scale': 2 ** 16},
                                   verbose=False)

    ema = ExponentialMovingAverage(decay=args.ema_decay)
    ema.register(model.state_dict())

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            logger.info("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(
                args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // (2 * args.wbatchsize)
    logger.info('Approximate number of iter/epoch = {}'.format(iter_per_epoch))
    time_s = time()

    global_steps = 0
    num_grad_steps = 0
    if args.grad_norm_for_yogi and args.optimizer == 'Yogi':
        # Run one extra pass (epoch -1) to estimate Yogi's initial second
        # moment from the average squared gradient norm.
        args.start_epoch = -1
        l2_norm = 0.0
        parameters = list(
            filter(lambda p: p.requires_grad is True, model.parameters()))
        n_params = sum([p.nelement() for p in parameters])

    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        if args.debug:
            grad_norm = 0.

        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            if args.grad_accumulator_count == 1:
                optimizer.zero_grad()
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.zero_grad()

            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words

            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)

            if len(args.multi_gpu) > 1:
                loss_tuple, stat_tuple = zip(
                    *dp(model, in_arrays, device_ids=args.multi_gpu))
                n_total = sum([obj.n_words.item() for obj in stat_tuple])
                n_correct = sum([obj.n_correct.item() for obj in stat_tuple])
                # Average the per-device losses weighted by word count
                loss = 0
                for l_, s_ in zip(loss_tuple, stat_tuple):
                    loss += l_ * s_.n_words.item()
                loss /= n_total
                stat = utils.Statistics(loss=loss.data.cpu() * n_total,
                                        n_correct=n_correct,
                                        n_words=n_total)
            else:
                loss, stat = model(*in_arrays)

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()

            if epoch == -1 and args.grad_norm_for_yogi \
                    and args.optimizer == 'Yogi':
                # Warm-up pass: only accumulate the squared gradient norm
                l2_norm += (utils.grad_norm(model.parameters()) ** 2) / n_params
                continue

            num_grad_steps += 1
            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    logger.info("> Gradient Norm: %1.4f" %
                                (grad_norm / (num_steps + 1)))

            if args.grad_accumulator_count == 1:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
                num_grad_steps = 0

            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch,
                                       time_s, report_stats,
                                       args.report_every)

            valid_stats = utils.Statistics()
            if global_steps % args.eval_steps == 0:
                with torch.no_grad():
                    dev_iter = data.iterator.pool(
                        dev_data,
                        args.wbatchsize,
                        key=lambda x: (len(x[0]), len(x[1])),
                        batch_size_fn=batch_size_fn,
                        random_shuffler=data.iterator.RandomShuffler())
                    for dev_batch in dev_iter:
                        model.eval()
                        in_arrays = utils.seq2seq_pad_concat_convert(
                            dev_batch, -1)
                        if len(args.multi_gpu) > 1:
                            _, stat_tuple = zip(*dp(
                                model, in_arrays, device_ids=args.multi_gpu))
                            n_total = sum(
                                [obj.n_words.item() for obj in stat_tuple])
                            n_correct = sum(
                                [obj.n_correct.item() for obj in stat_tuple])
                            dev_loss = sum([obj.loss for obj in stat_tuple])
                            stat = utils.Statistics(loss=dev_loss,
                                                    n_correct=n_correct,
                                                    n_words=n_total)
                        else:
                            _, stat = model(*in_arrays)
                        valid_stats.update(stat)

                logger.info('Train perplexity: %g' % train_stats.ppl())
                logger.info('Train accuracy: %g' % train_stats.accuracy())
                logger.info('Validation perplexity: %g' % valid_stats.ppl())
                logger.info('Validation accuracy: %g' % valid_stats.accuracy())

                if args.metric == "accuracy":
                    score = valid_stats.accuracy()
                elif args.metric == "bleu":
                    score, _ = CalculateBleu(model,
                                             dev_data,
                                             'Dev Bleu',
                                             batch=args.batchsize // 4,
                                             beam_size=args.beam_size,
                                             alpha=args.alpha,
                                             max_sent=args.max_sent_eval)(logger)

                # Save the model at most every 2000 global steps
                if not (global_steps % 2000):
                    logger.info('Saving checkpoint')
                    is_best = score > best_score
                    best_score = max(score, best_score)
                    save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'state_dict': model.state_dict(),
                            'state_dict_ema': ema.shadow_variable_dict,
                            'best_score': best_score,
                            'optimizer': optimizer.state_dict(),
                            'opts': args,
                        }, is_best, args.model_file, args.best_model_file)

                if args.optimizer in ('Adam', 'Yogi'):
                    scheduler.step(score)

        if epoch == -1 and args.grad_norm_for_yogi \
                and args.optimizer == 'Yogi':
            optimizer.v_init = l2_norm / (num_steps + 1)
            logger.info("Initializing Yogi Optimizer (v_init = {})".format(
                optimizer.v_init))

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    logger.info("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp)

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp)

    # Loading EMA state dict
    model.load_state_dict(checkpoint['state_dict_ema'])
    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp + '.ema')

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp + '.ema')
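
# `batch_size_fn` is referenced above but defined elsewhere in the repository.
# A minimal sketch, assuming torchtext's pool() contract
# batch_size_fn(new_example, count_in_batch, size_so_far) -> new size_so_far,
# and that `wbatchsize` caps the number of words (not sentences) per batch;
# the repository's actual definition may differ:
def batch_size_fn(new, count, size_so_far):
    # Count batch size in total source + target tokens
    return size_so_far + len(new[0]) + len(new[1])


# Assumed entry point for this script
if __name__ == '__main__':
    main()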
# An earlier, print-based variant of the same training script; it relies on
# the same imports plus the repo-local `net` and `optim` modules.
def main():
    best_score = 0
    args = get_train_args()
    print(json.dumps(args.__dict__, indent=4))

    # Reading the int indexed text dataset
    # (allow_pickle=True is required for object arrays on newer numpy)
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"),
                         allow_pickle=True)
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"),
                       allow_pickle=True)
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"),
                        allow_pickle=True)
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'),
              'rb') as f:
        id2w = pickle.load(f)
    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = net.Transformer(args)
    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    print(model)

    optimizer = optim.TransformerAdamTrainer(model, args)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            print("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // args.wbatchsize
    print('Approximate number of iter/epoch =', iter_per_epoch)
    time_s = time()

    global_steps = 0
    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: data.utils.interleave_keys(len(x[0]), len(x[1])),
            batch_size_fn=batch_size_func,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        valid_stats = utils.Statistics()
        if args.debug:
            grad_norm = 0.
        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            optimizer.zero_grad()

            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words

            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)
            loss, stat = model(*in_arrays)
            loss.backward()

            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    print("> Gradient Norm: %1.4f" %
                          (grad_norm / (num_steps + 1)))

            optimizer.step()
            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch,
                                       time_s, report_stats,
                                       args.report_every)

            if (global_steps + 1) % args.eval_steps == 0:
                with torch.no_grad():  # no gradients needed for evaluation
                    dev_iter = data.iterator.pool(
                        dev_data,
                        args.wbatchsize,
                        key=lambda x: data.utils.interleave_keys(
                            len(x[0]), len(x[1])),
                        batch_size_fn=batch_size_func,
                        random_shuffler=data.iterator.RandomShuffler())
                    for dev_batch in dev_iter:
                        model.eval()
                        in_arrays = utils.seq2seq_pad_concat_convert(
                            dev_batch, -1)
                        _, stat = model(*in_arrays)
                        valid_stats.update(stat)

                print('Train perplexity: %g' % train_stats.ppl())
                print('Train accuracy: %g' % train_stats.accuracy())
                print('Validation perplexity: %g' % valid_stats.ppl())
                print('Validation accuracy: %g' % valid_stats.accuracy())

                bleu_score, _ = CalculateBleu(model,
                                              dev_data,
                                              'Dev Bleu',
                                              batch=args.batchsize // 4,
                                              beam_size=args.beam_size,
                                              alpha=args.alpha,
                                              max_sent=args.max_sent_eval)()
                if args.metric == "bleu":
                    score = bleu_score
                elif args.metric == "accuracy":
                    score = valid_stats.accuracy()

                is_best = score > best_score
                best_score = max(score, best_score)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_score': best_score,
                        'optimizer': optimizer.state_dict(),
                        'opts': args,
                    }, is_best, args.model_file, args.best_model_file)

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    print("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    print('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha)()
    save_output(dev_hyp, id2w, args.dev_hyp)

    print('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha)()
    save_output(test_hyp, id2w, args.test_hyp)
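
# `save_output` is referenced above but defined elsewhere in the repository.
# A minimal sketch, assuming each hypothesis is a list of token ids that is
# mapped back to words via `id2w` and written one sentence per line:
def save_output(hypotheses, id2w, out_path):
    with open(out_path, 'w') as f:
        for hyp in hypotheses:
            f.write(' '.join(id2w[t] for t in hyp) + '\n')


# Assumed entry point for this script
if __name__ == '__main__':
    main()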
# Separate script: k-fold GAIN_BERT classification. The fragment below starts
# inside the evaluation function's per-batch loop; it assumes
# `from sklearn.metrics import f1_score` plus the repo-local k_fold_split,
# get_data, GAIN_BERT, and get_train_args.
        batch = label.shape[0]
        for i in range(batch):
            f.write(id[i] + "\t" + str(label[i].item()) + "\t" +
                    str(predicted[i].item()) + "\n")

    f1_micro = f1_score(y_true, y_pred, labels=[0, 1, 2], average='micro')
    f1_macro = f1_score(y_true, y_pred, labels=[0, 1, 2], average='macro')
    if filepath is not None:
        f.write("f1_micro: " + str(f1_micro) + "\n")
        f.write("f1_macro: " + str(f1_macro) + "\n")
        f.close()
    return f1_micro, f1_macro


if __name__ == '__main__':
    opt = get_train_args()
    if opt.gpu:
        torch.cuda.manual_seed(0)
    else:
        torch.manual_seed(0)

    index, label2idx = k_fold_split(opt.data_path, opt.k_fold)
    f1_micro_list = []
    f1_macro_list = []
    for i in range(opt.k_fold):
        model_path = opt.model_path + "_" + str(i) + ".pt"
        output_path = opt.output_path + "_" + str(i) + ".txt"
        trainloader, testloader = get_data(opt, label2idx, index[i])
        model = GAIN_BERT(opt, len(label2idx))
        # if opt.gpu:
        #     model = nn.DataParallel(model)
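
        # The fragment ends here, mid-loop. A minimal sketch of the assumed
        # continuation: run each fold and aggregate F1. `run_fold` is a
        # hypothetical stand-in for the elided per-fold training/evaluation
        # call; only the f1 lists and k_fold come from the fragment itself.
        f1_micro, f1_macro = run_fold(opt, model, trainloader, testloader,
                                      model_path, output_path)  # hypothetical
        f1_micro_list.append(f1_micro)
        f1_macro_list.append(f1_macro)

    print("mean f1_micro:", sum(f1_micro_list) / opt.k_fold)
    print("mean f1_macro:", sum(f1_macro_list) / opt.k_fold)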