Example #1
import json
import pickle
import os
import torch

from config import get_train_args
from training import Training
from general_utils import get_logger

args = get_train_args()
if not os.path.exists(args.output_path):
    os.makedirs(args.output_path)
logger = get_logger(args.log_path)
logger.info(json.dumps(args.__dict__, indent=4))

# Reading the int indexed text dataset
train_data = torch.load(os.path.join(args.input,
                                     args.save_data + ".train.pth"))
dev_data = torch.load(os.path.join(args.input, args.save_data + ".valid.pth"))
test_data = torch.load(os.path.join(args.input, args.save_data + ".test.pth"))
unlabel_data = torch.load(
    os.path.join(args.input, args.save_data + ".unlabel.pth"))

# Reading the word vocab file
with open(os.path.join(args.input, args.save_data + '.vocab.pickle'),
          'rb') as f:
    id2w = pickle.load(f)

# Reading the label vocab file
with open(os.path.join(args.input, args.save_data + '.label.pickle'),
          'rb') as f:
    id2label = pickle.load(f)  # variable name assumed; the source snippet is truncated here
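
For readers who want to run these snippets, get_logger comes from general_utils, which is not included in the listing. A minimal stand-in with the same call signature (log file path in, configured logger out) could look like the following; this is a sketch, not the repository's actual implementation:

import logging

def get_logger(log_path):
    # Minimal file + console logger, matching how the snippets call get_logger(args.log_path).
    logger = logging.getLogger('train')
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        for handler in (logging.FileHandler(log_path), logging.StreamHandler()):
            handler.setFormatter(formatter)
            logger.addHandler(handler)
    return logger
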
Example #2
def main():
    best_score = 0
    args = get_train_args()
    logger = get_logger(args.log_path)
    logger.info(json.dumps(args.__dict__, indent=4))

    # Set seed value
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    # args.gpu is a device id; >= 0 means CUDA (consistent with the check further down)
    if args.gpu >= 0:
        torch.cuda.manual_seed_all(args.seed)

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"),
                         allow_pickle=True)
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"),
                       allow_pickle=True)
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"),
                        allow_pickle=True)
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'),
              'rb') as f:
        id2w = pickle.load(f)

    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = eval(args.model)(args)
    model.apply(init_weights)

    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    logger.info(model)

    if args.optimizer == 'Noam':
        optimizer = NoamAdamTrainer(model, args)
    elif args.optimizer == 'Adam':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(params,
                                     lr=args.learning_rate,
                                     betas=(args.optimizer_adam_beta1,
                                            args.optimizer_adam_beta2),
                                     eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='max',
                                                               factor=0.7,
                                                               patience=7,
                                                               verbose=True)
    elif args.optimizer == 'Yogi':
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = Yogi(params,
                         lr=args.learning_rate,
                         betas=(args.optimizer_adam_beta1,
                                args.optimizer_adam_beta2),
                         eps=args.optimizer_adam_epsilon)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='max',
                                                               factor=0.7,
                                                               patience=7,
                                                               verbose=True)

    if args.fp16:
        model = FP16_Module(model)
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale,
                                   dynamic_loss_args={'init_scale': 2**16},
                                   verbose=False)

    ema = ExponentialMovingAverage(decay=args.ema_decay)
    ema.register(model.state_dict())

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            logger.info("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(
                args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // (2 *
                                                             args.wbatchsize)
    logger.info('Approximate number of iter/epoch = {}'.format(iter_per_epoch))
    time_s = time()

    global_steps = 0
    num_grad_steps = 0
    if args.grad_norm_for_yogi and args.optimizer == 'Yogi':
        args.start_epoch = -1
        l2_norm = 0.0
        parameters = list(
            filter(lambda p: p.requires_grad is True, model.parameters()))
        n_params = sum([p.nelement() for p in parameters])

    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        if args.debug:
            grad_norm = 0.
        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            if args.grad_accumulator_count == 1:
                optimizer.zero_grad()
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.zero_grad()
            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words
            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)
            if len(args.multi_gpu) > 1:
                loss_tuple, stat_tuple = zip(
                    *dp(model, in_arrays, device_ids=args.multi_gpu))
                n_total = sum([obj.n_words.item() for obj in stat_tuple])
                n_correct = sum([obj.n_correct.item() for obj in stat_tuple])
                loss = 0
                for l_, s_ in zip(loss_tuple, stat_tuple):
                    loss += l_ * s_.n_words.item()
                loss /= n_total
                stat = utils.Statistics(loss=loss.data.cpu() * n_total,
                                        n_correct=n_correct,
                                        n_words=n_total)
            else:
                loss, stat = model(*in_arrays)

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()
            if epoch == -1 and args.grad_norm_for_yogi and args.optimizer == 'Yogi':
                l2_norm += (utils.grad_norm(model.parameters())**2) / n_params
                continue
            num_grad_steps += 1
            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    logger.info("> Gradient Norm: %1.4f" % (grad_norm /
                                                            (num_steps + 1)))
            if args.grad_accumulator_count == 1:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
            elif num_grad_steps % args.grad_accumulator_count == 0:
                optimizer.step()
                ema.apply(model.state_dict(keep_vars=True))
                num_grad_steps = 0
            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch,
                                       time_s, report_stats, args.report_every)

            valid_stats = utils.Statistics()
            if global_steps % args.eval_steps == 0:
                with torch.no_grad():
                    dev_iter = data.iterator.pool(
                        dev_data,
                        args.wbatchsize,
                        key=lambda x: (len(x[0]), len(x[1])),
                        batch_size_fn=batch_size_fn,
                        random_shuffler=data.iterator.RandomShuffler())

                    for dev_batch in dev_iter:
                        model.eval()
                        in_arrays = utils.seq2seq_pad_concat_convert(
                            dev_batch, -1)
                        if len(args.multi_gpu) > 1:
                            _, stat_tuple = zip(*dp(
                                model, in_arrays, device_ids=args.multi_gpu))
                            n_total = sum(
                                [obj.n_words.item() for obj in stat_tuple])
                            n_correct = sum(
                                [obj.n_correct.item() for obj in stat_tuple])
                            dev_loss = sum([obj.loss for obj in stat_tuple])
                            stat = utils.Statistics(loss=dev_loss,
                                                    n_correct=n_correct,
                                                    n_words=n_total)
                        else:
                            _, stat = model(*in_arrays)
                        valid_stats.update(stat)

                    logger.info('Train perplexity: %g' % train_stats.ppl())
                    logger.info('Train accuracy: %g' % train_stats.accuracy())

                    logger.info('Validation perplexity: %g' %
                                valid_stats.ppl())
                    logger.info('Validation accuracy: %g' %
                                valid_stats.accuracy())

                    if args.metric == "accuracy":
                        score = valid_stats.accuracy()
                    elif args.metric == "bleu":
                        score, _ = CalculateBleu(
                            model,
                            dev_data,
                            'Dev Bleu',
                            batch=args.batchsize // 4,
                            beam_size=args.beam_size,
                            alpha=args.alpha,
                            max_sent=args.max_sent_eval)(logger)

                    # Save a checkpoint every 2000 global steps
                    if not (global_steps % 2000):
                        logger.info('Saving checkpoint')
                        is_best = score > best_score
                        best_score = max(score, best_score)
                        save_checkpoint(
                            {
                                'epoch': epoch + 1,
                                'state_dict': model.state_dict(),
                                'state_dict_ema': ema.shadow_variable_dict,
                                'best_score': best_score,
                                'optimizer': optimizer.state_dict(),
                                'opts': args,
                            }, is_best, args.model_file, args.best_model_file)

                    if args.optimizer == 'Adam' or args.optimizer == 'Yogi':
                        scheduler.step(score)

        if epoch == -1 and args.grad_norm_for_yogi and args.optimizer == 'Yogi':
            optimizer.v_init = l2_norm / (num_steps + 1)
            logger.info("Initializing Yogi Optimizer (v_init = {})".format(
                optimizer.v_init))

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    logger.info("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp)

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp)

    # Loading EMA state dict
    model.load_state_dict(checkpoint['state_dict_ema'])
    logger.info('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha,
                               max_decode_len=args.max_decode_len)(logger)
    save_output(dev_hyp, id2w, args.dev_hyp + '.ema')

    logger.info('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha,
                                max_decode_len=args.max_decode_len)(logger)
    save_output(test_hyp, id2w, args.test_hyp + '.ema')
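
Example #2 relies on an ExponentialMovingAverage helper that is not part of the snippet. Below is a minimal sketch consistent with the calls above (register, apply, and the shadow_variable_dict attribute stored in the checkpoint); it illustrates the usual EMA pattern and is not the repository's implementation:

class ExponentialMovingAverage:
    # Keeps shadow copies of model parameters as an exponential moving average.
    def __init__(self, decay=0.999):
        self.decay = decay
        self.shadow_variable_dict = {}

    def register(self, state_dict):
        # Snapshot every floating-point tensor in the state dict.
        for name, param in state_dict.items():
            if param.dtype.is_floating_point:
                self.shadow_variable_dict[name] = param.detach().clone()

    def apply(self, state_dict):
        # shadow = decay * shadow + (1 - decay) * current
        for name, param in state_dict.items():
            if name in self.shadow_variable_dict:
                shadow = self.shadow_variable_dict[name]
                shadow.mul_(self.decay).add_(param.detach(), alpha=1.0 - self.decay)
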
Example #3
def main():
    best_score = 0
    args = get_train_args()
    print(json.dumps(args.__dict__, indent=4))

    # Reading the int indexed text dataset
    train_data = np.load(os.path.join(args.input, args.data + ".train.npy"))
    train_data = train_data.tolist()
    dev_data = np.load(os.path.join(args.input, args.data + ".valid.npy"))
    dev_data = dev_data.tolist()
    test_data = np.load(os.path.join(args.input, args.data + ".test.npy"))
    test_data = test_data.tolist()

    # Reading the vocab file
    with open(os.path.join(args.input, args.data + '.vocab.pickle'),
              'rb') as f:
        id2w = pickle.load(f)

    args.id2w = id2w
    args.n_vocab = len(id2w)

    # Define Model
    model = net.Transformer(args)

    tally_parameters(model)
    if args.gpu >= 0:
        model.cuda(args.gpu)
    print(model)

    optimizer = optim.TransformerAdamTrainer(model, args)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_file):
            print("=> loading checkpoint '{}'".format(args.model_file))
            checkpoint = torch.load(args.model_file)
            args.start_epoch = checkpoint['epoch']
            best_score = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_file, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.model_file))

    src_data, trg_data = list(zip(*train_data))
    total_src_words = len(list(itertools.chain.from_iterable(src_data)))
    total_trg_words = len(list(itertools.chain.from_iterable(trg_data)))
    iter_per_epoch = (total_src_words + total_trg_words) // args.wbatchsize
    print('Approximate number of iter/epoch =', iter_per_epoch)
    time_s = time()

    global_steps = 0
    for epoch in range(args.start_epoch, args.epoch):
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            args.wbatchsize,
            key=lambda x: data.utils.interleave_keys(len(x[0]), len(x[1])),
            batch_size_fn=batch_size_func,
            random_shuffler=data.iterator.RandomShuffler())
        report_stats = utils.Statistics()
        train_stats = utils.Statistics()
        valid_stats = utils.Statistics()

        if args.debug:
            grad_norm = 0.
        for num_steps, train_batch in enumerate(train_iter):
            global_steps += 1
            model.train()
            optimizer.zero_grad()
            src_iter = list(zip(*train_batch))[0]
            src_words = len(list(itertools.chain.from_iterable(src_iter)))
            report_stats.n_src_words += src_words
            train_stats.n_src_words += src_words
            in_arrays = utils.seq2seq_pad_concat_convert(train_batch, -1)
            loss, stat = model(*in_arrays)
            loss.backward()
            if args.debug:
                norm = utils.grad_norm(model.parameters())
                grad_norm += norm
                if global_steps % args.report_every == 0:
                    print("> Gradient Norm: %1.4f" % (grad_norm /
                                                      (num_steps + 1)))
            optimizer.step()

            report_stats.update(stat)
            train_stats.update(stat)
            report_stats = report_func(epoch, num_steps, iter_per_epoch,
                                       time_s, report_stats, args.report_every)

            if (global_steps + 1) % args.eval_steps == 0:
                dev_iter = data.iterator.pool(
                    dev_data,
                    args.wbatchsize,
                    key=lambda x: data.utils.interleave_keys(
                        len(x[0]), len(x[1])),
                    batch_size_fn=batch_size_func,
                    random_shuffler=data.iterator.RandomShuffler())

                for dev_batch in dev_iter:
                    model.eval()
                    in_arrays = utils.seq2seq_pad_concat_convert(dev_batch, -1)
                    loss_test, stat = model(*in_arrays)
                    valid_stats.update(stat)

                print('Train perplexity: %g' % train_stats.ppl())
                print('Train accuracy: %g' % train_stats.accuracy())

                print('Validation perplexity: %g' % valid_stats.ppl())
                print('Validation accuracy: %g' % valid_stats.accuracy())

                bleu_score, _ = CalculateBleu(model,
                                              dev_data,
                                              'Dev Bleu',
                                              batch=args.batchsize // 4,
                                              beam_size=args.beam_size,
                                              alpha=args.alpha,
                                              max_sent=args.max_sent_eval)()
                if args.metric == "bleu":
                    score = bleu_score
                elif args.metric == "accuracy":
                    score = valid_stats.accuracy()

                is_best = score > best_score
                best_score = max(score, best_score)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_score': best_score,
                        'optimizer': optimizer.state_dict(),
                        'opts': args,
                    }, is_best, args.model_file, args.best_model_file)

    # BLEU score on Dev and Test Data
    checkpoint = torch.load(args.best_model_file)
    print("=> loaded checkpoint '{}' (epoch {}, best score {})".format(
        args.best_model_file, checkpoint['epoch'], checkpoint['best_score']))
    model.load_state_dict(checkpoint['state_dict'])

    print('Dev Set BLEU Score')
    _, dev_hyp = CalculateBleu(model,
                               dev_data,
                               'Dev Bleu',
                               batch=args.batchsize // 4,
                               beam_size=args.beam_size,
                               alpha=args.alpha)()
    save_output(dev_hyp, id2w, args.dev_hyp)

    print('Test Set BLEU Score')
    _, test_hyp = CalculateBleu(model,
                                test_data,
                                'Test Bleu',
                                batch=args.batchsize // 4,
                                beam_size=args.beam_size,
                                alpha=args.alpha)()
    save_output(test_hyp, id2w, args.test_hyp)
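
Examples #2 and #3 both call a save_checkpoint helper that is not shown in the listing. The usual pattern is to persist the training state and, when the score improved, copy it to a separate best-model file; here is a minimal sketch assuming the signature used in the calls above:

import shutil
import torch

def save_checkpoint(state, is_best, model_file, best_model_file):
    # Persist the full training state, then keep a copy of the best checkpoint so far.
    torch.save(state, model_file)
    if is_best:
        shutil.copyfile(model_file, best_model_file)
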
Example #4
                batch = label.shape[0]
                for i in range(batch):
                    f.write(id[i] + "\t" + str(label[i].item()) + "\t" +
                            str(predicted[i].item()) + "\n")

    f1_micro = f1_score(y_true, y_pred, labels=[0, 1, 2], average='micro')
    f1_macro = f1_score(y_true, y_pred, labels=[0, 1, 2], average='macro')
    if filepath is not None:
        f.write("f1_micro: " + str(f1_micro) + "\n")
        f.write("f1_macro: " + str(f1_macro) + "\n")
        f.close()
    return f1_micro, f1_macro


if __name__ == '__main__':
    opt = get_train_args()
    if opt.gpu:
        torch.cuda.manual_seed(0)
    else:
        torch.manual_seed(0)

    index, label2idx = k_fold_split(opt.data_path, opt.k_fold)
    f1_micro_list = []
    f1_macro_list = []
    for i in range(opt.k_fold):
        model_path = opt.model_path + "_" + str(i) + ".pt"
        output_path = opt.output_path + "_" + str(i) + ".txt"
        trainloader, testloader = get_data(opt, label2idx, index[i])
        model = GAIN_BERT(opt, len(label2idx))
        # if opt.gpu:
        # model = nn.DataParallel(model)
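
The listing is cut off here. Once each fold has been trained and evaluated, its scores would typically be appended to f1_micro_list and f1_macro_list and averaged after the loop. A hypothetical closing step, not part of the original snippet:

        # Hypothetical continuation: evaluate the fold and collect its scores.
        # f1_micro, f1_macro = ...  (per-fold training and evaluation, elided in the source)
        # f1_micro_list.append(f1_micro)
        # f1_macro_list.append(f1_macro)

    # Report the cross-fold averages once all folds have been processed.
    print("k-fold mean f1_micro:", sum(f1_micro_list) / len(f1_micro_list))
    print("k-fold mean f1_macro:", sum(f1_macro_list) / len(f1_macro_list))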