Example #1
def main():
    print("Generating data...", end="")
    voc_size = args.vocab_sz
    inp = np.arange(2, voc_size, 2)
    tgt = np.arange(3, voc_size, 2)
    data_x, data_y = get_numbers(inp, tgt)
    train_len = int(len(data_x) * 0.9)
    train_x, val_x = data_x[:train_len], data_x[train_len:]
    train_y, val_y = data_y[:train_len], data_y[train_len:]
    print("Done")

    print("Setting model...", end="")
    model = TransformerModel(
        input_sz=voc_size,
        output_sz=voc_size,
        d_model=args.d_model,
        nhead=args.n_head,
        num_encoder_layers=args.n_encoder_layers,
        num_decoder_layers=args.n_decoder_layers,
        dim_feedforward=args.dim_feedforward,
        dropout=args.dropout,
    )
    if args.load_dir != ".":
        model.load_state_dict(flow.load(args.load_dir))
    model = to_cuda(model)
    criterion = to_cuda(nn.CrossEntropyLoss())

    optimizer = flow.optim.Adam(model.parameters(), lr=args.lr)
    print("Done")

    print("Training...")

    min_loss = float("inf")
    for i in range(1, args.n_epochs + 1):
        epoch_loss = train(model, criterion, optimizer, train_x, train_y)
        epoch_loss_val = validation(model, criterion, val_x, val_y)
        print("epoch: {} train loss: {}".format(i, epoch_loss))
        print("epoch: {} val loss: {}".format(i, epoch_loss_val))
        if epoch_loss < min_loss:
            min_loss = epoch_loss  # remember the best loss so worse epochs don't overwrite the checkpoint
            if not os.path.exists(args.save_dir):
                os.mkdir(args.save_dir)
            else:
                # flow.save writes into a directory; clear the previous checkpoint first
                shutil.rmtree(args.save_dir)
                assert not os.path.exists(args.save_dir)
                os.mkdir(args.save_dir)
            flow.save(model.state_dict(), args.save_dir)
        if i % 3 == 2:
            print(test(model, test_times=10))
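The `train`, `validation`, and `test` helpers are external to this snippet. A minimal sketch of the first two, assuming the model takes `(src, tgt)` batches, `to_cuda` moves tensors to the GPU, and OneFlow's PyTorch-style autograd API; the batching and teacher-forcing details are assumptions, not the original implementation:

def train(model, criterion, optimizer, train_x, train_y, batch_size=128):
    # hypothetical helper: one pass over the data, returns the mean batch loss
    model.train()
    total_loss, n_batches = 0.0, 0
    for b in range(0, len(train_x), batch_size):
        src = to_cuda(flow.tensor(train_x[b:b + batch_size]))
        tgt = to_cuda(flow.tensor(train_y[b:b + batch_size]))
        optimizer.zero_grad()
        out = model(src, tgt)  # (batch, seq, vocab) logits
        loss = criterion(out.reshape(-1, out.shape[-1]), tgt.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss, n_batches = total_loss + float(loss.numpy()), n_batches + 1
    return total_loss / max(n_batches, 1)

def validation(model, criterion, val_x, val_y, batch_size=128):
    # hypothetical helper: the same loop without gradient tracking or updates
    model.eval()
    total_loss, n_batches = 0.0, 0
    with flow.no_grad():
        for b in range(0, len(val_x), batch_size):
            src = to_cuda(flow.tensor(val_x[b:b + batch_size]))
            tgt = to_cuda(flow.tensor(val_y[b:b + batch_size]))
            out = model(src, tgt)
            loss = criterion(out.reshape(-1, out.shape[-1]), tgt.reshape(-1))
            total_loss, n_batches = total_loss + float(loss.numpy()), n_batches + 1
    return total_loss / max(n_batches, 1)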
Example #2
def train(opt, train_data, eval_data=None):
    logger.info("start training task")
    dim_input = 6
    dim_emb = 64
    num_class = train_data.num_class
    transformer_nhead = 2
    transformer_nlayers = 1
    model = TransformerModel(dim_input, dim_emb, transformer_nhead,
                             num_class, transformer_nlayers)
    # `model.cuda` is a bound method and therefore always truthy; test for a GPU instead
    if torch.cuda.is_available():
        model = move_to_gpu(model)
    summary(model, train_data[0]['x'].shape)
    try:
        dataloader = DataLoader(
            train_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=4
        )
        logger.info("create training dataloader")
    except Exception as e:
        logger.error("fail to create dataloader: %s", e)
        raise  # the training loop below cannot run without a dataloader

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer=model.optimizer, milestones=[5, 10], gamma=0.1)

    model_path = os.path.join(opt.model_dir, opt.model_name + ".pth")
    global_steps = 0
    best = 0
    for epoch in tqdm(list(range(opt.epoch)), desc='epoch'):
        for step, batch in enumerate(dataloader):
            global_steps += 1
            metrics = model.train(batch)
            if global_steps % opt.log_steps == 0:
                logger.debug(f"global steps={global_steps},{metrics}")
            if global_steps % opt.save_steps == 0:
                val_metrics, eval_result = eval(opt, model, eval_data)
                logger.info(f"global steps={global_steps}, current={val_metrics}, best={best}, result={eval_result}")
                if val_metrics > best:
                    best = val_metrics
                    torch.save(model.state_dict(), model_path)
                    logger.info(f"global steps={global_steps}, save model:{model_path}")
        lr_scheduler.step()
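The `eval` helper called above returns a scalar metric plus a detailed result. A minimal sketch of that contract, assuming dict-style batches (the snippet indexes `train_data[0]['x']`), an assumed `'y'` label key, and an assumed `model.predict` method on the same wrapper that exposes `model.train(batch)` and `model.optimizer`:

def eval(opt, model, eval_data):
    # hypothetical helper: returns (scalar metric, detailed results)
    dataloader = DataLoader(eval_data, batch_size=opt.batch_size, shuffle=False)
    correct, total = 0, 0
    for batch in dataloader:
        preds = model.predict(batch)  # assumed inference entry point
        correct += (preds == batch['y']).sum().item()
        total += len(batch['y'])
    accuracy = correct / max(total, 1)
    return accuracy, {"accuracy": accuracy}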
Example #3
def main(model_name=None, hidden=64, nlayers=1):
    voc_size = 10000
    inp = arange(2, voc_size, 2)
    tgt = arange(3, voc_size, 2)
    batch_size = 128
    epochs = 30
    dataset = NumberLoader(inp, tgt)
    train_len = int(len(dataset) * 0.9)
    val_len = len(dataset) - train_len
    train_set, val_set = random_split(dataset, [train_len, val_len])
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=False,  # no need to shuffle validation data
                            num_workers=1)
    model = TransformerModel(voc_size,
                             voc_size,
                             hidden=hidden,
                             nlayers=nlayers)
    if model_name is not None:
        model.load_state_dict(load(model_name))
    model = model.cuda()
    # optimizer = optim.SGD(model.parameters(), lr=0.5)
    optimizer = optim.Adam(model.parameters())
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
    criterion = nn.CrossEntropyLoss()
    best_loss = float("inf")
    for i in range(epochs):
        epoch_loss = train(model, criterion, optimizer, train_loader)
        epoch_loss_val = validation(model, criterion, val_loader)
        # scheduler.step()
        print("epoch: {} train loss: {}".format(i, epoch_loss))
        print("epoch: {} val loss: {}".format(i, epoch_loss_val))
        if epoch_loss_val < best_loss:
            best_loss = epoch_loss_val
            os.makedirs("model", exist_ok=True)  # make sure the checkpoint directory exists
            model_name = "model/model_{0:.5f}.pt".format(epoch_loss_val)
            save(model.state_dict(), model_name)
    return model_name
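`NumberLoader` is external to the snippet; the data pairs each even number with the next odd number. One possible minimal implementation, assuming each sample is a single (input, target) pair wrapped as tensors (the real dataset may build longer sequences):

import torch
from torch.utils.data import Dataset

class NumberLoader(Dataset):
    # hypothetical dataset: element i maps inp[i] (even) to tgt[i] (the odd number after it)
    def __init__(self, inp, tgt):
        assert len(inp) == len(tgt)
        self.inp, self.tgt = inp, tgt

    def __len__(self):
        return len(self.inp)

    def __getitem__(self, idx):
        return torch.tensor(self.inp[idx]), torch.tensor(self.tgt[idx])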
Example #4
    for batch in pb:
        record_loss, perplexity = train_one_iter(batch, fp16=True)
        update_count += 1

        if update_count % num_gradients_accumulation == num_gradients_accumulation - 1:
            scheduler.step()
            optimizer.step()
            optimizer.zero_grad()

            # speed measure
            end = time.time()
            speed = batch_size * num_gradients_accumulation / (end - start)
            start = end

            pb.set_postfix(loss=record_loss,
                           perplexity=perplexity,
                           speed=speed)

    "Evaluation"
    encoder.eval()
    decoder.eval()
    ppl = validate(val_dataloader)
    checkpointer.save_checkpoint(str(ep),
                                 {"encoder": encoder.state_dict(),
                                  "decoder": decoder.state_dict()},
                                 {"empty": None},
                                 is_best_so_far=True)

    logger.info(f"a={a} b={b} Epoch {ep} Validation perplexity: {ppl}")

logger.info(f"Finish training of alpha={a} beta={b}")
Example #5
    saved_loss = checkpoint['loss']

    if saved_loss < best_val_loss:
        best_val_loss = saved_loss

while epoch < epochs + 1:
    epoch_start_time = time.time()
    train(model, bptt, device, train_data, optimizer, criterion, ntokens,
          scheduler, epoch)
    val_loss = evaluate(model, val_data, bptt, device, ntokens, criterion)
    epoch_losses = np.append(epoch_losses, val_loss)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
          'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                     val_loss, math.exp(val_loss)))
    print('-' * 89)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = copy.deepcopy(model)  # snapshot via the copy module; plain assignment would alias the live model
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': val_loss,
            }, MODEL_SAVE_NAME)

    epoch = epoch + 1
    scheduler.step()
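`evaluate` follows the conventions of the PyTorch word-level language-model tutorial here. A minimal sketch consistent with the call above, assuming the tutorial's `get_batch` helper and a `(seq_len, batch)` data layout:

def evaluate(model, data_source, bptt, device, ntokens, criterion):
    # hypothetical helper: mean cross-entropy over the evaluation corpus
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i, bptt)  # assumed helper
            output = model(data.to(device))
            total_loss += len(data) * criterion(
                output.view(-1, ntokens), targets.to(device)).item()
    return total_loss / (len(data_source) - 1)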
Example #6
def main():
    ### settings
    args = set_args()
    save_path = args.save_path
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    # `timestr` is used in the checkpoint names below but never defined in this
    # snippet; a timestamp along these lines is assumed
    timestr = time.strftime("%Y%m%d-%H%M%S")
    logger.info(args)

    ### prepare the data
    train_dataset = COCOMultiLabel(args,
                                   train=True,
                                   image_path=args.image_path)
    test_dataset = COCOMultiLabel(args,
                                  train=False,
                                  image_path=args.image_path)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              pin_memory=True,
                              shuffle=True,
                              drop_last=True,
                              collate_fn=my_collate)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             num_workers=args.num_workers,
                             pin_memory=True,
                             shuffle=False,
                             drop_last=False,
                             collate_fn=my_collate)

    ## prepare the models
    encoder = CNN_Encoder().cuda()
    decoder = TransformerModel(args).cuda()
    ## load weights differently for training vs. evaluation-only
    if args.use_eval:
        weights_dic = torch.load(args.use_model)
        encoder.load_state_dict(
            convert_weights(weights_dic['encoder_state_dict']))
        decoder.load_state_dict(
            convert_weights(weights_dic['decoder_state_dict']))
    else:
        encoder.load_state_dict(
            convert_weights(torch.load(args.encoder_weights)))
        encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                             lr=args.encoder_lr)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                             lr=args.decoder_lr)

    ## use DataParallel when multiple GPUs are available
    if torch.cuda.device_count() > 1:
        encoder = nn.DataParallel(encoder)
        decoder = nn.DataParallel(decoder)

    ## set up the hinge loss function (size_average/reduce are deprecated; reduction covers both)
    loss_hinge = torch.nn.HingeEmbeddingLoss(margin=args.C, reduction='mean')

    ## if only evaluation, return"
    if args.use_eval:
        f1 = test(args, encoder, decoder, test_loader, args.threshold, 1)
        return

    ##  training stage
    highest_f1 = 0
    epochs_without_improve = 0
    for epoch in range(args.epochs):
        ## train and test
        train(args, encoder, decoder, train_loader, encoder_optimizer,
              decoder_optimizer, epoch, loss_hinge)
        f1 = test(args, encoder, decoder, test_loader, args.threshold, epoch)

        ### save parameter
        save_dict = {
            'encoder_state_dict': encoder.state_dict(),
            'decoder_state_dict': decoder.state_dict(),
            'epoch': epoch,
            'f1': f1,
            'decoder_optimizer_state_dict': decoder_optimizer.state_dict(),
            'encoder_optimizer_state_dict': encoder_optimizer.state_dict(),
            'epochs_without_improve': epochs_without_improve
        }

        ### save models
        torch.save(save_dict,
                   os.path.join(args.save_path, "checkpoint_" + timestr + ".pt.tar"))
        if f1 > highest_f1:
            torch.save(
                save_dict,
                os.path.join(args.save_path, "BEST_checkpoint_" + timestr + ".pt.tar"))
            logger.info("New best f1: {:.2f} (previous best: {:.2f})".format(
                100 * f1, 100 * highest_f1))
            highest_f1 = f1
            epochs_without_improve = 0
        else:
            epochs_without_improve += 1
            if epochs_without_improve == 3:
                adjust_learning_rate(decoder_optimizer, args.coeff)
                adjust_learning_rate(encoder_optimizer, args.coeff)
                epochs_without_improve = 0
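`adjust_learning_rate` is not defined in the snippet; given that it is called with a decay coefficient, a plausible minimal version:

def adjust_learning_rate(optimizer, coeff):
    # hypothetical helper: scale every parameter group's learning rate by coeff
    for param_group in optimizer.param_groups:
        param_group['lr'] *= coeff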
Example #7
                                     input_pad_at='start',
                                     num_workers=8)
    logging.info(str(model))

    logging.info(str(print_hparams(hp)))

    logging.info('Data loaded!')
    logging.info('Data size: ' + str(len(training_data)))

    logging.info('Total Model parameters: ' + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
    epoch = 1
    if hp.mode == 'train':
        while epoch < hp.training_epochs + 1:
            epoch_start_time = time.time()
            train()
            torch.save(model.state_dict(), '{log_dir}/{num_epoch}.pt'.format(log_dir=log_dir, num_epoch=epoch))
            scheduler.step(epoch)
            eval_all(evaluation_beam, word_dict_pickle_path=word_dict_pickle_path)
            eval_with_beam(evaluation_beam, max_len=30, eos_ind=9, word_dict_pickle_path=word_dict_pickle_path,
                           beam_size=2)
            eval_with_beam(evaluation_beam, max_len=30, eos_ind=9, word_dict_pickle_path=word_dict_pickle_path,
                           beam_size=3)
            eval_with_beam(evaluation_beam, max_len=30, eos_ind=9, word_dict_pickle_path=word_dict_pickle_path,
                           beam_size=4)
            epoch += 1

    if hp.mode == 'eval':
        # Evaluation model score
        model.load_state_dict(torch.load("./models/best.pt"))
        eval_all(evaluation_beam, word_dict_pickle_path=word_dict_pickle_path)
        eval_with_beam(evaluation_beam, max_len=30, eos_ind=9, word_dict_pickle_path=word_dict_pickle_path,
                       beam_size=2)  # beam_size assumed; the source snippet is truncated mid-call here
Example #8
        # record the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = copy.deepcopy(model)  # snapshot via the copy module; plain assignment would alias the live model

        # adjust the learning rate
        scheduler.step()

    # # save the model
    # if not os.path.exists('datasets/models'):
    #     os.makedirs('datasets/models')
    # torch.save({'state_dict': model.state_dict()}, 'datasets/models/best_model.pth.tar')

    # save the model
    if not os.path.exists('temp/models'):
        os.makedirs('temp/models')
    torch.save({'state_dict': model.state_dict()},
               'temp/models/best_model.pth.tar')
    print('training finished')

    # test
    # compute the cross-entropy loss on the test set
    test_loss = evaluate(best_model, test_data)

    # compute the perplexity
    ppl = math.exp(test_loss)
    print('=' * 40)
    print('| End of training | test ppl {:8.2f}'.format(ppl))
    print('=' * 40)