Example no. 1
def cnn_embedding(args, h, data_file):
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    train_loader = TripletString(h.xt,
                                 h.nt,
                                 h.train_knn,
                                 h.train_dist,
                                 K=args.k)

    model_file = "{}/model.torch".format(data_file)
    if os.path.isfile(model_file):
        model = torch.load(model_file)
    else:
        start_time = time.time()
        model = train_epoch(args, train_loader, device)
        if args.save_model:
            torch.save(model, model_file)
        train_time = time.time() - start_time
        print("# Training time: " + str(train_time))
    model.eval()

    xt = _batch_embed(args, model.embedding_net, h.xt, device)
    start_time = time.time()
    xb = _batch_embed(args, model.embedding_net, h.xb, device)
    embed_time = time.time() - start_time
    xq = _batch_embed(args, model.embedding_net, h.xq, device)
    print("# Embedding time: " + str(embed_time))
    if args.save_embed:
        # use a local variable rather than mutating args.embed_dir in place
        embed_dir = args.embed_dir + "/" if args.embed_dir != "" else ""
        os.makedirs("{}/{}".format(data_file, embed_dir), exist_ok=True)
        np.save("{}/{}embedding_xb".format(data_file, embed_dir), xb)
        np.save("{}/{}embedding_xt".format(data_file, embed_dir), xt)
        np.save("{}/{}embedding_xq".format(data_file, embed_dir), xq)

    if args.recall:
        test_recall(xb, xq, h.query_knn)
    return xq, xb, xt
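Note: torch.save(model, ...) / torch.load(...) above pickle the whole module object, which ties the checkpoint to the exact class and module layout at save time. Example no. 9 below reworks this same function around state_dict, the more portable idiom; in short (reusing model and model_file from the function above):

torch.save(model.state_dict(), model_file)     # save parameters only
model.load_state_dict(torch.load(model_file))  # after rebuilding the model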
Example no. 2
def main():
    transforms_args = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
    train_dataset = CoCoDataset(
        args.coco_path,
        "training",
        target_size=args.target_size,
        transform=transforms.Compose(
            transforms_args +
            [RandomErasing(probability=args.p, sh=args.sh, r1=args.r1)]))
    test_dataset = CoCoDataset(args.coco_path,
                               "validation_wo_occlusion",
                               target_size=args.target_size,
                               transform=transforms.Compose(transforms_args))

    train_batch_sampler = TrainBalancedBatchSampler(
        torch.from_numpy(np.array(train_dataset.all_targets())),
        K=args.K,
        P=args.P,
        n_batches=args.n_batches)

    test_batch_sampler = TestBalancedBatchSampler(
        torch.from_numpy(np.array(test_dataset.all_targets())),
        K=args.K,
        P=args.P,
        n_batches=args.n_batches)

    train_loader = DataLoader(train_dataset,
                              batch_sampler=train_batch_sampler,
                              **kwargs)
    test_loader = DataLoader(test_dataset,
                             batch_sampler=test_batch_sampler,
                             **kwargs)

    # init model
    model, optim_state_dict, init_epoch = load_model(
        args.backbone,
        args.snapshot,
        imagenet_weights=args.imagenet_weights,
        freeze=args.freeze)
    print("Resume training from epoch", init_epoch)
    if cuda:
        model.cuda()

    # init optimizer
    if args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=1e-4)
    elif args.optim == "SGD":
        optimizer = optim.SGD(model.parameters(),
                              momentum=0.9,
                              lr=args.lr,
                              weight_decay=1e-4)
    else:
        raise ValueError("Optimizer is not supported")

    if optim_state_dict is not None:
        optimizer.load_state_dict(optim_state_dict)

    # define loss function
    if args.triplet_selector == "hard":
        selector = HardestNegativeTripletSelector(args.soft_margin)
    elif args.triplet_selector == "semi":
        selector = SemihardNegativeTripletSelector(args.soft_margin)
    elif args.triplet_selector == "random":
        selector = RandomNegativeTripletSelector(args.soft_margin)
    else:
        selector = AllTripletSelector()

    train_loss_fn = TripletLoss(selector, soft_margin=args.soft_margin)
    test_loss_fn = TripletLoss(AllTripletSelector(),
                               soft_margin=args.soft_margin)

    # define learning rate scheduler
    lr_scheduler = LrScheduler(args.epoch_decay_start, args.n_epoch, args.lr)

    log_file = os.path.join(
        args.logger_dir, '%s_%s.csv' % (args.backbone, args.triplet_selector))
    for epoch in range(init_epoch + 1, args.n_epoch):
        lr_scheduler.adjust_learning_rate(optimizer, epoch, args.optim)
        for param_group in optimizer.param_groups:
            print("LR: ", param_group['lr'])

        train_loss = train_epoch(model, train_loader, train_loss_fn, optimizer,
                                 cuda)

        if epoch % args.eval_freq == 0:
            test_loss = test_epoch(model, test_loader, test_loss_fn, cuda)

            print('Epoch [%d/%d], Train loss: %.4f, Test loss: %.4f' %
                  (epoch, args.n_epoch, train_loss, test_loss))
            log = [epoch, train_loss, test_loss]
            write_header = not os.path.isfile(log_file)
            with open(log_file, mode='a', newline='') as csv_f:
                writer = csv.writer(csv_f)
                if write_header:
                    writer.writerow(["epoch", "train_loss", "test_loss"])
                writer.writerow(log)

        if epoch % args.save_freq == 0:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch
                },
                os.path.join(
                    args.snapshot_path, '%s_%s_%d.pth' %
                    (args.backbone, args.triplet_selector, epoch)))
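TrainBalancedBatchSampler and TestBalancedBatchSampler above are project-specific. As a sketch of the P-K balanced sampling they implement (each batch draws P classes and K samples per class, which triplet mining requires), here is a minimal version; PKBatchSampler and its internals are illustrative names, not part of the original code:

import numpy as np
import torch
from torch.utils.data.sampler import BatchSampler

class PKBatchSampler(BatchSampler):
    """Yields n_batches batches of P classes x K samples per class (sketch)."""

    def __init__(self, labels, P, K, n_batches):
        self.labels = labels.numpy() if torch.is_tensor(labels) else np.asarray(labels)
        self.classes = np.unique(self.labels)
        self.P, self.K, self.n_batches = P, K, n_batches

    def __iter__(self):
        for _ in range(self.n_batches):
            batch = []
            # draw P distinct classes, then K indices from each class
            for c in np.random.choice(self.classes, self.P, replace=False):
                idx = np.where(self.labels == c)[0]
                batch.extend(np.random.choice(idx, self.K, replace=len(idx) < self.K).tolist())
            yield batch

    def __len__(self):
        return self.n_batches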
Example no. 3
val_labels = torch.tensor(val_labels).to(torch.int64)

train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = RandomSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=BATCH_SIZE)


optimizer = AdamW(model.parameters(), lr=config.optimizer.kwargs.lr, eps=1e-8)
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([1, 1]).cuda())
total_steps = len(train_dataloader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# min_loss = 1e8
max_f1 = -1

for epoch in range(EPOCHS):
    train_loss, train_acc, train_f1 = train_epoch(model, train_dataloader, optimizer, criterion, scheduler)
    val_loss, val_acc, val_f1 = val_epoch(model, val_dataloader, optimizer, criterion, scheduler)

    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.3f}, Train F1: {train_f1:.3f}, Val. Loss: {val_loss:.3f}, Val. Acc: {val_acc:.3f}, Val. F1: {val_f1:.3f}')
    if val_f1 > max_f1:
        # torch.save(model.state_dict(), Save_path+'.pkl')
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(saved_dir)
        tokenizer.save_pretrained(saved_dir)
        # min_loss = val_loss
        max_f1 = val_f1
        print('model saved!')
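train_epoch and val_epoch above are user-defined helpers. A minimal sketch of train_epoch under the same signature, assuming a HuggingFace-style classifier whose first output is the [batch, 2] logits and batches of (input_ids, attention_mask, labels) as built above; accuracy is computed inline and the F1 term is stubbed out for brevity:

import torch
import torch.nn.functional as F

def train_epoch(model, dataloader, optimizer, criterion, scheduler):
    model.train()
    device = next(model.parameters()).device
    total_loss, correct, seen = 0.0, 0, 0
    for batch in dataloader:
        input_ids, attention_mask, labels = [t.to(device) for t in batch]
        optimizer.zero_grad()
        logits = model(input_ids, attention_mask=attention_mask)[0]
        # BCEWithLogitsLoss expects float one-hot targets of shape [batch, 2]
        loss = criterion(logits, F.one_hot(labels, num_classes=2).float())
        loss.backward()
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()
        correct += (logits.argmax(dim=1) == labels).sum().item()
        seen += labels.size(0)
    return total_loss / len(dataloader), correct / seen, 0.0  # F1 elided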
Example no. 4
def run_training(model, train_dev_data_raw, optimizer, vocab, opt, device):
    ckpt_path = misc_utils.EXP_DIR + opt.exp_name + "/"
    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)
    elif os.listdir(ckpt_path) and not opt.debug:
        raise ValueError(
            "Output directory ({}) already exists and is not empty!".format(
                ckpt_path))

    with open(ckpt_path + "config.json", 'w') as f:
        json.dump(vars(opt), f)

    fout_log = open(ckpt_path + "training.log", 'w')
    tb_writer = SummaryWriter(os.path.join(ckpt_path, "tensorboard"))

    train_data = TASK_CONFIG[opt.task][1](set_type="train")
    train_data.load_data(raw_data=train_dev_data_raw["train"],
                         opt=opt,
                         vocab=vocab)
    train_data_sampler = DataSampler(dataset=train_data,
                                     sequential=False,
                                     opt=opt,
                                     device=device)

    dev_data = TASK_CONFIG[opt.task][1](set_type="dev")
    dev_data.load_data(raw_data=train_dev_data_raw["train"],
                       opt=opt,
                       vocab=vocab)
    dev_data_sampler = DataSampler(dataset=dev_data,
                                   sequential=True,
                                   opt=opt,
                                   device=device)

    model.eval()
    with torch.no_grad():

        avg_losses, avg_val_ppl, cs_acc, st_acc = valid_epoch(
            model, dev_data_sampler, opt, device)
        logging.info("--------------- BEFORE TRAINING ---------------")
        logging.info(
            "Validation Loss: {:.3f}\tValidation Perplexity: {:.3f}".format(
                avg_losses["total"], avg_val_ppl))
        if opt.task == "absgen":
            logging.info("Keyphrase selection accuracy: {:.2f}".format(cs_acc *
                                                                       100))
            fout_log.write(
                "epoch: -1\ttrain_loss: --\tval_loss: {:.3f}\tval_ppl: {:.3f}"
                "\tkp_selection_acc: {:.4f}\n".format(avg_losses["total"],
                                                      avg_val_ppl, cs_acc))
        else:
            logging.info(
                "Keyphrase selection accuracy: {:.2f}\tSentence type accuracy: {:.2f}"
                .format(cs_acc * 100, st_acc * 100))
            fout_log.write(
                "epoch: -1\ttrain_loss: --\tval_loss: {:.3f}\tval_ppl: {:.3f}"
                "\tkp_selection_acc: {:.4f}\tstype_acc: {:.4f}\n".format(
                    avg_losses["total"], avg_val_ppl, cs_acc, st_acc))
        fout_log.flush()

    for n_epoch in range(1, opt.num_train_epochs + 1):

        logging.info("--------------- STARTING EPOCH %d ---------------" %
                     n_epoch)
        model.train()

        avg_train_losses = train_epoch(model, train_data_sampler, opt,
                                       optimizer, device)
        with torch.no_grad():
            model.eval()
            avg_losses, avg_val_ppl, cs_acc, st_acc = valid_epoch(
                model, dev_data_sampler, opt, device)

        ckpt_name = ckpt_path + "epoch_%d_train_%.4f_val_%.4f_ppl_%.4f.tar" % \
                                (n_epoch, avg_train_losses["total"], avg_losses["total"], avg_val_ppl)
        ckpt_dict = {
            "embedding": model.word_emb.state_dict(),
            "encoder": model.encoder.state_dict(),
            "word_decoder": model.wd_dec.state_dict(),
            "planning_decoder": model.sp_dec.state_dict(),
            "optimizer": optimizer.state_dict,
            "epoch": n_epoch,
        }

        torch.save(ckpt_dict, ckpt_name)
        if opt.task == "absgen":
            fout_log.write(
                "epoch: {:3d}\ttrain_loss: {:.3f}\ttrain_kp_sel_loss: {:.3f}"
                "\tval_loss: {:.3f}\tval_ppl: {:.3f}\tkp_sel_acc: {:.4f}\n".
                format(n_epoch, avg_train_losses["total"],
                       avg_train_losses["content_selection"],
                       avg_losses["total"], avg_val_ppl, cs_acc))
        else:
            fout_log.write(
                "epoch: {:3d}\ttrain_loss: {:.3f}\ttrain_sent_type_loss: {:.3f}\ttrain_kp_sel_loss: {:.3f}"
                "\tval_loss: {:.3f}\tval_ppl: {:.3f}\tkp_sel_acc: {:.4f}\tsent_type_acc: {:.4f}\n"
                .format(n_epoch, avg_train_losses["total"],
                        avg_train_losses["sentence_type"],
                        avg_train_losses["content_selection"],
                        avg_losses["total"], avg_val_ppl, cs_acc, st_acc))

        fout_log.flush()
        for k in avg_train_losses:
            tb_writer.add_scalars("%s_loss" % k, {
                "train": avg_train_losses[k],
                "valid": avg_losses[k]
            }, n_epoch)

        tb_writer.add_scalar("valid_perplexity", avg_val_ppl, n_epoch)
        tb_writer.add_scalar("learning_rate", optimizer.param_groups[0]['lr'],
                             n_epoch)
        tb_writer.flush()
    fout_log.close()
    tb_writer.close()
Example no. 5
logger = slog.Logger()
logger.json("__args__", args.__dict__)


def logbatch(trainer):
    entry = trainer.log[-1]
    entry["lr"] = trainer.last_lr
    logger.json("batch", entry, step=trainer.total)


def schedule(total):
    epoch = total // 1000000
    return args.learning_rate * (0.1**(epoch // args.learning_schedule))


model = eval(f"torchvision.models.{args.model}()").cuda()
trainer = trainer.Trainer(model, schedule=schedule)
trainer.after_batch = logbatch

loader = loaders.make_train_loader(**eval(f"dict({args.loaderargs})"))
val_loader = loaders.make_val_loader(**eval(f"dict({args.valloaderargs})"))

for epoch in range(args.epochs):
    trainer.train_epoch(loader)
    loss, err = trainer.errors(val_loader)
    print("test", trainer.total, loss, err)
    logger.add_scalar("val/loss", loss, trainer.total)
    logger.add_scalar("val/top1", err, trainer.total)
    logger.save("model", model, trainer.total)
Example no. 6
import argument_parser
import my_utils
import script
import time

opt = argument_parser.parser()
my_utils.plant_seeds(randomized_seed=opt.randomize)

import trainer

trainer = trainer.Trainer(opt)
trainer.build_dataset_train()
trainer.build_dataset_test()
trainer.build_network()
trainer.build_optimizer()
trainer.build_losses()
trainer.start_train_time = time.time()

for epoch in range(opt.nepoch):
    trainer.train_epoch()
    trainer.test_epoch()
    trainer.dump_stats()
    trainer.save_network()
    trainer.increment_epoch()

trainer.save_new_experiments_results()
script.main(opt, trainer.network)  # inference
opt.faust = "INTRA"
script.main(opt, trainer.network)  # inference
Example no. 7
if __name__ == '__main__':
    config = define_argparser()

    loader = DataLoader(config.train,
                        config.valid,
                        batch_size=config.batch_size,
                        device=config.gpu_id,
                        max_length=config.max_length)
    model = LM(len(loader.text.vocab),
               word_vec_dim=config.word_vec_dim,
               hidden_size=config.hidden_size,
               n_layers=config.n_layers,
               dropout_p=config.dropout,
               max_length=config.max_length)

    # Keep the criterion from counting PAD as a correct prediction,
    # because PAD is easy to predict.
    loss_weight = torch.ones(len(loader.text.vocab))
    loss_weight[data_loader.PAD] = 0
    criterion = nn.NLLLoss(weight=loss_weight, reduction='sum')

    print(model)
    print(criterion)

    if config.gpu_id >= 0:
        model.cuda(config.gpu_id)
        criterion.cuda(config.gpu_id)

    trainer.train_epoch(model, criterion, loader.train_iter, loader.valid_iter,
                        config)
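Because the loss is summed (reduction='sum'), zeroing the PAD class weight is equivalent to NLLLoss's built-in ignore_index; a one-line alternative using the same data_loader.PAD index:

criterion = nn.NLLLoss(ignore_index=data_loader.PAD, reduction='sum')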
Example no. 8
    # Build the model once, then move it (and the loss weights) to the GPU
    # when one is available.
    model = LM(hr_dataset.n_word,
               embedding_dim=config.embedding_dim,
               hidden_dim=config.hidden_size,
               n_layers=config.n_layers,
               dropout_p=config.dropout,
               max_length=config.words_num,
               rnn_type='LSTM')

    # Keep the criterion from counting EOS as a correct prediction,
    # because EOS is easy to predict.
    loss_weight = torch.ones(hr_dataset.n_word)
    loss_weight[0] = 0

    if torch.cuda.is_available():
        model = model.cuda()
        loss_weight = loss_weight.cuda()

    criterion = nn.NLLLoss(weight=loss_weight, reduction='sum')

    print(model)
    print(criterion)

    trainer.train_epoch(model, criterion, train_loader, test_loader, config)
Example no. 9
def cnn_embedding(args, h, data_file):
    """
    h[DataHandler]
    """
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    train_loader = TripletString(h.xt,
                                 h.nt,
                                 h.train_knn,
                                 h.train_dist,
                                 K=args.k)

    # for model save and load, let's use state dict instead
    model_file = "{}/model.torch".format(data_file)
    if os.path.isfile(model_file):
        #model = torch.load(model_file)
        model = _init_net(args, train_loader, device)
        model.load_state_dict(torch.load(model_file))
    else:
        start_time = time.time()
        model = train_epoch(args, train_loader, device)
        if args.save_model:
            #torch.save(model, model_file)
            torch.save(model.state_dict(), model_file)
        train_time = time.time() - start_time
        print("# Training time: " + str(train_time))
    model.eval()

    # check if we use bert here
    char_alphabet = None
    if args.bert:
        char_alphabet = h.alphabet

    xt = _batch_embed(args,
                      model.embedding_net,
                      h.xt,
                      device,
                      char_alphabet=char_alphabet)
    start_time = time.time()
    xb = _batch_embed(args,
                      model.embedding_net,
                      h.xb,
                      device,
                      char_alphabet=char_alphabet)
    embed_time = time.time() - start_time
    xq = _batch_embed(args,
                      model.embedding_net,
                      h.xq,
                      device,
                      char_alphabet=char_alphabet)
    print("# Embedding time: " + str(embed_time))
    if args.save_embed:
        # use a local variable rather than mutating args.embed_dir in place
        embed_dir = args.embed_dir + "/" if args.embed_dir != "" else ""
        os.makedirs("{}/{}".format(data_file, embed_dir), exist_ok=True)
        np.save("{}/{}embedding_xb".format(data_file, embed_dir), xb)
        np.save("{}/{}embedding_xt".format(data_file, embed_dir), xt)
        np.save("{}/{}embedding_xq".format(data_file, embed_dir), xq)

    if args.recall:
        test_recall(xb, xq, h.query_knn)
    return xq, xb, xt
Example no. 10
losses = []
trainer = Trainer(device=device,
                  model=Cancer_model,
                  train_loader=CancerDataLoader,
                  val_loader=CancerDataLoader_val,
                  optimizer=optim,
                  loss_fcn=loss_fcn)

big_train_loss_list = []
big_val_loss_list = []

# loading a model from a saved state dictionary
# loaded_model = get_model(device=device)
# loaded_model.load_state_dict(torch.load(save_model_path))
# loaded_model.eval()

# whether to plot the losses
plot = True

for ep in tqdm(range(NUM_EPOCHS), desc='Epochs'):
    # train one epoch
    train_loss_list = trainer.train_epoch(save_model=True)
    val_loss_list = trainer.validate(sample_size=20)

    big_train_loss_list += train_loss_list
    big_val_loss_list += val_loss_list

    if plot:
        plot_losses(train_loss_list=big_train_loss_list,
                    val_loss_list=big_val_loss_list)
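plot_losses is project-specific; a minimal matplotlib sketch with the same keyword signature (a hypothetical implementation, for illustration only):

import matplotlib.pyplot as plt

def plot_losses(train_loss_list, val_loss_list):
    plt.figure()
    plt.plot(train_loss_list, label="train")
    plt.plot(val_loss_list, label="val")
    plt.xlabel("batch")
    plt.ylabel("loss")
    plt.legend()
    plt.show()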
Example no. 11
import argument_parser
import my_utils
import script
import time

opt = argument_parser.parser()
my_utils.plant_seeds(randomized_seed=opt.randomize)

import trainer

trainer = trainer.Trainer(opt)
trainer.build_dataset_train()
trainer.build_dataset_test()
trainer.build_network()
trainer.build_optimizer()
trainer.build_losses()
trainer.start_train_time = time.time()

for epoch in range(opt.nepoch):
    trainer.train_epoch(epoch)
    trainer.test_epoch()
    trainer.dump_stats()
    trainer.save_network()
    trainer.increment_epoch()

trainer.save_new_experiments_results()
script.main(opt, trainer.network)  # inference
opt.faust = "INTRA"
script.main(opt, trainer.network)  # inference