Example 1
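# Presumed imports, inferred from usage in this snippet; get_opt, load_data,
# create_model, and create_critic are project-level helpers defined elsewhere.
import os
import random
import time

import numpy as np
import torch
from torch import cuda

import code_retrieval
import lib
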
def main():
    print("Start...")
    global opt
    opt = get_opt()

    # Set seed
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)

    opt.cuda = torch.cuda.is_available() and len(opt.gpus)

    if opt.save_dir and not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with -gpus 1"
        )

    if opt.cuda:
        cuda.set_device(opt.gpus[0])
        torch.cuda.manual_seed(opt.seed)

    dicts, supervised_data, rl_data, valid_data, test_data, DEV, EVAL = load_data(
        opt)

    print("Building model...")

    use_critic = opt.start_reinforce is not None
    print("use_critic: ", use_critic)
    print("has_baseline: ", opt.has_baseline)
    if not opt.has_baseline:
        assert opt.critic_pretrain_epochs == 0

    if opt.load_from is None:
        model, optim = create_model(lib.Seq2SeqModel, dicts,
                                    dicts["tgt"].size())
        checkpoint = None

    else:
        print("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(
            opt.load_from)  #, map_location=lambda storage, loc: storage)
        model = checkpoint["model"]
        # Override test-time config options on the loaded model.
        for attribute in ["predict_mask", "max_predict_length"]:
            model.opt.__dict__[attribute] = opt.__dict__[attribute]
        optim = checkpoint["optim"]
        optim.start_decay_at = opt.start_decay_at
        if optim.start_decay_at > opt.end_epoch:
            print("No decay!")
        opt.start_epoch = checkpoint["epoch"] + 1

    print("model: ", model)
    print("optim: ", optim)

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])

    # Start reinforce training immediately.
    print("opt.start_reinforce: ", opt.start_reinforce)

    # Check if end_epoch is large enough.
    if use_critic:
        assert opt.start_epoch + opt.critic_pretrain_epochs - 1 <= \
               opt.end_epoch, "Please increase -end_epoch to perform pretraining!"

    nParams = sum([p.nelement() for p in model.parameters()])
    print("* number of parameters: %d" % nParams)

    if opt.sent_reward == "cr":
        lib.RetReward.cr = code_retrieval.CrCritic()

    # Metrics.
    print("sent_reward: %s" % opt.sent_reward)
    metrics = {}
    metrics["xent_loss"] = lib.Loss.weighted_xent_loss
    metrics["critic_loss"] = lib.Loss.weighted_mse
    if opt.sent_reward == "bleu":
        metrics["sent_reward"] = {
            "train": lib.Reward.wrapped_sentence_bleu,
            "eval": lib.Reward.wrapped_sentence_bleu
        }
    else:
        metrics["sent_reward"] = {
            "train": lib.RetReward.retrieval_mrr_train,
            "eval": lib.RetReward.retrieval_mrr_eval
        }

    print("opt.eval: ", opt.eval)
    print("opt.eval_codenn: ", opt.eval_codenn)
    print("opt.eval_codenn_all: ", opt.eval_codenn_all)
    print("opt.collect_anno: ", opt.collect_anno)

    # Evaluate model
    if opt.eval:
        if opt.sent_reward == "cr" and (opt.eval_codenn
                                        or opt.eval_codenn_all):
            raise Exception(
                "Currently we do not support evaluating MRR on codenn!")

        if False:  # Manual toggle: set to True to also evaluate on the training set.
            # On training set.
            if opt.sent_reward == "cr":
                metrics["sent_reward"][
                    "eval"] = lib.RetReward.retrieval_mrr_train
            #if opt.collect_anno:
            #    metrics["sent_reward"] = {"train": None, "eval": None}

            evaluator = lib.Evaluator(model, metrics, dicts, opt)
            pred_file = opt.load_from.replace(".pt", ".train.pred")
            if opt.eval_codenn or opt.eval_codenn_all:
                raise Exception("Invalid eval_codenn!")
            print("train_data.src: ", len(supervised_data.src))
            if opt.predict_mask:
                pred_file += ".masked"
            pred_file += ".metric%s" % opt.sent_reward
            evaluator.eval(supervised_data, pred_file)

        if True:  # Manual toggle: evaluate on the validation set.
            # On validation set.
            if opt.sent_reward == "cr":
                metrics["sent_reward"][
                    "eval"] = lib.RetReward.retrieval_mrr_eval
            #if opt.collect_anno:
            #    metrics["sent_reward"] = {"train": None, "eval": None}

            evaluator = lib.Evaluator(model, metrics, dicts, opt)
            pred_file = opt.load_from.replace(".pt", ".valid.pred")
            if opt.eval_codenn:
                pred_file = pred_file.replace("valid", "DEV")
                valid_data = DEV
            elif opt.eval_codenn_all:
                pred_file = pred_file.replace("valid", "DEV_all")
                print("* Please input valid data = DEV_all")
            print("valid_data.src: ", len(valid_data.src))
            if opt.predict_mask:
                pred_file += ".masked"
            pred_file += ".metric%s" % opt.sent_reward
            evaluator.eval(valid_data, pred_file)

        if False:  # Manual toggle: set to True to also evaluate on the test set.
            # On test set.
            if opt.sent_reward == "cr":
                metrics["sent_reward"][
                    "eval"] = lib.RetReward.retrieval_mrr_eval
            #if opt.collect_anno:
            #    metrics["sent_reward"] = {"train": None, "eval": None}

            evaluator = lib.Evaluator(model, metrics, dicts, opt)
            pred_file = opt.load_from.replace(".pt", ".test.pred")
            if opt.eval_codenn:
                pred_file = pred_file.replace("test", "EVAL")
                test_data = EVAL
            elif opt.eval_codenn_all:
                pred_file = pred_file.replace("test", "EVAL_all")
                print("* Please input test data = EVAL_all")
            print("test_data.src: ", len(test_data.src))
            if opt.predict_mask:
                pred_file += ".masked"
            pred_file += ".metric%s" % opt.sent_reward
            evaluator.eval(test_data, pred_file)

    else:
        print("supervised_data.src: ", len(supervised_data.src))
        print("supervised_data.tgt: ", len(supervised_data.tgt))
        xent_trainer = lib.Trainer(model,
                                   supervised_data,
                                   valid_data,
                                   metrics,
                                   dicts,
                                   optim,
                                   opt,
                                   DEV=DEV)

        if use_critic:
            start_time = time.time()
            # Supervised training.
            print("supervised training..")
            print("start_epoch: ", opt.start_epoch)

            xent_trainer.train(opt.start_epoch, opt.start_reinforce - 1,
                               start_time)

            if opt.sent_reward == "bleu":
                _valid_data = DEV
            else:
                _valid_data = valid_data

            if opt.has_baseline:
                # Create critic here to not affect random seed.
                critic, critic_optim = create_critic(checkpoint, dicts, opt)
                print("Building critic...")
                print("Critic: ", critic)
                print("Critic optim: ", critic_optim)

                # Pretrain critic.
                print("pretrain critic...")
                if opt.critic_pretrain_epochs > 0:
                    reinforce_trainer = lib.ReinforceTrainer(
                        model, critic, supervised_data, _valid_data, metrics,
                        dicts, optim, critic_optim, opt)
                    reinforce_trainer.train(
                        opt.start_reinforce,
                        opt.start_reinforce + opt.critic_pretrain_epochs - 1,
                        True, start_time)
            else:
                print("NOTE: do not have a baseline model")
                critic, critic_optim = None, None

            # Reinforce training.
            print("reinforce training...")
            reinforce_trainer = lib.ReinforceTrainer(model, critic, rl_data,
                                                     _valid_data, metrics,
                                                     dicts, optim,
                                                     critic_optim, opt)
            reinforce_trainer.train(
                opt.start_reinforce + opt.critic_pretrain_epochs,
                opt.end_epoch, False, start_time)

        else:  # Supervised training only (opt.start_reinforce is None).
            xent_trainer.train(opt.start_epoch, opt.end_epoch)
Example 2
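# Presumed imports; `opt` (parsed options) and the create_model/create_critic
# helpers come from the surrounding script, which is not shown here.
import time

import torch

import lib
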
def main():

    print('Loading data from "%s"' % opt.data)

    dataset = torch.load(opt.data)

    supervised_data = lib.Dataset(dataset["train_xe"], opt.batch_size, opt.cuda, eval=False)
    bandit_data = lib.Dataset(dataset["train_pg"], opt.batch_size, opt.cuda, eval=False)
    valid_data = lib.Dataset(dataset["valid"], opt.batch_size, opt.cuda, eval=True)
    test_data  = lib.Dataset(dataset["test"], opt.batch_size, opt.cuda, eval=True)

    dicts = dataset["dicts"]
    print(" * vocabulary size. source = %d; target = %d" %
          (dicts["src"].size(), dicts["tgt"].size()))
    print(" * number of XENT training sentences. %d" %
          len(dataset["train_xe"]["src"]))
    print(" * number of PG training sentences. %d" %
          len(dataset["train_pg"]["src"]))
    print(" * maximum batch size. %d" % opt.batch_size)
    print("Building model...")

    use_critic = opt.start_reinforce is not None

    if opt.load_from is None:
        model, optim = create_model(lib.NMTModel, dicts, dicts["tgt"].size())
        checkpoint = None
    else:
        print("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(opt.load_from)
        model = checkpoint["model"]
        optim = checkpoint["optim"]
        opt.start_epoch = checkpoint["epoch"] + 1

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])

    # Start reinforce training immediately.
    if opt.start_reinforce == -1:
        opt.start_decay_at = opt.start_epoch
        opt.start_reinforce = opt.start_epoch

    # Check if end_epoch is large enough.
    if use_critic:
        assert opt.start_epoch + opt.critic_pretrain_epochs - 1 <= \
            opt.end_epoch, "Please increase -end_epoch to perform pretraining!"

    nParams = sum([p.nelement() for p in model.parameters()])
    print("* number of parameters: %d" % nParams)

    # Metrics.
    metrics = {}
    metrics["nmt_loss"] = lib.Loss.weighted_xent_loss
    metrics["critic_loss"] = lib.Loss.weighted_mse
    metrics["sent_reward"] = lib.Reward.sentence_bleu
    metrics["corp_reward"] = lib.Reward.corpus_bleu
    if opt.pert_func is not None:
        opt.pert_func = lib.PertFunction(opt.pert_func, opt.pert_param)


    # Evaluate model on heldout dataset.
    if opt.eval:
        evaluator = lib.Evaluator(model, metrics, dicts, opt)
        # On validation set.
        pred_file = opt.load_from.replace(".pt", ".valid.pred")
        evaluator.eval(valid_data, pred_file)
        # On test set.
        pred_file = opt.load_from.replace(".pt", ".test.pred")
        evaluator.eval(test_data, pred_file)
    elif opt.eval_sample:
        opt.no_update = True
        critic, critic_optim = create_critic(checkpoint, dicts, opt)
        reinforce_trainer = lib.ReinforceTrainer(model, critic, bandit_data, test_data,
            metrics, dicts, optim, critic_optim, opt)
        reinforce_trainer.train(opt.start_epoch, opt.start_epoch, False)
    elif opt.sup_train_on_bandit:
        optim.set_lr(opt.reinforce_lr)
        xent_trainer = lib.Trainer(model, bandit_data, test_data, metrics, dicts, optim, opt)
        xent_trainer.train(opt.start_epoch, opt.start_epoch)
    else:
	print("theek hai")
        xent_trainer = lib.Trainer(model, supervised_data, valid_data, metrics, dicts, optim, opt)
        if use_critic:
            start_time = time.time()
            # Supervised training.
            xent_trainer.train(opt.start_epoch, opt.start_reinforce - 1, start_time)
            # Create critic here to not affect random seed.
            critic, critic_optim = create_critic(checkpoint, dicts, opt)
            # Pretrain critic.
            if opt.critic_pretrain_epochs > 0:
                reinforce_trainer = lib.ReinforceTrainer(model, critic, supervised_data, test_data,
                    metrics, dicts, optim, critic_optim, opt)
                reinforce_trainer.train(opt.start_reinforce,
                    opt.start_reinforce + opt.critic_pretrain_epochs - 1, True, start_time)
            # Reinforce training.
            reinforce_trainer = lib.ReinforceTrainer(model, critic, bandit_data, test_data,
                    metrics, dicts, optim, critic_optim, opt)
            reinforce_trainer.train(opt.start_reinforce + opt.critic_pretrain_epochs, opt.end_epoch,
                False, start_time)
        # Supervised training only.
        else:
            xent_trainer.train(opt.start_epoch, opt.end_epoch)
Example 3
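# Presumed imports, inferred from usage; get_opt, load_data, create_model, and
# create_critic are project-level helpers defined elsewhere.
import os
import random
import time

import numpy as np
import torch
from torch import cuda

import lib
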
def main():
    print("Start...")
    global opt
    opt = get_opt()

    # Set seed
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)

    opt.cuda = len(opt.gpus)

    if opt.save_dir and not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    if torch.cuda.is_available() and not opt.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with -gpus 1")

    if opt.cuda:
        cuda.set_device(opt.gpus[0])
        torch.cuda.manual_seed(opt.seed)

    dicts, supervised_data, rl_data, valid_data, test_data, vis_data = load_data(opt)

    print("Building model...")

    use_critic = opt.start_reinforce is not None
    print("use_critic: ", use_critic)

    if opt.load_from is None:
        if opt.data_type == 'code':
            model, optim = create_model(lib.Tree2SeqModel, dicts, dicts["tgt"].size())
        elif opt.data_type == 'text':
            model, optim = create_model(lib.Seq2SeqModel, dicts, dicts["tgt"].size())
        elif opt.data_type == 'hybrid':
            model, optim = create_model(lib.Hybrid2SeqModel, dicts, dicts["tgt"].size())
        checkpoint = None
        print("model: ", model)
        print("optim: ", optim)
    else:
        print("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(opt.load_from, map_location=lambda storage, loc: storage)
        model = checkpoint["model"]
        optim = checkpoint["optim"]
        opt.start_epoch = checkpoint["epoch"] + 1

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])

    # Start reinforce training immediately.
    print("opt.start_reinforce: ", opt.start_reinforce)
    if opt.start_reinforce == -1:
        opt.start_decay_at = opt.start_epoch
        opt.start_reinforce = opt.start_epoch

    # Check if end_epoch is large enough.
    if use_critic:
        assert opt.start_epoch + opt.critic_pretrain_epochs - 1 <= \
               opt.end_epoch, "Please increase -end_epoch to perform pretraining!"

    nParams = sum([p.nelement() for p in model.parameters()])
    print("* number of parameters: %d" % nParams)

    # Metrics.
    metrics = {}
    metrics["xent_loss"] = lib.Loss.weighted_xent_loss
    metrics["critic_loss"] = lib.Loss.weighted_mse
    metrics["sent_reward"] = lib.Reward.sentence_bleu
    metrics["corp_reward"] = lib.Reward.corpus_bleu
    if opt.pert_func is not None:
        opt.pert_func = lib.PertFunction(opt.pert_func, opt.pert_param)

    print("opt.eval: ", opt.eval)
    print("opt.eval_sample: ", opt.eval_sample)

    # Evaluate model on heldout dataset.
    if opt.eval:
        evaluator = lib.Evaluator(model, metrics, dicts, opt)
        # On validation set.
        if opt.var_length:
            pred_file = opt.load_from.replace(".pt", ".valid.pred.var"+opt.var_type)
        else:
            pred_file = opt.load_from.replace(".pt", ".valid.pred")
        evaluator.eval(valid_data, pred_file)

        # On test set.
        if opt.var_length:
            pred_file = opt.load_from.replace(".pt", ".test.pred.var"+opt.var_type)
        else:
            pred_file = opt.load_from.replace(".pt", ".test.pred")
        evaluator.eval(test_data, pred_file)
    elif opt.eval_one:
        print("eval_one..")
        evaluator = lib.Evaluator(model, metrics, dicts, opt)
        # On test set.
        pred_file = opt.load_from.replace(".pt", ".test_one.pred")
        evaluator.eval(vis_data, pred_file)
    elif opt.eval_sample:
        opt.no_update = True
        critic, critic_optim = create_critic(checkpoint, dicts, opt)
        reinforce_trainer = lib.ReinforceTrainer(model, critic, rl_data, test_data,
                                                 metrics, dicts, optim, critic_optim, opt)
        reinforce_trainer.train(opt.start_epoch, opt.start_epoch, False)

    else:
        print("supervised_data.src: ", len(supervised_data.src))
        print("supervised_data.tgt: ", len(supervised_data.tgt))
        print("supervised_data.trees: ", len(supervised_data.trees))
        print("supervised_data.leafs: ", len(supervised_data.leafs))
        xent_trainer = lib.Trainer(model, supervised_data, valid_data, metrics, dicts, optim, opt)
        if use_critic:
            start_time = time.time()
            # Supervised training.
            print("supervised training..")
            print("start_epoch: ", opt.start_epoch)

            xent_trainer.train(opt.start_epoch, opt.start_reinforce - 1, start_time)
            # Create critic here to not affect random seed.
            critic, critic_optim = create_critic(checkpoint, dicts, opt)
            # Pretrain critic.
            print("pretrain critic...")
            if opt.critic_pretrain_epochs > 0:
                reinforce_trainer = lib.ReinforceTrainer(model, critic, supervised_data, test_data, metrics, dicts, optim, critic_optim, opt)
                reinforce_trainer.train(opt.start_reinforce, opt.start_reinforce + opt.critic_pretrain_epochs - 1, True, start_time)
            # Reinforce training.
            print("reinforce training...")
            reinforce_trainer = lib.ReinforceTrainer(model, critic, rl_data, test_data, metrics, dicts, optim, critic_optim, opt)
            reinforce_trainer.train(opt.start_reinforce + opt.critic_pretrain_epochs, opt.end_epoch, False, start_time)

        # Supervised training only.
        else:
            xent_trainer.train(opt.start_epoch, opt.end_epoch)
Example 4
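# Presumed imports; `args`, make_checkpoint_dir, and init_model_weight come
# from the surrounding script, which is not shown here.
import os

import pandas as pd
import torch
import wandb

import lib
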
def main():
    if args.wandb_on:
        wandb.init(project=args.wandb_project,
                   name=args.model_name + '-' +
                   args.data_folder.split('/')[2] + '-' + args.loss_type)
        wandb.config.update(
            {'hostname': os.popen('hostname').read().split('.')[0]})
        wandb.config.update(args)
    if args.item2idx_dict is not None:
        item2idx_dict = pd.read_pickle(
            os.path.join(args.data_folder, args.item2idx_dict))
    else:
        item2idx_dict = None

    print("Loading train data from {}".format(
        os.path.join(args.data_folder, args.train_data)))
    print("Loading valid data from {}".format(
        os.path.join(args.data_folder, args.valid_data)))

    train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data))
    valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data),
                             itemmap=train_data.itemmap)

    if args.debug:
        train_data.df.to_csv(
            os.path.join(args.data_folder, 'GRU4Rec-train-data.csv'))
        valid_data.df.to_csv(
            os.path.join(args.data_folder, 'GRU4Rec-valid-data.csv'))
    make_checkpoint_dir()

    # Set all the parameters according to the defined arguments.
    args.input_size = len(train_data.items)
    args.output_size = args.input_size

    # Loss function (CUDA is used with cross-entropy only).
    loss_function = lib.LossFunction(
        loss_type=args.loss_type,
        use_cuda=args.cuda)
    if not args.is_eval:  # Training
        # Initialize the model
        model = lib.GRU4REC(args.input_size,
                            args.hidden_size,
                            args.output_size,
                            final_act=args.final_act,
                            num_layers=args.num_layers,
                            use_cuda=args.cuda,
                            batch_size=args.batch_size,
                            dropout_input=args.dropout_input,
                            dropout_hidden=args.dropout_hidden,
                            embedding_dim=args.embedding_dim)
        # Weight initialization
        init_model_weight(model)
        if args.wandb_on:
            wandb.watch(model, log="all")

        # Optimizer
        optimizer = lib.Optimizer(model.parameters(),
                                  optimizer_type=args.optimizer_type,
                                  lr=args.lr,
                                  weight_decay=args.weight_decay,
                                  momentum=args.momentum,
                                  eps=args.eps)
        # Trainer class
        trainer = lib.Trainer(model,
                              train_data=train_data,
                              eval_data=valid_data,
                              optim=optimizer,
                              use_cuda=args.cuda,
                              loss_func=loss_function,
                              batch_size=args.batch_size,
                              args=args)
        print('#### START TRAINING....')
        trainer.train(0, args.n_epochs - 1)
    else:  # Testing
        if args.load_model is not None:
            print("Loading pre-trained model from {}".format(args.load_model))
            try:
                checkpoint = torch.load(args.load_model)
            except Exception:
                checkpoint = torch.load(
                    args.load_model, map_location=lambda storage, loc: storage)
            model = lib.GRU4REC(args.input_size,
                                args.hidden_size,
                                args.output_size,
                                final_act=args.final_act,
                                num_layers=args.num_layers,
                                use_cuda=args.cuda,
                                batch_size=args.batch_size,
                                dropout_input=args.dropout_input,
                                dropout_hidden=args.dropout_hidden,
                                embedding_dim=args.embedding_dim)
            model.load_state_dict(checkpoint["state_dict"])

            model.gru.flatten_parameters()
            evaluation = lib.Evaluation(model,
                                        loss_function,
                                        use_cuda=args.cuda,
                                        k=args.k_eval)
            loss, recall, mrr = evaluation.eval(valid_data, args.batch_size)
            print("Final result: recall = {:.2f}, mrr = {:.2f}".format(
                recall, mrr))
        else:
            print("No Pretrained Model was found!")
Example 5
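# Presumed imports; `data`, `device`, and `experiment_name` are defined in the
# omitted earlier part of this script, and the snippet below starts midway
# through the model definition.
import torch
import torch.nn as nn
import torch.nn.functional as F

import lib
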
               ),  # average first channels of every tree
).to(device)

with torch.no_grad():
    res = model(torch.as_tensor(data.X_train[:1000], device=device))

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

from qhoptim.pyt import QHAdam
optimizer_params = {'nus': (0.7, 1.0), 'betas': (0.95, 0.998)}

trainer = lib.Trainer(model=model,
                      loss_function=F.cross_entropy,
                      experiment_name=experiment_name,
                      warm_start=False,
                      Optimizer=QHAdam,
                      optimizer_params=optimizer_params,
                      verbose=True,
                      n_last_checkpoints=5)

loss_history_step, auc_history_step = [], []
loss_history, mse_history, auc_history = [], [], []
best_auc = 0
best_step_auc = 0
early_stopping_rounds = 1000
report_frequency = 100

print("------ training starts ------")

for batch in lib.iterate_minibatches(data.X_train,
                                     data.y_train,
Example 6
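# Presumed imports; `opt`, log(), the trpro/stat/samples loggers, and the
# create_model/create_critic helpers are defined elsewhere in this script.
import time

import torch

import lib
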
def main():
    assert (opt.start_epoch <=
            opt.end_epoch), 'The start epoch should be <= End Epoch'
    log('Loading data from "%s"' % opt.data)
    dataset = torch.load(opt.data)

    supervised_data = lib.Dataset(dataset["train_xe"],
                                  opt.batch_size,
                                  opt.cuda,
                                  eval=False)
    bandit_data = lib.Dataset(dataset["train_pg"],
                              opt.batch_size,
                              opt.cuda,
                              eval=False)

    sup_valid_data = lib.Dataset(dataset["sup_valid"],
                                 opt.eval_batch_size,
                                 opt.cuda,
                                 eval=True)
    bandit_valid_data = lib.Dataset(dataset["bandit_valid"],
                                    opt.eval_batch_size,
                                    opt.cuda,
                                    eval=True)
    test_data = lib.Dataset(dataset["test"],
                            opt.eval_batch_size,
                            opt.cuda,
                            eval=True)

    dicts = dataset["dicts"]
    log(" * vocabulary size. source = %d; target = %d" %
        (dicts["src"].size(), dicts["tgt"].size()))
    log(" * number of XENT training sentences. %d" %
        len(dataset["train_xe"]["src"]))
    log(" * number of PG training sentences. %d" %
        len(dataset["train_pg"]["src"]))
    log(" * number of bandit valid sentences. %d" %
        len(dataset["bandit_valid"]["src"]))
    log(" * number of  test sentences. %d" % len(dataset["test"]["src"]))
    log(" * maximum batch size. %d" % opt.batch_size)
    log("Building model...")

    use_critic = opt.start_reinforce is not None

    if opt.load_from is None:
        model, optim = create_model(lib.NMTModel, dicts, dicts["tgt"].size())
        checkpoint = None
    else:
        log("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(opt.load_from)
        model = checkpoint["model"]
        optim = checkpoint["optim"]
        opt.start_epoch = checkpoint["epoch"] + 1

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])

    # Start reinforce training immediately.
    if opt.start_reinforce == -1:
        opt.start_decay_at = opt.start_epoch
        opt.start_reinforce = opt.start_epoch

    nParams = sum([p.nelement() for p in model.parameters()])
    log("* number of parameters: %d" % nParams)

    # Metrics.
    metrics = {}
    metrics["nmt_loss"] = lib.Loss.weighted_xent_loss
    metrics["critic_loss"] = lib.Loss.weighted_mse
    log(" Simulated Feedback: charF score\nEvaluation: charF and Corpus BLEU")
    instance_charF = lib.Reward.charFEvaluator(dict_tgt=dicts["tgt"])
    metrics["sent_reward"] = instance_charF.sentence_charF
    metrics["corp_reward"] = lib.Reward.corpus_bleu

    # Evaluate model on heldout dataset.
    if opt.eval:
        evaluator = lib.Evaluator(model, metrics, dicts, opt, trpro_logger)

        # On Bandit test data
        pred_file = opt.load_from.replace(".pt", ".test.pred")
        tgt_file = opt.load_from.replace(".pt", ".test.tgt")
        evaluator.eval(test_data, pred_file)
        evaluator.eval(test_data, pred_file=None, tgt_file=tgt_file)

    else:
        xent_trainer = lib.Trainer(model,
                                   supervised_data,
                                   sup_valid_data,
                                   metrics,
                                   dicts,
                                   optim,
                                   opt,
                                   trainprocess_logger=trpro_logger)
        if use_critic:
            start_time = time.time()
            # Supervised training: used when running pretrain+bandit together
            xent_trainer.train(opt.start_epoch, opt.start_reinforce - 1,
                               start_time)
            # Actor-Critic
            critic, critic_optim = create_critic(checkpoint, dicts, opt)
            reinforce_trainer = lib.ReinforceTrainer(
                model,
                critic,
                bandit_data,
                bandit_valid_data,
                test_data,
                metrics,
                dicts,
                optim,
                critic_optim,
                opt,
                trainprocess_logger=trpro_logger,
                stat_logger=stat_logger,
                samples_logger=samples_logger)
            reinforce_trainer.train(opt.start_reinforce, opt.end_epoch,
                                    start_time)
            if opt.use_bipnmt:
                stat_logger.close_file()
                samples_logger.close_file()
        else:
            # Supervised training only.
            xent_trainer.train(opt.start_epoch, opt.end_epoch)

    trpro_logger.close_file()
Example 7
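# Presumed imports; `args`, make_checkpoint_dir, and init_model are defined
# elsewhere in this script.
import os

import torch

import lib
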
def main():
    print("Loading train data from {}".format(
        os.path.join(args.data_folder, args.train_data)))
    print("Loading valid data from {}".format(
        os.path.join(args.data_folder, args.valid_data)))
    print("Loading test data from {}\n".format(
        os.path.join(args.data_folder, args.test_data)))

    train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data))
    valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data),
                             itemmap=train_data.itemmap)
    test_data = lib.Dataset(os.path.join(args.data_folder, args.test_data))

    if not args.is_eval:
        make_checkpoint_dir()

    input_size = len(train_data.items)
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    output_size = input_size
    batch_size = args.batch_size
    dropout_input = args.dropout_input
    dropout_hidden = args.dropout_hidden
    embedding_dim = args.embedding_dim
    final_act = args.final_act
    loss_type = args.loss_type

    optimizer_type = args.optimizer_type
    lr = args.lr
    weight_decay = args.weight_decay
    momentum = args.momentum
    eps = args.eps

    n_epochs = args.n_epochs
    time_sort = args.time_sort

    if not args.is_eval:
        model = lib.GRU4REC(input_size,
                            hidden_size,
                            output_size,
                            final_act=final_act,
                            num_layers=num_layers,
                            use_cuda=args.cuda,
                            batch_size=batch_size,
                            dropout_input=dropout_input,
                            dropout_hidden=dropout_hidden,
                            embedding_dim=embedding_dim)

        # Init weights; see Balázs Hidasi et al. (ICLR 2016), p. 7.

        init_model(model)

        optimizer = lib.Optimizer(model.parameters(),
                                  optimizer_type=optimizer_type,
                                  lr=lr,
                                  weight_decay=weight_decay,
                                  momentum=momentum,
                                  eps=eps)

        loss_function = lib.LossFunction(loss_type=loss_type,
                                         use_cuda=args.cuda)

        trainer = lib.Trainer(model,
                              train_data=train_data,
                              eval_data=valid_data,
                              optim=optimizer,
                              use_cuda=args.cuda,
                              loss_func=loss_function,
                              args=args)

        trainer.train(0, n_epochs - 1)
    else:
        if args.load_model is not None:
            print("Loading pre trained model from {}".format(args.load_model))
            checkpoint = torch.load(args.load_model)
            model = checkpoint["model"]
            model.gru.flatten_parameters()
            optim = checkpoint["optim"]
            loss_function = lib.LossFunction(loss_type=loss_type,
                                             use_cuda=args.cuda)
            evaluation = lib.Evaluation(model,
                                        loss_function,
                                        use_cuda=args.cuda)
            loss, recall, mrr = evaluation.eval(valid_data)
            print("Final result: recall = {:.2f}, mrr = {:.2f}".format(
                recall, mrr))
        else:
            print("Pre trained model is None!")
Example 8
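# Presumed imports, inferred from usage; choose_batch_size, handle_oom_error,
# and save_loss_fig are project-level helpers defined elsewhere.
import json
import os
import pickle
import time
from os.path import exists as pexists
from os.path import join as pjoin

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

import lib
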
def main(args) -> None:
    # Create directory
    os.makedirs(pjoin('logs', args.name), exist_ok=True)

    if pexists(pjoin('logs', args.name, 'MY_IS_FINISHED')):
        print('Quit! Already finished running for %s' % args.name)
        return

    # Set seed
    if args.seed is not None:
        lib.utils.seed_everything(args.seed)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Data
    with lib.utils.Timer(f'Load dataset {args.dataset}'):
        data = lib.DATASETS[args.dataset.upper()](path='./data',
                                                  fold=args.fold)
    qn = args.quantile_noise if getattr(args, 'quantile_noise', None) is not None \
        else data.get('quantile_noise', 1e-3)
    preprocessor = lib.MyPreprocessor(
        cat_features=data.get('cat_features', None),
        y_normalize=(data['problem'] == 'regression'),
        random_state=1337,
        quantile_transform=True,
        output_distribution=args.quantile_dist,
        quantile_noise=qn,
        n_quantiles=args.n_quantiles,
    )

    X_train, y_train = data['X_train'], data['y_train']
    preprocessor.fit(X_train, y_train)
    if args.data_subsample > 1.:
        args.data_subsample = int(args.data_subsample)

    # Do not subsample data in the pretraining!
    if args.pretrain == 0 and args.data_subsample != 1. \
            and args.data_subsample < X_train.shape[0]:
        print(f'Subsample the data by ds={args.data_subsample}')
        X_train, _, y_train, _ = train_test_split(
            X_train,
            y_train,
            train_size=args.data_subsample,
            random_state=1377,
            stratify=(y_train
                      if data['problem'] == 'classification' else None))

    use_data_val = ('X_valid' in data and 'y_valid' in data
                    and (not args.split_train_as_val))
    if use_data_val:
        X_valid, y_valid = data['X_valid'], data['y_valid']
    else:
        # Merge with the valid set, and cut it ourselves
        if 'X_valid' in data:
            X_train = pd.concat([X_train, data['X_valid']], axis=0)
            y_train = np.concatenate([y_train, data['y_valid']], axis=0)

        X_train, X_valid, y_train, y_valid = train_test_split(
            X_train,
            y_train,
            test_size=0.2,
            random_state=1377,
            stratify=(y_train
                      if data['problem'] == 'classification' else None))

    # Transform dataset
    X_train, y_train = preprocessor.transform(X_train, y_train)
    X_valid, y_valid = preprocessor.transform(X_valid, y_valid)
    X_test, y_test = preprocessor.transform(data['X_test'], data['y_test'])

    # Save preprocessor
    with open(pjoin('logs', args.name, 'preprocessor.pkl'), 'wb') as op:
        pickle.dump(preprocessor, op)

    metric = data.get('metric', ('classification_error' if data['problem']
                                 == 'classification' else 'mse'))

    # Modify args based on the dataset
    args.in_features = X_train.shape[1]
    args.problem = data['problem']
    args.num_classes = data.get('num_classes', 1)
    args.data_addi_tree_dim = data.get('addi_tree_dim', 0)

    # Modify based on if doing pretraining!
    if args.pretrain > 0:
        assert args.pretraining_ratio > 0.
        if args.pretrain == 1:
            args.problem = 'pretrain_mask'
        elif args.pretrain == 2:
            args.problem = 'pretrain_recon'
        elif args.pretrain == 3:
            args.problem = 'pretrain_recon2'
        else:
            raise NotImplementedError('Wrong pretrain: ' + str(args.pretrain))

        metric = 'pretrain_loss'
        args.num_classes = args.in_features
        args.data_addi_tree_dim = (-args.in_features) + 1
        # Use both train/val as training set, and use test as val
        X_train, X_valid = np.concatenate([X_train, X_valid], axis=0), X_test
        y_train, y_valid = X_train, X_valid

    print(
        f'X_train: {X_train.shape}, X_valid: {X_valid.shape}, X_test: {X_test.shape}'
    )
    # Model
    model, step_callbacks = getattr(lib.arch,
                                    args.arch + 'Block').load_model_by_hparams(
                                        args, ret_step_callback=True)

    # Initialize bias before sending to cuda
    if 'init_bias' in args and args.init_bias and args.problem == 'classification':
        model.set_bias(y_train)

    model.to(device)

    # if torch.cuda.device_count() > 1:
    #     model = nn.DataParallel(model)

    # Load from pretrained model. Since last fc layer has diff size
    if getattr(args, 'load_from_pretrain', None) is not None:
        print("=> using pre-trained model '{}'".format(
            args.load_from_pretrain))
        path = pjoin('logs', args.load_from_pretrain, "checkpoint_best.pth")
        checkpoint = torch.load(path)

        model_state = model.state_dict()
        pretrained_state = {
            k: v
            for k, v in checkpoint['model'].items()
            if k in model_state and v.size() == model_state[k].size()
        }
        print('Pre-load the following weights:')
        print(list(pretrained_state.keys()))
        print('Ignore the following weights:')
        print([k for k in model_state if k not in pretrained_state])
        model_state.update(pretrained_state)
        model.load_state_dict(model_state)

    from qhoptim.pyt import QHAdam
    optimizer_params = {'nus': (0.7, 1.0), 'betas': (0.95, 0.998)}

    trainer = lib.Trainer(
        model=model,
        experiment_name=args.name,
        warm_start=True,  # To handle the interruption on v server
        Optimizer=QHAdam,
        optimizer_params=optimizer_params,
        lr=args.lr,
        lr_warmup_steps=args.lr_warmup_steps,
        verbose=False,
        n_last_checkpoints=5,
        step_callbacks=step_callbacks,  # Temperature annealing
        fp16=args.fp16,
        problem=args.problem,
        pretraining_ratio=args.pretraining_ratio,
        opt_only_last_layer=(args.load_from_pretrain is not None
                             and args.opt_only_last_layer),
        freeze_steps=(0 if args.load_from_pretrain is None else
                      args.freeze_steps),
    )

    assert metric in [
        'negative_auc', 'classification_error', 'mse', 'multiple_mse',
        'pretrain_loss'
    ]
    eval_fn = getattr(trainer, 'evaluate_' + metric)

    # Before we start, we will need to select the batch size if unspecified
    if args.batch_size is None or args.batch_size < 0:
        assert device != 'cpu', 'Have to specify batch size when using CPU'
        args.batch_size = choose_batch_size(trainer,
                                            X_train,
                                            y_train,
                                            device,
                                            max_bs=args.max_bs,
                                            min_bs=args.min_bs)
    else:
        try:
            with torch.no_grad():
                res = model(
                    torch.as_tensor(X_train[:(2 * args.batch_size)],
                                    device=device))
            # trigger data-aware init
        except RuntimeError as e:
            handle_oom_error(e, args)

    # Then show hparams after deciding the batch size
    print("experiment:", args.name)
    print("Args:")
    print(args)

    # Then record hparams
    saved_args = pjoin('logs', args.name, 'hparams.json')
    json.dump(vars(args), open(saved_args, 'w'))

    # record hparams again, since logs/{args.name} will be deleted!
    os.makedirs(pjoin('logs', 'hparams'), exist_ok=True)
    json.dump(vars(args), open(pjoin('logs', 'hparams', args.name), 'w'))

    # To make sure when rerunning the err history and time are accurate,
    # we save the whole history in training.json.
    recorder = lib.Recorder(path=pjoin('logs', args.name))

    ntf_diff, ntf = 0., None  # Record number of trees assigned to each feature
    st_time = time.time()
    for batch in lib.iterate_minibatches(X_train,
                                         y_train,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         epochs=float('inf')):
        # Handle removing missing by sampling from a Gaussian!
        try:
            metrics = trainer.train_on_batch(*batch, device=device)
        except RuntimeError as e:
            handle_oom_error(e, args)

        if recorder.loss_history is not None:
            recorder.loss_history.append(float(metrics['loss']))

        if trainer.step % args.report_frequency == 0:
            trainer.save_checkpoint()
            trainer.remove_old_temp_checkpoints()
            trainer.average_checkpoints(out_tag='avg')
            trainer.load_checkpoint(tag='avg')

            err = eval_fn(X_valid,
                          y_valid,
                          device=device,
                          batch_size=args.batch_size * 2)

            # Handle per-task early stopping when metric='multiple_mse'
            if metric == 'multiple_mse':
                # Initialize
                if not isinstance(recorder.best_err, list):
                    recorder.best_err = [float('inf') for _ in range(len(err))]
                    recorder.best_step_err = [0 for _ in range(len(err))]

                for idx, (e, be) in enumerate(zip(err, recorder.best_err)):
                    if e < be:
                        recorder.best_err[idx] = e
                        recorder.best_step_err[idx] = trainer.step
                        trainer.save_checkpoint(tag='best_t%d' % idx)
                if recorder.err_history is not None:
                    recorder.err_history.append(np.mean(err))

            else:
                if err < recorder.best_err:
                    recorder.best_err = err
                    recorder.best_step_err = trainer.step
                    trainer.save_checkpoint(tag='best')
                if recorder.err_history is not None:
                    recorder.err_history.append(err)

            recorder.step = trainer.step
            recorder.run_time += float(time.time() - st_time)
            st_time = time.time()

            recorder.save_record()

            trainer.load_checkpoint()  # last
            if recorder.loss_history is not None and recorder.err_history is not None:
                save_loss_fig(recorder.loss_history, recorder.err_history,
                              pjoin('loss_figs', f'{args.name}.jpg'))

            cur_ntf = trainer.model.get_num_trees_assigned_to_each_feature()
            if cur_ntf is None:  # ODST has no per-feature tree counts (NTF)
                ntf_diff = 0.
            else:
                if ntf is not None:
                    ntf_diff = (torch.sum(torch.abs(cur_ntf - ntf)) * 100.0 /
                                torch.sum(cur_ntf)).item()
                ntf = cur_ntf

            if trainer.step == 1:
                print("Step\tVal_Err\tTime(s)\tNTF(%)")
            print('{}\t{}\t{:.0f}\t{:.2f}%'.format(trainer.step,
                                                   np.around(err, 5),
                                                   recorder.run_time,
                                                   ntf_diff))

        bstep = recorder.best_step_err
        if isinstance(bstep, list):
            bstep = np.max(bstep)

        min_steps = max(bstep, getattr(args, 'anneal_steps', -1))
        if trainer.step > min_steps + args.early_stopping_rounds:
            print('BREAK. There is no improvement for {} steps'.format(
                args.early_stopping_rounds))
            break

        if args.lr_decay_steps > 0 \
                and trainer.step > bstep + args.lr_decay_steps \
                and trainer.step > (recorder.lr_decay_step + args.lr_decay_steps):
            lr_before = trainer.lr
            trainer.decrease_lr(ratio=0.2, min_lr=1e-6)
            recorder.lr_decay_step = trainer.step
            print('LR: %.2e -> %.2e' % (lr_before, trainer.lr))

        if 0 < args.max_rounds < trainer.step:
            print('End. It reaches the maximum rounds %d' % args.max_rounds)
            break

        if recorder.run_time > args.max_time:
            print('End. It reaches the maximum run time %d (s)' %
                  args.max_time)
            break

    print("Best step: ", recorder.best_step_err)
    print("Best Val Error: ", recorder.best_err)

    if args.pretrain:
        # Submit another sbatch job for the real training
        print('***** FINISH pretraining! *****')
    else:
        max_step = trainer.step
        # Run test time
        if metric != 'multiple_mse':
            trainer.load_checkpoint(tag='best')
            test_err = eval_fn(X_test,
                               y_test,
                               device=device,
                               batch_size=2 * args.batch_size)
        else:
            test_err = []
            for idx in range(len(recorder.best_err)):
                trainer.load_checkpoint(tag='best_t%d' % idx)
                tmp = eval_fn(X_test,
                              y_test,
                              device=device,
                              batch_size=2 * args.batch_size)
                test_err.append(tmp[idx])

        print("Test Error rate: {}".format(test_err))

        # Save csv results
        results = dict()
        results['test_err'] = test_err
        results['val_err'] = recorder.best_err
        results['best_step_err'] = recorder.best_step_err
        results['max_step'] = max_step
        results['time(s)'] = '%d' % recorder.run_time
        results['fold'] = args.fold
        results['fp16'] = args.fp16
        results['batch_size'] = args.batch_size
        results['finetuned'] = int(args.load_from_pretrain is not None)
        # Append the hyperparameters
        rs_hparams = getattr(lib.arch, args.arch +
                             'Block').get_model_specific_rs_hparams()
        for k in rs_hparams:
            results[k] = getattr(args, k)

        results = getattr(lib.arch,
                          args.arch + 'Block').add_model_specific_results(
                              results, args)
        results['name'] = args.name

        os.makedirs('results', exist_ok=True)
        dataset_postfix = f'_ds{args.data_subsample}' if args.data_subsample != 1. else ''
        if metric != 'multiple_mse':
            csv_file = f'results/{args.dataset}{dataset_postfix}_{args.arch}_new10.csv'
            lib.utils.output_csv(csv_file, results)
        else:
            csv_file = f'results/{args.dataset}{dataset_postfix}_{args.arch}_new10.ssv'
            lib.utils.output_csv(csv_file, results, delimiter=';')
        print('output results to %s' % csv_file)

    # Clean up
    open(pjoin('logs', args.name, 'MY_IS_FINISHED'), 'a').close()
    trainer.remove_old_temp_checkpoints(number_ckpts_to_keep=0)
Example 9
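# Presumed imports; `args`, make_checkpoint_dir, and init_model are defined
# elsewhere in this script.
import os

import torch

import lib
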
def main():
    print("Loading train data from {}".format(
        os.path.join(args.data_folder, args.train_data)))
    print("Loading valid data from {}".format(
        os.path.join(args.data_folder, args.valid_data)))

    train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data))
    valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data),
                             itemmap=train_data.itemmap)
    make_checkpoint_dir()

    # Set all the parameters according to the defined arguments.
    input_size = len(train_data.items)
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    output_size = input_size
    batch_size = args.batch_size
    dropout_input = args.dropout_input
    dropout_hidden = args.dropout_hidden
    embedding_dim = args.embedding_dim
    final_act = args.final_act
    loss_type = args.loss_type
    optimizer_type = args.optimizer_type
    lr = args.lr
    weight_decay = args.weight_decay
    momentum = args.momentum
    eps = args.eps
    n_epochs = args.n_epochs
    time_sort = args.time_sort
    # Loss function (CUDA is used with cross-entropy only).
    loss_function = lib.LossFunction(
        loss_type=loss_type,
        use_cuda=args.cuda)
    if not args.is_eval:  # Training
        # Initialize the model
        model = lib.GRU4REC(input_size,
                            hidden_size,
                            output_size,
                            final_act=final_act,
                            num_layers=num_layers,
                            use_cuda=args.cuda,
                            batch_size=batch_size,
                            dropout_input=dropout_input,
                            dropout_hidden=dropout_hidden,
                            embedding_dim=embedding_dim)
        # Weight initialization
        init_model(model)
        # Optimizer
        optimizer = lib.Optimizer(model.parameters(),
                                  optimizer_type=optimizer_type,
                                  lr=lr,
                                  weight_decay=weight_decay,
                                  momentum=momentum,
                                  eps=eps)
        # Trainer class
        trainer = lib.Trainer(model,
                              train_data=train_data,
                              eval_data=valid_data,
                              optim=optimizer,
                              use_cuda=args.cuda,
                              loss_func=loss_function,
                              batch_size=batch_size,
                              args=args)
        print('#### START TRAINING....')
        trainer.train(0, n_epochs - 1)
    else:  # Testing
        if args.load_model is not None:
            print("Loading pre-trained model from {}".format(args.load_model))
            try:
                checkpoint = torch.load(args.load_model)
            except Exception:
                checkpoint = torch.load(
                    args.load_model, map_location=lambda storage, loc: storage)
            model = checkpoint["model"]
            model.gru.flatten_parameters()
            evaluation = lib.Evaluation(model,
                                        loss_function,
                                        use_cuda=args.cuda,
                                        k=args.k_eval)
            loss, recall, mrr = evaluation.eval(valid_data, batch_size)
            print("Final result: recall = {:.2f}, mrr = {:.2f}".format(
                recall, mrr))
        else:
            print("No Pretrained Model was found!")