def load_data(opt):
    dataset = torch.load(opt.data)
    dicts = dataset["dicts"]

    # Filter test data.
    if opt.var_length:
        _, dataset["test"]['src'], dataset["test"]['tgt'], dataset["test"]['trees'] = sort_test(dataset)

    dataset["train_xe"]['trees'] = get_data_trees(dataset["train_xe"]['trees'])
    dataset["train_pg"]['trees'] = get_data_trees(dataset["train_pg"]['trees'])
    dataset["valid"]['trees'] = get_data_trees(dataset["valid"]['trees'])
    dataset["test"]['trees'] = get_data_trees(dataset["test"]['trees'])

    dataset["train_xe"]['leafs'] = get_data_leafs(dataset["train_xe"]['trees'], dicts['src'])
    dataset["train_pg"]['leafs'] = get_data_leafs(dataset["train_pg"]['trees'], dicts['src'])
    dataset["valid"]['leafs'] = get_data_leafs(dataset["valid"]['trees'], dicts['src'])
    dataset["test"]['leafs'] = get_data_leafs(dataset["test"]['trees'], dicts['src'])

    supervised_data = lib.Dataset(dataset["train_xe"], opt.batch_size, opt.cuda, eval=False)
    rl_data = lib.Dataset(dataset["train_pg"], opt.batch_size, opt.cuda, eval=False)
    valid_data = lib.Dataset(dataset["valid"], opt.batch_size, opt.cuda, eval=True)
    test_data = lib.Dataset(dataset["test"], opt.batch_size, opt.cuda, eval=True)
    vis_data = lib.Dataset(dataset["test"], 1, opt.cuda, eval=True)  # batch_size set to 1 for case study

    print(" * vocabulary size. source = %d; target = %d" % (dicts["src"].size(), dicts["tgt"].size()))
    print(" * number of XENT training sentences. %d" % len(dataset["train_xe"]["src"]))
    print(" * number of PG training sentences. %d" % len(dataset["train_pg"]["src"]))
    print(" * maximum batch size. %d" % opt.batch_size)

    return dicts, supervised_data, rl_data, valid_data, test_data, vis_data
def main():
    print('Loading train data from "%s"' % opt.data)
    dataset = torch.load(opt.data)
    dicts = dataset["dicts"]

    if opt.load_from is None:
        print("REQUIRES PATH TO THE TRAINED MODEL\n")
        return  # without a checkpoint, model and optim would be undefined below
    else:
        print("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(opt.load_from)
        model = checkpoint["model"]
        optim = checkpoint["optim"]

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])
        # model = torch.nn.DataParallel(model)
        # torch.distributed.init_process_group(backend='tcp', rank=0, world_size=2)
        # model = torch.nn.parallel.DistributedDataParallel(model)

    # Generate translations for the test set.
    print('Creating test data\n')
    src, tgt, pos = makeTestData(opt.test_src, dicts)
    res = {}
    res["src"] = src
    res["tgt"] = tgt
    res["pos"] = pos
    test_data = lib.Dataset(res, opt.batch_size, opt.cuda, eval=False)

    pred_file = opt.test_src + ".pred"
    predict(model, dicts, test_data, pred_file)
    print('Generated translations successfully\n')
def load_data(opt):
    dataset = torch.load(opt.data)
    dicts = dataset["dicts"]

    supervised_data = lib.Dataset(dataset["train"], "sl_train", opt.batch_size, opt.cuda, eval=False)
    rl_data = lib.Dataset(dataset["train"], "rl_train", opt.batch_size, opt.cuda, eval=False)
    valid_data = lib.Dataset(dataset["valid"], "val", 50, opt.cuda, eval=True)  # opt.batch_size
    test_data = lib.Dataset(dataset["test"], "test", 50, opt.cuda, eval=True)

    if "DEV" in dataset:
        DEV = lib.Dataset(dataset['DEV'], "DEV", opt.batch_size, opt.cuda, eval=True)
        EVAL = lib.Dataset(dataset['EVAL'], "EVAL", opt.batch_size, opt.cuda, eval=True)
    else:
        DEV = None
        EVAL = None

    print(" * vocabulary size. source = %d; target = %d" % (dicts["src"].size(), dicts["tgt"].size()))
    print(" * number of XENT training sentences. %d" % len(dataset["train"]["src"]))
    print(" * number of PG training sentences. %d" % len(dataset["train"]["src"]))
    print(" * number of val sentences. %d" % len(dataset["valid"]["src"]))
    print(" * number of test sentences. %d" % len(dataset["test"]["src"]))
    if "DEV" in dataset:
        print(" * number of DEV sentences. %d" % len(dataset["DEV"]["src"]))
        print(" * number of EVAL sentences. %d" % len(dataset["EVAL"]["src"]))
    print(" * maximum batch size. %d" % opt.batch_size)

    return dicts, supervised_data, rl_data, valid_data, test_data, DEV, EVAL
def main():
    print('Loading data from "%s"' % opt.data)
    dataset = torch.load(opt.data)

    supervised_data = lib.Dataset(dataset["train_xe"], opt.batch_size, opt.cuda, eval=False)
    bandit_data = lib.Dataset(dataset["train_pg"], opt.batch_size, opt.cuda, eval=False)
    valid_data = lib.Dataset(dataset["valid"], opt.batch_size, opt.cuda, eval=True)
    test_data = lib.Dataset(dataset["test"], opt.batch_size, opt.cuda, eval=True)

    dicts = dataset["dicts"]
    print(" * vocabulary size. source = %d; target = %d" % (dicts["src"].size(), dicts["tgt"].size()))
    print(" * number of XENT training sentences. %d" % len(dataset["train_xe"]["src"]))
    print(" * number of PG training sentences. %d" % len(dataset["train_pg"]["src"]))
    print(" * maximum batch size. %d" % opt.batch_size)

    print("Building model...")
    use_critic = opt.start_reinforce is not None

    if opt.load_from is None:
        model, optim = create_model(lib.NMTModel, dicts, dicts["tgt"].size())
        checkpoint = None
    else:
        print("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(opt.load_from)
        model = checkpoint["model"]
        optim = checkpoint["optim"]
        opt.start_epoch = checkpoint["epoch"] + 1

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])

    # Start reinforce training immediately.
    if opt.start_reinforce == -1:
        opt.start_decay_at = opt.start_epoch
        opt.start_reinforce = opt.start_epoch

    # Check that end_epoch is large enough.
    if use_critic:
        assert opt.start_epoch + opt.critic_pretrain_epochs - 1 <= opt.end_epoch, \
            "Please increase -end_epoch to perform pretraining!"

    nParams = sum([p.nelement() for p in model.parameters()])
    print("* number of parameters: %d" % nParams)

    # Metrics.
    metrics = {}
    metrics["nmt_loss"] = lib.Loss.weighted_xent_loss
    metrics["critic_loss"] = lib.Loss.weighted_mse
    metrics["sent_reward"] = lib.Reward.sentence_bleu
    metrics["corp_reward"] = lib.Reward.corpus_bleu
    if opt.pert_func is not None:
        opt.pert_func = lib.PertFunction(opt.pert_func, opt.pert_param)

    # Evaluate model on heldout dataset.
    if opt.eval:
        evaluator = lib.Evaluator(model, metrics, dicts, opt)
        # On validation set.
        pred_file = opt.load_from.replace(".pt", ".valid.pred")
        evaluator.eval(valid_data, pred_file)
        # On test set.
        pred_file = opt.load_from.replace(".pt", ".test.pred")
        evaluator.eval(test_data, pred_file)
    elif opt.eval_sample:
        opt.no_update = True
        critic, critic_optim = create_critic(checkpoint, dicts, opt)
        reinforce_trainer = lib.ReinforceTrainer(model, critic, bandit_data, test_data,
                                                 metrics, dicts, optim, critic_optim, opt)
        reinforce_trainer.train(opt.start_epoch, opt.start_epoch, False)
    elif opt.sup_train_on_bandit:
        optim.set_lr(opt.reinforce_lr)
        xent_trainer = lib.Trainer(model, bandit_data, test_data, metrics, dicts, optim, opt)
        xent_trainer.train(opt.start_epoch, opt.start_epoch)
    else:
        print("okay")
        xent_trainer = lib.Trainer(model, supervised_data, valid_data, metrics, dicts, optim, opt)
        if use_critic:
            start_time = time.time()
            # Supervised training.
            xent_trainer.train(opt.start_epoch, opt.start_reinforce - 1, start_time)
            # Create critic here so it does not affect the random seed.
            critic, critic_optim = create_critic(checkpoint, dicts, opt)
            # Pretrain critic.
            if opt.critic_pretrain_epochs > 0:
                reinforce_trainer = lib.ReinforceTrainer(model, critic, supervised_data, test_data,
                                                         metrics, dicts, optim, critic_optim, opt)
                reinforce_trainer.train(opt.start_reinforce,
                                        opt.start_reinforce + opt.critic_pretrain_epochs - 1,
                                        True, start_time)
            # Reinforce training.
            reinforce_trainer = lib.ReinforceTrainer(model, critic, bandit_data, test_data,
                                                     metrics, dicts, optim, critic_optim, opt)
            reinforce_trainer.train(opt.start_reinforce + opt.critic_pretrain_epochs,
                                    opt.end_epoch, False, start_time)
        # Supervised training only.
        else:
            xent_trainer.train(opt.start_epoch, opt.end_epoch)
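# The actual actor-critic update lives inside lib.ReinforceTrainer and is not shown here.
# Below is a minimal, self-contained sketch of the underlying technique (REINFORCE with a
# learned baseline), assuming token-level log-probabilities and a scalar sentence reward;
# the function and variable names are illustrative, not the repository's API.
import torch


def policy_gradient_loss(log_probs, reward, baseline):
    # log_probs: (seq_len,) log-probabilities of the sampled tokens
    # reward:    scalar sentence-level reward (e.g. sentence BLEU / charF)
    # baseline:  scalar critic prediction of the expected reward
    advantage = (reward - baseline).detach()          # do not backprop through the advantage
    actor_loss = -(advantage * log_probs).sum()       # REINFORCE objective
    critic_loss = (baseline - reward.detach()) ** 2   # critic regresses toward the observed reward
    return actor_loss, critic_loss


# Example with dummy values:
log_probs = torch.log(torch.tensor([0.4, 0.3, 0.2]))
reward = torch.tensor(0.55)
baseline = torch.tensor(0.50, requires_grad=True)
actor_loss, critic_loss = policy_gradient_loss(log_probs, reward, baseline)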
import lib

if __name__ == '__main__':
    data_file = "data/suumo_sess_data_eval.csv"
    batch_size = 2

    data = lib.Dataset(data_file)
    print(data.get_click_offset())

    dataloader = lib.DataLoader(data, batch_size)
    for i, (input, target, mask) in enumerate(dataloader):
        if i < 10:
            print(input, target, mask)
def main():
    if args.wandb_on:
        wandb.init(project=args.wandb_project,
                   name=args.model_name + '-' + args.data_folder.split('/')[2] + '-' + args.loss_type)
        wandb.config.update({'hostname': os.popen('hostname').read().split('.')[0]})
        wandb.config.update(args)

    if args.item2idx_dict is not None:
        item2idx_dict = pd.read_pickle(os.path.join(args.data_folder, args.item2idx_dict))
    else:
        item2idx_dict = None

    print("Loading train data from {}".format(os.path.join(args.data_folder, args.train_data)))
    print("Loading valid data from {}".format(os.path.join(args.data_folder, args.valid_data)))

    train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data))
    valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data), itemmap=train_data.itemmap)

    if args.debug:
        train_data.df.to_csv(os.path.join(args.data_folder, 'GRU4Rec-train-data.csv'))
        valid_data.df.to_csv(os.path.join(args.data_folder, 'GRU4Rec-valid-data.csv'))

    make_checkpoint_dir()

    # Set the remaining parameters from the parsed arguments.
    args.input_size = len(train_data.items)
    args.output_size = args.input_size

    # Loss function (CUDA is used with cross entropy only).
    loss_function = lib.LossFunction(loss_type=args.loss_type, use_cuda=args.cuda)

    if not args.is_eval:  # training
        # Initialize the model.
        model = lib.GRU4REC(args.input_size, args.hidden_size, args.output_size,
                            final_act=args.final_act,
                            num_layers=args.num_layers,
                            use_cuda=args.cuda,
                            batch_size=args.batch_size,
                            dropout_input=args.dropout_input,
                            dropout_hidden=args.dropout_hidden,
                            embedding_dim=args.embedding_dim)

        # Weight initialization.
        init_model_weight(model)

        if args.wandb_on:
            wandb.watch(model, log="all")

        # Optimizer.
        optimizer = lib.Optimizer(model.parameters(),
                                  optimizer_type=args.optimizer_type,
                                  lr=args.lr,
                                  weight_decay=args.weight_decay,
                                  momentum=args.momentum,
                                  eps=args.eps)

        # Trainer class.
        trainer = lib.Trainer(model,
                              train_data=train_data,
                              eval_data=valid_data,
                              optim=optimizer,
                              use_cuda=args.cuda,
                              loss_func=loss_function,
                              batch_size=args.batch_size,
                              args=args)

        print('#### START TRAINING....')
        trainer.train(0, args.n_epochs - 1)
    else:  # testing
        if args.load_model is not None:
            print("Loading pre-trained model from {}".format(args.load_model))
            try:
                checkpoint = torch.load(args.load_model)
            except Exception:
                checkpoint = torch.load(args.load_model, map_location=lambda storage, loc: storage)

            model = lib.GRU4REC(args.input_size, args.hidden_size, args.output_size,
                                final_act=args.final_act,
                                num_layers=args.num_layers,
                                use_cuda=args.cuda,
                                batch_size=args.batch_size,
                                dropout_input=args.dropout_input,
                                dropout_hidden=args.dropout_hidden,
                                embedding_dim=args.embedding_dim)
            model.load_state_dict(checkpoint["state_dict"])
            model.gru.flatten_parameters()

            evaluation = lib.Evaluation(model, loss_function, use_cuda=args.cuda, k=args.k_eval)
            loss, recall, mrr = evaluation.eval(valid_data, args.batch_size)
            print("Final result: recall = {:.2f}, mrr = {:.2f}".format(recall, mrr))
        else:
            print("No Pretrained Model was found!")
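# lib.LossFunction is not shown in this snippet. As an illustration of one loss that the
# loss_type flag typically selects in GRU4Rec-style models, here is a minimal sketch of the
# TOP1 ranking loss (Hidasi et al., ICLR 2016); the function name and tensor shapes are
# assumptions, not the repository's exact implementation.
import torch


def top1_loss(pos_scores, neg_scores):
    # pos_scores: (batch,) score of the target item for each session
    # neg_scores: (batch, n_neg) scores of sampled negative items
    diff = neg_scores - pos_scores.unsqueeze(1)
    return (torch.sigmoid(diff) + torch.sigmoid(neg_scores ** 2)).mean()


# Example with random scores:
loss = top1_loss(torch.randn(4), torch.randn(4, 8))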
def main():
    assert (opt.start_epoch <= opt.end_epoch), 'The start epoch should be <= End Epoch'

    log('Loading data from "%s"' % opt.data)
    dataset = torch.load(opt.data)

    supervised_data = lib.Dataset(dataset["train_xe"], opt.batch_size, opt.cuda, eval=False)
    bandit_data = lib.Dataset(dataset["train_pg"], opt.batch_size, opt.cuda, eval=False)
    sup_valid_data = lib.Dataset(dataset["sup_valid"], opt.eval_batch_size, opt.cuda, eval=True)
    bandit_valid_data = lib.Dataset(dataset["bandit_valid"], opt.eval_batch_size, opt.cuda, eval=True)
    test_data = lib.Dataset(dataset["test"], opt.eval_batch_size, opt.cuda, eval=True)

    dicts = dataset["dicts"]
    log(" * vocabulary size. source = %d; target = %d" % (dicts["src"].size(), dicts["tgt"].size()))
    log(" * number of XENT training sentences. %d" % len(dataset["train_xe"]["src"]))
    log(" * number of PG training sentences. %d" % len(dataset["train_pg"]["src"]))
    log(" * number of bandit valid sentences. %d" % len(dataset["bandit_valid"]["src"]))
    log(" * number of test sentences. %d" % len(dataset["test"]["src"]))
    log(" * maximum batch size. %d" % opt.batch_size)

    log("Building model...")
    use_critic = opt.start_reinforce is not None

    if opt.load_from is None:
        model, optim = create_model(lib.NMTModel, dicts, dicts["tgt"].size())
        checkpoint = None
    else:
        log("Loading from checkpoint at %s" % opt.load_from)
        checkpoint = torch.load(opt.load_from)
        model = checkpoint["model"]
        optim = checkpoint["optim"]
        opt.start_epoch = checkpoint["epoch"] + 1

    # GPU.
    if opt.cuda:
        model.cuda(opt.gpus[0])

    # Start reinforce training immediately.
    if opt.start_reinforce == -1:
        opt.start_decay_at = opt.start_epoch
        opt.start_reinforce = opt.start_epoch

    nParams = sum([p.nelement() for p in model.parameters()])
    log("* number of parameters: %d" % nParams)

    # Metrics.
    metrics = {}
    metrics["nmt_loss"] = lib.Loss.weighted_xent_loss
    metrics["critic_loss"] = lib.Loss.weighted_mse
    log(" Simulated Feedback: charF score\nEvaluation: charF and Corpus BLEU")
    instance_charF = lib.Reward.charFEvaluator(dict_tgt=dicts["tgt"])
    metrics["sent_reward"] = instance_charF.sentence_charF
    metrics["corp_reward"] = lib.Reward.corpus_bleu

    # Evaluate model on heldout dataset.
    if opt.eval:
        evaluator = lib.Evaluator(model, metrics, dicts, opt, trpro_logger)
        # On bandit test data.
        pred_file = opt.load_from.replace(".pt", ".test.pred")
        tgt_file = opt.load_from.replace(".pt", ".test.tgt")
        evaluator.eval(test_data, pred_file)
        evaluator.eval(test_data, pred_file=None, tgt_file=tgt_file)
    else:
        xent_trainer = lib.Trainer(model, supervised_data, sup_valid_data, metrics, dicts, optim, opt,
                                   trainprocess_logger=trpro_logger)
        if use_critic:
            start_time = time.time()
            # Supervised training: used when running pretrain + bandit together.
            xent_trainer.train(opt.start_epoch, opt.start_reinforce - 1, start_time)
            # Actor-critic.
            critic, critic_optim = create_critic(checkpoint, dicts, opt)
            reinforce_trainer = lib.ReinforceTrainer(
                model, critic, bandit_data, bandit_valid_data, test_data, metrics, dicts,
                optim, critic_optim, opt,
                trainprocess_logger=trpro_logger,
                stat_logger=stat_logger,
                samples_logger=samples_logger)
            reinforce_trainer.train(opt.start_reinforce, opt.end_epoch, start_time)
            if opt.use_bipnmt:
                stat_logger.close_file()
                samples_logger.close_file()
        else:
            # Supervised training only.
            xent_trainer.train(opt.start_epoch, opt.end_epoch)

    trpro_logger.close_file()
def main(): print("Loading train data from {}".format( os.path.join(args.data_folder, args.train_data))) print("Loading valid data from {}".format( os.path.join(args.data_folder, args.valid_data))) print("Loading test data from {}\n".format( os.path.join(args.data_folder, args.test_data))) train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data)) valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data), itemmap=train_data.itemmap) test_data = lib.Dataset(os.path.join(args.data_folder, args.test_data)) if not args.is_eval: make_checkpoint_dir() input_size = len(train_data.items) hidden_size = args.hidden_size num_layers = args.num_layers output_size = input_size batch_size = args.batch_size dropout_input = args.dropout_input dropout_hidden = args.dropout_hidden embedding_dim = args.embedding_dim final_act = args.final_act loss_type = args.loss_type optimizer_type = args.optimizer_type lr = args.lr weight_decay = args.weight_decay momentum = args.momentum eps = args.eps n_epochs = args.n_epochs time_sort = args.time_sort if not args.is_eval: model = lib.GRU4REC(input_size, hidden_size, output_size, final_act=final_act, num_layers=num_layers, use_cuda=args.cuda, batch_size=batch_size, dropout_input=dropout_input, dropout_hidden=dropout_hidden, embedding_dim=embedding_dim) # init weight # See Balazs Hihasi(ICLR 2016), pg.7 init_model(model) optimizer = lib.Optimizer(model.parameters(), optimizer_type=optimizer_type, lr=lr, weight_decay=weight_decay, momentum=momentum, eps=eps) loss_function = lib.LossFunction(loss_type=loss_type, use_cuda=args.cuda) trainer = lib.Trainer(model, train_data=train_data, eval_data=valid_data, optim=optimizer, use_cuda=args.cuda, loss_func=loss_function, args=args) trainer.train(0, n_epochs - 1) else: if args.load_model is not None: print("Loading pre trained model from {}".format(args.load_model)) checkpoint = torch.load(args.load_model) model = checkpoint["model"] model.gru.flatten_parameters() optim = checkpoint["optim"] loss_function = lib.LossFunction(loss_type=loss_type, use_cuda=args.cuda) evaluation = lib.Evaluation(model, loss_function, use_cuda=args.cuda) loss, recall, mrr = evaluation.eval(valid_data) print("Final result: recall = {:.2f}, mrr = {:.2f}".format( recall, mrr)) else: print("Pre trained model is None!")
skf = StratifiedKFold(shuffle=True, random_state=0)
test_pred = np.zeros(shape=len(y_test))
test_logits = None

for train_index, val_index in skf.split(X_train, y_train):
    X_tr = X_train[train_index]
    y_tr = y_train[train_index]
    X_val = X_train[val_index]
    y_val = y_train[val_index]

    data = lib.Dataset(dataset=None, random_state=0,
                       X_train=X_tr, y_train=y_tr,
                       X_valid=X_val, y_valid=y_val,
                       X_test=X_test, y_test=y_test)

    num_features = data.X_train.shape[1]
    num_classes = len(set(data.y_train))

    model = nn.Sequential(
        lib.DenseBlock(num_features,
                       layer_dim=216,
                       num_layers=1,
                       tree_dim=num_classes + 1,
                       flatten_output=False,
                       depth=6,
                       choice_function=lib.entmax15,
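# The remainder of the fold loop is truncated above. A minimal, self-contained sketch of
# the cross-validation ensembling pattern it appears to follow (average the test predictions
# of the per-fold models); the classifier and synthetic data here are stand-ins, not the
# DenseBlock model used in the snippet.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

X_train, y_train = make_classification(n_samples=200, n_classes=2, random_state=0)
X_test, y_test = make_classification(n_samples=50, n_classes=2, random_state=1)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
test_proba = np.zeros(len(y_test))
for tr_idx, va_idx in skf.split(X_train, y_train):
    clf = LogisticRegression(max_iter=1000).fit(X_train[tr_idx], y_train[tr_idx])
    test_proba += clf.predict_proba(X_test)[:, 1] / skf.get_n_splits()  # average over folds
test_pred = (test_proba >= 0.5).astype(int)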
visitor_ids = all_df_data['visitorid'].unique()  # type is numpy.ndarray
visitor2idx = pd.Series(data=np.arange(len(visitor_ids)), index=visitor_ids)
visitormap = pd.DataFrame({
    'visitorid': visitor_ids,
    'visitor_idx': visitor2idx[visitor_ids].values
})

item_dic_file_path = os.path.join(args.data_folder, args.item_dic_data)
np.savetxt(item_dic_file_path, itemmap, fmt='%d')
visitor_dic_file_path = os.path.join(args.data_folder, args.visitor_dic_data)
np.savetxt(visitor_dic_file_path, visitormap, fmt='%d')

train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data),
                         visitormap=visitormap, itemmap=itemmap)
train_dataloader = lib.DataLoader(train_data, args.batch_size)
train_dataloader.dataset.df.to_csv("./train.csv")

valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data),
                         visitormap=visitormap, itemmap=itemmap)
valid_dataloader = lib.DataLoader(valid_data, args.batch_size)
valid_dataloader.dataset.df.to_csv("./valid.csv")

all_df_data = pd.concat([train_dataloader.dataset.df, valid_dataloader.dataset.df])
all_df_data.to_csv("./all.csv")

# Create train data.
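# A small self-contained example of the id-to-index mapping pattern used above, and of
# applying the resulting map back to a raw column with Series.map; the column names and
# toy values are illustrative only.
import numpy as np
import pandas as pd

events = pd.DataFrame({'visitorid': [42, 7, 42, 13]})
visitor_ids = events['visitorid'].unique()
visitor2idx = pd.Series(np.arange(len(visitor_ids)), index=visitor_ids)
events['visitor_idx'] = events['visitorid'].map(visitor2idx)
print(events)  # 42 -> 0, 7 -> 1, 13 -> 2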
def main(): print("Loading train data from {}".format( os.path.join(args.data_folder, args.train_data))) print("Loading valid data from {}".format( os.path.join(args.data_folder, args.valid_data))) train_data = lib.Dataset(os.path.join(args.data_folder, args.train_data)) valid_data = lib.Dataset(os.path.join(args.data_folder, args.valid_data), itemmap=train_data.itemmap) make_checkpoint_dir() #set all the parameters according to the defined arguments input_size = len(train_data.items) hidden_size = args.hidden_size num_layers = args.num_layers output_size = input_size batch_size = args.batch_size dropout_input = args.dropout_input dropout_hidden = args.dropout_hidden embedding_dim = args.embedding_dim final_act = args.final_act loss_type = args.loss_type optimizer_type = args.optimizer_type lr = args.lr weight_decay = args.weight_decay momentum = args.momentum eps = args.eps n_epochs = args.n_epochs time_sort = args.time_sort #loss function loss_function = lib.LossFunction( loss_type=loss_type, use_cuda=args.cuda) #cuda is used with cross entropy only if not args.is_eval: #training #Initialize the model model = lib.GRU4REC(input_size, hidden_size, output_size, final_act=final_act, num_layers=num_layers, use_cuda=args.cuda, batch_size=batch_size, dropout_input=dropout_input, dropout_hidden=dropout_hidden, embedding_dim=embedding_dim) #weights initialization init_model(model) #optimizer optimizer = lib.Optimizer(model.parameters(), optimizer_type=optimizer_type, lr=lr, weight_decay=weight_decay, momentum=momentum, eps=eps) #trainer class trainer = lib.Trainer(model, train_data=train_data, eval_data=valid_data, optim=optimizer, use_cuda=args.cuda, loss_func=loss_function, batch_size=batch_size, args=args) print('#### START TRAINING....') trainer.train(0, n_epochs - 1) else: #testing if args.load_model is not None: print("Loading pre-trained model from {}".format(args.load_model)) try: checkpoint = torch.load(args.load_model) except: checkpoint = torch.load( args.load_model, map_location=lambda storage, loc: storage) model = checkpoint["model"] model.gru.flatten_parameters() evaluation = lib.Evaluation(model, loss_function, use_cuda=args.cuda, k=args.k_eval) loss, recall, mrr = evaluation.eval(valid_data, batch_size) print("Final result: recall = {:.2f}, mrr = {:.2f}".format( recall, mrr)) else: print("No Pretrained Model was found!")
tmp_dir = TemporaryDirectory()
TMP_DIR = tmp_dir.name

NUM_EPOCHS = 2
MAX_TRIAL_NUM = 10
# Time (in seconds) to wait for each tuning experiment to complete.
WAITING_TIME = 36000 * 15  # 150 hours
MAX_RETRIES = 40  # it is recommended to have MAX_RETRIES >= 4 * MAX_TRIAL_NUM

args = parser.parse_args()
print("Loading train data from {}".format(os.path.join(args.data_folder, args.train_data)))
print("Loading valid data from {}".format(os.path.join(args.data_folder, args.valid_data)))
train = lib.Dataset(os.path.join(args.data_folder, args.train_data))
validation = lib.Dataset(os.path.join(args.data_folder, args.valid_data), itemmap=train.itemmap)

LOG_DIR = os.path.join(TMP_DIR, "experiments")
os.makedirs(LOG_DIR, exist_ok=True)
DATA_DIR = os.path.join(TMP_DIR, "data")
os.makedirs(DATA_DIR, exist_ok=True)

TRAIN_FILE_NAME = "GRU4Rec" + "_train.pkl"
train.df.to_pickle(os.path.join(DATA_DIR, TRAIN_FILE_NAME))
VAL_FILE_NAME = "GRU4Rec" + "_val.pkl"
validation.df.to_pickle(os.path.join(DATA_DIR, VAL_FILE_NAME))