def test(
        start_epoch=0,
        additional_epoch=90,
        lr=0.0001,
        optim="adam",
        leaky_relu=False,
        ndcg_gain_in_train="exp2",
        sigma=1.0,
        double_precision=False,
        standardize=False,
        small_dataset=False,
        debug=False,
        output_dir="/tmp/ranking_output/",
):
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    writer = SummaryWriter(output_dir)

    precision = torch.float64 if double_precision else torch.float32

    # get validation and test data:
    data_fold = 'Fold1'
    valid_loader, df_valid, test_loader, df_test = load_train_vali_data(
        data_fold, small_dataset)
    print(test_loader.num_features)
    if standardize:
        # fit the scaler on the validation split and apply it to the test split
        df_valid, scaler = valid_loader.train_scaler_and_transform()
        df_test = test_loader.apply_scaler(scaler)

    lambdarank_structure = [136, 64, 16]

    net = LambdaRank(lambdarank_structure,
                     leaky_relu=leaky_relu,
                     double_precision=double_precision,
                     sigma=sigma)
    device = get_device()
    net.to(device)
    net.load_state_dict(torch.load("ckptdir/lambdarank-136-64-16-scale-1.0"))
    print(net)

    ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma)

    net.eval()
    with torch.no_grad():
        count = 0
        batch_size = 200
        grad_batch, y_pred_batch = [], []

        for X, Y in test_loader.generate_batch_per_query():
            X_tensor = torch.tensor(X, dtype=precision, device=device)
            y_pred = net(X_tensor)
            y_pred_batch.append(y_pred)
            # compute the rank order of each document by predicted score
            rank_df = pd.DataFrame({
                "Y": y_pred.cpu().numpy().reshape(-1),
                "doc": np.arange(Y.shape[0]),
            })
            rank_df = rank_df.sort_values("Y").reset_index(drop=True)
            rank_order = rank_df.sort_values("doc").index.values + 1
def get_train_inference_net(train_algo, num_features, start_epoch,
                            double_precision):
    ranknet_structure = [num_features, 64, 16]

    if train_algo == BASELINE:
        net = RankNetPairs(ranknet_structure, double_precision)
        net_inference = RankNet(
            ranknet_structure)  # inference always uses single precision
        ckptfile = get_ckptdir('ranknet', ranknet_structure)

    elif train_algo in [SUM_SESSION, ACC_GRADIENT]:
        net = RankNet(ranknet_structure, double_precision)
        net_inference = net
        ckptfile = get_ckptdir('ranknet-factorize', ranknet_structure)

    else:
        raise ValueError("train algo {} not implemented".format(train_algo))

    if start_epoch != 0:
        load_from_ckpt(ckptfile, start_epoch, net)

    return net, net_inference, ckptfile
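# Usage sketch (illustrative only, not part of the original flow): builds the
# baseline RankNet pair and moves it onto the default device. Assumes the
# BASELINE constant defined elsewhere in this module and the 136-feature
# MSLR-WEB10K input used throughout this file.
def _example_build_ranknet():
    net, net_inference, ckptfile = get_train_inference_net(
        train_algo=BASELINE, num_features=136, start_epoch=0,
        double_precision=False)
    device = get_device()
    net.to(device)
    return net, net_inference, ckptfile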
def train(
        start_epoch=0,
        additional_epoch=100,
        lr=0.0001,
        optim="adam",
        leaky_relu=False,
        ndcg_gain_in_train="exp2",
        sigma=1.0,
        double_precision=False,
        standardize=False,
        small_dataset=False,
        debug=False,
        output_dir="/tmp/ranking_output/",
):
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    writer = SummaryWriter(output_dir)

    precision = torch.float64 if double_precision else torch.float32

    # get training and validation data:
    data_fold = 'Fold1'
    train_loader, df_train, valid_loader, df_valid = load_train_vali_data(
        data_fold, small_dataset)
    if standardize:
        df_train, scaler = train_loader.train_scaler_and_transform()
        df_valid = valid_loader.apply_scaler(scaler)

    lambdarank_structure = [136, 64, 16]

    net = LambdaRank(lambdarank_structure,
                     leaky_relu=leaky_relu,
                     double_precision=double_precision,
                     sigma=sigma)
    device = get_device('LambdaRank')
    net.to(device)
    net.apply(init_weights)
    print(net)

    ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma)

    if optim == "adam":
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    elif optim == "sgd":
        optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(
            "Optimization method {} not implemented".format(optim))
    print(optimizer)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.75)

    ideal_dcg = NDCG(2**9, ndcg_gain_in_train)

    for i in range(start_epoch, start_epoch + additional_epoch):
        net.train()
        net.zero_grad()

        count = 0
        batch_size = 200
        grad_batch, y_pred_batch = [], []

        for X, Y in train_loader.generate_batch_per_query(shuffle=True):
            if np.sum(Y) == 0:
                # negative session, cannot learn useful signal
                continue
            N = 1.0 / ideal_dcg.maxDCG(Y)

            X_tensor = torch.tensor(X, dtype=precision, device=device)
            y_pred = net(X_tensor)
            y_pred_batch.append(y_pred)
            # compute the rank order of each document
            rank_df = pd.DataFrame({"Y": Y, "doc": np.arange(Y.shape[0])})
            rank_df = rank_df.sort_values("Y").reset_index(drop=True)
            rank_order = rank_df.sort_values("doc").index.values + 1

            with torch.no_grad():
                pos_pairs_score_diff = 1.0 + torch.exp(sigma *
                                                       (y_pred - y_pred.t()))

                Y_tensor = torch.tensor(Y, dtype=precision,
                                        device=device).view(-1, 1)
                rel_diff = Y_tensor - Y_tensor.t()
                pos_pairs = (rel_diff > 0).type(precision)
                neg_pairs = (rel_diff < 0).type(precision)
                Sij = pos_pairs - neg_pairs

                if ndcg_gain_in_train == "exp2":
                    gain_diff = torch.pow(2.0, Y_tensor) - torch.pow(
                        2.0, Y_tensor.t())
                elif ndcg_gain_in_train == "identity":
                    gain_diff = Y_tensor - Y_tensor.t()
                else:
                    raise ValueError(
                        "ndcg_gain method not supported yet {}".format(
                            ndcg_gain_in_train))

                rank_order_tensor = torch.tensor(rank_order,
                                                 dtype=precision,
                                                 device=device).view(-1, 1)
                decay_diff = (1.0 / torch.log2(rank_order_tensor + 1.0) -
                              1.0 / torch.log2(rank_order_tensor.t() + 1.0))

                delta_ndcg = torch.abs(N * gain_diff * decay_diff)
                lambda_update = sigma * (0.5 * (1 - Sij) -
                                         1 / pos_pairs_score_diff) * delta_ndcg
                lambda_update = torch.sum(lambda_update, 1, keepdim=True)

                assert lambda_update.shape == y_pred.shape
                check_grad = torch.sum(lambda_update, (0, 1)).item()
                if check_grad == float('inf') or np.isnan(check_grad):
                    import ipdb
                    ipdb.set_trace()
                grad_batch.append(lambda_update)

            # optimization is similar to RankNetListWise, but aims to maximize NDCG;
            # lambda_update scales with gain and decay
            count += 1
            if count % batch_size == 0:
                for grad, y_pred in zip(grad_batch, y_pred_batch):
                    y_pred.backward(grad / batch_size)

                if count % (4 * batch_size) == 0 and debug:
                    net.dump_param()
                optimizer.step()
                net.zero_grad()
                grad_batch, y_pred_batch = [], []  # grad_batch, y_pred_batch used for gradient_acc

        # optimizer.step()
        print(
            get_time(),
            "training dataset at epoch {}, total queries: {}".format(i, count))
        if debug:
            eval_cross_entropy_loss(net, device, train_loader, i, writer,
                                    phase="Train")
            # eval_ndcg_at_k(net, device, df_train, train_loader, 100000, [10, 30, 50])

        if i % 5 == 0 and i != start_epoch:
            print(get_time(), "eval for epoch: {}".format(i))
            eval_cross_entropy_loss(net, device, valid_loader, i, writer)
            eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000,
                           [10, 30], i, writer)
        if i % 10 == 0 and i != start_epoch:
            save_to_ckpt(ckptfile, i, net, optimizer, scheduler)
        scheduler.step()

    # save the last ckpt
    save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer,
                 scheduler)

    # save the final model
    torch.save(net.state_dict(), ckptfile)
    ndcg_result = eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000,
                                 [10, 30], start_epoch + additional_epoch,
                                 writer)
    print(
        get_time(), "finish training " + ", ".join(
            ["NDCG@{}: {:.5f}".format(k, ndcg_result[k])
             for k in ndcg_result]), '\n\n')
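# Toy sketch of the pairwise lambda computation used in the training loop
# above, written in plain numpy for a single query (illustrative only). It
# mirrors the sigma, Sij, gain_diff, decay_diff and delta_ndcg terms with the
# "exp2" gain; the ideal-DCG normalisation here assumes the conventional
# (2**rel - 1) / log2(rank + 1) form, which may differ in detail from the
# module's NDCG class.
def _toy_lambda_update(y_pred, Y, sigma=1.0):
    y_pred = np.asarray(y_pred, dtype=float).reshape(-1, 1)
    Y = np.asarray(Y, dtype=float).reshape(-1, 1)
    flat = Y.reshape(-1)
    # 1-based rank of each document when sorted by label, as in the loop above
    rank_order = flat.argsort(kind="stable").argsort(kind="stable") + 1
    rank_order = rank_order.reshape(-1, 1).astype(float)

    pos_pairs_score_diff = 1.0 + np.exp(sigma * (y_pred - y_pred.T))
    rel_diff = Y - Y.T
    Sij = (rel_diff > 0).astype(float) - (rel_diff < 0).astype(float)
    gain_diff = 2.0 ** Y - 2.0 ** Y.T  # "exp2" gain
    decay_diff = (1.0 / np.log2(rank_order + 1.0) -
                  1.0 / np.log2(rank_order.T + 1.0))

    ideal = np.sort(flat)[::-1]
    max_dcg = np.sum((2.0 ** ideal - 1.0) / np.log2(np.arange(ideal.size) + 2.0))
    delta_ndcg = np.abs(gain_diff * decay_diff / max_dcg)

    lam = sigma * (0.5 * (1 - Sij) - 1.0 / pos_pairs_score_diff) * delta_ndcg
    return lam.sum(axis=1, keepdims=True)

# e.g. _toy_lambda_update([0.3, -0.1, 0.2], [2, 0, 1]) yields one lambda per document.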
lambdarank_structure = [136, 64, 16]

net = LambdaRank(lambdarank_structure,
                 leaky_relu=leaky_relu,
                 double_precision=double_precision,
                 sigma=sigma)
device = get_device()
net.to(device)
net.load_state_dict(
    torch.load(
        r"D:\Data_Mining\pytorch-examples-master\ranking\ckptdir\ranknet-factorize-136-64-16"
    ))
print(net)

ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma)

net.eval()
with torch.no_grad():
    count = 0
    batch_size = 200
    grad_batch = []
    rank_al_batch = np.array([])
    y_batch = np.array([])
    qid = 0
    for X, Y in test_loader.generate_batch_per_query():
        X_tensor = torch.tensor(X, dtype=precision, device=device)
        y_pred = net(X_tensor)
        print(len(y_pred))
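# Illustrative helper (a sketch, not the module's eval_ndcg_at_k): NDCG@k for a
# single query, using the conventional (2**rel - 1) / log2(rank + 1) gain and
# decay. It shows how the per-query y_pred produced above is typically scored
# against the labels Y; the exact formulation inside eval_ndcg_at_k may differ.
# Assumes numpy is imported as np at the top of this module.
def _ndcg_at_k_single_query(y_pred, Y, k=10):
    y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
    Y = np.asarray(Y, dtype=float).reshape(-1)
    order = np.argsort(-y_pred)[:k]      # documents ranked by predicted score
    dcg = np.sum((2.0 ** Y[order] - 1.0) / np.log2(np.arange(order.size) + 2.0))
    ideal_order = np.argsort(-Y)[:k]     # documents ranked by true label
    idcg = np.sum((2.0 ** Y[ideal_order] - 1.0) /
                  np.log2(np.arange(ideal_order.size) + 2.0))
    return dcg / idcg if idcg > 0 else 0.0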