Example #1
def test(
    start_epoch=0,
    additional_epoch=90,
    lr=0.0001,
    optim="adam",
    leaky_relu=False,
    ndcg_gain_in_train="exp2",
    sigma=1.0,
    double_precision=False,
    standardize=False,
    small_dataset=False,
    debug=False,
    output_dir="/tmp/ranking_output/",
):
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    writer = SummaryWriter(output_dir)

    precision = torch.float64 if double_precision else torch.float32

    # get validation and test data:
    data_fold = 'Fold1'
    valid_loader, df_valid, test_loader, df_test = load_train_vali_data(
        data_fold, small_dataset=True)
    print(test_loader.num_features)
    if standardize:
        # the original snippet referenced an undefined train_loader here; fit the
        # scaler on the data this test-only function actually loads instead
        df_valid, scaler = valid_loader.train_scaler_and_transform()
        df_test = test_loader.apply_scaler(scaler)

    lambdarank_structure = [136, 64, 16]

    net = LambdaRank(lambdarank_structure,
                     leaky_relu=leaky_relu,
                     double_precision=double_precision,
                     sigma=sigma)
    device = get_device()
    net.to(device)
    net.load_state_dict(torch.load("ckptdir\lambdarank-136-64-16-scale-1.0"))
    print(net)

    ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma)

    net.eval()
    with torch.no_grad():

        count = 0
        batch_size = 200
        grad_batch, y_pred_batch = [], []

        for X, Y in test_loader.generate_batch_per_query():
            X_tensor = torch.tensor(X, dtype=precision, device=device)
            y_pred = net(X_tensor)
            y_pred_batch.append(y_pred)
            # compute the rank order of each document
            rank_df = pd.DataFrame({"Y": y_pred, "doc": np.arange(Y.shape[0])})
            rank_df = rank_df.sort_values("Y").reset_index(drop=True)
            rank_order = rank_df.sort_values("doc").index.values + 1
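
A minimal sketch of the double-sort trick used above to turn per-query scores into per-document ranks; the scores below are toy values for illustration only (with the default ascending sort, rank 1 goes to the lowest score):

import numpy as np
import pandas as pd

scores = np.array([0.3, 1.2, -0.5, 0.9])   # toy predicted scores for one query
rank_df = pd.DataFrame({"Y": scores, "doc": np.arange(scores.shape[0])})
rank_df = rank_df.sort_values("Y").reset_index(drop=True)  # position after sorting by score
rank_order = rank_df.sort_values("doc").index.values + 1   # restore original document order
print(rank_order)  # [2 4 1 3]
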
Example #2
def train(
    start_epoch=0,
    additional_epoch=100,
    lr=0.0001,
    optim="adam",
    leaky_relu=False,
    ndcg_gain_in_train="exp2",
    sigma=1.0,
    double_precision=False,
    standardize=False,
    small_dataset=False,
    debug=False,
    output_dir="/tmp/ranking_output/",
):
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    writer = SummaryWriter(output_dir)

    precision = torch.float64 if double_precision else torch.float32

    # get training and validation data:
    data_fold = 'Fold1'
    train_loader, df_train, valid_loader, df_valid = load_train_vali_data(
        data_fold, small_dataset)
    if standardize:
        df_train, scaler = train_loader.train_scaler_and_transform()
        df_valid = valid_loader.apply_scaler(scaler)

    lambdarank_structure = [136, 64, 16]

    net = LambdaRank(lambdarank_structure,
                     leaky_relu=leaky_relu,
                     double_precision=double_precision,
                     sigma=sigma)
    device = get_device('LambdaRank')
    net.to(device)
    net.apply(init_weights)
    print(net)

    ckptfile = get_ckptdir('lambdarank', lambdarank_structure, sigma)

    if optim == "adam":
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    elif optim == "sgd":
        optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(
            "Optimization method {} not implemented".format(optim))
    print(optimizer)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.75)

    ideal_dcg = NDCG(2**9, ndcg_gain_in_train)

    for i in range(start_epoch, start_epoch + additional_epoch):
        net.train()
        net.zero_grad()

        count = 0
        batch_size = 200
        grad_batch, y_pred_batch = [], []

        for X, Y in train_loader.generate_batch_per_query(shuffle=True):
            if np.sum(Y) == 0:
                # negative session, cannot learn useful signal
                continue
            N = 1.0 / ideal_dcg.maxDCG(Y)

            X_tensor = torch.tensor(X, dtype=precision, device=device)
            y_pred = net(X_tensor)
            y_pred_batch.append(y_pred)
            # compute the rank order of each document
            rank_df = pd.DataFrame({"Y": Y, "doc": np.arange(Y.shape[0])})
            rank_df = rank_df.sort_values("Y").reset_index(drop=True)
            rank_order = rank_df.sort_values("doc").index.values + 1

            with torch.no_grad():
                pos_pairs_score_diff = 1.0 + torch.exp(sigma *
                                                       (y_pred - y_pred.t()))

                Y_tensor = torch.tensor(Y, dtype=precision,
                                        device=device).view(-1, 1)
                rel_diff = Y_tensor - Y_tensor.t()
                pos_pairs = (rel_diff > 0).type(precision)
                neg_pairs = (rel_diff < 0).type(precision)
                Sij = pos_pairs - neg_pairs
                if ndcg_gain_in_train == "exp2":
                    gain_diff = torch.pow(2.0, Y_tensor) - torch.pow(
                        2.0, Y_tensor.t())
                elif ndcg_gain_in_train == "identity":
                    gain_diff = Y_tensor - Y_tensor.t()
                else:
                    raise ValueError(
                        "ndcg_gain method not supported yet {}".format(
                            ndcg_gain_in_train))

                rank_order_tensor = torch.tensor(rank_order,
                                                 dtype=precision,
                                                 device=device).view(-1, 1)
                decay_diff = 1.0 / torch.log2(rank_order_tensor +
                                              1.0) - 1.0 / torch.log2(
                                                  rank_order_tensor.t() + 1.0)

                delta_ndcg = torch.abs(N * gain_diff * decay_diff)
                lambda_update = sigma * (0.5 * (1 - Sij) -
                                         1 / pos_pairs_score_diff) * delta_ndcg
                lambda_update = torch.sum(lambda_update, 1, keepdim=True)

                assert lambda_update.shape == y_pred.shape
                check_grad = torch.sum(lambda_update, (0, 1)).item()
                if np.isinf(check_grad) or np.isnan(check_grad):
                    import ipdb
                    ipdb.set_trace()
                grad_batch.append(lambda_update)

            # the optimization is similar to RankNetListWise, but maximizes NDCG;
            # lambda_update scales with both the gain and the rank-decay differences

            count += 1
            if count % batch_size == 0:
                for grad, y_pred in zip(grad_batch, y_pred_batch):
                    y_pred.backward(grad / batch_size)

                if count % (4 * batch_size) == 0 and debug:
                    net.dump_param()

                optimizer.step()
                net.zero_grad()
                grad_batch, y_pred_batch = [], []  # buffers used for gradient accumulation

        # optimizer.step()
        print(
            get_time(),
            "training dataset at epoch {}, total queries: {}".format(i, count))
        if debug:
            eval_cross_entropy_loss(net,
                                    device,
                                    train_loader,
                                    i,
                                    writer,
                                    phase="Train")
        # eval_ndcg_at_k(net, device, df_train, train_loader, 100000, [10, 30, 50])

        if i % 5 == 0 and i != start_epoch:
            print(get_time(), "eval for epoch: {}".format(i))
            eval_cross_entropy_loss(net, device, valid_loader, i, writer)
            eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000,
                           [10, 30], i, writer)
        if i % 10 == 0 and i != start_epoch:
            save_to_ckpt(ckptfile, i, net, optimizer, scheduler)

        scheduler.step()

    # save the last ckpt
    save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer,
                 scheduler)

    # save the final model
    torch.save(net.state_dict(), ckptfile)
    ndcg_result = eval_ndcg_at_k(net, device, df_valid, valid_loader, 100000,
                                 [10, 30], start_epoch + additional_epoch,
                                 writer)
    print(
        get_time(), "finish training " + ", ".join(
            ["NDCG@{}: {:.5f}".format(k, ndcg_result[k])
             for k in ndcg_result]), '\n\n')
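
The quantity assembled inside the torch.no_grad() block above is the standard LambdaRank weight: for a document pair (i, j) with S_ij in {+1, 0, -1}, lambda_ij = sigma * (0.5 * (1 - S_ij) - 1 / (1 + exp(sigma * (s_i - s_j)))) * |delta_NDCG_ij|, summed over j for each document i. Below is a self-contained NumPy sketch with toy labels, scores, and ranks; the maxDCG normalization assumes the "exp2" gain 2^y - 1, which may differ in detail from the repository's NDCG helper.

import numpy as np

sigma = 1.0
Y = np.array([2.0, 0.0, 1.0])                          # toy relevance labels for one query
s = np.array([0.1, 0.4, -0.2]).reshape(-1, 1)          # toy model scores s_i
rank_order = np.array([2.0, 1.0, 3.0]).reshape(-1, 1)  # toy current ranks (1 = top)

# ideal DCG for the normalization N = 1 / maxDCG(Y), assuming gain 2^y - 1
ideal = np.sort(Y)[::-1]
max_dcg = np.sum((2.0 ** ideal - 1.0) / np.log2(np.arange(ideal.shape[0]) + 2.0))
N = 1.0 / max_dcg

Yc = Y.reshape(-1, 1)
Sij = np.sign(Yc - Yc.T)                               # +1 if i more relevant than j, -1 if less
gain_diff = 2.0 ** Yc - 2.0 ** Yc.T                    # "exp2" gain difference
decay_diff = 1.0 / np.log2(rank_order + 1.0) - 1.0 / np.log2(rank_order.T + 1.0)
delta_ndcg = np.abs(N * gain_diff * decay_diff)        # |delta NDCG| from swapping i and j

pos_pairs_score_diff = 1.0 + np.exp(sigma * (s - s.T))
lambda_ij = sigma * (0.5 * (1.0 - Sij) - 1.0 / pos_pairs_score_diff) * delta_ndcg
lambda_update = lambda_ij.sum(axis=1, keepdims=True)   # one weight per document

These per-document weights are not a loss value; the training loop feeds them straight into autograd as the upstream gradient via y_pred.backward(lambda_update / batch_size), which is why the forward outputs (y_pred_batch) and weights (grad_batch) are buffered and flushed once every batch_size queries.
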
Example #3
def train_rank_net(
    start_epoch=0,
    additional_epoch=100,
    lr=0.0001,
    optim="adam",
    train_algo=SUM_SESSION,
    double_precision=False,
    standardize=False,
    small_dataset=False,
    debug=False,
    output_dir="/tmp/ranking_output/",
):
    """

    :param start_epoch: int
    :param additional_epoch: int
    :param lr: float
    :param optim: str
    :param train_algo: str
    :param double_precision: boolean
    :param standardize: boolean
    :param small_dataset: boolean
    :param debug: boolean
    :return:
    """
    print("start_epoch:{}, additional_epoch:{}, lr:{}".format(
        start_epoch, additional_epoch, lr))
    writer = SummaryWriter(output_dir)

    precision = torch.float64 if double_precision else torch.float32

    # get training and validation data:
    data_fold = 'Fold1'
    train_loader, df_train, valid_loader, df_valid = load_train_vali_data(
        data_fold, small_dataset)
    if standardize:
        df_train, scaler = train_loader.train_scaler_and_transform()
        df_valid = valid_loader.apply_scaler(scaler)

    net, net_inference, ckptfile = get_train_inference_net(
        train_algo, train_loader.num_features, start_epoch, double_precision)
    device = get_device()
    net.to(device)
    net_inference.to(device)

    # initialize to make training faster
    net.apply(init_weights)

    if optim == "adam":
        optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    elif optim == "sgd":
        optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    else:
        raise ValueError(
            "Optimization method {} not implemented".format(optim))
    print(optimizer)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.75)

    loss_func = None
    if train_algo == BASELINE:
        loss_func = torch.nn.BCELoss()
        loss_func.to(device)

    losses = []

    for i in range(start_epoch, start_epoch + additional_epoch):

        net.zero_grad()
        net.train()

        if train_algo == BASELINE:
            epoch_loss = baseline_pairwise_training_loop(i,
                                                         net,
                                                         loss_func,
                                                         optimizer,
                                                         train_loader,
                                                         precision=precision,
                                                         device=device,
                                                         debug=debug)
        elif train_algo in [SUM_SESSION, ACC_GRADIENT]:
            epoch_loss = factorized_training_loop(i,
                                                  net,
                                                  None,
                                                  optimizer,
                                                  train_loader,
                                                  training_algo=train_algo,
                                                  precision=precision,
                                                  device=device,
                                                  debug=debug)
        else:
            raise ValueError(
                "Training algo {} not implemented".format(train_algo))

        losses.append(epoch_loss)
        print('=' * 20 + '\n', get_time(),
              'Epoch{}, loss : {}'.format(i, losses[-1]), '\n' + '=' * 20)

        # save to checkpoint every 5 epochs, and run eval
        if i % 5 == 0 and i != start_epoch:
            save_to_ckpt(ckptfile, i, net, optimizer, scheduler)
            net_inference.load_state_dict(net.state_dict())
            eval_model(net_inference, device, df_valid, valid_loader, i,
                       writer)

        # step the LR schedule after the epoch's optimizer updates,
        # matching the ordering used in the train() example above
        scheduler.step()

    # save the last ckpt
    save_to_ckpt(ckptfile, start_epoch + additional_epoch, net, optimizer,
                 scheduler)

    # final evaluation
    net_inference.load_state_dict(net.state_dict())
    ndcg_result = eval_model(net_inference, device, df_valid, valid_loader,
                             start_epoch + additional_epoch, writer)

    # save the final model
    torch.save(net.state_dict(), ckptfile)
    print(
        get_time(), "finish training " + ", ".join(
            ["NDCG@{}: {:.5f}".format(k, ndcg_result[k])
             for k in ndcg_result]), '\n\n')
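
A hedged usage sketch of the function above; SUM_SESSION is the constant referenced in the snippet, while the argument values below are purely illustrative and not necessarily the repository's defaults:

if __name__ == "__main__":
    # train a RankNet with the factorized sum-over-session update on the small
    # Fold1 subset, logging to TensorBoard and checkpointing under output_dir
    train_rank_net(
        start_epoch=0,
        additional_epoch=50,
        lr=0.0001,
        optim="adam",
        train_algo=SUM_SESSION,
        double_precision=False,
        standardize=True,
        small_dataset=True,
        debug=False,
        output_dir="/tmp/ranking_output/",
    )
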
Example #4
                           sep=' ',
                           names=[
                               'qid', 'did1', 'did2', 'did3', 'did4', 'did5',
                               'did6', 'did7', 'did8', 'did9', 'did10'
                           ])

    leaky_relu = False
    ndcg_gain_in_train = "exp2"
    sigma = 1.0
    #writer = SummaryWriter(output_dir)
    double_precision = False
    precision = torch.float64 if double_precision else torch.float32

    # get validation and test data:
    data_fold = 'Fold1'
    valid_loader, df_valid, test_loader, df_test = load_train_vali_data(
        data_fold, small_dataset=True)
    print(test_loader.num_features)
    qid_list = df_test.loc[:, 'qid'].values

    # breakpoint()  # uncomment to pause here when debugging

    lambdarank_structure = [136, 64, 16]

    net = LambdaRank(lambdarank_structure,
                     leaky_relu=leaky_relu,
                     double_precision=double_precision,
                     sigma=sigma)
    device = get_device()
    net.to(device)
    net.load_state_dict(
        torch.load(