Example #1
import scipy.sparse
import torch
import xclib.evaluation.xc_metrics as xc_metrics
from torchnet import meter  # provides mAPMeter


def evaluate_scores(gt: torch.Tensor, scores: torch.Tensor, model_cfg):
    num_ins, num_labels = gt.shape
    if gt.is_sparse:
        gt_coo = gt.coalesce()
        gt_np = scipy.sparse.coo_matrix(
            (gt_coo.values().cpu().numpy(), gt_coo.indices().cpu().numpy()),
            shape=(num_ins, num_labels))
    else:
        gt_np = scipy.sparse.coo_matrix(gt.cpu().numpy(),
                                        shape=(num_ins, num_labels))
    if isinstance(scores, torch.Tensor):
        scores_np = scores.cpu().numpy()  # .cpu() so CUDA tensors work too
    else:
        scores_np = scores

    # n.b. "propesity" is the actual (misspelled) name exported by xclib
    inv_propen = xc_metrics.compute_inv_propesity(gt_np, model_cfg["ps_A"],
                                                  model_cfg["ps_B"])

    acc = xc_metrics.Metrics(true_labels=gt_np,
                             inv_psp=inv_propen,
                             remove_invalid=False)
    map_meter = meter.mAPMeter()
    # mAPMeter requires dense tensors
    gt_dense = gt if not gt.is_sparse else gt.to_dense()
    map_meter.add(scores, gt_dense)

    prec, ndcg, PSprec, PSnDCG = acc.eval(scores_np, model_cfg["at_k"])
    d = {
        "prec": prec,
        "ndcg": ndcg,
        "psp": PSprec,
        "psn": PSnDCG,
        "mAP": [map_meter.value()]
    }
    return d
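
A quick smoke test for the function above; the shapes and the model_cfg values (ps_A, ps_B, at_k) are illustrative assumptions, not values from the source:

# hypothetical usage with random dense ground truth and scores
gt = (torch.rand(100, 50) < 0.05).float()   # 100 instances, 50 labels
scores = torch.rand(100, 50)
metrics = evaluate_scores(gt, scores,
                          {"ps_A": 0.55, "ps_B": 1.5, "at_k": 5})
print(metrics["prec"], metrics["mAP"])
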
Example #2
import xclib.evaluation.xc_metrics as xc_metrics
from xclib.data import data_utils


def compute_inv_propensity(train_file, A, B):
    """
    Compute inverse propensity values.
    Suggested A/B values:
        Wikipedia-500K: 0.5/0.4
        Amazon-670K, Amazon-3M: 0.6/2.6
        Others: 0.55/1.5
    """
    train_labels = data_utils.read_sparse_file(train_file)
    # "propesity" is the actual (misspelled) name exported by xclib
    inv_propen = xc_metrics.compute_inv_propesity(train_labels, A, B)
    return inv_propen
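
For reference, compute_inv_propesity follows the propensity model of Jain et al. (KDD 2016); a minimal sketch of the computation it performs, assuming labels is a sparse (num_samples x num_labels) matrix:

import numpy as np

def inv_propensity_sketch(labels, A, B):
    freqs = np.ravel(labels.sum(axis=0))    # N_l: training frequency of label l
    N = labels.shape[0]
    C = (np.log(N) - 1) * (1 + B) ** A      # normalizing constant
    return 1 + C * (freqs + B) ** (-A)      # inverse propensity 1 / p_l
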
Example #3
 def evaluate(self, train_labels, test_labels, predictionScore, ps_A, ps_B,
              eval_file_path):
     inv_propen = xc_metrics.compute_inv_propesity(train_labels, ps_A, ps_B)
     acc = xc_metrics.Metrics(true_labels=test_labels, inv_psp=inv_propen)
     args = acc.eval(predictionScore, 5)
     strs = ['P@K', 'nDCG@K', 'PSP@K', 'PSnDCG@K']
     msg = ''
     for i in range(4):
         msg += strs[i] + str(args[i][[0, 2, 4]]) + '\n'
     print(msg)
     if eval_file_path is not None:
         with open(eval_file_path, 'w') as f:  # 'w': the file is written, not read
             f.write(msg)
Example #4
def main(targets_label_file, train_label_file, predictions_file, A, B, docs, lbls):
    true_labels = _remove_overlap(
        data_utils.read_sparse_file(
            targets_label_file, force_header=True).tolil(),
        docs, lbls)
    trn_labels = data_utils.read_sparse_file(
        train_label_file, force_header=True)
    inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A=A, B=B)
    acc = xc_metrics.Metrics(
        true_labels, inv_psp=inv_propen, remove_invalid=False)
    predicted_labels = _remove_overlap(
        load_npz(predictions_file+'.npz').tolil(),
        docs, lbls)
    rec = xc_metrics.recall(predicted_labels, true_labels, k=20)[-1]*100
    print("R@20=%0.2f" % (rec))
    args = acc.eval(predicted_labels, 5)
    print(xc_metrics.format(*args))
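
_remove_overlap is not shown above; a plausible sketch, assuming docs and lbls index the test documents and labels that overlap with training and must be blanked out before evaluation:

def _remove_overlap(score_mat, docs, lbls):
    # hypothetical helper: zero overlapping rows/columns (LIL format)
    score_mat[docs, :] = 0
    score_mat[:, lbls] = 0
    return score_mat
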
Example #5
def main(tst_label_fname, trn_label_fname, filter_fname, pred_fname, A, B,
         betas, top_k, save):
    true_labels = data_utils.read_sparse_file(tst_label_fname)
    trn_labels = data_utils.read_sparse_file(trn_label_fname)
    inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A, B)
    mapping = get_filter_map(filter_fname)
    acc = xc_metrics.Metrics(true_labels, inv_psp=inv_propen)
    root = os.path.dirname(pred_fname)
    ans = ""
    if isinstance(betas, list) and betas[0] != -1:
        knn = filter_predictions(load_npz(pred_fname + '_knn.npz'), mapping)
        clf = filter_predictions(load_npz(pred_fname + '_clf.npz'), mapping)
        args = acc.eval(clf, 5)
        ans = f"classifier\n{xc_metrics.format(*args)}"
        args = acc.eval(knn, 5)
        ans = ans + f"\nshortlist\n{xc_metrics.format(*args)}"
        clf = retain_topk(clf, k=top_k)
        knn = retain_topk(knn, k=top_k)
        clf = normalize(sigmoid(clf), norm='max')
        knn = normalize(sigmoid(knn), norm='max')
        for beta in betas:
            predicted_labels = beta * clf + (1 - beta) * knn
            args = acc.eval(predicted_labels, 5)
            ans = ans + f"\nbeta: {beta:.2f}\n{xc_metrics.format(*args)}"
            if save:
                fname = os.path.join(root, f"score_{beta:.2f}.npz")
                save_npz(fname,
                         retain_topk(predicted_labels, k=top_k),
                         compressed=False)
    else:
        predicted_labels = filter_predictions(
            sigmoid(load_npz(pred_fname + '.npz')), mapping)
        args = acc.eval(predicted_labels, 5)
        ans = xc_metrics.format(*args)
        if save:
            print("Saving predictions..")
            fname = os.path.join(root, "score.npz")
            save_npz(fname,
                     retain_topk(predicted_labels, k=top_k),
                     compressed=False)
    line = "-" * 30
    print(f"\n{line}\n{ans}\n{line}")
    return ans
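
get_filter_map and filter_predictions are also external to this snippet; one plausible reading, assuming the filter file lists (row, col) pairs of reciprocal ground-truth entries that are zeroed out before evaluation:

import numpy as np

def get_filter_map(fname):
    # hypothetical helper: load (row, col) index pairs, or None if unused
    return np.loadtxt(fname, dtype=np.int64) if fname is not None else None

def filter_predictions(pred, mapping):
    # hypothetical helper: zero out the filtered entries of a CSR score matrix
    if mapping is not None and mapping.size > 0:
        pred[mapping[:, 0], mapping[:, 1]] = 0
        pred.eliminate_zeros()
    return pred
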
Example #6
def main(tst_label_fname, trn_label_fname, pred_fname,
         A, B, save, *args, **kwargs):
    true_labels = data_utils.read_sparse_file(tst_label_fname)
    trn_labels = data_utils.read_sparse_file(trn_label_fname)
    inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A, B)
    acc = xc_metrics.Metrics(true_labels, inv_psp=inv_propen)
    root = os.path.dirname(pred_fname[-1])
    predicted_labels = read_files(pred_fname)
    ens_predicted_labels = merge(predicted_labels)
    ans = ""
    for idx, pred in enumerate(predicted_labels):
        args = acc.eval(pred, 5)
        ans = ans + f"learner: {idx}\n{xc_metrics.format(*args)}\n"
    args = acc.eval(ens_predicted_labels, 5)
    ans = ans + f"Ensemble\n{xc_metrics.format(*args)}"
    if save:
        print("Saving predictions..")
        fname = os.path.join(root, "score.npz")
        save_npz(fname, ens_predicted_labels, compressed=False)
    line = "-"*30
    print(f"\n{line}\n{ans}\n{line}")
    return ans
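
read_files and merge are likewise not shown; a plausible sketch, assuming pred_fname is a list of .npz score files and the ensemble is a plain average:

from scipy.sparse import load_npz

def read_files(fnames):
    # hypothetical helper: one sparse score matrix per learner
    return [load_npz(fname) for fname in fnames]

def merge(predicted_labels):
    # hypothetical helper: average the learners' scores
    return sum(predicted_labels) / len(predicted_labels)
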
Example #7
def create_params_dict(args, node_features, trn_X_Y,
                       graph, NUM_PARTITIONS, NUM_TRN_POINTS):
    DIM = node_features.shape[1]
    params = dict(hidden_dims=DIM,
                  feature_dim=DIM,
                  embed_dims=DIM,
                  lr=args.lr,
                  attention_lr=args.attention_lr
                  )
    params["batch_size"] = args.batch_size
    params["reduction"] = "mean"
    params["batch_div"] = False
    params["num_epochs"] = args.num_epochs
    params["num_HN_epochs"] = args.num_HN_epochs
    params["dlr_factor"] = args.dlr_factor
    params["adjust_lr_epochs"] = set(
        [int(x) for x in args.adjust_lr.strip().split(",")])
    params["num_random_samples"] = args.num_random_samples
    params["devices"] = [x.strip()
                         for x in args.devices.strip().split(",") if len(x.strip()) != 0]

    params["fanouts"] = [int(x.strip()) for x in args.fanouts.strip().split(
        ",") if len(x.strip()) != 0]
    params["num_partitions"] = NUM_PARTITIONS
    params["num_labels"] = trn_X_Y.shape[1]
    params["graph"] = graph
    params["num_trn"] = NUM_TRN_POINTS
    params["inv_prop"] = xc_metrics.compute_inv_propesity(
        trn_X_Y, args.A, args.B)
    params["num_shortlist"] = args.num_shortlist
    params["num_HN_shortlist"] = args.num_HN_shortlist
    params["restrict_edges_num"] = args.restrict_edges_num
    params["restrict_edges_head_threshold"] = args.restrict_edges_head_threshold
    params["random_shuffle_nbrs"] = args.random_shuffle_nbrs

    return params
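
A hypothetical invocation with synthetic inputs, assuming args carries the fields the function reads (all values below are placeholders, not defaults from the source):

import numpy as np
import scipy.sparse as sp
from types import SimpleNamespace

node_features = np.random.rand(1000, 64).astype(np.float32)
trn_X_Y = sp.random(1000, 500, density=0.01, format='csr')
graph = None  # stand-in; the real pipeline passes its graph object
args = SimpleNamespace(
    lr=1e-3, attention_lr=1e-3, batch_size=256, num_epochs=30,
    num_HN_epochs=0, dlr_factor=0.5, adjust_lr="15,25",
    num_random_samples=4000, devices="cuda:0", fanouts="3,3",
    A=0.55, B=1.5, num_shortlist=500, num_HN_shortlist=500,
    restrict_edges_num=-1, restrict_edges_head_threshold=20,
    random_shuffle_nbrs=0)
params = create_params_dict(args, node_features, trn_X_Y, graph,
                            NUM_PARTITIONS=1, NUM_TRN_POINTS=1000)
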
Example #8
    hc = GraphCollator(head_net,
                       params["num_labels"],
                       params["num_random_samples"],
                       num_hard_neg=0)
    print('Collator created')

    head_train_loader = torch.utils.data.DataLoader(
        head_train_dataset,
        batch_size=params["batch_size"],
        num_workers=10,
        collate_fn=hc,
        shuffle=True,
        pin_memory=False)

    inv_prop = xc_metrics.compute_inv_propesity(trn_X_Y, args.A, args.B)

    head_net.move_to_devices()

    if args.mpt == 1:  # mixed-precision training
        scaler = torch.cuda.amp.GradScaler()

    train()

    # set to the number of test points we want to evaluate on
    params["num_tst"] = tst_X_Y_val.shape[0]

    if args.save_model == 1:
        model_dir = "{}/GraphXMLModel{}".format(DATASET, RUN_TYPE)
        if not os.path.exists(model_dir):
            print("Making model dir...")
            os.makedirs(model_dir)  # assumed completion; the snippet is truncated here
Example #9
# load dataset
    test_file = os.path.join(data_dir, prefix + "_test.txt")
    label_path = os.path.join(record_dir, "_".join(
        [prefix, str(num_labels), str(b),
         str(R)]))  # Bibtex_159_100_32

    pred_avg_meter = AverageMeter()
    logging.info("Evaluating mAP only config %s" % (a.model))
    logging.info("Dataset config %s" % (a.dataset))
    if a.cost:
        logging.info("Evaluating cost-sensitive method: %s" % (a.cost))

    # get inverse propensity

    _, labels, _, _, _ = data_utils.read_data(test_file)
    inv_propen = xc_metrics.compute_inv_propesity(labels, model_cfg["ps_A"],
                                                  model_cfg["ps_B"])
    ap_meter = meter.APMeter()

    a.__dict__['rep'] = 0
    single_model_dir = get_model_dir(data_cfg, model_cfg, a)
    gt_filename = os.path.join(single_model_dir, "gt.npz")
    gt = scipy.sparse.load_npz(gt_filename).tocsc()
    # get label mappings
    l_maps = []
    for r in range(R):
        counts, label_mapping, inv_mapping = get_label_hash(label_path, r)
        l_maps.append(label_mapping)
    l_maps = np.stack(l_maps, axis=0)  # R x #labels
    lfu = cachetools.LRUCache(R * a.bs * a.cs)

    start = 0
Example #10
from xclib.data import data_utils
import xclib.evaluation.xc_metrics as xc_metrics
import numpy as np

dataset = 'eurlex'
# Read file with features and labels
features, labels, num_samples, num_features, num_labels = data_utils.read_data(
    'data/' + dataset + '/' + 'train.txt')

A, B = 0.55, 1.5
inv_propen = xc_metrics.compute_inv_propesity(labels, A, B)
np.savetxt('inv_prop.txt', inv_propen)

data_utils.write_sparse_file(features, "trn_X_Xf.txt")
data_utils.write_sparse_file(labels, "trn_X_Y.txt")

features, labels, num_samples, num_features, num_labels = data_utils.read_data(
    'data/' + dataset + '/' + 'test.txt')
data_utils.write_sparse_file(features, "tst_X_Xf.txt")
data_utils.write_sparse_file(labels, "tst_X_Y.txt")
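
The saved propensities can be read back later for propensity-scored evaluation, for example:

inv_propen = np.loadtxt('inv_prop.txt')
acc = xc_metrics.Metrics(true_labels=labels, inv_psp=inv_propen)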