Example #1
def run_validation(val_predicted_labels, tst_X_Y_val, tst_exact_remove,
                   tst_X_Y_trn, inv_prop):
    # Rebuild the predictions row by row, dropping labels that appear in the
    # instance's training labels or in its exact-removal list.
    data = []
    indptr = [0]
    indices = []
    for i in range(val_predicted_labels.shape[0]):
        _indices1 = val_predicted_labels.indices[
            val_predicted_labels.indptr[i]:val_predicted_labels.indptr[i + 1]]
        _vals1 = val_predicted_labels.data[
            val_predicted_labels.indptr[i]:val_predicted_labels.indptr[i + 1]]

        _indices, _vals = [], []
        for _ind, _val in zip(_indices1, _vals1):
            if (_ind not in tst_exact_remove[i]) and (
                    _ind not in tst_X_Y_trn.
                    indices[tst_X_Y_trn.indptr[i]:tst_X_Y_trn.indptr[i + 1]]):
                _indices.append(_ind)
                _vals.append(_val)

        indices += list(_indices)
        data += list(_vals)
        indptr.append(len(indices))

    _pred = csr_matrix((data, indices, indptr),
                       shape=(val_predicted_labels.shape))

    print(tst_X_Y_val.shape, _pred.shape)
    acc = xc_metrics.Metrics(tst_X_Y_val, inv_psp=inv_prop)
    acc = acc.eval(_pred, 5)
    _recall = recall(tst_X_Y_val, _pred, 100)
    return (acc, _recall), _pred
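Example #1 does not show its imports; a minimal sketch of what the snippet appears to rely on, assuming SciPy sparse matrices and pyxclib's evaluation utilities, is:

# Assumed imports for Example #1 (a sketch; `recall` is taken to be either a
# project-level helper or xclib's xc_metrics.recall).
from scipy.sparse import csr_matrix
from xclib.evaluation import xc_metrics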
Example #2
def compute_metrics(model, X_feat, Y_labels, wts, batch_size=50000):
    '''
    Compute P@k and nDCG@k, and their propensity-scored versions PSP@k and
    PSnDCG@k, for k = 1,...,5.
    '''
    metrics_all = xc_metrics.Metrics(Y_labels, wts)

    if Y_labels.shape[0] > 20000 or Y_labels.shape[1] > 20000:
        # Large dataset: predict scores in batches and keep only the top-5
        # scores per test point in a sparse matrix to limit memory use.
        count = 0
        dataset_tf = tf.data.Dataset.from_tensor_slices(X_feat).batch(
            batch_size)
        mtr_scores = lil_matrix((Y_labels.shape[0], Y_labels.shape[1]),
                                dtype=np.float64)
        start_time = time.time()
        for batch_X in dataset_tf:
            mtr_scores_tmp = model.predict_scores(batch_X).numpy()
            for n_test in range(batch_X.shape[0]):
                Prob_scores_n_test = mtr_scores_tmp[n_test]
                top5_indices = np.argpartition(Prob_scores_n_test, -5)[-5:]
                top5_indices.sort()

                mtr_scores[count + n_test,
                           top5_indices] = Prob_scores_n_test[top5_indices]

            count += batch_X.shape[0]

        pred_time = time.time() - start_time
        mtr_scores = mtr_scores.tocsr()
    else:
        start_time = time.time()
        mtr_scores = model.predict_scores(X_feat).numpy()
        pred_time = time.time() - start_time

    all_metrics = [100 * elem for elem in metrics_all.eval(mtr_scores)]
    return all_metrics[0], all_metrics[1], all_metrics[2], all_metrics[
        3], pred_time
Example #3
def evaluate_scores(gt: torch.Tensor, scores: torch.Tensor, model_cfg):
    num_ins, num_labels = gt.shape
    if gt.is_sparse:
        gt_np = gt.coalesce()
        gt_np = scipy.sparse.coo_matrix(
            (gt_np.values().cpu().numpy(), gt_np.indices().cpu().numpy()),
            shape=(num_ins, num_labels))
    else:
        gt_np = scipy.sparse.coo_matrix(gt.cpu().numpy(),
                                        shape=(num_ins, num_labels))
    if isinstance(scores, torch.Tensor):
        scores_np = scores.cpu().numpy()
    else:
        scores_np = scores

    inv_propen = xc_metrics.compute_inv_propesity(gt_np, model_cfg["ps_A"],
                                                  model_cfg["ps_B"])

    acc = xc_metrics.Metrics(true_labels=gt_np,
                             inv_psp=inv_propen,
                             remove_invalid=False)
    map_meter = meter.mAPMeter()
    # map meter requires tensor
    gt_dense = gt if not gt.is_sparse else gt.to_dense()
    map_meter.add(scores, gt_dense)

    prec, ndcg, PSprec, PSnDCG = acc.eval(scores_np, model_cfg["at_k"])
    d = {
        "prec": prec,
        "ndcg": ndcg,
        "psp": PSprec,
        "psn": PSnDCG,
        "mAP": [map_meter.value()]
    }
    return d
Example #4
 def _evaluate(self, true_labels, predicted_labels):
     """
         Evaluate predicted matrix
     """
     pmat = predicted_labels.tocsr()
     acc = xc.Metrics(true_labels)
     rec = xc.recall(pmat, true_labels, self.beam)
     _p, _n = acc.eval(predicted_labels.tocsr(), self.beam)
     return _p, _n, rec
Example #5
 def evaluate(self, train_labels, test_labels, predictionScore, ps_A, ps_B,
              eval_file_path):
     inv_propen = xc_metrics.compute_inv_propesity(train_labels, ps_A, ps_B)
     acc = xc_metrics.Metrics(true_labels=test_labels, inv_psp=inv_propen)
     args = acc.eval(predictionScore, 5)
     strs = ['P@K', 'nDCG@K', 'PSP@K', 'PSnDCG@K']
     msg = ''
     for i in range(4):
         msg += strs[i] + str(args[i][[0, 2, 4]]) + '\n'
     print(msg)
     if eval_file_path is not None:
         # Open in write mode so the metrics summary can be saved.
         with open(eval_file_path, 'w') as f:
             f.write(msg)
Example #6
def main(targets_file, train_file, predictions_file, A, B):
    """
        Args:
            targets_file: test labels
            train_file: train labels (to compute prop)
            predictions_file: predicted labels
            A: int: to compute propensity
            B: int: to compute propensity
    """
    true_labels = data_utils.read_sparse_file(targets_file)
    predicted_labels = data_utils.read_sparse_file(predictions_file)
    inv_psp = compute_inv_propensity(train_file, A, B)
    acc = xc_metrics.Metrics(true_labels=true_labels, inv_psp=inv_psp)
    args = acc.eval(predicted_labels, 5)
    print(xc_metrics.format(*args))
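Example #6 relies on a `compute_inv_propensity` helper that is not shown. A plausible sketch, assuming it loads the training label file with xclib's `data_utils` and wraps `xc_metrics.compute_inv_propesity` (the library's own spelling), is:

def compute_inv_propensity(train_file, A, B):
    # Hypothetical helper: read the training labels and compute
    # inverse propensity scores with xclib.
    train_labels = data_utils.read_sparse_file(train_file)
    return xc_metrics.compute_inv_propesity(train_labels, A, B)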
Example #7
def main(targets_label_file, train_label_file, predictions_file, A, B, docs, lbls):
    true_labels = _remove_overlap(
        data_utils.read_sparse_file(
            targets_label_file, force_header=True).tolil(),
        docs, lbls)
    trn_labels = data_utils.read_sparse_file(
        train_label_file, force_header=True)
    inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A=A, B=B)
    acc = xc_metrics.Metrics(
        true_labels, inv_psp=inv_propen, remove_invalid=False)
    predicted_labels = _remove_overlap(
        load_npz(predictions_file+'.npz').tolil(),
        docs, lbls)
    rec = xc_metrics.recall(predicted_labels, true_labels, k=20)[-1]*100
    print("R@20=%0.2f" % (rec))
    args = acc.eval(predicted_labels, 5)
    print(xc_metrics.format(*args))
Example #8
def main(tst_label_fname, trn_label_fname, filter_fname, pred_fname, A, B,
         betas, top_k, save):
    true_labels = data_utils.read_sparse_file(tst_label_fname)
    trn_labels = data_utils.read_sparse_file(trn_label_fname)
    inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A, B)
    mapping = get_filter_map(filter_fname)
    acc = xc_metrics.Metrics(true_labels, inv_psp=inv_propen)
    root = os.path.dirname(pred_fname)
    ans = ""
    if isinstance(betas, list) and betas[0] != -1:
        # Ensemble the classifier and shortlist (kNN) scores as
        # beta * clf + (1 - beta) * knn for each requested beta.
        knn = filter_predictions(load_npz(pred_fname + '_knn.npz'), mapping)
        clf = filter_predictions(load_npz(pred_fname + '_clf.npz'), mapping)
        args = acc.eval(clf, 5)
        ans = f"classifier\n{xc_metrics.format(*args)}"
        args = acc.eval(knn, 5)
        ans = ans + f"\nshortlist\n{xc_metrics.format(*args)}"
        clf = retain_topk(clf, k=top_k)
        knn = retain_topk(knn, k=top_k)
        clf = normalize(sigmoid(clf), norm='max')
        knn = normalize(sigmoid(knn), norm='max')
        for beta in betas:
            predicted_labels = beta * clf + (1 - beta) * knn
            args = acc.eval(predicted_labels, 5)
            ans = ans + f"\nbeta: {beta:.2f}\n{xc_metrics.format(*args)}"
            if save:
                fname = os.path.join(root, f"score_{beta:.2f}.npz")
                save_npz(fname,
                         retain_topk(predicted_labels, k=top_k),
                         compressed=False)
    else:
        predicted_labels = filter_predictions(
            sigmoid(load_npz(pred_fname + '.npz')), mapping)
        args = acc.eval(predicted_labels, 5)
        ans = xc_metrics.format(*args)
        if save:
            print("Saving predictions..")
            fname = os.path.join(root, "score.npz")
            save_npz(fname,
                     retain_topk(predicted_labels, k=top_k),
                     compressed=False)
    line = "-" * 30
    print(f"\n{line}\n{ans}\n{line}")
    return ans
Example #9
def main(tst_label_fname, trn_label_fname, pred_fname,
         A, B, save, *args, **kwargs):
    true_labels = data_utils.read_sparse_file(tst_label_fname)
    trn_labels = data_utils.read_sparse_file(trn_label_fname)
    inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A, B)
    acc = xc_metrics.Metrics(true_labels, inv_psp=inv_propen)
    root = os.path.dirname(pred_fname[-1])
    predicted_labels = read_files(pred_fname)
    ens_predicted_labels = merge(predicted_labels)
    ans = ""
    for idx, pred in enumerate(predicted_labels):
        args = acc.eval(pred, 5)
        ans = ans + f"learner: {idx}\n{xc_metrics.format(*args)}\n"
    args = acc.eval(ens_predicted_labels, 5)
    ans = ans + f"Ensemble\n{xc_metrics.format(*args)}"
    if save:
        print("Saving predictions..")
        fname = os.path.join(root, "score.npz")
        save_npz(fname, ens_predicted_labels, compressed=False)
    line = "-"*30
    print(f"\n{line}\n{ans}\n{line}")
    return ans
Example #10
 def _evaluate(self, true_labels, predicted_labels):
     acc = xc_metrics.Metrics(true_labels)
     acc = acc.eval(predicted_labels.tocsr(), 5)
     return acc
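All of the examples above follow the same pattern: compute an inverse-propensity vector from the training labels, build `xc_metrics.Metrics` over the test labels, and call `eval` on a sparse prediction matrix. A minimal end-to-end sketch with placeholder file names and example propensity parameters:

from scipy.sparse import load_npz
from xclib.data import data_utils
from xclib.evaluation import xc_metrics

# Placeholder paths; replace with your own dataset files.
trn_labels = data_utils.read_sparse_file("trn_X_Y.txt")
tst_labels = data_utils.read_sparse_file("tst_X_Y.txt")
predictions = load_npz("score.npz")

# A and B are dataset-specific propensity parameters (0.55 and 1.5 are common defaults).
inv_propen = xc_metrics.compute_inv_propesity(trn_labels, A=0.55, B=1.5)
acc = xc_metrics.Metrics(true_labels=tst_labels, inv_psp=inv_propen)
args = acc.eval(predictions, 5)  # P@k, nDCG@k, PSP@k, PSnDCG@k for k = 1..5
print(xc_metrics.format(*args))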