Beispiel #1
0
def eval_verification(descr, split):
    """Evaluate descriptors on the patch verification task.

    Reads the positive, intra-sequence negative and inter-sequence negative
    pair lists for ``split['name']`` from ``tskdir``, converts each list to
    distances with ``get_verif_dists`` and scores them for every entry of
    the module-level ``tp``.

    Args:
        descr: Descriptor container, forwarded unchanged to
            ``get_verif_dists``.
        split: Mapping describing the split; only ``split['name']`` is read
            here (it is embedded in the CSV file names).

    Returns:
        Nested ``defaultdict`` indexed as
        ``results[t][neg_kind][protocol][metric]`` with ``neg_kind`` in
        ``('intra', 'inter')``. The ``'balanced'`` protocol stores ``'fpr'``,
        ``'tpr'`` and ``'auc'`` (from ``metrics.roc``); the ``'imbalanced'``
        protocol stores ``'pr'``, ``'rc'`` and ``'ap'`` (from ``metrics.pr``).
    """
    print('>> Evaluating %s task' % green('verification'))

    start = time.time()

    def _load_pairs(kind):
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same ndarray and is available in every pandas release.
        fname = 'verif_' + kind + '_split-' + split['name'] + '.csv'
        return pd.read_csv(os.path.join(tskdir, fname)).values

    pos = _load_pairs('pos')
    neg_intra = _load_pairs('neg_intra')
    neg_inter = _load_pairs('neg_inter')

    d_pos = get_verif_dists(descr, pos, 1)
    d_neg_intra = get_verif_dists(descr, neg_intra, 2)
    d_neg_inter = get_verif_dists(descr, neg_inter, 3)

    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

    for t in tp:
        # Negatives are stacked first and positives second; the label vector
        # uses the same layout (0 = negative, 1 = positive).
        # NOTE(review): labels are sized from the positives only, so this
        # assumes every negative set has as many rows as the positive set
        # -- confirm against the task files.
        labels = np.vstack((np.zeros_like(d_pos[t]), np.ones_like(d_pos[t])))

        # Imbalanced protocol keeps the first N rows of the stacked arrays:
        # the negatives plus 20% of the positives (1M + 0.2 * 1M).
        n_imb = d_pos[t].shape[0] + int(d_pos[t].shape[0] * 0.2)

        for neg_kind, d_neg in (('intra', d_neg_intra),
                                ('inter', d_neg_inter)):
            dists = np.vstack((d_neg[t], d_pos[t]))

            # Distances are negated before scoring -- presumably so that a
            # larger value means "more similar"; verify against ``metrics``.

            # balanced protocol: 1M Positives - 1M Negatives
            fpr, tpr, auc = metrics.roc(-dists, labels)
            results[t][neg_kind]['balanced']['fpr'] = fpr
            results[t][neg_kind]['balanced']['tpr'] = tpr
            results[t][neg_kind]['balanced']['auc'] = auc

            # imbalanced protocol: 0.2M Positives - 1M Negatives
            pr, rc, ap = metrics.pr(-dists[0:n_imb], labels[0:n_imb])
            results[t][neg_kind]['imbalanced']['pr'] = pr
            results[t][neg_kind]['imbalanced']['rc'] = rc
            results[t][neg_kind]['imbalanced']['ap'] = ap

    end = time.time()
    print(">> %s task finished in %.0f secs  " %
          (green('Verification'), end - start))
    return results
Beispiel #2
0
def eval_matching(descr, split):
    """Evaluate descriptors on the matching task.

    For every sequence in ``split['test']``, every entry ``t`` of the
    module-level ``tp`` and every index ``i`` in 1..5, the ``ref``
    descriptors of the sequence are compared with the descriptors stored
    under the attribute ``t + str(i)``. Each reference row is matched to
    its nearest target row, and a match counts as correct when the nearest
    row has the same index as the reference row.

    Args:
        descr: Container indexed by sequence name (objects exposing ``ref``
            and the ``t + str(i)`` attributes) and by ``'distance'``, whose
            value is passed to ``dist_matrix``.
        split: Mapping; only ``split['test']`` is read.

    Returns:
        Nested ``defaultdict`` indexed as ``results[seq][t][i]`` holding
        ``'sr'`` (fraction of correct matches) plus ``'ap'``, ``'pr'`` and
        ``'rc'`` as returned by ``metrics.pr``.
    """
    print('>> Evaluating %s task' % green('matching'))
    start = time.time()

    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    for seq in tqdm(split['test']):
        ref_descr = descr[seq].ref
        # Row k of the reference set is expected to match row k of a target.
        expected = np.arange(ref_descr.shape[0])
        for t in tp:
            for i in range(1, 6):
                target_descr = getattr(descr[seq], t + str(i))
                dists = dist_matrix(ref_descr, target_descr,
                                    descr['distance'])
                nearest = np.argmin(dists, axis=1)
                is_correct = np.equal(nearest, expected)
                n_queries = is_correct.shape[0]

                entry = results[seq][t][i]
                entry['sr'] = np.count_nonzero(is_correct) / float(n_queries)

                # Distance of each reference row to its nearest neighbour,
                # negated before being handed to metrics.pr.
                nearest_dists = dists[expected, nearest]
                pr, rc, ap = metrics.pr(-nearest_dists, is_correct,
                                        numpos=n_queries)
                entry['ap'] = ap
                entry['pr'] = pr
                entry['rc'] = rc

    elapsed = time.time() - start
    print(">> %s task finished in %.0f secs  " %
          (green('Matching'), elapsed))
    return results
Beispiel #3
0
def eval_retrieval(descr, split):  # WIP
    """Evaluate descriptors on the retrieval task.

    Reads the query and distractor index lists for ``split['name']`` from
    ``tskdir``, computes the full query-vs-distractor distance matrix and,
    for every query and every entry ``t`` of the module-level ``tp``,
    scores the query against its own intra distances (from
    ``get_query_intra_dists``) followed by the distractors that do not come
    from the query's sequence.

    Args:
        descr: Descriptor container; forwarded to ``descr_from_idx`` and
            ``get_query_intra_dists``, and ``descr['distance']`` is passed
            to ``dist_matrix``.
        split: Mapping; ``split['name']`` selects the CSV files and
            ``split['test']`` lists the sequence names used for masking.

    Returns:
        Nested ``defaultdict`` indexed as ``results[i][t][k]['ap']`` where
        ``i`` is the query row index and ``k`` one of the fixed rank
        cut-offs.
    """
    print('>> Evaluating %s task' % green('retrieval'))
    start = time.time()

    def _load_indices(kind):
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same ndarray and is available in every pandas release.
        fname = 'retr_' + kind + '_split-' + split['name'] + '.csv'
        return pd.read_csv(os.path.join(tskdir, fname)).values

    q = _load_indices('queries')
    d = _load_indices('distractors')

    desc_q = descr_from_idx(descr, q).astype(np.float32)
    desc_d = descr_from_idx(descr, d).astype(np.float32)

    # Distractor masking per sequence: keep only the distractors whose first
    # column differs from the sequence name.
    m = {seq: d[:, 0] != seq for seq in split['test']}

    print(">> Please wait, computing distance matrix...")
    D = dist_matrix(desc_q, desc_d, descr['distance'])
    print(">> Distance matrix done.")

    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    at_ranks = [100, 500, 1000, 5000, 10000, 15000, 20000]

    pbar = tqdm(range(desc_q.shape[0]))
    pbar.set_description("Processing retrieval task")
    for i in pbar:
        # The distractor distances and the ground truth do not depend on
        # ``t``, so they are built once per query.
        d_distractors = D[i, :][m[q[i][0]]]
        # Five positives are prepended to the all-negative distractors.
        # NOTE(review): assumes get_query_intra_dists returns exactly five
        # distances per query -- confirm.
        gt = np.hstack((np.array([1, 1, 1, 1, 1]),
                        np.zeros_like(d_distractors)))

        for t in tp:
            D_intra = get_query_intra_dists(descr, desc_q[i], q[i], t)
            D_ = np.hstack((D_intra, d_distractors))

            for k in at_ranks:
                # Slicing truncates silently when fewer than k entries exist.
                pr, rc, ap = metrics.pr(-D_[0:k], gt[0:k])
                results[i][t][k]['ap'] = ap
    end = time.time()
    print(">> %s task finished in %.0f secs  " %
          (green('Retrieval'), end - start))
    return results