def eval_verification(descr, split):
    """Evaluate the patch-verification task for the given descriptors.

    Loads the positive, intra-sequence-negative and inter-sequence-negative
    pair lists for this split from ``tskdir``, computes pair distances with
    ``get_verif_dists``, and scores each descriptor type in the module-level
    ``tp`` list with ROC (balanced protocol) and precision-recall
    (imbalanced protocol).

    Args:
        descr: descriptor container understood by ``get_verif_dists``.
        split: dict with at least a ``'name'`` key selecting the CSV files.

    Returns:
        Nested dict: ``results[t][kind][protocol][metric]`` with ``kind`` in
        ``{'intra', 'inter'}`` and ``protocol`` in
        ``{'balanced', 'imbalanced'}``.
    """
    print('>> Evaluating %s task' % green('verification'))
    start = time.time()

    # Pair index lists for this split. NOTE: DataFrame.as_matrix() was
    # removed in pandas 1.0 — to_numpy() is the supported replacement.
    pos = pd.read_csv(
        os.path.join(tskdir,
                     'verif_pos_split-' + split['name'] + '.csv')).to_numpy()
    neg_intra = pd.read_csv(
        os.path.join(tskdir, 'verif_neg_intra_split-' + split['name'] +
                     '.csv')).to_numpy()
    neg_inter = pd.read_csv(
        os.path.join(tskdir, 'verif_neg_inter_split-' + split['name'] +
                     '.csv')).to_numpy()

    d_pos = get_verif_dists(descr, pos, 1)
    d_neg_intra = get_verif_dists(descr, neg_intra, 2)
    d_neg_inter = get_verif_dists(descr, neg_inter, 3)

    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    for t in tp:
        # Labels: negatives (0) stacked first, positives (1) second,
        # matching the distance stacking below. Renamed from 'l' (E741).
        # Assumes the negative lists match the positive list in length
        # (the balanced 1M-vs-1M protocol) — TODO confirm upstream.
        labels = np.vstack((np.zeros_like(d_pos[t]), np.ones_like(d_pos[t])))
        d_intra = np.vstack((d_neg_intra[t], d_pos[t]))
        d_inter = np.vstack((d_neg_inter[t], d_pos[t]))

        # Balanced protocol: 1M positives - 1M negatives, scored by ROC.
        # Distances are negated so larger score means "more likely positive".
        fpr, tpr, auc = metrics.roc(-d_intra, labels)
        results[t]['intra']['balanced']['fpr'] = fpr
        results[t]['intra']['balanced']['tpr'] = tpr
        results[t]['intra']['balanced']['auc'] = auc
        fpr, tpr, auc = metrics.roc(-d_inter, labels)
        results[t]['inter']['balanced']['fpr'] = fpr
        results[t]['inter']['balanced']['tpr'] = tpr
        results[t]['inter']['balanced']['auc'] = auc

        # Imbalanced protocol: 0.2M positives - 1M negatives. Negatives come
        # first in the stacks, so the first N_imb rows are all negatives
        # plus the first 20% of the positives.
        N_imb = d_pos[t].shape[0] + int(d_pos[t].shape[0] * 0.2)  # 1M + 0.2*1M
        pr, rc, ap = metrics.pr(-d_intra[0:N_imb], labels[0:N_imb])
        results[t]['intra']['imbalanced']['pr'] = pr
        results[t]['intra']['imbalanced']['rc'] = rc
        results[t]['intra']['imbalanced']['ap'] = ap
        pr, rc, ap = metrics.pr(-d_inter[0:N_imb], labels[0:N_imb])
        results[t]['inter']['imbalanced']['pr'] = pr
        results[t]['inter']['imbalanced']['rc'] = rc
        results[t]['inter']['imbalanced']['ap'] = ap

    end = time.time()
    print(">> %s task finished in %.0f secs " % (green('Verification'),
                                                 end - start))
    return results
def eval_matching(descr, split):
    """Run the image-matching evaluation on every test sequence.

    For each sequence in ``split['test']``, the reference descriptors are
    matched against the five target images of every transformation type in
    the module-level ``tp`` list via nearest-neighbour search on a distance
    matrix; success rate and precision/recall/AP are stored per
    (sequence, type, image index).

    Returns:
        Nested dict: ``results[seq][t][i]`` with keys
        ``'sr'``, ``'ap'``, ``'pr'``, ``'rc'``.
    """
    print('>> Evaluating %s task' % green('matching'))
    start = time.time()
    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

    progress = tqdm(split['test'])
    for seq in progress:
        ref_descr = descr[seq].ref
        # Ground truth: patch j in each target image corresponds to patch j
        # in the reference image.
        gt_idx = np.arange(ref_descr.shape[0])
        for t in tp:
            for img in range(1, 6):
                tgt_descr = getattr(descr[seq], t + str(img))
                dists = dist_matrix(ref_descr, tgt_descr, descr['distance'])
                nn_idx = np.argmin(dists, axis=1)
                correct = np.equal(nn_idx, gt_idx)
                entry = results[seq][t][img]
                # Success rate: fraction of patches whose nearest neighbour
                # is the ground-truth match.
                entry['sr'] = np.count_nonzero(correct) / float(
                    correct.shape[0])
                nn_dists = dists[gt_idx, nn_idx]
                # Rank matches by (negated) distance; numpos fixes the
                # positive count to the total number of patches.
                pr, rc, ap = metrics.pr(-nn_dists, correct,
                                        numpos=correct.shape[0])
                entry['ap'] = ap
                entry['pr'] = pr
                entry['rc'] = rc

    end = time.time()
    print(">> %s task finished in %.0f secs " % (green('Matching'),
                                                 end - start))
    return results
def eval_retrieval(descr, split):  # WIP
    """Evaluate the patch-retrieval task (work in progress).

    Loads query and distractor patch indices for this split, computes the
    query-vs-distractor distance matrix, prepends five intra-sequence
    positives per query, and records average precision at several ranks.

    Args:
        descr: descriptor container understood by ``descr_from_idx`` and
            ``get_query_intra_dists``.
        split: dict with ``'name'`` (CSV selection) and ``'test'``
            (sequence ids) keys.

    Returns:
        Nested dict: ``results[i][t][k]['ap']`` for query ``i``, descriptor
        type ``t`` and rank cutoff ``k``.
    """
    print('>> Evaluating %s task' % green('retrieval'))
    start = time.time()

    # NOTE: DataFrame.as_matrix() was removed in pandas 1.0 — to_numpy()
    # is the supported replacement.
    q = pd.read_csv(
        os.path.join(tskdir, 'retr_queries_split-' + split['name'] +
                     '.csv')).to_numpy()
    d = pd.read_csv(
        os.path.join(tskdir, 'retr_distractors_split-' + split['name'] +
                     '.csv')).to_numpy()

    desc_q = descr_from_idx(descr, q).astype(np.float32)
    desc_d = descr_from_idx(descr, d).astype(np.float32)

    # Per-sequence distractor mask: exclude distractors that belong to the
    # query's own sequence (column 0 presumably holds the sequence id —
    # TODO confirm against the CSV schema).
    m = dict((seq, d[:, 0] != seq) for seq in split['test'])

    print(">> Please wait, computing distance matrix...")
    D = dist_matrix(desc_q, desc_d, descr['distance'])
    print(">> Distance matrix done.")

    results = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    at_ranks = [100, 500, 1000, 5000, 10000, 15000, 20000]

    pbar = tqdm(range(desc_q.shape[0]))
    pbar.set_description("Processing retrieval task")
    for i in pbar:
        for t in tp:
            # Five intra-sequence distances for this query act as the
            # positives; they are prepended to the masked distractor row.
            D_intra = get_query_intra_dists(descr, desc_q[i], q[i], t)
            D_ = D[i, :]
            D_ = D_[m[q[i][0]]]
            gt = np.zeros_like(D_)
            D_ = np.hstack((D_intra, D_))
            gt = np.hstack((np.array([1, 1, 1, 1, 1]), gt))
            for k in at_ranks:
                # PR on the top-k candidates; distances negated so larger
                # score means "more likely positive".
                pr, rc, ap = metrics.pr(-D_[0:k], gt[0:k])
                results[i][t][k]['ap'] = ap

    end = time.time()
    print(">> %s task finished in %.0f secs " % (green('Retrieval'),
                                                 end - start))
    return results