def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file=None):
  n_words = len(vocab['words'])
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  human_prec = np.zeros((1, n_words), dtype=np.float)
  human_rec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr_noref(
      gt_label[:, i], mil_prob[:, i])

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
             'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
             'human_prec': human_prec, 'human_rec': human_rec,
             'rec_at_half_prec': rec_at_half_prec}

  if eval_file is not None:
    eval_file_details_keys = details.keys()
    eval_file_details_vals = [details[x] for x in eval_file_details_keys]
    utils.save_variables(eval_file, eval_file_details_vals,
                         eval_file_details_keys, overwrite=True)
  return details
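# Minimal numpy-only sketch of the per-word precision/recall/AP computation
# that calc_pr_ovr_noref is assumed to perform (rank images by score, take
# cumulative precision/recall over the ranking, AP as mean precision at the
# positives). The real helper in cap_eval_utils may differ; this is only an
# illustrative stand-in, not the project's implementation.
import numpy as np

def _pr_ap_sketch(gt, prob):
  order = np.argsort(-prob)                      # rank images by decreasing score
  gt_sorted = gt[order]
  tp = np.cumsum(gt_sorted)
  prec = tp / np.arange(1, len(gt) + 1, dtype=np.float64)
  rec = tp / max(np.sum(gt), 1.0)
  ap = np.sum(prec * gt_sorted) / max(np.sum(gt), 1.0)
  return prec, rec, prob[order], ap

# Example: _pr_ap_sketch(np.array([1., 0., 1.]), np.array([0.9, 0.8, 0.2]))
# gives AP = (1/1 + 2/3) / 2 = 0.833...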
def test_model(imdb, model, detection_file=None):
  """
  Tests model and stores detections on disk
  """
  N_WORDS = len(model['vocab']['words'])
  sc = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
  mil_prob = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)

  for i in xrange(len(imdb.image_index)):
    im = cv2.imread(imdb.image_path_at(i))
    if im is None:
      print 'could not read %s; skipping' % (imdb.image_path_at(i))
      continue
    sc[i, :], mil_prob[i, :] = test_img(im, model['net'],
                                        model['base_image_size'], model['means'])
    utils.tic_toc_print(60, 'test_img : {:6d}/{:6d}'.format(i, len(imdb.image_index)))

  if detection_file is not None:
    # utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
    #                      ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite=True)
    utils.save_variables(detection_file, [sc, mil_prob],
                         ['sc', 'mil_prob'], overwrite=True)
def benchmark_det(imdb, vocab, gt_label, map_file, detection_file, eval_file=None):
  # Get prediction score
  dt = h5py.File(detection_file, 'r')
  mil_prob = dt['mil_prob'].value

  # Get manual mapping from detection category to caption label
  det2cap = utils.det2cap(map_file, vocab)

  # Retrieve probability of detection category as max of probability of its
  # corresponding caption labels
  n_words = len(det2cap.keys())
  mil_prob_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float)
  # gt_label_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float)
  gt_label_det = gt_label
  det_cats = det2cap.keys()
  for i in range(n_words):
    cat = det_cats[i]
    cap_inds = det2cap[cat]
    mil_prob_det[:, i] = np.max(mil_prob[:, cap_inds], axis=1)
    # gt_label_det[:, i] = np.max(gt_label[:, cap_inds], axis=1)
    # temp = np.sum(gt_label[:, cap_inds], axis=1)[:, np.newaxis]
    # temp = np.hstack((temp, np.ones(temp.shape)))
    # gt_label_det[:, i] = np.min(temp, axis=1)

  # Benchmark the output, and return a result struct
  P = np.zeros(mil_prob_det.shape, dtype=np.float)
  R = np.zeros(mil_prob_det.shape, dtype=np.float)
  score = np.zeros(mil_prob_det.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  threshold = np.mean(gt_label, axis=0)
  for i in range(73):
    threshold[i] = 0.0001

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = \
      cap_eval_utils.calc_pr_det(gt_label_det[:, i], mil_prob_det[:, i], threshold[i])

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

  # Format output title
  print " {:5s} [{:4s}] : {:7s}".format('Pos', 'nPos', 'AP')
  agg = []
  ind = range(n_words)
  pos = 'all'
  print " {:5s} [{:4d}] : {:5.2f}".format(pos, len(ind), 100 * np.mean(ap[0, ind]))
  agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind])})

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
                         ['details', 'agg', 'vocab', 'imdb'], overwrite=True)
  return details
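# Self-contained sketch of the collapse step above: each detection category's
# probability is taken as the max over the caption-word columns mapped to it.
# The toy mapping and arrays below are illustrative assumptions; the real
# mapping comes from utils.det2cap(map_file, vocab).
import numpy as np

toy_det2cap = {'dog': [0, 2], 'ball': [1]}           # detection cat -> caption-word column indices
toy_mil_prob = np.array([[0.1, 0.7, 0.6],
                         [0.9, 0.2, 0.3]])           # (num_images x num_caption_words)
toy_det_prob = np.zeros((toy_mil_prob.shape[0], len(toy_det2cap)))
for j, cat in enumerate(toy_det2cap.keys()):
  toy_det_prob[:, j] = np.max(toy_mil_prob[:, toy_det2cap[cat]], axis=1)
# e.g. the 'dog' column becomes [0.6, 0.9]: the max over caption words 0 and 2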
def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
  n_words = len(vocab['words'])
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  human_prec = np.zeros((1, n_words), dtype=np.float)
  human_rec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
      gt_label[:, i], mil_prob[:, i], num_references)
    human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
      gt_label[:, i], num_references)

    ind = np.where(R[:, i] >= human_rec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0, i] = P[ind[0], i]

    ind = np.where(P[:, i] >= human_prec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0, i] = R[ind[-1], i]

    ind = np.where(P[:, i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0, i] = R[ind[-1], i]

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
             'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
             'human_prec': human_prec, 'human_rec': human_rec,
             'rec_at_half_prec': rec_at_half_prec}

  if eval_file is not None:
    eval_file_details_keys = details.keys()
    eval_file_details_vals = [details[x] for x in eval_file_details_keys]
    utils.save_variables(eval_file, eval_file_details_vals,
                         eval_file_details_keys, overwrite=True)
  return details
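# Tiny numpy sketch of the lookup logic above: precision is read off at the
# first operating point where model recall reaches human recall, and recall is
# read off at the last operating point where precision still meets human
# precision. The toy PR curve and human-agreement numbers are illustrative only.
import numpy as np

toy_P = np.array([1.00, 0.90, 0.75, 0.60, 0.40])
toy_R = np.array([0.10, 0.30, 0.50, 0.70, 0.90])
toy_human_prec, toy_human_rec = 0.70, 0.45

ind = np.sort(np.where(toy_R >= toy_human_rec)[0])
prec_at_human_rec = toy_P[ind[0]] if len(ind) > 0 else np.nan    # -> 0.75
ind = np.sort(np.where(toy_P >= toy_human_prec)[0])
rec_at_human_prec = toy_R[ind[-1]] if len(ind) > 0 else 0.0      # -> 0.50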
def test_model(imdb, model, detection_file=None):
  """
  Tests model and stores detections on disk
  """
  N_WORDS = len(model['vocab']['words'])
  sc = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
  mil_prob = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)

  for i in xrange(len(imdb.image_index)):
    im = cv2.imread(imdb.image_path_at(i))
    sc[i, :], mil_prob[i, :] = test_img(im, model['net'],
                                        model['base_image_size'], model['means'])
    utils.tic_toc_print(60, 'test_img : {:6d}/{:6d}'.format(i, len(imdb.image_index)))

  if detection_file is not None:
    utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
                         ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite=True)
def testModelBatch(imdb, model, detection_file):
  if 'words' in model['vocab']:
    N_WORDS = len(model['vocab']['words'])
  else:
    # we are using COCO 80 classes
    N_WORDS = 80
  batchedImList = getBatchedImList(imdb, model)

  sc = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
  mil_prob = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
  if model['inf_type'] == 'MILNoise':
    fields = ['mil', 'mil_max', 'qconds10', 'qconds11', 'noisy_comb_noimage']
    qdata_raw = np.zeros((len(batchedImList), 4 * N_WORDS), dtype=np.float32)
    qdata_smax = np.zeros((len(batchedImList), 4 * N_WORDS), dtype=np.float32)
    qconds10 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
    qconds11 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
    noisy_comb_noimage = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)

  for bind in range(len(batchedImList)):
    if model['inf_type'] != 'MILNoise':
      mil_prob[bind, :], sc[bind, :] = tm.test_batch(batchedImList[bind], model)
    else:
      fOut = tm.test_batch(batchedImList[bind], model, fields)
      mil_prob[bind, :] = fOut[0]
      sc[bind, :] = fOut[1]
      qconds10[bind, :] = fOut[2]
      qconds11[bind, :] = fOut[3]
      noisy_comb_noimage[bind, :] = fOut[4]
    sg_utils.tic_toc_print(60, 'test_batch : %d/%d (num_per_batch %d)' %
                           (bind, len(batchedImList), len(batchedImList[0])))

  if detection_file is not None and model['inf_type'] != 'MILNoise':
    sg_utils.save_variables(detection_file, [sc, mil_prob],
                            ['sc', 'mil_prob'], overwrite=True)
  elif detection_file is not None:
    sg_utils.save_variables(detection_file,
                            [sc, mil_prob, qconds10, qconds11, noisy_comb_noimage],
                            ['sc', 'mil_prob', 'qconds10', 'qconds11', 'noisy_comb_noimage'],
                            overwrite=True)
def benchmark_only_ap(vocab, gt_label, num_references, mil_prob,
                      eval_file=None, noref=False):
  n_words = len(vocab['words'])
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    if noref:
      P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr_noref(
        gt_label[:, i], mil_prob[:, i])
    else:
      P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
        gt_label[:, i], mil_prob[:, i], num_references)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

  if eval_file is not None:
    eval_file_details_keys = details.keys()
    eval_file_details_vals = [details[x] for x in eval_file_details_keys]
    utils.save_variables(eval_file, eval_file_details_vals,
                         eval_file_details_keys, overwrite=True)
  return P, R, score, ap
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file=None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob']

  # Benchmark the output, and return a result struct
  n_words = len(vocab['words'])
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  human_prec = np.zeros((1, n_words), dtype=np.float)
  human_rec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
      gt_label[:, i], mil_prob[:, i], num_references)
    human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
      gt_label[:, i], num_references)

    ind = np.where(R[:, i] >= human_rec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0, i] = P[ind[0], i]

    ind = np.where(P[:, i] >= human_prec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0, i] = R[ind[-1], i]

    ind = np.where(P[:, i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0, i] = R[ind[-1], i]

    # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
             'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
             'human_prec': human_prec, 'human_rec': human_rec,
             'rec_at_half_prec': rec_at_half_prec}

  # Collect statistics over the POS
  agg = []
  for pos in list(set(vocab['poss'])):
    ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
    print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
      pos, len(ind), 100 * np.mean(ap[0, ind]),
      100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
                'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
                'human_prec': 100 * np.mean(human_prec[0, ind])})

  ind = range(len(vocab['words']))
  pos = 'all'
  print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
    pos, len(ind), 100 * np.mean(ap[0, ind]),
    100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
              'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
              'human_prec': 100 * np.mean(human_prec[0, ind])})

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
                         ['details', 'agg', 'vocab', 'imdb'], overwrite=True)
  return details
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file=None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob']

  # Benchmark the output, and return a result struct
  n_words = len(vocab['words'])
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  human_prec = np.zeros((1, n_words), dtype=np.float)
  human_rec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
      gt_label[:, i], mil_prob[:, i], num_references)
    human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
      gt_label[:, i], num_references)

    ind = np.where(R[:, i] >= human_rec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0, i] = P[ind[0], i]

    ind = np.where(P[:, i] >= human_prec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0, i] = R[ind[-1], i]

    ind = np.where(P[:, i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0, i] = R[ind[-1], i]

    # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
             'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
             'human_prec': human_prec, 'human_rec': human_rec,
             'rec_at_half_prec': rec_at_half_prec}

  # Collect statistics over the POS
  agg = []
  for pos in list(set(vocab['poss'])):
    ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
    print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
      pos, len(ind), 100 * np.mean(ap[0, ind]),
      100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
                'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
                'human_prec': 100 * np.mean(human_prec[0, ind])})

  ind = range(len(vocab['words']))
  pos = 'all'
  print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
    pos, len(ind), 100 * np.mean(ap[0, ind]),
    100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
              'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
              'human_prec': 100 * np.mean(human_prec[0, ind])})

  if eval_file is not None:
    try:
      utils.save_variables(eval_file, [details, agg], ['details', 'agg'], overwrite=True)
    except:
      print 'Error trying to save to pickle, will try hdf5 hack'
      namespace = globals().copy()
      namespace.update(locals())
      code.interact(local=namespace)

      eval_file_details = eval_file.replace('.pkl', '_details.h5')
      eval_file_details_keys = details.keys()
      eval_file_details_vals = [details[x] for x in eval_file_details_keys]
      utils.save_variables(eval_file_details, eval_file_details_vals,
                           eval_file_details_keys, overwrite=True)

      eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
      eval_file_agg_keys = agg.keys()
      eval_file_agg_vals = [agg[x] for x in eval_file_agg_keys]
      utils.save_variables(eval_file_agg, eval_file_agg_vals,
                           eval_file_agg_keys, overwrite=True)
  return details
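# Small self-contained sketch of the POS aggregation above: per-word AP values
# are grouped by their part-of-speech tag and averaged. The toy tag list and
# AP values are illustrative; the real 'poss' list comes from the vocabulary pickle.
import numpy as np

toy_poss = ['NN', 'VB', 'NN', 'JJ']
toy_ap = np.array([[0.40, 0.20, 0.60, 0.30]])        # shape (1, n_words), as above
for pos in sorted(set(toy_poss)):
  ind = [i for i, x in enumerate(toy_poss) if x == pos]
  print ' {:5s} [{:4d}] : {:5.2f}'.format(pos, len(ind), 100 * np.mean(toy_ap[0, ind]))
# prints: JJ [1] : 30.00, NN [2] : 50.00, VB [1] : 20.00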
imset = 'train'
coco_caps = COCO('../data/captions_train2014.json')

# mapping to output final statistics
mapping = {'NNS': 'NN', 'NNP': 'NN', 'NNPS': 'NN', 'NN': 'NN',
           'VB': 'VB', 'VBD': 'VB', 'VBN': 'VB', 'VBZ': 'VB', 'VBP': 'VB', 'VBG': 'VB',
           'JJR': 'JJ', 'JJS': 'JJ', 'JJ': 'JJ',
           'DT': 'DT', 'PRP': 'PRP', 'PRP$': 'PRP', 'IN': 'IN'}

# punctuations to be removed from the sentences
punctuations = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
                ".", "?", "!", ",", ":", "-", "--", "...", ";"]

vocab = preprocess.get_vocab(imset, coco_caps, punctuations, mapping)
sg_utils.save_variables('vocab_' + imset + '.pkl',
                        [vocab[x] for x in vocab.keys()],
                        vocab.keys(),
                        overwrite=True)

##
N_WORDS = 1000
vocab = preprocess.get_vocab_top_k(vocab, N_WORDS)
image_ids = coco_caps.getImgIds()
counts = preprocess.get_vocab_counts(image_ids, coco_caps, 5, vocab)

P = np.zeros((N_WORDS, 1), dtype=np.float)
R = np.zeros((N_WORDS, 1), dtype=np.float)
for i, w in enumerate(vocab['words']):
  P[i], R[i] = cap_eval_utils.human_agreement(counts[:, i], 5)
  print w, P[i], R[i]

for pos in list(set(vocab['poss'])):
  ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
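# Hedged sketch: reading the vocabulary pickle written above back into a dict.
# It assumes sg_utils.load_variables is the counterpart of sg_utils.save_variables
# (as used in the commented-out re-evaluation code below) and that the file was
# produced by the save call above.
vocab_loaded = sg_utils.load_variables('vocab_' + imset + '.pkl')
print 'loaded fields :', vocab_loaded.keys()
print 'first words   :', vocab_loaded['words'][:5]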
## MATLAB vocabulary
if job_name == 'vocab':
  import csv
  import sg_utils
  matlab_vocab = 'vocabs/vocab_words.txt'
  words = []
  poss = []
  counts = []
  with open(matlab_vocab, 'rb') as f:
    reader = csv.reader(f)
    for row in reader:
      words.append(row[0].strip())
      poss.append(row[1].strip())
      counts.append(int(row[2].strip()))
  sg_utils.save_variables('vocabs/vocab_train.pkl', [words, poss, counts],
                          ['words', 'poss', 'counts'], overwrite=True)

## Load the detections
# Code to re-evaluate matlab output, to check sanity of python code
# if job_name == 'eval_det':
#   import sg_utils
#   import test_model
#   import cap_eval_utils
#   vocab = sg_utils.load_variables('cachedir/v1/vocab_train.pkl')
#   dt = sg_utils.scio.loadmat('cachedir/v1/gt_labels_val.all.mat'); labels = dt['labels']
#   details = test_model.benchmark(None, vocab, labels, 5., 'cachedir/v1/mil_prob_val.all.mat')
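# Hedged sketch of the expected format of vocabs/vocab_words.txt, inferred from
# the CSV reader above: one "word, POS, count" row per vocabulary entry. The
# sample rows and file name below are illustrative assumptions only.
import csv

sample_rows = [['dog', 'NN', '12483'],
               ['running', 'VB', '3271'],
               ['red', 'JJ', '5130']]
with open('sample_vocab_words.txt', 'wb') as f:      # 'wb' mirrors the 'rb' reader above (Python 2)
  csv.writer(f).writerows(sample_rows)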