def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file=None):
    """Compute per-word precision/recall/AP without using reference counts.

    For every word index ``i`` this calls
    ``cap_eval_utils.calc_pr_ovr_noref(gt_label[:, i], mil_prob[:, i])`` and
    stores the four returned values in column ``i`` of the precision, recall
    and score arrays and in ``ap[0, i]``.

    Args:
        vocab: mapping with a ``'words'`` sequence; only its length is used.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        mil_prob: array indexed as ``mil_prob[:, i]`` per word; its shape
            is also the shape of the precision/recall/score arrays.
        eval_file: optional path. When given, every entry of the result dict
            is written with ``utils.save_variables(..., overwrite=True)``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``. The human-agreement and
        ``rec_at_half_prec`` entries are never filled in here: they stay at
        zero, except ``prec_at_human_rec`` which stays NaN.
    """
    n_words = len(vocab['words'])

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    def per_sample():
        return np.zeros(mil_prob.shape, dtype=np.float64)

    def per_word():
        return np.zeros((1, n_words), dtype=np.float64)

    P = per_sample()
    R = per_sample()
    score = per_sample()
    ap = per_word()
    human_prec = per_word()
    human_rec = per_word()
    prec_at_human_rec = per_word()
    rec_at_human_prec = per_word()
    rec_at_half_prec = per_word()
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = \
            cap_eval_utils.calc_pr_ovr_noref(gt_label[:, i], mil_prob[:, i])

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    if eval_file is not None:
        # Materialise the keys once so names and values stay aligned.
        keys = list(details)
        utils.save_variables(eval_file, [details[k] for k in keys], keys,
                             overwrite=True)
    return details
def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file=None):
    """Compute per-word precision/recall/AP without using reference counts.

    For every word index ``i`` this calls
    ``cap_eval_utils.calc_pr_ovr_noref(gt_label[:, i], mil_prob[:, i])`` and
    stores the four returned values in column ``i`` of the precision, recall
    and score arrays and in ``ap[0, i]``.

    Args:
        vocab: mapping with a ``'words'`` sequence; only its length is used.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        mil_prob: array indexed as ``mil_prob[:, i]`` per word; its shape
            is also the shape of the precision/recall/score arrays.
        eval_file: optional path. When given, every entry of the result dict
            is written with ``utils.save_variables(..., overwrite=True)``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``. The human-agreement and
        ``rec_at_half_prec`` entries are never filled in here: they stay at
        zero, except ``prec_at_human_rec`` which stays NaN.
    """
    n_words = len(vocab['words'])
    sample_shape = mil_prob.shape
    word_shape = (1, n_words)

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    P = np.zeros(sample_shape, dtype=np.float64)
    R = np.zeros(sample_shape, dtype=np.float64)
    score = np.zeros(sample_shape, dtype=np.float64)
    ap = np.zeros(word_shape, dtype=np.float64)
    human_prec = np.zeros(word_shape, dtype=np.float64)
    human_rec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec = np.zeros(word_shape, dtype=np.float64)
    rec_at_human_prec = np.zeros(word_shape, dtype=np.float64)
    rec_at_half_prec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = \
            cap_eval_utils.calc_pr_ovr_noref(gt_label[:, i], mil_prob[:, i])

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    if eval_file is not None:
        # Materialise the keys once so names and values stay aligned.
        keys = list(details)
        utils.save_variables(eval_file, [details[k] for k in keys], keys,
                             overwrite=True)
    return details
def test_model(imdb, model, detection_file=None):
    """Run ``test_img`` on every image of ``imdb`` and optionally save results.

    Args:
        imdb: image database exposing ``num_images``, ``image_index`` and
            ``image_path_at(i)``.
        model: mapping providing ``'vocab'`` (with a ``'words'`` sequence),
            ``'net'``, ``'base_image_size'`` and ``'means'``; the last three
            are forwarded unchanged to ``test_img``.
        detection_file: optional path. When given, ``sc`` and ``mil_prob``
            are written with ``utils.save_variables(..., overwrite=True)``.

    Returns:
        None. Results are only persisted through ``detection_file``.

    Images that ``cv2.imread`` cannot read are reported and skipped; their
    rows in ``sc`` and ``mil_prob`` stay all-zero.
    """
    n_words = len(model['vocab']['words'])
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    sc = np.zeros((imdb.num_images, n_words), dtype=np.float64)
    mil_prob = np.zeros((imdb.num_images, n_words), dtype=np.float64)

    n_images = len(imdb.image_index)
    for i in range(n_images):
        image_path = imdb.image_path_at(i)
        im = cv2.imread(image_path)
        if im is None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('could not read %s; skipping' % (image_path,))
            continue
        sc[i, :], mil_prob[i, :] = test_img(
            im, model['net'], model['base_image_size'], model['means'])
        utils.tic_toc_print(
            60, 'test_img : {:6d}/{:6d}'.format(i, n_images))

    if detection_file is not None:
        utils.save_variables(detection_file, [sc, mil_prob],
                             ['sc', 'mil_prob'], overwrite=True)
def benchmark_det(imdb, vocab, gt_label, map_file, detection_file,
                  eval_file=None):
    """Benchmark caption-word scores against detection-category labels.

    Reads ``mil_prob`` from the HDF5 file ``detection_file``, maps caption
    words onto detection categories with ``utils.det2cap(map_file, vocab)``
    and scores each category as the maximum probability over its caption
    word indices. Each category is then evaluated with
    ``cap_eval_utils.calc_pr_det`` and the mean AP is printed.

    Args:
        imdb: image database; only stored alongside the results.
        vocab: vocabulary passed to ``utils.det2cap`` and stored with the
            results.
        gt_label: ground-truth array indexed as ``gt_label[:, i]`` per
            detection category. It is used as-is; it is not remapped through
            ``det2cap``.
        map_file: path handed to ``utils.det2cap``.
        detection_file: HDF5 file containing a ``'mil_prob'`` dataset.
        eval_file: optional path. When given, ``details``, ``agg``,
            ``vocab`` and ``imdb`` are saved with ``utils.save_variables``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap`` and ``score``.
    """
    # Get prediction score. The context manager closes the file handle and
    # ``[()]`` reads the full dataset (``.value`` was removed in h5py 3).
    with h5py.File(detection_file, 'r') as dt:
        mil_prob = dt['mil_prob'][()]

    # Get manual mapping from detection category to caption label
    det2cap = utils.det2cap(map_file, vocab)

    # Probability of a detection category = max probability over its
    # corresponding caption labels.
    det_cats = list(det2cap.keys())
    n_words = len(det_cats)
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    mil_prob_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float64)
    gt_label_det = gt_label
    for i, cat in enumerate(det_cats):
        cap_inds = det2cap[cat]
        mil_prob_det[:, i] = np.max(mil_prob[:, cap_inds], axis=1)

    # Benchmark the output, and return a result struct
    P = np.zeros(mil_prob_det.shape, dtype=np.float64)
    R = np.zeros(mil_prob_det.shape, dtype=np.float64)
    score = np.zeros(mil_prob_det.shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)

    threshold = np.mean(gt_label, axis=0)
    # The first 73 thresholds are forced to 0.0001.
    # NOTE(review): the meaning of 73 is not visible here - confirm.
    # Slice assignment avoids an IndexError when there are fewer columns.
    threshold[:73] = 0.0001

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_det(
            gt_label_det[:, i], mil_prob_det[:, i], threshold[i])

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

    # Format output title
    print(" {:5s} [{:4s}] : {:7s}".format('Pos', 'nPos', 'AP'))

    agg = []
    ind = list(range(n_words))
    pos = 'all'
    mean_ap = 100 * np.mean(ap[0, ind])
    print(" {:5s} [{:4d}] : {:5.2f}".format(pos, len(ind), mean_ap))
    agg.append({'pos': pos, 'ap': mean_ap})

    if eval_file is not None:
        utils.save_variables(eval_file, [details, agg, vocab, imdb],
                             ['details', 'agg', 'vocab', 'imdb'],
                             overwrite=True)
    return details
def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
    """Compute per-word precision/recall/AP and human-agreement statistics.

    For every word index ``i``:

    * ``cap_eval_utils.calc_pr_ovr`` fills column ``i`` of the precision,
      recall and score arrays and ``ap[0, i]``;
    * ``cap_eval_utils.human_agreement`` fills ``human_prec[0, i]`` and
      ``human_rec[0, i]``;
    * ``prec_at_human_rec`` is the precision at the first row whose recall
      is >= the human recall (NaN when no row qualifies);
    * ``rec_at_human_prec`` / ``rec_at_half_prec`` are the recall at the
      last row whose precision is >= the human precision / >= 0.5
      (0 when no row qualifies).

    Args:
        vocab: mapping with a ``'words'`` sequence; only its length is used.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded unchanged to ``calc_pr_ovr`` and
            ``human_agreement``.
        mil_prob: array indexed as ``mil_prob[:, i]`` per word; its shape
            is also the shape of the precision/recall/score arrays.
        eval_file: optional path. When given, every entry of the result dict
            is written with ``utils.save_variables(..., overwrite=True)``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``.
    """
    n_words = len(vocab['words'])
    sample_shape = mil_prob.shape
    word_shape = (1, n_words)

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    P = np.zeros(sample_shape, dtype=np.float64)
    R = np.zeros(sample_shape, dtype=np.float64)
    score = np.zeros(sample_shape, dtype=np.float64)
    ap = np.zeros(word_shape, dtype=np.float64)
    human_prec = np.zeros(word_shape, dtype=np.float64)
    human_rec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec = np.zeros(word_shape, dtype=np.float64)
    rec_at_human_prec = np.zeros(word_shape, dtype=np.float64)
    rec_at_half_prec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # np.where yields indices in ascending order, so the first/last hit
        # can be read directly without an extra sort.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    if eval_file is not None:
        # Materialise the keys once so names and values stay aligned.
        keys = list(details)
        utils.save_variables(eval_file, [details[k] for k in keys], keys,
                             overwrite=True)
    return details
def test_model(imdb, model, detection_file=None):
    """Run ``test_img`` on every image of ``imdb`` and optionally save results.

    Args:
        imdb: image database exposing ``num_images``, ``image_index`` and
            ``image_path_at(i)``.
        model: mapping providing ``'vocab'`` (with a ``'words'`` sequence),
            ``'net'``, ``'base_image_size'`` and ``'means'``; the last three
            are forwarded unchanged to ``test_img``.
        detection_file: optional path. When given, ``sc``, ``mil_prob``,
            ``model['vocab']`` and ``imdb`` are written with
            ``utils.save_variables(..., overwrite=True)``.

    Returns:
        None. Results are only persisted through ``detection_file``.

    Images that ``cv2.imread`` cannot read (it returns ``None``) are
    reported and skipped instead of being passed to ``test_img``; their
    rows in ``sc`` and ``mil_prob`` stay all-zero.
    """
    n_words = len(model['vocab']['words'])
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    sc = np.zeros((imdb.num_images, n_words), dtype=np.float64)
    mil_prob = np.zeros((imdb.num_images, n_words), dtype=np.float64)

    n_images = len(imdb.image_index)
    for i in range(n_images):
        image_path = imdb.image_path_at(i)
        im = cv2.imread(image_path)
        if im is None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('could not read %s; skipping' % (image_path,))
            continue
        sc[i, :], mil_prob[i, :] = test_img(
            im, model['net'], model['base_image_size'], model['means'])
        utils.tic_toc_print(
            60, 'test_img : {:6d}/{:6d}'.format(i, n_images))

    if detection_file is not None:
        utils.save_variables(
            detection_file,
            [sc, mil_prob, model['vocab'], imdb],
            ['sc', 'mil_prob', 'vocab', 'imdb'],
            overwrite=True)
def testModelBatch(imdb, model, detection_file):
    """Run ``tm.test_batch`` over every batch and optionally save the outputs.

    Batches come from ``getBatchedImList(imdb, model)``. One output row is
    written per batch index.

    Args:
        imdb: image database forwarded to ``getBatchedImList``.
        model: mapping providing ``'vocab'`` and ``'inf_type'``; it is also
            passed unchanged to ``getBatchedImList`` and ``tm.test_batch``.
            If ``model['vocab']`` has no ``'words'`` entry, 80 output
            columns are used (the source comment says "COCO 80 classes").
        detection_file: path for the results, or ``None`` to skip saving.

    Returns:
        None. Results are only persisted through ``detection_file``.

    When ``model['inf_type'] == 'MILNoise'`` the extra outputs ``qconds10``,
    ``qconds11`` and ``noisy_comb_noimage`` are requested from
    ``tm.test_batch`` and saved alongside ``sc`` and ``mil_prob``.
    """
    if 'words' in model['vocab']:
        n_words = len(model['vocab']['words'])
    else:
        # we are using COCO 80 classes
        n_words = 80

    batchedImList = getBatchedImList(imdb, model)
    n_batches = len(batchedImList)
    noisy = model['inf_type'] == 'MILNoise'

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    sc = np.zeros((n_batches, n_words), dtype=np.float64)
    mil_prob = np.zeros((n_batches, n_words), dtype=np.float64)

    if noisy:
        fields = ['mil', 'mil_max',
                  'qconds10', 'qconds11', 'noisy_comb_noimage']
        # The unused qdata_raw / qdata_smax buffers were dropped: nothing
        # ever read or wrote them.
        qconds10 = np.zeros((n_batches, n_words), dtype=np.float32)
        qconds11 = np.zeros((n_batches, n_words), dtype=np.float32)
        noisy_comb_noimage = np.zeros((n_batches, n_words), dtype=np.float32)

    for bind in range(n_batches):
        if not noisy:
            mil_prob[bind, :], sc[bind, :] = tm.test_batch(
                batchedImList[bind], model)
        else:
            fOut = tm.test_batch(batchedImList[bind], model, fields)
            mil_prob[bind, :] = fOut[0]
            sc[bind, :] = fOut[1]
            qconds10[bind, :] = fOut[2]
            qconds11[bind, :] = fOut[3]
            noisy_comb_noimage[bind, :] = fOut[4]
        sg_utils.tic_toc_print(
            60, 'test_batch : %d/%d (num_per_batch %d)'
            % (bind, n_batches, len(batchedImList[0])))

    if detection_file is None:
        return

    if not noisy:
        sg_utils.save_variables(detection_file, [sc, mil_prob],
                                ['sc', 'mil_prob'], overwrite=True)
    else:
        sg_utils.save_variables(
            detection_file,
            [sc, mil_prob, qconds10, qconds11, noisy_comb_noimage],
            ['sc', 'mil_prob', 'qconds10', 'qconds11', 'noisy_comb_noimage'],
            overwrite=True)
def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
    """Compute per-word precision/recall/AP and human-agreement statistics.

    For every word index ``i``:

    * ``cap_eval_utils.calc_pr_ovr`` fills column ``i`` of the precision,
      recall and score arrays and ``ap[0, i]``;
    * ``cap_eval_utils.human_agreement`` fills ``human_prec[0, i]`` and
      ``human_rec[0, i]``;
    * ``prec_at_human_rec`` is the precision at the first row whose recall
      is >= the human recall (NaN when no row qualifies);
    * ``rec_at_human_prec`` / ``rec_at_half_prec`` are the recall at the
      last row whose precision is >= the human precision / >= 0.5
      (0 when no row qualifies).

    Args:
        vocab: mapping with a ``'words'`` sequence; only its length is used.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded unchanged to ``calc_pr_ovr`` and
            ``human_agreement``.
        mil_prob: array indexed as ``mil_prob[:, i]`` per word; its shape
            is also the shape of the precision/recall/score arrays.
        eval_file: optional path. When given, every entry of the result dict
            is written with ``utils.save_variables(..., overwrite=True)``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``.
    """
    n_words = len(vocab['words'])

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    def per_sample():
        return np.zeros(mil_prob.shape, dtype=np.float64)

    def per_word():
        return np.zeros((1, n_words), dtype=np.float64)

    P = per_sample()
    R = per_sample()
    score = per_sample()
    ap = per_word()
    human_prec = per_word()
    human_rec = per_word()
    prec_at_human_rec = per_word()
    rec_at_human_prec = per_word()
    rec_at_half_prec = per_word()
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # np.where yields indices in ascending order, so the first/last hit
        # can be read directly without an extra sort.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    if eval_file is not None:
        # Materialise the keys once so names and values stay aligned.
        keys = list(details)
        utils.save_variables(eval_file, [details[k] for k in keys], keys,
                             overwrite=True)
    return details
def output_words(imdb, detection_file, eval_file, vocab,
                 threshold_metric_name, output_metric_name, threshold,
                 min_words, output_file=None,
                 functional_words=('a', 'on', 'of', 'the', 'in', 'with',
                                   'and', 'is', 'to', 'an', 'two', 'at',
                                   'next', 'are')):
    """Output the words as generated by the model.

    Loads the detections from ``detection_file`` and the score/precision
    curves from ``eval_file``, maps every ``mil_prob`` score to a precision
    with ``cap_eval_utils.compute_precision_score_mapping`` (stored as
    ``dt['prec']``), selects words per image with ``output_words_image``
    and writes them to ``output_file``.

    Args:
        imdb: image database exposing ``num_images`` and ``image_index``.
        detection_file: path loaded with ``utils.load_variables``; must
            contain ``'mil_prob'``.
        eval_file: path loaded with ``utils.load_variables``; must contain
            ``'details'`` with ``'score'`` and ``'precision'``.
        vocab: mapping with a ``'words'`` sequence.
        threshold_metric_name: key of the detection data (``'prec'`` is also
            available) whose rows are passed first to ``output_words_image``.
        output_metric_name: key of the detection data whose rows are passed
            second to ``output_words_image``.
        threshold: forwarded to ``output_words_image``; also written to the
            output header.
        min_words: forwarded to ``output_words_image``; also written to the
            output header.
        output_file: optional text file to write; nothing is written when
            ``None``.
        functional_words: words flagged via the mask described below. The
            default is an immutable tuple so it cannot be shared and mutated
            across calls.

    Returns:
        None.
    """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)

    # NOTE(review): this mask is True for words that are NOT in
    # functional_words, although the original local was named
    # ``is_functional`` - confirm what output_words_image expects.
    not_functional = np.array(
        [word not in functional_words for word in vocab['words']])

    prec = np.zeros(dt['mil_prob'].shape)
    n_cols = prec.shape[1]
    for jj in range(n_cols):
        prec[:, jj] = cap_eval_utils.compute_precision_score_mapping(
            pt['details']['score'][:, jj].copy(),
            pt['details']['precision'][:, jj].copy(),
            dt['mil_prob'][:, jj])
        utils.tic_toc_print(
            1, 'compute precision score mapping: {:4d} / {:4d}'.format(
                jj, n_cols))
    dt['prec'] = prec

    out_all = []
    for i in range(imdb.num_images):
        out_all.append(output_words_image(
            dt[threshold_metric_name][i, :], dt[output_metric_name][i, :],
            min_words, threshold, vocab, not_functional))
        utils.tic_toc_print(
            1, 'output words image: {:4d} / {:4d}'.format(
                i, imdb.num_images))

    if output_file is not None:
        with open(output_file, 'wt') as f:
            f.write('detection file %s\n' % (detection_file,))
            f.write('eval file %s\n' % (eval_file,))
            f.write('threshold %.2f; min_words %d\n' % (threshold, min_words))
            # One line per image: "<image id>: word (value), word (value), "
            for i in range(imdb.num_images):
                f.write('{:d}: '.format(imdb.image_index[i]))
                for item in out_all[i]:
                    f.write('{:s} ({:.2f}), '.format(item[0], item[1]))
                f.write('\n')
def benchmark_only_ap(vocab, gt_label, num_references, mil_prob,
                      eval_file=None, noref=False):
    """Compute per-word precision, recall, score and AP only.

    Args:
        vocab: mapping with a ``'words'`` sequence; only its length is used.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded to ``cap_eval_utils.calc_pr_ovr``; unused
            when ``noref`` is true.
        mil_prob: array indexed as ``mil_prob[:, i]`` per word; its shape
            is also the shape of the precision/recall/score arrays.
        eval_file: optional path. When given, the four arrays are written
            under the keys ``precision``, ``recall``, ``ap`` and ``score``
            with ``utils.save_variables(..., overwrite=True)``.
        noref: when true, ``cap_eval_utils.calc_pr_ovr_noref`` is used
            instead of ``calc_pr_ovr``.

    Returns:
        Tuple ``(P, R, score, ap)``.
    """
    n_words = len(vocab['words'])
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    P = np.zeros(mil_prob.shape, dtype=np.float64)
    R = np.zeros(mil_prob.shape, dtype=np.float64)
    score = np.zeros(mil_prob.shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        if noref:
            result = cap_eval_utils.calc_pr_ovr_noref(
                gt_label[:, i], mil_prob[:, i])
        else:
            result = cap_eval_utils.calc_pr_ovr(
                gt_label[:, i], mil_prob[:, i], num_references)
        P[:, i], R[:, i], score[:, i], ap[0, i] = result

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

    if eval_file is not None:
        # Materialise the keys once so names and values stay aligned.
        keys = list(details)
        utils.save_variables(eval_file, [details[k] for k in keys], keys,
                             overwrite=True)
    return P, R, score, ap
def benchmark_only_ap(vocab, gt_label, num_references, mil_prob,
                      eval_file=None, noref=False):
    """Compute per-word precision, recall, score and AP only.

    Args:
        vocab: mapping with a ``'words'`` sequence; only its length is used.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded to ``cap_eval_utils.calc_pr_ovr``; unused
            when ``noref`` is true.
        mil_prob: array indexed as ``mil_prob[:, i]`` per word; its shape
            is also the shape of the precision/recall/score arrays.
        eval_file: optional path. When given, the four arrays are written
            under the keys ``precision``, ``recall``, ``ap`` and ``score``
            with ``utils.save_variables(..., overwrite=True)``.
        noref: when true, ``cap_eval_utils.calc_pr_ovr_noref`` is used
            instead of ``calc_pr_ovr``.

    Returns:
        Tuple ``(P, R, score, ap)``.
    """
    n_words = len(vocab['words'])
    sample_shape = mil_prob.shape
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    P = np.zeros(sample_shape, dtype=np.float64)
    R = np.zeros(sample_shape, dtype=np.float64)
    score = np.zeros(sample_shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        if noref:
            result = cap_eval_utils.calc_pr_ovr_noref(
                gt_label[:, i], mil_prob[:, i])
        else:
            result = cap_eval_utils.calc_pr_ovr(
                gt_label[:, i], mil_prob[:, i], num_references)
        P[:, i], R[:, i], score[:, i], ap[0, i] = result

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

    if eval_file is not None:
        # Materialise the keys once so names and values stay aligned.
        keys = list(details)
        utils.save_variables(eval_file, [details[k] for k in keys], keys,
                             overwrite=True)
    return P, R, score, ap
def output_words(imdb, detection_file, eval_file, vocab,
                 threshold_metric_name, output_metric_name, threshold,
                 min_words, output_file=None,
                 functional_words=('a', 'on', 'of', 'the', 'in', 'with',
                                   'and', 'is', 'to', 'an', 'two', 'at',
                                   'next', 'are')):
    """Output the words as generated by the model.

    Loads the detections from ``detection_file`` and the score/precision
    curves from ``eval_file``, maps every ``mil_prob`` score to a precision
    with ``cap_eval_utils.compute_precision_score_mapping`` (stored as
    ``dt['prec']``), selects words per image with ``output_words_image``
    and writes them to ``output_file``.

    Args:
        imdb: image database exposing ``num_images`` and ``image_index``.
        detection_file: path loaded with ``utils.load_variables``; must
            contain ``'mil_prob'``.
        eval_file: path loaded with ``utils.load_variables``; must contain
            ``'details'`` with ``'score'`` and ``'precision'``.
        vocab: mapping with a ``'words'`` sequence.
        threshold_metric_name: key of the detection data (``'prec'`` is also
            available) whose rows are passed first to ``output_words_image``.
        output_metric_name: key of the detection data whose rows are passed
            second to ``output_words_image``.
        threshold: forwarded to ``output_words_image``; also written to the
            output header.
        min_words: forwarded to ``output_words_image``; also written to the
            output header.
        output_file: optional text file to write; nothing is written when
            ``None``.
        functional_words: words flagged via the mask described below. The
            default is an immutable tuple so it cannot be shared and mutated
            across calls.

    Returns:
        None.
    """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)

    # NOTE(review): this mask is True for words that are NOT in
    # functional_words, although the original local was named
    # ``is_functional`` - confirm what output_words_image expects.
    not_functional = np.array(
        [word not in functional_words for word in vocab['words']])

    scores = pt['details']['score']
    precisions = pt['details']['precision']
    prec = np.zeros(dt['mil_prob'].shape)
    n_cols = prec.shape[1]
    for jj in range(n_cols):
        prec[:, jj] = cap_eval_utils.compute_precision_score_mapping(
            scores[:, jj].copy(),
            precisions[:, jj].copy(),
            dt['mil_prob'][:, jj])
        utils.tic_toc_print(
            1, 'compute precision score mapping: {:4d} / {:4d}'.format(
                jj, n_cols))
    dt['prec'] = prec

    out_all = []
    for i in range(imdb.num_images):
        out_all.append(output_words_image(
            dt[threshold_metric_name][i, :], dt[output_metric_name][i, :],
            min_words, threshold, vocab, not_functional))
        utils.tic_toc_print(
            1, 'output words image: {:4d} / {:4d}'.format(
                i, imdb.num_images))

    if output_file is not None:
        with open(output_file, 'wt') as f:
            f.write('detection file %s\n' % (detection_file,))
            f.write('eval file %s\n' % (eval_file,))
            f.write('threshold %.2f; min_words %d\n' % (threshold, min_words))
            # One line per image: "<image id>: word (value), word (value), "
            for i in range(imdb.num_images):
                f.write('{:d}: '.format(imdb.image_index[i]))
                for item in out_all[i]:
                    f.write('{:s} ({:.2f}), '.format(item[0], item[1]))
                f.write('\n')
def testModelBatch(imdb, model, detection_file):
    """Run ``tm.test_batch`` over every batch and optionally save the outputs.

    Batches come from ``getBatchedImList(imdb, model)``. One output row is
    written per batch index.

    Args:
        imdb: image database forwarded to ``getBatchedImList``.
        model: mapping providing ``'vocab'`` and ``'inf_type'``; it is also
            passed unchanged to ``getBatchedImList`` and ``tm.test_batch``.
            If ``model['vocab']`` has no ``'words'`` entry, 80 output
            columns are used (the source comment says "COCO 80 classes").
        detection_file: path for the results, or ``None`` to skip saving.

    Returns:
        None. Results are only persisted through ``detection_file``.

    When ``model['inf_type'] == 'MILNoise'`` the extra outputs ``qconds10``,
    ``qconds11`` and ``noisy_comb_noimage`` are requested from
    ``tm.test_batch`` and saved alongside ``sc`` and ``mil_prob``.
    """
    if 'words' in model['vocab']:
        n_words = len(model['vocab']['words'])
    else:
        # we are using COCO 80 classes
        n_words = 80

    batchedImList = getBatchedImList(imdb, model)
    n_batches = len(batchedImList)
    out_shape = (n_batches, n_words)
    noisy = model['inf_type'] == 'MILNoise'

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    sc = np.zeros(out_shape, dtype=np.float64)
    mil_prob = np.zeros(out_shape, dtype=np.float64)

    if noisy:
        fields = ['mil', 'mil_max',
                  'qconds10', 'qconds11', 'noisy_comb_noimage']
        # The unused qdata_raw / qdata_smax buffers were dropped: nothing
        # ever read or wrote them.
        qconds10 = np.zeros(out_shape, dtype=np.float32)
        qconds11 = np.zeros(out_shape, dtype=np.float32)
        noisy_comb_noimage = np.zeros(out_shape, dtype=np.float32)

    for bind in range(n_batches):
        if not noisy:
            mil_prob[bind, :], sc[bind, :] = tm.test_batch(
                batchedImList[bind], model)
        else:
            fOut = tm.test_batch(batchedImList[bind], model, fields)
            mil_prob[bind, :] = fOut[0]
            sc[bind, :] = fOut[1]
            qconds10[bind, :] = fOut[2]
            qconds11[bind, :] = fOut[3]
            noisy_comb_noimage[bind, :] = fOut[4]
        sg_utils.tic_toc_print(
            60, 'test_batch : %d/%d (num_per_batch %d)'
            % (bind, n_batches, len(batchedImList[0])))

    if detection_file is None:
        return

    if not noisy:
        sg_utils.save_variables(detection_file, [sc, mil_prob],
                                ['sc', 'mil_prob'], overwrite=True)
    else:
        sg_utils.save_variables(
            detection_file,
            [sc, mil_prob, qconds10, qconds11, noisy_comb_noimage],
            ['sc', 'mil_prob', 'qconds10', 'qconds11', 'noisy_comb_noimage'],
            overwrite=True)
def benchmark(imdb, vocab, gt_label, num_references, detection_file,
              eval_file=None):
    """Benchmark stored word detections and print a per-POS summary.

    Loads ``mil_prob`` from ``detection_file`` with ``utils.load_variables``,
    computes per-word precision/recall/AP with ``cap_eval_utils.calc_pr_ovr``
    and human precision/recall with ``cap_eval_utils.human_agreement``, then
    prints and collects mean statistics (in percent) for every distinct value
    in ``vocab['poss']`` plus a final ``'all'`` row.

    Args:
        imdb: not used by this variant of the function.
        vocab: mapping with ``'words'`` and ``'poss'`` sequences.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded unchanged to ``calc_pr_ovr`` and
            ``human_agreement``.
        detection_file: path containing ``'mil_prob'``.
        eval_file: optional path. ``details`` and ``agg`` are saved there
            with ``utils.save_variables``. If that raises, the data is saved
            instead to ``*_details.h5`` and ``*_agg.h5`` files whose names
            are derived by replacing ``'.pkl'`` in ``eval_file``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``.

    Changes from the original fallback path: the bare ``except:`` is narrowed
    to ``Exception``, the leftover ``code.interact`` debugging console is
    removed so batch runs cannot block on stdin, and ``agg`` (a list of
    dicts, which has no ``.keys()``) is transposed into one list per field
    before saving.
    """
    # Get ground truth
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    sample_shape = mil_prob.shape
    word_shape = (1, n_words)
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    P = np.zeros(sample_shape, dtype=np.float64)
    R = np.zeros(sample_shape, dtype=np.float64)
    score = np.zeros(sample_shape, dtype=np.float64)
    ap = np.zeros(word_shape, dtype=np.float64)
    human_prec = np.zeros(word_shape, dtype=np.float64)
    human_rec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec = np.zeros(word_shape, dtype=np.float64)
    rec_at_human_prec = np.zeros(word_shape, dtype=np.float64)
    rec_at_half_prec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # np.where yields indices in ascending order, so the first/last hit
        # can be read directly without an extra sort.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    # Collect statistics over the POS
    agg = []

    def summarize(pos, ind):
        # Print and record the mean statistics (in percent) of one group.
        row = {
            'pos': pos,
            'ap': 100 * np.mean(ap[0, ind]),
            'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
            'human_prec': 100 * np.mean(human_prec[0, ind]),
        }
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(" {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
            pos, len(ind), row['ap'], row['prec_at_human_rec'],
            row['human_prec']))
        agg.append(row)

    for pos in set(vocab['poss']):
        summarize(pos, [k for k, x in enumerate(vocab['poss']) if pos == x])
    summarize('all', list(range(n_words)))

    if eval_file is not None:
        try:
            utils.save_variables(eval_file, [details, agg],
                                 ['details', 'agg'], overwrite=True)
        except Exception:
            print('Error trying to save to pickle, will try hdf5 hack')
            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            detail_keys = list(details)
            utils.save_variables(eval_file_details,
                                 [details[k] for k in detail_keys],
                                 detail_keys, overwrite=True)

            # agg is a list of per-group dicts; store one list per field.
            # NOTE(review): assumes save_variables can serialise these plain
            # lists (including the string 'pos' column) - confirm.
            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            agg_keys = ['pos', 'ap', 'prec_at_human_rec', 'human_prec']
            agg_vals = [[row[k] for row in agg] for k in agg_keys]
            utils.save_variables(eval_file_agg, agg_vals, agg_keys,
                                 overwrite=True)
    return details
def benchmark(imdb, vocab, gt_label, num_references, detection_file,
              eval_file=None):
    """Benchmark stored word detections and print a per-POS summary.

    Loads ``mil_prob`` from ``detection_file`` with ``utils.load_variables``,
    computes per-word precision/recall/AP with ``cap_eval_utils.calc_pr_ovr``
    and human precision/recall with ``cap_eval_utils.human_agreement``, then
    prints and collects mean statistics (in percent) for every distinct value
    in ``vocab['poss']`` plus a final ``'all'`` row.

    Args:
        imdb: image database; only stored alongside the results.
        vocab: mapping with ``'words'`` and ``'poss'`` sequences; also
            stored alongside the results.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded unchanged to ``calc_pr_ovr`` and
            ``human_agreement``.
        detection_file: path containing ``'mil_prob'``.
        eval_file: optional path. When given, ``details``, ``agg``,
            ``vocab`` and ``imdb`` are saved with
            ``utils.save_variables(..., overwrite=True)``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``.
    """
    # Get ground truth
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    sample_shape = mil_prob.shape
    word_shape = (1, n_words)
    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    P = np.zeros(sample_shape, dtype=np.float64)
    R = np.zeros(sample_shape, dtype=np.float64)
    score = np.zeros(sample_shape, dtype=np.float64)
    ap = np.zeros(word_shape, dtype=np.float64)
    human_prec = np.zeros(word_shape, dtype=np.float64)
    human_rec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec = np.zeros(word_shape, dtype=np.float64)
    rec_at_human_prec = np.zeros(word_shape, dtype=np.float64)
    rec_at_half_prec = np.zeros(word_shape, dtype=np.float64)
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # np.where yields indices in ascending order, so the first/last hit
        # can be read directly without an extra sort.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    # Collect statistics over the POS
    agg = []

    def summarize(pos, ind):
        # Print and record the mean statistics (in percent) of one group.
        row = {
            'pos': pos,
            'ap': 100 * np.mean(ap[0, ind]),
            'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
            'human_prec': 100 * np.mean(human_prec[0, ind]),
        }
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(" {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
            pos, len(ind), row['ap'], row['prec_at_human_rec'],
            row['human_prec']))
        agg.append(row)

    for pos in set(vocab['poss']):
        summarize(pos, [k for k, x in enumerate(vocab['poss']) if pos == x])
    summarize('all', list(range(n_words)))

    if eval_file is not None:
        utils.save_variables(eval_file, [details, agg, vocab, imdb],
                             ['details', 'agg', 'vocab', 'imdb'],
                             overwrite=True)
    return details
import os
import sg_utils as utils
import coco_voc
import shutil

# Create the 60 two-digit bucket directories (00 .. 59) under ../data/images.
for i in xrange(60):
    bucket_dir = os.path.join('..', 'data', 'images', '{:02d}'.format(i))
    utils.mkdir_if_missing(bucket_dir)

# Copy every image of each split from its ../data/<split>2014 folder to the
# location reported by the imdb.
sets = ['train', 'val', 'test']
for set_ in sets:
    imdb = coco_voc.coco_voc(set_)
    src_dir = os.path.join('../data', set_ + '2014')
    for i in xrange(imdb.num_images):
        file_name = 'COCO_{}2014_{:012d}.jpg'.format(
            set_, imdb.image_index[i])
        in_file = os.path.join(src_dir, file_name)
        out_file = imdb.image_path_at(i)
        shutil.copyfile(in_file, out_file)
        progress = ' Copying images [{}]: {:06d} / {:06d}\n'.format(
            set_, i, imdb.num_images)
        utils.tic_toc_print(1, progress)
import os
import sg_utils as utils
import coco_voc
import shutil

# Create the 60 two-digit bucket directories (00 .. 59) under data/images.
for i in xrange(60):
    bucket_dir = os.path.join('data', 'images', '{:02d}'.format(i))
    utils.mkdir_if_missing(bucket_dir)

# Copy every image of each split from its <split>2014 folder (relative to
# the working directory) to the location reported by the imdb.
sets = ['train', 'val', 'test']
for set_ in sets:
    imdb = coco_voc.coco_voc(set_)
    src_dir = set_ + '2014'
    for i in xrange(imdb.num_images):
        file_name = 'COCO_{}2014_{:012d}.jpg'.format(
            set_, imdb.image_index[i])
        in_file = os.path.join(src_dir, file_name)
        out_file = imdb.image_path_at(i)
        shutil.copyfile(in_file, out_file)
        progress = ' Copying images [{}]: {:06d} / {:06d}\n'.format(
            set_, i, imdb.num_images)
        utils.tic_toc_print(1, progress)
def benchmark(imdb, vocab, gt_label, num_references, detection_file,
              eval_file=None):
    """Benchmark stored word detections and print a per-POS summary.

    Loads ``mil_prob`` from ``detection_file`` with ``utils.load_variables``,
    computes per-word precision/recall/AP with ``cap_eval_utils.calc_pr_ovr``
    and human precision/recall with ``cap_eval_utils.human_agreement``, then
    prints and collects mean statistics (in percent) for every distinct value
    in ``vocab['poss']`` plus a final ``'all'`` row.

    Args:
        imdb: not used by this variant of the function.
        vocab: mapping with ``'words'`` and ``'poss'`` sequences.
        gt_label: array indexed as ``gt_label[:, i]`` per word.
        num_references: forwarded unchanged to ``calc_pr_ovr`` and
            ``human_agreement``.
        detection_file: path containing ``'mil_prob'``.
        eval_file: optional path. ``details`` and ``agg`` are saved there
            with ``utils.save_variables``. If that raises, the data is saved
            instead to ``*_details.h5`` and ``*_agg.h5`` files whose names
            are derived by replacing ``'.pkl'`` in ``eval_file``.

    Returns:
        dict with keys ``precision``, ``recall``, ``ap``, ``score``,
        ``prec_at_human_rec``, ``rec_at_human_prec``, ``human_prec``,
        ``human_rec`` and ``rec_at_half_prec``.

    Changes from the original fallback path: the bare ``except:`` is narrowed
    to ``Exception``, the leftover ``code.interact`` debugging console is
    removed so batch runs cannot block on stdin, and ``agg`` (a list of
    dicts, which has no ``.keys()``) is transposed into one list per field
    before saving.
    """
    # Get ground truth
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])

    # np.float64 is the dtype the removed ``np.float`` alias resolved to.
    def per_sample():
        return np.zeros(mil_prob.shape, dtype=np.float64)

    def per_word():
        return np.zeros((1, n_words), dtype=np.float64)

    P = per_sample()
    R = per_sample()
    score = per_sample()
    ap = per_word()
    human_prec = per_word()
    human_rec = per_word()
    prec_at_human_rec = per_word()
    rec_at_human_prec = per_word()
    rec_at_half_prec = per_word()
    prec_at_human_rec[...] = np.nan

    for i in range(n_words):
        utils.tic_toc_print(
            1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # np.where yields indices in ascending order, so the first/last hit
        # can be read directly without an extra sort.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score,
        'prec_at_human_rec': prec_at_human_rec,
        'rec_at_human_prec': rec_at_human_prec,
        'human_prec': human_prec,
        'human_rec': human_rec,
        'rec_at_half_prec': rec_at_half_prec,
    }

    # Collect statistics over the POS
    agg = []

    def summarize(pos, ind):
        # Print and record the mean statistics (in percent) of one group.
        row = {
            'pos': pos,
            'ap': 100 * np.mean(ap[0, ind]),
            'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
            'human_prec': 100 * np.mean(human_prec[0, ind]),
        }
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(" {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
            pos, len(ind), row['ap'], row['prec_at_human_rec'],
            row['human_prec']))
        agg.append(row)

    for pos in set(vocab['poss']):
        summarize(pos, [k for k, x in enumerate(vocab['poss']) if pos == x])
    summarize('all', list(range(n_words)))

    if eval_file is not None:
        try:
            utils.save_variables(eval_file, [details, agg],
                                 ['details', 'agg'], overwrite=True)
        except Exception:
            print('Error trying to save to pickle, will try hdf5 hack')
            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            detail_keys = list(details)
            utils.save_variables(eval_file_details,
                                 [details[k] for k in detail_keys],
                                 detail_keys, overwrite=True)

            # agg is a list of per-group dicts; store one list per field.
            # NOTE(review): assumes save_variables can serialise these plain
            # lists (including the string 'pos' column) - confirm.
            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            agg_keys = ['pos', 'ap', 'prec_at_human_rec', 'human_prec']
            agg_vals = [[row[k] for row in agg] for k in agg_keys]
            utils.save_variables(eval_file_agg, agg_vals, agg_keys,
                                 overwrite=True)
    return details
# Compute targets for the file counts = preprocess.get_vocab_counts(imdb[i].image_index, \ imdb[i].coco_caption_data, 5, vocab) if args.write_labels: label_file = os.path.join(output_dir, 'labels_' + sets[i] + '.h5') print 'Writing labels to {}'.format(label_file) with h5py.File(label_file, 'w') as f: for j in xrange(imdb[i].num_images): ind = imdb[i].image_index[j] ind_str = '{:02d}/{:d}'.format(int(math.floor(ind)/1e4), ind) l = f.create_dataset('/labels-{}'.format(ind_str), (1, 1, counts.shape[1], 1), dtype = 'f') c = counts[j,:].copy(); c = c > 0; c = c.astype(np.float32); c = c.reshape((1, 1, c.size, 1)) l[...] = c utils.tic_toc_print(1, 'write labels {:6d} / {:6d}'.format(j, imdb[i].num_images)) if args.write_splits: split_file = os.path.join(output_dir, sets[i] + '.ids') print 'Writing labels to {}'.format(split_file) with open(split_file, 'wt') as f: for j in xrange(imdb[i].num_images): ind = imdb[i].image_index[j] ind_str = '{:02d}/{:d}'.format(int(math.floor(ind)/1e4), ind) f.write('{}\n'.format(ind_str)) # Print the command to start training if args.task == 'test_model': imdb = coco_voc.coco_voc(args.test_set) mean = np.array([[[ 103.939, 116.779, 123.68]]]);