# Evaluation utilities for word-detection (MIL) outputs against COCO
# reference captions. Python 2 code; `sg_utils`, `cap_eval_utils` and
# `preprocess` are project-local modules assumed to be importable.
import numpy as np

import code
import cap_eval_utils
import preprocess
import sg_utils
import sg_utils as utils  # this module is used under both names below


def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
  n_words = len(vocab['words'])

  # Per-word precision/recall curves and summary statistics
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  human_prec = np.zeros((1, n_words), dtype=np.float)
  human_rec = np.zeros((1, n_words), dtype=np.float)

  prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
      gt_label[:, i], mil_prob[:, i], num_references)
    human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
      gt_label[:, i], num_references)

    # Precision at the operating point where recall matches human recall
    ind = np.where(R[:, i] >= human_rec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0, i] = P[ind[0], i]

    # Recall at the operating point where precision matches human precision
    ind = np.where(P[:, i] >= human_prec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0, i] = R[ind[-1], i]

    # Recall at 50% precision
    ind = np.where(P[:, i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0, i] = R[ind[-1], i]

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
    'prec_at_human_rec': prec_at_human_rec,
    'rec_at_human_prec': rec_at_human_prec,
    'human_prec': human_prec, 'human_rec': human_rec,
    'rec_at_half_prec': rec_at_half_prec}

  if eval_file is not None:
    eval_file_details_keys = details.keys()
    eval_file_details_vals = [details[x] for x in eval_file_details_keys]
    utils.save_variables(eval_file, eval_file_details_vals,
      eval_file_details_keys, overwrite=True)

  return details
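# A usage sketch (not from the source): `mil_prob` holds per-image word
# posteriors and `gt_label` per-image reference counts, both of shape
# (n_images, n_words); COCO supplies num_references = 5 captions per image.
# The random inputs below are stand-ins for real detector output.
def _demo_benchmark_ap(vocab):
  n_images = 100
  n_words = len(vocab['words'])
  mil_prob = np.random.rand(n_images, n_words)
  gt_label = np.random.randint(0, 6, (n_images, n_words))
  details = benchmark_ap(vocab, gt_label, 5, mil_prob)
  print 'mean AP: {:.2f}'.format(100 * np.mean(details['ap']))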
def benchmark_one_word(i, P, R, score, ap, human_rec, human_prec,
                       prec_at_human_rec, rec_at_human_prec, rec_at_half_prec,
                       gt_label, mil_prob, num_references):
  # Fills column i of the preallocated output arrays in place; this is the
  # per-word loop body of benchmark_ap, factored out.
  P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
    gt_label[:, i], mil_prob[:, i], num_references)
  human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
    gt_label[:, i], num_references)

  # Precision at the operating point where recall matches human recall
  ind = np.where(R[:, i] >= human_rec[0, i])[0]
  if len(ind) > 0:
    ind = np.sort(ind)
    prec_at_human_rec[0, i] = P[ind[0], i]

  # Recall at the operating point where precision matches human precision
  ind = np.where(P[:, i] >= human_prec[0, i])[0]
  if len(ind) > 0:
    ind = np.sort(ind)
    rec_at_human_prec[0, i] = R[ind[-1], i]

  # Recall at 50% precision
  ind = np.where(P[:, i] >= 0.5)[0]
  if len(ind) > 0:
    ind = np.sort(ind)
    rec_at_half_prec[0, i] = R[ind[-1], i]
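# Each call to benchmark_one_word fills one column of the arrays in place,
# so words can also be evaluated independently. A sequential driver (assumed
# usage, not from the source) is simply:
#
#   for i in range(len(vocab['words'])):
#     benchmark_one_word(i, P, R, score, ap, human_rec, human_prec,
#                        prec_at_human_rec, rec_at_human_prec,
#                        rec_at_half_prec, gt_label, mil_prob, num_references)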
# Map fine-grained Penn Treebank tags onto coarse POS classes
mapping = {'NNS': 'NN', 'NNP': 'NN', 'NNPS': 'NN', 'NN': 'NN',
  'VB': 'VB', 'VBD': 'VB', 'VBN': 'VB', 'VBZ': 'VB', 'VBP': 'VB', 'VBG': 'VB',
  'JJR': 'JJ', 'JJS': 'JJ', 'JJ': 'JJ',
  'DT': 'DT', 'PRP': 'PRP', 'PRP$': 'PRP', 'IN': 'IN'}

# punctuations to be removed from the sentences
punctuations = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
  ".", "?", "!", ",", ":", "-", "--", "...", ";"]

vocab = preprocess.get_vocab(imset, coco_caps, punctuations, mapping)
sg_utils.save_variables('vocab_' + imset + '.pkl',
  [vocab[x] for x in vocab.keys()],
  vocab.keys(),
  overwrite=True)

## Keep the N_WORDS most frequent words and measure human agreement on them
N_WORDS = 1000
vocab = preprocess.get_vocab_top_k(vocab, N_WORDS)
image_ids = coco_caps.getImgIds()
counts = preprocess.get_vocab_counts(image_ids, coco_caps, 5, vocab)

P = np.zeros((N_WORDS, 1), dtype=np.float)
R = np.zeros((N_WORDS, 1), dtype=np.float)
for i, w in enumerate(vocab['words']):
  P[i], R[i] = cap_eval_utils.human_agreement(counts[:, i], 5)
  print w, P[i], R[i]

for pos in list(set(vocab['poss'])):
  ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
  print "{:5s} [{:3d}] : {:.2f} {:.2f}".format(
    pos, len(ind), 100 * np.mean(P[ind]), 100 * np.mean(R[ind]))
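# The `imset` and `coco_caps` names above are assumed to come from the
# surrounding script; a minimal way to produce them with the official COCO
# API (pycocotools), using a hypothetical helper and annotation path:
def load_coco_caps(imset):
  # hypothetical helper, not part of the repository
  from pycocotools.coco import COCO
  return COCO('annotations/captions_' + imset + '.json')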
def benchmark(imdb, vocab, gt_label, num_references, detection_file,
              eval_file=None):
  # Load the detector output
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob']

  # Benchmark the output, and return a result struct
  n_words = len(vocab['words'])
  P = np.zeros(mil_prob.shape, dtype=np.float)
  R = np.zeros(mil_prob.shape, dtype=np.float)
  score = np.zeros(mil_prob.shape, dtype=np.float)
  ap = np.zeros((1, n_words), dtype=np.float)

  human_prec = np.zeros((1, n_words), dtype=np.float)
  human_rec = np.zeros((1, n_words), dtype=np.float)

  prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
  rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
      gt_label[:, i], mil_prob[:, i], num_references)
    human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
      gt_label[:, i], num_references)

    # Precision at the operating point where recall matches human recall
    ind = np.where(R[:, i] >= human_rec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0, i] = P[ind[0], i]

    # Recall at the operating point where precision matches human precision
    ind = np.where(P[:, i] >= human_prec[0, i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0, i] = R[ind[-1], i]

    # Recall at 50% precision
    ind = np.where(P[:, i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0, i] = R[ind[-1], i]
    # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
    'prec_at_human_rec': prec_at_human_rec,
    'rec_at_human_prec': rec_at_human_prec,
    'human_prec': human_prec, 'human_rec': human_rec,
    'rec_at_half_prec': rec_at_half_prec}

  # Collect statistics over the POS
  agg = []
  for pos in list(set(vocab['poss'])):
    ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
    print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
      pos, len(ind), 100 * np.mean(ap[0, ind]),
      100 * np.mean(prec_at_human_rec[0, ind]),
      100 * np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
      'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
      'human_prec': 100 * np.mean(human_prec[0, ind])})

  # Aggregate row over all words
  ind = range(len(vocab['words']))
  pos = 'all'
  print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}".format(
    pos, len(ind), 100 * np.mean(ap[0, ind]),
    100 * np.mean(prec_at_human_rec[0, ind]),
    100 * np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
    'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
    'human_prec': 100 * np.mean(human_prec[0, ind])})

  if eval_file is not None:
    try:
      utils.save_variables(eval_file, [details, agg], ['details', 'agg'],
        overwrite=True)
    except:
      print 'Error trying to save to pickle, will try hdf5 hack'
      namespace = globals().copy()
      namespace.update(locals())
      code.interact(local=namespace)  # drop into a shell for inspection

      eval_file_details = eval_file.replace('.pkl', '_details.h5')
      eval_file_details_keys = details.keys()
      eval_file_details_vals = [details[x] for x in eval_file_details_keys]
      utils.save_variables(eval_file_details, eval_file_details_vals,
        eval_file_details_keys, overwrite=True)

      # `agg` is a list of per-POS dicts; key the saved arrays by POS
      eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
      eval_file_agg_keys = [a['pos'] for a in agg]
      eval_file_agg_vals = [np.array([a['ap'], a['prec_at_human_rec'],
        a['human_prec']]) for a in agg]
      utils.save_variables(eval_file_agg, eval_file_agg_vals,
        eval_file_agg_keys, overwrite=True)

  return details
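# A usage sketch with hypothetical file names (not from the source): the
# detection file only needs to hold a 'mil_prob' array aligned row-for-row
# with `gt_label`; the per-POS summary table is printed as a side effect.
def _demo_benchmark(imdb, vocab, gt_label):
  return benchmark(imdb, vocab, gt_label, 5,
                   'output/coco_valid1_mil_prob.h5',
                   eval_file='output/eval_valid1.pkl')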