def det_label_init():
    # Load the vocabulary
    vocab_file = 'vocabs/vocab_train.pkl'
    vocab = utils.load_variables(vocab_file)

    # Set up Caffe
    caffe.set_mode_gpu()
    caffe.set_device(0)

    # Load the model
    mean = np.array([[[103.939, 116.779, 123.68]]])
    base_image_size = 565
    prototxt_deploy = 'visual_concepts/code/output/vgg/mil_finetune.prototxt.deploy'
    model_file = 'visual_concepts/code/output/vgg/snapshot_iter_240000.caffemodel'
    model = test_model.load_model(prototxt_deploy, model_file, base_image_size, mean, vocab)

    # Define functional words
    functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to',
                        'an', 'two', 'at', 'next', 'are']
    is_functional = np.array([x not in functional_words for x in vocab['words']])

    # Load the score-precision mapping file
    eval_file = 'visual_concepts/code/code/output/vgg/snapshot_iter_240000.caffemodel_output/coco_valid1_eval.pkl'
    pt = utils.load_variables(eval_file)

    # Set threshold_metric_name and output_metric_name
    threshold_metric_name = 'prec'
    output_metric_name = 'prec'
    return model, functional_words, threshold_metric_name, output_metric_name, vocab, is_functional, pt

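# Minimal usage sketch, assuming the Caffe model, prototxt and eval files
# referenced in det_label_init() exist on disk (the paths above are repo-specific):
#
#   model, functional_words, threshold_metric_name, output_metric_name, \
#       vocab, is_functional, pt = det_label_init()
#   # pt['details'] then holds the score-precision mapping used by output_words() below.
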
def get_model_vocab(solverProtoKey):
    vocabName, vocabKey = get_model_vocab_filename(solverProtoKey)
    dt = sg_utils.load_variables(vocabName)
    if 'vocab' in dt:
        return dt['vocab']
    else:
        return dt

def evalModelBatchNoRef(imdb, model, gtLabel, numReferencesToEval,
                        detectionFile, evalFile, evalNoiseKey=None):
    N_WORDS = len(model['vocab']['words'])
    vocab = model['vocab']
    imBnames, imIds = imdb.get_all_image_bnames_ids()
    gtLabel = np.array(gtLabel > 0, dtype=np.float32)
    dt = sg_utils.load_variables(detectionFile)
    mil_prob = dt['mil_prob']
    tm.benchmark_only_ap(vocab, gtLabel, numReferencesToEval, mil_prob,
                         eval_file=evalFile, noref=True)
    if evalNoiseKey is not None:
        mil_prob = dt[evalNoiseKey]
        evalNoiseFile = evalFile.replace('.h5', '_noise.h5')
        if not lock_utils.is_locked(evalNoiseFile):
            tm.benchmark_only_ap(vocab, gtLabel, numReferencesToEval, mil_prob,
                                 eval_file=evalNoiseFile, noref=True)
            lock_utils.unlock(evalNoiseFile)

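# The is_locked/unlock guard above suggests a simple file-based work-claim
# pattern so that only one process evaluates a given noise file. A minimal
# sketch of that idea (lock_utils' actual implementation may differ) is:
#
#   import os
#   def is_locked(path, lock_suffix='.lock'):
#       lock = path + lock_suffix
#       if os.path.exists(lock) or os.path.exists(path):
#           return True
#       open(lock, 'w').close()   # claim the work
#       return False
#
#   def unlock(path, lock_suffix='.lock'):
#       if os.path.exists(path + lock_suffix):
#           os.remove(path + lock_suffix)
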
def output_words(imdb, detection_file, eval_file, vocab,
                 threshold_metric_name, output_metric_name, threshold, min_words,
                 output_file=None,
                 functional_words=['a', 'on', 'of', 'the', 'in', 'with', 'and',
                                   'is', 'to', 'an', 'two', 'at', 'next', 'are']):
    """
    Output the words generated by the model.
    Loads the detections from detection_file and the score-precision mapping
    from eval_file, and writes the words to output_file. Words in output_file
    are sorted according to threshold_metric_name and reported using
    output_metric_name.
    """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)
    is_functional = np.array([x not in functional_words for x in vocab['words']])

    prec = np.zeros(dt['mil_prob'].shape)
    for jj in xrange(prec.shape[1]):
        prec[:, jj] = cap_eval_utils.compute_precision_score_mapping(
            pt['details']['score'][:, jj].copy(),
            pt['details']['precision'][:, jj].copy(),
            dt['mil_prob'][:, jj])
        utils.tic_toc_print(1, 'compute precision score mapping: {:4d} / {:4d}'.format(jj, prec.shape[1]))
    dt['prec'] = prec

    out_all = []
    for i in xrange(imdb.num_images):
        out = output_words_image(dt[threshold_metric_name][i, :], dt[output_metric_name][i, :],
                                 min_words, threshold, vocab, is_functional)
        out_all.append(out)
        utils.tic_toc_print(1, 'output words image: {:4d} / {:4d}'.format(i, imdb.num_images))

    if output_file is not None:
        with open(output_file, 'wt') as f:
            f.write('detection file %s\n' % (detection_file))
            f.write('eval file %s\n' % (eval_file))
            f.write('threshold %.2f; min_words %d\n' % (threshold, min_words))
            for i in xrange(imdb.num_images):
                f.write('{:d}: '.format(imdb.image_index[i]))
                out = out_all[i]
                for j in xrange(len(out)):
                    f.write('{:s} ({:.2f}), '.format(out[j][0], out[j][1]))
                f.write('\n')

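# Hypothetical invocation sketch (file paths are placeholders; imdb is assumed
# to be a coco_voc-style imdb object as used elsewhere in this code):
#
#   output_words(imdb, 'output/vgg/test_detections.pkl',
#                'output/vgg/coco_valid1_eval.pkl', vocab,
#                'prec', 'prec', threshold=0.5, min_words=3,
#                output_file='output/vgg/test_words.txt')
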
def evalModelBatch(imdb, model, gtLabel, numReferencesToEval,
                   detectionFile, evalFile, evalNoiseKey=None):
    N_WORDS = len(model['vocab']['words'])
    vocab = model['vocab']
    imBnames, imIds = imdb.get_all_image_bnames_ids()
    dt = sg_utils.load_variables(detectionFile)
    mil_prob = dt['mil_prob']
    tm.benchmark_ap(vocab, gtLabel, numReferencesToEval, mil_prob, eval_file=evalFile)
    if evalNoiseKey is not None:
        mil_prob = dt[evalNoiseKey]
        evalNoiseFile = evalFile.replace('.h5', '_noise.h5')
        if not lock_utils.isLocked(evalNoiseFile):
            tm.benchmark_ap(vocab, gtLabel, numReferencesToEval, mil_prob, eval_file=evalNoiseFile)
            lock_utils.unlock(evalNoiseFile)

def print_benchmark_latex(evalFile, vocab=None, sortBy="words",
                          printWords=False, printPos=True, printAgg=False, possOrder=None):
    # evalFile contains the following keys: ['details', 'agg', 'vocab', 'imdb']
    evalData = sg_utils.load_variables(evalFile)
    if vocab is None:
        vocab = evalData['vocab']
    if 'details' in evalData:
        details = evalData['details']
    else:
        details = evalData
    ap = details['ap']
    prec_at_human_rec = details['prec_at_human_rec']
    human_prec = details['human_prec']
    words = vocab['words']
    ind = 0
    if possOrder is None:
        possOrder = ['NN', 'VB', 'JJ', 'DT', 'PRP', 'IN', 'other']
    print ' '.join(possOrder)
    for pos in possOrder:
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        ind = np.asarray(ind, dtype=np.int32)
        if any(np.isnan(ap[0, ind])):
            print 'nan numbers ... setting them to zero for mean stats'
            ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0
        print '%.1f &' % (100 * np.mean(ap[0, ind])),
    print '%.1f & &' % (100 * np.mean(ap[0, :]))
    for pos in possOrder:
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        ind = np.asarray(ind, dtype=np.int32)
        if any(np.isnan(prec_at_human_rec[0, ind])) or any(np.isnan(human_prec[0, ind])):
            print 'nan numbers ... setting them to zero for mean stats'
            prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0
            human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0
        print '%.1f &' % (100 * np.mean(prec_at_human_rec[0, ind])),
    print '%.1f \\\\' % (100 * np.mean(prec_at_human_rec[0, :]))

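# For illustration, with the default possOrder the two loops above emit two
# LaTeX table rows of per-POS means followed by the overall mean, e.g.
# (made-up numbers):
#
#   41.3 & 22.1 & 24.9 & 35.0 & 20.2 & 19.1 & 30.4 & 34.7 & &
#   57.8 & 40.1 & 43.2 & 66.3 & 51.4 & 44.0 & 49.6 & 59.2 \\
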
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file=None):
    # Get ground truth
    # dt = utils.scio.loadmat(detection_file)
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]

        # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
               'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
               'human_prec': human_prec, 'human_rec': human_rec,
               'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}". \
            format(pos, len(ind), 100 * np.mean(ap[0, ind]),
                   100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
        agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
                    'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
                    'human_prec': 100 * np.mean(human_prec[0, ind])})

    ind = range(len(vocab['words']))
    pos = 'all'
    print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}". \
        format(pos, len(ind), 100 * np.mean(ap[0, ind]),
               100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
                'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
                'human_prec': 100 * np.mean(human_prec[0, ind])})

    if eval_file is not None:
        try:
            utils.save_variables(eval_file, [details, agg], ['details', 'agg'], overwrite=True)
        except:
            print 'Error trying to save to pickle, will try hdf5 hack'
            namespace = globals().copy()
            namespace.update(locals())
            code.interact(local=namespace)

            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            eval_file_details_keys = details.keys()
            eval_file_details_vals = [details[x] for x in eval_file_details_keys]
            utils.save_variables(eval_file_details, eval_file_details_vals,
                                 eval_file_details_keys, overwrite=True)

            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            eval_file_agg_keys = agg.keys()
            eval_file_agg_vals = [agg[x] for x in eval_file_agg_keys]
            utils.save_variables(eval_file_agg, eval_file_agg_vals,
                                 eval_file_agg_keys, overwrite=True)

    return details

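# Toy illustration (made-up numbers) of the prec_at_human_rec lookup performed
# in benchmark() above: recall is non-decreasing along the PR curve, so the
# first index with R >= human_rec gives the precision at (roughly) human recall.
def _demo_prec_at_human_rec():
    import numpy as np
    R = np.array([0.1, 0.3, 0.6, 0.9])
    P = np.array([1.0, 0.8, 0.7, 0.5])
    human_rec = 0.55
    ind = np.sort(np.where(R >= human_rec)[0])
    return P[ind[0]]  # -> 0.7
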
def print_benchmark_plain(evalFile, vocab=None,
                          sortBy="words", printWords=False, printPos=True, printAgg=False):
    # evalFile contains the following keys: ['details', 'agg', 'vocab', 'imdb']
    evalData = sg_utils.load_variables(evalFile)
    if vocab is None:
        vocab = evalData['vocab']
    if 'details' in evalData:
        details = evalData['details']
    else:
        details = evalData
    ap = details['ap']
    prec_at_human_rec = details['prec_at_human_rec']
    human_prec = details['human_prec']
    words = vocab['words']
    ind = 0
    if sortBy == "words":
        srtInds = np.argsort(words)
    elif sortBy == "ap":
        srtInds = np.argsort(ap)
        srtInds = srtInds[0]
        srtInds = srtInds[::-1]

    if printWords == True:
        print "{:>50s}".format("-" * 50)
        print "{:^50s}".format("Word metrics")
        print "{:>50s}".format("-" * 50)
        print "{:>15s} {:>8s} {:>6s} : {:^5s} {:^5s}". \
            format("Words", "POS", "Counts", "mAP", "p@H")
        for i in srtInds:
            print "{:>15s} {:>8s} {:6d} : {:5.2f} {:5.2f}". \
                format(words[i], vocab['poss'][i], vocab['counts'][i],
                       100 * np.mean(ap[0, i]), 100 * np.mean(prec_at_human_rec[0, i]))

    if printPos:
        print "{:>50s}".format("-" * 50)
        print "{:^50s}".format("POS metrics")
        print "{:>50s}".format("-" * 50)
        print "{:>15s} : {:^5s} {:^5s} {:^5s}". \
            format("POS", "mAP", "p@H", "h")
        for pos in list(set(vocab['poss'])):
            ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
            ind = np.asarray(ind)
            if any(np.isnan(ap[0, ind])) or \
                    any(np.isnan(prec_at_human_rec[0, ind])) or \
                    any(np.isnan(human_prec[0, ind])):
                print 'nan numbers ... setting them to zero for mean stats'
                ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0
                prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0
                human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0
            print "{:>11s} [{:4d}]: {:5.2f} {:5.2f} {:5.2f}". \
                format(pos, len(ind), 100 * np.mean(ap[0, ind]),
                       100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))

    if printAgg:
        print "{:>50s}".format("-" * 50)
        print "{:^50s}".format("Agg metrics")
        print "{:>50s}".format("-" * 50)
        print "{:>15s} : {:^5s} {:^5s} {:^5s}". \
            format("agg", "mAP", "p@H", "h")
        pos = 'all'
        ind = srtInds
        ind = np.asarray(ind)
        if any(np.isnan(ap[0, ind])) or \
                any(np.isnan(prec_at_human_rec[0, ind])) or \
                any(np.isnan(human_prec[0, ind])):
            print 'nan numbers ... setting them to zero for mean stats'
            ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0
            prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0
            human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0
        print "{:>11s} [{:^4d}] : {:^5.2f} {:5.2f} {:5.2f}". \
            format(pos, len(ind), 100 * np.mean(ap[0, ind]),
                   100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))

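# Hypothetical usage (the eval file path is a placeholder for a file written by
# benchmark() above):
#
#   print_benchmark_plain('output/vgg/coco_test_eval.pkl',
#                         printWords=False, printPos=True, printAgg=True)
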
def encode_captions(imgs, params, wtoi, model, functional_words, threshold_metric_name,
                    output_metric_name, vocab, is_functional, pt):
    """
    Encode all captions into one large array, which will be 1-indexed.
    Also produces label_start_ix and label_end_ix, which store 1-indexed and
    inclusive (Lua-style) pointers to the first and last caption of each image
    in the dataset.
    """
    import sg_utils as utils
    vocab = utils.load_variables('visual_concepts/code/vocabs/vocab_train.pkl')
    max_length = params['max_length']
    N = len(imgs)
    M = sum(len(img['final_captions']) for img in imgs)  # total number of captions

    counts = np.zeros((len(imgs), len(vocab['words'])), dtype=np.float)
    label_attributes = []
    label_attributes_prob = []
    label_arrays = []
    label_semantic = []
    label_start_ix = np.zeros(N, dtype='uint32')  # note: these will be one-indexed
    label_end_ix = np.zeros(N, dtype='uint32')
    label_length = np.zeros(M, dtype='uint32')
    caption_counter = 0
    counter = 1
    image_files = []
    for i, img in enumerate(imgs):
        n = len(img['final_captions'])
        assert n > 0, 'error: some image has no captions'
        Li = np.zeros((n, max_length), dtype='uint32')
        semant_label = np.zeros(len(vocab['words']), dtype='uint32')
        for j, s in enumerate(img['final_captions']):
            label_length[caption_counter] = min(max_length, len(s))  # record the length of this sequence
            caption_counter += 1
            for k, w in enumerate(s):
                if k < max_length:
                    Li[j, k] = wtoi[w]
            # accumulate per-image counts of vocabulary words appearing in this caption
            pos = [vocab['words'].index(tmp_j_k) for tmp_j_k in s if tmp_j_k in vocab['words']]
            pos = list(set(pos))
            counts[i, pos] = counts[i, pos] + 1

        # build the semantic label vector from the most frequent vocabulary words
        sort_counts = np.argsort(counts[i], axis=0)[::-1]
        sort_key = sorted(counts[i], reverse=True)
        for m in range(len(sort_key)):
            if sort_key[m] > 0:
                semant_label[m] = wtoi[vocab['words'][sort_counts[m]]]

        image_files.append(img['file_path'])
        label_arrays.append(Li)
        label_semantic.append(semant_label)
        label_start_ix[i] = counter
        label_end_ix[i] = counter + n - 1
        counter += n

    # L_semantic = np.concatenate(label_semantic, axis=0)  # put all the labels together
    L = np.concatenate(label_arrays, axis=0)  # put all the labels together
    assert L.shape[0] == M, 'lengths don\'t match? that\'s weird'
    assert np.all(label_length > 0), 'error: some caption had no words?'
    print 'encoded captions to array of size ', `L.shape`
    return L, label_start_ix, label_end_ix, label_length, label_semantic, \
        label_attributes, label_attributes_prob

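# Worked example (hypothetical caption counts) of the Lua-style pointers built
# in encode_captions(): with 5, 3 and 4 captions per image,
# label_start_ix = [1, 6, 9] and label_end_ix = [5, 8, 12], so the captions of
# image i are L[label_start_ix[i] - 1 : label_end_ix[i]].
def _demo_caption_pointers(caps_per_img=(5, 3, 4)):
    start, end, counter = [], [], 1
    for n in caps_per_img:
        start.append(counter)
        end.append(counter + n - 1)
        counter += n
    return start, end  # -> ([1, 6, 9], [5, 8, 12])
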
def __init__(self, vocab_file):
    # Load the vocabulary and precompute the mask of non-functional words.
    # `functional_words` is assumed to be defined in the enclosing module
    # (the same functional-word list used elsewhere in this code).
    self.base_image_size = 565
    self.vocab = utils.load_variables(vocab_file)
    self.is_functional = np.array([x not in functional_words for x in self.vocab['words']])
    self.threshold = 0.5

def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file=None):
    # Get ground truth
    # dt = utils.scio.loadmat(detection_file)
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)
    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]

        # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
               'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec,
               'human_prec': human_prec, 'human_rec': human_rec,
               'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}". \
            format(pos, len(ind), 100 * np.mean(ap[0, ind]),
                   100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
        agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
                    'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
                    'human_prec': 100 * np.mean(human_prec[0, ind])})

    ind = range(len(vocab['words']))
    pos = 'all'
    print " {:5s} [{:4d}] : {:5.2f} {:5.2f} {:5.2f}". \
        format(pos, len(ind), 100 * np.mean(ap[0, ind]),
               100 * np.mean(prec_at_human_rec[0, ind]), 100 * np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100 * np.mean(ap[0, ind]),
                'prec_at_human_rec': 100 * np.mean(prec_at_human_rec[0, ind]),
                'human_prec': 100 * np.mean(human_prec[0, ind])})

    if eval_file is not None:
        utils.save_variables(eval_file, [details, agg, vocab, imdb],
                             ['details', 'agg', 'vocab', 'imdb'], overwrite=True)

    return details

print('Called with args:')
print(args)

# if not args.randomize:
#     # fix the random seeds (numpy and caffe) for reproducibility
#     np.random.seed(cfg.RNG_SEED)
#     caffe.set_random_seed(cfg.RNG_SEED)

# set up caffe
caffe.set_mode_gpu()
if args.gpu_id is not None:
    caffe.set_device(args.gpu_id)

# Load the vocabulary
vocab = utils.load_variables(args.vocab_file)

if args.task == 'compute_targets':
    imdb = []
    output_dir = args.train_dir
    sets = ['train', 'val']
    for i, imset in enumerate([args.train_set, args.val_set]):
        imdb.append(coco_voc.coco_voc(imset))
        print 'Loaded dataset {:s}'.format(imdb[i].name)

        # Compute targets for the file
        counts = preprocess.get_vocab_counts(imdb[i].image_index,
                                             imdb[i].coco_caption_data, 5, vocab)

        if args.write_labels:

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)

    imdb = coco_voc.coco_voc('test')
    vocab = utils.load_variables(args.vocab_file)
    gt_label = preprocess.get_vocab_counts(
        imdb.image_index, imdb.coco_caption_data, 5, vocab)

    det_file = args.det_file
    det_dir = os.path.dirname(det_file)  # get root dir of det_file
    eval_file = os.path.join(det_dir, imdb.name + '_eval.pkl')
    benchmark(imdb, vocab, gt_label, 5, det_file, eval_file=eval_file)

    map_file = args.map_file
    gt_label_det = preprocess.get_vocab_counts_det(
        imdb.image_index,

# In[1]:

import _init_paths
import caffe, test_model, cap_eval_utils, sg_utils as utils
import cv2, numpy as np
# import matplotlib
# import matplotlib.pyplot as plt


# In[2]:

# Load the vocabulary
vocab_file = 'vocabs/vocab_train.pkl'
vocab = utils.load_variables(vocab_file)

# Set up Caffe
caffe.set_mode_gpu()
caffe.set_device(0)

# Load the model
mean = np.array([[[103.939, 116.779, 123.68]]])
base_image_size = 565
prototxt_deploy = 'output/vgg/mil_finetune.prototxt.deploy'
model_file = 'output/vgg/snapshot_iter_240000.caffemodel'
model = test_model.load_model(prototxt_deploy, model_file, base_image_size, mean, vocab)


# In[3]: