Exemple #1
def det_label_init():
    """Load everything needed to label images with detected words.

    Loads the training vocabulary, the detection network and the
    score-to-precision evaluation data from fixed relative paths.

    Returns:
        A 7-tuple ``(model, functional_words, threshold_metric_name,
        output_metric_name, vocab, is_functional, pt)`` where ``pt`` is the
        object loaded from the evaluation pickle and both metric names are
        ``'prec'``.
    """
    # Load the vocabulary
    vocab_file = 'vocabs/vocab_train.pkl'
    vocab = utils.load_variables(vocab_file)

    # Load the model
    mean = np.array([[[103.939, 116.779, 123.68]]])
    base_image_size = 565
    prototxt_deploy = 'visual_concepts/code/output/vgg/mil_finetune.prototxt.deploy'
    model_file = 'visual_concepts/code/output/vgg/snapshot_iter_240000.caffemodel'
    model = test_model.load_model(prototxt_deploy, model_file, base_image_size, mean, vocab)

    # Define functional words
    functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are']
    # NOTE: despite its name, this mask is True for vocabulary words that
    # are NOT in functional_words.
    is_functional = np.array([x not in functional_words for x in vocab['words']])

    # Load the score precision mapping file.
    # Fix: the path literal was missing its opening quote.
    # NOTE(review): this path contains 'code/code/' while the model paths
    # above contain a single 'code/' -- confirm it is intentional.
    eval_file = 'visual_concepts/code/code/output/vgg/snapshot_iter_240000.caffemodel_output/coco_valid1_eval.pkl'
    pt = utils.load_variables(eval_file)

    # Set threshold_metric_name and output_metric_name
    threshold_metric_name = 'prec'
    output_metric_name = 'prec'
    return model, functional_words, threshold_metric_name, output_metric_name, vocab, is_functional, pt
def get_model_vocab(solverProtoKey):
    """Return the vocabulary stored for the given solver prototxt key.

    The vocabulary file name is resolved with ``get_model_vocab_filename``
    and loaded with ``sg_utils.load_variables``.

    Args:
        solverProtoKey: key passed through to ``get_model_vocab_filename``.

    Returns:
        ``dt['vocab']`` when the loaded mapping has a ``'vocab'`` entry,
        otherwise the loaded mapping itself.
    """
    # Only the file name is needed; the second element is unused here.
    vocabName, _vocabKey = get_model_vocab_filename(solverProtoKey)
    dt = sg_utils.load_variables(vocabName)
    if 'vocab' in dt:
        return dt['vocab']
    # Fix: this fallback used to sit after the return inside the `if`, so it
    # was unreachable and the function returned None for bare vocab files.
    return dt
def get_model_vocab(solverProtoKey):
  """Return the vocabulary stored for the given solver prototxt key.

  The vocabulary file name is resolved with ``get_model_vocab_filename``
  and loaded with ``sg_utils.load_variables``.

  Args:
    solverProtoKey: key passed through to ``get_model_vocab_filename``.

  Returns:
    ``dt['vocab']`` when the loaded mapping has a ``'vocab'`` entry,
    otherwise the loaded mapping itself.
  """
  # Only the file name is needed; the second element is unused here.
  vocabName, _vocabKey = get_model_vocab_filename(solverProtoKey)
  dt = sg_utils.load_variables(vocabName)
  if 'vocab' in dt:
    return dt['vocab']
  # Fix: this fallback used to sit after the return inside the `if`, so it
  # was unreachable and the function returned None for bare vocab files.
  return dt
# Prepares the inputs for evaluating a detection file: binarizes the ground
# truth labels and loads the detection scores.
# NOTE(review): this copy of the function is truncated. It ends on an `if`
# header with no body and no evaluation call is visible, so the comments
# below describe only the statements that are present.
def evalModelBatchNoRef(imdb, model, gtLabel, \
  numReferencesToEval, detectionFile, evalFile, evalNoiseKey=None):
    # N_WORDS, imBnames and imIds are assigned but not used in the visible part.
    N_WORDS = len(model['vocab']['words'])
    vocab = model['vocab']
    imBnames, imIds = imdb.get_all_image_bnames_ids()
    # Binarize the ground truth: 1.0 where the label is positive, else 0.0.
    gtLabel = np.array(gtLabel > 0, dtype=np.float32)

    # Load the detections; 'mil_prob' holds the default scores.
    dt = sg_utils.load_variables(detectionFile)
    mil_prob = dt['mil_prob']

    if evalNoiseKey is not None:
        # Use the scores stored under evalNoiseKey instead, and derive the
        # output file name by replacing '.h5' with '_noise.h5'.
        mil_prob = dt[evalNoiseKey]
        evalNoiseFile = evalFile.replace('.h5', '_noise.h5')
        # NOTE(review): evalModelBatch in this file calls lock_utils.isLocked
        # (camelCase) for the same check -- confirm which name exists.
        if not lock_utils.is_locked(evalNoiseFile):
        # NOTE(review): the body of this `if` is missing from this copy.
def output_words(imdb, detection_file, eval_file, vocab, \
  threshold_metric_name, output_metric_name, threshold, min_words, output_file = None, \
  functional_words = ('a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are')):
    """
    Output the words as generated by the model. Loads the detections from
    detection_file, score precision mapping from eval_file and output the words
    in output_file. Words in the output_file are sorted according to the
    threshold_metric_name and report the output_metric_name.

    Args:
        imdb: image database; ``num_images`` and ``image_index`` are read.
        detection_file: path to the detections (must contain 'mil_prob').
        eval_file: path to the evaluation data (must contain
            ``['details']['score']`` and ``['details']['precision']``).
        vocab: mapping with a 'words' list.
        threshold_metric_name: key into the detections ('mil_prob' or 'prec')
            whose values are passed to ``output_words_image`` first.
        output_metric_name: key into the detections passed second.
        threshold, min_words: forwarded to ``output_words_image``.
        output_file: optional path of a text report to write.
        functional_words: words flagged through the ``is_functional`` mask.
            The default is a tuple so it cannot be mutated between calls.

    Returns:
        List with one ``output_words_image`` result per image.
    """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)

    # NOTE: despite its name, the mask is True for words NOT in functional_words.
    is_functional = np.array(
        [x not in functional_words for x in vocab['words']])

    mil_prob = dt['mil_prob']
    prec = np.zeros(mil_prob.shape)
    for jj in range(prec.shape[1]):
        # Fix: the damaged original passed (1, '<progress message>') as the
        # third and fourth arguments. The result fills a column of an array
        # shaped like mil_prob, so the scores to map are that word's
        # mil_prob column.
        # NOTE(review): confirm against
        # cap_eval_utils.compute_precision_score_mapping.
        prec[:, jj] = cap_eval_utils.compute_precision_score_mapping(
            pt['details']['score'][:, jj].copy(),
            pt['details']['precision'][:, jj].copy(),
            mil_prob[:, jj].copy())
        # NOTE(review): the original reported progress here with the args
        # (1, 'compute precision score mapping: {:4d} / {:4d}'.format(jj,
        # prec.shape[1])); the helper's name is missing from this copy.
    dt['prec'] = prec

    out_all = []
    for i in range(imdb.num_images):
        out = output_words_image(
            dt[threshold_metric_name][i, :], dt[output_metric_name][i, :],
            min_words, threshold, vocab, is_functional)
        # Fix: results were never collected, so out_all[i] below raised
        # IndexError.
        out_all.append(out)
        # NOTE(review): the original reported progress here with the args
        # (1, 'output words image: {:4d} / {:4d}'.format(i, imdb.num_images));
        # the helper's name is missing from this copy.

    if output_file is not None:
        with open(output_file, 'wt') as f:
            f.write('detection file %s\n' % (detection_file))
            f.write('eval file %s\n' % (eval_file))
            f.write('threshold %.2f; min_words %d\n' % (threshold, min_words))
            for i in range(imdb.num_images):
                f.write('{:d}: '.format(imdb.image_index[i]))
                for entry in out_all[i]:
                    f.write('{:s} ({:.2f}), '.format(entry[0], entry[1]))
                # Fix: end each image's record so records do not run together.
                f.write('\n')

    return out_all
Exemple #7
def evalModelBatch(imdb, model, gtLabel, numReferencesToEval,
                   detectionFile, evalFile, evalNoiseKey=None):
  """Benchmark a detection file, and optionally its noise-key scores.

  Runs ``tm.benchmark_ap`` on the 'mil_prob' scores from ``detectionFile``
  and writes the result to ``evalFile``. When ``evalNoiseKey`` is given, the
  scores stored under that key are benchmarked as well, into the file named
  by replacing '.h5' with '_noise.h5' in ``evalFile``, unless that file is
  reported as locked.
  """
  vocab = model['vocab']
  # Kept from the original although the values are not used afterwards.
  _num_words = len(vocab['words'])
  _bnames, _ids = imdb.get_all_image_bnames_ids()

  detections = sg_utils.load_variables(detectionFile)
  scores = detections['mil_prob']
  tm.benchmark_ap(vocab, gtLabel, numReferencesToEval, scores, eval_file=evalFile)

  if evalNoiseKey is None:
    return

  noise_scores = detections[evalNoiseKey]
  noise_eval_file = evalFile.replace('.h5', '_noise.h5')
  if lock_utils.isLocked(noise_eval_file):
    return
  tm.benchmark_ap(vocab, gtLabel, numReferencesToEval, noise_scores,
                  eval_file=noise_eval_file)
def print_benchmark_latex(evalFile, vocab = None, sortBy = "words", \
  printWords = False, printPos = True, printAgg = False, possOrder=None):
    """Print per-POS benchmark means as two LaTeX table row fragments.

    Loads the evaluation results from ``evalFile`` and prints the tag names,
    then one row with 100 * mean ``ap`` per part-of-speech tag followed by
    the mean over all words, then the same for ``prec_at_human_rec``. Cells
    are separated by ``&``. NaN entries of the metrics are set to zero in
    place before averaging, with a notice printed.

    Args:
        evalFile: path handed to ``sg_utils.load_variables``. The loaded
            mapping either holds the metrics under a 'details' key or is
            itself the metrics mapping.
        vocab: mapping with a 'poss' list of tags; defaults to
            ``evalData['vocab']``.
        sortBy, printWords, printPos, printAgg: accepted but not used by
            this function.
        possOrder: tags to report, in column order. Defaults to
            ``['NN', 'VB', 'JJ', 'DT', 'PRP', 'IN', 'other']``.
    """
    import sys

    nan_notice = 'nan numbers ... setting them to zero for mean stats'

    # evalFile has the following ['details', 'agg', 'vocab', 'imdb']
    evalData = sg_utils.load_variables(evalFile)
    if vocab is None:
        vocab = evalData['vocab']
    # Fix: the `else:` was missing, so `details` was always overwritten with
    # the whole evalData mapping.
    if 'details' in evalData:
        details = evalData['details']
    else:
        details = evalData
    ap = details['ap']
    prec_at_human_rec = details['prec_at_human_rec']
    # NOTE(review): human_prec reads the 'prec_at_human_rec' entry, so it is
    # the same array as above. Looks like a copy-paste slip for
    # 'human_prec'; left unchanged pending confirmation.
    human_prec = details['prec_at_human_rec']
    if possOrder is None:
        possOrder = ['NN', 'VB', 'JJ', 'DT', 'PRP', 'IN', 'other']

    def pos_indices(pos):
        # Indices of the vocabulary entries tagged with `pos`.
        return np.asarray(
            [i for i, x in enumerate(vocab['poss']) if pos == x],
            dtype=np.int32)

    print(' '.join(possOrder))

    # Row 1: average precision. Cells are written with a trailing space and
    # no newline, which reproduces the output of `print value,`.
    for pos in possOrder:
        ind = pos_indices(pos)
        nan_ap = np.isnan(ap[0, ind])
        if nan_ap.any():
            print(nan_notice)
            ap[0, ind[nan_ap]] = 0
        sys.stdout.write('%.1f & ' % (100 * np.mean(ap[0, ind])))
    print('%.1f & &' % (100 * np.mean(ap[0, :])))

    # Row 2: precision at human recall.
    for pos in possOrder:
        ind = pos_indices(pos)
        nan_prec = np.isnan(prec_at_human_rec[0, ind])
        nan_human = np.isnan(human_prec[0, ind])
        if nan_prec.any() or nan_human.any():
            print(nan_notice)
            # Fix: this assignment had lost its `prec_at_human_rec[` target.
            prec_at_human_rec[0, ind[nan_prec]] = 0
            human_prec[0, ind[nan_human]] = 0
        sys.stdout.write('%.1f & ' % (100 * np.mean(prec_at_human_rec[0, ind])))
    print('%.1f \\\\' % (100 * np.mean(prec_at_human_rec[0, :])))
# Benchmarks detection scores against ground-truth labels word by word and
# aggregates the metrics per part-of-speech tag.
# NOTE(review): this copy of benchmark() is damaged. The signature is cut
# off after `imdb,`, a call opener before the 'benchmarking' format line is
# missing, and the save block at the end has lost lines (unbalanced
# brackets, no try/except). The body reads vocab, gt_label, num_references,
# detection_file and eval_file, and the call site in this file is
# `benchmark(imdb, vocab, gt_label, 5, det_file, eval_file=eval_file)`.
def benchmark(imdb,
    # Load the detections; 'mil_prob' holds the scores.
    # dt = utils.scio.loadmat(detection_file)
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    # Per-sample curves, one column per word (same shape as mil_prob).
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    # Per-word summary metrics, stored as a single row.
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    # Start as NaN so words whose recall never reaches the human recall stay
    # NaN; the two rec_at_* rows default to 0 instead.
    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        # NOTE(review): the next line is the tail of a call whose opening
        # line is missing; it looks like a progress message.
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        # Precision/recall/score curves and AP for word i.
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        # Precision at the first index whose recall is >= the human recall.
        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        # Recall at the last index whose precision is >= the human precision.
        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        # Recall at the last index whose precision is >= 0.5.
        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]
        # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

    # Everything computed above, keyed by metric name; this is the return value.
    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS
    # Each tag gets a printed line and an entry in agg with the means scaled
    # by 100. set() makes the tag order arbitrary.
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
          format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
            100*np.mean(human_prec[0, ind]))
        agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
          'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
          'human_prec': 100*np.mean(human_prec[0, ind])})

    # Same statistics over the whole vocabulary, reported under pos 'all'.
    ind = range(len(vocab['words']))
    pos = 'all'
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})

    # Save the results when an output path was given.
    # NOTE(review): this block is incomplete. The save call is not closed,
    # the error message suggests a try/except that is no longer present, the
    # list comprehension is not closed, and nothing visible writes the
    # '_details.h5' / '_agg.h5' files. Also, agg is a list, so agg.keys()
    # would raise AttributeError as written.
    if eval_file is not None:
            utils.save_variables(eval_file, [details, agg], ['details', 'agg'],
            print 'Error trying to save to pickle, will try hdf5 hack'
            namespace = globals().copy()

            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            eval_file_details_keys = details.keys()
            eval_file_details_vals = [
                details[x] for x in eval_file_details_keys

            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            eval_file_agg_keys = agg.keys()
            eval_file_agg_vals = [agg[x] for x in eval_file_agg_keys]

    return details
def print_benchmark_plain(evalFile, vocab = None, \
  sortBy = "words", printWords = False, printPos = True, printAgg = False):
    """Print benchmark metrics from an evaluation file as plain-text tables.

    Up to three tables are printed: per-word metrics, per-POS means and the
    mean over all words. NaN entries of the metrics are set to zero in place
    before a mean is taken, with a notice printed.

    Args:
        evalFile: path handed to ``sg_utils.load_variables``. The loaded
            mapping either holds the metrics under a 'details' key or is
            itself the metrics mapping.
        vocab: mapping with 'words', 'poss' and 'counts' lists; defaults to
            ``evalData['vocab']``.
        sortBy: "words" orders by ``np.argsort`` of the words, "ap" by
            descending average precision. Only needed for the word and
            aggregate tables.
        printWords: print the per-word table.
        printPos: print the per-POS table (tags in sorted order).
        printAgg: print the aggregate row.

    Raises:
        ValueError: if ``sortBy`` is not recognised and a table that needs
            the ordering was requested.
    """
    nan_notice = 'nan numbers ... setting them to zero for mean stats'
    rule = "{:>50s}".format("-" * 50)

    # evalFile has the following ['details', 'agg', 'vocab', 'imdb']
    evalData = sg_utils.load_variables(evalFile)
    if vocab is None:
        vocab = evalData['vocab']
    # Fix: the `else:` was missing, so `details` was always overwritten with
    # the whole evalData mapping.
    if 'details' in evalData:
        details = evalData['details']
    else:
        details = evalData
    ap = details['ap']
    prec_at_human_rec = details['prec_at_human_rec']
    # NOTE(review): human_prec reads the 'prec_at_human_rec' entry, so the
    # "h" column repeats the "p@H" column. Looks like a copy-paste slip for
    # 'human_prec'; left unchanged pending confirmation.
    human_prec = details['prec_at_human_rec']
    words = vocab['words']

    if sortBy == "words":
        srtInds = np.argsort(words)
    elif sortBy == "ap":
        # ap is a single row; sort it and reverse for descending order.
        srtInds = np.argsort(ap)[0][::-1]
    elif printWords or printAgg:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError('unknown sortBy value: %r' % (sortBy,))

    def zero_nans(ind):
        # Zero the NaN entries at `ind` in all three metrics, with one notice.
        metrics = (ap, prec_at_human_rec, human_prec)
        masks = [np.isnan(metric[0, ind]) for metric in metrics]
        if any(mask.any() for mask in masks):
            print(nan_notice)
            # Fix: the prec_at_human_rec assignment had lost its target.
            for metric, mask in zip(metrics, masks):
                metric[0, ind[mask]] = 0

    if printWords:
        print(rule)
        print("{:^50s}".format("Word metrics"))
        print(rule)
        print("{:>15s} {:>8s} {:>6s} :     {:^5s}     {:^5s}".format(
            "Words", "POS", "Counts", "mAP", "p@H"))
        for i in srtInds:
            print("{:>15s} {:>8s} {:6d} :     {:5.2f}     {:5.2f}".format(
                words[i], vocab['poss'][i], vocab['counts'][i],
                100*np.mean(ap[0, i]), 100*np.mean(prec_at_human_rec[0, i])))

    if printPos:
        print(rule)
        print("{:^50s}".format("POS metrics"))
        print(rule)
        print("{:>15s} :     {:^5s}     {:^5s}     {:^5s}".format(
            "POS", "mAP", "p@H", "h"))

        # Sorted so the row order is reproducible between runs.
        for pos in sorted(set(vocab['poss'])):
            ind = np.asarray(
                [i for i, x in enumerate(vocab['poss']) if pos == x])
            zero_nans(ind)
            print("{:>11s} [{:4d}]:     {:5.2f}     {:5.2f}     {:5.2f}".format(
                pos, len(ind), 100*np.mean(ap[0, ind]),
                100*np.mean(prec_at_human_rec[0, ind]),
                100*np.mean(human_prec[0, ind])))

    if printAgg:
        print(rule)
        print("{:^50s}".format("Agg metrics"))
        print(rule)
        print("{:>15s} :     {:^5s}     {:^5s}     {:^5s}".format(
            "agg", "mAP", "p@H", "h"))
        pos = 'all'
        ind = np.asarray(srtInds)
        zero_nans(ind)
        print("{:>11s} [{:^4d}]     :     {:^5.2f}     {:5.2f}     {:5.2f}".format(
            pos, len(ind), 100*np.mean(ap[0, ind]),
            100*np.mean(prec_at_human_rec[0, ind]),
            100*np.mean(human_prec[0, ind])))
Exemple #12
Exemple #14
def encode_captions(imgs, params, wtoi,model,functional_words,threshold_metric_name,output_metric_name,vocab,is_functional,pt):
    """
    Encode all captions into one large array, which will be 1-indexed.
    Also produces label_start_ix and label_end_ix which store 1-indexed
    and inclusive (Lua-style) pointers to the first and last caption for
    each image in the dataset.

    Args:
        imgs: list of dicts; each has 'final_captions', a non-empty list of
            token sequences.
        params: dict providing 'max_length', the number of token columns.
        wtoi: mapping from word to integer id.
        model, functional_words, threshold_metric_name, output_metric_name,
            is_functional, pt: accepted but not used by this function.
        vocab: ignored; see the note where the vocabulary is loaded.

    Returns:
        Tuple ``(L, label_start_ix, label_end_ix, label_length,
        label_semantic, label_attributes, label_attributes_prob)``. ``L`` is
        the (total captions, max_length) uint32 id array. ``label_semantic``
        holds one uint32 array per image with the ids of the vocabulary
        words found in its captions, most frequent first and zero padded.
        The two attribute lists are returned empty.
    """
    import sg_utils as utils
    # NOTE(review): the vocab argument is replaced by the vocabulary loaded
    # from this fixed path -- confirm that shadowing the parameter is intended.
    vocab = utils.load_variables('visual_concepts/code/vocabs/vocab_train.pkl')
    words = vocab['words']
    # First position of each vocabulary word. This replaces repeated
    # `in` / list.index scans and gives the same index list.index would.
    first_index = {}
    for idx, word in enumerate(words):
        first_index.setdefault(word, idx)

    max_length = params['max_length']
    N = len(imgs)
    M = sum(len(img['final_captions']) for img in imgs)  # total number of captions
    # `float` is what the removed `np.float` alias referred to.
    counts = np.zeros((N, len(words)), dtype=float)
    # NOTE(review): nothing in this function fills the two attribute lists.
    label_attributes = []
    label_attributes_prob = []
    label_arrays = []
    label_semantic = []
    label_start_ix = np.zeros(N, dtype='uint32')  # note: these will be one-indexed
    label_end_ix = np.zeros(N, dtype='uint32')
    label_length = np.zeros(M, dtype='uint32')
    caption_counter = 0
    counter = 1

    for i, img in enumerate(imgs):
        captions = img['final_captions']
        n = len(captions)
        assert n > 0, 'error: some image has no captions'

        Li = np.zeros((n, max_length), dtype='uint32')
        semant_label = np.zeros(len(words), dtype='uint32')
        for j, s in enumerate(captions):
            label_length[caption_counter] = min(max_length, len(s))  # record the length of this sequence
            caption_counter += 1
            for k, w in enumerate(s):
                if k < max_length:
                    Li[j, k] = wtoi[w]
            # Each vocabulary word counts at most once per caption.
            pos = list({first_index[w] for w in s if w in first_index})
            if pos:
                counts[i, pos] = counts[i, pos] + 1

        # Ids of the vocabulary words seen for this image, by descending count.
        order = np.argsort(counts[i], axis=0)[::-1]
        for m, word_idx in enumerate(order):
            if counts[i, word_idx] <= 0:
                break  # counts are descending, so the rest are zero too
            semant_label[m] = wtoi[words[word_idx]]

        # Fix: Li and semant_label were built but never stored, so the
        # concatenate below always failed on an empty list.
        label_arrays.append(Li)
        label_semantic.append(semant_label)

        label_start_ix[i] = counter
        label_end_ix[i] = counter + n - 1

        counter += n

    L = np.concatenate(label_arrays, axis=0)  # put all the labels together
    assert L.shape[0] == M, 'lengths don\'t match? that\'s weird'
    assert np.all(label_length > 0), 'error: some caption had no words?'

    # The two spaces reproduce the output of the original two-item print.
    print('encoded captions to array of size  ' + repr(L.shape))
    return L, label_start_ix, label_end_ix, label_length, label_semantic, label_attributes, label_attributes_prob
Exemple #15
 def __init__(self, vocab_file):
     """Load the vocabulary and set the fixed image size and threshold.

     ``vocab_file`` is handed to ``utils.load_variables``; the result must
     provide a 'words' list.
     """
     self.base_image_size = 565
     loaded_vocab = utils.load_variables(vocab_file)
     self.vocab = loaded_vocab
     # Despite the attribute name, an entry is True when the word is NOT in
     # functional_words.
     not_functional = []
     for word in loaded_vocab['words']:
         not_functional.append(word not in functional_words)
     self.is_functional = np.array(not_functional)
     self.threshold = 0.5
Exemple #16
Exemple #17
  # NOTE(review): fragment of a larger script body. The enclosing definition
  # and the code that builds `args` are not visible here, and several lines
  # are missing (see the notes below).
  print('Called with args:')

  # if not args.randomize:
  #   # fix the random seeds (numpy and caffe) for reproducibility
  #   np.random.seed(cfg.RNG_SEED)
  #   caffe.set_random_seed(cfg.RNG_SEED)

  # # set up caffe
  # NOTE(review): the body of the following `if` is missing from this copy.
  if args.gpu_id is not None:
  # Load the vocabulary
  vocab = utils.load_variables(args.vocab_file)
  if args.task == 'compute_targets':
    imdb = []
    output_dir = args.train_dir
    sets = ['train', 'val']
    for i, imset in enumerate([args.train_set, args.val_set]):
      # NOTE(review): imdb is still an empty list here; the line that
      # appends the dataset for `imset` appears to be missing.
      print 'Loaded dataset {:s}'.format(imdb[i].name)
      # Compute targets for the file
      counts = preprocess.get_vocab_counts(imdb[i].image_index, \
          imdb[i].coco_caption_data, 5, vocab)
      # NOTE(review): the body of the following `if` is missing from this copy.
      if args.write_labels:
Exemple #18
    # NOTE(review): tail of an argument-parsing function whose header and
    # parser setup are not visible here.
    # NOTE(review): the body of this `if` (no command-line arguments given)
    # is missing from this copy.
    if len(sys.argv) == 1:

    # Parse the command line and hand the namespace back to the caller.
    args = parser.parse_args()
    return args

# Script entry point: benchmarks a detection file on the COCO 'test' split.
# NOTE(review): two calls below are cut off after their opening parenthesis,
# so this block does not parse as written.
if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')

    # Dataset and vocabulary used for the benchmark.
    imdb = coco_voc.coco_voc('test')
    vocab = utils.load_variables(args.vocab_file)
    # NOTE(review): the arguments and closing parenthesis of this call are
    # missing from this copy.
    gt_label = preprocess.get_vocab_counts(
    det_file = args.det_file
    det_dir = os.path.dirname(det_file) # get root dir of det_file

    # The evaluation pickle is written next to the detection file, and 5 is
    # passed as the fourth positional argument of benchmark().
    eval_file = os.path.join(det_dir, imdb.name + '_eval.pkl')
    benchmark(imdb, vocab, gt_label, 5, det_file, eval_file=eval_file)

    map_file = args.map_file
    # NOTE(review): the arguments and closing parenthesis of this call are
    # missing from this copy.
    gt_label_det = preprocess.get_vocab_counts_det(
Exemple #19
# In[1]:

import _init_paths
import caffe, test_model, cap_eval_utils, sg_utils as utils
import cv2, numpy as np
# import matplotlib
# import matplotlib.pyplot as plt

# In[2]:

# Vocabulary the detector was trained with.
vocab_file = 'vocabs/vocab_train.pkl'
vocab = utils.load_variables(vocab_file)

# Network definition and trained weights.
prototxt_deploy = 'output/vgg/mil_finetune.prototxt.deploy'
model_file = 'output/vgg/snapshot_iter_240000.caffemodel'

# Preprocessing settings handed to the model loader.
mean = np.array([[[103.939, 116.779, 123.68]]])
base_image_size = 565

# Build the model from the pieces above.
model = test_model.load_model(
    prototxt_deploy, model_file, base_image_size, mean, vocab)

# In[3]: