Example #1
def det_label_init():
    # Load the vocabulary
    vocab_file = 'vocabs/vocab_train.pkl'
    vocab = utils.load_variables(vocab_file)

    # Set up Caffe
    caffe.set_mode_gpu()
    caffe.set_device(0)

    # Load the model
    mean = np.array([[[103.939, 116.779, 123.68]]])
    base_image_size = 565
    prototxt_deploy = 'visual_concepts/code/output/vgg/mil_finetune.prototxt.deploy'
    model_file = 'visual_concepts/code/output/vgg/snapshot_iter_240000.caffemodel'
    model = test_model.load_model(prototxt_deploy, model_file, base_image_size, mean, vocab)
    # define functional words
    functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are']
    is_functional = np.array([x not in functional_words for x in vocab['words']])

    # load the score precision mapping file
    eval_file = 'visual_concepts/code/output/vgg/snapshot_iter_240000.caffemodel_output/coco_valid1_eval.pkl'
    pt = utils.load_variables(eval_file)

    # Set threshold_metric_name and output_metric_name
    threshold_metric_name = 'prec'
    output_metric_name = 'prec'
    return model, functional_words, threshold_metric_name, output_metric_name, vocab, is_functional, pt
def get_model_vocab(solverProtoKey):
    vocabName, vocabKey = get_model_vocab_filename(solverProtoKey)
    dt = sg_utils.load_variables(vocabName)
    if 'vocab' in dt:
        return dt['vocab']
    else:
        return dt
def evalModelBatchNoRef(imdb, model, gtLabel, \
  numReferencesToEval, detectionFile, evalFile, evalNoiseKey=None):
    N_WORDS = len(model['vocab']['words'])
    vocab = model['vocab']
    imBnames, imIds = imdb.get_all_image_bnames_ids()
    gtLabel = np.array(gtLabel > 0, dtype=np.float32)

    dt = sg_utils.load_variables(detectionFile)
    mil_prob = dt['mil_prob']

    tm.benchmark_only_ap(vocab,
                         gtLabel,
                         numReferencesToEval,
                         mil_prob,
                         eval_file=evalFile,
                         noref=True)
    if evalNoiseKey is not None:
        mil_prob = dt[evalNoiseKey]
        evalNoiseFile = evalFile.replace('.h5', '_noise.h5')
        if not lock_utils.is_locked(evalNoiseFile):
            tm.benchmark_only_ap(vocab,
                                 gtLabel,
                                 numReferencesToEval,
                                 mil_prob,
                                 eval_file=evalNoiseFile,
                                 noref=True)
            lock_utils.unlock(evalNoiseFile)
def output_words(imdb, detection_file, eval_file, vocab, \
  threshold_metric_name, output_metric_name, threshold, min_words, output_file = None, \
  functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are']):
    """
    Output the words generated by the model. Loads the detections from
    detection_file and the score-to-precision mapping from eval_file, and
    writes the words to output_file. Words in output_file are thresholded
    on threshold_metric_name and reported with output_metric_name.
    """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)

    is_functional = np.array(
        [x not in functional_words for x in vocab['words']])
    prec = np.zeros(dt['mil_prob'].shape)
    for jj in xrange(prec.shape[1]):
        prec[:,jj] = cap_eval_utils.compute_precision_score_mapping(\
          pt['details']['score'][:,jj].copy(), \
          pt['details']['precision'][:,jj].copy(), \
          dt['mil_prob'][:,jj])
        utils.tic_toc_print(
            1, 'compute precision score mapping: {:4d} / {:4d}'.format(
                jj, prec.shape[1]))
    dt['prec'] = prec

    out_all = []
    for i in xrange(imdb.num_images):
        out = output_words_image(dt[threshold_metric_name][i,:], dt[output_metric_name][i,:], \
          min_words, threshold, vocab, is_functional)
        out_all.append(out)
        utils.tic_toc_print(
            1, 'output words image: {:4d} / {:4d}'.format(i, imdb.num_images))

    if output_file is not None:
        with open(output_file, 'wt') as f:
            f.write('detection file %s\n' % (detection_file))
            f.write('eval file %s\n' % (eval_file))
            f.write('threshold %.2f; min_words %d\n' % (threshold, min_words))
            for i in xrange(imdb.num_images):
                f.write('{:d}: '.format(imdb.image_index[i]))
                out = out_all[i]
                for j in xrange(len(out)):
                    f.write('{:s} ({:.2f}), '.format(out[j][0], out[j][1]))
                f.write('\n')
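
A minimal usage sketch tying det_label_init from Example #1 to output_words; the imdb object and both file paths are assumptions for illustration, not part of the snippets above.

# Hypothetical driver: imdb and the two paths are assumed, not from the repo.
model, functional_words, threshold_metric_name, output_metric_name, \
    vocab, is_functional, pt = det_label_init()
detection_file = 'output/vgg/dets.pkl'  # assumed detection dump
eval_file = 'output/vgg/snapshot_iter_240000.caffemodel_output/coco_valid1_eval.pkl'
output_words(imdb, detection_file, eval_file, vocab,
             threshold_metric_name, output_metric_name,
             threshold=0.5, min_words=5, output_file='words.txt',
             functional_words=functional_words)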
Example #7
def evalModelBatch(imdb, model, gtLabel, numReferencesToEval,\
     detectionFile, evalFile, evalNoiseKey=None):
  N_WORDS = len(model['vocab']['words'])
  vocab = model['vocab']
  imBnames, imIds = imdb.get_all_image_bnames_ids()
  dt = sg_utils.load_variables(detectionFile)
  mil_prob = dt['mil_prob']

  tm.benchmark_ap(vocab, gtLabel, numReferencesToEval, mil_prob, eval_file=evalFile)
  if evalNoiseKey is not None:
    mil_prob = dt[evalNoiseKey]
    evalNoiseFile = evalFile.replace('.h5', '_noise.h5')
    if not lock_utils.isLocked(evalNoiseFile):
      tm.benchmark_ap(vocab, gtLabel, numReferencesToEval, mil_prob, eval_file=evalNoiseFile)
      lock_utils.unlock(evalNoiseFile)
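
A hedged sketch of driving both batch evaluators; the ground-truth call follows the preprocess.get_vocab_counts pattern in Example #18 below, while the file names and the noise key are assumptions.

# All paths and the 'mil_prob_noise' key are placeholders for illustration.
gtLabel = preprocess.get_vocab_counts(imdb.image_index,
                                      imdb.coco_caption_data, 5, model['vocab'])
evalModelBatch(imdb, model, gtLabel, 5, 'dets.h5', 'eval.h5')
evalModelBatchNoRef(imdb, model, gtLabel, 5, 'dets.h5', 'eval_noref.h5',
                    evalNoiseKey='mil_prob_noise')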
def print_benchmark_latex(evalFile, vocab = None, sortBy = "words", \
  printWords = False, printPos = True, printAgg = False, possOrder=None):
    #evalFile has the following ['details', 'agg', 'vocab', 'imdb']
    evalData = sg_utils.load_variables(evalFile)
    if vocab is None:
        vocab = evalData['vocab']
    if 'details' in evalData:
        details = evalData['details']
    else:
        details = evalData
    ap = details['ap']
    prec_at_human_rec = details['prec_at_human_rec']
    human_prec = details['human_prec']
    words = vocab['words']
    ind = 0
    if possOrder is None:
        possOrder = ['NN', 'VB', 'JJ', 'DT', 'PRP', 'IN', 'other']
    print ' '.join(possOrder)
    for pos in possOrder:
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        ind = np.asarray(ind, dtype=np.int32)
        if any(np.isnan(ap[0, ind])):
            #print 'nan numbers ... skipping them for mean'
            print 'nan numbers ... setting them to zero for mean stats'
            ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0
        print '%.1f &' % (100 * np.mean(ap[0, ind])),
    print '%.1f & &' % (100 * np.mean(ap[0, :]))
    for pos in possOrder:
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        ind = np.asarray(ind, dtype=np.int32)
        if any( np.isnan(prec_at_human_rec[0,ind] )) or \
           any( np.isnan(human_prec[0,ind] )) :
            #print 'nan numbers ... skipping them for mean'
            print 'nan numbers ... setting them to zero for mean stats'
            prec_at_human_rec[
                0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0
            human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0
        print '%.1f &' % (100 * np.mean(prec_at_human_rec[0, ind])),
    print '%.1f \\\\' % (100 * np.mean(prec_at_human_rec[0, :]))
def benchmark(imdb,
              vocab,
              gt_label,
              num_references,
              detection_file,
              eval_file=None):
    # Get ground truth
    # dt = utils.scio.loadmat(detection_file)
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]
        # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
          format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
            100*np.mean(human_prec[0, ind]))
        agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
          'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
          'human_prec': 100*np.mean(human_prec[0, ind])})

    ind = range(len(vocab['words']))
    pos = 'all'
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})

    if eval_file is not None:
        try:
            utils.save_variables(eval_file, [details, agg], ['details', 'agg'],
                                 overwrite=True)
        except:
            print 'Error trying to save to pickle, will try hdf5 hack'
            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            eval_file_details_keys = details.keys()
            eval_file_details_vals = [
                details[x] for x in eval_file_details_keys
            ]
            utils.save_variables(eval_file_details,
                                 eval_file_details_vals,
                                 eval_file_details_keys,
                                 overwrite=True)

            # agg is a list of per-POS dicts, not a dict, so save it whole
            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            utils.save_variables(eval_file_agg, [agg], ['agg'],
                                 overwrite=True)

    return details
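
The returned details dict holds (1, n_words) arrays under the keys built above; a quick inspection sketch, with the imdb, labels, and paths assumed:

# Inputs are placeholders; only the dict keys come from benchmark() itself.
details = benchmark(imdb, vocab, gt_label, 5, 'dets.pkl', eval_file='eval.pkl')
print 'mean AP            : %.1f' % (100 * np.mean(details['ap'][0, :]))
print 'mean prec@human rec: %.1f' % (100 * np.mean(details['prec_at_human_rec'][0, :]))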
def print_benchmark_plain(evalFile, vocab = None, \
  sortBy = "words", printWords = False, printPos = True, printAgg = False):
    #evalFile has the following ['details', 'agg', 'vocab', 'imdb']
    evalData = sg_utils.load_variables(evalFile)
    if vocab is None:
        vocab = evalData['vocab']
    if 'details' in evalData:
        details = evalData['details']
    else:
        details = evalData
    ap = details['ap']
    prec_at_human_rec = details['prec_at_human_rec']
    human_prec = details['human_prec']
    words = vocab['words']
    ind = 0

    if sortBy == "words":
        srtInds = np.argsort(words)
    elif sortBy == "ap":
        srtInds = np.argsort(ap)
        srtInds = srtInds[0]
        srtInds = srtInds[::-1]
    if printWords:
        print "{:>50s}".format("-" * 50)
        print "{:^50s}".format("Word metrics")
        print "{:>50s}".format("-" * 50)
        print "{:>15s} {:>8s} {:>6s} :     {:^5s}     {:^5s}". \
          format("Words","POS","Counts","mAP", "p@H")
        for i in srtInds:
            print "{:>15s} {:>8s} {:6d} :     {:5.2f}     {:5.2f}". \
              format(words[i], vocab['poss'][i], vocab['counts'][i], 100*np.mean(ap[0, i]), 100*np.mean(prec_at_human_rec[0, i]))

    if printPos:
        print "{:>50s}".format("-" * 50)
        print "{:^50s}".format("POS metrics")
        print "{:>50s}".format("-" * 50)
        print "{:>15s} :     {:^5s}     {:^5s}     {:^5s}". \
        format("POS", "mAP", "p@H", "h")

        for pos in list(set(vocab['poss'])):
            ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
            ind = np.asarray(ind)
            if any( np.isnan(ap[0,ind] )) or \
               any( np.isnan(prec_at_human_rec[0,ind] )) or \
               any( np.isnan(human_prec[0,ind] )) :
                print 'nan numbers ... setting them to zero for mean stats'
                ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0
                prec_at_human_rec[
                    0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0
                human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0
            print "{:>11s} [{:4d}]:     {:5.2f}     {:5.2f}     {:5.2f}". \
              format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
              100*np.mean(human_prec[0, ind]))

    if printAgg:
        print "{:>50s}".format("-" * 50)
        print "{:^50s}".format("Agg metrics")
        print "{:>50s}".format("-" * 50)
        print "{:>15s} :     {:^5s}     {:^5s}     {:^5s}". \
          format("agg", "mAP", "p@H", "h")
        pos = 'all'
        ind = srtInds
        ind = np.asarray(ind)
        if any( np.isnan(ap[0,ind] )) or \
             any( np.isnan(prec_at_human_rec[0,ind] )) or \
             any( np.isnan(human_prec[0,ind] )) :
            print 'nan numbers ... setting them to zero for mean stats'
            ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0
            prec_at_human_rec[
                0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0
            human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0
        print "{:>11s} [{:^4d}]     :     {:^5.2f}     {:5.2f}     {:5.2f}". \
          format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
            100*np.mean(human_prec[0, ind]))
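
Reading back a saved eval file with both report printers; the path is an assumption following the imdb.name + '_eval.pkl' pattern used in Example #18.

# The eval-file path is assumed; vocab is passed in case the file lacks one.
eval_file = 'output/vgg/snapshot_iter_240000.caffemodel_output/coco_test_eval.pkl'
print_benchmark_plain(eval_file, vocab=vocab, printPos=True, printAgg=True)
print_benchmark_latex(eval_file, vocab=vocab)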
Example #14
def encode_captions(imgs, params, wtoi, model, functional_words, threshold_metric_name, output_metric_name, vocab, is_functional, pt):
    """
    encode all captions into one large array, which will be 1-indexed.
    also produces label_start_ix and label_end_ix which store 1-indexed
    and inclusive (Lua-style) pointers to the first and last caption for
    each image in the dataset.
    """
    import sg_utils as utils
    # reloads the full training vocabulary, overriding the vocab argument
    vocab = utils.load_variables('visual_concepts/code/vocabs/vocab_train.pkl')
    max_length = params['max_length']
    N = len(imgs)
    M = sum(len(img['final_captions']) for img in imgs)  # total number of captions
    counts = np.zeros((len(imgs), len(vocab['words'])), dtype=np.float)
    label_attributes = []
    label_attributes_prob = []
    label_arrays = []
    label_semantic = []
    label_start_ix = np.zeros(N, dtype='uint32')  # note: these will be one-indexed
    label_end_ix = np.zeros(N, dtype='uint32')
    label_length = np.zeros(M, dtype='uint32')
    caption_counter = 0
    counter = 1
    image_files = []

    for i, img in enumerate(imgs):
        n = len(img['final_captions'])
        assert n > 0, 'error: some image has no captions'

        Li = np.zeros((n, max_length), dtype='uint32')
        semant_label = np.zeros(len(vocab['words']), dtype='uint32')
        for j, s in enumerate(img['final_captions']):
            label_length[caption_counter] = min(max_length, len(s))  # record the length of this sequence
            caption_counter += 1
            for k, w in enumerate(s):
                if k < max_length:
                    Li[j, k] = wtoi[w]
            pos = [vocab['words'].index(tmp_j_k) for tmp_j_k in s if tmp_j_k in vocab['words']]
            pos = list(set(pos))
            counts[i, pos] = counts[i, pos] + 1

        sort_counts = np.argsort(counts[i], axis=0)[::-1]
        sort_key = sorted(counts[i], reverse=True)
        for m in range(len(sort_key)):
            if sort_key[m] > 0:
                semant_label[m] = wtoi[vocab['words'][sort_counts[m]]]

        image_files.append(img['file_path'])

        label_arrays.append(Li)
        label_semantic.append(semant_label)
        label_start_ix[i] = counter
        label_end_ix[i] = counter + n - 1

        counter += n


    #L_semantic = np.concatenate(label_semantic, axis=0)  # put all the labels together
    L = np.concatenate(label_arrays, axis=0)  # put all the labels together
    assert L.shape[0] == M, 'lengths don\'t match? that\'s weird'
    assert np.all(label_length > 0), 'error: some caption had no words?'

    print 'encoded captions to array of size ', L.shape
    return L, label_start_ix, label_end_ix, label_length, label_semantic, label_attributes, label_attributes_prob
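
A tiny synthetic call, assuming wtoi covers every caption token, the vocab pickle referenced inside encode_captions exists on disk, and the remaining arguments come from det_label_init in Example #1:

# imgs and params are made-up minimal inputs for illustration only.
imgs = [{'file_path': 'val2014/img1.jpg',
         'final_captions': [['a', 'dog', 'on', 'grass']]}]
params = {'max_length': 16}
L, start_ix, end_ix, lengths, semantic, attrs, attr_probs = \
    encode_captions(imgs, params, wtoi, model, functional_words,
                    threshold_metric_name, output_metric_name,
                    vocab, is_functional, pt)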
Example #15
def __init__(self, vocab_file):
    # Set threshold_metric_name and output_metric_name
    self.base_image_size = 565
    self.vocab = utils.load_variables(vocab_file)
    # functional_words is assumed to be defined at module scope
    self.is_functional = np.array([x not in functional_words for x in self.vocab['words']])
    self.threshold = 0.5
Example #16
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file = None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob']
  
  # Benchmark the output, and return a result struct
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)
  
  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan
  
  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
    human_prec[0,i], human_rec[0,i]  = cap_eval_utils.human_agreement(gt_label[:,i], num_references)
    
    ind = np.where(R[:,i] >= human_rec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0,i] = P[ind[0], i]

    ind = np.where(P[:,i] >= human_prec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0,i] = R[ind[-1], i]
    
    ind = np.where(P[:,i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0,i]  = R[ind[-1], i]
    # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100) 
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}
  
  # Collect statistics over the POS
  agg = []
  for pos in list(set(vocab['poss'])):
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})  
  
  ind = range(len(vocab['words']))
  pos = 'all'
  print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
      100*np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
    'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
    'human_prec': 100*np.mean(human_prec[0, ind])})  

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
      ['details', 'agg', 'vocab', 'imdb'], overwrite = True)
  
  return details
Example #17
  print('Called with args:')
  print(args)

  # if not args.randomize:
  #   # fix the random seeds (numpy and caffe) for reproducibility
  #   np.random.seed(cfg.RNG_SEED)
  #   caffe.set_random_seed(cfg.RNG_SEED)

  # set up caffe
  caffe.set_mode_gpu()
  if args.gpu_id is not None:
    caffe.set_device(args.gpu_id)
 
  # Load the vocabulary
  vocab = utils.load_variables(args.vocab_file)
  
  if args.task == 'compute_targets':
    
    imdb = []
    output_dir = args.train_dir
    sets = ['train', 'val']
    for i, imset in enumerate([args.train_set, args.val_set]):
      imdb.append(coco_voc.coco_voc(imset))
      print 'Loaded dataset {:s}'.format(imdb[i].name)
      
      # Compute targets for the file
      counts = preprocess.get_vocab_counts(imdb[i].image_index, \
          imdb[i].coco_caption_data, 5, vocab)
      
      if args.write_labels:
Example #18
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    imdb = coco_voc.coco_voc('test')
    vocab = utils.load_variables(args.vocab_file)
    gt_label = preprocess.get_vocab_counts(
        imdb.image_index,
        imdb.coco_caption_data,
        5,
        vocab
        )
    det_file = args.det_file
    det_dir = os.path.dirname(det_file) # get root dir of det_file

    eval_file = os.path.join(det_dir, imdb.name + '_eval.pkl')
    benchmark(imdb, vocab, gt_label, 5, det_file, eval_file=eval_file)

    map_file = args.map_file
    gt_label_det = preprocess.get_vocab_counts_det(
        imdb.image_index,
Example #19
# In[1]:


import _init_paths
import caffe, test_model, cap_eval_utils, sg_utils as utils
import cv2, numpy as np
# import matplotlib
# import matplotlib.pyplot as plt


# In[2]:


# Load the vocabulary
vocab_file = 'vocabs/vocab_train.pkl'
vocab = utils.load_variables(vocab_file)

# Set up Caffe
caffe.set_mode_gpu()
caffe.set_device(0)

# Load the model
mean = np.array([[[103.939, 116.779, 123.68]]])
base_image_size = 565
prototxt_deploy = 'output/vgg/mil_finetune.prototxt.deploy'
model_file = 'output/vgg/snapshot_iter_240000.caffemodel'
model = test_model.load_model(prototxt_deploy, model_file, base_image_size, mean, vocab)


# In[3]:
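
A hedged sketch of what cell In[3] might contain, mirroring the remaining steps of det_label_init in Example #1; the eval-file path is an assumption, not part of the original notebook.

# Assumed continuation: load the score-to-precision mapping the same way
# det_label_init does above.
eval_file = 'output/vgg/snapshot_iter_240000.caffemodel_output/coco_valid1_eval.pkl'
pt = utils.load_variables(eval_file)
threshold_metric_name = 'prec'
output_metric_name = 'prec'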