def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file=None):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = \
            cap_eval_utils.calc_pr_ovr_noref(gt_label[:, i], mil_prob[:, i])
    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    if eval_file is not None:
        eval_file_details_keys = details.keys()
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return details
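
For orientation, here is a minimal NumPy sketch of the kind of per-word precision/recall/AP computation that calc_pr_ovr_noref presumably performs; the exact tie handling and interpolation in cap_eval_utils may differ, so treat this as an assumption rather than the repo's implementation.

import numpy as np

def pr_ap_sketch(gt, prob):
    # gt: binary ground-truth vector (n_images,); prob: predicted scores (n_images,)
    order = np.argsort(-prob)                    # rank images by decreasing score
    gt_sorted = gt[order].astype(float)
    tp = np.cumsum(gt_sorted)                    # true positives at each cutoff
    precision = tp / np.arange(1, len(gt) + 1)
    recall = tp / max(gt_sorted.sum(), 1.0)
    # AP = precision averaged over the ranks of the positive examples
    ap = (precision * gt_sorted).sum() / max(gt_sorted.sum(), 1.0)
    return precision, recall, prob[order], ap

# toy usage: ap works out to (1.0 + 2.0/3.0) / 2
gt   = np.array([1, 0, 1, 0, 0])
prob = np.array([0.9, 0.8, 0.4, 0.3, 0.1])
P_sk, R_sk, score_sk, ap_sk = pr_ap_sketch(gt, prob)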
def test_model(imdb, model, detection_file=None):
    """
  Tests model and stores detections on disk
  """
    N_WORDS = len(model['vocab']['words'])
    sc = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
    mil_prob = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)

    for i in xrange(len(imdb.image_index)):
        im = cv2.imread(imdb.image_path_at(i))
        if im is None:
            print 'could not read %s; skipping' % (imdb.image_path_at(i))
            continue

        sc[i, :], mil_prob[i, :] = test_img(im, model['net'],
                                            model['base_image_size'],
                                            model['means'])
        utils.tic_toc_print(
            60, 'test_img : {:6d}/{:6d}'.format(i, len(imdb.image_index)))

    if detection_file is not None:
        # utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
        #   ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite = True)
        utils.save_variables(detection_file, [sc, mil_prob], \
          ['sc', 'mil_prob'], overwrite = True)
def benchmark_det(imdb, vocab, gt_label, map_file, detection_file, eval_file=None):
  # Get prediction score
  dt = h5py.File(detection_file, 'r')
  mil_prob = dt['mil_prob'].value;

  # Get manual mapping from detection category to caption label
  det2cap = utils.det2cap(map_file, vocab)

  # Retrieve probability of detection category as max of probability of its
  # corresponding caption labels
  n_words = len(det2cap.keys())
  mil_prob_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float)
  # gt_label_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float)
  gt_label_det = gt_label
  det_cats = det2cap.keys()
  for i in range(n_words):
    cat = det_cats[i]
    cap_inds = det2cap[cat]
    mil_prob_det[:, i] = np.max(mil_prob[:, cap_inds], axis=1)
    # gt_label_det[:, i] = np.max(gt_label[:, cap_inds], axis=1)
    # temp = np.sum(gt_label[:, cap_inds], axis=1)[:, np.newaxis]
    # temp = np.hstack((temp, np.ones(temp.shape)))
    # gt_label_det[:, i] = np.min(temp, axis=1)

  # Benchmark the output, and return a result struct
  P     = np.zeros(mil_prob_det.shape, dtype=np.float)
  R     = np.zeros(mil_prob_det.shape, dtype=np.float)
  score = np.zeros(mil_prob_det.shape, dtype=np.float)
  ap    = np.zeros((1, n_words), dtype=np.float)

  # Per-category threshold from the mean ground-truth frequency; the first 73
  # categories get a small fixed threshold instead
  threshold = np.mean(gt_label, axis=0)
  for i in range(73):
    threshold[i] = 0.0001

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = \
      cap_eval_utils.calc_pr_det(gt_label_det[:,i], mil_prob_det[:,i], threshold[i])

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

  # Format output title
  print "    {:5s} [{:4s}]     :     {:7s}". \
    format('Pos', 'nPos', 'AP')

  agg = []
  ind = range(n_words); pos = 'all';
  print "    {:5s} [{:4d}]     :     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind])})

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
      ['details', 'agg', 'vocab', 'imdb'], overwrite = True)

  return details
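
utils.det2cap is used above as a dict mapping each detection category to the vocabulary indices of its caption words; below is a toy sketch of the max-pooling step with a hypothetical mapping (the real map_file contents are not reproduced here).

import numpy as np

det2cap = {'person': [12, 45], 'dog': [3]}       # hypothetical category -> caption-word indices
mil_prob = np.random.rand(4, 100)                # (n_images, n_caption_words)

det_cats = list(det2cap.keys())
mil_prob_det = np.zeros((mil_prob.shape[0], len(det_cats)))
for i, cat in enumerate(det_cats):
    # a detection category scores as the max over its caption-word probabilities
    mil_prob_det[:, i] = np.max(mil_prob[:, det2cap[cat]], axis=1)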
def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    if eval_file is not None:
        eval_file_details_keys = details.keys()
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return details
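
The three np.where blocks above read operating points off the PR curve: precision at the first point whose recall reaches human recall, and recall at the last point whose precision still meets human precision (or 0.5). Below is a small worked sketch with a toy curve, assuming P and R are ordered along the curve as calc_pr_ovr returns them.

import numpy as np

P_curve = np.array([1.00, 0.90, 0.75, 0.60, 0.50])   # precision, decreasing along the curve
R_curve = np.array([0.10, 0.30, 0.55, 0.80, 1.00])   # recall, increasing along the curve
human_prec, human_rec = 0.80, 0.50

ind = np.where(R_curve >= human_rec)[0]
prec_at_human_rec = P_curve[np.sort(ind)[0]]          # first point reaching human recall -> 0.75

ind = np.where(P_curve >= human_prec)[0]
rec_at_human_prec = R_curve[np.sort(ind)[-1]]         # last point keeping human precision -> 0.30

ind = np.where(P_curve >= 0.5)[0]
rec_at_half_prec = R_curve[np.sort(ind)[-1]]          # last point with precision >= 0.5 -> 1.00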
def test_model(imdb, model, detection_file = None):
  """
  Tests model and stores detections on disk
  """
  N_WORDS = len(model['vocab']['words'])
  sc = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
  mil_prob = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
  for i in xrange(len(imdb.image_index)):
    im = cv2.imread(imdb.image_path_at(i))
    sc[i,:], mil_prob[i,:] = test_img(im, model['net'], model['base_image_size'], model['means'])
    utils.tic_toc_print(60, 'test_img : {:6d}/{:6d}'.format(i, len(imdb.image_index)))

  if detection_file is not None:
    utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
      ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite = True)
def testModelBatch(imdb, model, detection_file):
    if 'words' in model['vocab']:
        N_WORDS = len(model['vocab']['words'])
    else:
        #we are using COCO 80 classes
        N_WORDS = 80
    batchedImList = getBatchedImList(imdb, model)

    sc = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
    mil_prob = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
    if model['inf_type'] == 'MILNoise':
        fields = ['mil', 'mil_max',\
          'qconds10', 'qconds11', 'noisy_comb_noimage']
        qdata_raw = np.zeros((len(batchedImList), 4 * N_WORDS),
                             dtype=np.float32)
        qdata_smax = np.zeros((len(batchedImList), 4 * N_WORDS),
                              dtype=np.float32)
        qconds10 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
        qconds11 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
        noisy_comb_noimage = np.zeros((len(batchedImList), N_WORDS),
                                      dtype=np.float32)

    for bind in range(len(batchedImList)):
        if model['inf_type'] != 'MILNoise':
            mil_prob[bind, :], sc[bind, :] = tm.test_batch(
                batchedImList[bind], model)
        else:
            fOut = tm.test_batch(batchedImList[bind], model, fields)
            mil_prob[bind, :] = fOut[0]
            sc[bind, :] = fOut[1]
            qconds10[bind, :] = fOut[2]
            qconds11[bind, :] = fOut[3]
            noisy_comb_noimage[bind, :] = fOut[4]
        sg_utils.tic_toc_print(
            60, 'test_batch : %d/%d (num_per_batch %d)' %
            (bind, len(batchedImList), len(batchedImList[0])))

    if detection_file is not None and model['inf_type'] != 'MILNoise':
        sg_utils.save_variables(detection_file, [sc, mil_prob], \
        ['sc', 'mil_prob'], overwrite = True)
    elif detection_file is not None:
        sg_utils.save_variables(detection_file, [sc, mil_prob, \
          qconds10, qconds11, noisy_comb_noimage], \
        ['sc', 'mil_prob',\
         'qconds10', 'qconds11',\
         'noisy_comb_noimage'], overwrite = True)
def benchmark_only_ap(vocab,
                      gt_label,
                      num_references,
                      mil_prob,
                      eval_file=None,
                      noref=False):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        if noref:
            P[:, i], R[:, i], score[:, i], ap[0, i] = \
                cap_eval_utils.calc_pr_ovr_noref(gt_label[:, i], mil_prob[:, i])
        else:
            P[:, i], R[:, i], score[:, i], ap[0, i] = \
                cap_eval_utils.calc_pr_ovr(gt_label[:, i], mil_prob[:, i], num_references)

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score
    }

    if eval_file is not None:
        eval_file_details_keys = details.keys()
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return P, R, score, ap
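
A hedged usage sketch for the function above; the file names are placeholders and utils is assumed to be the repo's sg_utils module, so adjust to the actual cache layout.

import numpy as np
import sg_utils as utils    # assumption: `utils` in these snippets is the repo's sg_utils

vocab    = utils.load_variables('vocab_train.pkl')        # placeholder path; needs a 'words' key
dt       = utils.load_variables('detections.pkl')         # placeholder path; needs a 'mil_prob' key
gt_label = utils.load_variables('gt_labels.pkl')['labels'] # placeholder path; binary label matrix

# with noref=True the reference count is unused, so any value can be passed
P, R, score, ap = benchmark_only_ap(vocab, gt_label, None, dt['mil_prob'], noref=True)
mean_ap = 100 * np.mean(ap[0, :])                          # percent mAP over the vocabulary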
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file = None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob'];
  
  # Benchmark the output, and return a result struct
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)
  
  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan
  
  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
    human_prec[0,i], human_rec[0,i]  = cap_eval_utils.human_agreement(gt_label[:,i], num_references)
    
    ind = np.where(R[:,i] >= human_rec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0,i] = P[ind[0], i];

    ind = np.where(P[:,i] >= human_prec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0,i] = R[ind[-1], i];
    
    ind = np.where(P[:,i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0,i]  = R[ind[-1], i];
    # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100) 
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}; 
  
  # Collect statistics over the POS
  agg = [];
  for pos in list(set(vocab['poss'])):
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})  
  
  ind = range(len(vocab['words'])); pos = 'all';
  print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
      100*np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
    'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
    'human_prec': 100*np.mean(human_prec[0, ind])})  

  if eval_file is not None:
    try:
      utils.save_variables(eval_file, [details, agg],
        ['details', 'agg'], overwrite = True)
    except:
      print 'Error trying to save to pickle, will try hdf5 hack';
      # debugging hook: drop into an interactive shell to inspect the failure
      # before attempting the hdf5 fallback below
      namespace = globals().copy()
      namespace.update(locals())
      code.interact(local=namespace)

      eval_file_details = eval_file.replace('.pkl','_details.h5');
      eval_file_details_keys = details.keys();
      eval_file_details_vals = [details[x] for x in eval_file_details_keys];
      utils.save_variables(eval_file_details, eval_file_details_vals, eval_file_details_keys, overwrite=True);

      eval_file_agg = eval_file.replace('.pkl','_agg.h5');
      # agg is a list of dicts (one entry per POS), so gather each field across entries
      eval_file_agg_keys = list(agg[0].keys());
      eval_file_agg_vals = [[a[x] for a in agg] for x in eval_file_agg_keys];
      utils.save_variables(eval_file_agg, eval_file_agg_vals, eval_file_agg_keys, overwrite=True);

  
  return details
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file = None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob'];
  
  # Benchmark the output, and return a result struct
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)
  
  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan
  
  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
    human_prec[0,i], human_rec[0,i]  = cap_eval_utils.human_agreement(gt_label[:,i], num_references)
    
    ind = np.where(R[:,i] >= human_rec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0,i] = P[ind[0], i];

    ind = np.where(P[:,i] >= human_prec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0,i] = R[ind[-1], i];
    
    ind = np.where(P[:,i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0,i]  = R[ind[-1], i];
    # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100) 
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}; 
  
  # Collect statistics over the POS
  agg = [];
  for pos in list(set(vocab['poss'])):
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})  
  
  ind = range(len(vocab['words'])); pos = 'all';
  print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
      100*np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
    'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
    'human_prec': 100*np.mean(human_prec[0, ind])})  

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
      ['details', 'agg', 'vocab', 'imdb'], overwrite = True)
  
  return details
## MATLAB vocabulary
if job_name == 'vocab':
  import csv
  import sg_utils
  matlab_vocab = 'vocabs/vocab_words.txt';
  words = []; poss = []; counts = [];
  with open(matlab_vocab, 'rb') as f:
    reader = csv.reader(f)
    for row in reader:
      words.append(row[0].strip()) 
      poss.append(row[1].strip()) 
      counts.append(int(row[2].strip()))
  sg_utils.save_variables('vocabs/vocab_train.pkl', [words, poss, counts], \
    ['words', 'poss', 'counts'], overwrite = True) 

## Load the detections
# Code to re-evaluate matlab output, to check sanity of python code
# if job_name == 'eval_det':
#   import sg_utils
#   import test_model
#   import cap_eval_utils
#   vocab = sg_utils.load_variables('cachedir/v1/vocab_train.pkl')
#   dt = sg_utils.scio.loadmat('cachedir/v1/gt_labels_val.all.mat'); labels = dt['labels'];
#   details = test_model.benchmark(None, vocab, labels, 5., 'cachedir/v1/mil_prob_val.all.mat')
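
The vocab-building snippet above parses vocabs/vocab_words.txt as a headerless CSV with one entry per row: word, POS tag, count. A hypothetical three-line file in that layout (not the actual vocabulary):

dog,NN,1523
running,VB,842
red,JJ,317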
imset = 'train'
coco_caps = COCO('../data/captions_train2014.json');

# mapping to output final statistics
mapping = {'NNS': 'NN', 'NNP': 'NN', 'NNPS': 'NN', 'NN': 'NN', \
  'VB': 'VB', 'VBD': 'VB', 'VBN': 'VB', 'VBZ': 'VB', 'VBP': 'VB', 'VBG': 'VB', \
  'JJR': 'JJ', 'JJS': 'JJ', 'JJ': 'JJ', 'DT': 'DT', 'PRP': 'PRP', 'PRP$': 'PRP', 'IN': 'IN'};
    
# punctuations to be removed from the sentences
punctuations = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", \
  ".", "?", "!", ",", ":", "-", "--", "...", ";"] 

vocab = preprocess.get_vocab(imset, coco_caps, punctuations, mapping);

sg_utils.save_variables('vocab_' + imset + '.pkl', \
  [vocab[x] for x in vocab.keys()], \
  vocab.keys(), \
  overwrite = True);


##
N_WORDS = 1000;
vocab = preprocess.get_vocab_top_k(vocab, N_WORDS)
image_ids = coco_caps.getImgIds()
counts = preprocess.get_vocab_counts(image_ids, coco_caps, 5, vocab)
P = np.zeros((N_WORDS, 1), dtype = np.float); 
R = np.zeros((N_WORDS, 1), dtype = np.float); 
for i, w in enumerate(vocab['words']):
  P[i], R[i] = cap_eval_utils.human_agreement(counts[:,i], 5)
  print w, P[i], R[i]

for pos in list(set(vocab['poss'])):
  ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
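
Throughout these snippets the vocabulary behaves like a dict of parallel lists; a toy instance sufficient to drive the helpers above is shown below with hypothetical words and counts (real vocabularies come from preprocess.get_vocab or the pickles saved earlier).

vocab = {
    'words':  ['dog', 'running', 'red'],   # caption words
    'poss':   ['NN', 'VB', 'JJ'],          # part-of-speech tag per word (after `mapping` above)
    'counts': [1523, 842, 317],            # hypothetical training-set counts
}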