def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file=None):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:,
                                i], ap[0,
                                       i] = cap_eval_utils.calc_pr_ovr_noref(
                                           gt_label[:, i], mil_prob[:, i])
    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    if eval_file is not None:
        eval_file_details_keys = details.keys()
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return details
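
A minimal usage sketch for benchmark_ap_noref on toy data. The two-word vocabulary and the arrays below are made up for illustration; only the {'words': [...]} vocab layout and the (n_images, n_words) shapes are taken from the function above, and sg_utils / cap_eval_utils are assumed to be importable as in the surrounding snippets.

import numpy as np

toy_vocab = {'words': ['dog', 'red']}
toy_gt    = np.array([[1., 0.], [0., 1.], [1., 1.]])        # (n_images, n_words)
toy_prob  = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.6]])  # MIL probabilities

toy_details = benchmark_ap_noref(toy_vocab, toy_gt, toy_prob)
print toy_details['ap']          # per-word average precision, shape (1, 2)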
Example #2
def benchmark_ap_noref(vocab, gt_label, mil_prob, eval_file = None):
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)

  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr_noref(gt_label[:,i], mil_prob[:,i])
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec};

  if eval_file is not None:
    eval_file_details_keys = details.keys();
    eval_file_details_vals = [details[x] for x in eval_file_details_keys];
    utils.save_variables(eval_file, eval_file_details_vals, eval_file_details_keys, overwrite=True);
  return details;
def test_model(imdb, model, detection_file=None):
    """
  Tests model and stores detections on disk
  """
    N_WORDS = len(model['vocab']['words'])
    sc = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
    mil_prob = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)

    for i in xrange(len(imdb.image_index)):
        im = cv2.imread(imdb.image_path_at(i))
        if im is None:
            print 'could not read %s; skipping' % (imdb.image_path_at(i))
            continue

        sc[i, :], mil_prob[i, :] = test_img(im, model['net'],
                                            model['base_image_size'],
                                            model['means'])
        utils.tic_toc_print(
            60, 'test_img : {:6d}/{:6d}'.format(i, len(imdb.image_index)))

    if detection_file is not None:
        # utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
        #   ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite = True)
        utils.save_variables(detection_file, [sc, mil_prob], \
          ['sc', 'mil_prob'], overwrite = True)
Example #4
def benchmark_det(imdb, vocab, gt_label, map_file, detection_file, eval_file=None):
  # Get prediction score
  dt = h5py.File(detection_file, 'r')
  mil_prob = dt['mil_prob'].value;

  # Get manual mapping from detection category to caption label
  det2cap = utils.det2cap(map_file, vocab)

  # The probability of each detection category is taken as the max over the
  # probabilities of its corresponding caption labels
  n_words = len(det2cap.keys())
  mil_prob_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float)
  # gt_label_det = np.zeros((mil_prob.shape[0], n_words), dtype=np.float)
  gt_label_det = gt_label
  det_cats = det2cap.keys()
  for i in range(n_words):
    cat = det_cats[i]
    cap_inds = det2cap[cat]
    mil_prob_det[:, i] = np.max(mil_prob[:, cap_inds], axis=1)
    # gt_label_det[:, i] = np.max(gt_label[:, cap_inds], axis=1)
    # temp = np.sum(gt_label[:, cap_inds], axis=1)[:, np.newaxis]
    # temp = np.hstack((temp, np.ones(temp.shape)))
    # gt_label_det[:, i] = np.min(temp, axis=1)

  # Benchmark the output, and return a result struct
  P     = np.zeros(mil_prob_det.shape, dtype=np.float)
  R     = np.zeros(mil_prob_det.shape, dtype=np.float)
  score = np.zeros(mil_prob_det.shape, dtype=np.float)
  ap    = np.zeros((1, n_words), dtype=np.float)

  # Per-word threshold from the mean ground-truth label value; the first 73
  # categories are overridden with a small fixed threshold
  threshold = np.mean(gt_label, axis=0)
  for i in range(73):
    threshold[i] = 0.0001

  for i in range(n_words):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = \
      cap_eval_utils.calc_pr_det(gt_label_det[:,i], mil_prob_det[:,i], threshold[i])

  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score}

  # Format output title
  print "    {:5s} [{:4s}]     :     {:7s}". \
    format('Pos', 'nPos', 'AP')

  agg = []
  ind = range(n_words); pos = 'all';
  print "    {:5s} [{:4d}]     :     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind])})

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
      ['details', 'agg', 'vocab', 'imdb'], overwrite = True)

  return details
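
To make the detection-to-caption pooling in benchmark_det concrete, here is a toy sketch with made-up categories, word indices and probabilities: the score of a detection category is the element-wise max over the columns of its mapped caption words, as in the loop above.

import numpy as np

toy_det2cap = {'person': [0, 3], 'dog': [1]}                 # hypothetical mapping
toy_mil_prob = np.array([[0.2, 0.9, 0.1, 0.6],
                         [0.7, 0.1, 0.3, 0.2]])              # (n_images, n_caption_words)

toy_det_prob = np.zeros((toy_mil_prob.shape[0], len(toy_det2cap)))
for i, cat in enumerate(toy_det2cap.keys()):
    toy_det_prob[:, i] = np.max(toy_mil_prob[:, toy_det2cap[cat]], axis=1)
print toy_det_prob       # each category scores as the max over its caption words
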
def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    if eval_file is not None:
        eval_file_details_keys = details.keys()
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return details
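
The precision-at-human-recall and recall-at-human-precision readouts above can be illustrated on a toy precision/recall curve (made-up numbers). The indexing mirrors the loop above and assumes, as that loop does, that recall is non-decreasing along the curve.

import numpy as np

P = np.array([1.00, 0.90, 0.75, 0.60, 0.50])   # toy precision curve
R = np.array([0.10, 0.30, 0.50, 0.70, 0.90])   # toy recall curve
human_prec, human_rec = 0.80, 0.40

ind = np.where(R >= human_rec)[0]
prec_at_human_rec = P[ind[0]] if len(ind) > 0 else np.nan    # 0.75

ind = np.where(P >= human_prec)[0]
rec_at_human_prec = R[ind[-1]] if len(ind) > 0 else 0.0      # 0.30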
Example #6
def test_model(imdb, model, detection_file = None):
  """
  Tests model and stores detections on disk
  """
  N_WORDS = len(model['vocab']['words'])
  sc = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
  mil_prob = np.zeros((imdb.num_images, N_WORDS), dtype=np.float)
  for i in xrange(len(imdb.image_index)):
    im = cv2.imread(imdb.image_path_at(i))
    sc[i,:], mil_prob[i,:] = test_img(im, model['net'], model['base_image_size'], model['means'])
    utils.tic_toc_print(60, 'test_img : {:6d}/{:6d}'.format(i, len(imdb.image_index)))

  if detection_file is not None:
    utils.save_variables(detection_file, [sc, mil_prob, model['vocab'], imdb],
      ['sc', 'mil_prob', 'vocab', 'imdb'], overwrite = True)
def testModelBatch(imdb, model, detection_file):
    if 'words' in model['vocab']:
        N_WORDS = len(model['vocab']['words'])
    else:
        #we are using COCO 80 classes
        N_WORDS = 80
    batchedImList = getBatchedImList(imdb, model)

    sc = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
    mil_prob = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
    if model['inf_type'] == 'MILNoise':
        fields = ['mil', 'mil_max',\
          'qconds10', 'qconds11', 'noisy_comb_noimage']
        qdata_raw = np.zeros((len(batchedImList), 4 * N_WORDS),
                             dtype=np.float32)
        qdata_smax = np.zeros((len(batchedImList), 4 * N_WORDS),
                              dtype=np.float32)
        qconds10 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
        qconds11 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
        noisy_comb_noimage = np.zeros((len(batchedImList), N_WORDS),
                                      dtype=np.float32)

    for bind in range(len(batchedImList)):
        if model['inf_type'] != 'MILNoise':
            mil_prob[bind, :], sc[bind, :] = tm.test_batch(
                batchedImList[bind], model)
        else:
            fOut = tm.test_batch(batchedImList[bind], model, fields)
            mil_prob[bind, :] = fOut[0]
            sc[bind, :] = fOut[1]
            qconds10[bind, :] = fOut[2]
            qconds11[bind, :] = fOut[3]
            noisy_comb_noimage[bind, :] = fOut[4]
        sg_utils.tic_toc_print(
            60, 'test_batch : %d/%d (num_per_batch %d)' %
            (bind, len(batchedImList), len(batchedImList[0])))

    if detection_file is not None and model['inf_type'] != 'MILNoise':
        sg_utils.save_variables(detection_file, [sc, mil_prob], \
        ['sc', 'mil_prob'], overwrite = True)
    elif detection_file is not None:
        sg_utils.save_variables(detection_file, [sc, mil_prob, \
          qconds10, qconds11, noisy_comb_noimage], \
        ['sc', 'mil_prob',\
         'qconds10', 'qconds11',\
         'noisy_comb_noimage'], overwrite = True)
Example #8
def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file = None):
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)

  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
    human_prec[0,i], human_rec[0,i]  = cap_eval_utils.human_agreement(gt_label[:,i], num_references)    

    ind = np.where(R[:,i] >= human_rec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0,i] = P[ind[0], i];

    ind = np.where(P[:,i] >= human_prec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0,i] = R[ind[-1], i];

    ind = np.where(P[:,i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0,i]  = R[ind[-1], i];
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec};

  if eval_file is not None:
    eval_file_details_keys = details.keys();
    eval_file_details_vals = [details[x] for x in eval_file_details_keys];
    utils.save_variables(eval_file, eval_file_details_vals, eval_file_details_keys, overwrite=True);
  return details;
def output_words(imdb, detection_file, eval_file, vocab, \
  threshold_metric_name, output_metric_name, threshold, min_words, output_file = None, \
  functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are']):
    """
  Output the words as generated by the model. Loads the detections from
  detection_file, score precision mapping from eval_file and output the words
  in output_file. Words in the output_file are sorted according to the
  threshold_metric_name and report the output_metric_name.
  """
    dt = utils.load_variables(detection_file)
    pt = utils.load_variables(eval_file)

    is_functional = np.array(
        [x not in functional_words for x in vocab['words']])
    prec = np.zeros(dt['mil_prob'].shape)
    for jj in xrange(prec.shape[1]):
        prec[:,jj] = cap_eval_utils.compute_precision_score_mapping(\
          pt['details']['score'][:,jj].copy(), \
          pt['details']['precision'][:,jj].copy(), \
          dt['mil_prob'][:,jj])
        utils.tic_toc_print(
            1, 'compute precision score mapping: {:4d} / {:4d}'.format(
                jj, prec.shape[1]))
    dt['prec'] = prec

    out_all = []
    for i in xrange(imdb.num_images):
        out = output_words_image(dt[threshold_metric_name][i,:], dt[output_metric_name][i,:], \
          min_words, threshold, vocab, is_functional)
        out_all.append(out)
        utils.tic_toc_print(
            1, 'output words image: {:4d} / {:4d}'.format(i, imdb.num_images))

    if output_file is not None:
        with open(output_file, 'wt') as f:
            f.write('detection file %s\n' % (detection_file))
            f.write('eval file %s\n' % (eval_file))
            f.write('threshold %.2f; min_words %d\n' % (threshold, min_words))
            for i in xrange(imdb.num_images):
                f.write('{:d}: '.format(imdb.image_index[i]))
                out = out_all[i]
                for j in xrange(len(out)):
                    f.write('{:s} ({:.2f}), '.format(out[j][0], out[j][1]))
                f.write('\n')
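
The functional-word filtering inside output_words can be seen in isolation with a toy vocabulary (made-up words, precisions and threshold); note that the is_functional mask is True for content words, i.e. words not in the functional list. The min_words fallback handled by output_words_image is not reproduced here.

import numpy as np

toy_words = ['a', 'dog', 'on', 'red', 'frisbee']
functional_words = ['a', 'on', 'of', 'the', 'in']
toy_prec = np.array([0.95, 0.80, 0.90, 0.30, 0.70])

is_functional = np.array([w not in functional_words for w in toy_words])
keep = is_functional & (toy_prec >= 0.5)          # content words above threshold
print [w for w, k in zip(toy_words, keep) if k]   # ['dog', 'frisbee']
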
def benchmark_only_ap(vocab,
                      gt_label,
                      num_references,
                      mil_prob,
                      eval_file=None,
                      noref=False):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        if noref:
            P[:, i], R[:,
                       i], score[:,
                                 i], ap[0,
                                        i] = cap_eval_utils.calc_pr_ovr_noref(
                                            gt_label[:, i], mil_prob[:, i])
        else:
            P[:, i], R[:,
                       i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
                           gt_label[:, i], mil_prob[:, i], num_references)

    details = {
        'precision': P,
        'recall': R,
        'ap': ap,
        'score': score
    }

    if eval_file is not None:
        eval_file_details_keys = details.keys()
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return P, R, score, ap
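
A usage sketch for benchmark_only_ap on the same kind of toy inputs; unlike benchmark_ap it returns the raw arrays rather than a details dict, and with noref=True the num_references argument is not consulted (so None is passed here).

import numpy as np

toy_vocab = {'words': ['dog', 'red']}
toy_gt    = np.array([[1., 0.], [0., 1.], [1., 1.]])
toy_prob  = np.array([[0.9, 0.2], [0.1, 0.8], [0.7, 0.6]])

P, R, score, ap = benchmark_only_ap(toy_vocab, toy_gt, None, toy_prob, noref=True)
print 'mAP: %.2f' % (100 * ap.mean())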
Example #11
def benchmark_only_ap(vocab, gt_label, num_references, mil_prob, eval_file = None, noref = False):
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)

  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    if noref:
      P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr_noref(gt_label[:,i], mil_prob[:,i])
    else:
      P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score };

  if eval_file is not None:
    eval_file_details_keys = details.keys();
    eval_file_details_vals = [details[x] for x in eval_file_details_keys];
    utils.save_variables(eval_file, eval_file_details_vals, eval_file_details_keys, overwrite=True);
  return P, R, score, ap;
Example #12
def output_words(imdb, detection_file, eval_file, vocab, \
  threshold_metric_name, output_metric_name, threshold, min_words, output_file = None, \
  functional_words = ['a', 'on', 'of', 'the', 'in', 'with', 'and', 'is', 'to', 'an', 'two', 'at', 'next', 'are']):
  """
  Output the words as generated by the model. Loads the detections from
  detection_file, score precision mapping from eval_file and output the words
  in output_file. Words in the output_file are sorted according to the
  threshold_metric_name and report the output_metric_name.
  """
  dt = utils.load_variables(detection_file);
  pt = utils.load_variables(eval_file);
  
  is_functional = np.array([x not in functional_words for x in vocab['words']]);
  prec = np.zeros(dt['mil_prob'].shape)
  for jj in xrange(prec.shape[1]):
    prec[:,jj] = cap_eval_utils.compute_precision_score_mapping(\
      pt['details']['score'][:,jj].copy(), \
      pt['details']['precision'][:,jj].copy(), \
      dt['mil_prob'][:,jj]);
    utils.tic_toc_print(1, 'compute precision score mapping: {:4d} / {:4d}'.format(jj, prec.shape[1]))
  dt['prec'] = prec;

  out_all = [] 
  for i in xrange(imdb.num_images):
    out = output_words_image(dt[threshold_metric_name][i,:], dt[output_metric_name][i,:], \
      min_words, threshold, vocab, is_functional)
    out_all.append(out)
    utils.tic_toc_print(1, 'output words image: {:4d} / {:4d}'.format(i, imdb.num_images))
     
  if output_file is not None:
    with open(output_file, 'wt') as f:
      f.write('detection file %s\n'%(detection_file));
      f.write('eval file %s\n'%(eval_file));
      f.write('threshold %.2f; min_words %d\n'%(threshold, min_words));
      for i in xrange(imdb.num_images):
        f.write('{:d}: '.format(imdb.image_index[i]))
        out = out_all[i]
        for j in xrange(len(out)):
          f.write('{:s} ({:.2f}), '.format(out[j][0], out[j][1]))
        f.write('\n')
Example #13
def testModelBatch(imdb, model, detection_file):
  if 'words' in model['vocab']:
    N_WORDS = len(model['vocab']['words'])
  else:
    #we are using COCO 80 classes
    N_WORDS = 80;
  batchedImList = getBatchedImList(imdb, model);

  sc = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
  mil_prob = np.zeros((len(batchedImList), N_WORDS), dtype=np.float)
  if model['inf_type'] == 'MILNoise':
    fields = ['mil', 'mil_max',\
      'qconds10', 'qconds11', 'noisy_comb_noimage']
    qdata_raw = np.zeros((len(batchedImList), 4*N_WORDS), dtype=np.float32)
    qdata_smax = np.zeros((len(batchedImList), 4*N_WORDS), dtype=np.float32)
    qconds10 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
    qconds11 = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)
    noisy_comb_noimage = np.zeros((len(batchedImList), N_WORDS), dtype=np.float32)

  for bind in range(len(batchedImList)):
    if model['inf_type'] != 'MILNoise':
      mil_prob[bind,:], sc[bind,:] = tm.test_batch(batchedImList[bind], model)
    else:
      fOut = tm.test_batch(batchedImList[bind], model, fields)
      mil_prob[bind,:] = fOut[0];
      sc[bind,:] = fOut[1];
      qconds10[bind,:] = fOut[2];
      qconds11[bind,:] = fOut[3];
      noisy_comb_noimage[bind,:] = fOut[4];
    sg_utils.tic_toc_print(60, 'test_batch : %d/%d (num_per_batch %d)'%(bind, len(batchedImList), len(batchedImList[0])));

  if detection_file is not None and model['inf_type'] != 'MILNoise':
    sg_utils.save_variables(detection_file, [sc, mil_prob], \
    ['sc', 'mil_prob'], overwrite = True)
  elif detection_file is not None:
    sg_utils.save_variables(detection_file, [sc, mil_prob, \
      qconds10, qconds11, noisy_comb_noimage], \
    ['sc', 'mil_prob',\
     'qconds10', 'qconds11',\
     'noisy_comb_noimage'], overwrite = True)
Example #14
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file = None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob'];
  
  # Benchmark the output, and return a result struct
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)
  
  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan
  
  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
    human_prec[0,i], human_rec[0,i]  = cap_eval_utils.human_agreement(gt_label[:,i], num_references)
    
    ind = np.where(R[:,i] >= human_rec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0,i] = P[ind[0], i];

    ind = np.where(P[:,i] >= human_prec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0,i] = R[ind[-1], i];
    
    ind = np.where(P[:,i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0,i]  = R[ind[-1], i];
    # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100) 
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}; 
  
  # Collect statistics over the POS
  agg = [];
  for pos in list(set(vocab['poss'])):
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})  
  
  ind = range(len(vocab['words'])); pos = 'all';
  print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
      100*np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
    'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
    'human_prec': 100*np.mean(human_prec[0, ind])})  

  if eval_file is not None:
    try:
      utils.save_variables(eval_file, [details, agg],
        ['details', 'agg'], overwrite = True)
    except:
      print 'Error trying to save to pickle, will try hdf5 hack';
      namespace = globals().copy()
      namespace.update(locals())
      code.interact(local=namespace)

      eval_file_details = eval_file.replace('.pkl','_details.h5');
      eval_file_details_keys = details.keys();
      eval_file_details_vals = [details[x] for x in eval_file_details_keys];
      utils.save_variables(eval_file_details, eval_file_details_vals, eval_file_details_keys, overwrite=True);

      eval_file_agg = eval_file.replace('.pkl','_agg.h5');
      # agg is a list of dicts (one entry per POS plus 'all'), so it has no
      # .keys(); flatten it into per-field lists before saving
      eval_file_agg_keys = ['pos', 'ap', 'prec_at_human_rec', 'human_prec'];
      eval_file_agg_vals = [[a[k] for a in agg] for k in eval_file_agg_keys];
      utils.save_variables(eval_file_agg, eval_file_agg_vals, eval_file_agg_keys, overwrite=True);

  
  return details
Example #15
def benchmark(imdb, vocab, gt_label, num_references, detection_file, eval_file = None):
  # Get ground truth
  # dt = utils.scio.loadmat(detection_file)
  dt = utils.load_variables(detection_file)
  mil_prob = dt['mil_prob'];
  
  # Benchmark the output, and return a result struct
  n_words           = len(vocab['words'])
  P                 = np.zeros(mil_prob.shape, dtype = np.float)
  R                 = np.zeros(mil_prob.shape, dtype = np.float)
  score             = np.zeros(mil_prob.shape, dtype = np.float)
  ap                = np.zeros((1, n_words), dtype   = np.float)
  
  human_prec        = np.zeros((1, n_words), dtype   = np.float)
  human_rec         = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_human_prec = np.zeros((1, n_words), dtype   = np.float)
  rec_at_half_prec  = np.zeros((1, n_words), dtype   = np.float)
  
  prec_at_human_rec[...] = np.nan
  
  for i in range(len(vocab['words'])):
    utils.tic_toc_print(1, 'benchmarking : {:4d} / {:4d}'.format(i, n_words))
    P[:,i], R[:,i], score[:,i], ap[0,i] = cap_eval_utils.calc_pr_ovr(gt_label[:,i], mil_prob[:,i], num_references)
    human_prec[0,i], human_rec[0,i]  = cap_eval_utils.human_agreement(gt_label[:,i], num_references)
    
    ind = np.where(R[:,i] >= human_rec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      prec_at_human_rec[0,i] = P[ind[0], i];

    ind = np.where(P[:,i] >= human_prec[0,i])[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_human_prec[0,i] = R[ind[-1], i];
    
    ind = np.where(P[:,i] >= 0.5)[0]
    if len(ind) > 0:
      ind = np.sort(ind)
      rec_at_half_prec[0,i]  = R[ind[-1], i];
    # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100) 
  
  details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
    'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
    'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}; 
  
  # Collect statistics over the POS
  agg = [];
  for pos in list(set(vocab['poss'])):
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})  
  
  ind = range(len(vocab['words'])); pos = 'all';
  print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
    format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
      100*np.mean(human_prec[0, ind]))
  agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
    'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
    'human_prec': 100*np.mean(human_prec[0, ind])})  

  if eval_file is not None:
    utils.save_variables(eval_file, [details, agg, vocab, imdb],
      ['details', 'agg', 'vocab', 'imdb'], overwrite = True)
  
  return details
import os
import sg_utils as utils
import coco_voc
import shutil

# Make directories
for i in xrange(60):
  utils.mkdir_if_missing(os.path.join('..', 'data', 'images', '{:02d}'.format(i)))

# Copy files over
sets = ['train', 'val', 'test']
for set_ in sets:
  imdb = coco_voc.coco_voc(set_)
  for i in xrange(imdb.num_images):
    in_file = os.path.join('../data', set_ + '2014', \
      'COCO_{}2014_{:012d}.jpg'.format(set_, imdb.image_index[i])); 
    out_file = imdb.image_path_at(i)
    # print in_file, out_file
    shutil.copyfile(in_file, out_file)
    utils.tic_toc_print(1, ' Copying images [{}]: {:06d} / {:06d}\n'.format(set_, i, imdb.num_images));
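
For reference, the '{:02d}' subdirectories created above shard images by image index; a small sketch of the rule follows (a hypothetical helper, matching the int(math.floor(ind)/1e4) expression used when writing labels further down).

import math

def shard_dir(image_index):
    # two-digit shard = floor(index / 1e4)
    return '{:02d}'.format(int(math.floor(image_index) / 1e4))

print shard_dir(9999), shard_dir(10000), shard_dir(581921)   # 00 01 58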
Example #17
import os
import sg_utils as utils
import coco_voc
import shutil

# Make directories
for i in xrange(60):
  utils.mkdir_if_missing(os.path.join('data', 'images', '{:02d}'.format(i)))

# Copy files over
sets = ['train', 'val', 'test']
for set_ in sets:
  imdb = coco_voc.coco_voc(set_)
  for i in xrange(imdb.num_images):
    in_file = os.path.join(set_ + '2014', \
      'COCO_{}2014_{:012d}.jpg'.format(set_, imdb.image_index[i])); 
    out_file = imdb.image_path_at(i)
    # print in_file, out_file
    shutil.copyfile(in_file, out_file)
    utils.tic_toc_print(1, ' Copying images [{}]: {:06d} / {:06d}\n'.format(set_, i, imdb.num_images));
def benchmark(imdb,
              vocab,
              gt_label,
              num_references,
              detection_file,
              eval_file=None):
    # Get ground truth
    # dt = utils.scio.loadmat(detection_file)
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float)
    R = np.zeros(mil_prob.shape, dtype=np.float)
    score = np.zeros(mil_prob.shape, dtype=np.float)
    ap = np.zeros((1, n_words), dtype=np.float)

    human_prec = np.zeros((1, n_words), dtype=np.float)
    human_rec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]
        # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score, \
      'prec_at_human_rec': prec_at_human_rec, 'rec_at_human_prec': rec_at_human_prec, \
      'human_prec': human_prec, 'human_rec': human_rec, 'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
          format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
            100*np.mean(human_prec[0, ind]))
        agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
          'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
          'human_prec': 100*np.mean(human_prec[0, ind])})

    ind = range(len(vocab['words']))
    pos = 'all'
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})

    if eval_file is not None:
        try:
            utils.save_variables(eval_file, [details, agg], ['details', 'agg'],
                                 overwrite=True)
        except:
            print 'Error trying to save to pickle, will try hdf5 hack'
            namespace = globals().copy()
            namespace.update(locals())
            code.interact(local=namespace)

            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            eval_file_details_keys = details.keys()
            eval_file_details_vals = [
                details[x] for x in eval_file_details_keys
            ]
            utils.save_variables(eval_file_details,
                                 eval_file_details_vals,
                                 eval_file_details_keys,
                                 overwrite=True)

            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            # agg is a list of dicts (one entry per POS plus 'all'), so it has
            # no .keys(); flatten it into per-field lists before saving
            eval_file_agg_keys = ['pos', 'ap', 'prec_at_human_rec', 'human_prec']
            eval_file_agg_vals = [[a[k] for a in agg]
                                  for k in eval_file_agg_keys]
            utils.save_variables(eval_file_agg,
                                 eval_file_agg_vals,
                                 eval_file_agg_keys,
                                 overwrite=True)

    return details
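
A toy sketch of the per-POS aggregation done at the end of benchmark (made-up POS tags and AP values; a vocab['poss'] list holding one tag per word is assumed, as in the loop above).

import numpy as np

toy_vocab = {'words': ['dog', 'red', 'run'], 'poss': ['NN', 'JJ', 'VB']}
toy_ap = np.array([[0.6, 0.4, 0.5]])

for pos in sorted(set(toy_vocab['poss'])):
    ind = [i for i, x in enumerate(toy_vocab['poss']) if x == pos]
    print '    %-5s [%4d]     :     %5.2f' % (pos, len(ind), 100 * np.mean(toy_ap[0, ind]))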
Example #19
      
      # Compute targets for the file
      counts = preprocess.get_vocab_counts(imdb[i].image_index, \
          imdb[i].coco_caption_data, 5, vocab)
      
      if args.write_labels:
        label_file = os.path.join(output_dir, 'labels_' + sets[i] + '.h5') 
        print 'Writing labels to {}'.format(label_file)
        with h5py.File(label_file, 'w') as f:
          for j in xrange(imdb[i].num_images):
            ind = imdb[i].image_index[j]
            ind_str = '{:02d}/{:d}'.format(int(math.floor(ind)/1e4), ind)
            l = f.create_dataset('/labels-{}'.format(ind_str), (1, 1, counts.shape[1], 1), dtype = 'f')
            c = counts[j,:].copy(); c = c > 0; c = c.astype(np.float32); c = c.reshape((1, 1, c.size, 1))
            l[...] = c
            utils.tic_toc_print(1, 'write labels {:6d} / {:6d}'.format(j, imdb[i].num_images)) 

      if args.write_splits:
        split_file = os.path.join(output_dir, sets[i] + '.ids') 
        print 'Writing splits to {}'.format(split_file)
        with open(split_file, 'wt') as f:
          for j in xrange(imdb[i].num_images):
            ind = imdb[i].image_index[j]
            ind_str = '{:02d}/{:d}'.format(int(math.floor(ind)/1e4), ind)
            f.write('{}\n'.format(ind_str))

      # Print the command to start training

  if args.task == 'test_model':
    imdb = coco_voc.coco_voc(args.test_set)
    mean = np.array([[[ 103.939, 116.779, 123.68]]]);