# NOTE: these snippets come from a captioning-evaluation codebase. `utils`
# (a.k.a. sg_utils), `cap_eval_utils` and `preprocess` are project-local helper
# modules assumed to be importable; they are not standard libraries.
import code

import numpy as np

import cap_eval_utils
import preprocess
import sg_utils

utils = sg_utils  # the functions below refer to this module under both names


def benchmark_ap(vocab, gt_label, num_references, mil_prob, eval_file=None):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float64)
    R = np.zeros(mil_prob.shape, dtype=np.float64)
    score = np.zeros(mil_prob.shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)

    human_prec = np.zeros((1, n_words), dtype=np.float64)
    human_rec = np.zeros((1, n_words), dtype=np.float64)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
               'prec_at_human_rec': prec_at_human_rec,
               'rec_at_human_prec': rec_at_human_prec,
               'human_prec': human_prec, 'human_rec': human_rec,
               'rec_at_half_prec': rec_at_half_prec}

    if eval_file is not None:
        eval_file_details_keys = list(details.keys())
        eval_file_details_vals = [details[x] for x in eval_file_details_keys]
        utils.save_variables(eval_file,
                             eval_file_details_vals,
                             eval_file_details_keys,
                             overwrite=True)
    return details
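
# The block below is a minimal, self-contained sketch (not part of the original
# module) of how the three operating points computed inside benchmark_ap are
# read off a precision/recall curve. The toy P/R values and the human
# precision/recall numbers are made up for illustration; in benchmark_ap they
# come from cap_eval_utils.calc_pr_ovr and cap_eval_utils.human_agreement.
def _operating_points_sketch():
    P_toy = np.array([1.00, 0.80, 0.66, 0.55, 0.45])  # precision per cut-off
    R_toy = np.array([0.10, 0.30, 0.50, 0.70, 0.90])  # recall per cut-off
    human_prec_toy, human_rec_toy = 0.63, 0.60

    # precision at the first cut-off whose recall reaches human recall
    ind = np.where(R_toy >= human_rec_toy)[0]
    prec_at_human_rec = P_toy[ind[0]] if len(ind) > 0 else np.nan

    # recall at the last cut-off whose precision still matches human precision
    ind = np.where(P_toy >= human_prec_toy)[0]
    rec_at_human_prec = R_toy[ind[-1]] if len(ind) > 0 else 0.0

    # recall at the last cut-off with precision >= 0.5
    ind = np.where(P_toy >= 0.5)[0]
    rec_at_half_prec = R_toy[ind[-1]] if len(ind) > 0 else 0.0

    # -> 0.55, 0.50, 0.70 for the toy curve above
    return prec_at_human_rec, rec_at_human_prec, rec_at_half_prec
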
def benchmark_one_word(i, P, R, score, ap, human_rec, human_prec,
                       prec_at_human_rec, rec_at_human_prec, rec_at_half_prec,
                       gt_label, mil_prob, num_references):
    P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
        gt_label[:, i], mil_prob[:, i], num_references)
    human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
        gt_label[:, i], num_references)

    ind = np.where(R[:, i] >= human_rec[0, i])[0]
    if len(ind) > 0:
        ind = np.sort(ind)
        prec_at_human_rec[0, i] = P[ind[0], i]

    ind = np.where(P[:, i] >= human_prec[0, i])[0]
    if len(ind) > 0:
        ind = np.sort(ind)
        rec_at_human_prec[0, i] = R[ind[-1], i]

    ind = np.where(P[:, i] >= 0.5)[0]
    if len(ind) > 0:
        ind = np.sort(ind)
        rec_at_half_prec[0, i] = R[ind[-1], i]
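
# A hypothetical driver (not in the source) showing how benchmark_one_word is
# meant to be called: the caller pre-allocates the per-word output arrays and
# the function fills in column i. It assumes the same module context as above
# (numpy as np, cap_eval_utils) and that gt_label / mil_prob are
# (n_images, n_words) arrays already in memory.
def benchmark_ap_via_one_word(vocab, gt_label, num_references, mil_prob):
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float64)
    R = np.zeros(mil_prob.shape, dtype=np.float64)
    score = np.zeros(mil_prob.shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)
    human_prec = np.zeros((1, n_words), dtype=np.float64)
    human_rec = np.zeros((1, n_words), dtype=np.float64)
    prec_at_human_rec = np.full((1, n_words), np.nan)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

    for i in range(n_words):
        benchmark_one_word(i, P, R, score, ap, human_rec, human_prec,
                           prec_at_human_rec, rec_at_human_prec,
                           rec_at_half_prec, gt_label, mil_prob,
                           num_references)

    return {'precision': P, 'recall': R, 'ap': ap, 'score': score,
            'prec_at_human_rec': prec_at_human_rec,
            'rec_at_human_prec': rec_at_human_prec,
            'human_prec': human_prec, 'human_rec': human_rec,
            'rec_at_half_prec': rec_at_half_prec}
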
def benchmark(imdb,
              vocab,
              gt_label,
              num_references,
              detection_file,
              eval_file=None):
    # Get ground truth
    # dt = utils.scio.loadmat(detection_file)
    dt = utils.load_variables(detection_file)
    mil_prob = dt['mil_prob']

    # Benchmark the output, and return a result struct
    n_words = len(vocab['words'])
    P = np.zeros(mil_prob.shape, dtype=np.float64)
    R = np.zeros(mil_prob.shape, dtype=np.float64)
    score = np.zeros(mil_prob.shape, dtype=np.float64)
    ap = np.zeros((1, n_words), dtype=np.float64)

    human_prec = np.zeros((1, n_words), dtype=np.float64)
    human_rec = np.zeros((1, n_words), dtype=np.float64)

    prec_at_human_rec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_human_prec = np.zeros((1, n_words), dtype=np.float64)
    rec_at_half_prec = np.zeros((1, n_words), dtype=np.float64)

    prec_at_human_rec[...] = np.nan

    for i in range(len(vocab['words'])):
        utils.tic_toc_print(1,
                            'benchmarking : {:4d} / {:4d}'.format(i, n_words))
        P[:, i], R[:, i], score[:, i], ap[0, i] = cap_eval_utils.calc_pr_ovr(
            gt_label[:, i], mil_prob[:, i], num_references)
        human_prec[0, i], human_rec[0, i] = cap_eval_utils.human_agreement(
            gt_label[:, i], num_references)

        ind = np.where(R[:, i] >= human_rec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            prec_at_human_rec[0, i] = P[ind[0], i]

        ind = np.where(P[:, i] >= human_prec[0, i])[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_human_prec[0, i] = R[ind[-1], i]

        ind = np.where(P[:, i] >= 0.5)[0]
        if len(ind) > 0:
            ind = np.sort(ind)
            rec_at_half_prec[0, i] = R[ind[-1], i]
        # # print '{:20s}: {:.3f}'.format(vocab['words'][i], ap[0,i]*100)

    details = {'precision': P, 'recall': R, 'ap': ap, 'score': score,
               'prec_at_human_rec': prec_at_human_rec,
               'rec_at_human_prec': rec_at_human_prec,
               'human_prec': human_prec, 'human_rec': human_rec,
               'rec_at_half_prec': rec_at_half_prec}

    # Collect statistics over the POS
    agg = []
    for pos in list(set(vocab['poss'])):
        ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
        print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
          format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
            100*np.mean(human_prec[0, ind]))
        agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
          'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
          'human_prec': 100*np.mean(human_prec[0, ind])})

    ind = range(len(vocab['words']))
    pos = 'all'
    print "    {:5s} [{:4d}]     :     {:5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))
    agg.append({'pos': pos, 'ap': 100*np.mean(ap[0, ind]), \
      'prec_at_human_rec': 100*np.mean(prec_at_human_rec[0, ind]), \
      'human_prec': 100*np.mean(human_prec[0, ind])})

    if eval_file is not None:
        try:
            utils.save_variables(eval_file, [details, agg], ['details', 'agg'],
                                 overwrite=True)
        except Exception:
            print('Error trying to save to pickle, will try hdf5 hack')
            namespace = globals().copy()
            namespace.update(locals())
            code.interact(local=namespace)

            eval_file_details = eval_file.replace('.pkl', '_details.h5')
            eval_file_details_keys = list(details.keys())
            eval_file_details_vals = [
                details[x] for x in eval_file_details_keys
            ]
            utils.save_variables(eval_file_details,
                                 eval_file_details_vals,
                                 eval_file_details_keys,
                                 overwrite=True)

            eval_file_agg = eval_file.replace('.pkl', '_agg.h5')
            # `agg` is a list of per-POS dicts, so flatten it into one list per
            # key before handing it to save_variables.
            eval_file_agg_keys = list(agg[0].keys())
            eval_file_agg_vals = [[a[k] for a in agg] for k in eval_file_agg_keys]
            utils.save_variables(eval_file_agg,
                                 eval_file_agg_vals,
                                 eval_file_agg_keys,
                                 overwrite=True)

    return details

# Vocabulary construction and human-agreement analysis script. `imset` (an
# image-set name) and `coco_caps` (a COCO captions API handle) are assumed to
# have been set up earlier and are not defined in this snippet.

# Collapse fine-grained Penn Treebank POS tags into coarse classes.
mapping = {'NNS': 'NN', 'NNP': 'NN', 'NNPS': 'NN', 'NN': 'NN',
           'VB': 'VB', 'VBD': 'VB', 'VBN': 'VB', 'VBZ': 'VB', 'VBP': 'VB', 'VBG': 'VB',
           'JJR': 'JJ', 'JJS': 'JJ', 'JJ': 'JJ', 'DT': 'DT', 'PRP': 'PRP',
           'PRP$': 'PRP', 'IN': 'IN'}
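
# Illustration only (not in the source): what the mapping above does to a few
# made-up Penn Treebank-tagged tokens. preprocess.get_vocab presumably applies
# this kind of collapse when building the vocabulary.
_tagged_example = [('dogs', 'NNS'), ('running', 'VBG'), ('faster', 'JJR'), ('the', 'DT')]
_collapsed_example = [(w, mapping.get(t, t)) for w, t in _tagged_example]
# -> [('dogs', 'NN'), ('running', 'VB'), ('faster', 'JJ'), ('the', 'DT')]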

# punctuations to be removed from the sentences
punctuations = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", \
  ".", "?", "!", ",", ":", "-", "--", "...", ";"]

vocab = preprocess.get_vocab(imset, coco_caps, punctuations, mapping)

vocab_keys = list(vocab.keys())
sg_utils.save_variables('vocab_' + imset + '.pkl',
                        [vocab[x] for x in vocab_keys],
                        vocab_keys,
                        overwrite=True)

##
N_WORDS = 1000
vocab = preprocess.get_vocab_top_k(vocab, N_WORDS)
image_ids = coco_caps.getImgIds()
counts = preprocess.get_vocab_counts(image_ids, coco_caps, 5, vocab)
P = np.zeros((N_WORDS, 1), dtype=np.float64)
R = np.zeros((N_WORDS, 1), dtype=np.float64)
for i, w in enumerate(vocab['words']):
    P[i], R[i] = cap_eval_utils.human_agreement(counts[:, i], 5)
    print(w, P[i], R[i])

for pos in list(set(vocab['poss'])):
    ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
    print "{:5s} [{:3d}] : {:.2f} {:.2f} ".format(pos, len(ind),
                                                  100 * np.mean(P[ind]),
                                                  100 * np.mean(R[ind]))