def getLabels(imdb, model, solverProtoName):
  # Ground-truth word labels are cached on disk, keyed by dataset name and split;
  # they are recomputed from the COCO captions only if the cache file is missing.
  labelFileName = meu.get_model_label_filename(solverProtoName)
  labelFileName = labelFileName % (imdb.name, imdb.split)
  if not os.path.exists(labelFileName):
    print 'creating ground truth labels for evaluation'
    imBnames, imIds = imdb.get_all_image_bnames_ids()
    gtLabel = preprocess.get_vocab_counts(imIds, imdb._coco_caption_data, model['vocab'])
    sg_utils.save(labelFileName, [gtLabel], ['gtLabel'], overwrite=True)
  else:
    gtLabel = sg_utils.load(labelFileName)['gtLabel']
  return gtLabel
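# Hypothetical usage sketch (not from these scripts): the split name and the
# solver prototxt path below are placeholders, and `vocab` is assumed to be the
# vocabulary built by the preprocessing snippet that follows; getLabels only
# reads model['vocab'] here and caches gtLabel on disk on the first call.
imdb = coco_voc.coco_voc('val')
model = {'vocab': vocab}
gt_label = getLabels(imdb, model, 'mil_finetune_solver.prototxt')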
# Map fine-grained POS tags onto coarse classes (nouns, verbs, adjectives, ...).
mapping = {'NNS': 'NN', 'NNP': 'NN', 'NNPS': 'NN', 'NN': 'NN',
           'VB': 'VB', 'VBD': 'VB', 'VBN': 'VB', 'VBZ': 'VB', 'VBP': 'VB', 'VBG': 'VB',
           'JJR': 'JJ', 'JJS': 'JJ', 'JJ': 'JJ',
           'DT': 'DT', 'PRP': 'PRP', 'PRP$': 'PRP', 'IN': 'IN'}

# Punctuation tokens to be removed from the sentences.
punctuations = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
                ".", "?", "!", ",", ":", "-", "--", "...", ";"]

# Build the vocabulary from the captions and cache it to disk.
vocab = preprocess.get_vocab(imset, coco_caps, punctuations, mapping)
sg_utils.save_variables('vocab_' + imset + '.pkl',
                        [vocab[x] for x in vocab.keys()],
                        vocab.keys(),
                        overwrite=True)

# Keep the N_WORDS most frequent words and count, per image, how many of the
# 5 reference captions contain each word.
N_WORDS = 1000
vocab = preprocess.get_vocab_top_k(vocab, N_WORDS)
image_ids = coco_caps.getImgIds()
counts = preprocess.get_vocab_counts(image_ids, coco_caps, 5, vocab)

# Human agreement (precision / recall) for each word, then averaged per POS class.
P = np.zeros((N_WORDS, 1), dtype=np.float)
R = np.zeros((N_WORDS, 1), dtype=np.float)
for i, w in enumerate(vocab['words']):
  P[i], R[i] = cap_eval_utils.human_agreement(counts[:, i], 5)
  print w, P[i], R[i]

for pos in list(set(vocab['poss'])):
  ind = [i for i, x in enumerate(vocab['poss']) if pos == x]
  print "{:5s} [{:3d}] : {:.2f} {:.2f}".format(pos, len(ind), 100 * np.mean(P[ind]), 100 * np.mean(R[ind]))
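# A minimal sketch of a leave-one-out human-agreement computation, included only
# to illustrate what the per-word precision/recall numbers above could mean; it
# encodes an assumed protocol and is NOT the repo's cap_eval_utils.human_agreement.
# word_counts[k] is how many of the n reference captions of image k contain the word.
def human_agreement_sketch(word_counts, n):
  c = np.asarray(word_counts, dtype=np.float64)
  # Hold out each caption in turn: the held-out caption plays the role of the
  # "prediction", the remaining n-1 captions are the references.
  pred_pos = c.sum()        # hold-outs whose held-out caption contains the word
  tp = c[c >= 2].sum()      # ... and at least one remaining caption contains it too
  gt_pos = (n * (c >= 2) + (n - 1) * (c == 1)).sum()  # hold-outs with the word in the references
  precision = tp / max(pred_pos, 1.0)
  recall = tp / max(gt_pos, 1.0)
  return precision, recall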
    sys.exit(1)

  args = parser.parse_args()
  return args


if __name__ == '__main__':
  args = parse_args()
  print('Called with args:')
  print(args)

  imdb = coco_voc.coco_voc('test')
  vocab = utils.load_variables(args.vocab_file)
  gt_label = preprocess.get_vocab_counts(imdb.image_index,
                                         imdb.coco_caption_data, 5, vocab)

  # Evaluate the detections against the caption-derived ground truth; the eval
  # file is written next to the detection file.
  det_file = args.det_file
  det_dir = os.path.dirname(det_file)
  eval_file = os.path.join(det_dir, imdb.name + '_eval.pkl')
  benchmark(imdb, vocab, gt_label, 5, det_file, eval_file=eval_file)

  # Labels computed from the COCO instance annotations using map_file.
  map_file = args.map_file
  gt_label_det = preprocess.get_vocab_counts_det(imdb.image_index,
                                                 imdb.coco_instances_data, map_file, vocab)
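# A minimal argparse sketch, consistent with the attributes used above
# (args.vocab_file, args.det_file, args.map_file); the actual flags, help
# strings and defaults of the repo's script may differ.
def parse_args_sketch():
  parser = argparse.ArgumentParser(description='Benchmark word detection outputs on COCO')
  parser.add_argument('--vocab_file', type=str, help='vocabulary pickle file')
  parser.add_argument('--det_file', type=str, help='detection file to evaluate')
  parser.add_argument('--map_file', type=str, help='word to COCO category mapping file')
  if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
  return parser.parse_args()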
  caffe.set_device(args.gpu_id)

  # Load the vocabulary
  vocab = utils.load_variables(args.vocab_file)

  if args.task == 'compute_targets':
    imdb = []
    output_dir = args.train_dir
    sets = ['train', 'val']
    for i, imset in enumerate([args.train_set, args.val_set]):
      imdb.append(coco_voc.coco_voc(imset))
      print 'Loaded dataset {:s}'.format(imdb[i].name)

      # Compute per-image word counts for this split
      counts = preprocess.get_vocab_counts(imdb[i].image_index,
                                           imdb[i].coco_caption_data, 5, vocab)

      if args.write_labels:
        label_file = os.path.join(output_dir, 'labels_' + sets[i] + '.h5')
        print 'Writing labels to {}'.format(label_file)
        with h5py.File(label_file, 'w') as f:
          for j in xrange(imdb[i].num_images):
            ind = imdb[i].image_index[j]
            ind_str = '{:02d}/{:d}'.format(int(math.floor(ind)/1e4), ind)
            l = f.create_dataset('/labels-{}'.format(ind_str),
                                 (1, 1, counts.shape[1], 1), dtype='f')
            # Binarize the counts: the target is 1 if the word occurs in any of
            # the reference captions for this image, 0 otherwise.
            c = counts[j, :].copy()
            c = c > 0
            c = c.astype(np.float32)
            c = c.reshape((1, 1, c.size, 1))
            l[...] = c
            utils.tic_toc_print(1, 'write labels {:6d} / {:6d}'.format(j, imdb[i].num_images))

      if args.write_splits:
        split_file = os.path.join(output_dir, sets[i] + '.ids')