Code example #1
import torch

# BigFile is a project-local reader for packed feature files; assumed importable here.
class VisionDataset(torch.utils.data.Dataset):
    def __init__(self, filename):
        # BigFile exposes .names (a list of ids) and .read_one(id) -> feature vector.
        self.vis_feat_file = BigFile(filename)
        self.vis_ids = self.vis_feat_file.names

    def __getitem__(self, index):
        vis_tensor = self.vis_feat_file.read_one(self.vis_ids[index])
        return self.vis_ids[index], torch.Tensor(vis_tensor)

    def get_by_name(self, name):
        vis_tensor = self.vis_feat_file.read_one(name)
        return torch.Tensor(vis_tensor)

    def __len__(self):
        return len(self.vis_ids)
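
A minimal sketch of consuming this dataset with a standard PyTorch DataLoader; the feature directory below is a placeholder, and BigFile is assumed to expose .names and .read_one exactly as used above.

from torch.utils.data import DataLoader

feat_dir = 'toydata/FeatureData/f1'  # hypothetical path to a BigFile feature directory
dataset = VisionDataset(feat_dir)
loader = DataLoader(dataset, batch_size=8, shuffle=False)

for vis_ids, vis_tensors in loader:
    # Default collation yields a tuple of id strings and a (batch, dim) float tensor.
    print(vis_ids[0], vis_tensors.shape)
    break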
Code example #2
import torch
from torch.utils import data

class VisionDataset(data.Dataset):
    def __init__(self, params):
        # Accept either a path to a feature file or an already-constructed BigFile.
        self.vis_feat_file = BigFile(params['vis_feat']) if isinstance(
            params['vis_feat'], str) else params['vis_feat']
        self.vis_ids = self.vis_feat_file.names
        self.length = len(self.vis_ids)

    def __getitem__(self, index):
        vis_id = self.vis_ids[index]
        vis_tensor = self.get_feat_by_id(vis_id)
        return vis_tensor, index, vis_id

    def get_feat_by_id(self, vis_id):
        vis_tensor = torch.Tensor(self.vis_feat_file.read_one(vis_id))
        return vis_tensor

    def __len__(self):
        return self.length
Code example #3
class BucketDataProvider(object):
    """TensorFlow Data Provider with Buckets"""
    def __init__(self, collection, vocab_file, feature, language,
                 flag_shuffle=False, fluency_threshold=DEFAULT_FLUENCY_U,
                 rootpath=ROOT_PATH):
        self.language = language
        self.anno_file_path = utility.get_sent_file(collection, language, rootpath)
        self.fluency_threshold = fluency_threshold
        self.textbank = TextBank(vocab_file)
        assert self.textbank.vocab[TOKEN_PAD] == 0
        self.vf_reader = BigFile(utility.get_feat_dir(collection, feature, rootpath))
        self.vf_names = set(self.vf_reader.names)
        self.vf_size = self.vf_reader.ndims
        self.flag_shuffle = flag_shuffle
        self._load_data()

    def shuffle_data_queue(self):
        random.shuffle(self._data_queue)

    def generate_batches(self, batch_size, buckets):
        """Return a list generator of mini-batches of training data."""
        # create Batches
        batches = []
        for max_seq_len in buckets:
            batches.append(Batch(batch_size, max_seq_len, self.vf_size, self.textbank.vocab[TOKEN_BOS]))
        
        # shuffle if necessary
        if self.flag_shuffle:
            np.random.shuffle(self._data_queue)
        # scan data queue
        for data in self._data_queue:
            sentence = data['sentence']
            # Load the visual feature vector for this image id.
            visual_features = np.array(self.vf_reader.read_one(data['image_id']))
            if len(sentence) >= buckets[-1]:
                feed_res = batches[-1].feed_and_vomit(visual_features, sentence)
                ind_buc = len(buckets) - 1
            else:
                for (ind_b, batch) in enumerate(batches):
                    if len(sentence) < batch.max_seq_len:
                        feed_res = batches[ind_b].feed_and_vomit(visual_features, sentence)
                        ind_buc = ind_b
                        break
            if feed_res:
                yield (ind_buc,) + feed_res
                batches[ind_buc].empty()

            
    def _load_data(self, verbose=True):
        logger.debug('Loading data')
        self._data_queue = []
        raw_lines = codecs.open(self.anno_file_path, 'r', 'utf-8').readlines()
        # Re-decode with utf-8-sig to strip any byte-order mark.
        annos = [line.encode('utf-8').decode('utf-8-sig') for line in raw_lines]

        for (ind_a, line) in enumerate(annos):
            data = {}
            sid, sent = line.strip().split(" ", 1)
            imgid = sid.strip().split("#", 1)[0]
            assert imgid in self.vf_names, '%s not in feature data' % imgid
            data['image_id'] = imgid

            # Encode the sentence as vocabulary ids.
            tokens = TextTool.tokenize(sent, self.language)
            data['sentence'] = self.textbank.encode_tokens(tokens, flag_add_bos=False)
            self._data_queue.append(data)
            if verbose and (ind_a + 1) % 20000 == 0:
                logger.debug('%d/%d annotation', ind_a + 1, len(annos))
        random.shuffle(self._data_queue)
        
        nr_of_images = len(set([data['image_id'] for data in self._data_queue]))
        logger.info('%d images, %d sentences from %s', nr_of_images, len(self._data_queue), self.anno_file_path)
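
A sketch of how generate_batches might be consumed in a training loop. The bucket sizes and collection/vocabulary names are placeholders, and the exact contents of feed come from the project's Batch.feed_and_vomit, which is not shown here.

provider = BucketDataProvider(collection='toydata', vocab_file='vocab.txt',
                              feature='f1', language='en', flag_shuffle=True)
buckets = [8, 16, 32]  # hypothetical maximum sequence lengths, ascending

for step, out in enumerate(provider.generate_batches(batch_size=64, buckets=buckets)):
    ind_buc, feed = out[0], out[1:]  # bucket index, then whatever feed_and_vomit returned
    # ... run one training step on `feed` ...
    if step % 100 == 0:
        print('step %d drew a full batch from bucket %d' % (step, ind_buc))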
Code example #4
File: test_models.py  Project: sddai/fluent-cap
def main(unused_args):

  length_normalization_factor = FLAGS.length_normalization_factor

  # Load model configuration
  config_path = os.path.join(os.path.dirname(__file__), 'model_conf', FLAGS.model_name + '.py')
  config = utility.load_config(config_path)

  config.trainCollection = FLAGS.train_collection
  config.word_cnt_thr = FLAGS.word_cnt_thr
  config.rootpath = FLAGS.rootpath

  train_collection = FLAGS.train_collection
  test_collection = FLAGS.test_collection
  rootpath = FLAGS.rootpath  # local alias used for path construction below
  overwrite = FLAGS.overwrite
  feature = FLAGS.vf_name


  img_set_file = os.path.join(rootpath, test_collection, 'VideoSets', '%s.txt' % test_collection)
  if not os.path.exists(img_set_file):
      img_set_file = os.path.join(rootpath, test_collection, 'ImageSets', '%s.txt' % test_collection)
  img_list = [line.strip() for line in open(img_set_file)]

  # have visual feature ready
  vf_dir = utility.get_feat_dir(test_collection, feature, rootpath)
  vf_reader = BigFile( vf_dir )

  textbank = TextBank(utility.get_train_vocab_file(FLAGS))
  config.vocab_size = len(textbank.vocab)
  config.vf_size = int(open(os.path.join(vf_dir, 'shape.txt')).read().split()[1])

  model_dir = utility.get_model_dir(FLAGS)
  output_dir = utility.get_pred_dir(FLAGS)

  checkpoint_style = FLAGS.checkpoint_style

  if checkpoint_style == 'file':
    #output_per_filename = 'model_perf_in_topk_%d_%s' % (FLAGS.top_k, FLAGS.eval_model_list_file)
    # read validated top models
    validation_output_dir = utility.get_sim_dir(FLAGS)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    eval_model_list_file = os.path.join(validation_output_dir, 'loss_info.txt') #FLAGS.eval_model_list_file)
    shutil.copy(eval_model_list_file, output_dir)
    test_iter_list = []
    for line in open(eval_model_list_file).readlines()[:FLAGS.top_k]:
      iter_current = int(line.strip().split()[0])
      test_iter_list.append(iter_current)

  elif checkpoint_style == 'iter_interval':
    #output_per_filename =  'model_perf_in_%s' % FLAGS.eval_stat
    test_iter_list = range(*[int(x) for x in FLAGS.eval_stat.split("-")])
  elif checkpoint_style == 'iter_num':
    #output_per_filename =  'model_perf_in_iter_%d' % FLAGS.iter_num
    test_iter_list = [FLAGS.iter_num]

  with_image_embedding = (FLAGS.with_image_embedding != 0)
  g = tf.Graph()
  with g.as_default():
    model = InferenceWrapper(config=config,model_dir=model_dir,
                             gpu_memory_fraction=FLAGS.gpu_memory_fraction,
                             gpu=FLAGS.gpu,
                             with_image_embedding=with_image_embedding)
    model.build_model()
  
  for k, iter_n in enumerate(test_iter_list):
    model_path = os.path.join(model_dir, 'variables', 'model_%d.ckpt' % iter_n)
    if not os.path.exists(model_path + '.meta'):
      logger.error('Model path: %s', model_path)
      logger.error('Cannot load model file, exiting')
      sys.exit(0)

    top_one_pred_sent_file = os.path.join(output_dir, 'top%d' % k, 'top_one_pred_sent.txt')
    top_n_pred_sent_file = os.path.join(output_dir, 'top%d' % k, 'top_n_pred_sent.txt')
    # perf_file = os.path.join(output_dir, 'model_%d.ckpt' % iter_n, 'perf.txt')

    if os.path.exists(top_one_pred_sent_file) and not overwrite:
      # write existing perf file and print out
      logger.info('%s exists. skip', top_one_pred_sent_file)
      continue

    if not os.path.exists(os.path.split(top_one_pred_sent_file)[0]):
      os.makedirs(os.path.split(top_one_pred_sent_file)[0])

    logger.info('save results to %s', top_one_pred_sent_file)

    # load the trained model
    generator = CaptionGenerator(config, model, length_normalization_factor = length_normalization_factor)
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config_proto = tf.ConfigProto(
      intra_op_parallelism_threads=FLAGS.ses_threads, gpu_options=gpu_options, allow_soft_placement=True)
    model.load_model(model_path)

    fout_one_sent = codecs.open(top_one_pred_sent_file, 'w','utf-8')
    fout_n_sent = codecs.open(top_n_pred_sent_file, 'w','utf-8')

    for progress,img in enumerate(img_list):
        # predict sentences given a visual feature
        visual_feature = np.array(vf_reader.read_one(img))
        sentences = generator.beam_search( visual_feature, FLAGS.beam_size)

        # output top one sentence info
        sent_score = sentences[0].score
        sent = ' '.join(sentences[0].words)
        fout_one_sent.write(img + ' ' + '%.3f' % sent_score + ' ' + sent + '\n')
        logger.debug(img + ' ' + '%.3f' % sent_score + ' ' + sent)

        # output top n sentences info
        fout_n_sent.write(img)
        for sentence in sentences:
            sent_score = sentence.score
            sent = ' '.join(sentence.words)
            fout_n_sent.write('\t' + '%.3f' % sent_score + '\t' + sent)
        fout_n_sent.write('\n')
      
        if progress % 100 == 0:
          logger.info('%d images decoded', progress + 1)

    logger.info('%d images decoded', progress + 1)
 
    fout_one_sent.close()
    fout_n_sent.close()
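
For reference, a minimal sketch of reading back the two files this script writes. The formats are inferred from the write calls above: the top-one file has one 'img score sentence' line per image, and the top-n file has the image id followed by tab-separated score/sentence pairs.

def read_top_one(path):
    # Each line: "<img> <score> <sentence ...>"
    results = {}
    with open(path) as fin:
        for line in fin:
            img, score, sent = line.rstrip('\n').split(' ', 2)
            results[img] = (float(score), sent)
    return results

def read_top_n(path):
    # Each line: "<img>\t<score>\t<sentence>\t<score>\t<sentence>..."
    results = {}
    with open(path) as fin:
        for line in fin:
            parts = line.rstrip('\n').split('\t')
            results[parts[0]] = [(float(parts[i]), parts[i + 1])
                                 for i in range(1, len(parts), 2)]
    return results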
Code example #5
File: demo.py  Project: danieljf24/cmrf
import os

# simpleknn and BigFile are project-local modules, assumed importable here.
if __name__ == "__main__":
    rootpath = './'
    trainCollection = 'toydata'
    nimages = 2
    feature = 'f1'
    dim = 3

    testCollection = trainCollection
    testset = testCollection
   
    featureDir = os.path.join(rootpath, trainCollection, "FeatureData", feature)
    searcher = simpleknn.load_model(os.path.join(featureDir, "feature.bin"), dim, nimages, os.path.join(featureDir, "id.txt"))
    searcher.set_distance('l2')
    searcher.set_distance('l1')  # overrides the previous call; 'l1' is the distance actually used
    print("[simpleknn] dim=%d, nr_images=%d" % (searcher.get_dim(), searcher.get_nr_images()))


    testfeaturedir = os.path.join(rootpath, testCollection, 'FeatureData', feature)
    testfeaturefile = BigFile(testfeaturedir, dim)
    testset = testfeaturefile.names

    for testid in testset:
        testfeature = testfeaturefile.read_one(testid)
        visualNeighbors = searcher.search_knn(testfeature, max_hits=20000)
        print(testid, len(visualNeighbors),
              " ".join(["%s %.3f" % (v[0], v[1]) for v in visualNeighbors[:3]]))
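
Both demos assume the toy feature files already exist on disk. Below is a minimal sketch of generating them, assuming the layout these snippets read back: raw float32 vectors concatenated in feature.bin, whitespace-separated ids in id.txt, and 'count dim' in shape.txt. The exact format BigFile and simpleknn.load_model expect may differ per project, so treat this as illustrative.

import os

import numpy as np

def write_toy_features(feature_dir, ids, vectors):
    # vectors: array of shape (n_images, dim), stored as raw float32 bytes
    os.makedirs(feature_dir, exist_ok=True)
    vectors = np.asarray(vectors, dtype=np.float32)
    vectors.tofile(os.path.join(feature_dir, 'feature.bin'))
    with open(os.path.join(feature_dir, 'id.txt'), 'w') as fout:
        fout.write(' '.join(ids))
    with open(os.path.join(feature_dir, 'shape.txt'), 'w') as fout:
        fout.write('%d %d' % vectors.shape)

write_toy_features('./toydata/FeatureData/f1',
                   ['img0', 'img1'],
                   [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])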
Code example #6
File: demo.py  Project: tianfeng80/cmrf
import os

# simpleknn and BigFile are project-local modules, assumed importable here.
if __name__ == "__main__":
    rootpath = './'
    trainCollection = 'toydata'
    nimages = 2
    feature = 'f1'
    dim = 3

    testCollection = trainCollection
    testset = testCollection

    featureDir = os.path.join(rootpath, trainCollection, "FeatureData",
                              feature)
    searcher = simpleknn.load_model(os.path.join(featureDir, "feature.bin"),
                                    dim, nimages,
                                    os.path.join(featureDir, "id.txt"))
    searcher.set_distance('l2')
    searcher.set_distance('l1')  # overrides the previous call; 'l1' is the distance actually used
    print("[simpleknn] dim=%d, nr_images=%d" %
          (searcher.get_dim(), searcher.get_nr_images()))

    testfeaturedir = os.path.join(rootpath, testCollection, 'FeatureData',
                                  feature)
    testfeaturefile = BigFile(testfeaturedir, dim)
    testset = testfeaturefile.names

    for testid in testset:
        testfeature = testfeaturefile.read_one(testid)
        visualNeighbors = searcher.search_knn(testfeature, max_hits=20000)
        print(testid, len(visualNeighbors), " ".join(
            ["%s %.3f" % (v[0], v[1]) for v in visualNeighbors[:3]]))
Code example #7
class BucketDataProvider(object):
    """TensorFlow Data Provider with Buckets"""
    def __init__(self,
                 collection,
                 vocab_file,
                 feature,
                 language,
                 flag_shuffle=False,
                 fluency_threshold=DEFAULT_FLUENCY_U,
                 rootpath=ROOT_PATH):
        self.language = language
        self.anno_file_path = utility.get_sent_file(collection, language,
                                                    rootpath)
        self.fluency_threshold = fluency_threshold
        self.textbank = TextBank(vocab_file)
        assert self.textbank.vocab[TOKEN_PAD] == 0
        self.vf_reader = BigFile(
            utility.get_feat_dir(collection, feature, rootpath))
        self.vf_names = set(self.vf_reader.names)
        self.vf_size = self.vf_reader.ndims
        self.flag_shuffle = flag_shuffle
        self._load_data()

    def shuffle_data_queue(self):
        random.shuffle(self._data_queue)

    def generate_batches(self, batch_size, buckets):
        """Return a list generator of mini-batches of training data."""
        # create Batches
        batches = []
        for max_seq_len in buckets:
            batches.append(
                Batch(batch_size, max_seq_len, self.vf_size,
                      self.textbank.vocab[TOKEN_BOS]))

        # shuffle if necessary
        if self.flag_shuffle:
            np.random.shuffle(self._data_queue)
        # scan data queue
        for data in self._data_queue:
            sentence = data['sentence']
            # Load the visual feature vector for this image id.
            visual_features = np.array(
                self.vf_reader.read_one(data['image_id']))
            if len(sentence) >= buckets[-1]:
                feed_res = batches[-1].feed_and_vomit(visual_features,
                                                      sentence)
                ind_buc = len(buckets) - 1
            else:
                for (ind_b, batch) in enumerate(batches):
                    if len(sentence) < batch.max_seq_len:
                        feed_res = batches[ind_b].feed_and_vomit(
                            visual_features, sentence)
                        ind_buc = ind_b
                        break
            if feed_res:
                yield (ind_buc, ) + feed_res
                batches[ind_buc].empty()

    def _load_data(self, verbose=True):
        logger.debug('Loading data')
        self._data_queue = []
        raw_lines = codecs.open(self.anno_file_path, 'r', 'utf-8').readlines()
        # Re-decode with utf-8-sig to strip any byte-order mark.
        annos = [line.encode('utf-8').decode('utf-8-sig') for line in raw_lines]

        for (ind_a, line) in enumerate(annos):
            data = {}
            sid, sent = line.strip().split(" ", 1)
            imgid = sid.strip().split("#", 1)[0]
            assert imgid in self.vf_names, '%s not in feature data' % imgid
            data['image_id'] = imgid

            # Encode the sentence as vocabulary ids.
            tokens = TextTool.tokenize(sent, self.language)
            data['sentence'] = self.textbank.encode_tokens(tokens,
                                                           flag_add_bos=False)
            self._data_queue.append(data)
            if verbose and (ind_a + 1) % 20000 == 0:
                logger.debug('%d/%d annotation', ind_a + 1, len(annos))
        random.shuffle(self._data_queue)

        nr_of_images = len(set([data['image_id']
                                for data in self._data_queue]))
        logger.info('%d images, %d sentences from %s', nr_of_images,
                    len(self._data_queue), self.anno_file_path)
Code example #8
File: test_models.py  Project: gedaye11/fluent_change
def main(unused_args):

  length_normalization_factor = FLAGS.length_normalization_factor

  # Load model configuration
  config_path = os.path.join(os.path.dirname(__file__), 'model_conf', FLAGS.model_name + '.py')
  config = utility.load_config(config_path)

  config.trainCollection = FLAGS.train_collection
  config.word_cnt_thr = FLAGS.word_cnt_thr
  config.rootpath = FLAGS.rootpath

  train_collection = FLAGS.train_collection
  test_collection = FLAGS.test_collection
  rootpath = FLAGS.rootpath  # local alias used for path construction below
  overwrite = FLAGS.overwrite
  feature = FLAGS.vf_name


  img_set_file = os.path.join(rootpath, test_collection, 'VideoSets', '%s.txt' % test_collection)
  if not os.path.exists(img_set_file):
      img_set_file = os.path.join(rootpath, test_collection, 'ImageSets', '%s.txt' % test_collection)
  img_list = [line.strip() for line in open(img_set_file)]

  # have visual feature ready
  FLAGS.vf_dir = os.path.join(rootpath, test_collection, 'FeatureData', feature)
  vf_reader = BigFile(FLAGS.vf_dir)

  textbank = TextBank(utility.get_train_vocab_file(FLAGS))
  config.vocab_size = len(textbank.vocab)
  config.vf_size = int(open(os.path.join(FLAGS.vf_dir, 'shape.txt')).read().split()[1])

  model_dir = utility.get_model_dir(FLAGS)
  output_dir = utility.get_pred_dir(FLAGS)

  checkpoint_style = FLAGS.checkpoint_style

  if checkpoint_style == 'file':
    #output_per_filename = 'model_perf_in_topk_%d_%s' % (FLAGS.top_k, FLAGS.eval_model_list_file)
    # read validated top models
    validation_output_dir = utility.get_sim_dir(FLAGS)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    eval_model_list_file = os.path.join(validation_output_dir, 'loss_info.txt') #FLAGS.eval_model_list_file)
    shutil.copy(eval_model_list_file, output_dir)
    test_iter_list = []
    for line in open(eval_model_list_file).readlines()[:FLAGS.top_k]:
      iter_current = int(line.strip().split()[0])
      test_iter_list.append(iter_current)

  elif checkpoint_style == 'iter_interval':
    #output_per_filename =  'model_perf_in_%s' % FLAGS.eval_stat
    test_iter_list = range(*[int(x) for x in FLAGS.eval_stat.split("-")])
  elif checkpoint_style == 'iter_num':
    #output_per_filename =  'model_perf_in_iter_%d' % FLAGS.iter_num
    test_iter_list = [FLAGS.iter_num]

  with_image_embedding = (FLAGS.with_image_embedding != 0)
  g = tf.Graph()
  with g.as_default():
    model = InferenceWrapper(config=config,model_dir=model_dir,
                             gpu_memory_fraction=FLAGS.gpu_memory_fraction,
                             gpu=FLAGS.gpu,
                             with_image_embedding=with_image_embedding)
    model.build_model()
  
  for k, iter_n in enumerate(test_iter_list):
    model_path = os.path.join(model_dir, 'variables', 'model_%d.ckpt' % iter_n)
    if not os.path.exists(model_path + '.meta'):
      logger.error('Model path: %s', model_path)
      logger.error('Cannot load model file, exiting')
      sys.exit(0)

    top_one_pred_sent_file = os.path.join(output_dir, 'top%d' % k, 'top_one_pred_sent.txt')
    top_n_pred_sent_file = os.path.join(output_dir, 'top%d' % k, 'top_n_pred_sent.txt')
    # perf_file = os.path.join(output_dir, 'model_%d.ckpt' % iter_n, 'perf.txt')

    if os.path.exists(top_one_pred_sent_file) and not overwrite:
      # write existing perf file and print out
      logger.info('%s exists. skip', top_one_pred_sent_file)
      continue

    if not os.path.exists(os.path.split(top_one_pred_sent_file)[0]):
      os.makedirs(os.path.split(top_one_pred_sent_file)[0])

    logger.info('save results to %s', top_one_pred_sent_file)

    # load the trained model
    generator = CaptionGenerator(config, model, length_normalization_factor = length_normalization_factor)
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config_proto = tf.ConfigProto(
      intra_op_parallelism_threads=FLAGS.ses_threads, gpu_options=gpu_options, allow_soft_placement=True)
    model.load_model(model_path)

    fout_one_sent = codecs.open(top_one_pred_sent_file, 'w','utf-8')
    fout_n_sent = codecs.open(top_n_pred_sent_file, 'w','utf-8')

    for progress,img in enumerate(img_list):
        # predict sentences given a visual feature
        visual_feature = np.array(vf_reader.read_one(img))
        sentences = generator.beam_search( visual_feature, FLAGS.beam_size)

        # output top one sentence info
        sent_score = sentences[0].score
        sent = ' '.join(sentences[0].words)
        fout_one_sent.write(img + ' ' + '%.3f' % sent_score + ' ' + sent + '\n')
        logger.debug(img + ' ' + '%.3f' % sent_score + ' ' + sent)

        # output top n sentences info
        fout_n_sent.write(img)
        for sentence in sentences:
            sent_score = sentence.score
            sent = ' '.join(sentence.words)
            fout_n_sent.write('\t' + '%.3f' % sent_score + '\t' + sent)
        fout_n_sent.write('\n')
      
        if progress % 100 == 0:
          logger.info('%d images decoded', progress + 1)

    logger.info('%d images decoded', progress + 1)
 
    fout_one_sent.close()
    fout_n_sent.close()
Code example #9
class BucketDataProvider(object):
    """TensorFlow Data Provider with Buckets"""
    def __init__(self,
                 collection,
                 vocab_file,
                 feature,
                 language,
                 flag_shuffle=True,
                 method=None,
                 fluency_threshold=DEFAULT_FLUENCY_U,
                 rootpath=ROOT_PATH):
        self.language = language
        self.anno_file_path = utility.get_sent_file(collection, language,
                                                    rootpath)
        self.fluency_threshold = fluency_threshold
        self.method = method
        if method:
            self.sent_score_file = utility.get_sent_score_file(
                collection, language, rootpath)
            assert method in ['sample', 'filter', 'weighted']
            assert self.sent_score_file is not None
            assert fluency_threshold > 0
            if method == 'weighted':
                # Weighted loss reweights low-fluency sentences instead of
                # sampling them, so disable sampling here.
                self.method = method = None
        else:
            self.sent_score_file = None

        self.textbank = TextBank(vocab_file)
        assert self.textbank.vocab[TOKEN_PAD] == 0
        self.vf_reader = BigFile(
            utility.get_feat_dir(collection, feature, rootpath))
        self.vf_names = set(self.vf_reader.names)
        self.vf_size = self.vf_reader.ndims
        self.flag_shuffle = flag_shuffle
        self._load_data()

    def shuffle_data_queue(self):
        random.shuffle(self._data_queue)

    def generate_batches(self, batch_size, buckets):
        """Return a list generator of mini-batches of training data."""
        # create Batches
        batches = []
        for max_seq_len in buckets:
            batches.append(
                Batch(batch_size, max_seq_len, self.vf_size,
                      self.textbank.vocab[TOKEN_BOS]))

        # shuffle if necessary
        if self.flag_shuffle:
            np.random.shuffle(self._data_queue)
        # scan data queue
        for data in self._data_queue:
            if self.method:
                if data['sent_score'] < self.fluency_threshold:
                    if self.method == 'filter':
                        # Drop the sentence outright when sent_score < threshold.
                        continue
                    elif self.method == 'sample':
                        # Keep with probability sent_score / fluency_threshold.
                        x = random.uniform(0, self.fluency_threshold)
                        if x > data['sent_score']:
                            continue
            score = data['sent_score'] if self.sent_score_file else None
            sentence = data['sentence']
            # Load visual features
            visual_features = np.array(
                self.vf_reader.read_one(data['image_id']))
            if len(sentence) >= buckets[-1]:
                feed_res = batches[-1].feed_and_vomit(visual_features,
                                                      sentence, score)
                ind_buc = len(buckets) - 1
            else:
                for (ind_b, batch) in enumerate(batches):
                    if len(sentence) < batch.max_seq_len:
                        feed_res = batches[ind_b].feed_and_vomit(
                            visual_features, sentence, score)
                        ind_buc = ind_b
                        break
            if feed_res:
                yield (ind_buc, ) + feed_res
                batches[ind_buc].empty()

    def _load_data(self, verbose=True):
        logger.debug('Loading data')
        self._data_queue = []
        ind_img = 0
        num_failed = 0
        if self.sent_score_file is not None:
            sid2score = {}
            for line in open(self.sent_score_file):
                elem = line.strip().split('\t')
                sid = elem[0]
                score = float(elem[-1])
                sid2score[sid] = score
        annos = codecs.open(self.anno_file_path, 'r', 'utf-8').readlines()
        for (ind_a, line) in enumerate(annos):
            data = {}
            sid, sent = line.strip().split(" ", 1)
            imgid = sid.strip().split("#")[0]
            if imgid.endswith('.jpg') or imgid.endswith('.mp4'):
                imgid = imgid[:-4]
            assert imgid in self.vf_names, '%s not in feature data' % imgid
            data['image_id'] = imgid

            # Encode sentences
            tokens = TextTool.tokenize(sent, self.language)
            data['sentence'] = self.textbank.encode_tokens(tokens,
                                                           flag_add_bos=False)
            data['sent_score'] = (sid2score[sid]
                                  if self.sent_score_file and sid in sid2score
                                  else 1)
            self._data_queue.append(data)
            if verbose and (ind_a + 1) % 20000 == 0:
                logger.debug('%d/%d annotation', ind_a + 1, len(annos))
        random.shuffle(self._data_queue)

        nr_of_images = len(set([data['image_id']
                                for data in self._data_queue]))
        logger.info('%d images, %d sentences from %s', nr_of_images,
                    len(self._data_queue), self.anno_file_path)
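
The 'sample' branch above keeps a low-fluency sentence with probability sent_score / fluency_threshold (a score at or above the threshold is always kept). A standalone sketch of that rule:

import random

def keep_sentence(sent_score, fluency_threshold):
    # Sentences at or above the fluency threshold always survive; below it,
    # a sentence is kept with probability sent_score / fluency_threshold.
    if sent_score >= fluency_threshold:
        return True
    return random.uniform(0, fluency_threshold) <= sent_score

Over many epochs, a sentence scoring half the threshold is therefore used in roughly half of them.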