class VisionDataset(torch.utils.data.Dataset):
    """Dataset serving visual feature vectors stored in a BigFile.

    Items are addressed by position; each item is the pair
    ``(feature_id, feature_tensor)``.
    """

    def __init__(self, filename):
        """Open the feature store at *filename* and cache its id list."""
        feat_file = BigFile(filename)
        self.vis_feat_file = feat_file
        self.vis_ids = feat_file.names

    def __getitem__(self, index):
        """Return ``(id, tensor)`` for the feature at position *index*."""
        vis_id = self.vis_ids[index]
        raw_feat = self.vis_feat_file.read_one(vis_id)
        return vis_id, torch.Tensor(raw_feat)

    def get_by_name(self, name):
        """Return the feature tensor stored under the id *name*."""
        raw_feat = self.vis_feat_file.read_one(name)
        return torch.Tensor(raw_feat)

    def __len__(self):
        """Return the number of feature ids in the store."""
        return len(self.vis_ids)
class VisionDataset(data.Dataset):
    """Dataset serving visual feature vectors from a BigFile-like store.

    ``params['vis_feat']`` is either a path string, which is opened with
    ``BigFile``, or an already constructed store object that is used as is.
    Each item is the triple ``(feature_tensor, index, feature_id)``.
    """

    def __init__(self, params):
        """Resolve the feature store from *params* and cache ids and length."""
        source = params['vis_feat']
        if isinstance(source, str):
            source = BigFile(source)
        self.vis_feat_file = source
        self.vis_ids = source.names
        self.length = len(self.vis_ids)

    def __getitem__(self, index):
        """Return ``(tensor, index, id)`` for the feature at *index*."""
        vis_id = self.vis_ids[index]
        return self.get_feat_by_id(vis_id), index, vis_id

    def get_feat_by_id(self, vis_id):
        """Return the feature stored under *vis_id* as a tensor."""
        raw_feat = self.vis_feat_file.read_one(vis_id)
        return torch.Tensor(raw_feat)

    def __len__(self):
        """Return the number of ids, as counted at construction time."""
        return self.length
class BucketDataProvider(object):
    """TensorFlow data provider that groups samples into length buckets.

    On construction the annotation file of a collection is read, every
    sentence is tokenized and encoded with the vocabulary, and the resulting
    (image id, encoded sentence) records are kept in an in-memory queue.
    ``generate_batches`` then streams that queue into per-bucket ``Batch``
    objects and yields whatever a batch hands back.
    """

    def __init__(self, collection, vocab_file, feature, language,
                 flag_shuffle=False, fluency_threshold=DEFAULT_FLUENCY_U,
                 rootpath=ROOT_PATH):
        """Locate the annotation and feature data and load all annotations.

        Args:
            collection: name of the data collection.
            vocab_file: vocabulary file handed to ``TextBank``.
            feature: name of the visual feature.
            language: language of the sentences, used for tokenization.
            flag_shuffle: reshuffle the queue on every ``generate_batches``.
            fluency_threshold: stored on the instance; not read by this class.
            rootpath: root directory of the data collections.
        """
        self.language = language
        self.anno_file_path = utility.get_sent_file(collection, language,
                                                    rootpath)
        self.fluency_threshold = fluency_threshold
        self.textbank = TextBank(vocab_file)
        # The padding token is required to have id 0
        # (presumably because batches are zero-padded -- confirm in Batch).
        assert self.textbank.vocab[TOKEN_PAD] == 0
        self.vf_reader = BigFile(
            utility.get_feat_dir(collection, feature, rootpath))
        self.vf_names = set(self.vf_reader.names)
        self.vf_size = self.vf_reader.ndims
        self.flag_shuffle = flag_shuffle
        self._load_data()

    def shuffle_data_queue(self):
        """Shuffle the in-memory data queue in place."""
        random.shuffle(self._data_queue)

    @staticmethod
    def _pick_bucket(sent_len, buckets, batches):
        """Return the index of the bucket a sentence of *sent_len* goes to."""
        last = len(buckets) - 1
        if sent_len >= buckets[-1]:
            # Sentences at or beyond the largest bucket go to the last one.
            return last
        for ind_b, batch in enumerate(batches):
            if sent_len < batch.max_seq_len:
                return ind_b
        # No batch accepted the length; use the last bucket rather than
        # reusing a result left over from a previous sample.
        return last

    def generate_batches(self, batch_size, buckets):
        """Return a generator of mini-batches of training data.

        Args:
            batch_size: number of samples per batch.
            buckets: sequence of maximum sequence lengths, one per bucket.

        Yields:
            ``(bucket_index,) + feed_res`` where ``feed_res`` is the truthy
            value returned by ``Batch.feed_and_vomit``. A batch is emptied
            right after its content has been yielded. Nothing is yielded for
            a batch whose ``feed_and_vomit`` never returned a truthy value
            before the queue ran out.
        """
        batches = [
            Batch(batch_size, max_seq_len, self.vf_size,
                  self.textbank.vocab[TOKEN_BOS])
            for max_seq_len in buckets
        ]

        if self.flag_shuffle:
            np.random.shuffle(self._data_queue)

        for data in self._data_queue:
            sentence = data['sentence']
            visual_features = np.array(
                self.vf_reader.read_one(data['image_id']))
            ind_buc = self._pick_bucket(len(sentence), buckets, batches)
            feed_res = batches[ind_buc].feed_and_vomit(visual_features,
                                                       sentence)
            if feed_res:
                yield (ind_buc,) + feed_res
                batches[ind_buc].empty()

    def _load_data(self, verbose=True):
        """Read the annotation file and fill ``self._data_queue``.

        Each non-blank line has the form ``<sentence id> <sentence>``, where
        the image id is the part of the sentence id before the first ``#``.

        Args:
            verbose: log progress every 20000 annotations.

        Raises:
            AssertionError: if an image id has no entry in the feature data.
        """
        logger.debug('Loading data')
        self._data_queue = []
        # Close the annotation file as soon as its lines have been read.
        with codecs.open(self.anno_file_path, 'r', 'utf-8') as anno_file:
            raw_lines = anno_file.readlines()
        # Re-decoding with utf-8-sig removes a leading byte-order mark.
        annos = [an.encode('utf-8').decode('utf-8-sig') for an in raw_lines]
        for ind_a, line in enumerate(annos):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue
            sid, sent = line.split(" ", 1)
            imgid = sid.strip().split("#", 1)[0]
            assert imgid in self.vf_names, \
                '%s not in feature data' % imgid

            tokens = TextTool.tokenize(sent, self.language)
            self._data_queue.append({
                'image_id': imgid,
                'sentence': self.textbank.encode_tokens(tokens,
                                                        flag_add_bos=False),
            })
            if verbose and (ind_a + 1) % 20000 == 0:
                logger.debug('%d/%d annotation', ind_a + 1, len(annos))

        # The queue is shuffled once at load time, whatever flag_shuffle is.
        random.shuffle(self._data_queue)
        nr_of_images = len(set(data['image_id']
                               for data in self._data_queue))
        logger.info('%d images, %d sentences from %s', nr_of_images,
                    len(self._data_queue), self.anno_file_path)
def main(unused_args):
    """Generate captions for a test collection with trained checkpoints.

    The checkpoints to evaluate are chosen by ``FLAGS.checkpoint_style``:
    ``'file'`` takes the first ``FLAGS.top_k`` iterations listed in
    ``loss_info.txt``, ``'iter_interval'`` expands ``FLAGS.eval_stat``
    (``start-stop[-step]``) into a range, and ``'iter_num'`` uses
    ``FLAGS.iter_num`` alone. For the k-th checkpoint the best sentence per
    image goes to ``top<k>/top_one_pred_sent.txt`` and all beam results go
    to ``top<k>/top_n_pred_sent.txt`` under the prediction directory.

    Args:
        unused_args: ignored.

    Raises:
        ValueError: if ``FLAGS.checkpoint_style`` is not a known style.
        SystemExit: with status 1 if a checkpoint's ``.meta`` file is missing.
    """
    length_normalization_factor = FLAGS.length_normalization_factor

    # Load model configuration
    config_path = os.path.join(os.path.dirname(__file__), 'model_conf',
                               FLAGS.model_name + '.py')
    config = utility.load_config(config_path)
    config.trainCollection = FLAGS.train_collection
    config.word_cnt_thr = FLAGS.word_cnt_thr
    config.rootpath = FLAGS.rootpath

    # Bind the data root locally; the paths below need it.
    rootpath = FLAGS.rootpath
    test_collection = FLAGS.test_collection
    overwrite = FLAGS.overwrite
    feature = FLAGS.vf_name

    # The id list is looked up under VideoSets first, then ImageSets.
    img_set_file = os.path.join(rootpath, test_collection, 'VideoSets',
                                '%s.txt' % test_collection)
    if not os.path.exists(img_set_file):
        img_set_file = os.path.join(rootpath, test_collection, 'ImageSets',
                                    '%s.txt' % test_collection)
    # A real list is required: it is iterated once per checkpoint.
    with open(img_set_file) as img_set:
        img_list = [line.strip() for line in img_set]

    # have visual feature ready
    vf_dir = utility.get_feat_dir(test_collection, feature, rootpath)
    vf_reader = BigFile(vf_dir)

    textbank = TextBank(utility.get_train_vocab_file(FLAGS))
    config.vocab_size = len(textbank.vocab)
    # The second field of shape.txt is taken as the feature dimensionality.
    with open(os.path.join(vf_dir, 'shape.txt')) as shape_file:
        config.vf_size = int(shape_file.read().split()[1])

    model_dir = utility.get_model_dir(FLAGS)
    output_dir = utility.get_pred_dir(FLAGS)

    checkpoint_style = FLAGS.checkpoint_style
    if checkpoint_style == 'file':
        # read validated top models
        validation_output_dir = utility.get_sim_dir(FLAGS)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        eval_model_list_file = os.path.join(validation_output_dir,
                                            'loss_info.txt')
        shutil.copy(eval_model_list_file, output_dir)
        with open(eval_model_list_file) as model_list:
            top_lines = model_list.readlines()[:FLAGS.top_k]
        test_iter_list = [int(line.strip().split()[0])
                          for line in top_lines]
    elif checkpoint_style == 'iter_interval':
        test_iter_list = range(
            *[int(x) for x in FLAGS.eval_stat.split("-")])
    elif checkpoint_style == 'iter_num':
        test_iter_list = [FLAGS.iter_num]
    else:
        raise ValueError('Unknown checkpoint_style: %s' % checkpoint_style)

    with_image_embedding = FLAGS.with_image_embedding != 0
    g = tf.Graph()
    with g.as_default():
        model = InferenceWrapper(
            config=config, model_dir=model_dir,
            gpu_memory_fraction=FLAGS.gpu_memory_fraction,
            gpu=FLAGS.gpu,
            with_image_embedding=with_image_embedding)
        model.build_model()

        # NOTE(review): checkpoints are loaded while g is the default graph;
        # confirm this matches what InferenceWrapper.load_model expects.
        for k, iter_n in enumerate(test_iter_list):
            model_path = os.path.join(model_dir, 'variables',
                                      'model_%d.ckpt' % iter_n)
            if not os.path.exists(model_path + '.meta'):
                logger.error('Model path: %s', model_path)
                logger.error('Cannot load model file and exit')
                # A missing checkpoint is a failure: exit with non-zero status.
                sys.exit(1)

            pred_dir = os.path.join(output_dir, 'top%d' % k)
            top_one_pred_sent_file = os.path.join(pred_dir,
                                                  'top_one_pred_sent.txt')
            top_n_pred_sent_file = os.path.join(pred_dir,
                                                'top_n_pred_sent.txt')
            if os.path.exists(top_one_pred_sent_file) and not overwrite:
                logger.info('%s exists. skip', top_one_pred_sent_file)
                continue
            if not os.path.exists(pred_dir):
                os.makedirs(pred_dir)
            logger.info('save results to %s', top_one_pred_sent_file)

            # load the trained model
            generator = CaptionGenerator(
                config, model,
                length_normalization_factor=length_normalization_factor)
            model.load_model(model_path)

            num_decoded = 0
            # Both outputs are closed even if decoding fails midway.
            with codecs.open(top_one_pred_sent_file, 'w',
                             'utf-8') as fout_one_sent, \
                    codecs.open(top_n_pred_sent_file, 'w',
                                'utf-8') as fout_n_sent:
                for progress, img in enumerate(img_list):
                    # predict sentences given a visual feature
                    visual_feature = np.array(vf_reader.read_one(img))
                    sentences = generator.beam_search(visual_feature,
                                                      FLAGS.beam_size)

                    # output top one sentence info
                    sent_score = sentences[0].score
                    sent = ' '.join(sentences[0].words)
                    fout_one_sent.write(
                        '%s %.3f %s\n' % (img, sent_score, sent))
                    logger.debug('%s %.3f %s', img, sent_score, sent)

                    # output top n sentences info
                    fout_n_sent.write(img)
                    for sentence in sentences:
                        fout_n_sent.write('\t%.3f\t%s' % (
                            sentence.score, ' '.join(sentence.words)))
                    fout_n_sent.write('\n')

                    num_decoded = progress + 1
                    if progress % 100 == 0:
                        logger.info('%d images decoded', num_decoded)
            logger.info('%d images decoded', num_decoded)
if __name__ == "__main__":
    # Smoke test: load the toy kNN model and, for every feature of the toy
    # collection, print its id, the number of neighbors found and the three
    # nearest neighbors with their distances.
    rootpath = './'
    trainCollection = 'toydata'
    nimages = 2
    feature = 'f1'
    dim = 3

    testCollection = trainCollection
    featureDir = os.path.join(rootpath, trainCollection, "FeatureData",
                              feature)
    searcher = simpleknn.load_model(
        os.path.join(featureDir, "feature.bin"), dim, nimages,
        os.path.join(featureDir, "id.txt"))
    # Both metrics are selected in turn; presumably the later call decides
    # the metric used by the searches below -- verify in simpleknn.
    searcher.set_distance('l2')
    searcher.set_distance('l1')
    print("[simpleknn] dim=%d, nr_images=%d" %
          (searcher.get_dim(), searcher.get_nr_images()))

    testfeaturedir = os.path.join(rootpath, testCollection, 'FeatureData',
                                  feature)
    testfeaturefile = BigFile(testfeaturedir, dim)
    testset = testfeaturefile.names
    for testid in testset:
        testfeature = testfeaturefile.read_one(testid)
        visualNeighbors = searcher.search_knn(testfeature, max_hits=20000)
        top_hits = " ".join(
            ["%s %.3f" % (v[0], v[1]) for v in visualNeighbors[:3]])
        # One pre-formatted string prints identically on Python 2 and 3.
        print("%s %d %s" % (testid, len(visualNeighbors), top_hits))
if __name__ == "__main__":
    # Smoke test: load the toy kNN model and, for every feature of the toy
    # collection, print its id, the number of neighbors found and the three
    # nearest neighbors with their distances.
    rootpath = './'
    trainCollection = 'toydata'
    nimages = 2
    feature = 'f1'
    dim = 3

    testCollection = trainCollection
    featureDir = os.path.join(rootpath, trainCollection, "FeatureData",
                              feature)
    searcher = simpleknn.load_model(
        os.path.join(featureDir, "feature.bin"), dim, nimages,
        os.path.join(featureDir, "id.txt"))
    # Both metrics are selected in turn; presumably the later call decides
    # the metric used by the searches below -- verify in simpleknn.
    searcher.set_distance('l2')
    searcher.set_distance('l1')
    print("[simpleknn] dim=%d, nr_images=%d" %
          (searcher.get_dim(), searcher.get_nr_images()))

    testfeaturedir = os.path.join(rootpath, testCollection, 'FeatureData',
                                  feature)
    testfeaturefile = BigFile(testfeaturedir, dim)
    testset = testfeaturefile.names
    for testid in testset:
        testfeature = testfeaturefile.read_one(testid)
        visualNeighbors = searcher.search_knn(testfeature, max_hits=20000)
        top_hits = " ".join(
            ["%s %.3f" % (v[0], v[1]) for v in visualNeighbors[:3]])
        # One pre-formatted string prints identically on Python 2 and 3.
        print("%s %d %s" % (testid, len(visualNeighbors), top_hits))
class BucketDataProvider(object):
    """TensorFlow data provider that groups samples into length buckets.

    On construction the annotation file of a collection is read, every
    sentence is tokenized and encoded with the vocabulary, and the resulting
    (image id, encoded sentence) records are kept in an in-memory queue.
    ``generate_batches`` then streams that queue into per-bucket ``Batch``
    objects and yields whatever a batch hands back.
    """

    def __init__(self, collection, vocab_file, feature, language,
                 flag_shuffle=False, fluency_threshold=DEFAULT_FLUENCY_U,
                 rootpath=ROOT_PATH):
        """Locate the annotation and feature data and load all annotations.

        Args:
            collection: name of the data collection.
            vocab_file: vocabulary file handed to ``TextBank``.
            feature: name of the visual feature.
            language: language of the sentences, used for tokenization.
            flag_shuffle: reshuffle the queue on every ``generate_batches``.
            fluency_threshold: stored on the instance; not read by this class.
            rootpath: root directory of the data collections.
        """
        self.language = language
        self.anno_file_path = utility.get_sent_file(collection, language,
                                                    rootpath)
        self.fluency_threshold = fluency_threshold
        self.textbank = TextBank(vocab_file)
        # The padding token is required to have id 0
        # (presumably because batches are zero-padded -- confirm in Batch).
        assert self.textbank.vocab[TOKEN_PAD] == 0
        self.vf_reader = BigFile(
            utility.get_feat_dir(collection, feature, rootpath))
        self.vf_names = set(self.vf_reader.names)
        self.vf_size = self.vf_reader.ndims
        self.flag_shuffle = flag_shuffle
        self._load_data()

    def shuffle_data_queue(self):
        """Shuffle the in-memory data queue in place."""
        random.shuffle(self._data_queue)

    @staticmethod
    def _pick_bucket(sent_len, buckets, batches):
        """Return the index of the bucket a sentence of *sent_len* goes to."""
        last = len(buckets) - 1
        if sent_len >= buckets[-1]:
            # Sentences at or beyond the largest bucket go to the last one.
            return last
        for ind_b, batch in enumerate(batches):
            if sent_len < batch.max_seq_len:
                return ind_b
        # No batch accepted the length; use the last bucket rather than
        # reusing a result left over from a previous sample.
        return last

    def generate_batches(self, batch_size, buckets):
        """Return a generator of mini-batches of training data.

        Args:
            batch_size: number of samples per batch.
            buckets: sequence of maximum sequence lengths, one per bucket.

        Yields:
            ``(bucket_index,) + feed_res`` where ``feed_res`` is the truthy
            value returned by ``Batch.feed_and_vomit``. A batch is emptied
            right after its content has been yielded. Nothing is yielded for
            a batch whose ``feed_and_vomit`` never returned a truthy value
            before the queue ran out.
        """
        batches = [
            Batch(batch_size, max_seq_len, self.vf_size,
                  self.textbank.vocab[TOKEN_BOS])
            for max_seq_len in buckets
        ]

        if self.flag_shuffle:
            np.random.shuffle(self._data_queue)

        for data in self._data_queue:
            sentence = data['sentence']
            visual_features = np.array(
                self.vf_reader.read_one(data['image_id']))
            ind_buc = self._pick_bucket(len(sentence), buckets, batches)
            feed_res = batches[ind_buc].feed_and_vomit(visual_features,
                                                       sentence)
            if feed_res:
                yield (ind_buc,) + feed_res
                batches[ind_buc].empty()

    def _load_data(self, verbose=True):
        """Read the annotation file and fill ``self._data_queue``.

        Each non-blank line has the form ``<sentence id> <sentence>``, where
        the image id is the part of the sentence id before the first ``#``.

        Args:
            verbose: log progress every 20000 annotations.

        Raises:
            AssertionError: if an image id has no entry in the feature data.
        """
        logger.debug('Loading data')
        self._data_queue = []
        # Close the annotation file as soon as its lines have been read.
        with codecs.open(self.anno_file_path, 'r', 'utf-8') as anno_file:
            raw_lines = anno_file.readlines()
        # Re-decoding with utf-8-sig removes a leading byte-order mark.
        annos = [an.encode('utf-8').decode('utf-8-sig') for an in raw_lines]
        for ind_a, line in enumerate(annos):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue
            sid, sent = line.split(" ", 1)
            imgid = sid.strip().split("#", 1)[0]
            assert imgid in self.vf_names, \
                '%s not in feature data' % imgid

            tokens = TextTool.tokenize(sent, self.language)
            self._data_queue.append({
                'image_id': imgid,
                'sentence': self.textbank.encode_tokens(tokens,
                                                        flag_add_bos=False),
            })
            if verbose and (ind_a + 1) % 20000 == 0:
                logger.debug('%d/%d annotation', ind_a + 1, len(annos))

        # The queue is shuffled once at load time, whatever flag_shuffle is.
        random.shuffle(self._data_queue)
        nr_of_images = len(set(data['image_id']
                               for data in self._data_queue))
        logger.info('%d images, %d sentences from %s', nr_of_images,
                    len(self._data_queue), self.anno_file_path)
def main(unused_args):
    """Generate captions for a test collection with trained checkpoints.

    The checkpoints to evaluate are chosen by ``FLAGS.checkpoint_style``:
    ``'file'`` takes the first ``FLAGS.top_k`` iterations listed in
    ``loss_info.txt``, ``'iter_interval'`` expands ``FLAGS.eval_stat``
    (``start-stop[-step]``) into a range, and ``'iter_num'`` uses
    ``FLAGS.iter_num`` alone. For the k-th checkpoint the best sentence per
    image goes to ``top<k>/top_one_pred_sent.txt`` and all beam results go
    to ``top<k>/top_n_pred_sent.txt`` under the prediction directory.

    Side effect: ``FLAGS.vf_dir`` is set to the feature directory of the
    test collection.

    Args:
        unused_args: ignored.

    Raises:
        ValueError: if ``FLAGS.checkpoint_style`` is not a known style.
        SystemExit: with status 1 if a checkpoint's ``.meta`` file is missing.
    """
    length_normalization_factor = FLAGS.length_normalization_factor

    # Load model configuration
    config_path = os.path.join(os.path.dirname(__file__), 'model_conf',
                               FLAGS.model_name + '.py')
    config = utility.load_config(config_path)
    config.trainCollection = FLAGS.train_collection
    config.word_cnt_thr = FLAGS.word_cnt_thr
    config.rootpath = FLAGS.rootpath

    # Bind the data root locally; the paths below need it.
    rootpath = FLAGS.rootpath
    test_collection = FLAGS.test_collection
    overwrite = FLAGS.overwrite
    feature = FLAGS.vf_name

    # The id list is looked up under VideoSets first, then ImageSets.
    img_set_file = os.path.join(rootpath, test_collection, 'VideoSets',
                                '%s.txt' % test_collection)
    if not os.path.exists(img_set_file):
        img_set_file = os.path.join(rootpath, test_collection, 'ImageSets',
                                    '%s.txt' % test_collection)
    # A real list is required: it is iterated once per checkpoint.
    with open(img_set_file) as img_set:
        img_list = [line.strip() for line in img_set]

    # have visual feature ready
    FLAGS.vf_dir = os.path.join(rootpath, test_collection, 'FeatureData',
                                feature)
    vf_reader = BigFile(FLAGS.vf_dir)

    textbank = TextBank(utility.get_train_vocab_file(FLAGS))
    config.vocab_size = len(textbank.vocab)
    # The second field of shape.txt is taken as the feature dimensionality.
    with open(os.path.join(FLAGS.vf_dir, 'shape.txt')) as shape_file:
        config.vf_size = int(shape_file.read().split()[1])

    model_dir = utility.get_model_dir(FLAGS)
    output_dir = utility.get_pred_dir(FLAGS)

    checkpoint_style = FLAGS.checkpoint_style
    if checkpoint_style == 'file':
        # read validated top models
        validation_output_dir = utility.get_sim_dir(FLAGS)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        eval_model_list_file = os.path.join(validation_output_dir,
                                            'loss_info.txt')
        shutil.copy(eval_model_list_file, output_dir)
        with open(eval_model_list_file) as model_list:
            top_lines = model_list.readlines()[:FLAGS.top_k]
        test_iter_list = [int(line.strip().split()[0])
                          for line in top_lines]
    elif checkpoint_style == 'iter_interval':
        test_iter_list = range(
            *[int(x) for x in FLAGS.eval_stat.split("-")])
    elif checkpoint_style == 'iter_num':
        test_iter_list = [FLAGS.iter_num]
    else:
        raise ValueError('Unknown checkpoint_style: %s' % checkpoint_style)

    with_image_embedding = FLAGS.with_image_embedding != 0
    g = tf.Graph()
    with g.as_default():
        model = InferenceWrapper(
            config=config, model_dir=model_dir,
            gpu_memory_fraction=FLAGS.gpu_memory_fraction,
            gpu=FLAGS.gpu,
            with_image_embedding=with_image_embedding)
        model.build_model()

        # NOTE(review): checkpoints are loaded while g is the default graph;
        # confirm this matches what InferenceWrapper.load_model expects.
        for k, iter_n in enumerate(test_iter_list):
            model_path = os.path.join(model_dir, 'variables',
                                      'model_%d.ckpt' % iter_n)
            if not os.path.exists(model_path + '.meta'):
                logger.error('Model path: %s', model_path)
                logger.error('Cannot load model file and exit')
                # A missing checkpoint is a failure: exit with non-zero status.
                sys.exit(1)

            pred_dir = os.path.join(output_dir, 'top%d' % k)
            top_one_pred_sent_file = os.path.join(pred_dir,
                                                  'top_one_pred_sent.txt')
            top_n_pred_sent_file = os.path.join(pred_dir,
                                                'top_n_pred_sent.txt')
            if os.path.exists(top_one_pred_sent_file) and not overwrite:
                logger.info('%s exists. skip', top_one_pred_sent_file)
                continue
            if not os.path.exists(pred_dir):
                os.makedirs(pred_dir)
            logger.info('save results to %s', top_one_pred_sent_file)

            # load the trained model
            generator = CaptionGenerator(
                config, model,
                length_normalization_factor=length_normalization_factor)
            model.load_model(model_path)

            num_decoded = 0
            # Both outputs are closed even if decoding fails midway.
            with codecs.open(top_one_pred_sent_file, 'w',
                             'utf-8') as fout_one_sent, \
                    codecs.open(top_n_pred_sent_file, 'w',
                                'utf-8') as fout_n_sent:
                for progress, img in enumerate(img_list):
                    print(img)
                    # predict sentences given a visual feature
                    visual_feature = np.array(vf_reader.read_one(img))
                    sentences = generator.beam_search(visual_feature,
                                                      FLAGS.beam_size)

                    # output top one sentence info
                    sent_score = sentences[0].score
                    sent = ' '.join(sentences[0].words)
                    fout_one_sent.write(
                        '%s %.3f %s\n' % (img, sent_score, sent))
                    logger.debug('%s %.3f %s', img, sent_score, sent)

                    # output top n sentences info
                    fout_n_sent.write(img)
                    for sentence in sentences:
                        fout_n_sent.write('\t%.3f\t%s' % (
                            sentence.score, ' '.join(sentence.words)))
                    fout_n_sent.write('\n')

                    num_decoded = progress + 1
                    if progress % 100 == 0:
                        logger.info('%d images decoded', num_decoded)
            logger.info('%d images decoded', num_decoded)
class BucketDataProvider(object):
    """TensorFlow data provider with buckets and fluency-guided selection.

    On construction the annotation file (and, when a fluency method is
    given, the sentence score file) of a collection is read; every sentence
    is tokenized, encoded and stored together with its image id and score.
    ``generate_batches`` streams these records into per-bucket ``Batch``
    objects, optionally dropping sentences whose score is below the fluency
    threshold.
    """

    def __init__(self, collection, vocab_file, feature, language,
                 flag_shuffle=True, method=None,
                 fluency_threshold=DEFAULT_FLUENCY_U, rootpath=ROOT_PATH):
        """Locate the data files and load all annotations.

        Args:
            collection: name of the data collection.
            vocab_file: vocabulary file handed to ``TextBank``.
            feature: name of the visual feature.
            language: language of the sentences, used for tokenization.
            flag_shuffle: reshuffle the queue on every ``generate_batches``.
            method: ``None``, ``'sample'``, ``'filter'`` or ``'weighted'``.
            fluency_threshold: score below which ``'filter'`` drops a
                sentence and ``'sample'`` may drop it; must be positive
                when *method* is given.
            rootpath: root directory of the data collections.

        Raises:
            AssertionError: on an unknown *method*, a missing score file or
                a non-positive *fluency_threshold* when *method* is given.
        """
        self.language = language
        self.anno_file_path = utility.get_sent_file(collection, language,
                                                    rootpath)
        self.fluency_threshold = fluency_threshold
        self.method = method
        if method:
            self.sent_score_file = utility.get_sent_score_file(
                collection, language, rootpath)
            assert method in ['sample', 'filter', 'weighted']
            assert self.sent_score_file is not None
            assert fluency_threshold > 0
            if method == 'weighted':
                # Not sampling the data if fluency-guided method is
                # weighted_loss; the scores are still loaded and passed on.
                self.method = None
        else:
            self.sent_score_file = None

        self.textbank = TextBank(vocab_file)
        # The padding token is required to have id 0
        # (presumably because batches are zero-padded -- confirm in Batch).
        assert self.textbank.vocab[TOKEN_PAD] == 0
        self.vf_reader = BigFile(
            utility.get_feat_dir(collection, feature, rootpath))
        self.vf_names = set(self.vf_reader.names)
        self.vf_size = self.vf_reader.ndims
        self.flag_shuffle = flag_shuffle
        self._load_data()

    def shuffle_data_queue(self):
        """Shuffle the in-memory data queue in place."""
        random.shuffle(self._data_queue)

    def _should_drop(self, sent_score):
        """Return True if a sentence with *sent_score* is to be skipped."""
        if self.method and sent_score < self.fluency_threshold:
            if self.method == 'filter':
                # Drop if the sent_score < threshold
                return True
            if self.method == 'sample':
                # Drop when a uniform draw from [0, threshold] exceeds the
                # score, so lower scores are dropped more often.
                return random.uniform(0, self.fluency_threshold) > sent_score
        return False

    @staticmethod
    def _pick_bucket(sent_len, buckets, batches):
        """Return the index of the bucket a sentence of *sent_len* goes to."""
        last = len(buckets) - 1
        if sent_len >= buckets[-1]:
            # Sentences at or beyond the largest bucket go to the last one.
            return last
        for ind_b, batch in enumerate(batches):
            if sent_len < batch.max_seq_len:
                return ind_b
        # No batch accepted the length; use the last bucket rather than
        # reusing a result left over from a previous sample.
        return last

    def generate_batches(self, batch_size, buckets):
        """Return a generator of mini-batches of training data.

        Args:
            batch_size: number of samples per batch.
            buckets: sequence of maximum sequence lengths, one per bucket.

        Yields:
            ``(bucket_index,) + feed_res`` where ``feed_res`` is the truthy
            value returned by ``Batch.feed_and_vomit``. A batch is emptied
            right after its content has been yielded. Nothing is yielded for
            a batch whose ``feed_and_vomit`` never returned a truthy value
            before the queue ran out.
        """
        batches = [
            Batch(batch_size, max_seq_len, self.vf_size,
                  self.textbank.vocab[TOKEN_BOS])
            for max_seq_len in buckets
        ]

        if self.flag_shuffle:
            np.random.shuffle(self._data_queue)

        for data in self._data_queue:
            if self._should_drop(data['sent_score']):
                continue
            # A score is only forwarded when a score file is in use.
            score = data['sent_score'] if self.sent_score_file else None
            sentence = data['sentence']
            visual_features = np.array(
                self.vf_reader.read_one(data['image_id']))
            ind_buc = self._pick_bucket(len(sentence), buckets, batches)
            feed_res = batches[ind_buc].feed_and_vomit(visual_features,
                                                       sentence, score)
            if feed_res:
                yield (ind_buc,) + feed_res
                batches[ind_buc].empty()

    def _load_sentence_scores(self):
        """Return a ``{sentence id: score}`` dict read from the score file."""
        sid2score = {}
        if self.sent_score_file is None:
            return sid2score
        with open(self.sent_score_file) as score_file:
            for line in score_file:
                line = line.strip()
                if not line:
                    continue
                # Tab-separated: first field is the id, last is the score.
                elem = line.split('\t')
                sid2score[elem[0]] = float(elem[-1])
        return sid2score

    def _load_data(self, verbose=True):
        """Read the annotation file and fill ``self._data_queue``.

        Each non-blank line has the form ``<sentence id> <sentence>``, where
        the image id is the part of the sentence id before the first ``#``
        with a trailing ``.jpg``/``.mp4`` removed. Sentences without an
        entry in the score file get the score 1.

        Args:
            verbose: log progress every 20000 annotations.

        Raises:
            AssertionError: if an image id has no entry in the feature data.
        """
        logger.debug('Loading data')
        self._data_queue = []
        sid2score = self._load_sentence_scores()

        # Close the annotation file as soon as its lines have been read.
        with codecs.open(self.anno_file_path, 'r', 'utf-8') as anno_file:
            annos = anno_file.readlines()
        for ind_a, line in enumerate(annos):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no annotation.
                continue
            sid, sent = line.split(" ", 1)
            imgid = sid.strip().split("#")[0]
            if imgid.endswith(('.jpg', '.mp4')):
                imgid = imgid[:-4]
            assert imgid in self.vf_names, \
                '%s not in feature data' % imgid

            tokens = TextTool.tokenize(sent, self.language)
            self._data_queue.append({
                'image_id': imgid,
                'sentence': self.textbank.encode_tokens(tokens,
                                                        flag_add_bos=False),
                'sent_score': sid2score.get(sid, 1),
            })
            if verbose and (ind_a + 1) % 20000 == 0:
                logger.debug('%d/%d annotation', ind_a + 1, len(annos))

        # The queue is shuffled once at load time, whatever flag_shuffle is.
        random.shuffle(self._data_queue)
        nr_of_images = len(set(data['image_id']
                               for data in self._data_queue))
        logger.info('%d images, %d sentences from %s', nr_of_images,
                    len(self._data_queue), self.anno_file_path)