class Synset2Vec:
    """Look up a word2vec vector for a wnid via its comma-separated word list.

    ``wnid2words`` (unpickled from *wnid2words_file*) maps a wnid to a string
    of comma-separated words; ``word2vec`` is a ``BigFile`` reader opened on
    ``<rootpath>/<corpus>/word2vec/<w2v_name>``.
    """

    def __init__(self, corpus=DEFAULT_W2V_CORPUS, w2v_name=DEFAULT_W2V, wnid2words_file=DEFAULT_WNID2WORDS_FILE, rootpath=ROOT_PATH):
        word2vec_dir = os.path.join(rootpath, corpus, 'word2vec', w2v_name)
        self.word2vec = BigFile(word2vec_dir)
        # NOTE: pickle executes arbitrary code while loading; only point this
        # at trusted files. The handle is closed as soon as loading finishes.
        with open(wnid2words_file, 'rb') as fr:
            self.wnid2words = pickle.load(fr)
        logger.info('w2v(%s): %d words, %d dims', corpus, self.word2vec.shape()[0], self.get_feat_dim())

    def get_feat_dim(self):
        """Return the dimensionality reported by the word2vec reader."""
        return self.word2vec.ndims

    def explain(self, wnid):
        """Return the raw word string stored for *wnid* (KeyError if unknown)."""
        return self.wnid2words[wnid]

    def _mapping(self, query_wnid):
        """Return the vector of the first word of *query_wnid* that has one.

        Words are lower-cased, stripped, and spaces/hyphens are replaced by
        underscores before the lookup. Returns None when no word yields a
        non-empty vector.
        """
        words = self.wnid2words[query_wnid].lower()
        candidates = [w.strip().replace(' ', '_').replace('-', '_') for w in words.split(',')]
        for word in candidates:
            vec = self.word2vec.read_one(word)
            # Explicit check instead of `if vec:` so a numpy array result does
            # not raise "truth value of an array is ambiguous".
            if vec is not None and len(vec) > 0:
                return vec
        return None

    def embedding(self, wnid):
        """Public alias of :meth:`_mapping`."""
        return self._mapping(wnid)
def get_we_parameter(vocabulary, word2vec_file):
    """Build the initial word-embedding matrix for *vocabulary*.

    Row 0 is all zeros (reserved for masking via pad_sequences); row ``i + 1``
    belongs to the i-th word of *vocabulary*. A word whose vector cannot be
    read, or whose vector does not have the reader's dimensionality, gets a
    vector drawn uniformly from [-1, 1).

    Args:
        vocabulary: iterable of words (surrounding whitespace is stripped).
        word2vec_file: path handed to ``BigFile``.

    Returns:
        ``np.array`` stacking the zero row and one row per word.
    """
    print('getting inital word embedding ...')
    w2v_reader = BigFile(word2vec_file)
    ndims = w2v_reader.ndims

    we = []
    # Reserve 0 for masking via pad_sequences
    we.append(np.array([0]*ndims))

    fail_counter = 0
    for word in vocabulary:
        word = word.strip()
        try:
            vec = np.array(w2v_reader.read_one(word))
            # Compare against the reader's own dimensionality; the previous
            # hard-coded (500,) rejected every vector of any other size.
            if vec.shape != (ndims,):
                raise ValueError('unexpected vector shape %s' % (vec.shape,))
        except Exception:
            # Deliberate best-effort: any lookup failure falls back to a
            # random vector and is only counted.
            vec = np.random.uniform(-1, 1, ndims)
            fail_counter += 1
        we.append(vec)

    print("%d words out of %d words cannot find pre-trained word2vec vector" % (fail_counter, len(vocabulary)))
    return np.array(we)
class Synset2Vec:
    """Look up a word2vec vector for a wnid via its comma-separated word list.

    ``wnid2words`` (unpickled from *wnid2words_file*) maps a wnid to a string
    of comma-separated words; ``word2vec`` is a ``BigFile`` reader opened on
    ``<rootpath>/<corpus>/word2vec/<w2v_name>``.
    """

    def __init__(self, corpus=DEFAULT_W2V_CORPUS, w2v_name=DEFAULT_W2V, wnid2words_file=DEFAULT_WNID2WORDS_FILE, rootpath=ROOT_PATH):
        word2vec_dir = os.path.join(rootpath, corpus, 'word2vec', w2v_name)
        self.word2vec = BigFile(word2vec_dir)
        # NOTE: pickle executes arbitrary code while loading; only point this
        # at trusted files. The handle is closed as soon as loading finishes.
        with open(wnid2words_file, 'rb') as fr:
            self.wnid2words = pickle.load(fr)
        logger.info('w2v(%s): %d words, %d dims', corpus, self.word2vec.shape()[0], self.get_feat_dim())

    def get_feat_dim(self):
        """Return the dimensionality reported by the word2vec reader."""
        return self.word2vec.ndims

    def explain(self, wnid):
        """Return the raw word string stored for *wnid* (KeyError if unknown)."""
        return self.wnid2words[wnid]

    def _mapping(self, query_wnid):
        """Return the vector of the first word of *query_wnid* that has one.

        Words are lower-cased, stripped, and spaces/hyphens are replaced by
        underscores before the lookup. Returns None when no word yields a
        non-empty vector.
        """
        words = self.wnid2words[query_wnid].lower()
        candidates = [w.strip().replace(' ', '_').replace('-', '_') for w in words.split(',')]
        for word in candidates:
            vec = self.word2vec.read_one(word)
            # Explicit check instead of `if vec:` so a numpy array result does
            # not raise "truth value of an array is ambiguous".
            if vec is not None and len(vec) > 0:
                return vec
        return None

    def embedding(self, wnid):
        """Public alias of :meth:`_mapping`."""
        return self._mapping(wnid)
class ImageSimer:
    """Compare one image of the dev feature store with images of the train store.

    Similarities are produced by ``calImageSimiByCos`` (defined elsewhere in
    the project).
    """

    def __init__(self, dev_feat_path, train_feat_path):
        self.dev_feats = BigFile(dev_feat_path)
        self.train_feats = BigFile(train_feat_path)

    def _train_feats_in_request_order(self, names):
        """Read train features for *names* and restore the request order.

        ``read`` returns the names it actually delivered (``renamed``), which
        may be ordered differently from *names*; every feature is stored at
        the position its name first has in *names*.
        """
        renamed, feats = self.train_feats.read(names)
        # Index the names once instead of calling list.index() per feature,
        # which made the re-sort quadratic.
        first_pos = {}
        for pos, name in enumerate(names):
            first_pos.setdefault(name, pos)
        # NOTE(review): the buffer is sized by len(renamed), as before; if
        # read() delivers fewer names than requested a position can fall
        # outside it. Confirm that callers only pass names present in the store.
        resorted = [None] * len(renamed)
        for name, feat in zip(renamed, feats):
            resorted[first_pos[name]] = feat
        return resorted

    def calsimImage(self, img, imgs):
        """Return the similarities between dev image *img* and train images *imgs*."""
        imgfeat = self.dev_feats.read_one(img)
        return calImageSimiByCos(imgfeat, self._train_feats_in_request_order(imgs))

    def calsimiImagewithClick(self, img, img_click_list, clickthres):
        """Return the log-click-weighted mean similarity of *img* to clicked images.

        Args:
            img: name of the image in the dev store.
            img_click_list: sequence of ``(image_name, click_count)`` pairs;
                the count is converted with ``int``.
            clickthres: pairs with fewer clicks than this are ignored.
        """
        imgfeat = self.dev_feats.read_one(img)
        # Filter once so names and click counts cannot drift out of step.
        kept = [(x[0], int(x[1])) for x in img_click_list if int(x[1]) >= clickthres]
        img_list = [name for name, _ in kept]
        clc_list = [clicks for _, clicks in kept]
        img_simi = calImageSimiByCos(imgfeat, self._train_feats_in_request_order(img_list))
        return sum(np.array(img_simi) * np.log(clc_list)) / len(img_list)
class ImageSimer:
    """Compare one image of the dev feature store with images of the train store.

    Similarities are produced by ``calImageSimiByCos`` (defined elsewhere in
    the project).
    """

    def __init__(self, dev_feat_path, train_feat_path):
        self.dev_feats = BigFile(dev_feat_path)
        self.train_feats = BigFile(train_feat_path)

    def _train_feats_in_request_order(self, names):
        """Read train features for *names* and restore the request order.

        ``read`` returns the names it actually delivered (``renamed``), which
        may be ordered differently from *names*; every feature is stored at
        the position its name first has in *names*.
        """
        renamed, feats = self.train_feats.read(names)
        # Index the names once instead of calling list.index() per feature,
        # which made the re-sort quadratic.
        first_pos = {}
        for pos, name in enumerate(names):
            first_pos.setdefault(name, pos)
        # NOTE(review): the buffer is sized by len(renamed), as before; if
        # read() delivers fewer names than requested a position can fall
        # outside it. Confirm that callers only pass names present in the store.
        resorted = [None] * len(renamed)
        for name, feat in zip(renamed, feats):
            resorted[first_pos[name]] = feat
        return resorted

    def calsimImage(self, img, imgs):
        """Return the similarities between dev image *img* and train images *imgs*."""
        imgfeat = self.dev_feats.read_one(img)
        return calImageSimiByCos(imgfeat, self._train_feats_in_request_order(imgs))

    def calsimiImagewithClick(self, img, img_click_list, clickthres):
        """Return the log-click-weighted mean similarity of *img* to clicked images.

        Args:
            img: name of the image in the dev store.
            img_click_list: sequence of ``(image_name, click_count)`` pairs;
                the count is converted with ``int``.
            clickthres: pairs with fewer clicks than this are ignored.
        """
        imgfeat = self.dev_feats.read_one(img)
        # Filter once so names and click counts cannot drift out of step.
        kept = [(x[0], int(x[1])) for x in img_click_list if int(x[1]) >= clickthres]
        img_list = [name for name, _ in kept]
        clc_list = [clicks for _, clicks in kept]
        img_simi = calImageSimiByCos(imgfeat, self._train_feats_in_request_order(img_list))
        return sum(np.array(img_simi) * np.log(clc_list)) / len(img_list)
def get_en_we_parameter(vocabulary, word2vec_file):
    """Collect one embedding vector per word of *vocabulary* into the list ``we``.

    Entry 0 of ``we`` is a list of ``ndims`` zeros; each following entry is the
    vector read for the corresponding word, or a vector drawn uniformly from
    [-1, 1) when reading the word raises.

    NOTE(review): the visible body ends without a ``return`` and without
    reporting ``fail_counter`` -- the snippet looks truncated; confirm against
    the full file before relying on the return value.
    """
    print 'getting inital word embedding ...'
    w2v_reader = BigFile(word2vec_file)
    ndims = w2v_reader.ndims
    fail_counter = 0
    we = []
    # Reserve 0 for masking via pad_sequences
    we.append([0]*ndims)
    for word in vocabulary:
        word = word.strip()
        try:
            vec = w2v_reader.read_one(word)
            # print vec
            we.append(vec)
        # Any failure while reading the word (Python 2 except syntax) is
        # treated as "no pre-trained vector": use a random one and count it.
        except Exception, e:
            vec = np.random.uniform(-1,1,ndims)
            we.append(vec)
            fail_counter +=1
def process(options, collection):
    """Rank every query's images by their ``calParzen`` score and report DCG@25.

    For each query id listed in ``Annotations/qid.text.txt`` the annotated
    images are scored with ``calParzen(feature, all_query_features, sigma)``,
    sorted by descending score, and the sorted labels are given to the
    "DCG@25" scorer. Rankings are handed to ``writeRankingResult`` in batches
    of 20 queries (plus a final flush); the per-query DCG values are written
    with ``writeDCGResult`` after the loop. Finally the ranking result path is
    appended to ``result/individual_result_pathes.txt``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``feature``,
            ``method`` and ``sigma``.
        collection: name of the collection directory under ``options.rootpath``.

    Exits the process with status 0 when ``checkToSkip`` returns true for
    either result path.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    feature = options.feature
    method = options.method
    sigma = options.sigma

    # result path
    ranking_result_path = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'MetaData', method, feature)
    DCG_result_path = os.path.join(rootpath, collection, 'DCG', method, feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input of query
    qid_query_file = os.path.join(rootpath, collection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)
    qid2query = dict(zip(qid_list, query_list))

    # input of image
    img_feat_path = os.path.join(rootpath, collection, 'FeatureData', feature)
    img_feats = BigFile(img_feat_path)

    # the model to calculate DCG@25
    scorer = getScorer("DCG@25")

    done = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for qid in qid_list:
        iid_list, label_list = readAnnotationsFrom(collection, 'concepts%s.txt' % collection, qid, False, rootpath)

        _, test_X = img_feats.read(iid_list)
        parzen_list = [calParzen(img_feats.read_one(imidx), test_X, sigma) for imidx in iid_list]

        sorted_tuple = sorted(zip(iid_list, label_list, parzen_list), key=lambda v: v[2], reverse=True)
        qid2iid_label_score[qid] = sorted_tuple

        # calculate DCG@25
        sorted_label = [x[1] for x in sorted_tuple]
        qid2dcg[qid] = scorer.score(sorted_label)
        printMessage("Done", qid, qid2query[qid])

        done += 1
        if done % 20 == 0:
            # Flush finished rankings so memory stays bounded.
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeDCGResult(DCG_result_path, qid2dcg)
    writeRankingResult(ranking_result_path, qid2iid_label_score)

    # Guard the average: with no queries the division used to raise
    # ZeroDivisionError after all results had already been written.
    if qid2dcg:
        print("average DCG@25: %f" % (1.0 * sum(qid2dcg.values()) / len(qid2dcg)))
    else:
        print("average DCG@25: n/a (no queries)")

    result_path_file = "result/individual_result_pathes.txt"
    if not os.path.exists(result_path_file):
        makedirsforfile(result_path_file)
    # Append mode creates the file when it is missing, so one code path
    # suffices; `with` closes the handle even if the write fails.
    with open(result_path_file, 'a') as fout:
        fout.write(ranking_result_path + '\n')
def process(options, collection):
    """Rank every query's images by their ``calParzen`` score and report DCG@25.

    For each query id listed in ``Annotations/qid.text.txt`` the annotated
    images are scored with ``calParzen(feature, all_query_features, sigma)``,
    sorted by descending score, and the sorted labels are given to the
    "DCG@25" scorer. Rankings are handed to ``writeRankingResult`` in batches
    of 20 queries (plus a final flush); the per-query DCG values are written
    with ``writeDCGResult`` after the loop. Finally the ranking result path is
    appended to ``result/individual_result_pathes.txt``.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``feature``,
            ``method`` and ``sigma``.
        collection: name of the collection directory under ``options.rootpath``.

    Exits the process with status 0 when ``checkToSkip`` returns true for
    either result path.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    feature = options.feature
    method = options.method
    sigma = options.sigma

    # result path
    ranking_result_path = os.path.join(rootpath, collection, 'SimilarityIndex', collection, 'MetaData', method, feature)
    DCG_result_path = os.path.join(rootpath, collection, 'DCG', method, feature)
    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input of query
    qid_query_file = os.path.join(rootpath, collection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)
    qid2query = dict(zip(qid_list, query_list))

    # input of image
    img_feat_path = os.path.join(rootpath, collection, 'FeatureData', feature)
    img_feats = BigFile(img_feat_path)

    # the model to calculate DCG@25
    scorer = getScorer("DCG@25")

    done = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for qid in qid_list:
        iid_list, label_list = readAnnotationsFrom(collection, 'concepts%s.txt' % collection, qid, False, rootpath)

        _, test_X = img_feats.read(iid_list)
        parzen_list = [calParzen(img_feats.read_one(imidx), test_X, sigma) for imidx in iid_list]

        sorted_tuple = sorted(zip(iid_list, label_list, parzen_list), key=lambda v: v[2], reverse=True)
        qid2iid_label_score[qid] = sorted_tuple

        # calculate DCG@25
        sorted_label = [x[1] for x in sorted_tuple]
        qid2dcg[qid] = scorer.score(sorted_label)
        printMessage("Done", qid, qid2query[qid])

        done += 1
        if done % 20 == 0:
            # Flush finished rankings so memory stays bounded.
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeDCGResult(DCG_result_path, qid2dcg)
    writeRankingResult(ranking_result_path, qid2iid_label_score)

    # Guard the average: with no queries the division used to raise
    # ZeroDivisionError after all results had already been written.
    if qid2dcg:
        print("average DCG@25: %f" % (1.0 * sum(qid2dcg.values()) / len(qid2dcg)))
    else:
        print("average DCG@25: n/a (no queries)")

    result_path_file = "result/individual_result_pathes.txt"
    if not os.path.exists(result_path_file):
        makedirsforfile(result_path_file)
    # Append mode creates the file when it is missing, so one code path
    # suffices; `with` closes the handle even if the write fails.
    with open(result_path_file, 'a') as fout:
        fout.write(ranking_result_path + '\n')
class ImageSimer:
    """Similarity between images of the dev feature store and of the train store.

    Single-image similarities come from ``calImageSimiByCos`` (defined
    elsewhere in the project); ``simiImgs_WeightImgs`` uses scipy's cosine
    distance directly.
    """

    def __init__(self, dev_feat_path, train_feat_path):
        self.dev_feats = BigFile(dev_feat_path)
        self.train_feats = BigFile(train_feat_path)

    def _first_positions(self, names):
        """Map each name to the index of its first occurrence in *names*."""
        first_pos = {}
        for pos, name in enumerate(names):
            first_pos.setdefault(name, pos)
        return first_pos

    def _train_feats_in_request_order(self, names):
        """Read train features for *names* and restore the request order.

        ``read`` returns the names it actually delivered (``renamed``), which
        may be ordered differently from *names*; every feature is stored at
        the position its name first has in *names*.
        """
        renamed, feats = self.train_feats.read(names)
        # One dict lookup per feature instead of list.index() (was O(n^2)).
        first_pos = self._first_positions(names)
        # NOTE(review): the buffer is sized by len(renamed), as before; if
        # read() delivers fewer names than requested a position can fall
        # outside it. Confirm that callers only pass names present in the store.
        resorted = [None] * len(renamed)
        for name, feat in zip(renamed, feats):
            resorted[first_pos[name]] = feat
        return resorted

    def _normalized_weights(self, names, weights, renamed):
        """Return the weights of *renamed* (looked up via *names*), summing to 1.

        The array is built from the names that were actually read, so a name
        missing from the store no longer leaves a ``None`` slot. It is float
        typed so integer weights are not floor-divided under Python 2.
        """
        first_pos = self._first_positions(names)
        picked = np.array([weights[first_pos[name]] for name in renamed], dtype=float)
        return picked / picked.sum()

    def calsimImage(self, img, imgs):
        """Return the similarities between dev image *img* and train images *imgs*."""
        imgfeat = self.dev_feats.read_one(img)
        return calImageSimiByCos(imgfeat, self._train_feats_in_request_order(imgs))

    def calsimiImagewithClick(self, img, img_click_list, clickthres):
        """Return the log-click-weighted mean similarity of *img* to clicked images.

        Args:
            img: name of the image in the dev store.
            img_click_list: sequence of ``(image_name, click_count)`` pairs;
                the count is converted with ``int``.
            clickthres: pairs with fewer clicks than this are ignored.
        """
        imgfeat = self.dev_feats.read_one(img)
        # Filter once so names and click counts cannot drift out of step.
        kept = [(x[0], int(x[1])) for x in img_click_list if int(x[1]) >= clickthres]
        img_list = [name for name, _ in kept]
        clc_list = [clicks for _, clicks in kept]
        img_simi = calImageSimiByCos(imgfeat, self._train_feats_in_request_order(img_list))
        return sum(np.array(img_simi) * np.log(clc_list)) / len(img_list)

    def clasimiImgwithWeightImgs(self, img, imgs, weightes):
        """Return the weighted mean similarity of dev image *img* to train *imgs*.

        *weightes* is parallel to *imgs*; the weights are re-ordered to match
        the order in which the train features were returned and normalised to
        sum to 1.
        """
        assert (len(imgs) == len(weightes))
        imgfeat = self.dev_feats.read_one(img)
        renamed, feats = self.train_feats.read(imgs)
        normal_weight = self._normalized_weights(imgs, weightes, renamed)
        simi_list = calImageSimiByCos(imgfeat, feats)
        return np.dot(normal_weight, np.array(simi_list))

    def simiImgs_WeightImgs(self, t_img_list, s_img_list, weightes):
        """Return, for each dev image, its weighted mean cosine similarity to train images.

        Args:
            t_img_list: names of the dev images to score.
            s_img_list: names of the train images to compare against.
            weightes: weights parallel to *s_img_list*.

        Returns:
            List of scores in the order of *t_img_list*.
        """
        assert (len(s_img_list) == len(weightes))
        t_renamed, t_feats = self.dev_feats.read(t_img_list)
        s_renamed, s_feats = self.train_feats.read(s_img_list)
        normal_weight = self._normalized_weights(s_img_list, weightes, s_renamed)

        # cdist returns cosine *distance*; turn it into similarity.
        cosineSimi = -(distance.cdist(t_feats, s_feats, 'cosine') - 1)
        weightSimi = np.dot(cosineSimi, normal_weight)

        renamed2sim = dict(zip(t_renamed, list(weightSimi)))
        return [renamed2sim[key] for key in t_img_list]
class ImageSimer:
    """Similarity between images of the dev feature store and of the train store.

    Single-image similarities come from ``calImageSimiByCos`` (defined
    elsewhere in the project); ``simiImgs_WeightImgs`` uses scipy's cosine
    distance directly.
    """

    def __init__(self, dev_feat_path, train_feat_path):
        self.dev_feats = BigFile(dev_feat_path)
        self.train_feats = BigFile(train_feat_path)

    def _first_positions(self, names):
        """Map each name to the index of its first occurrence in *names*."""
        first_pos = {}
        for pos, name in enumerate(names):
            first_pos.setdefault(name, pos)
        return first_pos

    def _train_feats_in_request_order(self, names):
        """Read train features for *names* and restore the request order.

        ``read`` returns the names it actually delivered (``renamed``), which
        may be ordered differently from *names*; every feature is stored at
        the position its name first has in *names*.
        """
        renamed, feats = self.train_feats.read(names)
        # One dict lookup per feature instead of list.index() (was O(n^2)).
        first_pos = self._first_positions(names)
        # NOTE(review): the buffer is sized by len(renamed), as before; if
        # read() delivers fewer names than requested a position can fall
        # outside it. Confirm that callers only pass names present in the store.
        resorted = [None] * len(renamed)
        for name, feat in zip(renamed, feats):
            resorted[first_pos[name]] = feat
        return resorted

    def _normalized_weights(self, names, weights, renamed):
        """Return the weights of *renamed* (looked up via *names*), summing to 1.

        The array is built from the names that were actually read, so a name
        missing from the store no longer leaves a ``None`` slot. It is float
        typed so integer weights are not floor-divided under Python 2.
        """
        first_pos = self._first_positions(names)
        picked = np.array([weights[first_pos[name]] for name in renamed], dtype=float)
        return picked / picked.sum()

    def calsimImage(self, img, imgs):
        """Return the similarities between dev image *img* and train images *imgs*."""
        imgfeat = self.dev_feats.read_one(img)
        return calImageSimiByCos(imgfeat, self._train_feats_in_request_order(imgs))

    def calsimiImagewithClick(self, img, img_click_list, clickthres):
        """Return the log-click-weighted mean similarity of *img* to clicked images.

        Args:
            img: name of the image in the dev store.
            img_click_list: sequence of ``(image_name, click_count)`` pairs;
                the count is converted with ``int``.
            clickthres: pairs with fewer clicks than this are ignored.
        """
        imgfeat = self.dev_feats.read_one(img)
        # Filter once so names and click counts cannot drift out of step.
        kept = [(x[0], int(x[1])) for x in img_click_list if int(x[1]) >= clickthres]
        img_list = [name for name, _ in kept]
        clc_list = [clicks for _, clicks in kept]
        img_simi = calImageSimiByCos(imgfeat, self._train_feats_in_request_order(img_list))
        return sum(np.array(img_simi) * np.log(clc_list)) / len(img_list)

    def clasimiImgwithWeightImgs(self, img, imgs, weightes):
        """Return the weighted mean similarity of dev image *img* to train *imgs*.

        *weightes* is parallel to *imgs*; the weights are re-ordered to match
        the order in which the train features were returned and normalised to
        sum to 1.
        """
        assert (len(imgs) == len(weightes))
        imgfeat = self.dev_feats.read_one(img)
        renamed, feats = self.train_feats.read(imgs)
        normal_weight = self._normalized_weights(imgs, weightes, renamed)
        simi_list = calImageSimiByCos(imgfeat, feats)
        return np.dot(normal_weight, np.array(simi_list))

    def simiImgs_WeightImgs(self, t_img_list, s_img_list, weightes):
        """Return, for each dev image, its weighted mean cosine similarity to train images.

        Args:
            t_img_list: names of the dev images to score.
            s_img_list: names of the train images to compare against.
            weightes: weights parallel to *s_img_list*.

        Returns:
            List of scores in the order of *t_img_list*.
        """
        assert (len(s_img_list) == len(weightes))
        t_renamed, t_feats = self.dev_feats.read(t_img_list)
        s_renamed, s_feats = self.train_feats.read(s_img_list)
        normal_weight = self._normalized_weights(s_img_list, weightes, s_renamed)

        # cdist returns cosine *distance*; turn it into similarity.
        cosineSimi = -(distance.cdist(t_feats, s_feats, 'cosine') - 1)
        weightSimi = np.dot(cosineSimi, normal_weight)

        renamed2sim = dict(zip(t_renamed, list(weightSimi)))
        return [renamed2sim[key] for key in t_img_list]
self.__shuffle_mask() perturbation = (np.random.randn(self.n_dims) * self.std + self.mean) * self.perturb_intensity * self.mask aug_feat = vid_feat + perturbation return aug_feat if __name__ == "__main__": # test frame level augmentation feats = np.random.randn(11, 4) n_vecs = feats.shape[0] for stride in [2, [2, 3]]: f_auger = Frame_Level_Augmenter(stride) print f_auger.get_aug_index(n_vecs) # print f_auger.get_aug_feat(feats) print[len(a) for a in f_auger.get_aug_feat(feats)] # test video level augmentation rootpath = '/home/daniel/VisualSearch/hulu' collection = 'track_1_shows' feature = 'c3d-pool5' feat_path = os.path.join(rootpath, collection, "FeatureData", feature) feat_reader = BigFile(feat_path) v_auger = Video_Level_Augmenter(feat_path, feat_reader, perturb_intensity=1, perturb_prob=0.5) vid_feat = feat_reader.read_one(random.choice(feat_reader.names)) aug_feat = v_auger.get_aug_feat(vid_feat)
def process(options, trainCollection, testCollection):
    """Predict visual features for all test sentences and, for 'zh', match them to images.

    Steps visible in this function:
      1. Load the W2VV config named after the last component of
         ``options.model_path`` and build the three text encoders
         (rnn / bow / w2v styles taken from ``config.text_style``).
      2. Run ``W2VV_pred.predict_one`` on every sentence of the test caption
         file and write ``<sent_id> <feature values>`` lines to
         ``sent_feat.txt``; sentences that yield ``None`` are logged and skipped.
      3. Only when the language is 'zh': score every kept sentence against all
         test images, write the full ranking to ``sent.id.score.txt``, evaluate
         it with ``cal_perf_t2i`` and store the mean inverted rank in ``mir.txt``.

    Exits the process with status 0 when ``checkToSkip`` returns true for the
    sentence feature file or when ``text_style`` contains neither "lstm" nor "gru".
    """
    lang = which_language(trainCollection)
    assert (which_language(trainCollection) == which_language(testCollection))
    rootpath = options.rootpath
    overwrite = options.overwrite
    model_path = options.model_path
    model_name = options.model_name
    weight_name = options.weight_name
    # NOTE(review): resfile, k, loss_fun and we_trainable are read but never
    # used in the visible body.
    resfile = options.resfile
    # only save the predicted top k sentence
    k = options.k
    corpus = options.corpus
    word2vec = options.word2vec
    simi_fun = options.simi_fun
    set_style = options.set_style
    # The config module is named after the last path component of model_path.
    w2vv_config = os.path.basename(os.path.normpath(model_path))
    config = load_config('w2vv_configs/%s.py' % w2vv_config)
    # image feature
    img_feature = config.img_feature
    # text embedding style (word2vec, bag-of-words, word hashing)
    text_style = config.text_style
    L1_normalize = config.L1_normalize
    L2_normalize = config.L2_normalize
    bow_vocab = config.bow_vocab + '.txt'
    loss_fun = config.loss_fun
    # lstm
    sent_maxlen = config.sent_maxlen
    embed_size = config.embed_size
    we_trainable = config.we_trainable
    # result file info
    output_dir = os.path.join(rootpath, testCollection, 'SimilarityIndex', trainCollection, w2vv_config)
    result_pred_sents = os.path.join(output_dir, 'sent.id.score.txt')
    sent_feat_file = os.path.join(output_dir, "sent_feat.txt")
    test_sent_file = os.path.join(rootpath, testCollection, 'TextData', '%s.caption.txt' % testCollection)
    if checkToSkip(sent_feat_file, overwrite):
        sys.exit(0)
    makedirsforfile(result_pred_sents)
    # text_style is expected to hold exactly three '@'-separated parts;
    # any other count makes this unpacking raise ValueError.
    rnn_style, bow_style, w2v_style = text_style.strip().split('@')
    if "lstm" in text_style or "gru" in text_style:
        if 'zh' == lang:
            w2v_data_path = os.path.join(rootpath, 'zh_w2v', 'model', 'zh_jieba.model')
        else:
            w2v_data_path = os.path.join(rootpath, "word2vec", corpus, word2vec)
        # The rnn encoder reads the 'bow' vocabulary directory, while the bow
        # encoder reads the directory named after bow_style.
        text_data_path = os.path.join(rootpath, trainCollection, "TextData", "vocabulary", 'bow', bow_vocab)
        bow_data_path = os.path.join(rootpath, trainCollection, "TextData", "vocabulary", bow_style, bow_vocab)
        text2vec = get_text_encoder(rnn_style)(text_data_path, ndims=0, language=lang, L1_normalize=L1_normalize, L2_normalize=L2_normalize, maxlen=sent_maxlen)
        bow2vec = get_text_encoder(bow_style)(bow_data_path, ndims=0, language=lang, L1_normalize=L1_normalize, L2_normalize=L2_normalize)
        # NOTE(review): L2_normalize receives L1_normalize here, unlike the two
        # encoders above -- looks like a copy-paste slip; confirm against the
        # training code before changing, since the model may depend on it.
        w2v2vec = get_text_encoder(w2v_style)(w2v_data_path, ndims=0, language=lang, L1_normalize=L1_normalize, L2_normalize=L1_normalize)
    else:
        logger.info(
            "%s is not supported, please check the 'text_style' parameter",
            text_style)
        sys.exit(0)
    # img2vec
    img_feats_path = os.path.join(rootpath, FULL_COLLECTION, 'FeatureData', img_feature)
    img_feats = BigFile(img_feats_path)
    # similarity function
    simer = get_simer(simi_fun)()
    abs_model_path = os.path.join(model_path, model_name)
    weight_path = os.path.join(model_path, weight_name)
    predictor = W2VV_pred(abs_model_path, weight_path, text2vec, sent_maxlen, embed_size, bow2vec, w2v2vec)
    test_sents_id, test_sents, id2sents = readSentsInfo(test_sent_file)
    # NOTE(review): the file opened here is never closed explicitly, and
    # len(test_img_list) below requires map() to return a list (Python 2).
    test_img_list = map(
        str.strip,
        open(
            os.path.join(rootpath, testCollection, set_style,
                         '%s.txt' % testCollection)).readlines())
    fw = open(sent_feat_file, 'w')
    logger.info(
        "predict the visual CNN features for all sentences in the test set ..."
    )
    pred_progbar = generic_utils.Progbar(len(test_sents_id))
    # Parallel lists, one entry per sentence batch: the ids that were
    # vectorized successfully and their predicted features.
    filtered_test_sent_id = []
    test_sent_visual_feats_batch_list = []
    text_batch_size = 10000
    for start in range(0, len(test_sents_id), text_batch_size):
        end = min(len(test_sents_id), start + text_batch_size)
        text_batch_list = test_sents_id[start:end]
        test_sent_visual_feats_batch = []
        sents_id = []
        for index in range(len(text_batch_list)):
            sid = text_batch_list[index]
            test_sent = id2sents[sid]
            test_sent_feat = predictor.predict_one(test_sent)
            if test_sent_feat is not None:
                test_sent_visual_feats_batch.append(test_sent_feat)
                sents_id.append(sid)
                fw.write(sid + ' ' + ' '.join(map(str, test_sent_feat)) + '\n')
            else:
                logger.info('failed to vectorize "%s"', test_sent)
            pred_progbar.add(1)
        test_sent_visual_feats_batch_list.append(test_sent_visual_feats_batch)
        filtered_test_sent_id.append(sents_id)
    fw.close()
    # evaluation only when training on Chinese collection
    if 'zh' == lang:
        logger.info("matching image and text on %s ...", testCollection)
        fout_1 = open(result_pred_sents, 'w')
        # NOTE(review): the bar is sized by the number of images but updated
        # with `counter`, which counts sentences.
        test_progbar = generic_utils.Progbar(len(test_img_list))
        img_batch_size = 1000
        counter = 0
        for i, test_sent_visual_feats in enumerate(
                test_sent_visual_feats_batch_list):
            sents_id = filtered_test_sent_id[i]
            batch_score_list = []
            # Score this sentence batch against the images, 1000 images at a time.
            for start in range(0, len(test_img_list), img_batch_size):
                end = min(len(test_img_list), start + img_batch_size)
                img_batch_list = test_img_list[start:end]
                img_feat_batch = []
                for test_img in img_batch_list:
                    test_img_feat = img_feats.read_one(test_img)
                    img_feat_batch.append(test_img_feat)
                scorelist_batch = simer.calculate(test_sent_visual_feats, img_feat_batch)
                batch_score_list.append(scorelist_batch)
            # Join the per-image-batch scores along the last axis; the assert
            # below expects one row per sentence afterwards.
            batch_score_list = np.concatenate(batch_score_list, axis=-1)
            assert len(batch_score_list) == len(sents_id)
            for sent_id, scorelist in zip(sents_id, batch_score_list):
                # Image indices by descending score.
                top_hits = np.argsort(scorelist)[::-1]
                # Flat list: image name, score, image name, score, ...
                top_imgs = []
                for idx in top_hits.tolist():
                    top_imgs.append(test_img_list[idx])
                    top_imgs.append(scorelist[idx])
                fout_1.write(sent_id + ' ' + ' '.join(map(str, top_imgs)) + '\n')
                counter += 1
                test_progbar.update(counter)
        # NOTE(review): sentences that failed to vectorize were dropped above,
        # so this assert fails whenever any sentence was skipped -- confirm intent.
        assert counter == len(test_sents_id)
        fout_1.close()
        print(result_pred_sents)
        recall_name, recall_score, med_r, mean_r, mean_invert_r = cal_perf_t2i(
            result_pred_sents)
        #fout_recall = open(os.path.join(output_dir, 'recall.txt'), 'w')
        #fout_recall.write(recall_name + '\n' + recall_score + '\n')
        #fout_recall.write('med_r:' + '\n' + str(med_r) + '\n')
        #fout_recall.write('mean_r:' + '\n' + str(mean_r) + '\n')
        #fout_recall.write('mean_invert_r:' + '\n' + str(mean_invert_r) + '\n')
        # Only the mean inverted rank is persisted, rounded to 3 decimals.
        fout_recall = open(os.path.join(output_dir, 'mir.txt'), 'w')
        fout_recall.write('mean_invert_r: {}\n'.format(round(mean_invert_r, 3)))
        fout_recall.close()
class CocoDataset(data.Dataset):
    """Sentence-level dataset built from a caption JSON file.

    The JSON must hold an ``'images'`` list whose entries carry ``'imgid'``,
    ``'filename'``, ``'sentids'`` and ``'sentences'`` (each sentence with
    ``'sentid'``, ``'tokens'`` and ``'raw'``). One dataset item corresponds to
    one sentence.

    Args:
        input_json_path: path of the caption JSON file.
        vocab: callable mapping a token to its id.
        vf_dir: feature location. Without attention it is opened with
            ``BigFile``; with attention ``<vf_dir>_fc/<imgid>.npy`` and
            ``<vf_dir>_att/<imgid>.npz`` are loaded per item.
        use_att: whether attention features are used.
        eng_gt_file: optional JSON file with the same ``'images'`` layout whose
            sentence tokens are joined into reference strings per filename.
        rootpath: accepted for compatibility; not used by this class.
    """

    def __init__(self, input_json_path, vocab, vf_dir, use_att=False, eng_gt_file=None, rootpath=rootpath):
        print(input_json_path)
        with open(input_json_path) as f:
            dataset_json = json.load(f)

        self.eng_gt_file = eng_gt_file
        self.imgname2enggt = {}
        if self.eng_gt_file is not None:
            assert os.path.exists(self.eng_gt_file), "Eng gt file not exist: %s"%eng_gt_file
            print ('Loading eng gt file')
            # Close the handle right after parsing (it used to be leaked).
            with open(self.eng_gt_file) as f:
                eng_data = json.load(f)
            for x in eng_data['images']:
                self.imgname2enggt[x['filename']] = [' '.join(y['tokens']) for y in x['sentences']]

        self.images = dataset_json['images']
        self.vocab = vocab
        self.sentences = {}       # sentid -> (tokens, raw)
        self.img2sents = {}       # imgid -> list of sentids
        self.img2enggt = {}       # imgid -> list of reference strings
        self.img2filename = {}    # imgid -> filename up to the first '.'
        self.sentId2imgId = {}
        self.imgIds = []
        self.sentIds = []
        for img in self.images:
            img_id = img['imgid']
            self.img2filename[img_id] = img['filename'].split('.')[0]
            self.imgIds.append(img_id)
            self.img2sents[img_id] = img['sentids']
            self.img2enggt[img_id] = self.imgname2enggt.get(img['filename'], [])
            for sent in img['sentences']:
                self.sentences[sent['sentid']] = (sent['tokens'], sent['raw'])
                self.sentIds.append(sent['sentid'])
                self.sentId2imgId[sent['sentid']] = img_id

        self.use_att = use_att
        self.vf_dir = vf_dir
        # Plain truthiness here and in __getitem__, so both always take the
        # same branch; the old `== True` / `== False` pair disagreed for
        # non-bool flags such as None.
        if not self.use_att:
            self.vf_reader = BigFile(vf_dir)

    def _encode(self, tokens):
        """Return the vocabulary ids of *tokens* followed by the '<end>' id."""
        ids = [self.vocab(token) for token in tokens]
        ids.append(self.vocab('<end>'))
        return ids

    def __getitem__(self, index):
        """Return the training tuple for the *index*-th sentence.

        Returns:
            ``(caption, feature, att_feature, img_id, img_name, img_captions,
            eng_gt)`` where ``caption`` is a tensor of token ids ending with
            '<end>' (no '<start>' is prepended), ``img_captions`` holds the id
            lists of all sentences of the same image, and ``att_feature`` is
            ``None`` when attention is not used.
        """
        sentid = self.sentIds[index]
        img_id = self.sentId2imgId[sentid]
        img_name = self.img2filename[img_id]

        tokens, _ = self.sentences[sentid]
        caption = torch.Tensor(self._encode(tokens))
        img_captions = [self._encode(self.sentences[x][0]) for x in self.img2sents[img_id]]
        eng_gt = self.img2enggt[img_id]

        if not self.use_att:
            feature = np.array(self.vf_reader.read_one(img_name), dtype='float32')
            feature = torch.from_numpy(feature)
            return caption, feature, None, img_id, img_name, img_captions, eng_gt

        feature = np.load(os.path.join(self.vf_dir+'_fc', str(img_id)) + '.npy')
        att_feature = np.load(os.path.join(self.vf_dir + '_att', str(img_id)) + '.npz')['feat']
        feature = torch.from_numpy(feature)
        att_feature = torch.from_numpy(att_feature)
        return caption, feature, att_feature, img_id, img_name, img_captions, eng_gt

    def __len__(self):
        """Return the number of sentences."""
        return len(self.sentIds)