def __init__(self, **kwargs):
    """Load a referring-expression split, keeping only single-box entries.

    Expected kwargs: dataset, splitBy, split, dictionaryfile, testrun,
    coco_bottomup. Reads cache/prepro/<dataset>_<splitBy>/<split>.json.
    """
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    split = kwargs.get('split')
    data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                         split + '.json')
    with open(data_json, 'r') as f:
        self.data = json.load(f)
    # Only use the questions having exactly 1 non-empty bbox as answer.
    # BUG FIX: check len(ent['gtbox']) == 1 BEFORE indexing gtbox[0] —
    # the original order raised IndexError on an empty gtbox list.
    self.data = [
        ent for ent in self.data
        if len(ent['gtbox']) == 1 and len(ent['gtbox'][0]) != 0
    ]
    dictfile = kwargs.get('dictionaryfile')
    self.dictionary = Dictionary.load_from_file(dictfile)
    if kwargs.get('testrun'):
        # Tiny subset for quick smoke runs.
        self.data = self.data[:32]
    self.spatial = True
    self.image_features_path_coco = kwargs.get('coco_bottomup')
    self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
    # BUG FIX: dropped a dead second argument to .format() — the string
    # has a single placeholder; split is reported on the next line.
    print("Dataset [{}] loaded....".format(dataset))
    print("Split [{}] has {} ref exps.".format(split, len(self.data)))
def __init__(self, **kwargs):
    """Load pickled data and build indices over pooled / bottom-up features.

    Expected kwargs: file, isnms, trainembd, dictionaryfile, testrun,
    coco_pool_features, genome_pool_features, coco_bottomup,
    genome_bottomup.
    """
    self.isnms = kwargs.get('isnms')
    self.trainembd = kwargs.get('trainembd')
    # 6 position-encoded vectors as used by irls
    self.spatial = True
    data_path = kwargs.get('file')
    with open(data_path, 'rb') as f:
        self.data = pickle.load(f)
    if self.trainembd:
        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
    if kwargs.get('testrun'):
        # Small slice for quick smoke runs.
        self.data = self.data[:32]
    # Pooled-feature files (coco + visual genome) and their id indices.
    self.pool_features_path_coco = kwargs.get('coco_pool_features')
    self.pool_features_path_genome = kwargs.get('genome_pool_features')
    self.poolcoco_id_to_index = self._poolcreate_coco_id_to_index(
        self.pool_features_path_coco)
    self.poolcoco_id_to_index_gen = self._poolcreate_coco_id_to_index(
        self.pool_features_path_genome)
    # Bottom-up feature files and their id indices.
    self.image_features_path_coco = kwargs.get('coco_bottomup')
    self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
    self.image_features_path_genome = kwargs.get('genome_bottomup')
    self.genome_id_to_index = self.id_to_index(
        self.image_features_path_genome)
def __init__(self, **kwargs):
    """Load a VQD split; for 'vqd1' keep only single-box questions.

    Expected kwargs: dataset ('vqd1' filters to 1-box answers), splitBy,
    split, dictionaryfile, testrun, vqd_detfeats, istrain.
    """
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    split = kwargs.get('split')
    data_json = osp.join('cache/prepro', "vqd" + "_" + splitBy,
                         split + '.json')
    with open(data_json, 'r') as f:
        self.data = json.load(f)
    # only use 1 or more gt boxes
    if dataset == 'vqd1':
        print('VQDv1 1box loaded .......')
        # Only use the questions having exactly 1 non-empty bbox as answer.
        # BUG FIX: check len(ent['gtbox']) == 1 BEFORE indexing gtbox[0] —
        # the original order raised IndexError on an empty gtbox list.
        self.data = [
            ent for ent in self.data
            if len(ent['gtbox']) == 1 and len(ent['gtbox'][0]) != 0
        ]
    dictfile = kwargs.get('dictionaryfile')
    self.dictionary = Dictionary.load_from_file(dictfile)
    if kwargs.get('testrun'):
        # Tiny subset for quick smoke runs.
        self.data = self.data[:32]
    self.spatial = True
    self.image_features_path_coco = kwargs.get('vqd_detfeats').format(split)
    self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
    # BUG FIX: dropped a dead second argument to .format() — the string
    # has a single placeholder; split is reported on the next line.
    print("Dataset [{}] loaded....".format(dataset))
    print("Split [{}] has {} ref exps.".format(split, len(self.data)))
    cocoids = set(self.coco_id_to_index)
    if kwargs.get('istrain'):
        # Known-bad image id ("some qid doesnot exist").
        # BUG FIX: discard() instead of remove() — remove() raised
        # KeyError when the id was not present in this feature file.
        cocoids.discard(81768)
    # Drop entries whose image id has no detection features.
    self.data = [ent for ent in self.data if ent['image_id'] in cocoids]
def __init__(self, **kwargs):
    """Load a referring-expression split (no image features attached).

    Expected kwargs: dataset, splitBy, split, dictionaryfile, testrun.
    """
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    split = kwargs.get('split')
    data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                         split + '.json')
    with open(data_json, 'r') as f:
        self.data = json.load(f)
    dictfile = kwargs.get('dictionaryfile')
    self.dictionary = Dictionary.load_from_file(dictfile)
    if kwargs.get('testrun'):
        # Tiny subset for quick smoke runs.
        self.data = self.data[:32]
    self.spatial = True
    # BUG FIX: dropped a dead second argument to .format() — the string
    # has a single placeholder; split is reported on the next line.
    print("Dataset [{}] loaded....".format(dataset))
    print("Split [{}] has {} ref exps.".format(split, len(self.data)))
def __init__(self, **kwargs):
    """Load a refcoco-style split and index its Faster-RCNN features.

    Expected kwargs: dataset, splitBy, split, dictionaryfile, testrun,
    refcoco_frcnn (directory holding <dataset>_<splitBy>_det_feats.h5).
    """
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    split = kwargs.get('split')
    data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                         split + '.json')
    with open(data_json, 'r') as f:
        self.data = json.load(f)
    dictfile = kwargs.get('dictionaryfile')
    self.dictionary = Dictionary.load_from_file(dictfile)
    if kwargs.get('testrun'):
        # Tiny subset for quick smoke runs.
        self.data = self.data[:32]
    self.spatial = True
    # Detection-feature file is keyed by dataset and splitBy.
    feats_use = '{}_{}_det_feats.h5'.format(dataset, splitBy)
    self.image_features_path_coco = osp.join(kwargs.get('refcoco_frcnn'),
                                             feats_use)
    self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
    # BUG FIX: dropped a dead second argument to .format() — the string
    # has a single placeholder; split is reported on the next line.
    print("Dataset [{}] loaded....".format(dataset))
    print("Split [{}] has {} ref exps.".format(split, len(self.data)))
def create_glove_embedding_init(idx2word, glove_file):
    """Build an embedding matrix for *idx2word* from a GloVe text file.

    Returns (weights, word2emb): weights is a float32 array of shape
    (len(idx2word), emb_dim) whose rows stay zero for out-of-vocabulary
    words; word2emb maps every GloVe token to its vector.
    """
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    # Dimension inferred from the first line: a token followed by floats.
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    word2emb = {}
    for entry in entries:
        pieces = entry.split(' ')
        word2emb[pieces[0]] = np.array([float(v) for v in pieces[1:]])
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)
    for idx, word in enumerate(idx2word):
        vec = word2emb.get(word)
        if vec is not None:
            weights[idx] = vec
    return weights, word2emb


if __name__ == '__main__':
    # Build the dictionary for the 'Ourdb' dataset and cache GloVe weights.
    ds = 'Ourdb'
    d = create_dictionary(ds)
    d.dump_to_file('data/dictionary.pickle')
    d = Dictionary.load_from_file('data/dictionary.pickle')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
import os
import os.path as osp
import json

import numpy as np

# Build a dictionary and cached GloVe embedding matrix for every
# configured dataset.
for ds in config.dataset:
    kwargs = {**config.global_config, **config.dataset[ds]}
    data_root = kwargs.get('data_root')
    dataset = kwargs.get('dataset')
    splitBy = kwargs.get('splitBy')
    splits = kwargs.get('splits')
    data = []
    # NOTE(review): assumes 'train' is not already listed in splits —
    # otherwise its entries are loaded twice; verify against config.
    for split in splits + ['train']:
        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')
        with open(data_json, 'r') as f:
            data.extend(json.load(f))
    d = create_dictionary(data, dataset=dataset)
    dict_path = kwargs['dictionaryfile'].format(dataset)
    basedir = osp.dirname(dict_path)
    # BUG FIX: 'os' was never imported ('import os.path as osp' does not
    # bind the name os), so os.path.dirname/os.mkdir raised NameError.
    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(basedir, exist_ok=True)
    d.dump_to_file(dict_path)
    d = Dictionary.load_from_file(dict_path)
    emb_dim = 300
    glove = 'glove/glove.6B.%dd.txt' % emb_dim
    embedding_basedir = osp.dirname(kwargs['glove'])
    # NOTE(review): embedding_basedir appears to be a template with a {}
    # placeholder, formatted with the glove path here and the dataset
    # name below — confirm against the config values.
    glove_file = embedding_basedir.format(glove)
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save(
        osp.join(embedding_basedir.format(ds),
                 'glove6b_init_%dd.npy' % emb_dim),
        weights)