def __init__(self, config, split, name='vlmap_memft'):
    self.config = config
    self.data_dir = data_dir = config.data_dir
    self.name = name
    self.split = split

    log.warn('loading image_info ..')
    image_info_path = os.path.join(data_dir, '{}_image_info.pkl'.format(split))
    image_info = cPickle.load(open(image_info_path, 'rb'))
    self._ids = image_info['image_ids']
    self.image_id2idx = image_info['image_id2idx']
    log.info('loading image_info done')

    log.warn('loading processed data ..')
    processed_path = os.path.join(data_dir, '{}_processed.pkl'.format(split))
    self.processed = cPickle.load(open(processed_path, 'rb'))
    log.info('loading processed done')

    log.warn('loading answer_dict ..')
    answer_dict_path = os.path.join(data_dir, 'answer_dict.pkl')
    self.answer_dict = cPickle.load(open(answer_dict_path, 'rb'))
    self.num_answers = len(self.answer_dict['vocab'])
    log.info('loading answer_dict done')

    log.warn('loading wordset_dict ..')
    ws_dict_path = os.path.join(data_dir, 'wordset_dict5.pkl')
    self.ws_dict = cPickle.load(open(ws_dict_path, 'rb'))
    log.info('loading wordset_dict done')

    if self.config.debug:
        self.image_features, self.spatial_features, self.normal_boxes, \
            self.num_boxes, self.max_box_num, self.vfeat_dim = get_dummy_data()
    else:
        with h5py.File(os.path.join(data_dir, '{}_vfeat.hdf5'.format(split)),
                       'r') as f:
            self.vfeat_dim = int(f['data_info']['vfeat_dim'].value)
            self.max_box_num = int(f['data_info']['max_box_num'].value)
            log.warn('loading {} image_features ..'.format(split))
            self.image_features = np.array(f.get('image_features'))
            log.warn('loading {} normal_boxes ..'.format(split))
            self.normal_boxes = np.array(f.get('normal_boxes'))
            log.warn('loading {} num_boxes ..'.format(split))
            self.num_boxes = np.array(f.get('num_boxes'))
            log.warn('loading {} spatial_features ..'.format(split))
            self.spatial_features = np.array(f.get('spatial_features'))
            log.warn('loading {} features done ..'.format(split))

    log.info('dataset {} {} init done'.format(name, split))
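# get_dummy_data() is referenced by every constructor here but not defined
# in this listing. The stand-in below is a hypothetical sketch inferred
# only from the unpacking order and the hdf5 fields (image_features,
# spatial_features, normal_boxes, num_boxes, max_box_num, vfeat_dim);
# the feature dimensions (2048-d visual, 6-d spatial, 4-d boxes) are
# assumptions, not necessarily the project's actual values.
import numpy as np

def get_dummy_data(num_images=2, max_box_num=36, vfeat_dim=2048):
    image_features = np.zeros(
        [num_images, max_box_num, vfeat_dim], dtype=np.float32)
    spatial_features = np.zeros(
        [num_images, max_box_num, 6], dtype=np.float32)
    normal_boxes = np.zeros([num_images, max_box_num, 4], dtype=np.float32)
    num_boxes = np.full([num_images], max_box_num, dtype=np.int32)
    return (image_features, spatial_features, normal_boxes, num_boxes,
            max_box_num, vfeat_dim)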
def __init__(self, batch, config, is_train=True, image_features=None):
    self.batch = batch
    self.config = config
    self.image_dir = config.image_dir
    self.is_train = is_train

    self.word_weight_dir = getattr(config, 'pretrain_word_weight_dir', None)
    if self.word_weight_dir is None:
        log.warn('word_weight_dir is None')

    self.losses = {}
    self.report = {}
    self.mid_result = {}
    self.output = {}
    self.heavy_output = {}
    self.vis_image = {}

    self.vocab = cPickle.load(open(config.vocab_path, 'rb'))
    self.answer_dict = cPickle.load(
        open(os.path.join(config.tf_record_dir, 'answer_dict.pkl'), 'rb'))
    self.num_answer = len(self.answer_dict['vocab'])
    self.num_train_answer = self.answer_dict['num_train_answer']
    # masks partitioning the answer vocabulary into train-time answers
    # (the first num_train_answer entries) and held-out test answers
    self.train_answer_mask = tf.expand_dims(tf.sequence_mask(
        self.num_train_answer, maxlen=self.num_answer, dtype=tf.float32),
        axis=0)
    self.test_answer_mask = 1.0 - self.train_answer_mask
    self.obj_answer_mask = tf.expand_dims(tf.constant(
        self.answer_dict['is_object'], dtype=tf.float32), axis=0)
    self.attr_answer_mask = tf.expand_dims(tf.constant(
        self.answer_dict['is_attribute'], dtype=tf.float32), axis=0)

    self.glove_map = modules.LearnGloVe(self.vocab)
    self.answer_exist_mask = modules.AnswerExistMask(
        self.answer_dict, self.word_weight_dir)
    self.answer_non_exist_mask = 1.0 - self.answer_exist_mask

    if self.config.debug:
        self.features, self.spatials, self.normal_boxes, self.num_boxes, \
            self.max_box_num, self.vfeat_dim = get_dummy_data()
    elif image_features is None:
        log.infov('loading image features...')
        with h5py.File(config.vfeat_path, 'r') as f:
            self.features = np.array(f.get('image_features'))
            log.infov('features done')
            self.spatials = np.array(f.get('spatial_features'))
            log.infov('spatials done')
            self.normal_boxes = np.array(f.get('normal_boxes'))
            log.infov('normal_boxes done')
            self.num_boxes = np.array(f.get('num_boxes'))
            log.infov('num_boxes done')
            self.max_box_num = int(f['data_info']['max_box_num'].value)
            self.vfeat_dim = int(f['data_info']['vfeat_dim'].value)
            log.infov('done')
    else:
        # reuse features already loaded by the caller instead of re-reading
        self.features = image_features['features']
        self.spatials = image_features['spatials']
        self.normal_boxes = image_features['normal_boxes']
        self.num_boxes = image_features['num_boxes']
        self.max_box_num = image_features['max_box_num']
        self.vfeat_dim = image_features['vfeat_dim']

    self.build()
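# A minimal sketch of how the answer masks above partition the answer
# vocabulary, with made-up sizes (num_train_answer=3, num_answer=5).
# tf.sequence_mask marks the first num_train_answer entries and the test
# mask is its complement. Written for the TF1-style graph/session API
# this codebase uses.
import tensorflow as tf

num_train_answer, num_answer = 3, 5
train_answer_mask = tf.expand_dims(tf.sequence_mask(
    num_train_answer, maxlen=num_answer, dtype=tf.float32), axis=0)
test_answer_mask = 1.0 - train_answer_mask
with tf.Session() as sess:
    print(sess.run(train_answer_mask))  # [[1. 1. 1. 0. 0.]]
    print(sess.run(test_answer_mask))   # [[0. 0. 0. 1. 1.]]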
def __init__(self, config, split, name='pretrain'):
    self.config = config
    self.data_dir = data_dir = config.data_dir
    self.name = name
    self.split = split

    log.warn('loading image_info ..')
    image_info_path = os.path.join(data_dir, '{}_image_info.pkl'.format(split))
    image_info = cPickle.load(open(image_info_path, 'rb'))
    self._ids = image_info['image_ids']
    self.image_id2idx = image_info['image_id2idx']
    log.info('loading image_info done')

    log.warn('loading processed data ..')
    processed_path = os.path.join(data_dir, '{}_processed.pkl'.format(split))
    self.processed = cPickle.load(open(processed_path, 'rb'))
    log.info('loading processed done')

    log.warn('loading answer_dict ..')
    answer_dict_path = os.path.join(data_dir, 'answer_dict.pkl')
    self.answer_dict = cPickle.load(open(answer_dict_path, 'rb'))
    self.num_answers = len(self.answer_dict['vocab'])
    log.info('loading answer_dict done')

    log.warn('loading wordset_dict ..')
    ws_dict_path = os.path.join(data_dir, config.wordset_dict_name)
    self.ws_dict = cPickle.load(open(ws_dict_path, 'rb'))
    log.info('loading wordset_dict: {} done'.format(ws_dict_path))

    if self.config.debug:
        log.warn('Debug mode: proceed with dummy data')
        self.image_features, self.spatial_features, self.normal_boxes, \
            self.num_boxes, self.max_box_num, self.vfeat_dim = get_dummy_data()
    else:
        log.warn('No debug mode: proceed with real data')
        with h5py.File(os.path.join(data_dir, '{}_vfeat.hdf5'.format(split)),
                       'r') as f:
            self.vfeat_dim = int(f['data_info']['vfeat_dim'].value)
            self.max_box_num = int(f['data_info']['max_box_num'].value)
            log.warn('loading {} image_features ..'.format(split))
            self.image_features = np.array(f.get('image_features'))
            log.warn('loading {} normal_boxes ..'.format(split))
            self.normal_boxes = np.array(f.get('normal_boxes'))
            log.warn('loading {} num_boxes ..'.format(split))
            self.num_boxes = np.array(f.get('num_boxes'))
            log.warn('loading {} spatial_features ..'.format(split))
            self.spatial_features = np.array(f.get('spatial_features'))
            log.warn('loading {} features done ..'.format(split))

    # per-key cursors for cycling through wordset choices and
    # blank-fill example indices across epochs
    self.wordset_choice_idx = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    self.index_list_idx = {
        'obj_blank_fill': defaultdict(int),
        'attr_blank_fill': defaultdict(int),
    }

    log.info('dataset {} {} init done'.format(name, split))
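# The nested defaultdicts above act as lazily initialized per-key
# cursors: any key path can be read or advanced without prior setup.
# A toy illustration (the concrete key names here are hypothetical):
from collections import defaultdict

wordset_choice_idx = defaultdict(
    lambda: defaultdict(lambda: defaultdict(int)))
idx = wordset_choice_idx['task']['image_id']['wordset']      # 0 on first access
wordset_choice_idx['task']['image_id']['wordset'] = idx + 1  # advance cursor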
def __init__(self, config, split, name='vlmap_memft'):
    self.config = config
    self.data_dir = data_dir = config.data_dir
    self.name = name
    self.split = split

    log.warn('loading image_info ..')
    image_info_path = os.path.join(data_dir, '{}_image_info.pkl'.format(split))
    image_info = cPickle.load(open(image_info_path, 'rb'))
    self._ids = image_info['image_ids']
    self.image_id2idx = image_info['image_id2idx']
    log.info('loading image_info done')

    log.warn('loading processed data ..')
    processed_path = os.path.join(data_dir, '{}_processed.pkl'.format(split))
    self.processed = cPickle.load(open(processed_path, 'rb'))
    log.info('loading processed done')

    log.warn('loading answer_dict ..')
    answer_dict_path = os.path.join(data_dir, 'answer_dict.pkl')
    self.answer_dict = cPickle.load(open(answer_dict_path, 'rb'))
    self.num_answers = len(self.answer_dict['vocab'])
    log.info('loading answer_dict done')

    log.warn('loading wordset_dict ..')
    ws_dict_path = os.path.join(data_dir, 'wordset_dict5.pkl')
    self.ws_dict = cPickle.load(open(ws_dict_path, 'rb'))
    log.info('loading wordset_dict done')

    log.warn('loading enwiki_context_dict ..')
    enwiki_dict_pkl_path = os.path.join(
        data_dir, 'enwiki_context_dict_w3_p{}_n5.pkl'.format(
            config.enwiki_preprocessing))
    enwiki_dict_h5_path = os.path.join(
        data_dir, 'enwiki_context_dict_w3_p{}_n5.hdf5'.format(
            config.enwiki_preprocessing))
    self.enwiki_dict = cPickle.load(open(enwiki_dict_pkl_path, 'rb'))
    with h5py.File(enwiki_dict_h5_path, 'r') as f:
        self.enwiki_dict['np_context'] = f['np_context'].value
        self.enwiki_dict['np_context_len'] = f['np_context_len'].value

    if self.config.debug:
        self.image_features, self.spatial_features, self.normal_boxes, \
            self.num_boxes, self.max_box_num, self.vfeat_dim = get_dummy_data()
    else:
        with h5py.File(os.path.join(data_dir, '{}_vfeat.hdf5'.format(split)),
                       'r') as f:
            self.vfeat_dim = int(f['data_info']['vfeat_dim'].value)
            self.max_box_num = int(f['data_info']['max_box_num'].value)
            log.warn('loading {} image_features ..'.format(split))
            self.image_features = np.array(f.get('image_features'))
            log.warn('loading {} normal_boxes ..'.format(split))
            self.normal_boxes = np.array(f.get('normal_boxes'))
            log.warn('loading {} num_boxes ..'.format(split))
            self.num_boxes = np.array(f.get('num_boxes'))
            log.warn('loading {} spatial_features ..'.format(split))
            self.spatial_features = np.array(f.get('spatial_features'))
            log.warn('loading {} features done ..'.format(split))

    self.wordset_choice_idx = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    self.enwiki_choice_idx = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    self.index_list_idx = {
        'obj_blank_fill': defaultdict(int),
        'attr_blank_fill': defaultdict(int),
    }

    log.info('dataset {} {} init done'.format(name, split))
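# Note: Dataset.value, used here and in the other loaders, is the old
# h5py accessor; it was deprecated in h5py 2.x and removed in 3.0. If
# this code is run against a newer h5py, the equivalent read is indexing
# with an empty tuple, e.g.:
#     self.enwiki_dict['np_context'] = f['np_context'][()]
#     self.vfeat_dim = int(f['data_info']['vfeat_dim'][()])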