def __init__(self, config, split, name='vlmap_memft'):
        """Load dataset metadata, dictionaries and visual features for `split`.

        Args:
            config: configuration object; reads ``config.data_dir`` and
                ``config.debug`` (full schema not visible here).
            split: split name used to build file names (e.g. 'train'/'val').
            name: human-readable dataset name, used only for logging.
        """
        self.config = config
        self.data_dir = data_dir = config.data_dir

        self.name = name
        self.split = split

        log.warn('loading image_info ..')
        image_info_path = os.path.join(data_dir,
                                       '{}_image_info.pkl'.format(split))
        # Context managers close the pickle files deterministically; the
        # original `cPickle.load(open(...))` left the handle to the GC.
        with open(image_info_path, 'rb') as f:
            image_info = cPickle.load(f)
        self._ids = image_info['image_ids']
        self.image_id2idx = image_info['image_id2idx']
        log.info('loading image_info done')

        log.warn('loading processed data ..')
        processed_path = os.path.join(data_dir,
                                      '{}_processed.pkl'.format(split))
        with open(processed_path, 'rb') as f:
            self.processed = cPickle.load(f)
        log.info('loading processed done')

        log.warn('loading answer_dict ..')
        answer_dict_path = os.path.join(data_dir, 'answer_dict.pkl')
        with open(answer_dict_path, 'rb') as f:
            self.answer_dict = cPickle.load(f)
        self.num_answers = len(self.answer_dict['vocab'])
        log.info('loading answer_dict done')

        log.warn('loading wordset_dict ..')
        ws_dict_path = os.path.join(data_dir, 'wordset_dict5.pkl')
        with open(ws_dict_path, 'rb') as f:
            self.ws_dict = cPickle.load(f)
        log.info('loading wordset_dict done')

        if self.config.debug:
            # Debug mode: skip the (slow) hdf5 load and use small fake arrays.
            self.image_features, self.spatial_features, self.normal_boxes, self.num_boxes, \
                self.max_box_num, self.vfeat_dim = get_dummy_data()
        else:
            with h5py.File(
                    os.path.join(data_dir, '{}_vfeat.hdf5'.format(split)),
                    'r') as f:

                # `[()]` reads a scalar dataset; `.value` was deprecated and
                # removed in h5py >= 3.0 (same result on h5py 2.x).
                self.vfeat_dim = int(f['data_info']['vfeat_dim'][()])
                self.max_box_num = int(f['data_info']['max_box_num'][()])
                # np.array(...) materializes each dataset in memory so the
                # arrays remain usable after the file is closed.
                log.warn('loading {} image_features ..'.format(split))
                self.image_features = np.array(f.get('image_features'))
                log.warn('loading {} normal_boxes ..'.format(split))
                self.normal_boxes = np.array(f.get('normal_boxes'))
                log.warn('loading {} num_boxes ..'.format(split))
                self.num_boxes = np.array(f.get('num_boxes'))
                log.warn('loading {} spatial_features ..'.format(split))
                self.spatial_features = np.array(f.get('spatial_features'))
                log.warn('loading {} features done ..'.format(split))

        log.info('dataset {} {} init done'.format(name, split))
# Example #2
    def __init__(self, batch, config, is_train=True, image_features=None):
        """Build the model: load vocab/answer dicts, masks and image features.

        Args:
            batch: input batch tensors (schema not visible here).
            config: configuration object; reads image_dir, vocab_path,
                tf_record_dir, vfeat_path, debug, and optionally
                pretrain_word_weight_dir.
            is_train: whether the model is built in training mode.
            image_features: optional pre-loaded feature dict; when given,
                the hdf5 load is skipped entirely.
        """
        self.batch = batch
        self.config = config
        self.image_dir = config.image_dir
        self.is_train = is_train

        self.word_weight_dir = getattr(config, 'pretrain_word_weight_dir',
                                       None)
        if self.word_weight_dir is None:
            log.warn('word_weight_dir is None')

        # Per-build result containers, filled in by self.build().
        self.losses = {}
        self.report = {}
        self.mid_result = {}
        self.output = {}
        self.heavy_output = {}
        self.vis_image = {}

        # Context managers close the pickle files deterministically; the
        # original `cPickle.load(open(...))` left the handles to the GC.
        with open(config.vocab_path, 'rb') as f:
            self.vocab = cPickle.load(f)
        answer_dict_path = os.path.join(config.tf_record_dir,
                                        'answer_dict.pkl')
        with open(answer_dict_path, 'rb') as f:
            self.answer_dict = cPickle.load(f)
        self.num_answer = len(self.answer_dict['vocab'])
        self.num_train_answer = self.answer_dict['num_train_answer']
        # [1, num_answer] mask: 1.0 for the first num_train_answer entries.
        self.train_answer_mask = tf.expand_dims(tf.sequence_mask(
            self.num_train_answer, maxlen=self.num_answer, dtype=tf.float32),
                                                axis=0)
        self.test_answer_mask = 1.0 - self.train_answer_mask
        self.obj_answer_mask = tf.expand_dims(tf.constant(
            self.answer_dict['is_object'], dtype=tf.float32),
                                              axis=0)
        self.attr_answer_mask = tf.expand_dims(tf.constant(
            self.answer_dict['is_attribute'], dtype=tf.float32),
                                               axis=0)

        self.glove_map = modules.LearnGloVe(self.vocab)
        self.answer_exist_mask = modules.AnswerExistMask(
            self.answer_dict, self.word_weight_dir)
        self.answer_non_exist_mask = 1.0 - self.answer_exist_mask

        if self.config.debug:
            # Debug mode: skip the (slow) hdf5 load and use small fake arrays.
            self.features, self.spatials, self.normal_boxes, self.num_boxes, \
                self.max_box_num, self.vfeat_dim = get_dummy_data()
        elif image_features is None:
            log.infov('loading image features...')
            with h5py.File(config.vfeat_path, 'r') as f:
                # np.array(...) materializes each dataset in memory so the
                # arrays remain usable after the file is closed.
                self.features = np.array(f.get('image_features'))
                log.infov('feature done')
                self.spatials = np.array(f.get('spatial_features'))
                log.infov('spatials done')
                self.normal_boxes = np.array(f.get('normal_boxes'))
                log.infov('normal_boxes done')
                self.num_boxes = np.array(f.get('num_boxes'))
                log.infov('num_boxes done')
                # `[()]` reads a scalar dataset; `.value` was deprecated and
                # removed in h5py >= 3.0 (same result on h5py 2.x).
                self.max_box_num = int(f['data_info']['max_box_num'][()])
                self.vfeat_dim = int(f['data_info']['vfeat_dim'][()])
            log.infov('done')
        else:
            # Reuse features already loaded by the caller (avoids re-reading
            # the hdf5 file when several models share one feature set).
            self.features = image_features['features']
            self.spatials = image_features['spatials']
            self.normal_boxes = image_features['normal_boxes']
            self.num_boxes = image_features['num_boxes']
            self.max_box_num = image_features['max_box_num']
            self.vfeat_dim = image_features['vfeat_dim']

        self.build()
# Example #3
    def __init__(self, config, split, name='pretrain'):
        """Load pretraining dataset metadata, dictionaries and visual features.

        Args:
            config: configuration object; reads ``config.data_dir``,
                ``config.wordset_dict_name`` and ``config.debug``.
            split: split name used to build file names (e.g. 'train'/'val').
            name: human-readable dataset name, used only for logging.
        """
        self.config = config
        self.data_dir = data_dir = config.data_dir

        self.name = name
        self.split = split

        log.warn('loading image_info ..')
        image_info_path = os.path.join(data_dir,
                                       '{}_image_info.pkl'.format(split))
        # Context managers close the pickle files deterministically; the
        # original `cPickle.load(open(...))` left the handle to the GC.
        with open(image_info_path, 'rb') as f:
            image_info = cPickle.load(f)
        self._ids = image_info['image_ids']
        self.image_id2idx = image_info['image_id2idx']
        log.info('loading image_info done')

        log.warn('loading processed data ..')
        processed_path = os.path.join(data_dir,
                                      '{}_processed.pkl'.format(split))
        with open(processed_path, 'rb') as f:
            self.processed = cPickle.load(f)
        log.info('loading processed done')

        log.warn('loading answer_dict ..')
        answer_dict_path = os.path.join(data_dir, 'answer_dict.pkl')
        with open(answer_dict_path, 'rb') as f:
            self.answer_dict = cPickle.load(f)
        self.num_answers = len(self.answer_dict['vocab'])
        log.info('loading answer_dict done')

        log.warn('loading wordset_dict ..')
        ws_dict_path = os.path.join(data_dir, config.wordset_dict_name)
        with open(ws_dict_path, 'rb') as f:
            self.ws_dict = cPickle.load(f)
        log.info('loading wordset_dict: {} done'.format(ws_dict_path))

        if self.config.debug:
            log.warn('Debug mode: proceed with dummy data')
            self.image_features, self.spatial_features, self.normal_boxes, self.num_boxes, \
                self.max_box_num, self.vfeat_dim = get_dummy_data()
        else:
            log.warn('No debug mode: proceed with real data')
            with h5py.File(
                    os.path.join(data_dir, '{}_vfeat.hdf5'.format(split)),
                    'r') as f:

                # `[()]` reads a scalar dataset; `.value` was deprecated and
                # removed in h5py >= 3.0 (same result on h5py 2.x).
                self.vfeat_dim = int(f['data_info']['vfeat_dim'][()])
                self.max_box_num = int(f['data_info']['max_box_num'][()])
                # np.array(...) materializes each dataset in memory so the
                # arrays remain usable after the file is closed.
                log.warn('loading {} image_features ..'.format(split))
                self.image_features = np.array(f.get('image_features'))
                log.warn('loading {} normal_boxes ..'.format(split))
                self.normal_boxes = np.array(f.get('normal_boxes'))
                log.warn('loading {} num_boxes ..'.format(split))
                self.num_boxes = np.array(f.get('num_boxes'))
                log.warn('loading {} spatial_features ..'.format(split))
                self.spatial_features = np.array(f.get('spatial_features'))
                log.warn('loading {} features done ..'.format(split))

        # Round-robin cursors for wordset choice sampling
        # (nested defaultdicts keyed per-level; counters start at 0).
        self.wordset_choice_idx = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int)))

        # Round-robin cursors into the blank-fill index lists, per task.
        self.index_list_idx = {
            'obj_blank_fill': defaultdict(int),
            'attr_blank_fill': defaultdict(int),
        }

        log.info('dataset {} {} init done'.format(name, split))
    def __init__(self, config, split, name='vlmap_memft'):
        """Load dataset metadata, dictionaries, enwiki contexts and features.

        Args:
            config: configuration object; reads ``config.data_dir``,
                ``config.enwiki_preprocessing`` and ``config.debug``.
            split: split name used to build file names (e.g. 'train'/'val').
            name: human-readable dataset name, used only for logging.
        """
        self.config = config
        self.data_dir = data_dir = config.data_dir

        self.name = name
        self.split = split

        log.warn('loading image_info ..')
        image_info_path = os.path.join(data_dir,
                                       '{}_image_info.pkl'.format(split))
        # Context managers close the pickle files deterministically; the
        # original `cPickle.load(open(...))` left the handle to the GC.
        with open(image_info_path, 'rb') as f:
            image_info = cPickle.load(f)
        self._ids = image_info['image_ids']
        self.image_id2idx = image_info['image_id2idx']
        log.info('loading image_info done')

        log.warn('loading processed data ..')
        processed_path = os.path.join(data_dir,
                                      '{}_processed.pkl'.format(split))
        with open(processed_path, 'rb') as f:
            self.processed = cPickle.load(f)
        log.info('loading processed done')

        log.warn('loading answer_dict ..')
        answer_dict_path = os.path.join(data_dir, 'answer_dict.pkl')
        with open(answer_dict_path, 'rb') as f:
            self.answer_dict = cPickle.load(f)
        self.num_answers = len(self.answer_dict['vocab'])
        log.info('loading answer_dict done')

        log.warn('loading wordset_dict ..')
        ws_dict_path = os.path.join(data_dir, 'wordset_dict5.pkl')
        with open(ws_dict_path, 'rb') as f:
            self.ws_dict = cPickle.load(f)
        log.info('loading wordset_dict done')

        log.warn('loading enwiki_context_dict ..')
        enwiki_dict_pkl_path = os.path.join(
            data_dir, 'enwiki_context_dict_w3_p{}_n5.pkl'.format(
                config.enwiki_preprocessing))
        enwiki_dict_h5_path = os.path.join(
            data_dir, 'enwiki_context_dict_w3_p{}_n5.hdf5'.format(
                config.enwiki_preprocessing))

        with open(enwiki_dict_pkl_path, 'rb') as f:
            self.enwiki_dict = cPickle.load(f)
        with h5py.File(enwiki_dict_h5_path, 'r') as f:
            # `[()]` reads a whole dataset into memory; `.value` was
            # deprecated and removed in h5py >= 3.0 (same result on 2.x).
            self.enwiki_dict['np_context'] = f['np_context'][()]
            self.enwiki_dict['np_context_len'] = f['np_context_len'][()]

        if self.config.debug:
            # Debug mode: skip the (slow) hdf5 load and use small fake arrays.
            self.image_features, self.spatial_features, self.normal_boxes, self.num_boxes, \
                self.max_box_num, self.vfeat_dim = get_dummy_data()
        else:
            with h5py.File(
                    os.path.join(data_dir, '{}_vfeat.hdf5'.format(split)),
                    'r') as f:

                self.vfeat_dim = int(f['data_info']['vfeat_dim'][()])
                self.max_box_num = int(f['data_info']['max_box_num'][()])
                # np.array(...) materializes each dataset in memory so the
                # arrays remain usable after the file is closed.
                log.warn('loading {} image_features ..'.format(split))
                self.image_features = np.array(f.get('image_features'))
                log.warn('loading {} normal_boxes ..'.format(split))
                self.normal_boxes = np.array(f.get('normal_boxes'))
                log.warn('loading {} num_boxes ..'.format(split))
                self.num_boxes = np.array(f.get('num_boxes'))
                log.warn('loading {} spatial_features ..'.format(split))
                self.spatial_features = np.array(f.get('spatial_features'))
                log.warn('loading {} features done ..'.format(split))

        # Round-robin cursors for wordset / enwiki context sampling
        # (nested defaultdicts keyed per-level; counters start at 0).
        self.wordset_choice_idx = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int)))
        self.enwiki_choice_idx = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int)))

        # Round-robin cursors into the blank-fill index lists, per task.
        self.index_list_idx = {
            'obj_blank_fill': defaultdict(int),
            'attr_blank_fill': defaultdict(int),
        }

        log.info('dataset {} {} init done'.format(name, split))