def image_feats_converter(filenames):
    """Convert a bottom-up-attention TSV of per-image box features into two HDF5 files.

    Reads the TSV named by ``filenames['infile']`` (base64-encoded boxes/features,
    36 fixed boxes per image) and writes train/val HDF5 datasets plus
    imageid->row-index pickles.

    Args:
        filenames: dict with keys 'train_data_file', 'val_data_file',
            'train_ids_file', 'val_ids_file', 'infile',
            'train_indices_file', 'val_indices_file'.

    Raises:
        ValueError: if a TSV row's image id is in neither the train nor val set.

    Relies on module-level ``FIELDNAMES``, ``num_fixed_boxes`` and
    ``feature_length``, and on ``utils.load_imageid`` returning a set-like
    collection supporting ``remove`` — assumed from usage; confirm at call site.
    """
    h_train = h5py.File(filenames['train_data_file'], "w")
    h_val = h5py.File(filenames['val_data_file'], "w")

    # Reuse cached image-id pickles when both exist; otherwise scan the image
    # directories and cache the ids for the next run.
    if os.path.exists(filenames['train_ids_file']) and os.path.exists(filenames['val_ids_file']):
        print(filenames['train_ids_file'])
        print(filenames['val_ids_file'])
        # FIX: open pickle files via context managers so the handles are closed.
        with open(filenames['train_ids_file'], 'rb') as f:
            train_imgids = cPickle.load(f)
        with open(filenames['val_ids_file'], 'rb') as f:
            val_imgids = cPickle.load(f)
    else:
        train_imgids = utils.load_imageid('data/train2014')
        val_imgids = utils.load_imageid('data/val2014')
        with open(filenames['train_ids_file'], 'wb') as f:
            cPickle.dump(train_imgids, f)
        with open(filenames['val_ids_file'], 'wb') as f:
            cPickle.dump(val_imgids, f)

    train_indices = {}
    val_indices = {}

    # Pre-allocate fixed-size datasets: one row of num_fixed_boxes boxes per image.
    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')

    val_img_bb = h_val.create_dataset(
        'image_bb', (len(val_imgids), num_fixed_boxes, 4), 'f')
    val_img_features = h_val.create_dataset(
        'image_features', (len(val_imgids), num_fixed_boxes, feature_length), 'f')
    val_spatial_img_features = h_val.create_dataset(
        'spatial_features', (len(val_imgids), num_fixed_boxes, 6), 'f')

    train_counter = 0
    val_counter = 0

    print("reading tsv...")
    with open(filenames['infile'], "r") as tsv_in_file:
        reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
        for item in tqdm(reader):
            item['num_boxes'] = int(item['num_boxes'])
            image_id = int(item['image_id'])
            image_w = float(item['image_w'])
            image_h = float(item['image_h'])
            # Boxes are base64-encoded float32 (x1, y1, x2, y2) per box.
            bboxes = np.frombuffer(
                base64.b64decode(item['boxes']),
                dtype=np.float32).reshape((item['num_boxes'], -1))

            box_width = bboxes[:, 2] - bboxes[:, 0]
            box_height = bboxes[:, 3] - bboxes[:, 1]
            scaled_width = box_width / image_w
            scaled_height = box_height / image_h
            scaled_x = bboxes[:, 0] / image_w
            scaled_y = bboxes[:, 1] / image_h

            # Add a trailing axis so the six per-box scalars concatenate to (N, 6).
            box_width = box_width[..., np.newaxis]
            box_height = box_height[..., np.newaxis]
            scaled_width = scaled_width[..., np.newaxis]
            scaled_height = scaled_height[..., np.newaxis]
            scaled_x = scaled_x[..., np.newaxis]
            scaled_y = scaled_y[..., np.newaxis]

            # Spatial feature layout: (x1, y1, x2, y2, w, h), all normalized.
            spatial_features = np.concatenate(
                (scaled_x, scaled_y,
                 scaled_x + scaled_width, scaled_y + scaled_height,
                 scaled_width, scaled_height), axis=1)

            if image_id in train_imgids:
                # Remove so leftovers can be reported after the pass.
                train_imgids.remove(image_id)
                train_indices[image_id] = train_counter
                train_img_bb[train_counter, :, :] = bboxes
                train_img_features[train_counter, :, :] = np.frombuffer(
                    base64.b64decode(item['features']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))
                train_spatial_img_features[train_counter, :, :] = spatial_features
                train_counter += 1
            elif image_id in val_imgids:
                val_imgids.remove(image_id)
                val_indices[image_id] = val_counter
                val_img_bb[val_counter, :, :] = bboxes
                val_img_features[val_counter, :, :] = np.frombuffer(
                    base64.b64decode(item['features']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))
                val_spatial_img_features[val_counter, :, :] = spatial_features
                val_counter += 1
            else:
                # FIX: was `assert False, ...` — asserts are stripped under -O,
                # which would silently drop unknown rows. Raise explicitly.
                raise ValueError('Unknown image id: %d' % image_id)

    if len(train_imgids) != 0:
        print('Warning: train_image_ids is not empty')
    if len(val_imgids) != 0:
        print('Warning: val_image_ids is not empty')

    with open(filenames['train_indices_file'], 'wb') as f:
        cPickle.dump(train_indices, f)
    with open(filenames['val_indices_file'], 'wb') as f:
        cPickle.dump(val_indices, f)
    h_train.close()
    h_val.close()
    print("done!")
# --- Script-variant chunk: train/val 36-box converter (truncated mid-__main__) ---
# Output pickle paths for the id->index maps and cached id sets.
val_indices_file = 'val36_imgid2idx.pkl'
train_ids_file = 'train_ids.pkl'
val_ids_file = 'val_ids.pkl'

feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image in the *_36 TSVs

if __name__ == '__main__':
    # train_data_file / val_data_file are defined outside this chunk — TODO confirm.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        # NOTE(review): pickles opened in text mode ('r'); Python 3 pickle
        # requires 'rb' — presumably this chunk targets Python 2. Confirm.
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        train_imgids = utils.load_imageid('../data/train2014')
        val_imgids = utils.load_imageid('../data/val2014')
        # protocol=2 keeps the pickles readable from Python 2.
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'), protocol=2)
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'), protocol=2)

    train_indices = {}
    val_indices = {}

    # Pre-allocate fixed-shape train datasets; the val datasets and the TSV
    # reading loop continue beyond this chunk.
    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
# --- Script-variant chunk: train/val 36-box converter, 'data/'-relative paths
# (truncated mid-statement at the end) ---
train_ids_file = 'data/train_ids.pkl'
val_ids_file = 'data/val_ids.pkl'

feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image

if __name__ == '__main__':
    # train_data_file / val_data_file are defined outside this chunk — TODO confirm.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        # NOTE(review): text-mode open of a pickle — Python 3 needs 'rb'. Confirm.
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        train_imgids = utils.load_imageid('data/train2014')
        val_imgids = utils.load_imageid('data/val2014')
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}

    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
    # Chunk ends mid-call; the argument list continues in a part of the file
    # not visible here.
    val_img_bb = h_val.create_dataset(
def extract(split, infiles, task='scenario_data'):
    """Extract variable-box-count image features from TSVs into one HDF5 file.

    Unlike the fixed-36-box converters in this file, boxes are stored flat:
    'image_features' is (total_boxes, feature_length) and 'pos_boxes' maps each
    image's row index to its [start, end) slice of the box axis.

    Args:
        split: key into the path tables below (only 'infer' is defined here).
        infiles: iterable of TSV paths to read.
        task: dataset subdirectory under data/ used to build all paths.

    Side effects: writes data/<task>/infer.hdf5 and the ids/indices pickles.
    """
    FIELDNAMES = ["img_id", "img_h", "img_w", "objects_id", "objects_conf",
                  "attrs_id", "attrs_conf", "num_boxes", "boxes", "features"]
    data_file = {
        'infer': f'data/{task}/infer.hdf5',
    }
    indices_file = {
        'infer': f'data/{task}/infer_imgid2idx.pkl'
    }
    ids_file = {
        'infer': f'data/{task}/infer_ids.pkl'
    }
    path_imgs = {
        'infer': f'data/{task}/images'
    }
    known_num_boxes = {'infer': None}  # None => count boxes with a first pass
    feature_length = 2048
    min_fixed_boxes = 10
    max_fixed_boxes = 100

    if os.path.exists(ids_file[split]):
        # FIX: close pickle handles deterministically via context managers.
        with open(ids_file[split], 'rb') as f:
            imgids = cPickle.load(f)
    else:
        imgids = utils.load_imageid(path_imgs[split])
        with open(ids_file[split], 'wb') as f:
            cPickle.dump(imgids, f)

    h = h5py.File(data_file[split], 'w')

    # First pass: total box count is needed up front to size the datasets.
    if known_num_boxes[split] is None:
        num_boxes = 0
        for infile in infiles:
            print("reading tsv...%s" % infile)
            # FIX: was "r+" — the file is only read, so open read-only.
            with open(infile, "r") as tsv_in_file:
                reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
                for item in reader:
                    item['num_boxes'] = int(item['num_boxes'])
                    image_id = item['img_id']
                    if image_id in imgids:
                        num_boxes += item['num_boxes']
    else:
        num_boxes = known_num_boxes[split]
    print('num_boxes=%d' % num_boxes)

    img_features = h.create_dataset(
        'image_features', (num_boxes, feature_length), 'f')
    img_bb = h.create_dataset(
        'image_bb', (num_boxes, 4), 'f')
    spatial_img_features = h.create_dataset(
        'spatial_features', (num_boxes, 6), 'f')
    pos_boxes = h.create_dataset(
        'pos_boxes', (len(imgids), 2), dtype='int32')

    counter = 0    # next image row
    num_boxes = 0  # reused as the running box-axis offset
    indices = {}
    for infile in infiles:
        unknown_ids = []
        print("reading tsv...%s" % infile)
        with open(infile, "r") as tsv_in_file:
            reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
            for item in reader:
                item['num_boxes'] = int(item['num_boxes'])
                item['boxes'] = bytes(item['boxes'], 'utf')
                item['features'] = bytes(item['features'], 'utf')
                image_id = item['img_id']
                image_w = float(item['img_w'])
                image_h = float(item['img_h'])
                # FIX: base64.decodestring was removed in Python 3.9; this file
                # already requires Python 3 (f-strings), so use decodebytes.
                bboxes = np.frombuffer(
                    base64.decodebytes(item['boxes']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))

                box_width = bboxes[:, 2] - bboxes[:, 0]
                box_height = bboxes[:, 3] - bboxes[:, 1]
                scaled_width = box_width / image_w
                scaled_height = box_height / image_h
                scaled_x = bboxes[:, 0] / image_w
                scaled_y = bboxes[:, 1] / image_h

                # Trailing axis so the six scalars concatenate to (N, 6).
                box_width = box_width[..., np.newaxis]
                box_height = box_height[..., np.newaxis]
                scaled_width = scaled_width[..., np.newaxis]
                scaled_height = scaled_height[..., np.newaxis]
                scaled_x = scaled_x[..., np.newaxis]
                scaled_y = scaled_y[..., np.newaxis]

                # Layout: normalized (x1, y1, x2, y2, w, h).
                spatial_features = np.concatenate(
                    (scaled_x, scaled_y,
                     scaled_x + scaled_width, scaled_y + scaled_height,
                     scaled_width, scaled_height), axis=1)

                if image_id in imgids:
                    imgids.remove(image_id)
                    indices[image_id] = counter
                    pos_boxes[counter, :] = np.array([num_boxes, num_boxes + item['num_boxes']])
                    img_bb[num_boxes:num_boxes + item['num_boxes'], :] = bboxes
                    img_features[num_boxes:num_boxes + item['num_boxes'], :] = np.frombuffer(
                        base64.decodebytes(item['features']),
                        dtype=np.float32).reshape((item['num_boxes'], -1))
                    spatial_img_features[num_boxes:num_boxes + item['num_boxes'], :] = spatial_features
                    counter += 1
                    num_boxes += item['num_boxes']
                else:
                    unknown_ids.append(image_id)
        print('%d unknown_ids...' % len(unknown_ids))
        print('%d image_ids left...' % len(imgids))

    if len(imgids) != 0:
        print('Warning: %s_image_ids is not empty' % split)

    with open(indices_file[split], 'wb') as f:
        cPickle.dump(indices, f)
    h.close()
    print("done!")
# --- Script-variant chunk: test2014 36-box converter (truncated mid-__main__) ---
# addr_BU_path / addr_hdf5path / addr_pkl_path / addr_test_imgs are defined
# outside this chunk — TODO confirm.
infile = os.path.join(
    addr_BU_path,
    'test2014_36/test2014_resnet101_faster_rcnn_genome_36.tsv')
test_data_file = os.path.join(addr_hdf5path, 'test201436.hdf5')
test_indices_file = os.path.join(addr_pkl_path, 'test201436_imgid2idx.pkl')
test_ids_file = os.path.join(addr_pkl_path, 'test201436_ids.pkl')

feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image

if __name__ == '__main__':
    h_test = h5py.File(test_data_file, "w")

    # Reuse the cached id pickle when present; otherwise scan the image dir.
    if os.path.exists(test_ids_file):
        test_imgids = pickle.load(open(test_ids_file, 'rb'))
    else:
        test_imgids = utils.load_imageid(addr_test_imgs)
        # protocol=2 keeps the pickle readable from Python 2.
        pickle.dump(test_imgids, open(test_ids_file, 'wb'), protocol=2)

    test_indices = {}

    test_img_features = h_test.create_dataset(
        'image_features', (len(test_imgids), num_fixed_boxes, feature_length), 'f')
    test_img_bb = h_test.create_dataset('image_bb',
                                        (len(test_imgids), num_fixed_boxes, 4), 'f')
    test_spatial_img_features = h_test.create_dataset(
        'spatial_features', (len(test_imgids), num_fixed_boxes, 6), 'f')

    test_counter = 0
    unknown_ids = []
    # The TSV-reading loop continues beyond this chunk.
# Closes a list literal (presumably FIELDNAMES) begun in an earlier,
# unseen part of the file.
]
# --- Script-variant chunk: test2015 36-box converter (truncated mid-__main__) ---
infile = 'data/test2015_36/test2015_resnet101_faster_rcnn_genome_36.tsv'
test_data_file = 'data/test36.hdf5'
test_indices_file = 'data/test36_imgid2idx.pkl'
test_ids_file = 'data/test_ids.pkl'

feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image

if __name__ == '__main__':
    h_test = h5py.File(test_data_file, "w")

    if os.path.exists(test_ids_file):
        # NOTE(review): text-mode open of a pickle — Python 3 needs 'rb'. Confirm.
        test_imgids = cPickle.load(open(test_ids_file))
    else:
        test_imgids = utils.load_imageid('data/test2015')
        cPickle.dump(test_imgids, open(test_ids_file, 'wb'))

    test_indices = {}

    test_img_features = h_test.create_dataset(
        'image_features', (len(test_imgids), num_fixed_boxes, feature_length), 'f')
    test_img_bb = h_test.create_dataset('image_bb',
                                        (len(test_imgids), num_fixed_boxes, 4), 'f')
    test_spatial_img_features = h_test.create_dataset(
        'spatial_features', (len(test_imgids), num_fixed_boxes, 6), 'f')

    test_counter = 0
    # The TSV-reading loop continues beyond this chunk.
# --- Script-variant chunk: generic single-split 36-box converter, parameterized
# by a `target` name defined outside this chunk (truncated at a bodyless `with`) ---
infile = 'data/%s_36/%s_resnet101_faster_rcnn_genome_36.tsv' % (target, target)
data_file = 'data/%s36.hdf5' % target
indices_file = 'data/%s36_imgid2idx.pkl' % target
ids_file = 'data/%s_ids.pkl' % target

feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image

if __name__ == '__main__':
    h = h5py.File(data_file, "w")

    if os.path.exists(ids_file):
        imgids = cPickle.load(open(ids_file, 'rb'))
    else:
        imgids = utils.load_imageid('data/%s' % target)
        cPickle.dump(imgids, open(ids_file, 'wb'))

    indices = {}

    img_bb = h.create_dataset(
        'image_bb', (len(imgids), num_fixed_boxes, 4), 'f')
    img_features = h.create_dataset(
        'image_features', (len(imgids), num_fixed_boxes, feature_length), 'f')
    spatial_img_features = h.create_dataset(
        'spatial_features', (len(imgids), num_fixed_boxes, 6), 'f')

    counter = 0

    print("reading tsv...")
    # NOTE(review): "r+" opens for update though the TSV is only read; the body
    # of this `with` block lies beyond this chunk.
    with open(infile, "r+") as tsv_in_file:
# --- Script-variant chunk: combined train/val/test converter with the train/val
# half commented out (truncated inside an opening triple-quoted block) ---
feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image

if __name__ == '__main__':
    # Train/val HDF5 outputs are disabled in this variant; only test is written.
    # h_train = h5py.File(train_data_file, "w")
    # h_val = h5py.File(val_data_file, "w")
    h_test = h5py.File(test_data_file, "w")

    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file) and os.path.exists(test_ids_file):
        # NOTE(review): text-mode open of a pickle — Python 3 needs 'rb'. Confirm.
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
        test_imgids = cPickle.load(open(test_ids_file))
    else:
        train_imgids = utils.load_imageid('data/images/train2014')
        val_imgids = utils.load_imageid('data/images/val2014')
        # test_imgids = []
        test_imgids = utils.load_imageid('data/images/test2015')
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))
        cPickle.dump(test_imgids, open(test_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}
    test_indices = {}

    print('> len(test_imgids): %d' % len(test_imgids))

    # The triple-quoted string below comments out the train/val dataset
    # allocation; it closes in a part of the file not visible here.
    '''
    train_img_features = h_train.create_dataset(
# --- Script-variant chunk: abstract-scenes (VQA v2015) train/val converter
# (truncated mid-__main__) ---
if __name__ == '__main__':
    # train_data_file / val_data_file / *_ids_file, num_fixed_boxes and
    # feature_length are defined outside this chunk — TODO confirm.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        # NOTE(review): text-mode open of a pickle — Python 3 needs 'rb'. Confirm.
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        base_imgs = '../DATA_imgs/'
        folders = [
            'scene_img_abstract_v002_train2015/',
            'scene_img_abstract_v002_val2015/'
        ]
        train_imgids = utils.load_imageid(base_imgs + folders[0])  # set of ids
        val_imgids = utils.load_imageid(base_imgs + folders[1])  # set of ids
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}

    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
    # Val dataset allocation and the TSV loop continue beyond this chunk.
def extract(split, infiles):
    """Extract Visual7W image features from bottom-up-attention TSVs into HDF5.

    Stores boxes flat along axis 0 with a 'pos_boxes' (len(imgids), 2) dataset
    mapping each image's row to its [start, end) box slice; 'image_features' is
    (num_boxes, 196, feature_length).

    Args:
        split: 'train', 'val' or 'test' — selects output paths and image dir.
        infiles: iterable of TSV paths to read.

    Side effects: writes data/<split>.hdf5 and the ids/indices pickles.
    """
    FIELDNAMES = [
        'image_id', 'image_w', 'image_h', 'num_boxes', 'boxes', 'features'
    ]
    data_file = {
        'train': 'data/train.hdf5',
        'val': 'data/val.hdf5',
        'test': 'data/test2015.hdf5'
    }
    indices_file = {
        'train': 'data/train_imgid2idx.pkl',
        'val': 'data/val_imgid2idx.pkl',
        'test': 'data/test2015_imgid2idx.pkl'
    }
    ids_file = {
        'train': 'data/train_ids.pkl',
        'val': 'data/val_ids.pkl',
        'test': 'data/test2015_ids.pkl'
    }
    path_imgs = {
        'train': '/media/data-aioz/VQA/Visual7W/train',
        'val': '/media/data-aioz/VQA/Visual7W/val',
        'test': '/media/data-aioz/VQA/Visual7W/test'
    }
    # Known totals from a previous run; None forces a counting pass.
    # known_num_boxes = {
    #     'train': 2643089,
    #     'val': 1281164,
    #     'test': 2566887,
    # }
    known_num_boxes = {'train': None, 'val': None, 'test': None}
    feature_length = 2048
    min_fixed_boxes = 10
    max_fixed_boxes = 100

    if os.path.exists(ids_file[split]):
        # FIX: close pickle handles deterministically via context managers.
        with open(ids_file[split], 'rb') as f:
            imgids = cPickle.load(f)
    else:
        imgids = utils.load_imageid(path_imgs[split])
        with open(ids_file[split], 'wb') as f:
            cPickle.dump(imgids, f)

    h = h5py.File(data_file[split], 'w')

    # First pass: total box count is needed up front to size the datasets.
    if known_num_boxes[split] is None:
        num_boxes = 0
        for infile in infiles:
            print("reading tsv...%s" % infile)
            # FIX: was "r+" — the file is only read, so open read-only.
            with open(infile, "r") as tsv_in_file:
                reader = csv.DictReader(tsv_in_file,
                                        delimiter='\t',
                                        fieldnames=FIELDNAMES)
                for item in reader:
                    item['num_boxes'] = int(item['num_boxes'])
                    # TSV ids look like '<prefix>_<numeric id>'.
                    image_id = int(item['image_id'].split('_')[1])
                    if image_id in imgids:
                        num_boxes += item['num_boxes']
    else:
        num_boxes = known_num_boxes[split]
    print('num_boxes=%d' % num_boxes)

    img_features = h.create_dataset('image_features',
                                    (num_boxes, 196, feature_length), 'f')
    img_bb = h.create_dataset('image_bb', (num_boxes, 4), 'f')
    spatial_img_features = h.create_dataset('spatial_features',
                                            (num_boxes, 6), 'f')
    pos_boxes = h.create_dataset('pos_boxes', (len(imgids), 2), dtype='int32')

    counter = 0    # next image row
    num_boxes = 0  # reused as the running box-axis offset
    indices = {}
    for infile in infiles:
        unknown_ids = []
        print("reading tsv...%s" % infile)
        with open(infile, "r") as tsv_in_file:
            reader = csv.DictReader(tsv_in_file,
                                    delimiter='\t',
                                    fieldnames=FIELDNAMES)
            for item in reader:
                item['num_boxes'] = int(item['num_boxes'])
                item['boxes'] = bytes(item['boxes'], 'utf')
                item['features'] = bytes(item['features'], 'utf')
                image_id = int(item['image_id'].split('_')[1])
                image_w = float(item['image_w'])
                image_h = float(item['image_h'])
                # FIX: base64.decodestring was removed in Python 3.9; this file
                # already requires Python 3 (bytes(..., 'utf')), so decodebytes.
                # NOTE(review): boxes decoded as float64 here while the sibling
                # converter uses float32 — confirm against the TSV producer.
                bboxes = np.frombuffer(base64.decodebytes(item['boxes']),
                                       dtype=np.float64).reshape(
                                           (item['num_boxes'], -1))

                box_width = bboxes[:, 2] - bboxes[:, 0]
                box_height = bboxes[:, 3] - bboxes[:, 1]
                scaled_width = box_width / image_w
                scaled_height = box_height / image_h
                scaled_x = bboxes[:, 0] / image_w
                scaled_y = bboxes[:, 1] / image_h

                # Trailing axis so the six scalars concatenate to (N, 6).
                box_width = box_width[..., np.newaxis]
                box_height = box_height[..., np.newaxis]
                scaled_width = scaled_width[..., np.newaxis]
                scaled_height = scaled_height[..., np.newaxis]
                scaled_x = scaled_x[..., np.newaxis]
                scaled_y = scaled_y[..., np.newaxis]

                # Layout: normalized (x1, y1, x2, y2, w, h).
                spatial_features = np.concatenate(
                    (scaled_x, scaled_y,
                     scaled_x + scaled_width, scaled_y + scaled_height,
                     scaled_width, scaled_height), axis=1)

                if image_id in imgids:
                    imgids.remove(image_id)
                    indices[image_id] = counter
                    # NOTE(review): pos_boxes/img_bb/spatial use
                    # item['num_boxes'] spans while img_features is indexed by
                    # `counter` with a hard-coded 196 and the offset advances by
                    # 196 — these disagree unless every row has exactly 196
                    # boxes. Preserved as-is; confirm before relying on it.
                    pos_boxes[counter, :] = np.array(
                        [num_boxes, num_boxes + item['num_boxes']])
                    img_bb[num_boxes:num_boxes + item['num_boxes'], :] = bboxes
                    img_features[counter, :, :] = np.frombuffer(
                        base64.decodebytes(item['features']),
                        dtype=np.float32).reshape((196, -1))
                    spatial_img_features[
                        num_boxes:num_boxes + item['num_boxes'], :] = spatial_features
                    counter += 1
                    num_boxes += 196
                else:
                    unknown_ids.append(image_id)
        print('%d unknown_ids...' % len(unknown_ids))
        print('%d image_ids left...' % len(imgids))

    if len(imgids) != 0:
        print('Warning: %s_image_ids is not empty' % split)

    with open(indices_file[split], 'wb') as f:
        cPickle.dump(indices, f)
    h.close()
    print("done!")
# --- Script-variant chunk: train/val converter with paths from a `config`
# module (truncated mid-__main__) ---
val_ids_file = 'data/val_ids.pkl'

feature_length = 2048  # ResNet-101 bottom-up feature dimension
num_fixed_boxes = 36   # fixed number of boxes per image

# config.data_path is defined outside this chunk — TODO confirm.
train_path = os.path.join(config.data_path, 'train2014')
val_path = os.path.join(config.data_path, 'val2014')

if __name__ == '__main__':
    # train_data_file / val_data_file / train_ids_file come from outside this chunk.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        # NOTE(review): text-mode open of a pickle — Python 3 needs 'rb'. Confirm.
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        train_imgids = utils.load_imageid(train_path)
        val_imgids = utils.load_imageid(val_path)
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}

    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
    # Val dataset allocation and the TSV loop continue beyond this chunk.