# Example #1 (scraped snippet marker)
def image_feats_converter(filenames):
    """Convert a bottom-up-attention TSV of per-image region features into
    train/val HDF5 files plus image_id -> row-index pickles.

    filenames: dict with keys 'train_data_file', 'val_data_file',
        'train_ids_file', 'val_ids_file', 'train_indices_file',
        'val_indices_file', and 'infile' (the source TSV).

    Side effects only: writes two HDF5 files and two index pickles.
    Raises ValueError if the TSV contains an image id that belongs to
    neither the train nor the val id set.
    """
    # 'with' guarantees both HDF5 files are flushed and closed even if
    # the conversion fails part-way (the originals leaked on error).
    with h5py.File(filenames['train_data_file'], "w") as h_train, \
         h5py.File(filenames['val_data_file'], "w") as h_val:

        # Reuse cached image-id sets when available; otherwise scan the
        # image directories and cache the result for next time.
        if os.path.exists(filenames['train_ids_file']) and os.path.exists(filenames['val_ids_file']):
            print(filenames['train_ids_file'])
            print(filenames['val_ids_file'])
            with open(filenames['train_ids_file'], 'rb') as f:
                train_imgids = cPickle.load(f)
            with open(filenames['val_ids_file'], 'rb') as f:
                val_imgids = cPickle.load(f)
        else:
            train_imgids = utils.load_imageid('data/train2014')
            val_imgids = utils.load_imageid('data/val2014')
            with open(filenames['train_ids_file'], 'wb') as f:
                cPickle.dump(train_imgids, f)
            with open(filenames['val_ids_file'], 'wb') as f:
                cPickle.dump(val_imgids, f)

        train_indices = {}  # image_id -> row index in the train datasets
        val_indices = {}    # image_id -> row index in the val datasets

        # Pre-allocate fixed-size datasets: one row per image,
        # num_fixed_boxes regions each (module-level constants).
        train_img_features = h_train.create_dataset(
            'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
        train_img_bb = h_train.create_dataset(
            'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
        train_spatial_img_features = h_train.create_dataset(
            'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')

        val_img_bb = h_val.create_dataset(
            'image_bb', (len(val_imgids), num_fixed_boxes, 4), 'f')
        val_img_features = h_val.create_dataset(
            'image_features', (len(val_imgids), num_fixed_boxes, feature_length), 'f')
        val_spatial_img_features = h_val.create_dataset(
            'spatial_features', (len(val_imgids), num_fixed_boxes, 6), 'f')

        train_counter = 0
        val_counter = 0

        print("reading tsv...")
        with open(filenames['infile'], "r") as tsv_in_file:
            reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
            for item in tqdm(reader):
                item['num_boxes'] = int(item['num_boxes'])
                image_id = int(item['image_id'])
                image_w = float(item['image_w'])
                image_h = float(item['image_h'])
                # Boxes arrive base64-encoded as float32 rows of
                # [x1, y1, x2, y2] in absolute pixel coordinates.
                bboxes = np.frombuffer(
                    base64.b64decode(item['boxes']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))

                # Normalise box geometry to [0, 1] by the image size.
                box_width = bboxes[:, 2] - bboxes[:, 0]
                box_height = bboxes[:, 3] - bboxes[:, 1]
                scaled_width = box_width / image_w
                scaled_height = box_height / image_h
                scaled_x = bboxes[:, 0] / image_w
                scaled_y = bboxes[:, 1] / image_h

                # Add a trailing axis so the pieces concatenate column-wise.
                box_width = box_width[..., np.newaxis]
                box_height = box_height[..., np.newaxis]
                scaled_width = scaled_width[..., np.newaxis]
                scaled_height = scaled_height[..., np.newaxis]
                scaled_x = scaled_x[..., np.newaxis]
                scaled_y = scaled_y[..., np.newaxis]

                # 6-dim spatial feature: [x1, y1, x2, y2, w, h], all scaled.
                spatial_features = np.concatenate(
                    (scaled_x,
                     scaled_y,
                     scaled_x + scaled_width,
                     scaled_y + scaled_height,
                     scaled_width,
                     scaled_height),
                    axis=1)

                if image_id in train_imgids:
                    # Remove so leftovers can be reported after the loop.
                    train_imgids.remove(image_id)
                    train_indices[image_id] = train_counter
                    train_img_bb[train_counter, :, :] = bboxes
                    train_img_features[train_counter, :, :] = np.frombuffer(
                        base64.b64decode(item['features']),
                        dtype=np.float32).reshape((item['num_boxes'], -1))
                    train_spatial_img_features[train_counter, :, :] = spatial_features
                    train_counter += 1
                elif image_id in val_imgids:
                    val_imgids.remove(image_id)
                    val_indices[image_id] = val_counter
                    val_img_bb[val_counter, :, :] = bboxes
                    val_img_features[val_counter, :, :] = np.frombuffer(
                        base64.b64decode(item['features']),
                        dtype=np.float32).reshape((item['num_boxes'], -1))
                    val_spatial_img_features[val_counter, :, :] = spatial_features
                    val_counter += 1
                else:
                    # Was `assert False`, which vanishes under `python -O`;
                    # raise so bad input is never silently accepted.
                    raise ValueError('Unknown image id: %d' % image_id)

        if len(train_imgids) != 0:
            print('Warning: train_image_ids is not empty')

        if len(val_imgids) != 0:
            print('Warning: val_image_ids is not empty')

        with open(filenames['train_indices_file'], 'wb') as f:
            cPickle.dump(train_indices, f)
        with open(filenames['val_indices_file'], 'wb') as f:
            cPickle.dump(val_indices, f)
    print("done!")
# Example #2 (scraped snippet marker)
# --- scraped fragment: trainval 36-box converter script (truncated) ---
# Output/index pickle locations (paths relative to the working directory).
val_indices_file = 'val36_imgid2idx.pkl'
train_ids_file = 'train_ids.pkl'
val_ids_file = 'val_ids.pkl'

feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image

if __name__ == '__main__':
    # NOTE(review): train_data_file / val_data_file come from an earlier,
    # unseen part of this snippet — confirm before reuse.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    # Load cached image-id sets, or scan the image folders and cache them
    # (protocol=2 keeps the pickles Python-2 compatible).
    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        train_imgids = utils.load_imageid('../data/train2014')
        val_imgids = utils.load_imageid('../data/val2014')
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'), protocol=2)
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'), protocol=2)

    train_indices = {}
    val_indices = {}

    # Pre-allocate one fixed-size row of 36 boxes per training image.
    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length),
        'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
    # (fragment is truncated below this point)
# --- scraped fragment: another trainval 36-box converter (truncated) ---
train_ids_file = 'data/train_ids.pkl'
val_ids_file = 'data/val_ids.pkl'

feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image


if __name__ == '__main__':
    # NOTE(review): train_data_file / val_data_file come from an earlier,
    # unseen part of this snippet — confirm before reuse.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    # Load cached image-id sets, or scan the image folders and cache them.
    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        train_imgids = utils.load_imageid('data/train2014')
        val_imgids = utils.load_imageid('data/val2014')
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}

    # Pre-allocate one fixed-size row of 36 boxes per training image.
    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length), 'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')

    # (fragment is truncated below this point, mid-statement)
    val_img_bb = h_val.create_dataset(
def extract(split, infiles, task='scenario_data'):
    """Extract variable-length bottom-up region features from TSV files
    into a single HDF5 file plus an img_id -> row-index pickle.

    split: key selecting the file set (only 'infer' is configured here).
    infiles: iterable of TSV paths produced by the feature extractor.
    task: sub-directory under data/ holding the files.

    Unlike the fixed-36-box converters, boxes are packed contiguously and
    the 'pos_boxes' dataset stores each image's [start, end) row range.
    Side effects only: writes the HDF5 file and the index pickle.
    """
    FIELDNAMES = ["img_id", "img_h", "img_w", "objects_id", "objects_conf",
                  "attrs_id", "attrs_conf", "num_boxes", "boxes", "features"]

    data_file = {
        'infer': f'data/{task}/infer.hdf5',
    }
    indices_file = {
        'infer': f'data/{task}/infer_imgid2idx.pkl'
    }
    ids_file = {
        'infer': f'data/{task}/infer_ids.pkl'
    }
    path_imgs = {
        'infer': f'data/{task}/images'
    }
    known_num_boxes = {'infer': None}  # set an int here to skip the counting pass

    feature_length = 2048
    min_fixed_boxes = 10   # unused here; kept from the converter template
    max_fixed_boxes = 100  # unused here; kept from the converter template

    # Load the cached image-id set, or scan the image folder and cache it.
    # (with-blocks close the pickle handles the original leaked)
    if os.path.exists(ids_file[split]):
        with open(ids_file[split], 'rb') as f:
            imgids = cPickle.load(f)
    else:
        imgids = utils.load_imageid(path_imgs[split])
        with open(ids_file[split], 'wb') as f:
            cPickle.dump(imgids, f)

    h = h5py.File(data_file[split], 'w')

    # First pass: the total box count is needed to size the datasets.
    if known_num_boxes[split] is None:
        num_boxes = 0
        for infile in infiles:
            print("reading tsv...%s" % infile)
            with open(infile, "r") as tsv_in_file:  # read-only; "r+" was unnecessary
                reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
                for item in reader:
                    item['num_boxes'] = int(item['num_boxes'])
                    image_id = item['img_id']
                    if image_id in imgids:
                        num_boxes += item['num_boxes']
    else:
        num_boxes = known_num_boxes[split]

    print('num_boxes=%d' % num_boxes)

    img_features = h.create_dataset(
        'image_features', (num_boxes, feature_length), 'f')
    img_bb = h.create_dataset(
        'image_bb', (num_boxes, 4), 'f')
    spatial_img_features = h.create_dataset(
        'spatial_features', (num_boxes, 6), 'f')
    pos_boxes = h.create_dataset(
        'pos_boxes', (len(imgids), 2), dtype='int32')

    counter = 0    # images written so far (row in pos_boxes)
    num_boxes = 0  # boxes written so far (row offset into the packed datasets)
    indices = {}

    # Second pass: decode and write features image by image.
    for infile in infiles:
        unknown_ids = []
        print("reading tsv...%s" % infile)
        with open(infile, "r") as tsv_in_file:
            reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
            for item in reader:
                item['num_boxes'] = int(item['num_boxes'])
                image_id = item['img_id']
                image_w = float(item['img_w'])
                image_h = float(item['img_h'])
                # base64.b64decode accepts the str field directly, so the
                # bytes() round-trip is gone; base64.decodestring (used by
                # the original) was removed in Python 3.9.
                bboxes = np.frombuffer(
                    base64.b64decode(item['boxes']),
                    dtype=np.float32).reshape((item['num_boxes'], -1))

                # Normalise box geometry to [0, 1] by the image size.
                box_width = bboxes[:, 2] - bboxes[:, 0]
                box_height = bboxes[:, 3] - bboxes[:, 1]
                scaled_width = box_width / image_w
                scaled_height = box_height / image_h
                scaled_x = bboxes[:, 0] / image_w
                scaled_y = bboxes[:, 1] / image_h

                # Add a trailing axis so the pieces concatenate column-wise.
                box_width = box_width[..., np.newaxis]
                box_height = box_height[..., np.newaxis]
                scaled_width = scaled_width[..., np.newaxis]
                scaled_height = scaled_height[..., np.newaxis]
                scaled_x = scaled_x[..., np.newaxis]
                scaled_y = scaled_y[..., np.newaxis]

                # 6-dim spatial feature: [x1, y1, x2, y2, w, h], all scaled.
                spatial_features = np.concatenate(
                    (scaled_x,
                     scaled_y,
                     scaled_x + scaled_width,
                     scaled_y + scaled_height,
                     scaled_width,
                     scaled_height),
                    axis=1)

                if image_id in imgids:
                    # Remove so leftovers can be reported after the loop.
                    imgids.remove(image_id)
                    indices[image_id] = counter
                    pos_boxes[counter, :] = np.array([num_boxes, num_boxes + item['num_boxes']])
                    img_bb[num_boxes:num_boxes + item['num_boxes'], :] = bboxes
                    img_features[num_boxes:num_boxes + item['num_boxes'], :] = np.frombuffer(
                        base64.b64decode(item['features']),
                        dtype=np.float32).reshape((item['num_boxes'], -1))
                    spatial_img_features[num_boxes:num_boxes + item['num_boxes'], :] = spatial_features
                    counter += 1
                    num_boxes += item['num_boxes']
                else:
                    unknown_ids.append(image_id)

        print('%d unknown_ids...' % len(unknown_ids))
        print('%d image_ids left...' % len(imgids))

    if len(imgids) != 0:
        print('Warning: %s_image_ids is not empty' % split)

    with open(indices_file[split], 'wb') as f:
        cPickle.dump(indices, f)
    h.close()
    print("done!")
# --- scraped fragment: test2014 36-box converter (truncated) ---
# NOTE(review): addr_BU_path / addr_hdf5path / addr_pkl_path / addr_test_imgs
# are defined outside this fragment — confirm before reuse.
infile = os.path.join(
    addr_BU_path, 'test2014_36/test2014_resnet101_faster_rcnn_genome_36.tsv')
test_data_file = os.path.join(addr_hdf5path, 'test201436.hdf5')
test_indices_file = os.path.join(addr_pkl_path, 'test201436_imgid2idx.pkl')
test_ids_file = os.path.join(addr_pkl_path, 'test201436_ids.pkl')

feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image

if __name__ == '__main__':
    h_test = h5py.File(test_data_file, "w")

    # Load the cached test image-id set, or build and cache it
    # (protocol=2 keeps the pickle Python-2 compatible).
    if os.path.exists(test_ids_file):
        test_imgids = pickle.load(open(test_ids_file, 'rb'))
    else:
        test_imgids = utils.load_imageid(addr_test_imgs)
        pickle.dump(test_imgids, open(test_ids_file, 'wb'), protocol=2)

    test_indices = {}

    # Pre-allocate fixed-size HDF5 datasets: one row of 36 boxes per image.
    test_img_features = h_test.create_dataset(
        'image_features', (len(test_imgids), num_fixed_boxes, feature_length),
        'f')
    test_img_bb = h_test.create_dataset('image_bb',
                                        (len(test_imgids), num_fixed_boxes, 4),
                                        'f')
    test_spatial_img_features = h_test.create_dataset(
        'spatial_features', (len(test_imgids), num_fixed_boxes, 6), 'f')

    test_counter = 0
    unknown_ids = []
    # (fragment is truncated below this point)
# Example #6 (scraped snippet marker; stray "]" from a truncated list)
# --- scraped fragment: test2015 36-box converter (truncated) ---
infile = 'data/test2015_36/test2015_resnet101_faster_rcnn_genome_36.tsv'
test_data_file = 'data/test36.hdf5'
test_indices_file = 'data/test36_imgid2idx.pkl'
test_ids_file = 'data/test_ids.pkl'

feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image

if __name__ == '__main__':
    h_test = h5py.File(test_data_file, "w")

    # Load the cached test image-id set, or scan data/test2015 and cache it.
    if os.path.exists(test_ids_file):
        test_imgids = cPickle.load(open(test_ids_file))
    else:
        test_imgids = utils.load_imageid('data/test2015')
        cPickle.dump(test_imgids, open(test_ids_file, 'wb'))

    test_indices = {}

    # Pre-allocate fixed-size HDF5 datasets: one row of 36 boxes per image.
    test_img_features = h_test.create_dataset(
        'image_features', (len(test_imgids), num_fixed_boxes, feature_length),
        'f')
    test_img_bb = h_test.create_dataset('image_bb',
                                        (len(test_imgids), num_fixed_boxes, 4),
                                        'f')
    test_spatial_img_features = h_test.create_dataset(
        'spatial_features', (len(test_imgids), num_fixed_boxes, 6), 'f')

    test_counter = 0
    # (fragment is truncated below this point)
# Example #7 (scraped snippet marker)
# --- scraped fragment: generic single-split converter (truncated) ---
# NOTE(review): `target` is defined outside this fragment — confirm before reuse.
infile = 'data/%s_36/%s_resnet101_faster_rcnn_genome_36.tsv' % (target, target)
data_file = 'data/%s36.hdf5' % target
indices_file = 'data/%s36_imgid2idx.pkl' % target
ids_file = 'data/%s_ids.pkl' % target

feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image


if __name__ == '__main__':
    h = h5py.File(data_file, "w")

    # Load the cached image-id set, or scan the image folder and cache it.
    if os.path.exists(ids_file):
        imgids = cPickle.load(open(ids_file, 'rb'))
    else:
        imgids = utils.load_imageid('data/%s' % target)
        cPickle.dump(imgids, open(ids_file, 'wb'))

    indices = {}

    # Pre-allocate fixed-size HDF5 datasets: one row of 36 boxes per image.
    img_bb = h.create_dataset(
        'image_bb', (len(imgids), num_fixed_boxes, 4), 'f')
    img_features = h.create_dataset(
        'image_features', (len(imgids), num_fixed_boxes, feature_length), 'f')
    spatial_img_features = h.create_dataset(
        'spatial_features', (len(imgids), num_fixed_boxes, 6), 'f')

    counter = 0

    print("reading tsv...")
    # (fragment is truncated below this point, mid-statement)
    with open(infile, "r+") as tsv_in_file:
# --- scraped fragment: train/val/test converter (truncated) ---
feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image


if __name__ == '__main__':
    # Only the test file is written here; train/val were disabled.
    # h_train = h5py.File(train_data_file, "w")
    # h_val = h5py.File(val_data_file, "w")
    h_test = h5py.File(test_data_file, "w")

    # Load all three cached image-id sets, or scan the image folders
    # and cache them. NOTE(review): the *_ids_file / test_data_file
    # variables come from an earlier, unseen part of this snippet.
    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file) and os.path.exists(test_ids_file):
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
        test_imgids = cPickle.load(open(test_ids_file))
    else:
        train_imgids = utils.load_imageid('data/images/train2014')
        val_imgids = utils.load_imageid('data/images/val2014')
        # test_imgids = []
        test_imgids = utils.load_imageid('data/images/test2015')
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))
        cPickle.dump(test_imgids, open(test_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}
    test_indices = {}

    print('> len(test_imgids): %d' % len(test_imgids))

    # The rest of this snippet sits inside a triple-quoted string (i.e. was
    # disabled), and the fragment is truncated.
    '''
    train_img_features = h_train.create_dataset(
# --- scraped fragment: abstract-scenes converter (truncated) ---
if __name__ == '__main__':
    # NOTE(review): the *_data_file / *_ids_file variables come from an
    # earlier, unseen part of this snippet — confirm before reuse.
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    # Load cached image-id sets, or scan the abstract-scene image folders
    # and cache them.
    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:

        base_imgs = '../DATA_imgs/'
        folders = [
            'scene_img_abstract_v002_train2015/',
            'scene_img_abstract_v002_val2015/'
        ]

        train_imgids = utils.load_imageid(base_imgs + folders[0])  #set of ids
        val_imgids = utils.load_imageid(base_imgs + folders[1])  #set of ids

        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}

    # Pre-allocate fixed-size HDF5 datasets for the training split.
    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length),
        'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
    # (fragment is truncated below this point)
def extract(split, infiles):
    """Extract Visual7W bottom-up region features from TSV files into one
    HDF5 file per split, plus an image_id -> row-index pickle.

    split: 'train', 'val' or 'test' — selects paths from the dicts below.
    infiles: iterable of TSV paths produced by the feature extractor.

    Each image contributes exactly 196 feature rows (image_features is
    indexed by image counter), while image_bb / spatial_features /
    pos_boxes are addressed by a running box offset that also advances
    by 196 per image. Side effects only: writes the HDF5 file and the
    index pickle.
    """
    FIELDNAMES = [
        'image_id', 'image_w', 'image_h', 'num_boxes', 'boxes', 'features'
    ]
    data_file = {
        'train': 'data/train.hdf5',
        'val': 'data/val.hdf5',
        'test': 'data/test2015.hdf5'
    }
    indices_file = {
        'train': 'data/train_imgid2idx.pkl',
        'val': 'data/val_imgid2idx.pkl',
        'test': 'data/test2015_imgid2idx.pkl'
    }
    ids_file = {
        'train': 'data/train_ids.pkl',
        'val': 'data/val_ids.pkl',
        'test': 'data/test2015_ids.pkl'
    }
    path_imgs = {
        'train': '/media/data-aioz/VQA/Visual7W/train',
        'val': '/media/data-aioz/VQA/Visual7W/val',
        'test': '/media/data-aioz/VQA/Visual7W/test'
    }
    # known_num_boxes = {
    #     'train': 2643089,
    #     'val': 1281164,
    #     'test': 2566887,
    # }
    known_num_boxes = {'train': None, 'val': None, 'test': None}  # None => count in a first pass
    feature_length = 2048
    min_fixed_boxes = 10   # unused here; kept from the converter template
    max_fixed_boxes = 100  # unused here; kept from the converter template

    # Load the cached image-id set, or scan the image folder and cache it.
    # (with-blocks close the pickle handles the original leaked)
    if os.path.exists(ids_file[split]):
        with open(ids_file[split], 'rb') as f:
            imgids = cPickle.load(f)
    else:
        imgids = utils.load_imageid(path_imgs[split])
        with open(ids_file[split], 'wb') as f:
            cPickle.dump(imgids, f)

    h = h5py.File(data_file[split], 'w')

    # First pass: the total box count is needed to size the datasets.
    if known_num_boxes[split] is None:
        num_boxes = 0
        for infile in infiles:
            print("reading tsv...%s" % infile)
            with open(infile, "r") as tsv_in_file:  # read-only; "r+" was unnecessary
                reader = csv.DictReader(tsv_in_file,
                                        delimiter='\t',
                                        fieldnames=FIELDNAMES)
                for item in reader:
                    item['num_boxes'] = int(item['num_boxes'])
                    # ids look like 'v7w_<id>'; keep the numeric part
                    image_id = int(item['image_id'].split('_')[1])
                    if image_id in imgids:
                        num_boxes += item['num_boxes']
    else:
        num_boxes = known_num_boxes[split]

    print('num_boxes=%d' % num_boxes)

    img_features = h.create_dataset('image_features',
                                    (num_boxes, 196, feature_length), 'f')
    img_bb = h.create_dataset('image_bb', (num_boxes, 4), 'f')
    spatial_img_features = h.create_dataset('spatial_features', (num_boxes, 6),
                                            'f')
    pos_boxes = h.create_dataset('pos_boxes', (len(imgids), 2), dtype='int32')

    counter = 0    # images written so far
    num_boxes = 0  # running box offset (advances by 196 per image below)
    indices = {}

    # Second pass: decode and write features image by image.
    for infile in infiles:
        unknown_ids = []
        print("reading tsv...%s" % infile)
        with open(infile, "r") as tsv_in_file:
            reader = csv.DictReader(tsv_in_file,
                                    delimiter='\t',
                                    fieldnames=FIELDNAMES)
            for item in reader:
                item['num_boxes'] = int(item['num_boxes'])
                image_id = int(item['image_id'].split('_')[1])
                image_w = float(item['image_w'])
                image_h = float(item['image_h'])
                # base64.b64decode accepts the str field directly, so the
                # bytes() round-trip is gone; base64.decodestring (used by
                # the original) was removed in Python 3.9.
                # NOTE(review): boxes decoded as float64 but features as
                # float32 — looks inconsistent with the other converters;
                # confirm against the TSV producer before changing.
                bboxes = np.frombuffer(base64.b64decode(item['boxes']),
                                       dtype=np.float64).reshape(
                                           (item['num_boxes'], -1))

                # Normalise box geometry to [0, 1] by the image size.
                box_width = bboxes[:, 2] - bboxes[:, 0]
                box_height = bboxes[:, 3] - bboxes[:, 1]
                scaled_width = box_width / image_w
                scaled_height = box_height / image_h
                scaled_x = bboxes[:, 0] / image_w
                scaled_y = bboxes[:, 1] / image_h

                # Add a trailing axis so the pieces concatenate column-wise.
                box_width = box_width[..., np.newaxis]
                box_height = box_height[..., np.newaxis]
                scaled_width = scaled_width[..., np.newaxis]
                scaled_height = scaled_height[..., np.newaxis]
                scaled_x = scaled_x[..., np.newaxis]
                scaled_y = scaled_y[..., np.newaxis]

                # 6-dim spatial feature: [x1, y1, x2, y2, w, h], all scaled.
                spatial_features = np.concatenate(
                    (scaled_x, scaled_y, scaled_x + scaled_width,
                     scaled_y + scaled_height, scaled_width, scaled_height),
                    axis=1)

                if image_id in imgids:
                    # Remove so leftovers can be reported after the loop.
                    imgids.remove(image_id)
                    indices[image_id] = counter
                    pos_boxes[counter, :] = np.array(
                        [num_boxes, num_boxes + item['num_boxes']])
                    img_bb[num_boxes:num_boxes + item['num_boxes'], :] = bboxes
                    # Features are a fixed 196-row grid per image, indexed
                    # by the image counter (not the box offset).
                    img_features[counter, :, :] = np.frombuffer(
                        base64.b64decode(item['features']),
                        dtype=np.float32).reshape((196, -1))
                    spatial_img_features[
                        num_boxes:num_boxes +
                        item['num_boxes'], :] = spatial_features
                    counter += 1
                    num_boxes += 196
                else:
                    unknown_ids.append(image_id)

        print('%d unknown_ids...' % len(unknown_ids))
        print('%d image_ids left...' % len(imgids))

    if len(imgids) != 0:
        print('Warning: %s_image_ids is not empty' % split)

    with open(indices_file[split], 'wb') as f:
        cPickle.dump(indices, f)
    h.close()
    print("done!")
# Example #11 (scraped snippet marker)
# --- scraped fragment: config-driven trainval converter (truncated) ---
val_ids_file = 'data/val_ids.pkl'

feature_length = 2048  # length of each per-region feature vector
num_fixed_boxes = 36   # fixed number of region proposals per image
# NOTE(review): `config` (and train_data_file / val_data_file /
# train_ids_file below) come from outside this fragment.
train_path = os.path.join(config.data_path, 'train2014')
val_path = os.path.join(config.data_path, 'val2014')

if __name__ == '__main__':
    h_train = h5py.File(train_data_file, "w")
    h_val = h5py.File(val_data_file, "w")

    # Load cached image-id sets, or scan the COCO image folders and cache them.
    if os.path.exists(train_ids_file) and os.path.exists(val_ids_file):
        train_imgids = cPickle.load(open(train_ids_file))
        val_imgids = cPickle.load(open(val_ids_file))
    else:
        train_imgids = utils.load_imageid(train_path)
        val_imgids = utils.load_imageid(val_path)
        cPickle.dump(train_imgids, open(train_ids_file, 'wb'))
        cPickle.dump(val_imgids, open(val_ids_file, 'wb'))

    train_indices = {}
    val_indices = {}

    # Pre-allocate fixed-size HDF5 datasets for the training split.
    train_img_features = h_train.create_dataset(
        'image_features', (len(train_imgids), num_fixed_boxes, feature_length),
        'f')
    train_img_bb = h_train.create_dataset(
        'image_bb', (len(train_imgids), num_fixed_boxes, 4), 'f')
    train_spatial_img_features = h_train.create_dataset(
        'spatial_features', (len(train_imgids), num_fixed_boxes, 6), 'f')
    # (fragment is truncated below this point)