def gen_label_weights(target):
    # label maps
    if target == 'object':
        labelnet = objnet
    elif target == 'predicate':
        labelnet = prenet
    else:
        print('Invalid target: %s' % target)
        exit(-1)

    raw2path = labelnet.raw2path()
    index2label = labelnet.index2label()
    label2index = labelnet.label2index()

    # org data
    dataset_config = DatasetConfig('vg')
    prepare_root = dataset_config.extra_config[target].prepare_root
    box_label_path = os.path.join(prepare_root, 'train_box_label.bin')
    box_labels = pickle.load(open(box_label_path, 'rb'))

    # weight save path
    weights_save_path = dataset_config.extra_config[target].config[
        'raw2weight_path']
    gen_weights1(box_labels, raw2path, index2label, label2index,
                 weights_save_path, 'raw')
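
# A minimal sketch of what gen_weights1 could compute -- inverse-frequency
# label weights over the hierarchy. The box_labels layout ([ymin, ymax, xmin,
# xmax, label] per box) and the meaning of the 'raw' mode are assumptions,
# not the repo's confirmed behavior.
import pickle
import numpy as np

def gen_weights1(box_labels, raw2path, index2label, label2index,
                 weights_save_path, mode):
    # mode ('raw') is accepted but not branched on in this sketch
    counts = np.zeros(len(index2label))
    for labeled_boxes in box_labels.values():
        for box in labeled_boxes:
            raw_ind = label2index[box[4]]
            for ind in raw2path[raw_ind]:  # credit the label and its hypernyms
                counts[ind] += 1
    counts[counts == 0] = 1.0  # guard unseen labels against division by zero
    weights = counts.sum() / (counts * len(counts))
    pickle.dump(weights, open(weights_save_path, 'wb'))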
Example #2
def reformat_anno():
    dataset_config = DatasetConfig('vrd')
    org_anno_root = dataset_config.data_config['dirty_anno_root']
    dst_anno_root = dataset_config.data_config['clean_anno_root']

    # load vrd label list
    obj_label_list_path = os.path.join(dataset_config.dataset_root,
                                       'object_labels.txt')
    obj_ind2label = load_list(obj_label_list_path)

    pre_label_list_path = os.path.join(dataset_config.dataset_root,
                                       'predicate_labels.txt')
    pre_ind2label = load_list(pre_label_list_path)

    # all dirty annotation files
    anno_list = os.listdir(org_anno_root)
    for i, anno_name in enumerate(anno_list):
        print('processing [%d/%d]' % (i + 1, len(anno_list)))

        org_anno_path = os.path.join(org_anno_root, anno_name)
        org_anno = json.load(open(org_anno_path, 'r'))

        # for removing redundant objects from predicate
        obj_label_boxes = []

        # clean anno collection
        rlts = []
        for rlt in org_anno:
            # convert predicate anno
            new_rlt = rlt_reformat(rlt, obj_ind2label, pre_ind2label)
            rlts.append(new_rlt)

            obj_sbj = [rlt['object'], rlt['subject']]
            for obj in obj_sbj:
                # left top, right bottom
                # ymin, ymax, xmin, xmax, category
                label_box = list(obj['bbox'])  # copy so the source anno keeps a 4-element bbox
                label_box.append(obj['category'])
                obj_label_boxes.append(label_box)

        objs = []
        # remove redundant objects
        if len(obj_label_boxes) > 0:
            obj_label_boxes = np.array(obj_label_boxes)
            unique_label_boxes = np.unique(obj_label_boxes, axis=0)
            for label_box in unique_label_boxes:
                obj = dict()
                obj['name'] = obj_ind2label[int(label_box[4])].strip()
                obj['ymin'] = int(label_box[0])
                obj['ymax'] = int(label_box[1])
                obj['xmin'] = int(label_box[2])
                obj['xmax'] = int(label_box[3])
                objs.append(obj)

        new_anno = dict()
        new_anno['objects'] = objs
        new_anno['relations'] = rlts
        save_path = os.path.join(dst_anno_root, anno_name)
        json.dump(new_anno, open(save_path, 'w'))
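
# A minimal sketch of rlt_reformat as used above -- an assumption, not the
# repo's confirmed helper. It maps VRD's index-based subject/object/predicate
# annotation onto the name-based layout the clean annotations use.
def rlt_reformat(rlt, obj_ind2label, pre_ind2label):
    def obj2new(obj):
        # assumed bbox order: ymin, ymax, xmin, xmax
        return {
            'name': obj_ind2label[obj['category']].strip(),
            'ymin': int(obj['bbox'][0]),
            'ymax': int(obj['bbox'][1]),
            'xmin': int(obj['bbox'][2]),
            'xmax': int(obj['bbox'][3]),
        }
    return {
        'subject': obj2new(rlt['subject']),
        'object': obj2new(rlt['object']),
        'predicate': {'name': pre_ind2label[rlt['predicate']].strip()},
    }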
def filter_anno():
    vg_config = DatasetConfig('vg')
    dirty_anno_root = vg_config.data_config['dirty_anno_root']
    clean_anno_root = vg_config.data_config['clean_anno_root']

    anno_list = os.listdir(dirty_anno_root)
    anno_num = len(anno_list)

    obj_raw_labels = set(objnet.get_raw_labels())
    pre_raw_labels = set(prenet.get_raw_labels())

    for i in range(anno_num):
        print('filtering [%d/%d]' % (i + 1, anno_num))
        anno_name = anno_list[i]

        # load dirty json anno
        dirty_anno_path = os.path.join(dirty_anno_root, anno_name)
        dirty_anno = json.load(open(dirty_anno_path, 'r'))

        # keep objects in label set
        clean_objects = []
        dirty_objects = dirty_anno['objects']
        for d_obj in dirty_objects:
            if d_obj['name'] in obj_raw_labels:
                clean_objects.append(d_obj)

        # keep relationships whose sbj,obj,pre are in label set
        clean_relations = []
        dirty_relations = dirty_anno['relationships']
        for d_rlt in dirty_relations:
            if d_rlt['predicate']['name'] not in pre_raw_labels:
                continue

            keep_rlt = True
            for r_obj in (d_rlt['subject'], d_rlt['object']):
                if r_obj['name'] not in obj_raw_labels:
                    keep_rlt = False
                    break

            if keep_rlt:
                clean_relations.append(d_rlt)

        if len(clean_objects) == 0 or len(clean_relations) == 0:
            continue

        # save cleaned json anno
        clean_anno = dict()
        clean_anno['objects'] = clean_objects
        clean_anno['relationships'] = clean_relations
        clean_anno['image_info'] = dirty_anno['image_info']

        clean_anno_path = os.path.join(clean_anno_root, anno_name)
        json.dump(clean_anno, open(clean_anno_path, 'w'))

    clean_annos = os.listdir(clean_anno_root)
    print('>>> filter_anno: image num = %d' % (len(clean_annos)))
def split_anno_pkg():
    vg_config = DatasetConfig('vg')
    org_anno_root = vg_config.data_config['raw_anno_root']
    image_data_path = os.path.join(org_anno_root, 'image_data.json')
    relationship_path = os.path.join(org_anno_root, 'relationships.json')
    output_json_root = vg_config.data_config['dirty_anno_root']

    split_json(image_data_path, output_json_root, 'image_info', False)
    split_json(relationship_path, output_json_root, 'relationships', True)
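
# A minimal sketch of split_json under the signature used above (assumed
# semantics): split a whole-dataset VG JSON package into one small file per
# image, merging the payload into any per-image file written by an earlier
# pass.
import os
import json

def split_json(package_path, output_root, item_key, payload_under_key):
    entries = json.load(open(package_path, 'r'))  # assumed: one entry per image
    for entry in entries:
        anno_path = os.path.join(output_root, str(entry['image_id']) + '.json')
        anno = {}
        if os.path.exists(anno_path):
            anno = json.load(open(anno_path, 'r'))
        # either take the payload stored under item_key, or the entry itself
        anno[item_key] = entry[item_key] if payload_under_key else entry
        json.dump(anno, open(anno_path, 'w'))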
Example #5
def reformat_anno():
    vg_config = DatasetConfig('vg')
    dirty_anno_root = vg_config.data_config['dirty_anno_root']
    clean_anno_root = vg_config.data_config['clean_anno_root']
    anno_list = os.listdir(dirty_anno_root)
    anno_list = sorted(anno_list)
    anno_sum = len(anno_list)
    for i in range(0, anno_sum):
        print('processing wash_anno [%d/%d]' % (i + 1, anno_sum))
        dirty_anno_path = os.path.join(dirty_anno_root, anno_list[i])
        clean_anno_path = os.path.join(clean_anno_root, anno_list[i])
        wash_anno(dirty_anno_path, clean_anno_path)
def vg2pascal():
    vg_config = DatasetConfig('vg')
    json_anno_root = vg_config.data_config['clean_anno_root']
    pascal_anno_root = vg_config.pascal_format['Annotations']
    json_annos = os.listdir(json_anno_root)
    for i in range(len(json_annos)):
        print('processing vg2pascal: [%d/%d]' % (i + 1, len(json_annos)))
        json_anno_path = os.path.join(json_anno_root, json_annos[i])
        json_anno = json.load(open(json_anno_path, 'r'))
        mid_anno = convert_anno(json_anno)
        pascal_anno_path = os.path.join(pascal_anno_root, json_annos[i][:-5]+'.xml')
        output_pascal_format(mid_anno, pascal_anno_path)
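
# A minimal sketch of output_pascal_format (assumed, not the repo's actual
# writer): emit a PASCAL VOC style XML annotation with xml.etree. The
# 'filename' and 'objects' keys of the intermediate anno are assumptions.
import xml.etree.ElementTree as ET

def output_pascal_format(anno, save_path):
    root = ET.Element('annotation')
    ET.SubElement(root, 'filename').text = anno['filename']
    for obj in anno['objects']:
        obj_e = ET.SubElement(root, 'object')
        ET.SubElement(obj_e, 'name').text = obj['name']
        box_e = ET.SubElement(obj_e, 'bndbox')
        for k in ('xmin', 'ymin', 'xmax', 'ymax'):
            ET.SubElement(box_e, k).text = str(obj[k])
    ET.ElementTree(root).write(save_path)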
Example #7
def gen_cnn_feat():
    # load cnn
    prototxt = global_config.fast_prototxt_path
    caffemodel = global_config.fast_caffemodel_path
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    # prepare
    dataset_config = DatasetConfig('vrd')
    target = 'object'
    labelnet = objnet

    # extract feature
    anno_root = dataset_config.data_config['clean_anno_root']
    img_root = dataset_config.data_config['img_root']
    label_save_root = dataset_config.extra_config[target].label_root
    prepare_root = dataset_config.extra_config[target].prepare_root
    fc7_save_root = dataset_config.extra_config[target].fc7_root

    datasets = ['train', 'test']
    for d in datasets:
        # prepare labels and boxes
        label_save_path = os.path.join(label_save_root, d + '.txt')
        anno_list = os.path.join(dataset_config.pascal_format['ImageSets'],
                                 d + '.txt')
        box_label_path = os.path.join(prepare_root, d + '_box_label.bin')
        prepare_object_boxes_and_labels(anno_root, anno_list, box_label_path)

        # extract cnn feature
        box_label = pickle.load(open(box_label_path, 'rb'))
        label2index = labelnet.label2index()
        raw2path = labelnet.raw2path()

        # cal sample ratio
        sample_ratio = cal_sample_ratio(objnet, box_label)

        extract_fc7_features(net, box_label, img_root, anno_list,
                             fc7_save_root, label_save_path, raw2path,
                             sample_ratio, d)

        if d == 'train':
            ind2weight_path = dataset_config.extra_config['object'].config[
                'ind2weight_path']
            pickle.dump(sample_ratio, open(ind2weight_path, 'wb'))

    # split a small val list for quick evaluation
    small_val_path = os.path.join(label_save_root, 'val.txt')
    val_path = os.path.join(label_save_root, 'test.txt')
    split_a_small_val(val_path, 1000, small_val_path)
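
# A minimal sketch of split_a_small_val (assumed behavior): randomly sample
# `capacity` lines from the full test list into a small val list for quick
# evaluation.
import random

def split_a_small_val(full_list_path, capacity, small_list_path):
    with open(full_list_path) as f:
        lines = f.readlines()
    random.shuffle(lines)
    with open(small_list_path, 'w') as f:
        f.writelines(lines[:capacity])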
def split_anno_pkg():
    data_config = DatasetConfig('vrd')
    # ====== split annotation package ======
    datasets = ['train', 'test']
    # contain image lists
    dataset_lists = {'train': [], 'test': []}
    list_root = data_config.pascal_format['ImageSets']

    # all images and annotations are saved together
    image_root = data_config.pascal_format['JPEGImages']
    splited_anno_root = data_config.data_config['dirty_anno_root']

    for d in datasets:
        anno_package_path = os.path.join(data_config.dataset_root,
                                         'json_dataset',
                                         'annotations_' + d + '.json')
        anno_package = json.load(open(anno_package_path))

        data_list = dataset_lists[d]
        d_image_root = os.path.join(data_config.dataset_root, 'sg_dataset',
                                    'sg_' + d + '_images')

        for i, img_name in enumerate(anno_package.keys()):
            print('processing [%d/%d]' % (i + 1, len(anno_package)))
            anno = anno_package[img_name]

            # copy image
            # only jpeg image
            img_name = img_name.split('.')[0] + '.jpg'
            org_img_path = os.path.join(d_image_root, img_name)
            if not os.path.exists(org_img_path):
                continue
            shutil.copy(org_img_path, image_root)

            # record image name in list
            data_list.append(img_name.split('.')[0] + '\n')

            # save splited annotation
            anno_name = img_name.split('.')[0] + '.json'
            anno_save_path = os.path.join(splited_anno_root, anno_name)
            json.dump(anno, open(anno_save_path, 'w'))

        # save image list
        list_file_path = os.path.join(list_root, d + '.txt')
        with open(list_file_path, 'w') as list_file:
            list_file.writelines(data_list)
Example #9
def gen_cnn_feat():
    dataset = 'vrd'
    target = 'object'

    # load cnn
    net = load_detector(dataset)

    # prepare
    dataset_config = DatasetConfig(dataset)
    labelnet = objnet

    # extract feature
    anno_root = dataset_config.data_config['clean_anno_root']
    img_root = dataset_config.data_config['img_root']
    label_save_root = dataset_config.extra_config[target].label_root
    prepare_root = dataset_config.extra_config[target].prepare_root
    fc7_save_root = dataset_config.extra_config[target].fc7_root

    datasets = ['train', 'test']
    for d in datasets:
        # prepare labels and boxes
        label_save_path = os.path.join(label_save_root, d + '.txt')
        anno_list = os.path.join(dataset_config.pascal_format['ImageSets'],
                                 d + '.txt')
        box_label_path = os.path.join(prepare_root, d + '_box_label.bin')
        prepare_object_boxes_and_labels(anno_root, anno_list, box_label_path)

        # extract cnn feature
        box_label = pickle.load(open(box_label_path, 'rb'))
        label2index = labelnet.label2index()
        raw2wn = labelnet.raw2wn()
        raw2path = labelnet.raw2path()

        # cal sample ratio
        sample_ratio = cal_sample_ratio(label2index, raw2path, box_label)

        extract_fc7_features(net, box_label, img_root, anno_list,
                             fc7_save_root, label_save_path, label2index,
                             raw2wn, raw2path, sample_ratio, d)

    # split a small val list for quick evaluation
    small_val_path = os.path.join(label_save_root, 'val.txt')
    val_path = os.path.join(label_save_root, 'test.txt')
    split_a_small_val(val_path, 1000, small_val_path)
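
# A minimal sketch of cal_sample_ratio in its three-argument form (an
# assumption): inverse-frequency sampling weights per label index, so rare
# classes are drawn more often. The box_label layout is assumed to match
# gen_weights1 above.
import numpy as np

def cal_sample_ratio(label2index, raw2path, box_label):
    counts = np.zeros(len(label2index))
    for labeled_boxes in box_label.values():
        for box in labeled_boxes:
            for ind in raw2path[label2index[box[4]]]:
                counts[ind] += 1
    counts[counts == 0] = 1.0
    return counts.sum() / (counts * len(counts))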
def gen_cnn_feat():
    # load cnn
    prototxt = global_config.fast_prototxt_path
    caffemodel = global_config.fast_caffemodel_path
    datasets = ['train', 'test', 'val']
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    # prepare
    dataset_config = DatasetConfig('vg')
    target = 'object'
    labelnet = objnet

    # extract feature
    anno_root = dataset_config.data_config['clean_anno_root']
    img_root = dataset_config.data_config['img_root']
    label_save_root = dataset_config.extra_config[target].label_root
    prepare_root = dataset_config.extra_config[target].prepare_root
    fc7_save_root = dataset_config.extra_config[target].fc7_root
    for d in datasets:
        # prepare labels and boxes
        label_save_path = os.path.join(label_save_root, d + '.txt')
        anno_list = os.path.join(dataset_config.pascal_format['ImageSets'],
                                 d + '.txt')
        box_label_path = os.path.join(prepare_root, d + '_box_label.bin')
        prepare_object_boxes_and_labels(anno_root, anno_list, box_label_path)

        # extract cnn feature
        box_label = pickle.load(open(box_label_path, 'rb'))
        label2index = labelnet.label2index()
        raw2path = labelnet.raw2path()

        # cal sample ratio
        sample_ratio = cal_sample_ratio(label2index, raw2path, box_label)

        extract_fc7_features(net, box_label, img_root, anno_list,
                             fc7_save_root, label_save_path, raw2path,
                             sample_ratio, d)
Example #11
def split_dataset():
    vg_config = DatasetConfig('vg')
    anno_root = vg_config.data_config['clean_anno_root']
    anno_list = os.listdir(anno_root)
    anno_sum = len(anno_list)
    print('>>> Split dataset: image num = %d' % anno_sum)
    # train : test = 4 : 1
    # test_capacity = anno_sum / 5

    test_capacity = 5000
    val_capacity = 400
    train_capacity = anno_sum - val_capacity - test_capacity

    # random.shuffle(anno_list)
    split_list = {
        'trainval': anno_list[:train_capacity + val_capacity],
        'train': anno_list[:train_capacity],
        'val': anno_list[train_capacity:train_capacity + val_capacity],
        'test': anno_list[train_capacity + val_capacity:
                          train_capacity + val_capacity + test_capacity],
    }

    # save split list
    split_list_root = vg_config.pascal_format['ImageSets']
    for d in split_list:
        image_id_list = []
        lines = split_list[d]

        for l in lines:
            image_id_list.append(l.split('.')[0] + '\n')
        list_file_path = os.path.join(split_list_root, d + '.txt')

        with open(list_file_path, 'w') as list_file:
            list_file.writelines(image_id_list)
import os
from open_relation import global_config
from open_relation.dataset.dataset_config import DatasetConfig

log_root = 'open_relation/log'

vg_dataset_config = DatasetConfig('vg')

vg_obj_hyper_params = {
    'visual_d': 4096,
    'hidden_d': 4096,
    'embedding_d': 600,
    'epoch': 20,
    'batch_size': 64,
    'negative_label_num': 2450,
    'eval_freq': 5000,
    'print_freq': 10,
    'lr': 0.01,
    'visual_feature_root': vg_dataset_config.extra_config['object'].fc7_root,
    'list_root': vg_dataset_config.extra_config['object'].label_root,
Example #13
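# (This snippet begins mid-function. A plausible head -- assumed, not
# verbatim -- would walk the hierarchy top-down, pairing each node with its
# direct hyponyms:)
#
# def generate_direct_hypernyms(labelnet, hypernym_save_path):
#     hypernyms = []
#     nodes = [labelnet.root()]
#     while nodes:
#         hyper = nodes.pop()
#         hypos = hyper.hyponyms()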
        for hypo in hypos:
            print('%s -> %s' % (hypo.name(), hyper.name()))
            hypernyms.append([hypo.index(), hyper.index()])
            nodes.insert(0, hypo)

    # save hypernym dataset
    hypernyms = np.array(hypernyms)
    import h5py
    with h5py.File(hypernym_save_path, 'w') as f:
        f.create_dataset('hypernyms', data=hypernyms)


if __name__ == '__main__':

    dataset = 'vrd'
    data_config = DatasetConfig(dataset)

    if dataset == 'vrd':
        from open_relation.dataset.vrd.label_hier.obj_hier import objnet
    else:
        from open_relation.dataset.vg.label_hier.obj_hier import objnet

    label2index = objnet.label2index()

    hypernym_save_path = os.path.join(global_config.project_root,
                                      'open_relation', 'label_embedding',
                                      'object', dataset + '_dataset',
                                      'wordnet_with_' + dataset + '.h5')
    generate_direct_hypernyms(objnet, hypernym_save_path)
import os
import json
import matplotlib.pyplot as plt
from open_relation.dataset.dataset_config import DatasetConfig


vg_config = DatasetConfig('vg')


def count():
    # counter
    obj_counter = dict()
    pre_counter = dict()
    obj2wn = dict()
    pre2wn = dict()

    # counting over the raw (dirty) annotations, which still carry synsets
    anno_root = vg_config.data_config['dirty_anno_root']
    anno_list = os.listdir(anno_root)
    anno_num = len(anno_list)
    for i, anno_name in enumerate(anno_list):
        print('counting [%d/%d]' % (i + 1, anno_num))
        anno_path = os.path.join(anno_root, anno_name)
        anno = json.load(open(anno_path, 'r'))
        objs = anno['objects']
        for obj in objs:
            synsets = set(obj['synsets'])
            name = obj['name']
            if name in obj_counter:
                obj_counter[name] += 1
            else:
                obj_counter[name] = 1
Example #15
            obj_hyper_inds = objnet.get_node_by_index(
                raw_rlt[2]).trans_hyper_inds()
            obj_sample_probs = equal_interval_prob(len(obj_hyper_inds))
            obj_samples = np.random.choice(obj_hyper_inds,
                                           obj_sample_num,
                                           p=obj_sample_probs)

            # extend hyper object
            for i in range(obj_sample_num):
                # hyper subject, hyper_pre, hyper object, raw pre
                new_rlts.append(
                    [sbj_samples[i], p_ind, obj_samples[i], pre_ind])

    new_rlts = np.array(new_rlts)
    np.save(rlt_save_path, new_rlts)
    return new_rlts
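
# A minimal sketch of equal_interval_prob used above (assumed): sampling
# probabilities spaced at equal intervals along a hypernym path of length n,
# normalized to sum to 1, favoring labels nearer the raw leaf.
import numpy as np

def equal_interval_prob(n):
    probs = np.arange(n, 0, -1, dtype=np.float64)
    return probs / probs.sum()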


if __name__ == '__main__':

    config = DatasetConfig('vrd')
    anno_root = config.data_config['clean_anno_root']
    split = ['train', 'test']
    for d in split:
        list_path = os.path.join(config.pascal_format['ImageSets'], d + '.txt')
        rlt_save_path = data_config[d]['raw_rlt_path']
        raw_rlts = collect_raw_rlts(anno_root, list_path, rlt_save_path)
        print('raw relationship tuple num: %d' % len(raw_rlts))
        rlt_save_path = data_config[d]['ext_rlt_path']
        ext_rlts = extend_rlts(raw_rlts, rlt_save_path)
        print('extended relationship tuple num: %d' % len(ext_rlts))
import pickle
import h5py
import numpy as np
from nltk.corpus import wordnet as wn
from open_relation.dataset.dataset_config import DatasetConfig


dataset_name = 'vrd'
target = 'object'

data_config = DatasetConfig(dataset_name)

if dataset_name == 'vrd' and target == 'object':
    from open_relation.dataset.vrd.label_hier.obj_hier import objnet as classnet
elif dataset_name == 'vrd' and target == 'predicate':
    from open_relation.dataset.vrd.label_hier.pre_hier import prenet as classnet
elif dataset_name == 'vg' and target == 'object':
    from open_relation.dataset.vg.label_hier.obj_hier import objnet as classnet
else:
    from open_relation.dataset.vg.label_hier.pre_hier import prenet as classnet


def eval2(label_vecs, labelnet):
    label2index = labelnet.label2index()
    index2label = labelnet.index2label()
    vg_labels = labelnet.get_raw_labels()
    for vg_label in vg_labels:
        vg_label_index = label2index[vg_label]
        vg_label_vec = label_vecs[vg_label_index]
        sub = label_vecs - vg_label_vec