def gen_label_weigths(target):
    """Generate raw-label weights for the VG dataset and save them to disk.

    :param target: 'object' or 'predicate' — selects which label hierarchy
                   (objnet / prenet) the weights are computed for.

    Exits the process when *target* is neither value (original behavior kept).
    """
    # label maps
    if target == 'object':
        labelnet = objnet
    elif target == 'predicate':
        labelnet = prenet
    else:
        print('Target is wrong!')
        exit(-1)
    raw2path = labelnet.raw2path()
    index2label = labelnet.index2label()
    label2index = labelnet.label2index()
    # org data: prepared training box labels for this target
    dataset_config = DatasetConfig('vg')
    prepare_root = dataset_config.extra_config[target].prepare_root
    box_label_path = os.path.join(prepare_root, 'train_box_label.bin')
    # FIX: the pickle handle was opened inline and never closed (leak);
    # a with-block guarantees it is released.
    with open(box_label_path, 'rb') as box_label_file:
        box_labels = pickle.load(box_label_file)
    # weight save path
    weights_save_path = dataset_config.extra_config[target].config[
        'raw2weight_path']
    gen_weights1(box_labels, raw2path, index2label, label2index,
                 weights_save_path, 'raw')
def reformat_anno():
    """Convert dirty VRD annotations into the cleaned per-image format."""
    dataset_config = DatasetConfig('vrd')
    src_root = dataset_config.data_config['dirty_anno_root']
    dst_root = dataset_config.data_config['clean_anno_root']

    # load vrd label lists (index -> label name)
    obj_ind2label = load_list(
        os.path.join(dataset_config.dataset_root, 'object_labels.txt'))
    pre_ind2label = load_list(
        os.path.join(dataset_config.dataset_root, 'predicate_labels.txt'))

    # walk every dirty annotation file
    anno_names = os.listdir(src_root)
    for idx, anno_name in enumerate(anno_names):
        print('processing [%d/%d]' % (len(anno_names), idx + 1))
        org_anno = json.load(open(os.path.join(src_root, anno_name), 'r'))

        obj_label_boxes = []  # rows of [ymin, ymax, xmin, xmax, category]
        rlts = []             # cleaned relation annotations
        for rlt in org_anno:
            # convert predicate anno
            rlts.append(rlt_reformat(rlt, obj_ind2label, pre_ind2label))
            for ent in (rlt['object'], rlt['subject']):
                # bbox layout: ymin, ymax, xmin, xmax; category appended
                label_box = ent['bbox']
                label_box.append(ent['category'])
                obj_label_boxes.append(label_box)

        # de-duplicate identical (box, category) rows
        objs = []
        if len(obj_label_boxes) > 0:
            for label_box in np.unique(np.array(obj_label_boxes), axis=0):
                objs.append({
                    'name': obj_ind2label[int(label_box[4])].strip(),
                    'ymin': int(label_box[0]),
                    'ymax': int(label_box[1]),
                    'xmin': int(label_box[2]),
                    'xmax': int(label_box[3]),
                })

        new_anno = {'objects': objs, 'relations': rlts}
        json.dump(new_anno, open(os.path.join(dst_root, anno_name), 'w'))
def filter_anno():
    """Drop VG objects/relationships whose labels are outside the label sets."""
    vg_config = DatasetConfig('vg')
    dirty_anno_root = vg_config.data_config['dirty_anno_root']
    clean_anno_root = vg_config.data_config['clean_anno_root']

    anno_names = os.listdir(dirty_anno_root)
    total = len(anno_names)
    obj_raw_labels = set(objnet.get_raw_labels())
    pre_raw_labels = set(prenet.get_raw_labels())

    for idx, anno_name in enumerate(anno_names):
        print('filtering [%d/%d]' % (total, idx + 1))
        # load dirty json anno
        dirty_anno = json.load(
            open(os.path.join(dirty_anno_root, anno_name), 'r'))

        # keep objects whose name is in the object label set
        clean_objects = [o for o in dirty_anno['objects']
                         if o['name'] in obj_raw_labels]

        # keep relationships whose predicate, subject and object are all known
        clean_relations = []
        for rlt in dirty_anno['relationships']:
            if rlt['predicate']['name'] not in pre_raw_labels:
                continue
            ents = (rlt['subject'], rlt['object'])
            if all(ent['name'] in obj_raw_labels for ent in ents):
                clean_relations.append(rlt)

        # skip images left with no objects or no relationships
        if not clean_objects or not clean_relations:
            continue

        # save cleaned json anno
        clean_anno = {
            'objects': clean_objects,
            'relationships': clean_relations,
            'image_info': dirty_anno['image_info'],
        }
        json.dump(clean_anno,
                  open(os.path.join(clean_anno_root, anno_name), 'w'))

    print('>>> filter_anno: image num = %d'
          % (len(os.listdir(clean_anno_root))))
def split_anno_pkg():
    """Split the raw VG annotation packages into per-image json files."""
    vg_config = DatasetConfig('vg')
    raw_root = vg_config.data_config['raw_anno_root']
    out_root = vg_config.data_config['dirty_anno_root']
    # image meta does not need item-level splitting; relationships do
    split_json(os.path.join(raw_root, 'image_data.json'),
               out_root, u'image_info', False)
    split_json(os.path.join(raw_root, 'relationships.json'),
               out_root, u'relationships', True)
def reformat_anno():
    """Wash every dirty VG annotation into the clean annotation root.

    Runs wash_anno over the dirty annotations in sorted filename order,
    writing each washed file into the clean annotation directory.
    """
    vg_config = DatasetConfig('vg')
    dirty_anno_root = vg_config.data_config['dirty_anno_root']
    # BUG FIX: this previously read 'dirty_anno_root' again, so wash_anno
    # overwrote the dirty annotations in place instead of writing the
    # washed files to the clean root (cf. filter_anno, which uses both keys).
    clean_anno_root = vg_config.data_config['clean_anno_root']
    anno_list = sorted(os.listdir(dirty_anno_root))
    anno_sum = len(anno_list)
    for i in range(0, anno_sum):
        print('processing wash_anno [%d/%d]' % (anno_sum, i + 1))
        dirty_anno_path = os.path.join(dirty_anno_root, anno_list[i])
        clean_anno_path = os.path.join(clean_anno_root, anno_list[i])
        wash_anno(dirty_anno_path, clean_anno_path)
def vg2pascal():
    """Convert cleaned VG json annotations to pascal-voc xml annotations."""
    vg_config = DatasetConfig('vg')
    json_anno_root = vg_config.data_config['clean_anno_root']
    pascal_anno_root = vg_config.pascal_format['Annotations']
    json_annos = os.listdir(json_anno_root)
    total = len(json_annos)
    for idx, json_name in enumerate(json_annos):
        print('processing vg2pascal: [%d/%d]' % (total, idx + 1))
        json_anno = json.load(
            open(os.path.join(json_anno_root, json_name), 'r'))
        mid_anno = convert_anno(json_anno)
        # strip the 5-char '.json' suffix, append '.xml'
        xml_name = json_name[:-5] + '.xml'
        output_pascal_format(mid_anno,
                             os.path.join(pascal_anno_root, xml_name))
def gen_cnn_feat():
    """Extract fc7 CNN features for VRD object boxes with a Fast R-CNN net.

    For the train and test splits: prepares (box, label) files from the
    cleaned annotations, extracts fc7 features with the caffe net, and for
    the train split dumps the per-index sample ratios as weights. Finally
    splits a small 'val' list out of the test list for quick evaluation.
    All paths come from the 'vrd' DatasetConfig; side effects are files
    written under the configured roots.
    """
    # load cnn (runs on GPU 0)
    prototxt = global_config.fast_prototxt_path
    caffemodel = global_config.fast_caffemodel_path
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    # prepare: this variant is hard-wired to VRD objects
    dataset_config = DatasetConfig('vrd')
    target = 'object'
    labelnet = objnet
    # extract feature
    anno_root = dataset_config.data_config['clean_anno_root']
    img_root = dataset_config.data_config['img_root']
    label_save_root = dataset_config.extra_config[target].label_root
    prepare_root = dataset_config.extra_config[target].prepare_root
    fc7_save_root = dataset_config.extra_config[target].fc7_root
    datasets = ['train', 'test']
    for d in datasets:
        # prepare labels and boxes for this split
        label_save_path = os.path.join(label_save_root, d + '.txt')
        anno_list = os.path.join(dataset_config.pascal_format['ImageSets'],
                                 d + '.txt')
        box_label_path = os.path.join(prepare_root, d + '_box_label.bin')
        prepare_object_boxes_and_labels(anno_root, anno_list, box_label_path)
        # extract cnn feature from the freshly prepared box labels
        box_label = pickle.load(open(box_label_path, 'rb'))
        # NOTE(review): label2index is computed but not passed on here —
        # confirm whether extract_fc7_features needs it in this variant.
        label2index = labelnet.label2index()
        raw2path = labelnet.raw2path()
        # cal sample ratio (class balancing over the box labels)
        sample_ratio = cal_sample_ratio(objnet, box_label)
        extract_fc7_features(net, box_label, img_root, anno_list,
                             fc7_save_root, label_save_path, raw2path,
                             sample_ratio, d)
        if d == 'train':
            # persist the train-split sample ratios as index->weight map
            ind2weight_path = dataset_config.extra_config['object'].config[
                'ind2weight_path']
            pickle.dump(sample_ratio, open(ind2weight_path, 'wb'))
    # split a small val list for quick evaluation (1000 ids from test)
    small_val_path = os.path.join(label_save_root, 'val.txt')
    val_path = os.path.join(label_save_root, 'test.txt')
    split_a_small_val(val_path, 1000, small_val_path)
def split_anno_pkg():
    """Split the packaged VRD annotations into per-image json files.

    Also copies each split's images into the shared JPEGImages root and
    writes the train/test image-id lists into the ImageSets root.
    Annotations whose image file is missing on disk are skipped entirely.
    """
    data_config = DatasetConfig('vrd')
    # ====== split annotation package ======
    datasets = ['train', 'test']
    # contain image lists
    dataset_lists = {'train': [], 'test': []}
    list_root = data_config.pascal_format['ImageSets']
    # all images and annotations are saved together
    image_root = data_config.pascal_format['JPEGImages']
    splited_anno_root = data_config.data_config['dirty_anno_root']
    for d in datasets:
        anno_package_path = os.path.join(data_config.dataset_root,
                                         'json_dataset',
                                         'annotations_' + d + '.json')
        anno_package = json.load(open(anno_package_path))
        data_list = dataset_lists[d]
        d_image_root = os.path.join(data_config.dataset_root, 'sg_dataset',
                                    'sg_' + d + '_images')
        for i, img_name in enumerate(anno_package.keys()):
            print('processing [%d/%d]' % (len(anno_package), i + 1))
            anno = anno_package[img_name]
            # copy image — only jpeg image
            img_name = img_name.split('.')[0] + '.jpg'
            org_img_path = os.path.join(d_image_root, img_name)
            if not os.path.exists(org_img_path):
                # image missing on disk: drop its annotation too
                continue
            # FIX: dst was wrapped in a pointless single-arg os.path.join;
            # copy straight into the shared image root.
            shutil.copy(org_img_path, image_root)
            # record image name in list
            data_list.append(img_name.split('.')[0] + '\n')
            # save splited annotation
            anno_name = img_name.split('.')[0] + '.json'
            anno_save_path = os.path.join(splited_anno_root, anno_name)
            json.dump(anno, open(anno_save_path, 'w'))
        # save image list — FIX: with-block guarantees the handle is closed
        list_file_path = os.path.join(list_root, d + '.txt')
        with open(list_file_path, 'w') as list_file:
            list_file.writelines(data_list)
def gen_cnn_feat():
    """Prepare box labels and dump fc7 CNN features for VRD objects."""
    dataset = 'vrd'
    target = 'object'
    # load cnn detector for this dataset
    net = load_detector(dataset)
    # prepare config and label hierarchy
    dataset_config = DatasetConfig(dataset)
    labelnet = objnet
    # roots used throughout the extraction
    anno_root = dataset_config.data_config['clean_anno_root']
    img_root = dataset_config.data_config['img_root']
    target_config = dataset_config.extra_config[target]
    label_save_root = target_config.label_root
    prepare_root = target_config.prepare_root
    fc7_save_root = target_config.fc7_root
    for split in ('train', 'test'):
        # prepare labels and boxes for this split
        label_save_path = os.path.join(label_save_root, split + '.txt')
        anno_list = os.path.join(dataset_config.pascal_format['ImageSets'],
                                 split + '.txt')
        box_label_path = os.path.join(prepare_root, split + '_box_label.bin')
        prepare_object_boxes_and_labels(anno_root, anno_list, box_label_path)
        # extract cnn feature from the freshly prepared box labels
        box_label = pickle.load(open(box_label_path, 'rb'))
        label2index = labelnet.label2index()
        raw2wn = labelnet.raw2wn()
        raw2path = labelnet.raw2path()
        # class-balancing sample ratio
        sample_ratio = cal_sample_ratio(label2index, raw2path, box_label)
        extract_fc7_features(net, box_label, img_root, anno_list,
                             fc7_save_root, label_save_path, label2index,
                             raw2wn, raw2path, sample_ratio, split)
    # split a small val list for quick evaluation
    small_val_path = os.path.join(label_save_root, 'val.txt')
    val_path = os.path.join(label_save_root, 'test.txt')
    split_a_small_val(val_path, 1000, small_val_path)
def gen_cnn_feat():
    """Extract fc7 CNN features for VG object boxes with a Fast R-CNN net.

    For each of the train/test/val splits: prepares (box, label) files from
    the cleaned annotations, then extracts fc7 features with the caffe net.
    All paths come from the 'vg' DatasetConfig; side effects are files
    written under the configured roots.
    """
    # load cnn (runs on GPU 0)
    prototxt = global_config.fast_prototxt_path
    caffemodel = global_config.fast_caffemodel_path
    datasets = ['train', 'test', 'val']
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    # prepare: this variant is hard-wired to VG objects
    dataset_config = DatasetConfig('vg')
    target = 'object'
    labelnet = objnet
    # extract feature
    anno_root = dataset_config.data_config['clean_anno_root']
    img_root = dataset_config.data_config['img_root']
    label_save_root = dataset_config.extra_config[target].label_root
    prepare_root = dataset_config.extra_config[target].prepare_root
    fc7_save_root = dataset_config.extra_config[target].fc7_root
    for d in datasets:
        # prepare labels and boxes for this split
        label_save_path = os.path.join(label_save_root, d + '.txt')
        anno_list = os.path.join(dataset_config.pascal_format['ImageSets'],
                                 d + '.txt')
        box_label_path = os.path.join(prepare_root, d + '_box_label.bin')
        prepare_object_boxes_and_labels(anno_root, anno_list, box_label_path)
        # extract cnn feature from the freshly prepared box labels
        box_label = pickle.load(open(box_label_path, 'rb'))
        label2index = labelnet.label2index()
        raw2path = labelnet.raw2path()
        # cal sample ratio (class balancing over the box labels)
        sample_ratio = cal_sample_ratio(label2index, raw2path, box_label)
        extract_fc7_features(net, box_label, img_root, anno_list,
                             fc7_save_root, label_save_path, raw2path,
                             sample_ratio, d)
def split_dataset():
    """Split the cleaned VG annotation list into train/val/test image lists."""
    vg_config = DatasetConfig('vg')
    anno_root = vg_config.data_config['clean_anno_root']
    anno_list = os.listdir(anno_root)
    anno_sum = len(anno_list)
    print('>>> Split dataset: image num = %d' % anno_sum)
    # train : test = 4 : 1
    # test_capacity = anno_sum / 5
    test_capacity = 5000
    val_capacity = 400
    train_capacity = anno_sum - val_capacity - test_capacity
    # random.shuffle(anno_list)
    # slice boundaries: [0, train_end) train, [train_end, val_end) val,
    # [val_end, test_end) test
    train_end = train_capacity
    val_end = train_capacity + val_capacity
    test_end = val_end + test_capacity
    split_list = {
        'trainval': anno_list[:val_end],
        'train': anno_list[:train_end],
        'val': anno_list[train_end:val_end],
        'test': anno_list[val_end:test_end],
    }
    # save split list: one image id per line, annotation extension stripped
    split_list_root = vg_config.pascal_format['ImageSets']
    for split_name, names in split_list.items():
        image_id_list = [name.split('.')[0] + '\n' for name in names]
        list_file_path = os.path.join(split_list_root, split_name + '.txt')
        with open(list_file_path, 'w') as list_file:
            list_file.writelines(image_id_list)
import os from open_relation import global_config from open_relation.dataset.dataset_config import DatasetConfig log_root = 'open_relation/log' vg_dataset_config = DatasetConfig('vg') vg_obj_hyper_params = { 'visual_d': 4096, 'hidden_d': 4096, 'embedding_d': 600, 'epoch': 20, 'batch_size': 64, 'negative_label_num': 2450, 'eval_freq': 5000, 'print_freq': 10, 'lr': 0.01, 'visual_feature_root': vg_dataset_config.extra_config['object'].fc7_root, 'list_root': vg_dataset_config.extra_config['object'].label_root,
for hypo in hypos: print('%s -> %s' % (hypo.name(), hyper.name())) hypernyms.append([hypo.index(), hyper.index()]) nodes.insert(0, hypo) # save hypernym dataset hypernyms = np.array(hypernyms) import h5py f = h5py.File(hypernym_save_path, 'w') f.create_dataset('hypernyms', data=hypernyms) f.close() if __name__ == '__main__': dataset = 'vrd' data_config = DatasetConfig(dataset) if dataset == 'vrd': from open_relation.dataset.vrd.label_hier.obj_hier import objnet else: from open_relation.dataset.vg.label_hier.obj_hier import objnet label2index = objnet.label2index() hypernym_save_path = os.path.join(global_config.project_root, 'open_relation', 'label_embedding', 'object', dataset + '_dataset', 'wordnet_with_' + dataset + '.h5') generate_direct_hypernyms(objnet, hypernym_save_path)
import os import json import matplotlib.pyplot as plt from open_relation.dataset.dataset_config import DatasetConfig vg_config = DatasetConfig('vg') def count(): # counter obj_counter = dict() pre_counter = dict() obj2wn = dict() pre2wn = dict() # counting clean_anno_root = vg_config.data_config['dirty_anno_root'] anno_list = os.listdir(clean_anno_root) anno_num = len(anno_list) for i, anno_name in enumerate(anno_list): print('counting [%d/%d]' % (anno_num, i+1)) anno_path = os.path.join(clean_anno_root, anno_name) anno = json.load(open(anno_path, 'r')) objs = anno['objects'] for obj in objs: synsets = set(obj['synsets']) name = obj['name'] if name in obj_counter: obj_counter[name] += 1 else:
obj_hyper_inds = objnet.get_node_by_index( raw_rlt[2]).trans_hyper_inds() obj_sample_probs = equal_interval_prob(len(obj_hyper_inds)) obj_samples = np.random.choice(obj_hyper_inds, obj_sample_num, p=obj_sample_probs) # extend hyper object for i in range(obj_sample_num): # hyper subject, hyper_pre, hyper object, raw pre new_rlts.append( [sbj_samples[i], p_ind, obj_samples[i], pre_ind]) new_rlts = np.array(new_rlts) np.save(rlt_save_path, new_rlts) return new_rlts if __name__ == '__main__': config = DatasetConfig('vrd') anno_root = config.data_config['clean_anno_root'] split = ['train', 'test'] for d in split: list_path = os.path.join(config.pascal_format['ImageSets'], d + '.txt') rlt_save_path = data_config[d]['raw_rlt_path'] raw_rlts = collect_raw_rlts(anno_root, list_path, rlt_save_path) print('raw relationship tuple num: %d' % len(raw_rlts)) rlt_save_path = data_config[d]['ext_rlt_path'] ext_rlts = extend_rlts(raw_rlts, rlt_save_path) print('extended relationship tuple num: %d' % len(ext_rlts))
import pickle import h5py import numpy as np from nltk.corpus import wordnet as wn from open_relation.dataset.dataset_config import DatasetConfig dataset_name = 'vrd' target = 'object' data_config = DatasetConfig(dataset_name) if dataset_name == 'vrd' and target == 'object': from open_relation.dataset.vrd.label_hier.obj_hier import objnet as classnet elif dataset_name == 'vrd' and target == 'predicate': from open_relation.dataset.vrd.label_hier.pre_hier import prenet as classnet elif dataset_name == 'vg' and target == 'object': from open_relation.dataset.vg.label_hier.obj_hier import objnet as classnet else: from open_relation.dataset.vg.label_hier.pre_hier import prenet as classnet def eval2(label_vecs, labelnet): label2index = labelnet.label2index() index2label = labelnet.index2label() vg_labels = labelnet.get_raw_labels() for vg_label in vg_labels: vg_label_index = label2index[vg_label] vg_label_vec = label_vecs[vg_label_index] sub = label_vecs - vg_label_vec