def vr_make_meta_gt_visual_phrase(): m = h5py.File('data/sg_vrd_meta.h5','r',driver='core') h5f = h5py.File('data/sg_vrd_vp_meta.h5') triplets = {} cnt = 0 zl.tick() for k in m['gt/train'].keys(): if cnt %1000==0: print cnt,zl.tock() zl.tick() cnt+=1 gt_boxes = [] gt_labels = [] sub_boxes = m['gt/train/%s/sub_boxes'%k][...] obj_boxes = m['gt/train/%s/obj_boxes'%k][...] rlp_labels = m['gt/train/%s/rlp_labels'%k][...] for i in xrange(rlp_labels.shape[0]): sub_box = sub_boxes[i] obj_box = obj_boxes[i] rlp_label = rlp_labels[i] joint_box = [min(sub_box[0],obj_box[0]), min(sub_box[1],obj_box[1]),max(sub_box[2],obj_box[2]),max(sub_box[3],obj_box[3])] s_lbl = zl.idx2name_cls(m,rlp_label[0]) o_lbl = zl.idx2name_cls(m,rlp_label[2]) p_lbl = zl.idx2name_pre(m,rlp_label[1]) spo = '%s_%s_%s'%(s_lbl,p_lbl,o_lbl) lbl = zl.name2idx_tri(h5f,spo) gt_boxes.append(joint_box) gt_labels.append(lbl) h5f.create_dataset('gt/train/%s/labels'%k,data = np.array(gt_labels).astype(np.int16)) h5f.create_dataset('gt/train/%s/boxes'%k,data = np.array(gt_boxes).astype(np.int16))
def vr_vphrase_make_voc_format(split_type): if split_type != 'train' and split_type != 'test': print 'error' exit(0) m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5') m_vp = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_vphrase_meta.h5') root = '/home/zawlin/data/data_vrd/vrd/sg_vp/' anno_root = root + 'Annotations/' + split_type + '/' data_root = root + 'Data/' + split_type + '/' zl.make_dirs(anno_root) zl.make_dirs(data_root) cnt = 0 zl.tick() for k in m_vp['gt/%s' % split_type].keys(): if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 # todo for vg # im_data= db.image_data.find_one({'image_id':imid}) # im_path_full = im_data['url'].replace('https://cs.stanford.edu/people/rak248/','') # im_path_folder = im_path_full.split('/')[0] # im_path_file = im_path_full.split('/')[1] # im_src_path = vr_root+'%s/%s'%(im_path_folder,im_path_file) # im_dst_path = data_root+'%s/%s'%(im_path_folder,im_path_file) # zl.copy_file(im_src_path,im_dst_path) voc_datum = { "folder": '', "source": { "database": "sg vrd visual phrase" }, "filename": k + '.jpg' } m['train/%s/w' % k][...] w, h = int(m['train/%s/w' % k][...]), int(m['train/%s/h' % k][...]) voc_datum['size'] = {'width': w, 'height': h} objs = [] gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...] gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...] for i in xrange(gt_boxes.shape[0]): gt_box = gt_boxes[i] gt_label = gt_labels[i] ymin, ymax, xmin, xmax = gt_box[1], gt_box[3], gt_box[0], gt_box[2] bbox = {'ymin': ymin, 'ymax': ymax, 'xmin': xmin, 'xmax': xmax} name = zl.idx2name_tri(m_vp, gt_label) obj = {'name': name, 'bndbox': bbox} objs.append(obj) voc_datum['object'] = objs #write to xml dst_path = os.path.join( anno_root, voc_datum["folder"], voc_datum["filename"][:voc_datum["filename"].rfind('.')] + '.xml') voc_datum = {'annotation': voc_datum} f = open(dst_path, 'w') f.write(dict2xml(voc_datum) + '\n') f.close() print 'images with annotation=%d\n' % cnt
def setup(self, bottom, top):
    """Setup the RoIDataLayer.

    Loads the sg_vrd training ground truth (from the pickle cache when it
    exists, otherwise from the meta HDF5 file, which is then cached),
    shuffles the image ids, parses the layer parameters and reshapes the
    four top blobs: ``data``, ``sub_boxes``, ``obj_boxes`` and ``labels``.
    """
    self._cur_idx = 0
    self.gt_labels = {}
    self.meta = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r')
    cache_path = 'output/cache/sg_vrd_gt.pkl'
    if os.path.exists(cache_path):
        self.gt_labels = zl.load(cache_path)
        glog.info('loaded gt data from cache')
    else:
        glog.info( 'Preloading gt')
        zl.tick()
        for k in self.meta['gt/train'].keys():
            rlp_labels = self.meta['gt/train/%s/rlp_labels'%k][...]
            # np.float was only an alias of the builtin float and no longer
            # exists in NumPy >= 1.24; np.float64 is the same dtype.
            sub_boxes = self.meta['gt/train/%s/sub_boxes'%k][...].astype(np.float64)
            obj_boxes = self.meta['gt/train/%s/obj_boxes'%k][...].astype(np.float64)
            if sub_boxes.shape[0] > 0:
                zeros = np.zeros((sub_boxes.shape[0], 1), dtype=np.float64)
                # first index is always zero since we do one image by one image
                sub_boxes = np.concatenate((zeros, sub_boxes), axis=1)
                obj_boxes = np.concatenate((zeros, obj_boxes), axis=1)
                self.gt_labels[k] = {
                    'rlp_labels': rlp_labels,
                    'sub_boxes': sub_boxes,
                    'obj_boxes': obj_boxes,
                }
        glog.info('done preloading gt %f'%zl.tock())
        zl.save(cache_path, self.gt_labels)

    self.imids = list(self.gt_labels.keys())
    self.imidx = 0
    random.shuffle(self.imids)

    # parse the layer parameter string, which must be valid YAML
    # NOTE(review): yaml.load can construct arbitrary objects; if param_str
    # can ever come from an untrusted prototxt, switch to yaml.safe_load.
    layer_params = yaml.load(self.param_str)
    self._num_classes = layer_params['num_classes']

    self._name_to_top_map = {}
    # data blob: holds a batch of N images, each with 3 channels
    idx = 0
    top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3,
                     max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)
    self._name_to_top_map['data'] = idx
    idx += 1

    # One 5-value row per box: the extra leading column prepended above
    # plus the four box coordinates.
    top[idx].reshape(1, 5, 1, 1)
    self._name_to_top_map['sub_boxes'] = idx
    idx += 1

    top[idx].reshape(1, 5, 1, 1)
    self._name_to_top_map['obj_boxes'] = idx
    idx += 1

    # labels blob: R categorical labels in [0, ..., K] for K foreground
    # classes plus background
    top[idx].reshape(1, 1, 1, 1)
    self._name_to_top_map['labels'] = idx
def vg_cannonicalize(): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_results = db.relationships.find(no_cursor_timeout=True) cnt = 0 mappings = make_mappings() mappings_p = make_p_mappings() wnl = WordNetLemmatizer() spl = SpellingReplacer() zl.tick() for doc in db_results: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() rcnt = 0 for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] #if pre == ' if sub_name in mappings: sub_name = mappings[sub_name] if obj_name in mappings: obj_name = mappings[obj_name] r['predicate_orig'] = pre r['object']['name_orig'] = obj_name r['subject']['name_orig'] = sub_name pre_canon = cannonicalize_relationship(pre, wnl, spl) obj_canon = cannonicalize_so(obj_name, wnl, spl) sub_canon = cannonicalize_so(sub_name, wnl, spl) if pre_canon in mappings_p: pre_canon = mappings_p[pre_canon] if pre_canon == 'short than': pre_canon = 'tall than' sub_doc = r['subject'] obj_doc = r['object'] r['subject'], r['object'] = obj_doc, sub_doc r['subject_orig'], r['object_orig'] = sub_doc, obj_doc sub_canon, obj_canon = obj_canon, sub_canon if pre_canon == 'large than': pre_canon = 'small than' sub_doc = r['subject'] obj_doc = r['object'] r['subject'], r['object'] = obj_doc, sub_doc r['subject_orig'], r['object_orig'] = sub_doc, obj_doc sub_canon, obj_canon = obj_canon, sub_canon r['predicate'] = pre_canon r['object']['name'] = obj_canon r['subject']['name'] = sub_canon db.relationships_cannon.insert(doc)
def merge_pickled_files(): import os h5f = h5py.File(C.coco_eb_h5_path, 'w') cnt = 0 zl.tick() for path, subdirs, files in os.walk(C.coco_eb_dir): for name in files: cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() fpath = os.path.join(path, name) fid = name.replace('.eb', '') bbs = np.array(zl.load(fpath)).astype(np.float16) h5f[fid] = bbs
def run_test_remove_invalid_samples(): m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5','r','core') h5f = h5py.File('output/precalc/vg1_2_2016_test.hdf5') imids ={} for k in m['gt/test'].keys(): imids[k]=0 cnt = 0 zl.tick() for k in h5f.keys(): if cnt%1000==0: print cnt,zl.tock() zl.tick() cnt+=1 if k not in imids: del h5f[k]
def vr_vphrase_make_voc_format(split_type): if split_type !='train' and split_type!='test': print 'error' exit(0) m = h5py.File('data/sg_vrd_meta.h5') m_vp = h5py.File('data/sg_vrd_vphrase_meta.h5') root = 'data/sg_vrd_2016_vp/' anno_root= root+'Annotations/'+split_type+'/' data_root= root+'Data/'+split_type+'/' zl.make_dirs(anno_root) zl.make_dirs(data_root) cnt = 0 zl.tick() for k in m_vp['gt/%s'%split_type].keys(): if cnt%1000==0: print cnt,zl.tock() zl.tick() cnt+=1 voc_datum = {"folder": '', "source": {"database":"sg vrd visual phrase"}, "filename":k+'.jpg' } m['train/%s/w'%k][...] w, h = int(m['train/%s/w'%k][...]),int(m['train/%s/h'%k][...]) voc_datum['size']={'width':w,'height':h} objs = [] gt_boxes = m_vp['gt/%s/%s/boxes'%(split_type,k)][...] gt_labels = m_vp['gt/%s/%s/labels'%(split_type,k)][...] for i in xrange(gt_boxes.shape[0]): gt_box = gt_boxes[i] gt_label = gt_labels[i] ymin, ymax, xmin, xmax = gt_box[1],gt_box[3],gt_box[0],gt_box[2] bbox = {'ymin':ymin,'ymax':ymax,'xmin':xmin,'xmax':xmax} name = zl.idx2name_tri(m_vp,gt_label) obj = {'name':name, 'bndbox':bbox} objs.append(obj) voc_datum['object']=objs #write to xml dst_path = os.path.join(anno_root,voc_datum["folder"], voc_datum["filename"][:voc_datum["filename"].rfind('.')]+'.xml') voc_datum={'annotation':voc_datum} f = open(dst_path,'w') f.write(dict2xml(voc_datum)+'\n') f.close() print 'images with annotation=%d\n'%cnt
def vg_stats_predicate(): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_results = db.relationships_cannon.find(no_cursor_timeout=True) cnt = 0 mappings = make_mappings() mappings_p = make_p_mappings() wnl = WordNetLemmatizer() spl = SpellingReplacer() sub_obj_info = {} zl.tick() for doc in db_results: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() rcnt = 0 for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] so_pair = sub_name + '_' + obj_name if so_pair not in sub_obj_info: so_info = {'total': 0, 'predicates': []} else: so_info = sub_obj_info[so_pair] so_info['total'] += 1 if pre not in so_info['predicates']: so_info['predicates'].append(pre) zl.save('output/sub_obj_info.pkl', sub_obj_info) #total_pairs = len(sub_obj_info.keys())+0.0 total_pairs = 0.0 total_of_averages = 0.0 for k in sub_obj_info.keys(): so_info = sub_obj_info[k] total_predicates = len(so_info['predicates']) + 0.0 if so_info['total'] < 2: continue total_pairs += 1 total_annotated_pairs = so_info['total'] + 0.0 avg_predicates_for_this_pair = total_predicates / total_annotated_pairs total_of_averages += avg_predicates_for_this_pair total_of_averages /= total_pairs print 'total_pairs = %d' % total_pairs print 'total_of_averages = %d' % total_of_averages
def vg_make_meta_visual_phrase(): m = h5py.File('data/vg1_2_meta.h5', 'r', driver='core') h5f = h5py.File('data/vg1_2_vp_meta.h5') triplets = {} cnt = 0 zl.tick() for k in m['gt/train'].keys(): if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 # sub_boxes = m['gt/train/%s/sub_boxes'%k][...] # obj_boxes = m['gt/train/%s/obj_boxes'%k][...] rlp_labels = m['gt/train/%s/rlp_labels' % k][...] for i in xrange(rlp_labels.shape[0]): # sub_box = sub_boxes[i] # obj_box = obj_boxes[i] rlp_label = rlp_labels[i] # joint_bbox = [min(sub_bbox[0],obj_bbox[0]), min(sub_bbox[1],obj_bbox[1]),max(sub_bbox[2],obj_bbox[2]),max(sub_bbox[3],obj_bbox[3])] s_lbl = zl.idx2name_cls(m, rlp_label[0]) o_lbl = zl.idx2name_cls(m, rlp_label[2]) p_lbl = zl.idx2name_pre(m, rlp_label[1]) spo = '%s_%s_%s' % (s_lbl, p_lbl, o_lbl) # spo = '%d_%d_%d'%(rlp_label[0],rlp_label[1],rlp_label[2]) if spo not in triplets: triplets[spo] = 0 triplets[spo] += 1 zl.save('output/pkl/triplets_train_vp.pkl', triplets) triplets_sorted = zl.sort_dict_by_val(triplets) triplets_ok = [] for k, v in triplets_sorted: triplets_ok.append(k) print k, v triplets_ok = sorted(triplets_ok) triplets_ok = ['__background__'] + triplets_ok for i in xrange(len(triplets_ok)): h5f['meta/tri/idx2name/%d' % i] = triplets_ok[i] h5f['meta/tri/name2idx/%s' % triplets_ok[i]] = i print len(triplets_ok)
def gen_meta_for_retrieval(): out_pkl = 'output/pkl/vg_retr_meta.pkl' m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r') rlp_labels = [] files = [] counts = [] cnt = 0 zl.tick() for k in m['gt/test']: if cnt % 100 == 0: print cnt, zl.tock() zl.tick() cnt += 1 gt_rlp_labels = m['gt/test'][k]['rlp_labels'][...] for i in xrange(gt_rlp_labels.shape[0]): gt_rlp_label = gt_rlp_labels[i] if len(rlp_labels) == 0: rlp_labels.append(gt_rlp_label.tolist()) files.append([k]) counts.append(1) continue bInd = np.all(gt_rlp_label == rlp_labels, axis=1) ind = np.arange(len(rlp_labels))[bInd] if len(ind) == 0: rlp_labels.append(gt_rlp_label.tolist()) files.append([k]) counts.append(1) else: files[ind].append(k) counts[ind] = counts[ind] + 1 # rlp_labels.append(gt_rlp_label.tolist()) # files.append([k]) # counts.append(1) rlp_labels = np.array(rlp_labels) files = np.array(files) counts = np.array(counts) ind = np.argsort(counts)[::-1] counts = counts[ind] files = files[ind] rlp_labels = rlp_labels[ind] retr_meta = {'counts': counts, 'files': files, 'rlp_labels': rlp_labels} zl.save(out_pkl, retr_meta)
def setup(self, bottom, top): self._cur_idx = 0 self.vgg_data = {} vgg_h5 = h5py.File( "output/precalc/vg1_2_2016_predicate_exp_train.hdf5", 'r') layer_params = yaml.load(self.param_str_) self.imids = [] for k in vgg_h5.keys(): self.imids.append(k) self.imidx = 0 if os.path.exists('output/cache/vg1_2_2016_pre_train_concat.pkl'): self.vgg_data = zl.load( 'output/cache/vg1_2_2016_pre_train_concat.pkl') print 'loaded train data from cache' else: print 'Preloading training data' zl.tick() for k in vgg_h5.keys(): sub_visual = vgg_h5[k]['sub_visual'][...] obj_visual = vgg_h5[k]['obj_visual'][...] pre_label = vgg_h5[k]['pre_label'][...] self.vgg_data[k] = {} self.vgg_data[k]['sub_visual'] = sub_visual self.vgg_data[k]['obj_visual'] = obj_visual self.vgg_data[k]['pre_label'] = pre_label print 'done preloading training data %f' % zl.tock() zl.save('output/cache/vg1_2_2016_pre_train_concat.pkl', self.vgg_data) vgg_h5.close() self._batch_size = layer_params['batch_size'] self.train_data = [] self._name_to_top_map = {} # data blob: holds a batch of N images, each with 3 channels # top[0].reshape(self._batch_size, 4096 * 2 ) top[0].reshape(self._batch_size, 2 * 4096) top[1].reshape(self._batch_size) self._name_to_top_map['visual'] = 0 self._name_to_top_map['label'] = 1
def run_test_save_result(): caffe.set_mode_gpu() caffe.set_device(0) m_vp = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_vp_meta.h5', 'r', 'core') m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core') net = caffe.Net('models/sg_vrd_vp/vgg16/faster_rcnn_end2end/test.prototxt', 'output/faster_rcnn_end2end/sg_vrd_vp_2016_train/sg_vrd_vp_vgg16_faster_rcnn_finetune_iter_15000.caffemodel',caffe.TEST) h5path = 'output/precalc/sg_vrd_2016_test_new.hdf5' h5f = h5py.File(h5path) root = 'data/sg_vrd_2016/Data/sg_test_images/' _t = {'im_detect': Timer(), 'misc': Timer()} cnt = 0 thresh = .15 img_set_file = 'data/sg_vrd_2016/ImageSets/test.txt' imlist = {line.strip().split(' ')[1]:line.strip().split(' ')[0] for line in open(img_set_file)} # cv2.namedWindow('ctrl') # cv2.createTrackbar('thresh','ctrl',10,100,nothing) results = {} for imid in imlist.keys(): cnt += 1 if imid in h5f:continue impath = imlist[imid] +'.jpg' impath = root+impath im = cv2.imread(impath) if im == None: print impath box_proposals = None _t['im_detect'].tic() score_raw, scores, fc7, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() boxes_tosort = [] zl.tick() h5f.create_dataset(imid + '/scores', dtype='float16', data=scores.astype(np.float16)) h5f.create_dataset(imid + '/boxes', dtype='short', data=boxes[:,:4].astype(np.short)) t_misc = zl.tock() print 'im_detect: {:d} {:.3f}s {:.3f}s' \ .format(cnt, _t['im_detect'].average_time, t_misc)
def vg_db_stats(): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_rel_train_all = db.relationships_all_train.find(no_cursor_timeout=True) db_rel_test_all = db.relationships_all_test.find(no_cursor_timeout=True) train_stats = {} test_stats = {} cnt = 0 zl.tick() for db_rel in db_rel_train_all: if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 for r in db_rel['relationships']: name = r['predicate'] if name not in train_stats: train_stats[name] = 0 train_stats[name] += 1 for db_rel in db_rel_test_all: if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 for r in db_rel['relationships']: name = r['predicate'] if name not in test_stats: test_stats[name] = 0 test_stats[name] += 1
def convert_vp_result_for_matlab_eval2(): _t = {'im_detect': Timer(), 'misc': Timer()} m_vp = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_vp_meta.h5', 'r', 'core') m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r', 'core') root = 'data/vg1_2_2016/Data/test/' h5path = 'output/precalc/vg1_2_vp2016_test_new.hdf5' h5f = h5py.File(h5path, 'r') h5path = 'output/precalc/vg1_2_vp2016_test_nms2_.4.hdf5' h5f_nms = h5py.File(h5path) img_set_file = 'data/vg1_2_2016/ImageSets/test.txt' imlist = { line.strip().split(' ')[1]: line.strip().split(' ')[0] for line in open(img_set_file) } cnt = 1 results = {} thresh = 0 zl.tick() for imid in imlist.keys(): cnt += 1 if cnt % 100 == 0: print cnt, zl.tock() zl.tick() imid_orig = imlist[imid].split('/')[1] impath = imlist[imid] + '.jpg' impath = root + impath im = cv2.imread(impath) if im == None: print impath box_proposals = None _t['im_detect'].tic() scores, boxes = h5f[imid]['scores'][...], h5f[imid]['boxes'][...] _t['im_detect'].toc() _t['misc'].tic() boxes_tosort = [] # print scores.shape # print boxes.shape zl.tick() im_disp = im.copy() h5_boxes = [] h5_labels = [] h5_confs = [] ind = np.argmax(scores[:, 1:], axis=1) + 1 scores = scores[np.arange(scores.shape[0]), ind] dets = np.hstack((boxes, scores[:, np.newaxis])) keep = nms(dets, .4, force_cpu=True) # nms threshold # print len(keep) # exit(0) dets = dets[keep, :] ind = ind[keep] h5f_nms.create_dataset(imid_orig + '/boxes', data=dets[:, :4]) h5f_nms.create_dataset(imid_orig + '/confs', data=dets[:, 4]) h5f_nms.create_dataset(imid_orig + '/labels', data=ind) # if c ==27: # exit(0) # if c ==ord( ' '): # break _t['misc'].toc()
def gen_obj_detection_results_from_hdf5(h5_path, out_path): h5f = h5py.File(h5_path, 'r') outfile = open(out_path, 'w') thresh = 0.01 cnt = 0 zl.tick() for k in h5f.keys(): if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 scores = h5f['%s/scores' % k][...] boxes = h5f['%s/boxes' % k][...] boxes_tosort = [] for j in xrange(1, 201): inds = np.where(scores[:, j] > 0.001)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, .2, force_cpu=False) # nms threshold cls_dets = cls_dets[keep, :] boxes_tosort.append(cls_dets) for j in xrange(len(boxes_tosort)): cls_dets = boxes_tosort[j] for di in xrange(cls_dets.shape[0]): # print 'here' di = cls_dets[di] score = di[-1] cls_idx = j + 1 if score > 1: score = 1 if score < thresh: continue res_line = '%s %d %f %d %d %d %d' % (k, cls_idx, score, di[0], di[1], di[2], di[3]) outfile.write(res_line + '\n') outfile.close()
def vg_count_only_one_triplet(): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_results = db.relationships_all_train.find(no_cursor_timeout=True) cnt = 0 spo_info = {} spo_list = [] zl.tick() for doc in db_results: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() rcnt = 0 for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] spo = sub_name + '_' + pre + '_' + obj_name if spo not in spo_info: spo_info[spo] = 0 spo_info[spo] += 1 db_results_2 = db.relationships_all_test.find(no_cursor_timeout=True) for doc in db_results_2: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() rcnt = 0 for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] spo = sub_name + '_' + pre + '_' + obj_name if spo not in spo_info: spo_info[spo] = 0 spo_info[spo] += 1 zl.save('output/spo_info_vg.pkl', spo_info) #total_pairs = len(sub_obj_info.keys())+0.0 total_spo = len(spo_info.keys()) + 0.0 one_count = 0 for k in spo_info.keys(): if spo_info[k] >= 5: spo_list.append(k) one_count += 1 #print total_spo,one_count vg_total_annotation_count(spo_list)
def vg_total_annotation_count(spo_list): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_results = db.relationships_all_train.find(no_cursor_timeout=True) cnt = 0 rcnt = 0 zl.tick() total_train_cnt = 0 for doc in db_results: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() ok = False for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] spo = sub_name + '_' + pre + '_' + obj_name if spo in spo_list: rcnt += 1 ok = True if ok: total_train_cnt += 1 db_results_2 = db.relationships_all_test.find(no_cursor_timeout=True) total_test_cnt = 0 for doc in db_results_2: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() ok = False for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] spo = sub_name + '_' + pre + '_' + obj_name if spo in spo_list: rcnt += 1 ok = True if ok: total_test_cnt += 1 print rcnt, total_train_cnt, total_test_cnt
def vg_check_obj_stats(): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_obj_train_all = db.relationships_objects_train.find( no_cursor_timeout=True) db_obj_test_all = db.relationships_objects_test.find( no_cursor_timeout=True) train_stats = {} test_stats = {} cnt = 0 zl.tick() for db_obj in db_obj_train_all: if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 for o in db_obj['objects']: name = o['name'] if name not in train_stats: train_stats[name] = 0 train_stats[name] += 1 for db_obj in db_obj_test_all: if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() cnt += 1 for o in db_obj['objects']: name = o['name'] if name not in test_stats: test_stats[name] = 0 test_stats[name] += 1 zl.save('output/train_stats.pkl', train_stats) zl.save('output/test_stats.pkl', test_stats) print zl.sort_dict_by_val(train_stats) print zl.sort_dict_by_val(test_stats)
def vg_count_predicate_per_object(): client = MongoClient("mongodb://localhost:27017") db = client.visual_genome_1_2 db_results = db.relationships_all_train.find(no_cursor_timeout=True) cnt = 0 spo_infos = {} zl.tick() for doc in db_results: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() rcnt = 0 for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] if obj_name not in spo_infos: spo_info = {'predicates': []} spo_infos[obj_name] = spo_info if sub_name not in spo_infos: spo_info = {'predicates': []} spo_infos[sub_name] = spo_info sub_spo_info = spo_infos[sub_name] obj_spo_info = spo_infos[obj_name] if pre not in sub_spo_info['predicates']: sub_spo_info['predicates'].append(pre) if pre not in obj_spo_info['predicates']: obj_spo_info['predicates'].append(pre) db_results_2 = db.relationships_all_test.find(no_cursor_timeout=True) for doc in db_results_2: id = doc['image_id'] cnt += 1 if cnt % 1000 == 0: print cnt, zl.tock() zl.tick() rcnt = 0 for r in doc['relationships']: pre = r['predicate'] sub_name = r['subject']['name'] obj_name = r['object']['name'] if obj_name not in spo_infos: spo_info = {'predicates': []} spo_infos[obj_name] = spo_info if sub_name not in spo_infos: spo_info = {'predicates': []} spo_infos[sub_name] = spo_info sub_spo_info = spo_infos[sub_name] obj_spo_info = spo_infos[obj_name] if pre not in sub_spo_info['predicates']: sub_spo_info['predicates'].append(pre) if pre not in obj_spo_info['predicates']: obj_spo_info['predicates'].append(pre) total_predicates = 0 for k in spo_infos.keys(): spo_info = spo_infos[k] print len(spo_info['predicates']) total_predicates += len(spo_info['predicates']) print total_predicates / 200.
def setup(self, bottom, top): self._cur_idx = 0 self.vgg_data = {} self.gt_labels = {} vgg_h5 = h5py.File("output/precalc/vg1_2_2016_train.hdf5", 'r') if os.path.exists('output/cache/vg1_2_2016_train.pkl'): self.vgg_data = zl.load('output/cache/vg1_2_2016_train.pkl') print 'loaded train data from cache' else: print 'Preloading training data' zl.tick() for k in vgg_h5.keys(): classemes = vgg_h5[k]['classemes'][...] visuals = vgg_h5[k]['visuals'][...] locations = vgg_h5[k]['locations'][...] cls_confs = vgg_h5[k]['cls_confs'][...] self.vgg_data[k] = {} self.vgg_data[k]['classemes'] = classemes self.vgg_data[k]['visuals'] = visuals self.vgg_data[k]['cls_confs'] = cls_confs self.vgg_data[k]['locations'] = locations print 'done preloading training data %f' % zl.tock() zl.save('output/cache/vg1_2_2016_train.pkl', self.vgg_data) vgg_h5.close() self.meta = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r') if os.path.exists('output/cache/vg1_2_2016_gt.pkl'): self.gt_labels = zl.load('output/cache/vg1_2_2016_gt.pkl') print 'loaded gt data from cache' else: print 'Preloading gt' zl.tick() for k in self.meta['gt/train'].keys(): rlp_labels = self.meta['gt/train/%s/rlp_labels' % k][...] 
sub_boxes = self.meta['gt/train/%s/sub_boxes' % k][...].astype( np.float) obj_boxes = self.meta['gt/train/%s/obj_boxes' % k][...].astype( np.float) self.gt_labels[k] = {} self.gt_labels[k]['rlp_labels'] = rlp_labels self.gt_labels[k]['sub_boxes'] = sub_boxes self.gt_labels[k]['obj_boxes'] = obj_boxes print 'done preloading gt %f' % zl.tock() zl.save('output/cache/vg1_2_2016_gt.pkl', self.gt_labels) self.imids = [] for k in self.vgg_data.keys(): self.imids.append(k) self.imidx = 0 random.shuffle(self.imids) layer_params = yaml.load(self.param_str_) self._batch_size = layer_params['batch_size'] self.train_data = [] self._name_to_top_map = {} # top[0].reshape(self._batch_size, 201*2) top[0].reshape(self._batch_size, 4096 * 2) # top[0].reshape(self._batch_size, 4*2) top[1].reshape(self._batch_size) # self._name_to_top_map['classeme'] = 0 self._name_to_top_map['visual'] = 0 # self._name_to_top_map['location'] = 0 self._name_to_top_map['label'] = 1
def run_test_visualize(): caffe.set_mode_gpu() caffe.set_device(0) m_vp = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_vp_meta.h5', 'r', 'core') m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r', 'core') net = caffe.Net('models/vg1_2_vp/vgg16/faster_rcnn_end2end/test.prototxt', 'output/faster_rcnn_end2end/vg1_2_vp2016_train/vg1_2_vp_vgg16_faster_rcnn_finetune_no_bbox_reg_iter_110000.caffemodel',caffe.TEST) root = 'data/vg1_2_2016/Data/test/' _t = {'im_detect': Timer(), 'misc': Timer()} cnt = 0 thresh = .15 img_set_file = 'data/vg1_2_2016/ImageSets/test.txt' imlist = {line.strip().split(' ')[1]:line.strip().split(' ')[0] for line in open(img_set_file)} rel_types = {} rel_types['p']=[] rel_types['s']=[] rel_types['v']=[] rel_types['c']=[] for k in m_vp['meta/tri/name2idx'].keys(): if k !='__background__': idx = int(str(m_vp['meta/tri/name2idx/'+k][...])) r_type = m_vp['meta/tri/name2idx/'+k].attrs['type'] rel_types[r_type].append(idx) cv2.namedWindow('ctrl') cv2.createTrackbar('thresh','ctrl',10,100,nothing) results = {} for imid in imlist.keys(): cnt += 1 impath = imlist[imid] +'.jpg' if '1059' not in impath and '107901' not in impath:continue impath = root+impath im = cv2.imread(impath) if im == None: print impath box_proposals = None _t['im_detect'].tic() score_raw, scores, fc7, raw_boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() boxes_tosort = [] zl.tick() # boxes =np.array([]) # labels =np.array([]) boxes = None labels = None print 'generating boxes' for j in xrange(1, 19237): inds = np.where(scores[:, j] > 0.00)[0] cls_scores = scores[inds, j] cls_boxes = raw_boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) boxes_tosort.append(cls_dets) keep = nms(cls_dets, .2, force_cpu=True) # nms threshold cls_dets = cls_dets[keep, :] # sorted_ind = np.argsort(cls_dets[:,-1])[::-1] # cls_dets=cls_dets[sorted_ind] if cls_dets.shape[0]>0: if boxes == None: boxes = cls_dets 
else: boxes = np.vstack((boxes,cls_dets)) if labels == None: labels = np.tile(j,cls_dets.shape[0]) else: labels = np.hstack((labels,np.tile(j,cls_dets.shape[0]))) # print boxes[:5] # print labels[:5] # exit(0) # sort the results print 'sorting' sorted_ind = np.argsort(boxes[:,-1])[::-1] boxes = boxes[sorted_ind] labels = labels[sorted_ind] ours_indices = {} ours_indices['p']=[] ours_indices['s']=[] ours_indices['v']=[] ours_indices['c']=[] indexor = np.arange(labels.shape[0]) c_ind = np.in1d(labels,np.array(rel_types['c'])) ours_indices['c'] = indexor[c_ind] p_ind = np.in1d(labels,np.array(rel_types['p'])) ours_indices['p'] = indexor[p_ind] v_ind = np.in1d(labels,np.array(rel_types['v'])) ours_indices['v'] = indexor[v_ind] s_ind = np.in1d(labels,np.array(rel_types['s'])) ours_indices['s'] = indexor[s_ind] # exit(0) # for i in xrange(labels.shape[0]): # lbl =a labels[i] # if lbl in rel_types['p']: ours_indices['p'].append(i) # if lbl in rel_types['s']: ours_indices['s'].append(i) # if lbl in rel_types['v']: ours_indices['v'].append(i) # if lbl in rel_types['c']: ours_indices['c'].append(i) # print labels.shape[0] # print len(ours_indices['p']) # print len(ours_indices['s']) # print len(ours_indices['v']) # print len(ours_indices['c']) # print rel_types['c'] # exit(0) _t['misc'].toc() t_misc = zl.tock() cv2.namedWindow('ctrl') cv2.destroyWindow('ctrl') cv2.namedWindow('ctrl') ours_p_len = ours_indices['p'].shape[0]-1 ours_c_len = ours_indices['c'].shape[0]-1 ours_v_len = ours_indices['v'].shape[0]-1 ours_s_len = ours_indices['s'].shape[0]-1 #ours_len = len(rlp_labels_ours)-1 ours_len = labels.shape[0]-1 if ours_len>0 :cv2.createTrackbar('idx_ours','ctrl',0,ours_len,nothing) if ours_p_len>0 :cv2.createTrackbar('idx_ours_p','ctrl',0,ours_p_len,nothing) if ours_c_len>0: cv2.createTrackbar('idx_ours_c','ctrl',0,ours_c_len,nothing) if ours_v_len>0:cv2.createTrackbar('idx_ours_v','ctrl',0, ours_v_len,nothing) if ours_s_len>0:cv2.createTrackbar('idx_ours_s','ctrl',0, 
ours_s_len,nothing) im_orig = im.copy() while True: if ours_len>=0: idx_ours = cv2.getTrackbarPos('idx_ours','ctrl') im_ours = im_orig.copy() box = boxes[idx_ours] lbl = zl.idx2name_tri(m_vp,labels[idx_ours]) cv2.putText(im_ours,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) cv2.rectangle(im_ours,(box[0],box[1]),(box[2],box[3]),(0,200,0),2) cv2.imshow('im_ours',im_ours) if ours_c_len>=0: idx_ours_c = cv2.getTrackbarPos('idx_ours_c','ctrl') idx_ours = ours_indices['c'][idx_ours_c] im_ours_c = im_orig.copy() box = boxes[idx_ours] lbl = zl.idx2name_tri(m_vp,labels[idx_ours]) cv2.putText(im_ours_c,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) cv2.rectangle(im_ours_c,(box[0],box[1]),(box[2],box[3]),(0,0,200),2) cv2.imshow('im_ours_c',im_ours_c) if ours_s_len>=0: idx_ours_s = cv2.getTrackbarPos('idx_ours_s','ctrl') idx_ours = ours_indices['s'][idx_ours_s] im_ours_s = im_orig.copy() box = boxes[idx_ours] lbl = zl.idx2name_tri(m_vp,labels[idx_ours]) cv2.putText(im_ours_s,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) cv2.rectangle(im_ours_s,(box[0],box[1]),(box[2],box[3]),(0,0,200),2) cv2.imshow('im_ours_s',im_ours_s) if ours_v_len>=0: idx_ours_v = cv2.getTrackbarPos('idx_ours_v','ctrl') idx_ours = ours_indices['v'][idx_ours_v] im_ours_v = im_orig.copy() box = boxes[idx_ours] lbl = zl.idx2name_tri(m_vp,labels[idx_ours]) cv2.putText(im_ours_v,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) cv2.rectangle(im_ours_v,(box[0],box[1]),(box[2],box[3]),(0,0,200),2) cv2.imshow('im_ours_v',im_ours_v) if ours_p_len>=0: idx_ours_p = cv2.getTrackbarPos('idx_ours_p','ctrl') idx_ours = ours_indices['p'][idx_ours_p] im_ours_p = im_orig.copy() box = boxes[idx_ours] lbl = zl.idx2name_tri(m_vp,labels[idx_ours]) cv2.putText(im_ours_p,lbl,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,0,255),2) cv2.rectangle(im_ours_p,(box[0],box[1]),(box[2],box[3]),(0,0,200),2) cv2.imshow('im_ours_p',im_ours_p) c = cv2.waitKey(1)&0xFF if c == ord(' '): break if c == ord('s'): im_folder = 
'output/results/examples/'+imid if not os.path.exists('output/results/examples/'+imid): os.makedirs('output/results/examples/'+imid) if not os.path.exists('output/results/examples/'+imid+'/orig_'+imid+'.jpg'): cv2.imwrite('output/results/examples/'+imid+'/orig_'+imid+'.jpg',im_orig) if ours_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_'+imid+str(idx_ours)+'.jpg',im_ours) if ours_v_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_v_'+imid+str(idx_ours)+'.jpg',im_ours_v) if ours_p_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_p_'+imid+str(idx_ours)+'.jpg',im_ours_p) if ours_c_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_c_'+imid+str(idx_ours)+'.jpg',im_ours_c) if ours_s_len>=0:cv2.imwrite('output/results/examples/'+imid+'/ours_s_'+imid+str(idx_ours)+'.jpg',im_ours_s) print 'im_detect: {:d} {:.3f}s {:.3f}s' \ .format(cnt, _t['im_detect'].average_time, t_misc)
def run_relation(model_type, iteration):
    """Score every ordered pair of VG 1.2 test detections with a relation net.

    For each image in ``output/precalc/vg1_2_2016_test.hdf5`` every ordered
    (subject, object) pair of detections whose two detection scores are both
    at least 0.01 is fed to the network.  The highest scoring predicate of
    ``relation_prob`` is kept and its score is added to the two detection
    scores.  Per image, ``rlp_confs``, ``sub_boxes``, ``obj_boxes`` and
    ``rlp_labels`` are written (as float16) to
    ``output/vg_results/vg1_2_2016_result_<model_type>_<iteration>.hdf5``.

    NOTE(review): further ``run_relation`` definitions appear later in this
    file; at import time the last one wins.

    Args:
        model_type: Network variant.  Must contain ``'all'``, ``'visual'``,
            ``'classeme'`` or ``'location'`` (checked in that order); it
            selects the input blobs and is part of the prototxt, caffemodel
            and result file names.
        iteration: Training iteration as a string, used in the file names.

    Raises:
        ValueError: If ``model_type`` matches none of the known variants.
    """
    # Resolve the input blobs once, up front: the original only discovered an
    # unknown model type as a NameError on the first forward pass.
    use_classeme = use_visual = use_location = False
    if 'all' in model_type:
        use_classeme = use_visual = use_location = True
    elif 'visual' in model_type:
        use_visual = True
    elif 'classeme' in model_type:
        use_classeme = True
    elif 'location' in model_type:
        use_location = True
    else:
        raise ValueError('unknown relation model type: %r' % (model_type,))

    result_path = ('output/vg_results/vg1_2_2016_result_' + model_type + '_' +
                   iteration + '.hdf5')
    # Context managers make sure both HDF5 files are flushed and closed, even
    # when the run is interrupted.
    with h5py.File('output/precalc/vg1_2_2016_test.hdf5') as vgg_data, \
            h5py.File(result_path) as result:
        net = caffe.Net(
            'models/vg1_2/relation/test_' + model_type + '.prototxt',
            'output/relation/vg/relation_vgg16_' + model_type + '_iter_' +
            iteration + '.caffemodel', caffe.TEST)
        cnt = 1
        zl.tick()
        for imid in vgg_data.keys():
            if cnt % 100 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
                # NOTE(review): this stops the whole run at the first
                # progress report; it looks like a profiling leftover but is
                # kept because it may be intentional -- confirm.
                exit(0)
            cnt += 1

            # Load the per-image arrays once instead of hitting the HDF5
            # file again for every pair.
            entry = vgg_data[imid]
            classemes = entry['classemes'][...]
            visuals = entry['visuals'][...]
            locations = entry['locations'][...]
            cls_confs = entry['cls_confs'][...]

            rlp_labels = []
            rlp_confs = []
            sub_boxes = []
            obj_boxes = []
            n_boxes = len(locations)
            for s in xrange(n_boxes):
                sub = locations[s]
                sub_cls = cls_confs[s, 0]
                sub_score = cls_confs[s, 1]
                if sub_score < 0.01:
                    continue
                for o in xrange(n_boxes):
                    if s == o:
                        continue
                    obj = locations[o]
                    obj_cls = cls_confs[o, 0]
                    obj_score = cls_confs[o, 1]
                    if obj_score < 0.01:
                        continue

                    blob = {}
                    if use_classeme:
                        blob['classeme'] = np.hstack(
                            (classemes[s], classemes[o])).reshape(1, 402)
                    if use_visual:
                        blob['visual'] = np.hstack(
                            (visuals[s], visuals[o])).reshape(1, 8192)
                    if use_location:
                        # Each box is encoded relative to the other one.
                        sub_loc_encoded = bbox_transform(
                            np.array([obj[:4]]), np.array([sub[:4]]))[0]
                        obj_loc_encoded = bbox_transform(
                            np.array([sub[:4]]), np.array([obj[:4]]))[0]
                        blob['location'] = np.hstack(
                            (sub_loc_encoded, obj_loc_encoded)).reshape(1, 8)

                    net.forward_all(**blob)
                    relation_score = net.blobs['relation_prob'].data[0].copy()
                    predicate = np.argmax(relation_score)
                    rs = relation_score[predicate]

                    rlp_labels.append(
                        np.array([sub_cls, predicate, obj_cls]).astype(np.int32))
                    rlp_confs.append(rs + sub_score + obj_score)
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])

            result.create_dataset(imid + '/rlp_confs', dtype='float16',
                                  data=np.array(rlp_confs).astype(np.float16))
            result.create_dataset(imid + '/sub_boxes', dtype='float16',
                                  data=np.array(sub_boxes).astype(np.float16))
            result.create_dataset(imid + '/obj_boxes', dtype='float16',
                                  data=np.array(obj_boxes).astype(np.float16))
            result.create_dataset(imid + '/rlp_labels', dtype='float16',
                                  data=np.array(rlp_labels).astype(np.float16))
def vg_make_voc_format(split_type):
    """Export Visual Genome 1.2 object annotations as per-image XML files.

    For every document of the ``relationships_objects_<split_type>``
    collection the image is copied below
    ``data/vg_1.2/voc_format/Data/<split_type>/`` and an XML file built with
    ``dict2xml`` (folder, source, filename, size and one ``object`` entry per
    annotated object) is written below
    ``data/vg_1.2/voc_format/Annotations/<split_type>/``.

    Args:
        split_type: ``'train'`` or ``'test'``.  Any other value prints
            ``error`` and exits the process.
    """
    if split_type != 'train' and split_type != 'test':
        print('error')
        exit(0)

    vg_root = 'data/vg_1.2/'
    anno_root = 'data/vg_1.2/voc_format/Annotations/' + split_type + '/'
    data_root = 'data/vg_1.2/voc_format/Data/' + split_type + '/'
    for out_root in (anno_root, data_root):
        zl.make_dirs(out_root + 'VG_100K_2')
        zl.make_dirs(out_root + 'VG_100K')

    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    if split_type == 'train':
        db_objs = db.relationships_objects_train.find(no_cursor_timeout=True)
    else:
        db_objs = db.relationships_objects_test.find(no_cursor_timeout=True)

    cnt = 0
    zl.tick()
    try:
        for db_obj in db_objs:
            if cnt % 1000 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
            cnt += 1

            imid = db_obj['image_id']
            im_data = db.image_data.find_one({'image_id': imid})
            # The stored URL ends in "<folder>/<file>" once the host prefix
            # is stripped.
            im_path_full = im_data['url'].replace(
                'https://cs.stanford.edu/people/rak248/', '')
            path_parts = im_path_full.split('/')
            im_path_folder = path_parts[0]
            im_path_file = path_parts[1]
            im_src_path = vg_root + '%s/%s' % (im_path_folder, im_path_file)
            im_dst_path = data_root + '%s/%s' % (im_path_folder, im_path_file)
            zl.copy_file(im_src_path, im_dst_path)

            objs = []
            for o in db_obj['objects']:
                bbox = {
                    'ymin': o['y'],
                    'ymax': o['y'] + o['h'],
                    'xmin': o['x'],
                    'xmax': o['x'] + o['w'],
                }
                objs.append({'name': o['name'], 'bndbox': bbox})

            voc_datum = {
                "folder": im_path_folder,
                "source": {
                    "database": "visual genome 1.2"
                },
                "filename": im_path_file,
                'size': {'width': im_data['width'], 'height': im_data['height']},
                'object': objs,
            }

            # write to xml; splitext also copes with a name without extension
            dst_path = os.path.join(
                anno_root, im_path_folder,
                os.path.splitext(im_path_file)[0] + '.xml')
            with open(dst_path, 'w') as f:
                f.write(dict2xml({'annotation': voc_datum}) + '\n')
    finally:
        # Cursors opened with no_cursor_timeout are never reaped by the
        # server, so they must be closed explicitly.
        db_objs.close()
        client.close()

    print('images with annotation=%d\n' % cnt)
def vg_make_voc_imageset(split_type):
    """Write the image-set index file for a Visual Genome 1.2 split.

    Each line of ``data/vg_1.2/voc_format/ImageSets/<split_type>.txt`` is
    ``"<folder>/<file without .jpg> <running index>"``.  Images without
    objects, images narrower or lower than 100 pixels and blacklisted images
    are skipped.  For the ``train`` split an additional ``mini.txt`` is
    written that lists, per object name, up to three images containing it.

    Args:
        split_type: ``'train'`` or ``'test'``.  Any other value prints
            ``error`` and exits the process.
    """
    if split_type != 'train' and split_type != 'test':
        print('error')
        exit(0)

    client = MongoClient("mongodb://localhost:27017")
    db = client.visual_genome_1_2
    blacklist = []
    imageset_root = 'data/vg_1.2/voc_format/ImageSets/' + split_type + '.txt'
    mini_selection = {}
    cnt = 1
    db_objs = None
    try:
        # preload image data
        imdatas = {}
        im_cursor = db.image_data.find(no_cursor_timeout=True)
        try:
            for imdata in im_cursor:
                imdatas[imdata['image_id']] = imdata
        finally:
            im_cursor.close()

        if split_type == 'train':
            db_objs = db.relationships_objects_train.find(no_cursor_timeout=True)
        else:
            db_objs = db.relationships_objects_test.find(no_cursor_timeout=True)

        # Start the timer before the first progress report, as the sibling
        # export functions do.
        zl.tick()
        # "with" closes the index file on the exit(0) path below as well.
        with open(imageset_root, 'w') as output:
            for db_obj in db_objs:
                if len(db_obj['objects']) <= 0:
                    continue
                if cnt % 1000 == 0:
                    print('%s %s' % (cnt, zl.tock()))
                    zl.tick()
                imid = db_obj['image_id']
                im_data = imdatas[imid]
                if im_data['width'] < 100 or im_data['height'] < 100:
                    continue
                im_path_full = im_data['url'].replace(
                    'https://cs.stanford.edu/people/rak248/', '')
                path_parts = im_path_full.split('/')
                im_path_folder = path_parts[0]
                im_path_file = path_parts[1]
                if '.jpg' not in im_path_file:
                    print('not a jpg image %s\n' % im_path_file)
                    exit(0)
                im_index = im_path_folder + '/' + im_path_file.replace('.jpg', '')
                if im_index in blacklist:
                    continue
                if split_type == 'train':
                    # Remember at most three distinct images per object name.
                    for o in db_obj['objects']:
                        picked = mini_selection.setdefault(o['name'], [])
                        if len(picked) < 3 and im_index not in picked:
                            picked.append(im_index)
                output.write('%s %d\n' % (im_index, cnt))
                cnt += 1
    finally:
        # no_cursor_timeout cursors have to be closed by the client.
        if db_objs is not None:
            db_objs.close()
        client.close()

    if split_type == 'train':
        imageset_root = 'data/vg_1.2/voc_format/ImageSets/mini.txt'
        cnt = 1
        mini_lines = []
        for name in mini_selection:
            for im_index in mini_selection[name]:
                mini_lines.append('%s %d\n' % (im_index, cnt))
                cnt += 1
        with open(imageset_root, 'w') as output:
            output.write(''.join(mini_lines))
def run_test_save_result():
    """Run the VG 1.2 visual-phrase detector on the test set and cache results.

    Every image listed in ``data/vg1_2_2016/ImageSets/test.txt`` that is not
    yet present in ``output/precalc/vg1_2_vp2016_test_new.hdf5`` is passed
    through ``im_detect``.  The class scores (float16) and the first four box
    columns (int16) are stored under ``<imid>/scores`` and ``<imid>/boxes``.
    Images that cannot be read are reported and skipped.  One timing line is
    printed per processed image.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(
        'models/vg1_2_vp/vgg16/faster_rcnn_end2end/test.prototxt',
        'output/faster_rcnn_end2end/vg1_2_vp2016_train/vg1_2_vp_vgg16_faster_rcnn_finetune_no_bbox_reg_iter_110000.caffemodel',
        caffe.TEST)
    h5path = 'output/precalc/vg1_2_vp2016_test_new.hdf5'
    root = 'data/vg1_2_2016/Data/test/'
    img_set_file = 'data/vg1_2_2016/ImageSets/test.txt'
    # Upper bound of the per-class loop below (index 0 is not visited).
    num_classes = 19237
    detect_timer = Timer()

    # Each line is "<relative image path> <image id>".
    imlist = {}
    with open(img_set_file) as set_file:
        for line in set_file:
            fields = line.strip().split(' ')
            imlist[fields[1]] = fields[0]

    cnt = 0
    # The context manager flushes and closes the cache even if a later image
    # fails.
    with h5py.File(h5path) as h5f:
        for imid in imlist.keys():
            cnt += 1
            if imid in h5f:
                continue
            impath = root + imlist[imid] + '.jpg'
            im = cv2.imread(impath)
            # cv2.imread returns None for unreadable files.  "== None" on an
            # ndarray is an element-wise comparison, so use an identity test,
            # and skip the image instead of handing None to the detector.
            if im is None:
                print(impath)
                continue

            detect_timer.tic()
            score_raw, scores, fc7, boxes = im_detect(net, im, None)
            detect_timer.toc()

            zl.tick()
            h5f.create_dataset(imid + '/scores', dtype='float16',
                               data=scores.astype(np.float16))
            h5f.create_dataset(imid + '/boxes', dtype='short',
                               data=boxes[:, :4].astype(np.short))

            # NOTE(review): the per-class NMS results are not persisted or
            # used by any live code here; the loop is kept because its
            # runtime is part of the reported t_misc -- confirm it is still
            # wanted.
            boxes_tosort = []
            for j in xrange(1, num_classes):
                inds = np.where(scores[:, j] > 0.0001)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                keep = nms(cls_dets, .2)  # nms threshold
                boxes_tosort.append(cls_dets[keep, :])
            t_misc = zl.tock()

            print('im_detect: {:d} {:.3f}s {:.3f}s'
                  .format(cnt, detect_timer.average_time, t_misc))
def run_relation_diff(model_type, iteration):
    """Score sg_vrd test detection pairs with a two-stream relation network.

    Unlike the concatenating variants, subject and object features are fed
    through separate ``*_s`` / ``*_o`` input blobs.  For each image in
    ``output/sg_vrd_2016_test_more.hdf5`` every ordered pair of detections
    whose two detection scores are both at least 0.1 is evaluated.  The
    ``relation`` blob is passed through ``softmax``, the best predicate is
    kept and its score is added to the two detection scores.  Results go to
    ``output/sg_vrd_2016_result_<model_type>_<iteration>.hdf5`` as
    ``rlp_confs``, ``sub_boxes``, ``obj_boxes`` and ``rlp_labels`` (float16).

    Args:
        model_type: Network variant.  Must contain ``'all'``, ``'visual'``,
            ``'classeme'`` or ``'location'`` (checked in that order).
        iteration: Training iteration as a string, used in the file names.

    Raises:
        ValueError: If ``model_type`` matches none of the known variants.
    """
    # Pick the input streams once; an unknown type used to surface only as a
    # NameError on the first forward pass.
    use_classeme = use_visual = use_location = False
    if 'all' in model_type:
        use_classeme = use_visual = use_location = True
    elif 'visual' in model_type:
        use_visual = True
    elif 'classeme' in model_type:
        use_classeme = True
    elif 'location' in model_type:
        use_location = True
    else:
        raise ValueError('unknown relation model type: %r' % (model_type,))

    result_path = 'output/sg_vrd_2016_result_' + model_type + '_' + iteration + '.hdf5'
    with h5py.File('output/sg_vrd_2016_test_more.hdf5') as vgg_data, \
            h5py.File(result_path) as result:
        net = caffe.Net(
            'models/sg_vrd/relation/test_' + model_type + '.prototxt',
            'output/relation/sg_vrd_relation_vgg16_' + model_type + '_iter_' +
            iteration + '.caffemodel', caffe.TEST)
        cnt = 0
        zl.tick()
        for imid in vgg_data.keys():
            cnt += 1
            print('%s %s' % (cnt, zl.tock()))
            zl.tick()

            # Read the per-image arrays once rather than once per pair.
            entry = vgg_data[imid]
            classemes = entry['classemes'][...]
            visuals = entry['visuals'][...]
            locations = entry['locations'][...]
            cls_confs = entry['cls_confs'][...]

            rlp_labels = []
            rlp_confs = []
            sub_boxes = []
            obj_boxes = []
            n_boxes = len(locations)
            for s in xrange(n_boxes):
                sub = locations[s]
                sub_cls = cls_confs[s, 0]
                sub_score = cls_confs[s, 1]
                if sub_score < 0.1:
                    continue
                for o in xrange(n_boxes):
                    if s == o:
                        continue
                    obj = locations[o]
                    obj_cls = cls_confs[o, 0]
                    obj_score = cls_confs[o, 1]
                    if obj_score < 0.1:
                        continue

                    # Only the streams the network consumes are prepared;
                    # the concatenated features the original also built were
                    # never used.
                    blob = {}
                    if use_classeme:
                        blob['classeme_s'] = np.array(classemes[s]).reshape(1, 101)
                        blob['classeme_o'] = np.array(classemes[o]).reshape(1, 101)
                    if use_visual:
                        blob['visual_s'] = np.array(visuals[s]).reshape(1, 4096)
                        blob['visual_o'] = np.array(visuals[o]).reshape(1, 4096)
                    if use_location:
                        # Each box is encoded relative to the other one.
                        sub_loc_encoded = bbox_transform(
                            np.array([obj[:4]]), np.array([sub[:4]]))[0]
                        obj_loc_encoded = bbox_transform(
                            np.array([sub[:4]]), np.array([obj[:4]]))[0]
                        blob['location_s'] = np.array(sub_loc_encoded).reshape(1, 4)
                        blob['location_o'] = np.array(obj_loc_encoded).reshape(1, 4)

                    net.forward_all(**blob)
                    relation_score = softmax(net.blobs['relation'].data[0])
                    predicate = np.argmax(relation_score)
                    rs = relation_score[predicate]

                    rlp_labels.append(
                        np.array([sub_cls, predicate, obj_cls]).astype(np.int32))
                    rlp_confs.append(rs + sub_score + obj_score)
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])

            result.create_dataset(imid + '/rlp_confs', dtype='float16',
                                  data=np.array(rlp_confs).astype(np.float16))
            result.create_dataset(imid + '/sub_boxes', dtype='float16',
                                  data=np.array(sub_boxes).astype(np.float16))
            result.create_dataset(imid + '/obj_boxes', dtype='float16',
                                  data=np.array(obj_boxes).astype(np.float16))
            result.create_dataset(imid + '/rlp_labels', dtype='float16',
                                  data=np.array(rlp_labels).astype(np.float16))
def run_relation_batch(model_type, iteration):
    """Batched relation scoring of all VG 1.2 test detection pairs.

    The precomputed detection features are loaded from
    ``output/cache/vg1_2_2016_test.pkl`` when that cache exists; otherwise
    they are read from ``output/precalc/vg1_2_2016_test.hdf5`` and the cache
    is written.  For every image (in sorted id order) that is not already in
    the result file, all ordered detection pairs are stacked into one batch,
    run through the network in a single forward pass, and the best predicate
    per pair is stored.  Results go to
    ``output/vg_results/vg1_2_2016_result_<model_type>_<iteration>.hdf5`` as
    ``rlp_confs``, ``sub_boxes``, ``obj_boxes`` and ``rlp_labels`` (float16).

    Args:
        model_type: Network variant.  Must contain ``'all'``, ``'visual'``,
            ``'classeme'`` or ``'location'`` (checked in that order).
        iteration: Training iteration as a string, used in the file names.

    Raises:
        ValueError: If ``model_type`` matches none of the known variants.
    """
    use_classeme = use_visual = use_location = False
    if 'all' in model_type:
        use_classeme = use_visual = use_location = True
    elif 'visual' in model_type:
        use_visual = True
    elif 'classeme' in model_type:
        use_classeme = True
    elif 'location' in model_type:
        use_location = True
    else:
        raise ValueError('unknown relation model type: %r' % (model_type,))

    cache_path = 'output/cache/vg1_2_2016_test.pkl'
    if os.path.exists(cache_path):
        vgg_data = zl.load(cache_path)
        print('loaded test data from cache')
    else:
        print('Preloading testing data')
        zl.tick()
        vgg_data = {}
        # The HDF5 source is only opened when the cache has to be rebuilt.
        with h5py.File('output/precalc/vg1_2_2016_test.hdf5') as vgg_h5:
            for k in vgg_h5.keys():
                entry = vgg_h5[k]
                vgg_data[k] = {
                    'classemes': entry['classemes'][...],
                    'visuals': entry['visuals'][...],
                    'cls_confs': entry['cls_confs'][...],
                    'locations': entry['locations'][...],
                }
        print('done preloading testing data %f' % zl.tock())
        zl.save(cache_path, vgg_data)

    result_path = ('output/vg_results/vg1_2_2016_result_' + model_type + '_' +
                   iteration + '.hdf5')
    with h5py.File(result_path) as result:
        net = caffe.Net(
            'models/vg1_2/relation/test_' + model_type + '.prototxt',
            'output/relation/vg/relation_vgg16_' + model_type + '_iter_' +
            iteration + '.caffemodel', caffe.TEST)
        cnt = 1
        zl.tick()
        for imid in sorted(vgg_data.keys()):
            if cnt % 100 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
            cnt += 1
            # Allows an interrupted run to be resumed.
            if imid in result:
                continue

            classemes = vgg_data[imid]['classemes']
            visuals = vgg_data[imid]['visuals']
            locations = vgg_data[imid]['locations']
            cls_confs = vgg_data[imid]['cls_confs']

            classemes_in = []
            visuals_in = []
            locations_in = []
            sub_cls_in = []
            obj_cls_in = []
            sub_score_in = []
            obj_score_in = []
            sub_boxes = []
            obj_boxes = []
            n_boxes = len(locations)
            for s in xrange(n_boxes):
                sub = locations[s]
                for o in xrange(n_boxes):
                    if s == o:
                        continue
                    obj = locations[o]
                    if use_classeme:
                        classemes_in.append(
                            np.hstack((classemes[s], classemes[o])).reshape(402))
                    if use_visual:
                        visuals_in.append(
                            np.hstack((visuals[s], visuals[o])).reshape(8192))
                    if use_location:
                        # Each box is encoded relative to the other one.
                        sub_loc_encoded = bbox_transform(
                            np.array([obj[:4]]), np.array([sub[:4]]))[0]
                        obj_loc_encoded = bbox_transform(
                            np.array([sub[:4]]), np.array([obj[:4]]))[0]
                        locations_in.append(
                            np.hstack((sub_loc_encoded, obj_loc_encoded)).reshape(8))
                    sub_cls_in.append(cls_confs[s, 0])
                    obj_cls_in.append(cls_confs[o, 0])
                    sub_score_in.append(cls_confs[s, 1])
                    obj_score_in.append(cls_confs[o, 1])
                    sub_boxes.append(sub[:4])
                    obj_boxes.append(obj[:4])

            if not sub_boxes:
                # No pairs: skip the network entirely.  Checking this before
                # touching the net avoids reshaping its input blobs to an
                # empty shape.
                rlp_confs = []
                rlp_labels = []
            else:
                blob = {}
                if use_classeme:
                    blob['classeme'] = np.array(classemes_in)
                if use_visual:
                    blob['visual'] = np.array(visuals_in)
                if use_location:
                    blob['location'] = np.array(locations_in)
                # Resize the input blobs to this image's batch size.
                for blob_name in blob:
                    net.blobs[blob_name].reshape(*blob[blob_name].shape)
                net.forward_all(**blob)
                relation_score = net.blobs['relation_prob'].data.copy()
                argmax = np.argmax(relation_score, axis=1)
                rs = relation_score[np.arange(relation_score.shape[0]), argmax]
                rlp_labels = np.vstack((sub_cls_in, argmax, obj_cls_in)).T
                rlp_confs = (np.array(sub_score_in) + np.array(rs) +
                             np.array(obj_score_in))

            result.create_dataset(imid + '/rlp_confs', dtype='float16',
                                  data=np.array(rlp_confs).astype(np.float16))
            result.create_dataset(imid + '/sub_boxes', dtype='float16',
                                  data=np.array(sub_boxes).astype(np.float16))
            result.create_dataset(imid + '/obj_boxes', dtype='float16',
                                  data=np.array(obj_boxes).astype(np.float16))
            result.create_dataset(imid + '/rlp_labels', dtype='float16',
                                  data=np.array(rlp_labels).astype(np.float16))
def run_relation(model_type, iteration):
    """Predict the predicate for each ground-truth pair of the VG 1.2 test set.

    Reads per-image subject/object/joint-box features from
    ``output/precalc/vg1_2_2016_predicate_exp_test.hdf5``.  Each pair is run
    through the network, ``softmax`` is applied to the ``relation`` blob and
    the best predicate with its score is kept.  Results go to
    ``output/vg1_2_2016_result_<model_type>_<iteration>.hdf5`` as
    ``rlp_confs``, ``sub_boxes``, ``obj_boxes`` and ``rlp_labels`` (float16).

    NOTE(review): ``run_relation`` is defined more than once in this file;
    at import time the last definition wins.

    Args:
        model_type: ``'pre_diff'`` (separate subject/object inputs),
            ``'pre_jointbox'`` (joint-box feature) or ``'pre_concat'``
            (concatenated subject/object features).
        iteration: Training iteration as a string, used in the file names.

    Raises:
        ValueError: If ``model_type`` is not one of the three variants.
    """
    # Fail early: an unknown type used to surface only as a NameError on the
    # first forward pass.
    if model_type not in ('pre_diff', 'pre_jointbox', 'pre_concat'):
        raise ValueError('unknown relation model type: %r' % (model_type,))

    result_path = 'output/vg1_2_2016_result_' + model_type + '_' + iteration + '.hdf5'
    with h5py.File('output/precalc/vg1_2_2016_predicate_exp_test.hdf5') as vgg_data, \
            h5py.File(result_path) as result:
        net = caffe.Net(
            'models/vg1_2/relation/test_' + model_type + '.prototxt',
            'output/relation/vg/relation_vgg16_' + model_type + '_iter_' +
            iteration + '.caffemodel', caffe.TEST)
        cnt = 0
        zl.tick()
        for imid in vgg_data.keys():
            if cnt % 100 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
            cnt += 1

            entry = vgg_data[imid]
            obj_boxes_gt = entry['obj_boxes']
            sub_boxes_gt = entry['sub_boxes']
            sub_visual = entry['sub_visual']
            obj_visual = entry['obj_visual']
            joint_visual = entry['joint_visual']
            sub_cls = entry['sub_cls']
            obj_cls = entry['obj_cls']

            rlp_labels = []
            rlp_confs = []
            sub_boxes = []
            obj_boxes = []
            for s in xrange(sub_boxes_gt.shape[0]):
                if model_type == 'pre_diff':
                    blob = {
                        'visual_s': np.array(sub_visual[s]).reshape(1, 4096),
                        'visual_o': np.array(obj_visual[s]).reshape(1, 4096),
                    }
                elif model_type == 'pre_jointbox':
                    blob = {
                        'visual': np.array(joint_visual[s]).reshape(1, 4096),
                    }
                else:  # 'pre_concat'
                    blob = {
                        'visual': np.hstack(
                            (sub_visual[s], obj_visual[s])).reshape(1, 8192),
                    }

                net.forward_all(**blob)
                relation_score = softmax(net.blobs['relation'].data[0])
                predicate = np.argmax(relation_score)
                rs = relation_score[predicate]

                rlp_labels.append(
                    np.array([sub_cls[s], predicate, obj_cls[s]]).astype(np.int32))
                # Only the predicate score counts here; no detection scores
                # are added.
                rlp_confs.append(rs)
                sub_boxes.append(sub_boxes_gt[s])
                obj_boxes.append(obj_boxes_gt[s])

            result.create_dataset(imid + '/rlp_confs', dtype='float16',
                                  data=np.array(rlp_confs).astype(np.float16))
            result.create_dataset(imid + '/sub_boxes', dtype='float16',
                                  data=np.array(sub_boxes).astype(np.float16))
            result.create_dataset(imid + '/obj_boxes', dtype='float16',
                                  data=np.array(obj_boxes).astype(np.float16))
            result.create_dataset(imid + '/rlp_labels', dtype='float16',
                                  data=np.array(rlp_labels).astype(np.float16))
def run_relation(model_type, iteration):
    """Score sg_vrd test detection pairs, keeping top predicates per type.

    Predicates are grouped by the ``type`` attribute stored with
    ``meta/pre/name2idx/<name>`` in ``data/sg_vrd_meta.h5`` (groups ``'s'``,
    ``'v'``, ``'p'`` and ``'c'``).  For each image in
    ``output/sg_vrd_2016_test.hdf5`` every ordered pair of detections whose
    two detection scores are both at least 0.01 is run through the network.
    The ``relation_prob`` scores of each group are ranked with
    ``zl.sort_dict_by_val``; the first four entries of ``'s'``, ``'v'`` and
    ``'p'`` and all entries of ``'c'`` are emitted, in that order, with the
    predicate score as confidence.  Results go to
    ``output/sg_vrd_2016_result_<model_type>_<iteration>.hdf5`` as
    ``rlp_confs``, ``sub_boxes``, ``obj_boxes`` and ``rlp_labels`` (float16).

    NOTE(review): ``run_relation`` is defined more than once in this file;
    at import time the last definition wins.

    Args:
        model_type: Network variant.  Must contain ``'all'``, ``'visual'``,
            ``'classeme'`` or ``'location'`` (checked in that order).
        iteration: Training iteration as a string, used in the file names.

    Raises:
        ValueError: If ``model_type`` matches none of the known variants.
    """
    use_classeme = use_visual = use_location = False
    if 'all' in model_type:
        use_classeme = use_visual = use_location = True
    elif 'visual' in model_type:
        use_visual = True
    elif 'classeme' in model_type:
        use_classeme = True
    elif 'location' in model_type:
        use_location = True
    else:
        raise ValueError('unknown relation model type: %r' % (model_type,))

    # Only the first 70 entries of the score vector are considered.
    num_predicates = 70
    # (group, how many ranked entries to keep); None keeps all of them.
    emit_plan = (('s', 4), ('v', 4), ('p', 4), ('c', None))

    rel_types = {'p': [], 's': [], 'v': [], 'c': []}
    with h5py.File('data/sg_vrd_meta.h5') as m:
        for k in m['meta/pre/name2idx'].keys():
            node = m['meta/pre/name2idx/' + k]
            idx = int(str(node[...]))
            rel_types[node.attrs['type']].append(idx)
    # Ascending predicate indices per group, computed once instead of
    # scanning the lists for every predicate of every pair.
    type_indices = {}
    for r_type in rel_types:
        members = set(rel_types[r_type])
        type_indices[r_type] = [i for i in xrange(num_predicates) if i in members]

    result_path = 'output/sg_vrd_2016_result_' + model_type + '_' + iteration + '.hdf5'
    with h5py.File('output/sg_vrd_2016_test.hdf5') as vgg_data, \
            h5py.File(result_path) as result:
        net = caffe.Net(
            'models/sg_vrd/relation/test_' + model_type + '.prototxt',
            'output/relation/vr/sg_vrd_relation_vgg16_' + model_type + '_iter_' +
            iteration + '.caffemodel', caffe.TEST)
        cnt = 0
        zl.tick()
        for imid in vgg_data.keys():
            cnt += 1
            print('%s %s' % (cnt, zl.tock()))
            zl.tick()

            # Read the per-image arrays once rather than once per pair.
            entry = vgg_data[imid]
            classemes = entry['classemes'][...]
            visuals = entry['visuals'][...]
            locations = entry['locations'][...]
            cls_confs = entry['cls_confs'][...]

            rlp_labels = []
            rlp_confs = []
            sub_boxes = []
            obj_boxes = []
            n_boxes = len(locations)
            for s in xrange(n_boxes):
                sub = locations[s]
                sub_cls = cls_confs[s, 0]
                sub_score = cls_confs[s, 1]
                if sub_score < 0.01:
                    continue
                for o in xrange(n_boxes):
                    if s == o:
                        continue
                    obj = locations[o]
                    obj_cls = cls_confs[o, 0]
                    obj_score = cls_confs[o, 1]
                    if obj_score < 0.01:
                        continue

                    blob = {}
                    if use_classeme:
                        blob['classeme'] = np.hstack(
                            (classemes[s], classemes[o])).reshape(1, 202)
                    if use_visual:
                        blob['visual'] = np.hstack(
                            (visuals[s], visuals[o])).reshape(1, 8192)
                    if use_location:
                        # Each box is encoded relative to the other one.
                        sub_loc_encoded = bbox_transform(
                            np.array([obj[:4]]), np.array([sub[:4]]))[0]
                        obj_loc_encoded = bbox_transform(
                            np.array([sub[:4]]), np.array([obj[:4]]))[0]
                        blob['location'] = np.hstack(
                            (sub_loc_encoded, obj_loc_encoded)).reshape(1, 8)

                    net.forward_all(**blob)
                    relation_score = net.blobs['relation_prob'].data[0].copy()

                    for r_type, limit in emit_plan:
                        group_scores = {}
                        for i in type_indices[r_type]:
                            group_scores[i] = relation_score[i]
                        # presumably ranked best-first -- the ordering is
                        # whatever zl.sort_dict_by_val returns
                        ranked = zl.sort_dict_by_val(group_scores)
                        for predicate, rs in ranked[:limit]:
                            rlp_labels.append(
                                np.array([sub_cls, predicate, obj_cls]).astype(np.int32))
                            rlp_confs.append(rs)
                            sub_boxes.append(sub[:4])
                            obj_boxes.append(obj[:4])

            result.create_dataset(imid + '/rlp_confs', dtype='float16',
                                  data=np.array(rlp_confs).astype(np.float16))
            result.create_dataset(imid + '/sub_boxes', dtype='float16',
                                  data=np.array(sub_boxes).astype(np.float16))
            result.create_dataset(imid + '/obj_boxes', dtype='float16',
                                  data=np.array(obj_boxes).astype(np.float16))
            result.create_dataset(imid + '/rlp_labels', dtype='float16',
                                  data=np.array(rlp_labels).astype(np.float16))
def vg_vphrase_make_voc_format(split_type):
    """Export VG 1.2 visual-phrase ground truth as per-image XML files.

    For every image id under ``gt/<split_type>`` in
    ``data/vg1_2_vp_meta.h5`` the image is copied below
    ``data/vg_1.2/voc_format_vp/Data/<split_type>/`` and an XML file built
    with ``dict2xml`` is written below
    ``data/vg_1.2/voc_format_vp/Annotations/<split_type>/``.  Each ground
    truth box becomes one ``object`` entry named by
    ``zl.idx2name_tri``.  Image sizes come from the MongoDB ``image_data``
    collection; image paths come from ``meta/imid2path`` in
    ``data/vg1_2_meta.h5``.

    Args:
        split_type: ``'train'`` or ``'test'``.  Any other value prints
            ``error`` and exits the process.
    """
    if split_type != 'train' and split_type != 'test':
        print('error')
        exit(0)

    vg_root = 'data/vg_1.2/'
    root = 'data/vg_1.2/voc_format_vp/'
    anno_root = root + 'Annotations/' + split_type + '/'
    data_root = root + 'Data/' + split_type + '/'
    for out_root in (anno_root, data_root):
        zl.make_dirs(out_root + 'VG_100K_2')
        zl.make_dirs(out_root + 'VG_100K')

    # todo,remove mongodb from this processing stage
    client = MongoClient("mongodb://localhost:27017")
    try:
        db = client.visual_genome_1_2
        imdatas = {}
        im_cursor = db.image_data.find(no_cursor_timeout=True)
        try:
            for imdata in im_cursor:
                imdatas[str(imdata['image_id'])] = imdata
        finally:
            # no_cursor_timeout cursors have to be closed by the client.
            im_cursor.close()
    finally:
        client.close()

    cnt = 0
    # Context managers close both meta files even if an image fails.
    with h5py.File('data/vg1_2_meta.h5') as m, \
            h5py.File('data/vg1_2_vp_meta.h5') as m_vp:
        imid2path = {}
        for k in m['meta/imid2path'].keys():
            imid2path[k] = str(m['meta/imid2path/%s' % k][...])

        zl.tick()
        for k in m_vp['gt/%s' % split_type].keys():
            if cnt % 1000 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
            cnt += 1

            # Paths are stored as "<folder>/<file>".
            im_path = imid2path[k]
            path_parts = im_path.split('/')
            im_folder = path_parts[0]
            im_file = path_parts[1]
            zl.copy_file(vg_root + im_path, data_root + im_path)

            imdata = imdatas[k]
            gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...]
            gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...]
            objs = []
            for i in xrange(gt_boxes.shape[0]):
                gt_box = gt_boxes[i]
                # Boxes are indexed as (xmin, ymin, xmax, ymax).
                bbox = {
                    'ymin': gt_box[1],
                    'ymax': gt_box[3],
                    'xmin': gt_box[0],
                    'xmax': gt_box[2],
                }
                objs.append({'name': zl.idx2name_tri(m_vp, gt_labels[i]),
                             'bndbox': bbox})

            # NOTE(review): the database label below says "sg vrd" although
            # this exports Visual Genome data -- kept unchanged, confirm.
            voc_datum = {
                "folder": im_folder,
                "source": {
                    "database": "sg vrd visual phrase"
                },
                "filename": im_file,
                'size': {'width': imdata['width'], 'height': imdata['height']},
                'object': objs,
            }

            # write to xml; splitext also copes with a name without extension
            dst_path = os.path.join(
                anno_root, im_folder, os.path.splitext(im_file)[0] + '.xml')
            with open(dst_path, 'w') as f:
                f.write(dict2xml({'annotation': voc_datum}) + '\n')

    print('images with annotation=%d\n' % cnt)