Beispiel #1
0
def vr_vphrase_make_voc_format(split_type):
    """Export the sg_vrd visual-phrase ground truth of one split as VOC-style XML.

    For every image key under ``gt/<split_type>`` in
    ``data/sg_vrd_vphrase_meta.h5`` one ``<key>.xml`` file is written to
    ``data/sg_vrd_2016_vp/Annotations/<split_type>/``.  Each file holds the
    image size and one ``object`` entry (triplet name + bounding box) per
    ground-truth box.

    Args:
        split_type: ``'train'`` or ``'test'``.  Any other value prints
            ``error`` and terminates the process.
    """
    if split_type not in ('train', 'test'):
        print('error')
        # NOTE(review): this is an error path but the exit status is 0;
        # kept as-is so existing callers see the same status.
        exit(0)

    root = 'data/sg_vrd_2016_vp/'
    anno_root = root + 'Annotations/' + split_type + '/'
    data_root = root + 'Data/' + split_type + '/'
    cnt = 0

    # Context managers make sure both HDF5 handles are released, even when
    # the export aborts half way through.
    with h5py.File('data/sg_vrd_meta.h5') as m, \
            h5py.File('data/sg_vrd_vphrase_meta.h5') as m_vp:
        zl.make_dirs(anno_root)
        zl.make_dirs(data_root)
        zl.tick()
        for k in m_vp['gt/%s' % split_type].keys():
            # Progress report (count + elapsed time) every 1000 images.
            if cnt % 1000 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
            cnt += 1

            # NOTE(review): the size is always looked up under 'train/' in
            # the meta file, also for split_type == 'test' -- confirm that
            # this matches the layout of sg_vrd_meta.h5.
            w = int(m['train/%s/w' % k][...])
            h = int(m['train/%s/h' % k][...])

            gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...]
            gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...]
            objs = []
            for i in xrange(gt_boxes.shape[0]):
                gt_box = gt_boxes[i]
                # Box columns are read as (xmin, ymin, xmax, ymax).
                bbox = {'ymin': gt_box[1], 'ymax': gt_box[3],
                        'xmin': gt_box[0], 'xmax': gt_box[2]}
                objs.append({'name': zl.idx2name_tri(m_vp, gt_labels[i]),
                             'bndbox': bbox})

            voc_datum = {"folder": '',
                         "source": {"database": "sg vrd visual phrase"},
                         "filename": k + '.jpg',
                         'size': {'width': w, 'height': h},
                         'object': objs}

            # The annotation shares the image's stem, which is simply the key.
            dst_path = os.path.join(anno_root, voc_datum["folder"], k + '.xml')
            with open(dst_path, 'w') as f:
                f.write(dict2xml({'annotation': voc_datum}) + '\n')
    print('images with annotation=%d\n' % cnt)
Beispiel #2
0
def vg_vphrase_make_voc_format(split_type):
    """Export the Visual Genome visual-phrase ground truth of one split as VOC-style XML.

    For every image key under ``gt/<split_type>`` in
    ``data/vg1_2_vp_meta.h5`` the image is copied from ``data/vg_1.2/`` into
    ``data/vg_1.2/voc_format_vp/Data/<split_type>/`` and an XML annotation
    with the same stem is written below
    ``data/vg_1.2/voc_format_vp/Annotations/<split_type>/``.  Image sizes are
    taken from the ``image_data`` collection of the local
    ``visual_genome_1_2`` MongoDB database.

    Args:
        split_type: ``'train'`` or ``'test'``.  Any other value prints
            ``error`` and terminates the process.
    """
    if split_type not in ('train', 'test'):
        print('error')
        # NOTE(review): this is an error path but the exit status is 0;
        # kept as-is so existing callers see the same status.
        exit(0)

    vg_root = 'data/vg_1.2/'
    root = 'data/vg_1.2/voc_format_vp/'
    anno_root = root + 'Annotations/' + split_type + '/'
    data_root = root + 'Data/' + split_type + '/'
    cnt = 0

    # Context managers make sure both HDF5 handles are released, even when
    # the export aborts half way through.
    with h5py.File('data/vg1_2_meta.h5') as m, \
            h5py.File('data/vg1_2_vp_meta.h5') as m_vp:
        zl.make_dirs(anno_root + 'VG_100K_2')
        zl.make_dirs(anno_root + 'VG_100K')
        zl.make_dirs(data_root + 'VG_100K_2')
        zl.make_dirs(data_root + 'VG_100K')

        # Load every image record up front, keyed by the stringified image id.
        imdatas = {}
        client = MongoClient("mongodb://localhost:27017")
        try:
            cursor = client.visual_genome_1_2.image_data.find(
                no_cursor_timeout=True)
            try:
                for imdata in cursor:
                    imdatas[str(imdata['image_id'])] = imdata
            finally:
                # A cursor opened with no_cursor_timeout=True is not reaped
                # by the server, so it has to be closed explicitly.
                cursor.close()
        finally:
            client.close()

        imid2path = {}
        for k in m['meta/imid2path'].keys():
            imid2path[k] = str(m['meta/imid2path/%s' % k][...])

        zl.tick()
        for k in m_vp['gt/%s' % split_type].keys():
            # Progress report (count + elapsed time) every 1000 images.
            if cnt % 1000 == 0:
                print('%s %s' % (cnt, zl.tock()))
                zl.tick()
            cnt += 1

            # todo for vg
            im_path = imid2path[k]
            zl.copy_file(vg_root + im_path, data_root + im_path)

            # im_path is used as '<folder>/<filename>'.
            path_parts = im_path.split('/')
            folder, filename = path_parts[0], path_parts[1]

            # todo, remove mongodb from this processing stage
            imdata = imdatas[k]

            gt_boxes = m_vp['gt/%s/%s/boxes' % (split_type, k)][...]
            gt_labels = m_vp['gt/%s/%s/labels' % (split_type, k)][...]
            objs = []
            for i in xrange(gt_boxes.shape[0]):
                gt_box = gt_boxes[i]
                # Box columns are read as (xmin, ymin, xmax, ymax).
                bbox = {'ymin': gt_box[1], 'ymax': gt_box[3],
                        'xmin': gt_box[0], 'xmax': gt_box[2]}
                objs.append({'name': zl.idx2name_tri(m_vp, gt_labels[i]),
                             'bndbox': bbox})

            voc_datum = {
                "folder": folder,
                "source": {
                    "database": "sg vrd visual phrase"
                },
                "filename": filename,
                'size': {'width': imdata['width'], 'height': imdata['height']},
                'object': objs,
            }

            # splitext leaves a name without an extension intact, whereas
            # slicing at rfind('.') would chop off its last character.
            stem = os.path.splitext(filename)[0]
            dst_path = os.path.join(anno_root, folder, stem + '.xml')
            with open(dst_path, 'w') as f:
                f.write(dict2xml({'annotation': voc_datum}) + '\n')
    print('images with annotation=%d\n' % cnt)
Beispiel #3
0
def make_matlab_from_vp_nms2():
    """Convert the NMS'ed visual-phrase detections into a MATLAB ``.mat`` file.

    Iterates over the sorted image ids under ``gt/test`` of the sg_vrd meta
    file.  For each image found in
    ``output/precalc/sg_vrd_2016_test_nms.7.hdf5`` the (at most) 100
    highest-confidence detections are kept; every triplet label is turned
    into its name via ``zl.idx2name_tri``, split on ``'_'`` and re-encoded as
    ``[subject class idx, predicate idx, object class idx]``.  Images that
    are missing from the detection file keep empty lists.

    The result is saved to ``output/sg_vrd_vp_results_.7.mat`` under the
    keys ``bboxes_ours``, ``rlp_labels_ours`` and ``rlp_confs_ours``, each an
    object array with one list per image.
    """
    import scipy.io as sio

    h5path = 'output/precalc/sg_vrd_2016_test_nms.7.hdf5'
    # Context managers make sure all three HDF5 handles are released.
    with h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5') as m, \
            h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_vp_meta.h5') as m_vp, \
            h5py.File(h5path) as h5f_nms:
        imids = sorted(m['gt/test'].keys())
        n_images = len(imids)

        # The builtin ``object`` is used as dtype: the ``np.object`` alias is
        # deprecated and no longer exists in recent NumPy releases.
        boxes_ours = np.empty(n_images, dtype=object)
        rlp_labels_ours = np.empty(n_images, dtype=object)
        rlp_confs_ours = np.empty(n_images, dtype=object)
        # Every slot starts out as its own empty list, which is also the
        # final value for images without detections.
        for i in xrange(n_images):
            boxes_ours[i] = []
            rlp_labels_ours[i] = []
            rlp_confs_ours[i] = []

        for idx, imid in enumerate(imids):
            cnt = idx + 1
            if cnt % 100 == 0:
                print(cnt)
            if imid not in h5f_nms:
                continue

            boxes = h5f_nms[imid]['boxes'][...]
            confs = h5f_nms[imid]['confs'][...]
            labels = h5f_nms[imid]['labels'][...]

            # Indices of the (up to) 100 most confident detections,
            # highest confidence first.
            ind = np.argsort(confs)[::-1][:100]

            img_boxes = []
            rlp_confs = []
            rlp_labels = []
            for box, conf, label in zip(boxes[ind], confs[ind], labels[ind]):
                # Triplet name is used as '<subject>_<predicate>_<object>'.
                parts = zl.idx2name_tri(m_vp, label).split('_')
                rlp_label = [zl.name2idx_cls(m, parts[0]),
                             zl.name2idx_pre(m, parts[1]),
                             zl.name2idx_cls(m, parts[2])]
                rlp_labels.append(np.array(rlp_label).astype(np.float64))
                rlp_confs.append(np.array(conf).astype(np.float64))
                img_boxes.append(np.array(box))

            rlp_confs_ours[idx] = rlp_confs
            rlp_labels_ours[idx] = rlp_labels
            boxes_ours[idx] = img_boxes

    sio.savemat('output/sg_vrd_vp_results_.7.mat',
                {'bboxes_ours': boxes_ours,
                 'rlp_labels_ours': rlp_labels_ours,
                 'rlp_confs_ours': rlp_confs_ours})
Beispiel #4
0
def _render_detection(im_orig, box, lbl, color):
    """Return a copy of ``im_orig`` with one detection drawn on it.

    ``lbl`` is written at pixel (50, 50) and the rectangle
    ``(box[0], box[1]) - (box[2], box[3])`` is drawn in ``color``.
    ``im_orig`` itself is not modified.
    """
    canvas = im_orig.copy()
    cv2.putText(canvas, lbl, (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1,
                (0, 0, 255), 2)
    cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), color, 2)
    return canvas


def run_test_visualize():
    """Interactively browse the detections of the vg1_2 visual-phrase network.

    For each image of ``data/vg1_2_2016/ImageSets/test.txt`` whose path
    contains ``1059`` or ``107901`` the network is run, per-class NMS (0.2)
    is applied and the surviving detections are sorted by score.  Five
    windows are shown: ``im_ours`` (all detections) and one window per
    relation type (``c``, ``s``, ``v``, ``p`` as stored in the ``type``
    attribute of the triplet meta data).  Each window follows its own
    trackbar in the ``ctrl`` window.

    Keys: space moves on to the next image; ``s`` saves the original image
    and every currently shown view to ``output/results/examples/<imid>/``.
    Each saved view is tagged with the index of the detection it shows.
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)

    m_vp = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_vp_meta.h5', 'r', 'core')
    # Opened exactly as before although nothing below reads from it.
    m = h5py.File('/home/zawlin/Dropbox/proj/vg1_2_meta.h5', 'r', 'core')
    net = caffe.Net('models/vg1_2_vp/vgg16/faster_rcnn_end2end/test.prototxt',
                    'output/faster_rcnn_end2end/vg1_2_vp2016_train/vg1_2_vp_vgg16_faster_rcnn_finetune_no_bbox_reg_iter_110000.caffemodel',caffe.TEST)
    root = 'data/vg1_2_2016/Data/test/'
    _t = {'im_detect': Timer(), 'misc': Timer()}
    cnt = 0
    img_set_file = 'data/vg1_2_2016/ImageSets/test.txt'

    # Each line is used as '<path without extension> <image id>'.
    imlist = {}
    with open(img_set_file) as f:
        for line in f:
            fields = line.strip().split(' ')
            imlist[fields[1]] = fields[0]

    # Triplet class indices grouped by their 'type' attribute.
    rel_types = {'p': [], 's': [], 'v': [], 'c': []}
    for k in m_vp['meta/tri/name2idx'].keys():
        if k != '__background__':
            idx = int(str(m_vp['meta/tri/name2idx/' + k][...]))
            r_type = m_vp['meta/tri/name2idx/' + k].attrs['type']
            rel_types[r_type].append(idx)

    cv2.namedWindow('ctrl')
    cv2.createTrackbar('thresh', 'ctrl', 10, 100, nothing)

    # View suffix -> rectangle colour, in the order the windows are shown.
    # '' is the view over all detections, '_<t>' the view for type <t>.
    display_order = (('', (0, 200, 0)),
                     ('_c', (0, 0, 200)),
                     ('_s', (0, 0, 200)),
                     ('_v', (0, 0, 200)),
                     ('_p', (0, 0, 200)))
    # Order in which the trackbars are added to the 'ctrl' window.
    trackbar_order = ('', '_p', '_c', '_v', '_s')

    for imid in imlist:
        cnt += 1
        impath = imlist[imid] + '.jpg'
        if '1059' not in impath and '107901' not in impath:
            continue
        impath = root + impath
        im = cv2.imread(impath)
        # ``is None`` instead of ``== None``: the latter is an element-wise
        # comparison once ``im`` is an array.  An unreadable image is
        # reported and skipped rather than handed to the detector.
        if im is None:
            print(impath)
            continue

        box_proposals = None
        _t['im_detect'].tic()
        _, scores, _, raw_boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        # Start the 'misc' timer so that the toc() below has a matching tic().
        _t['misc'].tic()
        zl.tick()
        print('generating boxes')
        det_chunks = []
        label_chunks = []
        # NOTE(review): 19237 is hard-coded; presumably the number of score
        # columns produced by this model -- confirm against test.prototxt.
        for j in xrange(1, 19237):
            inds = np.where(scores[:, j] > 0.00)[0]
            cls_scores = scores[inds, j]
            cls_boxes = raw_boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, .2, force_cpu=True)  # nms threshold
            cls_dets = cls_dets[keep, :]
            if cls_dets.shape[0] > 0:
                # Collect per-class chunks and stack once after the loop
                # instead of re-stacking the growing arrays for every class.
                det_chunks.append(cls_dets)
                label_chunks.append(np.tile(j, cls_dets.shape[0]))

        if not det_chunks:
            # Nothing survived NMS: there is nothing to sort or display.
            print('no detections for %s' % impath)
            continue

        print('sorting')
        boxes = np.vstack(det_chunks)
        labels = np.hstack(label_chunks)
        # Highest score first; the score is the last column of each row.
        sorted_ind = np.argsort(boxes[:, -1])[::-1]
        boxes = boxes[sorted_ind]
        labels = labels[sorted_ind]

        # For every view, the positions (into boxes/labels) it can show.
        indexor = np.arange(labels.shape[0])
        index_maps = {'': indexor}
        for r_type in ('c', 'p', 'v', 's'):
            type_mask = np.in1d(labels, np.array(rel_types[r_type]))
            index_maps['_' + r_type] = indexor[type_mask]

        _t['misc'].toc()
        t_misc = zl.tock()

        # Recreate the control window; presumably this drops the trackbars
        # left over from the previous image.
        cv2.namedWindow('ctrl')
        cv2.destroyWindow('ctrl')
        cv2.namedWindow('ctrl')
        for suffix in trackbar_order:
            last = index_maps[suffix].shape[0] - 1
            # A trackbar is only created when there is something to choose.
            if last > 0:
                cv2.createTrackbar('idx_ours' + suffix, 'ctrl', 0, last,
                                   nothing)

        im_orig = im.copy()
        while True:
            # suffix -> (detection index, rendered image) for this frame.
            shown = {}
            for suffix, color in display_order:
                index_map = index_maps[suffix]
                n_entries = index_map.shape[0]
                if n_entries == 0:
                    continue
                # With a single entry no trackbar exists, so use position 0
                # instead of querying a trackbar that was never created.
                if n_entries > 1:
                    pos = cv2.getTrackbarPos('idx_ours' + suffix, 'ctrl')
                else:
                    pos = 0
                det_idx = int(index_map[pos])
                lbl = zl.idx2name_tri(m_vp, labels[det_idx])
                canvas = _render_detection(im_orig, boxes[det_idx], lbl, color)
                cv2.imshow('im_ours' + suffix, canvas)
                shown[suffix] = (det_idx, canvas)

            c = cv2.waitKey(1) & 0xFF
            if c == ord(' '):
                break
            if c == ord('s'):
                im_folder = 'output/results/examples/' + imid
                if not os.path.exists(im_folder):
                    os.makedirs(im_folder)
                orig_path = im_folder + '/orig_' + imid + '.jpg'
                if not os.path.exists(orig_path):
                    cv2.imwrite(orig_path, im_orig)
                for suffix, _ in display_order:
                    if suffix in shown:
                        det_idx, canvas = shown[suffix]
                        # Tag each file with the index of the detection it
                        # actually shows, not one shared index for all views.
                        cv2.imwrite(im_folder + '/ours' + suffix + '_' + imid
                                    + str(det_idx) + '.jpg', canvas)

        print('im_detect: {:d} {:.3f}s {:.3f}s'
              .format(cnt, _t['im_detect'].average_time, t_misc))