def build_feature_db(net,images_info,imdbs,out_obj):
    paths1 = [os.path.basename(imdbs[0].image_path_at(i)) for i in range(imdbs[0].num_images)]
    paths2 = [os.path.basename(imdbs[1].image_path_at(i)) for i in range(imdbs[1].num_images)]
    paths = [paths1,paths2]
    #im_db = []
    _t = Timer()
    for i in range(len(images_info['image_name'])):
        print 'caching features for image {:d}/{:d}'.format(i+1,len(images_info['image_name']))
        _t.tic()
        if   images_info['image_name'][i] in paths[0]:
            im = cv2.imread(imdbs[0].image_path_at(paths[0].index(images_info['image_name'][i])))
        elif images_info['image_name'][i] in paths[1]:
            im = cv2.imread(imdbs[1].image_path_at(paths[1].index(images_info['image_name'][i])))
        print 'Image loaded, running the network'
        #gt features
        if 'gt' in images_info.keys():
            scores, boxes = im_detect(net,im,images_info['gt'][i])
            feat_pos = net.blobs['fc7'].data.copy()  # copy: the blob is overwritten by the next forward pass
        #roi features
        scores, boxes = im_detect(net,im,images_info['roi'][i])
        feat_neg = net.blobs['fc7'].data.copy()  # copy for the same reason as above
        print 'Done extracting features from fc7'
        #generate image db
        
        if 'gt' in images_info.keys():
            im_reg = {'name' : images_info['image_name'][i], 'roi_boxes' : images_info['roi'][i], 'roi_features' : feat_neg,'gt_boxes' : images_info['gt'][i], 'gt_features' : feat_pos}
        else:
            im_reg = {'name' : images_info['image_name'][i], 'roi_boxes' : images_info['roi'][i], 'roi_features' : feat_neg}
        pickle.dump(im_reg,out_obj)
        _t.toc()
        #im_db.append(im_reg)
        print 'Done in {}'.format(_t.average_time)
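# Every example on this page drives the same tic/toc timing helper (utils.timer.Timer in the
# Fast R-CNN family of repos). A minimal sketch of such a class, assuming only the attributes
# used above (tic, toc, diff, total_time, average_time), could look like this:
import time

class Timer(object):
    """Simple tic/toc timer (sketch of the interface assumed by the examples)."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff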
Example #2
def demo (net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
    def get_pos_examples(self):
        counts = self._get_pos_counts()
        for i in xrange(len(counts)):
            self.trainers[i].alloc_pos(counts[i])

        _t = Timer()
        roidb = self.imdb.roidb
        num_images = len(roidb)
        # num_images = 100
        for i in xrange(num_images):
            im = cv2.imread(self.imdb.image_path_at(i))
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
            gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0]
            gt_boxes = roidb[i]['boxes'][gt_inds]
            _t.tic()
            scores, boxes = im_detect(self.net, im, gt_boxes)
            _t.toc()
            feat = self.net.blobs[self.layer].data
            for j in xrange(1, self.imdb.num_classes):
                cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0]
                if len(cls_inds) > 0:
                    cls_feat = feat[cls_inds, :]
                    self.trainers[j].append_pos(cls_feat)

            print('get_pos_examples: {:d}/{:d} {:.3f}s' \
                  .format(i + 1, len(roidb), _t.average_time))
Example #4
def demo(sess, net, im_file, result_dir, viz=False, oriented=False):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im = helper.read_rgb_img(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, resized_im_shape, im_scale = im_detect(sess, net, im)
    timer.toc()

    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    img_name = im_file.split('/')[-1]

    draw_rpn_boxes(im, img_name, boxes, scores[:, np.newaxis], im_scale, True, result_dir)
    draw_rpn_boxes(im, img_name, boxes, scores[:, np.newaxis], im_scale, False, result_dir)

    # Run TextDetector to merge small box
    line_detector = TextDetector(oriented)

    # The input to line_detector must be the image AFTER scaling!
    # If boxes are mapped back to the original image before line construction, the image may be too
    # large: each anchor's width becomes large and MAX_HORIZONTAL_GAP is then effectively too small.
    # text_lines point order: left-top, right-top, left-bottom, right-bottom
    text_lines = line_detector.detect(boxes, scores[:, np.newaxis], resized_im_shape)
    print("Image %s, detect %d text lines in %.3fs" % (im_file, len(text_lines), timer.diff))

    if len(text_lines) != 0:
        text_lines = recover_scale(text_lines, im_scale)
        save_result(im, img_name, text_lines, result_dir)

    # Visualize detections
    if viz:
        vis_detections(im, CLASSES[1], text_lines)
def load_model_h5(weight_file):
    darknet = DarkNet()
    model = SimpleNet(darknet)
    model.load_weights(weight_file)
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    timer = Timer()
    timer.tic()
    model.compile(optimizer=sgd, loss='categorical_crossentropy')
    timer.toc()
    print 'Total compile time is {:.3f}s'.format(timer.total_time)
    for i in xrange(len(model.layers)):
        print model.layers[i]
        print model.layers[i].input_shape, model.layers[i].output_shape
        weights = model.layers[i].get_weights()
        if weights is not None and len(weights) > 0:
            print weights[0].shape, weights[0].max(), weights[0].min()
            # if len(weights) > 1:
            #     # print weights[0].shape, weights[0].max(), weights[0].min()
            #     # print "layer: %d" % (i)
            #     # w = weights[0].transpose()
            #     # w = weights[1]
            #     # print w.shape
            #     # cnt = 0
            #     # for val in w.flatten():
            #     # #     print >> f, val
            #     #     print 'weights[1]', cnt, ':', val
            #     #     cnt += 1
            #     #     raw_input()
            #     # print model.layers[4].get_weights()[0].shape, model.layers[4].get_weights()[1].shape
            #     # weights = model.layers[4].get_weights()[0]
            #     weights = weights[0]
            #     vis_square(weights.reshape((weights.shape[0]*weights.shape[1], weights.shape[2], weights.shape[3])))
    return model
Example #6
def demo(net, image_name, classes):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls,
                                                                    CONF_THRESH)
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
    def train_with_hard_negatives(self):
        _t = Timer()
        roidb = self.imdb.roidb
        num_images = len(roidb)
        # num_images = 100
        for i in xrange(num_images):
            im = cv2.imread(self.imdb.image_path_at(i))
            if roidb[i]['flipped']:
                im = im[:, ::-1, :]
            _t.tic()
            scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
            _t.toc()
            feat = self.net.blobs[self.layer].data
            for j in xrange(1, self.imdb.num_classes):
                hard_inds = \
                    np.where((scores[:, j] > self.hard_thresh) &
                             (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
                              self.neg_iou_thresh))[0]
                if len(hard_inds) > 0:
                    hard_feat = feat[hard_inds, :].copy()
                    new_w_b = \
                        self.trainers[j].append_neg_and_retrain(feat=hard_feat)
                    if new_w_b is not None:
                        self.update_net(j, new_w_b[0], new_w_b[1])

            print(('train_with_hard_negatives: '
                   '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb),
                                               _t.average_time))
Example #8
def detection_to_file(target_path, v_num, file_list, detect, total_frames, current_frames, max_proposal=100, thresh=0):
    timer = Timer()
    w = open("{}/{}.txt".format(target_path, v_num), "w")
    for file_index, file_path in enumerate(file_list):
        file_name = file_path.split("/")[-1]
        set_num, v_num, frame_num = file_name[:-4].split("_")

        timer.tic()
        dets = detect(file_path)
        timer.toc()

        print('Detection Time:{:.3f}s  {}/{} images'.format(timer.average_time, current_frames+file_index+1, total_frames))

        inds = np.where(dets[:, -1] >= thresh)[0]
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]

            x = bbox[0]
            y = bbox[1]
            width = bbox[2] - x
            length = bbox[3] - y
            w.write("{},{},{},{},{},{}\n".format(frame_num, x, y, width, length, score*100))

    w.close()
    print("Evaluation file {} has been written".format(w.name))
    return file_index + 1
def Detect(net, image_path):
    
    """Detect object classes in an image assuming the whole image is an object."""
    # Load the image
    im = cv2.imread(image_path)
    h, w, c = im.shape
    
    # TODO: Run selective search first
    # 

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, np.array([[0, 0, w, h]]))
    timer.toc()
    scores = scores[0]
 
    # get top 6 prediction
    pred_classes = [CLASSES[idx] for idx in ((-scores).argsort()[:6]).tolist()]
    conf = [ (-1) * prob for prob in np.sort(-scores)[:6].tolist()]
    
    img_blob = {}
    img_blob['image_path'] = image_path
    img_blob['pred'] = {'text': pred_classes, 'conf': conf}
    img_blob['rcnn_time'] = timer.total_time

    return img_blob
Example #10
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    output_path = os.path.join(cfg.DATA_DIR,'test_output')
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        name = image_name.split('.')[0] + '.txt'
        with open(os.path.join(output_path, name), 'a') as f:
            for item in dets:
                f.write(str(item[0]) + '\t' + str(item[1]) + '\t' + str(item[2]) + '\t' + str(item[3]) + '\t' + str(item[4]) + '\n')
def demo(net, im_file):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image as gray scale
    gim = cv2.imread(im_file, flags= cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # convert to rgb repeated in each channel
    im = cv2.cvtColor(gim, cv2.COLOR_GRAY2BGR)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
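# The demos above all delegate drawing to a vis_detections helper that is not shown on this page
# (and whose signature varies slightly between examples). A minimal matplotlib-based sketch of the
# plain (im, class_name, dets, thresh) variant, assuming dets rows are [x1, y1, x2, y2, score] and
# the image is BGR (OpenCV), could look like this:
import matplotlib.pyplot as plt
import numpy as np

def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detections whose score exceeds thresh (sketch, not the original helper)."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    im = im[:, :, (2, 1, 0)]  # BGR -> RGB for matplotlib
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]),
                                   bbox[2] - bbox[0], bbox[3] - bbox[1],
                                   fill=False, edgecolor='red', linewidth=3.5))
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')
    ax.set_title('{} detections with score >= {:.2f}'.format(class_name, thresh))
    plt.axis('off')
    plt.tight_layout()
    plt.draw()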
Example #12
def test_net_on_dataset(
        args,
        dataset_name,
        proposal_file,
        output_dir,
        multi_gpu=False,
        gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            args, dataset_name, proposal_file, num_images, output_dir
        )
    else:
        all_boxes, all_segms, all_keyps = test_net(
            args, dataset_name, proposal_file, output_dir, gpu_id=gpu_id
        )
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time))
    results = task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, output_dir
    )
    return results
Example #13
    def _get_feature_scale(self, num_images=100):
        TARGET_NORM = 20.0  # Magic value from traditional R-CNN
        _t = Timer()
        roidb = self.imdb.roidb
        total_norm = 0.0
        count = 0.0
        inds = npr.choice(
            range(self.imdb.num_images), size=num_images,
            replace=False
        )

        for i_, i in enumerate(inds):
            im = cv2.imread(self.imdb.image_path_at(i))

            if roidb[i]['flipped']:
                im = im[:, ::-1, :]

            _t.tic()
            scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
            _t.toc()
            feat = self.net.blobs[self.layer].data
            total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum()
            count += feat.shape[0]
            print('{}/{}: avg feature norm: {:.3f}'.format(
                    i_ + 1, num_images,
                    total_norm / count
                )
            )

        return TARGET_NORM * 1.0 / (total_norm / count)
Example #14
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    json_data_list = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox, score = vis_detections(im, cls, dets, thresh=CONF_THRESH)
        if score:
            json_data_list.append({"class": cls,
                                   'bbox': bbox,
                                   'score': score})
        if len(json_data_list):
            with open("result/" + image_name + ".json", "w") as f:
                json.dump(json_data_list, f, indent=2)
Example #15
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        order = cls_scores.argsort()[::-1]
        sorted_dets = dets[order, :]
        keep = nms(dets, NMS_THRESH)
        with open('/home/xyy/Desktop/doing/Object Detection/py-faster-rcnn/test_python.txt','w') as f:
            dets = dets[keep, :]
            for i in dets:
                for j in i:
                    f.write(str(j)+ ' ')
                f.write('\n')
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, im, return_boxes):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    classes = {}
    for cls_ind, cls in enumerate(CLASSES[1:]):
        try:
            cls_ind += 1 # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            bboxes = vis_detections(im, cls, dets, return_boxes, thresh=CONF_THRESH)
            classes[cls] = bboxes
        except Exception as e:
            continue
    if not return_boxes:
        cv2.imshow("image", im)
    return classes
Example #17
def imdb_proposals(net, imdb):
    """Generate RPN proposals on all images in an imdb."""

    _t = Timer()
    imdb_boxes = [[] for _ in xrange(imdb.num_images)]
    for i in xrange(imdb.num_images):
        im = None
        if cfg.TRAIN.FORMAT == 'pickle':
            with open(imdb.image_path_at(i), 'rb') as f:
                im = cPickle.load(f)
        else:
            im = cv2.imread(imdb.image_path_at(i))

        _t.tic()
        imdb_boxes[i], scores = im_proposals(net, im)
        _t.toc()
        print 'im_proposals: {:d}/{:d} {:.3f}s' \
              .format(i + 1, imdb.num_images, _t.average_time)
        if 0:
            dets = np.hstack((imdb_boxes[i], scores))
            # from IPython import embed; embed()
            _vis_proposals(im, dets[:3, :], thresh=0.9)
            plt.show()

    return imdb_boxes
def detect_person(net, im,cls_ind=1,cls='person',CONF_THRESH = 0.8):
    """Detect object classes in an image using pre-computed object proposals."""

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    NMS_THRESH = 0.3
    cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    # Filtering by confidence threshold as well
    keep = [ind for ind in keep if cls_scores[ind]>CONF_THRESH]
    if (len(keep)>1):
        sizes = np.zeros((len(keep),))
        for ind,curr_ind in enumerate(keep):
            bbox = dets[curr_ind,:4]
            sizes[ind] = (bbox[3]-bbox[1])*(bbox[2]-bbox[0])
        # Retain only the biggest bounding box
        keep = keep[np.argmax(sizes)]
    
    dets = dets[keep, :]
    return (dets.reshape(1,-1),cls_scores[keep])
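# The nms(dets, thresh) call used throughout these examples is standard greedy non-maximum
# suppression; the repos in question usually ship a Cython/GPU version, but a pure-NumPy sketch
# matching the interface used above (dets is an N x 5 array of [x1, y1, x2, y2, score]) is:
import numpy as np

def nms(dets, thresh):
    """Greedy NMS sketch: keep the highest-scoring boxes, suppress overlapping ones."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only the boxes whose overlap with the kept box is below the threshold
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep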
def demo (net, imagePathName, scoreThreshold):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(imagePathName)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()

    scores, boxes = im_detect(net, im)

    timer.toc()
    debug('Object detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    path, imageFilename = os.path.split(imagePathName)
    catDir = os.path.split(path)[-1]
    imageName = catDir + '/' + imageFilename
    for i, cls in enumerate(CLASSES[1:]):
        i += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * i:4 * (i + 1)]
        cls_scores = scores[:, i]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESHOLD)
        dets = dets[keep, :]
        vis_detections(im, cls, imageName, dets, scoreThreshold)
Example #20
    def train_model(self, max_iters):
        """Network training loop."""
        last_snapshot_iter = -1
        train_result = {}
        timer = Timer()
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()

            # store accurate (fg/bg)
            tmp_result = self.check_error()
            train_result = self.expandTrainResult(train_result, tmp_result)

            if self.solver.iter % (100 * self.solver_param.display) == 0:
                self.show_status(self.solver.iter, train_result)
                train_result = {}
                print 'speed: {:.3f}s / iter'.format(timer.average_time)

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                self.snapshot()

        if last_snapshot_iter != self.solver.iter:
            self.snapshot()
Example #21
def detect_objects(imgpath):
    """Detect object classes in an image using pre-computed object proposals."""

    print("in detect object")    
    # Load the demo image
    im_file = os.path.join(imgpath)
    im = cv2.imread(im_file)
    print("read image")

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    print("im_detect")
    scores, boxes = im_detect(app.config['net'], im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    results = dict()
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        results[cls] = detect_positions(im, cls, dets, thresh=CONF_THRESH)
    return results
def train_SVM(setting, y):
	print "train SVM"
	# SVM Training

	# SVM options
	# svm_kernel                  	= 'rbf';
	# svm_C							= 1.0;
	# svm_loss						= 'squared_hinge'
	# svm_penalty					= 'l2'
	# svm_multi_class				= 'ovr'
	# svm_random_state				= 0 


	filePath = os.path.join(setting['DST_MODEL_DIR'], "svm_trained.pkl")
	try:
		clf = joblib.load(filePath)
		print "using trained model"		
	except:
		print "building svm model"
		X = loadDesc(setting)
		X = X.astype('float')
		timer = Timer()	

		timer.tic()
		clf = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y)
		timer.toc()
		print timer.total_time

		joblib.dump(clf, filePath)

	# TEST
	# print clf.decision_function(X[0])
	# print clf.predict(X[5000])
	return clf
Example #23
def tattoo_detection(net, image_name, args):
    """Detect object classes in an image using pre-computed object proposals."""

    im_in = cv2.imread(image_name)

    if im_in is None:
        print('cannot open %s for read' % image_name )
        exit(-1)

    rows,cols = im_in.shape[:2]
    print([rows,cols])

    scale=1.0
    if rows >= cols:
        scale = float(args.longdim) / float(rows)
        im = cv2.resize( im_in, (int(0.5 + float(cols)*scale), args.longdim) )
    else:
        scale = float(args.longdim) / float(cols)
        im = cv2.resize( im_in, (args.longdim, int(0.5 + float(rows)*scale)) )

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    seconds = '%.3f' % timer.total_time
    print('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    max_scores = scores.max(axis=0)
    print(max_scores)
    print(boxes.shape)

    # Visualize detections for each class
    CONF_THRESH = args.threshold
    NMS_THRESH  = args.nms_thresh

    tattoo_dets=[]
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        dets_filter = dets[inds]

        vis_detections(im, cls, dets_filter, thresh=CONF_THRESH)

        if cls == 'tattoo' and len(dets_filter)>0:
            plt.savefig(os.path.join(args.output, os.path.splitext(os.path.basename(image_name))[0] + '_det.png'))
            tattoo_dets = dets_filter

    if args.inspect == 'v':
        plt.show()
    plt.clf()

    return tattoo_dets, max_scores, seconds, scale
Example #24
def demoRest(net, image_name, classes, box_file, obj_proposals, im_file, im):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls,
                                                                    CONF_THRESH)
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
Example #25
    def train_whole_model(self, tester=None):
        '''
        Test the performance using all the features.
        This may be memory-consuming.
        '''
        self.comm.barrier()
        mpi.rootprint('*'*46)
        mpi.rootprint('*'*15+'whole featureset'+'*'*15)
        mpi.rootprint('*'*46)

        if tester is not None:
            # normalize the test data with the stats of the training data
            tester.normalize_data(self.mLocal, self.stdLocal)

        timer = Timer()
        timer.reset()
        if self.maxGraftDim != self.nMetabins*self.nCodes:
            mpi.rootprint('Please initialize with maxGraftDim=nMetabins*nCodes')
            return
        self.nSelFeats = 0
        self.isSelected[:] = False
        mpi.rootprint('Generating Features...')
        for code in range(self.nCodes):
            for metabin in range(self.nMetabins):
                self.append_feature(code, metabin)
                if tester is not None:
                    tester.append_feature(code, metabin)
        mpi.rootprint('Feature generation took {} secs'.format(timer.lap()))
        mpi.rootprint('Training...')
        loss = self.retrain_model(None)
        mpi.rootprint('Training took {} secs'.format(timer.lap()))
        mpi.rootprint('Training accuracy: {}'.format(self.compute_current_accuracy()))
        if tester is not None:
            mpi.rootprint('Current Testing accuracy: {}'.format(tester.compute_test_accuracy(self.weights, self.b)))
Example #26
    def train_model(self, max_iters):
        #display = self.solver_param.display #40
        #test_iter = 1
        #test_interval = 1
        #_accuracy = 0
        #accuracy = 0

        timer = Timer()
        while self.solver.iter < max_iters:
            #print self.solver.iter
            #make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            """
            _train_loss += self.solver.net.blobs['euclidean_loss'].data
            if (self.solver.iter-1) % display == 0:
                train_loss[(self.solver.iter-1) // display] = _train_loss / display
                _train_loss = 0
            """
            if self.solver.iter % (self.solver_param.display) == 0:
                print ('speed {:.3f}s / iter').format(timer.average_time)
            """
            if self.solver.iter % test_interval == 0:
                for test_it in range(test_iter):
                    self.solver.test_nets[0].forward()
                    _accuracy += self.solver.test_nets[0].blobs['loss3/top-5'].data
                accuracy = _accuracy / test_iter
                f.write(str(self.solver.iter) + ' ' + str(accuracy) + '\n')
                _accuracy = 0
            """
                
            
        """
Example #27
def detect(net, image_set, image_name, output_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load pre-computed Selective Search object proposals
    #box_file = os.path.join(coco_root, 'boxes', image_set, image_name + '.mat')
    box_file = os.path.join(coco_root, 'boxes_full', image_set, image_name + '.mat')
    
    if not os.path.exists(box_file):
        print 'File does not exist', box_file
        return
        
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(coco_root, 'images', image_set, image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])
	
    np.savez(output_file, scores=scores, boxes=boxes)
Example #28
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
Example #29
def detect_bboxes(net, im_names, subset_classes):
    """Detect object classes in an image using pre-computed object proposals."""
    df = cnn_utils.create_bbox_data_frame(with_object_index=False)

    for im_name in im_names:
        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        print 'Demo for {}'.format(im_name)

        # Load the input image.
        im_file = os.path.join(FLAGS.data_dir, 'images', im_name)
        im = cv2.imread(im_file)
        im_size_x = im.shape[1]
        im_size_y = im.shape[0]

        # Detect all object classes and regress object bounds.
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(net, im)
        timer.toc()
        print ('Detection took {:.3f}s for '
               '{:d} object proposals').format(
            timer.total_time, boxes.shape[0])

        # Detect for each class
        for subset_cls_ind in range(len(class_names_to_be_detected)):
            cls = class_names_to_be_detected[subset_cls_ind]
            try:
                cls_ind = CLASSES.index(cls)
            except:
                print('error: class does not exist in training data: '
                      '{0}'.format(cls))
                exit(-1)

            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, FLAGS.nms_thresh)
            dets = dets[keep, :]
            inds = np.where(dets[:, -1] >= FLAGS.conf_thresh)[0]
            if len(inds) > 0:
                print ('{} {}(s) are detected.'.format(len(inds), cls))

            for i in inds:
                # ['image_name', 'class_index', 'x1', 'y1', 'x2', 'y2', 'score']
                x1 = dets[i, 0]
                y1 = dets[i, 1]
                x2 = dets[i, 2]
                y2 = dets[i, 3]
                score = dets[i, -1]
                if FLAGS.ignore_bbox_on_boundary:
                    # Ignore bounding boxes on the frame boundary.
                    if x1 <= 0 or x2 >= (im_size_x - 1) or \
                            y1 <= 0 or y2 >= (im_size_y - 1):
                        continue
                # Append a row.
                df.loc[len(df)] = [
                    im_name, subset_cls_ind, x1, y1, x2, y2, score]

    return df
def loadDesc(setting):
	print "Load Desc..."
	timer = Timer()	

	featureDstDir = setting['featureDstDir']
	sortedList = sorted([ f for f in os.listdir(featureDstDir)])
	descPath = np.array([ os.path.join(featureDstDir, x) for x in sortedList])

	X = []
	cnt = 0
	size = len(descPath)
	timer.tic()
	for path in descPath:
		feature = readCSV(path)
		X.append(feature)
		print "%d / %d file loaded" % (cnt, size)
		cnt = cnt + 1

	timer.toc()

	# print timer.total_time

	X = np.array(X)
	X = np.reshape(X, X.shape[0:2])
	return X
Example #31
def test_net(net, imdb, max_per_image=400, thresh=0.03, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            if cfg.TEST.AGNOSTIC:
                cls_boxes = boxes[inds, 4:8]
            else:
                cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            # # ----- set numpy decimal precision ------
            # float_formatter = lambda x: "%.5f" % x
            # np.set_printoptions(formatter={'float_kind':float_formatter})
            # # cls_dets[:,:-1] = np.round(cls_dets[:,:-1], decimals=0) # --- test ---
            # print 'cls_dets:'                 # --- test ---
            # print cls_dets                    # --- test ---
            # # ----------------------------------------
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
Example #32
def test_net(sess,
             net,
             imdb,
             weights_filename,
             max_per_image=100,
             thresh=0.05):
    """Test a Fast R-CNN network on an image database."""
    np.random.seed(cfg.RNG_SEED)
    # num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)

    # all_boxes = [[[] for _ in range(num_images)]
    #        for _ in range(imdb.num_classes)]
    #
    # output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # testFile = open('/netscratch/siddiqui/Datasets/ComplexBackground/data/ImageSets/test.txt')
    testFile = open(
        '/netscratch/siddiqui/Datasets/ComplexBackground/data_of_bgs_gray/data/ImageSets/test.txt'
    )
    imageNames = testFile.readlines()
    counter = 0
    reject_classes = []

    imagesOutputDir = '/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output-images/'
    os.system("rm -rf " + imagesOutputDir)
    os.system("mkdir " + imagesOutputDir)

    fileAlreadyProcessed = False
    if os.path.isfile(
            "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output.txt"
    ):
        f = open(
            "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output.txt",
            "r")
        processedFiles = f.readlines()
        f.close()
        #print (processedFiles)
        if len(processedFiles) != 0:
            print("Resuming processing")
            lastProcessedFile = processedFiles[-1]
            lastProcessedFile = lastProcessedFile.split(';')[0]
            fileAlreadyProcessed = True
            print("Last processed file: %s" % lastProcessedFile)

    fileIndex = 0
    videoScores = {}
    scoreFile = open(
        "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output-image.txt",
        "w")
    for im_name in imageNames:
        im_name = im_name.strip()
        # Skip all names already processed
        if fileAlreadyProcessed:
            fileIndex += 1
            if im_name == lastProcessedFile:
                print("Resuming processing from file (%d): %s" %
                      (fileIndex, im_name))
                fileAlreadyProcessed = False
            continue

        rejectExample = False
        for r_class in reject_classes:
            if r_class in im_name:
                rejectExample = True
                break
        if rejectExample:
            continue
        # im_path = '/netscratch/siddiqui/Datasets/ComplexBackground/data/Images/' + im_name + '.png'
        # annot_file = '/netscratch/siddiqui/Datasets/ComplexBackground/data/Annotations/' + im_name + '.xml'
        im_path = '/netscratch/siddiqui/Datasets/ComplexBackground/data_of_bgs_gray/data/Images/' + im_name + '.png'
        annot_file = '/netscratch/siddiqui/Datasets/ComplexBackground/data_of_bgs_gray/data/Annotations/' + im_name + '.xml'

        video_name = im_name[:im_name.rfind('_')]
        if video_name not in videoScores:
            # True Positives, False Positives, False Negatives
            videoScores[video_name] = [0, 0, 0]

        im = cv2.imread(im_path)
        if im is None:
            print("Error loading file: %s" % im_path)
            continue

        overlay = im.copy()

        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()

        # Visualize detections for each class
        CONF_THRESH = 0.5
        NMS_THRESH = 0.3

        with open(annot_file, 'r') as fd:
            doc = xmltodict.parse(fd.read())

        # Load GT bboxes
        gtBBoxes = []
        for xmlAttribName, xmlData in doc['annotation'].items():
            # print (xmlAttribName)
            if isinstance(xmlData, list):
                for obj in xmlData:
                    # If multiple objects
                    bbox = obj['bndbox']
                    gtBBoxes.append([
                        int(bbox['xmin']),
                        int(bbox['ymin']),
                        int(bbox['xmax']),
                        int(bbox['ymax'])
                    ])
            else:
                # If only one object
                bbox = xmlData['bndbox']
                gtBBoxes.append([
                    int(bbox['xmin']),
                    int(bbox['ymin']),
                    int(bbox['xmax']),
                    int(bbox['ymax'])
                ])

        bboxes = []
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls = CLASSES[cls_ind]
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]

            if SAVE_SINGLE_IMAGE:
                inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
                for i in inds:
                    bbox = dets[i, :4]
                    score = dets[i, -1]
                    bboxes.append(
                        [bbox[0], bbox[1], bbox[2], bbox[3], score, cls])
                    cv2.rectangle(
                        overlay, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                        CLASSES_COLORS[cls_ind],
                        3)  # Negative thickness results in filled rect
            else:
                vis_detections(im, cls, dets, thresh=CONF_THRESH)

        for gtBBox in gtBBoxes:
            cv2.rectangle(overlay, (gtBBox[0], gtBBox[1]),
                          (gtBBox[2], gtBBox[3]), CLASSES_COLORS[2],
                          3)  # Negative thickness results in filled rect

        if SAVE_SINGLE_IMAGE:
            if True:  #len(bboxes) > 0:
                # (3) blend with the original:
                opacity = 0.5
                cv2.addWeighted(overlay, opacity, im, 1 - opacity, 0, im)
                # out_im_path = '/netscratch/siddiqui/Bosch/data/faster-rcnn/output-defected-io/' + img_name + '.jpg'
                # out_im_path = '/netscratch/siddiqui/TableDetection/output-images/' + im_name.split('/')[-1]
                out_im_path = imagesOutputDir + im_name + '.png'
                cv2.imwrite(out_im_path, im)
                print("Writing output image for file (%d): %s" %
                      (fileIndex, im_name))

                f = open(
                    "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output.txt",
                    "a+")
                f.write(im_name + ';' + str(len(bboxes)) + ';' + str(bboxes) +
                        "\n")
                f.close()
        else:
            # Close previous plots before moving onto the next image
            plt.show()
            plt.close('all')

        # Compute F measure based on bounding boxes
        truePositives = 0
        falsePositives = 0
        falseNegatives = 0
        matchedGTBBox = [0] * len(gtBBoxes)

        # Iterate over all the predicted bboxes
        for predictedBBox in bboxes:
            bboxMatchedIdx = -1
            # Iterate over all the GT bboxes
            for gtBBoxIdx in range(len(gtBBoxes)):
                gtBBox = gtBBoxes[gtBBoxIdx]
                if USE_IOU:
                    # Compute IoU
                    iou = bbox_intersection_over_union(gtBBox, predictedBBox)
                    if (iou > IOU_THRESHOLD):
                        if (matchedGTBBox[gtBBoxIdx] == 0):
                            bboxMatchedIdx = gtBBoxIdx
                            break
                else:
                    # Compute IoM
                    iom = bbox_intersection_over_min(gtBBox, predictedBBox)
                    # if ((iom > IOM_THRESHOLD) and (not matchedGTBBox[bboxMatchedIdx])):
                    #   bboxMatchedIdx = gtBBoxIdx
                    #   break
                    if (iom > IOM_THRESHOLD):
                        if (matchedGTBBox[gtBBoxIdx] == 0):
                            bboxMatchedIdx = gtBBoxIdx
                            break

            if (bboxMatchedIdx != -1):
                truePositives += 1
                matchedGTBBox[bboxMatchedIdx] = 1
            else:
                falsePositives += 1

        # All the unmatched bboxes are false negatives
        falseNegatives = len(matchedGTBBox) - sum(matchedGTBBox)

        # Print final statistics for the frame
        print("True positives: %d" % truePositives)
        print("False positives: %d" % falsePositives)
        print("False negatives: %d" % falseNegatives)

        videoScores[video_name][0] += truePositives
        videoScores[video_name][1] += falsePositives
        videoScores[video_name][2] += falseNegatives

        # Compute F-Score
        if ((truePositives == 0) and (falseNegatives == 0)
                and (falsePositives == 0)):
            assert ((len(gtBBoxes) == 0) and (len(bboxes) == 0))
            recall = 100.0
            precision = 100.0
        else:
            if ((truePositives == 0) and (falseNegatives == 0)):
                recall = 0.0
            else:
                recall = (truePositives /
                          float(truePositives + falseNegatives)) * 100

            if ((truePositives == 0) and (falsePositives == 0)):
                precision = 0.0
            else:
                precision = (truePositives /
                             float(truePositives + falsePositives)) * 100

        if ((precision == 0.0) and (recall == 0.0)):
            fMeasure = 0.0
        else:
            fMeasure = 2 * ((precision * recall) / (precision + recall))

        print("Recall: %f" % recall)
        print("Precision: %f" % precision)
        print("F-Measure: %f" % fMeasure)

        scoreFile.write(im_name + ';' + str([
            len(bboxes),
            len(gtBBoxes), truePositives, falsePositives, falseNegatives,
            recall, precision, fMeasure
        ]) + '\n')

        fileIndex += 1

    print("-------------------------------------------")
    # Write video scores to file
    videoScoresFileName = "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/video.txt"
    averageFMeasure = 0
    videoScoresFile = open(videoScoresFileName, 'w')

    for videoName, videoScore in videoScores.items():
        print(videoName)
        recall = (videoScore[0] / float(videoScore[0] + videoScore[2])) * 100
        precision = (videoScore[0] /
                     float(videoScore[0] + videoScore[1])) * 100
        fMeasure = 2 * ((precision * recall) / (precision + recall))
        videoScoresFile.write(videoName + ";" +
                              str(videoScore + [recall, precision, fMeasure]) +
                              '\n')
        print("Recall: %f" % recall)
        print("Precision: %f" % precision)
        print("F-Measure: %f" % fMeasure)

        averageFMeasure += fMeasure

    print("-------------------------------------------")
    averageFMeasure = averageFMeasure / len(videoScores)
    print("Average F-Measure: %f" % averageFMeasure)

    videoScoresFile.write('Average F-Measure: ' + str(averageFMeasure) + '\n')
    videoScoresFile.close()

    scoreFile.close()
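# bbox_intersection_over_union and bbox_intersection_over_min used above are not defined on this
# page; minimal sketches consistent with how they are called (boxes given as [x1, y1, x2, y2],
# any trailing elements such as score ignored) could be:
def bbox_intersection_over_union(box_a, box_b):
    """IoU of two axis-aligned boxes (sketch)."""
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = float(area_a + area_b - inter)
    return inter / union if union > 0 else 0.0

def bbox_intersection_over_min(box_a, box_b):
    """Intersection area divided by the smaller of the two box areas (sketch)."""
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    smaller = float(min(area_a, area_b))
    return inter / smaller if smaller > 0 else 0.0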
Example #33
def demo(net, image_name, gt_boxes, result_dir, conf=0.75):
    """Detect object classes in an image using pre-computed object proposals."""

    # Load the demo image
    im_file = image_name
    im = cv2.imread(im_file)
    print "src image : ", im.shape
    im_height = im.shape[0]
    im_width = im.shape[1]

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # scores :  (300, 2)
    # boxes :  (300, 10)
    scores, boxes = r_im_detect(net, im)
    print __file__, '==>scores : ', scores.shape
    print __file__, '==>boxes : ', boxes.shape

    print __file__, "==>gt_margin: ", cfg.TEST.GT_MARGIN, cfg.TRAIN.GT_MARGIN
    print __file__, "==>img_padding: ", cfg.IMG_PADDING

    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = conf
    NMS_THRESH = 0.3
    # enumerate [(0, 'Spring'), (1, 'Summer'), (2, 'Fall'), (3, 'Winter')]
    #for cls_ind, cls in enumerate(CLASSES[1:]):
    # because we skipped background
    cls_ind = 1
    cls_boxes = boxes[:, 5 * cls_ind:5 * (cls_ind + 1)]  # D
    cls_scores = scores[:, cls_ind]
    # Merge each box's score into its row (appended as the last column)
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = rotate_gpu_nms(dets, NMS_THRESH)  # D
    # print __file__,'==>keep : ',keep
    dets = dets[keep, :]
    print __file__, '==>dets : ', dets.shape
    print dets[:, 5]
    #dets = dets[0:20]
    #dets[:, 4] = dets[:, 4] * 0.45

    dets[:, 2] = dets[:, 2] / cfg.TEST.GT_MARGIN
    dets[:, 3] = dets[:, 3] / cfg.TEST.GT_MARGIN

    #if imdb_name == "icdar13":
    #write_result_ICDAR2013(im_file, dets, CONF_THRESH, ori_result_dir, im_height, im_width)
    #result_file = write_result_ICDAR2013(im_file, dets, CONF_THRESH, result_dir, im_height, im_width)

    #print dets

    #if imdb_name == "icdar15":
    # write_result_ICDAR(im_file, dets, CONF_THRESH, ori_result_dir, im_height, im_width)
    results = write_result_ICDAR(im_file, dets, CONF_THRESH, result_dir,
                                 im_height, im_width)

    # write_result(im_file, dets, CONF_THRESH, ori_result_dir, im_height, im_width)
    # result_file = write_result(im_file, dets, CONF_THRESH, result_dir, im_height, im_width)
    # print "write done"
    # post_merge(result_file)
    # print "merge done"
    #
    #print "merge done"
    #vis_detections(im, cls, dets, gt_boxes, thresh=CONF_THRESH)

    return results
Example #34
        cfg = cfg_mnet
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset
    testset_folder = os.path.join(args.dataset_folder, "images")
    testset_list = os.path.join(args.dataset_folder, "wider_val.txt")
    with open(testset_list, 'r') as fr:
        test_dataset = fr.read().split()
    num_images = len(test_dataset)

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing begin
    for i, img_name in enumerate(test_dataset):
        image_path = os.path.join(testset_folder, img_name)
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
    def detection_to_file(target_path,
                          v_num,
                          file_list,
                          detect,
                          total_frames,
                          current_frames,
                          max_proposal=100,
                          thresh=0):
        timer = Timer()
        w = open("{}/{}.txt".format(target_path, v_num), "w")
        for file_index, file_path in enumerate(file_list):
            file_name = file_path.split("/")[-1]
            set_num, v_num, frame_num = file_name[:-4].split("_")
            frame_num = str(int(frame_num) + 1)
            im = cv2.imread(file_path)
            timer = Timer()
            timer.tic()
            #print(file_path)
            #print(im.shape)
            #_t = {'im_preproc': Timer(), 'im_net' : Timer(), 'im_postproc': Timer(), 'misc' : Timer()}
            _t = {
                'im_preproc': Timer(),
                'im_net': Timer(),
                'im_postproc': Timer(),
                'misc': Timer()
            }
            scores, sub_scores, boxes = im_detect_hierarchy(net, im, _t)
            timer.toc()
            print('Detection Time:{:.3f}s on {}  {}/{} images'.format(timer.average_time,\
                                                   file_name ,current_frames+file_index+1 , total_frames))

            NMS_THRESH = 0.3
            for cls_ind, cls in enumerate(CLASSES_main[1:]):
                if cls != "car":
                    continue
                cls_ind += 1  # because we skipped background
                cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = scores[:, cls_ind]

                dets = np.hstack(
                    (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
                keep = nms(dets, NMS_THRESH)
                dets = dets[keep, :]
                tmp_SS = sub_scores[keep, :]
                thresh = 0
                inds = np.where(dets[:, -1] > thresh)[0]

                for i in inds:
                    bbox = dets[i, :4]
                    score = dets[i, -1] * 100
                    if score < 50:
                        continue

                    sub_ind = np.argmax(tmp_SS[i])
                    sub_score = tmp_SS[i][sub_ind] * 100
                    sub_label = CLASSES_sub[sub_ind]
                    if sub_label == "__background__" or sub_label == "not-target":
                        continue
                    #Fix bug 6
                    x = bbox[0]
                    y = bbox[1]
                    width = bbox[2] - bbox[0]
                    height = bbox[3] - bbox[1]
                    label = sub_label

                    w.write("{},{},{},{},{},{},{}\n".format(
                        frame_num, x, y, width, height, sub_score, label))

        w.close()
        print("Evalutaion file {} has been writen".format(w.name))

        return file_index + 1
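# Hedged usage sketch (illustrative, not from the original snippet): a driver
# loop that writes one detection file per video folder. The directory layout,
# target path, and passing detect=None are assumptions; `net`, CLASSES_main and
# CLASSES_sub must already be defined, and detection_to_file must be reachable
# at this scope.
import glob
import os

video_root = "data/videos"      # assumed: data/videos/<v_num>/<set>_<v>_<frame>.jpg
target_path = "results"
if not os.path.isdir(target_path):
    os.makedirs(target_path)

video_ids = sorted(os.listdir(video_root))
total_frames = sum(len(glob.glob(os.path.join(video_root, v, "*.jpg"))) for v in video_ids)
done = 0
for v_num in video_ids:
    file_list = sorted(glob.glob(os.path.join(video_root, v_num, "*.jpg")))
    done += detection_to_file(target_path, v_num, file_list, detect=None,
                              total_frames=total_frames, current_frames=done)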
Beispiel #36
0
def eval_seq(opt,
             dataloader,
             data_type,
             result_filename,
             save_dir=None,
             show_image=True,
             frame_rate=30):
    '''
       Processes the given video sequence and provides the tracking output (writes the results to a video file).

       It uses JDE model for getting information about the online targets present.

       Parameters
       ----------
       opt : Namespace
             Contains information passed as commandline arguments.

       dataloader : LoadVideo
                    Instance of LoadVideo class used for fetching the image sequence and associated data.

       data_type : String
                   Type of dataset corresponding(similar) to the given video.

       result_filename : String
                         The name(path) of the file for storing results.

       save_dir : String
                  Path to the folder for storing the frames containing bounding box information (Result frames).

       show_image : bool
                    Option for showing individual frames during run-time.

       frame_rate : int
                    Frame-rate of the given video.

       Returns
       -------
       (Returns are not significant here)
       frame_id : int
                  Sequence number of the last sequence
       '''
    width, height = dataloader.w, dataloader.h
    if save_dir:
        mkdir_if_missing(save_dir)
    tracker = JDETracker(opt, frame_rate=frame_rate)
    timer = Timer()
    results = []
    frame_id = 0

    # for selected object tracking
    global click_pos
    global is_selected
    selected_id = None

    # set video output writer
    counter = 0
    encode = 0x00000021
    output_video = cv2.VideoWriter(
        os.path.join(save_dir, f'result_{counter}.mp4'), encode, 5,
        (width, height), True)

    # start tracking
    for path, img, img0 in dataloader:
        if frame_id % 100 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))
            output_video.release()
            # Call MP4Box to divide new mp4 file
            output_video = cv2.VideoWriter(
                os.path.join(save_dir, f'result_{counter}.mp4'), encode, 5,
                (width, height), True)
            counter += 1

        # run tracking
        timer.tic()
        blob = torch.from_numpy(img).cuda().unsqueeze(0)
        online_targets = tracker.update(blob, img0)
        online_tlwhs = []
        online_ids = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        timer.toc()

        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        if show_image or save_dir is not None:
            # get visualization result and some control flags for selected object tracking
            online_im, click_pos, selected_id, is_selected = vis.plot_tracking(
                img0,
                online_tlwhs,
                online_ids,
                frame_id=frame_id,
                fps=1. / timer.average_time,
                selected_id=selected_id,
                click_pos=click_pos,
                is_selected=is_selected)
        if show_image:
            # bind mouse event listener
            cv2.setMouseCallback("online_im", on_click)
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(
                os.path.join(save_dir, 'frame', '{:05d}.jpg'.format(frame_id)),
                online_im)
            output_video.write(online_im)
        frame_id += 1
    output_video.release()
    # save results
    write_results(result_filename, results, data_type)

    return frame_id, timer.average_time, timer.calls
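# Hedged usage sketch (illustrative, not from the original snippet). eval_seq
# only requires a dataloader that yields (path, img, img0) triples (img: the
# preprocessed array fed to the network, img0: the original BGR frame) and that
# exposes .w/.h attributes. The import path and option object below are
# assumptions in the spirit of JDE/FairMOT-style projects:
# from utils.datasets import LoadVideo       # assumed import
# opt = ...                                   # Namespace with min_box_area etc.
# dataloader = LoadVideo('videos/demo.mp4')   # assumed constructor
# frame_id, avg_time, calls = eval_seq(
#     opt, dataloader, data_type='mot',
#     result_filename='results/demo.txt',
#     save_dir='outputs/demo', show_image=False,
#     frame_rate=30)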
MIN_COUNT = 2
TARGET_FEAT = 'advertiser_id'
N_CLS = 10

TRAIN_SEQ_FILE = 'data/train_preliminary/train_seq.pkl'
TEST_SEQ_FILE = 'data/test/test_seq.pkl'
LABEL_FILE = 'data/train_preliminary/user.csv'

TRAIN_GENDER_FEAT = f'data/train_feat/train_tfidf_gender_feat_{TARGET_FEAT}.pkl'
TEST_GENDER_FEAT = f'data/test_feat/test_tfidf_gender_feat_{TARGET_FEAT}.pkl'
TRAIN_AGE_FEAT = f'data/train_feat/train_tfidf_age_feat_{TARGET_FEAT}.pkl'
TEST_AGE_FEAT = f'data/test_feat/test_tfidf_age_feat_{TARGET_FEAT}.pkl'

dtype = {'user_id': 'int32', 'age': 'uint8', 'gender': 'uint8'}

timer = Timer()
# -------------------------------------------------------------------------------------------------
print('Loading data and preprocessing...')
timer.start()
train = pd.read_pickle(TRAIN_SEQ_FILE)
test = pd.read_pickle(TEST_SEQ_FILE)
user = pd.read_csv(LABEL_FILE, dtype=dtype)

label_gender = user.gender.values - 1
label_age = user.age.values - 1

# concatenate train and test into one dataframe
concated_data = pd.concat([train[TARGET_FEAT], test[TARGET_FEAT]]) \
    .reset_index(level=0, drop=True) \
    .tolist()
timer.stop()
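# Hedged continuation sketch (not from the original snippet): the
# *_tfidf_*_feat file names above suggest a TF-IDF step follows. This is one
# plausible version, assuming each element of concated_data is a list of
# advertiser_id values per user and that MIN_COUNT doubles as the min_df cut-off.
from sklearn.feature_extraction.text import TfidfVectorizer

docs = [' '.join(map(str, seq)) for seq in concated_data]
vectorizer = TfidfVectorizer(min_df=MIN_COUNT)      # assumed frequency cut-off
tfidf = vectorizer.fit_transform(docs)              # sparse (n_train + n_test) x vocab matrix

n_train = len(train)
train_tfidf = tfidf[:n_train]                       # rows for training users
test_tfidf = tfidf[n_train:]                        # rows for test users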
    def train(self): 
        '''
        Training loop.
        '''
        train_timer = Timer()  # times each training step
        load_timer = Timer()   # times data loading

        # Load the validation data
        val_images, val_labels = self.data.getValid()
        val_feed_dict = {self.net.images: val_images, self.net.labels: val_labels}  
        
        # Training iterations
        for step in range(1, self.max_iter + 1):
            #print("step: ",step)
      
            load_timer.tic()
            train_images, train_labels = self.data.getTrain()  # fetch a batch of batch_size images and their labels
            load_timer.toc()
            
            #################   start training   ################
            train_feed_dict = {self.net.images: train_images, self.net.labels: train_labels}  
            train_timer.tic()
            summary_str, _ = self.sess.run([self.summary_op, self.train_op], feed_dict=train_feed_dict)
            train_timer.toc()

            if step % self.summary_iter == 0:  # record summary information
                self.writer.add_summary(summary_str, step)
                
                if step % (self.summary_iter * 10) == 0:   # log the current training and validation losses
                    [loss_train] = self.sess.run([self.net.total_loss],feed_dict=train_feed_dict)
                    [loss_val] = self.sess.run([self.net.total_loss],feed_dict=val_feed_dict)
                    log_str = '''{} Epoch: {}, Step: {}, Learning rate: {}, Loss-train: {:5.3f}, 
                        Loss-val: {:5.3f}, Speed: {:.3f}s/iter, Load: {:.3f}s/iter, Remain: {}
                        '''.format(
                        datetime.datetime.now().strftime('%m-%d %H:%M:%S'), self.data.epoch,
                        int(step), round(self.learning_rate.eval(session=self.sess), 6),
                        loss_train, loss_val,
                        train_timer.average_time, load_timer.average_time,
                        train_timer.remain(step, self.max_iter))
                    print(log_str)

            if step % self.save_iter == 0:   # save a checkpoint for later testing
                print('{} Saving checkpoint file to: {}'.format(datetime.datetime.now().strftime('%m-%d %H:%M:%S'), self.output_dir))
                self.saver.save( self.sess, self.ckpt_file, global_step=self.global_step)
                print("save done!!!")
class JsonDatasetRel(object):
    """A class representing a COCO json dataset."""
    def __init__(self, name):
        assert name in DATASETS.keys(), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(DATASETS[name][IM_DIR]), \
            'Image directory \'{}\' not found'.format(DATASETS[name][IM_DIR])
        assert os.path.exists(DATASETS[name][ANN_FN]), \
            'Annotation file \'{}\' not found'.format(DATASETS[name][ANN_FN])
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = DATASETS[name][IM_DIR]
        self.image_prefix = ('' if IM_PREFIX not in DATASETS[name] else
                             DATASETS[name][IM_PREFIX])
        self.COCO = COCO(DATASETS[name][ANN_FN])
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.COCO.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

        assert ANN_FN2 in DATASETS[name] and ANN_FN3 in DATASETS[name]
        with open(DATASETS[name][ANN_FN2]) as f:
            self.rel_anns = json.load(f)
        with open(DATASETS[name][ANN_FN3]) as f:
            prd_categories = json.load(f)
        self.obj_classes = self.classes[1:]  # excludes background for now
        self.num_obj_classes = len(self.obj_classes)
        # self.prd_classes = ['__background__'] + prd_categories
        self.prd_classes = prd_categories  # excludes background for now
        self.num_prd_classes = len(self.prd_classes)

    @property
    def cache_path(self):
        cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache'))
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        return cache_path

    @property
    def valid_cached_keys(self):
        """ Can load following key-ed values from the cached roidb file

        'image'(image path) and 'flipped' values are already filled on _prep_roidb_entry,
        so we don't need to overwrite it again.
        """
        keys = [
            'dataset_name', 'boxes', 'segms', 'gt_classes', 'seg_areas',
            'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'sbj_gt_boxes',
            'sbj_gt_classes', 'obj_gt_boxes', 'obj_gt_classes',
            'prd_gt_classes', 'sbj_gt_overlaps', 'obj_gt_overlaps',
            'prd_gt_overlaps', 'pair_to_gt_ind_map'
        ]
        if self.keypoints is not None:
            keys += ['gt_keypoints', 'has_visible_keypoints']
        return keys

    def get_roidb(self,
                  gt=False,
                  proposal_file=None,
                  min_proposal_size=2,
                  proposal_limit=-1,
                  crowd_filter_thresh=0):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        if cfg.DEBUG:
            roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))[:100]
        else:
            roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        new_roidb = []
        for entry in roidb:
            # In OpenImages_v4 there are more detection-annotated images than
            # relationship-annotated images, hence the need to check
            if entry['file_name'] in self.rel_anns:
                self._prep_roidb_entry(entry)
                new_roidb.append(entry)
        roidb = new_roidb
        if gt:
            # Include ground-truth object annotations
            cache_filepath = os.path.join(self.cache_path,
                                          self.name + '_rel_gt_roidb.pkl')
            if os.path.exists(cache_filepath) and not cfg.DEBUG:
                self.debug_timer.tic()
                self._add_gt_from_cache(roidb, cache_filepath)
                logger.debug('_add_gt_from_cache took {:.3f}s'.format(
                    self.debug_timer.toc(average=False)))
            else:
                self.debug_timer.tic()
                for entry in roidb:
                    self._add_gt_annotations(entry)
                logger.debug('_add_gt_annotations took {:.3f}s'.format(
                    self.debug_timer.toc(average=False)))
                if not cfg.DEBUG:
                    with open(cache_filepath, 'wb') as fp:
                        pickle.dump(roidb, fp, pickle.HIGHEST_PROTOCOL)
                    logger.info('Cache ground truth roidb to %s',
                                cache_filepath)
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(roidb, proposal_file,
                                          min_proposal_size, proposal_limit,
                                          crowd_filter_thresh)
            logger.debug('_add_proposals_from_file took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        _add_class_assignments(roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(self.image_directory,
                               self.image_prefix + entry['file_name'])
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(
            im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32))
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the index
        # in the list of rois that satisfy np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty((0, 3, self.num_keypoints),
                                             dtype=np.int32)
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license']:
            if k in entry:
                del entry[k]

        entry['dataset_name'] = ''

        # add relationship annotations
        # sbj
        entry['sbj_gt_boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['sbj_gt_classes'] = np.empty((0), dtype=np.int32)
        entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_obj_classes), dtype=np.float32))
        # entry['sbj_box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        # obj
        entry['obj_gt_boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['obj_gt_classes'] = np.empty((0), dtype=np.int32)
        entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_obj_classes), dtype=np.float32))
        # entry['obj_box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        # prd
        entry['prd_gt_classes'] = np.empty((0), dtype=np.int32)
        entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_prd_classes), dtype=np.float32))
        entry['pair_to_gt_ind_map'] = np.empty((0), dtype=np.int32)

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert from (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                # valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints,
                                              axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints

        entry['dataset_name'] = self.name

        # add relationship annotations
        im_rels = self.rel_anns[entry['file_name']]
        sbj_gt_boxes = np.zeros((len(im_rels), 4),
                                dtype=entry['sbj_gt_boxes'].dtype)
        obj_gt_boxes = np.zeros((len(im_rels), 4),
                                dtype=entry['obj_gt_boxes'].dtype)
        sbj_gt_classes = np.zeros(len(im_rels),
                                  dtype=entry['sbj_gt_classes'].dtype)
        obj_gt_classes = np.zeros(len(im_rels),
                                  dtype=entry['obj_gt_classes'].dtype)
        prd_gt_classes = np.zeros(len(im_rels),
                                  dtype=entry['prd_gt_classes'].dtype)
        for ix, rel in enumerate(im_rels):
            # sbj
            sbj_gt_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2(
                rel['subject']['bbox'])
            sbj_gt_boxes[ix] = sbj_gt_box
            sbj_gt_classes[ix] = rel['subject'][
                'category']  # excludes background
            # obj
            obj_gt_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2(
                rel['object']['bbox'])
            obj_gt_boxes[ix] = obj_gt_box
            obj_gt_classes[ix] = rel['object'][
                'category']  # excludes background
            # prd
            prd_gt_classes[ix] = rel['predicate']  # exclude background
        entry['sbj_gt_boxes'] = np.append(entry['sbj_gt_boxes'],
                                          sbj_gt_boxes,
                                          axis=0)
        entry['obj_gt_boxes'] = np.append(entry['obj_gt_boxes'],
                                          obj_gt_boxes,
                                          axis=0)
        entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'],
                                            sbj_gt_classes)
        entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'],
                                            obj_gt_classes)
        entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'],
                                            prd_gt_classes)
        # misc
        sbj_gt_overlaps = np.zeros((len(im_rels), self.num_obj_classes),
                                   dtype=entry['sbj_gt_overlaps'].dtype)
        for ix in range(len(im_rels)):
            sbj_cls = sbj_gt_classes[ix]
            sbj_gt_overlaps[ix, sbj_cls] = 1.0
        entry['sbj_gt_overlaps'] = np.append(
            entry['sbj_gt_overlaps'].toarray(), sbj_gt_overlaps, axis=0)
        entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(
            entry['sbj_gt_overlaps'])

        obj_gt_overlaps = np.zeros((len(im_rels), self.num_obj_classes),
                                   dtype=entry['obj_gt_overlaps'].dtype)
        for ix in range(len(im_rels)):
            obj_cls = obj_gt_classes[ix]
            obj_gt_overlaps[ix, obj_cls] = 1.0
        entry['obj_gt_overlaps'] = np.append(
            entry['obj_gt_overlaps'].toarray(), obj_gt_overlaps, axis=0)
        entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(
            entry['obj_gt_overlaps'])

        prd_gt_overlaps = np.zeros((len(im_rels), self.num_prd_classes),
                                   dtype=entry['prd_gt_overlaps'].dtype)
        pair_to_gt_ind_map = np.zeros((len(im_rels)),
                                      dtype=entry['pair_to_gt_ind_map'].dtype)
        for ix in range(len(im_rels)):
            prd_cls = prd_gt_classes[ix]
            prd_gt_overlaps[ix, prd_cls] = 1.0
            pair_to_gt_ind_map[ix] = ix
        entry['prd_gt_overlaps'] = np.append(
            entry['prd_gt_overlaps'].toarray(), prd_gt_overlaps, axis=0)
        entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(
            entry['prd_gt_overlaps'])
        entry['pair_to_gt_ind_map'] = np.append(entry['pair_to_gt_ind_map'],
                                                pair_to_gt_ind_map)

        for k in ['file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_from_cache(self, roidb, cache_filepath):
        """Add ground truth annotation metadata from cached file."""
        logger.info('Loading cached gt_roidb from %s', cache_filepath)
        with open(cache_filepath, 'rb') as fp:
            cached_roidb = pickle.load(fp)

        assert len(roidb) == len(cached_roidb)

        for entry, cached_entry in zip(roidb, cached_roidb):
            values = [cached_entry[key] for key in self.valid_cached_keys]
            dataset_name, boxes, segms, gt_classes, seg_areas, gt_overlaps, is_crowd, box_to_gt_ind_map, \
                sbj_gt_boxes, sbj_gt_classes, obj_gt_boxes, obj_gt_classes, prd_gt_classes, \
                sbj_gt_overlaps, obj_gt_overlaps, prd_gt_overlaps, pair_to_gt_ind_map = values[:len(self.valid_cached_keys)]
            if self.keypoints is not None:
                gt_keypoints, has_visible_keypoints = values[
                    len(self.valid_cached_keys):]
            entry['dataset_name'] = dataset_name
            entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
            entry['segms'].extend(segms)
            entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
            entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
            entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps)
            entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
            entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                                   box_to_gt_ind_map)
            if self.keypoints is not None:
                entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                                  gt_keypoints,
                                                  axis=0)
                entry['has_visible_keypoints'] = has_visible_keypoints

            # add relationship annotations
            entry['sbj_gt_boxes'] = np.append(entry['sbj_gt_boxes'],
                                              sbj_gt_boxes,
                                              axis=0)
            entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'],
                                                sbj_gt_classes)
            entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(sbj_gt_overlaps)
            entry['obj_gt_boxes'] = np.append(entry['obj_gt_boxes'],
                                              obj_gt_boxes,
                                              axis=0)
            entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'],
                                                obj_gt_classes)
            entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(obj_gt_overlaps)
            entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'],
                                                prd_gt_classes)
            entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(prd_gt_overlaps)
            entry['pair_to_gt_ind_map'] = np.append(
                entry['pair_to_gt_ind_map'], pair_to_gt_ind_map)

    def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size,
                                 top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'r') as f:
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information."""
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return

        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            if cfg.KRCNN.NUM_KEYPOINTS != -1:
                assert cfg.KRCNN.NUM_KEYPOINTS == self.num_keypoints, \
                    "number of keypoints should equal when using multiple datasets"
            else:
                cfg.KRCNN.NUM_KEYPOINTS = self.num_keypoints
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'
            }

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        num_keypoints = len(obj['keypoints']) // 3  # integer count of (x, y, v) triplets
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps
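# Hedged usage sketch (illustrative, not from the original class). The dataset
# key is a placeholder: it must be registered in DATASETS with IM_DIR, ANN_FN,
# ANN_FN2 and ANN_FN3 entries for the constructor to accept it.
ds = JsonDatasetRel('vg_train')                     # assumed dataset name
roidb = ds.get_roidb(gt=True)                       # boxes + relationship annotations
print('{} images with relationship annotations'.format(len(roidb)))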
Beispiel #40
0
    def train_model(self, max_iters):
        """Network training loop."""
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        net = self.solver.net


        #def gen_data(t=0):
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        frcn_loss_cls = 0
        frcn_loss_bbox = 0
        accuracy = 0
        while self.solver.iter < max_iters:
            # Make one SGD update
            t = self.solver.iter

            timer.tic()
            self.solver.step(1)
            timer.toc()
            rpn_loss_cls += net.blobs['rpn_cls_loss'].data
            rpn_loss_bbox += net.blobs['rpn_loss_bbox'].data
            frcn_loss_cls += net.blobs['loss_cls'].data
            frcn_loss_bbox += net.blobs['loss_bbox'].data
            accuracy += net.blobs['accuarcy'].data  # blob name kept as spelled in the net prototxt
            if self.solver.iter % (10 * self.solver_param.display) == 0:
                print 'speed: {:.3f}s / iter'.format(timer.average_time)

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())
            if self.solver.iter % cfg.TRAIN.DRAW_ITERS == 0:
                #yield t, rpn_loss_cls / cfg.TRAIN.DRAW_ITERS, rpn_loss_bbox / cfg.TRAIN.DRAW_ITERS, frcn_loss_cls / cfg.TRAIN.DRAW_ITERS, frcn_loss_bbox / cfg.TRAIN.DRAW_ITERS, accuracy / cfg.TRAIN.DRAW_ITERS
                rpn_loss_cls = 0
                rpn_loss_bbox = 0
                frcn_loss_cls = 0
                frcn_loss_bbox = 0
                accuracy = 0
            if self.solver.iter == max_iters:
                time.sleep(5)
                #plt.close(fig)
        '''
        def init1():
            ax1.set_ylim(0,1)
            ax1.set_xlim(0,100)
            ax2.set_ylim(0,1)
            ax2.set_xlim(0,100)
            ax3.set_ylim(0,1)
            ax3.set_xlim(0,100)
            ax4.set_ylim(0,1)
            ax4.set_xlim(0,100)
            ax5.set_ylim(0,1)
            ax5.set_xlim(0,100)
            del xdata[:]
            del ydata1[:]
            del ydata2[:]
            del ydata3[:]
            del ydata4[:]
            del ydata5[:]
            line.set_data(xdata,ydata1)
            line2.set_data(xdata,ydata2)
            line3.set_data(xdata,ydata3)
            line4.set_data(xdata,ydata4)
            line5.set_data(xdata,ydata5)
            return line,line2,line3,line4,line5
        fig = plt.figure()
        ax1 = fig.add_subplot(5,1,1)
        ax1.set_title("RPN cls loss")
        ax2 = fig.add_subplot(5,1,2)
        ax2.set_title("RPN bbox loss")
        ax3 = fig.add_subplot(5,1,3)
        ax3.set_title("FRCN cls loss")
        ax4 = fig.add_subplot(5,1,4)
        ax4.set_title("FRCN bbox loss")
        ax5 = fig.add_subplot(5,1,5)
        ax5.set_title("ACCUARCY")
        line, = ax1.plot([], [], lw=1)
        line2, = ax2.plot([], [], lw=1)
        line3, = ax3.plot([], [], lw=1)
        line4, = ax4.plot([], [], lw=1)
        line5, = ax5.plot([], [], lw=1)
        ax1.grid()
        ax2.grid()
        ax3.grid()
        ax4.grid()
        ax5.grid()
        xdata, ydata1,ydata2,ydata3,ydata4,ydata5 =[], [], [], [], [], []
        def run1(data):
            t,y1,y2,y3,y4,y5 = data
            xdata.append(t)
            ydata1.append(y1)
            ydata2.append(y2)
            ydata3.append(y3)
            ydata4.append(y4)
            ydata5.append(y5)
            xmin, xmax = ax1.get_xlim()
            if t >= xmax:
                ax1.set_xlim(xmin,2*xmax)
                ax1.figure.canvas.draw()
                ax2.set_xlim(xmin,2*xmax)
                ax2.figure.canvas.draw()
                ax3.set_xlim(xmin,2*xmax)
                ax3.figure.canvas.draw()
                ax4.set_xlim(xmin,2*xmax)
                ax4.figure.canvas.draw()
                ax5.set_xlim(xmin,2*xmax)
                ax5.figure.canvas.draw()

            line.set_data(xdata,ydata1)
            line2.set_data(xdata,ydata2)
            line3.set_data(xdata,ydata3)
            line4.set_data(xdata,ydata4)
            line5.set_data(xdata,ydata5)
            return line, line2, line3, line4,line5
        ani = animation.FuncAnimation(fig, run1, gen_data, blit=False, interval=10,
                                     repeat=False, init_func=init1)
        plt.show()
        '''
        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths
Beispiel #41
0
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            inds = np.where((scores[:, j] > thresh[j])
                            & (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                    np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)
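# Hedged illustration (not part of the original function): a toy,
# self-contained version of the adaptive-threshold trick used above. Only the
# top max_per_set scores are kept in a min-heap, so the heap root is the
# running per-class score threshold. The scores below are made up.
import heapq

max_per_set = 5
top_scores = []
for val in [0.9, 0.2, 0.8, 0.5, 0.95, 0.4, 0.7, 0.6]:
    heapq.heappush(top_scores, val)
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)
thresh = top_scores[0]
print(thresh)   # 0.6 -> detections scoring below this would later be discarded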
Beispiel #42
0
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print '\n\nLoaded network {:s}'.format(caffemodel)

    # Warmup on a dummy image
    im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
    for i in xrange(2):
        _, _ = im_detect(net, im)

    images = sorted(glob.glob(os.path.join(args.frames_dir, '*')))
    print("Processing {}: {} files... ".format(args.frames_dir, len(images))),
    sys.stdout.flush()

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    timer = Timer()
    timer.tic()
    for image in images:
        if args.debug:
            print("Processing file {}".format(image))
        detection(net, image)
    timer.toc()
    print "{:.2f} min, {:.2f} fps".format(
        (timer.total_time) / 60., 1. * len(images) / (timer.total_time))
Beispiel #43
0
def main():
    opt = TrainOptions()
    args = opt.initialize()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU
    _t = {'iter time': Timer()}

    model_name = args.source + '_to_' + args.target
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
        os.makedirs(os.path.join(args.snapshot_dir, 'logs'))
    opt.print_options(args)

    sourceloader, targetloader = CreateSrcDataLoader(
        args), CreateTrgDataLoader(args)
    sourceloader_iter, targetloader_iter = iter(sourceloader), iter(
        targetloader)

    pseudotrgloader = CreatePseudoTrgLoader(args)
    pseudoloader_iter = iter(pseudotrgloader)

    model, optimizer = CreateModel(args)

    start_iter = 0
    if args.restore_from is not None:
        start_iter = int(args.restore_from.rsplit('/', 1)[1].rsplit('_')[1])
    if args.restore_optim_from is not None:
        optimizer.load_state_dict(torch.load(args.restore_optim_from))
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()

    cudnn.enabled = True
    cudnn.benchmark = True

    model.train()
    model.cuda()

    wandb.watch(model, log='gradient', log_freq=1)

    # losses to log
    loss = ['loss_seg_src', 'loss_seg_psu']
    loss_train = 0.0
    loss_val = 0.0
    loss_pseudo = 0.0
    loss_train_list = []
    loss_val_list = []
    loss_pseudo_list = []

    mean_img = torch.zeros(1, 1)
    class_weights = Variable(CS_weights).cuda()

    _t['iter time'].tic()
    for i in range(start_iter, args.num_steps):

        model.adjust_learning_rate(args, optimizer, i)  # adjust learning rate
        optimizer.zero_grad()  # zero grad

        src_img, src_lbl, _, _ = sourceloader_iter.next()  # new batch source
        trg_img, trg_lbl, _, _ = targetloader_iter.next()  # new batch target
        psu_img, psu_lbl, _, _ = pseudoloader_iter.next()

        scr_img_copy = src_img.clone()

        if mean_img.shape[-1] < 2:
            B, C, H, W = src_img.shape
            mean_img = IMG_MEAN.repeat(B, 1, H, W)

        #-------------------------------------------------------------------#

        # 1. source to target, target to target
        src_in_trg = FDA_source_to_target(src_img, trg_img,
                                          L=args.LB)  # src_lbl
        trg_in_trg = trg_img

        # 2. subtract mean
        src_img = src_in_trg.clone() - mean_img  # src_1, trg_1, src_lbl
        trg_img = trg_in_trg.clone() - mean_img  # trg_1, trg_0, trg_lbl
        psu_img = psu_img.clone() - mean_img

        #-------------------------------------------------------------------#

        # evaluate and update params #####
        src_img, src_lbl = Variable(src_img).cuda(), Variable(
            src_lbl.long()).cuda()  # to gpu
        src_seg_score = model(src_img,
                              lbl=src_lbl,
                              weight=class_weights,
                              ita=args.ita)  # forward pass
        loss_seg_src = model.loss_seg  # get loss
        loss_ent_src = model.loss_ent

        # use pseudo label as supervision
        psu_img, psu_lbl = Variable(psu_img).cuda(), Variable(
            psu_lbl.long()).cuda()
        psu_seg_score = model(psu_img,
                              lbl=psu_lbl,
                              weight=class_weights,
                              ita=args.ita)
        loss_seg_psu = model.loss_seg
        loss_ent_psu = model.loss_ent

        # seg loss on source and pseudo-labeled target, plus entropy loss on the pseudo-labeled target
        loss_all = loss_seg_src + (loss_seg_psu + args.entW * loss_ent_psu)
        loss_all.backward()
        optimizer.step()

        loss_train += loss_seg_src.detach().cpu().numpy()
        loss_val += loss_seg_psu.detach().cpu().numpy()

        if (i + 1) % args.save_pred_every == 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                os.path.join(args.snapshot_dir,
                             '%s_' % (args.source) + str(i + 1) + '.pth'))
            torch.save(
                optimizer.state_dict(),
                os.path.join(args.snapshot_dir_optim,
                             '%s_' % (args.source) + '.pth'))
            wandb.log({
                "src seg loss": loss_seg_src.data,
                "psu seg loss": loss_seg_psu.data,
                "learnign rate": optimizer.param_groups[0]['lr'] * 10000
            })
        if (i + 1) % args.print_freq == 0:
            _t['iter time'].toc(average=False)
            print('[it %d][src seg loss %.4f][psu seg loss %.4f][lr %.4f][%.2fs]' % \
                    (i + 1, loss_seg_src.data, loss_seg_psu.data, optimizer.param_groups[0]['lr']*10000, _t['iter time'].diff) )

            sio.savemat(args.tempdata, {
                'src_img': src_img.cpu().numpy(),
                'trg_img': trg_img.cpu().numpy()
            })

            loss_train /= args.print_freq
            loss_val /= args.print_freq
            loss_train_list.append(loss_train)
            loss_val_list.append(loss_val)
            sio.savemat(args.matname, {
                'loss_train': loss_train_list,
                'loss_val': loss_val_list
            })
            loss_train = 0.0
            loss_val = 0.0

            if i + 1 > args.num_steps_stop:
                print('finish training')
                break
            _t['iter time'].tic()
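# Hedged entry-point sketch (not from the original snippet): assumes this
# training script is executed directly; wandb.init(...) would normally have
# been called earlier in the original project.
if __name__ == '__main__':
    main()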
Beispiel #44
0
    def train_model(self, sess, max_iters):
        # Build data layers for both training and validation set
        self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
        self.data_layer_val = RoIDataLayer(self.valroidb,
                                           self.imdb.num_classes,
                                           random=True)

        # Construct the computation graph
        lr, train_op = self.construct_graph(sess)

        # Find previous snapshots if there is any to restore from
        lsf, nfiles, sfiles = self.find_previous()

        # Initialize the variables or restore them from the last snapshot
        if lsf == 0:
            rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = self.initialize(
                sess)
        else:
            rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = self.restore(
                sess, str(sfiles[-1]), str(nfiles[-1]))
        timer = Timer()
        iter = last_snapshot_iter + 1
        last_summary_time = time.time()
        # Make sure the lists are not empty
        stepsizes.append(max_iters)
        stepsizes.reverse()
        next_stepsize = stepsizes.pop()
        while iter < max_iters + 1:
            # Learning rate
            if iter == next_stepsize + 1:
                # Add snapshot here before reducing the learning rate
                self.snapshot(sess, iter)
                rate *= cfg.TRAIN.GAMMA
                sess.run(tf.assign(lr, rate))
                next_stepsize = stepsizes.pop()

            timer.tic()
            # Get training data, one batch at a time
            blobs = self.data_layer.forward()

            now = time.time()
            if iter == 1 or now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL:
                # Compute the graph with summary
                rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = \
                  self.net.train_step_with_summary(sess, blobs, train_op)
                self.writer.add_summary(summary, float(iter))
                # Also check the summary on the validation set
                # todo: wn to modify
                blobs_val = self.data_layer_val.forward()
                summary_val = self.net.get_summary(sess, blobs_val)
                self.valwriter.add_summary(summary_val, float(iter))
                last_summary_time = now
            else:
                # Compute the graph without summary
                rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = \
                  self.net.train_step(sess, blobs, train_op)
            timer.toc()

            # Display training information
            if iter % (cfg.TRAIN.DISPLAY) == 0:
                print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
                      '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n >>> lr: %f' % \
                      (iter, max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, lr.eval()))
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            # Snapshotting
            if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                ss_path, np_path = self.snapshot(sess, iter)
                np_paths.append(np_path)
                ss_paths.append(ss_path)

                # Remove the old snapshots if there are too many
                if len(np_paths) > cfg.TRAIN.SNAPSHOT_KEPT:
                    self.remove_snapshot(np_paths, ss_paths)

            iter += 1

        if last_snapshot_iter != iter - 1:
            self.snapshot(sess, iter - 1)

        self.writer.close()
        self.valwriter.close()
Beispiel #45
0
    def extractObjects(self, video_path):
        import os
        import cv2
        import torch
        import numpy as np
        from torch.multiprocessing import Pool

        from darknet import Darknet19
        import utils.yolo as yolo_utils
        import utils.network as net_utils
        from utils.timer import Timer
        import cfgs.config as cfg

        def preprocess(fname):
            # return fname
            image = cv2.imread(fname)
            im_data = np.expand_dims(
                yolo_utils.preprocess_test((image, None, cfg.inp_size))[0], 0)
            return image, im_data

        # hyper-parameters
        # npz_fname = 'models/yolo-voc.weights.npz'
        # h5_fname = 'models/yolo-voc.weights.h5'
        trained_model = cfg.trained_model
        # trained_model = os.path.join(cfg.train_output_dir, 'darknet19_voc07trainval_exp3_158.h5')
        thresh = 0.5
        im_path = video_path
        # ---

        net = Darknet19()
        net_utils.load_net(trained_model, net)
        # net.load_from_npz(npz_fname)
        # net_utils.save_net(h5_fname, net)
        net.cuda()
        net.eval()
        print('load model succ...')

        t_det = Timer()
        t_total = Timer()
        # im_fnames = ['person.jpg']
        im_fnames = sorted([
            fname for fname in sorted(os.listdir(im_path))
            if os.path.splitext(fname)[-1] == '.jpg'
        ])
        im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
        objectDetect = []
        for i, (image) in enumerate(im_fnames):
            t_total.tic()
            im_data = preprocess(image)
            image = im_data[0]
            im_data = im_data[1]
            im_data = net_utils.np_to_variable(im_data,
                                               is_cuda=True,
                                               volatile=True).permute(
                                                   0, 3, 1, 2)
            t_det.tic()
            bbox_pred, iou_pred, prob_pred = net(im_data)
            det_time = t_det.toc()
            # to numpy
            bbox_pred = bbox_pred.data.cpu().numpy()
            iou_pred = iou_pred.data.cpu().numpy()
            prob_pred = prob_pred.data.cpu().numpy()

            # print bbox_pred.shape, iou_pred.shape, prob_pred.shape
            bboxes, scores, cls_inds = yolo_utils.postprocess(
                bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
            objectDetect.append(','.join(
                set([cfg.label_names[i] for i in cls_inds])))
        return objectDetect
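# Hedged usage sketch (illustrative, not from the original method). `extractor`
# stands for whatever object this method belongs to, and the frames directory
# is an assumption; it must contain .jpg frames.
labels_per_frame = extractor.extractObjects('data/frames/clip01')
print(labels_per_frame[:3])   # e.g. ['person,car', 'person', 'car,dog']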
Beispiel #46
0
def eval_seq(opt,
             dataloader,
             data_type,
             result_filename,
             save_dir=None,
             show_image=True,
             frame_rate=30):
    '''
       Processes the given video sequence and provides the tracking output (writes the results to a video file).

       It uses JDE model for getting information about the online targets present.

       Parameters
       ----------
       opt : Namespace
             Contains information passed as commandline arguments.

       dataloader : LoadVideo
                    Instance of LoadVideo class used for fetching the image sequence and associated data.

       data_type : String
                   Type of dataset corresponding(similar) to the given video.

       result_filename : String
                         The name(path) of the file for storing results.

       save_dir : String
                  Path to the folder for storing the frames containing bounding box information (Result frames).

       show_image : bool
                    Option for showing individual frames during run-time.

       frame_rate : int
                    Frame-rate of the given video.

       Returns
       -------
       (Returns are not significant here)
       frame_id : int
                  Sequence number of the last sequence
       '''
    '''
    width = dataloader.vw
    height = dataloader.vh
    '''
    width = 640
    height = 480
    '''
    process = (
        ffmpeg
        #new added re
        #new added preset ultrafast (try different mode if not ok)
        .input('pipe:', format = 'rawvideo', pix_fmt = 'rgb24', s = '{}x{}'.format(width, height), re = None)
        #new added
        #.setpts('1.7*PTS')
        .output('../try.m3u8', format = 'hls', pix_fmt = 'yuv420p', vcodec = 'libx264', preset = "ultrafast", hls_time = 10, hls_list_size = 2, start_number = 0, hls_flags = 'delete_segments+append_list', hls_segment_filename = '../try_%05d.ts')
        .overwrite_output()
        .run_async(pipe_stdin = True)
    )
    '''
    track_id = 0
    if save_dir:
        mkdir_if_missing(save_dir)
    tracker = JDETracker(opt, frame_rate=frame_rate)
    timer = Timer()
    results = []
    frame_id = 0
    cv2.namedWindow('online_im')
    cv2.setMouseCallback('online_im', mouse_click)

    #ffmpeg process
    for path, img, img0 in dataloader:
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))

        # run tracking
        timer.tic()
        blob = torch.from_numpy(img).cuda().unsqueeze(0)
        online_targets = tracker.update(blob, img0)
        online_tlwhs = []
        online_ids = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        timer.toc()
        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        if show_image or save_dir is not None:
            online_im, track_id = vis.plot_tracking(img0,
                                                    online_tlwhs,
                                                    online_ids,
                                                    frame_id=frame_id,
                                                    fps=1. /
                                                    timer.average_time,
                                                    single=single,
                                                    mouse_x=mouse_x,
                                                    mouse_y=mouse_y,
                                                    track_id=track_id)
        if show_image:
            pass
            #cv2.imshow('online_im', online_im)
            #cv2.waitKey(1)
            #plt.imshow(online_im)
            #plt.show()
        #online_im_rgb = cv2.cvtColor(online_im, cv2.COLOR_BGR2RGB)
        #write_frame(process, online_im_rgb)
        stream(online_im)
        if save_dir is not None:
            cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                        online_im)
        frame_id += 1
    # save results
    write_results(result_filename, results, data_type)
    # close process
    #close_process(process)
    terminate_stream()
    return frame_id, timer.average_time, timer.calls
def test_net(save_folder, net, detector, cuda, testset, transform, max_per_image=300, thresh=0.005):

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return


    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0),volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x=x, test=True)  # forward pass
        arm_loc,arm_conf,odm_loc,odm_conf = out
        boxes, scores = detector.forward((odm_loc,odm_conf), priors,(arm_loc,arm_conf))
        detect_time = _t['im_detect'].toc()
        boxes = boxes[0]
        scores=scores[0]

        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        # scale each detection back up to the image
        scale = torch.Tensor([img.shape[1], img.shape[0],
                              img.shape[1], img.shape[0]]).cpu().numpy()
        boxes *= scale

        _t['misc'].tic()

        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            # both branches of the original set the same value, so GPU NMS is used for every dataset
            cpu = False

            keep = nms(c_dets, 0.45, force_cpu=cpu)
            keep = keep[:50]
            c_dets = c_dets[keep, :]
            all_boxes[j][i] = c_dets
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1,num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
                  .format(i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    if args.dataset == 'VOC':
        APs,mAP = testset.evaluate_detections(all_boxes, save_folder)
        return APs,mAP
    else:
        testset.evaluate_detections(all_boxes, save_folder)
Beispiel #48
0
opt_param = list(net.parameters())
optimizer = torch.optim.SGD(opt_param[2:],
                            lr=lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# training
firstFlag = True
train_loss = 0
tp, tf, fg, bg = 0., 0., 0, 0
step_cnt = 0
re_cnt = False
t = Timer()
t.tic()
for step in range(start_step, end_step + 1):

    # get one batch
    blobs = data_layer.forward()
    im_data = blobs['data']
    rois = blobs['rois']
    im_info = blobs['im_info']
    gt_vec = blobs['labels']
    #gt_boxes = blobs['gt_boxes']

    # forward
    net(im_data, rois, im_info, gt_vec)
    loss = net.loss
    train_loss += loss.item()
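    # (The example is cut off here; a complete SGD step would typically continue with
    # optimizer.zero_grad(), loss.backward() and optimizer.step(), but that code is not
    # part of this snippet.)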
def test_net(net,
             imdb,
             thresh=0.05,
             visualize=False,
             no_cache=False,
             output_path=None):
    """
    Testing the SSH network on a dataset
    :param net: The trained network
    :param imdb: The test imdb
    :param thresh: Detections with a probability less than this threshold are ignored
    :param visualize: Whether to visualize the detections
    :param no_cache: Whether to cache detections or not
    :param output_path: Output directory
    """
    # Initializing the timers
    print('Evaluating {} on {}'.format(net.name, imdb.name))
    timers = {'detect': Timer(), 'misc': Timer()}

    dets = [[[] for _ in xrange(len(imdb))] for _ in xrange(imdb.num_classes)]
    # NOTE: by default the detections for a given method is cached, set no_cache to disable caching!
    run_inference = True
    if not no_cache:
        output_dir = get_output_dir(imdb_name=imdb.name,
                                    net_name=net.name,
                                    output_dir=output_path)
        det_file = os.path.join(output_dir, 'detections.pkl')
        if os.path.exists(det_file) and not visualize:
            try:
                with open(det_file, 'rb') as f:
                    dets = cPickle.load(f)
                    run_inference = False
                    print('Loading detections from cache: {}'.format(det_file))
            except Exception:
                print(
                    'Could not load the cached detections file, detecting from scratch!'
                )

    # Perform inference on images if necessary
    if run_inference:
        pyramid = True if len(cfg.TEST.SCALES) > 1 else False

        for i in xrange(len(imdb)):
            im_path = imdb.image_path_at(i)
            dets[1][i], detect_time = detect(net,
                                             im_path,
                                             thresh,
                                             visualize=visualize,
                                             timers=timers,
                                             pyramid=pyramid)
            print('\r{:d}/{:d} detect-time: {:.3f}s, misc-time:{:.3f}s'.format(
                i + 1, len(imdb), timers['detect'].average_time,
                timers['misc'].average_time),
                  end='')

        if not no_cache:
            det_file = os.path.join(output_dir, 'detections.pkl')
            with open(det_file, 'wb') as f:
                cPickle.dump(dets, f, cPickle.HIGHEST_PROTOCOL)
        print('\n', end='')

    # Evaluate the detections
    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes=dets,
                             output_dir=output_dir,
                             method_name=net.name)
    print('All Done!')
Beispiel #50
0
from interface import compare_vector, Detect, Reid
import cv2
import multiprocessing
from multiprocessing import Process, Queue
import threading
from threading import Lock
import time
import zmq
import sys
import os  # needed for os.kill() in quit() below
import signal
import datetime

from configparser import ConfigParser
# import matplotlib.pyplot as plt

from utils.timer import Timer  # assumed: the same Timer utility used by the other examples in this collection

_t = {'detect': Timer(), 'reid': Timer()}
r'''
    port: 11111 to receive message from system
    port: 11112 to send the result of recognition
'''


def quit(signum, frame):
    zmq_process.stop()
    # os.kill(zmq_process.pid, signal.SIGTERM)
    for i in processes:
        os.kill(i.pid, signal.SIGTERM)
    sys.exit()
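
# quit() above is intended to be installed as a signal handler; a typical registration
# (not shown in this snippet) would be:
#   signal.signal(signal.SIGTERM, quit)
#   signal.signal(signal.SIGINT, quit)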


class ZMQReID(threading.Thread):
Beispiel #51
0
from utils.timer import Timer
import numpy as np
import time
import cv2
import caffe
# cfg_from_file and im_detect come from the surrounding Fast/Faster R-CNN style codebase;
# the paths below assume the usual py-faster-rcnn layout (this fork's im_detect also takes
# the timer dict as an extra argument):
from fast_rcnn.config import cfg_from_file
from fast_rcnn.test import im_detect

caffe.set_mode_gpu()
caffe.set_device(3)

#cfg_from_file("/tmp/test/submit_1019.yml")
cfg_from_file("/tmp/test/submit_0716.yml")
prototxt = "/tmp/test/weaponModel_test.prototxt"
caffemodel = "/tmp/test/weaponModel_iter_6000.caffemodel"
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

im = cv2.imread("/tmp/test/test.jpg")

_t = {
    'im_preproc': Timer(),
    'im_net': Timer(),
    'im_postproc': Timer(),
    'misc': Timer()
}

scores, boxes = im_detect(net, im, _t)

for i in range(10):
    _s = time.time()
    scores, boxes = im_detect(net, im, _t)
    _e = time.time()
    print "time: %s" % (_e - _s)
    time.sleep(1)
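# Optional: report the per-stage averages collected by the Timer objects above (a small
# illustrative addition; it assumes this fork's im_detect fills the timers it is given,
# as the other examples in this collection do).
for name in ('im_preproc', 'im_net', 'im_postproc', 'misc'):
    print "%s: average %.4fs over %d calls" % (name, _t[name].average_time, _t[name].calls)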
def detect(net,
           im_path,
           thresh=0.05,
           visualize=False,
           timers=None,
           pyramid=False,
           visualization_folder=None):
    """
    Main module to detect faces
    :param net: The trained network
    :param im_path: The path to the image
    :param thresh: Detection with a less score than thresh are ignored
    :param visualize: Whether to visualize the detections
    :param timers: Timers for calculating detect time (if None new timers would be created)
    :param pyramid: Whether to use pyramid during inference
    :param visualization_folder: If set the visualizations would be saved in this folder (if visualize=True)
    :return: cls_dets (bounding boxes concatenated with scores) and the timers
    """
    if not timers:
        timers = {'detect': Timer(), 'misc': Timer()}

    im = cv2.imread(im_path)
    imfname = os.path.basename(im_path)
    sys.stdout.flush()
    timers['detect'].tic()

    if not pyramid:
        im_scale = _compute_scaling_factor(im.shape, cfg.TEST.SCALES[0],
                                           cfg.TEST.MAX_SIZE)
        im_blob = _get_image_blob(im, [im_scale])
        probs, boxes = forward_net(net, im_blob[0], im_scale, False)
        boxes = boxes[:, 0:4]
    else:
        all_probs = []
        all_boxes = []
        # Compute the scaling coefficients for the pyramid
        base_scale = _compute_scaling_factor(im.shape,
                                             cfg.TEST.PYRAMID_BASE_SIZE[0],
                                             cfg.TEST.PYRAMID_BASE_SIZE[1])
        pyramid_scales = [
            float(scale) / cfg.TEST.PYRAMID_BASE_SIZE[0] * base_scale
            for scale in cfg.TEST.SCALES
        ]

        im_blobs = _get_image_blob(im, pyramid_scales)

        for i in range(len(pyramid_scales)):
            probs, boxes = forward_net(net, im_blobs[i], pyramid_scales[i],
                                       True)
            for j in xrange(len(probs)):
                # Do not apply M3 to the largest scale
                if i < len(pyramid_scales) - 1 or j < len(probs) - 1:
                    all_boxes.append(boxes[j][:, 0:4])
                    all_probs.append(probs[j].copy())

        probs = np.concatenate(all_probs)
        boxes = np.concatenate(all_boxes)

    timers['detect'].toc()
    timers['misc'].tic()

    inds = np.where(probs[:, 0] > thresh)[0]
    probs = probs[inds, 0]
    boxes = boxes[inds, :]
    dets = np.hstack((boxes, probs[:, np.newaxis])) \
            .astype(np.float32, copy=False)
    keep = nms(dets, cfg.TEST.NMS_THRESH)
    cls_dets = dets[keep, :]
    if visualize:
        plt_name = os.path.splitext(imfname)[0] + '_detections_{}'.format(
            net.name)
        visusalize_detections(im,
                              cls_dets,
                              plt_name=plt_name,
                              visualization_folder=visualization_folder)
    timers['misc'].toc()
    return cls_dets, timers
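# A minimal usage sketch for detect() (illustrative, not part of the original code; the
# image path is a placeholder):
#
#   cls_dets, timers = detect(net, '/path/to/image.jpg', thresh=0.5)
#   for x1, y1, x2, y2, score in cls_dets:
#       print('detection at ({:.0f},{:.0f},{:.0f},{:.0f}) score {:.2f}'.format(x1, y1, x2, y2, score))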
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--job_name", type=str, default="")
    parser.add_argument("--task_index", type=int, default=0)
    parser.add_argument('--debug', default=False, type=bool)  # caution: argparse's type=bool treats any non-empty string (including "False") as True
    parser.add_argument('--stop_globalstep', default=2000, type=int)
    parser.add_argument('--checkpoint_dir', default="checkpoint_dir",type=str)
    parser.add_argument('--watch_gpu',required=True ,type=int, help="watch gpu id filled Set it the same as visible gpu id")
    parser.add_argument('--warm_up_step',default = 20, type = int)

    profiler_save_steps = cfg.PROFILER_SAVE_STEP
    summary_save_steps = cfg.SUMMARY_SAVE_STEP
    FLAGS, unparsed = parser.parse_known_args()
    
    ps_hosts = cfg.PS_HOSTS.split(",")
    worker_hosts = cfg.WORKER_HOSTS.split(",")
    
    ps_size = len(ps_hosts)
    workers_size = len(worker_hosts)

    dispipe_dir="DisPipe_"+str(workers_size)+"workers"+str(ps_size)+"ps"+"_train_logs"
    if not os.path.exists(dispipe_dir):
        os.makedirs(dispipe_dir)

    inside_bsnQnM_dir = "Dis_Pipe_"+cfg.BS_NT_MUL_PREFIX
    logrootpath = os.path.join(dispipe_dir, inside_bsnQnM_dir)
    if not os.path.exists(logrootpath):
        os.makedirs(logrootpath)

   
    fpslog_name = "DisPipe_" +"task"+str(FLAGS.task_index) +cfg.BS_NT_MUL_PREFIX+ "_fpslog.txt"
    concated_path = logrootpath + "/" + fpslog_name

    checkpoint_dir = FLAGS.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    gpulog_name = "DisPipe" + "_task" + str(FLAGS.task_index)+"gpu"+str(FLAGS.watch_gpu)+cfg.BS_NT_MUL_PREFIX + "_gpulog.txt"

    ############
    ###########################gpulog#################################
    def start_gpulog(path, fname):
        # has to be called before start of training
        gpuinfo_path = path + "/" + fname
        # create/truncate the log file that nvidia-smi will append to
        open(gpuinfo_path, 'w').close()
        argument = 'timestamp,count,gpu_name,gpu_bus_id,memory.total,memory.used,utilization.gpu,utilization.memory'
        try:
            proc = subprocess.Popen(
                ['nvidia-smi --format=csv --query-gpu=%s %s %s %s' % (argument, ' -l', '-i ' + str(FLAGS.watch_gpu), '-f ' + gpuinfo_path)], shell=True)
        except KeyboardInterrupt:
            try:
                proc.kill()
            except OSError:
                pass
            proc.wait()
        return proc
    
    initial_learning_rate = cfg.LEARNING_RATE
    decay_steps = cfg.DECAY_STEPS
    decay_rate = cfg.DECAY_RATE
    staircase = cfg.STAIRCASE
    
    #os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU
    print('Start training ...')
    
    ###############################pipeline###########################################
    tf.reset_default_graph()
    
    image_producer = Pascal_voc('train')
    
    (image, label) = image_producer.get_one_image_label_element()

    image_shape = (image_producer.image_size, image_producer.image_size, 3)  # per-image tensor shape (H, W, C)

    label_size = (image_producer.cell_size, image_producer.cell_size, 25)  # per-image label grid (cell_size x cell_size x 25)

    processed_queue = tf.FIFOQueue(capacity=int(image_producer.batch_size * cfg.MUL_QUEUE_BATCH),shapes = [image_shape, label_size],dtypes = [tf.float32, tf.float32],name = 'processed_queue')

    enqueue_processed_op = processed_queue.enqueue([image, label])

    num_enqueue_threads = min(image_producer.num_enqueue_threads, image_producer.gt_labels_length)

    queue_runner = tf.train.QueueRunner(processed_queue, [enqueue_processed_op] * num_enqueue_threads)
    tf.train.add_queue_runner(queue_runner)

    (images, labels) = processed_queue.dequeue_many(image_producer.batch_size)
    ##############################################################################

    #############################Parameters#######################################
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index,cluster=cluster)):
        yolo = YOLONet(images, labels)
        # print('allocated variables and tensors successfully')

        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(
            initial_learning_rate, global_step, decay_steps,
            decay_rate, staircase, name='learning_rate')
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train_op = slim.learning.create_train_op(
            yolo.total_loss, optimizer, global_step=global_step)
    ################################################################################

    #############################loghook############################################
    profiler_hook = tf.train.ProfilerHook(save_steps=profiler_save_steps, output_dir=logrootpath, show_memory=True,show_dataflow=True)
    summary_op = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=summary_save_steps, output_dir=logrootpath, summary_op=summary_op)

    if FLAGS.debug:
        tensors_to_log = [global_step, yolo.total_loss]
        def formatter(curvals):
            print("Global step %d, Loss %f!" % (
                curvals[global_step], curvals[yolo.total_loss]))
        logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=100, formatter=formatter)
        hooks = [tf.train.StopAtStepHook(last_step=FLAGS.stop_globalstep), logging_hook, profiler_hook, summary_hook]
      
    else:
        hooks = [tf.train.StopAtStepHook(last_step=FLAGS.stop_globalstep), profiler_hook, summary_hook]
    
    # config.gpu_options.allocator_type = 'BFC'
    # config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config = tf.ConfigProto(allow_soft_placement = True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    proc = start_gpulog(logrootpath, gpulog_name)
    ################################################################################
    
    ###########################train####################################################
    with tf.train.MonitoredTrainingSession(master=server.target, is_chief=(FLAGS.task_index == 0), config=config,hooks=hooks, checkpoint_dir=FLAGS.checkpoint_dir,save_checkpoint_secs=3600) as sess:
    
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
        start_global_step_value = sess.run(global_step)
        timer = Timer()

        iters_per_toc = 20
        txtForm = "Training speed:global step %d, local avg %f fps, global %f fps, loss %f"

        #run and log
        n = 0
        while not sess.should_stop():
            n = n + 1
            if n==FLAGS.warm_up_step:
                start_global_step_value = sess.run(global_step)
                timer.tic(global_restart=True, start_global_step_value = start_global_step_value)
            if n % iters_per_toc ==0:
                timer.tic()

            yolo_loss, global_step_value, _ = sess.run([yolo.total_loss, global_step, train_op])
            
            if n % iters_per_toc == 0:
                local_avg_fps, global_avg_fps = timer.toc(iters_per_toc, global_step_value)
                txtData = global_step_value, local_avg_fps, global_avg_fps, yolo_loss
                print(txtForm % txtData)
                with open(concated_path, 'a+') as log:
                        log.write("%d,%.4f,%.4f,%.4f\n" % txtData)
 
        coord.request_stop()
        coord.join(threads)
    
    print('Done training.')
    try:
        proc.terminate()
    except OSError:
        print("Kill subprocess failed. Kill nvidia-smi manually.")
Beispiel #54
0
        nn.Conv2d(160, anchor_num[4] * num_classes, kernel_size=3, padding=1)
    ]
    # ===================================================================================#
    loc_layers += [nn.Conv2d(160, anchor_num[5] * 4, kernel_size=3, padding=1)]
    conf_layers += [
        nn.Conv2d(160, anchor_num[5] * num_classes, kernel_size=3, padding=1)
    ]
    return (loc_layers, conf_layers)


def build_net(phase, size=300, num_classes=21):
    if phase != "test" and phase != "train":
        print("Error: Phase not recognized")
        return

    return MobileNetV3(phase, size, multibox(num_classes), num_classes)


if __name__ == '__main__':
    x = torch.randn(1, 3, 300, 300)
    net = build_net('test')
    net.eval()
    from utils.timer import Timer

    _t = {'im_detect': Timer()}
    for i in range(300):
        _t['im_detect'].tic()
        net.forward(x)
        detect_time = _t['im_detect'].toc()
        print(detect_time)
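    # Small addition: report the average forward time accumulated by the timer above.
    print('average forward time over 300 runs: {:.4f}s'.format(_t['im_detect'].average_time))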
Beispiel #55
0
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False,test='01'):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)

    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # timers
    _t = {'im_detect' : Timer(), 'misc' : Timer()}

    # results file directory: data/test_results/
    result_dir = os.path.join(cfg.DATA_DIR,'test_results')

    for i in xrange(num_images):
        # filter out any ground truth boxes

        box_proposals = None
        # generate test results file according to the rules
        sequence = imdb._image_index[i].split('/')[0]
        frame = int(imdb._image_index[i].split('/')[1].split('g')[-1])
        test_file = os.path.join(result_dir, sequence.split('_')[0] + '_' + sequence.split('_')[1] + '_Det_EB.txt')
        #print 'test_file:', test_file
        f = open(test_file, 'a')

        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        print _t['im_detect'].start_time
        scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()  # without this toc() the average_time printed below never updates

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            #print 'last number of proposal:',scores.shape
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets


        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()
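        # each line written below is: frame, detection index, x, y, width, height, score
        # (width/height are computed from the corner coordinates of the detection)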
        for k in range(len(all_boxes[1][i])):
            x1, y1, x2, y2, score = all_boxes[1][i][k][:5]
            temp = '{},{},{},{},{},{},{}\n'.format(frame, k + 1, x1, y1, x2 - x1, y2 - y1, score)
            f.write(temp)
        f.close()
        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time)
Beispiel #56
0
    def train(self):

        train_timer = Timer()
        load_timer = Timer()
        sum_loss = np.zeros([cfg.MAX_ITER + 1], dtype=float)
        #plt.axis([0, cfg.MAX_ITER, cfg.AX_LOW, cfg.AX_HIGHT])
        #plt.ion()

        for step in xrange(1, self.max_iter + 1):

            load_timer.tic()
            images, labels = self.data.get()
            load_timer.toc()
            feed_dict = {self.net.images: images, self.net.labels: labels}

            if step % self.summary_iter == 0:
                if step % (self.summary_iter * 10) == 0:

                    train_timer.tic()
                    summary_str, loss, _ = self.sess.run(
                        [self.summary_op, self.net.total_loss, self.train_op],
                        feed_dict=feed_dict)
                    sum_loss[step] = loss
                    #plt.scatter(step,loss)
                    #plt.pause(0.1)
                    train_timer.toc()

                    log_str = (
                        '{} Epoch: {}, Step: {}, Learning rate: {},'
                        ' Loss: {:5.3f}\nSpeed: {:.3f}s/iter,'
                        ' Load: {:.3f}s/iter, Remain: {}').format(
                            datetime.datetime.now().strftime('%m/%d %H:%M:%S'),
                            self.data.epoch,
                            int(step) + cfg.LAST_STEP,
                            round(self.learning_rate.eval(session=self.sess),
                                  6), loss, train_timer.average_time,
                            load_timer.average_time,
                            train_timer.remain(step, self.max_iter))
                    print(log_str)
                    with open(self.train_process_save_txt, 'a') as f:
                        f.writelines(log_str + '\n')

                else:
                    train_timer.tic()
                    summary_str, loss = self.sess.run(
                        [self.summary_op, self.train_op], feed_dict=feed_dict)
                    sum_loss[step] = loss

                    train_timer.toc()
                    #print('\nb')
                    #print(summary_str)

                self.writer.add_summary(summary_str, step)

            else:
                train_timer.tic()
                loss = self.sess.run(self.train_op, feed_dict=feed_dict)
                sum_loss[step] = loss

                train_timer.toc()
                #print('q')

            if step % self.save_iter == 0:
                print('Saving checkpoint file to:{}-{}'.format(
                    self.ckpt_file, step + cfg.LAST_STEP))
                self.saver.save(self.sess,
                                self.ckpt_file,
                                global_step=self.global_step + cfg.LAST_STEP)
                with open(self.train_process_save_txt, 'a') as f:
                    f.writelines('Saving checkpoint file to:{}-{}\n'.format(
                        self.ckpt_file, step + cfg.LAST_STEP))
    def train_model(self, sess, max_iters):
        """Network training loop."""

        data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

        part_features_fc7 = self.net.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc71 = self.net1.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc72 = self.net2.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc73 = self.net3.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc74 = self.net4.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc75 = self.net5.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc76 = self.net6.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc77 = self.net7.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc78 = self.net8.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc79 = self.net9.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc710 = self.net10.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc711 = self.net11.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc712 = self.net12.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc713 = self.net13.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc714 = self.net14.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc715 = self.net15.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc716 = self.net16.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc717 = self.net17.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc718 = self.net18.get_output('pool_5')[:self.proposal_number, :]
        part_features_fc719 = self.net19.get_output('pool_5')[:self.proposal_number, :]

        #print(part_features)

        # learning matrix 1
        Matrix_L1_S1 = tf.get_variable('L1_S1', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer(
                                           stddev=1 / math.sqrt(self.feature_size * self.feature_size)))
        # learning matrix 2
        Matrix_L1_S2 = tf.get_variable('L1_S2', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer(
                                           stddev=1 / math.sqrt(self.feature_size * self.feature_size)))

        # # learning matrix 3
        # Matrix_L2_S1 = tf.get_variable('L2_S1', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer(
        #                                    stddev=1 / math.sqrt(self.feature_size * self.feature_size)))

        # learning matrix 4
        #Matrix_L1_S3 = tf.get_variable('L1_S3', [self.hidden_size, self.hidden_size],
        #                               initializer=tf.random_normal_initializer(
        #                                   stddev=1 / math.sqrt(self.hidden_size * self.hidden_size)))
        ################################
        #### get the region feature ####
        ######### max pooling ##########
        ################################
        part_features_fc7 = tf.reduce_max(tf.reshape(part_features_fc7, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc71 = tf.reduce_max(tf.reshape(part_features_fc71, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc72 = tf.reduce_max(tf.reshape(part_features_fc72, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc73 = tf.reduce_max(tf.reshape(part_features_fc73, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc74 = tf.reduce_max(tf.reshape(part_features_fc74, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc75 = tf.reduce_max(tf.reshape(part_features_fc75, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc76 = tf.reduce_max(tf.reshape(part_features_fc76, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc77 = tf.reduce_max(tf.reshape(part_features_fc77, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc78 = tf.reduce_max(tf.reshape(part_features_fc78, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc79 = tf.reduce_max(tf.reshape(part_features_fc79, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc710 = tf.reduce_max(tf.reshape(part_features_fc710, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc711 = tf.reduce_max(tf.reshape(part_features_fc711, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc712 = tf.reduce_max(tf.reshape(part_features_fc712, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc713 = tf.reduce_max(tf.reshape(part_features_fc713, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc714 = tf.reduce_max(tf.reshape(part_features_fc714, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc715 = tf.reduce_max(tf.reshape(part_features_fc715, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc716 = tf.reduce_max(tf.reshape(part_features_fc716, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc717 = tf.reduce_max(tf.reshape(part_features_fc717, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc718 = tf.reduce_max(tf.reshape(part_features_fc718, [self.proposal_number, 49, 512]), axis=1)
        part_features_fc719 = tf.reduce_max(tf.reshape(part_features_fc719, [self.proposal_number, 49, 512]), axis=1)

        #######get model parts #########
        #part_features = tf.stack([part_features_fc7, part_features_fc71], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc72]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc73]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc74]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc75]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc76]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc77]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc78]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc79]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc710]], axis=0)
        #part_features = tf.concat([part_features, [part_features_fc711]], axis=0)


        ######no attention ###########
        # similarity = tf.constant([[1.0 / self.proposal_number]] * self.proposal_number, dtype=tf.float32)
        # similarity1 = similarity
        # similarity2 = similarity
        # similarity3 = similarity
        # similarity4 = similarity
        # similarity5 = similarity
        # similarity6 = similarity
        # similarity7 = similarity
        # similarity8 = similarity
        # similarity9 = similarity
        # similarity10 = similarity
        # similarity11 = similarity
        # part_sum = tf.reduce_sum(tf.multiply(similarity, part_features_fc7), axis=0, keep_dims=True)
        # part_sum1 = tf.reduce_sum(tf.multiply(similarity1, part_features_fc71), axis=0, keep_dims=True)
        # part_sum2 = tf.reduce_sum(tf.multiply(similarity2, part_features_fc72), axis=0, keep_dims=True)
        # part_sum3 = tf.reduce_sum(tf.multiply(similarity3, part_features_fc73), axis=0, keep_dims=True)
        # part_sum4 = tf.reduce_sum(tf.multiply(similarity4, part_features_fc74), axis=0, keep_dims=True)
        # part_sum5 = tf.reduce_sum(tf.multiply(similarity5, part_features_fc75), axis=0, keep_dims=True)
        # part_sum6 = tf.reduce_sum(tf.multiply(similarity6, part_features_fc76), axis=0, keep_dims=True)
        # part_sum7 = tf.reduce_sum(tf.multiply(similarity7, part_features_fc77), axis=0, keep_dims=True)
        # part_sum8 = tf.reduce_sum(tf.multiply(similarity8, part_features_fc78), axis=0, keep_dims=True)
        # part_sum9 = tf.reduce_sum(tf.multiply(similarity9, part_features_fc79), axis=0, keep_dims=True)
        # part_sum10 = tf.reduce_sum(tf.multiply(similarity10, part_features_fc710), axis=0, keep_dims=True)
        # part_sum11 = tf.reduce_sum(tf.multiply(similarity11, part_features_fc711), axis=0, keep_dims=True)

        ##############################
        ######### L1_S1 ##############
        ##############################
        
        # view 0
        L1_S1_Similarity = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc7, Matrix_L1_S1),
                                                   tf.transpose(part_features_fc7)))
        similarity = tf.reduce_sum(L1_S1_Similarity, axis=0, keep_dims=True) / self.proposal_number
        similarity = tf.transpose(similarity)
        part_sum = tf.reduce_sum(tf.multiply(similarity, part_features_fc7), axis=0, keep_dims=True)

        # view 1
        L1_S1_Similarity1 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc71, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc71)))
        similarity1 = tf.reduce_sum(L1_S1_Similarity1, axis=0, keep_dims=True) / self.proposal_number
        similarity1 = tf.transpose(similarity1)
        part_sum1 = tf.reduce_sum(tf.multiply(similarity1, part_features_fc71), axis=0, keep_dims=True)

        # view 2
        L1_S1_Similarity2 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc72, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc72)))
        similarity2 = tf.reduce_sum(L1_S1_Similarity2, axis=0, keep_dims=True) / self.proposal_number
        similarity2 = tf.transpose(similarity2)
        part_sum2 = tf.reduce_sum(tf.multiply(similarity2, part_features_fc72), axis=0, keep_dims=True)

        # view 3
        L1_S1_Similarity3 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc73, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc73)))
        similarity3 = tf.reduce_sum(L1_S1_Similarity3, axis=0, keep_dims=True) / self.proposal_number
        similarity3 = tf.transpose(similarity3)
        part_sum3 = tf.reduce_sum(tf.multiply(similarity3, part_features_fc73), axis=0, keep_dims=True)

        # view 4
        L1_S1_Similarity4 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc74, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc74)))
        similarity4 = tf.reduce_sum(L1_S1_Similarity4, axis=0, keep_dims=True) / self.proposal_number
        similarity4 = tf.transpose(similarity4)
        part_sum4 = tf.reduce_sum(tf.multiply(similarity4, part_features_fc74), axis=0, keep_dims=True)

        # view 5
        L1_S1_Similarity5 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc75, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc75)))
        similarity5 = tf.reduce_sum(L1_S1_Similarity5, axis=0, keep_dims=True) / self.proposal_number
        similarity5 = tf.transpose(similarity5)
        part_sum5 = tf.reduce_sum(tf.multiply(similarity5, part_features_fc75), axis=0, keep_dims=True)

        # view 6
        L1_S1_Similarity6 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc76, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc76)))
        similarity6 = tf.reduce_sum(L1_S1_Similarity6, axis=0, keep_dims=True) / self.proposal_number
        similarity6 = tf.transpose(similarity6)
        part_sum6 = tf.reduce_sum(tf.multiply(similarity6, part_features_fc76), axis=0, keep_dims=True)

        # view 7
        L1_S1_Similarity7 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc77, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc77)))
        similarity7 = tf.reduce_sum(L1_S1_Similarity7, axis=0, keep_dims=True) / self.proposal_number
        similarity7 = tf.transpose(similarity7)
        part_sum7 = tf.reduce_sum(tf.multiply(similarity7, part_features_fc77), axis=0, keep_dims=True)

        # view 8
        L1_S1_Similarity8 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc78, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc78)))
        similarity8 = tf.reduce_sum(L1_S1_Similarity8, axis=0, keep_dims=True) / self.proposal_number
        similarity8 = tf.transpose(similarity8)
        part_sum8 = tf.reduce_sum(tf.multiply(similarity8, part_features_fc78), axis=0, keep_dims=True)

        # view 9
        L1_S1_Similarity9 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc79, Matrix_L1_S1),
                                                    tf.transpose(part_features_fc79)))
        similarity9 = tf.reduce_sum(L1_S1_Similarity9, axis=0, keep_dims=True) / self.proposal_number
        similarity9 = tf.transpose(similarity9)
        part_sum9 = tf.reduce_sum(tf.multiply(similarity9, part_features_fc79), axis=0, keep_dims=True)

        # view 10
        L1_S1_Similarity10 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc710, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc710)))
        similarity10 = tf.reduce_sum(L1_S1_Similarity10, axis=0, keep_dims=True) / self.proposal_number
        similarity10 = tf.transpose(similarity10)
        part_sum10 = tf.reduce_sum(tf.multiply(similarity10, part_features_fc710), axis=0, keep_dims=True)

        # view 11
        L1_S1_Similarity11 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc711, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc711)))
        similarity11 = tf.reduce_sum(L1_S1_Similarity11, axis=0, keep_dims=True) / self.proposal_number
        similarity11 = tf.transpose(similarity11)
        part_sum11 = tf.reduce_sum(tf.multiply(similarity11, part_features_fc711), axis=0, keep_dims=True)

        # view 12
        L1_S1_Similarity12 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc712, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc712)))
        similarity12 = tf.reduce_sum(L1_S1_Similarity12, axis=0, keep_dims=True) / self.proposal_number
        similarity12 = tf.transpose(similarity12)
        part_sum12 = tf.reduce_sum(tf.multiply(similarity12, part_features_fc712), axis=0, keep_dims=True)

        # view 13
        L1_S1_Similarity13 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc713, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc713)))
        similarity13 = tf.reduce_sum(L1_S1_Similarity13, axis=0, keep_dims=True) / self.proposal_number
        similarity13 = tf.transpose(similarity13)
        part_sum13 = tf.reduce_sum(tf.multiply(similarity13, part_features_fc713), axis=0, keep_dims=True)

        # view 14
        L1_S1_Similarity14 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc714, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc714)))
        similarity14 = tf.reduce_sum(L1_S1_Similarity14, axis=0, keep_dims=True) / self.proposal_number
        similarity14 = tf.transpose(similarity14)
        part_sum14 = tf.reduce_sum(tf.multiply(similarity14, part_features_fc714), axis=0, keep_dims=True)

        # view 15
        L1_S1_Similarity15 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc715, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc715)))
        similarity15 = tf.reduce_sum(L1_S1_Similarity15, axis=0, keep_dims=True) / self.proposal_number
        similarity15 = tf.transpose(similarity15)
        part_sum15 = tf.reduce_sum(tf.multiply(similarity15, part_features_fc715), axis=0, keep_dims=True)

        # view 16
        L1_S1_Similarity16 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc716, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc716)))
        similarity16 = tf.reduce_sum(L1_S1_Similarity16, axis=0, keep_dims=True) / self.proposal_number
        similarity16 = tf.transpose(similarity16)
        part_sum16 = tf.reduce_sum(tf.multiply(similarity16, part_features_fc716), axis=0, keep_dims=True)

        # view 17
        L1_S1_Similarity17 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc717, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc717)))
        similarity17 = tf.reduce_sum(L1_S1_Similarity17, axis=0, keep_dims=True) / self.proposal_number
        similarity17 = tf.transpose(similarity17)
        part_sum17 = tf.reduce_sum(tf.multiply(similarity17, part_features_fc717), axis=0, keep_dims=True)

        # view 18
        L1_S1_Similarity18 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc718, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc718)))
        similarity18 = tf.reduce_sum(L1_S1_Similarity18, axis=0, keep_dims=True) / self.proposal_number
        similarity18 = tf.transpose(similarity18)
        part_sum18 = tf.reduce_sum(tf.multiply(similarity18, part_features_fc718), axis=0, keep_dims=True)

        # view 19
        L1_S1_Similarity19 = tf.nn.softmax(tf.matmul(tf.matmul(part_features_fc719, Matrix_L1_S1),
                                                     tf.transpose(part_features_fc719)))
        similarity19 = tf.reduce_sum(L1_S1_Similarity19, axis=0, keep_dims=True) / self.proposal_number
        similarity19 = tf.transpose(similarity19)
        part_sum19 = tf.reduce_sum(tf.multiply(similarity19, part_features_fc719), axis=0, keep_dims=True)

        # concat views
        view_parts = tf.concat([part_sum, part_sum1], axis=0)
        view_parts = tf.concat([view_parts, part_sum2], axis=0)
        view_parts = tf.concat([view_parts, part_sum3], axis=0)
        view_parts = tf.concat([view_parts, part_sum4], axis=0)
        view_parts = tf.concat([view_parts, part_sum5], axis=0)
        view_parts = tf.concat([view_parts, part_sum6], axis=0)
        view_parts = tf.concat([view_parts, part_sum7], axis=0)
        view_parts = tf.concat([view_parts, part_sum8], axis=0)
        view_parts = tf.concat([view_parts, part_sum9], axis=0)
        view_parts = tf.concat([view_parts, part_sum10], axis=0)
        view_parts = tf.concat([view_parts, part_sum11], axis=0)
        view_parts = tf.concat([view_parts, part_sum12], axis=0)
        view_parts = tf.concat([view_parts, part_sum13], axis=0)
        view_parts = tf.concat([view_parts, part_sum14], axis=0)
        view_parts = tf.concat([view_parts, part_sum15], axis=0)
        view_parts = tf.concat([view_parts, part_sum16], axis=0)
        view_parts = tf.concat([view_parts, part_sum17], axis=0)
        view_parts = tf.concat([view_parts, part_sum18], axis=0)
        view_parts = tf.concat([view_parts, part_sum19], axis=0)
        view_parts = tf.nn.l2_normalize(view_parts, 1)

        '''L1_S2'''
        L1_S2_Similarity = tf.nn.softmax(tf.matmul(tf.matmul(view_parts, Matrix_L1_S2),
                                                   tf.transpose(view_parts)))
        view_similarity = tf.reduce_sum(L1_S2_Similarity, axis=0, keep_dims=True) / self.views
        view_similarity = tf.transpose(view_similarity)

        # view_similarity = tf.constant([[1.0 / self.views]] * self.views, dtype=tf.float32)
        view_sums = tf.reduce_sum(tf.multiply(view_similarity, view_parts), axis=0, keep_dims=True)
        view_sums = tf.nn.l2_normalize(view_sums, 1)
        

        view_sums_extend = tf.tile(view_sums,[self.views,1])
        views_input = tf.add(view_parts,view_sums_extend)
        view_extend = [views_input]

        view_sequence = tf.unstack(view_extend, self.rnn_steps, 1)


        ######RNN Part##########
        ########################
        ########################
        outputs, states = self.build_RNN(view_sequence)
        outputs = tf.reshape(outputs, [-1, self.views, 4096])
        #outputs = tf.concat(outputs, 1)
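        # note: this second reshape only changes the shape above if self.hidden_size != 4096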
        outputs = tf.reshape(outputs, [-1, self.views, self.hidden_size])
        model_feature = tf.reduce_max(outputs, 1)

        # classification layer
        # second attention part is related to the acutual classes
        w_init = tf.truncated_normal_initializer(stddev=0.1)
        b_init = tf.constant_initializer(0.1)
        fc2_w = tf.get_variable('fc2_w', [self.hidden_size, self.classes], dtype=tf.float32,
                                initializer=w_init)
        fc2_b = tf.get_variable('fc2_b', [self.classes], dtype=tf.float32, initializer=b_init)

        cls_logits = tf.matmul(model_feature, fc2_w) + fc2_b
        cls_prob = tf.nn.softmax(cls_logits)


        # initializing variables
        saver1 = tf.train.Saver(max_to_keep=150)
        self.saver = saver1
        sess.run(tf.global_variables_initializer())
        self.saver.restore(sess, self.pretrained_model)
        print('loaded:%s'%(self.pretrained_model))


        last_snapshot_iter = -1
        timer = Timer()
        sums = .0

        class_ac_test = True
        # class_ac_test = False
        class_acc = np.zeros(13, np.float32)
        cmatrix = np.zeros([13, 13], np.float32)
        if class_ac_test == True:
            model_num = 732
            classes_num = [100, 10, 50, 50, 100, 100, 100, 100, 20, 50, 7, 30, 15]
        else:
            model_num = 3991
            classes_num = [106, 515, 889, 200, 200, 465, 200, 680, 392, 344]
        cnum = [[100], [10], [50], [50], [100], [100], [100], [100], [20], [50], [7], [30], [15]]
        for iter in range(model_num):
            # get one batch
            train_target = data_layer.netvlad_target()

            # print(train_target)

            blobs = data_layer.forward()
            blobs1 = data_layer.forward()
            blobs2 = data_layer.forward()
            blobs3 = data_layer.forward()
            blobs4 = data_layer.forward()
            blobs5 = data_layer.forward()
            blobs6 = data_layer.forward()
            blobs7 = data_layer.forward()
            blobs8 = data_layer.forward()
            blobs9 = data_layer.forward()
            blobs10 = data_layer.forward()
            blobs11 = data_layer.forward()
            blobs12 = data_layer.forward()
            blobs13 = data_layer.forward()
            blobs14 = data_layer.forward()
            blobs15 = data_layer.forward()
            blobs16 = data_layer.forward()
            blobs17 = data_layer.forward()
            blobs18 = data_layer.forward()
            blobs19 = data_layer.forward()

            # randnum = random.randint(0, 11)
            # bloblist = [blobs, blobs1, blobs2, blobs3, blobs4, blobs5, blobs6, blobs7, blobs8, blobs9, blobs10, blobs11]
            # bloblist = bloblist[randnum:self.views] + bloblist[0:randnum]
            # feed_dict = {self.net.data: bloblist[0]['data'], self.net.im_info: bloblist[0]['im_info'],
            #              self.net.keep_prob: 1.0,
            #              self.net1.data: bloblist[1]['data'], self.net1.im_info: bloblist[1]['im_info'],
            #              self.net1.keep_prob: 1.0,
            #              self.net2.data: bloblist[2]['data'], self.net2.im_info: bloblist[2]['im_info'],
            #              self.net2.keep_prob: 1.0,
            #              self.net3.data: bloblist[3]['data'], self.net3.im_info: bloblist[3]['im_info'],
            #              self.net3.keep_prob: 1.0,
            #              self.net4.data: bloblist[4]['data'], self.net4.im_info: bloblist[4]['im_info'],
            #              self.net4.keep_prob: 1.0,
            #              self.net5.data: bloblist[5]['data'], self.net5.im_info: bloblist[5]['im_info'],
            #              self.net5.keep_prob: 1.0,
            #              self.net6.data: bloblist[6]['data'], self.net6.im_info: bloblist[6]['im_info'],
            #              self.net6.keep_prob: 1.0,
            #              self.net7.data: bloblist[7]['data'], self.net7.im_info: bloblist[7]['im_info'],
            #              self.net7.keep_prob: 1.0,
            #              self.net8.data: bloblist[8]['data'], self.net8.im_info: bloblist[8]['im_info'],
            #              self.net8.keep_prob: 1.0,
            #              self.net9.data: bloblist[9]['data'], self.net9.im_info: bloblist[9]['im_info'],
            #              self.net9.keep_prob: 1.0,
            #              self.net10.data: bloblist[10]['data'], self.net10.im_info: bloblist[10]['im_info'],
            #              self.net10.keep_prob: 1.0,
            #              self.net11.data: bloblist[11]['data'], self.net11.im_info: bloblist[11]['im_info'],
            #              self.net11.keep_prob: 1.0}
            #raw_input()
            # Make one SGD update
            feed_dict={self.net.data: blobs['data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 1.0,
                       self.net1.data: blobs1['data'], self.net1.im_info: blobs1['im_info'], self.net1.keep_prob: 1.0,
                       self.net2.data: blobs2['data'], self.net2.im_info: blobs2['im_info'], self.net2.keep_prob: 1.0,
                       self.net3.data: blobs3['data'], self.net3.im_info: blobs3['im_info'], self.net3.keep_prob: 1.0,
                       self.net4.data: blobs4['data'], self.net4.im_info: blobs4['im_info'], self.net4.keep_prob: 1.0,
                       self.net5.data: blobs5['data'], self.net5.im_info: blobs5['im_info'], self.net5.keep_prob: 1.0,
                       self.net6.data: blobs6['data'], self.net6.im_info: blobs6['im_info'], self.net6.keep_prob: 1.0,
                       self.net7.data: blobs7['data'], self.net7.im_info: blobs7['im_info'], self.net7.keep_prob: 1.0,
                       self.net8.data: blobs8['data'], self.net8.im_info: blobs8['im_info'], self.net8.keep_prob: 1.0,
                       self.net9.data: blobs9['data'], self.net9.im_info: blobs9['im_info'], self.net9.keep_prob: 1.0,
                       self.net10.data: blobs10['data'], self.net10.im_info: blobs10['im_info'],self.net10.keep_prob: 1.0,
                       self.net11.data: blobs11['data'], self.net11.im_info: blobs11['im_info'],self.net11.keep_prob: 1.0,
                       self.net12.data: blobs12['data'], self.net12.im_info: blobs12['im_info'],self.net12.keep_prob: 1.0,
                       self.net13.data: blobs13['data'], self.net13.im_info: blobs13['im_info'],self.net13.keep_prob: 1.0,
                       self.net14.data: blobs14['data'], self.net14.im_info: blobs14['im_info'],self.net14.keep_prob: 1.0,
                       self.net15.data: blobs15['data'], self.net15.im_info: blobs15['im_info'],self.net15.keep_prob: 1.0,
                       self.net16.data: blobs16['data'], self.net16.im_info: blobs16['im_info'],self.net16.keep_prob: 1.0,
                       self.net17.data: blobs17['data'], self.net17.im_info: blobs17['im_info'],self.net17.keep_prob: 1.0,
                       self.net18.data: blobs18['data'], self.net18.im_info: blobs18['im_info'],self.net18.keep_prob: 1.0,
                       self.net19.data: blobs19['data'], self.net19.im_info: blobs19['im_info'],self.net19.keep_prob: 1.0
                       }

            run_options = None
            run_metadata = None
            if cfg.TRAIN.DEBUG_TIMELINE:
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            timer.tic()
            test_acc = sess.run(cls_prob, feed_dict=feed_dict, options=run_options, run_metadata=run_metadata)
            timer.toc()
            cmatrix[np.argmax(train_target)][np.argmax(test_acc, axis=1)[0]] += 1
            if np.argmax(test_acc, axis=1)[0] == np.argmax(train_target):
                sums += 1.0
                class_acc[np.argmax(train_target)] += 1.0

            print('model id: %d' % iter, np.argmax(test_acc, axis=1)[0], np.argmax(train_target))

        print("Total accuracy: %f" % (sums / model_num))
	print(cmatrix)
	print(cmatrix/cnum)

        fid = open('/home/liuxinhai/fine-grained/results/airplane_v20_deco.txt', 'a+')
        fid.write('{:.6f}\n'.format(sums / model_num))
        fid.close()

        for i in range(self.classes):
            print("the %d class:%f" % (i, class_acc[i] / classes_num[i]))

        print('class acc: %f'%(sum(class_acc / classes_num) / self.classes))
        fid = open('/home/liuxinhai/fine-grained/results/airplane_v20_deco_class.txt', 'a+')
        fid.write('{:.6f}\n'.format(sum(class_acc / classes_num) / self.classes))
        fid.close()
Beispiel #58
0
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    num_images = imdb.num_images

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index
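    # note: output_dir and args are assumed to be defined at module level in this snippet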

    for i in range(num_images):

        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im_data = net_utils.np_to_variable(batch['images'],
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()

        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im,
                                                bboxes,
                                                scores,
                                                cls_inds,
                                                cfg,
                                                thr=0.1)
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show,
                                     (int(1000. * float(im2show.shape[1]) /
                                          im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
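The block above that limits detections to max_per_image pools the scores of every class, takes the score of the k-th best box as a threshold, and drops everything below it. A standalone sketch of that step, with a hypothetical dets_per_class list of N x 5 arrays (x1, y1, x2, y2, score):

import numpy as np

def cap_detections(dets_per_class, max_per_image):
    # Pool the score column over all classes for this image.
    score_cols = [d[:, -1] for d in dets_per_class if len(d)]
    if not score_cols:
        return dets_per_class
    scores = np.hstack(score_cols)
    if len(scores) <= max_per_image:
        return dets_per_class
    # Score of the max_per_image-th best detection becomes the cut-off.
    thresh = np.sort(scores)[-max_per_image]
    return [d[d[:, -1] >= thresh] if len(d) else d for d in dets_per_class]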
Example #59
    def train_model(self, sess, max_iters):
        """Network training loop."""

        data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

        # RPN
        # classification loss
        rpn_cls_score = tf.reshape(
            self.net.get_output('rpn_cls_score_reshape'), [-1, 2])
        rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])
        rpn_cls_score = tf.reshape(
            tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))),
            [-1, 2])
        rpn_label = tf.reshape(
            tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
        rpn_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=rpn_cls_score, labels=rpn_label))

        # bounding box regression L1 loss
        rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
        rpn_bbox_targets = tf.transpose(
            self.net.get_output('rpn-data')[1], [0, 2, 3, 1])
        rpn_bbox_inside_weights = tf.transpose(
            self.net.get_output('rpn-data')[2], [0, 2, 3, 1])
        rpn_bbox_outside_weights = tf.transpose(
            self.net.get_output('rpn-data')[3], [0, 2, 3, 1])

        rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred,
                                                 rpn_bbox_targets,
                                                 rpn_bbox_inside_weights,
                                                 rpn_bbox_outside_weights)
        rpn_loss_box = tf.reduce_mean(
            tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

        # R-CNN
        # classification loss
        cls_score = self.net.get_output('cls_score')
        label = tf.reshape(self.net.get_output('roi-data')[1], [-1])
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                           labels=label))

        # bounding box regression L1 loss
        bbox_pred = self.net.get_output('bbox_pred')
        bbox_targets = self.net.get_output('roi-data')[2]
        bbox_inside_weights = self.net.get_output('roi-data')[3]
        bbox_outside_weights = self.net.get_output('roi-data')[4]

        smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets,
                                             bbox_inside_weights,
                                             bbox_outside_weights)
        loss_box = tf.reduce_mean(
            tf.reduce_sum(smooth_l1, reduction_indices=[1]))

        # final loss
        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

        # optimizer and learning rate
        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,
                                        global_step,
                                        cfg.TRAIN.STEPSIZE,
                                        0.1,
                                        staircase=True)
        momentum = cfg.TRAIN.MOMENTUM
        train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
            loss, global_step=global_step)

        # initialize variables
        sess.run(tf.global_variables_initializer())
        if self.pretrained_model is not None:
            print 'Loading pretrained model weights from {:s}'.format(
                self.pretrained_model)
            self.net.load(self.pretrained_model, sess, self.saver, True)

        last_snapshot_iter = -1
        timer = Timer()
        for iter in range(max_iters):
            # get one batch
            blobs = data_layer.forward()

            # Make one SGD update
            feed_dict = {self.net.data: blobs['data'],
                         self.net.im_info: blobs['im_info'],
                         self.net.keep_prob: 0.5,
                         self.net.gt_boxes: blobs['gt_boxes']}

            run_options = None
            run_metadata = None
            if cfg.TRAIN.DEBUG_TIMELINE:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            timer.tic()

            rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, _ = sess.run(
                [
                    rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box,
                    train_op
                ],
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata)

            timer.toc()

            if cfg.TRAIN.DEBUG_TIMELINE:
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                trace_file = open(
                    str(long(time.time() * 1000)) + '-train-timeline.ctf.json',
                    'w')
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=False))
                trace_file.close()

            if (iter + 1) % (cfg.TRAIN.DISPLAY) == 0:
                print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, ' \
                      'rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' % \
                      (iter + 1, max_iters,
                       rpn_loss_cls_value + rpn_loss_box_value +
                       loss_cls_value + loss_box_value,
                       rpn_loss_cls_value, rpn_loss_box_value,
                       loss_cls_value, loss_box_value, lr.eval())
                print 'speed: {:.3f}s / iter'.format(timer.average_time)

            if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = iter
                self.snapshot(sess, iter)

        if last_snapshot_iter != iter:
            self.snapshot(sess, iter)
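_modified_smooth_l1 is not shown in this snippet; below is a plain NumPy sketch of the sigma-weighted smooth L1 commonly used for this loss in Faster R-CNN-style code, given only as an assumption about what that helper computes:

import numpy as np

def smooth_l1(sigma, bbox_pred, bbox_targets, inside_weights, outside_weights):
    # Assumed formulation: diff is zeroed where inside_weights == 0
    # (non-foreground entries), quadratic near zero, linear elsewhere.
    sigma2 = sigma ** 2
    diff = inside_weights * (bbox_pred - bbox_targets)
    abs_diff = np.abs(diff)
    loss = np.where(abs_diff < 1.0 / sigma2,
                    0.5 * sigma2 * diff ** 2,
                    abs_diff - 0.5 / sigma2)
    return outside_weights * loss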
Example #60
def eval_seq(opt,
             dataloader,
             data_type,
             result_filename,
             save_dir=None,
             show_image=True,
             frame_rate=30):
    '''
    Process the given video sequence and write the tracking results to
    `result_filename`; optionally save the annotated frames to `save_dir`.

    Uses the JDE tracker to obtain the online targets in each frame.

    Parameters
    ----------
    opt : Namespace
        Contains the information passed as command-line arguments.

    dataloader : LoadVideo
        Instance of the LoadVideo class used for fetching the image
        sequence and associated data.

    data_type : String
        Type of dataset the sequence belongs to; determines the format
        used when writing the results.

    result_filename : String
        Path of the file in which to store the results.

    save_dir : String
        Path of the folder in which to save frames annotated with the
        tracked bounding boxes.

    show_image : bool
        Whether to display individual frames while running.

    frame_rate : int
        Frame rate of the given video.

    Returns
    -------
    frame_id : int
        Number of frames processed.
    average_time : float
        Average tracking time per frame.
    calls : int
        Number of timed tracker updates.
    '''

    if save_dir:
        mkdir_if_missing(save_dir)
    tracker = JDETracker(opt, frame_rate=frame_rate)
    timer = Timer()
    results = []
    frame_id = 0
    for path, img, img0 in dataloader:
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))

        # run tracking
        timer.tic()
        blob = torch.from_numpy(img).cuda().unsqueeze(0)
        online_targets = tracker.update(blob, img0)
        online_tlwhs = []
        online_ids = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6  # w/h aspect-ratio filter
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        timer.toc()
        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        if show_image or save_dir is not None:
            online_im = vis.plot_tracking(img0,
                                          online_tlwhs,
                                          online_ids,
                                          frame_id=frame_id,
                                          fps=1. / timer.average_time)
        if show_image:
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                        online_im)
        frame_id += 1
    # save results
    write_results(result_filename, results, data_type)
    return frame_id, timer.average_time, timer.calls
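write_results is defined elsewhere in the original project; a minimal sketch of dumping the accumulated `results` tuples in the common MOT-challenge text layout (one frame,id,x,y,w,h,1,-1,-1,-1 line per track), assuming that is the format selected by `data_type`:

def write_mot_results(filename, results):
    # results holds (frame_id, tlwhs, track_ids) tuples as built in eval_seq.
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            for (x, y, w, h), tid in zip(tlwhs, track_ids):
                f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},1,-1,-1,-1\n'.format(
                    frame_id, tid, x, y, w, h))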