Example 1
    def get_lable_bak(self, gt_objects, gt_relationships,subject_boxes, object_boxes):
        # rearrange the ground truth
        gt_n = gt_objects.shape[0]
        sub_labels_ret = None
        obj_labels_ret = None
        pair_labels_ret = None
        for i in range(gt_n):
            gt_object = gt_objects[i]
            gt_relationship = gt_relationships[i]
            subject_box = subject_boxes[i]
            object_box = object_boxes[i]
            gt_rel_sub_idx, gt_rel_obj_idx = np.where(gt_relationship > 0) # ground truth number
            gt_sub = gt_object[gt_rel_sub_idx, :5]
            gt_obj = gt_object[gt_rel_obj_idx, :5]
            gt_rel = gt_relationship[gt_rel_sub_idx, gt_rel_obj_idx]

            # compute the overlap
            sub_overlaps = bbox_overlaps(
                np.ascontiguousarray(subject_box, dtype=np.float64),
                np.ascontiguousarray(gt_sub[:, :4], dtype=np.float64))
            obj_overlaps = bbox_overlaps(
                np.ascontiguousarray(object_box, dtype=np.float64),
                np.ascontiguousarray(gt_obj[:, :4], dtype=np.float64))


            sub_gt_assignment = sub_overlaps.argmax(axis=1)
            sub_max_overlaps = sub_overlaps.max(axis=1)
            # sub_gt_assignment indexes rows of gt_sub (not gt_object),
            # so the class label must be read from gt_sub
            sub_labels = gt_sub[sub_gt_assignment, 4]

            sub_fg_inds = np.where(sub_max_overlaps >= 0.5)[0]
            sub_bg_inds = np.where((sub_max_overlaps < 0.5) & (sub_max_overlaps >= 0.0))[0]
            sub_labels[sub_bg_inds] = 0

            obj_gt_assignment = obj_overlaps.argmax(axis=1)
            obj_max_overlaps = obj_overlaps.max(axis=1)
            # likewise, obj_gt_assignment indexes rows of gt_obj
            obj_labels = gt_obj[obj_gt_assignment, 4]

            obj_fg_inds = np.where(obj_max_overlaps >= 0.5)[0]
            obj_bg_inds = np.where((obj_max_overlaps < 0.5) & (obj_max_overlaps >= 0.0))[0]
            obj_labels[obj_bg_inds] = 0


            # map the matched rows back to gt_relationship indices to look up
            # the predicate label of each (subject, object) pair
            pair_labels = gt_relationship[gt_rel_sub_idx[sub_gt_assignment],
                                          gt_rel_obj_idx[obj_gt_assignment]]
            # pairs whose subject or object fell to background carry no relation
            keep_pairs_bg = np.where((sub_labels == 0) | (obj_labels == 0))[0]
            pair_labels[keep_pairs_bg] = 0

            if sub_labels_ret is None:
                sub_labels_ret = sub_labels
                obj_labels_ret = obj_labels
                pair_labels_ret = pair_labels
            else:
                sub_labels_ret = np.append(sub_labels_ret, sub_labels)
                obj_labels_ret = np.append(obj_labels_ret, obj_labels)
                pair_labels_ret = np.append(pair_labels_ret, pair_labels)

        # concatenate subject and object labels once, after the loop
        obj_labels_all_ret = np.append(sub_labels_ret, obj_labels_ret)

        return obj_labels_all_ret, pair_labels_ret
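All of the examples here lean on the Cython bbox_overlaps helper. For reference, a minimal pure-NumPy equivalent is sketched below, assuming boxes are [x1, y1, x2, y2] with the inclusive +1 pixel convention used by py-faster-rcnn; this is an illustrative stand-in, not the library's implementation.

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """IoU between boxes (N x 4) and query_boxes (K x 4); returns N x K."""
    boxes = boxes.astype(np.float64)
    query_boxes = query_boxes.astype(np.float64)
    # areas of both sets (the +1 matches the inclusive pixel convention)
    areas_n = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    areas_k = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
              (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    # intersection widths/heights via broadcasting: N x K
    iw = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) - \
         np.maximum(boxes[:, None, 0], query_boxes[None, :, 0]) + 1
    ih = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) - \
         np.maximum(boxes[:, None, 1], query_boxes[None, :, 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    return inter / (areas_n[:, None] + areas_k[None, :] - inter)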
Example 2
def check_msdn_rpn(object_rois, gt_objects, gt_relationships, num_images=1):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """

    # At training time the GT objects are used to vet the predicted RoIs;
    # here object_rois come from the RPN, concatenated with the GT boxes.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(object_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_objects[:, :4], dtype=np.float64))


    # axis=0 max gives each GT box its best overlap with any RoI, so
    # fg_gt_inds are the GT objects covered by at least one foreground RoI
    fg_gt_inds = np.where(overlaps.max(axis=0) >= cfg.TRAIN.FG_THRESH)[0]
    gt_union_boxes_num = np.where(gt_relationships > 0)[0].size

    #### prepare relationship targets

    if fg_gt_inds.size > 1:
        # group the foreground GT indices into all ordered (subject, object) pairs
        id_i, id_j = np.meshgrid(range(fg_gt_inds.size), range(fg_gt_inds.size), indexing='ij')
        id_i = id_i.reshape(-1)
        id_j = id_j.reshape(-1)
        # look up the predicate label for every (subject, object) pair
        pair_labels = gt_relationships[fg_gt_inds[id_i], fg_gt_inds[id_j]]

        predicate_list = pair_labels[pair_labels > 0]

        return fg_gt_inds.size, predicate_list.size, gt_union_boxes_num
    else:
        return fg_gt_inds.size, 0, gt_union_boxes_num
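The meshgrid call above is what enumerates every ordered (subject, object) pair of foreground GT indices in one shot. A small self-contained check of what it produces, with made-up indices:

import numpy as np

fg_gt_inds = np.array([2, 5, 7])  # hypothetical foreground GT indices
id_i, id_j = np.meshgrid(range(fg_gt_inds.size), range(fg_gt_inds.size), indexing='ij')
id_i, id_j = id_i.reshape(-1), id_j.reshape(-1)
pairs = np.stack([fg_gt_inds[id_i], fg_gt_inds[id_j]], axis=1)
# pairs -> (2,2) (2,5) (2,7) (5,2) (5,5) (5,7) (7,2) (7,5) (7,7);
# gt_relationships[pairs[:, 0], pairs[:, 1]] then reads out the predicate
# label (0 = no relation) for every ordered pair at once.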
Example 3
    def extract_background_features(self, image, blob, gt_boxes, relu=False):
        from torch.nn.functional import normalize
        from faster_rcnn.utils.cython_bbox import bbox_overlaps
        # unpack the prepared blob (this supersedes the gt_boxes argument)
        (im_data, im_info, gt_boxes, num_boxes) = blob
        assert im_data.size(0) == 1

        features, rois = self.rpn(im_data, im_info.data, gt_boxes.data,
                                  num_boxes.data)

        # dets : N x 4, gt_boxes : 1 x 4
        # overlaps : N x 1 overlap scores
        dets = rois.cpu().numpy()[0, :, 1:5]
        overlaps = bbox_overlaps(
            np.ascontiguousarray(dets, dtype=np.float64),
            np.ascontiguousarray(gt_boxes.data.cpu().numpy()[0, :, :4],
                                 dtype=np.float64))
        # zero out overlaps at/above the threshold, then take the proposal
        # with the highest remaining overlap: a hard near-background box
        overlaps = np.multiply(overlaps, overlaps < cfg.TEST.RPN_NMS_THRESH)
        max_arg = overlaps.argmax(axis=0)[0]

        triplet_rois = Variable(torch.zeros(1, rois.size(2))).cuda()
        triplet_rois[0, 1:5] = rois[0, max_arg, 1:5]
        triplet_features = self.roi_pool(features, triplet_rois.view(-1, 5))
        triplet_features = self.fc7(
            self.fc6(triplet_features.view(triplet_features.size(0),
                                           -1))).squeeze()
        triplet_features = self.relu(
            triplet_features) if relu else triplet_features
        triplet_features = normalize(triplet_features, dim=0)
        return triplet_features
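The masking step is the crux: multiplying by the boolean mask zeroes every overlap at or above the threshold, so the argmax that follows picks the proposal with the highest overlap still below it, i.e. a hard near-miss used as a background exemplar. A toy run (the 0.7 value is an assumption standing in for cfg.TEST.RPN_NMS_THRESH):

import numpy as np

thresh = 0.7  # stand-in for cfg.TEST.RPN_NMS_THRESH
overlaps = np.array([[0.91], [0.65], [0.30], [0.85]])  # 4 proposals vs 1 GT
masked = np.multiply(overlaps, overlaps < thresh)      # [[0.], [0.65], [0.30], [0.]]
max_arg = masked.argmax(axis=0)[0]
print(max_arg)  # 1: the best proposal that is still below the threshold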
Example 4
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == len(gt_roidb), \
                'Number of boxes must match number of ground-truth roidb'
        roidb = []
        for i, boxes in enumerate(box_list):
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_overlaps': overlaps,
                'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
                'flipped': False,
                'seg_areas': np.zeros((num_boxes, ), dtype=np.float32)
            })
        return roidb
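gt_overlaps is stored as a CSR matrix because most (box, class) entries are zero, which keeps the roidb small. A minimal round-trip with hypothetical numbers shows how downstream code can recover the dense per-box statistics:

import numpy as np
import scipy.sparse

# 3 boxes, 4 classes; only each box's assigned class is non-zero
overlaps = np.zeros((3, 4), dtype=np.float32)
overlaps[0, 2] = 0.8
overlaps[2, 1] = 0.6
sparse_overlaps = scipy.sparse.csr_matrix(overlaps)

dense = sparse_overlaps.toarray()
max_overlaps = dense.max(axis=1)    # [0.8, 0.0, 0.6]: best IoU per box
max_classes = dense.argmax(axis=1)  # [2, 0, 1]: the class achieving it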
Example 5
def image_cls_eval(scores,
                   boxes,
                   gt_boxes,
                   object_class,
                   score_thresh=0.05,
                   overlap_thresh=0.5,
                   nms=True,
                   nms_thresh=0.6):
    '''
    Scores/boxes of one class in one image.
    Keep those that satisfy score_thresh and overlap_thresh,
    and mark true positives, e.g. tp = [1, 1, 0, 1, ...].
    :param scores:
    :param boxes:
    :param gt_boxes:
    :param object_class: int
    :param score_thresh:
    :param overlap_thresh:
    :param nms: whether to run NMS before evaluation
    :param nms_thresh:
    :return:
    '''
    inds = np.where(scores > score_thresh)[0]
    cls_scores = scores[inds]
    cls_boxes = boxes[inds]

    if nms:
        cls_boxes, cls_scores = nms_detections(cls_boxes, cls_scores,
                                               nms_thresh)

    cls_sorted_inds = np.argsort(-cls_scores)
    cls_boxes, cls_scores = cls_boxes[cls_sorted_inds], cls_scores[
        cls_sorted_inds]

    # get gt_boxes of this class
    cls_gt_boxes = gt_boxes[gt_boxes[:, 4] == object_class]

    if cls_gt_boxes.shape[0] == 0 or cls_scores.size == 0:
        cls_tp = np.zeros(cls_scores.size)
        return cls_scores, cls_tp, cls_gt_boxes.shape[0]

    cls_overlaps = bbox_overlaps(
        np.ascontiguousarray(cls_boxes, dtype=np.float64),
        np.ascontiguousarray(cls_gt_boxes[:, :4], dtype=np.float64))

    cls_max_overlap = cls_overlaps.max(axis=1)
    cls_assignment = cls_overlaps.argmax(axis=1)
    cls_assignment[cls_max_overlap < overlap_thresh] = -1
    # keep only the first (highest-scoring) detection assigned to each gt box:
    # prepend a sentinel -1, take the first-occurrence index of each unique
    # assignment, then drop the sentinel entry and shift the indices back
    _, cls_assignment_keep_inds = np.unique(np.append(np.array([-1]),
                                                      cls_assignment),
                                            return_index=True)
    cls_assignment_keep_inds = cls_assignment_keep_inds[1:] - 1

    cls_tp = np.zeros(cls_scores.size)
    cls_tp[cls_assignment_keep_inds] = 1

    return cls_scores, cls_tp, cls_gt_boxes.shape[0]
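The sentinel trick above implements greedy TP assignment in two lines: because detections are already sorted by descending score, the first occurrence of each gt index is its highest-scoring match. A worked example with made-up assignments:

import numpy as np

# assignments of 6 score-sorted detections to GT boxes (-1 = no match)
cls_assignment = np.array([0, 0, 1, -1, 1, 2])
_, keep = np.unique(np.append(np.array([-1]), cls_assignment),
                    return_index=True)
keep = keep[1:] - 1  # drop the sentinel's entry, shift indices back
# keep -> [0 2 5]: the first (best-scoring) detection per GT box
cls_tp = np.zeros(cls_assignment.size)
cls_tp[keep] = 1     # [1, 0, 1, 0, 0, 1]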
Example 6
def test(model, detector, imdb, roidb):

    detector.cuda()
    detector.eval()

    print('Test Detection Performance with ', model.split('/')[-1])
    blob = init_data(is_cuda=True)
    npos, tp, fp = 0, 0, 0
    test_num = len(roidb)
    # display_interval = 1000
    for i in range(test_num):
        gt_boxes = roidb[i]['boxes']
        gt_classes = roidb[i]['gt_classes']
        image = process_img_by_lib(roidb[i]['image'])
        npos += len(gt_boxes)
        try:
            dets, scores, classes = detector.detect(image,
                                                    blob,
                                                    thr=0.7,
                                                    nms_thresh=0.3)
            # dets : N x 4, gt_boxes : K x 4
            # overlaps : N x K overlap scores
            overlaps = bbox_overlaps(
                np.ascontiguousarray(dets, dtype=np.float64),
                np.ascontiguousarray(gt_boxes, dtype=np.float64))
            # per detection: the best-matching gt box and its overlap
            candidates = overlaps.argmax(axis=1)
            ovmax = overlaps.max(axis=1)
            for k, arg in enumerate(candidates):
                detected_class = imdb._class_to_ind[classes[k]]
                if ovmax[k] > 0.5:
                    if detected_class == gt_classes[arg]:
                        tp += 1
                    else:
                        fp += 1
                else:
                    fp += 1
        except Exception:
            # detection can fail on images that yield no proposals; skip them
            pass

        sys.stdout.write('Eval {:d}/{:d} Precision : {:.2f}, Recall : {:.2f}, Model : {:s}\r'
                         .format(i + 1, test_num,
                                 tp / max(fp + tp, 1) * 100,
                                 tp / max(npos, 1) * 100, model))
        sys.stdout.flush()
    print('\t%s Precision: %.2f%%, Recall: %.2f%%\n' %
          (model, tp / max(fp + tp, 1) * 100, tp / max(npos, 1) * 100))
    return tp / max(fp + tp, 1) * 100, tp / max(npos, 1) * 100
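The per-detection bookkeeping in the loop reduces to one matching rule: a detection is a TP if its best IoU exceeds 0.5 and its class agrees with that GT box, otherwise an FP. A condensed, vectorized restatement with made-up numbers:

import numpy as np

overlaps = np.array([[0.8, 0.1],
                     [0.2, 0.6],
                     [0.3, 0.2]])   # N x K IoU matrix: 3 dets vs 2 GT boxes
det_classes = np.array([1, 2, 1])  # predicted class per detection
gt_classes = np.array([1, 1])      # class per GT box

candidates = overlaps.argmax(axis=1)  # best GT per detection: [0, 1, 0]
ovmax = overlaps.max(axis=1)          # [0.8, 0.6, 0.3]
tp = int(np.sum((ovmax > 0.5) & (det_classes == gt_classes[candidates])))
fp = len(det_classes) - tp
print(tp, fp)  # 1 2: det 0 matches; det 1 has the wrong class, det 2 low IoU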
Example 7
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[4, 8, 16, 32]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform)
                            that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg,
                            beacuse the numbers of bgs and fgs mays significiantly different
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print((np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        ))))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    # height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    # pytorch (bs, c, h, w)
    height, width = rpn_cls_score.shape[2:4]

    if DEBUG:
        print(('AnchorTargetLayer: height', height, 'width', width))
        print('')
        print(('im_size: ({}, {})'.format(im_info[0], im_info[1])))
        print(('scale: {}'.format(im_info[2])))
        print(('height, width: ({}, {})'.format(height, width)))
        print(('rpn: gt_boxes.shape', gt_boxes.shape))
        print(('rpn: gt_boxes', gt_boxes))

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print(('total_anchors', total_anchors))
        print(('inds_inside', len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print(('anchors.shape', anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)  # (A)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # (A,)
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are heavily occluded, truncated, or hard to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[
            0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float64))  # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A,)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # (H,)
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
        # print "was %s inds, disabling %s, now %s inds" % (
        # len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        # num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # the +1 in the denominators guards against division by zero
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print(('rpn: max max_overlap', np.max(max_overlaps)))
        print(('rpn: num_positive', np.sum(labels == 1)))
        print(('rpn: num_negative', np.sum(labels == 0)))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print(('rpn: num_positive avg', _fg_sum / _count))
        print(('rpn: num_negative avg', _bg_sum / _count))

    # labels
    # pdb.set_trace()
    labels = labels.reshape((1, height, width, A))
    labels = labels.transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape(
        (1, 1, A * height, width)).transpose(0, 2, 3, 1)

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    # assert bbox_inside_weights.shape[2] == height
    # assert bbox_inside_weights.shape[3] == width

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    # assert bbox_outside_weights.shape[2] == height
    # assert bbox_outside_weights.shape[3] == width

    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
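The shift/broadcast step above is the heart of anchor generation: A base anchors are replicated at every one of the K = H*W feature-map cells via a (1, A, 4) + (K, 1, 4) broadcast. A scaled-down sketch with a 2x2 feature map and two hypothetical base anchors:

import numpy as np

feat_stride = 16
height, width = 2, 2  # toy feature map
base_anchors = np.array([[-8, -8, 8, 8],       # hypothetical anchors,
                         [-16, -16, 16, 16]])  # [x1, y1, x2, y2]
A = base_anchors.shape[0]

shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
K = shifts.shape[0]  # K = height * width = 4

# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors.shape)  # (8, 4)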
Example 8
def track():
    def id_track(dataset, features):
        from collections import Counter
        def dist(f1, f2):
            # element-wise sqrt((f1 - f2)^2) is just |f1 - f2|, so this is
            # an L1 distance between the two feature vectors
            score = torch.abs(f1 - f2).sum(0).data.cpu().numpy()[0]
            return score

        id_list = []
        id_count = {'f' + str(i): [] for i in range(len(features))}
        for dataframe in dataset:
            for i, f in enumerate(features):
                init_val = 1e15
                best_id = None
                for data in dataframe:
                    score = dist(f, data['feature'])
                    if score < init_val:
                        init_val = score
                        best_id = data['id']
                id_count['f' + str(i)].append(best_id)
        for ids in id_count.values():
            c1 = Counter(ids)
            most_id = c1.most_common(1)[0][0]
            id_list.append(most_id)
        return id_list
    import os
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)
    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    relu = True if 'relu' in name_blocks else False
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    init = True
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            p = Timer()
            p.tic()

            if init:
                cnt = 1
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps, (frame.shape[1], frame.shape[0]))
                init = False
            try:
                # detect
                tid = (cnt-1) % tps
                dets, scores, classes = detector.detect(frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)
                # feature extraction
                features = []
                for i, det in enumerate(dets):
                    gt_box = det[np.newaxis,:]
                    features.append(detector.extract_feature_vector(frame, blob, gt_box, relu=relu))
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                dataframe = []
                if tid == 0:
                    dataset = []
                    for i, f in enumerate(features):
                        data = {}
                        data['id'] = i
                        data['feature'] = f
                        dataframe.append(data)
                    dataset.append(dataframe)
                    anchors = dets
                elif 0 < tid < tps - 1:
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray(anchors, dtype=np.float64),
                        np.ascontiguousarray(dets, dtype=np.float64))
                    # zero out weak matches, then pick, for each detection,
                    # the previous-frame anchor with the highest overlap
                    overlaps = np.multiply(overlaps, overlaps > 0.7)
                    max_arg = overlaps.argmax(axis=0)
                    for i, arg in enumerate(max_arg):
                        if arg >= len(features):
                            continue
                        data = {}
                        data['id'] = arg
                        data['feature'] = features[arg]
                        dataframe.append(data)
                    dataset.append(dataframe)
                    anchors = dets
                else:
                    id_list = id_track(dataset, features)
                    for i, id in enumerate(id_list):
                        det = tuple(int(x)-2 for x in dets[i])
                        cv2.putText(frame, 'id: ' + str(id), det[0:2], cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
                    # cv2.imshow('demo', frame)
                    # cv2.waitKey(1000)
                    # cv2.destroyAllWindows()
            except Exception:
                # skip frames where detection or matching fails
                pass
            finally:
                if cnt % 10 == 0:
                    print(cnt,'-frame : {:.3f}s'.format(p.toc()))
                cnt += 1
                out.write(frame)
        else:
            break
    runtime = t.toc()
    print('{} frames  /  total spend: {}s  /  {:2.1f} fps'.format(cnt, int(runtime), cnt/runtime))
    cap.release()
    out.release()
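The association step in the middle branch gates IoU at 0.7 and then, for each current detection, keeps the previous-frame box (anchor) with the highest surviving overlap, so detections inherit IDs across frames. A toy version with made-up boxes and a plain-NumPy IoU (an illustrative stand-in for the Cython helper):

import numpy as np

def iou_matrix(a, b):
    """Plain IoU between box sets a (N x 4) and b (M x 4)."""
    iw = np.clip(np.minimum(a[:, None, 2], b[None, :, 2]) -
                 np.maximum(a[:, None, 0], b[None, :, 0]), 0, None)
    ih = np.clip(np.minimum(a[:, None, 3], b[None, :, 3]) -
                 np.maximum(a[:, None, 1], b[None, :, 1]), 0, None)
    inter = iw * ih
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter)

anchors = np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=float)  # frame t-1
dets = np.array([[0, 0, 10, 11], [20, 20, 30, 31]], dtype=float)     # frame t

overlaps = iou_matrix(anchors, dets)
overlaps = np.multiply(overlaps, overlaps > 0.7)  # gate weak matches
max_arg = overlaps.argmax(axis=0)  # previous-frame ID per detection
print(max_arg)  # [0 1]: each detection inherits its anchor's ID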