Code example #1
class DeepSort(object):
    def __init__(self, model_path):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric)

    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]
        # generate detections
        # features: the appearance feature vector for each detection crop
        features = self._get_features(bbox_xywh, ori_img)
        # each Detection stores self.tlwh (top-left x, y, width, height) as an ndarray,
        # together with self.confidence and self.feature
        # the confidence filtering and the NMS step below could arguably be dropped
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1,y1,x2,y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs,axis=0)
        return outputs

    def _xywh_to_xyxy(self, bbox_xywh):
        x,y,w,h = bbox_xywh
        x1 = max(int(x-w/2),0)
        x2 = min(int(x+w/2),self.width-1)
        y1 = max(int(y-h/2),0)
        y2 = min(int(y+h/2),self.height-1)
        return x1,y1,x2,y2
    
    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1,y1,x2,y2 = self._xywh_to_xyxy(box)
            # print(y1,y2,x1,x2)
            im = ori_img[y1:y2,x1:x2]
            #cv2.imshow("d",im)
            #cv2.waitKey(0)
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
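
A minimal sketch of how this class could be driven from a video loop, assuming a detector that returns center-format (x, y, w, h) boxes and per-box confidences. `run_detector` and the checkpoint path are placeholders, not part of the original code.

import cv2

deep_sort = DeepSort("deep/checkpoint/ckpt.t7")  # assumed path to the re-ID weights
capture = cv2.VideoCapture("input.mp4")
while True:
    ret, frame = capture.read()
    if not ret:
        break
    # run_detector() is a placeholder returning an (N, 4) array of center-xywh boxes
    # and an (N,) array of confidences
    bbox_xywh, confidences = run_detector(frame)
    outputs = deep_sort.update(bbox_xywh, confidences, frame)
    for x1, y1, x2, y2, track_id in outputs:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, str(track_id), (int(x1), int(y1) - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    cv2.imshow("tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
capture.release()
cv2.destroyAllWindows()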
Code example #2
def recognize_from_video():
    try:
        print('[INFO] Webcam mode is activated')
        RECORD_TIME = 80
        capture = cv2.VideoCapture(int(args.video))
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    except ValueError:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    frame_rate = capture.get(cv2.CAP_PROP_FPS)
    if FRAME_SKIP:
        action_recognize_fps = int(args.fps)
    else:
        action_recognize_fps = frame_rate

    if args.savepath != "":
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        writer = cv2.VideoWriter(args.savepath, fmt, action_recognize_fps,
                                 size)
    else:
        writer = None

    # pose estimation
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == "lw_human_pose":
        pose = ailia.PoseEstimator(MODEL_PATH,
                                   WEIGHT_PATH,
                                   env_id=env_id,
                                   algorithm=ALGORITHM)

        detector = None
    else:
        detector = ailia.Detector(DETECTOR_MODEL_PATH,
                                  DETECTOR_WEIGHT_PATH,
                                  len(COCO_CATEGORY),
                                  format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                                  channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                                  range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                                  algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                                  env_id=env_id)

        pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)
    metric = NearestNeighborDistanceMetric("cosine", MAX_COSINE_DISTANCE,
                                           NN_BUDGET)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

    # action recognition
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    model = ailia.Net(ACTION_MODEL_PATH, ACTION_WEIGHT_PATH, env_id=env_id)

    action_data = {}

    frame_nb = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    idx_frame = 0

    time_start = time.time()
    while True:
        time_curr = time.time()
        if args.video == '0' and time_curr - time_start > RECORD_TIME:
            break
        ret, frame = capture.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if (not ret) or (frame_nb >= 1 and idx_frame >= frame_nb):
            break

        if FRAME_SKIP:
            mod = round(frame_rate / action_recognize_fps)
            if mod >= 1:
                if idx_frame % mod != 0:
                    idx_frame = idx_frame + 1
                    continue

        input_image, input_data = adjust_frame_size(
            frame,
            frame.shape[0],
            frame.shape[1],
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        if args.arch == "lw_human_pose":
            _ = pose.compute(input_data)
        else:
            detector.compute(input_data, THRESHOLD, IOU)

        # deepsort format
        h, w = input_image.shape[0], input_image.shape[1]
        if args.arch == "lw_human_pose":
            bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(
                pose, h, w)
        else:
            bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        if args.arch == "pose_resnet":
            # bbox_xywh[:, 3:] *= 1.2   #May need to be removed in the future
            cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(input_image[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ],
                                       axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # action detection
        actions = []
        persons = []
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            for i, box in enumerate(bbox_xyxy):
                id = identities[i]

                if id not in action_data:
                    action_data[id] = np.zeros(
                        (ailia.POSE_KEYPOINT_CNT - 1, TIME_RANGE, 3))

                # action recognition
                action, person = action_recognition(box, input_image, pose,
                                                    detector, model,
                                                    action_data[id])
                actions.append(action)
                persons.append(person)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(input_image, bbox_xyxy, identities, actions,
                               action_data, (0, 0))

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

        # draw skeleton
        for person in persons:
            if person is not None:
                display_result(input_image, person)

        if writer is not None:
            writer.write(input_image)

            # show progress
            if idx_frame == 0:
                print()
            print("\r" + str(idx_frame + 1) + " / " + str(frame_nb), end="")
            if idx_frame == frame_nb - 1:
                print()

        cv2.imshow('frame', input_image)

        idx_frame = idx_frame + 1

    if writer is not None:
        writer.release()

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
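
Examples #2 and #5 call several box-format helpers (xywh_to_xyxy, xywh_to_tlwh, tlwh_to_xyxy, xyxy_to_tlwh) that are defined elsewhere in the project. The sketch below shows implementations inferred from how they are used in these listings (center-format xywh as in example #1, with clipping to the frame); the project's own versions may differ in details such as rounding.

import numpy as np

def xywh_to_tlwh(bbox_xywh):
    # (center x, center y, w, h) -> (top-left x, top-left y, w, h), row-wise
    bbox_tlwh = bbox_xywh.copy().astype(float)
    bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.0
    bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.0
    return bbox_tlwh

def xywh_to_xyxy(bbox_xywh, h, w):
    # (center x, center y, w, h) -> (x1, y1, x2, y2), clipped to the image
    x, y, bw, bh = bbox_xywh
    x1 = max(int(x - bw / 2), 0)
    y1 = max(int(y - bh / 2), 0)
    x2 = min(int(x + bw / 2), w - 1)
    y2 = min(int(y + bh / 2), h - 1)
    return x1, y1, x2, y2

def tlwh_to_xyxy(bbox_tlwh, h, w):
    # (top-left x, top-left y, w, h) -> (x1, y1, x2, y2), clipped to the image
    x1, y1, bw, bh = bbox_tlwh
    x2 = min(int(x1 + bw), w - 1)
    y2 = min(int(y1 + bh), h - 1)
    return max(int(x1), 0), max(int(y1), 0), x2, y2

def xyxy_to_tlwh(bbox_xyxy):
    # (x1, y1, x2, y2) -> (top-left x, top-left y, w, h)
    x1, y1, x2, y2 = bbox_xyxy
    return x1, y1, x2 - x1, y2 - y1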
Code example #3
class DeepSort(object):
    def __init__(self, model_path):
        self.min_confidence = 0.3
        self.nms_max_overlap = 1.0

        self.extractor = Extractor(model_path, use_cuda=True)

        max_cosine_distance = 0.2
        nn_budget = 100
        n_init = 0
        max_age = 30
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric, max_age=max_age, n_init=n_init)

    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]

        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        for i in range(2):
            self.tracker.predict()
            self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy_centernet(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        return outputs

    # for centernet (x1, y1, w, h -> x1, y1, x2, y2)
    def _xywh_to_xyxy_centernet(self, bbox_xywh):
        x1, y1, w, h = bbox_xywh
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(int(x1 + w), self.width - 1)
        y2 = min(int(y1 + h), self.height - 1)
        return int(x1), int(y1), x2, y2

    # for yolo (center x, center y, w, h -> x1, y1, x2, y2)
    def _xywh_to_xyxy_yolo(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy_centernet(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features
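
This variant differs from example #1 mainly in the box convention it expects: _xywh_to_xyxy_centernet treats (x, y) as the top-left corner, while _xywh_to_xyxy_yolo treats it as the box center. A quick illustration of the two converters on the same input (values chosen arbitrarily; __init__ is bypassed only to avoid loading the extractor):

ds = DeepSort.__new__(DeepSort)      # bypass __init__ just to exercise the converters
ds.width, ds.height = 1920, 1080
box = (100, 50, 40, 20)              # x, y, w, h
print(ds._xywh_to_xyxy_centernet(box))  # (100, 50, 140, 70): (x, y) read as top-left
print(ds._xywh_to_xyxy_yolo(box))       # (80, 40, 120, 60):  (x, y) read as center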
Code example #4
class KCTracker(object):
    def __init__(self,
                 model_path='yolov3/of_model/yolov3_model_python/',
                 gpu_ids='0',
                 model_name='resid',
                 confidence_l=0.2,
                 confidence_h=0.4,
                 max_cosine_distance=0.2,
                 max_iou_distance=0.7,
                 save_feature=False,
                 use_filter=False,
                 init_extractor=True,
                 max_age=30,
                 std_Q_w=1e-1,
                 std_Q_wv=1e-3,
                 std_R_w=5e-2,
                 cls_=0):
        self.confidence_l = confidence_l
        self.confidence_h = confidence_h
        self.iou_thresh_l = 0.24
        self.iou_thresh = 0.5
        self.nms_max_overlap = 1.0
        self.extractor = None
        self.height, self.width = None, None
        if init_extractor:
            self.extractor = Extractor(model_name=model_name,
                                       load_path=model_path,
                                       gpu_ids=gpu_ids,
                                       cls=cls_)
        max_iou = max_iou_distance
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric,
                               max_iou_distance=max_iou,
                               max_age=max_age,
                               std_Q_w=std_Q_w,
                               std_Q_wv=std_Q_wv,
                               std_R_w=std_R_w)
        self.all_feature = None
        self.save_feature = save_feature
        self.count = 1
        self.result = []
        self.use_filter = use_filter
        #print('batch mode')

    def saveResult(self, file_name):
        if os.path.exists(file_name):
            os.remove(file_name)
        self.result = np.array(self.result)  # frameid_pid_tlwhc
        if self.use_filter:
            self.result = removeUnMoveLowConfObj(self.result)
        else:
            self.result = removeSmallOrBigBbox(self.result)
        writeResult(self.result, file_name)
        print('save result:', file_name)

    def getFeatureFromImage(self, bbox_tlwhcs, data, input_type, type):
        bbox_tlwhs = bbox_tlwhcs[:, 0:4]
        features = None
        if input_type == 'img':
            self.height, self.width = data.shape[:2]
            try:
                features = self._get_features_batch(bbox_tlwhs, data, type)
            except Exception as e:
                print(e)
        else:  # input_type == 'feature'
            features = data
        return features

    def update(self, frame_id, bbox_tlwhcs, ori_img, input_type='img', type=0):

        #print('ini boxs number:',len(bbox_tlwhcs))
        # print('ini confs number:',len(confidences))
        if len(bbox_tlwhcs) == 0:
            self.count += 1
            return [], []
        confidences = bbox_tlwhcs[:, -1]
        mask_l = (confidences >= self.confidence_l) & (confidences < self.confidence_h)
        mask_h = confidences >= self.confidence_h
        bbox_tlwhcs_low = bbox_tlwhcs[mask_l, :]
        bbox_tlwhcs_high = bbox_tlwhcs[mask_h, :]

        bbox_tlwhcs_new = []
        bbox_tlwhcs_temp = bbox_tlwhcs_low.copy()
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            if len(bbox_tlwhcs_temp) == 0:
                continue
            box_tlwh_temp = track.to_tlwh()
            ious_ = iou(box_tlwh_temp, bbox_tlwhcs_temp[:, 0:4])
            iou_max_ind = np.argmax(ious_)
            if ious_[iou_max_ind] > self.iou_thresh_l:
                bbox_tlwhcs_new.append(bbox_tlwhcs_temp[iou_max_ind])
                bbox_tlwhcs_temp = np.delete(bbox_tlwhcs_temp, iou_max_ind, axis=0)

        bbox_tlwhcs_new = np.array(bbox_tlwhcs_high.tolist() + bbox_tlwhcs_new)
        if len(bbox_tlwhcs_new) == 0:
            self.count += 1
            return [], []
        #try:
        #    indices = non_max_suppression(bbox_tlwhcs_new[:, 0:4], 0.6, bbox_tlwhcs_new[:, 4])
        #    bbox_tlwhcs_new = np.array([bbox_tlwhcs_new[i] for i in indices])
        #except Exception as e:
        #    print(e)
        #    return [], []
        if len(bbox_tlwhcs_new) == 0:
            self.count += 1
            return [], []
        bbox_tlwhs_new = bbox_tlwhcs_new[:, 0:4]
        confidences_new = bbox_tlwhcs_new[:, 4]

        features = self.getFeatureFromImage(bbox_tlwhcs_new, ori_img,
                                            input_type, type)

        if self.save_feature:
            if self.all_feature is None and len(features):
                self.all_feature = features
            else:
                self.all_feature = np.vstack((self.all_feature, features))

        detections = [
            Detection(bbox_tlwhs_new[i], conf, features[i], i)
            for i, conf in enumerate(confidences_new)
        ]
        # update tracker
        self.tracker.predict()
        self.tracker.update(detections, self.confidence_h)
        self.count += 1

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box_tlwh = track.to_tlwh()  # tlwh
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box_tlwh)
            track_id = track.track_id
            conf = track.confidence
            ori_id = track.ori_id
            outputs.append(np.array([track_id, x1, y1, x2, y2, conf, ori_id]))
            self.result.append(
                np.array([
                    frame_id, track_id, x1, y1, box_tlwh[2], box_tlwh[3], conf
                ]))
        bbox_tlwhcs_results = []
        for i, bbox in enumerate(bbox_tlwhcs):
            track_id_ = -1
            for output in outputs:
                if int(output[6]) == i:
                    track_id_ = output[0]
            #if track_id_ == -1:
            #    continue
            box_tlwh = bbox[0:4]
            conf_ = bbox[4]
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box_tlwh)
            bbox_tlwhcs_results.append(
                np.array([x1, y1, x2, y2, conf_, track_id_]))

        if len(bbox_tlwhcs_results) > 0:
            bbox_tlwhcs_results = np.stack(bbox_tlwhcs_results, axis=0)

        return bbox_tlwhcs_results, features

    # for centernet (x1, y1, w, h -> x1, y1, x2, y2)
    def _tlwh_to_xyxy(self, bbox_tlwh):
        x1, y1, w, h = bbox_tlwh
        x2 = x1 + w
        y2 = y1 + h
        return x1, y1, x2, y2

    def _tlwh_to_limit_xyxy(self, bbox_tlwh):
        x1, y1, w, h = bbox_tlwh
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(int(x1 + w), self.width - 1)
        y2 = min(int(y1 + h), self.height - 1)
        return int(x1), int(y1), x2, y2

    # for yolo (center x, center y, w, h -> x1, y1, x2, y2)
    def _cxcywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features_batch(self, bbox_tlwhs, ori_img, type):
        imgs = []
        if self.width is None:
            self.height, self.width = ori_img.shape[:2]
        for box in bbox_tlwhs:
            x1, y1, x2, y2 = self._tlwh_to_limit_xyxy(box)
            im = ori_img[int(y1):int(y2), int(x1):int(x2)]
            imgs.append(im)
        features = self.extractor(imgs, 20, feature_type=type)
        return features

    def _get_features(self, bbox_tlwh, ori_img):
        features = []
        if self.width is None:
            self.height, self.width = ori_img.shape[:2]
        for box in bbox_tlwh:
            x1, y1, x2, y2 = self._tlwh_to_limit_xyxy(box)
            im = ori_img[int(y1):int(y2), int(x1):int(x2)]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features

    def saveFeature(self, filename=None):
        if filename is not None:
            np.save(filename, self.all_feature)
            print('save feature:', filename)
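
A usage sketch for KCTracker, assuming a detector that yields one [top-left x, y, w, h, confidence] row per detection; detect_pedestrians, the video path, and the output file name are placeholders, not part of the original code.

import cv2

kct = KCTracker(model_path='yolov3/of_model/yolov3_model_python/', gpu_ids='0')
capture = cv2.VideoCapture('input.mp4')
frame_id = 1
while True:
    ret, frame = capture.read()
    if not ret:
        break
    # detect_pedestrians() is a placeholder returning an (N, 5) array of tlwh + confidence
    bbox_tlwhcs = detect_pedestrians(frame)
    results, features = kct.update(frame_id, bbox_tlwhcs, frame)
    for x1, y1, x2, y2, conf, track_id in results:
        if track_id < 0:  # detection that was not associated with any confirmed track
            continue
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    frame_id += 1
capture.release()
kct.saveResult('track_result.txt')  # writes the accumulated frame_id/track_id/tlwh/conf rows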
Code example #5
File: deepsort.py  Project: onexuan/ailia-models
def recognize_from_video():
    results = []
    idx_frame = 0

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = init_detector(env_id)
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    metric = NearestNeighborDistanceMetric(
        "cosine", MAX_COSINE_DISTANCE, NN_BUDGET
    )
    tracker = Tracker(
        metric,
        max_iou_distance=0.7,
        max_age=70,
        n_init=3
    )

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    # create video writer
    if args.savepath is not None:
        writer = get_writer(
            args.savepath,
            int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        )
    else:
        writer = None

    print('Start Inference...')
    while True:
        idx_frame += 1
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # In order to use ailia.Detector, the input should have 4 channels.
        input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        h, w = frame.shape[0], frame.shape[1]

        # do detection
        detector.compute(input_img, THRESHOLD, IOU)
        bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(frame[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(frame, bbox_xyxy, identities)

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

            results.append((idx_frame - 1, bbox_tlwh, identities))

        cv2.imshow('frame', frame)

        if writer is not None:
            writer.write(frame)

        if args.savepath is not None:
            write_results(args.savepath.split('.')[0] + '.txt', results, 'mot')
        else:
            write_results('result.txt', results, 'mot')

    capture.release()
    cv2.destroyAllWindows()
    print(f'Save results to {args.savepath}')
    print('Script finished successfully.')
Code example #6
class DeepSort(object):  # DeepSort(torch.jit.ScriptModule): processes the video frame by frame
    def __init__(self, model_path):
        super(DeepSort, self).__init__()
        # drop detections (and their features) whose confidence is too low
        self.min_confidence = 0.3
        # non-maximum suppression threshold, to suppress multiple boxes on one target
        self.nms_max_overlap = 1.0
        # loads the detection boxes of the current frame and extracts a deep
        # appearance feature from each box's image crop
        self.extractor = Extractor(model_path, use_cuda=True)
        max_cosine_distance = 0.2
        nn_budget = 100
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance,
                                               nn_budget)
        self.tracker = Tracker(metric)

    # @script_method
    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]
        # generate detections; features: the appearance feature vector of each detection
        features = self._get_features(bbox_xywh, ori_img)
        # each Detection stores self.tlwh (top-left x, y, width, height) as an ndarray,
        # together with self.confidence and self.feature
        # the confidence filtering and the NMS step below could arguably be dropped
        detections = [
            Detection(bbox_xywh[i], conf, features[i])
            for i, conf in enumerate(confidences) if conf > self.min_confidence
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    # @script_method
    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    # @script_method
    def _get_features(self, bbox_xywh, ori_img):
        features = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            feature = self.extractor(im)[0]
            features.append(feature)
        if len(features):
            features = np.stack(features, axis=0)
        else:
            features = np.array([])
        return features