Ejemplo n.º 1
0
    def _eval_seq_jde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      draw_threshold=0):
        """Track every item of ``dataloader`` with the model's own tracker.

        Args:
            dataloader: iterable of per-frame inputs; each item is passed to
                ``self.model`` unchanged.
            save_dir: directory forwarded to ``save_vis_results``; it is
                created first when it is set and does not exist yet.
            show_image: forwarded to ``save_vis_results``.
            frame_rate: rescales ``tracker.track_buffer`` (relative to 30)
                into ``tracker.max_time_lost``.
            draw_threshold: accepted but not read anywhere in this method.

        Returns:
            ``(results, frame_count, average_time, calls)``. ``results`` maps
            each class id to a list of ``(frame_number, tlwhs, scores, ids)``
            records with frame numbers starting at 1; the last two values
            are read from the ``MOTTimer``.
        """
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

        tracker = self.model.tracker
        tracker.max_time_lost = int(frame_rate / 30.0 * tracker.track_buffer)

        timer = MOTTimer()
        self.status['mode'] = 'track'
        self.model.eval()
        results = defaultdict(list)  # one record list per class id
        frame_id = 0

        for step_id, data in enumerate(tqdm(dataloader)):
            self.status['step_id'] = step_id

            # Forward pass followed by the tracker update, both timed.
            timer.tic()
            pred_dets, pred_embs = self.model(data)
            pred_dets = pred_dets.numpy()
            pred_embs = pred_embs.numpy()
            targets_by_class = self.model.tracker.update(pred_dets, pred_embs)

            online_tlwhs = defaultdict(list)
            online_scores = defaultdict(list)
            online_ids = defaultdict(list)
            for cls_id in range(self.cfg.num_classes):
                class_targets = targets_by_class[cls_id]
                # Indexing the defaultdicts registers the class even when
                # none of its tracks survives the filters below.
                kept_tlwhs = online_tlwhs[cls_id]
                kept_ids = online_ids[cls_id]
                kept_scores = online_scores[cls_id]

                for track in class_targets:
                    tlwh, tid, tscore = track.tlwh, track.track_id, track.score
                    box_w, box_h = tlwh[2], tlwh[3]
                    # Drop boxes whose area is not above the minimum.
                    if box_w * box_h <= tracker.min_box_area:
                        continue
                    # Drop boxes wider than the allowed w/h ratio (when set).
                    if (tracker.vertical_ratio > 0 and
                            box_w / box_h > tracker.vertical_ratio):
                        continue
                    kept_tlwhs.append(tlwh)
                    kept_ids.append(tid)
                    kept_scores.append(tscore)

                # Record this frame for the class (1-based frame number).
                results[cls_id].append(
                    (frame_id + 1, kept_tlwhs, kept_scores, kept_ids))

            timer.toc()
            save_vis_results(data, frame_id, online_ids, online_tlwhs,
                             online_scores, timer.average_time, show_image,
                             save_dir, self.cfg.num_classes)
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls
Ejemplo n.º 2
0
def predict_video(detector, camera_id):
    """Track objects in a camera stream or video file and save the output.

    Frames are read from camera ``camera_id`` or, when ``camera_id`` is -1,
    from ``FLAGS.video_file``. Each frame is passed to ``detector.predict``
    and the result is drawn with ``mot_vis.plot_tracking``. Annotated frames
    are written to an mp4 under ``FLAGS.output_dir`` or, when
    ``FLAGS.save_images`` is set, saved as numbered JPEGs that ffmpeg then
    assembles into the video. With ``FLAGS.save_mot_txts`` the per-frame
    results are also handed to ``write_mot_results``.

    Args:
        detector: object whose ``predict([frame], threshold)`` returns
            ``(online_tlwhs, online_scores, online_ids)``.
        camera_id: index given to ``cv2.VideoCapture``; -1 selects
            ``FLAGS.video_file`` instead.
    """
    import subprocess  # only needed for the ffmpeg step below

    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
        video_name = 'mot_output.mp4'
    else:
        capture = cv2.VideoCapture(FLAGS.video_file)
        video_name = os.path.split(FLAGS.video_file)[-1]
    # splitext keeps working for names without an extension and keeps
    # dotted stems ("a.b.mp4" -> "a.b") distinct.
    video_stem = os.path.splitext(video_name)[0]
    out_path = os.path.join(FLAGS.output_dir, video_name)
    save_dir = os.path.join(FLAGS.output_dir, video_stem)

    writer = None
    results = []
    try:
        writer_fps = 30
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print('frame_count', frame_count)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        os.makedirs(FLAGS.output_dir, exist_ok=True)
        if FLAGS.save_images:
            os.makedirs(save_dir, exist_ok=True)
        else:
            writer = cv2.VideoWriter(out_path, fourcc, writer_fps,
                                     (width, height))

        frame_id = 0
        timer = MOTTimer()
        while True:
            ret, frame = capture.read()
            if not ret:
                break
            timer.tic()
            online_tlwhs, online_scores, online_ids = detector.predict(
                [frame], FLAGS.threshold)
            timer.toc()

            # Stored frame numbers are 1-based.
            results.append(
                (frame_id + 1, online_tlwhs, online_scores, online_ids))
            # Processing speed shown on the frame; independent of the fps
            # the output video is encoded with.
            processing_fps = 1. / timer.average_time
            im = mot_vis.plot_tracking(frame,
                                       online_tlwhs,
                                       online_ids,
                                       online_scores,
                                       frame_id=frame_id,
                                       fps=processing_fps)
            if FLAGS.save_images:
                cv2.imwrite(
                    os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                    im)
            else:
                writer.write(im)
            frame_id += 1
            print('detect frame:%d' % (frame_id))
            if camera_id != -1:
                cv2.imshow('Tracking Detection', im)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always give the camera/file handle and the encoder back, even when
        # prediction or drawing raises part-way through.
        capture.release()
        if writer is not None:
            writer.release()

    if FLAGS.save_mot_txts:
        result_filename = os.path.join(FLAGS.output_dir, video_stem + '.txt')
        write_mot_results(result_filename, results)

    if FLAGS.save_images:
        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters reach ffmpeg unchanged.
        cmd = [
            'ffmpeg', '-f', 'image2', '-i',
            '{}/%05d.jpg'.format(save_dir), out_path
        ]
        try:
            completed = subprocess.run(cmd)
        except OSError as err:
            # e.g. ffmpeg is not installed; keep this step best-effort.
            print('Failed to run ffmpeg: {}'.format(err))
        else:
            if completed.returncode == 0:
                print('Save video in {}.'.format(out_path))
            else:
                print('ffmpeg exited with code {}.'.format(
                    completed.returncode))
Ejemplo n.º 3
0
    def _eval_seq_sde(self,
                      dataloader,
                      save_dir=None,
                      show_image=False,
                      frame_rate=30,
                      seq_name='',
                      scaled=False,
                      det_file='',
                      draw_threshold=0):
        """Track one sequence using separate detection, ReID and tracker steps.

        Detections come from ``self.model.detector`` when it is set, otherwise
        from ``det_file``. Boxes are clipped with ``clip_box``; when
        ``self.model.reid`` is set, crops of the boxes are embedded by
        ``self.model``. The detections (and embeddings, if any) are then fed
        to ``self.model.tracker``.

        Args:
            dataloader: iterable of per-frame dicts; the keys read here are
                'ori_image', 'image', 'im_shape' and 'scale_factor'.
            save_dir: directory forwarded to ``save_vis_results``; created
                first when it is set and does not exist yet.
            show_image: forwarded to ``save_vis_results``.
            frame_rate: not read anywhere in this method.
            seq_name: passed to ``get_trick_hyperparams`` in the JDETracker
                branch.
            scaled: when False, detector boxes go through ``scale_coords``;
                when True they are used as returned by the detector.
            det_file: detection results file, loaded with
                ``load_det_results`` only when the model has no detector.
            draw_threshold: tracks scoring below this value are dropped; only
                applied in the DeepSORTTracker branch.

        Returns:
            ``(results, frame_id, timer.average_time, timer.calls)``.
            ``results`` maps a class id (always 0 in the DeepSORTTracker
            branch) to a list of ``(frame_number, tlwhs, scores, ids)``
            records with frame numbers starting at 1. Frames without usable
            detections add no record but still advance ``frame_id``.
        """
        if save_dir:
            if not os.path.exists(save_dir): os.makedirs(save_dir)
        # Detector and ReID are optional parts of the model.
        use_detector = False if not self.model.detector else True
        use_reid = False if not self.model.reid else True

        timer = MOTTimer()
        results = defaultdict(list)
        frame_id = 0
        self.status['mode'] = 'track'
        self.model.eval()
        if use_reid:
            self.model.reid.eval()
        if not use_detector:
            # No detector: use the detections stored in det_file instead.
            dets_list = load_det_results(det_file, len(dataloader))
            logger.info('Finish loading detection results file {}.'.format(
                det_file))

        tracker = self.model.tracker
        for step_id, data in enumerate(tqdm(dataloader)):
            self.status['step_id'] = step_id
            ori_image = data['ori_image']  # [bs, H, W, 3]
            ori_image_shape = data['ori_image'].shape[1:3]
            # ori_image_shape: [H, W]

            input_shape = data['image'].shape[2:]
            # input_shape: [h, w], before data transforms, set in model config

            im_shape = data['im_shape'][0].numpy()
            # im_shape: [new_h, new_w], after data transforms
            scale_factor = data['scale_factor'][0].numpy()

            empty_detections = False
            # When no boxes are detected, the ReID model is not run and the
            # original image is used for visualization instead.

            # forward
            timer.tic()
            if not use_detector:
                dets = dets_list[frame_id]
                bbox_tlwh = np.array(dets['bbox'], dtype='float32')
                if bbox_tlwh.shape[0] > 0:
                    # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                    pred_cls_ids = np.array(dets['cls_id'], dtype='float32')
                    pred_scores = np.array(dets['score'], dtype='float32')
                    # Turn (top-left, width/height) into two corner points
                    # by adding the top-left to the size columns.
                    pred_bboxes = np.concatenate(
                        (bbox_tlwh[:, 0:2],
                         bbox_tlwh[:, 2:4] + bbox_tlwh[:, 0:2]),
                        axis=1)
                else:
                    logger.warning(
                        'Frame {} has not object, try to modify score threshold.'.
                        format(frame_id))
                    empty_detections = True
            else:
                outs = self.model.detector(data)
                outs['bbox'] = outs['bbox'].numpy()
                outs['bbox_num'] = outs['bbox_num'].numpy()

                # empty_detections is still False here (set just above), so
                # only the length check decides this branch.
                if len(outs['bbox']) > 0 and empty_detections == False:
                    # detector outputs: pred_cls_ids, pred_scores, pred_bboxes
                    # Column 0 holds class ids, column 1 scores, the rest boxes.
                    pred_cls_ids = outs['bbox'][:, 0:1]
                    pred_scores = outs['bbox'][:, 1:2]
                    if not scaled:
                        # Note: scaled=False only in JDE YOLOv3 or other detectors
                        # with LetterBoxResize and JDEBBoxPostProcess.
                        #
                        # 'scaled' means whether the coords after detector outputs
                        # have been scaled back to the original image, set True
                        # in general detector, set False in JDE YOLOv3.
                        pred_bboxes = scale_coords(outs['bbox'][:, 2:],
                                                   input_shape, im_shape,
                                                   scale_factor)
                    else:
                        pred_bboxes = outs['bbox'][:, 2:]
                    # Detections before clip_box filtering; read only by the
                    # JDETracker branch further down.
                    pred_dets_old = np.concatenate(
                        (pred_cls_ids, pred_scores, pred_bboxes), axis=1)
                else:
                    logger.warning(
                        'Frame {} has not detected object, try to modify score threshold.'.
                        format(frame_id))
                    empty_detections = True

            if not empty_detections:
                # keep_idx[0] indexes the boxes that remain after clipping.
                pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
                if len(keep_idx[0]) == 0:
                    logger.warning(
                        'Frame {} has not detected object left after clip_box.'.
                        format(frame_id))
                    empty_detections = True

            if empty_detections:
                timer.toc()
                # if visualize, use original image instead
                online_ids, online_tlwhs, online_scores = None, None, None
                save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                 online_scores, timer.average_time, show_image,
                                 save_dir, self.cfg.num_classes)
                frame_id += 1
                # Skip the ReID model and the tracker for this frame.
                continue

            # Keep only the detections that survived clip_box.
            pred_cls_ids = pred_cls_ids[keep_idx[0]]
            pred_scores = pred_scores[keep_idx[0]]
            pred_dets = np.concatenate(
                (pred_cls_ids, pred_scores, pred_xyxys), axis=1)

            if use_reid:
                # Crop each kept box from the original image and embed it.
                crops = get_crops(
                    pred_xyxys,
                    ori_image,
                    w=tracker.input_size[0],
                    h=tracker.input_size[1])
                crops = paddle.to_tensor(crops)

                data.update({'crops': crops})
                pred_embs = self.model(data)['embeddings'].numpy()
            else:
                pred_embs = None

            if isinstance(tracker, DeepSORTTracker):
                online_tlwhs, online_scores, online_ids = [], [], []
                tracker.predict()
                online_targets = tracker.update(pred_dets, pred_embs)
                for t in online_targets:
                    # Only confirmed tracks updated within the last step.
                    if not t.is_confirmed() or t.time_since_update > 1:
                        continue
                    tlwh = t.to_tlwh()
                    tscore = t.score
                    tid = t.track_id
                    if tscore < draw_threshold: continue
                    # Drop boxes whose area is not above the minimum.
                    if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
                    # Drop boxes wider than the allowed w/h ratio (when set).
                    if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                            3] > tracker.vertical_ratio:
                        continue
                    online_tlwhs.append(tlwh)
                    online_scores.append(tscore)
                    online_ids.append(tid)
                timer.toc()

                # save results (everything is stored under class id 0)
                results[0].append(
                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
                save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                 online_scores, timer.average_time, show_image,
                                 save_dir, self.cfg.num_classes)

            elif isinstance(tracker, JDETracker):
                # trick hyperparams only used for MOTChallenge (MOT17, MOT20) Test-set
                tracker.track_buffer, tracker.conf_thres = get_trick_hyperparams(
                    seq_name, tracker.track_buffer, tracker.conf_thres)

                # NOTE(review): pred_dets_old is assigned only in the detector
                # branch above, so this line looks like it raises NameError
                # when detections are loaded from det_file - confirm.
                # NOTE(review): pred_dets_old is built before clip_box while
                # pred_embs comes from the clipped boxes; their row counts
                # may differ when clipping drops boxes - confirm.
                online_targets_dict = tracker.update(pred_dets_old, pred_embs)
                online_tlwhs = defaultdict(list)
                online_scores = defaultdict(list)
                online_ids = defaultdict(list)
                for cls_id in range(self.cfg.num_classes):
                    online_targets = online_targets_dict[cls_id]
                    for t in online_targets:
                        tlwh = t.tlwh
                        tid = t.track_id
                        tscore = t.score
                        # draw_threshold is not applied in this branch.
                        if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
                        if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
                                3] > tracker.vertical_ratio:
                            continue
                        online_tlwhs[cls_id].append(tlwh)
                        online_ids[cls_id].append(tid)
                        online_scores[cls_id].append(tscore)
                    # save results
                    results[cls_id].append(
                        (frame_id + 1, online_tlwhs[cls_id],
                         online_scores[cls_id], online_ids[cls_id]))
                timer.toc()
                save_vis_results(data, frame_id, online_ids, online_tlwhs,
                                 online_scores, timer.average_time, show_image,
                                 save_dir, self.cfg.num_classes)

            # For any other tracker type nothing is recorded or visualized,
            # but the frame counter still advances.
            frame_id += 1

        return results, frame_id, timer.average_time, timer.calls
Ejemplo n.º 4
0
def predict_video(detector, reid_model, camera_id):
    """Run detection + ReID tracking on a camera stream or video file.

    Frames are read from camera ``camera_id`` or, when ``camera_id`` is -1,
    from ``FLAGS.video_file``. Each frame goes through ``detector.predict``;
    unless the detector reports no object, crops of the detected boxes are
    passed to ``reid_model.predict`` and the tracks are drawn with
    ``plot_tracking``. Frames are written to an mp4 under
    ``FLAGS.output_dir`` or, when ``FLAGS.save_images`` is set, saved as
    numbered JPEGs that ffmpeg then assembles into the video. With
    ``FLAGS.save_mot_txts`` the results are handed to ``write_mot_results``.

    Args:
        detector: object whose ``predict([frame], scaled, threshold)``
            returns ``(pred_dets, pred_xyxys)``.
        reid_model: object providing ``get_crops(pred_xyxys, frame)`` and
            ``predict(crops, pred_dets)``; the latter returns
            ``(online_tlwhs, online_scores, online_ids)``.
        camera_id: index given to ``cv2.VideoCapture``; -1 selects
            ``FLAGS.video_file`` instead.
    """
    import subprocess  # only needed for the ffmpeg step below

    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
        video_name = 'mot_output.mp4'
    else:
        capture = cv2.VideoCapture(FLAGS.video_file)
        video_name = os.path.split(FLAGS.video_file)[-1]
    # splitext keeps working for names without an extension and keeps
    # dotted stems ("a.b.mp4" -> "a.b") distinct.
    video_stem = os.path.splitext(video_name)[0]
    out_path = os.path.join(FLAGS.output_dir, video_name)
    save_dir = os.path.join(FLAGS.output_dir, video_stem)

    writer = None
    results = defaultdict(list)
    try:
        # Get Video info : resolution, fps, frame count
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(capture.get(cv2.CAP_PROP_FPS))
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("fps: %d, frame_count: %d" % (fps, frame_count))

        os.makedirs(FLAGS.output_dir, exist_ok=True)
        if FLAGS.save_images:
            os.makedirs(save_dir, exist_ok=True)
        else:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

        frame_id = 0
        timer = MOTTimer()
        while True:
            ret, frame = capture.read()
            if not ret:
                break
            timer.tic()
            pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
                                                     FLAGS.threshold)

            # A single all-zero row is treated as "nothing detected".
            if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
                print('Frame {} has no object, try to modify score threshold.'.
                      format(frame_id))
                timer.toc()
                # Output the untouched frame so the video stays continuous.
                im = frame
            else:
                # reid process
                crops = reid_model.get_crops(pred_xyxys, frame)
                online_tlwhs, online_scores, online_ids = reid_model.predict(
                    crops, pred_dets)
                # Stored frame numbers are 1-based.
                results[0].append(
                    (frame_id + 1, online_tlwhs, online_scores, online_ids))
                timer.toc()

                # Processing speed shown on the frame; independent of the
                # fps the output video is encoded with.
                processing_fps = 1. / timer.average_time
                im = plot_tracking(frame,
                                   online_tlwhs,
                                   online_ids,
                                   online_scores,
                                   frame_id=frame_id,
                                   fps=processing_fps)

            if FLAGS.save_images:
                cv2.imwrite(
                    os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                    im)
            else:
                writer.write(im)

            frame_id += 1
            print('detect frame:%d' % (frame_id))

            if camera_id != -1:
                cv2.imshow('Tracking Detection', im)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always give the camera/file handle and the encoder back, even when
        # prediction or drawing raises part-way through.
        capture.release()
        if writer is not None:
            writer.release()

    if FLAGS.save_mot_txts:
        result_filename = os.path.join(FLAGS.output_dir, video_stem + '.txt')
        write_mot_results(result_filename, results)

    if FLAGS.save_images:
        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters reach ffmpeg unchanged.
        cmd = [
            'ffmpeg', '-f', 'image2', '-i',
            '{}/%05d.jpg'.format(save_dir), out_path
        ]
        try:
            completed = subprocess.run(cmd)
        except OSError as err:
            # e.g. ffmpeg is not installed; keep this step best-effort.
            print('Failed to run ffmpeg: {}'.format(err))
        else:
            if completed.returncode == 0:
                print('Save video in {}.'.format(out_path))
            else:
                print('ffmpeg exited with code {}.'.format(
                    completed.returncode))
Ejemplo n.º 5
0
def predict_video(detector, camera_id):
    """Run (multi-class) tracking on a camera stream or video file.

    Frames are read from camera ``camera_id`` or, when ``camera_id`` is -1,
    from ``FLAGS.video_file``. Each frame is passed to ``detector.predict``
    and the per-class tracks are drawn with ``plot_tracking_dict``. Frames
    are written to an mp4 under ``FLAGS.output_dir`` or, when
    ``FLAGS.save_images`` is set, saved as numbered JPEGs that ffmpeg then
    assembles into the video. With ``FLAGS.save_mot_txts`` the results are
    handed to ``write_mot_results`` as 'mcmot' data when the detector has
    more than one class and as 'mot' data otherwise.

    Args:
        detector: object exposing ``num_classes``, ``pred_config.labels``
            and ``predict([frame], threshold)``; the latter returns
            ``(online_tlwhs, online_scores, online_ids)``, each indexable by
            class id.
        camera_id: index given to ``cv2.VideoCapture``; -1 selects
            ``FLAGS.video_file`` instead.
    """
    import subprocess  # only needed for the ffmpeg step below

    video_name = 'mot_output.mp4'
    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
    else:
        capture = cv2.VideoCapture(FLAGS.video_file)
        video_name = os.path.split(FLAGS.video_file)[-1]
    # splitext keeps working for names without an extension and keeps
    # dotted stems ("a.b.mp4" -> "a.b") distinct.
    video_stem = os.path.splitext(video_name)[0]
    out_path = os.path.join(FLAGS.output_dir, video_name)
    save_dir = os.path.join(FLAGS.output_dir, video_stem)

    writer = None
    results = defaultdict(list)  # support single class and multi classes
    num_classes = detector.num_classes
    data_type = 'mcmot' if num_classes > 1 else 'mot'
    ids2names = detector.pred_config.labels
    try:
        # Get Video info : resolution, fps, frame count
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(capture.get(cv2.CAP_PROP_FPS))
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("fps: %d, frame_count: %d" % (fps, frame_count))

        os.makedirs(FLAGS.output_dir, exist_ok=True)
        if FLAGS.save_images:
            os.makedirs(save_dir, exist_ok=True)
        else:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

        frame_id = 0
        timer = MOTTimer()
        while True:
            ret, frame = capture.read()
            if not ret:
                break
            timer.tic()
            online_tlwhs, online_scores, online_ids = detector.predict(
                [frame], FLAGS.threshold)
            timer.toc()

            # One record per class and frame; frame numbers are 1-based.
            for cls_id in range(num_classes):
                results[cls_id].append(
                    (frame_id + 1, online_tlwhs[cls_id],
                     online_scores[cls_id], online_ids[cls_id]))

            # Processing speed shown on the frame; independent of the fps
            # the output video is encoded with.
            processing_fps = 1. / timer.average_time
            im = plot_tracking_dict(
                frame,
                num_classes,
                online_tlwhs,
                online_ids,
                online_scores,
                frame_id=frame_id,
                fps=processing_fps,
                ids2names=ids2names)
            if FLAGS.save_images:
                cv2.imwrite(
                    os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                    im)
            else:
                writer.write(im)

            frame_id += 1
            print('detect frame: %d' % (frame_id))
            if camera_id != -1:
                cv2.imshow('Tracking Detection', im)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always give the camera/file handle and the encoder back, even when
        # prediction or drawing raises part-way through.
        capture.release()
        if writer is not None:
            writer.release()

    if FLAGS.save_mot_txts:
        result_filename = os.path.join(FLAGS.output_dir, video_stem + '.txt')
        write_mot_results(result_filename, results, data_type, num_classes)

    if FLAGS.save_images:
        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters reach ffmpeg unchanged.
        cmd = [
            'ffmpeg', '-f', 'image2', '-i',
            '{}/%05d.jpg'.format(save_dir), out_path
        ]
        try:
            completed = subprocess.run(cmd)
        except OSError as err:
            # e.g. ffmpeg is not installed; keep this step best-effort.
            print('Failed to run ffmpeg: {}'.format(err))
        else:
            if completed.returncode == 0:
                print('Save video in {}.'.format(out_path))
            else:
                print('ffmpeg exited with code {}.'.format(
                    completed.returncode))
def mot_keypoint_unite_predict_video(mot_model,
                                     keypoint_model,
                                     camera_id,
                                     keypoint_batch_size=1):
    """Run tracking plus keypoint estimation on a camera stream or video.

    Frames are read from camera ``camera_id`` or, when ``camera_id`` is -1,
    from ``FLAGS.video_file``. Each frame is tracked with
    ``mot_model.predict``. For a 'keypoint_topdown' architecture the tracked
    boxes are converted with ``convert_mot_to_det`` and passed to
    ``predict_with_given_det``; otherwise ``keypoint_model.predict`` runs on
    the whole frame. Poses and tracks are drawn with ``draw_pose`` and
    ``plot_tracking_dict``. Frames are written to an mp4 under
    ``FLAGS.output_dir`` or, when ``FLAGS.save_images`` is set, saved as
    numbered JPEGs that ffmpeg then assembles into the video. With
    ``FLAGS.save_mot_txts`` the tracking results are handed to
    ``write_mot_results``.

    Args:
        mot_model: tracker exposing ``num_classes`` (must be 1) and
            ``predict([frame], threshold)``, which returns
            ``(online_tlwhs, online_scores, online_ids)`` indexable by
            class id.
        keypoint_model: keypoint predictor exposing ``pred_config.arch`` and
            ``predict([frame], threshold)``.
        camera_id: index given to ``cv2.VideoCapture``; -1 selects
            ``FLAGS.video_file`` instead.
        keypoint_batch_size: forwarded to ``predict_with_given_det``.

    Raises:
        AssertionError: if ``mot_model.num_classes`` is not 1.
    """
    import subprocess  # only needed for the ffmpeg step below

    if camera_id != -1:
        capture = cv2.VideoCapture(camera_id)
        video_name = 'output.mp4'
    else:
        capture = cv2.VideoCapture(FLAGS.video_file)
        video_name = os.path.split(FLAGS.video_file)[-1]
    # splitext keeps working for names without an extension and keeps
    # dotted stems ("a.b.mp4" -> "a.b") distinct.
    video_stem = os.path.splitext(video_name)[0]
    out_path = os.path.join(FLAGS.output_dir, video_name)
    save_dir = os.path.join(FLAGS.output_dir, video_stem)

    writer = None
    # support single class and multi classes, but should be single class here
    mot_results = defaultdict(list)
    data_type = 'mot'
    try:
        # Get Video info : resolution, fps, frame count
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(capture.get(cv2.CAP_PROP_FPS))
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print("fps: %d, frame_count: %d" % (fps, frame_count))

        os.makedirs(FLAGS.output_dir, exist_ok=True)
        if FLAGS.save_images:
            os.makedirs(save_dir, exist_ok=True)
        else:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

        frame_id = 0
        timer_mot = FPSTimer()
        timer_kp = FPSTimer()
        timer_mot_kp = FPSTimer()

        num_classes = mot_model.num_classes
        assert num_classes == 1, 'Only one category mot model supported for uniting keypoint deploy.'

        # The keypoint architecture does not change between frames, so
        # decide once whether the top-down path is used.
        keypoint_arch = keypoint_model.pred_config.arch
        use_topdown = (
            KEYPOINT_SUPPORT_MODELS[keypoint_arch] == 'keypoint_topdown')

        while True:
            ret, frame = capture.read()
            if not ret:
                break
            timer_mot_kp.tic()
            timer_mot.tic()
            online_tlwhs, online_scores, online_ids = mot_model.predict(
                [frame], FLAGS.mot_threshold)
            timer_mot.toc()
            # Stored frame numbers are 1-based.
            mot_results[0].append(
                (frame_id + 1, online_tlwhs[0], online_scores[0],
                 online_ids[0]))

            timer_kp.tic()
            if use_topdown:
                # Top-down: estimate keypoints inside the tracked boxes.
                results = convert_mot_to_det(online_tlwhs[0],
                                             online_scores[0])
                keypoint_results = predict_with_given_det(
                    frame, results, keypoint_model, keypoint_batch_size,
                    FLAGS.mot_threshold, FLAGS.keypoint_threshold,
                    FLAGS.run_benchmark)
            else:
                keypoint_results = keypoint_model.predict(
                    [frame], FLAGS.keypoint_threshold)
            timer_kp.toc()
            timer_mot_kp.toc()
            # Combined tracking + keypoint speed, shown on the frame.
            mot_kp_fps = 1. / timer_mot_kp.average_time

            im = draw_pose(
                frame,
                keypoint_results,
                visual_thread=FLAGS.keypoint_threshold,
                returnimg=True,
                ids=online_ids[0] if use_topdown else None)

            online_im = plot_tracking_dict(im,
                                           num_classes,
                                           online_tlwhs,
                                           online_ids,
                                           online_scores,
                                           frame_id=frame_id,
                                           fps=mot_kp_fps)

            im = np.array(online_im)

            # Incremented before saving, so image files start at 00001.
            frame_id += 1
            print('detect frame: %d' % (frame_id))

            if FLAGS.save_images:
                cv2.imwrite(
                    os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                    im)
            else:
                writer.write(im)
            if camera_id != -1:
                cv2.imshow('Tracking and keypoint results', im)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always give the camera/file handle and the encoder back, even when
        # prediction or drawing raises part-way through.
        capture.release()
        if writer is not None:
            writer.release()

    if FLAGS.save_mot_txts:
        result_filename = os.path.join(FLAGS.output_dir, video_stem + '.txt')
        write_mot_results(result_filename, mot_results, data_type,
                          num_classes)

    if FLAGS.save_images:
        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters reach ffmpeg unchanged.
        cmd = [
            'ffmpeg', '-f', 'image2', '-i',
            '{}/%05d.jpg'.format(save_dir), out_path
        ]
        try:
            completed = subprocess.run(cmd)
        except OSError as err:
            # e.g. ffmpeg is not installed; keep this step best-effort.
            print('Failed to run ffmpeg: {}'.format(err))
        else:
            if completed.returncode == 0:
                print('Save video in {}.'.format(out_path))
            else:
                print('ffmpeg exited with code {}.'.format(
                    completed.returncode))