Example #1
    def __init__(self, args, data_set, ENABLE_TRACKING=None):
        self.tracker = Sort()
        self.args = args
        self.nms_thres = args.nms
        self.triplet_nms_thres = args.triplet_nms
        self.obj_thres = args.obj_thres
        self.triplet_thres = args.triplet_thres
        self.tobefiltered_objects = [26, 53, 134, 247, 179, 74, 226, 135, 145, 300, 253, 95, 11, 102, 87]
        # 26: wheel, 53: backpack, 143:light, 247:camera, 179:board
        # 74:shoe, 226:chair, 135:shelf, 145:button, 300:cake, 253:knob, 95:wall, 11:door, 102:mirror,87:ceiling
        if ENABLE_TRACKING is None:
            self.ENABLE_TRACKING = False if self.args.dataset == 'visual_genome' else True
        else:
            self.ENABLE_TRACKING = ENABLE_TRACKING
        if self.ENABLE_TRACKING and self.args.path_opt.split('/')[-1] == 'VG-DR-Net.yaml':
            self.tobefiltered_predicates = [0, 6, 10, 18, 19, 20, 22, 23, 24]
            # 0:backgrounds, 6:eat, 10:wear, 18:ride, 19:watch, 20:play, 22:enjoy, 23:read, 24:cut

        elif self.ENABLE_TRACKING and self.args.path_opt.split('/')[-1] == 'VG-MSDN.yaml':
            self.tobefiltered_predicates = [12, 18, 27, 28, 30, 31, 32, 35]
        else:
            self.tobefiltered_predicates = []

        # Params for Statistics Based Scene Graph Inference
        self.relation_statistics = relation_prior.load_obj("model/prior/preprocessed/relation_prior_prob")
        self.joint_probability = relation_prior.load_obj("model/prior/preprocessed/object_prior_prob")
        self.spurious_rel_thres = 0.07
        self.rel_infer_thres = 0.9
        self.obj_infer_thres = 0.001
        self.data_set = data_set
        self.detected_obj_set = set()
        self.fasttext = torchtext.vocab.FastText()
        self.word_vecs, self.word_itos, self.word_stoi = self.prepare_wordvecs(num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES)
        self.pred_stoi = {self.data_set.predicate_classes[i]: i for i in range(len(self.data_set.predicate_classes))}
Example #2
    def __init__(self, classes, tracker='sort'):
        self.ttype = tracker
        self.classes = classes
        if tracker == 'deep_sort':
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker

            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)  #param
            self.nms_max_overlap = 0.1  #param
            model_path = os.path.join(WORK_DIR, MODEL_DIR,
                                      "mars-small128.ckpt-68577")
            self.encoder = generate_detections.create_box_encoder(model_path)
            self.tracker = Tracker(metric)

            from deep_sort.application_util import preprocessing as prep
            from deep_sort.deep_sort.detection import Detection
            self.prep = prep
            self.Detection = Detection

        elif tracker == 'sort':
            from sort.sort import Sort
            self.tracker = Sort()

        self.trackers = {}
Example #3
 def mergeSort(self):
     test = [
         23, 2, 1, 4213, 43, 1, 21, 4, 1, 213, 43, 23423, 53, 21, 221,
         342312123
     ]
     sortObj = Sort()
     sortObj.mergeSort2(test, 0, len(test) - 1, 'main')
     print(test)
Example #4
 def insertionSort(self):
     test = [
         23, 2, 1, 4213, 43, 1, 21, 4, 1, 213, 43, 23423, 53, 21, 221,
         342312123
     ]
     sortObj = Sort()
     sortObj.insertionSort_While(test)
     print(test)
Example #5
class MultiObjectSORTTracker(MultiObjectTracker):
    def __init__(self, flags, logger):
        self._logger = logger
        self.tracker = Sort(max_age=flags.obstacle_track_max_age,
                            min_hits=1,
                            min_iou=flags.min_matching_iou)

    def reinitialize(self, frame, obstacles):
        """ Reinitializes a multiple obstacle tracker.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to reinitialize with.
            obstacles : List of perception.detection.obstacle.Obstacle.
        """
        detections, labels, ids = self.convert_detections_for_sort_alg(
            obstacles)
        self.tracker.update(detections, labels, ids)

    def track(self, frame):
        """ Tracks obstacles in a frame.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to track in.
        """
        # each track in tracks has format ([xmin, ymin, xmax, ymax], id)
        obstacles = []
        for track in self.tracker.trackers:
            coords = track.predict()[0].tolist()
            # changing to xmin, xmax, ymin, ymax format
            xmin = int(coords[0])
            xmax = int(coords[2])
            ymin = int(coords[1])
            ymax = int(coords[3])
            if xmin < xmax and ymin < ymax:
                bbox = BoundingBox2D(xmin, xmax, ymin, ymax)
                obstacles.append(Obstacle(bbox, 0, track.label, track.id))
            else:
                self._logger.error(
                    "Tracker found invalid bounding box {} {} {} {}".format(
                        xmin, xmax, ymin, ymax))
        return True, obstacles

    def convert_detections_for_sort_alg(self, obstacles):
        converted_detections = []
        labels = []
        ids = []
        for obstacle in obstacles:
            bbox = [
                obstacle.bounding_box_2D.x_min, obstacle.bounding_box_2D.y_min,
                obstacle.bounding_box_2D.x_max, obstacle.bounding_box_2D.y_max,
                obstacle.confidence
            ]
            converted_detections.append(bbox)
            labels.append(obstacle.label)
            ids.append(obstacle.id)
        return (np.array(converted_detections), labels, ids)
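A minimal sketch of what convert_detections_for_sort_alg produces, using hypothetical stand-ins for pylot's Obstacle and BoundingBox2D types (field names taken from the method above); the method does not touch self, so it can be exercised unbound:

from types import SimpleNamespace

# hypothetical obstacle carrying only the fields the converter reads
box = SimpleNamespace(x_min=10, y_min=20, x_max=50, y_max=80)
obstacle = SimpleNamespace(bounding_box_2D=box, confidence=0.9, label='car', id=7)

dets, labels, ids = MultiObjectSORTTracker.convert_detections_for_sort_alg(None, [obstacle])
# dets -> array([[10. , 20. , 50. , 80. ,  0.9]]), labels -> ['car'], ids -> [7]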
Example #6
def assign_ids(detections):
    """
    :param detections:
    :return:
    """
    mot_tracker = Sort()

    tracked_detections = []
    for detections_frame_bboxes in detections:
        if len(detections_frame_bboxes) == 0:
            detections_frame_bboxes = np.zeros((0, 5))
        tracked_detections.append(
            mot_tracker.update(np.array(detections_frame_bboxes)))

    return tracked_detections
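A minimal usage sketch for assign_ids, assuming the standard abewley/sort behavior where Sort.update() returns rows of [x1, y1, x2, y2, track_id]:

# per-frame detections as [x1, y1, x2, y2, score]; the second frame has none
detections = [
    [[10, 20, 50, 80, 0.9], [100, 40, 160, 120, 0.8]],
    [],
    [[12, 22, 52, 82, 0.85]],
]
tracked = assign_ids(detections)
# tracked[i] is an (M, 5) array of [x1, y1, x2, y2, track_id] for frame i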
Example #7
 def initTrack(self):
     if self.track:
         self.progressBar.setValue(20)
         if self.options["track"]:
             if self.options["tracker"] == "deep_sort":
                 from deep_sort import generate_detections
                 from deep_sort.deep_sort import nn_matching
                 from deep_sort.deep_sort.tracker import Tracker
                 self.progressBar.setValue(50)
                 metric = nn_matching.NearestNeighborDistanceMetric(
                     "cosine", 0.2, 100)
                 self.tracker = Tracker(metric)
                 self.encoder = generate_detections.create_box_encoder(
                     os.path.abspath(
                         "deep_sort/resources/networks/mars-small128.ckpt-68577"
                     ))
             elif self.options["tracker"] == "sort":
                 from sort.sort import Sort
                 self.encoder = None
                 self.tracker = Sort()
                 self.progressBar.setValue(50)
         if self.options["BK_MOG"] and self.options["track"]:
             fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
         self.progressBar.setValue(60)
         self.initTFNet()
     else:
         self.initTFNet()
Example #8
File: guiPy.py  Project: grh1cob/Deepsense
def gui(self):
    source = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()

    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if self.FLAGS.csv:
        f = open('{}.csv'.format(source), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    App(tkinter.Tk(), "Tkinter and OpenCV", 0, tracker, encoder)
Example #9
 def input_track(self):
     """
     Utility function to initialize the sort algorithm
     :return: (tracker, encoder) tuple (encoder is None for SORT)
     """
     from sort.sort import Sort
     Tracker = Sort()
     return Tracker, None
Example #10
def video_detect(model, path_to_video, threshold=0.6, track=True):
    mot_tracker = Sort()
    cap = cv2.VideoCapture(path_to_video)
    out = cv2.VideoWriter(path_to_video + '-detections.avi',
                          cv2.VideoWriter_fourcc(*'XVID'), 30.0, (640, 480))
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    with torch.no_grad():
        model.eval()
        model.to(device)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print('No more frames')
                break
            pil_img = Image.fromarray(frame)
            tensor_img = to_tensor(pil_img).unsqueeze_(0)
            dets = model(tensor_img.to(device))
            if track:
                tracked_dets = None
                for box, score in zip(dets[0]['boxes'], dets[0]['scores']):
                    if score.item() >= threshold:
                        tracked_det = np.array([
                            torch.cat(
                                (box,
                                 score.reshape(1))).detach().cpu().numpy()
                        ])
                        tracked_dets = np.concatenate(
                            (tracked_dets, tracked_det
                             )) if tracked_dets is not None else tracked_det
                tracked_dets = mot_tracker.update(
                    tracked_dets if tracked_dets is not None else np.empty((
                        0, 5)))
                out.write(np.array(draw_object_id(tracked_dets, pil_img)))
            else:
                out.write(
                    np.array(
                        draw_class_labels(dets,
                                          tensor_img,
                                          get_coco_classes(),
                                          threshold=threshold)[0]))
    cap.release()
    out.release()
    cv2.destroyAllWindows()
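The per-box concatenation above can also be done in one vectorized step; a sketch, assuming dets[0]['boxes'] and dets[0]['scores'] as returned by the torchvision-style model in the example:

boxes = dets[0]['boxes'].detach().cpu().numpy()
scores = dets[0]['scores'].detach().cpu().numpy()
keep = scores >= threshold
dets_for_sort = np.hstack([boxes[keep], scores[keep, None]])  # (M, 5): x1, y1, x2, y2, score
tracked_dets = mot_tracker.update(dets_for_sort)              # (K, 5): x1, y1, x2, y2, track_id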
Example #11
    def input_track(self):
        """
        Utility function to initialize the sort algorithm
        :return: (tracker, encoder) tuple; both are None if no supported tracker is configured
        """
        encoder = None
        Tracker = None
        if self.options.tracker == 'sort':
            from sort.sort import Sort
            Tracker = Sort()

        return Tracker, encoder
Example #12
def track(video_path, use_gpu=False):
    video = cv2.VideoCapture(video_path)
    ret, frame = video.read()
    if ret:
        frame = cv2.resize(frame, (input_width, input_height))

    if use_gpu:
        caffe.set_mode_gpu()

    tracker = Sort(max_age=10)
    detector = Detector()
    classes = detector.get_classes()

    while ret:
        frame_disp = np.copy(frame)
        bounding_boxes, counting = detector.infer(frame)
        class_counting = zip(classes, counting)

        for pair in class_counting:
            print('{:s} {:03d}'.format(*pair))
        print('')

        if len(bounding_boxes) > 0:
            bounding_boxes = np.array(bounding_boxes, np.int32)

            # convert (x, y, w, h) to (x1, y1, x2, y2)
            bounding_boxes[:, 2:4] += bounding_boxes[:, 0:2]
            bounding_boxes[:, 2:4] -= 1

        track_results = tracker.update(bounding_boxes)
        draw_tracking_results(track_results, frame_disp)

        cv2.imshow('tracking', frame_disp)

        key = cv2.waitKey(1)
        if key == 27:
            return

        ret, frame = video.read()
        if ret:
            frame = cv2.resize(frame, (input_width, input_height))
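The in-place (x, y, w, h) to (x1, y1, x2, y2) conversion above, traced on a single box (a sketch; the -1 makes the right/bottom edge inclusive):

import numpy as np

box = np.array([10, 20, 30, 40], np.int32)  # x, y, w, h
box[2:4] += box[0:2]                        # -> [10, 20, 40, 60]
box[2:4] -= 1                               # -> [10, 20, 39, 59] == x1, y1, x2, y2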
Example #13
    def __init__(self, **kwargs):
        print(kwargs)
        self._state = {}
        self._statistics = []
        self._class_count = dict(
            zip(kwargs['class_ids'], np.zeros(len(kwargs['class_ids']))))
        # if distance between centers of two bboxes is less than _max_distance then object is staying
        self._max_distance = kwargs[
            'max_distance'] if 'max_distance' in kwargs else DEFAULT_MAX_DISTANCE_BETWEEN_POINTS

        # after _warmup_frames we start to compare bbox's centers for one tracked object
        self._warmup_frames = kwargs[
            'warmup_frames'] if 'warmup_frames' in kwargs else DEFAULT_WARMUP_FRAMES

        self._line_y = kwargs['line_y'] if 'line_y' in kwargs else 0

        min_hits = kwargs[
            'min_hits'] if 'min_hits' in kwargs else DEFAUTL_MIN_HITS
        max_age = kwargs['max_age'] if 'max_age' in kwargs else DEFAULT_MAX_AGE
        #self.display_config()
        self._mot_tracker = Sort(max_age, min_hits)
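The optional-kwargs handling above can be written more compactly with dict.get; a sketch with the same defaults (keeping the DEFAUTL_MIN_HITS constant name as spelled in the source):

max_distance = kwargs.get('max_distance', DEFAULT_MAX_DISTANCE_BETWEEN_POINTS)
warmup_frames = kwargs.get('warmup_frames', DEFAULT_WARMUP_FRAMES)
line_y = kwargs.get('line_y', 0)
min_hits = kwargs.get('min_hits', DEFAUTL_MIN_HITS)
max_age = kwargs.get('max_age', DEFAULT_MAX_AGE)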
Example #14
    def __init__(
        self,
        cap,
        mask_filename,
        warp_filename,
        threshold,
        prefix="",
        output_path="crop_images",
    ):
        """CaptureProcessor starts a thread for processing ROIs defined in a mask file.
        The processor does the following tasks:

        - Crops the images to match masks
        - Warps ROI images to remove perspective distortion (if necessary)
        - Saves ROI images to file system (encrypted if necessary)
        - Detects vehicles in ROIs using Yolo object detection
        - Tracks vehicles using SORT algorithm
        - Saves metadata to a JSON file


        Args:
            cap (cv2.VideoCapture): OpenCV's VideoCapture object for either camera or video stream
            mask_filename (str): Filename of mask file in PNG format
            warp_filename (str): Filename of warp file in JSON format
            threshold (int): Threshold for perceptual hash to detect motion in ROI
            prefix (str, optional): Prefix for image and metadata files. Defaults to "".
            output_path (str, optional): Folder to save images and metadata. Defaults to "crop_images".
        """
        self.keep_processing = False
        self.cap = cap
        self.threshold = threshold
        self.prefix = prefix
        self.output_path = output_path
        self.mask_filename = mask_filename
        self.warp_filename = warp_filename
        self.image_cache = []
        self.keep_sending_after_phash_diff = 2.5  # seconds
        self.yolo = Yolov5()
        self.tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)
Example #15
class CaptureProcessor:
    def __init__(
        self,
        cap,
        mask_filename,
        warp_filename,
        threshold,
        prefix="",
        output_path="crop_images",
    ):
        """CaptureProcessor starts a thread for processing ROIs defined in a mask file.
        The processor does the following tasks:

        - Crops the images to match masks
        - Warps ROI images to remove perspective distortion (if necessary)
        - Saves ROI images to file system (encrypted if necessary)
        - Detects vehicles in ROIs using Yolo object detection
        - Tracks vehicles using SORT algorithm
        - Saves metadata to a JSON file


        Args:
            cap (cv2.VideoCapture): OpenCV's VideoCapture object for either camera or video stream
            mask_filename (str): Filename of mask file in PNG format
            warp_filename (str): Filename of warp file in JSON format
            threshold (int): Threshold for perceptual hash to detect motion in ROI
            prefix (str, optional): Prefix for image and metadata files. Defaults to "".
            output_path (str, optional): Folder to save images and metadata. Defaults to "crop_images".
        """
        self.keep_processing = False
        self.cap = cap
        self.threshold = threshold
        self.prefix = prefix
        self.output_path = output_path
        self.mask_filename = mask_filename
        self.warp_filename = warp_filename
        self.image_cache = []
        self.keep_sending_after_phash_diff = 2.5  # seconds
        self.yolo = Yolov5()
        self.tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    def start(self):
        """Start processing thread"""
        self.keep_processing = True
        self.mask = Mask(self.mask_filename)
        self.warp = Warp(self.warp_filename)

        self.yolo_thread = Thread(target=self._yolo_process, args=())
        self.yolo_thread.daemon = True
        self.yolo_thread.start()
        previous_roi_hash = [
            imagehash.phash(Image.fromarray(np.zeros((10, 10))))
        ] * self.mask.ROI_count()
        try:
            spf = 1 / float(self.cap.get(cv2.CAP_PROP_FPS))
        except Exception:
            # our camera does not provide FPS, low value to never wait
            spf = 0.01
        frame_no = -1

        keep_sending = 0
        frame_cache = []
        self.image_cache = []
        frame_date = datetime.now()
        while self.keep_processing:
            # prevent loop lock
            sleep(spf)
            if not self.cap.isOpened():
                sleep(0.5)
                continue
            ret, im = self.cap.read()

            if not ret:
                continue
            if im is None:
                continue
            try:
                if frame_no == self.cap.frame:
                    # we read the same frame twice.
                    continue
                frame_no = self.cap.frame
            except Exception:
                frame_no += 1
            try:
                frame_date = self.cap.frame_date
            except Exception:
                frame_date = datetime.now()

            if time() - keep_sending < self.keep_sending_after_phash_diff:
                # store frames for X seconds after movement
                frame_cache.append((frame_date, frame_no, im))
                im_last = im.copy()
                continue

            if len(frame_cache) > 0:
                # insert the whole block of frames at once
                # sanity check, cache can not be too big:
                # RAM can handle ~ 300 blocks/time to record
                if len(self.image_cache
                       ) < 300 / self.keep_sending_after_phash_diff:
                    self.image_cache.append(frame_cache)
                frame_cache = []
                # set phash based on last image in the block
                for i, roi_im in enumerate(self.mask.apply_ROIs(im_last)):
                    roi_im = self.warp.apply(roi_im, i)
                    roi_hash = imagehash.phash(Image.fromarray(roi_im))
                    previous_roi_hash[i] = roi_hash

            for i, roi_im in enumerate(self.mask.apply_ROIs(im)):
                roi_im = self.warp.apply(roi_im, i)
                roi_hash = imagehash.phash(Image.fromarray(roi_im))

                if previous_roi_hash[i] - roi_hash > self.threshold:
                    # some ROI contains change, keep caching images!
                    keep_sending = time()
                    frame_cache.append((frame_date, frame_no, im))
                    # break from ROI loop
                    break

    def stop(self):
        """Stop processing thread"""
        self.keep_processing = False

    def _yolo_process(self):
        """Run YOLO object detection and update tracker"""
        while self.keep_processing:
            # prevent loop lock
            sleep(0.01)

            if len(self.image_cache) == 0:
                continue
            started = time()
            image_list = self.image_cache.pop(0)
            frames_count = len(image_list)
            # skip frames if we're much behind
            # it could be even more sensitive, we used to get every 3rd frame before this
            # Heuristic model to increase skipping. go to 50% rate quite fast, and top at ~100 cache length
            try:
                skip_rate = int(-6 + 21 * np.log(len(self.image_cache) - 0.8))
            except ValueError:
                skip_rate = 0
            # Skip some frames anyway. we have enough FPS
            skip_rate = max(DEFAULT_SKIPRATE, skip_rate)
            frame_skip = self._discard_n(int(skip_rate), 100)
            timestamp = ""
            for list_index, (frame_date, frame_no,
                             im) in enumerate(image_list):
                if frame_skip[list_index % len(frame_skip)] == 1:
                    # skip frames if queue starts to get too long
                    continue
                if not self.keep_processing:
                    break
                detections = None
                for i, roi_im in enumerate(self.mask.apply_ROIs(im)):
                    roi_im = self.warp.apply(roi_im, i)
                    timestamp = frame_date.strftime(
                        "%Y_%m_%d_%H_%M_%S_%f")[:-3]
                    frame_name = (self.prefix +
                                  f"_ts_{timestamp}_roi_{i:02d}_f_{frame_no}")
                    metadata_name = frame_name + ".json"

                    if ENCRYPT:
                        frame_name += ".aes"
                        encrypt_image(
                            os.path.join(self.output_path, frame_name), roi_im)
                        if DEBUG:
                            cv2.imwrite(
                                os.path.join(self.output_path,
                                             frame_name + ".jpg"),
                                roi_im,
                            )
                    else:
                        frame_name += ".jpg"
                        cv2.imwrite(
                            os.path.join(self.output_path, frame_name),
                            roi_im,
                            [int(cv2.IMWRITE_JPEG_QUALITY), 97],
                        )

                    if not detections:
                        start_yolo = time()
                        all_detections = self.yolo.detect(im)
                        end_yolo = time()
                        detections = [
                            d for d in all_detections
                            if d["label"] in VALID_VEHICLE_CLASSES
                        ]

                        bboxes = np.array([det["bbox"] for det in detections])
                        confidences = np.array(
                            [det["confidence"] for det in detections])

                        start_tracker = time()
                        tracks = None
                        if bboxes.shape[0] == 0 or confidences.shape[0] == 0:
                            tracks = self.tracker.update()
                        else:
                            tracks = self.tracker.update(np.c_[bboxes,
                                                               confidences])

                    roi_detections, roi_iods = self.mask.get_roi_detections(
                        detections, i)

                    track_ids = []
                    if roi_detections:
                        track_ids = self._track_ids_for_detections(
                            im, roi_detections, tracks)
                    end_tracker = time()
                    roi_metadata = {}
                    roi_metadata["detections"] = roi_detections
                    roi_metadata["iods"] = roi_iods
                    roi_metadata["track_ids"] = track_ids
                    roi_metadata["roi_offset"] = self.mask.get_roi_offset(i)
                    roi_metadata["roi_dims"] = [
                        roi_im.shape[1], roi_im.shape[0]
                    ]

                    with open(
                            os.path.join(self.output_path, metadata_name),
                            "w",
                            encoding="utf-8",
                    ) as f:
                        json.dump(roi_metadata, f, ensure_ascii=False)
                    logging.info(
                        "TIMERS: YOLO: {}s, tracker: {}s,  skipper: {}%, cache: {}, tracks: {}"
                        .format(
                            round(end_yolo - start_yolo, 2),
                            round(end_tracker - start_tracker, 2),
                            sum(frame_skip),
                            len(self.image_cache),
                            str(track_ids),
                        ))

            logging.info(
                "YOLO block analysis time. {}s {}FPS, blocks {}, last ts {}".
                format(
                    int(time() - started),
                    round(frames_count / (time() - started), 2),
                    len(self.image_cache),
                    timestamp,
                ))

    def _track_ids_for_detections(self, im, detections, tracks):
        """This function maps bounding boxes received from SORT tracking back to
        original object detections. Matches are determined using a suitable distance threshold.

        Args:
            im (numpy.ndarray): Input image whose dimensions are used to determine suitable threshold
            detections (List): List of dictionaries containing object detection data
            tracks (numpy.ndarray): Bounding boxes and tracking identifiers from SORT algorithm

        Returns:
            List: Tracking identifiers matching object detections
        """
        track_ids = [-1] * len(detections)
        bboxes = np.array([det["bbox"] for det in detections])

        # SORT does not return an index for detection so set threshold based on image size
        sort_match_limit = np.square((im.shape[0] + im.shape[1]) * 0.5 * 0.02)

        for i in range(tracks.shape[0]):
            ss = np.sum(np.square(bboxes - tracks[i, :4]), axis=1)
            min_row = np.argmin(ss, axis=0)

            if ss[min_row] < sort_match_limit:
                track_ids[min_row] = int(tracks[i, 4])
            else:
                track_ids[min_row] = -1

        return track_ids

    def _discard_n(self, n, length=30):
        """from 30 FPS hypothesis, discard N frames.

        Args:
            n (int): Number frames to skip (number of 1's in output array)
            length (int, optional): Length of output array. Defaults to 30.

        Returns:
            List: Array of zeros and ones
        """

        if n <= 0:
            return [0] * length
        if n >= length:
            return [1] * length
        if n < length / 2:
            lin_num = n + 1
            values = (1, 0)
            start_value = 0
        else:
            lin_num = (length - n) + 1
            values = (0, 1)
            start_value = 1
        include = np.linspace(0, length - 1,
                              num=lin_num).astype("int").tolist()
        e = [
            values[0] if k in include else values[1]
            for k in reversed(range(length))
        ]
        e[0] = start_value
        return e
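A quick check of the _discard_n frame-skip helper (a sketch; the helper uses no instance state, so __init__ can be bypassed):

cp = CaptureProcessor.__new__(CaptureProcessor)
print(cp._discard_n(3, length=10))
# -> [0, 0, 0, 1, 0, 0, 1, 0, 0, 1]   (three frames marked 1, i.e. skipped)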
Example #16
 def __init__(self, flags, logger):
     self._logger = logger
     self.tracker = Sort(max_age=flags.obstacle_track_max_age,
                         min_hits=1,
                         min_iou=flags.min_matching_iou)
Example #17
    if len(sys.argv) == 1:
        # display help message when no args are passed.
        parser.print_help()
        sys.exit(1)

    if args.date is None or args.campose is None:
        raise argparse.ArgumentTypeError('Please specify the date and camera pose for video clips first!')
    else:
        date = args.date
        cam_pose = args.campose

    total_pcount_each_minute = np.zeros((12, 60), dtype=np.int32)       # 12 hours, from 10:00 to 22:00

    # prepare id tracker
    mot_tracker = Sort(max_age=10, min_hits=3)

    for hour in np.arange(10,22):
        for minute in np.arange(60):
            print("loading ../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4".format(cam_pose, date, hour, minute))
            cap = cv2.VideoCapture('../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4'.format(cam_pose, date, hour, minute))

            mot_tracker.update([])      # just in case the first file does not exist

            while (cap.isOpened()):
                ret, frame = cap.read()
                if ret:
                    # resize
                    img = cv2.resize(frame, net_shape[::-1], interpolation=cv2.INTER_CUBIC)
                    # start = time.time()
                    rclasses, rscores, rbboxes = process_image(img, net_shape=net_shape)
                    help="path to the image mask. Default: mask.png",
                    default="mask.png",
                    type=str)

args = parser.parse_args()

if __name__ == "__main__":

    register_coco_instances("my_dataset", {'thing_classes': CLASS_NAMES}, "",
                            "")
    dataset_metadata = MetadataCatalog.get("my_dataset")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml")
    )  # fetch the model config being used
    cfg.MODEL.WEIGHTS = "model_final.pth"  # path to the best model weights found
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the detection threshold to 50% (objects detected with lower confidence are ignored)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(
        CLASS_NAMES)  # number of classes to detect

    detector = DefaultPredictor(cfg)
    tracker = Sort(max_age=40)

    detect_on_video(args.video_file,
                    args.save_to,
                    detector,
                    tracker,
                    mask_file=args.image_mask,
                    to_mp4=True)
Example #19
# Camera variables
CAMERA_INFO = None
CAMERA_EXTRINSICS = None
CAMERA_PROJECTION_MATRIX = None

# Frames
RADAR_FRAME = 'ti_mmwave'
EGO_VEHICLE_FRAME = 'rviz'
CAMERA_FRAME = 'rc_car/camera'

# Perception models
yolov3 = YOLO(configPath='cfg/yolov3-rc.cfg',
              weightPath='weights/yolov3-rc.weights',
              metaPath='cfg/rc-car_shoes.data')
ipm = InversePerspectiveMapping()
tracker = Sort(max_age=200, min_hits=1, use_dlib=False)

# FPS loggers
FRAME_COUNT = 0
all_fps = FPSLogger('Pipeline')
yolo_fps = FPSLogger('YOLOv3')
sort_fps = FPSLogger('Tracker')

########################### Functions ###########################


def camera_info_callback(camera_info):
    global CAMERA_INFO, CAMERA_PROJECTION_MATRIX
    if CAMERA_INFO is None:
        CAMERA_INFO = camera_info
        CAMERA_PROJECTION_MATRIX = np.matmul(
Example #20
def detect_and_track(file_path, save_path, detection_mode="SSD"):
    # if saving the output video, define its size
    size = (640, 480)
    save_fps = 24
    # assume at most 300 objects per frame; generate 300 random colors
    colours = np.random.rand(300, 3) * 255
    # set to True to save the processed video
    write_video_flag = True
    video_capture = cv2.VideoCapture(file_path)
    mot_tracker = Sort()

    if write_video_flag:
        output_video = cv2.VideoWriter(
            save_path + 'output.mp4',
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), save_fps, size)
        object_list_file = open(save_path + 'detection.txt', 'w')
        frame_index = -1

    if detection_mode == "SSD":
        ssd = SSD()
    elif detection_mode == "YOLO3":
        yolo = YOLO()
    elif detection_mode == "CENTERNET":
        centernet = CenterNet()

    # appear records how many frames each object has been visible; number counts all distinct objects seen
    appear = {}
    number = 0

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        frame = cv2.resize(frame, size)
        # record when processing of this frame starts
        start_time = time.time()
        if detection_mode == "SSD":
            image = frame
            classes, scores, bboxes = ssd.process_image(image)
            # get the top-left and bottom-right coordinates of each detected object
            result = np.array(
                detect_and_visualization_image.plt_bboxes(
                    image, classes, scores, bboxes))
            rbboxes = []
            for object in result:
                rbboxes.append([object[0], object[1], object[2], object[3]])

        elif detection_mode == "YOLO3":
            image = Image.fromarray(frame[..., ::-1])
            # bboxes are [x, y, w, h]; scores are confidences; rbboxes are top-left + bottom-right coordinates
            bboxes, scores, rbboxes = yolo.detect_image(image)
            result = []
            for box, score in zip(rbboxes, scores):
                # track using the top-left and bottom-right corners; the image origin is the top-left, x grows rightward and y downward
                ymin, xmin, ymax, xmax = box
                xmin, ymin = max(0,
                                 np.floor(xmin + 0.5).astype('int32')), max(
                                     0,
                                     np.floor(ymin + 0.5).astype('int32'))
                xmax, ymax = min(image.size[0],
                                 np.floor(xmax + 0.5).astype('int32')), min(
                                     image.size[1],
                                     np.floor(ymax + 0.5).astype('int32'))
                result.append([xmin, ymin, xmax, ymax, score])
            result = np.array(result)
        elif detection_mode == "CENTERNET":
            image = frame
            # the returned boxes here are top-left and bottom-right coordinates
            rbboxes, scores, classes = centernet.detect_image(image)
            result = []
            for i in range(len(rbboxes)):
                result.append([
                    rbboxes[i][0], rbboxes[i][1], rbboxes[i][2], rbboxes[i][3],
                    scores[i]
                ])
            result = np.array(result)

        if len(result) != 0:
            # use the object detection results
            det = result[:, 0:5]
        else:
            det = result
        # run SORT for data association and tracking
        trackers = mot_tracker.update(det)
        for object in trackers:
            xmin, ymin, xmax, ymax, index = int(object[0]), int(
                object[1]), int(object[2]), int(object[3]), int(object[4])
            color = (int(colours[index % 300, 0]), int(colours[index % 300,
                                                               1]),
                     int(colours[index % 300, 2]))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(frame, str(index), (xmin, ymin), 0, 5e-3 * 200, color,
                        2)
            if index in appear.keys():
                appear[index] += 1
            else:
                number += 1
                appear[index] = 1

        show_fps = 1. / (time.time() - start_time)
        cv2.putText(frame,
                    text="FPS: " + str(int(show_fps)),
                    org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        cv2.putText(frame,
                    text="number: " + str(number),
                    org=(3, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        cv2.imshow('result', frame)

        if write_video_flag:
            # save each frame of the output video
            output_video.write(frame)
            # advance the output frame index
            frame_index = frame_index + 1
            # write the next frame index to detection.txt
            object_list_file.write(str(frame_index) + ' ')
            # write the detected object positions for this frame, i.e. the
            # top-left and bottom-right coordinates of each box
            if len(rbboxes) != 0:
                for i in range(0, len(rbboxes)):
                    object_list_file.write(
                        str(rbboxes[i][0]) + ' ' + str(rbboxes[i][1]) + ' ' +
                        str(rbboxes[i][2]) + ' ' + str(rbboxes[i][3]) + ' ')
            object_list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if write_video_flag:
        output_video.release()
        object_list_file.close()
    cv2.destroyAllWindows()
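The appear/number bookkeeping above can also be expressed with a set of seen track IDs; a sketch with the same counting semantics, assuming integer SORT track IDs:

seen_ids = set()
frames_per_id = {}
for obj in trackers:                     # rows of [x1, y1, x2, y2, track_id]
    track_id = int(obj[4])
    seen_ids.add(track_id)
    frames_per_id[track_id] = frames_per_id.get(track_id, 0) + 1
number = len(seen_ids)                   # distinct objects seen so far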
Example #21
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    camera = skvideo.io.VideoCapture(file)

    if file == 0:
        self.say('Press [ESC] to quit video')

    assert camera.isOpened(), \
    'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  #camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if SaveVideo:
        if file == 0:  #camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = get_fps_rate(file)

        output_file = 'output_{}'.format(file)
        if os.path.exists(output_file):
            os.remove(output_file)

        videoWriter = skvideo.io.VideoWriter(output_file,
                                             fps=fps,
                                             frameSize=(width, height))
        videoWriter.open()

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out,
                                                               img,
                                                               save=False)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker,
                        save=False)
                if SaveVideo:
                    videoWriter.write(postprocessed)

            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed /
                                                   (timer() - start)))
            sys.stdout.flush()

    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
Example #22
    w = videoFile.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = videoFile.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter('cut_4_output.mp4', fourcc, 15.0, (int(w), int(h)))
    #fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    #store the position of bounding box
    f = open('{}.csv'.format(videoFilePath), 'w')
    writer = csv.writer(f, delimiter=',')
    writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
    f.flush()

    # loading deep_sort/sort tracker
    encoder = None
    tracker = Sort()
    # metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
    # tracker = Tracker(metric)
    # encoder = generate_detections.create_box_encoder("/Users/deanzhang/Desktop/learnable.ai_project/tf-faster-rcnn/tools/deep_sort/resources/networks/mars-small128.ckpt-68577")

    frame_id = 0
    while True:
        frame_id += 1
        ret, image = videoFile.read()
        if not ret:
            # stop when the video runs out of frames
            break
        im = demo_video(sess, net, image, f, writer, frame_id, encoder,
                        tracker)
        out.write(im)

    videoFile.release()
    out.release()
    cv2.destroyAllWindows()
Example #23
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    camera = cv2.VideoCapture(file)

    if file == 0:
        self.say('Press [ESC] to quit video')

    assert camera.isOpened(), \
    'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None
    if file == 0:  #camera window
        cv2.namedWindow(self.FLAGS.object_id, 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow(self.FLAGS.object_id, int(width * 0.5), int(height * 0.5))
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if self.FLAGS.push_stream:
        ffmpeg_pipe(self, file, width, height)

    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:  #camera window
            fps = 1 / self._get_fps(frame)
            if fps < 1:
                fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        videoWriter = cv2.VideoWriter('output_{}'.format(file), fourcc, fps,
                                      (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        if self.FLAGS.process_status == 1:  # esc
            print("gongjia: Stoped! ")
            break
        if self.FLAGS.process_status == 2:
            #print("gongjia: Paused! ")
            continue
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow(self.FLAGS.object_id, postprocessed)
                if self.FLAGS.push_stream:
                    #im = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    self.pipe.stdin.write(postprocessed.tobytes())

            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write(' {0:3.3f} FPS '.format(elapsed /
                                                     (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    cv2.imwrite(
        '{}_{}_counter.jpg'.format(self.FLAGS.demo, self.FLAGS.object_id),
        postprocessed)
    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
    if self.FLAGS.push_stream:
        self.pipe.stdin.close()
        self.pipe.wait()
Example #24
class interpreter(object):
    def __init__(self, args, data_set, ENABLE_TRACKING=None):
        self.tracker = Sort()
        self.args = args
        self.nms_thres = args.nms
        self.triplet_nms_thres = args.triplet_nms
        self.obj_thres = args.obj_thres
        self.triplet_thres = args.triplet_thres
        self.tobefiltered_objects = [
            26, 53, 134, 247, 179, 74, 226, 135, 145, 300, 253, 95, 11, 102, 87
        ]
        # 26: wheel, 53: backpack, 143:light, 247:camera, 179:board
        # 74:shoe, 226:chair, 135:shelf, 145:button, 300:cake, 253:knob, 95:wall, 11:door, 102:mirror,87:ceiling
        if ENABLE_TRACKING is None:
            self.ENABLE_TRACKING = False if self.args.dataset == 'visual_genome' else True
        else:
            self.ENABLE_TRACKING = ENABLE_TRACKING
        if self.ENABLE_TRACKING and self.args.path_opt.split(
                '/')[-1] == 'VG-DR-Net.yaml':
            self.tobefiltered_predicates = [0, 6, 10, 18, 19, 20, 22, 23, 24]
            # 0:backgrounds, 6:eat,10:wear, 18:ride, 19:watch, 20:play, 22:enjoy, 23:read, 24:cut

        elif self.ENABLE_TRACKING and self.args.path_opt.split(
                '/')[-1] == 'VG-MSDN.yaml':
            self.tobefiltered_predicates = [12, 18, 27, 28, 30, 31, 32, 35]
        else:
            self.tobefiltered_predicates = []

        # Params for Statistics Based Scene Graph Inference
        self.relation_statistics = prior.load_obj("relation_prior_prob")
        self.joint_probability = prior.load_obj("object_prior_prob")
        self.spurious_rel_thres = 0.07
        self.rel_infer_thres = 0.9
        self.obj_infer_thres = 0.001
        self.data_set = data_set
        self.detected_obj_set = set()
        self.fasttext = torchtext.vocab.FastText()
        self.word_vecs, self.word_itos, self.word_stoi = self.prepare_wordvecs(
            num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES)
        self.pred_stoi = {
            self.data_set.predicate_classes[i]: i
            for i in range(len(self.data_set.predicate_classes))
        }

    # p(x, y)
    def cal_p_xy_joint(self, x_ind, y_ind):
        p_xy = self.joint_probability[x_ind, y_ind] / np.sum(
            self.joint_probability)
        return p_xy

    # p(x|y)
    def cal_p_x_given_y(self, x_ind, y_ind):
        single_prob = np.sum(self.joint_probability, axis=1)
        p_y = single_prob[y_ind]
        p_xy = self.joint_probability[x_ind, y_ind]
        return p_xy / p_y

    # p(x|y,z) approximated
    def cal_p_x_given_yz(self, x_ind, y_ind, z_ind):
        p_x_given_y = self.cal_p_x_given_y(x_ind, y_ind)
        p_x_given_z = self.cal_p_x_given_y(x_ind, z_ind)
        return min(p_x_given_y, p_x_given_z)

    # True if p(x, z)^2 < p(x,y)*p(y,z)
    def check_prob_condition(self, x_ind, y_ind, z_ind):
        p_xz = self.cal_p_xy_joint(x_ind, z_ind)
        p_xy = self.cal_p_xy_joint(x_ind, y_ind)
        p_yz = self.cal_p_xy_joint(y_ind, z_ind)
        return p_xz**2 < p_xy * p_yz

    def prepare_wordvecs(self, num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES):
        word_inds = range(num_vocabs)
        word_inds = [x for x in word_inds if x not in ignores]
        word_txts = [self.data_set.object_classes[x] for x in word_inds]
        self.word_ind2vec = {
            ind: self.fasttext.vectors[self.fasttext.stoi[x]]
            for ind, x in zip(word_inds, word_txts)
        }

        word_vecs = torch.stack([
            self.fasttext.vectors[self.fasttext.stoi[x]] for x in word_txts
        ]).cuda()
        word_itos = {
            i: self.data_set.object_classes[x]
            for i, x in enumerate(word_inds)
        }
        word_stoi = {
            self.data_set.object_classes[x]: i
            for i, x in enumerate(word_inds)
        }
        return word_vecs, word_itos, word_stoi

    def update_obj_set(self, obj_inds):
        for obj_ind in obj_inds[:, 0]:
            self.detected_obj_set.add(obj_ind)

    def find_disconnected_pairs(self, obj_inds, relationships):
        connected_pairs = set(
            tuple(x) for x in relationships[:, :2].astype(int).tolist())
        disconnected_pairs = set()
        for i in range(len(obj_inds)):
            for j in range(len(obj_inds)):
                if i == j: continue
                if (i, j) in connected_pairs or (j, i) in connected_pairs:
                    continue
                disconnected_pairs.add((i, j))
        return disconnected_pairs

    def missing_relation_inference(self, obj_inds, obj_boxes,
                                   disconnected_pairs):
        infered_relation = set()
        #print('discon:',disconnected_pairs)
        for i in range(len(disconnected_pairs)):
            pair = disconnected_pairs.pop()
            node1_box, node2_box = obj_boxes[pair[0]], obj_boxes[pair[1]]
            distance = self.distance_between_boxes(
                np.stack([node1_box, node2_box], axis=0))[0, 1]
            pair_txt = [
                self.data_set.object_classes[obj_inds[pair[0]][0]],
                self.data_set.object_classes[obj_inds[pair[1]][0]]
            ]
            candidate, prob, direction = prior.most_probable_relation_for_unpaired(
                pair_txt, self.relation_statistics, int(distance))
            if candidate != None and prob > self.rel_infer_thres:
                if not direction: pair = (pair[1], pair[0])
                infered_relation.add(
                    (pair[0], pair[1], self.pred_stoi[candidate], prob))
                pair_txt = [
                    self.data_set.object_classes[obj_inds[pair[0]][0]],
                    self.data_set.object_classes[obj_inds[pair[1]][0]]
                ]
                #print('dsfsfd:',pair_txt[0],pair_txt[1],candidate,prob)
        infered_relation = np.array(list(infered_relation)).reshape(-1, 4)
        #print(infered_relation)
        return infered_relation

    def missing_object_inference(self, obj_inds, disconnected_pairs):
        detected_obj_list = np.array(list(self.detected_obj_set))
        candidate_searchspace = [
            self.word_ind2vec[x] for x in detected_obj_list
        ]
        candidate_searchspace = torch.stack(candidate_searchspace,
                                            dim=0).cuda()
        search_size = candidate_searchspace.shape[0]
        infered_obj_list = []

        for i in range(len(disconnected_pairs)):
            pair = disconnected_pairs.pop()
            ''' wordvec based candidate objects filtering '''
            #print(pair)
            sbj_vec = self.word_ind2vec[obj_inds[pair[0]][0]].cuda()
            obj_vec = self.word_ind2vec[obj_inds[pair[1]][0]].cuda()
            sim_sbj_obj = cosine_similarity(sbj_vec, obj_vec, dim=0)

            sbj_vec = sbj_vec.expand_as(candidate_searchspace)
            obj_vec = obj_vec.expand_as(candidate_searchspace)
            sim_cans_sbj = cosine_similarity(candidate_searchspace,
                                             sbj_vec,
                                             dim=1)
            sim_cans_obj = cosine_similarity(candidate_searchspace,
                                             obj_vec,
                                             dim=1)
            sim_sbj_obj = sim_sbj_obj.expand_as(sim_cans_obj)
            keep = (sim_cans_sbj + sim_cans_obj >
                    2 * sim_sbj_obj).nonzero().view(-1).cpu().numpy()
            #print(keep)
            #print(detected_obj_list)
            candidate_obj_list = detected_obj_list[keep]
            if len(candidate_obj_list) == 0: continue
            ''' statistics based candidate objects filtering '''
            keep = []
            for i, obj_ind in enumerate(candidate_obj_list):
                if self.check_prob_condition(obj_inds[pair[0]][0], obj_ind,
                                             obj_inds[pair[1]][0]):
                    keep.append(i)
            candidate_obj_list = candidate_obj_list[keep]
            if len(candidate_obj_list) == 0: continue
            ''' choose a candidate with best score above threshold'''
            probs = [
                self.cal_p_x_given_yz(candidate, obj_inds[pair[0]][0],
                                      obj_inds[pair[1]][0])
                for candidate in candidate_obj_list
            ]
            chosen_obj = candidate_obj_list[(np.array(probs)).argmax()]
            infered_obj_list.append(chosen_obj)
            #print(max(probs),self.data_set.object_classes[obj_inds[pair[0]][0]],
            #      self.data_set.object_classes[chosen_obj],
            #      self.data_set.object_classes[obj_inds[pair[1]][0]])

    def get_box_centers(self, boxes):
        # Define bounding box info
        center_x = (boxes[:, 0] + boxes[:, 2]) / 2
        center_y = (boxes[:, 1] + boxes[:, 3]) / 2
        centers = np.concatenate(
            [center_x.reshape(-1, 1),
             center_y.reshape(-1, 1)], axis=1)
        return centers

    def distance_between_boxes(self, boxes):
        '''
        returns all possible distances between boxes

        :param boxes:
        :return: dist: dist[i, j] is the distance between the centers of boxes[i] and boxes[j]
        '''
        centers = self.get_box_centers(boxes)
        centers_axis1 = np.repeat(centers, centers.shape[0],
                                  axis=0).reshape(-1, 2)
        centers_axis2 = np.stack([centers for _ in range(centers.shape[0])
                                  ]).reshape(-1, 2)
        dist = np.linalg.norm(centers_axis1 - centers_axis2,
                              axis=1).reshape(-1, centers.shape[0])
        return dist

    def spurious_relation_rejection(self, obj_boxes, obj_cls, relationships):
        if self.args.disable_spurious: return range(len(relationships))
        subject_inds = obj_cls[relationships.astype(int)[:, 0]][:, 0]
        pred_inds = relationships.astype(int)[:, 2]
        object_inds = obj_cls[relationships.astype(int)[:, 1]][:, 0]

        subject_boxes = obj_boxes[relationships.astype(int)[:, 0]]
        object_boxes = obj_boxes[relationships.astype(int)[:, 1]]

        keep = []
        for i, (sbj_ind, pred_ind, obj_ind, sbj_box, obj_box) in enumerate(
                zip(subject_inds, pred_inds, object_inds, subject_boxes,
                    object_boxes)):
            relation_txt = [
                self.data_set.object_classes[sbj_ind],
                self.data_set.predicate_classes[pred_ind],
                self.data_set.object_classes[obj_ind]
            ]
            distance = self.distance_between_boxes(
                np.stack([sbj_box, obj_box], axis=0))[0, 1]
            prob = prior.triplet_prob_from_statistics(relation_txt,
                                                      self.relation_statistics,
                                                      int(distance))
            print('prob: {prob:3.2f}     {sbj:15}{rel:15}{obj:15}'.format(
                prob=prob,
                sbj=relation_txt[0],
                rel=relation_txt[1],
                obj=relation_txt[2]))

            if prob > self.spurious_rel_thres: keep.append(i)

        return keep

    def interpret_graph(self, object_result, predicate_result, im_info):
        cls_prob_object, bbox_object, object_rois, reranked_score = object_result[:4]
        cls_prob_predicate, mat_phrase = predicate_result[:2]
        region_rois_num = predicate_result[2]

        obj_boxes, obj_scores, obj_cls, \
        subject_inds, object_inds, \
        subject_boxes, object_boxes, \
        subject_IDs, object_IDs, \
        predicate_inds, triplet_scores, relationships = \
            self.interpret_graph_(cls_prob_object, bbox_object, object_rois,
                                    cls_prob_predicate, mat_phrase, im_info,
                                    reranked_score)
        ''' missing object inference '''
        # self.update_obj_set(obj_cls)
        # disconnected_pairs = self.find_disconnected_pairs(obj_cls, relationships)
        # self.missing_object_inference(obj_cls,disconnected_pairs)
        ''' missing object inference (end) '''
        ''' missing relation inference '''
        # infered_relations = self.missing_relation_inference(obj_cls,obj_boxes,disconnected_pairs)
        # print('size:',relationships.shape,infered_relations.shape)
        #
        # relationships = np.concatenate([relationships,infered_relations],axis=0)
        #
        # predicate_inds = relationships[:, 2].astype(int)
        # subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
        # object_boxes = obj_boxes[relationships[:, 1].astype(int)]
        # subject_IDs = np.array([int(obj_boxes[int(relation[0])][4]) for relation in relationships])
        # object_IDs = np.array([int(obj_boxes[int(relation[1])][4]) for relation in relationships])
        # subject_inds = obj_cls[relationships[:, 0].astype(int)]
        # object_inds = obj_cls[relationships[:, 1].astype(int)]
        # subject_scores = [obj_scores[int(relation[0])] for relation in relationships]
        # pred_scores = [relation[3] / obj_scores[int(relation[0])] / obj_scores[int(relation[1])] for relation in
        #                relationships]
        # object_scores = [obj_scores[int(relation[1])] for relation in relationships]
        # triplet_scores = np.array(zip(subject_scores, pred_scores, object_scores))
        ''' missing relation inference (end) '''

        keep = self.spurious_relation_rejection(obj_boxes, obj_cls,
                                                relationships)

        return obj_boxes, obj_scores, obj_cls, \
               subject_inds[keep], object_inds[keep], \
               subject_boxes[keep], object_boxes[keep], \
               subject_IDs[keep], object_IDs[keep], \
               predicate_inds[keep], triplet_scores[keep], relationships[keep]

    def interpret_graph_(self,
                         cls_prob_object,
                         bbox_object,
                         object_rois,
                         cls_prob_predicate,
                         mat_phrase,
                         im_info,
                         reranked_score=None):

        obj_boxes, obj_scores, obj_cls, subject_inds, object_inds, \
        subject_boxes, object_boxes, predicate_inds, \
        sub_assignment, obj_assignment, total_score = \
            self.interpret_relationships(cls_prob_object, bbox_object, object_rois,
                                         cls_prob_predicate, mat_phrase, im_info,
                                         nms=self.nms_thres, topk_pred=2, topk_obj=3,
                                         use_gt_boxes=False,
                                         triplet_nms=self.triplet_nms_thres,
                                         reranked_score=reranked_score)

        obj_boxes, obj_scores, obj_cls, \
        subject_inds, object_inds, \
        subject_boxes, object_boxes, \
        subject_IDs, object_IDs, \
        predicate_inds, triplet_scores, relationships = self.filter_and_tracking(obj_boxes, obj_scores, obj_cls,
                                                                                     subject_inds, object_inds,
                                                                                     subject_boxes, object_boxes,
                                                                                     predicate_inds,
                                                                                     sub_assignment, obj_assignment,
                                                                                     total_score)

        return obj_boxes, obj_scores, obj_cls, \
        subject_inds, object_inds, \
        subject_boxes, object_boxes, \
        subject_IDs, object_IDs, \
        predicate_inds, triplet_scores, relationships

    def interpret_relationships(self,
                                cls_prob,
                                bbox_pred,
                                rois,
                                cls_prob_predicate,
                                mat_phrase,
                                im_info,
                                nms=-1.,
                                clip=True,
                                min_score=0.01,
                                top_N=100,
                                use_gt_boxes=False,
                                triplet_nms=-1.,
                                topk_pred=2,
                                topk_obj=3,
                                reranked_score=None):

        scores, inds = cls_prob[:, 1:].data.topk(k=topk_obj, dim=1)
        if reranked_score is not None:
            if isinstance(reranked_score, Variable):
                reranked_score = reranked_score.data
            scores *= reranked_score
        inds += 1
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()
        # zero out detections whose top class is in the to-be-filtered object list
        for i, ind in enumerate(inds):
            if ind[0] in self.tobefiltered_objects:
                scores[i].fill(0)

        predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.topk(
            dim=1, k=topk_pred)
        predicate_inds += 1
        predicate_scores, predicate_inds = predicate_scores.cpu().numpy(
        ).reshape(-1), predicate_inds.cpu().numpy().reshape(-1)

        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data.cpu().numpy()
        box_deltas = np.asarray([
            box_deltas[i, (inds[i][0] * 4):(inds[i][0] * 4 + 4)]
            for i in range(len(inds))
        ], dtype=np.float64)
        keep = range(scores.shape[0])
        if use_gt_boxes:
            triplet_nms = -1.
            pred_boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
        else:
            pred_boxes = bbox_transform_inv_hdn(
                rois.data.cpu().numpy()[:, 1:5], box_deltas) / im_info[0][2]
            pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])

            # nms
            if nms > 0. and pred_boxes.shape[0] > 0:
                assert nms < 1., 'Wrong nms parameters'
                pred_boxes, scores, inds, keep = nms_detections(pred_boxes,
                                                                scores,
                                                                nms,
                                                                inds=inds)

        sub_list = np.array([], dtype=int)
        obj_list = np.array([], dtype=int)
        pred_list = np.array([], dtype=int)

        # mapping the object id
        mapping = np.ones(cls_prob.size(0), dtype=np.int64) * -1
        mapping[keep] = range(len(keep))

        sub_list = mapping[mat_phrase[:, 0]]
        obj_list = mapping[mat_phrase[:, 1]]
        pred_remain = np.logical_and(sub_list >= 0, obj_list >= 0)
        pred_list = np.where(pred_remain)[0]
        sub_list = sub_list[pred_remain]
        obj_list = obj_list[pred_remain]

        # expand the sub/obj and pred list to k-column
        pred_list = np.vstack([
            pred_list * topk_pred + i for i in range(topk_pred)
        ]).transpose().reshape(-1)
        sub_list = np.vstack([sub_list for i in range(topk_pred)
                              ]).transpose().reshape(-1)
        obj_list = np.vstack([obj_list for i in range(topk_pred)
                              ]).transpose().reshape(-1)

        if use_gt_boxes:
            total_scores = predicate_scores[pred_list]
        else:
            total_scores = predicate_scores[pred_list] * scores[
                sub_list][:, 0] * scores[obj_list][:, 0]

        top_N_list = total_scores.argsort()[::-1][:10000]  # note: the top_N argument is not applied here
        total_scores = total_scores[top_N_list]
        pred_ids = predicate_inds[
            pred_list[top_N_list]]  # category of predicates
        sub_assignment = sub_list[top_N_list]  # subjects assignments
        obj_assignment = obj_list[top_N_list]  # objects assignments
        sub_ids = inds[:, 0][sub_assignment]  # category of subjects
        obj_ids = inds[:, 0][obj_assignment]  # category of objects
        sub_boxes = pred_boxes[sub_assignment]  # boxes of subjects
        obj_boxes = pred_boxes[obj_assignment]  # boxes of objects

        if triplet_nms > 0.:
            sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, keep = triplet_nms_py(
                sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, triplet_nms)
            sub_assignment = sub_assignment[keep]
            obj_assignment = obj_assignment[keep]
            total_scores = total_scores[keep]
        if len(sub_list) == 0:
            print('No Relationship remains')
            # pdb.set_trace()

        return pred_boxes, scores, inds, sub_ids, obj_ids, sub_boxes, obj_boxes, pred_ids, sub_assignment, obj_assignment, total_scores

    def filter_and_tracking(self, obj_boxes, obj_scores, obj_cls, subject_inds,
                            object_inds, subject_boxes, object_boxes,
                            predicate_inds, sub_assignment, obj_assignment,
                            total_score):

        relationships = np.array(
            list(zip(sub_assignment, obj_assignment, predicate_inds, total_score)))

        # filter out bboxes that have a low obj_score
        keep_obj = np.where(obj_scores[:, 0] >= self.obj_thres)[0]
        if keep_obj.size == 0:
            print("no object detected ...")
            keep_obj = [0]
        cutline_idx = max(keep_obj)
        obj_scores = obj_scores[:cutline_idx + 1]
        obj_boxes = obj_boxes[:cutline_idx + 1]
        obj_cls = obj_cls[:cutline_idx + 1]

        # filter out triplets whose obj/sbj have low obj_score
        if relationships.size > 0:
            keep_sub_assign = np.where(relationships[:, 0] <= cutline_idx)[0]
            relationships = relationships[keep_sub_assign]
        if relationships.size > 0:
            keep_obj_assign = np.where(relationships[:, 1] <= cutline_idx)[0]
            relationships = relationships[keep_obj_assign]

        # filter out triplets who have low total_score
        if relationships.size > 0:
            keep_rel = np.where(relationships[:, 3] >= self.triplet_thres)[
                0]  # MSDN:0.02, DR-NET:0.03
            # if keep_rel.size > 0:
            #     cutline_idx = max(keep_rel)
            #     relationships = relationships[:cutline_idx + 1]
            relationships = relationships[keep_rel]

        # filter out triplets whose subject is the same box as its object
        if relationships.size > 0:

            #keep_rel = np.where(relationships[:, 0] != relationships[:, 1])[0]
            #relationships = relationships[keep_rel]
            keep_rel = []
            for i, relation in enumerate(relationships):
                if relation[0] != relation[1]:
                    keep_rel.append(i)
            keep_rel = np.array(keep_rel).astype(int)
            relationships = relationships[keep_rel]
            # print('filter1')
            # print(relationships.astype(int))

        # filter out triplets whose predicate is related to human behavior.
        if relationships.size > 0:
            keep_rel = []
            for i, relation in enumerate(relationships):
                if int(relation[2]) not in self.tobefiltered_predicates:
                    keep_rel.append(i)
            keep_rel = np.array(keep_rel).astype(int)
            #print('keep_rel:',keep_rel)
            relationships = relationships[keep_rel]
            # print('filter2')
            # print(relationships.astype(int))

        # Object tracking
        # Filter out all un-tracked objects and triplets
        if self.ENABLE_TRACKING:
            print(obj_boxes.shape)
            tracking_input = np.concatenate(
                (obj_boxes, obj_scores[:, 0].reshape(len(obj_scores), 1)),
                axis=1)
            bboxes_and_uniqueIDs = self.tracker.update(tracking_input)
            keep = filter_untracted(bboxes_and_uniqueIDs, obj_boxes)
            print(relationships.shape)

            # filter out triplets whose obj/sbj is untracked.
            if relationships.size > 0:
                keep_sub_assign = [
                    np.where(relationships[:, 0] == keep_idx)
                    for keep_idx in keep
                ]
                if len(keep_sub_assign) > 0:
                    keep_sub_assign = np.concatenate(keep_sub_assign,
                                                     axis=1).flatten()
                    relationships = relationships[keep_sub_assign]
                else:
                    relationships = relationships[np.array([]).astype(int)]
            if relationships.size > 0:
                keep_obj_assign = [
                    np.where(relationships[:, 1] == keep_idx)
                    for keep_idx in keep
                ]
                if len(keep_obj_assign) > 0:
                    keep_obj_assign = np.concatenate(keep_obj_assign,
                                                     axis=1).flatten()
                    relationships = relationships[keep_obj_assign]
                else:
                    relationships = relationships[np.array([]).astype(int)]
            #
            print('filter3')
            print(relationships.astype(int))
            print(keep)
            rel = relationships.copy()
            for i, k in enumerate(keep):
                relationships[:, :2][rel[:, :2] == k] = i

            order = relationships[:, 3].argsort()[::-1]
            relationships = relationships[order]
            #print('filter4')
            #print(relationships[:,3])

            subject_inds = obj_cls[relationships[:, 0].astype(int)]
            object_inds = obj_cls[relationships[:, 1].astype(int)]

            obj_boxes = np.concatenate(
                [obj_boxes, np.zeros([obj_boxes.shape[0], 1])], axis=1)
            for i, keep_idx in enumerate(keep):
                obj_boxes[keep_idx] = bboxes_and_uniqueIDs[i]
            obj_scores = obj_scores[keep]
            obj_cls = obj_cls[keep]
            obj_boxes = obj_boxes[keep]

            #obj_boxes = bboxes_and_uniqueIDs

            print(obj_scores.shape)
            print(obj_cls.shape)
            print(obj_boxes.shape)
            print(relationships.shape)

        else:
            obj_boxes = np.concatenate(
                [obj_boxes, np.zeros([obj_boxes.shape[0], 1])], axis=1)
            for i in range(len(obj_boxes)):
                obj_boxes[i][4] = i
            subject_inds = obj_cls[relationships[:, 0].astype(int)]
            object_inds = obj_cls[relationships[:, 1].astype(int)]
            #subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
            #object_boxes = obj_boxes[relationships[:, 1].astype(int)]
            #subject_IDs = subject_boxes[:, 4].astype(int)
            #object_IDs = object_boxes[:, 4].astype(int)

        predicate_inds = relationships[:, 2].astype(int)
        subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
        object_boxes = obj_boxes[relationships[:, 1].astype(int)]
        subject_IDs = np.array([
            int(obj_boxes[int(relation[0])][4]) for relation in relationships
        ])
        object_IDs = np.array([
            int(obj_boxes[int(relation[1])][4]) for relation in relationships
        ])

        subject_scores = [
            obj_scores[int(relation[0])] for relation in relationships
        ]
        pred_scores = [
            relation[3] / obj_scores[int(relation[0])] /
            obj_scores[int(relation[1])] for relation in relationships
        ]
        object_scores = [
            obj_scores[int(relation[1])] for relation in relationships
        ]
        triplet_scores = np.array(
            list(zip(subject_scores, pred_scores, object_scores)))

        #print(relationships)


        return obj_boxes, obj_scores, obj_cls, \
               subject_inds, object_inds, \
               subject_boxes, object_boxes, \
               subject_IDs, object_IDs, \
               predicate_inds, triplet_scores, relationships
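The missing-object inference at the top of this example keeps a candidate object c for a disconnected (subject, object) pair only when sim(c, sbj) + sim(c, obj) > 2 * sim(sbj, obj) over word embeddings. A minimal, self-contained sketch of that criterion; the 300-dimensional random tensors below are stand-ins for the FastText vectors used in the original:

import torch
from torch.nn.functional import cosine_similarity

sbj_vec = torch.randn(300)         # stand-in for the subject's word vector
obj_vec = torch.randn(300)         # stand-in for the object's word vector
candidates = torch.randn(10, 300)  # stand-in for the candidate search space

sim_sbj_obj = cosine_similarity(sbj_vec, obj_vec, dim=0)
sim_cans_sbj = cosine_similarity(candidates, sbj_vec.expand_as(candidates), dim=1)
sim_cans_obj = cosine_similarity(candidates, obj_vec.expand_as(candidates), dim=1)

# keep candidates that are closer to both the subject and the object
# than the subject and the object are to each other
keep = (sim_cans_sbj + sim_cans_obj > 2 * sim_sbj_obj).nonzero().view(-1)
print(keep)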
Example #25
0
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    # if file == 'camera':
    #     file = 0
    # else:
    #     assert os.path.isfile(file), \
    #     'file {} does not exist'.format(file)

    camera1 = cv2.VideoCapture(file[0])
    camera2 = cv2.VideoCapture(file[1])
    camera3 = cv2.VideoCapture(file[2])

    # if file == 0:
    #     self.say('Press [ESC] to quit video')
    #
    # assert camera.isOpened(), \
    # 'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    # if file == 0:#camera window
    #     cv2.namedWindow('', 0)
    #     _, frame = camera.read()
    #     height, width, _ = frame.shape
    #     cv2.resizeWindow('', width, height)
    # else:
    #     _, frame = camera.read()
    #     height, width, _ = frame.shape

    # if SaveVideo:
    #     fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #     if file == 0:#camera window
    #       fps = 1 / self._get_fps(frame)
    #       if fps < 1:
    #         fps = 1
    #     else:
    #         fps = round(camera1.get(cv2.CAP_PROP_FPS))
    #     videoWriter = cv2.VideoWriter(
    #         'output_{}'.format(file), fourcc, fps, (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while (camera1.isOpened() and camera2.isOpened() and camera3.isOpened()):
        elapsed += 1
        ret1, frame1 = camera1.read()
        ret2, frame2 = camera2.read()
        ret3, frame3 = camera3.read()
        if not (ret1 and ret2 and ret3):
            break
        h1, w1 = frame1.shape[:2]
        vis = np.concatenate((frame2, frame1, frame3), axis=1)
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0

        # while camera.isOpened():
        #     elapsed += 1
        #     _, frame = camera.read()
        #     if frame is None:
        #         print ('\nEnd of Video')
        #         break
        #     if self.FLAGS.skip != n :
        #         n+=1
        #         continue
        #     n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(vis)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(vis)
        buffer_inp.append(vis)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    #print("else hi")
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker)
                if SaveVideo:
                    # NOTE: the VideoWriter setup above is commented out; re-enable it
                    # (or create videoWriter here) before this write call will work.
                    videoWriter.write(postprocessed)
                if self.FLAGS.display:
                    cv2.imshow('This is postprocessed', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed /
                                                   (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display:
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv:
        f.close()
    camera1.release()
    camera2.release()
    camera3.release()
    if self.FLAGS.display:
        cv2.destroyAllWindows()
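Example #25 above stitches the three camera frames side by side with np.concatenate(..., axis=1) so a single detector pass covers all views. A small sketch of that stitching step, using dummy zero frames and assuming all frames share the same height:

import numpy as np

# three dummy frames of identical height (h, w, 3)
frame1 = np.zeros((480, 640, 3), dtype=np.uint8)
frame2 = np.zeros((480, 640, 3), dtype=np.uint8)
frame3 = np.zeros((480, 640, 3), dtype=np.uint8)

# horizontal stitch: the result is (480, 1920, 3) and is fed to the detector as one image
vis = np.concatenate((frame2, frame1, frame3), axis=1)
print(vis.shape)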
Example #26
0
def sort(yolo, args):

    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input ==
                                         '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input could be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC    = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used for tracking, if provided; classes from
        # the detector that are not in this list will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create instance of the SORT tracker
    tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    # alloc a set of queues to record motion trace
    # for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects
        boxes, class_names, scores = get_tracking_object(out_boxes,
                                                         out_classnames,
                                                         out_scores,
                                                         tracking_class_names,
                                                         convert_box=False)

        # form up detection records
        if len(boxes) != 0:
            detections = np.array([
                bbox + [score]
                for bbox, score, class_name in zip(boxes, scores, class_names)
            ])
        else:
            detections = np.empty((0, 5))

        # Call the tracker
        tracks = tracker.update(detections)

        # show all detection result as white box
        for j, bbox in enumerate(boxes):
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, class_names[j],
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracks:
            bbox = track[:4]
            track_id = int(track[4])

            # record tracking info and get bbox
            track_indexes.append(int(track_id))
            total_obj_counter.append(int(track_id))

            # show all tracking result as color box
            color = [int(c) for c in COLORS[track_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (color), 2)

            #if track.class_name:
            #cv2.putText(frame, str(track.class_name), (int(bbox[0]+30), int(bbox[1]-20)), 0, 5e-3*150, (color), 2)

            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            #draw motion trace
            motion_trace = motion_traces[track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]),
                         (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("SORT", 0)
        cv2.resizeWindow('SORT', 1024, 768)
        # cv2.imshow('SORT', frame)  # Xander commented out

        if save_output:
            #save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2
        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
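Both example #24 (tracking_input) and example #26 (detections) feed SORT's update() an N x 5 array of [x1, y1, x2, y2, score] rows and read back tracked boxes with the track id appended as a fifth column. A minimal sketch of that interface, assuming the same sort package used in the examples is importable; the detection values are made up:

import numpy as np
from sort.sort import Sort

tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

# one detection per row: [x1, y1, x2, y2, score]
detections = np.array([[100., 120., 200., 260., 0.9],
                       [400.,  80., 480., 200., 0.8]])

tracks = tracker.update(detections)  # rows come back as [x1, y1, x2, y2, track_id]
for track in tracks:
    bbox, track_id = track[:4], int(track[4])
    print(track_id, bbox)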
Example #27
0
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo
    detectedObjects = []
    if self.FLAGS.track :
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track :
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    camera = cv2.VideoCapture(file)

    # if file == 0:
    #     self.say('Press [ESC] to quit video')

    assert camera.isOpened(), \
    'Cannot capture source'
    savedPath = 'result/output_{}'.format(os.path.basename(file))
    if self.FLAGS.csv :
        f = open('{}.csv'.format(file),'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id' , 'x', 'y', 'w', 'h'])
        f.flush()
    else :
        f =None
        writer= None
    if file == 0:#camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        _, frame = camera.read()
        height, width, _ = frame.shape

    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:#camera window
          fps = 1 / self._get_fps(frame)
          if fps < 1:
            fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        videoWriter = cv2.VideoWriter(
            savedPath, fourcc, fps, (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    # self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while camera.isOpened():
        elapsed += 1
        _, frame = camera.read()
        if frame is None:
            # print ('\nEnd of Video')
            break
        if self.FLAGS.skip != n :
            n+=1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track :
            fgmask = fgbg.apply(frame)
        else :
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track :
                    postprocessed = self.framework.postprocess(
                        single_out, img)
                else :
                    postprocessedTuple = self.framework.postprocess(
                        single_out, img,frame_id = elapsed,
                        csv_file=f,csv=writer,mask = fgmask,
                        encoder=encoder,tracker=tracker)
                    postprocessed = postprocessedTuple[0]
                    detectedObjects.append(postprocessedTuple[1])
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display :
                    cv2.imshow('', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            # sys.stdout.write('\r')
            # sys.stdout.write('{0:3.3f} FPS'.format(
            #     elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display :
            choice = cv2.waitKey(1)
            if choice == 27:
                break
    flattenObjects = sum(detectedObjects, [])
    frameDictionary = dict()
    numDictionary = dict()

    totalFrame = int(camera.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(camera.get(cv2.CAP_PROP_FPS))
    illegalSecond = 30
    stopFrameCountThreshold = fps * illegalSecond

    for object in flattenObjects:
        frameDictionary\
            .setdefault(object.frame, []).append(object.num)
        numDictionary\
            .setdefault(object.num, Car(object.num, Segment(object.frame, object.position), stopFrameCountThreshold))\
            .update(Segment(object.frame, object.position))
    resultJSON = {
        "frames": list(map(lambda key: {"id": key, "carNums": frameDictionary[key]}, frameDictionary)),
        "cars": list(map(lambda value: json.loads(json.dumps(value, default=lambda o: o.__dict__)), numDictionary.values())),
        "resultVideoPath": savedPath
    }
    print(json.dumps(resultJSON, indent=2))
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv :
        f.close()
    camera.release()
    if self.FLAGS.display :
        cv2.destroyAllWindows()
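Example #27 serializes the per-track Car objects with json.dumps(value, default=lambda o: o.__dict__), which turns a plain Python object into a dict of its attributes. A tiny standalone sketch of that pattern; the Track class here is a hypothetical stand-in for the Car/Segment objects of the original:

import json

class Track:  # hypothetical stand-in for the Car objects above
    def __init__(self, num, frame, position):
        self.num = num
        self.frame = frame
        self.position = position

tracks = {1: Track(1, 10, [100, 200]), 2: Track(2, 12, [50, 80])}

result = {
    "cars": [json.loads(json.dumps(t, default=lambda o: o.__dict__))
             for t in tracks.values()]
}
print(json.dumps(result, indent=2))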
Example #28
0
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track :
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath("deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track :
        fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()


    camera = cv2.VideoCapture(file[0])
    camera1 = cv2.VideoCapture(file[1])

    if file == 0:
        self.say('Press [ESC] to quit video')

    assert camera.isOpened(), \
    'Cannot capture source'

    if self.FLAGS.csv :
        f = open('{}.csv'.format(file),'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id' , 'x', 'y', 'w', 'h'])
        f.flush()
    else :
        f =None
        writer= None
    if file == 0:#camera window
        cv2.namedWindow('', 0)
        _, frame = camera.read()
        height, width, _ = frame.shape
        cv2.resizeWindow('', width, height)
    else:
        ret, frame = camera.read()
        ret1, frame1 = camera1.read()
        height, width, _ = (frame.shape[0], (frame.shape[1]+frame1.shape[1]), 3)

    if SaveVideo:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        if file == 0:#camera window
          fps = 1 / self._get_fps(frame)
          if fps < 1:
            fps = 1
        else:
            fps = round(camera.get(cv2.CAP_PROP_FPS))
        # NOTE: the original referenced an undefined `filepath`; assuming the first input path here
        videoWriter = cv2.VideoWriter(
            os.path.join(os.path.dirname(file[0]),
                         'output_{}'.format(os.path.basename(file[0]))),
            fourcc, fps, (width, height))

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0
    while (camera.isOpened() and camera1.isOpened()):
        elapsed += 1
        _, frame = camera.read()
        _, frame1 = camera1.read()
        if frame is None or frame1 is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n :
            n+=1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track :
            fgmask = fgbg.apply(frame)
        else :
            fgmask = None
        vis = np.concatenate((frame, frame1), axis=1)
        preprocessed = self.framework.preprocess(vis)
        buffer_inp.append(vis)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track :
                    postprocessed = self.framework.postprocess(
                        single_out, img)
                else :
                    postprocessed = self.framework.postprocess(
                        single_out, img, frame_id = elapsed,
                        csv_file=f,csv=writer,mask = fgmask,
                        encoder=encoder,tracker=tracker)
                    # postprocessed1 = self.framework.postprocess(
                    #     single_out, img[:height,int(width/2):width, :3],frame_id = elapsed,
                    #     csv_file=f,csv=writer,mask = fgmask,
                    #     encoder=encoder,tracker=tracker)
                if SaveVideo:
                    videoWriter.write(postprocessed)
                if self.FLAGS.display :
                    cv2.imshow('', postprocessed)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(
                elapsed / (timer() - start)))
            sys.stdout.flush()
        if self.FLAGS.display :
            choice = cv2.waitKey(1)
            if choice == 27:
                break

    sys.stdout.write('\n')
    if SaveVideo:
        videoWriter.release()
    if self.FLAGS.csv :
        f.close()
    camera.release()
    if self.FLAGS.display :
        cv2.destroyAllWindows()
Example #29
0
File: help.py Project: pribadihcr/DLL-RAPI
def camera(self):
    file = self.FLAGS.demo
    SaveVideo = self.FLAGS.saveVideo

    if self.FLAGS.track:
        if self.FLAGS.tracker == "deep_sort":
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker
            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)
            tracker = Tracker(metric)
            encoder = generate_detections.create_box_encoder(
                os.path.abspath(
                    "deep_sort/resources/networks/mars-small128.ckpt-68577"))
        elif self.FLAGS.tracker == "sort":
            from sort.sort import Sort
            encoder = None
            tracker = Sort()
    if self.FLAGS.BK_MOG and self.FLAGS.track:
        fgbg = cv2.createBackgroundSubtractorMOG2()

    if file == 'camera':
        file = 0
    else:
        assert os.path.isfile(file), \
        'file {} does not exist'.format(file)

    vid = imageio.get_reader(file, 'ffmpeg')  #cv2.VideoCapture(file)
    if file == 0:
        self.say('Press [ESC] to quit video')

    #assert camera.isOpened(), \
    #'Cannot capture source'

    if self.FLAGS.csv:
        f = open('{}.csv'.format(file), 'w')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['frame_id', 'track_id', 'x', 'y', 'w', 'h'])
        f.flush()
    else:
        f = None
        writer = None

    # buffers for demo in batch
    buffer_inp = list()
    buffer_pre = list()

    elapsed = 0
    start = timer()
    self.say('Press [ESC] to quit demo')
    #postprocessed = []
    # Loop through frames
    n = 0

    plt.ion()
    fig = plt.figure()
    ax = plt.gca()
    frame = vid.get_data(0)
    img_artist = ax.imshow(frame)
    for num in range(1, 20000):
        try:
            frame = vid.get_data(num)
            print(num)
        except:
            break
        elapsed += 1
        #_, frame = camera.read()
        if frame is None:
            print('\nEnd of Video')
            break
        if self.FLAGS.skip != n:
            n += 1
            continue
        n = 0
        if self.FLAGS.BK_MOG and self.FLAGS.track:
            fgmask = fgbg.apply(frame)
        else:
            fgmask = None
        preprocessed = self.framework.preprocess(frame)
        buffer_inp.append(frame)
        buffer_pre.append(preprocessed)
        # Only process and imshow when queue is full
        if elapsed % self.FLAGS.queue == 0:
            feed_dict = {self.inp: buffer_pre}
            net_out = self.sess.run(self.out, feed_dict)
            for img, single_out in zip(buffer_inp, net_out):
                if not self.FLAGS.track:
                    postprocessed = self.framework.postprocess(single_out, img)
                else:
                    postprocessed = self.framework.postprocess(
                        single_out,
                        img,
                        frame_id=elapsed,
                        csv_file=f,
                        csv=writer,
                        mask=fgmask,
                        encoder=encoder,
                        tracker=tracker)

                if self.FLAGS.display:
                    #cv2.imshow('', postprocessed)
                    img_artist.set_data(postprocessed)
                    plt.show()
                    plt.pause(0.00001)
            # Clear Buffers
            buffer_inp = list()
            buffer_pre = list()

        if elapsed % 5 == 0:
            sys.stdout.write('\r')
            sys.stdout.write('{0:3.3f} FPS'.format(elapsed /
                                                   (timer() - start)))
            sys.stdout.flush()

    sys.stdout.write('\n')

    if self.FLAGS.csv:
        f.close()
Example #30
0
def run(detector, number, cam, mask_name, date, name):
    print('Processing video number ', number)

    video_capture = cv2.VideoCapture('/media/aioz-trung-intern/data/sml/' +
                                     cam + '/' + name)
    w = 640
    h = 480

    #if writeVideo_flag:
    # Define the codec and create VideoWriter object
    #w = int(video_capture.get(3))
    #h = int(video_capture.get(4))

    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter('data_res/res_' + cam + '/o_' + name, fourcc, 30,
                          (w, h))

    fps = 0.0
    mask = cv2.imread(mask_name, 0)
    contours, _ = cv2.findContours(np.expand_dims(mask, axis=2),
                                   cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cont_sorted = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    x, y, wi, he = cv2.boundingRect(cont_sorted[0])

    #init tracker
    tracker = Sort(use_dlib=True)  #create instance of the SORT tracker
    # Display init
    # colours = np.random.rand(32, 3)  # used only for display
    # plt.ion()
    # fig = plt.figure()
    bbox_stack = []
    avg_people = 0
    count = 0
    video_mask_frame = np.zeros(shape=[480, 640], dtype=np.float64)
    nop_list = []
    x_mask = []
    y_mask = []

    while (video_capture.isOpened()):
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if (not ret):
            break
        count += 1
        # resize to the fixed 640x480 frame that the drawing code below expects
        org_frame = cv2.resize(frame, (w, h), interpolation=cv2.INTER_AREA)
        mask_frame = np.zeros(shape=[480, 640], dtype=np.uint8)

        #frame = org_frame.copy()
        #frame[mask==0] = [0,0,0]
        #if (count == 200):
        #    break

        t1 = time.time()

        boxs, _, _ = process(detector, org_frame)
        if (boxs.shape[0] != 0):
            boxs[:, 2] = boxs[:, 2] - boxs[:, 0]
            boxs[:, 3] = boxs[:, 3] - boxs[:, 1]

        #boxs = yolo.detect_image(image)
        #print("box ", np.asarray(boxs).shape, " box: ", boxs)
        # break

        # Draw bbox
        #print(len(boxs))
        num_of_person = 0
        filtered_bbox = []
        for bbox in boxs:
            if (check_intersect(bbox, mask)):
                avg_people += 1
                filtered_bbox.append(bbox)
                #cv2.rectangle(org_frame,(int(bbox[0]), int(bbox[1])), (int(bbox[0]+bbox[2]), int(bbox[1]+bbox[3])),(255,0,0), 2)

        if (len(bbox_stack) != stack_num):
            bbox_stack.append(filtered_bbox)
            for bbox in filtered_bbox:
                cv2.rectangle(org_frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                              (255, 0, 0), 2)
                cv2.circle(mask_frame, (int((2 * bbox[0] + bbox[2]) / 2),
                                        int((2 * bbox[1] + bbox[3]) / 2)), 20,
                           (255), -1)
                x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
            num_of_person = len(filtered_bbox)

        else:
            bbox_stack_len = [len(x) for x in bbox_stack]

            list_counter = Counter(bbox_stack_len)

            argmax = np.argmax(list(list_counter.values()))

            key = list(list_counter.keys())[argmax]

            index = [i for i, e in enumerate(bbox_stack_len) if e == key]

            if (key != len(filtered_bbox)):
                for bbox in bbox_stack[index[-1]]:
                    cv2.rectangle(
                        org_frame, (int(bbox[0]), int(bbox[1])),
                        (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                        (255, 0, 0), 2)
                    cv2.circle(mask_frame, (int((2 * bbox[0] + bbox[2]) / 2),
                                            int((2 * bbox[1] + bbox[3]) / 2)),
                               20, (255), -1)
                    x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                    y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
                num_of_person = len(bbox_stack[index[-1]])
            else:
                for bbox in filtered_bbox:
                    cv2.rectangle(
                        org_frame, (int(bbox[0]), int(bbox[1])),
                        (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
                        (255, 0, 0), 2)
                    cv2.circle(mask_frame, (int((2 * bbox[0] + bbox[2]) / 2),
                                            int((2 * bbox[1] + bbox[3]) / 2)),
                               20, (255), -1)
                    x_mask.append(int((2 * bbox[0] + bbox[2]) / 2))
                    y_mask.append(int((2 * bbox[1] + bbox[3]) / 2))
                num_of_person = len(filtered_bbox)

            bbox_stack.append(filtered_bbox)

            # #index = bbox_stack_len.index(key)
            # index = [i for i, e in enumerate(bbox_stack_len) if e == key]
            del bbox_stack[0]

            # if (index[-1] == len(filtered_bbox)):
            #     del bbox_stack[0]
            # else:
            #     del bbox_stack[index[0]]
        # else:
        #     for bbox in filtered_bbox:
        #         cv2.rectangle(org_frame,(int(bbox[0]), int(bbox[1])), (int(bbox[0]+bbox[2]), int(bbox[1]+bbox[3])),(255,0,0), 2)
        cv2.imshow('mask' + cam,
                   cv2.threshold(mask_frame, 1, 255, cv2.THRESH_BINARY)[1])

        nop_list.append(int(num_of_person))

        video_mask_frame += cv2.threshold(mask_frame, 1, 255,
                                          cv2.THRESH_BINARY)[1] / 255.0

        #cv2.rectangle(org_frame,(x,y),(x+wi,y+he),(0,0,255),2)
        #cv2.drawContours(org_frame, contours, 0, (0, 255, 0), 1)

        # Update tracker
        # print(org_frame.shape, boxs)
        # detections = np.array(boxs)
        # if (detections.shape[0] != 0):
        #     detections[:,2] = detections[:,2] + detections[:,0]
        #     detections[:,3] = detections[:,3] + detections[:,1]
        # trackers = tracker.update(detections, frame)

        # Put number and fps
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(org_frame, 'FPS: ' + str(int(fps)), (10, 35),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.putText(org_frame, 'Pps: ' + str(num_of_person), (10, 65),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

        # ax1 = fig.add_subplot(111, aspect='equal')
        # ax1.imshow(org_frame)

        # for d in trackers:
        #     #f_out.write('%d,%d,%d,%d,x,x,x,x,%.3f,%.3f,%.3f,%.3f\n' % (d[4], frame, 1, 1, d[0], d[1], d[2], d[3]))
        #     d = d.astype(np.int32)
        #     ax1.add_patch(patches.Rectangle((d[0], d[1]), d[2] - d[0], d[3] - d[1], fill=False, lw=3,
        #                                     ec=colours[d[4] % 32, :]))
        #     ax1.set_adjustable('box')
        #     #label
        #     ax1.annotate('id = %d' % (d[4]), xy=(d[0], d[1]), xytext=(d[0], d[1]))
        #     if detections != []:#detector is active in this frame
        #         ax1.annotate(" DETECTOR", xy=(5, 45), xytext=(5, 45))

        # plt.axis('off')
        # fig.canvas.flush_events()
        # plt.draw()
        # fig.tight_layout()
        # #save the frame with tracking boxes
        # ax1.cla()

        # Apply transparent mask to frame
        tmp_mask = org_frame.copy()
        tmp_mask[mask == 255] = 255
        alpha = 0.6
        cv2.addWeighted(org_frame, alpha, tmp_mask, 1 - alpha, 0, org_frame)

        # Write video
        out.write(org_frame)

        cv2.imshow('original' + cam, org_frame)
        #cv2.imshow('masking', frame)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
    np.savetxt('data_res/result/' + str(number) + '.txt', [avg_people / count])
    np.savetxt('data_res/res_' + cam + '/' + name.replace('.mp4', '') + '.txt',
               nop_list,
               fmt='%d')
    np.savetxt('data_res/cor_res_' + cam + '/x' + name.replace('.mp4', '') +
               '.txt',
               x_mask,
               fmt='%d')
    np.savetxt('data_res/cor_res_' + cam + '/y' + name.replace('.mp4', '') +
               '.txt',
               y_mask,
               fmt='%d')
    # Normalize and save
    #video_mask_frame = np.array(255 * (video_mask_frame - min(video_mask_frame.flatten())) / (max(video_mask_frame.flatten()) - min(video_mask_frame.flatten())), dtype=np.uint8)
    #print(video_mask_frame)
    #ax = sns.heatmap(video_mask_frame, vmin=0, vmax=1, cmap='jet')
    #plt.savefig('data_res/heat_res_' + cam + '/' + 'heat_' + cam + '_' + str(number) + '.png')

    #imC = cv2.applyColorMap(video_mask_frame, cv2.COLORMAP_JET)
    #cv2.imwrite('data_res/heat_res_' + cam + '/' + 'heat_' + cam + '_' + str(number) + '.jpg', imC)
    return video_mask_frame
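The bbox_stack logic in example #30 smooths the per-frame person count: it keeps the filtered boxes of the last stack_num frames, takes the most common box count in that window, and falls back to the most recent window frame with that count when the current frame disagrees. A compact sketch of that majority-count smoothing, with stack_num assumed to be 5 and smooth_boxes a hypothetical helper name:

from collections import Counter

stack_num = 5   # assumed window size (a module-level constant in the original)
bbox_stack = []

def smooth_boxes(filtered_bbox):
    """Majority-count smoothing over the last stack_num frames of boxes."""
    if len(bbox_stack) < stack_num:
        bbox_stack.append(filtered_bbox)
        return filtered_bbox

    counts = [len(frame_boxes) for frame_boxes in bbox_stack]
    majority_count = Counter(counts).most_common(1)[0][0]
    # indices of window frames whose box count matches the majority
    matching = [i for i, c in enumerate(counts) if c == majority_count]

    if len(filtered_bbox) != majority_count:
        # current frame disagrees: reuse the most recent consistent frame's boxes
        result = bbox_stack[matching[-1]]
    else:
        result = filtered_bbox

    bbox_stack.append(filtered_bbox)
    del bbox_stack[0]
    return result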