Example #1
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = False
    asyncVideo_flag = False
    webcamera_flag = True
    ipcamera_flag = False
    udp_flag = True

    file_path = '/workspace/data/C0133_v4.mp4'
    if asyncVideo_flag :
        video_capture = VideoCaptureAsync(file_path)
    elif ipcamera_flag :
        video_capture = cv2.VideoCapture('rtsp://*****:*****@192.168.2.201/ONVIF/MediaInput?profile=def_profile1')
    elif webcamera_flag :
        video_capture = cv2.VideoCapture(0)
    else:
        video_capture = cv2.VideoCapture(file_path)
        
    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    if udp_flag:
        HOST = ''
        PORT = 5000
        address = '192.168.2.255'
        sock = socket(AF_INET, SOCK_DGRAM)
        sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1)
        sock.bind((HOST, PORT))


    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    
    savetime = 0

    while True:
        nowtime = datetime.datetime.now().isoformat()
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        if time.time() - savetime >= 30: 
            print('save data') 
            cv2.imwrite("/workspace/images/image.png", frame)
            savetime = time.time()
        image = Image.fromarray(frame[...,::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                        zip(boxes, confidence, classes, features)]
        else:
            detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in
                        zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)
                # Broadcast "timestamp,track_id,x1,y1,x2,y2" over UDP
                if udp_flag:
                    message = "{},{},{},{},{},{}".format(
                        nowtime, track.track_id,
                        int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
                    sock.sendto(message.encode('utf-8'), (address, PORT))


        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        #cv2.imshow('', frame)

        if writeVideo_flag: # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1./(time.time()-t1))) / 2
            print("FPS = %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
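Note: Example #1 broadcasts one comma-separated record per confirmed track ("timestamp,track_id,x1,y1,x2,y2") to UDP port 5000. A minimal receiver for that message format might look like the sketch below; it assumes the receiver runs on another host on the same subnet, and otherwise mirrors the sender above.

from socket import socket, AF_INET, SOCK_DGRAM

recv_sock = socket(AF_INET, SOCK_DGRAM)
recv_sock.bind(('', 5000))  # same PORT the sender broadcasts to

while True:
    data, addr = recv_sock.recvfrom(4096)
    # Decode "timestamp,track_id,x1,y1,x2,y2" back into fields
    nowtime, track_id, x1, y1, x2, y2 = data.decode('utf-8').split(',')
    print(addr, nowtime, track_id, (int(x1), int(y1), int(x2), int(y2)))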
Example #2
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = False
    asyncVideo_flag = False

    file_path = 'D:/video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame,
                            str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
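These main(yolo) examples are typically launched with an entry point like the sketch below; the YOLO class and its constructor arguments come from the surrounding repository and are assumed here.

if __name__ == '__main__':
    main(YOLO())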
Example #3
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    model_par, valid_transform = model_init_par()
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame,
                            str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                crop_img = image.crop(
                    [int(bbox[0]),
                     int(bbox[1]),
                     int(bbox[2]),
                     int(bbox[3])])
                #res_txt = demo_par(model_par, valid_transform, crop_img)

                #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1)

                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                # font = ImageFont.truetype(
                #     '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf',
                #     size=10)  # unused while the attribute-recognition code below is commented out
                # positive_cnt = 1
                # for txt in res_txt:
                #     if 'personal' in txt:
                #         #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font)
                #         cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0,
                #                     1e-3 * frame.shape[0], (0, 255, 0), 1)
                #         positive_cnt += 1

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
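For reference, a sketch of the imports these examples rely on; the module paths follow the common Deep SORT + YOLO repositories this code appears to come from, so adjust them to your checkout.

import datetime
import time

import cv2
import imutils.video
import numpy as np
from PIL import Image

from deep_sort import nn_matching, preprocessing
from deep_sort.detection import Detection
from deep_sort.detection_yolo import Detection_YOLO
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet
from videocaptureasync import VideoCaptureAsync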
Example #4
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    width = 1280
    height = 720
    rfps = 10

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = args.tracking
    writeVideo_flag = args.writeVideo_flag
    asyncVideo_flag = args.asyncVideo_flag
    webcamera_flag = args.webcamera_flag
    ipcamera_flag = args.ipcamera_flag
    udp_flag = args.udp_flag

    full_cam_addr, key = sd.set_address(args.ipaddress, args.cam_ip,
                                        args.cam_cmd, args.key)
    cam_ip = full_cam_addr.replace(args.cam_cmd,
                                   "").replace("rtsp://*****:*****@", "")
    if args.jpegmode:
        full_cam_addr = full_cam_addr.replace("rtsp", "http")

    print(full_cam_addr)
    print(key)

    if asyncVideo_flag:
        print("load videofile")
        video_capture = VideoCaptureAsync(args.videofile)
    elif ipcamera_flag or args.jpegmode:
        print("load ipcamera")
        video_capture = cv2.VideoCapture(full_cam_addr)
        # width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        # height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
        # rfps = video_capture.get(cv2.CAP_PROP_FPS)
        print("fps:{}width:{}height:{}".format(rfps, width, height))
    elif webcamera_flag:
        print("load webcamera")
        video_capture = cv2.VideoCapture(0)
    else:
        print("load videofile")
        video_capture = cv2.VideoCapture(args.videofile)

    # video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    if udp_flag:
        HOST = ''
        PORT = 5000
        address = '192.168.2.255'
        sock = socket(AF_INET, SOCK_DGRAM)
        sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1)
        sock.bind((HOST, PORT))

    fps = 0.0

    i = 0

    if not args.maskoff:
        maskbgi = Image.new('RGB', (int(width), int(height)), (0, 0, 0))
        mask = Image.open(args.maskdir + 'mask' + args.ipaddress[-1] +
                          '.png').convert("L").resize(size=(int(width),
                                                            int(height)),
                                                      resample=Image.NEAREST)

    while True:
        nowtime = datetime.datetime.now(
            timezone('Asia/Tokyo')).strftime('%Y-%m-%d %H:%M:%S.%f%z')
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            print("can't read frame, reopening capture")
            video_capture = cv2.VideoCapture(full_cam_addr)
            continue

        t1 = time.time()

        try:
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        except TypeError:
            video_capture = cv2.VideoCapture(full_cam_addr)
            continue
        if not args.maskoff:
            image = Image.composite(maskbgi, image, mask)
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]
        car_data = {}
        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                car_data[str(track.track_id)] = [
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]),
                    int(bbox[3])
                ]
            sd.send_amqp(
                sd.create_jsondata(cam_ip, nowtime,
                                   time.time() - t1, car_data, args.jsonfile,
                                   args.json_path, i), key, args.AMQPHost)
            i += 1

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        ### Frame-skipping: discard reads to catch up when processing lags ###
        if not args.jsonfile and args.skip:
            if fps <= 10:
                for _i in range(int(math.ceil(rfps / fps)) - 1):
                    ret, frame = video_capture.read()

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
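Example #4 recovers from dropped RTSP reads by reopening the capture and continuing the loop. The same pattern, factored into a helper, might look like this sketch (the helper name is hypothetical):

def read_with_reconnect(cap, url):
    # Read one frame; on failure, reopen the stream once and retry.
    ret, frame = cap.read()
    if not ret or frame is None:
        cap.release()
        cap = cv2.VideoCapture(url)
        ret, frame = cap.read()
    return cap, ret, frame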
Example #5
    def yolo_frames(unique_name):
        device = unique_name[1]

        tracking = True

        if tracking:
            gdet = import_module('tools.generate_detections')
            nn_matching = import_module('deep_sort.nn_matching')
            Tracker = import_module('deep_sort.tracker').Tracker

            # Definition of the parameters
            max_cosine_distance = 0.3
            nn_budget = None

            # deep_sort
            model_filename = 'model_data/mars-small128.pb'
            encoder = gdet.create_box_encoder(model_filename, batch_size=1)

            metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
            tracker = Tracker(metric)

        yolo = YOLO()
        nms_max_overlap = 1.0

        num_frames = 0

        get_feed_from = ('camera', device)

        current_date = datetime.datetime.now().date()
        count_dict = {}  # initiate dict for storing counts
        while True:
            cam_id, frame = BaseCamera.get_frame(get_feed_from)
            # image_height, image_width = frame.shape[:2]

            if frame is None:
                break

            num_frames += 1

            if num_frames % 2 != 0:  # only process frames at set number of frame intervals
                continue

            #image = Image.fromarray(frame)
            image = Image.fromarray(frame[..., ::-1])  # convert bgr to rgb
            boxes, confidence, classes = yolo.detect_image(image)
            if tracking:
                features = encoder(frame, boxes)

                detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                              zip(boxes, confidence, classes, features)]
            else:
                detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in
                              zip(boxes, confidence, classes)]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            class_counter = Counter()  # store counts of each detected class

            if tracking:
                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                track_count = 0  # reset counter

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255),
                                  1)  # WHITE BOX
                    cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                                1.5e-3 * frame.shape[0], (0, 255, 0), 1)

                    track_count += 1  # add 1 for each tracked object

                cv2.putText(frame, "Current total count: " + str(track_count), (int(20), int(60 * 5e-3 * frame.shape[0])), 0, 2e-3 * frame.shape[0],
                            (255, 255, 255), 2)

            det_count = 0
            for det in detections:
                bbox = det.to_tlbr()
                score = "%.2f" % (det.confidence * 100) + "%"
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0),
                              1)  # BLUE BOX
                if len(classes) > 0:
                    cls = det.cls
                    cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
                                1.5e-3 * frame.shape[0], (0, 255, 0), 1)
                    class_counter[cls] += 1
                det_count += 1

            # display counts for each class as they appear
            y = 80 * 5e-3 * frame.shape[0]
            for cls in class_counter:
                class_count = class_counter[cls]
                cv2.putText(frame, str(cls) + " " + str(class_count), (int(20), int(y)), 0, 2e-3 * frame.shape[0],
                            (255, 255, 255), 2)
                y += 20 * 5e-3 * frame.shape[0] #TODO apply this to other text

            # use YOLO counts if tracking is turned off
            if tracking:
                count = track_count
            else:
                count = det_count

            # calculate current minute
            now = datetime.datetime.now()
            rounded_now = now - datetime.timedelta(microseconds=now.microsecond)  # drop sub-second precision
            current_minute = now.time().minute

            if current_minute == 0 and len(count_dict) > 1:
                count_dict = {}  # reset counts every hour
            else:
                # write counts to file for every set interval of the hour
                write_interval = 5
                if current_minute % write_interval == 0:  # write to file once only every write_interval minutes
                    if current_minute not in count_dict:
                        count_dict[current_minute] = True
                        total_filename = 'Total counts for {}, camera {}.txt'.format(current_date, device)
                        total_count_file = open(total_filename, 'a')
                        print('Writing current total count ({}) to file.'.format(count))
                        total_count_file.write(str(rounded_now) + ", " + device + ', ' + str(count) + "\n")
                        total_count_file.close()

                        # if class exists in class counter, create file and write counts
                        for cls in class_counter:
                            class_count = class_counter[cls]
                            class_filename = '{} counts for {}, camera {}.txt'.format(cls, current_date, device)
                            class_count_file = open(class_filename, 'a')
                            print('Writing current {} count ({}) to file.'.format(cls, class_count))
                            class_count_file.write(str(rounded_now) + ", " + device + ', ' + str(class_count) + "\n")
                            class_count_file.close()

            yield cam_id, frame
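The per-interval count files above are opened and closed by hand; an equivalent write using a context manager (a sketch, with a hypothetical helper name) closes the file even if the write raises:

def append_count(filename, timestamp, device, count):
    with open(filename, 'a') as f:
        f.write('{}, {}, {}\n'.format(timestamp, device, count))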
Example #6
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = False
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = ['out.mp4']
    #file_path = ['veed.mp4']
    cols = math.ceil(math.sqrt(len(file_path)))
    rows = math.ceil(len(file_path) / cols)
    singleHeight = int(screenHeight / rows)
    singleWidth = int(screenWidth / cols)
    out_image = np.zeros((screenHeight, screenWidth, 3), np.uint8)

    #if asyncVideo_flag :
    #    video_capture = VideoCaptureAsync(file_path)
    #else:
    #    video_capture = cv2.VideoCapture(file_path)

    video_captures = []
    cameras = []
    prvTimes = []
    localgloballink = []
    imgsSaved = 2

    for i in range(len(file_path)):
        video_captures.append(cv2.VideoCapture(file_path[i]))
        cameras.append(Camera())
        prvTimes.append(time.time())

    #if asyncVideo_flag:
    #    video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = screenWidth
            h = screenHeight
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    frame = []
    globalPersonCount = 1
    for file in file_path:
        frame.append(None)

    curFrame = 1
    gtIndex = 0

    while True:
        allimages = []

        for index in range(len(file_path)):
            cur = time.time()
            ret, frame[index] = video_captures[index].read()  # frame shape 640*480*3
            if not ret:
                break

            t1 = time.time()

            image = Image.fromarray(frame[index][..., ::-1])  # bgr to rgb
            boxes, confidence, classes = yolo.detect_image(image)

            if tracking:
                features = encoder(frame[index], boxes)

                detections = [
                    Detection(bbox, confidence, cls, feature)
                    for bbox, confidence, cls, feature in zip(
                        boxes, confidence, classes, features)
                ]
            else:
                detections = [
                    Detection_YOLO(bbox, confidence,
                                   cls) for bbox, confidence, cls in zip(
                                       boxes, confidence, classes)
                ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            for det in detections:
                bbox = det.to_tlbr()
                score = "%.2f" % round(det.confidence * 100, 2) + "%"
                cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                if len(classes) > 0:
                    cls = det.cls
                    cv2.putText(frame[index],
                                str(cls) + " " + score,
                                (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame[index].shape[0], (0, 255, 0), 1)

            #nabin's code
            hsvImage = cv2.cvtColor(frame[index], cv2.COLOR_BGR2HSV)

            hungarianmatrix = []
            indexx = 0
            if (len(cameras[index].PersonData) > 0):
                diff = cur - prvTimes[index]
                times = int(diff / 0.05)
                prvTimes[index] = cur
                for data in cameras[index].PersonData:
                    if data.kf is not None:
                        for i in range(times):
                            data.kf.predict()
            nodata = len(cameras[index].PersonData)
            for z in range(len(cameras[index].PersonData)):
                cameras[index].PersonData[z].updated = False
            for det in detections:
                bbox = det.to_tlbr()
                if (nodata == 0):
                    persondata = PersonData()
                    persondata.color = [
                        int(random.randint(0, 255)),
                        int(random.randint(0, 255)),
                        int(random.randint(0, 255))
                    ]
                    persondata.positions.append([(bbox[0] + bbox[2]) / 2,
                                                 (bbox[1] + bbox[3]) / 2])
                    persondata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1,
                                                 (bbox[1] + bbox[3]) / 2 + 0.1
                                                 ])
                    persondata.top = bbox[0]
                    persondata.left = bbox[1]
                    persondata.lastPosition = bbox
                    persondata.localPersonIndex = cameras[
                        index].localPersonCount
                    persondata.kf = KF(persondata.positions[0][0],
                                       persondata.positions[0][1], 0, 0)
                    persondata.globalPersonIndex = globalPersonCount
                    localgloballink.append([
                        globalPersonCount, index, persondata.localPersonIndex
                    ])
                    globalPersonCount = globalPersonCount + 1
                    cameras[index].localPersonCount = cameras[
                        index].localPersonCount + 1
                    hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                               int(bbox[0]):int(bbox[2])]
                    persondata.histogram_h = cv2.calcHist([hsvCroppedImage],
                                                          [0], None, [180],
                                                          [0, 180])
                    persondata.histogram_h = np.divide(persondata.histogram_h,
                                                       ((bbox[3] - bbox[1]) *
                                                        (bbox[2] - bbox[0])))
                    cameras[index].PersonData.append(persondata)
                else:
                    hungarianmatrix.append([])
                    hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                               int(bbox[0]):int(bbox[2])]
                    histogram_h = cv2.calcHist([hsvCroppedImage], [0], None,
                                               [180], [0, 180])
                    histogram_h = np.divide(histogram_h, ((bbox[3] - bbox[1]) *
                                                          (bbox[2] - bbox[0])))

                    for z in range(len(cameras[index].PersonData)):
                        postions = len(
                            cameras[index].PersonData[z].positions) - 1
                        cov = np.cov(
                            np.asarray(
                                cameras[index].PersonData[z].positions).T)
                        #mahal=(distance.mahalanobis([cameras[index].PersonData[z].kf.calulatedmean[0],cameras[index].PersonData[z].kf.calulatedmean[2]],[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2],cov))/ frame[index].shape[0]
                        mahal = math.sqrt((cameras[index].PersonData[z].
                                           positions[postions][0] -
                                           (bbox[0] + bbox[2]) / 2)**2 +
                                          (cameras[index].PersonData[z].
                                           positions[postions][1] -
                                           (bbox[1] + bbox[3]) / 2)**2
                                          ) / frame[index].shape[0]
                        #mahal=math.sqrt((cameras[index].PersonData[z].kf.calulatedmean[0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].kf.calulatedmean[1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0]
                        #mahal=getMahalanbolisDist(cameras[index].PersonData[z].positions,[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                        mahal += (np.sum(
                            np.absolute(
                                np.subtract(
                                    histogram_h,
                                    cameras[index].PersonData[z].histogram_h)))
                                  )
                        hungarianmatrix[indexx].append(mahal)
                    indexx = indexx + 1
                print(hungarianmatrix)
            if (nodata != 0):
                row_ind = []
                col_ind = []
                if (hungarianmatrix != []):
                    row_ind, col_ind = linear_sum_assignment(hungarianmatrix)
                indexx = 0
                for pos in range(len(col_ind)):
                    if (hungarianmatrix[row_ind[pos]][col_ind[pos]] <
                            2 - detections[row_ind[pos]].confidence):
                        bbox = detections[row_ind[pos]].to_tlbr()
                        detections[row_ind[pos]].localProcessed = True
                        cameras[index].PersonData[col_ind[pos]].updated = True
                        cameras[index].PersonData[col_ind[pos]].top = bbox[0]
                        cameras[index].PersonData[col_ind[pos]].left = bbox[1]
                        cameras[index].PersonData[col_ind[pos]].kf.update([
                            (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
                        ])
                        cameras[index].PersonData[
                            col_ind[pos]].lastPosition = bbox
                        cameras[index].PersonData[
                            col_ind[pos]].positions.append([
                                (bbox[0] + bbox[2]) / 2,
                                (bbox[1] + bbox[3]) / 2
                            ])
                        hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                                   int(bbox[0]):int(bbox[2])]
                        toadd = detections[row_ind[pos]].confidence - 0.7
                        cameras[index].PersonData[
                            col_ind[pos]].histogram_h = np.add(
                                np.multiply(
                                    cv2.calcHist([hsvCroppedImage], [0], None,
                                                 [180], [0, 180]),
                                    toadd * 1 / (((bbox[3] - bbox[1]) *
                                                  (bbox[2] - bbox[0])))),
                                np.multiply(
                                    cameras[index].PersonData[
                                        col_ind[pos]].histogram_h, 1 - toadd))
                        if (len(cameras[index].PersonData[
                                col_ind[pos]].positions) > 6):
                            cameras[index].PersonData[
                                col_ind[pos]].positions.pop(0)

                for pos in range(len(detections)):
                    if not hasattr(detections[pos], 'localProcessed'):
                        bbox = detections[pos].to_tlbr()
                        #if(bbox[1]>hsvImage.shape[0]):
                        #    continue
                        ndata = PersonData()
                        ndata.top = bbox[0]
                        ndata.left = bbox[1]
                        ndata.positions.append([(bbox[0] + bbox[2]) / 2,
                                                (bbox[1] + bbox[3]) / 2])
                        ndata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1,
                                                (bbox[1] + bbox[3]) / 2 + 0.1])
                        ndata.kf = KF((bbox[0] + bbox[2]) / 2,
                                      (bbox[1] + bbox[3]) / 2, 0, 0)
                        ndata.color = [
                            int(random.randint(0, 255)),
                            int(random.randint(0, 255)),
                            int(random.randint(0, 255))
                        ]
                        ndata.localPersonIndex = cameras[
                            index].localPersonCount
                        ndata.lastPosition = bbox

                        ndata.kf = KF(ndata.positions[0][0],
                                      ndata.positions[0][1], 0, 0)
                        cameras[index].localPersonCount = cameras[
                            index].localPersonCount + 1
                        localgloballink.append(
                            [globalPersonCount, index, ndata.localPersonIndex])
                        ndata.globalPersonIndex = globalPersonCount
                        globalPersonCount = globalPersonCount + 1
                        hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                                   int(bbox[0]):int(bbox[2])]
                        ndata.histogram_h = cv2.calcHist([hsvCroppedImage],
                                                         [0], None, [180],
                                                         [0, 180])
                        ndata.histogram_h = np.divide(ndata.histogram_h,
                                                      ((bbox[3] - bbox[1]) *
                                                       (bbox[2] - bbox[0])))

                        cameras[index].PersonData.append(ndata)

            #allimages.append([])
            if (len(file_path)) != 1:
                for pdata in cameras[index].PersonData:
                    if (pdata.updated):
                        nimg = cv2.resize(
                            frame[index][int(pdata.lastPosition[1]
                                             ):int(pdata.lastPosition[3]),
                                         int(pdata.lastPosition[0]
                                             ):int(pdata.lastPosition[2])],
                            (64, 128),
                            interpolation=cv2.INTER_AREA)
                        #allimages[len(allimages)-1].append(np.array(nimg))
                        pdata.imgs.append(nimg)
                        if (len(pdata.imgs) == imgsSaved + 1):
                            pdata.imgs.pop(0)
            #nabin's code ends

            if tracking:
                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame[index], "ID: " + str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame[index].shape[0], (0, 255, 0), 1)

            #if(len(cameras)==2):
            #globalHungarian=[]
            #    for fdata in range(len(cameras[0].PersonData)):
            #        globalHungarian.append([])
            #        for pdata in cameras[1].PersonData:
            #            globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(pdata.histogram_h,cameras[0].PersonData[fdata].histogram_h))))
            #
            #    row_ind, col_ind = linear_sum_assignment(globalHungarian)
            #    for row in range(len(row_ind)):
            #        cv2.putText(frame[0], chr(ord('a')+row),(int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][0]), int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2)
            #        cv2.putText(frame[1], chr(ord('a')+row),(int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][0]), int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2)

        if (len(cameras) == 1):
            hypos = []
            hyposPos = []
            for person in cameras[0].PersonData:
                if (person.updated == True):
                    cv2.putText(
                        frame[0], str(person.localPersonIndex),
                        (int(person.positions[len(person.positions) - 1][0]),
                         int(person.positions[len(person.positions) - 1][1])),
                        0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1)
                if (person.updated == True):
                    hypos.append(person.localPersonIndex + 1)
                    hyposPos.append([person.top, person.left])
            gts = []
            gtsPos = []
            while gtIndex < len(gt) and gt[gtIndex][0] == curFrame:
                gts.append(gt[gtIndex][1])
                gtsPos.append([gt[gtIndex][2], gt[gtIndex][3]])
                gtIndex = gtIndex + 1
            curFrame = curFrame + 1
            dis = mm.distances.norm2squared_matrix(np.array(gtsPos),
                                                   np.array(hyposPos))
            acc.update(gts, hypos, dis)

        else:
            edges = []
            globalHungarian = []
            for i in range(len(cameras)):
                for j in range(i + 1, len(cameras)):
                    globalHungarian = []
                    x = 0
                    xindexes = []
                    yindexes = []
                    stackedimgages = []
                    for pos in range(imgsSaved):
                        stackedimgages.append([])
                        for person in cameras[j].PersonData:
                            if (person.updated == True
                                    and len(person.imgs) == imgsSaved):
                                stackedimgages[pos].append(person.imgs[pos])
                    for fdata in range(len(cameras[i].PersonData)):
                        if (cameras[i].PersonData[fdata].updated == False
                                or len(cameras[i].PersonData[fdata].imgs) !=
                                imgsSaved):
                            continue
                        xindexes.append(fdata)
                        y = 0
                        triplet = test(cameras[i].PersonData[fdata].imgs[0],
                                       stackedimgages[0])
                        for pos in range(1, imgsSaved):
                            triplet = np.add(
                                triplet,
                                test(cameras[i].PersonData[fdata].imgs[pos],
                                     stackedimgages[pos]))
                        globalHungarian.append([])
                        for pdata in range(len(cameras[j].PersonData)):
                            if (cameras[j].PersonData[pdata].updated == False
                                    or len(cameras[j].PersonData[pdata].imgs)
                                    != imgsSaved):
                                continue
                            #globalHungarian[x].append(triplet[y])
                            globalHungarian[x].append(
                                np.sum(
                                    np.absolute(
                                        np.subtract(
                                            cameras[j].PersonData[pdata].
                                            histogram_h, cameras[i].
                                            PersonData[fdata].histogram_h))) *
                                2 + triplet[y])
                            if (x == 0):
                                yindexes.append(pdata)
                            #globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h))))
                            #globalHungarian[fdata].append(triplet[pdata])
                            y = y + 1
                        x = x + 1
                    if (len(globalHungarian) != 0):
                        row_ind, col_ind = linear_sum_assignment(
                            globalHungarian)
                        print(globalHungarian)
                        for pos in range(len(row_ind)):
                            if (globalHungarian[row_ind[pos]][col_ind[pos]] <
                                    3.2):
                                edges.append(
                                    (cameras[i].PersonData[xindexes[
                                        row_ind[pos]]].globalPersonIndex,
                                     cameras[j].PersonData[yindexes[
                                         col_ind[pos]]].globalPersonIndex))

            Allcliques = cliques(edges, len(cameras),
                                 globalPersonCount).getCliques()

            for cam in cameras:
                for person in cam.PersonData:
                    isinclique = True
                    for clique in Allcliques:

                        if person.globalPersonIndex in clique:
                            isinclique = False
                            break
                    if isinclique:
                        Allcliques.append([person.globalPersonIndex])

            for sclique in Allcliques:
                indexes = []
                cur = min(sclique)

                for i in range(len(sclique)):
                    isInclique = False
                    prvIndex = cameras[localgloballink[
                        sclique[i] - 1][1]].PersonData[localgloballink[
                            sclique[i] - 1][2]].prvglobalFoundOutPersonIndex
                    if prvIndex == -1:
                        isInclique = True
                    else:
                        for snclique in Allcliques:
                            if prvIndex in snclique:
                                isInclique = True
                                break
                    if isInclique == True:
                        cameras[localgloballink[sclique[i] - 1][1]].PersonData[
                            localgloballink[sclique[i] - 1]
                            [2]].globalFoundOutPersonIndex = cur
                    else:
                        cameras[localgloballink[sclique[i] - 1][1]].PersonData[
                            localgloballink[sclique[i] - 1]
                            [2]].globalFoundOutPersonIndex = prvIndex

            for cam in range(len(cameras)):
                for person in cameras[cam].PersonData:
                    if person.updated == True:
                        cv2.putText(
                            frame[cam], str(person.globalFoundOutPersonIndex),
                            (int(person.positions[len(person.positions) -
                                                  1][0]),
                             int(person.positions[len(person.positions) -
                                                  1][1])), 0,
                            1e-3 * frame[index].shape[0], (0, 255, 0), 2)

            for sclique in Allcliques:
                for i in range(len(sclique)):
                    cameras[localgloballink[sclique[i] - 1][1]].PersonData[
                        localgloballink[sclique[i] - 1]
                        [2]].prvglobalFoundOutPersonIndex = cameras[
                            localgloballink[sclique[i] - 1][1]].PersonData[
                                localgloballink[sclique[i] - 1]
                                [2]].globalFoundOutPersonIndex
        out_image.fill(0)
        vindex = 0
        for row in range(rows):
            for col in range(cols):
                if (vindex == len(file_path)):
                    break
                vidshape = frame[vindex].shape
                curvidheightratio = vidshape[0] / singleHeight
                curvidwidthratio = vidshape[1] / singleWidth

                if (curvidwidthratio < curvidheightratio):
                    #height is small
                    resizedwidth = int(vidshape[1] / vidshape[0] *
                                       singleHeight)
                    nimg = cv2.resize(frame[vindex],
                                      (resizedwidth, singleHeight),
                                      interpolation=cv2.INTER_AREA)
                    widthpos = int(
                        (singleWidth - resizedwidth) / 2) + col * singleWidth
                    out_image[row * singleHeight:(row + 1) * singleHeight,
                              widthpos:widthpos + resizedwidth] = nimg
                else:
                    #width is small
                    resizedheight = int(vidshape[0] / vidshape[1] *
                                        singleWidth)
                    nimg = cv2.resize(frame[vindex],
                                      (singleWidth, resizedheight),
                                      interpolation=cv2.INTER_AREA)
                    heightpos = int(((singleHeight - resizedheight) / 2) +
                                    row * singleHeight)
                    out_image[heightpos:heightpos + resizedheight,
                              col * singleWidth:(col + 1) * singleWidth] = nimg
                vindex = vindex + 1

        if (len(cameras) == 1):
            mh = mm.metrics.create()
            summary = mh.compute(acc,
                                 metrics=['num_frames', 'mota', 'motp'],
                                 name='acc')
            print(summary)
        cv2.imshow('', out_image)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(out_image)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        for vc in video_captures:
            vc.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
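Example #6 matches detections to stored persons by minimising a cost matrix with the Hungarian algorithm (scipy's linear_sum_assignment). A minimal standalone demonstration of that call, on made-up costs:

import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.2, 0.9, 0.7],    # rows: detections
                 [0.8, 0.1, 0.6]])   # columns: tracked persons
row_ind, col_ind = linear_sum_assignment(cost)
# Pairs each detection with the person minimising total cost
print([(int(r), int(c)) for r, c in zip(row_ind, col_ind)])  # [(0, 0), (1, 1)]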
Example #7
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'IMG_3326.MOV'
    dfObj = pd.DataFrame(
        columns=['frame_num', 'track', 'cx', 'cy', 'w', 'h', 'track_temp'])
    dfObjDTP = pd.DataFrame(columns=[
        'filename', 'frame_num', 'bb1', 'bb2', 'bb3', 'bb4', 'track',
        'track_temp', 'Height'
    ])

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker

            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()

                # This crops the image for each frame

                #cropped_image = frame[int(bbox[1]):int(bbox[1])+(int(bbox[3])-int(bbox[1])),int(bbox[0]):int(bbox[0])+(int(bbox[2])-int(bbox[0]))]
                cropped_image = frame[int(bbox[1]):int(bbox[1]) + 256,
                                      int(bbox[0]):int(bbox[0]) + 128]
                # cropped_image = frame[2:5,6:10]

                # Disable or comment this out so no white box is drawn
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                #
                # cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                #             1.5e-3 * frame.shape[0], (0, 255, 0), 1)

                # print(cropped_image)
                dirname = "output_crop/{}".format(track.track_id)
                os.makedirs(dirname, exist_ok=True)

                if cropped_image.size == 0:
                    continue
                writeStatus = cv2.imwrite(
                    "output_crop/{}/frame_{}.png".format(
                        track.track_id, frame_index), cropped_image)
                print("output_crop/{}/frame_{}.png".format(
                    track.track_id, frame_index))

                # Write CSV
                dfObj = dfObj.append(
                    pd.Series(
                        [frame_index, track.track_id,
                         int(bbox[0]), int(bbox[1]),
                         int(bbox[2]) - int(bbox[0]),
                         int(bbox[3]) - int(bbox[1]),
                         track.time_since_update],
                        index=dfObj.columns),
                    ignore_index=True)

                dfObjDTP = dfObjDTP.append(
                    pd.Series(
                        [file_path, frame_index,
                         int(bbox[0]), int(bbox[1]),
                         int(bbox[2]), int(bbox[3]),
                         track.track_id, track.time_since_update,
                         int(bbox[3]) - int(bbox[1])],
                        index=dfObjDTP.columns),
                    ignore_index=True)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"

            # Disable or comment this out so no white box appears in the cropped image
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

            # if len(classes) > 0:
            #     cls = det.cls
            #     cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
            #                 1.5e-3 * frame.shape[0], (0, 255, 0), 1)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    dfObj = dfObj.sort_values(["track", "frame_num"], ascending=(True, True))
    dfObj.to_csv(r'result_temp.csv', index=False)
    dfObjDTP = dfObjDTP.sort_values(["track", "frame_num"],
                                    ascending=(True, True))
    dfObjDTP.to_csv(r'result_temp_dtp.csv', index=False)
    convert_to_final()
    cv2.destroyAllWindows()
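Note on the CSV logging above: DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0. A minimal sketch of the same per-track logging that accumulates plain dicts and builds the DataFrame once after the loop (column names come from the example; rows and log_track are illustrative names):

import pandas as pd

rows = []  # one dict per confirmed track per frame

def log_track(frame_index, track, bbox):
    # bbox is (x1, y1, x2, y2) as returned by track.to_tlbr()
    rows.append({
        'frame_num': frame_index,
        'track': track.track_id,
        'cx': int(bbox[0]),
        'cy': int(bbox[1]),
        'w': int(bbox[2]) - int(bbox[0]),
        'h': int(bbox[3]) - int(bbox[1]),
        'track_temp': track.time_since_update,
    })

# after the capture loop: build, sort, and write the frame once
dfObj = pd.DataFrame(rows, columns=['frame_num', 'track', 'cx', 'cy',
                                    'w', 'h', 'track_temp'])
dfObj = dfObj.sort_values(['track', 'frame_num'])
dfObj.to_csv('result_temp.csv', index=False)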
Example #8
0
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # Calculate cosine Distance Metric 
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Flags for process
    tracking = True # Set False if you only want to do detection
    writeVideo_flag = True # Set False if you don't want to write frames locally
    asyncVideo_flag = False # Uses asynchronous capture for better FPS. Warning: may cause stuttering

    # Video File Path
    file_path = '/mydrive/test.mp4'
    # Check if asyncVideo flag set to True
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_file = os.path.basename(file_path)[:-4]
        out = cv2.VideoWriter('./Output/' + output_file + "-prueba-test.avi", fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read() # Capture frames
        if not ret:
            break

        t1 = time.time()
        
        # bgr to rgb frame conversion
        image = Image.fromarray(frame[...,::-1])  
        # YOLOv4 Detection
        boxes, confidence, classes = yolo.detect_image(image) 

        if tracking:
            # Encodes the frame and boxes for DeepSORT
            features = encoder(frame, boxes)
            # DeepSORT Detection
            detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                          zip(boxes, confidence, classes, features)]
        else:
            # Only YOLOv4 Detection
            detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in
                          zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # Draw white bbox for DeepSORT
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,(0, 255, 0), 1)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            # Check the class for colored bbox
            if len(classes) > 0:
                cls = det.cls
                center_bbox = ((int(bbox[0]) + int(bbox[2])) // 2, (int(bbox[1]) + int(bbox[3])) // 2)
                if str(cls) == 'person':
                    # Draw Blue bbox for YOLOv4 person detection
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (219, 152, 52), 2)
                elif str(cls) in ('backpack', 'handbag', 'suitcase'):
                    # Draw Orange bbox for YOLOv4 handbag, backpack and suitcase detection
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (65, 176, 245), 2)

        if not asyncVideo_flag:
            fps = (fps + (1./(time.time()-t1))) / 2
            print("FPS = %f"%(fps))
            cv2.putText(frame, "GPU: NVIDIA Tesla P100", (5, 70), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)
            cv2.putText(frame, "FPS: %.2f" % fps, (5, 50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)

            # draw the timestamp on the frame
            timestamp = datetime.datetime.now()
            ts = timestamp.strftime("%d/%m/%Y, %H:%M:%S")
            cv2.putText(frame, ts, (5, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1)            

        #cv2.imshow('', frame)

        if writeVideo_flag: # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()
          
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
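
A note on drawing per-class colors, as in the loop above: when more classes need their own colors, a dictionary lookup is more compact than chained elif branches (and sidesteps the `== 'a' or 'b'` truthiness pitfall). A minimal sketch; CLASS_COLORS and draw_detection are illustrative names, and the BGR values mirror the example:

import cv2

# Hypothetical BGR color table; classes not listed are skipped.
CLASS_COLORS = {
    'person': (219, 152, 52),    # blue, as above
    'backpack': (65, 176, 245),  # orange, as above
    'handbag': (65, 176, 245),
    'suitcase': (65, 176, 245),
}

def draw_detection(frame, det):
    # det is assumed to expose .to_tlbr() and .cls like the Detection objects above
    color = CLASS_COLORS.get(str(det.cls))
    if color is None:
        return
    x1, y1, x2, y2 = map(int, det.to_tlbr())
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)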
Example #9
0
def main(yolo):

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = True

    while True:
        print("Waiting...")
        (rpiName, frame) = imageHub.recv_image()
        print("Image Received...")

        t1 = time.time()
        ids = {}
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)

            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)
            ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        if tracking:
            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                ids[track.track_id] = bbox.tolist()

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
        print(time.time() - t1)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        imageHub.send_reply(json.dumps(ids).encode('ascii'))

    cv2.destroyAllWindows()
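The loop above receives frames over the network with imagezmq's ImageHub and answers each frame with a JSON dict mapping track IDs to boxes. For context, a minimal sketch of a matching sender (the REQ/REP pattern imagezmq uses by default); the endpoint address and camera index are assumptions:

import json
import socket

import cv2
import imagezmq

# 'tcp://192.168.0.10:5555' is a placeholder; imagezmq listens on port 5555 by default.
sender = imagezmq.ImageSender(connect_to='tcp://192.168.0.10:5555')
rpi_name = socket.gethostname()

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # send_image blocks until the hub calls send_reply(); the reply here is
    # the JSON-encoded {track_id: bbox} dict produced by the tracking loop above.
    reply = sender.send_image(rpi_name, frame)
    print(json.loads(reply.decode('ascii')))
cap.release()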
Example #10
0
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    tracking = False
    writeVideo_flag = True
    asyncVideo_flag = False
    # file paths for the input videos
    file_path = ['out.mp4']
    # calculating the number of rows and columns based on the number of input videos
    cols=math.ceil(math.sqrt(len(file_path)))
    rows=math.ceil(len(file_path)/cols)
    # calculating the per-video height and width from the rows/cols and the screen width/height
    singleHeight=int(screenHeight/rows)
    singleWidth=int(screenWidth/cols)
    # out_image is the composite shown on screen and written to file
    out_image=np.zeros((screenHeight,screenWidth,3), np.uint8)

    #if asyncVideo_flag :
    #    video_capture = VideoCaptureAsync(file_path)
    #else:
    #    video_capture = cv2.VideoCapture(file_path)

    # video capture objects, referenced by index later
    video_captures = []
    # one Camera object per video/camera feed; stores per-person data
    cameras=[]
    # previous time the kalman filter was processed
    #prvTimes=[]
    # array linking a local person index to the global person index
    localgloballink=[]
    # number of images kept per person once tracked in a single camera
    imgsSaved=6

    # initializing the Camera objects and video captures
    for i in range(len(file_path)):
        video_captures.append(cv2.VideoCapture(file_path[i]))
        cameras.append(Camera())
        #prvTimes.append(time.time())

    #if asyncVideo_flag:
    #    video_capture.start()

    if writeVideo_flag:
        #if asyncVideo_flag:
        #    w = int(video_capture.cap.get(3))
        #    h = int(video_capture.cap.get(4))
        #else:
        #setting width and height of video file written
        w = screenWidth
        h = screenHeight
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        #number of frames processed till now
        frame_index = -1

    #fps
    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    #frames found in the current run
    frame=[]
    #initializing the frame variables
    for file in file_path:
        frame.append(None)

    # the global person count
    globalPersonCount=1
    # frame count for testing with motmetrics
    curFrame=1
    # index into the ground-truth list
    gtIndex=0
    # counter for images saved since the last cross-camera matching run
    cur_save_count=0

    prvGlobalIndexData=[]

    while True:
        cur_save_count=cur_save_count+1
        #image saved in current run
        #allimages=[]
        
        for index in range(len(file_path)):
            #getting current time for kalman filter
            #cur=time.time()
            #reading a frame from video
            ret, frame[index] = video_captures[index].read()  # frame shape 640*480*3
            if not ret:
                break  # NOTE: this only exits the inner per-video loop, not the outer while
            #getting current time for file output
            t1 = time.time()
            #changing image from bgr to rgb
            image = Image.fromarray(frame[index][...,::-1])  # bgr to rgb
            #running yolo
            boxes, confidence, classes = yolo.detect_image(image)
            
            #Getting bounding boxes from image data
            if tracking:
                features = encoder(frame[index], boxes)

                detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in
                              zip(boxes, confidence, classes, features)]
            else:
                detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in
                              zip(boxes, confidence, classes)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            #Got bounding boxes from image data
            
            # drawing the detection confidence and a bounding box around each person
            for det in detections:
                bbox = det.to_tlbr()
                score = "%.2f" % round(det.confidence * 100, 2) + "%"
                cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                if len(classes) > 0:
                    cls = det.cls
                    cv2.putText(frame[index], str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame[index].shape[0], (0, 255, 0), 1)

            # converting the BGR frame to HSV for the hue histogram
            hsvImage = cv2.cvtColor(frame[index], cv2.COLOR_BGR2HSV)

            # the local hungarian (assignment cost) matrix
            hungarianmatrix=[]
            # row index for the hungarian matrix
            indexx=0
            #if(len(cameras[index].PersonData)>0):
            #    diff=cur-prvTimes[index]
            #    times=int(diff/0.05)
            #    prvTimes[index]=cur
            #    for data in cameras[index].PersonData:
            #        if(data.kf!=None):
            #            for i in range(times):
            #                data.kf.predict()
            # number of previously stored person records
            nodata=len(cameras[index].PersonData)
            # setting every person's updated flag to False
            for z in range(len(cameras[index].PersonData)):
                cameras[index].PersonData[z].updated=False
            # iterating through current detections
            for det in detections:
                # getting top-left and bottom-right coordinates from the bounding box
                bbox = det.to_tlbr()
                # if there is no previous person data, register every detected person directly in the camera's person data
                if(nodata==0):
                    persondata=PersonData()
                    #persondata.color=[int(random.randint(0,255)),int(random.randint(0,255)),int(random.randint(0,255))]
                    persondata.positions.append([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                    persondata.positions.append([(bbox[0]+bbox[2])/2+0.1,(bbox[1]+bbox[3])/2+0.1])
                    #persondata.top=bbox[0]
                    #persondata.left=bbox[1]
                    persondata.lastPosition=bbox
                    persondata.localPersonIndex=cameras[index].localPersonCount
                    #persondata.kf=KF(persondata.positions[0][0],persondata.positions[0][1],0,0)
                    persondata.globalPersonIndex=globalPersonCount
                    localgloballink.append([globalPersonCount,index,persondata.localPersonIndex])
                    globalPersonCount=globalPersonCount+1
                    cameras[index].localPersonCount=cameras[index].localPersonCount+1
                    hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                    persondata.histogram_h = cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180])
                    # dividing the histogram by the box area so bounding-box size won't alter histogram values
                    persondata.histogram_h = np.divide(persondata.histogram_h,((bbox[3]-bbox[1])*(bbox[2]-bbox[0])))
                    #adding newly created person object to camera
                    cameras[index].PersonData.append(persondata)
                else:
                    hungarianmatrix.append([])
                    #getting current hsv value from current frame
                    hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                    histogram_h = cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180])
                    histogram_h = np.divide(histogram_h,((bbox[3]-bbox[1])*(bbox[2]-bbox[0])))

                    for z in range(len(cameras[index].PersonData)):
                        postions=len(cameras[index].PersonData[z].positions)-1
                        cov = np.cov(np.asarray(cameras[index].PersonData[z].positions).T)
                        #mahal=(distance.mahalanobis([cameras[index].PersonData[z].kf.calulatedmean[0],cameras[index].PersonData[z].kf.calulatedmean[2]],[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2],cov))/ frame[index].shape[0]
                        mahal=math.sqrt((cameras[index].PersonData[z].positions[postions][0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].positions[postions][1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0]
                        #mahal=math.sqrt((cameras[index].PersonData[z].kf.calulatedmean[0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].kf.calulatedmean[1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0]
                        #mahal=getMahalanbolisDist(cameras[index].PersonData[z].positions,[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                        mahal+=(np.sum(np.absolute(np.subtract(histogram_h,cameras[index].PersonData[z].histogram_h))))
                        hungarianmatrix[indexx].append(mahal)
                    indexx=indexx+1
                #print(hungarianmatrix)
            if(nodata!=0):
                row_ind=[]
                col_ind=[]
                if(hungarianmatrix!=[]):
                    row_ind, col_ind = linear_sum_assignment(hungarianmatrix)
                indexx=0
                for pos in range(len(col_ind)):
                    if(hungarianmatrix[row_ind[pos]][col_ind[pos]]<2-detections[row_ind[pos]].confidence):
                        bbox=detections[row_ind[pos]].to_tlbr()
                        detections[row_ind[pos]].localProcessed=True
                        cameras[index].PersonData[col_ind[pos]].updated=True
                        cameras[index].PersonData[col_ind[pos]].top=bbox[0]
                        cameras[index].PersonData[col_ind[pos]].left=bbox[1]
                        #cameras[index].PersonData[col_ind[pos]].kf.update([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                        cameras[index].PersonData[col_ind[pos]].lastPosition=bbox
                        cameras[index].PersonData[col_ind[pos]].positions.append([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                        hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                        toadd=detections[row_ind[pos]].confidence-0.7
                        cameras[index].PersonData[col_ind[pos]].histogram_h = np.add(np.multiply(cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180]),toadd*1/(((bbox[3]-bbox[1])*(bbox[2]-bbox[0])))),np.multiply(cameras[index].PersonData[col_ind[pos]].histogram_h,1-toadd))
                        if(len(cameras[index].PersonData[col_ind[pos]].positions)>6):
                            cameras[index].PersonData[col_ind[pos]].positions.pop(0)

                for pos in range(len(detections)):
                    if not hasattr(detections[pos], 'localProcessed'):
                        bbox = detections[pos].to_tlbr()
                        #if(bbox[1]>hsvImage.shape[0]):
                        #    continue
                        ndata=PersonData()
                        ndata.top=bbox[0]
                        ndata.left=bbox[1]
                        ndata.positions.append([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                        ndata.positions.append([(bbox[0]+bbox[2])/2+0.1,(bbox[1]+bbox[3])/2+0.1])
                        #ndata.kf=KF((bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2,0,0)
                        ndata.color=[int(random.randint(0,255)),int(random.randint(0,255)),int(random.randint(0,255))]
                        ndata.localPersonIndex=cameras[index].localPersonCount
                        ndata.lastPosition=bbox

                        ndata.kf=KF(ndata.positions[0][0],ndata.positions[0][1],0,0)
                        cameras[index].localPersonCount=cameras[index].localPersonCount+1
                        localgloballink.append([globalPersonCount,index,ndata.localPersonIndex])
                        ndata.globalPersonIndex=globalPersonCount
                        globalPersonCount=globalPersonCount+1
                        hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]
                        ndata.histogram_h = cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180])
                        ndata.histogram_h = np.divide(ndata.histogram_h,((bbox[3]-bbox[1])*(bbox[2]-bbox[0])))

                        cameras[index].PersonData.append(ndata)

            #allimages.append([])
            #if(len(file_path))!=1:
            #    for pdata in cameras[index].PersonData:
            #        if(pdata.updated):
            #            nimg=cv2.resize(frame[index][int(pdata.lastPosition[1]):int(pdata.lastPosition[3]),int(pdata.lastPosition[0]):int(pdata.lastPosition[2])], (64,128
            #            ), interpolation = cv2.INTER_AREA)
            #            #allimages[len(allimages)-1].append(np.array(nimg))
            #            pdata.imgs.append(nimg);
            #            if(len(pdata.imgs)==imgsSaved+1):
            #                pdata.imgs.pop(0)
            #nabin's code ends

            if tracking:
                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                    cv2.putText(frame[index], "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame[index].shape[0], (0, 255, 0), 1)

            #if(len(cameras)==2):
            #globalHungarian=[]
            #    for fdata in range(len(cameras[0].PersonData)):
            #        globalHungarian.append([])
            #        for pdata in cameras[1].PersonData:
            #            globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(pdata.histogram_h,cameras[0].PersonData[fdata].histogram_h))))
            #    
            #    row_ind, col_ind = linear_sum_assignment(globalHungarian)
            #    for row in range(len(row_ind)):
            #        cv2.putText(frame[0], chr(ord('a')+row),(int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][0]), int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2)
            #        cv2.putText(frame[1], chr(ord('a')+row),(int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][0]), int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2)

        if(len(cameras)==1):
            hypos=[]
            hyposPos=[]
            for person in cameras[0].PersonData:
                if(person.updated==True):
                    cv2.putText(frame[0],str(person.localPersonIndex) ,(int(person.positions[len(person.positions)-1][0]), int(person.positions[len(person.positions)-1][1])),0, 1e-3 * frame[index].shape[0], (0,255,0),1)
                if(person.updated==True):
                    hypos.append(person.localPersonIndex+1)
                    hyposPos.append([person.top,person.left])
            gts=[]
            gtsPos=[]
            while gtIndex<len(gt) and gt[gtIndex][0]==curFrame:
                gts.append(gt[gtIndex][1])
                gtsPos.append([gt[gtIndex][2],gt[gtIndex][3]])
                gtIndex=gtIndex+1
            curFrame=curFrame+1
            dis=mm.distances.norm2squared_matrix(np.array(gtsPos), np.array(hyposPos))
            acc.update(gts,hypos,dis)

        elif imgsSaved==cur_save_count:
            cur_save_count=0
            edges=[]
            globalHungarian=[]
            for i in range(len(cameras)):
                for j in range(i+1,len(cameras)):
                    globalHungarian=[]
                    x=0
                    xindexes=[]
                    yindexes=[]
                    stackedimgages=[]
                    for pos in range(imgsSaved):
                        stackedimgages.append([])
                        for person in cameras[j].PersonData:
                            if(person.updated==True and len(person.imgs)==imgsSaved):
                                stackedimgages[pos].append(person.imgs[pos])
                    for fdata in range(len(cameras[i].PersonData)):
                        if(cameras[i].PersonData[fdata].updated==False or len(cameras[i].PersonData[fdata].imgs)!=imgsSaved):
                            continue
                        xindexes.append(fdata)
                        curclique=[]
                        prvfoundout=-1
                        if len(prvGlobalIndexData)!=0:
                            for single in prvGlobalIndexData:
                                if cameras[i].PersonData[fdata].globalPersonIndex in single:
                                    curclique=single
                                    prvfoundout=single[0]
                                    break
                        y=0
                        #triplet=test(cameras[i].PersonData[fdata].imgs[0],stackedimgages[0])
                        #for pos in range(1,imgsSaved):
                        #    triplet=np.add(triplet,test(cameras[i].PersonData[fdata].imgs[pos],stackedimgages[pos]))
                        globalHungarian.append([])
                        for pdata in range(len(cameras[j].PersonData)):
                            if(cameras[j].PersonData[pdata].updated==False or len(cameras[j].PersonData[pdata].imgs)!=imgsSaved):
                                continue
                            #globalHungarian[x].append(triplet[y])
                            #val=(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h)))+triplet[y])/(0.9+1.4*imgsSaved)#hsv seems to be max 0.9, triplet max seems to be 1.2
                            #if cameras[j].PersonData[pdata].globalPersonIndex in curclique or cameras[j].PersonData[pdata].prvglobalFoundOutPersonIndex==prvfoundout:
                            #    val-=0.2
                            #globalHungarian[x].append(val)
                            globalHungarian[x].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h))))

                            if(x==0):
                                yindexes.append(pdata)
                            #globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h))))
                            #globalHungarian[fdata].append(triplet[pdata])
                            y=y+1
                        x=x+1
                    if(len(globalHungarian)!=0):
                        row_ind, col_ind = linear_sum_assignment(globalHungarian)
                        print(globalHungarian)
                        for pos in range(len(row_ind)):
                            if(globalHungarian[row_ind[pos]][col_ind[pos]]<0.85):
                                edges.append((cameras[i].PersonData[xindexes[row_ind[pos]]].globalPersonIndex,cameras[j].PersonData[yindexes[col_ind[pos]]].globalPersonIndex))
            
            Allcliques=cliques(edges,len(cameras),globalPersonCount).getCliques()

            for cam in cameras:
                for person in cam.PersonData:
                    if len(person.imgs)!=imgsSaved or person.updated==False:
                        continue
                    isinclique=True  # True here means "not yet in any clique"
                    for clique in Allcliques:
                        if person.globalPersonIndex in clique:
                            isinclique=False
                            break
                    if isinclique:
                        Allcliques.append([person.globalPersonIndex])

            prvGlobalIndexData=[]
            for sclique in Allcliques:
                indexes=[]
                cur=min(sclique)
                prvGlobalIndexData.append([])
                for i in range(len(sclique)):
                    isInclique=False
                    prvIndex=cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].prvglobalFoundOutPersonIndex
                    if prvIndex==-1:
                        isInclique=True
                    else:
                        for snclique in Allcliques:
                            if prvIndex in snclique:
                                isInclique=True
                                break
                    if isInclique==True:
                        cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].globalFoundOutPersonIndex=cur
                    else:
                        cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].globalFoundOutPersonIndex=prvIndex
                        prvGlobalIndexData[len(prvGlobalIndexData)-1].append(prvIndex)

                    prvGlobalIndexData[len(prvGlobalIndexData)-1].append(sclique[i])

            for cam in range(len(cameras)):
                for person in cameras[cam].PersonData:
                    if person.updated==True:
                        cv2.putText(frame[cam],str(person.globalFoundOutPersonIndex) ,(int(person.positions[len(person.positions)-1][0]), int(person.positions[len(person.positions)-1][1])),0, 1e-3 * frame[index].shape[0], (0,255,0),2)

            
            for sclique in Allcliques:
                for i in range(len(sclique)):
                    cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].prvglobalFoundOutPersonIndex=cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].globalFoundOutPersonIndex
            
        else:
            for cam in range(len(cameras)):
                for person in cameras[cam].PersonData:
                    if person.updated==True:
                        cv2.putText(frame[cam],str(person.globalFoundOutPersonIndex) ,(int(person.positions[len(person.positions)-1][0]), int(person.positions[len(person.positions)-1][1])),0, 1e-3 * frame[index].shape[0], (0,255,0),2)
        out_image.fill(0)
        vindex=0
        for row in range(rows):
            for col in range(cols):
                if(vindex==len(file_path)):
                    break
                vidshape=frame[vindex].shape
                curvidheightratio=vidshape[0]/singleHeight
                curvidwidthratio=vidshape[1]/singleWidth

                if(curvidwidthratio<curvidheightratio):
                    #height is small
                    resizedwidth=int(vidshape[1]/vidshape[0]*singleHeight)
                    nimg=cv2.resize(frame[vindex], (resizedwidth,singleHeight), interpolation = cv2.INTER_AREA)
                    widthpos=int((singleWidth-resizedwidth)/2)+col*singleWidth
                    out_image[row*singleHeight:(row+1)*singleHeight,widthpos:widthpos+resizedwidth]=nimg
                else:
                    #width is small
                    resizedheight=int(vidshape[0]/vidshape[1]*singleWidth)
                    nimg=cv2.resize(frame[vindex], (singleWidth,resizedheight), interpolation = cv2.INTER_AREA)
                    heightpos=int(((singleHeight-resizedheight)/2)+row*singleHeight)
                    out_image[heightpos:heightpos+resizedheight,col*singleWidth:(col+1)*singleWidth]=nimg
                vindex=vindex+1

        if(len(cameras)==1):
            mh = mm.metrics.create()
            summary = mh.compute(acc, metrics=['num_frames', 'mota', 'motp'], name='acc')
            print(summary)
        cv2.imshow('', out_image)

        if writeVideo_flag: # and not asyncVideo_flag:
            # save a frame
            out.write(out_image)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1./(time.time()-t1))) / 2
            print("FPS = %f"%(fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    for video_capture in video_captures:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
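The multi-camera example above associates fresh detections with stored persons by building a cost matrix (pixel distance plus the L1 distance between area-normalized hue histograms) and solving it with scipy's linear_sum_assignment. A stripped-down sketch of that core step using only the histogram term; hue_histogram and match are illustrative names, and the 0.85 gate echoes the threshold used for the cross-camera matrix above:

import cv2
import numpy as np
from scipy.optimize import linear_sum_assignment

def hue_histogram(frame_hsv, bbox):
    # bbox = (x1, y1, x2, y2); dividing by the box area, as the example does,
    # keeps box size from dominating the comparison
    x1, y1, x2, y2 = [int(v) for v in bbox]
    patch = frame_hsv[y1:y2, x1:x2]
    hist = cv2.calcHist([patch], [0], None, [180], [0, 180])
    return hist / max((y2 - y1) * (x2 - x1), 1)

def match(track_hists, det_hists, max_cost=0.85):
    # cost[i][j] = L1 distance between track i's and detection j's hue histograms
    cost = np.array([[float(np.sum(np.abs(t - d))) for d in det_hists]
                     for t in track_hists])
    row_ind, col_ind = linear_sum_assignment(cost)
    # keep only assignments under the gating threshold
    return [(r, c) for r, c in zip(row_ind, col_ind) if cost[r, c] < max_cost]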
Example #11
0
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    folder = "/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/scraped_images/google"
    out_folder = "/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/processed_images"
    filenames = os.listdir(folder)
    images = load_images_from_folder(folder)
    j = -1
    num = 0
    # writeVideo_flag = True
    # asyncVideo_flag = False
    #
    # file_path = 'video.webm'
    # if asyncVideo_flag:
    #     video_capture = VideoCaptureAsync(file_path)
    # else:
    #     video_capture = cv2.VideoCapture(file_path)
    #
    # if asyncVideo_flag:
    #     video_capture.start()
    #
    # if writeVideo_flag:
    #     if asyncVideo_flag:
    #         w = int(video_capture.cap.get(3))
    #         h = int(video_capture.cap.get(4))
    #     else:
    #         w = int(video_capture.get(3))
    #         h = int(video_capture.get(4))
    #     fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #     out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
    #     frame_index = -1

    # fps = 0.0
    # fps_imutils = imutils.video.FPS().start()

    for frame in images:
        j = j + 1
        # ret, frame = video_capture.read()  # frame shape 640*480*3
        # if ret != True:
        #     break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        detections = [
            Detection_YOLO(bbox, confidence, cls)
            for bbox, confidence, cls in zip(boxes, confidence, classes)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]
        out_filename = 1
        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # crop_img = frame.crop([int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])])
            # to_tlbr() gives (x1, y1, x2, y2); slice rows y1:y2 and cols x1:x2
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            y = int(bbox[1])
            x = int(bbox[0])
            crop_img = frame[y:y2, x:x2]
            print(str(out_filename) + filenames[j])
            cv2.imwrite(out_folder + "/" + str(out_filename) + filenames[j],
                        crop_img)
            num = num + 1
            out_filename = out_filename + 1
            # if len(classes) > 0:
            #     cls = det.cls
            #     cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0,
            #                 1e-3 * frame.shape[0], (0, 255, 0), 1)

            #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])]

            #res_txt = demo_par(model_par, valid_transform, crop_img)

            #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1)

            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0,
            #             1e-3 * frame.shape[0], (0, 255, 0), 1)
            # font = ImageFont.truetype(
            #     '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf',
            #     size=10)

            # positive_cnt = 1
            # for txt in res_txt:
            #     if 'personal' in txt:
            #         #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font)
            #         cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0,
            #                     1e-3 * frame.shape[0], (0, 255, 0), 1)
            #         positive_cnt += 1

        # cv2.imshow('', frame)

    cv2.destroyAllWindows()
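A caveat on the cropping in Example #11: detector boxes can extend past the image border, in which case the numpy slice silently returns an empty or truncated crop (the earlier example guards with cropped_image.size == 0). A minimal clamped-crop helper, assuming (x1, y1, x2, y2) boxes as returned by to_tlbr(); crop_detection is an illustrative name:

import cv2

def crop_detection(frame, bbox):
    # clamp the box to the image bounds before slicing
    h, w = frame.shape[:2]
    x1 = max(0, min(int(bbox[0]), w - 1))
    y1 = max(0, min(int(bbox[1]), h - 1))
    x2 = max(x1 + 1, min(int(bbox[2]), w))
    y2 = max(y1 + 1, min(int(bbox[3]), h))
    return frame[y1:y2, x1:x2]

# usage, mirroring the loop above:
# crop = crop_detection(frame, det.to_tlbr())
# cv2.imwrite(out_path, crop)  # out_path as in the example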