Example #1
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network(args.model, args.device)
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model()

    ### TODO: Handle the input stream ###
    if args.webcam is not None:
        vc = cv2.VideoCapture(args.webcam)
    else:
        # handle a video or image file passed with -i (e.g. resources/image_0100.jpeg or an .mp4)
        vc = cv2.VideoCapture(args.input)

    if not vc.isOpened():
        logging.error(f"Error opening input file (video or image {args.input})")
        exit(1)
    
    got_frame, frame = vc.read()

    person_in_frame = False
    miss_count = 0
    real_count = 0
    total_count = 0
    input_shape = infer_network.get_input_shape()
    
    while got_frame:
        image = preprocess_image(frame, input_shape[3], input_shape[2])

        infer_request_handle = infer_network.async_exec_net(image)
        detections = infer_network.wait(infer_request_handle)
        detections = infer_network.get_output(detections)
        current_count = detections['num_detections']
        
        # When no one is detected, tolerate up to 20 consecutive missed
        # frames before concluding that the person has actually left;
        # this smooths over intermittent detection dropouts.
        if current_count == 0:
            if person_in_frame:
                miss_count += 1
                if miss_count > 20:
                    real_count -= 1
                    miss_count = 0
                    duration = int(time.time() - start_time)
                    client.publish("person/duration",
                                   json.dumps({"duration": duration}))
                    person_in_frame = False
        else:
            miss_count = 0
            # A person has entered the frame: update the counters and
            # publish the new running total.
            if real_count == 0:
                real_count += 1
                total_count += 1
                start_time = time.time()
                person_in_frame = True
                client.publish("person", json.dumps({"total": total_count}))

        client.publish("person", json.dumps({"count": real_count}))

        ### Draw bounding boxes to provide intuition ###
        img = draw_bboxes(frame, detections)
        cv2.putText(img,
            f'current: {real_count} total: {total_count}',
            (0, 100),
            cv2.FONT_HERSHEY_SIMPLEX,
            .5,
            (255,255,255),
            2,
            cv2.LINE_AA)
        sys.stdout.buffer.write(img)
        sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if vc.get(cv2.CAP_PROP_FRAME_COUNT) == 1.0:
            cv2.imwrite('detected.png', img)

        ### Read from the video capture ###
        does_got_frame, frame = vc.read()
    vc.release()
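
Example #1 calls a preprocess_image helper that is not shown above. A minimal sketch consistent with its call site (width = input_shape[3], height = input_shape[2], i.e. an NCHW input blob) might look like the following; the original helper may differ, and the variant in Example #3 additionally preserves aspect ratio and returns normalization constants:

import cv2

def preprocess_image(frame, width, height):
    # Resize the BGR frame to the network's expected spatial size,
    # reorder HWC -> CHW, and add a batch dimension (NCHW).
    image = cv2.resize(frame, (width, height))
    image = image.transpose((2, 0, 1))
    return image.reshape(1, *image.shape)
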
Example #2
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    model_path = args.model
    input_source = args.input  # avoid shadowing the built-in input()
    device = args.device

    ### TODO: Load the model through `infer_network` ###
    input_dict = infer_network.load_model(model_path, device)

    ### TODO: Handle the input stream ###
    # camera
    input_type = None
    if input == "cam":
        input_type = "cam"
    elif isinstance(input_source, str) and Path(input_source).is_file():
        mimetype = mimetypes.guess_type(input_source)[0]
        if mimetype:
            mimetype = mimetype.split('/')[0]
            if mimetype == 'video':
                input_type = "video"
            elif mimetype == "image":
                input_type = "image"

    if input_type not in ["cam", "video", "image"]:
        raise ValueError("Invalid input parameter: expected 'cam', a video file, or an image file")

    # handle the different input types
    cap = cv2.VideoCapture(0 if input_type == "cam" else input_source)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    request_id = 0

    # counters
    people_total_counter = 0
    people_last_counter = 0
    frames_counter = 0
    # this value can be adjusted according to the model accuracy
    frames_interval_baseline = 50
    start_time = 0
    # labels map
    labels_map = COCO_LABELS
    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        ### TODO: Pre-process the image as needed ###
        net_input = infer_network.process_input(frame)
        net_input_dict = {'image_tensor': net_input}
        if "image_info" in input_dict:
            net_input_dict['image_info'] = net_input.shape[1:]
        ### TODO: Start asynchronous inference for specified request ###
        infer_network.async_exec_net(net_input_dict, request_id)
        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            output_dict = infer_network.get_output()
            ### TODO: Extract any desired stats from the results ###
            predictions = infer_network.process_output(output_dict,
                                                       target_size=(h, w),
                                                       boxes_threshold=0.3)
            people_curr_count = 0
            for box in predictions:
                label_id = box.label_id
                label = labels_map[label_id]
                # counting the number of people in the frame
                if label == "person":
                    people_curr_count += 1
                    box.draw(frame)
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            client.publish("person", json.dumps({"count": people_curr_count}))
            frames_counter += 1
            # sample the count every frames_interval_baseline frames and react to changes
            if frames_counter == frames_interval_baseline:
                # if there was a change from one frame to the next,
                # i.e. a new person has entered the scene
                if people_curr_count > people_last_counter:
                    start_time = time.time()
                    # increase the total people counter and send the new value to the GUI
                    people_total_counter = people_total_counter + people_curr_count - people_last_counter
                    client.publish("person",
                                   json.dumps({"total": people_total_counter}))
                # Person duration in the video is calculated
                if people_curr_count < people_last_counter:
                    time_delta = int(time.time() - start_time)
                    client.publish("person/duration",
                                   json.dumps({"duration": time_delta}))

                # update the counter
                people_last_counter = people_curr_count
            frames_counter = frames_counter % frames_interval_baseline  # reset the frame counter
        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        cv2.imshow("Frame", frame)
        ### TODO: Write an output image if `single_image_mode` ###
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
        if input_type == "image":
            cv2.imwrite("out.jpg", frame)
    cap.release()
    cv2.destroyAllWindows()
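
Example #2's process_output and the box objects it returns (each with a label_id attribute and a draw method) are also defined elsewhere. A plausible sketch, written here as a free function and assuming an SSD-style [1, 1, N, 7] output blob of (image_id, label, confidence, xmin, ymin, xmax, ymax) rows with normalized coordinates, might be:

from dataclasses import dataclass

import cv2

@dataclass
class DetectionBox:
    label_id: int
    confidence: float
    xmin: int
    ymin: int
    xmax: int
    ymax: int

    def draw(self, frame):
        # Draw the bounding box onto the frame in place.
        cv2.rectangle(frame, (self.xmin, self.ymin),
                      (self.xmax, self.ymax), (0, 255, 0), 2)

def process_output(output_dict, target_size, boxes_threshold=0.3):
    # Convert normalized SSD detections into pixel-space boxes,
    # dropping anything below the confidence threshold.
    h, w = target_size
    detections = next(iter(output_dict.values()))  # assumes a single output blob
    boxes = []
    for _, label_id, conf, x1, y1, x2, y2 in detections[0][0]:
        if conf < boxes_threshold:
            continue
        boxes.append(DetectionBox(int(label_id), float(conf),
                                  int(x1 * w), int(y1 * h),
                                  int(x2 * w), int(y2 * h)))
    return boxes

As in the other examples, the raw BGR frames written to stdout are intended to be piped into an FFmpeg rawvideo input that serves the annotated stream.
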
Example #3
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network(args.model[:-4], args.device)  # drop the 4-char extension (presumably ".xml")
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    infer_network.load_model()

    ### Handle the input stream ###
    vc = cv2.VideoCapture(args.input)
    if not vc.isOpened():
        logging.error(f"Error opening input (video or image): {args.input}")
        sys.exit(1)

    ### Read from the video capture ###
    got_frame, frame = vc.read()

    ### Initialize for stats calculation ###
    #last_detections = None
    last_count = 0
    total_count = 0

    ### Loop until stream is over ###
    while got_frame:

        ### Pre-process the image as needed ###
        image, normalization_consts = preprocess_image(
            frame, width=640, height=640, preserve_aspect_ratio=True)
        batch = image[np.newaxis, :, :, :]

        ### Start asynchronous inference for specified request ###
        infer_request_handle = infer_network.async_exec_net(batch)

        ### Wait for the result ###
        detections_arr = infer_network.async_wait(infer_request_handle)

        ### Get the results of the inference request ###
        detections = infer_network.get_output(
            detections_arr,
            threshold=prob_threshold,
            whitelist_filter=[1],  # keep only COCO class 1 ("person")
            normalization_consts=normalization_consts)

        ### Extract any desired stats from the results ###
        ### Calculate and send relevant information on ###
        # TODO: use the bbox to identify whether it is the same person, and support
        # multiple people; the current logic is sufficient for the assignment
        current_count = detections['num_detections']
        if current_count > last_count and last_count == 0:
            start_time = vc.get(cv2.CAP_PROP_POS_MSEC)
            total_count = total_count + current_count - last_count
        if current_count < last_count and current_count == 0:
            # Person duration in the video is calculated
            duration = int(
                (vc.get(cv2.CAP_PROP_POS_MSEC) - start_time) / 1000.0)
            # Publish messages to the MQTT server
            client.publish("person/duration",
                           json.dumps({"duration": duration}))

        last_count = current_count

        ### current_count, total_count and duration to the MQTT server ###
        ### Topic "person": keys of "count" and "total" ###
        client.publish(
            "person", json.dumps({
                "count": current_count,
                "total": total_count
            }))

        ### Draw bounding boxes to provide intuition ###
        img = draw_bboxes(frame, detections)
        cv2.putText(img, f'current: {current_count} total: {total_count}',
                    (0, 100), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 2,
                    cv2.LINE_AA)

        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(img)
        sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if vc.get(cv2.CAP_PROP_FRAME_COUNT) == 1.0:
            cv2.imwrite('ov_od.png', img)

        ### Read from the video capture ###
        got_frame, frame = vc.read()
    vc.release()
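
All three examples depend on a Network class that is not shown. For orientation, here is a minimal sketch of such a wrapper against the OpenVINO 2021.x Inference Engine Python API, using the method names from Example #3; the original class certainly differs in detail, and the get_output() post-processing (probability threshold, class whitelist, coordinate de-normalization) is omitted:

from openvino.inference_engine import IECore

class Network:
    def __init__(self, model, device="CPU"):
        # Example #3 passes the model path minus its extension, so the
        # .xml/.bin pair is reconstructed here.
        self.model_xml = model + ".xml"
        self.model_bin = model + ".bin"
        self.device = device
        self.ie = IECore()

    def load_model(self):
        net = self.ie.read_network(model=self.model_xml, weights=self.model_bin)
        self.input_blob = next(iter(net.input_info))
        self.output_blob = next(iter(net.outputs))
        self.input_shape = net.input_info[self.input_blob].input_data.shape
        self.exec_net = self.ie.load_network(network=net,
                                             device_name=self.device,
                                             num_requests=1)

    def get_input_shape(self):
        # NCHW shape of the network's (single) input.
        return self.input_shape

    def async_exec_net(self, batch, request_id=0):
        # Start an asynchronous inference request and hand back its id.
        self.exec_net.start_async(request_id=request_id,
                                  inputs={self.input_blob: batch})
        return request_id

    def async_wait(self, request_id):
        # Block until the request completes, then return the raw output.
        self.exec_net.requests[request_id].wait(-1)
        return self.exec_net.requests[request_id].output_blobs[self.output_blob].buffer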