# Example #1
# 0
def infer_on_stream(args, client, stats):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client used to publish to the "person" and
        "person/duration" topics
    :param stats: list that accumulates per-frame timing dicts
        (``it`` = inference time, ``rt`` = render time)
    :return: None
    """
    # Initialise the inference wrapper and the rolling-confidence buffer.
    infer_network = Network()
    buffer = Buffer()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()
    net_output_name = infer_network.get_output_name()
    net_input_name = infer_network.get_input_blob_name()
    net_output_info = infer_network.get_output_info()
    log.info("network output name")
    log.info(net_output_name)
    log.info("network output info")
    log.info(net_output_info.shape)
    log.info("network input shape")
    log.info(net_input_name)
    log.info(net_input_shape)

    ### Handle the input stream ###
    # "cam" selects webcam device 0; a .jpg/.bmp path is a single still image.
    iflag = False
    input_stream_arg = 0 if args.input == "cam" else args.input
    # BUGFIX: guard with isinstance — for webcam input input_stream_arg is the
    # int 0, and calling .endswith() on it raised AttributeError.
    if isinstance(input_stream_arg, str) and \
            input_stream_arg.endswith(('.jpg', '.bmp')):
        iflag = True

    width = 0
    height = 0
    frame = None
    cap = None
    captureOpen = False
    ## Handle image or stream or CAM
    if iflag:
        # Single still image: read it once up front.
        frame = cv2.imread(input_stream_arg)
        log.info("single frame shape: %s", frame.shape)
        width = frame.shape[1]
        height = frame.shape[0]
    else:
        log.info("attempting VideoCapture for: %s", input_stream_arg)
        cap = cv2.VideoCapture(input_stream_arg)
        # BUGFIX: previously re-opened with args.input, which is the literal
        # string "cam" rather than device index 0 for webcam input.
        cap.open(input_stream_arg)
        captureOpen = True
        # Property ids 3/4 are CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT.
        width = int(cap.get(3))
        height = int(cap.get(4))

    log.info("input image width: %s, height: %s", width, height)
    # Network input spatial dims, hoisted out of the loop.
    # BUGFIX: NCHW layout is [N, C, H, W], so index 2 is HEIGHT and index 3
    # is WIDTH — the original assignments were swapped.
    input_height = net_input_shape[2]
    input_width = net_input_shape[3]
    total_person_count = 0
    # Number of consecutive frames the current person has been tracked.
    duration = 0

    render_time = 0
    waitingOnInference = False
    ### Loop until stream is over ###
    while (captureOpen or iflag or waitingOnInference):
        ### Read from the video capture ###
        flag = True
        key_pressed = None
        if not iflag:
            flag, frame = cap.read()
            if not cap.isOpened():
                captureOpen = False
            key_pressed = cv2.waitKey(60)
        if not flag:
            break
        ### Pre-process the image as needed ###
        # cv2.resize takes (width, height); then HWC -> CHW plus batch dim.
        p_frame = cv2.resize(frame, (input_width, input_height))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### Start asynchronous inference for specified request ###
        start_time = time()
        infer_network.exec_net(p_frame)
        waitingOnInference = True
        render_time = 0
        inf_time = 0

        ### Wait for the result ###
        if infer_network.wait() == 0:
            ### Get the results of the inference request ###
            result = infer_network.get_output()
            inf_time = time() - start_time
            # Restart clock to capture evaluate/draw time.
            start_time = time()
            boxes = post_process(result, width, height, PERSON_CLASS)
            boxes = list(boxes.values())
            boxes = nms(boxes)
            buffer_avg = 0

            if iflag:
                # Still-image mode: apply the threshold directly.
                boxes = filter_confidence(boxes, prob_threshold)

            if len(boxes) > 0:
                ## We have a person in frame (maybe): track via the rolling
                ## confidence buffer to ride out momentary detector dropouts.
                first_prop = boxes[0]
                confidence = first_prop[4]
                buffer.add(confidence)
                buffer_avg = buffer.average()
                if confidence > prob_threshold:
                    if duration > 0:
                        ## Not the first frame for this person — keep counting.
                        duration = duration + 1
                    else:
                        ## Very first time this person has entered the frame:
                        ## pulse out the new total.
                        total_person_count = total_person_count + 1
                        duration = duration + 1
                    client.publish(
                        "person",
                        json.dumps({
                            "count": 1,
                            "total": total_person_count
                        }))
                    draw_box(frame, boxes, inf_time)
                else:
                    ## Person in frame but below the confidence threshold.
                    if duration > 0:
                        ## We were tracking someone last frame, so consult the
                        ## rolling buffer average before giving up on them.
                        if buffer_avg > BUFFER_AVERAGE_CUTOFF:
                            ## Same person, keep counting.
                            duration = duration + 1
                            client.publish(
                                "person",
                                json.dumps({
                                    "count": 1,
                                    "total": total_person_count
                                }))
                            draw_box(frame, boxes, inf_time)
                        else:
                            ## No longer meets confidence or buffer average:
                            ## report the person gone and pulse the duration.
                            client.publish(
                                "person",
                                json.dumps({
                                    "count": 0,
                                    "total": total_person_count
                                }))
                            client.publish("person/duration",
                                           json.dumps({"duration": duration}))
                            duration = 0
                            buffer.flush()
                    else:
                        ## Also nobody in the last frame (duration == 0).
                        client.publish(
                            "person",
                            json.dumps({
                                "count": 0,
                                "total": total_person_count
                            }))
            else:
                ## No boxes with our target class found; make sure we didn't
                ## see one in the last frame (or so).
                buffer.add(0)
                buffer_avg = buffer.average()
                if buffer_avg > BUFFER_AVERAGE_CUTOFF:
                    ## We had someone previously — keep counting, move along.
                    duration = duration + 1
                else:
                    ## Nobody previously, nobody now: make sure we say so.
                    client.publish(
                        "person",
                        json.dumps({
                            "count": 0,
                            "total": total_person_count
                        }))
                    if duration > 0:
                        ## We were previously tracking someone: pulse out the
                        ## duration before zeroing it out.
                        client.publish("person/duration",
                                       json.dumps({"duration": duration}))
                        duration = 0

            render_time = time() - start_time
            render_time_message = "OpenCV rendering time: {:.3f} ms".format(
                render_time * 1e3)
            cv2.putText(frame, render_time_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            stats.append(dict(it=inf_time, rt=render_time))
            # Raw frame bytes go to stdout for the downstream ffmpeg pipe.
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()
        if key_pressed == 27:  # ESC stops the stream
            break
        if iflag and not waitingOnInference:
            iflag = False
        if infer_network.wait() == 0:
            # Inference drained: a still image is done after one pass.
            iflag = False
            waitingOnInference = False
    if cap:
        cap.release()
        cv2.destroyAllWindows()
    client.disconnect()