def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    input_type = args.input
    single_image_mode = False
    request_id = 0
    time_count = 0
    pre_time = 0
    counter = 0
    last_count = 0
    current_count = 0
    total_count = 0
    duration = 0
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    global prob_threshold
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(
        args.model, request_id, args.device, args.cpu_extension)[1]

    ### Handle the input stream ###
    if input_type == 'CAM':
        input_stream = 0

    # Check for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = input_type

    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    # Get and open video capture
    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)

    # Grab the shape of the input
    global width, height
    width = int(cap.get(3))
    height = int(cap.get(4))

    ### Loop until stream is over ###
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### Read from the video capture ###
        image = cv2.resize(frame, (w, h))

        ### Pre-process the image as needed ###
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### Start asynchronous inference for specified request ###
        inf_start = time.time()

        infer_network.exec_net(image, request_id)

        ### Wait for the result ###
        if infer_network.wait(request_id) == 0:
            det_time = time.time() - inf_start

            ### Get the results of the inference request ###
            result = infer_network.get_output(request_id)
            inference_time_message = "Inference time: {:.3f}ms".format(det_time*1000)
            font = cv2.FONT_HERSHEY_SIMPLEX
            color = (174, 32, 141)
            cv2.putText(frame, inference_time_message, (20, 20), font, 0.6, color, 1)

            ### Extract any desired stats from the results ###
            frame, detected = draw_boxes(frame, result)

            ### Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            if detected != counter:
                last_count = counter
                counter = detected
                if time_count >= 3:
                    pre_time = time_count
                    time_count = 0
                else:
                    time_count = pre_time + time_count
                    pre_time = 0
            else:
                time_count += 1
                if time_count >= 10:
                    current_count = counter
                    if time_count == 20 and current_count > last_count:
                        total_count += current_count - last_count
                        client.publish("person", json.dumps({"total_counts": total_count}))
                    elif time_count == 20 and current_count < last_count:
                        duration = int(pre_time)
                        client.publish('person/duration', json.dumps({'duration': duration}))
                
            client.publish("person", json.dumps({"count": current_count}))


            # Break if escape key pressed
            if key_pressed == 27:
                break

        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
    # Disconnect from MQTT
    client.disconnect()
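
These examples all share the same pre-processing step: OpenCV delivers frames in HWC order (height, width, channels, BGR), while the OpenVINO detection models used here expect an NCHW blob. A minimal standalone sketch of that conversion, assuming a batch size of 1 and the (n, c, h, w) shape returned by load_model above (the helper name is illustrative, not part of the project):

import cv2

def preprocess_frame(frame, n, c, h, w):
    """Resize an OpenCV BGR frame and convert HWC -> NCHW for inference."""
    image = cv2.resize(frame, (w, h))   # cv2.resize expects (width, height)
    image = image.transpose((2, 0, 1))  # HWC -> CHW
    return image.reshape((n, c, h, w))  # add the batch dimension
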
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension, \
        args.debug)
    net_input_shape = infer_network.get_input_shape()
    if args.debug:
        print("Input shape of the model: " + str(net_input_shape))
    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = 0
    found = False
    total = 0
    ### TODO: Loop until stream is over ###
    if args.debug:
        print("Input size: "+str((height, width)))
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        if frame_count == -1:
            frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2,0,1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            frame, count = draw_boxes(frame, result, args, width, height)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if not found and count > 0:
                total = total + count
                found = True
            if found and count > 0:
                frames = frames + 1
            if found and count == 0:
                found = False
                client.publish("person/duration", 
                json.dumps({"duration": int(frames/fps)}))
                frames = 0
            client.publish("person", 
            json.dumps({"count": count, "total": total}))
            
        ### TODO: Send the frame to the FFMPEG server ###
        if not args.debug and (frame_count > 0 or frame_count == -1):
            sys.stdout.buffer.write(frame)  
            sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        else:
            cv2.imwrite("output.jpg", frame)
            print("Image saved to output.jpg")
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
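
The client argument passed to infer_on_stream is a paho-mqtt client created and connected in main() (not shown in every example). A minimal sketch of that setup and of the two topics published above; the host, port and payload values below are placeholders rather than the project's actual configuration:

import json
import paho.mqtt.client as mqtt

MQTT_HOST = "localhost"        # placeholder; the project reads these from its config
MQTT_PORT = 3002               # placeholder port
MQTT_KEEPALIVE_INTERVAL = 60

client = mqtt.Client()
client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

# per-frame count and running total go to the "person" topic
client.publish("person", json.dumps({"count": 1, "total": 5}))
# elapsed seconds for a person who just left go to "person/duration"
client.publish("person/duration", json.dumps({"duration": 12}))
client.disconnect()
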
Example #3
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    network = Network()
    # Set Probability threshold for detections
    if args.prob_threshold is not None:
        prob_threshold = args.prob_threshold
    else:
        prob_threshold = 0.3

    ### TODO: Load the model through `infer_network` ###
    network.load_model(args.model, args.cpu_extension, args.device)
    pedestrian_input_shape = network.get_input_shape()

    networkReIdentification = Network()
    networkReIdentification.load_model(args.model2, args.cpu_extension,
                                       args.device)
    identification_input_shape = networkReIdentification.get_input_shape()
    # print('Models Loaded Successfully')

    # check that the input stream has a supported extension
    if not args.input.split('.')[1] in EXTENSIONS:
        print("The input file is not supported yet")
        exit(1)

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.input)
    fps = FPS().start()
    ### TODO: Loop until stream is over ###

    last_detection_time = None
    start = None

    total_unique_persons = []
    while (cap.isOpened()):
        ### TODO: Read from the video capture ###
        isAnyFrameLeft, frame = cap.read()
        width = int(cap.get(3))
        height = int(cap.get(4))
        ### TODO: Pre-process the image as needed ###
        if not isAnyFrameLeft:
            sys.stdout.flush()
            break
        displayFrame = frame.copy()

        processed_frame = pre_process(frame,
                                      net_input_shape=pedestrian_input_shape)
        ### TODO: Start asynchronous inference for specified request ###
        inference_start_time = time.time()
        network.exec_net(processed_frame)
        ### TODO: Wait for the result ###
        last_x_min = 0
        last_x_max = 0
        last_y_max = 0
        last_y_min = 0

        if network.wait() == 0:
            inference_end_time = time.time()
            total_inference_time = inference_end_time - inference_start_time
            cv2.putText(displayFrame,
                        "Inference time: " +
                        str(round(total_inference_time * 1000, 3)) + "ms",
                        (5, 15),
                        cv2.FONT_HERSHEY_PLAIN,
                        0.9, (230, 50, 2),
                        lineType=cv2.LINE_8,
                        thickness=1)

            # print("Inference Time "+ total_inference_time)
            ### TODO: Get the results of the inference request ###
            result = network.get_all_output()

            ### TODO: Extract any desired stats from the results ###
            output = result['DetectionOutput']
            counter = 0

            for detection in output[0][0]:
                image_id, label, conf, x_min, y_min, x_max, y_max = detection

                if conf > prob_threshold:
                    # print("label " + str(label) + "imageid"+ str(image_id))
                    x_min = int(x_min * width)
                    x_max = int(x_max * width)
                    y_min = int(y_min * height)
                    y_max = int(y_max * height)

                    try:
                        if conf > 0.85:
                            crop_person = frame[y_min:y_max, x_min:x_max]
                            total_unique_persons = reidentification(
                                networkReIdentification, crop_person,
                                identification_input_shape,
                                total_unique_persons, conf)

                    except Exception as err:
                        # print(err)
                        pass

                    x_min_diff = last_x_min - x_min
                    x_max_diff = last_x_max - x_max

                    if x_min_diff > 0 and x_max_diff > 0:  # ignore multiple drawn bounding boxes
                        # cv2.waitKey(0)
                        continue

                    y_min_diff = abs(last_y_min) - abs(y_min)

                    counter = counter + 1

                    last_x_min = x_min
                    last_x_max = x_max
                    last_y_max = y_max
                    last_y_min = y_min

                    cv2.rectangle(displayFrame, (x_min, y_min), (x_max, y_max),
                                  (0, 255, 0), 2)

                    activity = ""
                    # print("Y  => " + str(y_min_diff) + " " + str(y_max_diff))
                    if (y_min_diff >= -20):
                        activity = "standing"
                    elif y_min_diff < -21 and y_min_diff > -41:
                        activity = "moving"
                    else:
                        activity = "walking"

                    cv2.putText(displayFrame,
                                activity, (x_max + 10, y_min + 50),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                1, (230, 50, 2),
                                lineType=cv2.LINE_8,
                                thickness=1)

                    last_detection_time = datetime.now()
                    # print(total_detected)
                    if start is None:
                        start = time.time()

                # cv2.putText(displayFrame, "Totol Unique Persons: "+str(len(total_unique_persons)),(50,150),
                #             cv2.FONT_HERSHEY_COMPLEX, 1, (100, 150, 250),
                #             lineType=cv2.LINE_4, thickness=2)

                # if start is not None and counter == 0:
                #     elapsed = time.time() - start
                #     client.publish("person/duration", json.dumps({"duration": elapsed}))
                #     start = None

                if last_detection_time is not None:
                    # if last_detection_time.minute
                    second_diff = (datetime.now() -
                                   last_detection_time).total_seconds()
                    # print(second_diff)
                    if second_diff >= 1.5:
                        if start is not None:
                            elapsed = time.time() - start
                            client.publish(
                                "person/duration",
                                json.dumps({"duration":
                                            elapsed - second_diff}))
                            # start = None
                            last_detection_time = None
                            start = None

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            client.publish(
                "person",
                json.dumps({
                    "count": str(counter),
                    "total": len(total_unique_persons)
                }))
            ### Topic "person/duration": key of "duration" ###

        sys.stdout.buffer.write(displayFrame)
        #
        # imshow("frame", displayFrame)

        ### TODO: Send the frame to the FFMPEG server ###

        ### TODO: Write an output image if `single_image_mode` ###

        if cv2.waitKey(1) & 0xFF == ord('q'):
            sys.stdout.flush()
            break
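
The reidentification() helper called above is not part of this listing. One plausible implementation, sketched here purely as an assumption, runs the person crop through the re-identification model and keeps its embedding only when it does not match a previously stored one by cosine similarity (the wrapper is assumed to expose exec_net()/wait()/get_output() as in the other examples):

import cv2
import numpy as np

def reidentification(reid_network, crop, input_shape, known_embeddings, conf,
                     similarity_threshold=0.6):
    """Hypothetical sketch: store the crop's embedding only if it does not
    match any previously seen person."""
    blob = cv2.resize(crop, (input_shape[3], input_shape[2]))
    blob = blob.transpose((2, 0, 1))
    blob = blob.reshape(1, *blob.shape)

    reid_network.exec_net(blob)
    if reid_network.wait() != 0:
        return known_embeddings
    embedding = reid_network.get_output().flatten()

    norm = np.linalg.norm(embedding) + 1e-9
    for known in known_embeddings:
        cosine = float(np.dot(embedding, known)) / (norm * (np.linalg.norm(known) + 1e-9))
        if cosine > similarity_threshold:
            return known_embeddings    # already counted this person
    known_embeddings.append(embedding)
    return known_embeddings
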
Example #4
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    # adding required variables
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    # arguments like args.model, args.device and args.cpu_extension come from the build_argparser() function
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()
    ### TODO: Handle the input stream ###

    ### TODO: Loop until stream is over ###
    stream_input, mode = capture(args.input)
    single_image_mode = mode
    cap = cv2.VideoCapture(stream_input)
    cap.open(args.input)
    # width and height are important parameters for the pretrained model
    global width, height
    width = int(cap.get(3))
    height = int(cap.get(4))
    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        # Read the next frame; the captured input is stored in `frame`
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Read from the video capture ###

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        # reference for this section: https://github.com/intel-iot-devkit/people-counter-python/blob/master/main.py
        inf_start = time.time()
        infer_network.exec_net(p_frame)
        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)


            ### TODO: Get the results of the inference request ###
            result, count = draw_boxes(p_frame, result)
            #Display inference time
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(result, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.45, (200, 10, 10), 1)
            #time = time.time()

            ### TODO: Extract any desired stats from the results ###
            #client.publish("time",json.dumps({"Time":time}))
            if count > last_count:
                start_time = time.time()
                total_count = total_count + count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": count}))
            last_count = count

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
        # output = np.ascontiguousarray(output, dtype=np.float32)

        ### TODO: Send the frame to the FFMPEG server ###
        # output = cv2.resize(output, (net_input_shape[3], net_input_shape[2]))
        # frame = np.dstack((result, result, result))
        # frame = np.uint8(result)
        sys.stdout.buffer.write(result)
        sys.stdout.flush()
        #print(output)

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', result)
Example #5
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    if args.input == "CAM":
        input_stream = 0

    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"
    ### TODO: Handle the input stream ###

    cap = cv2.VideoCapture(input_stream)
    ### TODO: Loop until stream is over ###

    if input_stream:
        ### TODO: Read from the video capture ###
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERR Unable to open the source")

    global initial_w, initial_h
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    ### TODO: Pre-process the image as needed ###
    while cap.isOpened():

        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        image = cv2.resize(frame, (w, h))

        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        inf_start = time.time()

        ### TODO: Start asynchronous inference for specified request ###
        # NOTE: the exec_net call was missing here; the signature is assumed to
        # match the Network wrapper used by the other examples (request id, image).
        infer_network.exec_net(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start

            result = infer_network.get_output(cur_request_id)

            perf_count = infer_network.performance_counter(cur_request_id)

            frame, current_count = ssd_out(frame, result)

            inf_time_message = "Inference Time: {:.3f}ms"\
                                .format(det_time * 1000)

            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            if current_count < last_count:
                duration = int(time.time() - start_time)

                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # check if we provided a TF model or an IR
    is_tensorflow = os.path.splitext(args.model)[1] == '.pb'

    # Initialise the class
    if is_tensorflow:
        from inference_tf import NetworkTf
        infer_network = NetworkTf()
    else:
        from inference import Network
        infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)

    if not is_tensorflow:
        net_input_shape = infer_network.get_input_shape()

    ### Handle the input ###
    is_single_image_mode = os.path.splitext(args.input)[1] in ['.jpg', '.png']

    if not is_single_image_mode:
        cap = cv2.VideoCapture(args.input)
        cap.open(args.input)
        # Grab the shape and FPS rate of the input
        width = int(cap.get(3))
        height = int(cap.get(4))
        fps = cap.get(cv2.CAP_PROP_FPS)

    # init the total of detected persons
    total = 0

    # init the number of frames
    nb_frames = 0

    # init the total inference time, to be divided by the number of frames at the end
    total_inference_time = 0

    # an array to keep track of previously detected persons
    previously_detected_persons = []

    # the average duration of a single person presence
    duration = 0

    # fixme  : for debugging
    max_percent = [0]

    ### Loop until stream is over ###
    while is_single_image_mode or cap.isOpened():

        if is_single_image_mode:
            print("Single image mode. Analyze ", args.input)
            frame = cv2.imread(args.input)
            height = frame.shape[0]
            width = frame.shape[1]
        else:
            ### Read from the video capture ###
            flag, frame = cap.read()
            nb_frames = nb_frames + 1
            if not flag:
                break
            key_pressed = cv2.waitKey(60)

        if is_tensorflow:
            p_frame = frame
        else:
            p_frame = preprocess_image(
                frame, (net_input_shape[3], net_input_shape[2]))

        ### Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(p_frame)

        ### Wait for the result ###
        if infer_network.wait() == 0:

            # record the inference time
            total_inference_time = total_inference_time + (time.time() -
                                                           inference_start)

            ### Get the results of the inference request ###
            result = infer_network.get_output()

            ### Create output frame
            out_frame, detected_persons = create_output_image(
                frame, result, width, height, (0, 0, 255),
                float(args.prob_threshold), nb_frames)

            if not is_single_image_mode:
                # if there's detected persons in the frame
                count = 0
                if len(detected_persons) > 0:
                    # for each new detection
                    for person in detected_persons:
                        # check if there was a person with a matching bounding box
                        is_new_person = True
                        for index, previous_person in enumerate(
                                previously_detected_persons):
                            # if same person, updating to last coords
                            if is_same_person(person, previous_person,
                                              max_percent):
                                # keep the timestamp of the first detection
                                person[4] = previous_person[4]
                                previously_detected_persons[index] = person
                                is_new_person = False
                                break
                        if is_new_person:
                            total = total + 1
                            publish_last_duration(previously_detected_persons,
                                                  client, fps)
                            previously_detected_persons.append(person)

                #print('previously_detected_persons=',previously_detected_persons)
                #print('max_percent=',max_percent)

                ### Extract any desired stats from the results ###

                ### Calculate and send relevant information on ###
                ### current_count, total_count and duration to the MQTT server ###
                ### Topic "person": keys of "count" and "total" ###
                ### Topic "person/duration": key of "duration" ###
                duration = get_avg_duration(previously_detected_persons, fps)
                #print('count:', len(detected_persons), " total:", len(previously_detected_persons), " person/duration:", duration)
                client.publish(
                    "person",
                    json.JSONEncoder().encode({
                        "count":
                        len(detected_persons),
                        "total":
                        len(previously_detected_persons)
                    }))

        ### Write an output image if is in single_image_mode ###
        if is_single_image_mode:
            print("Write output file in 'single_image.png'")
            cv2.imwrite('single_image.png', out_frame)
        else:
            ### Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(out_frame)
            sys.stdout.flush()

        # Break if single_image_mode or escape key pressed
        if is_single_image_mode or key_pressed == 27:
            break

    # publish duration for the last detected person
    if not is_single_image_mode:
        publish_last_duration(previously_detected_persons, client, fps)

    # Release the capture and destroy any OpenCV windows
    if not is_single_image_mode:
        cap.release()
        cv2.destroyAllWindows()
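
is_same_person(), publish_last_duration() and get_avg_duration() are project helpers that do not appear in this listing. A hypothetical matcher based on intersection-over-union, assuming each person entry is [x_min, y_min, x_max, y_max, first_frame] as the surrounding code suggests:

def iou(box_a, box_b):
    """Intersection-over-union of two [x_min, y_min, x_max, y_max] boxes."""
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0

def is_same_person(person, previous_person, max_percent, threshold=0.4):
    """Hypothetical matcher: two detections belong to the same person when
    their bounding boxes overlap enough."""
    overlap = iou(person[:4], previous_person[:4])
    max_percent[0] = max(max_percent[0], overlap)   # kept only for the debug list above
    return overlap >= threshold
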
Example #7
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    
    if args.input=="CAM":
        camera = cv2.VideoCapture(0)
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        infer_network.load_model(args.model, 1, args.device, args.cpu_extension)
        input_shape = infer_network.get_input_shape()
        img = cv2.imread(args.input, cv2.IMREAD_COLOR)
        resized_frame = cv2.resize(img, (input_shape[3], input_shape[2]))
        frame_preproc = np.transpose(np.expand_dims(resized_frame.copy(), axis=0), (0,3,1,2))
        infer_network.exec_net(frame_preproc)
        if infer_network.wait()==0:
            outputs = infer_network.get_output()
            box_frame, count, bbox = get_bounding_box(img, outputs, prob_threshold)
            cv2.putText(box_frame, "Count:"+str(count), (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
            cv2.imwrite('output.jpg', box_frame)
        return
    else:
        if not os.path.isfile(args.input):
            exit(1)
        camera = cv2.VideoCapture(args.input)
    if not camera.isOpened():
        exit(1)
    cur_req_id=0
    next_req_id=1
    num_requests=2
    infer_network.load_model(args.model, num_requests, args.device, args.cpu_extension)
    input_shape = infer_network.get_input_shape()
    ret, frame = camera.read()
    total_count=0
    pres_count = 0
    prev_count=0
    start_time=0 
    no_bbox=0
    duration=0
    prev_bbox_x = 0

    while camera.isOpened():
        ret, next_frame = camera.read()
        if not ret:
            break
        key = cv2.waitKey(60)
        resized_frame = cv2.resize(next_frame.copy(), (input_shape[3], input_shape[2]))
        frame_preproc = np.transpose(np.expand_dims(resized_frame.copy(), axis=0), (0,3,1,2))
        infer_network.exec_net(frame_preproc.copy(), req_id=next_req_id)
        if infer_network.wait(cur_req_id)==0:
            outputs = infer_network.get_output(cur_req_id)
            frame, pres_count, bbox = get_bounding_box(frame.copy(), outputs[0], prob_threshold)
            box_w = frame.shape[1]
            topleft, bottomright = bbox
        
            if pres_count>prev_count:
                start_time = time.time()
                total_count+=pres_count-prev_count
                no_bbox=0
                client.publish("person", json.dumps({"total":total_count}))
            elif pres_count<prev_count:
                if no_bbox<=20:
                    pres_count=prev_count
                    no_bbox+=1
                elif prev_bbox_x<box_w-200:
                    pres_count=prev_count
                    no_bbox=0
                else:
                    duration = int(time.time()-start_time)
                    client.publish("person/duration", json.dumps({"duration":duration}))
            if not (topleft is None and bottomright is None):
                prev_bbox_x=int((topleft[0]+bottomright[0])/2)
            prev_count=pres_count
                    
            client.publish("person", json.dumps({"count":pres_count}))
            if key==27:
                break

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        cur_req_id, next_req_id = next_req_id, cur_req_id
        frame = next_frame

    #output_video.release()
    camera.release()
    cv2.destroyAllWindows()
    client.disconnect()
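
The example above (Example #7) overlaps decoding and inference by keeping two in-flight requests and swapping their IDs every iteration. The core of that pattern, stripped of the counting logic; preprocess and handle_result are placeholder callables, and the wrapper signature (exec_net(frame, req_id=...), wait(req_id), get_output(req_id)) is assumed to match the one used above:

def run_double_buffered(camera, infer_network, preprocess, handle_result):
    """Sketch of the two-request asynchronous pipeline (helper names assumed)."""
    cur_req_id, next_req_id = 0, 1
    ret, frame = camera.read()          # prime the pipeline with the first frame
    while camera.isOpened():
        ret, next_frame = camera.read()
        if not ret:
            break
        # submit the *next* frame while the result of the *current* one is awaited
        infer_network.exec_net(preprocess(next_frame), req_id=next_req_id)
        if infer_network.wait(cur_req_id) == 0:
            handle_result(frame, infer_network.get_output(cur_req_id))
        cur_req_id, next_req_id = next_req_id, cur_req_id
        frame = next_frame
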
Example #8
def infer_on_stream(args,client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold=args.prob_threshold
    # Set request id
    req_id=0

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()
    
    ### TODO: Handle the input stream ###
    input = args.input
    is_one_image = False
    # check for live camera feed
    if input.lower() == 'cam':
        input = 0
    # check whether the input is a single image
    image_formats = [".png", ".jpg", ".bmp", ".jpeg"]
    if input == args.input and any(input.endswith(fmt) for fmt in image_formats):
        is_one_image = True
    # Get and open video capture
    capture = cv2.VideoCapture(input)
    capture.open(input)
    if not capture.isOpened():
        print("ERROR! Unable to open input source")
        exit(1)

    # Grab the shape of the input 
    width = int(capture.get(3))
    height = int(capture.get(4))
    
    # Set global variables for people counting
    current_count = 0
    time_start = 0
    duration = 0
    #previous_duration=0
    total_count = 0
    total_count4text = 0
    previous_count = 0
    omitted_results = 0
    ### TODO: Loop until stream is over ###
    while capture.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = capture.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        _width=net_input_shape[3]
        _height=net_input_shape[2]
        p_frame = cv2.resize(frame, (_width, _height))
        p_frame = p_frame.transpose((2,0,1))
        p_frame = p_frame.reshape(net_input_shape[0], net_input_shape[1],_height,_width)
        inference_start=time.time()
        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(req_id,p_frame)
        ### TODO: Wait for the result ###
        if infer_network.wait(req_id) == 0:
            inference_time=(time.time()-inference_start)*1000
            inference_time=round(inference_time,2)
            if not is_one_image:
                ### write some info onto frame 
                # Uncomment the following codes to see the stats in video output as well
                '''
                people_in_message = "people in frame : "+str(current_count)
                cv2.putText(frame, people_in_message, (10, 15),cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 200, 0), 1, cv2.LINE_AA, False)
                total_count_message = "total people counted : "+str(total_count4text)
                cv2.putText(frame, total_count_message, (10, 35),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 200, 150), 1, cv2.LINE_AA, False)
                '''
                frame_message = "omitted results : "+str(omitted_results)
                cv2.putText(frame, frame_message, (10, 55),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 200), 1, cv2.LINE_AA, False)
        
            inference_time_message = "inference time : "+str(inference_time)+" ms"
            cv2.putText(frame, inference_time_message, (10, 420),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA, False)
            
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(req_id)
            ### TODO: Extract any desired stats from the results ###
            out_frame = draw_bounding_boxes(frame, result, prob_threshold, width, height)
            
            ### TODO: Calculate current_count, total_count and duration
            ### TODO: send relevant information on current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            
            current_count=0
            for r in result[0][0]:
                confidence = r[2]
                if confidence > prob_threshold:
                    current_count+=1

            ## on state change e.g if new person enter
            if current_count > previous_count:
                #store current time for calculating duration
                time_start=time.time()
                total_count += current_count - previous_count
            
            ## on state change e.g: if person left
            if current_count < previous_count:
                # calculate the time the person spent
                duration = time.time()-time_start
                #convert duration, from float to integer
                duration = int(duration)
                ## to avoid counting a person more than once,
                ## the detected person should be present for at least 2 seconds
                if duration>=2:
                    total_count = total_count
                else:
                    # subtract the previous count from total_count
                    # and count it as an omitted result
                    total_count=total_count-previous_count
                    omitted_results=omitted_results+1
                
                # Publish messages to the MQTT server, topic:person, key:total
                client.publish(topic="person", payload=json.dumps({"total" : total_count}))
                total_count4text=total_count
                if duration>=2:
                    # Publish messages to the MQTT server, topic:duration, key:duration [when person left]
                    client.publish(topic="person/duration", payload=json.dumps({"duration" : duration}))
                
            # Publish message to the MQTT server, topic: person, key:count
            client.publish(topic="person", payload=json.dumps({"count" : current_count}))
            previous_count = current_count
            # Break if escape key pressed
            if key_pressed == 27:
                break
            # save current frame if s key pressed
            if key_pressed == ord('s'):
                cv2.imwrite('output_frame.png',frame)

        #cv2.imshow("People Counter By Ibrahim",frame)
        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        if is_one_image:
            ### write the number of people in the image
            people_in_message = "people in the image : "+str(current_count)
            cv2.putText(frame, people_in_message, (10, 15),cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 100), 1, cv2.LINE_AA, False)
            cv2.imwrite("output_image.jpg",frame)

    # Release the capture and destroy any OpenCV windows
    capture.release()
    cv2.destroyAllWindows()
    ### TODO: Disconnect from MQTT
    client.disconnect()
Example #9
def main():
    args = build_argparser().parse_args()

    account_name = args.account_name
    account_key = args.account_key

    if account_name is "" or account_key is "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)        
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1) 
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \""+ account_name+ "\"" )
        upload_azure = 1
        create_cloud_container(account_name, account_key)

    #if args.input == 'cam':
        #input_stream = 0
    #else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', args.input)
        sys.exit(1)

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0, args.cpu_extension)

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    store_aisle = cv2.VideoWriter(os.path.join(args.output_dir, "store_aisle.mp4"),cv2.VideoWriter_fourcc(*'avc1'), fps, (initial_w, initial_h), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,'i_progress_'+str(job_id)+'.txt')
    infer_time_start = time.time()
    frame_count = 1
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        people_count = 0
        
        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > args.prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5, 255), min(class_id * 7, 255),
                              min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25), cv2.FONT_HERSHEY_COMPLEX, 1,
                         (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65), cv2.FONT_HERSHEY_COMPLEX, 1,
                         (255, 255, 255), 2)
        store_aisle.write(frame)
        time_interval = MULTIPLICATION_FACTOR * fps
        if frame_count % time_interval == 0:
            apply_time_stamp_and_save(frame, people_count, upload_azure)
        if frame_count%10 == 0: 
            progressUpdate(progress_file_path, int(time.time()-infer_time_start), frame_count, video_len)
        frame = next_frame

    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1))+'\n')
            f.write(str(frame_count)+'\n')
    cap.release()
    infer_network.clean()
Example #10
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    # Getting video fps'
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Create an array of zeros the size of the frame count
    detection_frames = np.zeros(frame_count)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    #while cap.isOpened():
    for i in range(frame_count):
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Start async inference
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                #performance_counts(perf_count)

            frame, current_count, detected_class = ssd_out(frame, result)

            # add detected class to detection array
            detection_frames[i] = current_count

            # Print video statistics

            # Printing fps
            fps_text = "Video FPS: " + str(fps)
            cv2.putText(frame, fps_text, (15, 15), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, (200, 10, 10), 1)

            # Printing frame count
            frame_count_text = "Frame " + str(i) + "/" + str(frame_count)
            cv2.putText(frame, frame_count_text, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # Print inference time
            inf_time_message = "Inference time: {:.3f}ms"\
            .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # Printing detection results on statistics
            detection_text = "Last 2 Seconds of Detections: "
            cv2.putText(frame, detection_text, (15, 400),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            arr_txt = str(detection_frames[i - WAIT_CONSTANT:i])
            cv2.putText(frame, arr_txt, (15, 415), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, (200, 10, 10), 1)

            # Wait 2 seconds before publishing anything
            if i > WAIT_CONSTANT:

                if 1 in detection_frames[i - WAIT_CONSTANT:i]:
                    current_count = 1

                # Verify whether any detections occurred in the last second

                # When a new person enters the video
                if current_count > last_count:
                    start_time = time.time()
                    total_count = total_count + current_count - last_count
                    client.publish("person", json.dumps({"total":
                                                         total_count}))

                # Person duration in the video is calculated
                if current_count < last_count:
                    duration = int(time.time() - start_time)
                    # Publish messages to the MQTT server
                    client.publish("person/duration",
                                   json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()

    # Saving detection frames for debugging
    filepath = "../detections.txt"
    with open(filepath, 'w') as file_handler:
        for item in detection_frames:
            file_handler.write("{}\n".format(item))
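
Example #10 suppresses detection flicker by recording the per-frame count in detection_frames and reporting a person as present whenever any detection occurred within the last WAIT_CONSTANT frames. The same idea as a small standalone helper (names are illustrative); something like smoother.update(current_count) would replace the slice check above:

from collections import deque

class DetectionSmoother:
    """Report at least one person while any detection fell inside the last `window` frames."""

    def __init__(self, window):
        self.history = deque(maxlen=window)

    def update(self, raw_count):
        self.history.append(raw_count)
        # if anything was seen recently, report at least one person
        if any(count > 0 for count in self.history):
            return max(raw_count, 1)
        return raw_count
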
Example #11
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    if DEBUG:
        print("probability threshold: ", prob_threshold)
        print("device: ", args.device)
        print("model_xml: ", args.model)

    if args.cpu_extension == 'auto':
        cpu_extension = get_cpu_extension()
    else:
        cpu_extension = args.cpu_extension

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(device=args.device,
                             model_xml=args.model,
                             cpu_extension=cpu_extension)
    ### TODO: Handle the input stream ###
    isImage = None  # placeholder to check whether we have an image or a video input
    if (args.input).lower() == 'cam':
        isImage = False
        args.input = 0
    elif (args.input).endswith('.jpg') or (args.input).endswith('.bmp'):
        isImage = True  #input is image
    else:
        isImage = False  #we have a video stream as input

    if DEBUG:
        print("args.input: ", args.input)

    inp = cv2.VideoCapture(args.input)
    inp.open(args.input)

    #get the shape of the input
    width = int(inp.get(3))
    height = int(inp.get(4))
    if DEBUG:
        print("input image widht: ", width)
        print("input image height: ", height)

    #get the input shape of the network
    net_input_shape = infer_network.get_input_shape()
    if DEBUG:
        print("input_shape: ", net_input_shape)
        print("input_shape width: ", net_input_shape[2])
        print("input_shape height: ", net_input_shape[3])

    model = (args.model.split('/')[-1])[:-4]  # model name
    if isImage:
        vid_capt = None
    else:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        vid_capt = cv2.VideoWriter(
            'output_video_' + model + '_' + args.device + '_' +
            str(prob_threshold) + '.mp4', fourcc, 25, (width, height))

    request_id = 0
    counter = 0
    persons = []
    total_persons = 0
    buffer_size = 0
    inference_time = 0

    ### TODO: Loop until stream is over ###
    while inp.isOpened():

        ### TODO: Read from the video capture ###
        flag, frame = inp.read()
        if not flag:
            break  #video ended

        #to cancel easily
        key_pressed = cv2.waitKey(60)
        if key_pressed == 27:
            break
        counter += 1
        ### TODO: Pre-process the image as needed ###
        if DEBUG:
            print("------------------------------", (net_input_shape))

        start_inference = time.time()
        prep_frame = preprocess_frame(frame, net_input_shape[2],
                                      net_input_shape[3])

        #if we want to run async, the request_id equals the counter
        if args.request_type == 'async':
            request_id = counter
            if DEBUG:
                print('-------------------request id:', request_id)

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(image=prep_frame, request_id=request_id)
        ### TODO: Wait for the result ###
        if infer_network.wait(request_id=request_id) == 0:
            output = infer_network.get_output(request_id=request_id)
            if DEBUG:
                print('output is:', output)
            inference_time += (time.time() - start_inference)
            ### TODO: Get the results of the inference request ###
            out_frame = get_results(frame, output, counter, prob_threshold,
                                    width, height, persons)
            ### TODO: Extract any desired stats from the results ###
            if vid_capt is not None:
                vid_capt.write(out_frame)
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            total_persons = 0

            if len(persons) > 0:
                total_persons = (persons[-1]).getPersonId() + 1
                if DEBUG:
                    print("total_persons:", total_persons)

            counted_persons = 0
            p_time = 0
            for p in persons:
                if p.isTracked():
                    counted_persons += 1
                    p_time = (counter - p.getFrameIn()) / FRAMERATE
                if DEBUG:
                    print("for person: ", p.toString(), " the time spent is: ",
                          p_time)
            client.publish(
                "person",
                json.dumps({
                    "count": counted_persons,
                    "total": total_persons
                }))
            client.publish("person/duration", json.dumps({"duration": p_time}))

            if DEBUG:
                print('======== MQTT ===========')
                print("count: ", counted_persons)
                print("total_count: ", total_persons)

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        buffer_size += sys.getsizeof(out_frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if isImage:
            cv2.imwrite(
                'output_image' + model + '_' + args.device + '_' +
                str(prob_threshold) + '.jpg', out_frame)

    #cleanup the mess
    if not isImage:
        vid_capt.release()
    inp.release()
    cv2.destroyAllWindows()
    client.disconnect()

    #update stats array
    return buffer_size, inference_time
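
The persons list in Example #11 holds objects exposing getPersonId(), isTracked(), getFrameIn() and toString(); their class is defined elsewhere in that project. A hypothetical minimal version of such a record, only to make the calls above concrete:

class Person:
    """Hypothetical tracked-person record matching the accessors used above."""

    def __init__(self, person_id, frame_in):
        self._id = person_id
        self._frame_in = frame_in     # frame counter when the person first appeared
        self._tracked = True

    def getPersonId(self):
        return self._id

    def getFrameIn(self):
        return self._frame_in

    def isTracked(self):
        return self._tracked

    def setTracked(self, tracked):
        self._tracked = tracked

    def toString(self):
        return "person %d (entered at frame %d)" % (self._id, self._frame_in)
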
Example #12
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    total_count = 0
    last_count = 0
    start_time = 0
    request_id = 0
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device)
    net_input_shape = infer_network.get_input_shape()
    ### TODO: Handle the input stream ###
    if args.input == "CAM":
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    try:
        cap = cv2.VideoCapture(args.input)
    except FileNotFoundError:
        print("Cannot locate video file: " + args.input)
    except Exception as e:
        print("Something went wrong with the video file: ", e)

    if input_stream:
        cap.open(input_stream)
    if not cap.isOpened():
        log.error("Can't open video source")

    width = int(cap.get(3))
    height = int(cap.get(4))
    ### TODO: Loop until stream is over ###

    ### TODO: Read from the video capture ###

    ### TODO: Pre-process the image as needed ###

    ### TODO: Start asynchronous inference for specified request ###

    ### TODO: Wait for the result ###

    ### TODO: Get the results of the inference request ###

    ### TODO: Extract any desired stats from the results ###

    ### TODO: Calculate and send relevant information on ###
    ### current_count, total_count and duration to the MQTT server ###
    ### Topic "person": keys of "count" and "total" ###
    ### Topic "person/duration": key of "duration" ###

    ### TODO: Send the frame to the FFMPEG server ###

    ### TODO: Write an output image if `single_image_mode` ###
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        pro_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        pro_frame = pro_frame.transpose((2, 0, 1))
        pro_frame = pro_frame.reshape(1, *pro_frame.shape)

        inf_start = time.time()
        infer_network.exec_net(pro_frame)

        if infer_network.wait() == 0:
            det_time = time.time() - inf_start
            result = infer_network.get_output()

            current_count = 0
            for box in result[0][0]:
                # Draw a bounding box for the object when its probability
                # exceeds the specified threshold
                conf = box[2]
                if conf > prob_threshold:
                    xmin = int(box[3] * width)
                    ymin = int(box[4] * height)
                    xmax = int(box[5] * width)
                    ymax = int(box[6] * height)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (225, 225, 225), 1)
                    current_count = current_count + 1
            inf_time_message = "Inference time: {:.3f}ms" \
                .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clear()
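
The detection loop in this example indexes result[0][0], which presumes the standard SSD DetectionOutput blob of shape [1, 1, N, 7], where each row holds [image_id, label, confidence, xmin, ymin, xmax, ymax] with normalized coordinates. A small helper that isolates just that parsing, under the same layout assumption (the helper name is illustrative):

def parse_ssd_output(result, frame_width, frame_height, threshold):
    """Return pixel-space boxes and a count of detections above `threshold`.

    Assumes the SSD DetectionOutput layout [1, 1, N, 7]:
    [image_id, label, confidence, xmin, ymin, xmax, ymax], coords normalized.
    """
    boxes = []
    for det in result[0][0]:
        if det[2] > threshold:
            xmin = int(det[3] * frame_width)
            ymin = int(det[4] * frame_height)
            xmax = int(det[5] * frame_width)
            ymax = int(det[6] * frame_height)
            boxes.append((xmin, ymin, xmax, ymax))
    return boxes, len(boxes)

# boxes, count = parse_ssd_output(result, width, height, prob_threshold)
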
Example #13
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the network model into the IE
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():

        #Reading the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        image = cv2.resize(frame, (w, h))

        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        inf_start = time.time()

        # Perform inference on the frame
        infer_network.exec_net(cur_request_id, image)

        if infer_network.wait(cur_request_id) == 0:

            det_time = time.time() - inf_start

            result = infer_network.get_output(cur_request_id)
            #if args.perf_counts:
            perf_count = infer_network.performance_counter(cur_request_id)
            #performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)

            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)

            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
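
All of these implementations publish the same three JSON payloads over MQTT: the current count and the running total on topic "person", and the dwell time on topic "person/duration". A minimal paho-mqtt sketch of that contract; the host, port, and keepalive values below stand in for whatever constants the project defines:

import json
import paho.mqtt.client as mqtt

MQTT_HOST = "localhost"            # assumption: broker runs locally
MQTT_PORT = 1883                   # assumption: default MQTT port
MQTT_KEEPALIVE_INTERVAL = 60

client = mqtt.Client()
client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

# Payloads the UI expects: count/total on "person", duration (seconds) on "person/duration".
client.publish("person", json.dumps({"count": 1, "total": 5}))
client.publish("person/duration", json.dumps({"duration": 12}))
client.disconnect()
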
def main():
    """
    Load the network and parse the output.

    :return: None
    """
    global CLIENT
    global KEEP_RUNNING
    global DELAY
    global SIG_CAUGHT
    global prob_threshold
    global rate
    global TARGET_DEVICE
    global is_async_mode

    CLIENT = mqtt.Client()
    CLIENT.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    CLIENT.subscribe(TOPIC)

    try:
        pointx = int(os.environ['POINTX'])
        pointy = int(os.environ['POINTY'])
        width = int(os.environ['WIDTH'])
        height = int(os.environ['HEIGHT'])
    except KeyError:
        pointx = 0
        pointy = 0
        width = 0
        height = 0
    try:
        # Number of seconds between data updates to MQTT server
        rate = float(os.environ['RATE'])
    except KeyError:
        rate = 1
    try:
        # Probability threshold for detections filtering
        prob_threshold = float(os.environ['PROB_THRESHOLD'])
    except KeyError:
        prob_threshold = 0.7

    if 'DEVICE' in os.environ.keys():
        TARGET_DEVICE = os.environ['DEVICE']

    if 'MULTI' not in TARGET_DEVICE and TARGET_DEVICE not in accepted_devices:
        print("Unsupported device: " + TARGET_DEVICE)
        sys.exit(2)
    elif 'MULTI' in TARGET_DEVICE:
        target_devices = TARGET_DEVICE.split(':')[1].split(',')
        for multi_device in target_devices:
            if multi_device not in accepted_devices:
                print("Unsupported device: " + TARGET_DEVICE)
                sys.exit(2)

    cpu_extension = os.environ[
        'CPU_EXTENSION'] if 'CPU_EXTENSION' in os.environ.keys() else None

    model = os.environ["MODEL"]
    if 'FLAG' in os.environ.keys():
        async_mode = os.environ['FLAG']
        if async_mode == "sync":
            is_async_mode = False
        else:
            is_async_mode = True

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    logger = log.getLogger()
    render_time = 0
    roi_x = pointx
    roi_y = pointy
    roi_w = width
    roi_h = height

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())

    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
        else:
            input_stream = item['video']

    cap = cv2.VideoCapture(input_stream)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        sys.exit(1)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model, TARGET_DEVICE, 1, 1, 2,
                                          cpu_extension)[1]

    message_thread = Thread(target=message_runner, args=())
    message_thread.setDaemon(True)
    message_thread.start()

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    ret, frame = cap.read()
    while ret:

        ret, next_frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break

        initial_wh = [cap.get(3), cap.get(4)]

        if next_frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        # If either default values or negative numbers are given,
        # then we will default to start of the FRAME
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]
        key_pressed = cv2.waitKey(1)

        # 'c' key pressed
        if key_pressed == 99:
            # Give operator chance to change the area
            # Select rectangle from the upper-left corner, don't display crosshair
            ROI = cv2.selectROI("Assembly Selection", frame, True, False)
            print("Assembly Area Selection: -x = {}, -y = {}, -w = {},"
                  " -h = {}".format(ROI[0], ROI[1], ROI[2], ROI[3]))
            roi_x = ROI[0]
            roi_y = ROI[1]
            roi_w = ROI[2]
            roi_h = ROI[3]
            cv2.destroyAllWindows()

        cv2.rectangle(frame, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h),
                      (0, 0, 255), 2)
        selected_region = [roi_x, roi_y, roi_w, roi_h]

        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        if is_async_mode:
            # Async enabled and only one video capture
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            # Async disabled
            infer_network.exec_net(cur_request_id, in_frame_fd)
        # Wait for the result
        infer_network.wait(cur_request_id)
        det_time = time.time() - inf_start
        # Results of the output layer of the network
        res = infer_network.get_output(cur_request_id)
        # Parse SSD output
        ssd_out(res, initial_wh, selected_region)

        # Draw performance stats
        inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1000)
        render_time_message = "OpenCV rendering time: {:.3f} ms". \
            format(render_time * 1000)

        if not INFO.safe:
            warning = "HUMAN IN ASSEMBLY AREA: PAUSE THE MACHINE!"
            cv2.putText(frame, warning, (15, 100), cv2.FONT_HERSHEY_COMPLEX,
                        0.8, (0, 0, 255), 2)

        log_message = "Async mode is on." if is_async_mode else \
            "Async mode is off."
        cv2.putText(frame, log_message, (15, 15), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (15, 35),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, render_time_message, (15, 55),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Worker Safe: {}".format(INFO.safe), (15, 75),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)

        render_start = time.time()
        cv2.imshow("Restricted Zone Notifier", frame)
        render_end = time.time()
        render_time = render_end - render_start

        frame = next_frame

        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
            # Tab key pressed
        if key_pressed == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))

        if is_async_mode:
            # Swap infer request IDs
            cur_request_id, next_request_id = next_request_id, cur_request_id

    infer_network.clean()
    message_thread.join()
    cap.release()
    cv2.destroyAllWindows()
    CLIENT.disconnect()
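
The loop above pipelines inference by keeping two requests in flight and swapping their IDs every frame, so the device works on frame i+1 while the host post-processes frame i. A condensed sketch of that double-buffering pattern; the five callables are stand-ins so the sketch does not depend on any particular inference wrapper:

def run_pipelined(read_frame, submit, wait, collect, handle_result):
    """Double-buffered async loop: overlap inference on the next frame with
    post-processing of the previous one.

    read_frame() -> frame or None, submit(request_id, frame),
    wait(request_id) -> 0 on success, collect(request_id) -> result,
    handle_result(frame, result).
    """
    cur_id, next_id = 0, 1
    frame = read_frame()
    while frame is not None:
        next_frame = read_frame()
        if next_frame is None:
            break
        submit(next_id, next_frame)             # start inference on the new frame
        if wait(cur_id) == 0:                   # result for the previous frame
            handle_result(frame, collect(cur_id))
        frame = next_frame
        cur_id, next_id = next_id, cur_id       # swap request IDs
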
Example #15
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    
    frame_count = 0 
    # Flag for whether the input is a single image
    single_image_mode = False
    
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the model through `infer_network` ###
    infer_network.load_model(model=args.model, device=args.device, cpu_extension=args.cpu_extension)

    #Handle the input stream ###
    if args.input == 'CAM':
        stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.png'):
        single_image_mode = True
        stream = args.input
    else:
        stream = args.input
    
    capture = cv2.VideoCapture(stream)

    width = int(capture.get(3))
    height = int(capture.get(4))

    model_input_shape = infer_network.get_input_shape()

    #Initializing necessary variables for calculations
    total_people = 0
    people_in_last_frame = 0
    start_time = time.time()
    duration = None
    people_in_frame = 0
    thres = 0.45
    current_frame_request_id = 0
    next_frame_request_id = 1
    prev_flag = False
    
    if not capture.isOpened():
        exit()
    
    _, current_frame = capture.read()
    
    processed_frame = cv2.resize(current_frame, (model_input_shape[3], model_input_shape[2]))
    processed_frame = processed_frame.transpose((2,0,1))
    processed_frame = processed_frame.reshape(1,*processed_frame.shape)
    
    executable_net = infer_network.exec_net(image = processed_frame, request_id = current_frame_request_id)
    
    #Loop until stream is over ###
    while capture.isOpened():

        #Read from the video capture ###
        flag, next_frame = capture.read()

        if not flag:
            break
        
        #Needed with cv2.imshow() method
        key_pressed = cv2.waitKey(60)

        #Pre-process the image as needed ###
        processed_frame = cv2.resize(next_frame, (model_input_shape[3], model_input_shape[2]))
        processed_frame = processed_frame.transpose((2,0,1))
        processed_frame = processed_frame.reshape(1,*processed_frame.shape)

        #Start asynchronous inference for specified request ###
        executable_net = infer_network.exec_net(image = processed_frame, request_id = next_frame_request_id)
       
        # Wait for the result ###
        if infer_network.wait(request_id = current_frame_request_id) == 0:
        
            #Get the results of the inference request ###
            outputs = infer_network.get_output(request_id = current_frame_request_id)

            
            #Extract any desired stats from the results ###
            frame, people_in_frame, cur_flag = process_outputs(current_frame, outputs, width, height, thres)
            
            #cv2.imwrite('output',out_frame)
            current_time = time.time()
            cv2.putText(frame, 'Elapsed Time: ' + "%.2f" % (current_time - start_time), (30,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
            cv2.putText(frame, str(frame_count), (30,210), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)

            #cv2.imshow('Output', out_frame) 

            #Calculate and send relevant information on ###
            if people_in_frame > people_in_last_frame and not prev_flag:
                total_people += (people_in_frame - people_in_last_frame)
                new_people_time = time.time()
                client.publish("person", json.dumps({"total": total_people}))

            if people_in_frame < people_in_last_frame:
                duration = time.time() - new_people_time
                client.publish("person/duration", json.dumps({"duration": duration}))

            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            client.publish("person", json.dumps({"count":people_in_frame}))
            ### Topic "person/duration": key of "duration" ###
           
        
            people_in_last_frame = people_in_frame
            prev_flag = cur_flag
        
            current_frame = next_frame
            current_frame_request_id, next_frame_request_id = next_frame_request_id, current_frame_request_id
        
        if key_pressed == 27:
            break

        frame_count += 1
        #Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        #Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('out.jpg', frame)
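
Every streaming variant pushes its annotated frames to an external FFmpeg process by writing raw BGR bytes to stdout. A minimal sketch of that output path; the ffmpeg command in the comment is only the typical raw-video form, and the exact invocation depends on the project's launch script:

import sys
import numpy as np

# The script is usually piped into something like (adjust size/framerate to the source):
#   python app.py ... | ffmpeg -f rawvideo -pixel_format bgr24 \
#       -video_size 768x432 -framerate 24 -i - <output or ffserver feed>

def send_frame(frame):
    """Write one BGR uint8 frame to stdout for the downstream encoder."""
    sys.stdout.buffer.write(frame.tobytes())
    sys.stdout.flush()

# send_frame(np.zeros((432, 768, 3), dtype=np.uint8))
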
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Initialise the class
    #log.info("Creating Inference Engine...")
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    ### TODO: Load the model through `infer_network` ###
    #log.info("Loading network files:\n\t{}".format(args.model))
    #log.info("Loading model to the plugin")
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    #log.info("Preparing inputs")
    infer_network.network.batch_size = 1
    net_input_shape = infer_network.get_input_shape()
    ### TODO: Handle the input stream ###
    # Check whether the input stream is a camera
    input_stream = 0 if args.input == "cam" else args.input
    try:
        cap = cv2.VideoCapture(input_stream)
    except FileNotFoundError:
        print("File {} not available".format(input_stream))
    except Exception as e:
        print("error on loading file:{}".format(e))
        exit(1)
    number_input_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    number_input_frames = 1 if number_input_frames != -1 and number_input_frames < 0 else number_input_frames
    current_count = 0
    total_count = 0
    duration = 0
    count_start_time = 0
    person_num_trigger = 3  # maximum total number before alarm
    min_duration = 5  # minimum time (s) before alarm
    k_ref = 5  # number of frames to wait before accepting a non-detection
    k = 0
    det_time = []  #inference times array

    cap.open(input_stream)
    ### TODO: Loop until stream is over ###
    while cap.isOpened():

        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        ### TODO: Start asynchronous inference for specified request ###
        #log.info("Starting inference...")
        start_time = time()
        infer_network.exec_net(p_frame, 0)
        ### TODO: Wait for the result ###
        # Collecting object detection results
        objects = list()
        if infer_network.wait(0) == 0:
            det_time.append(time() - start_time)
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(0)
            #print(key_pressed)
            ### TODO: Extract any desired stats from the results ###
            start_time = time()
            objects = process_result(frame, p_frame, result, infer_network,
                                     prob_threshold, log, prob_threshold)
            parsing_time = time() - start_time
            # Draw performance stats over frame
            inf_time_message = "Inference time: {:.3f} ms **** Inference mean time :{:.3f} ms ".format(
                det_time[-1] * 1e3,
                sum(det_time) * 1e3 / len(det_time))
            parsing_message = "YOLO parsing time is {:.3f} ms".format(
                parsing_time * 1e3)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame, parsing_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ####
            num_detection = len(objects)
            delta = num_detection - current_count
            if delta > 0 and k == 0:
                count_start_time = time()
                current_count = num_detection
                k = 0
            elif delta < 0 and k >= k_ref:
                duration = time() - count_start_time
                total_count += current_count
                current_count = num_detection
                MQTT_MSG_DURATION = json.dumps({"duration": duration})
                client.publish("person/duration", MQTT_MSG_DURATION)
                k = 0
                #cv2.putText(frame, "past person duration: {}".format(time()- count_start_time), (15, 115), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            elif delta < 0 and k < k_ref:
                k += 1
            if delta == 0 and k > 0:
                k = 0
            MQTT_MSG_PERSON = json.dumps({"count": current_count})
            client.publish("person", MQTT_MSG_PERSON)

            if total_count > person_num_trigger:
                cv2.putText(
                    frame,
                    "number: {} of total people more than trigger limit:{}".
                    format(total_count, person_num_trigger), (15, 45),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            if (time() -
                    count_start_time) > min_duration and current_count >= 1:
                cv2.putText(
                    frame,
                    "Duration: {:2f} s of this person more than trigger limit: {} s"
                    .format(time() - count_start_time, min_duration), (15, 60),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            for obj in objects:
                color = (int(min(obj['class_id'] * 12.5,
                                 255)), min(obj['class_id'] * 7,
                                            255), min(obj['class_id'] * 5,
                                                      255))
                det_label = str(obj['class_id'])
                cv2.rectangle(frame, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), color, 2)
                cv2.putText(
                    frame, "#" + det_label + ' ' +
                    str(round(obj['confidence'] * 100, 1)) + ' %',
                    (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX,
                    0.6, color, 1)

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        if number_input_frames == 1:
            cv2.imwrite("out.png", frame)

        # Break if escape key pressed

        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
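
The k/k_ref bookkeeping above is a debounce: a drop in the raw detection count is only believed after it persists for several consecutive frames, so a momentary missed detection does not end a visit early. A standalone sketch of that idea; the class name is illustrative and the default of 5 frames mirrors k_ref above:

class DebouncedCounter:
    """Commit a decrease in the detection count only after it has been
    observed for `hold_frames` consecutive frames."""

    def __init__(self, hold_frames=5):
        self.hold_frames = hold_frames
        self.current = 0
        self._misses = 0

    def update(self, detections):
        if detections >= self.current:
            self.current = detections       # increases are accepted immediately
            self._misses = 0
        else:
            self._misses += 1
            if self._misses >= self.hold_frames:
                self.current = detections   # decrease confirmed
                self._misses = 0
        return self.current

# counter = DebouncedCounter(hold_frames=5)
# for raw in [1, 1, 0, 1, 0, 0, 0, 0, 0]:
#     stable = counter.update(raw)          # stays at 1 until the drop persists
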
Example #17
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    infer_network = Network()
    one_image = False

    infer_network.load_model(args.model, args.device, args.cpu_extension)
    network_shape = infer_network.get_input_shape()
    if args.input == 'CAM':
        validator = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        one_image = True
        validator = args.input
    else:
        validator = args.input
        assert os.path.isfile(args.input), "file doesn't exist"

    cap = cv2.VideoCapture(validator)

    if validator:
        cap.open(args.input)

    if not cap.isOpened():
        exit(1)

    total_counter = 0
    pres_counter = 0
    prev_counter = 0
    beginning_time = 0
    num_bounding_box = 0
    timing = 0
    prev_bounding_box = 0
    req_id = 0

    while cap.isOpened():

        flag, frame = cap.read()
        probability_threshold = args.prob_threshold
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        processed_image = cv2.resize(frame,
                                     (network_shape[3], network_shape[2]))
        processed_image = processed_image.transpose((2, 0, 1))
        processed_image = processed_image.reshape(1, *processed_image.shape)

        infer_network.exec_net(processed_image)

        if infer_network.wait(req_id) == 0:

            network_output = infer_network.get_output()

            frame, pres_counter, bounding_box = extract_box(
                frame.copy(), network_output, probability_threshold)
            box_width = frame.shape[1]
            tl, br = bounding_box

            if pres_counter > prev_counter:
                beginning_time = time.time()
                total_counter += pres_counter - prev_counter
                num_bounding_box = 0
                client.publish("person", json.dumps({"total": total_counter}))

            elif pres_counter < prev_counter:
                if num_bounding_box <= 20:
                    pres_counter = prev_counter
                    num_bounding_box += 1
                elif prev_bounding_box < box_width - 200:
                    pres_counter = prev_counter
                    num_bounding_box = 0
                else:
                    timing = int(time.time() - beginning_time)
                    client.publish("person/duration",
                                   json.dumps({"duration": timing}))

            if tl is not None and br is not None:
                prev_bounding_box = int((tl[0] + br[0]) / 2)
            prev_counter = pres_counter

            client.publish("person", json.dumps({"count": pres_counter}))

        frame = frame.copy(order='C')

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    if one_image:
        cv2.imwrite('output_image.jpg', frame)

    cap.release()
    client.disconnect()
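
Like most of the variants here, this one times a visit by stamping time.time() when the per-frame count rises and publishing the difference when it falls again. A sketch of just that bookkeeping, decoupled from any particular detector or MQTT client (the function name and dict keys are illustrative):

import json
import time

def update_visit_stats(current_count, last_count, state, publish):
    """Track total visitors and per-visit duration from per-frame counts.

    `state` holds 'total' and 'start_time'; `publish(topic, payload)` can be
    any callable, e.g. an MQTT client's publish method.
    """
    if current_count > last_count:                   # someone entered
        state["start_time"] = time.time()
        state["total"] += current_count - last_count
        publish("person", json.dumps({"total": state["total"]}))
    elif current_count < last_count:                 # someone left
        duration = int(time.time() - state["start_time"])
        publish("person/duration", json.dumps({"duration": duration}))
    publish("person", json.dumps({"count": current_count}))
    return current_count                             # becomes next frame's last_count

# state = {"total": 0, "start_time": 0.0}
# last = update_visit_stats(1, 0, state, lambda topic, payload: print(topic, payload))
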
Example #18
def main():
    args = build_argparser().parse_args()
    cap = cv2.VideoCapture(args.input)
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0, args.cpu_extension)[1]

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        people_count = 0

        # Converting to Grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Remove the background
        fgbgmask = mog.apply(gray)
        # Thresholding the image
        thresh = 2
        max_value = 2
        threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                                      cv2.THRESH_BINARY)[1]
        # Adding to the accumulated image
        accumulated_image = cv2.add(threshold_image, accumulated_image)
        colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT)

        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > args.prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5, 255), min(class_id * 7, 255),
                              min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25), cv2.FONT_HERSHEY_COMPLEX, 1,
                         (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65), cv2.FONT_HERSHEY_COMPLEX, 1,
                         (255, 255, 255), 2)
        final_result_overlay = cv2.addWeighted(frame, P_COUNT_FRAME_WEIGHTAGE,
                                                    colormap_image,
                                                    COLORMAP_FRAME_WEIGHTAGE_1, 0)
        cv2.imshow("Detection Results", final_result_overlay)

        time_interval = MULTIPLICATION_FACTOR * fps

        frame = next_frame

        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
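
This example layers a motion heatmap on top of detection: a MOG2 background subtractor marks moving pixels, the binary masks accumulate over time, and the accumulation is colorized and blended onto the frame. A trimmed sketch of only the heatmap part; the blend weights are illustrative stand-ins for the original's weightage constants:

import cv2
import numpy as np

mog = cv2.createBackgroundSubtractorMOG2()
accumulated = None

def heatmap_overlay(frame):
    """Blend an accumulated motion heatmap onto a BGR uint8 frame."""
    global accumulated
    if accumulated is None:
        accumulated = np.zeros(frame.shape[:2], np.uint8)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    mask = mog.apply(gray)                                     # foreground mask
    _, binary = cv2.threshold(mask, 2, 2, cv2.THRESH_BINARY)   # small increments per frame
    accumulated = cv2.add(binary, accumulated)                 # heat builds up over time
    colored = cv2.applyColorMap(accumulated, cv2.COLORMAP_HOT)
    return cv2.addWeighted(frame, 0.6, colored, 0.4, 0)

# overlay = heatmap_overlay(np.zeros((480, 640, 3), dtype=np.uint8))
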
Example #19
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    log.info("Selected Network input Layer type is " +
             str(type(net_input_shape)) + " And shape is " +
             str(net_input_shape))
    log.info("Required input img size W " + str(net_input_shape[3]) + " H " +
             str(net_input_shape[2]))

    # ### TODO: Handle the input stream ###
    # cap = cv2.VideoCapture(args.input)
    cap, error_flag, image_flag = check_input_type(args.input,
                                                   args.cam_id)  #call function
    if error_flag:  # Check for invalid file extension
        log.error("Program stopped")
        return
    elif image_flag:  #check for image
        INPUT_IMAGE = args.input
        img = cv2.imread(INPUT_IMAGE)
        if not isinstance(img, np.ndarray):  # image failed to load, same as img.empty()
            log.error("Error: Invalid image or path")
            log.error("Use -h argument for help")
            return
    else:
        cap.open(args.input)

    # Get input feed height and width
    img_width = int(cap.get(3))
    img_height = int(cap.get(4))

    if img_width is None or img_width < 1:  # If input path is wrong
        log.error("Error! Can't read Input: Check path")
        return

    log.info("feed frame size W " + str(img_width) + " H " + str(img_height))

    # Initialize video writer if video mode
    if args.write_video is "Y":  # only if args given Y
        if not image_flag:
            # Video writer Linux
            log.info("---Opencv video writer debug LIN---")
            fourcc = cv2.VideoWriter_fourcc(*'MP4V')
            out = cv2.VideoWriter('out.mp4', fourcc, 30,
                                  (img_width, img_height))
            log.info("-------------------------------")

    # Initialized varible utilized inside loop
    frame_count = 0
    total_people_count = 0
    last_state = 0
    delay_on = 0
    delay_off = (time.time() * 1000
                 )  # Initialize timer before loop to get actual time
    delay_diff_on = 0
    delay_diff_off = 0
    duration = 0
    duration_timebase = 0
    duration_fpsbase = 0
    count_people_image = 0

    # Second counting timer initialized
    sec_on = (time.time() * 1000)  # Timer for update stat on terminal START
    sec_diff = 0
    cv_drawstate_time_s = 0
    cv_drawstate_time_e = 0
    count_flag = False

    frame_count_onstate = 0
    frame_count_offstate = 0

    # Accuracy Log
    log_acount = 0
    log_frame_no = []
    log_person_counted = []
    log_duration_fpsbase = []
    log_duration_timebase = []
    log_infer_time = []

    # error_log = {'MuliBoxDetected':{}}
    log_ecount = 0  #counter for error log in case of multiple box count
    log_multicounted = []

    # ### TODO: Loop until stream is over ###
    while cap.isOpened():
        frame_count += 1  # Global frame Count no of frame processed.
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(1)

        ### TODO: Read from the video capture ###
        ### TODO: Pre-process the image as needed ###
        p_frame = preprocess_frame(
            frame, net_input_shape[3],
            net_input_shape[2])  #from extracted input function
        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(p_frame)
        ### TODO: Wait for the result ###
        inferreq_start_time = (time.time() * 1000)  # Timer for inference START
        if infer_network.wait() == 0:
            inferreq_end_time = (
                time.time() *
                1000) - inferreq_start_time  # Timer for inference END
            log_infer_time.append(float("{:.2f}".format(inferreq_end_time)))

            ### TODO: Get the results of the inference request ###
            blob, result = infer_network.get_output()

            # If the model outputs multiple blobs, print the available blob information
            if frame_count == 1:  # Print only Once
                for name, output_ in blob.items():  # list the available output blobs
                    log.info("The name of available blob is: " + str(name))

            ### TODO: Extract any desired stats from the results ###
            color = selectBoxcolor(args.box_color)
            cv_drawboxtime_s = (time.time() * 1000
                                )  # Timer for drawing box on frame START
            frame, count_box, countmultipeople = draw_boxes(
                frame, result, img_width, img_height, color,
                args.prob_threshold)
            cv_drawboxtime_e = (
                time.time() *
                1000) - cv_drawboxtime_s  #Timer for drawing box on frame END

            count_people_image = countmultipeople  # Variable For image stat only
            ### TODO: Calculate and send relevant information on ###
            if count_box != last_state:  # Anything below runs only once when the state changes.
                log_acount += 1  # increase stat change counter
                if count_box == 1:
                    count_flag = True  # Flag for verify if counting
                    delay_on = (time.time() * 1000)  # Timer for on delay START
                    delay_diff_off = (time.time() * 1000
                                      ) - delay_off  # Timer for off delay END
                    delay_diff_on = 0  # Timer for on delay RESET

                    frame_count_onstate = frame_count  # Frame count is Global FPS counter
                    frame_count_offstate = frame_count - frame_count_offstate  # Calculates the difference
                else:
                    count_flag = False
                    delay_diff_on = (time.time() *
                                     1000) - delay_on  # Timer for on delay END
                    delay_off = (time.time() * 1000
                                 )  # Timer for off delay START
                    delay_diff_off = 0  # Timer for off delay RESET

                    frame_count_onstate = frame_count - frame_count_onstate  # Calculates the difference
                    frame_count_offstate = frame_count

                if delay_diff_on > args.delay_band:
                    total_people_count += 1  # Debug is placed above because count is not added yet.
                    duration_timebase = delay_diff_on / 1000  # Convert to Sec.
                    duration_fpsbase = frame_count_onstate / args.fps  # Local use
                    duration = duration_fpsbase  # global set

                    # Accuracy log, individual lists, terminal friendly
                    log_person_counted.append(total_people_count)
                    log_duration_timebase.append(
                        "{:.2f}".format(duration_timebase))
                    log_duration_fpsbase.append(duration_fpsbase)
                    log_frame_no.append(frame_count)  # Log frame no of video

                    ### current_count, total_count and duration to the MQTT server ###
                    ### Topic "person": keys of "count" and "total" ###
                    client.publish("person",
                                   json.dumps({"total": total_people_count}))
                    ### Topic "person/duration": key of "duration" ###
                    client.publish("person/duration",
                                   json.dumps({"duration": duration}))
                client.publish("person",
                               json.dumps({"count": countmultipeople}))

                last_state = count_box

            else:
                if countmultipeople not in (
                        0, 1):  #In case of multiple people detected
                    log_ecount += 1  # Increase error counter
                    # Nested list Frame and multipeople people count
                    log_multicounted.append([
                        'F: ' + str(frame_count) + ' C: ' +
                        str(countmultipeople)
                    ])

        ### This part needed to be optimized
        if args.toggle_video is "ON":  # If video feed is off stop unnecessory processing
            cv_drawstate_time_s = (time.time() * 1000
                                   )  # TImer for draw stat on frame START
            # Draw inference time on image
            label = "Inference time: " + str(
                "{:.2f}".format(inferreq_end_time)) + "ms"  #string label
            cv2.putText(frame, label, (15, 20), cv2.FONT_HERSHEY_COMPLEX, 0.4,
                        BOXCOLOR['BLUE'], 1)
            label1 = "Total people count: " + str(
                total_people_count)  #string label
            if image_flag or countmultipeople > 1:
                label1 = "Total people count: " + str(
                    count_people_image)  #string label
            else:
                label1 = "Total people count: " + str(total_people_count)
            cv2.putText(frame, label1, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.4,
                        BOXCOLOR['BLUE'], 1)
            if countmultipeople > 1 or image_flag:
                label2 = "Average Time stayed: N/A"
            else:
                label2 = "Average Time stayed: " + str(
                    "{:.2f}".format(duration)) + "Sec."  #string label
            cv2.putText(frame, label2, (15, 40), cv2.FONT_HERSHEY_COMPLEX, 0.4,
                        BOXCOLOR['BLUE'], 1)

            # People count exceed alarm
            if countmultipeople > args.alarm_people or duration > args.alarm_duration:
                draw_framelinered(frame, img_height, img_width)
                if countmultipeople > args.alarm_people:
                    label3 = "Alarm: people count limit exceeded! limit: " + str(
                        args.alarm_people)  #string label
                    cv2.putText(frame, label3, (15, 50),
                                cv2.FONT_HERSHEY_COMPLEX, 0.4, BOXCOLOR['RED'],
                                1)
                else:
                    label4 = "Alarm: Person stayed longer! limit: " + str(
                        args.alarm_duration) + "Sec."  #string label
                    cv2.putText(frame, label4, (15, 60),
                                cv2.FONT_HERSHEY_COMPLEX, 0.4, BOXCOLOR['RED'],
                                1)
            else:
                draw_framelinegreen(frame, img_height, img_width)
                # Draw cv process time
            label5 = "CV Frame process time: " + str(
                "{:.2f}".format(cv_drawboxtime_e +
                                cv_drawstate_time_e)) + "ms"  #string label
            cv2.putText(frame, label5, (15, 70), cv2.FONT_HERSHEY_COMPLEX, 0.4,
                        BOXCOLOR['BLUE'], 1)
            cv_drawstate_time_e = (
                time.time() *
                1000) - cv_drawstate_time_s  # TImer for draw stat on frame END
        else:
            # Stats of time of cv processing on image frame
            sec_diff = (time.time() *
                        1000) - sec_on  # Timer for update stat on terminal END
            # log.info"time in ms: ",sec_diff) # Debug
            if sec_diff > 1000:  # update stats roughly every second
                os.system('cls' if os.name == 'nt' else
                          'clear')  # Clear the terminal
                print()  # Blank print
                print("Video feed is OFF, Terminal will refresh every sec.")
                print("Press ctlr+c to stop execution.")
                print("Checkout log_xxx.txt for stats.")
                # People count on terminal
                if countmultipeople > 1:
                    print("Total people count: ", countmultipeople)
                else:
                    print("Current people count: ", total_people_count)
                    print("Total people count: ", total_people_count)
                    print("Average Time stayed: "
                          "{:.2f}".format(duration), " Sec.")
                # Alarm on terminal
                if countmultipeople > args.alarm_people or duration > args.alarm_duration:
                    if countmultipeople > args.alarm_people:
                        print("##### Alarm1 #####")
                        print("People count limit exceeded! limit: " +
                              str(args.alarm_people))
                        print("##################")
                    else:
                        print("##### Alarm2 #####")
                        print("Person stayed longer! limit: " +
                              str(args.alarm_duration) + "Sec.")  #string label
                        print("##################")
                print("-----Stats for time -----")
                print("Inference Time(ms):",
                      "{:.2f}".format(inferreq_end_time))
                print("Draw boundingBox time(ms):",
                      "{:.2f}".format(cv_drawboxtime_e))
                print("Draw state time(ms):",
                      "{:.2f}".format(cv_drawstate_time_e))
                print("--------------------------")
                sec_on = (time.time() * 1000
                          )  # Timer for update stat on terminal RESET
                sec_diff = 0  # Timer for update stat on terminal RESET

        # Adjusting timers with inference and cv processing time to fix counting and duration.
        if count_flag:

            delay_on = delay_on + inferreq_end_time + cv_drawboxtime_e + cv_drawstate_time_e

        else:
            delay_off = delay_off + inferreq_end_time + cv_drawboxtime_e + cv_drawstate_time_e

        ### TODO: Send the frame to the FFMPEG server ###
        # Write video or image file
        if not image_flag:
            if args.toggle_video is "ON":
                sys.stdout.buffer.write(frame)  # Send to ffmpeg
                sys.stdout.flush()  # Send to ffmpeg
                # cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
                # cv2.imshow('frame',frame)
            if args.write_video is "Y":
                out.write(frame)
        else:
            ### TODO: Write an output image if `single_image_mode` ###
            cv2.imwrite('output_image.jpg', frame)
            print("Image saved sucessfully!")

        if key_pressed == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()

    # Dump to log txt file
    log.info("Last frame prcessed no: " + str(frame_count))
    log.info("-----AccuracyLog-----")
    if len(log_person_counted) > 1:  # Only if counting single person
        log.info("No Of person:")
        log.info(str(log_person_counted))
        # log.info("Duration stayed timebase:")
        # log.info(str(log_duration_timebase))
        log.info("Duration stayed fpsbase:")
        log.info(str(log_duration_fpsbase))
        log.info("Frame No.:")
        log.info(str(log_frame_no))
        log_infer_time = np.array(log_infer_time)  # Convert list to np array
        log.info("Inference time:[min max avg.]")
        log.info(
            str([
                log_infer_time.min(),
                log_infer_time.max(),
                (float("{:.2f}".format(np.average(log_infer_time))))
            ]))
    else:
        log.info("N/A")
        log_infer_time = np.array(log_infer_time)  # Convert list to np array
        log.info("Inference time:[min max avg.]")
        log.info(([
            log_infer_time.min(),
            log_infer_time.max(),
            (float("{:.2f}".format(np.average(log_infer_time))))
        ]))
        log.info("-----Error log-----")
        if 1 < len(log_multicounted) < 10:  # Only if a few multi-count events were logged
            log.info("Frame No: Count")
            log.info(str(log_multicounted))
        else:
            log.info("N/A")
        log.info("-----Finish!------")
Example #20
def intruder_detector():
    """
    Process the input source frame by frame and detects intruder, if any.

    :return status: 0 on success, negative value on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONFIG_FILE
    global video_caps
    global conf_labels_file_path
    global is_async_mode

    parse_args()
    ret = check_args()
    if ret != 0:
        return ret, ""

    if not os.path.isfile(CONFIG_FILE):
        return -12, ""

    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Creates subdirectory to save output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]

    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initializing VideoWriter for each source
    for video_cap in video_caps:
        ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                           int(video_cap.input_width))
        if ret != 0:
            return ret, ret_value

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]
    # Arrange windows so that they are not overlapping
    arrange_windows()

    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    signal.signal(
        signal.SIGINT,
        signal_handler,
    )
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    next_request_id = 1
    cur_request_id = 0
    # Main loop starts here. Loop over all the video captures

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_fps))):
                if is_async_mode:
                    ret, video_cap.next_frame = video_cap.vc.read()
                else:
                    ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If no new frame or error in reading a frame, exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break
            if no_more_data[idx]:
                stream_end_frame = numpy.zeros((int(
                    video_cap.input_height), int(video_cap.input_width), 1),
                                               dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                cv2.imshow(video_cap.cam_name, stream_end_frame)
                continue
            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize to expected size (in model .xml file)
            # Input frame is resized to infer resolution
            if is_async_mode:
                in_frame = cv2.resize(video_cap.next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))

                # Start asynchronous inference for specified request.
                infer_network.exec_net(next_request_id, in_frame)
                video_cap.frame = video_cap.next_frame
                # Async enabled and only one video capture
                if len(video_caps) == 1:
                    videoCapResult = video_cap
                # Async enabled and more than one video capture
                else:
                    # Get previous index
                    videoCapResult = video_caps[idx - 1 if idx -
                                                1 >= 0 else len(video_caps) -
                                                1]

            else:
                in_frame = cv2.resize(video_cap.frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))

                # Start synchronous inference for specified request.
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = video_cap

            inf_start = time.time()
            # Wait for the result
            if infer_network.wait(cur_request_id) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(cur_request_id)
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the probability is more than specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        videoCapResult.current_count[label] += 1
                        xmin = int(obj[3] * videoCapResult.input_width)
                        ymin = int(obj[4] * videoCapResult.input_height)
                        xmax = int(obj[5] * videoCapResult.input_width)
                        ymax = int(obj[6] * videoCapResult.input_height)
                        # Draw bounding box around the intruder detected
                        cv2.rectangle(videoCapResult.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(videoCapResult.no_of_labels):
                    if videoCapResult.candidate_count[
                            i] == videoCapResult.current_count[i]:
                        videoCapResult.candidate_confidence[i] += 1
                    else:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.candidate_count[
                            i] = videoCapResult.current_count[i]

                    if videoCapResult.candidate_confidence[
                            i] == CONF_CANDIDATE_CONFIDENCE:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.changed_count[i] = True
                    else:
                        continue

                    if videoCapResult.current_count[
                            i] > videoCapResult.last_correct_count[i]:
                        videoCapResult.total_count[
                            i] += videoCapResult.current_count[
                                i] - videoCapResult.last_correct_count[i]
                        det_objs = videoCapResult.current_count[
                            i] - videoCapResult.last_correct_count[i]
                        total_count = sum(videoCapResult.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i],
                                videoCapResult.cam_name)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=videoCapResult.frame_count)
                            videoCapResult.events.append(event)

                        snapshot_name = "output/intruder_{}.png".format(
                            total_count)
                        cv2.imwrite(snapshot_name, videoCapResult.frame)
                    videoCapResult.last_correct_count[
                        i] = videoCapResult.current_count[i]

                # Create intruder log window, add logs to the frame and display it
                log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                         dtype='uint8')
                for i, log in enumerate(log_list):
                    cv2.putText(log_window, log, (10, 20 * i + 15),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255),
                                1)
                cv2.imshow("Intruder Log", log_window)
                videoCapResult.frame_count += 1

                # Video output
                videoCapResult.vw.write(videoCapResult.frame)
                log_message = "Async mode is on." if is_async_mode else \
                    "Async mode is off."
                cv2.putText(videoCapResult.frame, log_message,
                            (10, int(videoCapResult.input_height) - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 10, 10), 1)
                inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(inf_time * 1000)
                cv2.putText(videoCapResult.frame, inf_time_message,
                            (10, int(videoCapResult.input_height) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                fps_time = time.time() - start_time
                fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
                cv2.putText(videoCapResult.frame, fps_message,
                            (10, int(videoCapResult.input_height) - 10),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

                # Display the video output
                cv2.imshow(videoCapResult.cam_name, videoCapResult.frame)

            start_time = time.time()

            # Loop video to mimic continuous input if LOOP_VIDEO flag is True
            if LOOP_VIDEO and not videoCapResult.is_cam:
                vfps = int(round(videoCapResult.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if videoCapResult.loop_frames > videoCapResult.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    videoCapResult.loop_frames = 0
                    videoCapResult.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

            if is_async_mode:
                # Swap infer request IDs
                cur_request_id, next_request_id = next_request_id, cur_request_id

        if cv2.waitKey(1) == 27:
            break

        if cv2.waitKey(1) == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))

        if False not in no_more_data:
            break

    ret = save_json()
    if ret != 0:
        return ret, ''

    infer_network.clean()
    log_file.close()
    return [0, '']
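
# The loop above relies on an `Event` record and a `save_json()` helper that are
# defined elsewhere in the original project. A rough, hedged sketch follows (the
# real helpers may be structured differently, e.g. wired to a global `video_caps`
# list rather than taking it as an argument):
import json
from collections import namedtuple

# Hypothetical event record matching the fields used above.
Event = namedtuple("Event", ["event_time", "intruder", "count", "frame"])


def save_json(video_caps=None, path="output/events.json"):
    """Serialize the accumulated detection events to a JSON file.
    Returns 0 on success and a non-zero code on failure, as the caller expects."""
    video_caps = video_caps or []
    try:
        data = {
            cap.cam_name: [event._asdict() for event in cap.events]
            for cap in video_caps
        }
        with open(path, "w") as json_file:
            json.dump(data, json_file, indent=2)
        return 0
    except OSError:
        return 1
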
Example #21
def infer_on_video(args):
    # Initialize the Inference Engine
    plugin = Network()

    # Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    out = cv2.VideoWriter('out.mp4', fourcc, 15, (width, height))
    frame_count = 0

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, input_frame = cap.read()

        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
        p_frame = cv2.resize(input_frame,
                             (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            result = result.transpose((1, 2, 0))

            # Get semantic mask for person class
            person_mask = np.uint8(result == 15)
            person_mask = np.dstack([person_mask, person_mask, person_mask])
            person_mask = cv2.resize(person_mask, (width, height))

            # Create the overlay mask
            overlay = np.zeros_like(person_mask)
            overlay[:] = (127, 0, 0)

            # Add overlay-mask over input frame
            overlay_mask = person_mask * overlay
            assert person_mask.shape == overlay_mask.shape, "Raw person mask and overlay mask should be of same dimensions"
            output_frame = cv2.addWeighted(input_frame, 1, overlay_mask, 0.9,
                                           0)

            # Write output frames to video
            output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR)
            out.write(output_frame)

            # Pipe output frames to the server; `pipe` is assumed to be an FFmpeg
            # subprocess opened at module level (see the sketch after this function)
            pipe.stdin.write(output_frame.tobytes())

            # Show frame counter
            frame_count = frame_count + 1
            print('Frame count: ' + str(frame_count))

        # Break on keyboard interrupt
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    out.release()
    cv2.destroyAllWindows()
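
# This example writes raw frames to a `pipe` object that is never defined in the
# snippet; in similar projects it is typically an FFmpeg subprocess opened at
# module level. A minimal, hedged sketch (the URL, frame rate and flags below are
# assumptions, and the video size must match the frames actually written):
import subprocess


def open_ffmpeg_pipe(width, height, fps=24, url="http://localhost:3004/fac.ffm"):
    """Start an FFmpeg process that consumes raw BGR frames on stdin."""
    return subprocess.Popen(
        ["ffmpeg",
         "-f", "rawvideo",                        # headerless frames on stdin
         "-pixel_format", "bgr24",                # OpenCV's channel order
         "-video_size", "{}x{}".format(width, height),
         "-framerate", str(fps),
         "-i", "-",                               # read from stdin
         url],
        stdin=subprocess.PIPE)

# e.g. pipe = open_ffmpeg_pipe(width, height), created before the processing loop
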
Example #22
def infer_on_stream(args):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :return: None
    """
    #client = mqtt.Client()
    #client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    # Initialise the class
    plugin = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    model = args.model
    ### TODO: Load the model through `infer_network` ###
    plugin.load_model(model, args.device, args.cpu_extension)
    net_input_shape = plugin.get_input_shape()

    if args.input == 'CAM':
        input_stream = 0
        single_image = False
    elif args.input[-4:] in [".jpg", ".bmp"]:
        single_image = True
        input_stream = args.input
    else:
        single_image = False
        input_stream = args.input
        assert os.path.isfile(input_stream)

    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)
    if not cap.isOpened():
        log.error("Unable to open video source")

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    ### TODO: Handle the input stream ###
    out = cv2.VideoWriter('out1.mp4', 0x00000021, 30, (width, height))
    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Read from the video capture ###

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        ### TODO: Start asynchronous inference for specified request ###
        t1 = time.time()
        plugin.exec_net(p_frame)
        ### TODO: Wait for the result ###
        if plugin.wait() == 0:
            ### TODO: Extract any desired stats from the results ###
            result = plugin.extract_output()
            t2 = time.time()
            ### TODO: Get the results of the inference request ###
            s1 = t2 - t1
            ### TODO: Calculate and send relevant information on ###
            frame, s = draw_boxes(frame, result, args, width, height)
            t3 = time.time() - t2
            # `current_count` and `tcount` are assumed to be module-level globals
            # maintained by draw_boxes() (see the sketch after this function)
            txt = "current_count: %d" % current_count + " total_count: %d" % tcount
            cv2.putText(frame, txt, (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 0), 2)
            txt1 = "duration: %d" % s
            cv2.putText(frame, txt1, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 0), 2)
            txt2 = "Inference time: {:.3f}ms".format(
                s1 * 1000) + " FPS: {:.3f}".format(1 / s1)
            cv2.putText(frame, txt2, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 0), 2)

            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###

            #client.publish("person", json.dumps({"total":tcount}), retain=False)
            #client.publish('person', json.dumps({'count': current_count, 'total': count}),retain=False)

            ### Topic "person/duration": key of "duration" ###
            #client.publish("person/duration", json.dumps({"duration": int(s)}), retain=False)

            out.write(frame)

        ### TODO: Send the frame to the FFMPEG server ###
        #sys.stdout.buffer.write(frame)
        #sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        if single_image:
            cv2.imwrite("output.jpg", frame)
        if key_pressed == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    ### TODO: Disconnect from MQTT
    #client.disconnect()
    out.release()
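
# `draw_boxes` is not shown in this example, yet it carries most of the counting
# logic: the overlay text above reads module-level `current_count` and `tcount`
# values, and the second return value is used as a duration. A hedged sketch under
# those assumptions (SSD-style [1, 1, N, 7] output; the real helper may differ):
import time

import cv2

current_count = 0   # people visible in the current frame
tcount = 0          # total people counted so far
_enter_time = None  # when the most recent person entered


def draw_boxes(frame, result, args, width, height):
    """Draw detections above the threshold, update the counters and return
    the annotated frame plus the current person's stay duration in seconds."""
    global current_count, tcount, _enter_time
    count = 0
    for box in result[0][0]:  # [image_id, label, conf, xmin, ymin, xmax, ymax]
        if box[2] >= args.prob_threshold:
            xmin, ymin = int(box[3] * width), int(box[4] * height)
            xmax, ymax = int(box[5] * width), int(box[6] * height)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            count += 1
    if count > current_count:  # someone entered the frame
        tcount += count - current_count
        _enter_time = time.time()
    current_count = count
    duration = time.time() - _enter_time if _enter_time and count > 0 else 0
    return frame, duration
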
Example #23
def infer_on_stream(args, m_client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param m_client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    global initial_w, initial_h, prob_threshold
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    Model = args.model

    Device = args.device
    Cpu = args.cpu_extension

    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(Model, Cpu, Device)
    network_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    # Checks for live feed
    if args.input == 'CAM':
        input_validated = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_validated = args.input

    # Checks for video file
    else:
        input_validated = args.input
        assert os.path.isfile(args.input), "file doesn't exist"

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(input_validated)
    cap.open(input_validated)
    prob_threshold = args.prob_threshold
    w = int(cap.get(3))
    h = int(cap.get(4))
    temp = 0
    tk = 0
    in_shape = network_shape['image_tensor']

    # initialize variables

    duration_prev = 0
    counter_total = 0
    dur = 0
    request_id = 0

    report = 0
    counter = 0
    counter_prev = 0
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (in_shape[3], in_shape[2]))
        image_p = image.transpose((2, 0, 1))
        image_p = image_p.reshape(1, *image_p.shape)

        ### TODO: Start asynchronous inference for specified request ###
        net_input = {'image_tensor': image_p, 'image_info': image_p.shape[1:]}
        duration_report = None
        inf_start = time.time()
        infer_network.exec_net(net_input, request_id)

        color = (255, 0, 0)
        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            det_time = time.time() - inf_start
            ### TODO: Get the results of the inference request ###
            net_output = infer_network.get_output()

            # Draw bounding box
            frame, current_count, d, tk = draw_outputs(net_output, frame,
                                                       initial_w, initial_h,
                                                       temp, tk)

            # Printing Inference Time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time *
                                                                 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)

            # Calculate and send relevant information
            if current_count > last_count:  # New entry
                start_time = time.time()
                total_count = total_count + current_count - last_count
                m_client.publish("person", json.dumps({"total": total_count}))

            if current_count < last_count:  # Person left; publish stay duration
                duration = int(time.time() - start_time)
                m_client.publish("person/duration",
                                 json.dumps({"duration": duration}))

            # Adding overlays to the frame
            txt2 = "Distance: %d" % d + " Lost frame: %d" % tk
            cv2.putText(frame, txt2, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)

            txt2 = "Current count: %d " % current_count
            cv2.putText(frame, txt2, (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)

            if current_count > 3:
                txt2 = "Alert! Maximum count reached"
                (text_width,
                 text_height) = cv2.getTextSize(txt2,
                                                cv2.FONT_HERSHEY_COMPLEX,
                                                0.5,
                                                thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                # make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(frame, box_coords[0], box_coords[1], (0, 0, 0),
                              cv2.FILLED)
                cv2.putText(frame, txt2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            m_client.publish("person",
                             json.dumps({"count":
                                         current_count}))  # People Count
            last_count = current_count
            temp = d
        ### TODO: Send the frame to the FFMPEG server ###
        #  Resize the frame
        frame = cv2.resize(frame, (768, 432))
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
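
# `draw_outputs` is referenced above but not defined in this listing. From the way
# its results are consumed (a "Distance" and a "Lost frame" overlay plus a smoothed
# count), it appears to draw detections and bridge short gaps where the detector
# loses the person. A hedged sketch, assuming an SSD-style [1, 1, N, 7] output and
# the module-level `prob_threshold` declared global in the function above:
import math

import cv2


def draw_outputs(net_output, frame, initial_w, initial_h, prev_distance, lost_frames):
    """Return (annotated frame, people count, centroid distance, lost-frame counter)."""
    current_count = 0
    distance = prev_distance
    for obj in net_output[0][0]:  # [image_id, label, conf, xmin, ymin, xmax, ymax]
        if obj[2] > prob_threshold:
            xmin, ymin = int(obj[3] * initial_w), int(obj[4] * initial_h)
            xmax, ymax = int(obj[5] * initial_w), int(obj[6] * initial_h)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 55, 255), 2)
            current_count += 1
            # Distance of the box centre from the frame corner; the caller compares
            # it with the previous value to decide if the same person is still there.
            cx, cy = (xmin + xmax) / 2, (ymin + ymax) / 2
            distance = math.sqrt((cx - initial_w) ** 2 + (cy - initial_h) ** 2)
            lost_frames = 0
    if current_count == 0 and prev_distance > 0 and lost_frames < 20:
        # Keep reporting the person for a few frames to smooth detector flicker.
        lost_frames += 1
        current_count = 1
    return frame, current_count, distance, lost_frames
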
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network_vals = infer_network.load_model(
        model=args.model, device=args.device, cpu_extension=args.cpu_extension)
    log.debug(infer_network_vals)
    input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    if args.input == 'CAM':
        input_stream = 0
        single_image = False
    elif args.input[-4:] in [".jpg", ".bmp"]:
        single_image = True
        input_stream = args.input
    else:
        single_image = False
        input_stream = args.input
        assert os.path.isfile(input_stream)

    capture = cv2.VideoCapture(input_stream)
    capture.open(input_stream)
    if not capture.isOpened():
        log.error("Unable to open video source")

    logger.debug("W+H: " + str(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + "-" +
                 str(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    t0 = 0
    infer_time = 0
    t1 = 0
    process_time = 0
    request_id = 0
    total_count = 0
    previous_count = 0
    num_persons_in = 0
    current_count = 0
    stay_time = 0
    max_stay_time = 0
    mean_stay_time = 0

    track_threshold = 0.1
    max_len = 30

    # this list is to transform values in an excel file
    data_list = []

    # queue to accumulate last "max_len" number of detections
    track = deque(maxlen=max_len)

    ### TODO: Loop until stream is over ###
    while capture.isOpened():
        data_element = {}
        ### TODO: Read from the video capture ###
        flag, frame = capture.read()
        if not flag:
            break

        ### TODO: Pre-process the image as needed ###
        logger.debug("size: ".format(input_shape))
        resized_frame = cv2.resize(frame, (input_shape[3], input_shape[2]))
        transposed_resized_frame = resized_frame.transpose((2, 0, 1))
        resh_transposed_resized_frame = transposed_resized_frame.reshape(
            input_shape)

        ### TODO: Start asynchronous inference for specified request ###
        t0 = time.time()
        infer_network.exec_net(request_id, resh_transposed_resized_frame)
        ### TODO: Wait for the result ###
        if infer_network.wait(request_id) == 0:
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(request_id, frame.shape,
                                              prob_threshold)
            t1 = time.time()
            infer_time = t1 - t0
            ### TODO: Extract any desired stats from the results ###
            current_count, bb_frame = count_persons(result, frame)
            process_time = time.time() - t1
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###

            # append number of detections to "track" queue
            track.append(current_count)
            # proportion of frames with a positive detection
            num_tracked = 0
            if np.sum(track) / max_len > track_threshold:
                num_tracked = 1

            if num_tracked > previous_count:
                logger.debug("INTO IF ------------------------------------")
                start_time = time.time()
                num_persons_in = num_tracked - previous_count
                total_count += num_persons_in
                previous_count = num_tracked
                client.publish("person",
                               json.dumps({"total": total_count}),
                               retain=True)
                # client.publish("person", json.dumps({"count":num_tracked}), retain=True)

            ### Topic "person/duration": key of "duration" ###
            if num_tracked < previous_count:
                previous_count = num_tracked
                # client.publish("person", json.dumps({"count":num_tracked}), retain=True)

            if num_tracked > 0:
                stay_time += (time.time() - start_time) / 10
                logger.debug("Duration: {}".format(stay_time))

            if total_count > 0:
                mean_stay_time = stay_time / total_count
                client.publish("person/duration",
                               json.dumps({"duration": int(mean_stay_time)}))

            client.publish("person",
                           json.dumps({"count": num_tracked}),
                           retain=True)

        data_element['time'] = time.strftime("%H:%M:%S", time.localtime())
        data_element['current_count'] = current_count
        data_element['num_tracked'] = num_tracked
        data_element['num_persons_in'] = num_persons_in
        data_element['previous_count'] = previous_count
        data_element['total_count'] = total_count
        data_element['stay_time'] = stay_time
        data_element['mean_stay_time'] = mean_stay_time
        data_element['infer_time'] = infer_time
        data_element['process_time'] = process_time
        data_element['result'] = result

        data_list.append(data_element)

        logger.debug(
            "NUM TRACKED: {} - {} - PREVIOUS COUNT: {} - TOTAL COUNT: {} - STAY TIME: {}"
            .format(num_tracked, np.sum(track), previous_count, total_count,
                    mean_stay_time))
        key_pressed = cv2.waitKey(60)
        if key_pressed == 27:
            write_file(data_list)
            capture.release()
            cv2.destroyAllWindows()
            client.disconnect()
            break

        ### TODO: Send the frame to the FFMPEG server ###
        logger.debug("Image_size: {}".format(bb_frame.shape))

        sys.stdout.buffer.write(bb_frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image:
            cv2.imwrite("output.jpg", bb_frame)

    write_file(data_list)
    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
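
# `count_persons` and `write_file` are project-specific helpers that are not part
# of this listing. Assuming `result` is already a list of pixel-space boxes filtered
# by the threshold (which the get_output(request_id, frame.shape, prob_threshold)
# call suggests), minimal sketches could look like this:
import csv

import cv2


def count_persons(result, frame):
    """Draw every detected box and return (number of detections, annotated frame)."""
    for xmin, ymin, xmax, ymax in result:
        cv2.rectangle(frame, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                      (0, 255, 0), 2)
    return len(result), frame


def write_file(data_list, path="stats.csv"):
    """Dump the per-frame stats dictionaries collected above to a CSV file."""
    if not data_list:
        return
    with open(path, "w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=list(data_list[0].keys()))
        writer.writeheader()
        writer.writerows(data_list)
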
Example #25
def infer_on_video(args):
    ### TODO: Initialize the Inference Engine
    ntw = Network()

    ### TODO: Load the network model into the IE
    ntw.load_model(args.m, args.d, CPU_EXTENSION)
    iptShape = ntw.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    out = cv2.VideoWriter('out.mp4', 0x00000021, 30, (width, height))

    # Process frames until the video ends, or process is exited
    #reset frame count
    frameCnt = 0
    col = convert_color(args.col)
    conf = float(args.conf)
    frames = {}

    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()

        if not flag:
            #wait for all inference to complete
            for i in range(frameCnt):
                rqs = ntw.exec_network.requests[i]
                status = rqs.wait()

                ### TODO: Get the output of inference
                #if inference was successful, draw boxes
                #if not successful, do nothing, no box drawn, and just output original frame
                f = frames[i]

                if status == 0:
                    boxes = rqs.outputs['detection_out'][0][0]

                    ### TODO: Update the frame to include detected bounding boxes
                    f = draw_boxes(f, boxes, width, height, conf, col)

                # Write out the frame
                out.write(f)

            # Release the out writer and destroy any OpenCV windows
            out.release()
            cv2.destroyAllWindows()

            print('total frames: {}'.format(frameCnt))

            break

        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the frame
        frames[frameCnt] = frame

        ppImg = preProc(frame, iptShape[3], iptShape[2])

        ### TODO: Perform inference on the frame
        rqs = ntw.async_inference(frameCnt, ppImg)

        #there is no point in performing an async request,
        #if we wait for the inference to complete before processing the next frame
        #instead, moved all waiting to the end of the while loop
        frameCnt += 1

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture
    cap.release()
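
# `preProc` and `convert_color` are small helpers omitted from this example. Their
# likely shape, inferred from how they are called (resize + HWC-to-NCHW, and a
# colour-name lookup for the CLI argument), is roughly:
import cv2


def preProc(frame, width, height):
    """Resize to the network input size and reorder HWC -> NCHW."""
    image = cv2.resize(frame, (width, height))
    image = image.transpose((2, 0, 1))      # HWC -> CHW
    return image.reshape(1, *image.shape)   # add the batch dimension


def convert_color(color_string):
    """Map a colour name from the command line to a BGR tuple (default: blue)."""
    colors = {"BLUE": (255, 0, 0), "GREEN": (0, 255, 0), "RED": (0, 0, 255)}
    return colors.get(str(color_string).upper(), (255, 0, 0))
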
Example #26
def main():
    model_xml = (os.environ["MODEL"])
    input_source = (os.environ["INPUT"])
    device = os.environ['DEVICE'] if 'DEVICE' in os.environ.keys() else 'CPU'
    cpu_extension = os.environ[
        'CPU_EXTENSION'] if 'CPU_EXTENSION' in os.environ.keys() else None
    try:
        # Probability threshold for detections filtering
        prob_threshold = float(os.environ['PROB_THRESHOLD'])
    except KeyError:
        prob_threshold = 0.5
    try:
        # Specify the azure storage name to upload results to cloud.
        account_name = os.environ['ACCOUNT_NAME']
    except:
        account_name = None
    try:
        # Specify the azure storage key to upload results to cloud.
        account_key = os.environ['ACCOUNT_KEY']
    except:
        account_key = None

    if account_name == "" or account_key == "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name +
              "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)

    if input_source == 'cam':
        input_stream = 0
    else:
        input_stream = input_source
        assert os.path.isfile(
            input_source), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', input_source)
        sys.exit(1)

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, device, 1, 1, 0,
                                          cpu_extension)[1]

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start
        people_count = 0

        # Converting to Grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Remove the background
        fgbgmask = mog.apply(gray)

        # Thresholding the image
        thresh = 2
        max_value = 2
        threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                        cv2.THRESH_BINARY)[1]
        # Adding to the accumulated image
        accumulated_image = cv2.add(threshold_image, accumulated_image)
        colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT)

        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5,
                             255), min(class_id * 7,
                                       255), min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        final_result_overlay = cv2.addWeighted(frame, P_COUNT_FRAME_WEIGHTAGE,
                                               colormap_image,
                                               COLORMAP_FRAME_WEIGHTAGE_1, 0)
        cv2.imshow("Detection Results", final_result_overlay)

        time_interval = MULTIPLICATION_FACTOR * fps
        if frame_count % time_interval == 0:
            apply_time_stamp_and_save(final_result_overlay, people_count,
                                      upload_azure)

        frame = next_frame

        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
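
# `apply_time_stamp_and_save` is called every MULTIPLICATION_FACTOR * fps frames but
# is not shown here. A hedged sketch of what it plausibly does (the Azure upload is
# only stubbed, since the original cloud code is not part of this listing):
import time

import cv2


def apply_time_stamp_and_save(image, people_count, upload_azure):
    """Stamp the current time on the overlay image, save it and optionally upload."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    cv2.putText(image, timestamp, (15, 100), cv2.FONT_HERSHEY_COMPLEX, 1,
                (255, 255, 255), 2)
    file_name = "people_count_{}_{}.jpg".format(people_count,
                                                time.strftime("%Y%m%d-%H%M%S"))
    cv2.imwrite(file_name, image)
    if upload_azure:
        # Placeholder: the original sample uploads `file_name` to the container
        # created by create_cloud_container(); that code is omitted here.
        pass
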
Example #27
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    """

    # Read command line arguments

    model = args.model  # path to the model IR
    batch_size = args.batch  # set the batch size
    device = args.device  # device name to perform inference on
    cpu_ext = args.cpu_extension  # CPU extension
    concurrency = args.concurrency  # number of concurrent infer requests
    volatility = args.volatility  # volatility threshold
    prob_threshold = args.prob_threshold  # threshold for detections
    duration_alarm_threshold = args.duration_alarm  # longest stay allowed
    crowd_alarm_threshold = args.crowd_alarm  # max people allowed

    ### Load the model through `infer_network` ###

    infer_network = Network()
    infer_network.load_model(model, batch_size, concurrency, device, cpu_ext)
    net_input_shape = infer_network.get_input_shape()

    ### Handle the input stream ###

    if args.input is None or args.input.lower() == 'cam':
        input = 0
    else:
        input = args.input

    # VideoCapture supports images too
    cap = cv2.VideoCapture(input)
    assert cap.isOpened(), "Failed to open the input"

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    total_people_count = 0
    last_stable_people_count = 0
    mismatch_count = 0  # deviation from the last stable detection
    total_duration = 0  # total duration in frames
    current_duration = 0  # current person's stay duration in frames

    frames = []  # frames to batch
    q = deque()  # infer request queue

    ### Loop until stream is over ###
    while cap.isOpened():

        ### Read from the video capture ###

        captured, next_frame = cap.read()

        if captured:
            frames.append(next_frame)

        ### Pre-process the image as needed ###

        if len(frames) >= batch_size or not captured and frames:
            h = net_input_shape[2]
            w = net_input_shape[3]
            resized_frames = [
                cv2.resize(f, (w, h)).transpose(2, 0, 1)[None, ...]
                for f in frames
            ]
            frame_batch = np.concatenate(resized_frames, axis=0)
            request = infer_network.exec_net(frame_batch)
            q.append((request, frames))
            frames = []

        ### Start asynchronous inference for specified request ###

        # If the number of concurrent requests hit the limit,
        # we have to wait. Also if the end of the stream has
        # been reached, process what we have in the queue.
        if len(q) >= concurrency or not captured:
            if not q:
                break

            prev_request, prev_frames = q.popleft()

            ### Wait for the result ###
            ### Get the results of the inference request ###

            detected, boxes = infer_network.get_output(
                request=prev_request, class_id=1, confidence=prob_threshold)

            for i, prev_frame in enumerate(prev_frames):
                ### Extract stats from the results ###

                cur_people_count = int(detected[i])
                if last_stable_people_count != cur_people_count:
                    mismatch_count += 1
                else:
                    mismatch_count = 0

                # Check if we have a new stable value
                if mismatch_count > volatility or frame_count <= 1:

                    ### Calculate and send relevant information ###
                    ### on current_count, total_count and duration ###
                    ### to the MQTT server ###

                    last_stable_people_count = cur_people_count
                    total_people_count += last_stable_people_count
                    mismatch_count = 0

                    if last_stable_people_count > 0:  # person entered
                        current_duration = 1
                        total_duration += 1
                    else:  # person left
                        current_duration = 0

                        # Send average duration to the server
                        # (average duration is calculated in
                        # terms of the original video and doesn't
                        # depend on inference time or network delays)
                        ### Topic "person/duration": key of "duration" ###

                        avg_duration_payload = json.dumps({
                            'duration':
                            total_duration / total_people_count / fps
                        })

                        client.publish(topic='person/duration',
                                       payload=avg_duration_payload)

                    # Send new people count to the server
                    ### Topic "person": keys of "count" and "total" ###

                    people_count_payload = json.dumps({
                        'count':
                        last_stable_people_count,
                        'total':
                        total_people_count
                    })

                    client.publish(topic='person',
                                   payload=people_count_payload)

                else:  # Last stable count remains the same
                    if last_stable_people_count > 0:
                        current_duration += 1
                        total_duration += 1

                # Prepare the output frame

                if detected[i]:
                    box = boxes[i]
                    x_min = int(box[0] * prev_frame.shape[1])
                    y_min = int(box[1] * prev_frame.shape[0])
                    x_max = int(box[2] * prev_frame.shape[1])
                    y_max = int(box[3] * prev_frame.shape[0])
                    output_frame = cv2.rectangle(prev_frame, (x_min, y_min),
                                                 (x_max, y_max), (0, 255, 0))

                else:  # nothing detected or error
                    output_frame = prev_frame

                # Alarms

                if duration_alarm_threshold >= 0 \
                    and current_duration/fps > duration_alarm_threshold:
                    cv2.putText(output_frame,
                                text="Chop-chop! "
                                "Don't stay for too long. "
                                "Life is short!",
                                org=(20, output_frame.shape[0] - 60),
                                fontFace=cv2.FONT_HERSHEY_DUPLEX,
                                fontScale=0.9,
                                color=(100, 250, 250),
                                thickness=2)

                if crowd_alarm_threshold >= 0 \
                    and total_people_count > crowd_alarm_threshold:
                    cv2.putText(output_frame,
                                text="Too many people. "
                                "Beware COVID-19!",
                                org=(20, output_frame.shape[0] - 20),
                                fontFace=cv2.FONT_HERSHEY_TRIPLEX,
                                fontScale=1,
                                color=(0, 0, 255),
                                thickness=2)

                ### Send the frame to the FFMPEG server ###
                ### Write an output image if `single_image_mode` ###
                sys.stdout.buffer.write(output_frame)
                sys.stdout.buffer.flush()

    cap.release()
    cv2.destroyAllWindows()
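
# The batched example above calls infer_network.get_output(request=..., class_id=...,
# confidence=...) and expects one (detected, best box) pair per frame in the batch.
# The post-processing half of that call can be sketched with plain NumPy, assuming an
# SSD-style [1, 1, N, 7] detection blob (the actual wrapper and blob names are not
# shown in this listing):
import numpy as np


def parse_detections(detection_blob, batch_size, class_id=1, confidence=0.5):
    """Split an SSD-style [1, 1, N, 7] blob into per-image results.

    Returns (detected, boxes): detected[i] is 1 if image i contains the target
    class above the threshold; boxes[i] holds its best box as relative
    [xmin, ymin, xmax, ymax] coordinates."""
    detected = np.zeros(batch_size)
    best_conf = np.zeros(batch_size)
    boxes = np.zeros((batch_size, 4))
    for det in detection_blob[0][0]:
        image_id, label, conf = int(det[0]), int(det[1]), float(det[2])
        if image_id < 0:  # padding rows mark the end of valid detections
            break
        if label == class_id and conf >= confidence and conf > best_conf[image_id]:
            detected[image_id] = 1
            best_conf[image_id] = conf
            boxes[image_id] = det[3:7]
    return detected, boxes
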
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global INFO
    global DELAY
    global POSE_CHECKED

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()
    job_id = os.environ['PBS_JOBID']
    job_id = job_id.rstrip().split('.')[0]

    #if args.input == 'cam':
    # input_stream = 0
    #else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    shopper = cv2.VideoWriter(
        os.path.join(args.output_dir, job_id, "shopper.mp4"),
        cv2.VideoWriter_fourcc(*"AVC1"), fps, (initial_w, initial_h), True)
    frame_count = 0
    progress_file_path = os.path.join(args.output_dir, job_id,
                                      'i_progress.txt')
    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
        # Adjust DELAY to match the number of FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    # Initialise the class
    infer_network = Network()
    infer_network_pose = Network()
    # Load the network to IE plugin to get shape of input layer

    plugin, (n_fd, c_fd, h_fd,
             w_fd) = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                              args.cpu_extension)
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        args.posemodel, args.device, 1, 3, 0, args.cpu_extension, plugin)[1]

    ret, frame = cap.read()

    while ret:
        looking = 0
        ret, next_frame = cap.read()
        frame_count += 1
        if not ret:
            print("checkpoint *BREAKING")
            break

        if next_frame is None:
            log.error("checkpoint ERROR! blank FRAME grabbed")
            break

        initial_wh = [cap.get(3), cap.get(4)]
        in_frame_fd = cv2.resize(next_frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        # Start asynchronous inference for specified request
        inf_start_fd = time.time()
        infer_network.exec_net(0, in_frame_fd)
        # Wait for the result
        infer_network.wait(0)
        det_time_fd = time.time() - inf_start_fd

        # Results of the output layer of the network
        res = infer_network.get_output(0)

        # Parse face detection output
        faces = face_detection(res, args, initial_wh)

        if len(faces) != 0:
            # Look for poses
            for res_hp in faces:
                xmin, ymin, xmax, ymax = res_hp
                head_pose = frame[ymin:ymax, xmin:xmax]
                in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))

                inf_start_hp = time.time()
                infer_network_pose.exec_net(0, in_frame_hp)
                infer_network_pose.wait(0)
                det_time_hp = time.time() - inf_start_hp

                # Parse head pose detection results
                angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")
                angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")
                if ((angle_y_fc > -22.5) & (angle_y_fc < 22.5) &
                    (angle_p_fc > -22.5) & (angle_p_fc < 22.5)):
                    looking += 1
                    POSE_CHECKED = True
                    INFO = INFO._replace(looker=looking)
                else:
                    INFO = INFO._replace(looker=looking)
        else:
            INFO = INFO._replace(looker=0)

        # Draw performance stats
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)

        if POSE_CHECKED:
            cv2.putText(
                frame, "Head pose Inference time: {:.3f} ms.".format(
                    det_time_hp * 1000), (0, 35), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (0, 15), cv2.FONT_HERSHEY_COMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Shopper: {}".format(INFO.shopper), (0, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Looker: {}".format(INFO.looker), (0, 110),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        shopper.write(frame)
        if (frame_count % 10 == 0) or ((frame_count + 1) / video_len == 1):
            print("Frame Count: ", frame_count, "video length", video_len)
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start),
                           frame_count + 1, video_len)
        frame = next_frame
    if args.output_dir:
        total_time = round(time.time() - infer_time_start, 2)
        stats = {}
        stats['time'] = str(total_time)
        stats['frames'] = str(video_len)
        stats['fps'] = str(round(video_len / total_time, 2))
        with open(os.path.join(args.output_dir, job_id, 'stats.json'),
                  'w') as json_file:
            json.dump(stats, json_file)

    infer_network.clean()
    infer_network_pose.clean()
    cap.release()
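
# `face_detection` filters the raw detection blob into pixel-space face boxes and,
# in the original shopper sample, refreshes the INFO.shopper counter. A hedged
# sketch matching how its output is consumed above (the `args.confidence` attribute
# name is an assumption about args_parser()):
def face_detection(res, args, initial_wh):
    """Return [xmin, ymin, xmax, ymax] pixel boxes for confident face detections."""
    global INFO
    faces = []
    initial_w, initial_h = initial_wh
    for obj in res[0][0]:  # [image_id, label, conf, xmin, ymin, xmax, ymax]
        if obj[2] > args.confidence:
            xmin = int(obj[3] * initial_w)
            ymin = int(obj[4] * initial_h)
            xmax = int(obj[5] * initial_w)
            ymax = int(obj[6] * initial_h)
            faces.append([xmin, ymin, xmax, ymax])
    INFO = INFO._replace(shopper=len(faces))
    return faces
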
Example #29
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Initialise people counters
    person_last_count = 0
    person_total_count = 0
    person_start_time = 0

    # only 1 async inference request at a time
    current_request_id = 0

    client.publish("person", json.dumps({"total": person_total_count}))
    client.publish("person", json.dumps({"count": 0}))

    # Handle Different Input Streams
    single_image_mode = False

    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    # Initialise the Network class
    infer_network = Network()

    #     if args.cpu_extension and "CPU" in args.device:
    #         infer_network.add_extension(args.cpu_extension, "CPU")
    #         log.info("CPU extension loaded: {}".format(args.cpu_extension))

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)

    # get width and height of image the model process
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.input)

    cap.open(args.input)

    source_width = int(cap.get(3))
    source_height = int(cap.get(4))

    #out = cv2.VideoWriter('out.mp4', 0x00000021, 30, (source_width, source_height))

    ### TODO: Loop until stream is over ###
    while cap.isOpened():

        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(44)

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        # HWC => CHW
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)  #(n,c,h,w)

        ### TODO: Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(p_frame, current_request_id)

        ### TODO: Wait for the result ###
        if infer_network.wait(current_request_id) == 0:
            inference_stop = time.time() - inference_start

            ### TODO: Get the results of the inference request ###
            results = infer_network.get_output(current_request_id)

            if args.perf_counts:
                perf_count = infer_network.performance_counter(
                    current_request_id)
                log.basicConfig(stream=sys.stdout, level=log.DEBUG)
                log_performance_counts(perf_count)

            ### TODO: Extract any desired stats from the results ###
            out_frame, person_count = frame_and_count(p_frame, results,
                                                      net_input_shape[3],
                                                      net_input_shape[2],
                                                      args.prob_threshold)

            # to avoid counting persons repeatedly for low confidence detectors or
            # detectors where no instance detection is available
            # here we could use from skimage.metrics import structural_similarity as ssim
            # in order to discard counting from very similar frames.

            # out_frame shape (1,c,h,w)

            inference_time_txt = "Inference time {:.3f}ms".format(
                inference_stop * 1000)
            cv2.putText(out_frame, inference_time_txt, (15, 15),
                        cv2.FONT_HERSHEY_PLAIN, 0.5, (200, 10, 10), 1)

            out_frame = out_frame[0]
            out_frame = out_frame.transpose((1, 2, 0))
            out_frame = cv2.resize(out_frame, (source_width, source_height))

            ### TODO: Calculate and send relevant information on ###
            ### Perform analysis on the output to determine the number of people in ###
            ### frame, time spent in frame, and the total number of people counted ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            # Publish messages to the MQTT server

            if person_count > person_last_count:
                person_start_time = time.time()
                person_total_count = person_total_count + person_count - person_last_count
                client.publish("person",
                               json.dumps({"total": person_total_count}))

            elif person_count < person_last_count:
                person_duration = int(time.time() - person_start_time)
                client.publish("person/duration",
                               json.dumps({"duration": person_duration}))

            client.publish("person", json.dumps({"count": person_count}))
            person_last_count = person_count

            if args.max_person_count and person_count > args.max_person_count:
                txt = "Max person count alert!"
                (txt_width,
                 txt_height) = cv2.getTextSize(txt,
                                               cv2.FONT_HERSHEY_PLAIN,
                                               0.5,
                                               thickness=1)[0]
                txt_offset_x = 10
                txt_offset_y = out_frame.shape[0] - 10
                box_coords = ((txt_offset_x, txt_offset_y + 2),
                              (txt_offset_x + txt_width,
                               txt_offset_y - txt_height - 2))
                cv2.rectangle(out_frame, box_coords[0], box_coords[1],
                              (0, 0, 0), cv2.FILLED)

                cv2.putText(out_frame, txt, (txt_offset_x, txt_offset_y),
                            cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 0, 255), 1)

            ### TODO: Write an output image if `single_image_mode` ###
            if single_image_mode:
                cv2.imwrite('out_image.jpg', out_frame)

            ### TODO: Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(out_frame)
            sys.stdout.flush()

            if key_pressed == 27:
                break

    # Release the out writer, capture, and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
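
# `frame_and_count` receives the already pre-processed NCHW frame together with the
# detection results, draws the boxes, and hands back an NCHW frame plus the people
# count (the caller transposes it back to HWC afterwards). A minimal sketch under
# those assumptions:
import cv2


def frame_and_count(p_frame, results, width, height, prob_threshold):
    """Draw person detections on the NCHW frame and count them."""
    person_count = 0
    hwc_frame = p_frame[0].transpose((1, 2, 0)).copy()  # back to HWC for drawing
    for obj in results[0][0]:  # [image_id, label, conf, xmin, ymin, xmax, ymax]
        if obj[2] >= prob_threshold:
            xmin, ymin = int(obj[3] * width), int(obj[4] * height)
            xmax, ymax = int(obj[5] * width), int(obj[6] * height)
            cv2.rectangle(hwc_frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)
            person_count += 1
    out_frame = hwc_frame.transpose((2, 0, 1))[None, ...]  # back to NCHW
    return out_frame, person_count
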
Example #30
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    present_count = 0
    preceding_count = 0
    total_count = 0
    start_time = 0
    duration = 0
    frame_count = 0
    wait_time = 57
    single_image_mode = False

    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    args.prob_threshold = float(args.prob_threshold)

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    rfcnn_input_shape = infer_network.get_input_shape()
    print(rfcnn_input_shape)
    # width and height input to the model
    dsize = (rfcnn_input_shape[3], rfcnn_input_shape[2])

    # single image mode

    single_image_format = ['jpg', 'tif', 'png', 'jpeg', 'bmp']
    if args.input.split(".")[-1].lower() in single_image_format:
        single_image_mode = True
        frame = cv2.imread(args.input)
        height, width, channel = frame.shape
        p_frame = preprocess_frame(frame, dsize)
        infer_network.exec_net(p_frame)

        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            infer_result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###

            single_frame, present_count = extract_stats(
                frame, infer_result, args, width, height)
            ### TODO: Write an output image if `single_image_mode` ###
            cv2.imwrite("image.jpg", single_frame)

    ### TODO: Handle the input stream ###

    input_stream = cv2.VideoCapture(args.input)
    input_stream.open(args.input)

    width = int(input_stream.get(3))
    height = int(input_stream.get(4))

    # Create a video output to see your result
    #out = cv2.VideoWriter('out.mp4',0x00000021,30,(width,height))

    ### TODO: Loop until stream is over ###
    while input_stream.isOpened() and not single_image_mode:
        ### TODO: Read from the video capture ###
        flag, frame = input_stream.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        p_frame = preprocess_frame(frame, dsize)
        ### TODO: Start asynchronous inference for specified request ###

        infer_network.exec_net(p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:

            ### TODO: Get the results of the inference request ###
            infer_result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###

            out_frame, present_count = extract_stats(frame, infer_result, args,
                                                     width, height)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###

            # when a person is in the video
            if present_count > preceding_count:
                start_time = time.time()
                total_count += present_count - preceding_count
                frame_count = 0

                payload_total_count = {"total": total_count}
                client.publish("person", json.dumps(payload_total_count))

            # when there is one less person
            if present_count < preceding_count and frame_count < wait_time:
                present_count = preceding_count
                frame_count += 1

            # when the lower count has persisted for wait_time frames
            if present_count < preceding_count and frame_count == wait_time:
                duration = int(time.time() - start_time)

                payload_duration = {"duration": duration}
                client.publish("person/duration", json.dumps(payload_duration))

            preceding_count = present_count

            payload_present_count = {"count": present_count}
            client.publish("person", json.dumps(payload_present_count))

            ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        sys.stdout.flush()
        if key_pressed == 27:
            break

    # -- release the out writer, capture and destroy any opencv windows
    input_stream.release()
    cv2.destroyAllWindows()
    client.disconnect()
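
# The last example relies on `preprocess_frame` and `extract_stats`, which are not
# part of the listing. Given how they are called, plausible minimal sketches
# (assuming an SSD-style [1, 1, N, 7] detection blob) are:
import cv2


def preprocess_frame(frame, dsize):
    """Resize to the model's (width, height) and convert HWC -> NCHW."""
    p_frame = cv2.resize(frame, dsize)
    p_frame = p_frame.transpose((2, 0, 1))
    return p_frame.reshape(1, *p_frame.shape)


def extract_stats(frame, infer_result, args, width, height):
    """Draw detections above the threshold; return (annotated frame, person count)."""
    present_count = 0
    for obj in infer_result[0][0]:  # [image_id, label, conf, xmin, ymin, xmax, ymax]
        if obj[2] >= args.prob_threshold:
            xmin, ymin = int(obj[3] * width), int(obj[4] * height)
            xmax, ymax = int(obj[5] * width), int(obj[6] * height)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            present_count += 1
    return frame, present_count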