def infer_on_stream(args, client, stats):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    Frames (single image, video file, or webcam) are run through the
    person-detection network; per-frame person counts and presence
    durations are published over MQTT, and the annotated frame is
    written to stdout for downstream (e.g. FFMPEG) consumption.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :param stats: list that receives per-frame timing dicts
                  (``it`` = inference time, ``rt`` = render time)
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    buffer = Buffer()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()
    net_output_name = infer_network.get_output_name()
    net_input_name = infer_network.get_input_blob_name()
    net_output_info = infer_network.get_output_info()
    log.info("network output name")
    log.info(net_output_name)
    log.info("network output info")
    log.info(net_output_info.shape)
    log.info("network input shape")
    log.info(net_input_name)
    log.info(net_input_shape)

    ### Handle the input stream ###
    iflag = False
    input_stream_arg = 0 if args.input == "cam" else args.input
    # BUG FIX: guard the extension check — for "cam" input the stream arg
    # is the int 0 and calling .endswith() on it raised AttributeError.
    if isinstance(input_stream_arg, str) and \
            input_stream_arg.endswith(('.jpg', '.bmp')):
        iflag = True

    width = 0
    height = 0
    frame = None
    cap = None
    captureOpen = False

    ## Handle image or stream or CAM
    if iflag:
        frame = cv2.imread(input_stream_arg)
        log.info("single frame shape: %s", frame.shape)
        width = frame.shape[1]
        height = frame.shape[0]
    else:
        log.info("attempting VideoCapture for: %s", input_stream_arg)
        # BUG FIX: the constructor already opens the stream; the old extra
        # cap.open(args.input) re-opened with the raw string ("cam" for
        # webcam input), which broke camera capture. Also reflect the real
        # open state instead of assuming success.
        cap = cv2.VideoCapture(input_stream_arg)
        captureOpen = cap.isOpened()
        width = int(cap.get(3))   # CAP_PROP_FRAME_WIDTH
        height = int(cap.get(4))  # CAP_PROP_FRAME_HEIGHT
    log.info("input image width: %s, height: %s", width, height)

    total_person_count = 0
    duration = 0
    render_time = 0
    waitingOnInference = False

    ### Loop until stream is over ###
    while (captureOpen or iflag or waitingOnInference):
        ### Read from the video capture ###
        flag = True
        key_pressed = None
        if not iflag:
            flag, frame = cap.read()
            if not cap.isOpened():
                captureOpen = False
            key_pressed = cv2.waitKey(60)
        if not flag:
            break

        ### Pre-process the image as needed ###
        # Network expects NCHW; resize to (W, H) = (shape[3], shape[2]),
        # move channels first, then add the batch dimension.
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### Start asynchronous inference for specified request ###
        start_time = time()
        infer_network.exec_net(p_frame)
        waitingOnInference = True
        render_time = 0
        inf_time = 0

        ### Wait for the result ###
        if infer_network.wait() == 0:
            ### Get the results of the inference request ###
            result = infer_network.get_output()
            inf_time = time() - start_time
            # restart clock to capture evaluate/draw time
            start_time = time()
            boxes = post_process(result, width, height, PERSON_CLASS)
            boxes = list(boxes.values())
            boxes = nms(boxes)
            buffer_avg = 0
            if iflag:
                # single images get a hard confidence filter; streams rely
                # on the rolling-average smoothing below instead
                boxes = filter_confidence(boxes, prob_threshold)
            if len(boxes) > 0:
                ## we have a person in frame (maybe)
                first_prop = boxes[0]
                confidence = first_prop[4]
                buffer.add(confidence)
                buffer_avg = buffer.average()
                if confidence > prob_threshold:
                    if duration > 0:
                        ## this is not the first time they have been in the
                        ## frame: increase duration and move along
                        duration = duration + 1
                    else:
                        ## very first time this person has entered the frame
                        ## pulse out new count
                        total_person_count = total_person_count + 1
                        duration = duration + 1
                    client.publish(
                        "person",
                        json.dumps({
                            "count": 1,
                            "total": total_person_count
                        }))
                    draw_box(frame, boxes, inf_time)
                else:
                    ## we have a person in frame, but they don't meet the
                    ## confidence threshold
                    if duration > 0:
                        ## we know we were tracking someone last frame
                        ## so check our rolling buffer average
                        if buffer_avg > BUFFER_AVERAGE_CUTOFF:
                            ## same person, keep counting, move along
                            duration = duration + 1
                            client.publish(
                                "person",
                                json.dumps({
                                    "count": 1,
                                    "total": total_person_count
                                }))
                            draw_box(frame, boxes, inf_time)
                        else:
                            ## no longer meet confidence or buffer avg:
                            ## person has left — pulse duration, reset state
                            client.publish(
                                "person",
                                json.dumps({
                                    "count": 0,
                                    "total": total_person_count
                                }))
                            client.publish("person/duration",
                                           json.dumps({"duration": duration}))
                            duration = 0
                            buffer.flush()
                    else:
                        ## also nobody in the last frame (duration == 0)
                        client.publish(
                            "person",
                            json.dumps({
                                "count": 0,
                                "total": total_person_count
                            }))
            else:
                ## no boxes with our target class were found, make sure we
                ## didn't see one in the last frame (or so)
                buffer.add(0)
                buffer_avg = buffer.average()
                if buffer_avg > BUFFER_AVERAGE_CUTOFF:
                    ## we had someone previously, keep counting, move along
                    duration = duration + 1
                else:
                    ## nobody previously, nobody now, make sure we say so
                    client.publish(
                        "person",
                        json.dumps({
                            "count": 0,
                            "total": total_person_count
                        }))
                    if duration > 0:
                        ## we were previously tracking someone, pulse out
                        ## duration before zeroing out
                        client.publish("person/duration",
                                       json.dumps({"duration": duration}))
                        duration = 0

            render_time = time() - start_time
            render_time_message = "OpenCV rendering time: {:.3f} ms".format(
                render_time * 1e3)
            cv2.putText(frame, render_time_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            stats.append(dict(it=inf_time, rt=render_time))
            # raw annotated frame to stdout (consumed by e.g. FFMPEG)
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()

        if key_pressed == 27:  # ESC aborts the stream
            break
        if iflag and not waitingOnInference:
            iflag = False
        if infer_network.wait() == 0:
            # single-image mode processes exactly one frame
            iflag = False
            waitingOnInference = False

    if cap:
        cap.release()
    cv2.destroyAllWindows()
    client.disconnect()