# Shared imports for the implementations collected below. Each function
# originally lived in its own main.py, so project-specific helpers
# (e.g. `Network` from inference.py, `FPS` from imutils.video, argument
# parsers and the MQTT constants) come from that project's own modules.
import json
import os
import sys
import time

import cv2
import numpy as np

from inference import Network


def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    input_type = args.input
    single_image_mode = False
    request_id = 0
    time_count = 0
    pre_time = 0
    counter = 0
    last_count = 0
    current_count = 0
    total_count = 0
    duration = 0

    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    global prob_threshold
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(
        args.model, request_id, args.device, args.cpu_extension)[1]

    ### Handle the input stream ###
    if input_type == 'CAM':
        input_stream = 0
    # Check for an input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = input_type
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    # Get and open the video capture
    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)

    # Grab the shape of the input
    global width, height
    width = int(cap.get(3))
    height = int(cap.get(4))

    ### Loop until the stream is over ###
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### Read from the video capture and pre-process the image as needed ###
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### Start asynchronous inference for the specified request ###
        inf_start = time.time()
        infer_network.exec_net(image, request_id)

        ### Wait for the result ###
        if infer_network.wait(request_id) == 0:
            det_time = time.time() - inf_start

            ### Get the results of the inference request ###
            result = infer_network.get_output(request_id)
            inference_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            font = cv2.FONT_HERSHEY_SIMPLEX
            color = (174, 32, 141)
            cv2.putText(frame, inference_time_message, (20, 20), font, 0.6, color, 1)

            ### Extract any desired stats from the results ###
            frame, detected = draw_boxes(frame, result)

            ### Calculate and send relevant information on current_count, ###
            ### total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            if detected != counter:
                last_count = counter
                counter = detected
                if time_count >= 3:
                    pre_time = time_count
                    time_count = 0
                else:
                    time_count = pre_time + time_count
                    pre_time = 0
            else:
                time_count += 1
                if time_count >= 10:
                    current_count = counter
                    if time_count == 20 and current_count > last_count:
                        total_count += current_count - last_count
                        client.publish("person", json.dumps({"total_counts": total_count}))
                    elif time_count == 20 and current_count < last_count:
                        duration = int(pre_time)
                        client.publish('person/duration', json.dumps({'duration': duration}))
            client.publish("person", json.dumps({"count": current_count}))

        # Break if the escape key is pressed
        if key_pressed == 27:
            break

        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
    # Disconnect from MQTT
    client.disconnect()
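# Several implementations in this file call a `draw_boxes` helper that is
# defined elsewhere in their respective projects. A minimal sketch follows,
# assuming the SSD-style DetectionOutput blob of shape [1, 1, N, 7] produced
# by the OpenVINO person-detection models and the module-level `width`,
# `height` and `prob_threshold` globals set above; the exact signature in
# each project may differ.
def draw_boxes(frame, result):
    """Draw bounding boxes for detections above the threshold; return the frame and count."""
    count = 0
    for box in result[0][0]:  # each row: [image_id, label, conf, x_min, y_min, x_max, y_max]
        if box[2] >= prob_threshold:
            x_min = int(box[3] * width)
            y_min = int(box[4] * height)
            x_max = int(box[5] * width)
            y_max = int(box[6] * height)
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            count += 1
    return frame, count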
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension,
                             args.debug)
    net_input_shape = infer_network.get_input_shape()
    if args.debug:
        print("Input shape of the model: " + str(net_input_shape))

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    frames = 0
    found = False
    total = 0

    ### TODO: Loop until stream is over ###
    if args.debug:
        print("Input size: " + str((height, width)))
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        if frame_count == -1:  # a frame count of -1 indicates a live stream here
            frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            frame, count = draw_boxes(frame, result, args, width, height)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if not found and count > 0:
                total = total + count
                found = True
            if found and count > 0:
                frames = frames + 1
            if found and count == 0:
                found = False
                client.publish("person/duration",
                               json.dumps({"duration": int(frames / fps)}))
                frames = 0
            client.publish("person", json.dumps({"count": count, "total": total}))

        ### TODO: Send the frame to the FFMPEG server ###
        if not args.debug and (frame_count > 0 or frame_count == -1):
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        else:
            cv2.imwrite("output.jpg", frame)
            print("Image saved to output.jpg")

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    network = Network()
    # Set probability threshold for detections
    if args.prob_threshold is not None:
        prob_threshold = args.prob_threshold
    else:
        prob_threshold = 0.3

    ### TODO: Load the model through `infer_network` ###
    network.load_model(args.model, args.cpu_extension, args.device)
    pedestrian_input_shape = network.get_input_shape()

    networkReIdentification = Network()
    networkReIdentification.load_model(args.model2, args.cpu_extension, args.device)
    identification_input_shape = networkReIdentification.get_input_shape()
    # print('Models Loaded Successfully')

    # Check that the input stream has a supported extension
    if not args.input.split('.')[1] in EXTENSIONS:
        print("The input file is not supported yet")
        exit(1)

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.input)
    fps = FPS().start()

    ### TODO: Loop until stream is over ###
    last_detection_time = None
    start = None
    total_unique_persons = []
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        isAnyFrameLeft, frame = cap.read()
        width = int(cap.get(3))
        height = int(cap.get(4))

        ### TODO: Pre-process the image as needed ###
        if not isAnyFrameLeft:
            sys.stdout.flush()
            break
        displayFrame = frame.copy()
        processed_frame = pre_process(frame, net_input_shape=pedestrian_input_shape)

        ### TODO: Start asynchronous inference for specified request ###
        inference_start_time = time.time()
        network.exec_net(processed_frame)

        ### TODO: Wait for the result ###
        last_x_min = 0
        last_x_max = 0
        last_y_max = 0
        last_y_min = 0
        if network.wait() == 0:
            inference_end_time = time.time()
            total_inference_time = inference_end_time - inference_start_time
            cv2.putText(displayFrame,
                        "Inference time: " + str(round(total_inference_time * 1000, 3)) + "ms",
                        (5, 15), cv2.FONT_HERSHEY_PLAIN, 0.9, (230, 50, 2),
                        lineType=cv2.LINE_8, thickness=1)

            ### TODO: Get the results of the inference request ###
            result = network.get_all_output()

            ### TODO: Extract any desired stats from the results ###
            output = result['DetectionOutput']
            counter = 0
            for detection in output[0][0]:
                image_id, label, conf, x_min, y_min, x_max, y_max = detection
                if conf > prob_threshold:
                    x_min = int(x_min * width)
                    x_max = int(x_max * width)
                    y_min = int(y_min * height)
                    y_max = int(y_max * height)
                    try:
                        if conf > 0.85:
                            crop_person = frame[y_min:y_max, x_min:x_max]
                            total_unique_persons = reidentification(
                                networkReIdentification, crop_person,
                                identification_input_shape,
                                total_unique_persons, conf)
                    except Exception:
                        pass

                    x_min_diff = last_x_min - x_min
                    x_max_diff = last_x_max - x_max
                    if x_min_diff > 0 and x_max_diff > 0:
                        # Ignore multiple bounding boxes drawn for the same person
                        continue
                    y_min_diff = abs(last_y_min) - abs(y_min)
                    counter = counter + 1
                    last_x_min = x_min
                    last_x_max = x_max
                    last_y_max = y_max
                    last_y_min = y_min
                    cv2.rectangle(displayFrame, (x_min, y_min), (x_max, y_max),
                                  (0, 255, 0), 2)

                    # Classify the person's activity from the vertical movement
                    if y_min_diff >= -20:
                        activity = "standing"
                    elif -41 < y_min_diff < -21:
                        activity = "moving"
                    else:
                        activity = "walking"
                    cv2.putText(displayFrame, activity, (x_max + 10, y_min + 50),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (230, 50, 2),
                                lineType=cv2.LINE_8, thickness=1)
                    last_detection_time = datetime.now()
                    if start is None:
                        start = time.time()

            # Once no detection has been seen for a while, publish the duration
            if last_detection_time is not None:
                second_diff = (datetime.now() - last_detection_time).total_seconds()
                if second_diff >= 1.5:
                    if start is not None:
                        elapsed = time.time() - start
                        client.publish(
                            "person/duration",
                            json.dumps({"duration": elapsed - second_diff}))
                    last_detection_time = None
                    start = None

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            client.publish(
                "person",
                json.dumps({
                    "count": str(counter),
                    "total": len(total_unique_persons)
                }))
            ### Topic "person/duration": key of "duration" ###

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(displayFrame)

        ### TODO: Write an output image if `single_image_mode` ###
        if cv2.waitKey(1) & 0xFF == ord('q'):
            sys.stdout.flush()
            break
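# The function above calls a `pre_process` helper defined elsewhere in that
# project; a minimal sketch of what it plausibly does, assuming the usual
# OpenVINO NCHW input layout [1, C, H, W]:
def pre_process(frame, net_input_shape):
    """Resize to the network's input size and reorder HWC -> NCHW with a batch dim."""
    p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
    p_frame = p_frame.transpose((2, 0, 1))
    return p_frame.reshape(1, *p_frame.shape)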
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Required state variables
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    # Arguments such as args.model, args.device and args.cpu_extension are
    # defined in the build_argparser() function.
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    stream_input, mode = capture(args.input)
    single_image_mode = mode
    cap = cv2.VideoCapture(stream_input)
    cap.open(args.input)

    # Width and height are important parameters for the pretrained model
    global width, height
    width = int(cap.get(3))
    height = int(cap.get(4))

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        # Read the next frame from the captured input
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Read from the video capture ###
        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        # Reference: https://github.com/intel-iot-devkit/people-counter-python/blob/master/main.py
        inf_start = time.time()
        infer_network.exec_net(p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            ### TODO: Get the results of the inference request ###
            result, count = draw_boxes(p_frame, result)

            # Display the inference time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(result, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.45, (200, 10, 10), 1)

            ### TODO: Extract any desired stats from the results ###
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if count > last_count:
                start_time = time.time()
                total_count = total_count + count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration", json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": count}))
            last_count = count

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(result)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', result)
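# The `capture` helper used above is defined elsewhere in that project; a
# minimal sketch of what it plausibly does (the exact behaviour is an
# assumption inferred from how its return values are used):
def capture(input_arg):
    """Map the --input argument to a cv2.VideoCapture source and an image-mode flag."""
    if input_arg == 'CAM':
        return 0, False            # webcam index, not single-image mode
    if input_arg.endswith(('.jpg', '.bmp', '.png')):
        return input_arg, True     # still image
    return input_arg, False        # video file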
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id, args.cpu_extension)[1]

    if args.input == "CAM":
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERR Unable to open the source")

    initial_w = cap.get(3)
    initial_h = cap.get(4)

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        # The original omitted the exec_net call; added here to match the
        # sibling implementations in this file, which the rest of the loop
        # clearly depends on.
        infer_network.exec_net(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            result = infer_network.get_output(cur_request_id)
            perf_count = infer_network.performance_counter(cur_request_id)

            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference Time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration", json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Check whether we were given a TF model or an IR
    is_tensorflow = os.path.splitext(args.model)[1] == '.pb'

    # Initialise the class
    if is_tensorflow:
        from inference_tf import NetworkTf
        infer_network = NetworkTf()
    else:
        from inference import Network
        infer_network = Network()

    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    if not is_tensorflow:
        net_input_shape = infer_network.get_input_shape()

    ### Handle the input ###
    is_single_image_mode = os.path.splitext(args.input)[1] in ['.jpg', '.png']
    if not is_single_image_mode:
        cap = cv2.VideoCapture(args.input)
        cap.open(args.input)
        # Grab the shape and FPS rate of the input
        width = int(cap.get(3))
        height = int(cap.get(4))
        fps = cap.get(cv2.CAP_PROP_FPS)

    # Init the total of detected persons
    total = 0
    # Init the number of frames
    nb_frames = 0
    # Init the total inference time, to be divided by the number of frames at the end
    total_inference_time = 0
    # An array to keep track of previously detected persons
    previously_detected_persons = []
    # The average duration of a single person's presence
    duration = 0
    # fixme: for debugging
    max_percent = [0]

    ### Loop until stream is over ###
    while is_single_image_mode or cap.isOpened():
        if is_single_image_mode:
            print("Single image mode. Analyze ", args.input)
            frame = cv2.imread(args.input)
            height = frame.shape[0]
            width = frame.shape[1]
        else:
            ### Read from the video capture ###
            flag, frame = cap.read()
            nb_frames = nb_frames + 1
            if not flag:
                break
            key_pressed = cv2.waitKey(60)

        if is_tensorflow:
            p_frame = frame
        else:
            p_frame = preprocess_image(frame,
                                       (net_input_shape[3], net_input_shape[2]))

        ### Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(p_frame)

        ### Wait for the result ###
        if infer_network.wait() == 0:
            # Record the inference time
            total_inference_time = total_inference_time + (time.time() - inference_start)

            ### Get the results of the inference request ###
            result = infer_network.get_output()

            ### Create the output frame
            out_frame, detected_persons = create_output_image(
                frame, result, width, height, (0, 0, 255),
                float(args.prob_threshold), nb_frames)

            if not is_single_image_mode:
                # If there are detected persons in the frame
                count = 0
                if len(detected_persons) > 0:
                    # For each new detection
                    for person in detected_persons:
                        # Check if there was a person with a matching bounding box
                        is_new_person = True
                        for index, previous_person in enumerate(previously_detected_persons):
                            # If it is the same person, update to the latest coords
                            if is_same_person(person, previous_person, max_percent):
                                # Keep the timestamp of the first detection
                                person[4] = previous_person[4]
                                previously_detected_persons[index] = person
                                is_new_person = False
                                break
                        if is_new_person:
                            total = total + 1
                            publish_last_duration(previously_detected_persons, client, fps)
                            previously_detected_persons.append(person)
                # print('previously_detected_persons=', previously_detected_persons)
                # print('max_percent=', max_percent)

                ### Extract any desired stats from the results ###
                ### Calculate and send relevant information on ###
                ### current_count, total_count and duration to the MQTT server ###
                ### Topic "person": keys of "count" and "total" ###
                ### Topic "person/duration": key of "duration" ###
                duration = get_avg_duration(previously_detected_persons, fps)
                client.publish(
                    "person",
                    json.JSONEncoder().encode({
                        "count": len(detected_persons),
                        "total": len(previously_detected_persons)
                    }))

        ### Write an output image if in single_image_mode ###
        if is_single_image_mode:
            print("Write output file in 'single_image.png'")
            cv2.imwrite('single_image.png', out_frame)
        else:
            ### Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(out_frame)
            sys.stdout.flush()

        # Break if single_image_mode or the escape key is pressed
        if is_single_image_mode or key_pressed == 27:
            break

    # Publish the duration for the last detected person
    if not is_single_image_mode:
        publish_last_duration(previously_detected_persons, client, fps)

    # Release the capture and destroy any OpenCV windows
    if not is_single_image_mode:
        cap.release()
        cv2.destroyAllWindows()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    if args.input == "CAM":
        camera = cv2.VideoCapture(0)
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        # Single-image mode: run one inference and write the annotated result
        infer_network.load_model(args.model, 1, args.device, args.cpu_extension)
        input_shape = infer_network.get_input_shape()
        img = cv2.imread(args.input, cv2.IMREAD_COLOR)
        resized_frame = cv2.resize(img, (input_shape[3], input_shape[2]))
        frame_preproc = np.transpose(
            np.expand_dims(resized_frame.copy(), axis=0), (0, 3, 1, 2))
        infer_network.exec_net(frame_preproc)
        if infer_network.wait() == 0:
            outputs = infer_network.get_output()
            box_frame, count, bbox = get_bounding_box(img, outputs, prob_threshold)
            cv2.putText(box_frame, "Count:" + str(count), (20, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
            cv2.imwrite('output.jpg', box_frame)
        return
    else:
        if not os.path.isfile(args.input):
            exit(1)
        camera = cv2.VideoCapture(args.input)

    if not camera.isOpened():
        exit(1)

    # Two inference requests are kept in flight (async double-buffering)
    cur_req_id = 0
    next_req_id = 1
    num_requests = 2
    infer_network.load_model(args.model, num_requests, args.device, args.cpu_extension)
    input_shape = infer_network.get_input_shape()

    ret, frame = camera.read()
    total_count = 0
    pres_count = 0
    prev_count = 0
    start_time = 0
    no_bbox = 0
    duration = 0
    prev_bbox_x = 0

    while camera.isOpened():
        ret, next_frame = camera.read()
        if not ret:
            break
        key = cv2.waitKey(60)
        resized_frame = cv2.resize(next_frame.copy(), (input_shape[3], input_shape[2]))
        frame_preproc = np.transpose(
            np.expand_dims(resized_frame.copy(), axis=0), (0, 3, 1, 2))
        infer_network.exec_net(frame_preproc.copy(), req_id=next_req_id)
        if infer_network.wait(cur_req_id) == 0:
            outputs = infer_network.get_output(cur_req_id)
            frame, pres_count, bbox = get_bounding_box(frame.copy(), outputs[0],
                                                       prob_threshold)
            box_w = frame.shape[1]
            topleft, bottomright = bbox
            if pres_count > prev_count:
                start_time = time.time()
                total_count += pres_count - prev_count
                no_bbox = 0
                client.publish("person", json.dumps({"total": total_count}))
            elif pres_count < prev_count:
                # Tolerate short detection gaps before declaring that a person left
                if no_bbox <= 20:
                    pres_count = prev_count
                    no_bbox += 1
                elif prev_bbox_x < box_w - 200:
                    pres_count = prev_count
                    no_bbox = 0
                else:
                    duration = int(time.time() - start_time)
                    client.publish("person/duration", json.dumps({"duration": duration}))
            if not (topleft is None and bottomright is None):
                prev_bbox_x = int((topleft[0] + bottomright[0]) / 2)
            prev_count = pres_count
            client.publish("person", json.dumps({"count": pres_count}))
            if key == 27:
                break
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        cur_req_id, next_req_id = next_req_id, cur_req_id
        frame = next_frame

    camera.release()
    cv2.destroyAllWindows()
    client.disconnect()
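# The function above calls a `get_bounding_box` helper that returns the
# annotated frame, the detection count and the last box's corners. A hedged
# sketch, assuming the SSD [1, 1, N, 7] output layout; the project's real
# helper may parse its output differently:
def get_bounding_box(frame, outputs, prob_threshold):
    """Draw boxes above the threshold; return (frame, count, (topleft, bottomright))."""
    height, width = frame.shape[:2]
    count, topleft, bottomright = 0, None, None
    for box in outputs[0][0]:
        if box[2] > prob_threshold:
            topleft = (int(box[3] * width), int(box[4] * height))
            bottomright = (int(box[5] * width), int(box[6] * height))
            cv2.rectangle(frame, topleft, bottomright, (0, 255, 0), 2)
            count += 1
    return frame, count, (topleft, bottomright)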
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold
    # Set request id
    req_id = 0

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    input_source = args.input
    is_one_image = False
    # Check for a live camera feed
    if input_source.lower() == 'cam':
        input_source = 0
    else:
        image_formats = [".png", ".jpg", ".bmp", ".jpeg"]
        if any(input_source.endswith(fmt) for fmt in image_formats):
            is_one_image = True

    # Get and open the video capture
    capture = cv2.VideoCapture(input_source)
    capture.open(input_source)
    if not capture.isOpened():
        print("ERROR! Unable to open input source")
        exit(1)

    # Grab the shape of the input
    width = int(capture.get(3))
    height = int(capture.get(4))

    # Set the variables used for people counting
    current_count = 0
    time_start = 0
    duration = 0
    total_count = 0
    total_count4text = 0
    previous_count = 0
    omitted_results = 0

    ### TODO: Loop until stream is over ###
    while capture.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = capture.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        _width = net_input_shape[3]
        _height = net_input_shape[2]
        p_frame = cv2.resize(frame, (_width, _height))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(net_input_shape[0], net_input_shape[1], _height, _width)

        inference_start = time.time()
        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(req_id, p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait(req_id) == 0:
            inference_time = (time.time() - inference_start) * 1000
            inference_time = round(inference_time, 2)
            if not is_one_image:
                ### Write some info onto the frame.
                # Uncomment the following lines to see the stats in the video output as well:
                '''
                people_in_message = "people in frame : " + str(current_count)
                cv2.putText(frame, people_in_message, (10, 15), cv2.FONT_HERSHEY_COMPLEX,
                            0.5, (10, 200, 0), 1, cv2.LINE_AA, False)
                total_count_message = "total people counted : " + str(total_count4text)
                cv2.putText(frame, total_count_message, (10, 35), cv2.FONT_HERSHEY_COMPLEX,
                            0.5, (0, 200, 150), 1, cv2.LINE_AA, False)
                '''
                frame_message = "omitted results : " + str(omitted_results)
                cv2.putText(frame, frame_message, (10, 55), cv2.FONT_HERSHEY_COMPLEX,
                            0.5, (0, 0, 200), 1, cv2.LINE_AA, False)
                inference_time_message = "inference time : " + str(inference_time) + " ms"
                cv2.putText(frame, inference_time_message, (10, 420), cv2.FONT_HERSHEY_COMPLEX,
                            0.5, (0, 0, 0), 1, cv2.LINE_AA, False)

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(req_id)

            ### TODO: Extract any desired stats from the results ###
            out_frame = draw_bounding_boxes(frame, result, prob_threshold, width, height)

            ### TODO: Calculate current_count, total_count and duration, and send
            ### the relevant information to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            current_count = 0
            for r in result[0][0]:
                confidence = r[2]
                if confidence > prob_threshold:
                    current_count += 1

            ## On state change, e.g. a new person enters
            if current_count > previous_count:
                # Store the current time for calculating the duration
                time_start = time.time()
                total_count += current_count - previous_count

            ## On state change, e.g. a person left
            if current_count < previous_count:
                # Calculate the time the person spent, as an integer
                duration = int(time.time() - time_start)
                ## To avoid counting a person more than once, a detected person
                ## should be present for at least 2 seconds
                if duration < 2:
                    # Subtract the previous count from total_count
                    # and count it as an omitted result
                    total_count = total_count - previous_count
                    omitted_results = omitted_results + 1
                # Publish messages to the MQTT server, topic: person, key: total
                client.publish(topic="person", payload=json.dumps({"total": total_count}))
                total_count4text = total_count
                if duration >= 2:
                    # Publish to topic person/duration, key: duration (when a person left)
                    client.publish(topic="person/duration",
                                   payload=json.dumps({"duration": duration}))

            # Publish a message to the MQTT server, topic: person, key: count
            client.publish(topic="person", payload=json.dumps({"count": current_count}))
            previous_count = current_count

            # Break if the escape key is pressed
            if key_pressed == 27:
                break
            # Save the current frame if the 's' key is pressed
            if key_pressed == ord('s'):
                cv2.imwrite('output_frame.png', frame)

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if is_one_image:
            ### Write the number of people in the image
            people_in_message = "people in the image : " + str(current_count)
            cv2.putText(frame, people_in_message, (10, 15), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, (0, 0, 100), 1, cv2.LINE_AA, False)
            cv2.imwrite("output_image.jpg", frame)

    # Release the capture and destroy any OpenCV windows
    capture.release()
    cv2.destroyAllWindows()
    ### TODO: Disconnect from MQTT
    client.disconnect()
def main():
    args = build_argparser().parse_args()
    account_name = args.account_name
    account_key = args.account_key

    if account_name == "" or account_key == "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name + "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)

    # if args.input == 'cam':
    #     input_stream = 0
    # else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', args.input)
        sys.exit(1)

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                          args.cpu_extension)
    print("To stop the execution press the Esc button")

    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    store_aisle = cv2.VideoWriter(os.path.join(args.output_dir, "store_aisle.mp4"),
                                  cv2.VideoWriter_fourcc(*'avc1'), fps,
                                  (initial_w, initial_h), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_time_start = time.time()
    frame_count = 1
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for the specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        people_count = 0
        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects whose probability exceeds the specified threshold
            if obj[2] > args.prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw the bounding box
                color = (min(class_id * 12.5, 255), min(class_id * 7, 255),
                         min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25), cv2.FONT_HERSHEY_COMPLEX, 1,
                    (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65), cv2.FONT_HERSHEY_COMPLEX, 1,
                    (255, 255, 255), 2)
        store_aisle.write(frame)

        time_interval = MULTIPLICATION_FACTOR * fps
        if frame_count % time_interval == 0:
            apply_time_stamp_and_save(frame, people_count, upload_azure)
        if frame_count % 10 == 0:
            progressUpdate(progress_file_path, int(time.time() - infer_time_start),
                           frame_count, video_len)
        frame = next_frame

    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')

    cap.release()
    infer_network.clean()
def main():
    """
    Load the network and parse the SSD output.

    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id, args.cpu_extension)[1]

    # Check for a live feed
    if args.input == 'CAM':
        input_stream = 0
    # Check for an input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Check for a video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    # Get the video's FPS and frame count
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Create an array of zeros the size of the frame count
    detection_frames = np.zeros(frame_count)

    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    for i in range(frame_count):
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame: resize, then change data layout from HWC to CHW
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        # Start asynchronous inference for the specified request.
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                # performance_counts(perf_count)

            frame, current_count, detected_class = ssd_out(frame, result)
            # Add the detected count to the detection array
            detection_frames[i] = current_count

            # Print video statistics: FPS, frame counter, inference time, and
            # the detections from the last two seconds
            fps_text = "Video FPS: " + str(fps)
            cv2.putText(frame, fps_text, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            frame_count_text = "Frame " + str(i) + "/" + str(frame_count)
            cv2.putText(frame, frame_count_text, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            detection_text = "Last 2 Seconds of Detections: "
            cv2.putText(frame, detection_text, (15, 400),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            arr_txt = str(detection_frames[i - WAIT_CONSTANT:i])
            cv2.putText(frame, arr_txt, (15, 415),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # Wait 2 seconds before publishing anything, then verify whether
            # any detections occurred in the last second
            if i > WAIT_CONSTANT:
                if 1 in detection_frames[i - WAIT_CONSTANT:i]:
                    current_count = 1

            # When a new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # The person's duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration", json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        # Send the frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()

    # Save the detection frames for debugging
    filepath = "../detections.txt"
    with open(filepath, 'w') as file_handler:
        for item in detection_frames:
            file_handler.write("{}\n".format(item))
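# Several of the implementations in this file call an `ssd_out` helper (with
# slightly different signatures; the one just above also returns a detected
# class). A minimal sketch of the common two-value variant, assuming the SSD
# DetectionOutput layout and the `initial_w`/`initial_h`/`prob_threshold`
# globals those functions declare:
def ssd_out(frame, result):
    """Draw boxes for detections above the threshold; return the frame and current count."""
    current_count = 0
    for obj in result[0][0]:
        if obj[2] > prob_threshold:
            xmin = int(obj[3] * initial_w)
            ymin = int(obj[4] * initial_h)
            xmax = int(obj[5] * initial_w)
            ymax = int(obj[6] * initial_h)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 55, 255), 1)
            current_count += 1
    return frame, current_count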
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold
    if DEBUG:
        print("probability threshold: ", prob_threshold)
        print("device: ", args.device)
        print("model_xml: ", args.model)

    if args.cpu_extension == 'auto':
        cpu_extension = get_cpu_extension()
    else:
        cpu_extension = args.cpu_extension

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(device=args.device, model_xml=args.model,
                             cpu_extension=cpu_extension)

    ### TODO: Handle the input stream ###
    isImage = None  # flag recording whether the input is an image or a video
    if args.input.lower() == 'cam':
        isImage = False
        args.input = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        isImage = True   # the input is an image
    else:
        isImage = False  # we have a video stream as input
    if DEBUG:
        print("args.input: ", args.input)

    inp = cv2.VideoCapture(args.input)
    inp.open(args.input)
    # Get the shape of the input
    width = int(inp.get(3))
    height = int(inp.get(4))
    if DEBUG:
        print("input image width: ", width)
        print("input image height: ", height)

    # Get the input shape of the network
    net_input_shape = infer_network.get_input_shape()
    if DEBUG:
        print("input_shape: ", net_input_shape)
        print("input_shape width: ", net_input_shape[2])
        print("input_shape height: ", net_input_shape[3])

    model = (args.model.split('/')[-1])[:-4]  # model name, used in output file names
    if isImage:
        vid_capt = None
    else:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        vid_capt = cv2.VideoWriter(
            'output_video_' + model + '_' + args.device + '_' +
            str(prob_threshold) + '.mp4', fourcc, 25, (width, height))

    request_id = 0
    counter = 0
    persons = []
    total_persons = 0
    buffer_size = 0
    inference_time = 0

    ### TODO: Loop until stream is over ###
    while inp.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = inp.read()
        if not flag:
            break  # the video ended

        # Allow the user to cancel easily
        key_pressed = cv2.waitKey(60)
        if key_pressed == 27:
            break
        counter += 1

        ### TODO: Pre-process the image as needed ###
        if DEBUG:
            print("------------------------------", net_input_shape)
        start_inference = time.time()
        prep_frame = preprocess_frame(frame, net_input_shape[2], net_input_shape[3])

        # If we want to run async, the request_id equals the frame counter
        if args.request_type == 'async':
            request_id = counter
            if DEBUG:
                print('-------------------request id:', request_id)

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(image=prep_frame, request_id=request_id)

        ### TODO: Wait for the result ###
        if infer_network.wait(request_id=request_id) == 0:
            output = infer_network.get_output(request_id=request_id)
            if DEBUG:
                print('output is:', output)
            inference_time += (time.time() - start_inference)

            ### TODO: Get the results of the inference request ###
            out_frame = get_results(frame, output, counter, prob_threshold,
                                    width, height, persons)

            ### TODO: Extract any desired stats from the results ###
            if vid_capt is not None:  # only write video output in video mode
                vid_capt.write(out_frame)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            total_persons = 0
            if len(persons) > 0:
                total_persons = persons[-1].getPersonId() + 1
            if DEBUG:
                print("total_persons:", total_persons)

            counted_persons = 0
            p_time = 0
            for p in persons:
                if p.isTracked():
                    counted_persons += 1
                    p_time = (counter - p.getFrameIn()) / FRAMERATE
                    if DEBUG:
                        print("for person: ", p.toString(),
                              " the time spent is: ", p_time)

            client.publish(
                "person",
                json.dumps({
                    "count": counted_persons,
                    "total": total_persons
                }))
            client.publish("person/duration", json.dumps({"duration": p_time}))
            if DEBUG:
                print('======== MQTT ===========')
                print("count: ", counted_persons)
                print("total_count: ", total_persons)

            ### TODO: Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(out_frame)
            buffer_size += sys.getsizeof(out_frame)
            sys.stdout.flush()

            ### TODO: Write an output image if `single_image_mode` ###
            if isImage:
                cv2.imwrite(
                    'output_image' + model + '_' + args.device + '_' +
                    str(prob_threshold) + '.jpg', out_frame)

    # Clean up
    if not isImage:
        vid_capt.release()
    inp.release()
    cv2.destroyAllWindows()
    client.disconnect()
    # Return values used to update the stats array
    return buffer_size, inference_time
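# The tracker above relies on a `Person` class (and a FRAMERATE constant)
# defined elsewhere in that project; a hedged sketch of the interface it
# appears to use — the field names and constructor are assumptions:
class Person:
    def __init__(self, person_id, frame_in):
        self._id = person_id        # sequential id assigned on first detection
        self._frame_in = frame_in   # frame index of the first detection
        self._tracked = True        # False once the person leaves the scene

    def getPersonId(self):
        return self._id

    def getFrameIn(self):
        return self._frame_in

    def isTracked(self):
        return self._tracked

    def toString(self):
        return "person {} (entered at frame {})".format(self._id, self._frame_in)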
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    total_count = 0
    last_count = 0
    start_time = 0
    request_id = 0

    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device)
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    if args.input == "CAM":
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    try:
        cap = cv2.VideoCapture(args.input)
    except FileNotFoundError:
        print("Cannot locate video file: " + args.input)
    except Exception as e:
        print("Something went wrong with the video file: ", e)

    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("Can't open video source")

    width = int(cap.get(3))
    height = int(cap.get(4))

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        pro_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        pro_frame = pro_frame.transpose((2, 0, 1))
        pro_frame = pro_frame.reshape(1, *pro_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_network.exec_net(pro_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            det_time = time.time() - inf_start

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            current_count = 0
            for box in result[0][0]:
                # Draw a bounding box when the object's probability is more than
                # the specified threshold
                conf = box[2]
                if conf > prob_threshold:
                    xmin = int(box[3] * width)
                    ymin = int(box[4] * height)
                    xmax = int(box[5] * width)
                    ymax = int(box[6] * height)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (225, 225, 225), 1)
                    current_count = current_count + 1

            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration", json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        if key_pressed == 27:
            break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    ### TODO: Write an output image if `single_image_mode` ###
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clear()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialize the Inference Engine wrapper
    infer_network = Network()
    # Set probability threshold for detections
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold

    # Load the network model into the IE
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id, args.cpu_extension)[1]

    # Check for a live feed
    if args.input == 'CAM':
        input_stream = 0
    # Check for an input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Check for a video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame: resize, then change data layout from HWC to CHW
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        # Perform inference on the frame
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            result = infer_network.get_output(cur_request_id)
            # if args.perf_counts:
            perf_count = infer_network.performance_counter(cur_request_id)
            # performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When a new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # The person's duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration", json.dumps({"duration": duration}))

            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
def main():
    """
    Load the network and parse the output.

    :return: None
    """
    global CLIENT
    global KEEP_RUNNING
    global DELAY
    global SIG_CAUGHT
    global prob_threshold
    global rate
    global TARGET_DEVICE
    global is_async_mode

    CLIENT = mqtt.Client()
    CLIENT.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    CLIENT.subscribe(TOPIC)

    try:
        pointx = int(os.environ['POINTX'])
        pointy = int(os.environ['POINTY'])
        width = int(os.environ['WIDTH'])
        height = int(os.environ['HEIGHT'])
    except KeyError:
        pointx = 0
        pointy = 0
        width = 0
        height = 0

    try:
        # Number of seconds between data updates to the MQTT server
        rate = float(os.environ['RATE'])
    except KeyError:
        rate = 1

    try:
        # Probability threshold for filtering detections
        prob_threshold = float(os.environ['PROB_THRESHOLD'])
    except KeyError:
        prob_threshold = 0.7

    if 'DEVICE' in os.environ.keys():
        TARGET_DEVICE = os.environ['DEVICE']
        if 'MULTI' not in TARGET_DEVICE and TARGET_DEVICE not in accepted_devices:
            print("Unsupported device: " + TARGET_DEVICE)
            sys.exit(2)
        elif 'MULTI' in TARGET_DEVICE:
            target_devices = TARGET_DEVICE.split(':')[1].split(',')
            for multi_device in target_devices:
                if multi_device not in accepted_devices:
                    print("Unsupported device: " + TARGET_DEVICE)
                    sys.exit(2)

    cpu_extension = os.environ['CPU_EXTENSION'] if 'CPU_EXTENSION' in os.environ.keys() else None
    model = os.environ["MODEL"]
    if 'FLAG' in os.environ.keys():
        async_mode = os.environ['FLAG']
        if async_mode == "sync":
            is_async_mode = False
        else:
            is_async_mode = True

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    logger = log.getLogger()

    render_time = 0
    roi_x = pointx
    roi_y = pointy
    roi_w = width
    roi_h = height

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
        else:
            input_stream = item['video']
        cap = cv2.VideoCapture(input_stream)
        if not cap.isOpened():
            logger.error("ERROR! Unable to open video source")
            sys.exit(1)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(model, TARGET_DEVICE, 1, 1, 2,
                                          cpu_extension)[1]

    message_thread = Thread(target=message_runner, args=())
    message_thread.setDaemon(True)
    message_thread.start()

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    ret, frame = cap.read()
    while ret:
        ret, next_frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break
        initial_wh = [cap.get(3), cap.get(4)]
        if next_frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        # If either default values or negative numbers are given,
        # then default to the start of the FRAME
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]

        key_pressed = cv2.waitKey(1)
        # 'c' key pressed
        if key_pressed == 99:
            # Give the operator a chance to change the area.
            # Select a rectangle from the left upper corner; don't display the crosshair.
            ROI = cv2.selectROI("Assembly Selection", frame, True, False)
            print("Assembly Area Selection: -x = {}, -y = {}, -w = {},"
                  " -h = {}".format(ROI[0], ROI[1], ROI[2], ROI[3]))
            roi_x = ROI[0]
            roi_y = ROI[1]
            roi_w = ROI[2]
            roi_h = ROI[3]
            cv2.destroyAllWindows()

        cv2.rectangle(frame, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h),
                      (0, 0, 255), 2)
        selected_region = [roi_x, roi_y, roi_w, roi_h]

        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start asynchronous inference for the specified request.
        inf_start = time.time()
        if is_async_mode:
            # Async enabled and only one video capture
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            # Async disabled
            infer_network.exec_net(cur_request_id, in_frame_fd)
        # Wait for the result
        infer_network.wait(cur_request_id)
        det_time = time.time() - inf_start

        # Results of the output layer of the network
        res = infer_network.get_output(cur_request_id)
        # Parse SSD output
        ssd_out(res, initial_wh, selected_region)

        # Draw performance stats
        inf_time_message = "Inference time: N/A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1000)
        render_time_message = "OpenCV rendering time: {:.3f} ms".format(render_time * 1000)
        if not INFO.safe:
            warning = "HUMAN IN ASSEMBLY AREA: PAUSE THE MACHINE!"
            cv2.putText(frame, warning, (15, 100), cv2.FONT_HERSHEY_COMPLEX,
                        0.8, (0, 0, 255), 2)
        log_message = "Async mode is on." if is_async_mode else "Async mode is off."
        cv2.putText(frame, log_message, (15, 15), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (15, 35), cv2.FONT_HERSHEY_COMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, render_time_message, (15, 55), cv2.FONT_HERSHEY_COMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Worker Safe: {}".format(INFO.safe), (15, 75),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)

        render_start = time.time()
        cv2.imshow("Restricted Zone Notifier", frame)
        render_end = time.time()
        render_time = render_end - render_start

        frame = next_frame
        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
        # Tab key pressed
        if key_pressed == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format("async" if is_async_mode else "sync"))

        if is_async_mode:
            # Swap infer request IDs
            cur_request_id, next_request_id = next_request_id, cur_request_id

    infer_network.clean()
    message_thread.join()
    cap.release()
    cv2.destroyAllWindows()
    CLIENT.disconnect()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    frame_count = 0
    # A flag to check whether the input is an image or not
    single_image_mode = False
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the model through `infer_network` ###
    infer_network.load_model(model=args.model, device=args.device,
                             cpu_extension=args.cpu_extension)

    # Handle the input stream ###
    if args.input == 'CAM':
        stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.png'):
        single_image_mode = True
        stream = args.input
    else:
        stream = args.input

    capture = cv2.VideoCapture(stream)
    width = int(capture.get(3))
    height = int(capture.get(4))
    model_input_shape = infer_network.get_input_shape()

    # Initializing necessary variables for calculations
    total_people = 0
    people_in_last_frame = 0
    start_time = time.time()
    new_people_time = time.time()
    duration = None
    people_in_frame = 0
    thres = 0.45
    current_frame_request_id = 0
    next_frame_request_id = 1
    prev_flag = False

    if not capture.isOpened():
        exit()
    _, current_frame = capture.read()
    processed_frame = cv2.resize(current_frame,
                                 (model_input_shape[3], model_input_shape[2]))
    processed_frame = processed_frame.transpose((2, 0, 1))
    processed_frame = processed_frame.reshape(1, *processed_frame.shape)
    executable_net = infer_network.exec_net(
        image=processed_frame, request_id=current_frame_request_id)

    # Loop until stream is over ###
    while capture.isOpened():
        # Read from the video capture ###
        flag, next_frame = capture.read()
        if not flag:
            break
        # Needed with cv2.imshow() method
        key_pressed = cv2.waitKey(60)

        # Pre-process the image as needed ###
        processed_frame = cv2.resize(next_frame,
                                     (model_input_shape[3], model_input_shape[2]))
        processed_frame = processed_frame.transpose((2, 0, 1))
        processed_frame = processed_frame.reshape(1, *processed_frame.shape)

        # Start asynchronous inference for specified request ###
        executable_net = infer_network.exec_net(
            image=processed_frame, request_id=next_frame_request_id)

        # Wait for the result ###
        if infer_network.wait(request_id=current_frame_request_id) == 0:
            # Get the results of the inference request ###
            outputs = infer_network.get_output(request_id=current_frame_request_id)
            # Extract any desired stats from the results ###
            frame, people_in_frame, cur_flag = process_outputs(
                current_frame, outputs, width, height, thres)
            current_time = time.time()
            # Note: this is elapsed wall-clock time since start,
            # not per-frame inference latency.
            cv2.putText(frame, 'Elapsed Time: ' + "%.2f" % (current_time - start_time),
                        (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, str(frame_count), (30, 210),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Calculate and send relevant information on ###
            # current_count, total_count and duration to the MQTT server ###
            # Topic "person": keys of "count" and "total" ###
            if people_in_frame > people_in_last_frame and not prev_flag:
                total_people += (people_in_frame - people_in_last_frame)
                new_people_time = time.time()
                client.publish("person", json.dumps({"total": total_people}))
            if people_in_frame < people_in_last_frame:
                # Topic "person/duration": key of "duration" ###
                duration = time.time() - new_people_time
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": people_in_frame}))
            people_in_last_frame = people_in_frame
            prev_flag = cur_flag

        current_frame = next_frame
        current_frame_request_id, next_frame_request_id = \
            next_frame_request_id, current_frame_request_id
        if key_pressed == 27:
            break
        frame_count += 1

        # Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    # Write an output image if `single_image_mode` ###
    if single_image_mode:
        cv2.imwrite('out.jpg', frame)
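# A minimal sketch of the two-request "ping-pong" pattern used above, which
# overlaps inference on frame N+1 with post-processing of frame N. Hedged
# assumptions: `exec_net` is an ExecutableNetwork from the OpenVINO
# 2019/2020 Python API, `input_blob` is its input layer name, and
# `preprocess` stands in for the resize/transpose/reshape steps used
# throughout this file. The final in-flight request is not drained here.
def ping_pong_inference(frame_iter, exec_net, input_blob, preprocess):
    cur_id, next_id = 0, 1
    prev_frame = None
    for frame in frame_iter:
        # Kick off inference on the new frame before blocking on the old one
        exec_net.start_async(request_id=next_id,
                             inputs={input_blob: preprocess(frame)})
        if prev_frame is not None and exec_net.requests[cur_id].wait(-1) == 0:
            yield prev_frame, exec_net.requests[cur_id].outputs
        prev_frame = frame
        cur_id, next_id = next_id, cur_id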
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    infer_network.network.batch_size = 1
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    # Check whether the input stream is a cam
    input_stream = 0 if args.input == "cam" else args.input
    try:
        cap = cv2.VideoCapture(input_stream)
    except FileNotFoundError:
        print("File {} not available".format(input_stream))
    except Exception as e:
        print("error on loading file: {}".format(e))
        exit(1)

    number_input_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # A single image reports one frame; a camera may report a negative count
    number_input_frames = 1 if number_input_frames != -1 and \
        number_input_frames < 0 else number_input_frames

    current_count = 0
    total_count = 0
    duration = 0
    count_start_time = 0
    person_num_trigger = 3  # maximum total number before alarm
    min_duration = 5        # minimum time (s) before alarm
    k_ref = 5               # number of frames before accepting a non-detection
    k = 0
    det_time = []           # inference times array
    cap.open(input_stream)

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        start_time = time()
        infer_network.exec_net(p_frame, 0)

        ### TODO: Wait for the result ###
        # Collecting object detection results
        objects = list()
        if infer_network.wait(0) == 0:
            det_time.append(time() - start_time)
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(0)
            ### TODO: Extract any desired stats from the results ###
            start_time = time()
            objects = process_result(frame, p_frame, result, infer_network,
                                     prob_threshold, log, prob_threshold)
            parsing_time = time() - start_time

            # Draw performance stats over frame
            inf_time_message = "Inference time: {:.3f} ms **** Inference mean time: {:.3f} ms".format(
                det_time[-1] * 1e3, sum(det_time) * 1e3 / len(det_time))
            parsing_message = "YOLO parsing time is {:.3f} ms".format(
                parsing_time * 1e3)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame, parsing_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            num_detection = len(objects)
            delta = num_detection - current_count
            if delta > 0 and k == 0:
                count_start_time = time()
                current_count = num_detection
                k = 0
            elif delta < 0 and k >= k_ref:
                duration = time() - count_start_time
                total_count += current_count
                current_count = num_detection
                MQTT_MSG_DURATION = json.dumps({"duration": duration})
                client.publish("person/duration", MQTT_MSG_DURATION)
                k = 0
            elif delta < 0 and k < k_ref:
                k += 1
            if delta == 0 and k > 0:
                k = 0

            MQTT_MSG_PERSON = json.dumps({"count": current_count})
            client.publish("person", MQTT_MSG_PERSON)

            if total_count > person_num_trigger:
                cv2.putText(
                    frame,
                    "number: {} of total people more than trigger limit: {}".format(
                        total_count, person_num_trigger), (15, 45),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            if (time() - count_start_time) > min_duration and current_count >= 1:
                cv2.putText(
                    frame,
                    "Duration: {:2f} s of this person more than trigger limit: {} s".format(
                        time() - count_start_time, min_duration), (15, 60),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            for obj in objects:
                color = (int(min(obj['class_id'] * 12.5, 255)),
                         min(obj['class_id'] * 7, 255),
                         min(obj['class_id'] * 5, 255))
                det_label = str(obj['class_id'])
                cv2.rectangle(frame, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), color, 2)
                cv2.putText(
                    frame, "#" + det_label + ' ' +
                    str(round(obj['confidence'] * 100, 1)) + ' %',
                    (obj['xmin'], obj['ymin'] - 7),
                    cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if number_input_frames == 1:
            cv2.imwrite("out.png", frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
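# The `k_ref` logic above is a simple debounce: a drop in the detection count
# must persist for `k_ref` consecutive frames before it is treated as a real
# exit rather than a flickering detection. A hedged, standalone sketch of
# that idea (pure Python, no external dependencies):
def debounce_count(raw_counts, k_ref=5):
    """Yield a stabilized count from a noisy per-frame count stream."""
    stable = 0
    below = 0  # consecutive frames with a lower count than `stable`
    for raw in raw_counts:
        if raw >= stable:
            stable = raw
            below = 0
        else:
            below += 1
            if below >= k_ref:  # the drop persisted, so accept it
                stable = raw
                below = 0
        yield stable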
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    infer_network = Network()
    one_image = False
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    network_shape = infer_network.get_input_shape()

    if args.input == 'CAM':
        validator = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        one_image = True
        validator = args.input
    else:
        validator = args.input
        assert os.path.isfile(args.input), "file doesn't exist"

    cap = cv2.VideoCapture(validator)
    if validator:
        cap.open(args.input)
    if not cap.isOpened():
        exit(1)

    probability_threshold = args.prob_threshold
    total_counter = 0
    pres_counter = 0
    prev_counter = 0
    beginning_time = 0
    num_bounding_box = 0
    timing = 0
    prev_bounding_box = 0
    req_id = 0

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        processed_image = cv2.resize(frame, (network_shape[3], network_shape[2]))
        processed_image = processed_image.transpose((2, 0, 1))
        processed_image = processed_image.reshape(1, *processed_image.shape)
        infer_network.exec_net(processed_image)

        if infer_network.wait(req_id) == 0:
            network_output = infer_network.get_output()
            frame, pres_counter, bounding_box = extract_box(
                frame.copy(), network_output, probability_threshold)
            box_width = frame.shape[1]
            tl, br = bounding_box
            if pres_counter > prev_counter:
                beginning_time = time.time()
                total_counter += pres_counter - prev_counter
                num_bounding_box = 0
                client.publish("person", json.dumps({"total": total_counter}))
            elif pres_counter < prev_counter:
                if num_bounding_box <= 20:
                    pres_counter = prev_counter
                    num_bounding_box += 1
                elif prev_bounding_box < box_width - 200:
                    pres_counter = prev_counter
                    num_bounding_box = 0
                else:
                    timing = int(time.time() - beginning_time)
                    client.publish("person/duration",
                                   json.dumps({"duration": timing}))
            if tl is not None and br is not None:
                prev_bounding_box = int((tl[0] + br[0]) / 2)
            prev_counter = pres_counter
            client.publish("person", json.dumps({"count": pres_counter}))

        frame = frame.copy(order='C')
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        if one_image:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    client.disconnect()
def main():
    args = build_argparser().parse_args()
    cap = cv2.VideoCapture(args.input)

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                          args.cpu_extension)[1]
    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        people_count = 0
        # Converting to Grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Remove the background
        fgbgmask = mog.apply(gray)
        # Thresholding the image
        thresh = 2
        max_value = 2
        threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                        cv2.THRESH_BINARY)[1]
        # Adding to the accumulated image
        accumulated_image = cv2.add(threshold_image, accumulated_image)
        colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT)

        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > args.prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5, 255), min(class_id * 7, 255),
                         min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        final_result_overlay = cv2.addWeighted(frame, P_COUNT_FRAME_WEIGHTAGE,
                                               colormap_image,
                                               COLORMAP_FRAME_WEIGHTAGE_1, 0)
        cv2.imshow("Detection Results", final_result_overlay)
        time_interval = MULTIPLICATION_FACTOR * fps
        frame = next_frame
        key = cv2.waitKey(1)
        if key == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
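# The heatmap in `main()` above is built by thresholding a MOG2 foreground
# mask to a small per-frame increment and accumulating it, so pixels where
# motion keeps recurring grow brighter. A minimal sketch of just that step,
# using the same cv2 import as the rest of this file:
def update_motion_heatmap(frame, mog, accumulated_image):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    fg_mask = mog.apply(gray)
    # Each moving pixel contributes at most 2 per frame, so the uint8
    # accumulator saturates slowly instead of blowing out immediately.
    increment = cv2.threshold(fg_mask, 2, 2, cv2.THRESH_BINARY)[1]
    accumulated_image = cv2.add(increment, accumulated_image)
    colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT)
    return accumulated_image, colormap_image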
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()
    log.info("Selected Network input Layer type is " + str(type(net_input_shape))
             + " And shape is " + str(net_input_shape))
    log.info("Required input img size W " + str(net_input_shape[3]) +
             " H " + str(net_input_shape[2]))

    ### TODO: Handle the input stream ###
    cap, error_flag, image_flag = check_input_type(args.input, args.cam_id)
    if error_flag:  # Check for invalid file extension
        log.error("Program stopped")
        return
    elif image_flag:  # Check for image
        INPUT_IMAGE = args.input
        img = cv2.imread(INPUT_IMAGE)
        if type(img) is not np.ndarray:
            # Check for an empty image read, same as img.empty()
            log.error("Error: Invalid image or path")
            log.error("Use -h argument for help")
            return
    else:
        cap.open(args.input)

    # Get input feed height and width
    img_width = int(cap.get(3))
    img_height = int(cap.get(4))
    if img_width < 1 or img_width is None:  # If input path is wrong
        log.error("Error! Can't read Input: Check path")
        return
    log.info("feed frame size W " + str(img_width) + " H " + str(img_height))

    # Initialize video writer if in video mode
    if args.write_video == "Y":  # only if the argument given is Y
        if not image_flag:
            # Video writer (Linux)
            log.info("---Opencv video writer debug LIN---")
            fourcc = cv2.VideoWriter_fourcc(*'MP4V')
            out = cv2.VideoWriter('out.mp4', 0x00000021, 30,
                                  (img_width, img_height))
            log.info("-------------------------------")

    # Initialize variables utilized inside the loop
    frame_count = 0
    total_people_count = 0
    last_state = 0
    delay_on = 0
    delay_off = (time.time() * 1000)  # Initialize timer before loop to get actual time
    delay_diff_on = 0
    delay_diff_off = 0
    duration = 0
    duration_timebase = 0
    duration_fpsbase = 0
    count_people_image = 0
    # Second counting timer initialized
    sec_on = (time.time() * 1000)  # Timer for update stat on terminal START
    sec_diff = 0
    cv_drawstate_time_s = 0
    cv_drawstate_time_e = 0
    count_flag = False
    frame_count_onstate = 0
    frame_count_offstate = 0
    # Accuracy log
    log_acount = 0
    log_frame_no = []
    log_person_counted = []
    log_duration_fpsbase = []
    log_duration_timebase = []
    log_infer_time = []
    log_ecount = 0       # counter for error log in case of multiple box count
    log_multicounted = []

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        frame_count += 1  # Global frame count: number of frames processed
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(1)

        ### TODO: Read from the video capture ###
        ### TODO: Pre-process the image as needed ###
        p_frame = preprocess_frame(frame, net_input_shape[3],
                                   net_input_shape[2])  # extracted input function

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(p_frame)

        ### TODO: Wait for the result ###
        inferreq_start_time = (time.time() * 1000)  # Timer for inference START
        if infer_network.wait() == 0:
            inferreq_end_time = (time.time() * 1000) - inferreq_start_time  # Timer for inference END
            log_infer_time.append(float("{:.2f}".format(inferreq_end_time)))

            ### TODO: Get the results of the inference request ###
            blob, result = infer_network.get_output()
            # If the model outputs multiple blobs, print the available blob information
            if frame_count == 1:  # Print only once
                for name, output_ in blob.items():  # Find the possible blob names
                    log.info("The name of available blob is: " + str(name))

            ### TODO: Extract any desired stats from the results ###
            color = selectBoxcolor(args.box_color)
            cv_drawboxtime_s = (time.time() * 1000)  # Timer for drawing box on frame START
            frame, count_box, countmultipeople = draw_boxes(
                frame, result, img_width, img_height, color, args.prob_threshold)
            cv_drawboxtime_e = (time.time() * 1000) - cv_drawboxtime_s  # Timer for drawing box on frame END
            count_people_image = countmultipeople  # Variable for image stat only

            ### TODO: Calculate and send relevant information on ###
            if count_box != last_state:
                # Anything under this is executed only once, when the state changes.
                log_acount += 1  # increase state change counter
                if count_box == 1:
                    count_flag = True  # Flag to verify that counting is active
                    delay_on = (time.time() * 1000)  # Timer for on delay START
                    delay_diff_off = (time.time() * 1000) - delay_off  # Timer for off delay END
                    delay_diff_on = 0  # Timer for on delay RESET
                    frame_count_onstate = frame_count  # Frame count is global FPS counter
                    frame_count_offstate = frame_count - frame_count_offstate  # Calculate the difference
                else:
                    count_flag = False
                    delay_diff_on = (time.time() * 1000) - delay_on  # Timer for on delay END
                    delay_off = (time.time() * 1000)  # Timer for off delay START
                    delay_diff_off = 0  # Timer for off delay RESET
                    frame_count_onstate = frame_count - frame_count_onstate  # Calculate the difference
                    frame_count_offstate = frame_count
                    if delay_diff_on > args.delay_band:
                        total_people_count += 1
                        duration_timebase = delay_diff_on / 1000  # Convert to sec.
                        duration_fpsbase = frame_count_onstate / args.fps  # Local use
                        duration = duration_fpsbase  # global set
                        # Accuracy log: individual lists, terminal friendly
                        log_person_counted.append(total_people_count)
                        log_duration_timebase.append("{:.2f}".format(duration_timebase))
                        log_duration_fpsbase.append(duration_fpsbase)
                        log_frame_no.append(frame_count)  # Log frame no of video
                        ### current_count, total_count and duration to the MQTT server ###
                        ### Topic "person": keys of "count" and "total" ###
                        client.publish("person",
                                       json.dumps({"total": total_people_count}))
                        ### Topic "person/duration": key of "duration" ###
                        client.publish("person/duration",
                                       json.dumps({"duration": duration}))
                client.publish("person", json.dumps({"count": countmultipeople}))
                last_state = count_box
            else:
                if countmultipeople not in (0, 1):
                    # In case multiple people are detected
                    log_ecount += 1  # Increase error counter
                    # Nested list: frame number and multi-people count
                    log_multicounted.append([
                        'F: ' + str(frame_count) + ' C: ' + str(countmultipeople)
                    ])

            ### This part needs to be optimized ###
            if args.toggle_video == "ON":
                cv_drawstate_time_s = (time.time() * 1000)  # Timer for draw stat on frame START
                # Draw inference time on the image
                label = "Inference time: " + str("{:.2f}".format(inferreq_end_time)) + "ms"
                cv2.putText(frame, label, (15, 20), cv2.FONT_HERSHEY_COMPLEX,
                            0.4, BOXCOLOR['BLUE'], 1)
                if image_flag or countmultipeople > 1:
                    label1 = "Total people count: " + str(count_people_image)
                else:
                    label1 = "Total people count: " + str(total_people_count)
                cv2.putText(frame, label1, (15, 30), cv2.FONT_HERSHEY_COMPLEX,
                            0.4, BOXCOLOR['BLUE'], 1)
                if countmultipeople > 1 or image_flag is True:
                    label2 = "Average Time stayed: N/A"
                else:
                    label2 = "Average Time stayed: " + str("{:.2f}".format(duration)) + "Sec."
                cv2.putText(frame, label2, (15, 40), cv2.FONT_HERSHEY_COMPLEX,
                            0.4, BOXCOLOR['BLUE'], 1)
                # People count exceeded alarm
                if countmultipeople > args.alarm_people or duration > args.alarm_duration:
                    draw_framelinered(frame, img_height, img_width)
                    if countmultipeople > args.alarm_people:
                        label3 = "Alarm: people count limit exceeded! limit: " + str(args.alarm_people)
                        cv2.putText(frame, label3, (15, 50),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.4, BOXCOLOR['RED'], 1)
                    else:
                        label4 = "Alarm: Person stayed longer! limit: " + str(args.alarm_duration) + "Sec."
                        cv2.putText(frame, label4, (15, 60),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.4, BOXCOLOR['RED'], 1)
                else:
                    draw_framelinegreen(frame, img_height, img_width)
                # Draw cv process time
                label5 = "CV Frame process time: " + str(
                    "{:.2f}".format(cv_drawboxtime_e + cv_drawstate_time_e)) + "ms"
                cv2.putText(frame, label5, (15, 70), cv2.FONT_HERSHEY_COMPLEX,
                            0.4, BOXCOLOR['BLUE'], 1)
                cv_drawstate_time_e = (time.time() * 1000) - cv_drawstate_time_s  # Timer for draw stat on frame END
            else:
                # Video feed is off: print cv processing stats on the terminal
                sec_diff = (time.time() * 1000) - sec_on  # Timer for update stat on terminal END
                if sec_diff > 1000:  # update stats roughly every 1 sec.
                    os.system('cls' if os.name == 'nt' else 'clear')  # Clear the terminal
                    print()
                    print("Video feed is OFF, Terminal will refresh every sec.")
                    print("Press ctrl+c to stop execution.")
                    print("Checkout log_xxx.txt for stats.")
                    # People count on terminal
                    if countmultipeople > 1:
                        print("Current people count: ", countmultipeople)
                    else:
                        print("Current people count: ", count_box)
                    print("Total people count: ", total_people_count)
                    print("Average Time stayed: " "{:.2f}".format(duration), " Sec.")
                    # Alarm on terminal
                    if countmultipeople > args.alarm_people or duration > args.alarm_duration:
                        if countmultipeople > args.alarm_people:
                            print("##### Alarm1 #####")
                            print("People count limit exceeded! limit: " + str(args.alarm_people))
                            print("##################")
                        else:
                            print("##### Alarm2 #####")
                            print("Person stayed longer! limit: " + str(args.alarm_duration) + "Sec.")
                            print("##################")
                    print("-----Stats for time -----")
                    print("Inference Time(ms):", "{:.2f}".format(inferreq_end_time))
                    print("Draw boundingBox time(ms):", "{:.2f}".format(cv_drawboxtime_e))
                    print("Draw state time(ms):", "{:.2f}".format(cv_drawstate_time_e))
                    print("--------------------------")
                    sec_on = (time.time() * 1000)  # Timer for update stat on terminal RESET
                    sec_diff = 0

            # Adjust timers with inference and cv processing time to fix
            # counting and duration.
            if count_flag:
                delay_on = delay_on + inferreq_end_time + cv_drawboxtime_e + cv_drawstate_time_e
            else:
                delay_off = delay_off + inferreq_end_time + cv_drawboxtime_e + cv_drawstate_time_e

        ### TODO: Send the frame to the FFMPEG server ###
        # Write video or image file
        if not image_flag:
            if args.toggle_video == "ON":
                sys.stdout.buffer.write(frame)  # Send to ffmpeg
                sys.stdout.flush()
            if args.write_video == "Y":
                out.write(frame)
        else:
            ### TODO: Write an output image if `single_image_mode` ###
            cv2.imwrite('output_image.jpg', frame)
            print("Image saved successfully!")

        if key_pressed == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()

    # Dump stats to log txt file
    log.info("Last frame processed no: " + str(frame_count))
    log.info("-----AccuracyLog-----")
    if len(log_person_counted) > 1:  # Only when counting single persons
        log.info("No Of person:")
        log.info(str(log_person_counted))
        log.info("Duration stayed fpsbase:")
        log.info(str(log_duration_fpsbase))
        log.info("Frame No.:")
        log.info(str(log_frame_no))
        log_infer_time = np.array(log_infer_time)  # Convert list to np array
        log.info("Inference time: [min max avg.]")
        log.info(str([log_infer_time.min(), log_infer_time.max(),
                      float("{:.2f}".format(np.average(log_infer_time)))]))
    else:
        log.info("N/A")
        log_infer_time = np.array(log_infer_time)  # Convert list to np array
        log.info("Inference time: [min max avg.]")
        log.info(str([log_infer_time.min(), log_infer_time.max(),
                      float("{:.2f}".format(np.average(log_infer_time)))]))
    log.info("-----Error log-----")
    if 1 < len(log_multicounted) < 10:  # Only when counting single persons
        log.info("Frame No: Count")
        log.info(str(log_multicounted))
    else:
        log.info("N/A")
    log.info("-----Finish!------")
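# The counting above fires on a state change (`count_box != last_state`) and
# only accepts an "on" interval longer than `--delay_band` milliseconds as a
# real person, filtering out single-frame false positives. A compact,
# hedged sketch of that edge-triggered timer; `now_ms` is an assumed clock
# callable (e.g. lambda: time.time() * 1000), not part of the original code:
def count_on_state_changes(states, delay_band_ms, now_ms):
    total, last, on_since = 0, 0, None
    for state in states:
        if state != last:
            if state == 1:
                on_since = now_ms()          # person appeared: start timer
            elif on_since is not None and now_ms() - on_since > delay_band_ms:
                total += 1                   # the detection persisted, count it
            last = state
    return total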
def intruder_detector():
    """
    Process the input source frame by frame and detect intruders, if any.
    :return status: 0 on success, negative value on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONFIG_FILE
    global video_caps
    global conf_labels_file_path
    global is_async_mode

    parse_args()
    ret = check_args()
    if ret != 0:
        return ret, ""
    if not os.path.isfile(CONFIG_FILE):
        return -12, ""
    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Create a subdirectory to save output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]
    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initialize VideoWriter for each source
    for video_cap in video_caps:
        ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                           int(video_cap.input_width))
        if ret != 0:
            return ret, ret_value

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]
    # Arrange windows so that they are not overlapping
    arrange_windows()
    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    signal.signal(signal.SIGINT, signal_handler)
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    next_request_id = 1
    cur_request_id = 0

    # Main loop starts here. Loop over all the video captures.
    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")
    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_fps))):
                if is_async_mode:
                    ret, video_cap.next_frame = video_cap.vc.read()
                else:
                    ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If no new frame or error in reading a frame, exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break
            if no_more_data[idx]:
                stream_end_frame = numpy.zeros(
                    (int(video_cap.input_height), int(video_cap.input_width), 1),
                    dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                cv2.imshow(video_cap.cam_name, stream_end_frame)
                continue
            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize to expected size (in model .xml file)
            # Input frame is resized to infer resolution
            if is_async_mode:
                in_frame = cv2.resize(video_cap.next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))
                # Start asynchronous inference for specified request.
                infer_network.exec_net(next_request_id, in_frame)
                video_cap.frame = video_cap.next_frame
                # Async enabled and only one video capture
                if len(video_caps) == 1:
                    videoCapResult = video_cap
                # Async enabled and more than one video capture
                else:
                    # Get previous index
                    videoCapResult = video_caps[
                        idx - 1 if idx - 1 >= 0 else len(video_caps) - 1]
            else:
                in_frame = cv2.resize(video_cap.frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))
                # Start synchronous inference for specified request.
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = video_cap

            inf_start = time.time()
            # Wait for the result
            if infer_network.wait(cur_request_id) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(cur_request_id)
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the
                    # probability is more than the specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        videoCapResult.current_count[label] += 1
                        xmin = int(obj[3] * videoCapResult.input_width)
                        ymin = int(obj[4] * videoCapResult.input_height)
                        xmax = int(obj[5] * videoCapResult.input_width)
                        ymax = int(obj[6] * videoCapResult.input_height)
                        # Draw bounding box around the intruder detected
                        cv2.rectangle(videoCapResult.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(videoCapResult.no_of_labels):
                    if videoCapResult.candidate_count[i] == videoCapResult.current_count[i]:
                        videoCapResult.candidate_confidence[i] += 1
                    else:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.candidate_count[i] = videoCapResult.current_count[i]
                    if videoCapResult.candidate_confidence[i] == CONF_CANDIDATE_CONFIDENCE:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.changed_count[i] = True
                    else:
                        continue
                    if videoCapResult.current_count[i] > videoCapResult.last_correct_count[i]:
                        videoCapResult.total_count[i] += \
                            videoCapResult.current_count[i] - videoCapResult.last_correct_count[i]
                        det_objs = videoCapResult.current_count[i] - \
                            videoCapResult.last_correct_count[i]
                        total_count = sum(videoCapResult.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i],
                                videoCapResult.cam_name)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=videoCapResult.frame_count)
                            videoCapResult.events.append(event)
                        snapshot_name = "output/intruder_{}.png".format(total_count)
                        cv2.imwrite(snapshot_name, videoCapResult.frame)
                    videoCapResult.last_correct_count[i] = videoCapResult.current_count[i]

                # Create intruder log window, add logs to the frame and display it
                log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                         dtype='uint8')
                for i, log in enumerate(log_list):
                    cv2.putText(log_window, log, (10, 20 * i + 15),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                cv2.imshow("Intruder Log", log_window)
                videoCapResult.frame_count += 1
                # Video output
                videoCapResult.vw.write(videoCapResult.frame)
                log_message = "Async mode is on." if is_async_mode else \
                    "Async mode is off."
                cv2.putText(videoCapResult.frame, log_message,
                            (10, int(videoCapResult.input_height) - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 10, 10), 1)
                inf_time_message = "Inference time: N/A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(inf_time * 1000)
                cv2.putText(videoCapResult.frame, inf_time_message,
                            (10, int(videoCapResult.input_height) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                fps_time = time.time() - start_time
                fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
                cv2.putText(videoCapResult.frame, fps_message,
                            (10, int(videoCapResult.input_height) - 10),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                # Display the video output
                cv2.imshow(videoCapResult.cam_name, videoCapResult.frame)
                start_time = time.time()

            # Loop video to mimic continuous input if LOOP_VIDEO flag is True
            if LOOP_VIDEO and not videoCapResult.is_cam:
                vfps = int(round(videoCapResult.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended, restart it
                if videoCapResult.loop_frames > videoCapResult.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    videoCapResult.loop_frames = 0
                    videoCapResult.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

            if is_async_mode:
                # Swap infer request IDs
                cur_request_id, next_request_id = next_request_id, cur_request_id

            if cv2.waitKey(1) == 27:
                break
            # Tab key pressed
            if cv2.waitKey(1) == 9:
                is_async_mode = not is_async_mode
                print("Switched to {} mode".format(
                    "async" if is_async_mode else "sync"))

        if False not in no_more_data:
            break

    ret = save_json()
    if ret != 0:
        return ret, ''
    infer_network.clean()
    log_file.close()
    return [0, '']
def infer_on_video(args):
    # Initialize the Inference Engine
    plugin = Network()
    # Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)
    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    out = cv2.VideoWriter('out.mp4', fourcc, 15, (width, height))
    frame_count = 0

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, input_frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
        p_frame = cv2.resize(input_frame,
                             (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            result = result.transpose((1, 2, 0))
            # Get semantic mask for person class
            person_mask = np.uint8(result == 15)
            person_mask = np.dstack([person_mask, person_mask, person_mask])
            person_mask = cv2.resize(person_mask, (width, height))
            # Create the overlay mask
            overlay = np.zeros_like(person_mask)
            overlay[:] = (127, 0, 0)
            # Add overlay-mask over input frame
            overlay_mask = person_mask * overlay
            assert person_mask.shape == overlay_mask.shape, \
                "Raw person mask and overlay mask should be of same dimensions"
            output_frame = cv2.addWeighted(input_frame, 1, overlay_mask, 0.9, 0)
            # Write output frames to video
            output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR)
            out.write(output_frame)
            # Pipe output frames to server (`tobytes` replaces the
            # deprecated numpy `tostring`)
            pipe.stdin.write(output_frame.tobytes())

        # Show frame counter
        frame_count = frame_count + 1
        print('Frame count: ' + str(frame_count))

        # Break on keyboard interrupt
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    out.release()
    cv2.destroyAllWindows()
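# A hedged sketch of the mask step used above: a semantic-segmentation result
# with one class id per pixel (class 15 is "person" in the Pascal VOC label
# set the original code assumes) is turned into a 3-channel overlay that can
# be blended onto the frame. Uses the same cv2/np imports as this file.
def person_overlay(class_map, width, height, color=(127, 0, 0)):
    mask = np.uint8(class_map == 15)          # 1 where a person was found
    mask = cv2.resize(np.dstack([mask] * 3), (width, height))
    overlay = np.zeros_like(mask)
    overlay[:] = color                        # broadcast the overlay color
    return mask * overlay                     # colored only where mask == 1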
def infer_on_stream(args):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :return: None
    """
    # client = mqtt.Client()
    # client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    # Initialise the class
    plugin = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    model = args.model

    ### TODO: Load the model through `infer_network` ###
    plugin.load_model(model, args.device, args.cpu_extension)
    net_input_shape = plugin.get_input_shape()

    if args.input == 'CAM':
        input_stream = 0
        single_image = False
    elif args.input[-4:] in [".jpg", ".bmp"]:
        single_image = True
        input_stream = args.input
    else:
        single_image = False
        input_stream = args.input
        assert os.path.isfile(input_stream)

    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)
    if not cap.isOpened():
        log.error("Unable to open video source")

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    ### TODO: Handle the input stream ###
    out = cv2.VideoWriter('out1.mp4', 0x00000021, 30, (width, height))

    # Counters are never updated below; initialized here so the
    # overlay code runs without a NameError.
    current_count = 0
    tcount = 0

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Read from the video capture ###
        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        t1 = time.time()
        plugin.exec_net(p_frame)

        ### TODO: Wait for the result ###
        if plugin.wait() == 0:
            ### TODO: Get the results of the inference request ###
            result = plugin.extract_output()
            t2 = time.time()
            s1 = t2 - t1
            ### TODO: Extract any desired stats from the results ###
            frame, s = draw_boxes(frame, result, args, width, height)
            t3 = time.time() - t2
            txt = "current_count: %d" % current_count + " total_count: %d" % tcount
            cv2.putText(frame, txt, (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 0), 2)
            txt1 = "duration: %d" % s
            cv2.putText(frame, txt1, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 0), 2)
            txt2 = "Inference time: {:.3f}ms".format(s1 * 1000) + \
                " FPS: {:.3f}".format(1 / s1)
            cv2.putText(frame, txt2, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 0, 0), 2)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            # client.publish("person", json.dumps({"total": tcount}), retain=False)
            # client.publish('person', json.dumps({'count': current_count, 'total': tcount}), retain=False)
            ### Topic "person/duration": key of "duration" ###
            # client.publish("person/duration", json.dumps({"duration": int(s)}), retain=False)
            out.write(frame)

        ### TODO: Send the frame to the FFMPEG server ###
        # sys.stdout.buffer.write(frame)
        # sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image:
            cv2.imwrite("output.jpg", frame)
        if key_pressed == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    ### TODO: Disconnect from MQTT ###
    # client.disconnect()
    out.release()
def infer_on_stream(args, m_client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param m_client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    global initial_w, initial_h, prob_threshold
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    Model = args.model
    Device = args.device
    Cpu = args.cpu_extension
    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(Model, Cpu, Device)
    network_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    # Checks for live feed
    if args.input == 'CAM':
        input_validated = 0
    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_validated = args.input
    # Checks for video file
    else:
        input_validated = args.input
        assert os.path.isfile(args.input), "file doesn't exist"

    cap = cv2.VideoCapture(input_validated)
    cap.open(input_validated)
    w = int(cap.get(3))
    h = int(cap.get(4))
    temp = 0
    tk = 0
    in_shape = network_shape['image_tensor']

    # Initialize variables
    duration_prev = 0
    counter_total = 0
    dur = 0
    request_id = 0
    report = 0
    counter = 0
    counter_prev = 0
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (in_shape[3], in_shape[2]))
        image_p = image.transpose((2, 0, 1))
        image_p = image_p.reshape(1, *image_p.shape)

        ### TODO: Start asynchronous inference for specified request ###
        net_input = {'image_tensor': image_p, 'image_info': image_p.shape[1:]}
        inf_start = time.time()
        infer_network.exec_net(net_input, request_id)
        color = (255, 0, 0)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            det_time = time.time() - inf_start
            ### TODO: Get the results of the inference request ###
            net_output = infer_network.get_output()
            # Draw bounding box
            frame, current_count, d, tk = draw_outputs(
                net_output, frame, initial_w, initial_h, temp, tk)
            # Print inference time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)
            # Calculate and send relevant information
            if current_count > last_count:  # New entry
                start_time = time.time()
                total_count = total_count + current_count - last_count
                m_client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:  # Average time
                duration = int(time.time() - start_time)
                m_client.publish("person/duration",
                                 json.dumps({"duration": duration}))

            # Adding overlays to the frame
            txt2 = "Distance: %d" % d + " Lost frame: %d" % tk
            cv2.putText(frame, txt2, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)
            txt2 = "Current count: %d " % current_count
            cv2.putText(frame, txt2, (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)
            if current_count > 3:
                txt2 = "Alert! Maximum count reached"
                (text_width, text_height) = cv2.getTextSize(
                    txt2, cv2.FONT_HERSHEY_COMPLEX, 0.5, thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                # make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(frame, box_coords[0], box_coords[1], (0, 0, 0),
                              cv2.FILLED)
                cv2.putText(frame, txt2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            m_client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            temp = d

        ### TODO: Send the frame to the FFMPEG server ###
        # Resize the frame
        frame = cv2.resize(frame, (768, 432))
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network_vals = infer_network.load_model(
        model=args.model, device=args.device, cpu_extension=args.cpu_extension)
    log.debug(infer_network_vals)
    input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    if args.input == 'CAM':
        input_stream = 0
        single_image = False
    elif args.input[-4:] in [".jpg", ".bmp"]:
        single_image = True
        input_stream = args.input
    else:
        single_image = False
        input_stream = args.input
        assert os.path.isfile(input_stream)

    capture = cv2.VideoCapture(input_stream)
    capture.open(input_stream)
    if not capture.isOpened():
        log.error("Unable to open video source")
    logger.debug("W+H: " + str(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + "-" +
                 str(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    t0 = 0
    infer_time = 0
    t1 = 0
    process_time = 0
    request_id = 0
    total_count = 0
    previous_count = 0
    num_persons_in = 0
    current_count = 0
    start_time = time.time()
    stay_time = 0
    max_stay_time = 0
    mean_stay_time = 0
    track_threshold = 0.1
    max_len = 30
    # this list is used to export values to an excel file
    data_list = []
    # queue to accumulate the last `max_len` detections
    track = deque(maxlen=max_len)

    ### TODO: Loop until stream is over ###
    while capture.isOpened():
        data_element = {}
        ### TODO: Read from the video capture ###
        flag, frame = capture.read()
        if not flag:
            break

        ### TODO: Pre-process the image as needed ###
        logger.debug("size: {}".format(input_shape))
        resized_frame = cv2.resize(frame, (input_shape[3], input_shape[2]))
        transposed_resized_frame = resized_frame.transpose((2, 0, 1))
        resh_transposed_resized_frame = transposed_resized_frame.reshape(input_shape)

        ### TODO: Start asynchronous inference for specified request ###
        t0 = time.time()
        infer_network.exec_net(request_id, resh_transposed_resized_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait(request_id) == 0:
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(request_id, frame.shape,
                                              prob_threshold)
            t1 = time.time()
            infer_time = t1 - t0
            ### TODO: Extract any desired stats from the results ###
            current_count, bb_frame = count_persons(result, frame)
            process_time = time.time() - t1

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            # append number of detections to the `track` queue
            track.append(current_count)
            # a person counts as present while the proportion of frames with
            # a positive detection exceeds the threshold
            num_tracked = 0
            if np.sum(track) / max_len > track_threshold:
                num_tracked = 1
            if num_tracked > previous_count:
                start_time = time.time()
                num_persons_in = num_tracked - previous_count
                total_count += num_persons_in
                previous_count = num_tracked
                client.publish("person", json.dumps({"total": total_count}),
                               retain=True)
            ### Topic "person/duration": key of "duration" ###
            if num_tracked < previous_count:
                previous_count = num_tracked
            if num_tracked > 0:
                stay_time += (time.time() - start_time) / 10
                logger.debug("Duration: {}".format(stay_time))
                if total_count > 0:
                    mean_stay_time = stay_time / total_count
                    client.publish("person/duration",
                                   json.dumps({"duration": int(mean_stay_time)}))
            client.publish("person", json.dumps({"count": num_tracked}),
                           retain=True)

            data_element['time'] = time.strftime("%H:%M:%S", time.localtime())
            data_element['current_count'] = current_count
            data_element['num_tracked'] = num_tracked
            data_element['num_persons_in'] = num_persons_in
            data_element['previous_count'] = previous_count
            data_element['total_count'] = total_count
            data_element['stay_time'] = stay_time
            data_element['mean_stay_time'] = mean_stay_time
            data_element['infer_time'] = infer_time
            data_element['process_time'] = process_time
            data_element['result'] = result
            data_list.append(data_element)
            logger.debug(
                "NUM TRACKED: {} - {} - PREVIOUS COUNT: {} - TOTAL COUNT: {} - STAY TIME: {}"
                .format(num_tracked, np.sum(track), previous_count,
                        total_count, mean_stay_time))

        key_pressed = cv2.waitKey(60)
        if key_pressed == 27:
            write_file(data_list)
            capture.release()
            cv2.destroyAllWindows()
            client.disconnect()
            break

        ### TODO: Send the frame to the FFMPEG server ###
        logger.debug("Image_size: {}".format(bb_frame.shape))
        sys.stdout.buffer.write(bb_frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image:
            cv2.imwrite("output.jpg", bb_frame)

    write_file(data_list)
    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
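# The deque above smooths detections over the last `max_len` frames: a person
# is considered present only while the fraction of positive frames exceeds
# `track_threshold`. The same idea as a small self-contained helper:
from collections import deque

def make_presence_filter(max_len=30, track_threshold=0.1):
    track = deque(maxlen=max_len)

    def update(current_count):
        track.append(current_count)
        # dividing by max_len (not len(track)) dilutes the very first frames,
        # mirroring the behavior of the function above
        return 1 if sum(track) / max_len > track_threshold else 0

    return update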
def infer_on_video(args):
    ### TODO: Initialize the Inference Engine
    ntw = Network()

    ### TODO: Load the network model into the IE
    ntw.load_model(args.m, args.d, CPU_EXTENSION)
    iptShape = ntw.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    out = cv2.VideoWriter('out.mp4', 0x00000021, 30, (width, height))

    # Process frames until the video ends, or process is exited
    # reset frame count
    frameCnt = 0
    col = convert_color(args.col)
    conf = float(args.conf)
    frames = {}
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            # wait for all inference to complete
            for i in range(frameCnt):
                rqs = ntw.exec_network.requests[i]
                status = rqs.wait()
                ### TODO: Get the output of inference
                # If inference was successful, draw boxes; if not,
                # do nothing and just output the original frame.
                f = frames[i]
                if status == 0:
                    boxes = rqs.outputs['detection_out'][0][0]
                    ### TODO: Update the frame to include detected bounding boxes
                    f = draw_boxes(f, boxes, width, height, conf, col)
                # Write out the frame
                out.write(f)
            # Release the out writer and destroy any OpenCV windows
            out.release()
            cv2.destroyAllWindows()
            print('total frames: {}'.format(frameCnt))
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the frame
        frames[frameCnt] = frame
        ppImg = preProc(frame, iptShape[3], iptShape[2])

        ### TODO: Perform inference on the frame
        rqs = ntw.async_inference(frameCnt, ppImg)
        # There is no point in performing an async request if we wait for the
        # inference to complete before processing the next frame; instead,
        # all waiting was moved to the end of the while loop.
        frameCnt += 1

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture
    cap.release()
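# The loop above issues one async request per frame and defers every wait()
# to the end of the stream, which assumes the Network exposes at least as
# many infer requests as there are frames. A hedged sketch of the drain step,
# using the same `exec_network.requests` handles and 'detection_out' blob
# name that the function above already relies on:
def drain_requests(exec_network, frame_count):
    results = []
    for i in range(frame_count):
        rqs = exec_network.requests[i]
        if rqs.wait() == 0:  # 0 means the request finished successfully
            results.append(rqs.outputs['detection_out'][0][0])
        else:
            results.append(None)  # failed request: no boxes for this frame
    return results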
def main():
    model_xml = os.environ["MODEL"]
    input_source = os.environ["INPUT"]
    device = os.environ['DEVICE'] if 'DEVICE' in os.environ.keys() else 'CPU'
    cpu_extension = os.environ['CPU_EXTENSION'] \
        if 'CPU_EXTENSION' in os.environ.keys() else None
    try:
        # Probability threshold for detections filtering
        prob_threshold = float(os.environ['PROB_THRESHOLD'])
    except KeyError:
        prob_threshold = 0.5
    try:
        # Specify the azure storage name to upload results to cloud.
        account_name = os.environ['ACCOUNT_NAME']
    except KeyError:
        account_name = None
    try:
        # Specify the azure storage key to upload results to cloud.
        account_key = os.environ['ACCOUNT_KEY']
    except KeyError:
        account_key = None

    if account_name == "" or account_key == "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name + "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)

    if input_source == 'cam':
        input_stream = 0
    else:
        input_stream = input_source
        assert os.path.isfile(input_source), \
            "Specified input file doesn't exist"
    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', input_source)
        sys.exit(1)

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, device, 1, 1, 0,
                                          cpu_extension)[1]
    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        people_count = 0
        # Converting to Grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Remove the background
        fgbgmask = mog.apply(gray)
        # Thresholding the image
        thresh = 2
        max_value = 2
        threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                        cv2.THRESH_BINARY)[1]
        # Adding to the accumulated image
        accumulated_image = cv2.add(threshold_image, accumulated_image)
        colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT)

        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5, 255), min(class_id * 7, 255),
                         min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        final_result_overlay = cv2.addWeighted(frame, P_COUNT_FRAME_WEIGHTAGE,
                                               colormap_image,
                                               COLORMAP_FRAME_WEIGHTAGE_1, 0)
        cv2.imshow("Detection Results", final_result_overlay)
        time_interval = MULTIPLICATION_FACTOR * fps
        if frame_count % time_interval == 0:
            apply_time_stamp_and_save(final_result_overlay, people_count,
                                      upload_azure)
        frame = next_frame
        key = cv2.waitKey(1)
        if key == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
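# The configuration block at the top of `main()` above reads everything from
# environment variables with fallbacks via try/except. A tidier equivalent
# using `os.environ.get`, with the same names and defaults (a sketch, not a
# behavior change):
def read_env_config():
    device = os.environ.get('DEVICE', 'CPU')
    cpu_extension = os.environ.get('CPU_EXTENSION')       # None if unset
    prob_threshold = float(os.environ.get('PROB_THRESHOLD', 0.5))
    account_name = os.environ.get('ACCOUNT_NAME')
    account_key = os.environ.get('ACCOUNT_KEY')
    return device, cpu_extension, prob_threshold, account_name, account_key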
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    """
    # Read command line arguments
    model = args.model                      # path to the model IR
    batch_size = args.batch                 # set the batch size
    device = args.device                    # device name to perform inference on
    cpu_ext = args.cpu_extension            # CPU extension
    concurrency = args.concurrency          # number of concurrent infer requests
    volatility = args.volatility            # volatility threshold
    prob_threshold = args.prob_threshold    # threshold for detections
    duration_alarm_threshold = args.duration_alarm  # longest stay allowed
    crowd_alarm_threshold = args.crowd_alarm        # max people allowed

    ### Load the model through `infer_network` ###
    infer_network = Network()
    infer_network.load_model(model, batch_size, concurrency, device, cpu_ext)
    net_input_shape = infer_network.get_input_shape()

    ### Handle the input stream ###
    if args.input is None or args.input.lower() == 'cam':
        input = 0
    else:
        input = args.input
    # VideoCapture supports images too
    cap = cv2.VideoCapture(input)
    assert cap.isOpened(), "Failed to open the input"
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    total_people_count = 0
    last_stable_people_count = 0
    mismatch_count = 0    # deviation from the last stable detection
    total_duration = 0    # total duration in frames
    current_duration = 0  # current person's stay duration in frames
    frames = []           # frames to batch
    q = deque()           # infer request queue

    ### Loop until stream is over ###
    while cap.isOpened():
        ### Read from the video capture ###
        captured, next_frame = cap.read()
        if captured:
            frames.append(next_frame)

        ### Pre-process the image as needed ###
        if len(frames) >= batch_size or (not captured and frames):
            h = net_input_shape[2]
            w = net_input_shape[3]
            resized_frames = [
                cv2.resize(f, (w, h)).transpose(2, 0, 1)[None, ...]
                for f in frames
            ]
            frame_batch = np.concatenate(resized_frames, axis=0)
            request = infer_network.exec_net(frame_batch)
            q.append((request, frames))
            frames = []

        ### Start asynchronous inference for specified request ###
        # If the number of concurrent requests hit the limit,
        # we have to wait. Also if the end of the stream has
        # been reached, process what we have in the queue.
        if len(q) >= concurrency or not captured:
            if not q:
                break
            prev_request, prev_frames = q.popleft()
            ### Wait for the result ###
            ### Get the results of the inference request ###
            detected, boxes = infer_network.get_output(
                request=prev_request, class_id=1, confidence=prob_threshold)
            for i, prev_frame in enumerate(prev_frames):
                ### Extract stats from the results ###
                cur_people_count = int(detected[i])
                if last_stable_people_count != cur_people_count:
                    mismatch_count += 1
                else:
                    mismatch_count = 0
                # Check if we have a new stable value
                if mismatch_count > volatility or frame_count <= 1:
                    ### Calculate and send relevant information ###
                    ### on current_count, total_count and duration ###
                    ### to the MQTT server ###
                    last_stable_people_count = cur_people_count
                    total_people_count += last_stable_people_count
                    mismatch_count = 0
                    if last_stable_people_count > 0:
                        # person entered
                        current_duration = 1
                        total_duration += 1
                    else:
                        # person left
                        current_duration = 0
                        # Send average duration to the server
                        # (average duration is calculated in
                        # terms of the original video and doesn't
                        # depend on inference time or network delays)
                        ### Topic "person/duration": key of "duration" ###
                        avg_duration_payload = json.dumps({
                            'duration': total_duration / total_people_count / fps
                        })
                        client.publish(topic='person/duration',
                                       payload=avg_duration_payload)
                    # Send new people count to the server
                    ### Topic "person": keys of "count" and "total" ###
                    people_count_payload = json.dumps({
                        'count': last_stable_people_count,
                        'total': total_people_count
                    })
                    client.publish(topic='person', payload=people_count_payload)
                else:
                    # Last stable count remains the same
                    if last_stable_people_count > 0:
                        current_duration += 1
                        total_duration += 1

                # Prepare the output frame
                if detected[i]:
                    box = boxes[i]
                    x_min = int(box[0] * prev_frame.shape[1])
                    y_min = int(box[1] * prev_frame.shape[0])
                    x_max = int(box[2] * prev_frame.shape[1])
                    y_max = int(box[3] * prev_frame.shape[0])
                    output_frame = cv2.rectangle(prev_frame, (x_min, y_min),
                                                 (x_max, y_max), (0, 255, 0))
                else:
                    # nothing detected or error
                    output_frame = prev_frame

                # Alarms
                if duration_alarm_threshold >= 0 \
                        and current_duration / fps > duration_alarm_threshold:
                    cv2.putText(output_frame,
                                text="Chop-chop! "
                                     "Don't stay for too long. "
                                     "Life is short!",
                                org=(20, output_frame.shape[0] - 60),
                                fontFace=cv2.FONT_HERSHEY_DUPLEX,
                                fontScale=0.9,
                                color=(100, 250, 250),
                                thickness=2)
                if crowd_alarm_threshold >= 0 \
                        and total_people_count > crowd_alarm_threshold:
                    cv2.putText(output_frame,
                                text="Too many people. "
                                     "Beware COVID-19!",
                                org=(20, output_frame.shape[0] - 20),
                                fontFace=cv2.FONT_HERSHEY_TRIPLEX,
                                fontScale=1,
                                color=(0, 0, 255),
                                thickness=2)

                ### Send the frame to the FFMPEG server ###
                ### Write an output image if `single_image_mode` ###
                sys.stdout.buffer.write(output_frame)
                sys.stdout.buffer.flush()

    cap.release()
    cv2.destroyAllWindows()
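# A standalone sketch of the batching step above: `frames` is a list of HWC
# images that are resized, transposed to CHW, and stacked into one NCHW
# tensor for a single batched inference request. Uses the same cv2/np
# imports as the rest of this file.
def make_batch(frames, net_input_shape):
    h, w = net_input_shape[2], net_input_shape[3]
    planes = [cv2.resize(f, (w, h)).transpose(2, 0, 1)[None, ...]
              for f in frames]
    return np.concatenate(planes, axis=0)  # shape: (len(frames), C, h, w)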
def main():
    """
    Load the network and parse the output.

    :return: None
    """
    global INFO
    global DELAY
    global POSE_CHECKED

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    job_id = os.environ['PBS_JOBID']
    job_id = job_id.rstrip().split('.')[0]

    # if args.input == 'cam':
    #     input_stream = 0
    # else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    shopper = cv2.VideoWriter(
        os.path.join(args.output_dir, job_id, "shopper.mp4"),
        cv2.VideoWriter_fourcc(*"AVC1"), fps, (initial_w, initial_h), True)
    frame_count = 0

    progress_file_path = os.path.join(args.output_dir, job_id,
                                      'i_progress.txt')
    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
        # Adjust DELAY to match the FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    # Initialise the class
    infer_network = Network()
    infer_network_pose = Network()

    # Load the networks into the IE plugin to get the input layer shapes
    plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(
        args.model, args.device, 1, 1, 0, args.cpu_extension)
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        args.posemodel, args.device, 1, 3, 0, args.cpu_extension, plugin)[1]

    ret, frame = cap.read()
    while ret:
        looking = 0
        ret, next_frame = cap.read()
        frame_count += 1
        if not ret:
            print("checkpoint *BREAKING")
            break
        if next_frame is None:
            log.error("checkpoint ERROR! blank FRAME grabbed")
            break

        initial_wh = [cap.get(3), cap.get(4)]
        in_frame_fd = cv2.resize(next_frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        # Start asynchronous inference for specified request
        inf_start_fd = time.time()
        infer_network.exec_net(0, in_frame_fd)
        # Wait for the result
        infer_network.wait(0)
        det_time_fd = time.time() - inf_start_fd

        # Results of the output layer of the network
        res = infer_network.get_output(0)

        # Parse face detection output
        faces = face_detection(res, args, initial_wh)

        if len(faces) != 0:
            # Look for poses
            for res_hp in faces:
                xmin, ymin, xmax, ymax = res_hp
                head_pose = frame[ymin:ymax, xmin:xmax]
                in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))

                inf_start_hp = time.time()
                infer_network_pose.exec_net(0, in_frame_hp)
                infer_network_pose.wait(0)
                det_time_hp = time.time() - inf_start_hp

                # Parse head pose detection results
                angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")
                angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")
                if ((angle_y_fc > -22.5) & (angle_y_fc < 22.5) &
                        (angle_p_fc > -22.5) & (angle_p_fc < 22.5)):
                    looking += 1
                    POSE_CHECKED = True
                    INFO = INFO._replace(looker=looking)
                else:
                    INFO = INFO._replace(looker=looking)
        else:
            INFO = INFO._replace(looker=0)

        # Draw performance stats
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)
        if POSE_CHECKED:
            cv2.putText(frame,
                        "Head pose Inference time: {:.3f} ms.".format(
                            det_time_hp * 1000),
                        (0, 35), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (0, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Shopper: {}".format(INFO.shopper), (0, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Looker: {}".format(INFO.looker), (0, 110),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        shopper.write(frame)

        if (frame_count % 10 == 0) or ((frame_count + 1) / video_len == 1):
            print("Frame Count: ", frame_count, "video length", video_len)
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start),
                           frame_count + 1, video_len)
        frame = next_frame

    if args.output_dir:
        total_time = round(time.time() - infer_time_start, 2)
        stats = {}
        stats['time'] = str(total_time)
        stats['frames'] = str(video_len)
        stats['fps'] = str(round(video_len / total_time, 2))
        with open(os.path.join(args.output_dir, job_id, 'stats.json'),
                  'w') as json_file:
            json.dump(stats, json_file)

    infer_network.clean()
    infer_network_pose.clean()
    cap.release()
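# `progressUpdate` is called above but not defined in this file. On the
# Intel DevCloud it conventionally writes processed-frame progress to a
# small text file that the job dashboard polls. A minimal sketch of what
# such a helper might look like (the exact file format is an assumption,
# not taken from the original source):

def progressUpdate(file_path, elapsed_time, frame_count, video_len):
    """Write current progress and a naive time estimate to file_path."""
    progress = round(100 * frame_count / video_len, 1)
    # Estimate remaining time from the average time per processed frame
    remaining = round(elapsed_time * (video_len / frame_count - 1), 1)
    with open(file_path, 'w') as f:
        f.write('{} {} {}\n'.format(progress, elapsed_time, remaining))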
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise people counters
    person_last_count = 0
    person_total_count = 0
    person_start_time = 0

    # Only one async inference request at a time
    current_request_id = 0

    client.publish("person", json.dumps({"total": person_total_count}))
    client.publish("person", json.dumps({"count": 0}))

    # Handle different input streams
    single_image_mode = False
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    # Initialise the Network class
    infer_network = Network()
    # if args.cpu_extension and "CPU" in args.device:
    #     infer_network.add_extension(args.cpu_extension, "CPU")
    #     log.info("CPU extension loaded: {}".format(args.cpu_extension))

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    # Get the width and height of the input the model expects
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)
    source_width = int(cap.get(3))
    source_height = int(cap.get(4))
    # out = cv2.VideoWriter('out.mp4', 0x00000021, 30,
    #                       (source_width, source_height))

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(44)

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        # HWC => CHW
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)  # (n, c, h, w)

        ### TODO: Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(p_frame, current_request_id)

        ### TODO: Wait for the result ###
        if infer_network.wait(current_request_id) == 0:
            inference_stop = time.time() - inference_start

            ### TODO: Get the results of the inference request ###
            results = infer_network.get_output(current_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(
                    current_request_id)
                log.basicConfig(stream=sys.stdout, level=log.DEBUG)
                log_performance_counts(perf_count)

            ### TODO: Extract any desired stats from the results ###
            out_frame, person_count = frame_and_count(p_frame, results,
                                                      net_input_shape[3],
                                                      net_input_shape[2],
                                                      args.prob_threshold)
            # To avoid counting persons repeatedly with low-confidence
            # detectors, or detectors where no instance detection is
            # available, we could use
            # `from skimage.metrics import structural_similarity as ssim`
            # to discard counts coming from very similar frames.

            # out_frame has shape (1, c, h, w); convert back to HWC at the
            # source resolution before drawing on it
            out_frame = out_frame[0]
            out_frame = out_frame.transpose((1, 2, 0))
            out_frame = cv2.resize(out_frame, (source_width, source_height))
            inference_time_txt = "Inference time {:.3f}ms".format(
                inference_stop * 1000)
            cv2.putText(out_frame, inference_time_txt, (15, 15),
                        cv2.FONT_HERSHEY_PLAIN, 0.5, (200, 10, 10), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            # Perform analysis on the output to determine the number of
            # people in frame, time spent in frame, and the total number
            # of people counted, then publish to the MQTT server
            if person_count > person_last_count:
                person_start_time = time.time()
                person_total_count = (person_total_count + person_count
                                      - person_last_count)
                client.publish("person",
                               json.dumps({"total": person_total_count}))
            elif person_count < person_last_count:
                person_duration = int(time.time() - person_start_time)
                client.publish("person/duration",
                               json.dumps({"duration": person_duration}))
            client.publish("person", json.dumps({"count": person_count}))
            person_last_count = person_count

            if args.max_person_count and person_count > args.max_person_count:
                txt = "Max person count alert!"
                (txt_width, txt_height) = cv2.getTextSize(
                    txt, cv2.FONT_HERSHEY_PLAIN, 0.5, thickness=1)[0]
                txt_offset_x = 10
                txt_offset_y = out_frame.shape[0] - 10
                box_coords = ((txt_offset_x, txt_offset_y + 2),
                              (txt_offset_x + txt_width,
                               txt_offset_y - txt_height - 2))
                cv2.rectangle(out_frame, box_coords[0], box_coords[1],
                              (0, 0, 0), cv2.FILLED)
                cv2.putText(out_frame, txt, (txt_offset_x, txt_offset_y),
                            cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 0, 255), 1)

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('out_image.jpg', out_frame)

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        sys.stdout.flush()

        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
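# The comment inside the function above suggests skimage's structural
# similarity to skip near-duplicate frames before they influence the
# count. A minimal sketch of that idea (the helper name and threshold
# are illustrative assumptions, not part of the original code):

import cv2
from skimage.metrics import structural_similarity as ssim

def is_near_duplicate(prev_frame, frame, threshold=0.97):
    """Return True when two frames are almost identical, so the caller
    can reuse the previous count instead of re-counting."""
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    score = ssim(prev_gray, gray)  # 1.0 means identical images
    return score >= threshold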
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    present_count = 0
    preceding_count = 0
    total_count = 0
    start_time = 0
    duration = 0
    frame_count = 0
    wait_time = 57
    single_image_mode = False

    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    args.prob_threshold = float(args.prob_threshold)

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    rfcnn_input_shape = infer_network.get_input_shape()
    print(rfcnn_input_shape)
    # Width and height of the input the model expects
    dsize = (rfcnn_input_shape[3], rfcnn_input_shape[2])

    # Single image mode
    single_image_format = ['jpg', 'tif', 'png', 'jpeg', 'bmp']
    if args.input.split(".")[-1].lower() in single_image_format:
        single_image_mode = True
        frame = cv2.imread(args.input)
        height, width, channel = frame.shape
        p_frame = preprocess_frame(frame, dsize)
        infer_network.exec_net(p_frame)
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            infer_result = infer_network.get_output()
            ### TODO: Extract any desired stats from the results ###
            single_frame, present_count = extract_stats(
                frame, infer_result, args, width, height)
            ### TODO: Write an output image if `single_image_mode` ###
            cv2.imwrite("image.jpg", single_frame)

    ### TODO: Handle the input stream ###
    input_stream = cv2.VideoCapture(args.input)
    input_stream.open(args.input)
    width = int(input_stream.get(3))
    height = int(input_stream.get(4))
    # Create a video output to inspect the result locally
    # out = cv2.VideoWriter('out.mp4', 0x00000021, 30, (width, height))

    ### TODO: Loop until stream is over ###
    while input_stream.isOpened() and not single_image_mode:
        ### TODO: Read from the video capture ###
        flag, frame = input_stream.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        p_frame = preprocess_frame(frame, dsize)

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            infer_result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            out_frame, present_count = extract_stats(frame, infer_result,
                                                     args, width, height)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            # When a person enters the video
            if present_count > preceding_count:
                start_time = time.time()
                total_count += present_count - preceding_count
                frame_count = 0
                payload_total_count = {"total": total_count}
                client.publish("person", json.dumps(payload_total_count))

            # When the count drops, hold the previous count for up to
            # `wait_time` frames to ride out missed detections
            if present_count < preceding_count and frame_count < wait_time:
                present_count = preceding_count
                frame_count += 1

            # When the count has stayed lower for `wait_time` consecutive
            # frames, the person has really left: publish the duration
            if present_count < preceding_count and frame_count == wait_time:
                duration = int(time.time() - start_time)
                payload_duration = {"duration": duration}
                client.publish("person/duration", json.dumps(payload_duration))

            preceding_count = present_count
            payload_present_count = {"count": present_count}
            client.publish("person", json.dumps(payload_present_count))

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        sys.stdout.flush()

        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    input_stream.release()
    cv2.destroyAllWindows()
    client.disconnect()
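# Every `infer_on_stream` variant above takes an already-connected MQTT
# client. A minimal sketch of how such a client might be constructed
# with paho-mqtt (the localhost address, port 3001, and keepalive value
# are assumptions matching a typical local Mosca broker setup, not
# values taken from the original source):

import paho.mqtt.client as mqtt

MQTT_HOST = "localhost"   # assumed broker address
MQTT_PORT = 3001          # assumed broker port
MQTT_KEEPALIVE = 60       # seconds between keepalive pings

def connect_mqtt():
    """Create and connect an MQTT client for publishing stats."""
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE)
    return client

# Usage: client = connect_mqtt(); infer_on_stream(args, client)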