def intruder_detector():
    """
    Process the input source frame by frame and detect intruders, if any.
    :return status: 0 on success, negative value on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONFIG_FILE
    global video_caps
    global conf_labels_file_path
    global is_async_mode
    global UI
    global LOOP_VIDEO

    parse_args()
    ret = check_args()
    if ret != 0:
        return ret, ""

    if not os.path.isfile(CONFIG_FILE):
        return -12, ""

    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Create a subdirectory to save output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]

    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initialize a VideoWriter for each source
    if UI and not LOOP_VIDEO:
        for video_cap in video_caps:
            ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                               int(video_cap.input_width))
            if ret != 0:
                return ret, ret_value

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]

    # Arrange windows so that they are not overlapping
    arrange_windows()

    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    signal.signal(signal.SIGINT, signal_handler)
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    next_request_id = 1
    cur_request_id = 0

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    # Main loop starts here. Loop over all the video captures.
    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_fps))):
                if is_async_mode:
                    ret, video_cap.next_frame = video_cap.vc.read()
                else:
                    ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If there is no new frame or a frame could not be read,
                # exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break

            if no_more_data[idx]:
                stream_end_frame = numpy.zeros((int(video_cap.input_height),
                                                int(video_cap.input_width), 1),
                                               dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                cv2.imshow(video_cap.cam_name, stream_end_frame)
                continue

            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize the input frame to the inference resolution expected by
            # the model (from the .xml file)
            if is_async_mode:
                in_frame = cv2.resize(video_cap.next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))
                # Start asynchronous inference for the specified request
                infer_network.exec_net(next_request_id, in_frame)
                video_cap.frame = video_cap.next_frame
                # Async enabled and only one video capture
                if len(video_caps) == 1:
                    videoCapResult = video_cap
                # Async enabled and more than one video capture
                else:
                    # Get the previous index
                    videoCapResult = video_caps[idx - 1 if idx - 1 >= 0
                                                else len(video_caps) - 1]
            else:
                in_frame = cv2.resize(video_cap.frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))
                # Start synchronous inference for the specified request
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = video_cap

            inf_start = time.time()
            # Wait for the result
            if infer_network.wait(cur_request_id) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(cur_request_id)
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the
                    # probability is more than the specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        videoCapResult.current_count[label] += 1
                        xmin = int(obj[3] * videoCapResult.input_width)
                        ymin = int(obj[4] * videoCapResult.input_height)
                        xmax = int(obj[5] * videoCapResult.input_width)
                        ymax = int(obj[6] * videoCapResult.input_height)
                        # Draw a bounding box around the detected intruder
                        cv2.rectangle(videoCapResult.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(videoCapResult.no_of_labels):
                    if videoCapResult.candidate_count[i] == videoCapResult.current_count[i]:
                        videoCapResult.candidate_confidence[i] += 1
                    else:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.candidate_count[i] = videoCapResult.current_count[i]

                    if videoCapResult.candidate_confidence[i] == CONF_CANDIDATE_CONFIDENCE:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.changed_count[i] = True
                    else:
                        continue

                    if videoCapResult.current_count[i] > videoCapResult.last_correct_count[i]:
                        videoCapResult.total_count[i] += \
                            videoCapResult.current_count[i] - videoCapResult.last_correct_count[i]
                        det_objs = videoCapResult.current_count[i] - \
                            videoCapResult.last_correct_count[i]
                        total_count = sum(videoCapResult.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i],
                                videoCapResult.cam_name)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=videoCapResult.frame_count)
                            videoCapResult.events.append(event)

                        snapshot_name = "output/intruder_{}.png".format(total_count)
                        cv2.imwrite(snapshot_name, videoCapResult.frame)
                    videoCapResult.last_correct_count[i] = videoCapResult.current_count[i]

                # Create the intruder log window, add logs to the frame and
                # display it
                log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                         dtype='uint8')
                for i, log in enumerate(log_list):
                    cv2.putText(log_window, log, (10, 20 * i + 15),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                cv2.imshow("Intruder Log", log_window)
                videoCapResult.frame_count += 1

                # Video output
                if UI and not LOOP_VIDEO:
                    videoCapResult.vw.write(videoCapResult.frame)

                log_message = "Async mode is on." if is_async_mode else \
                    "Async mode is off."
                cv2.putText(videoCapResult.frame, log_message,
                            (10, int(videoCapResult.input_height) - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 10, 10), 1)
                inf_time_message = "Inference time: N/A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(inf_time * 1000)
                cv2.putText(videoCapResult.frame, inf_time_message,
                            (10, int(videoCapResult.input_height) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                fps_time = time.time() - start_time
                fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
                cv2.putText(videoCapResult.frame, fps_message,
                            (10, int(videoCapResult.input_height) - 10),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

                # Display the video output
                cv2.imshow(videoCapResult.cam_name, videoCapResult.frame)

            start_time = time.time()

            # Loop the video to mimic continuous input if the LOOP_VIDEO flag
            # is True
            if LOOP_VIDEO and not videoCapResult.is_cam:
                vfps = int(round(videoCapResult.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended, restart it
                if videoCapResult.loop_frames > videoCapResult.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    videoCapResult.loop_frames = 0
                    videoCapResult.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

            if is_async_mode:
                # Swap infer request IDs
                cur_request_id, next_request_id = next_request_id, cur_request_id

        if cv2.waitKey(1) == 27:
            break
        if cv2.waitKey(1) == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))
        if False not in no_more_data:
            break

    ret = save_json()
    if ret != 0:
        return ret, ''
    infer_network.clean()
    log_file.close()
    return 0, ''
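# ---------------------------------------------------------------------------
# Every variant in this file delegates to a `Network` helper class whose
# implementation is not included here. The sketch below is a minimal,
# hypothetical reconstruction based on how the snippets call it
# (`load_model(...)[1]` yielding the input shape; `exec_net`, `wait` and
# `get_output` keyed by request id; `performance_counter`; `clean`),
# assuming the legacy `openvino.inference_engine` Python API (roughly the
# 2020.x releases). It is an illustration, not the original project's class.
# ---------------------------------------------------------------------------
import os

from openvino.inference_engine import IECore


class Network:
    """Minimal wrapper around the legacy OpenVINO Inference Engine API."""

    def __init__(self):
        self.ie = None
        self.net = None
        self.exec_network = None
        self.input_blob = None
        self.output_blob = None

    def load_model(self, model_xml, device="CPU", input_size=1, output_size=1,
                   num_requests=2, cpu_extension=None):
        """Read and load the IR model; return (exec_network, input_shape)."""
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        self.ie = IECore()
        if cpu_extension and "CPU" in device:
            self.ie.add_extension(cpu_extension, "CPU")
        self.net = self.ie.read_network(model=model_xml, weights=model_bin)
        self.input_blob = next(iter(self.net.input_info))
        self.output_blob = next(iter(self.net.outputs))
        self.exec_network = self.ie.load_network(network=self.net,
                                                 device_name=device,
                                                 num_requests=num_requests)
        input_shape = self.net.input_info[self.input_blob].input_data.shape
        # Callers unpack `load_model(...)[1]` as n, c, h, w
        return self.exec_network, input_shape

    def exec_net(self, request_id, frame):
        """Start an asynchronous inference request."""
        self.exec_network.start_async(request_id=request_id,
                                      inputs={self.input_blob: frame})

    def wait(self, request_id):
        """Block until the request completes; 0 means success."""
        return self.exec_network.requests[request_id].wait(-1)

    def get_output(self, request_id):
        """Return the raw output blob of the finished request."""
        return self.exec_network.requests[request_id].outputs[self.output_blob]

    def performance_counter(self, request_id):
        """Return per-layer performance statistics for the request."""
        return self.exec_network.requests[request_id].get_perf_counts()

    def clean(self):
        """Release references to the network and the plugin."""
        del self.exec_network
        del self.net
        del self.ie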
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    # prob_threshold = args.prob_threshold
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    time_on_video = 0
    time_not_on_video = 0
    image_mode = False
    positive_count = 0

    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    ### TODO: Handle the input stream ###
    # Checks for image input
    if args.input.endswith('.jpg') or args.input.endswith('.png') or \
            args.input.endswith('.bmp'):
        image_mode = True
        media_stream = args.input
    # Checks for webcam input
    elif args.input == 'CAM':
        media_stream = 0
    # Checks for video input
    else:
        media_stream = args.input
        assert os.path.isfile(args.input)

    ### TODO: Loop until stream is over ###
    capture = cv2.VideoCapture(media_stream)
    if media_stream:
        capture.open(args.input)
    if not capture.isOpened():
        log.error("Not able to open the video file!")

    ### TODO: Read from the video capture ###
    # global width, height, prob_threshold
    prob_threshold = args.prob_threshold
    width = capture.get(3)
    height = capture.get(4)

    while capture.isOpened():
        check, frame = capture.read()
        if not check:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (w, h))
        image = image.transpose(2, 0, 1)
        image = image.reshape(n, c, h, w)

        ### TODO: Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            inference_time = time.time() - inference_start

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(cur_request_id)
            # if perf_counts:
            #     perf_count = infer_network.exec_net(cur_request_id)
            #     performance_counts(perf_count)

            ### TODO: Extract any desired stats from the results ###
            current_count = 0
            track_frames = {}
            track_person = {positive_count: 0}
            frame_count = 0
            for character in result[0][0]:
                if character[2] > prob_threshold:
                    frame_count += 1
                    track_frames[frame_count] = character[2]
                    start_time_not_on_video = time.time()
                    positive_count += 1
                    track_person[positive_count] = time_on_video
                    xmin = int(character[3] * width)
                    ymin = int(character[4] * height)
                    xmax = int(character[5] * width)
                    ymax = int(character[6] * height)
                    frame = cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                          (0, 55, 255), 1)
                    time_on_video = start_time_not_on_video - start_time
                    if time_on_video > 3:
                        if current_count > 1:
                            current_count = last_count
                        else:
                            current_count += 1
                    else:
                        current_count = last_count

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count > last_count:
                start_time = time.time()
                time_not_on_video = time.time() - start_time_not_on_video
                if current_count == 1 and last_count == 0:
                    if time_on_video > 2:
                        total_count = total_count + current_count - last_count
                        client.publish("person",
                                       json.dumps({"total": total_count}))

            if current_count < last_count:
                if current_count == 0:
                    start_time_not_on_video = time.time()
                time_on_video = int(time.time() - start_time)
                if last_count == 0 and time_not_on_video < 0.005:
                    time_on_video = track_person[positive_count] + time_on_video
                client.publish("person/duration",
                               json.dumps({"duration": time_on_video}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            cv2.putText(frame,
                        "Inference time = {:.2f} ms".format(inference_time * 1000),
                        (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        (200, 10, 10), 1)
            cv2.putText(frame,
                        "Persons in video frame = {:}".format(last_count),
                        (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        (200, 10, 10), 1)
            cv2.putText(frame, "Total count = {:}".format(total_count),
                        (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        (200, 10, 10), 1)
            cv2.putText(frame,
                        "Time on video = {:.2f} s".format(time_on_video),
                        (15, 60), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        (200, 10, 10), 1)
            cv2.putText(frame,
                        "Time not on video = {:.3f} ms".format(
                            time_not_on_video * 1000),
                        (15, 75), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        (200, 10, 10), 1)

        key = cv2.waitKey(15)
        if key == ord('q'):
            break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if image_mode:
            cv2.imwrite('output.jpg', frame)
        # cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
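# ---------------------------------------------------------------------------
# Note on the FFMPEG hand-off used by the variant above and several below:
# `sys.stdout.buffer.write(frame)` streams raw BGR frames to stdout, so the
# script is meant to be piped into an ffmpeg process that re-encodes them
# and forwards the result to an ffserver feed. A typical invocation is
# sketched below; the script name, video size, framerate, port and feed name
# are assumptions and must match the actual input video and server config.
#
#   python main.py -i resources/video.mp4 -m model.xml -d CPU -pt 0.6 \
#       | ffmpeg -v warning -f rawvideo -pixel_format bgr24 \
#                -video_size 768x432 -framerate 24 -i - \
#                http://0.0.0.0:3004/fac.ffm
# ---------------------------------------------------------------------------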
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)

    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    model = os.environ['MODEL']
    device = os.environ['DEVICE'] if 'DEVICE' in os.environ.keys() else 'CPU'
    cpu_extension = os.environ['CPU_EXTENSION'] \
        if 'CPU_EXTENSION' in os.environ.keys() else None

    # Checks for live feed
    if os.environ['INPUT'] == 'CAM':
        input_stream = 0
    # Checks for input image
    elif os.environ['INPUT'].endswith('.jpg') or \
            os.environ['INPUT'].endswith('.bmp'):
        single_image_mode = True
        input_stream = os.environ['INPUT']
    # Checks for video file
    else:
        input_stream = os.environ['INPUT']
        assert os.path.isfile(os.environ['INPUT']), \
            "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(os.environ['INPUT'])
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(model, device, 1, 1, cur_request_id,
                                          cpu_extension)[1]

    global initial_w, initial_h
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cmdstring = ('ffmpeg',
                 '-y', '-r', '%d' % (fps),  # overwrite output, match source FPS
                 '-s', '%dx%d' % (initial_w, initial_h),  # frame size
                 '-pixel_format', 'bgr24',  # pixel format
                 '-f', 'rawvideo',
                 '-i', '-',  # tell ffmpeg to expect raw video from the pipe
                 'http://localhost:8090/fac.ffm')  # output feed
    p = subprocess.Popen(cmdstring, stdin=subprocess.PIPE)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Start async inference
        inf_start = time.time()
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request.
        infer_network.exec_net(cur_request_id, image)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if int(os.environ.get('PERF_COUNTS', 0)) > 0:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            frame, current_count = ssd_parser(frame, result)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When a new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            if key_pressed == 27:
                break

        p.stdin.write(frame.tobytes())

    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)

    infer_network.clean()
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
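# ---------------------------------------------------------------------------
# Several mains in this file call a `performance_counts(perf_count)` helper
# that is not defined here. A plausible sketch, assuming `perf_count` is the
# per-layer dict returned by the legacy API's
# `exec_network.requests[request_id].get_perf_counts()` (as in the Network
# sketch earlier in this file):
# ---------------------------------------------------------------------------
def performance_counts(perf_count):
    """Print per-layer execution statistics reported by the Inference Engine."""
    print("{:<40} {:<15} {:<15} {:<15} {:<10}".format(
        'name', 'layer_type', 'exec_type', 'status', 'real_time, us'))
    for layer, stats in perf_count.items():
        print("{:<40} {:<15} {:<15} {:<15} {:<10}".format(
            layer, stats['layer_type'], stats['exec_type'], stats['status'],
            stats['real_time']))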
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    args = build_argparser().parse_args()
    single_image_mode = False

    # Initialise the class
    infer_network = Network()
    model = args.model
    video_file = args.input
    extnsn = args.cpu_extension
    device = args.device
    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0

    n, c, h, w = infer_network.load_model(model, device, 1, 1, cur_request_id,
                                          extnsn)[1]

    ### TODO: Handle the input stream ###
    # Checks for live feed
    if video_file == 'CAM':
        input_stream = 0
    # Checks for input image
    elif video_file.endswith('.jpg') or video_file.endswith('.bmp'):
        single_image_mode = True
        input_stream = video_file
    else:
        input_stream = video_file
        assert os.path.isfile(video_file), "File doesn't exist"

    try:
        # Capture video
        capture = cv2.VideoCapture(video_file)
    except FileNotFoundError:
        print("Cannot locate the file: " + video_file)
    except Exception as e:
        print("Something went wrong with the file: " + str(e))

    global initial_w, initial_h, prob_threshold
    total_count = 0
    duration = 0
    initial_w = capture.get(3)
    initial_h = capture.get(4)
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    temp = 0
    tk = 0

    # Loop until stream is over
    while capture.isOpened():
        flag, frame = capture.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-processing the input frame
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        # Async inference
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        color = (255, 0, 0)

        # Waiting for the result
        if infer_network.wait(cur_request_id) == 0:
            time_elapsed = time.time() - inf_start
            # Result from the inference
            result = infer_network.get_output(cur_request_id)

            # Bounding box
            frame, current_count, d, tk = draw_box(result, frame, initial_w,
                                                   initial_h, temp, tk)

            # Inference time
            inf_timemsg = "Inference Time: {:.3f}ms".format(time_elapsed * 1000)
            cv2.putText(frame, inf_timemsg, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)

            # Calculating and sending info
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            text_2 = "Distance: %d" % d + " Lost frame: %d" % tk
            cv2.putText(frame, text_2, (15, 30), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)
            text_2 = "Current count: %d" % current_count
            cv2.putText(frame, text_2, (15, 45), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)

            if current_count > 3:
                text_2 = "Maximum count reached!!!"
                (text_width, text_height) = cv2.getTextSize(
                    text_2, cv2.FONT_HERSHEY_COMPLEX, 0.5, thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(frame, box_coords[0], box_coords[1], (0, 0, 0),
                              cv2.FILLED)
                cv2.putText(frame, text_2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            temp = d

        if key_pressed == 27:
            break

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    # Saving the image
    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)

    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialize counters and flags
    current_count = 0
    total_count = 0
    duration = 0
    last_count = 0
    start_time = 0
    isFirst = True
    single_image_mode = False

    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, device="CPU",
                             cpu_extension=args.cpu_extension)
    n, c, h, w = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        # assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        ret, frame = cap.read()
        key_pressed = cv2.waitKey(60)
        if not ret:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(image)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            boxes, score = post_detection(result, frame.shape, prob_threshold)
            for box in boxes:
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                              (0, 238, 255), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            # Debounce the count: only accept a new value once it has been
            # stable for half a second
            if len(boxes) != current_count:
                if isFirst:
                    ts1 = time.time()
                    isFirst = False
                if time.time() - ts1 > 0.5:
                    current_count = len(boxes)
                    isFirst = True

            ### Topic "person": keys of "count" and "total" ###
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            ### Topic "person/duration": key of "duration" ###
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        if key_pressed == ord('q'):
            break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
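# ---------------------------------------------------------------------------
# The variant above delegates SSD post-processing to a `post_detection`
# helper that is not shown in this file. A minimal sketch, assuming the usual
# SSD DetectionOutput layout [1, 1, N, 7] with rows of
# (image_id, label, confidence, xmin, ymin, xmax, ymax) in relative
# coordinates; the name and signature are taken from the call site:
# ---------------------------------------------------------------------------
def post_detection(result, frame_shape, prob_threshold):
    """Return pixel-space boxes and confidences above the threshold."""
    height, width = frame_shape[:2]
    boxes, scores = [], []
    for detection in result[0][0]:
        confidence = float(detection[2])
        if confidence > prob_threshold:
            boxes.append((int(detection[3] * width),
                          int(detection[4] * height),
                          int(detection[5] * width),
                          int(detection[6] * height)))
            scores.append(confidence)
    return boxes, scores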
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_net = Network()

    ### TODO: Load the model through `infer_network` ###
    exec_net = infer_net.load_model(args.model, args.device,
                                    args.cpu_extension)

    ### TODO: Handle the input stream ###
    # Flag for the input image (must be set before the input checks below)
    single_image_mode = False
    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0
    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    global initial_w, initial_h, prob_threshold, FRAME_THRES, count_conf
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    FRAME_THRES = args.frame_threshold
    count_conf = 0

    # Output video for testing; 0x00000021 is the H.264/AVC fourcc
    # (cv2.VideoWriter_fourcc(*"AVC1") is an alternative)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    people_counter = cv2.VideoWriter("people_counter.mp4", 0x00000021, fps,
                                     (int(initial_w), int(initial_h)), True)

    cur_request_id = 0
    last_count = 0
    prev_count, current_count = 0, 0
    total_count = 0
    start_time = 0

    # Assess performance
    det_time = []
    input_capture_time = []
    frame_count = 0
    total_start = time.time()

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        input_capture_start = time.time()
        flag, frame = cap.read()
        frame_count += 1
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        image = infer_net.preprocess(frame)
        input_capture_time.append(time.time() - input_capture_start)

        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_net.execute(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_net.wait(cur_request_id) == 0:
            det_time.append(time.time() - inf_start)

            ### TODO: Get the results of the inference request ###
            result = infer_net.get_output(cur_request_id)

            ### TODO: Extract any desired stats from the results ###
            frame, detected_count = ssd_out(frame, result)
            if detected_count == prev_count:
                current_count = prev_count
                count_conf = 0
            else:
                count_conf += 1
                if count_conf == FRAME_THRES:
                    # Update current_count only after the new value has held
                    # for FRAME_THRES consecutive frames
                    current_count = detected_count
                    count_conf = 0
            prev_count = current_count

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            # When a new person enters the video
            if current_count > last_count:
                start_frame = cap.get(cv2.CAP_PROP_POS_FRAMES)
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
                cv2.putText(frame,
                            str(total_count) + ' ' + str(current_count) + ' ' +
                            str(cap.get(cv2.CAP_PROP_POS_FRAMES)),
                            (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            (200, 10, 10), 1)

            ### Topic "person/duration": key of "duration" ###
            # Person duration in the video is calculated from frame positions
            if current_count < last_count:
                duration = int((cap.get(cv2.CAP_PROP_POS_FRAMES) - start_frame) /
                               int(cap.get(cv2.CAP_PROP_FPS)))
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        people_counter.write(frame)

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    total_time = time.time() - total_start
    with open('stats.txt', 'w') as f:
        f.write(str(round(total_time, 1)) + '\n')
        f.write(str(frame_count) + '\n')
    # print('input capture time: avg', sum(input_capture_time)*1000/len(input_capture_time),
    #       'ms | min', min(input_capture_time)*1000, '| max', max(input_capture_time)*1000)
    # print('detection time: avg', sum(det_time)*1000/len(det_time),
    #       'ms | min', min(det_time)*1000, '| max', max(det_time)*1000)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_net.clean()
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0
    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Start asynchronous inference for the specified request.
        inf_start = time.time()
        img_preprocessed = preprocess(n, c, h, w, frame)
        infer_network.exec_net(cur_request_id, img_preprocessed)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            output = infer_network.get_output(cur_request_id, 'DetectionOutput')
            detections = output[0, 0, :, :]
            current_count = 0
            for detection in detections:
                # Proceed only if the confidence is above 0.5
                confidence = detection[2]
                if confidence > .5:
                    current_count += 1
                    # Detection class
                    idx = detection[1]
                    class_name = coco_classes[int(idx)]
                    log.info(" " + str(idx) + " " + str(confidence) + " " +
                             class_name)
                    if int(idx) == 1:  # only person
                        # Get the box to be displayed
                        axis = detection[3:7] * (initial_w, initial_h,
                                                 initial_w, initial_h)
                        (start_X, start_Y, end_X, end_Y) = axis.astype(int)[:4]
                        cv2.rectangle(frame, (start_X, start_Y),
                                      (end_X, end_Y), (0, 55, 255),
                                      thickness=2)
                        cv2.putText(frame, class_name, (start_X, start_Y),
                                    cv2.FONT_ITALIC, (.0005 * initial_w),
                                    (0, 0, 255))
                        # boxes, labels, probs

            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            # frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When a new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            if key_pressed == 27:
                break

        # Send the frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)
        current_count = 0

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Checks for input image
    if args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Checks for video file
    else:
        input_stream = args.input
        # assert os.path.isfile(args.input), "Specified input file doesn't exist"

    if os.path.isfile(args.input):
        # Works for a local file:
        gstreamer_pipeline = (
            'filesrc location=%s ! qtdemux ! h264parse ! avdec_h264 ! '
            'videoconvert ! appsink sync=false' % (input_stream))
        wk = 33
    else:
        # RTSP stream:
        gstreamer_pipeline = (
            'rtspsrc location=%s ! queue ! rtph264depay ! h264parse '
            'config-interval=-1 ! avdec_h264 ! videoconvert ! '
            'appsink sync=false' % (input_stream))
        wk = 1
    cap = cv2.VideoCapture(gstreamer_pipeline, cv2.CAP_GSTREAMER)

    output_stream = args.output
    gstreamer_out = (
        "appsrc ! videoconvert ! x264enc tune=zerolatency bitrate=5000 "
        "speed-preset=ultrafast ! flvmux streamable=true ! "
        "rtmpsink location='%s live=1'" % (output_stream))
    fps = int(args.fps)
    out = cv2.VideoWriter(gstreamer_out, -1, fps,
                          (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))), True)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(wk)

        # Start async inference
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request.
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)
            resol = str(int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) + 'x' + \
                str(int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            client.publish("video/fps", fps)
            client.publish("video/resolution", resol)
            client.publish("person", current_count)
            client.publish("person/inference",
                           "{:.3f}ms".format(det_time * 1000))
            last_count = current_count
            if key_pressed == 27:
                break

        # Send the frame to the GStreamer writer instead of the ffmpeg server
        # sys.stdout.buffer.write(frame)
        out.write(frame)
        # print('Count:' + str(current_count))
        # print('FPS:' + str(int(cap.get(cv2.CAP_PROP_FPS))))
        # print('Resol:' + str(resol))
        # sys.stdout.flush()

    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initial, global variables for counting
    current_request_id = 0
    start_time = 0
    last_count = 0
    total_count = 0

    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, current_request_id,
                             args.cpu_extension)
    model_input_shape = infer_network.get_input_shape()

    ### Handle the input stream ###
    single_image_mode = False
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.png') or \
            args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "The input file does not exist"

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(input_stream)
    if not cap.isOpened():
        log.error('Error! The video file/source is not opening')

    # Initial width and height taken from the input
    initial_width = int(cap.get(3))
    initial_height = int(cap.get(4))

    ### Loop until stream is over ###
    while cap.isOpened():
        ### Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        pressed_key = cv2.waitKey(60)

        ### Pre-process the image as needed ###
        width = model_input_shape[3]
        height = model_input_shape[2]
        processed_input_image = cv2.resize(frame, (width, height))
        processed_input_image = processed_input_image.transpose((2, 0, 1))
        processed_input_image = processed_input_image.reshape(
            model_input_shape[0], model_input_shape[1], height, width)

        ### Start asynchronous inference for specified request ###
        start_of_inference = time.time()
        infer_network.exec_net(current_request_id, processed_input_image)

        ### Wait for the result ###
        if infer_network.wait(current_request_id) == 0:
            detection_time = (time.time() - start_of_inference) * 1000

            ### Get the results of the inference request ###
            result = infer_network.get_output(current_request_id)

            ### Extract any desired stats from the results ###
            frame, present_count = draw_rectangular_box(frame, result,
                                                        initial_width,
                                                        initial_height,
                                                        prob_threshold)

            # Find the inference time and write it on the video as text
            inf_time_msg = "Inference time: {:.5f}ms".format(detection_time)
            cv2.putText(frame, inf_time_msg, (20, 10),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # The person count is calculated here
            if present_count > last_count:
                start_time = time.time()
                total_count += present_count - last_count
                client.publish('person', json.dumps({"total": total_count}))

            # The duration is calculated here
            if present_count < last_count:
                person_duration = int(time.time() - start_time)
                # This is to prevent double counting; a higher value ensures
                # that the app does not get oversensitive
                if person_duration > 5:
                    total_count -= 1
                client.publish('person/duration',
                               json.dumps({"duration": person_duration}))

            # if present_count >= 4:
            #     print('Alert! Number of people exceeds the limit! '
            #           'Please take necessary action.')

            client.publish('person', json.dumps({"count": present_count}))
            last_count = present_count

        # End if escape key is pressed
        if pressed_key == 27:
            break

        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    ### Write an output image if `single_image_mode` ###
    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global DELAY
    global CLIENT
    global SIG_CAUGHT

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    logger = log.getLogger()

    render_time = 0
    roi_x = args.pointx
    roi_y = args.pointy
    roi_w = args.width
    roi_h = args.height

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        sys.exit(1)

    if input_stream:
        # Adjust DELAY to match the FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    # Initialise the class
    infer_network = Network()
    # Load the network to the IE plugin to get the shape of the input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                          args.cpu_extension)

    ret, frame = cap.read()
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_count = 0
    job_id = os.environ['PBS_JOBID']
    result_file = open(
        os.path.join(args.output_dir, 'output_' + str(job_id) + '.txt'), "w")
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_time_start = time.time()

    while ret:
        dims = ""
        ret, next_frame = cap.read()
        if not ret:
            break
        initial_wh = [cap.get(3), cap.get(4)]
        if next_frame is None:
            log.error("ERROR! blank FRAME grabbed")
            break

        # If either default values or negative numbers are given,
        # then we will default to the start of the FRAME
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]
        key_pressed = cv2.waitKey(int(DELAY))

        selected_region = [roi_x, roi_y, roi_w, roi_h]
        # Region corners (the original assigned these to swapped min/max
        # names; they are ordered min-x, min-y, max-x, max-y here)
        x_min1 = str(selected_region[0])
        y_min1 = str(selected_region[1])
        x_max1 = str(selected_region[0] + selected_region[2])
        y_max1 = str(selected_region[1] + selected_region[3])

        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start asynchronous inference for the specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame_fd)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        # Results of the output layer of the network
        res = infer_network.get_output(0)

        # Parse SSD output
        ssd_out(res, args, initial_wh, selected_region)

        est = str(render_time * 1000)
        time1 = round(det_time * 1000)
        Worker = INFO.safe
        out_list = [str(frame_count), x_min1, y_min1, x_max1, y_max1,
                    str(Worker), est, str(time1)]
        for i in range(len(out_list)):
            dims += out_list[i] + ' '
        dims += '\n'
        result_file.write(dims)

        render_start = time.time()
        render_end = time.time()
        render_time = render_end - render_start

        frame_count += 1
        if frame_count % 10 == 0:
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start),
                           frame_count, video_len)

        frame = next_frame
        if key_pressed == 27:
            print("Attempting to stop background threads")
            break

    if args.output_dir is None:
        cv2.destroyAllWindows()
    else:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')

    infer_network.clean()
    cap.release()
    cv2.destroyAllWindows()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()

    model = args.model
    input_mode = args.input
    device = args.device
    # Single image flag
    single_image_input_mode = False

    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0
    duration = 0
    color = (255, 0, 0)
    temp_dist = 0
    tk = 0

    ### TODO: Load the model through `infer_network` ###
    # Load once and take the input dimensions from the network shape
    # (the original loaded the model twice)
    infer_network.load_model(model, device)
    net_input_shape = infer_network.get_input_shape()
    n, c, h, w = net_input_shape
    log.info("Input dimensions of the loaded model: {} {} {} {}".format(
        n, c, h, w))

    ### TODO: Handle the input stream ###
    # Live camera feed
    if input_mode == 'CAMERA':
        input_stream = 0
    # Single image
    elif input_mode.endswith('.jpg') or input_mode.endswith('.bmp'):
        single_image_input_mode = True
        input_stream = input_mode
    else:
        input_stream = input_mode
        assert os.path.isfile(input_mode), "Specified input file doesn't exist"

    try:
        cap = cv2.VideoCapture(input_stream)
    except FileNotFoundError:
        print("Cannot locate input stream file: " + input_mode)
    except Exception as e:
        print("Unknown error in input stream: ", e)

    global initial_w, initial_h, prob_threshold
    # Input frame width and height.
    width = cap.get(3)
    height = cap.get(4)
    prob_threshold = args.prob_threshold

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        log.info("Input frame size: {}".format(frame.shape))
        pro_image = cv2.resize(frame, (w, h))
        log.info("Resized frame shape: {}".format(pro_image.shape))
        pro_image = pro_image.transpose((2, 0, 1))
        log.info("Transposed frame: {}".format(pro_image.shape))
        pro_image = pro_image.reshape((n, c, h, w))
        log.info("Final processed image: {}".format(pro_image.shape))

        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        log.info("Starting the inference engine")
        infer_network.exec_net(pro_image)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            log.info("Reached the inference result section")
            det_time = time.time() - inf_start

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            out_frame, current_count, dist, tk = draw_masks(result, frame,
                                                            width, height,
                                                            temp_dist, tk)

            # Printing inference time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(out_frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)

            # Calculate and send relevant information
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            # Adding overlays to the frame
            txt2 = "Distance: %d" % dist + " Lost frame: %d" % tk
            cv2.putText(out_frame, txt2, (15, 30), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)
            txt2 = "Current count: %d " % current_count
            cv2.putText(out_frame, txt2, (15, 45), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)

            if current_count > 3:
                txt2 = "Alert! Maximum count reached"
                (text_width, text_height) = cv2.getTextSize(
                    txt2, cv2.FONT_HERSHEY_COMPLEX, 0.5, thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                # Make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(out_frame, box_coords[0], box_coords[1],
                              (0, 0, 0), cv2.FILLED)
                cv2.putText(out_frame, txt2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            # People count
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            temp_dist = dist

            # Display the resulting frame
            cv2.imshow('Output_Frame', out_frame)

        # Break if escape key is pressed
        if key_pressed == 27:
            break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        sys.stdout.flush()

    ### TODO: Write an output image if `single_image_mode` ###
    if single_image_input_mode:
        cv2.imwrite('output_image.jpg', out_frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def infer_on_stream(args, client):
    # Initialise the class
    infer_network = Network()
    single_img = False
    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0

    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Handle the input stream
    if args.input == 'CAM':
        args.input = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_img = True
    else:
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap, width, height = get_stream_source(args.input)

    # Initial setup
    duration = 0
    color = (255, 0, 0)

    # Loop until stream is over
    while cap.isOpened():
        # Read from the video capture
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the image as needed
        image = preprocess_image(frame, n, c, h, w)

        # Start asynchronous inference for the specified request
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start

            # Get the results of the inference request
            result = infer_network.get_output(cur_request_id)

            # Draw bounding boxes
            frame, current_count = draw_outputs(result, frame, width, height,
                                                args.prob_threshold)

            # Printing inference time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)

            # Calculate and send relevant information
            if current_count > last_count:
                # New entry
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            txt2 = "Current count: {}".format(current_count)
            cv2.putText(frame, txt2, (15, 45), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)

            if current_count > 5:
                txt2 = "Alert! Maximum count reached"
                (text_width, text_height) = cv2.getTextSize(
                    txt2, cv2.FONT_HERSHEY_COMPLEX, 0.5, thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                # Make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(frame, box_coords[0], box_coords[1], (0, 0, 0),
                              cv2.FILLED)
                cv2.putText(frame, txt2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        if key_pressed == 27:
            break

        # Send the frame to the FFMPEG server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    # Save the image
    if single_img:
        cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
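# ---------------------------------------------------------------------------
# The variant above factors stream setup and preprocessing into helpers that
# are not included here. Hypothetical sketches matching the call sites
# (`cap, width, height = get_stream_source(args.input)` and
# `image = preprocess_image(frame, n, c, h, w)`), mirroring the inline code
# used by the other variants in this file:
# ---------------------------------------------------------------------------
import cv2


def get_stream_source(input_stream):
    """Open the capture and return it along with the source frame size."""
    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    return cap, width, height


def preprocess_image(frame, n, c, h, w):
    """Resize to the network input size and change layout from HWC to NCHW."""
    image = cv2.resize(frame, (w, h))
    image = image.transpose((2, 0, 1))
    return image.reshape((n, c, h, w))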
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    log.info("Loading the model through Inference Engine...")
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    ### Handle the input stream ###
    # Set flag for the input image
    single_image_mode = False
    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0
    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    width = int(cap.get(3))
    height = int(cap.get(4))

    # Define and set global variables
    # (had_counted is assumed to be module-level state alongside had_found)
    global had_found
    global had_counted
    global total_count
    duration = 0

    ### Loop until stream is over ###
    while cap.isOpened():
        ### Read from the video capture ###
        # Get the return value and the frame
        retval, frame = cap.read()
        if not retval:
            break
        key_pressed = cv2.waitKey(60)  # wait for 60 ms

        ### Pre-process the image as needed ###
        pr_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        pr_frame = pr_frame.transpose((2, 0, 1))  # transpose layout from HWC to CHW
        pr_frame = pr_frame.reshape(1, *pr_frame.shape)

        ### Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_network.exec_net(pr_frame)

        ### Wait for the result ###
        if infer_network.wait() == 0:
            det_time = time.time() - inf_start

            ### Get the results of the inference request ###
            result = infer_network.get_output()

            ### Extract any desired stats from the results ###
            # Get and draw the bounding box for each person
            frame, p_counts = count_draw(frame, result, args, width, height)

            ### Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            # Get the unique classes in the frame, because our scenario is one
            # person entering and exiting the frame at a time
            unique_classes = get_uclasses(result, width, height)
            # If the person class ID (15) is present in the frame, use
            # get_total to calculate the total count and duration
            if 15 in unique_classes:
                total_count, duration = get_total()
            # If the person class ID (15) has left the frame and was already
            # counted and found, publish the duration to the MQTT server and
            # reset had_found
            elif 15 not in unique_classes and had_counted and had_found:
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
                had_found = False
            # Otherwise, reset had_found
            else:
                if had_found:
                    log.info("Person counted already...")
                    had_found = False

            # Draw performance stats on the frame
            total_message = "The Total Count: {}".format(total_count)
            current_message = "The Current Count: {}".format(p_counts)
            duration_message = "Duration in Frame: {} sec".format(duration)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, current_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, total_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, duration_message, (15, 60),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            # Publish to the MQTT server
            client.publish("person", json.dumps({"count": p_counts}))

        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    infer_network.clean()
    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
    ### TODO: Disconnect from MQTT
    client.loop_stop()
    client.disconnect()
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    total_count = 0
    last_count = 0
    start_time = 0
    request_id = 0

    # Initialize the Inference Engine
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    infer_network.load_model(args.model, args.device, num_requests=0)
    n, c, h, w = infer_network.get_input_shape()

    if args.input == "CAM":
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    try:
        cap = cv2.VideoCapture(args.input)
    except FileNotFoundError:
        print("Cannot locate video file: " + args.input)
    except Exception as e:
        print("Something else went wrong with the video file: ", e)

    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("Unable to open video source")

    cap_w = cap.get(3)
    cap_h = cap.get(4)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        img = cv2.resize(frame, (w, h))
        img = img.transpose((2, 0, 1))
        img = img.reshape((n, c, h, w))

        inf_start = time.time()
        infer_network.exec_net(img, request_id=0)

        if infer_network.wait(request_id) == 0:
            det_time = time.time() - inf_start
            result = infer_network.get_output(request_id)
            current_count = 0
            for obj in result[0][0]:
                # Draw the bounding box for an object when its probability is
                # more than the specified threshold
                if obj[2] > prob_threshold:
                    xmin = int(obj[3] * cap_w)
                    ymin = int(obj[4] * cap_h)
                    xmax = int(obj[5] * cap_w)
                    ymax = int(obj[6] * cap_h)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (225, 225, 225), 1)
                    current_count = current_count + 1

            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

        if key_pressed == 27:
            break

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
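# ---------------------------------------------------------------------------
# The MQTT constants used by the mains in this file (MQTT_HOST, MQTT_PORT,
# MQTT_KEEPALIVE_INTERVAL) are defined at module level outside these
# snippets. A typical paho-mqtt setup is sketched below; the broker port
# (3001, as commonly used by the people-counter project's Mosca server) and
# the host resolution are assumptions:
# ---------------------------------------------------------------------------
import socket

import paho.mqtt.client as mqtt

HOSTNAME = socket.gethostname()
IPADDRESS = socket.gethostbyname(HOSTNAME)
MQTT_HOST = IPADDRESS
MQTT_PORT = 3001  # assumed broker port
MQTT_KEEPALIVE_INTERVAL = 60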
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    args = build_argparser().parse_args()
    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id, args.cpu_extension)
    # Checks for input image
    if args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"
    cap = cv2.VideoCapture(input_stream)
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 0
    job_id = os.environ['PBS_JOBID']
    job_id = job_id.rstrip().split('.')[0]
    progress_file_path = os.path.join(args.output_dir, str(job_id),
                                      'i_progress.txt')
    infer_time_start = time.time()
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    people_counter = cv2.VideoWriter(
        os.path.join(args.output_dir, str(job_id), "people_counter.mp4"),
        cv2.VideoWriter_fourcc(*"AVC1"), fps,
        (int(initial_w), int(initial_h)), True)
    while cap.isOpened():
        flag, frame = cap.read()
        frame_count += 1
        if not flag:
            break
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)
            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            current_count_message = "Current count: {}".format(current_count)
            cv2.putText(frame, current_count_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            last_count = current_count
        people_counter.write(frame)
        if frame_count % 10 == 0 or frame_count % video_len == 0:
            print("frame_count: {}, video_len: {}".format(frame_count, video_len))
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start),
                           frame_count, video_len)
    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)
    if args.output_dir:
        total_time = round(time.time() - infer_time_start, 2)
        stats = {}
        stats['time'] = str(total_time)
        stats['frames'] = str(frame_count)
        stats['fps'] = str(round(frame_count / total_time, 2))
        with open(os.path.join(args.output_dir, str(job_id), 'stats.json'),
                  'w') as f:
            json.dump(stats, f)
    cap.release()
    infer_network.clean()
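# --- Illustrative sketch (not part of the original samples): the three-step
# preprocessing every main above repeats before exec_net. OpenCV decodes
# frames as HWC/BGR; the Inference Engine SSD models expect NCHW.
import cv2

def preprocess(frame, n, c, h, w):
    """Resize to the model input size, reorder HWC -> CHW, add a batch dim."""
    image = cv2.resize(frame, (w, h))      # (h, w, c)
    image = image.transpose((2, 0, 1))     # (c, h, w)
    return image.reshape((n, c, h, w))     # (n, c, h, w)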
def main(): """ Load the network and parse the output. :return: None """ global DELAY global CLIENT global SIG_CAUGHT global KEEP_RUNNING CLIENT = mqtt.Client() CLIENT.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL) CLIENT.subscribe(TOPIC) log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout) args = build_argparser().parse_args() logger = log.getLogger() render_time = 0 roi_x = args.pointx roi_y = args.pointy roi_w = args.width roi_h = args.height if args.input == 'cam': input_stream = 0 else: input_stream = args.input assert os.path.isfile(args.input), "Specified input file doesn't exist" cap = cv2.VideoCapture(input_stream) if not cap.isOpened(): logger.error("ERROR! Unable to open video source") sys.exit(1) if input_stream: # Adjust DELAY to match the number of FPS of the video file DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS) # Initialise the class infer_network = Network() # Load the network to IE plugin to get shape of input layer n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0, args.cpu_extension) message_thread = Thread(target=message_runner, args=()) message_thread.setDaemon(True) message_thread.start() ret, frame = cap.read() while ret: ret, next_frame = cap.read() if not ret: KEEP_RUNNING = False break initial_wh = [cap.get(3), cap.get(4)] if next_frame is None: KEEP_RUNNING = False log.error("ERROR! blank FRAME grabbed") break # If either default values or negative numbers are given, # then we will default to start of the FRAME if roi_x <= 0 or roi_y <= 0: roi_x = 0 roi_y = 0 if roi_w <= 0: roi_w = next_frame.shape[1] if roi_h <= 0: roi_h = next_frame.shape[0] key_pressed = cv2.waitKey(int(DELAY)) # 'c' key pressed if key_pressed == 99: # Give operator chance to change the area # Select rectangle from left upper corner, dont display crosshair ROI = cv2.selectROI("Assembly Selection", frame, True, False) print("Assembly Area Selection: -x = {}, -y = {}, -w = {}," " -h = {}".format(ROI[0], ROI[1], ROI[2], ROI[3])) roi_x = ROI[0] roi_y = ROI[1] roi_w = ROI[2] roi_h = ROI[3] cv2.destroyAllWindows() cv2.rectangle(frame, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h), (0, 0, 255), 2) selected_region = [roi_x, roi_y, roi_w, roi_h] in_frame_fd = cv2.resize(next_frame, (w, h)) # Change data layout from HWC to CHW in_frame_fd = in_frame_fd.transpose((2, 0, 1)) in_frame_fd = in_frame_fd.reshape((n, c, h, w)) # Start asynchronous inference for specified request. inf_start = time.time() infer_network.exec_net(0, in_frame_fd) # Wait for the result infer_network.wait(0) det_time = time.time() - inf_start # Results of the output layer of the network res = infer_network.get_output(0) # Parse SSD output ssd_out(res, args, initial_wh, selected_region) # Draw performance stats inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000) render_time_message = "OpenCV rendering time: {:.3f} ms". \ format(render_time * 1000) if not INFO.safe: warning = "HUMAN IN ASSEMBLY AREA: PAUSE THE MACHINE!" 
cv2.putText(frame, warning, (15, 80), cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 0, 255), 2) cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1) cv2.putText(frame, render_time_message, (15, 35), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1) cv2.putText(frame, "Worker Safe: {}".format(INFO.safe), (15, 55), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1) render_start = time.time() cv2.imshow("Restricted Zone Notifier", frame) render_end = time.time() render_time = render_end - render_start frame = next_frame if key_pressed == 27: print("Attempting to stop background threads") KEEP_RUNNING = False break infer_network.clean() message_thread.join() cap.release() cv2.destroyAllWindows() CLIENT.disconnect()
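# --- Illustrative sketch (not part of the original samples): how the 'c'-key
# region picker above works. cv2.selectROI blocks until the user draws a box
# and returns (x, y, w, h); the third argument is showCrosshair and the fourth
# is fromCenter, matching the call in the code above. "sample.jpg" is a
# placeholder input.
import cv2

frame = cv2.imread("sample.jpg")
x, y, w, h = cv2.selectROI("Area Selection", frame, True, False)
cv2.destroyWindow("Area Selection")
print("Selected region: -x = {}, -y = {}, -w = {}, -h = {}".format(x, y, w, h))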
def main():
    args = build_argparser().parse_args()
    account_name = args.account_name
    account_key = args.account_key
    if account_name == "" or account_key == "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name + "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"
    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', args.input)
        sys.exit(1)
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                          args.cpu_extension)
    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    store_aisle = cv2.VideoWriter(
        os.path.join(args.output_dir, "store_aisle.mp4"),
        cv2.VideoWriter_fourcc(*'avc1'), fps, (initial_w, initial_h), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_time_start = time.time()
    frame_count = 1
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count += 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request
inf_start = time.time() infer_network.exec_net(0, in_frame) # Wait for the result infer_network.wait(0) det_time = time.time() - inf_start people_count = 0 # Results of the output layer of the network res = infer_network.get_output(0) for obj in res[0][0]: # Draw only objects when probability more than specified threshold if obj[2] > args.prob_threshold: xmin = int(obj[3] * initial_w) ymin = int(obj[4] * initial_h) xmax = int(obj[5] * initial_w) ymax = int(obj[6] * initial_h) class_id = int(obj[1]) # Draw bounding box color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255)) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2) people_count = people_count + 1 people_count_message = "People Count : " + str(people_count) inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000) cv2.putText(frame, inf_time_message, (15, 25), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) cv2.putText(frame, people_count_message, (15, 65), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) store_aisle.write(frame) time_interval = MULTIPLICATION_FACTOR * fps if frame_count % time_interval == 0: apply_time_stamp_and_save(frame, people_count, upload_azure) if frame_count % 10 == 0: progressUpdate(progress_file_path, int(time.time() - infer_time_start), frame_count, video_len) frame = next_frame if args.output_dir: total_time = time.time() - infer_time_start with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f: f.write(str(round(total_time, 1)) + '\n') f.write(str(frame_count) + '\n') cap.release() infer_network.clean()
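# --- Illustrative sketch (not part of the original samples): the SSD output
# layout all of these loops decode. get_output returns a (1, 1, N, 7) blob
# whose rows are [image_id, label, confidence, xmin, ymin, xmax, ymax], with
# the box corners normalised to [0, 1].
def parse_ssd(res, width, height, threshold=0.5):
    boxes = []
    for _, label, conf, x1, y1, x2, y2 in res[0][0]:
        if conf > threshold:
            boxes.append((int(label),
                          int(x1 * width), int(y1 * height),
                          int(x2 * width), int(y2 * height)))
    return boxes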
def main():
    model_xml = os.environ["MODEL"]
    input_source = os.environ["INPUT"]
    device = os.environ['DEVICE'] if 'DEVICE' in os.environ.keys() else 'CPU'
    cpu_extension = os.environ['CPU_EXTENSION'] \
        if 'CPU_EXTENSION' in os.environ.keys() else None
    try:
        # Probability threshold for detections filtering
        prob_threshold = float(os.environ['PROB_THRESHOLD'])
    except KeyError:
        prob_threshold = 0.5
    try:
        # Azure storage name for uploading results to the cloud
        account_name = os.environ['ACCOUNT_NAME']
    except KeyError:
        account_name = None
    try:
        # Azure storage key for uploading results to the cloud
        account_key = os.environ['ACCOUNT_KEY']
    except KeyError:
        account_key = None
    if account_name == "" or account_key == "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name + "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)
    if input_source == 'cam':
        input_stream = 0
    else:
        input_stream = input_source
        assert os.path.isfile(input_source), "Specified input file doesn't exist"
    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', input_source)
        sys.exit(1)
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, device, 1, 1, 0,
                                          cpu_extension)
    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count += 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request
inf_start = time.time() infer_network.exec_net(0, in_frame) # Wait for the result infer_network.wait(0) det_time = time.time() - inf_start people_count = 0 # Converting to Grayscale gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # Remove the background fgbgmask = mog.apply(gray) # Thresholding the image thresh = 2 max_value = 2 threshold_image = cv2.threshold(fgbgmask, thresh, max_value, cv2.THRESH_BINARY)[1] # Adding to the accumulated image accumulated_image = cv2.add(threshold_image, accumulated_image) colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT) # Results of the output layer of the network res = infer_network.get_output(0) for obj in res[0][0]: # Draw only objects when probability more than specified threshold if obj[2] > prob_threshold: xmin = int(obj[3] * initial_w) ymin = int(obj[4] * initial_h) xmax = int(obj[5] * initial_w) ymax = int(obj[6] * initial_h) class_id = int(obj[1]) # Draw bounding box color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255)) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2) people_count = people_count + 1 people_count_message = "People Count : " + str(people_count) inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000) cv2.putText(frame, inf_time_message, (15, 25), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) cv2.putText(frame, people_count_message, (15, 65), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) final_result_overlay = cv2.addWeighted(frame, P_COUNT_FRAME_WEIGHTAGE, colormap_image, COLORMAP_FRAME_WEIGHTAGE_1, 0) cv2.imshow("Detection Results", final_result_overlay) time_interval = MULTIPLICATION_FACTOR * fps if frame_count % time_interval == 0: apply_time_stamp_and_save(final_result_overlay, people_count, upload_azure) frame = next_frame key = cv2.waitKey(1) if key == 27: break cap.release() cv2.destroyAllWindows() infer_network.clean()
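# --- Illustrative sketch (not part of the original samples): the motion
# heat-map built above, isolated. Pixels the MOG2 background subtractor marks
# as moving add a small constant (2) to an accumulator each frame; cv2.add
# saturates at 255, so frequently busy areas glow when colour-mapped.
# "input.mp4" is a placeholder source.
import cv2
import numpy as np

cap = cv2.VideoCapture("input.mp4")
mog = cv2.createBackgroundSubtractorMOG2()
accumulated = None
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    mask = mog.apply(gray)
    step = cv2.threshold(mask, 2, 2, cv2.THRESH_BINARY)[1]
    if accumulated is None:
        accumulated = np.zeros_like(step)
    accumulated = cv2.add(accumulated, step)
cap.release()
if accumulated is not None:
    heatmap = cv2.applyColorMap(accumulated, cv2.COLORMAP_HOT)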
def main(): """ Load the network and parse the output. :return: None """ get_args() prevReq = 0 currReq = 1 prevVideo = None vid_finished = [False] * len(videos) min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))]) wait_time = int(round(1000 / min_FPS / len(videos))) # Initialise the class infer_network = Network() # Load the network to IE plugin to get shape of input layer batch_size, channels, model_height, model_width = \ infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension) while True: for index, currVideo in videos: # Read image from video/cam vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS))) for i in range(0, int(round(vfps / min_FPS))): ret, current_img = currVideo.video.read() if not ret: vid_finished[index] = True break if vid_finished[index]: stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1), dtype='uint8') cv2.putText(stream_end_frame, "Input file {} has ended".format (name_of_videos[index][1].split('/')[-1]) , (10, int(currVideo.height/2)), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) cv2.imshow(currVideo.name, stream_end_frame) continue # Transform image to model input rsImg = cv2.resize(current_img, (model_width, model_height)) rsImg = rsImg.transpose((2, 0, 1)) rsImg = rsImg.reshape( (batch_size, channels, model_height, model_width)) infer_start_time = datetime.datetime.now() # Infer current image infer_network.exec_net(currReq, rsImg) # Wait for previous request to end if infer_network.wait(prevReq) == 0: infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000 in_frame_workers = [] people = 0 result = infer_network.get_output(prevReq) # Filter output for obj in result[0][0]: if obj[2] > conf_inferConfidenceThreshold: xmin = int(obj[3] * prevVideo.width) ymin = int(obj[4] * prevVideo.height) xmax = int(obj[5] * prevVideo.width) ymax = int(obj[6] * prevVideo.height) ymin = ymin - int(padding * (ymax - ymin)) in_frame_workers.append((xmin, ymin, xmax, ymax)) people += 1 violations = detect_workers(in_frame_workers, previous_img) # Check if detected violations equals previous frames if violations == prevVideo.currentViolationCount: prevVideo.currentViolationCountConfidence += 1 # If frame threshold is reached, change validated count if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold: # If another violation occurred, save image if prevVideo.currentViolationCount > prevVideo.prevViolationCount: prevVideo.totalViolations += ( prevVideo.currentViolationCount - prevVideo.prevViolationCount) prevVideo.prevViolationCount = prevVideo.currentViolationCount else: prevVideo.currentViolationCountConfidence = 0 prevVideo.currentViolationCount = violations # Check if detected people count equals previous frames if people == prevVideo.currentPeopleCount: prevVideo.currentPeopleCountConfidence += 1 # If frame threshold is reached, change validated count if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold: prevVideo.currentTotalPeopleCount += ( prevVideo.currentPeopleCount - prevVideo.prevPeopleCount) if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount: prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount prevVideo.prevPeopleCount = prevVideo.currentPeopleCount else: prevVideo.currentPeopleCountConfidence = 0 prevVideo.currentPeopleCount = people frame_end_time = datetime.datetime.now() cv2.putText(previous_img, 'Total people count: ' + str( prevVideo.totalPeopleCount), (10, 
prevVideo.height - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) cv2.putText(previous_img, 'Current people count: ' + str( prevVideo.currentTotalPeopleCount), (10, prevVideo.height - 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) cv2.putText(previous_img, 'Total violation count: ' + str( prevVideo.totalViolations), (10, prevVideo.height - 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / ( frame_end_time - prevVideo.frame_start_time).total_seconds()), (10, prevVideo.height - 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) cv2.putText(previous_img, 'Inference time: {}ms'.format((infer_end_time).total_seconds()), (10, prevVideo.height - 130), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) cv2.imshow(prevVideo.name, previous_img) prevVideo.frame_start_time = datetime.datetime.now() # Swap currReq, prevReq = prevReq, currReq previous_img = current_img prevVideo = currVideo # Exit if ESC key is pressed if cv2.waitKey(wait_time) == 27: print("Attempting to stop input files") break if False not in vid_finished: break infer_network.clean() cv2.destroyAllWindows()
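# --- Illustrative sketch (not part of the original samples): the two-request
# pipelining idiom used above. The frame for step k+1 is submitted on one
# request while the result for frame k is read from the other, then the IDs
# are swapped so decoding and inference overlap. `net` and `next_input` are
# stand-ins for the Network wrapper and a preprocessed frame.
def pipelined_step(net, cur_request_id, next_request_id, next_input):
    net.exec_net(next_request_id, next_input)   # start inference for frame k+1
    result = None
    if net.wait(cur_request_id) == 0:           # collect the result for frame k
        result = net.get_output(cur_request_id)
    # Caller then swaps:
    # cur_request_id, next_request_id = next_request_id, cur_request_id
    return result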
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    global initial_w, initial_h, prob_threshold
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold
    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]
    ### TODO: Handle the input stream ###
    if args.input == 'CAM':
        input_stream = 0
    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Input file doesn't exist"
    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    lagtime = 0
    path_out = './output.avi'
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(path_out, fourcc, 24.0, (768, 432))
    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        image = pre_process_image(frame, n, c, h, w)
        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(cur_request_id)
            ### TODO: Extract any desired stats from the results ###
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)
            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count < last_count:
                duration = int(time.time() - start_time)
                if duration > 0:
                    # Publish messages to the MQTT server
                    client.publish("person/duration",
                                   json.dumps({"duration": duration + lagtime}))
                else:
                    lagtime += 1
                    log.warning(lagtime)
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
        if key_pressed == 27:
            break
        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
    ### TODO: Write an output image if `single_image_mode` ###
    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)
    out.release()
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network, and output stats
    and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    plugin = Network()
    client = connect_mqtt()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    current_count = 0
    # Load the model through `plugin`
    plugin.load_model(args.model, args.cpu_extension, args.device,
                      cur_request_id)
    net_input_shape = plugin.get_input_shape()
    # Handle the input stream
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.bmp') or args.input.endswith('.jpg'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)
    if not cap.isOpened():
        log.warning("Unable to open video source")
    # Grab the shape of the input
    w = int(cap.get(3))
    h = int(cap.get(4))
    in_shape = net_input_shape['image_tensor']
    # Loop until the stream is over
    while cap.isOpened():
        # Read from the video capture
        ret, frame = cap.read()
        if not ret:
            break
        key_pressed = cv2.waitKey(60)
        # Pre-process the image as needed
        p_frame = cv2.resize(frame, (in_shape[3], in_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        # Start asynchronous inference for the specified request
        net_input = {'image_tensor': p_frame,
                     'image_info': p_frame.shape[1:]}
        inf_start = time.time()
        plugin.exec_net(net_input, cur_request_id)
        # Wait for the result
        if plugin.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Get the results of the inference request; time the fetch with a
            # dedicated variable so the person-duration timer isn't clobbered
            fetch_start = time.time()
            result = plugin.get_output()
            log.warning("Elapsed Time: %s", time.time() - fetch_start)
            # Extract any desired stats from the results
            current_count = 0
            for obj in result[0][0]:
                # Draw a bounding box when the object's probability exceeds
                # the specified threshold
                if obj[2] > prob_threshold:
                    xmin = int(obj[3] * w)
                    ymin = int(obj[4] * h)
                    xmax = int(obj[5] * w)
                    ymax = int(obj[6] * h)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (0, 255, 0), 2)
                    current_count += 1
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            # Calculate and send current_count, total_count and duration to
            # the MQTT server: topic "person" carries "count" and "total",
            # topic "person/duration" carries "duration"
            # When a new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count += current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
        if key_pressed == 27:
            break
        # Send the frame to the FFMPEG server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
    # Write an output image if `single_image_mode`
    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    plugin.clean()
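# --- Illustrative sketch (not part of the original samples): the counting
# state machine the MQTT mains above share. A rise in the per-frame count
# marks an entry (restart the timer, grow the total); a fall marks an exit
# (report how long the person stayed). `publish` is a stand-in callback.
import time

def update_counts(current_count, state, publish):
    if current_count > state['last']:          # someone entered
        state['start'] = time.time()
        state['total'] += current_count - state['last']
        publish("person", {"total": state['total']})
    elif current_count < state['last']:        # someone left
        publish("person/duration",
                {"duration": int(time.time() - state['start'])})
    publish("person", {"count": current_count})
    state['last'] = current_count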
def main(): global CONFIG_FILE global is_async_mode global CONFIDENCE global POSE_CHECKED global INFO global COUNTER global ALARM_ON global yawns global yawn_status global EYE_AR_CONSEC_FRAMES global ear global leftEye global rightEye args = build_argparser().parse_args() try: CONFIDENCE = float(os.environ['CONFIDENCE']) except: CONFIDENCE = 0.5 assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format( CONFIG_FILE) config = json.loads(open(CONFIG_FILE).read()) for idx, item in enumerate(config['inputs']): if item['video'].isdigit(): input_stream = int(item['video']) cap = cv2.VideoCapture(input_stream) if not cap.isOpened(): print("\nCamera not plugged in... Exiting...\n") sys.exit(0) else: input_stream = item['video'] cap = cv2.VideoCapture(input_stream) if not cap.isOpened(): print("\nUnable to open video file... Exiting...\n") sys.exit(0) fps = cap.get(cv2.CAP_PROP_FPS) if args.flag == "async": is_async_mode = True print('Application running in async mode') else: is_async_mode = False print('Application running in sync mode') # Initialise the class infer_network = Network() infer_network_pose = Network() # Load the network to IE plugin to get shape of input layer plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(args.modelface, args.device, 1, 1, 2, args.cpu_extension) n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model( args.modelpose, args.device, 1, 3, 2, args.cpu_extension, plugin)[1] print("To stop the execution press Esc button") initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file. out = cv2.VideoWriter(CWD + '/output_snapshots/outpy.mp4', 0x00000021, 10, (initial_w, initial_h)) frame_count = 1 #ret, frame = cap.read() cur_request_id = 0 next_request_id = 1 while cap.isOpened(): looking = 0 ret, frame = cap.read() start_time = time.time() if not ret: break frame_count = frame_count + 1 initial_wh = [cap.get(3), cap.get(4)] in_frame = cv2.resize(frame, (w_fd, h_fd)) # Change data layout from HWC to CHW in_frame = in_frame.transpose((2, 0, 1)) in_frame = in_frame.reshape((n_fd, c_fd, h_fd, w_fd)) # Start asynchronous inference for specified request. 
        inf_start = time.time()
        if is_async_mode:
            infer_network.exec_net(next_request_id, in_frame)
        else:
            infer_network.exec_net(cur_request_id, in_frame)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            people_count = 0
            # Convert to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Start region: drowsiness detection
            # Detect faces in the grayscale frame
            rects = detector(gray, 0)
            # Loop over the face detections
            for rect in rects:
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)
                rightEAR = eye_aspect_ratio(rightEye)
                ear = (leftEAR + rightEAR) / 2.0
                leftEyeHull = cv2.convexHull(leftEye)
                rightEyeHull = cv2.convexHull(rightEye)
                cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)
                # To calculate yawns
                mouth = shape[mStart:mEnd]
                for (x, y) in mouth:
                    cv2.circle(frame, (x, y), 1, (0, 0, 255), -1)
                frame, lip_distance = mouth_open(frame)
                prev_yawn_status = yawn_status
                if ear < EYE_AR_THRESH:
                    COUNTER += 1
                    # If the eyes were closed for enough consecutive frames,
                    # sound the alarm
                    if COUNTER >= EYE_AR_CONSEC_FRAMES:
                        # If the alarm is not on, turn it on
                        if not ALARM_ON:
                            ALARM_ON = True
                            # If an alarm file was supplied, play it in a
                            # background thread
                            if alarm != "":
                                t = Thread(target=sound_alarm, args=(alarm,))
                                t.daemon = True
                                t.start()
                else:
                    COUNTER = 0
                    ALARM_ON = False
                if lip_distance > MOUTH_OPEN_THRESH:
                    yawn_status = True
                else:
                    yawn_status = False
                if prev_yawn_status and not yawn_status:
                    yawns += 1
            # End region: drowsiness detection
            # Results of the output layer of the network
            res = infer_network.get_output(cur_request_id)
            # Parse face detection output
            faces = face_detection(res, initial_wh)
            if len(faces) != 0:
                # Look for poses
                for res_hp in faces:
                    xmin, ymin, xmax, ymax = res_hp
                    head_pose = frame[ymin:ymax, xmin:xmax]
                    in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                    in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                    in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))
                    inf_start_hp = time.time()
                    infer_network_pose.exec_net(0, in_frame_hp)
                    infer_network_pose.wait(0)
                    det_time_hp = time.time() - inf_start_hp
                    # Parse head pose detection results
                    angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")
                    angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")
                    angle_r_fc = infer_network_pose.get_output(0, "angle_r_fc")
                    if ((angle_y_fc > -22.5) & (angle_y_fc < 22.5) &
                            (angle_p_fc > -22.5) & (angle_p_fc < 22.5) &
                            (angle_r_fc > -22.5) & (angle_r_fc < 22.5)):
                        looking += 1
                        POSE_CHECKED = True
                        INFO = INFO._replace(looker=looking)
                        INFO = INFO._replace(
                            msg="Looking Straight, you are doing great! "
                                "Keep it up!")
                    else:
                        INFO = INFO._replace(looker=looking)
                        INFO = INFO._replace(msg="WATCH THE ROAD!")
            else:
                INFO = INFO._replace(looker=0)
            if is_async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
            # Draw performance stats
            inf_time_message = "Face Inference time: N/A for async mode" \
                if is_async_mode \
                else "Inference time: {:.3f} ms".format(det_time * 1000)
            head_inf_time_message = \
                "Head pose Inference time: N/A for async mode" \
                if is_async_mode \
                else "Inference time: {:.3f} ms".format(det_time_hp * 1000)
            cv2.putText(frame, head_inf_time_message, (0, 55),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            log_message = "Async mode is on." if is_async_mode \
                else "Async mode is off."
            cv2.putText(frame, log_message, (0, 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.putText(frame, inf_time_message, (0, 35),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            cv2.putText(frame, "Driver: {}".format(INFO.driver), (0, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
            cv2.putText(frame, INFO.msg, (75, 90),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            output_text = "Yawn frame count: " + str(yawns)
            cv2.putText(frame, output_text, (0, 110),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (230, 0, 0), 2)
            if yawn_status:
                cv2.putText(frame, "Driver is Yawning!! BE AWAKE!!", (0, 150),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            if COUNTER >= EYE_AR_CONSEC_FRAMES:
                cv2.putText(frame, "Drowsiness Alert!!", (400, 35),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
                cv2.putText(frame,
                            "Eye Aspect Ratio (EAR): {:.2f}".format(ear),
                            (400, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 0, 255), 2)
            else:
                cv2.putText(frame, "Driver is Awake!!", (400, 35),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.putText(frame,
                            "Eye Aspect Ratio (EAR): {:.2f}".format(ear),
                            (400, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)
            if ALARM_ON:
                cv2.putText(frame, "BE AWAKE!! Alarm ON", (0, 170),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            else:
                cv2.putText(frame, "Alarm OFF", (0, 170),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow("Detection Results", frame)
            # Write the frame into the output file
            out.write(frame)
        # Frames are read at an interval of 1 millisecond
        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
    infer_network_pose.clean()
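# --- Illustrative sketch (not part of the original samples): a common
# definition of the eye aspect ratio the drowsiness logic above thresholds
# (Soukupová & Čech, 2016). `eye` is six (x, y) landmarks around one eye; the
# ratio collapses toward zero as the eyelid closes. The app's own
# eye_aspect_ratio helper is assumed to be equivalent.
from scipy.spatial import distance as dist

def eye_aspect_ratio(eye):
    a = dist.euclidean(eye[1], eye[5])  # first vertical distance
    b = dist.euclidean(eye[2], eye[4])  # second vertical distance
    c = dist.euclidean(eye[0], eye[3])  # horizontal distance
    return (a + b) / (2.0 * c)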
def main():
    global CONFIG_FILE
    global is_async_mode
    args = build_argparser().parse_args()
    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
            cap = cv2.VideoCapture(input_stream)
            if not cap.isOpened():
                print("\nCamera not plugged in... Exiting...\n")
                sys.exit(0)
        else:
            input_stream = item['video']
            cap = cv2.VideoCapture(input_stream)
            if not cap.isOpened():
                print("\nUnable to open video file... Exiting...\n")
                sys.exit(0)
    fps = cap.get(cv2.CAP_PROP_FPS)
    if args.flag == "async":
        is_async_mode = True
        print('Application running in async mode')
    else:
        is_async_mode = False
        print('Application running in sync mode')
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 2,
                                          args.cpu_extension)[1]
    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    cur_request_id = 0
    next_request_id = 1
    while cap.isOpened():
        ret, next_frame = cap.read()
        start_time = time.time()
        if not ret:
            break
        frame_count += 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request
        inf_start = time.time()
        if is_async_mode:
            infer_network.exec_net(next_request_id, in_frame)
        else:
            infer_network.exec_net(cur_request_id, in_frame)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            people_count = 0
            # Convert to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Remove the background
            fgbgmask = mog.apply(gray)
            # Threshold the image
            thresh = 2
            max_value = 2
            threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                            cv2.THRESH_BINARY)[1]
            # Add to the accumulated image
            accumulated_image = cv2.add(threshold_image, accumulated_image)
            colormap_image = cv2.applyColorMap(accumulated_image,
                                               cv2.COLORMAP_HOT)
            # Results of the output layer of the network
            res = infer_network.get_output(cur_request_id)
            for obj in res[0][0]:
                # Draw only objects whose probability exceeds the specified
                # threshold
                if obj[2] > args.prob_threshold:
                    xmin = int(obj[3] * initial_w)
                    ymin = int(obj[4] * initial_h)
                    xmax = int(obj[5] * initial_w)
                    ymax = int(obj[6] * initial_h)
                    class_id = int(obj[1])
                    # Draw bounding box
                    color = (min(class_id * 12.5, 255),
                             min(class_id * 7, 255), min(class_id * 5, 255))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                    people_count += 1
            people_count_message = "People Count : " + str(people_count)
            cv2.putText(frame, people_count_message, (15, 65),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
            cv2.imshow("Detection Results", frame)
            time_interval = MULTIPLICATION_FACTOR * fps
            if frame_count % time_interval == 0:
                apply_time_stamp_and_save(frame, people_count)
        frame = next_frame
        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
        print("FPS : {}".format(1 / (time.time() - start_time)))
        # Frames are read at an interval of 1 millisecond
        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    client.subscribe(TOPIC)
    args = build_argparser().parse_args()
    # Flag for the input image
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]
    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
        elif item['video'].endswith('.jpg') or item['video'].endswith('.bmp'):
            single_image_mode = True
            input_stream = item['video']
        else:
            input_stream = item['video']
    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(input_stream)
    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(1)
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for the specified request
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)
            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            # When a new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count += current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
        if key_pressed == 27:
            break
        # Send the frame to the FFMPEG server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
    if single_image_mode:
        cv2.imwrite('output_image.jpg', frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def main(): """ Load the network and parse the output. :return: None """ get_args() global is_async_mode nextReq = 1 currReq = 0 nextReq_s = 1 currReq_s = 0 prevVideo = None vid_finished = [False] * len(videos) min_FPS = min( [videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))]) # Initialise the class infer_network = Network() infer_network_safety = Network() # Load the network to IE plugin to get shape of input layer plugin, (batch_size, channels, model_height, model_width) = \ infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension) if use_safety_model: batch_size_sm, channels_sm, model_height_sm, model_width_sm = \ infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1] while True: for index, currVideo in videos: # Read image from video/cam vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS))) for i in range(0, int(round(vfps / min_FPS))): ret, current_img = currVideo.video.read() if not ret: vid_finished[index] = True break if vid_finished[index]: stream_end_frame = np.zeros( (int(currVideo.height), int(currVideo.width), 1), dtype='uint8') cv2.putText( stream_end_frame, "Input file {} has ended".format( name_of_videos[index][1].split('/')[-1]), (10, int(currVideo.height / 2)), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2) cv2.imshow(currVideo.name, stream_end_frame) continue # Transform image to person detection model input rsImg = cv2.resize(current_img, (model_width, model_height)) rsImg = rsImg.transpose((2, 0, 1)) rsImg = rsImg.reshape( (batch_size, channels, model_height, model_width)) infer_start_time = datetime.datetime.now() # Infer current image if is_async_mode: infer_network.exec_net(nextReq, rsImg) else: infer_network.exec_net(currReq, rsImg) prevVideo = currVideo previous_img = current_img # Wait for previous request to end if infer_network.wait(currReq) == 0: infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000 in_frame_workers = [] people = 0 violations = 0 hard_hat_detection = False vest_detection = False result = infer_network.get_output(currReq) # Filter output for obj in result[0][0]: if obj[2] > conf_inferConfidenceThreshold: xmin = int(obj[3] * prevVideo.width) ymin = int(obj[4] * prevVideo.height) xmax = int(obj[5] * prevVideo.width) ymax = int(obj[6] * prevVideo.height) xmin = int(xmin - padding) if (xmin - padding) > 0 else 0 ymin = int(ymin - padding) if (ymin - padding) > 0 else 0 xmax = int(xmax + padding) if ( xmax + padding) < prevVideo.width else prevVideo.width ymax = int(ymax + padding) if ( ymax + padding) < prevVideo.height else prevVideo.height cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) people += 1 in_frame_workers.append((xmin, ymin, xmax, ymax)) new_frame = previous_img[ymin:ymax, xmin:xmax] if use_safety_model: # Transform image to safety model input in_frame_sm = cv2.resize( new_frame, (model_width_sm, model_height_sm)) in_frame_sm = in_frame_sm.transpose((2, 0, 1)) in_frame_sm = in_frame_sm.reshape( (batch_size_sm, channels_sm, model_height_sm, model_width_sm)) infer_start_time_sm = datetime.datetime.now() if is_async_mode: infer_network_safety.exec_net( nextReq_s, in_frame_sm) else: infer_network_safety.exec_net( currReq_s, in_frame_sm) # Wait for the result infer_network_safety.wait(currReq_s) infer_end_time_sm = (datetime.datetime.now() - infer_start_time_sm) * 1000 result_sm = infer_network_safety.get_output( currReq_s) # Filter output hard_hat_detection = False vest_detection = False detection_list = 
                            []
                            for obj_sm in result_sm[0][0]:
                                if obj_sm[2] > 0.4:
                                    # Detect safety vest
                                    if int(obj_sm[1]) == 2:
                                        xmin_sm = int(obj_sm[3] * (xmax - xmin))
                                        ymin_sm = int(obj_sm[4] * (ymax - ymin))
                                        xmax_sm = int(obj_sm[5] * (xmax - xmin))
                                        ymax_sm = int(obj_sm[6] * (ymax - ymin))
                                        if not vest_detection:
                                            detection_list.append(
                                                [xmin_sm + xmin, ymin_sm + ymin,
                                                 xmax_sm + xmin, ymax_sm + ymin])
                                            vest_detection = True
                                    # Detect hard-hat
                                    if int(obj_sm[1]) == 4:
                                        xmin_sm_v = int(obj_sm[3] * (xmax - xmin))
                                        ymin_sm_v = int(obj_sm[4] * (ymax - ymin))
                                        xmax_sm_v = int(obj_sm[5] * (xmax - xmin))
                                        ymax_sm_v = int(obj_sm[6] * (ymax - ymin))
                                        if not hard_hat_detection:
                                            detection_list.append(
                                                [xmin_sm_v + xmin, ymin_sm_v + ymin,
                                                 xmax_sm_v + xmin, ymax_sm_v + ymin])
                                            hard_hat_detection = True
                            if hard_hat_detection is False or vest_detection is False:
                                violations += 1
                            for _rect in detection_list:
                                cv2.rectangle(current_img,
                                              (_rect[0], _rect[1]),
                                              (_rect[2], _rect[3]),
                                              (0, 255, 0), 2)
                            if is_async_mode:
                                currReq_s, nextReq_s = nextReq_s, currReq_s
                        # Use OpenCV if the worker-safety model is not provided
                        else:
                            violations = detect_workers(in_frame_workers,
                                                        previous_img)
                # Check if detected violations equal previous frames
                if violations == prevVideo.currentViolationCount:
                    prevVideo.currentViolationCountConfidence += 1
                    # If the frame threshold is reached, change the validated count
                    if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:
                        # If another violation occurred, save the image
                        if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
                            prevVideo.totalViolations += (
                                prevVideo.currentViolationCount -
                                prevVideo.prevViolationCount)
                        prevVideo.prevViolationCount = prevVideo.currentViolationCount
                else:
                    prevVideo.currentViolationCountConfidence = 0
                    prevVideo.currentViolationCount = violations
                # Check if the detected people count equals previous frames
                if people == prevVideo.currentPeopleCount:
                    prevVideo.currentPeopleCountConfidence += 1
                    # If the frame threshold is reached, change the validated count
                    if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
                        prevVideo.currentTotalPeopleCount += (
                            prevVideo.currentPeopleCount -
                            prevVideo.prevPeopleCount)
                        if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
                            prevVideo.totalPeopleCount += (
                                prevVideo.currentTotalPeopleCount -
                                prevVideo.prevPeopleCount)
                        prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
                else:
                    prevVideo.currentPeopleCountConfidence = 0
                    prevVideo.currentPeopleCount = people
                frame_end_time = datetime.datetime.now()
                cv2.putText(previous_img,
                            'Total people count: ' + str(prevVideo.totalPeopleCount),
                            (10, prevVideo.height - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img,
                            'Current people count: ' + str(prevVideo.currentTotalPeopleCount),
                            (10, prevVideo.height - 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img,
                            'Total violation count: ' + str(prevVideo.totalViolations),
                            (10, prevVideo.height - 70),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img,
                            'FPS: %0.2fs' % (1 / (frame_end_time -
                                                  prevVideo.frame_start_time).total_seconds()),
                            (10, prevVideo.height - 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img,
                            "Inference time: N/A for async mode" if is_async_mode
                            else "Inference time: {:.3f} ms".format(
                                infer_end_time.total_seconds()),
                            (10, prevVideo.height - 130),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.imshow(prevVideo.name, previous_img)
                prevVideo.frame_start_time = datetime.datetime.now()
            # Swap
            if is_async_mode:
                currReq, nextReq = nextReq, currReq
            previous_img = current_img
            prevVideo = currVideo
            if cv2.waitKey(1) == 27:
                print("Attempting to stop input files")
                infer_network.clean()
                infer_network_safety.clean()
                cv2.destroyAllWindows()
                return
        if False not in vid_finished:
            infer_network.clean()
            infer_network_safety.clean()
            cv2.destroyAllWindows()
            break
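# --- Illustrative sketch (not part of the original samples): the padded-crop
# arithmetic used above before handing a worker's box to the safety-gear
# model. Growing the box by `padding` pixels keeps hats and vests at the edge
# of the detection inside the crop; clamping keeps it on the frame.
def pad_box(xmin, ymin, xmax, ymax, padding, width, height):
    return (max(xmin - padding, 0), max(ymin - padding, 0),
            min(xmax + padding, width), min(ymax + padding, height))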
def main(): """ Load the network and parse the output. :return: None """ global INFO global DELAY global POSE_CHECKED log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout) args = args_parser().parse_args() logger = log.getLogger() #if args.input == 'cam': # input_stream = 0 #else: input_stream = args.input assert os.path.isfile(args.input), "Specified input file doesn't exist" cap = cv2.VideoCapture(input_stream) initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) fps = int(cap.get(cv2.CAP_PROP_FPS)) shopper = cv2.VideoWriter(os.path.join(args.output_dir, "shopper.mp4"), cv2.VideoWriter_fourcc(*"AVC1"), fps, (initial_w, initial_h), True) frame_count = 0 job_id = os.environ['PBS_JOBID'] progress_file_path = os.path.join(args.output_dir,'i_progress_'+str(job_id)+'.txt') infer_time_start = time.time() if input_stream: cap.open(args.input) # Adjust DELAY to match the number of FPS of the video file DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS) if not cap.isOpened(): logger.error("ERROR! Unable to open video source") return # Initialise the class infer_network = Network() infer_network_pose = Network() # Load the network to IE plugin to get shape of input layer plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(args.model, args.device, 1, 1, 0, args.cpu_extension) n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(args.posemodel, args.device, 1, 3, 0, args.cpu_extension, plugin)[1] ret, frame = cap.read() while ret: looking = 0 ret, next_frame = cap.read() frame_count += 1 if not ret: print ("checkpoint *BREAKING") break if next_frame is None: log.error("checkpoint ERROR! blank FRAME grabbed") break initial_wh = [cap.get(3), cap.get(4)] in_frame_fd = cv2.resize(next_frame, (w_fd, h_fd)) # Change data layout from HWC to CHW in_frame_fd = in_frame_fd.transpose((2, 0, 1)) in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd)) # Start asynchronous inference for specified request inf_start_fd = time.time() infer_network.exec_net(0, in_frame_fd) # Wait for the result infer_network.wait(0) det_time_fd = time.time() - inf_start_fd # Results of the output layer of the network res = infer_network.get_output(0) # Parse face detection output faces = face_detection(res, args, initial_wh) if len(faces) != 0: # Look for poses for res_hp in faces: xmin, ymin, xmax, ymax = res_hp head_pose = frame[ymin:ymax, xmin:xmax] in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp)) in_frame_hp = in_frame_hp.transpose((2, 0, 1)) in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp)) inf_start_hp = time.time() infer_network_pose.exec_net(0, in_frame_hp) infer_network_pose.wait(0) det_time_hp = time.time() - inf_start_hp # Parse head pose detection results angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc") angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc") if ((angle_y_fc > -22.5) & (angle_y_fc < 22.5) & (angle_p_fc > -22.5) & (angle_p_fc < 22.5)): looking += 1 POSE_CHECKED = True INFO = INFO._replace(looker=looking) else: INFO = INFO._replace(looker=looking) else: INFO = INFO._replace(looker=0) # Draw performance stats inf_time_message = "Face Inference time: {:.3f} ms.".format(det_time_fd * 1000) if POSE_CHECKED: cv2.putText(frame, "Head pose Inference time: {:.3f} ms.".format(det_time_hp * 1000), (0, 35), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) cv2.putText(frame, inf_time_message, (0, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1) 
cv2.putText(frame, "Shopper: {}".format(INFO.shopper), (0, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) cv2.putText(frame, "Looker: {}".format(INFO.looker), (0, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) shopper.write(frame) if frame_count%10 == 0: progressUpdate(progress_file_path, int(time.time()-infer_time_start), frame_count, video_len) frame = next_frame if args.output_dir: total_time = time.time() - infer_time_start with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f: f.write(str(round(total_time, 1))+'\n') f.write(str(frame_count)+'\n') infer_network.clean() infer_network_pose.clean() cap.release()
def intruder_detector():
    """
    Process the input source frame by frame and detect intruders, if any.
    :return status: 0 on success, negative value on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONF_FILE
    global video_caps
    global conf_labels_file_path

    parse_args()
    if not os.path.isfile(CONF_FILE):
        return -12, ""
    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Create a subdirectory to save output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]

    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initialize a VideoWriter for each source
    for video_cap in video_caps:
        ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                           int(video_cap.input_width))
        if ret != 0:
            return ret, ret_value

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 0,
                                          CPU_EXTENSION)[1]

    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    statsVideo = cv2.VideoWriter(os.path.join(output_dir, 'Statistics.mp4'),
                                 fourcc, min_fps,
                                 (LOG_WIN_WIDTH, LOG_WIN_HEIGHT), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_start_time = time.time()

    # Main loop starts here. Loop over all the video captures
    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_fps))):
                ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If no new frame or error in reading a frame, exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break

            if no_more_data[idx]:
                stream_end_frame = numpy.zeros((int(video_cap.input_height),
                                                int(video_cap.input_width), 1),
                                               dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                continue

            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize to expected size (in model .xml file)
            # Input frame is resized to infer resolution
            in_frame = cv2.resize(video_cap.frame, (w, h))

            # PRE-PROCESS STAGE:
            # Convert image to format expected by inference engine
            # IE expects planar, convert from packed
            # Change data layout from HWC to CHW
            in_frame = in_frame.transpose((2, 0, 1))
            in_frame = in_frame.reshape((n, c, h, w))

            # Start inference for the specified request
            inf_start = time.time()
            infer_network.exec_net(0, in_frame)
            # Wait for the result
            if infer_network.wait(0) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(0)
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the
                    # probability is more than the specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        video_cap.current_count[label] += 1
                        xmin = int(obj[3] * video_cap.input_width)
                        ymin = int(obj[4] * video_cap.input_height)
                        xmax = int(obj[5] * video_cap.input_width)
                        ymax = int(obj[6] * video_cap.input_height)
                        # Draw bounding box around the intruder detected
                        cv2.rectangle(video_cap.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(video_cap.no_of_labels):
                    if video_cap.candidate_count[i] == video_cap.current_count[i]:
                        video_cap.candidate_confidence[i] += 1
                    else:
                        video_cap.candidate_confidence[i] = 0
                        video_cap.candidate_count[i] = video_cap.current_count[i]

                    if video_cap.candidate_confidence[i] == CONF_CANDIDATE_CONFIDENCE:
                        video_cap.candidate_confidence[i] = 0
                        video_cap.changed_count[i] = True
                    else:
                        continue

                    if video_cap.current_count[i] > video_cap.last_correct_count[i]:
                        video_cap.total_count[i] += \
                            video_cap.current_count[i] - video_cap.last_correct_count[i]
                        det_objs = \
                            video_cap.current_count[i] - video_cap.last_correct_count[i]
                        total_count = sum(video_cap.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i], video_cap.cam_name)
                            print(log)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=video_cap.frame_count)
                            video_cap.events.append(event)

                        snapshot_name = "output/intruder_{}.png".format(total_count)
                        cv2.imwrite(snapshot_name, video_cap.frame)
                    video_cap.last_correct_count[i] = video_cap.current_count[i]

                # Create the intruder log window, add logs to the frame and
                # write it to the stats video
                log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                         dtype='uint8')
                for i, log in enumerate(log_list):
                    cv2.putText(log_window, log, (10, 20 * i + 15),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
                log_window = cv2.cvtColor(log_window, cv2.COLOR_GRAY2BGR)
                statsVideo.write(log_window)

            video_cap.frame_count += 1

            # Video output
            inf_time_message = "Inference time: {:.3f} ms".format(inf_time * 1000)
            cv2.putText(video_cap.frame, inf_time_message,
                        (10, int(video_cap.input_height) - 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            fps_time = time.time() - start_time
            fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
            cv2.putText(video_cap.frame, fps_message,
                        (10, int(video_cap.input_height) - 10),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # Write the video output
            video_cap.vw.write(video_cap.frame)
            if video_cap.frame_count % 10 == 0:
                progressUpdate(progress_file_path,
                               time.time() - infer_start_time,
                               video_cap.frame_count,
                               int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)))
            start_time = time.time()

            # Loop video to mimic continuous input if LOOP_VIDEO flag is True
            if LOOP_VIDEO and not video_cap.is_cam:
                vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended, restart it
                if video_cap.loop_frames > video_cap.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    video_cap.loop_frames = 0
                    video_cap.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

        if False not in no_more_data:
            progressUpdate(progress_file_path,
                           time.time() - infer_start_time,
                           int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)),
                           int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)))
            break

    no_more_data = False
    t2 = time.time() - infer_start_time
    # Write the overall runtime and per-source frame count to stats.txt
    for videos in video_caps:
        with open(os.path.join(output_dir, 'stats.txt'), 'w') as f:
            f.write('{} \n'.format(round(t2)))
            f.write('{} \n'.format(videos.frame_count))

    infer_network.clean()
    log_file.close()
    return 0, ''
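# A minimal entry-point sketch (an assumption, not part of the original
# listing). intruder_detector() returns a (status, value) tuple with 0 on
# success and a negative status code on failure, so a wrapper can surface
# the failure before exiting.
if __name__ == '__main__':
    status, value = intruder_detector()
    if status != 0:
        print("Failed to run the application: status {} ({})".format(
            status, value))
        sys.exit(1)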
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global DELAY
    global CLIENT
    global SIG_CAUGHT
    global KEEP_RUNNING
    global TARGET_DEVICE
    global is_async_mode

    CLIENT = mqtt.Client()
    CLIENT.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    CLIENT.subscribe(TOPIC)
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    logger = log.getLogger()
    render_time = 0
    roi_x = args.pointx
    roi_y = args.pointy
    roi_w = args.width
    roi_h = args.height
    check_args()

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
        else:
            input_stream = item['video']
        cap = cv2.VideoCapture(input_stream)
        if not cap.isOpened():
            logger.error("ERROR! Unable to open video source")
            sys.exit(1)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, TARGET_DEVICE, 1, 1, 2,
                                          args.cpu_extension)[1]
    message_thread = Thread(target=message_runner, args=())
    message_thread.setDaemon(True)
    message_thread.start()

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    ret, frame = cap.read()
    while ret:
        ret, next_frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break
        initial_wh = [cap.get(3), cap.get(4)]
        if next_frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        # If either default values or negative numbers are given,
        # default to the top-left corner of the frame
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]

        key_pressed = cv2.waitKey(1)
        # 'c' key pressed
        if key_pressed == 99:
            # Give the operator a chance to change the area.
            # Select the rectangle from the upper-left corner;
            # don't display the crosshair.
            ROI = cv2.selectROI("Restricted Area Selection", frame, True,
                                False)
            print("Restricted Area Selection: -x = {}, -y = {}, -w = {},"
                  " -h = {}".format(ROI[0], ROI[1], ROI[2], ROI[3]))
            roi_x = ROI[0]
            roi_y = ROI[1]
            roi_w = ROI[2]
            roi_h = ROI[3]
            cv2.destroyAllWindows()

        cv2.rectangle(frame, (roi_x, roi_y),
                      (roi_x + roi_w, roi_y + roi_h), (0, 0, 255), 2)
        selected_region = [roi_x, roi_y, roi_w, roi_h]

        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start inference for the specified request
        inf_start = time.time()
        if is_async_mode:
            # Async enabled: submit the next frame's request
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            # Async disabled
            infer_network.exec_net(cur_request_id, in_frame_fd)
        # Wait for the result
        infer_network.wait(cur_request_id)
        det_time = time.time() - inf_start

        # Results of the output layer of the network
        res = infer_network.get_output(cur_request_id)

        # Parse SSD output
        ssd_out(res, args, initial_wh, selected_region)

        # Draw performance stats
        inf_time_message = "Inference time: N/A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1000)
        render_time_message = "OpenCV rendering time: {:.3f} ms". \
            format(render_time * 1000)
        if not INFO.safe:
            warning = "Baby in Dangerous Zone: Please Act Fast!"
            cv2.putText(frame, warning, (15, 100), cv2.FONT_HERSHEY_COMPLEX,
                        0.8, (0, 0, 255), 2)
        log_message = "Async mode is on." if is_async_mode else \
            "Async mode is off."
        cv2.putText(frame, log_message, (15, 15), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (15, 35),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, render_time_message, (15, 55),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Baby Safe: {}".format(INFO.safe), (15, 75),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)

        render_start = time.time()
        cv2.imshow("Restricted Zone Notifier", frame)
        render_end = time.time()
        render_time = render_end - render_start

        frame = next_frame

        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
        # Tab key pressed
        if key_pressed == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))

        if is_async_mode:
            # Swap infer request IDs
            cur_request_id, next_request_id = next_request_id, cur_request_id

    infer_network.clean()
    message_thread.join()
    cap.release()
    cv2.destroyAllWindows()
    CLIENT.disconnect()
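# Illustrative sketch of the two-request "ping-pong" used above. The variable
# blobs, the loop shape, and the previous_submitted guard are assumptions for
# illustration; exec_net/wait/get_output are the wrapper calls from this
# listing. In async mode the app submits frame N on one request ID while
# collecting frame N-1's results on the other, then swaps the IDs each
# iteration so inference overlaps with pre/post-processing.
cur_request_id, next_request_id = 0, 1
previous_submitted = False
for blob in blobs:                                 # hypothetical preprocessed frames
    infer_network.exec_net(next_request_id, blob)  # submit the new frame
    if previous_submitted and infer_network.wait(cur_request_id) == 0:
        # results for the frame submitted on the previous iteration
        res = infer_network.get_output(cur_request_id)
    previous_submitted = True
    # Swap infer request IDs for the next iteration
    cur_request_id, next_request_id = next_request_id, cur_request_id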
def main():
    # Plugin initialization for specified device and load extensions library
    global rolling_log
    global TARGET_DEVICE
    global videoCapsJson
    env_parser()
    check_args()
    parse_conf_file()
    if TARGET_DEVICE not in acceptedDevices:
        print("Unsupported device " + TARGET_DEVICE + ". Defaulting to CPU")
        TARGET_DEVICE = 'CPU'

    # Initialize the class
    infer_network = Network()
    # Load the network to IE Plugin
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]

    minFPS = min([i.cap.get(cv2.CAP_PROP_FPS) for i in videoCaps])
    # Wait time in ms between showing frames
    waitTime = int(round(1000 / minFPS / len(videoCaps)))
    for vc in videoCaps:
        vc.init_vw(h, w, minFPS)
    statsWidth = w if w > 345 else 345
    statsHeight = h if h > (len(videoCaps) * 20 + 15) else (
        len(videoCaps) * 20 + 15)
    # 0x00000021 is the fourcc code used for H.264 in the .mp4 container
    statsVideo = cv2.VideoWriter(os.path.join('resources', 'Statistics.mp4'),
                                 0x00000021, minFPS,
                                 (statsWidth, statsHeight), True)
    if not statsVideo.isOpened():
        print("Couldn't open stats video for writing")
        sys.exit(4)

    # Read the labels file
    if labels_file:
        with open(labels_file, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    # Init a rolling log to store events
    rolling_log_size = int((h - 15) / 20)
    rolling_log = collections.deque(maxlen=rolling_log_size)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1
    # Start with async mode enabled
    is_async_mode = True

    if not UI_OUTPUT:
        # Arrange windows so they are not overlapping
        arrange_windows(w, h)
        print("To stop the execution press Esc button")

    for idx, vc in enumerate(videoCaps):
        vc.start_time = datetime.datetime.now()
        vc.pos = idx

    if UI_OUTPUT:
        videoCapsJson = videoCaps.copy()

    while True:
        # If all video captures are closed stop the loop
        no_more_data = [videoCap.closed for videoCap in videoCaps]
        # Loop over all video captures
        for idx, videoCapInfer in enumerate(videoCaps):
            # Read the next frame
            vfps = int(round(videoCapInfer.cap.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / minFPS))):
                ret, frame = videoCapInfer.cap.read()
                videoCapInfer.cur_frame_count += 1
                # If the read failed close the program
                if not ret:
                    no_more_data[idx] = True
                    break

            if no_more_data[idx]:
                if UI_OUTPUT:
                    videoCaps.pop(idx)
                    continue
                else:
                    stream_end_frame = np.zeros((h, w, 1), dtype='uint8')
                    cv2.putText(stream_end_frame,
                                "Input file {} has ended".format(
                                    videoCapInfer.cap_name),
                                (20, 150), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                (255, 255, 255), 1)
                    cv2.imshow(videoCapInfer.cap_name, stream_end_frame)
                    cv2.waitKey(waitTime)
                    videoCaps.pop(idx)
                    continue

            # Copy the current frame for later use
            videoCapInfer.cur_frame = frame.copy()
            videoCapInfer.initial_w = videoCapInfer.cap.get(3)
            videoCapInfer.initial_h = videoCapInfer.cap.get(4)
            # Resize and change the data layout so it is compatible
            in_frame = cv2.resize(videoCapInfer.cur_frame, (w, h))
            # Change data layout from HWC to CHW
            in_frame = in_frame.transpose((2, 0, 1))
            in_frame = in_frame.reshape((n, c, h, w))

            infer_start = datetime.datetime.now()
            if is_async_mode:
                infer_network.exec_net(next_request_id, in_frame)
                if len(videoCaps) == 1:
                    # Async enabled and only one video capture
                    videoCapResult = videoCapInfer
                else:
                    # Async enabled and more than one video capture:
                    # get the previous index
                    videoCapResult = videoCaps[idx - 1 if idx - 1 >= 0
                                               else len(videoCaps) - 1]
            else:
                # Async disabled
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = videoCapInfer

            if infer_network.wait(cur_request_id) == 0:
                infer_end = datetime.datetime.now()
                res = infer_network.get_output(cur_request_id)
                infer_duration = infer_end - infer_start
                current_count = 0
                # Parse detection results of the current request
                for obj in res[0][0]:
                    class_id = int(obj[1])
                    # Count only objects whose probability is more than the
                    # specified threshold and whose class matches the
                    # requested label
                    if (obj[2] > PROB_THRESHOLD and
                            videoCapResult.req_label in labels_map and
                            labels_map.index(videoCapResult.req_label) ==
                            class_id - 1):
                        current_count += 1
                        xmin = int(obj[3] * videoCapResult.initial_w)
                        ymin = int(obj[4] * videoCapResult.initial_h)
                        xmax = int(obj[5] * videoCapResult.initial_w)
                        ymax = int(obj[6] * videoCapResult.initial_h)
                        # Draw box
                        cv2.rectangle(videoCapResult.cur_frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                if videoCapResult.candidate_count == current_count:
                    videoCapResult.candidate_confidence += 1
                else:
                    videoCapResult.candidate_confidence = 0
                    videoCapResult.candidate_count = current_count

                if videoCapResult.candidate_confidence == FRAME_THRESHOLD:
                    videoCapResult.candidate_confidence = 0
                    if current_count > videoCapResult.last_correct_count:
                        videoCapResult.total_count += \
                            current_count - videoCapResult.last_correct_count
                    if current_count != videoCapResult.last_correct_count:
                        if UI_OUTPUT:
                            currtime = datetime.datetime.now().strftime(
                                "%H:%M:%S")
                            fr = FrameInfo(videoCapResult.frames,
                                           current_count, currtime)
                            videoCapResult.countAtFrame.append(fr)
                        new_objects = \
                            current_count - videoCapResult.last_correct_count
                        for _ in range(new_objects):
                            strng = "{} - {} detected on {}". \
                                format(time.strftime("%H:%M:%S"),
                                       videoCapResult.req_label,
                                       videoCapResult.cap_name)
                            rolling_log.append(strng)
                    videoCapResult.frames += 1
                    videoCapResult.last_correct_count = current_count
                else:
                    videoCapResult.frames += 1

                videoCapResult.cur_frame = cv2.resize(
                    videoCapResult.cur_frame, (w, h))

                if UI_OUTPUT:
                    imgName = videoCapResult.cap_name
                    imgName = imgName.split()[0] + "_" + chr(
                        ord(imgName.split()[1]) + 1)
                    imgName += "_" + str(videoCapResult.frames)
                    frameNames.append(imgName)
                    imgName = CONF_VIDEODIR + imgName + ".jpg"
                    cv2.imwrite(imgName, videoCapResult.cur_frame)
                    videoCapsJson[videoCapResult.pos].countAtFrame = \
                        videoCapResult.countAtFrame
                    a = saveJSON()
                    if a:
                        return a
                if not UI_OUTPUT:
                    # Add log text to each frame
                    log_message = "Async mode is on." if is_async_mode else \
                        "Async mode is off."
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (15, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    log_message = "Total {} count: {}" \
                        .format(videoCapResult.req_label,
                                videoCapResult.total_count)
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (10, h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    log_message = "Current {} count: {}" \
                        .format(videoCapResult.req_label,
                                videoCapResult.last_correct_count)
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (10, h - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    cv2.putText(videoCapResult.cur_frame,
                                'Infer wait: %0.3fs' %
                                infer_duration.total_seconds(),
                                (10, h - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)

                    # Display inferred frame and stats
                    stats = np.zeros((statsHeight, statsWidth, 1),
                                     dtype='uint8')
                    for i, log in enumerate(rolling_log):
                        cv2.putText(stats, log, (10, i * 20 + 15),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (255, 255, 255), 1)
                    cv2.imshow(STATS_WINDOW_NAME, stats)
                    if idx == 0:
                        stats = cv2.cvtColor(stats, cv2.COLOR_GRAY2BGR)
                        statsVideo.write(stats)
                    end_time = datetime.datetime.now()
                    cv2.putText(videoCapResult.cur_frame,
                                'FPS: %0.2f' % (1 / (end_time - videoCapResult.start_time).total_seconds()),
                                (10, h - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    cv2.imshow(videoCapResult.cap_name,
                               videoCapResult.cur_frame)
                    videoCapResult.start_time = datetime.datetime.now()

                videoCapResult.video.write(videoCapResult.cur_frame)

            # Wait if necessary for the required time
            key = cv2.waitKey(waitTime)
            # Esc key pressed
            if key == 27:
                cv2.destroyAllWindows()
                infer_network.clean()
                print("Finished")
                return
            # Tab key pressed
            if key == 9:
                is_async_mode = not is_async_mode
                print("Switched to {} mode".format(
                    "async" if is_async_mode else "sync"))

            if is_async_mode:
                # Swap infer request IDs
                cur_request_id, next_request_id = next_request_id, cur_request_id

            # Loop video if LOOP_VIDEO = True and input isn't live from USB camera
            if LOOP_VIDEO and not videoCapInfer.is_cam:
                vfps = int(round(videoCapInfer.cap.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if (videoCapInfer.cur_frame_count >
                        videoCapInfer.cap.get(cv2.CAP_PROP_FRAME_COUNT) -
                        int(round(vfps / minFPS))):
                    videoCapInfer.cur_frame_count = 0
                    videoCapInfer.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        if False not in no_more_data:
            break

    infer_network.clean()
    cv2.destroyAllWindows()
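# Minimal sketch of the debounce rule implemented above with candidate_count,
# candidate_confidence, and FRAME_THRESHOLD (the helper name and the state
# dict layout are assumptions for illustration): a new per-frame detection
# count is accepted only after it has stayed stable for FRAME_THRESHOLD
# consecutive frames, which filters out single-frame detector flicker.
def accept_count(count, state, frame_threshold):
    if state['candidate'] == count:
        state['confidence'] += 1
    else:
        # The count changed, so restart the stability window
        state['confidence'] = 0
        state['candidate'] = count
    if state['confidence'] == frame_threshold:
        state['confidence'] = 0
        return True    # the count change is confirmed
    return False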
def main():
    args = build_argparser().parse_args()
    client = connect_mqtt()
    global initial_w, initial_h, prob_threshold

    # Initialise the class
    network = Network()
    # Set probability threshold for detections
    if args.prob_threshold is not None:
        prob_threshold = args.prob_threshold
    else:
        prob_threshold = 0.4

    image_mode = False
    cur_request_id = 0
    last = 0
    total = 0
    start = 0

    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = network.load_model(args.model, args.device, 1, 1,
                                    cur_request_id, args.cpu_extension)[1]

    if args.input == 'CAM':
        input_stream = 0
    # Check for an input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        image_mode = True
        input_stream = args.input
    # Otherwise assume a video file
    else:
        input_stream = args.input

    cap = cv2.VideoCapture(input_stream)
    if input_stream:
        cap.open(args.input)
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        # Start inference for the specified request
        inf_start = time.time()
        network.exec_net(cur_request_id, image)
        # Wait for the result
        if network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = network.get_output(cur_request_id)
            frame, current_count = model_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms" \
                .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When a new person enters the video
            if current_count > last:
                start = time.time()
                total += current_count - last
                client.publish("person", json.dumps({"total": total}))

            # The person's duration in the video is calculated
            if current_count < last:
                duration = int(time.time() - start)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last = current_count

            if key_pressed == 27:
                break

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    network.clean()
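# A minimal launch sketch (assumption: this module is executed as a script).
# Because the loop above writes raw frames to sys.stdout.buffer, the script
# is meant to be piped into an ffmpeg process acting as the streaming server;
# the exact ffmpeg invocation is deployment-specific and not shown in this
# listing.
if __name__ == '__main__':
    main()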