class CameraPublisher:
    def __init__(self, src=0):
        # initialize the video camera stream and read the first frame
        # from the stream
        self.stream = cv2.VideoCapture(src)
        if not self.stream.isOpened():
            raise Exception("Video/Camera device not found at: {}".format(src))
        # NOTE: frames are published on the "camera" topic as a String;
        # a sensor_msgs/Image publisher with cv_bridge would be the
        # conventional choice for raw frames
        self.pub = rospy.Publisher("camera", String, queue_size=10)
        rospy.init_node("img_raw", anonymous=True)
        (self.grabbed, self.frame) = self.stream.read()

        # initialize the variable used to indicate if the thread should
        # be stopped
        self.stopped = False
        self.f = FPS()
        self.f.start()

    def start(self):
        # start the thread to read frames from the video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely until the thread is stopped
        while True:
            # if the thread indicator variable is set, stop the thread
            if self.stopped:
                return
            # otherwise, read the next frame from the stream and publish it
            (self.grabbed, self.frame) = self.stream.read()
            self.pub.publish(self.frame)
            self.f.update()

    def read(self):
        # return the frame most recently read
        return self.grabbed, self.frame

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        self.f.stop()

    def get_dimensions(self):
        # returned as (rows, cols), i.e. (height, width)
        c = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        r = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return r, c

    def get_raw_frames(self):
        return self.f.get_frames()
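
# Usage sketch (assumption): CameraPublisher is meant to run as its own ROS
# node, pumping frames onto the "camera" topic until shutdown. The helper
# name and the 30 Hz rate below are illustrative choices, not values taken
# from this repo.
def run_camera_node(src=0, rate_hz=30):
    cam = CameraPublisher(src).start()
    rate = rospy.Rate(rate_hz)
    try:
        # the capture thread does the publishing; this loop just keeps the
        # node alive and responsive to Ctrl+C / rospy shutdown
        while not rospy.is_shutdown():
            rate.sleep()
    finally:
        cam.stop()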
def run_mask_detection(video_path,
                       detection_graph,
                       label_map,
                       categories,
                       category_index,
                       show_window,
                       visualize,
                       write_output,
                       ros_enabled,
                       usage_check,
                       graph_trace_enabled=False,
                       score_node=None,
                       expand_node=None):
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    from tf_object_detection.utils import ops as utils_ops
    from tf_object_detection.utils import visualization_utils as vis_util

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []

    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    # NOTE: the ROS branch above never assigns `vid`, so everything below
    # assumes the local webcam/video path
    r, c = vid.get_dimensions()
    logger.debug("Frame height: {} width: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None

            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)

            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframing is required to translate the masks from box
            # coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, r, c)
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)

            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            # Using the split model hack
            if score_node is not None and expand_node is not None:
                score_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1_1:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read the video frame by frame and perform inference
            while vid.is_running():
                try:
                    # the array-based representation of the image will be
                    # used later to prepare the result image with boxes and
                    # labels on it
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # Expand dimensions since the model expects images to
                    # have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        output_dict = sess.run(
                            tensor_dict,
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                    else:
                        raise Exception("Split model not supported for mask")
                    end = time.time()

                    boxes = output_dict['detection_boxes']
                    scores = output_dict['detection_scores']
                    classes = output_dict['detection_classes']

                    # all outputs are float32 numpy arrays, so convert types
                    # as appropriate
                    output_dict['num_detections'] = int(
                        output_dict['num_detections'][0])
                    output_dict['detection_classes'] = output_dict[
                        'detection_classes'][0].astype(np.uint8)
                    output_dict['detection_boxes'] = output_dict[
                        'detection_boxes'][0]
                    output_dict['detection_scores'] = output_dict[
                        'detection_scores'][0]
                    output_dict['detection_masks'] = output_dict[
                        'detection_masks'][0]
                    logger.info("Masks shape: {}".format(
                        output_dict['detection_masks'].shape))

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end - start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

                    if graph_trace_enabled:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format()
                        with open('graph_timeline.json', 'w') as f:
                            f.write(chrome_trace)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))

                    # get boxes that pass the min requirements and their
                    # pixel coordinates
                    filtered_boxes = parse_tf_output(curr_frame.shape, boxes,
                                                     scores, classes)

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        # drawn_img = overlay(curr_frame, category_index, filtered_boxes)
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            curr_frame,
                            output_dict['detection_boxes'],
                            output_dict['detection_classes'],
                            output_dict['detection_scores'],
                            category_index,
                            instance_masks=output_dict.get('detection_masks'),
                            use_normalized_coordinates=True,
                            line_thickness=8)

                        if show_window:
                            window_name = "stream"
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.imshow(window_name, curr_frame)

                        if write_output:
                            trackedVideo.write(curr_frame)
                    else:
                        logger.info("".join([str(i) for i in filtered_boxes]))

                    count += 1

                    # Quick benchmarking (timer only exists with usage_check)
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elapsed time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()
    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
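
# Usage sketch (assumption): run_mask_detection expects a frozen Mask R-CNN
# graph plus the label maps produced by the TF Object Detection API. The
# loading boilerplate below follows the standard TF1 pattern; the paths, the
# label_map_util import location, and the class count are hypothetical
# placeholders, not files verified in this repo.
def _example_mask_detection(graph_path, labels_path, num_classes=90):
    from tf_object_detection.utils import label_map_util  # assumed location

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(graph_path, 'rb') as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

    label_map = label_map_util.load_labelmap(labels_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # src=0 reads from the default webcam
    return run_mask_detection(
        0, detection_graph, label_map, categories, category_index,
        show_window=True, visualize=True, write_output=False,
        ros_enabled=False, usage_check=False)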
def run_segmentation(video_path,
                     detection_graph,
                     label_map,
                     categories,
                     category_index,
                     show_window,
                     visualize,
                     write_output,
                     ros_enabled,
                     usage_check,
                     graph_trace_enabled=False,
                     score_node=None,
                     expand_node=None):
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    from tf_object_detection.utils import ops as utils_ops
    from tf_object_detection.utils import visualization_utils as vis_util

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []

    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()
    logger.debug("Frame height: {} width: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None

            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            seg_tensor = "SemanticPredictions:0"
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'ImageTensor:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read the video frame by frame and perform inference
            while vid.is_running():
                try:
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2RGB)
                    # Expand dimensions since the model expects images to
                    # have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        seg_map = sess.run(
                            seg_tensor,
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                    else:
                        raise Exception(
                            "Split model not supported for segmentation")
                    end = time.time()

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end - start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

                    if graph_trace_enabled:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format()
                        with open('graph_timeline.json', 'w') as f:
                            f.write(chrome_trace)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))

                    if ros_enabled:
                        # TODO: Send the segmentation map to other systems
                        # every frame. The detection path publishes bounding
                        # boxes here, but segmentation produces a mask, so
                        # there is nothing to publish yet.
                        logger.warning("Segmentation publishing not implemented!")

                    if write_output:
                        record.write(str(count) + "\n")

                    # Visualization of the results of a detection.
                    if visualize:
                        logger.warning("visualize not implemented!")
                    else:
                        logger.info("Segmentation map shape: {}".format(
                            seg_map.shape))

                    count += 1

                    # Quick benchmarking (timer only exists with usage_check)
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elapsed time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()
    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
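
# Visualization sketch (assumption): the DeepLab-style "SemanticPredictions"
# output is a [1, height, width] array of integer class ids. One simple way
# to render it, with an arbitrary color per class, is below; the random
# palette is illustrative, not the PASCAL colormap used in the DeepLab demos.
def colorize_segmentation(seg_map, num_classes=21, seed=0):
    rng = np.random.RandomState(seed)
    palette = rng.randint(0, 255, size=(num_classes, 3), dtype=np.uint8)
    # fancy-index the palette with the class ids of the first (only) batch
    # entry -> [height, width, 3] image suitable for cv2.imshow
    return palette[seg_map[0]]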
def run_detection(video_path,
                  detection_graph,
                  label_map,
                  categories,
                  category_index,
                  show_window,
                  visualize,
                  write_output,
                  ros_enabled,
                  usage_check,
                  graph_trace_enabled=False,
                  score_node=None,
                  expand_node=None):
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []

    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()
    logger.debug("Frame height: {} width: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None

            # Define input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the
            # objects. The score is shown on the result image, together with
            # the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')

            # Using the split model hack
            if score_node is not None and expand_node is not None:
                score_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1_1:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read the video frame by frame and perform inference
            while vid.is_running():
                try:
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # Expand dimensions since the model expects images to
                    # have shape: [1, None, None, 3] with uint8 pixels
                    # (np.int8 would mangle pixel values above 127)
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)
                    curr_frame_expanded = np.uint8(curr_frame_expanded)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        (boxes, scores, classes) = sess.run(
                            [detection_boxes, detection_scores,
                             detection_classes],
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)

                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_nosplit.json")
                    else:
                        # Split detection in two sessions.
                        (score, expand) = sess.run(
                            [score_out, expand_out],
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)

                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_conv.json")

                        (boxes, scores, classes) = sess.run(
                            [detection_boxes, detection_scores,
                             detection_classes],
                            feed_dict={
                                score_in: score,
                                expand_in: expand
                            },
                            options=options,
                            run_metadata=run_metadata)

                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_nms.json")
                    end = time.time()

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end - start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))

                    # get boxes that pass the min requirements and their
                    # pixel coordinates
                    filtered_boxes = parse_tf_output(curr_frame.shape, boxes,
                                                     scores, classes)

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        drawn_img = overlay(curr_frame, category_index,
                                            filtered_boxes)

                        if show_window:
                            window_name = "stream"
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.imshow(window_name, drawn_img)

                        if write_output:
                            trackedVideo.write(drawn_img)
                    else:
                        logger.info("".join([str(i) for i in filtered_boxes]))

                    count += 1

                    # Quick benchmarking (timer only exists with usage_check)
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elapsed time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()
    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
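
# write_trace is called above but not defined in this section; a minimal
# implementation consistent with how it is invoked (the run metadata, the
# tensorflow.python.client.timeline module, and an output filename), and with
# the inline trace-dumping code in run_mask_detection, would be:
def write_trace(run_metadata, timeline_module, filename):
    # convert the step stats gathered under FULL_TRACE into a Chrome trace
    # that can be inspected at chrome://tracing
    fetched_timeline = timeline_module.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    with open(filename, 'w') as f:
        f.write(chrome_trace)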
        # # draw the bounding box of the face along with the associated
        # # probability
        # text = "{:.2f}%".format(confidence * 100)
        # y = startY - 10 if startY - 10 > 10 else startY + 10
        # cv2.rectangle(frame, (startX, startY), (endX, endY),
        #               (0, 0, 255), 2)
        # cv2.putText(frame, text, (startX, y),
        #             cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

    # show the output frame
    cv2.imshow("Didux.io", frame)
    cv2.setWindowProperty('Didux.io', cv2.WND_PROP_ASPECT_RATIO,
                          cv2.WINDOW_FREERATIO)
    cv2.setWindowProperty('Didux.io', cv2.WND_PROP_FULLSCREEN,
                          cv2.WINDOW_FULLSCREEN)
    fps.update()

    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

    if fps._numFrames < args["num_frames"]:
        fps.update()
    if fps._numFrames == args["num_frames"]:
        # stop the timer and display FPS information
        fps.stop()
        print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
        fps.start()

cv2.destroyAllWindows()
cv2.waitKey(1)
vs.stop()
def run_detection(video_path,
                  model_path,
                  model_name,
                  weights_path,
                  classes,
                  show_window=True,
                  visualize=True,
                  write_output=False,
                  is_cpu=False,
                  ros_enabled=False,
                  usage_check=False):
    logger = logging.getLogger(__name__)

    # Load the model class from the given definition file
    spec = importlib.util.spec_from_file_location(model_name, model_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    Net = getattr(mod, model_name)

    model = Net(classes)
    model = torch.nn.DataParallel(model)
    model = load_my_state_dict(model, torch.load(weights_path))
    model.eval()

    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if not is_cpu:
        model = model.cuda()
    else:
        raise Exception("[ERROR: CPU mode not implemented]")

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()
    logger.info("Video frame height: {} width: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    if usage_check:
        fps = FPS().start()

    # Read the video frame by frame and perform inference
    while vid.is_running():
        try:
            logger.debug("Frame {}".format(count))
            retval, curr_frame = vid.read()

            if not retval:
                logger.info("Video ending at frame {}".format(count))
                break

            if show_window:
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            start = time.time()

            # Convert the numpy image to a PyTorch tensor, then add the
            # batch dimension the model expects
            convert = Compose([ToTensor()])
            img_tensor = convert(curr_frame)
            img_tensor = img_tensor.unsqueeze(0)
            con = time.time()
            logger.debug("img conversion time: {:.4f}".format(con - start))

            if not is_cpu:
                image = img_tensor.cuda()

            # volatile=True is the pre-0.4 PyTorch idiom for inference
            # without gradient tracking
            inputs = Variable(image, volatile=True)
            outputs = model(inputs)
            out = time.time()
            logger.debug("output time: {:.4f}".format(out - con))

            # Visualization only works on a cpu tensor
            if visualize:
                label = outputs[0].max(0)[1].byte().cpu().data  # Mask to be published
            else:
                label = outputs[0].max(0)[1].byte().data  # Mask to be published
            l = time.time()
            logger.debug("labeling time: {:.4f}".format(l - out))

            end = time.time()

            if usage_check:
                fps.update()
                logger.info("Session run time: {:.4f}".format(end - start))
                logger.info("Frame {}".format(count))
                cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                    cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

            # TODO: Publish Segmentation
            if ros_enabled:
                logger.info("Publishing segmentation via ROS")
            else:
                logger.info("Publishing segmentation via custom module")

            # Visualization of the results of a detection.
            if visualize:
                # Visualizes based off of the cityscapes classes; this step
                # takes a ton of time!
                label_color = Colorize()(label.unsqueeze(0))
                label_color = np.moveaxis(label_color.numpy(), 0, -1)
                label_color = label_color[..., ::-1]
                vis = time.time()
                logger.debug("visualization time: {:.4f}".format(vis - end))

                if show_window:
                    window_name = "stream"
                    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                    cv2.imshow(window_name, label_color)

                if write_output:
                    trackedVideo.write(label_color)

            count += 1

            # # Quick benchmarking
            # if timer.get_elapsed_time() >= 60:
            #     break

        except KeyboardInterrupt:
            logger.info("Ctrl + C Pressed. Attempting graceful exit")
            break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elapsed time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()
    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()
    def detect(self):
        # TODO: make a generalized detection workflow
        labels_per_frame = []
        boxes_per_frame = []

        if self.benchmark:
            from benchmark.usage import Timer, UsageTracker
            self.logger.info("Initial startup")
            timer = Timer()
            usage = UsageTracker(timer)
            usage.get_usage()

        self.logger.debug("Frame width: {} height: {}".format(
            self.width, self.height))

        if self.write_output:
            self.trackedVideo = cv2.VideoWriter(
                'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                20.0, (self.width, self.height))
            self.record = open("record.txt", "w")

        count = 0

        if self.benchmark:
            fps = FPS().start()

        while self.feed.is_running():
            try:
                self.logger.debug("Frame {}".format(count))
                retval, curr_frame = self.feed.read()

                if not retval:
                    self.logger.info("Video ending at frame {}".format(count))
                    break

                if self.show_stream:
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                # Actual detection.
                start = time.time()
                output = self.model.inference(curr_frame)
                end = time.time()

                if self.task == "segmentation" and self.library == "pytorch":
                    mask = output.data

                if self.benchmark:
                    fps.update()
                    self.logger.info("Session run time: {:.4f}".format(end - start))
                    self.logger.info("Frame {}".format(count))
                    usage.get_usage()

                # TODO: Publish Output
                if self.ros_enabled:
                    self.logger.info("Publishing via ROS")
                else:
                    self.logger.info("Publishing via custom module")

                if self.show_stream:
                    # TODO: set which type of visualization to use based on task
                    if self.task == "segmentation" and self.library == "pytorch":
                        vis_output = output.cpu().data
                        self._visualize(self.task, vis_output)

                count += 1

                # Quick benchmarking (timer only exists with benchmark)
                if self.benchmark and timer.get_elapsed_time() >= 60:
                    break

            except KeyboardInterrupt:
                self.logger.info("Ctrl + C Pressed. Attempting graceful exit")
                break

        if self.benchmark:
            fps.stop()
            self.logger.info("[USAGE] elapsed time: {:.2f}".format(
                fps.elapsed()))
            self.logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
            self.logger.info("[USAGE] inferenced frames: {}".format(
                fps.get_frames()))
            self.logger.info("[USAGE] raw frames: {}".format(
                self.feed.get_raw_frames()))
            self.logger.info(
                "[USAGE] Total Time elapsed: {:.2f} seconds".format(
                    timer.get_elapsed_time()))
            usage.dump_usage()

        self.feed.stop()
        self.logger.debug("Result: {} frames".format(count))

        if self.show_stream:
            cv2.destroyAllWindows()

        if self.write_output:
            self.record.close()
            self.trackedVideo.release()
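
# The detect() loop above assumes a model wrapper exposing inference(frame)
# plus task/library attributes; a minimal PyTorch-flavored wrapper consistent
# with the segmentation branch is sketched below. The class name is
# hypothetical, for illustration only.
import torch
from torchvision.transforms import ToTensor

class TorchSegmentationWrapper:
    task = "segmentation"
    library = "pytorch"

    def __init__(self, net):
        self.net = net.eval()

    def inference(self, frame):
        # HWC uint8 BGR frame -> NCHW float tensor -> per-pixel class ids
        tensor = ToTensor()(frame).unsqueeze(0)
        with torch.no_grad():
            logits = self.net(tensor)
        return logits[0].max(0)[1].byte()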
class WebcamVideoStream:
    def __init__(self, src=0, res=None):
        # initialize the video camera stream and read the first frame
        # from the stream
        self.stream = cv2.VideoCapture(src)
        if not self.stream.isOpened():
            raise Exception("Video/Camera device not found at: {}".format(src))
        (self.grabbed, self.frame) = self.stream.read()

        # res is an optional (width, height) tuple used to resize frames
        self.resize = None
        if res is not None:
            self.resize = res

        # initialize the variable used to indicate if the thread should
        # be stopped
        self.stopped = False
        self.f = FPS()
        self.f.start()

    def start(self):
        # start the thread to read frames from the video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely until the thread is stopped
        while True:
            # if the thread indicator variable is set, stop the thread
            if self.stopped:
                return
            # otherwise, read the next frame from the stream
            (self.grabbed, self.frame) = self.stream.read()
            self.f.update()

    def read(self):
        # return the frame most recently read, resized on the way out so the
        # reader thread's copy is left untouched
        frame = self.frame
        if self.resize is not None:
            frame = cv2.resize(frame, self.resize)
        return self.grabbed, frame

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        self.f.stop()
        # TODO: Weird error "VIDIOC_DQBUF: Invalid argument"
        self.stream.release()

    def get_dimensions(self):
        # returned as (rows, cols), i.e. (height, width)
        if self.resize is not None:
            return self.resize[1], self.resize[0]
        c = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        r = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return r, c

    def get_raw_frames(self):
        return self.f.get_frames()

    def is_running(self):
        return not self.stopped
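
# Usage sketch: the threaded reader decouples camera capture from inference,
# so read() always returns the most recent frame instead of a queued one.
# The window name below is an illustrative choice.
def example_stream_loop(src=0):
    vid = WebcamVideoStream(src=src, res=None).start()
    while vid.is_running():
        grabbed, frame = vid.read()
        if not grabbed:
            break
        cv2.imshow("preview", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    vid.stop()
    cv2.destroyAllWindows()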