import os

# FiltersParser and fp (which provides COCO_CLASSES) are project-local
# modules assumed to be importable alongside this function.

def parse_argv(argv):
    # The default img.png is taken from
    # https://user-images.githubusercontent.com/11736571/77320690-099af300-6d37-11ea-9d86-24f14dc2d540.png
    input_image_path = "./images/img.png"
    output_image_dir = None
    str_filters      = None
    filters          = None
    if len(argv) >= 2:
        input_image_path = argv[1]
    if len(argv) >= 3:
        output_image_dir = argv[2]
    if len(argv) == 4:
        # Specify a string like [person,motorcycle] or "[person,motorcycle]"
        str_filters   = argv[3]
        filtersParser = FiltersParser(str_filters, fp.COCO_CLASSES)
        filters       = filtersParser.get_filters()
        print(filters)
    if not os.path.exists(input_image_path):
        raise Exception("Not found {}".format(input_image_path))
    # Guard against a missing directory argument: os.path.exists(None)
    # raises a TypeError, so only create the directory when one was given.
    if output_image_dir is not None and not os.path.exists(output_image_dir):
        os.makedirs(output_image_dir)
    return (input_image_path, output_image_dir, filters)
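# A minimal usage sketch (not part of the original script); the command line
# and the resulting values below are illustrative only:
#
#   python detect.py ./images/img.png ./outputs "[person,motorcycle]"
#
# which parse_argv() would unpack roughly as:
#   input_image_path = "./images/img.png"
#   output_image_dir = "./outputs"
#   filters          = ["person", "motorcycle"]  # assuming get_filters()
#                                                # returns a list of names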
def parse_args(argv):
    input_image_path  = None  # e.g. "images/img.png"
    output_image_dir  = None
    filters           = None
    frozen_graph_path = None
    label_path        = None
    if len(argv) >= 2:
        input_image_path = argv[1]
    if len(argv) >= 3:
        output_image_dir = argv[2]
    if len(argv) >= 4:
        str_filters   = argv[3]
        filtersParser = FiltersParser(str_filters)
        filters       = filtersParser.get_filters()
    if len(argv) >= 5:
        frozen_graph_path = argv[4]
    if len(argv) == 6:
        label_path = argv[5]
    # Check for None before os.path.exists / os.path.join, both of which
    # raise a TypeError on a None argument.
    if input_image_path is None or not os.path.exists(input_image_path):
        raise Exception("Not found input_image_path {}".format(input_image_path))
    if output_image_dir is None:
        raise Exception("No output_image_dir given")
    output_image_dir = os.path.join(os.getcwd(), output_image_dir)
    if not os.path.exists(output_image_dir):
        os.makedirs(output_image_dir)
    return (input_image_path, output_image_dir, filters,
            frozen_graph_path, label_path)
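# Hedged usage sketch for parse_args(); the entry-point shape is assumed,
# not shown in this file:
#
#   import sys
#   (input_image_path, output_image_dir, filters,
#    frozen_graph_path, label_path) = parse_args(sys.argv)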
def detect(self, image_filepath, output_image_dir, filename_prefix, filters=None):
    # OpenCV reads the image in BGR format
    image       = cv2.imread(image_filepath)
    predictions = self.predictor(image)
    # Convert the image to RGB format for visualization
    image   = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    catalog = MetadataCatalog.get(self.config.DATASETS.TRAIN[0])
    (vis_output, detected_objects, objects_stats) = self.visualize(
        filters, predictions, image, catalog, 1.2, self.instance_mode)
    image = vis_output.get_image()
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if output_image_dir is not None:
        if filters is None:
            filters = ""
        filtersParser = FiltersParser(str(filters))
        # get_ouput_filename: spelling kept as defined by FiltersParser
        filtered_image_filepath = filtersParser.get_ouput_filename(
            image_filepath, output_image_dir)
        filtered_image_filename = os.path.basename(filtered_image_filepath)
        prefixed_filename = filename_prefix + filtered_image_filename
        output_filepath   = os.path.join(output_image_dir, prefixed_filename)
        cv2.imwrite(output_filepath, image)
        print("Saved detected image to {}".format(output_filepath))

        CSV   = ".csv"
        STATS = "_stats"
        detected_objects_path = output_filepath + CSV
        objects_stats_path    = output_filepath + STATS + CSV
        self.save_detected_objects(detected_objects, detected_objects_path)
        self.save_objects_stats(objects_stats, objects_stats_path)
    else:
        cv2.imshow('Results', image)
        # Block until a key is pressed; without this the window closes immediately.
        cv2.waitKey(0)
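# Hedged usage sketch: detect() above clearly lives on a Detectron2-style
# wrapper (self.predictor, self.config and MetadataCatalog imply a
# DefaultPredictor and a CfgNode), but the constructor is not shown here,
# so the class name below is hypothetical:
#
#   detector = Detectron2ObjectDetector(config_file, weights)  # hypothetical
#   detector.detect("./images/img.png", "./outputs",
#                   filename_prefix="detected_", filters=["person", "car"])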
def detect(self, filename, filters):
    filtersParser = FiltersParser(self.class_names)
    filters  = filtersParser.parse(filters)
    self.NL  = "\n"
    self.SEP = ","

    image = cv2.imread(filename)
    height, width, channels = image.shape[:3]
    # Darknet doesn't accept numpy images: create a darknet image buffer
    # (reused for each detect call) and copy the RGB pixels into it.
    darknet_image = darknet.make_image(width, height, 3)
    image_rgb     = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height),
                               interpolation=cv2.INTER_LINEAR)
    darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
    detections = darknet.detect_image(self.network, self.class_names,
                                      darknet_image, thresh=self.THRESHOLD)
    darknet.free_image(darknet_image)

    drawer = DetectedObjectDrawer()
    image, objects_detail, objects_stats = drawer.draw_boxes_with_filters(
        detections, image_resized, self.class_colors, filters)
    # The drawn image is RGB here, so swap channels back to BGR for cv2.imwrite.
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    sfilters = self.filters_to_string(filters)
    out_filename = os.path.join(self.output_dir,
                                sfilters + os.path.basename(filename))
    cv2.imwrite(out_filename, bgr_image)
    print("=== Saved image file {}".format(out_filename))
    self.save_detected_objects(out_filename, objects_detail)
    self.save_objects_stats(out_filename, objects_stats)
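# Hedged sketch of the detection format the common darknet Python bindings
# return from darknet.detect_image(): each entry is
# (class_name, confidence, (cx, cy, w, h)) with a *center-based* box, so a
# drawer such as DetectedObjectDrawer typically converts it to corner
# coordinates first. to_corner_box() below is illustrative, not part of
# this file.
def to_corner_box(bbox):
    cx, cy, w, h = bbox
    left   = int(round(cx - w / 2))
    top    = int(round(cy - h / 2))
    right  = int(round(cx + w / 2))
    bottom = int(round(cy + h / 2))
    return (left, top, right, bottom)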
def visualize(self, filters, path, img, im0s, pred):
    # scale_coords, xyxy2xywh, plot_one_box and time_synchronized are the
    # standard YOLOv5 utilities, assumed to be imported by this module.
    save_txt = True
    save_img = True
    view_img = False
    t2 = time_synchronized()
    print("visualize")
    # Initialized so the return value is well-defined even for empty predictions.
    saved_image_file = saved_objects_csvfile = saved_stats_csvfile = None
    for i, det in enumerate(pred):  # detections per image
        if self.webcam:  # batch_size >= 1
            p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
        else:
            p, s, im0 = path, '', im0s

        save_path = str(Path(self.output) / Path(p).name)
        txt_path = str(Path(self.output) / Path(p).stem) + (
            '_%g' % self.dataset.frame if self.dataset.mode == 'video' else '')
        s = ""
        imsize = img.shape[2:]
        print("Image_shape {}".format(imsize))
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        # Defined up front so the CSV writers below never see an unbound name.
        detected_objects = []
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Per-class counts; the count line is added whether or not the
            # class passes the filters.
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()  # detections per class
                s += '%ss, %g \n' % (self.names[int(c)], n)

            # Write results
            index = 1
            for *xyxy, conf, cls in reversed(det):
                if save_txt:
                    # Normalized xywh, kept for optional text output
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                name = self.names[int(cls)]
                cf = '%.2f' % (conf)
                c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                x, y = c1
                x2, y2 = c2
                w = x2 - x
                h = y2 - y
                if save_img or view_img:  # Add bbox to image
                    class_name = self.names[int(cls)]
                    # 2020/09/01 Apply the filters: draw and record the box only
                    # when no filters were given or the class passes them.
                    if filters is None or class_name in filters:
                        label = '%s %s' % (index, class_name)
                        data = "{}, {}, {}, {}, {}, {}, {}\n".format(
                            str(index), name, str(cf), x, y, w, h)
                        detected_objects.append(data)
                        plot_one_box(xyxy, im0, label=label,
                                     color=self.colors[int(cls)], line_thickness=3)
                        index += 1

        filters_name = ""
        if filters is not None:
            parser = FiltersParser(str(filters))
            filters_name = parser.get_filters_name()
        filtered_output_path = txt_path + filters_name
        print("Output file path {}".format(filtered_output_path))
        saved_objects_csvfile = self.save_detected_objects_as_csvfile(
            filtered_output_path, detected_objects)
        saved_stats_csvfile = self.save_stats_as_csvfile(
            filtered_output_path, s)
        # Print time (inference + NMS)
        print('%sDone. (%.3fs)' % (s, t2 - self.t1))

        # Save results (image with detections)
        dirname, basename = os.path.split(save_path)
        basename_without_ext, ext = os.path.splitext(basename)
        filtered_image_filename = basename_without_ext + filters_name + ext
        saved_image_file = os.path.join(dirname, filtered_image_filename)
        if save_img:
            if self.dataset.mode == 'images':
                print("Saved image to {}".format(saved_image_file))
                cv2.imwrite(saved_image_file, im0)
            else:
                # Video mode: vid_path, vid_writer and vid_cap are expected to
                # be set up by the caller, as in the original YOLOv5 detect.py.
                if vid_path != saved_image_file:  # new video
                    vid_path = saved_image_file
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        saved_image_file, cv2.VideoWriter_fourcc(*fourcc),
                        fps, (w, h))
                vid_writer.write(im0)

    saved_triple_files = (saved_image_file, saved_objects_csvfile, saved_stats_csvfile)
    return saved_triple_files
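# Worked example (illustrative) of the normalized-xywh conversion used in
# visualize(): gn is [w, h, w, h] of the original image, so dividing the
# center-based box by gn yields coordinates in [0, 1]. xyxy2xywh is the
# YOLOv5 helper assumed above; the numbers here are examples only.
#
#   im0_w, im0_h = 1280, 720
#   gn   = torch.tensor([im0_w, im0_h, im0_w, im0_h])
#   xyxy = torch.tensor([100., 200., 300., 400.])   # corner box
#   xywh = (xyxy2xywh(xyxy.view(1, 4)) / gn).view(-1).tolist()
#   # -> [0.15625, 0.4167, 0.15625, 0.2778]  (cx, cy, w, h, normalized)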
# Assumes: import os, sys, traceback, torch; FiltersParser and
# Yolov5ObjectDetector are project-local classes.
try:
    source  = None
    output  = None
    filters = None  # a classnames-list string such as "[person,car]"
    if len(sys.argv) >= 2:
        source = sys.argv[1]
    if len(sys.argv) >= 3:
        output = sys.argv[2]
        if not os.path.exists(output):
            os.makedirs(output)
    if len(sys.argv) == 4:
        str_filters   = sys.argv[3]
        filtersParser = FiltersParser(str_filters)
        filters       = filtersParser.get_filters()

    print("source  {}".format(source))
    print("output  {}".format(output))
    print("filters {}".format(filters))

    saved_triple_files = None
    with torch.no_grad():
        detector = Yolov5ObjectDetector()
        saved_triple_files = detector.detect(filters, source, output)
except Exception:
    traceback.print_exc()
def detect(self, image_path, image_output_dir, filters=None):
    image = Image.open(image_path)
    # The array-based representation of the image will be used later to
    # prepare the result image with boxes and labels on it.
    image_np = self.load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images of shape [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    with self.detection_graph.as_default():
        with tf.compat.v1.Session() as sess:
            # Get handles to input and output tensors
            graph = tf.compat.v1.get_default_graph()
            ops = graph.get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    self.NUM_DETECTIONS,
                    self.DETECTION_CLASSES,
                    self.DETECTION_BOXES,
                    self.DETECTION_MASKS,
                    self.DETECTION_SCORES,
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = graph.get_tensor_by_name(tensor_name)
            if self.DETECTION_MASKS in tensor_dict:
                # The following processing is only for a single image
                detection_boxes = tf.squeeze(tensor_dict[self.DETECTION_BOXES], [0])
                detection_masks = tf.squeeze(tensor_dict[self.DETECTION_MASKS], [0])
                # Reframing is required to translate the mask from box coordinates
                # to image coordinates and fit the image size.
                real_num_detection = tf.cast(
                    tensor_dict[self.NUM_DETECTIONS][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes,
                    image_np.shape[0], image_np.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict[self.DETECTION_MASKS] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = graph.get_tensor_by_name('image_tensor:0')

            # Run inference (reuse the batch prepared above)
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: image_np_expanded})

            # All outputs are float32 numpy arrays, so convert types as appropriate
            output_dict[self.NUM_DETECTIONS] = int(
                output_dict[self.NUM_DETECTIONS][0])
            output_dict[self.DETECTION_CLASSES] = output_dict[
                self.DETECTION_CLASSES][0].astype(np.uint8)
            output_dict[self.DETECTION_BOXES] = output_dict[self.DETECTION_BOXES][0]
            output_dict[self.DETECTION_SCORES] = output_dict[self.DETECTION_SCORES][0]
            if self.DETECTION_MASKS in output_dict:
                output_dict[self.DETECTION_MASKS] = output_dict[self.DETECTION_MASKS][0]

    filename_only = self.get_filename_only(image_path)
    output_image_filepath = os.path.join(image_output_dir, filename_only)
    print("filters {}".format(filters))
    if filters is not None:
        parser = FiltersParser(str(filters))
        # get_ouput_filename: spelling kept as defined by FiltersParser
        output_image_filepath = parser.get_ouput_filename(image_path, image_output_dir)
        print(output_image_filepath)
    # Draw detected boxes, classes, and scores onto image_np,
    # and save the result to output_image_filepath
    self.visualize(filters, image_np, output_dict, output_image_filepath)
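# Hedged sketch of the output_dict layout after the conversions above
# (shapes follow the TF Object Detection API; the helper below and its 0.5
# threshold are illustrative, not part of this file):
#   output_dict[NUM_DETECTIONS]    -> int, number of valid detections
#   output_dict[DETECTION_CLASSES] -> uint8 [N] class ids
#   output_dict[DETECTION_BOXES]   -> float32 [N, 4], normalized
#                                     [ymin, xmin, ymax, xmax]
#   output_dict[DETECTION_SCORES]  -> float32 [N]
def keep_confident_detections(output_dict, min_score=0.5):
    # Assumes the standard TF OD API key strings used as tensor names above.
    keep = output_dict["detection_scores"] >= min_score
    return {
        "detection_classes": output_dict["detection_classes"][keep],
        "detection_boxes":   output_dict["detection_boxes"][keep],
        "detection_scores":  output_dict["detection_scores"][keep],
    }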