# Grab frame from video stream
frame1 = videostream.read()

# Acquire frame and resize to expected shape [1xHxWx3]
frame = frame1.copy()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)

# Normalize pixel values if using a floating model (i.e. if model is non-quantized)
if floating_model:
    input_data = (np.float32(input_data) - input_mean) / input_std

# Perform the actual detection by running the model with the image as input
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Retrieve detection results
boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
# num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

# Loop over all detections and draw detection box if confidence is above minimum threshold
for i in range(len(scores)):
    if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
        # Get bounding box coordinates and draw box.
        # The snippet was truncated here; the body below follows the identical
        # drawing code used later in this file (imH and imW are assumed to hold
        # the display frame's height and width). The interpreter can return
        # coordinates outside the image, so clamp them with max() and min().
        ymin = int(max(1, (boxes[i][0] * imH)))
        xmin = int(max(1, (boxes[i][1] * imW)))
        ymax = int(min(imH, (boxes[i][2] * imH)))
        xmax = int(min(imW, (boxes[i][3] * imW)))
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
def main():
    # If tensorflow is not installed, import the interpreter from tflite_runtime,
    # else import from the regular tensorflow package
    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
    else:
        from tensorflow.lite.python.interpreter import Interpreter

    args = getParameters()
    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    SLEEP_TIME = args.sleep
    CAMERA_IP = args.cameraip
    SHOW_LOG = args.showlog
    MIN_CONF_THRESHOLD = args.threshold
    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Fix for the label map of the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview:
    # the first label is '???' and has to be removed.
    if labels[0] == '???':
        del labels[0]

    # Load the TensorFlow Lite model and get details
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30,
                              camera_ip=CAMERA_IP).start()
    time.sleep(1)

    # Start time
    startTime = time.time()

    # Running mean of detected people
    people_mean = 0

    while True:
        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects

        # People detected in the current frame
        current_num_people = 0

        # Count detections of 'person' whose confidence is above the minimum threshold
        for i in range(len(scores)):
            if MIN_CONF_THRESHOLD < scores[i] <= 1.0 and labels[int(classes[i])] == 'person':
                current_num_people += 1

        # Update the people mean (average of the previous mean and the current count)
        people_mean = (people_mean + current_num_people) / 2

        if SHOW_LOG:
            print("Current people mean: " + str(people_mean))

        # Periodically send information to Firebase
        if time.time() - startTime > SLEEP_TIME:
            # Update start time
            startTime = time.time()
            sendDataToFirebase(round(people_mean))

    # Clean up
    videostream.stop()
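# Illustrative entry point (not part of the original snippet): getParameters(),
# VideoStream, and sendDataToFirebase are assumed to be defined elsewhere in
# the script, so this only shows how main() would typically be launched.
if __name__ == '__main__':
    main()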
class Detector:
    """
    Perform object detection with the given model. The model is a quantized
    tflite file; if the detector cannot find it at the given path, it will be
    downloaded automatically from the neuralet repository.

    :param config: A ConfigEngine instance which provides the necessary parameters.
    """

    def __init__(self, config, model_name, variables):
        self.config = config
        self.model_name = model_name
        self.model_variables = variables
        # Frames Per Second
        self.fps = None
        self.model_file = 'ped_ssd_mobilenet_v2_quantized_edgetpu.tflite'
        self.model_path = '/repo/data/edgetpu/' + self.model_file

        # Get the model .tflite file path from the config.
        # If there is no .tflite file at the path, it is downloaded automatically from base_url.
        user_model_path = self.model_variables['ModelPath']
        if len(user_model_path) > 0:
            print('using %s as model' % user_model_path)
            self.model_path = user_model_path
        else:
            base_url = 'https://media.githubusercontent.com/media/neuralet/neuralet-models/master/edge-tpu/'
            url = base_url + self.model_name + '/' + self.model_file
            if not os.path.isfile(self.model_path):
                print('model does not exist under: ', self.model_path, 'downloading from ', url)
                wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        device_id = self.config.get_section_dict("Detector").get("DeviceId")
        if device_id:
            self.interpreter = Interpreter(
                self.model_path,
                experimental_delegates=[load_delegate("libedgetpu.so.1", options={"device": device_id})]
            )
        else:
            self.interpreter = Interpreter(
                self.model_path,
                experimental_delegates=[load_delegate("libedgetpu.so.1")]
            )
        self.interpreter.allocate_tensors()

        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get class id and score threshold from config
        self.class_id = int(self.model_variables['ClassID'])
        self.score_threshold = float(self.model_variables['MinScore'])

    def inference(self, resized_rgb_image):
        """
        Set the input tensor to the input image and run inference.
        The interpreter instance provides the corresponding detection output,
        which is used for creating the result list.

        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a list of dictionaries of the form
                [{"id": "0-0", "bbox": [x1, y1, x2, y2], "score": 0.98}, ...]
        """
        input_image = np.expand_dims(resized_rgb_image, axis=0)
        # Fill input tensor with input_image
        self.interpreter.set_tensor(self.input_details[0]["index"], input_image)
        t_begin = time.perf_counter()
        self.interpreter.invoke()
        inference_time = time.perf_counter() - t_begin  # Seconds
        self.fps = convert_infr_time_to_fps(inference_time)

        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        labels = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        # TODO: will be used for getting the number of objects
        # num = self.interpreter.get_tensor(self.output_details[3]['index'])

        result = []
        for i in range(boxes.shape[1]):  # number of boxes
            if labels[0, i] == self.class_id and scores[0, i] > self.score_threshold:
                result.append({
                    "id": str(self.class_id) + '-' + str(i),
                    "bbox": boxes[0, i, :],
                    "score": scores[0, i],
                })
        return result
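# Hypothetical usage sketch: the ConfigEngine constructor and the model
# variables dict are assumptions, and the frame must already be resized to the
# model's input resolution as a uint8 RGB array.
# config = ConfigEngine('config.ini')  # assumed helper, not defined here
# variables = {'ModelPath': '', 'ClassID': '0', 'MinScore': '0.5'}
# detector = Detector(config, 'ped_ssd_mobilenet_v2', variables)
# detections = detector.inference(resized_rgb_frame)
# for d in detections:
#     print(d['id'], d['bbox'], d['score'])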
# object_name2 = door_labels[int(door_classes[i])]  # Look up object name from "labels" array using class index
# door_label = '%s: %d%%' % (object_name2, int(door_scores[i] * 100))  # Example: 'person: 72%'
# door_labelSize, door_baseLine = cv2.getTextSize(door_label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
# door_label_ymin = max(door_ymin, door_labelSize[1] + 10)  # Make sure not to draw label too close to top of window
# cv2.rectangle(frame, (door_xmin, door_label_ymin - door_labelSize[1] - 10),
#               (door_xmin + door_labelSize[0], door_label_ymin + door_baseLine - 10),
#               (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
# cv2.putText(frame, door_label, (door_xmin, door_label_ymin - 7),
#             cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

# object_name2 = door_labels[int(door_classes[i])]  # Look up object name from "labels" array using class index
# if object_name2 == 'open':
#     door = 1

# # loitering
# loitering = loitering_prediction(loitering_interpreter, loitering_input_details,
#                                  loitering_output_details, frame, CLASSIFIER_CKPT)

animal_interpreter.set_tensor(input_details[0]['index'], input_data)
animal_interpreter.invoke()

# Retrieve detection results
animal_boxes = animal_interpreter.get_tensor(animal_output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
animal_classes = animal_interpreter.get_tensor(animal_output_details[1]['index'])[0]  # Class index of detected objects
animal_scores = animal_interpreter.get_tensor(animal_output_details[2]['index'])[0]   # Confidence of detected objects
animal_output_data = animal_interpreter.get_tensor(animal_output_details[0]['index'])
num = animal_interpreter.get_tensor(animal_output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)
if len(sys.argv) < 3:
    print('Usage:', sys.argv[0], '<model_path> <test_image_dir>')
    exit()

model_path = str(sys.argv[1])

# Create the tflite interpreter
if 'edgetpu' in model_path:
    interpreter = Interpreter(
        model_path,
        experimental_delegates=[load_delegate(EDGETPU_SHARED_LIB)])
else:
    interpreter = Interpreter(model_path)
interpreter.allocate_tensors()
interpreter.invoke()  # warmup

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
width = input_details[0]['shape'][2]
height = input_details[0]['shape'][1]


def run_inference(interpreter, image):
    interpreter.set_tensor(input_details[0]['index'], image)
    interpreter.invoke()
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
    classes = interpreter.get_tensor(output_details[1]['index'])[0]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]
    # num_detections = interpreter.get_tensor(output_details[3]['index'])[0]
    return boxes, classes, scores
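# Illustrative driver loop for run_inference() (assumed, not from the original
# snippet): sys.argv[2] is the <test_image_dir> from the usage string, and the
# resize/expand_dims preprocessing mirrors the other scripts in this file.
# import glob
# for image_path in glob.glob(sys.argv[2] + '/*'):
#     image = cv2.imread(image_path)
#     resized = cv2.resize(image, (width, height))
#     input_data = np.expand_dims(resized, axis=0)
#     boxes, classes, scores = run_inference(interpreter, input_data)
#     print(image_path, scores[:3])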
class ObjectDetector(object):
    def __init__(self, model_path, label_path, use_coral_flag, use_tpu_flag,
                 res_x, res_y, min_conf_threshold):
        self.res_y = res_y
        self.res_x = res_x
        self.use_coral_flag = use_coral_flag
        if use_coral_flag:
            from edgetpu.detection.engine import DetectionEngine
            from edgetpu.utils import dataset_utils
        self.min_conf_threshold = min_conf_threshold

        # Load the label map
        with open(label_path, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del self.labels[0]

        if use_tpu_flag:
            self.interpreter = Interpreter(
                model_path=model_path,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.is_floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

        # Coral
        if use_coral_flag:
            self.engine = DetectionEngine(model_path)
            self.labels = dataset_utils.read_label_file(label_path)
            _, height, width, _ = self.engine.get_input_tensor_shape()

    def apply_coral_model(self, input_data):
        ans = self.engine.detect_with_input_tensor(input_data, threshold=0.05, top_k=10)
        for obj in ans:
            if self.labels:
                print(self.labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)

    def apply_tflite_model(self, input_data):
        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects
        return (boxes, classes, scores)

    def process_frame(self, frame):
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.is_floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        if self.use_coral_flag:
            self.apply_coral_model(input_data)
            # The Coral path only prints detections, so return empty results
            boxes, classes, scores = [], [], []
        else:
            (boxes, classes, scores) = self.apply_tflite_model(input_data)
        return (frame, boxes, classes, scores)

    def is_interesting_object(self, scores, classes):
        is_interesting_object = False
        interesting_classes = []
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                is_interesting_object = True
                interesting_classes.append(self.labels[int(classes[i])])
        return is_interesting_object, interesting_classes

    def draw_frame(self, frame, boxes, classes, scores):
        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                # Get bounding box coordinates and draw box.
                # Interpreter can return coordinates that are outside of image
                # dimensions; force them to be within the image using max() and min()
                ymin = int(max(1, (boxes[i][0] * self.res_y)))
                xmin = int(max(1, (boxes[i][1] * self.res_x)))
                ymax = int(min(self.res_y, (boxes[i][2] * self.res_y)))
                xmax = int(min(self.res_x, (boxes[i][3] * self.res_x)))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4)

                # Draw label
                object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

        (flag, encodedImage) = cv2.imencode(".jpg", frame)
        return encodedImage
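# Hypothetical usage sketch (model/label paths and the input image are
# assumptions): shows the process_frame -> is_interesting_object -> draw_frame
# pipeline this class exposes.
# detector = ObjectDetector('detect.tflite', 'labelmap.txt',
#                           use_coral_flag=False, use_tpu_flag=False,
#                           res_x=640, res_y=480, min_conf_threshold=0.5)
# frame, boxes, classes, scores = detector.process_frame(cv2.imread('test.jpg'))
# found, names = detector.is_interesting_object(scores, classes)
# if found:
#     jpeg_bytes = detector.draw_frame(frame, boxes, classes, scores)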
class TFInferenceEngine:
    """Thin wrapper around TFLite Interpreter.

    The official TFLite API is moving fast and still changes frequently.
    This class intends to abstract out underlying TF changes to some extent.

    It dynamically detects if EdgeTPU is available and uses it.
    Otherwise it falls back to TFLite Runtime.
    """

    def __init__(self,
                 model=None,
                 labels=None,
                 confidence_threshold=0.8,
                 top_k=10):
        """Create an instance of Tensorflow inference engine.

        :Parameters:
        ----------
        model: dict
            {
                'tflite': path,
                'edgetpu': path,
            }
            Where path is of type string and points to the location
            of the frozen graph file (AI model).
        labels : string
            Location of file with model labels.
        confidence_threshold : float
            Inference confidence threshold.
        top_k : int
            Inference top-k threshold.
        """
        assert model
        assert model['tflite'], 'TFLite AI model path required.'
        model_tflite = model['tflite']
        assert os.path.isfile(model_tflite), \
            'TFLite AI model file does not exist: {}' \
            .format(model_tflite)
        self._model_tflite_path = model_tflite
        model_edgetpu = model.get('edgetpu', None)
        if model_edgetpu:
            assert os.path.isfile(model_edgetpu), \
                'EdgeTPU AI model file does not exist: {}' \
                .format(model_edgetpu)
        self._model_edgetpu_path = model_edgetpu
        assert labels, 'AI model labels path required.'
        assert os.path.isfile(labels), \
            'AI model labels file does not exist: {}' \
            .format(labels)
        self._model_labels_path = labels
        self._confidence_threshold = confidence_threshold
        self._top_k = top_k
        log.debug(
            'Loading AI model:\n'
            'TFLite graph: %r\n'
            'EdgeTPU graph: %r\n'
            'Labels: %r\n'
            'Confidence threshold: %.0f%%\n'
            'top-k: %d',
            model_tflite,
            model_edgetpu,
            labels,
            confidence_threshold * 100,
            top_k)
        # EdgeTPU is not available in testing and other environments;
        # load dynamically as needed
        # edgetpu_class = 'DetectionEngine'
        # module_object = import_module('edgetpu.detection.engine',
        #                               package=edgetpu_class)
        # target_class = getattr(module_object, edgetpu_class)
        self._tf_interpreter = _get_edgetpu_interpreter(model=model_edgetpu)
        if not self._tf_interpreter:
            log.debug('EdgeTPU not available. Will use TFLite CPU runtime.')
            self._tf_interpreter = Interpreter(model_path=model_tflite)
        assert self._tf_interpreter
        self._tf_interpreter.allocate_tensors()
        # Check the type of the input tensor
        self._tf_input_details = self._tf_interpreter.get_input_details()
        self._tf_output_details = self._tf_interpreter.get_output_details()
        self._tf_is_quantized_model = \
            self.input_details[0]['dtype'] != np.float32

    @property
    def input_details(self):
        return self._tf_input_details

    @property
    def output_details(self):
        return self._tf_output_details

    @property
    def is_quantized(self):
        return self._tf_is_quantized_model

    @property
    def labels_path(self):
        """
        Location of labels file.

        :Returns:
        -------
        string
            Path to AI model labels.
        """
        return self._model_labels_path

    @property
    def confidence_threshold(self):
        """
        Inference confidence threshold.

        :Returns:
        -------
        float
            Confidence threshold for inference results.
            Only results at or above this threshold should be returned
            by each engine inference.
        """
        return self._confidence_threshold

    @property
    def top_k(self):
        """
        Inference top-k threshold.

        :Returns:
        -------
        int
            Max number of results to be returned by each inference.
            Ordered by confidence score.
        """
        return self._top_k

    def infer(self):
        """Invoke model inference on current input tensor."""
        return self._tf_interpreter.invoke()

    def set_tensor(self, index=None, tensor_data=None):
        """Set tensor data at given reference index."""
        assert isinstance(index, int)
        self._tf_interpreter.set_tensor(index, tensor_data)

    def get_tensor(self, index=None):
        """Return tensor data at given reference index."""
        assert isinstance(index, int)
        return self._tf_interpreter.get_tensor(index)
class PalmDetection:
    def __init__(self, palm_model_path, anchors_path):
        self.interp_palm = Interpreter(palm_model_path)
        self.interp_palm.allocate_tensors()
        output_details = self.interp_palm.get_output_details()
        input_details = self.interp_palm.get_input_details()

        self.in_idx = input_details[0]['index']
        self.out_reg_idx = output_details[0]['index']
        self.out_clf_idx = output_details[1]['index']

        # Read the SSD anchors
        with open(anchors_path, "r") as csv_f:
            self.anchors = np.r_[[
                x for x in csv.reader(csv_f, quoting=csv.QUOTE_NONNUMERIC)
            ]]
        # 90° rotation matrix used to create the alignment triangle
        self.R90 = np.r_[[[0, 1], [-1, 0]]]

        # Triangle target coordinates used to move the detected hand
        # into the right position
        self._target_triangle = np.float32([[128, 128], [128, 0], [0, 128]])
        self._target_box = np.float32([
            [0, 0, 1],
            [256, 0, 1],
            [256, 256, 1],
            [0, 256, 1],
        ])

    @staticmethod
    def _sigm(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _im_normalize(img):
        return np.ascontiguousarray(2 * ((img / 255) - 0.5).astype('float32'))

    def preprocess_img(self, img):
        # Fit the image into a 256x256 square
        shape = np.r_[img.shape]
        self.rgb_shape = shape
        pad = (shape.max() - shape[:2]).astype('uint32') // 2
        img_pad = np.pad(img, ((pad[0], pad[0]), (pad[1], pad[1]), (0, 0)),
                         mode='constant')
        img_small = cv2.resize(img_pad, (256, 256))
        img_small = np.ascontiguousarray(img_small)
        img_norm = self._im_normalize(img_small)
        return img_pad, img_norm, pad

    def predict_hand_boxes(self, img_norm, pad):
        self.interp_palm.set_tensor(self.in_idx, img_norm.reshape(1, 256, 256, 3))
        self.interp_palm.invoke()

        out_reg = self.interp_palm.get_tensor(self.out_reg_idx)[0]        # bbox
        out_clf = self.interp_palm.get_tensor(self.out_clf_idx)[0, :, 0]  # scores

        detection_mask = self._sigm(out_clf) > 0.7
        print(np.sum(detection_mask))
        candidate_detect = out_reg[detection_mask]
        candidate_anchors = self.anchors[detection_mask]

        if candidate_detect.shape[0] == 0:
            return None

        keep = nms(candidate_detect, 0.5)
        bboxes = []
        for idx in keep:
            dx, dy, w, h = candidate_detect[idx, :4]
            center_wo_offst = candidate_anchors[idx, :2] * 256
            dx += center_wo_offst[0] - pad[1]
            dy += center_wo_offst[1] - pad[0]
            bboxes.append((dx, dy, w, h))
        print('keep', keep)
        return bboxes

    def __call__(self, img):
        img_pad, img_norm, pad = self.preprocess_img(img)
        return self.predict_hand_boxes(img_norm, pad)
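# Hypothetical usage sketch (model and anchor file names are assumptions):
# the detector expects an RGB image and returns (dx, dy, w, h) palm boxes,
# or None when nothing clears the 0.7 sigmoid threshold.
# palm = PalmDetection('palm_detection.tflite', 'anchors.csv')
# img = cv2.cvtColor(cv2.imread('hand.jpg'), cv2.COLOR_BGR2RGB)
# hand_boxes = palm(img)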
class object_detector:
    def __init__(self):
        PATH_TO_CKPT = rospy.get_param("/object_detector/weights_path")
        PATH_TO_LABELS = rospy.get_param("/object_detector/labels_path")
        camera_input = rospy.get_param("/object_detector/cam_feed")
        use_tpu = int(rospy.get_param("/object_detector/tpu"))
        self.min_conf_threshold = float(rospy.get_param("/object_detector/threshold"))
        self.imW = int(rospy.get_param("/object_detector/imW"))
        self.imH = int(rospy.get_param("/object_detector/imH"))

        pkg = importlib.util.find_spec('tflite_runtime')
        if pkg:
            from tflite_runtime.interpreter import Interpreter
            if use_tpu:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if use_tpu:
                from tensorflow.lite.python.interpreter import load_delegate

        if use_tpu:
            # If the configured graph is the default 'detect.tflite', use the
            # Edge TPU compiled 'edgetpu.tflite' next to it instead
            if os.path.basename(PATH_TO_CKPT) == 'detect.tflite':
                PATH_TO_CKPT = os.path.join(os.path.dirname(PATH_TO_CKPT), 'edgetpu.tflite')

        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del self.labels[0]

        # Load the Tensorflow Lite model.
        # If using Edge TPU, use the special load_delegate argument
        if use_tpu:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

        # Initialize frame rate calculation
        self.frame_rate_calc = 1
        self.freq = cv2.getTickFrequency()

        self.image_pub = rospy.Publisher("/detected_image", Image, queue_size=10)
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber(camera_input, Image, self.callback)

    def callback(self, data):
        t1 = cv2.getTickCount()
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)

        frame = cv_image.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects
        # num = self.interpreter.get_tensor(self.output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        output = [
            x for x in zip(classes, boxes, scores)
            if x[2] > self.min_conf_threshold and x[2] <= 1.0
        ]
        print("Output", output)
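# Illustrative ROS node bootstrap (assumed, not from the original snippet):
# the detector subscribes to the camera feed in its constructor, so spinning
# keeps the callbacks alive.
# def main():
#     rospy.init_node('object_detector', anonymous=True)
#     object_detector()
#     rospy.spin()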
class ImageCapture(Thread):
    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(__name__)
        self.logger.debug('Init image capture')
        self.WIDTH = 640
        self.HEIGHT = 480
        # Initialize the camera
        self.camera = PiCamera()
        # Set the camera resolution
        self.camera.resolution = (self.WIDTH, self.HEIGHT)
        # Set the number of frames per second
        self.camera.framerate = 32
        # Generates a 3D RGB array and stores it in raw_capture
        self.raw_capture = PiRGBArray(self.camera, size=(self.WIDTH, self.HEIGHT))
        # Wait a certain number of seconds to allow the camera time to warm up
        time.sleep(0.1)
        # Load COCO labels
        self.labels = {}
        self.load_labels("./models/coco_labels.txt")
        # Init the tf interpreter
        self.interpreter = Interpreter("./models/detect.tflite")
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = \
            self.interpreter.get_input_details()[0]['shape']
        # Current image
        self.image = None
        # Loop bool
        self.running = True

    def run(self):
        self.logger.debug('starting capture thread')
        while self.running:
            # Process the next camera frame
            self.nextFrame()
            time.sleep(0.05)

    def nextFrame(self):
        self.logger.debug('Capturing next frame')
        # Capture frames continuously from the camera
        self.camera.capture(self.raw_capture, format="bgr", use_video_port=True)
        # Analyse the raw image to detect objects
        self.analyse()
        # Convert raw image to jpeg
        res, self.image = cv2.imencode('.JPEG', self.raw_capture.array)
        # Clear the stream in preparation for the next frame
        self.raw_capture.truncate(0)

    def getEncodedImage(self):
        self.logger.debug('Returning current jpeg image base64 encoded')
        if self.image is not None:
            return base64.b64encode(self.image.tobytes()).decode('utf-8')
        return None

    def analyse(self):
        self.logger.debug('analyse raw frame for common objects')
        # Resize the image
        resized = cv2.resize(self.raw_capture.array,
                             (self.input_width, self.input_height),
                             interpolation=cv2.INTER_AREA)
        results = self.detect_objects(resized, 0.4)
        self.annotate_objects(results)

    def load_labels(self, path):
        """Loads the labels file. Supports files with or without index numbers."""
        self.logger.debug('loading labels from ' + path)
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    self.labels[int(pair[0])] = pair[1].strip()
                else:
                    self.labels[row_number] = pair[0].strip()

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        self.logger.debug('setting input tensor')
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        self.logger.debug('getting output tensor')
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image, threshold):
        """Returns a list of detection results, each a dictionary of object info."""
        self.logger.debug('starting to detect objects')
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))

        results = []
        for i in range(count):
            if scores[i] >= threshold:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'score': scores[i]
                }
                results.append(result)
        return results

    def annotate_objects(self, results):
        """Draws the bounding box and label for each object in the results."""
        self.logger.debug('annotate objects')
        for obj in results:
            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution
            ymin, xmin, ymax, xmax = obj['bounding_box']
            xmin = int(xmin * self.WIDTH)
            xmax = int(xmax * self.WIDTH)
            ymin = int(ymin * self.HEIGHT)
            ymax = int(ymax * self.HEIGHT)
            cv2.rectangle(self.raw_capture.array, (xmin, ymin), (xmax, ymax),
                          (0, 255, 0), 2)
            # class_id comes back as a float tensor value, so cast before lookup
            cv2.putText(self.raw_capture.array, self.labels[int(obj['class_id'])],
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        (0, 255, 0), 2)
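# Illustrative usage sketch (sleep interval is an assumption): starts the
# capture thread and polls the latest base64-encoded JPEG, e.g. to feed a
# web endpoint.
# capture = ImageCapture()
# capture.start()
# time.sleep(1)
# encoded = capture.getEncodedImage()
# if encoded:
#     print(encoded[:64], '...')
# capture.running = False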
def predict():
    MODEL_NAME = "model"
    GRAPH_NAME = 'glyphs.tflite'
    LABELMAP_NAME = "labelmap.txt"
    min_conf_threshold = float(0.3)
    use_TPU = False
    IM_NAME = None
    IM_DIR = "images"

    # If both an image AND a folder are specified, throw an error
    if (IM_NAME and IM_DIR):
        print('Error! Please only use the --image argument or the --imagedir '
              'argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
        sys.exit()

    # If neither an image nor a folder is specified, default to 'test1.jpg'
    if (not IM_NAME and not IM_DIR):
        IM_NAME = 'test1.jpg'

    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    if use_TPU:
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    CWD_PATH = os.getcwd()

    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')
    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]
    if labels[0] == '???':
        del labels[0]

    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    results = {}
    for image_path in images:
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[0]['index'])[0]
        classes = interpreter.get_tensor(output_details[1]['index'])[0]
        scores = interpreter.get_tensor(output_details[2]['index'])[0]
        num = interpreter.get_tensor(output_details[3]['index'])[0]

        # Note: results is overwritten on each iteration, so only the
        # detections for the last image are returned
        results['boxes'] = boxes
        results['classes'] = classes
        results['scores'] = scores
        results['num'] = num

    return results
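# Illustrative caller (assumed, not from the original snippet): filters the
# returned arrays by the same 0.3 threshold predict() defines internally.
# results = predict()
# for box, cls, score in zip(results['boxes'], results['classes'], results['scores']):
#     if score > 0.3:
#         print(int(cls), score, box)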
class NanoDetTFLite(object):
    # Constant definitions for post-processing
    STRIDES = (8, 16, 32)
    REG_MAX = 7
    PROJECT = np.arange(REG_MAX + 1)

    # Constant definitions for standardization
    MEAN = np.array([103.53, 116.28, 123.675], dtype=np.float32)
    MEAN = MEAN.reshape(1, 1, 3)
    STD = np.array([57.375, 57.12, 58.395], dtype=np.float32)
    STD = STD.reshape(1, 1, 3)

    def __init__(
        self,
        model_path='model_float16_quant.tflite',
        input_shape=320,
        class_score_th=0.35,
        nms_th=0.6,
        num_threads=1,
    ):
        self.input_shape = (input_shape, input_shape)
        self.class_score_th = class_score_th
        self.nms_th = nms_th

        # Load model
        self.interpreter = Interpreter(model_path=model_path,
                                       num_threads=num_threads)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Calculate grid points for each stride
        self.grid_points = []
        for index in range(len(self.STRIDES)):
            grid_point = self._make_grid_point(
                (int(self.input_shape[0] / self.STRIDES[index]),
                 int(self.input_shape[1] / self.STRIDES[index])),
                self.STRIDES[index],
            )
            self.grid_points.append(grid_point)

    def inference(self, image):
        temp_image = copy.deepcopy(image)
        image_height, image_width = image.shape[0], image.shape[1]

        # Pre-process: standardization, reshape
        resize_image, new_height, new_width, top, left = self._resize_image(temp_image)
        x = self._pre_process(resize_image)

        # Inference execution
        self.interpreter.set_tensor(self.input_details[0]['index'], x)
        self.interpreter.invoke()

        results = []
        results.append(self.interpreter.get_tensor(
            self.output_details[5]['index']))  # cls_pred_stride_8
        results.append(self.interpreter.get_tensor(
            self.output_details[4]['index']))  # dis_pred_stride_8
        results.append(self.interpreter.get_tensor(
            self.output_details[3]['index']))  # cls_pred_stride_16
        results.append(self.interpreter.get_tensor(
            self.output_details[2]['index']))  # dis_pred_stride_16
        results.append(self.interpreter.get_tensor(
            self.output_details[1]['index']))  # cls_pred_stride_32
        results.append(self.interpreter.get_tensor(
            self.output_details[0]['index']))  # dis_pred_stride_32

        # Post-process: NMS, grid -> coordinate transformation
        bboxes, scores, class_ids = self._post_process(results)

        # Post-process: convert coordinates to fit the image size
        ratio_height = image_height / new_height
        ratio_width = image_width / new_width
        for i in range(bboxes.shape[0]):
            bboxes[i, 0] = max(int((bboxes[i, 0] - left) * ratio_width), 0)
            bboxes[i, 1] = max(int((bboxes[i, 1] - top) * ratio_height), 0)
            bboxes[i, 2] = min(int((bboxes[i, 2] - left) * ratio_width), image_width)
            bboxes[i, 3] = min(int((bboxes[i, 3] - top) * ratio_height), image_height)

        return bboxes, scores, class_ids

    def _make_grid_point(self, grid_size, stride):
        grid_height, grid_width = grid_size

        shift_x = np.arange(0, grid_width) * stride
        shift_y = np.arange(0, grid_height) * stride

        xv, yv = np.meshgrid(shift_x, shift_y)
        xv = xv.flatten()
        yv = yv.flatten()

        cx = xv + 0.5 * (stride - 1)
        cy = yv + 0.5 * (stride - 1)

        return np.stack((cx, cy), axis=-1)

    def _resize_image(self, image, keep_ratio=True):
        top, left = 0, 0
        new_height, new_width = self.input_shape[0], self.input_shape[1]

        if keep_ratio and image.shape[0] != image.shape[1]:
            hw_scale = image.shape[0] / image.shape[1]
            if hw_scale > 1:
                new_height = self.input_shape[0]
                new_width = int(self.input_shape[1] / hw_scale)
                resize_image = cv2.resize(
                    image,
                    (new_width, new_height),
                    interpolation=cv2.INTER_AREA,
                )
                left = int((self.input_shape[1] - new_width) * 0.5)
                resize_image = cv2.copyMakeBorder(
                    resize_image,
                    0,
                    0,
                    left,
                    self.input_shape[1] - new_width - left,
                    cv2.BORDER_CONSTANT,
                    value=0,
                )
            else:
                new_height = int(self.input_shape[0] * hw_scale)
                new_width = self.input_shape[1]
                resize_image = cv2.resize(
                    image,
                    (new_width, new_height),
                    interpolation=cv2.INTER_AREA,
                )
                top = int((self.input_shape[0] - new_height) * 0.5)
                resize_image = cv2.copyMakeBorder(
                    resize_image,
                    top,
                    self.input_shape[0] - new_height - top,
                    0,
                    0,
                    cv2.BORDER_CONSTANT,
                    value=0,
                )
        else:
            resize_image = cv2.resize(
                image,
                self.input_shape,
                interpolation=cv2.INTER_AREA,
            )

        return resize_image, new_height, new_width, top, left

    def _pre_process(self, image):
        # Standardization
        image = image.astype(np.float32)
        image = (image - self.MEAN) / self.STD
        # Reshape
        image = image.reshape(-1, self.input_shape[0], self.input_shape[1], 3)
        return image

    def _softmax(self, x, axis=1):
        x_exp = np.exp(x)
        x_sum = np.sum(x_exp, axis=axis, keepdims=True)
        s = x_exp / x_sum
        return s

    def _post_process(self, predict_results):
        class_scores = predict_results[::2]
        bbox_predicts = predict_results[1::2]

        bboxes, scores, class_ids = self._get_bboxes_single(
            class_scores,
            bbox_predicts,
            1,
            rescale=False,
        )

        return bboxes.astype(np.int32), scores, class_ids

    def _get_bboxes_single(
        self,
        class_scores,
        bbox_predicts,
        scale_factor,
        rescale=False,
        topk=1000,
    ):
        bboxes = []
        scores = []

        # Convert bounding box coordinates for each stride
        for stride, class_score, bbox_predict, grid_point in zip(
                self.STRIDES, class_scores, bbox_predicts, self.grid_points):
            # Dimension adjustment
            if class_score.ndim == 3:
                class_score = class_score.squeeze(axis=0)
            if bbox_predict.ndim == 3:
                bbox_predict = bbox_predict.squeeze(axis=0)

            # Convert the bounding box to relative coordinates and relative distance
            bbox_predict = bbox_predict.reshape(-1, self.REG_MAX + 1)
            bbox_predict = self._softmax(bbox_predict, axis=1)
            bbox_predict = np.dot(bbox_predict, self.PROJECT).reshape(-1, 4)
            bbox_predict *= stride

            # Target in descending order of score
            if 0 < topk < class_score.shape[0]:
                max_scores = class_score.max(axis=1)
                topk_indexes = max_scores.argsort()[::-1][0:topk]

                grid_point = grid_point[topk_indexes, :]
                bbox_predict = bbox_predict[topk_indexes, :]
                class_score = class_score[topk_indexes, :]

            # Convert the bounding box to absolute coordinates
            x1 = grid_point[:, 0] - bbox_predict[:, 0]
            y1 = grid_point[:, 1] - bbox_predict[:, 1]
            x2 = grid_point[:, 0] + bbox_predict[:, 2]
            y2 = grid_point[:, 1] + bbox_predict[:, 3]
            x1 = np.clip(x1, 0, self.input_shape[1])
            y1 = np.clip(y1, 0, self.input_shape[0])
            x2 = np.clip(x2, 0, self.input_shape[1])
            y2 = np.clip(y2, 0, self.input_shape[0])
            bbox = np.stack([x1, y1, x2, y2], axis=-1)

            bboxes.append(bbox)
            scores.append(class_score)

        # Scale adjustment
        bboxes = np.concatenate(bboxes, axis=0)
        if rescale:
            bboxes /= scale_factor
        scores = np.concatenate(scores, axis=0)

        # Non-Maximum Suppression
        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes_wh[:, 2:4] - bboxes_wh[:, 0:2]
        class_ids = np.argmax(scores, axis=1)
        scores = np.max(scores, axis=1)

        indexes = cv2.dnn.NMSBoxes(
            bboxes_wh.tolist(),
            scores.tolist(),
            self.class_score_th,
            self.nms_th,
        )

        # Check the number of detections that remain after NMS
        if len(indexes) > 0:
            bboxes = bboxes[indexes[:, 0]]
            scores = scores[indexes[:, 0]]
            class_ids = class_ids[indexes[:, 0]]
        else:
            bboxes = np.array([])
            scores = np.array([])
            class_ids = np.array([])

        return bboxes, scores, class_ids
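# Hypothetical usage sketch (image path is an assumption; model path is the
# constructor default): the input is an image array, and the returned bboxes
# are absolute pixel coordinates in the original image.
# nanodet = NanoDetTFLite(model_path='model_float16_quant.tflite')
# image = cv2.imread('sample.jpg')
# bboxes, scores, class_ids = nanodet.inference(image)
# for bbox, score, class_id in zip(bboxes, scores, class_ids):
#     print(class_id, score, bbox)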
class DetectMask():
    def __init__(self):
        print("init")
        self.MODEL_NAME = r"D:\TP_PROGS\Projects\TeProjSahara\model_faceMask\FaceMask_tflite"
        self.GRAPH_NAME = "model4.tflite"
        self.LABELMAP_NAME = "lbl.txt"
        self.min_conf_threshold = 0.2
        # use_TPU = args.edgetpu
        # self.listOfObjDetec = []

        # Path to label map file
        self.PATH_TO_LABELS = os.path.join(self.MODEL_NAME, self.LABELMAP_NAME)
        print(self.PATH_TO_LABELS)

        # Path to .tflite file, which contains the model that is used for object detection
        self.PATH_TO_CKPT = os.path.join(self.MODEL_NAME, self.GRAPH_NAME)

        # Load the label map
        with open(self.PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Fix for the label map of the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview:
        # the first label is '???' and has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        # Load the Tensorflow Lite model.
        self.interpreter = Interpreter(model_path=self.PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

    def masKDetect(self, img):
        # Clearing list of previously detected objects
        # self.listOfObjDetec.clear()
        imH, imW, _ = img.shape
        image_resized = cv2.resize(img, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)

        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects

        # Initialize so "No Face Detected" is returned when nothing clears the threshold
        object_name = None
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                # Look up object name from "labels" array using class index
                object_name = self.labels[int(classes[i])]
                print("detected:", object_name, ":", int(scores[i] * 100))
                # self.listOfObjDetec.append(object_name)

                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(img, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(img, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

        # print(self.listOfObjDetec)
        # objDict = dict(Counter(self.listOfObjDetec))
        # print(objDict)
        # strg = ""
        # for i in objDict:
        #     print(i)
        #     strg += "Detected " + str(objDict[i]) + " " + i + "\n"
        # print(strg)

        # All the results have been drawn on the image, now display the image.
        # Press any key to close the window and return the result.
        cv2.imshow('Mask detector', img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        if object_name is not None:
            if object_name == "Nomask":
                object_name = "No mask"
            strg = "Person is wearing " + object_name
            return strg
        else:
            return "No Face Detected"
class Classifier:
    """
    Perform image classification with the given model. The model is a
    quantized .tflite file; if the classifier cannot find it at the given
    path, it will be downloaded automatically from the neuralet repository.

    :param config: A Config instance which provides the necessary parameters.
    """

    def __init__(self, config):
        self.config = config
        self.model_name = "OFMClassifier_edgetpu.tflite"
        if os.path.isfile(config.CLASSIFIER_MODEL_PATH):
            self.model_path = config.CLASSIFIER_MODEL_PATH
        else:
            self.model_path = 'data/classifiers/edgetpu/'
            if not os.path.isdir(self.model_path):
                os.makedirs(self.model_path)
            self.model_path = self.model_path + self.model_name
        # Frames Per Second
        self.fps = None
        if not os.path.isfile(self.model_path):
            url = "https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/OFMClassifier/OFMClassifier_edgetpu.tflite"
            print("model does not exist under: ", self.model_path, "downloading from ", url)
            wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        self.interpreter = Interpreter(
            self.model_path,
            experimental_delegates=[load_delegate("libedgetpu.so.1")])
        self.interpreter.allocate_tensors()

        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def inference(self, resized_rgb_image) -> list:
        """
        Set the input tensor to the input images and run inference.
        The interpreter instance provides the corresponding class id output,
        which is used for creating the result list.

        Args:
            resized_rgb_image: Array of images with shape (no_images, img_height, img_width, channels)

        Returns:
            result: List of class ids for each input image. e.g. [0, 0, 1, 1, 0]
            scores: The classification confidence for each image. e.g. [.99, .75, .80, 1.0, .90]
        """
        if np.shape(resized_rgb_image)[0] == 0:
            return [], []
        resized_rgb_image = (resized_rgb_image * 255).astype("uint8")
        result = []
        net_results = []
        for img in resized_rgb_image:
            img = np.expand_dims(img, axis=0)
            self.interpreter.set_tensor(self.input_details[0]["index"], img)
            t_begin = time.perf_counter()
            self.interpreter.invoke()
            inference_time = time.perf_counter() - t_begin  # Seconds
            self.fps = convert_infr_time_to_fps(inference_time)
            net_output = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
            net_results.append(net_output)
            result.append(np.argmax(net_output))  # returns class id

        # TODO: optimize without a for loop.
        # Placeholder confidence of 1.0 for every classified image
        scores = []
        for i, itm in enumerate(net_results):
            scores.append(1)

        return result, scores
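# Hypothetical usage sketch: assumes a config object with a
# CLASSIFIER_MODEL_PATH attribute and a batch of image crops normalized to
# [0, 1], since inference() rescales them back to uint8 internally.
# classifier = Classifier(config)
# class_ids, scores = classifier.inference(batch_of_crops)
# print(class_ids, scores)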
class Detector(object):
    def __init__(self, label_file, model_file, threshold):
        self._threshold = float(threshold)
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = \
            self.interpreter.get_input_details()[0]['shape']
        self.tensor_index = self.interpreter.get_input_details()[0]['index']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {
                i: line.strip()
                for i, line in enumerate(f.read().replace('"', '').split(','))
            }

    def preprocess(self, img):
        img = cv2.resize(img, (self.input_width, self.input_height))
        img = img.astype(np.float32)
        img = img / 255.
        img = img - 0.5
        img = img * 2.
        img = img[:, :, ::-1]
        img = np.expand_dims(img, 0)
        return img

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Runs the model on the image and returns the raw network output."""
        img = self.preprocess(image)
        self.interpreter.set_tensor(self.tensor_index, img)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        return boxes

    def detect(self, original_image):
        # original_image.shape is (height, width, channels)
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        results = self.detect_objects(original_image)
        elapsed_ms = (time.time() - start_time) * 1000
        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}"
              .format(fps, elapsed_ms))

        def _to_original_scale(boxes):
            minmax_boxes = to_minmax(boxes)
            minmax_boxes[:, 0] *= self.output_width
            minmax_boxes[:, 2] *= self.output_width
            minmax_boxes[:, 1] *= self.output_height
            minmax_boxes[:, 3] *= self.output_height
            return minmax_boxes.astype(np.int32)

        boxes, probs = self.run(results)
        print(boxes)
        if len(boxes) > 0:
            boxes = _to_original_scale(boxes)
            original_image = draw_boxes(original_image, boxes, probs, self.labels)
        return cv2.imencode('.jpg', original_image)[1].tobytes()

    def run(self, netout):
        """Convert YOLO network output to bounding boxes.

        # Args
            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
                YOLO neural network output array

        # Returns
            boxes : array, shape of (N, 4)
                coordinate scale is normalized [0, 1]
            probs : array, shape of (N, nb_classes)
        """
        anchors = [
            0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
            3.52778, 9.77052, 9.16828
        ]
        nms_threshold = 0.2
        grid_h, grid_w, nb_box = netout.shape[:3]
        boxes = []

        # Decode the output of the network
        netout[..., 4] = _sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > self._threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # From the 5th element onwards are confidence and class probabilities
                    classes = netout[row, col, b, 5:]

                    if np.sum(classes) > 0:
                        # First 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]
                        x = (col + _sigmoid(x)) / grid_w  # center position, unit: image width
                        y = (row + _sigmoid(y)) / grid_h  # center position, unit: image height
                        w = anchors[2 * b + 0] * np.exp(w) / grid_w  # unit: image width
                        h = anchors[2 * b + 1] * np.exp(h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]
                        box = BoundBox(x, y, w, h, confidence, classes)
                        boxes.append(box)

        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)
        boxes, probs = boxes_to_array(boxes)
        return boxes, probs
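# Hypothetical usage sketch (label/model files and the image path are
# assumptions): detect() returns the annotated frame encoded as JPEG bytes.
# detector = Detector('labels.txt', 'yolo.tflite', threshold=0.3)
# jpeg_bytes = detector.detect(cv2.imread('street.jpg'))
# with open('annotated.jpg', 'wb') as f:
#     f.write(jpeg_bytes)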
def analyzing(self):
    CWD_PATH = os.getcwd()
    output_path = os.path.join(CWD_PATH, 'analyze')
    preProcess = PreProcess()
    index = preProcess.nameImage('analyze')

    # If both an image AND a folder are specified, throw an error
    if (self.IM_NAME and self.IM_DIR):
        print('you can only use IM_NAME OR IM_DIR')
        sys.exit()

    # If neither an image nor a folder is specified, default to the 'new' folder
    if (not self.IM_NAME and not self.IM_DIR):
        self.IM_DIR = 'new'

    # Import TensorFlow libraries.
    # If tensorflow is not installed, import the interpreter from tflite_runtime,
    # else import from the regular tensorflow package.
    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
    else:
        from tensorflow.lite.python.interpreter import Interpreter

    # Define path to images and grab all image filenames
    if self.IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, self.IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')
    elif self.IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, self.IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, self.MODEL_NAME, self.GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, self.MODEL_NAME, self.LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Fix for the label map of the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview:
    # the first label is '???' and has to be removed.
    if labels[0] == '???':
        del labels[0]

    # Load the Tensorflow Lite model
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    print(width, height)
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Loop over every image and perform detection
    for image_path in images:
        leaf = flower = melon = 0

        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height), interpolation=cv2.INTER_AREA)
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                # Get bounding box coordinates and draw box.
                # Interpreter can return coordinates that are outside of image
                # dimensions; force them to be within the image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(image, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(image, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

                # Count detections per class
                if (object_name == 'leaf'):
                    leaf = leaf + 1
                elif (object_name == 'flower'):
                    flower = flower + 1
                else:
                    melon = melon + 1

        # All the results have been drawn on the image, now display the image
        print('image', index, ':')
        print('leaf:', leaf)
        print('flower:', flower)
        print('melon:', melon)
        uploadToFirebase = DbFirebase(leaves=leaf, flowers=flower, melons=melon)
        uploadToFirebase.add()
        cv2.imshow('Object detector', image)
        out = os.path.join(output_path, str(index) + ".jpg")
        cv2.imwrite(out, image)
        index = index + 1
        # Briefly display the image, then continue to the next one
        cv2.waitKey(1)

    preProcess.moveImage()

    # Clean up
    cv2.destroyAllWindows()