class Inference:
    def __init__(self):
        self.interpreter = None
        self.input_details = None
        self.output_details = None
        self.height_for_model = None
        self.width_for_model = None
        self.floating_model = None
        self.min_conf_threshold = None
        self.labels = None

    def get_interpreter(self, model, enable_tpu, labels, min_conf_threshold=0.5):
        """Returns an interpreter built from the model.

        Args:
            model: full path of the .tflite file
            enable_tpu: whether to use the Edge TPU ('true' or 'false')
            labels: list of class labels
            min_conf_threshold: minimum confidence for a detection

        Returns:
            interpreter object

        Raises:
            Logs the exception if the Edge TPU runtime library is not found.
        """
        if enable_tpu == 'true':
            try:
                # Load the Edge TPU runtime; a device suffix may follow '@' in the path
                model, *device = model.split('@')
                if os.path.exists(model):
                    self.interpreter = Interpreter(
                        model_path=model,
                        experimental_delegates=[
                            load_delegate(
                                EDGETPU_SHARED_LIB,
                                {'device': device[0]} if device else {})
                        ])
                else:
                    _LOGGER.error("Please make sure the model file exists")
            except OSError:
                _LOGGER.exception("Please install the runtime for the Edge TPU")
            except ValueError:
                _LOGGER.exception("Make sure the Edge TPU is plugged in")
        else:
            self.interpreter = Interpreter(model_path=model)

        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height_for_model = self.input_details[0]['shape'][1]
        self.width_for_model = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.min_conf_threshold = min_conf_threshold
        self.labels = labels
        return self.interpreter

    def perform_inference(self, input_data):
        """Returns bounding boxes, classes, and scores.

        Args:
            input_data: the input tensor to feed into the model

        Returns:
            boxes: array of bounding boxes of the detected objects
            classes: array of class indices of the detected objects
            scores: array of confidence scores (0 to 1) of the detected objects
        """
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        # Bounding box coordinates of detected objects
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        # Class index of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]
        # Confidence of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]
        return boxes, classes, scores
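
# A minimal usage sketch for the Inference class above. The model path, label
# list, and preprocessing constants are illustrative assumptions; any
# SSD-style .tflite detection model with matching labels should work.
import cv2
import numpy as np

engine = Inference()
engine.get_interpreter('detect.tflite', 'false', ['person', 'car'], 0.5)

frame = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
resized = cv2.resize(frame, (engine.width_for_model, engine.height_for_model))
input_data = np.expand_dims(resized, axis=0)
if engine.floating_model:
    input_data = (np.float32(input_data) - 127.5) / 127.5  # normalize float input

boxes, classes, scores = engine.perform_inference(input_data)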
def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
    # Usage:
    #   PyTorch:               weights = *.pt
    #   TorchScript:                     *.torchscript
    #   CoreML:                          *.mlmodel
    #   OpenVINO:                        *.xml
    #   TensorFlow SavedModel:           *_saved_model
    #   TensorFlow GraphDef:             *.pb
    #   TensorFlow Lite:                 *.tflite
    #   TensorFlow Edge TPU:             *_edgetpu.tflite
    #   ONNX Runtime:                    *.onnx
    #   OpenCV DNN:                      *.onnx with dnn=True
    #   TensorRT:                        *.engine
    from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
    super().__init__()
    w = str(weights[0] if isinstance(weights, list) else weights)
    suffix = Path(w).suffix.lower()
    suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel', '.xml']
    check_suffix(w, suffixes)  # check weights have an acceptable suffix
    pt, jit, onnx, engine, tflite, pb, saved_model, coreml, xml = (suffix == x for x in suffixes)  # backends
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    w = attempt_download(w)  # download if not local
    if data:  # data.yaml path (optional)
        with open(data, errors='ignore') as f:
            names = yaml.safe_load(f)['names']  # class names

    if pt:  # PyTorch
        model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
        stride = max(int(model.stride.max()), 32)  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
    elif jit:  # TorchScript
        LOGGER.info(f'Loading {w} for TorchScript inference...')
        extra_files = {'config.txt': ''}  # model metadata
        model = torch.jit.load(w, _extra_files=extra_files)
        if extra_files['config.txt']:
            d = json.loads(extra_files['config.txt'])  # extra_files dict
            stride, names = int(d['stride']), d['names']
    elif dnn:  # ONNX OpenCV DNN
        LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
        check_requirements(('opencv-python>=4.5.4',))
        net = cv2.dnn.readNetFromONNX(w)
    elif onnx:  # ONNX Runtime
        LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
        cuda = torch.cuda.is_available()
        check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
        import onnxruntime
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
        session = onnxruntime.InferenceSession(w, providers=providers)
    elif xml:  # OpenVINO
        LOGGER.info(f'Loading {w} for OpenVINO inference...')
        check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
        import openvino.inference_engine as ie
        core = ie.IECore()
        network = core.read_network(model=w, weights=Path(w).with_suffix('.bin'))  # *.xml, *.bin paths
        executable_network = core.load_network(network, device_name='CPU', num_requests=1)
    elif engine:  # TensorRT
        LOGGER.info(f'Loading {w} for TensorRT inference...')
        import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
        check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
        Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        logger = trt.Logger(trt.Logger.INFO)
        with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
            model = runtime.deserialize_cuda_engine(f.read())
        bindings = OrderedDict()
        for index in range(model.num_bindings):
            name = model.get_binding_name(index)
            dtype = trt.nptype(model.get_binding_dtype(index))
            shape = tuple(model.get_binding_shape(index))
            data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
            bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
        binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
        context = model.create_execution_context()
        batch_size = bindings['images'].shape[0]
    elif coreml:  # CoreML
        LOGGER.info(f'Loading {w} for CoreML inference...')
        import coremltools as ct
        model = ct.models.MLModel(w)
    else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
        if saved_model:  # SavedModel
            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
            import tensorflow as tf
            model = tf.keras.models.load_model(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(
                    lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
            if 'edgetpu' in w.lower():  # Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                delegate = {'Linux': 'libedgetpu.so.1',
                            'Darwin': 'libedgetpu.1.dylib',
                            'Windows': 'edgetpu.dll'}[platform.system()]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # Lite
                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
    self.__dict__.update(locals())  # assign all variables to self
# Load the TensorFlow Lite model.
# If using the Edge TPU, pass the special load_delegate argument.
if use_TPU:
    interpreter = Interpreter(
        model_path=PATH_TO_CKPT,
        experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

interpreter.allocate_tensors()

# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

# Loop over every image and perform detection
for image_path in images:
    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with the given model.

        Args:
            model_path: String, path to TF-Lite Flatbuffer file.
            mirror: Flip keypoints horizontally.

        Raises:
            ValueError: An error occurred when the model output is invalid.
        """
        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path,
            experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()

        self._mirror = mirror
        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        (_, self._input_height, self._input_width,
         self._input_depth) = self.get_input_tensor_shape()
        self._input_type = self._interpreter.get_input_details()[0]['dtype']
        self._inf_time = 0

    def run_inference(self, input_data):
        """Runs inference using the zero-copy feature from pycoral and
        returns the inference time in ms.
        """
        start = time.monotonic()
        edgetpu.run_inference(self._interpreter, input_data)
        self._inf_time = time.monotonic() - start
        return self._inf_time * 1000

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

        For ideal results make sure the image fed to this function is close to
        the expected input size - it is the caller's responsibility to resize
        the image accordingly.

        Args:
            img: PIL image to run pose detection on.
        """
        image_width, image_height = img.size
        resized_image = img.resize(
            (self._input_width, self._input_height), Image.NEAREST)
        if self._input_type is np.float32:
            # Floating-point versions of PoseNet take image data in [-1, 1] range.
            input_data = np.float32(resized_image) / 128.0 - 1.0
        else:
            # Assumed to be uint8
            input_data = np.asarray(resized_image)
        self.run_inference(input_data.flatten())
        return self.ParseOutput()

    def get_input_tensor_shape(self):
        """Returns the input tensor shape."""
        return self._interpreter.get_input_details()[0]['shape']

    def get_output_tensor(self, idx):
        """Returns an output tensor view."""
        return np.squeeze(self._interpreter.tensor(
            self._interpreter.get_output_details()[idx]['index'])())

    def ParseOutput(self):
        """Parses interpreter output tensors and returns decoded poses."""
        keypoints = self.get_output_tensor(0)
        keypoint_scores = self.get_output_tensor(1)
        pose_scores = self.get_output_tensor(2)
        num_poses = self.get_output_tensor(3)
        poses = []
        for i in range(int(num_poses)):
            pose_score = pose_scores[i]
            pose_keypoints = {}
            for j, point in enumerate(keypoints[i]):
                y, x = point
                if self._mirror:
                    # Mirror horizontally: flip the x coordinate.
                    x = self._input_width - x
                pose_keypoints[KeypointType(j)] = Keypoint(
                    Point(x, y), keypoint_scores[i, j])
            poses.append(Pose(pose_keypoints, pose_score))
        return poses, self._inf_time
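
# A minimal usage sketch for the Edge TPU PoseEngine above, assuming a Coral
# device, the pycoral runtime, and a PoseNet model with the decoder delegate;
# the model filename and image size are illustrative.
from PIL import Image

engine = PoseEngine('posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
pil_img = Image.open('person.jpg').resize((641, 481), Image.NEAREST)
poses, inference_time = engine.DetectPosesInImage(pil_img)
print('found %d poses in %.3f s' % (len(poses), inference_time))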
def process_frame(frame):
    global entry, lime_count, marker_count, lime_sizes, found_list, \
        total_marker_width, pixel_per_metric

    interpreter = Interpreter(model_path=PATH_TO_CKPT, num_threads=4)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    try:
        start_time = time.time()
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        elapsed_time.append(time.time() - start_time)
    except Exception:
        print('Thread Error: interpreter not referenced')

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
    # num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
            # Get bounding box coordinates and draw box.
            # The interpreter can return coordinates that are outside of image
            # dimensions; force them to be within the image using max() and min().
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4)

            # Draw label
            object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                          (xmin + labelSize[0], label_ymin + baseLine - 10),
                          (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

            # Count objects and measure the diameter of each lime
            if xmin < LINE2 and xmax > LINE1 and not entry:
                entry = True
            if entry and xmax <= LINE1:
                entry = False
                if int(classes[i]) + 1 == 1:
                    lime_found = time.time() - start_total_time
                    try:
                        lime_count += 1
                        lime_diameter = ((xmax - xmin) + (ymax - ymin)) / (2 * pixel_per_metric)
                        lime_sizes.append(lime_diameter)
                        found_list.append(lime_found)
                        print(f'lime {lime_count} is found at {lime_found}, '
                              f'Diameter(size): {lime_diameter * 1000:.3f} mm')
                    except Exception:
                        # A marker must come first so pixel_per_metric can be calculated
                        lime_count -= 1
                        marker_count += 1
                        total_marker_width += ((xmax - xmin) + (ymax - ymin)) / 2
                        pixel_per_metric = (total_marker_width / marker_count) / MARKER_DIAMETER
                elif int(classes[i]) + 1 == 2:
                    marker_count += 1
                    total_marker_width += ((xmax - xmin) + (ymax - ymin)) / 2
                    pixel_per_metric = (total_marker_width / marker_count) / MARKER_DIAMETER

    # Insert lime count information text
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(frame, 'Lime Count: ' + str(lime_count), (10, 35), font, 0.8,
                (0, 0xFF, 0xFF), 2, cv2.LINE_AA)
    # Insert marker count information text
    cv2.putText(frame, 'Marker Count: ' + str(marker_count), (10, 55), font, 0.8,
                (0, 0xFF, 0xFF), 2, cv2.LINE_AA)

    # Overlay the counting lines
    pt1 = (LINE1, 0)
    pt2 = (LINE1, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)
    pt1 = (LINE2, 0)
    pt2 = (LINE2, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)

    frame = cv2.resize(frame, (480, 320))
    return frame
class DetectorTFLite:
    def __init__(self, path_to_checkpoint, path_to_labelmap, filter_labels=None):
        self.filter_labels = filter_labels
        with open(path_to_labelmap, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for the label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        self.interpreter = Interpreter(model_path=path_to_checkpoint)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.tf_height = self.input_details[0]['shape'][1]
        self.tf_width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

    def ExtractBoxes(self, imH, imW, boxes, classes, scores):
        det_boxes = []
        for i in range(len(scores)):
            # Get bounding box coordinates.
            # The interpreter can return coordinates that are outside of image
            # dimensions; force them to be within the image using max() and min().
            miny = int(max(1, (boxes[i][0] * imH)))
            minx = int(max(1, (boxes[i][1] * imW)))
            maxy = int(min(imH, (boxes[i][2] * imH)))
            maxx = int(min(imW, (boxes[i][3] * imW)))
            label = self.labels[int(classes[i])]
            det_boxes.append((minx, miny, maxx, maxy, label, float(scores[i])))
        return det_boxes

    def DetectFromImage(self, img):
        imH, imW, _ = img.shape
        # Acquire frame and resize to expected shape [1xHxWx3]
        frame_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.tf_width, self.tf_height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]  # Confidence of detected objects

        return self.ExtractBoxes(imH, imW, boxes, classes, scores)

    def DisplayDetection(self, image, box, det_time=None):
        img = image.copy()
        x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
        cls = str(box[4])
        score = str(np.round(box[-1], 2))

        text = cls + ": " + score
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
        cv2.rectangle(img, (x_min, y_min - 20), (x_min, y_min), (255, 255, 255), -1)
        cv2.putText(img, text, (x_min + 5, y_min - 7),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        if det_time is not None:
            fps = round(1000. / det_time, 1)
            fps_txt = str(fps) + " FPS"
            cv2.putText(img, fps_txt, (25, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

        return img
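
# A minimal usage sketch for DetectorTFLite; the file names are illustrative
# and any COCO-style SSD .tflite model with a matching labelmap should work.
detector = DetectorTFLite('model/detect.tflite', 'model/labelmap.txt')
img = cv2.imread('street.jpg')
for det in detector.DetectFromImage(img):
    print(det)  # (xmin, ymin, xmax, ymax, label, score)
    img = detector.DisplayDetection(img, det)
cv2.imwrite('street_detections.jpg', img)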
def startStream(self, modeldir, graph, labels, threshold, resolution, edgetpu):
    MODEL_NAME = modeldir
    GRAPH_NAME = graph
    LABELMAP_NAME = labels
    min_conf_threshold = float(threshold)
    resW, resH = resolution.split('x')
    imW, imH = int(resW), int(resH)
    use_TPU = edgetpu

    # Import TensorFlow libraries.
    # If tflite_runtime is installed, import the interpreter from tflite_runtime,
    # else import from regular tensorflow.
    # If using a Coral Edge TPU, also import the load_delegate library.
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # If using the Edge TPU, assign the filename for the Edge TPU model
    if use_TPU:
        # If the user has specified the name of the .tflite file, use that name,
        # otherwise use the default 'edgetpu.tflite'
        if GRAPH_NAME == 'detect.tflite':
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to the .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to the label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for the label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del labels[0]

    # Load the TensorFlow Lite model.
    # If using the Edge TPU, pass the special load_delegate argument.
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    # Create window
    cv2.namedWindow('Object detector', cv2.WINDOW_NORMAL)

    # for frame1 in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
    while True:
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)
                    and (labels[int(classes[i])] == 'person')):
                # Get bounding box coordinates and draw box.
                # The interpreter can return coordinates that are outside of image
                # dimensions; force them to be within the image using max() and min().
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

                # Draw circle in center
                xcenter = xmin + int(round((xmax - xmin) / 2))
                ycenter = ymin + int(round((ymax - ymin) / 2))
                self.detect = setDetect(xcenter, ycenter, imH, imW)
                cv2.circle(frame, (xcenter, ycenter), 5, (0, 0, 255), thickness=-1)

                # Print info
                # print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')')

        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
class object_detector:
    def __init__(self):
        PATH_TO_CKPT = rospy.get_param("/object_detector/weights_path")
        PATH_TO_LABELS = rospy.get_param("/object_detector/labels_path")
        camera_input = rospy.get_param("/object_detector/cam_feed")
        use_tpu = int(rospy.get_param("/object_detector/tpu"))
        self.min_conf_threshold = float(rospy.get_param("/object_detector/threshold"))
        self.imW = int(rospy.get_param("/object_detector/imW"))
        self.imH = int(rospy.get_param("/object_detector/imH"))

        pkg = importlib.util.find_spec('tflite_runtime')
        if pkg:
            from tflite_runtime.interpreter import Interpreter
            if use_tpu:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if use_tpu:
                from tensorflow.lite.python.interpreter import load_delegate

        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del self.labels[0]

        # Load the TensorFlow Lite model.
        # The weights_path parameter already points at the full .tflite file
        # (including any Edge TPU-compiled variant).
        # If using the Edge TPU, pass the special load_delegate argument.
        if use_tpu:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

        # Initialize frame rate calculation
        self.frame_rate_calc = 1
        self.freq = cv2.getTickFrequency()

        self.image_pub = rospy.Publisher("/detected_image", Image, queue_size=10)
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber(camera_input, Image, self.callback)

    def callback(self, data):
        t1 = cv2.getTickCount()
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)

        frame = cv_image.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]  # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                # Get bounding box coordinates and draw box.
                # The interpreter can return coordinates that are outside of image
                # dimensions; force them to be within the image using max() and min().
                ymin = int(max(1, (boxes[i][0] * self.imH)))
                xmin = int(max(1, (boxes[i][1] * self.imW)))
                ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                xmax = int(min(self.imW, (boxes[i][3] * self.imW)))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

        # All the results have been drawn on the frame; draw the framerate and publish it.
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / self.freq
        frame_rate_calc = 1 / time1
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame, "bgr8"))
        except CvBridgeError as e:
            print(e)
class TeachableMachine:
    """
    Functions:
      :meth:`~openpibo.vision.TeachableMachine.load`
      :meth:`~openpibo.vision.TeachableMachine.predict`

    Uses the Teachable Machine feature with Pibo's camera.

    * Uses the ``standard image model`` of an ``image project``.
    * A model trained with ``Teachable Machine`` can be loaded and used for inference.

    example::

        from openpibo.vision import TeachableMachine

        tm = TeachableMachine()
        # The code above must be run before all of the examples below.
    """

    def __init__(self):
        pass

    def load(self, model_path, label_path):
        """
        Initializes the Teachable Machine model.

        example::

            tm.load('model_keras.h5', 'labels.txt')

        :param str model_path: model file trained with Teachable Machine
        :param str label_path: label file trained with Teachable Machine
        """
        with open(label_path, 'r') as f:
            c = f.readlines()
            class_names = [item.split(maxsplit=1)[1].strip('\n') for item in c]

        # Load the TFLite model and allocate tensors
        self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()

        # Get input and output tensors.
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Check the type of the input tensor
        self.floating_model = self.input_details[0]['dtype'] == np.float32

        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.class_names = class_names

    def predict(self, img):
        """
        Runs inference based on the loaded Teachable Machine model.

        example::

            cm = Camera()
            img = cm.read()
            tm.predict(img)

        :param numpy.ndarray img: image object
        :returns: inference result, the class name with the highest probability
        """
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.width, self.height))
        image = Image.fromarray(img)

        # Add a batch dimension
        input_data = np.expand_dims(image, axis=0)

        if self.floating_model:
            input_data = (np.float32(input_data) - 127.5) / 127.5

        # Feed data to the input tensor and run the interpreter
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Obtain results and map them to the classes
        preds = self.interpreter.get_tensor(self.output_details[0]['index'])
        preds = np.squeeze(preds)

        return self.class_names[np.argmax(preds)], preds
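
# A minimal usage sketch for TeachableMachine outside the Pibo runtime; the
# file names follow the docstring examples above, and reading the input with
# cv2.imread instead of the Pibo Camera is an illustrative assumption.
import cv2

tm = TeachableMachine()
tm.load('model_keras.h5', 'labels.txt')
img = cv2.imread('sample.jpg')
name, preds = tm.predict(img)
print('Predicted class:', name)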
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False, offsetRefineStep=2,
                 scoreThreshold=0.8, maxPoseDetections=5, nmsRadius=30,
                 minPoseConfidence=0.15):
        """Creates a PoseEngine with the given model.

        Args:
            model_path: String, path to TF-Lite Flatbuffer file.
            mirror: Flip keypoints horizontally.

        Raises:
            ValueError: An error occurred when the model output is invalid.
        """
        self.interpreter = Interpreter(model_path)
        self.interpreter.allocate_tensors()

        self._mirror = mirror
        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        (_, self.image_height, self.image_width,
         self.image_depth) = self.get_input_tensor_shape()

        self.heatmaps_nx = self.interpreter.get_output_details()[0]['shape'][2]
        self.heatmaps_ny = self.interpreter.get_output_details()[0]['shape'][1]
        self.heatmaps_stride_x = self.getStride(self.image_width, self.heatmaps_nx)
        self.heatmaps_stride_y = self.getStride(self.image_height, self.heatmaps_ny)
        self.quant_heatmaps_r, self.quant_heatmaps_off = \
            self.interpreter.get_output_details()[0]['quantization']
        self.quant_offsets_short_r, self.quant_offsets_short_off = \
            self.interpreter.get_output_details()[1]['quantization']
        self.quant_offsets_mid_r, self.quant_offsets_mid_off = \
            self.interpreter.get_output_details()[2]['quantization']

        self.offsetRefineStep = offsetRefineStep
        self.scoreThreshold = scoreThreshold
        self.maxPoseDetections = maxPoseDetections
        self.nmsRadius = nmsRadius
        self.sqRadius = self.nmsRadius * self.nmsRadius
        self.minPoseConfidence = minPoseConfidence

        # The API returns all the output tensors flattened and concatenated. We
        # have to figure out the boundaries from the tensor shapes & sizes.
        offset = 0
        self._output_offsets = [0]
        for size in self.get_all_output_tensors_sizes():
            offset += size
            self._output_offsets.append(offset)

    def getStride(self, l, n):
        strides = (8, 16, 32)
        return strides[np.argmin(np.abs(strides - l / n))]

    def get_input_tensor_shape(self):
        return self.interpreter.get_input_details()[0]['shape']

    def get_all_output_tensors_sizes(self):
        sizes = np.array([], dtype='int32')
        for d in self.interpreter.get_output_details():
            s = np.squeeze(self.interpreter.get_tensor(d['index'])).flatten().size
            sizes = np.append(sizes, int(s))
        return sizes

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

        For ideal results make sure the image fed to this function is close to
        the expected input size - it is the caller's responsibility to resize
        the image accordingly.

        Args:
            img: numpy array containing the image
        """
        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            img = np.pad(img,
                         [[0, max(0, self.image_height - img.shape[0])],
                          [0, max(0, self.image_width - img.shape[1])],
                          [0, 0]],
                         mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert img.shape == tuple(self._input_tensor_shape[1:])

        # Run the inference (the API expects the data to be flattened).
        return self.ParseOutput(self.run_inference(img))

    def run_inference(self, img):
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)
        input_tensor()[:, :, :, :] = np.frombuffer(
            img, dtype='uint8').reshape(input_tensor().shape)

        start_time = time.monotonic()
        self.interpreter.invoke()
        elapsed_ms = (time.monotonic() - start_time) * 1000

        out = np.empty(0)
        for d in self.interpreter.get_output_details():
            o = np.squeeze(self.interpreter.get_tensor(d['index'])).flatten()
            out = np.append(out, o)
        return (elapsed_ms, out)

    def logistic(self, x):
        return 1 / (1 + np.exp(-x))

    def isPeak(self, heatmaps_flat, index):
        maxindex = index // len(KEYPOINTS)
        maxkeypoint = index % len(KEYPOINTS)
        y_index = maxindex // self.heatmaps_nx
        x_index = maxindex % self.heatmaps_nx
        y_index_min = np.max((y_index - 1, 0))
        y_index_max = np.min((y_index + 1, self.heatmaps_ny - 1))
        x_index_min = np.max((x_index - 1, 0))
        x_index_max = np.min((x_index + 1, self.heatmaps_nx - 1))
        for y_current in range(y_index_min, y_index_max + 1):
            for x_current in range(x_index_min, x_index_max + 1):
                index_current = len(KEYPOINTS) * (y_current * self.heatmaps_nx + x_current) + maxkeypoint
                if (heatmaps_flat[index_current] > heatmaps_flat[index]) and (index_current != index):
                    return False
        return True

    def ParseOutput(self, output):
        inference_time, output = output
        outputs = [output[i:j] for i, j in zip(self._output_offsets, self._output_offsets[1:])]

        heatmaps = outputs[0].reshape(-1, len(KEYPOINTS))
        offsets_short_y = outputs[1].reshape(-1, 2 * len(KEYPOINTS))[:, 0:len(KEYPOINTS)]
        offsets_short_x = outputs[1].reshape(-1, 2 * len(KEYPOINTS))[:, len(KEYPOINTS):2 * len(KEYPOINTS)]
        offsets_mid_fwd_y = outputs[2].reshape(-1, 4 * len(poseChain))[:, 0:len(poseChain)]
        offsets_mid_fwd_x = outputs[2].reshape(-1, 4 * len(poseChain))[:, len(poseChain):2 * len(poseChain)]
        offsets_mid_bwd_y = outputs[2].reshape(-1, 4 * len(poseChain))[:, 2 * len(poseChain):3 * len(poseChain)]
        offsets_mid_bwd_x = outputs[2].reshape(-1, 4 * len(poseChain))[:, 3 * len(poseChain):4 * len(poseChain)]

        # Dequantize the raw tensors.
        heatmaps = self.logistic((heatmaps - self.quant_heatmaps_off) * self.quant_heatmaps_r)
        heatmaps_flat = heatmaps.flatten()
        offsets_short_y = (offsets_short_y - self.quant_offsets_short_off) * self.quant_offsets_short_r
        offsets_short_x = (offsets_short_x - self.quant_offsets_short_off) * self.quant_offsets_short_r
        offsets_mid_fwd_y = (offsets_mid_fwd_y - self.quant_offsets_mid_off) * self.quant_offsets_mid_r
        offsets_mid_fwd_x = (offsets_mid_fwd_x - self.quant_offsets_mid_off) * self.quant_offsets_mid_r
        offsets_mid_bwd_y = (offsets_mid_bwd_y - self.quant_offsets_mid_off) * self.quant_offsets_mid_r
        offsets_mid_bwd_x = (offsets_mid_bwd_x - self.quant_offsets_mid_off) * self.quant_offsets_mid_r

        # Obtain the peaks of heatmaps larger than scoreThreshold.
        orderedindices = np.argsort(heatmaps_flat)[::-1]
        largeheatmaps_indices = np.empty(0, dtype='int32')
        for i in range(len(orderedindices)):
            if heatmaps_flat[orderedindices[i]] < self.scoreThreshold:
                break
            if self.isPeak(heatmaps_flat, orderedindices[i]):
                largeheatmaps_indices = np.append(largeheatmaps_indices, orderedindices[i])

        pose_list = np.full(self.maxPoseDetections * 2 * len(KEYPOINTS), 0.0,
                            dtype='float32').reshape(-1, len(KEYPOINTS), 2)
        maxindex_list = np.full(self.maxPoseDetections * len(KEYPOINTS), -1,
                                dtype='int32').reshape(-1, len(KEYPOINTS))
        score_list = np.full(self.maxPoseDetections * len(KEYPOINTS), 0.0,
                             dtype='float32').reshape(-1, len(KEYPOINTS))
        pose_score_list = np.full(self.maxPoseDetections, 0.0, dtype='float32')
        nPoses = 0

        # Obtain at most maxPoseDetections poses.
        for point in range(len(largeheatmaps_indices)):
            if nPoses >= self.maxPoseDetections:
                break

            # Obtain a root candidate.
            maxindex = largeheatmaps_indices[point] // len(KEYPOINTS)
            maxkeypoint = largeheatmaps_indices[point] % len(KEYPOINTS)
            y = self.heatmaps_stride_y * (maxindex // self.heatmaps_nx)
            x = self.heatmaps_stride_x * (maxindex % self.heatmaps_nx)
            y += offsets_short_y[maxindex, maxkeypoint]
            x += offsets_short_x[maxindex, maxkeypoint]

            # Skip a keypoint with (x, y) that is close to the existing keypoints.
            skip = 0
            for p in range(nPoses):
                y_exist = pose_list[p, maxkeypoint, 0]
                x_exist = pose_list[p, maxkeypoint, 1]
                if (y_exist - y) * (y_exist - y) + (x_exist - x) * (x_exist - x) < self.sqRadius:
                    skip = 1
                    break
            if skip == 1:
                continue

            # Set the maxkeypoint as root.
            pose_list[nPoses, maxkeypoint, 0] = y
            pose_list[nPoses, maxkeypoint, 1] = x
            maxindex_list[nPoses, maxkeypoint] = maxindex
            score_list[nPoses, maxkeypoint] = heatmaps[maxindex, maxkeypoint]

            # Backward decoding.
            for edge in reversed(range(len(poseChain))):
                sourceKeypointId = parentToChildEdges[edge]
                targetKeypointId = childToParentEdges[edge]
                if (maxindex_list[nPoses, sourceKeypointId] != -1 and
                        maxindex_list[nPoses, targetKeypointId] == -1):
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_bwd_y[maxindex, edge]
                    x += offsets_mid_bwd_x[maxindex, edge]
                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0, self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0, self.heatmaps_nx - 1)
                    maxindex_list[nPoses, targetKeypointId] = self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0, self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0, self.heatmaps_nx - 1)
                        maxindex_list[nPoses, targetKeypointId] = self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses, targetKeypointId], targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses, targetKeypointId], targetKeypointId]
                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses, targetKeypointId], targetKeypointId]

            # Forward decoding.
            for edge in range(len(poseChain)):
                sourceKeypointId = childToParentEdges[edge]
                targetKeypointId = parentToChildEdges[edge]
                if (maxindex_list[nPoses, sourceKeypointId] != -1 and
                        maxindex_list[nPoses, targetKeypointId] == -1):
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_fwd_y[maxindex, edge]
                    x += offsets_mid_fwd_x[maxindex, edge]
                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0, self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0, self.heatmaps_nx - 1)
                    maxindex_list[nPoses, targetKeypointId] = self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0, self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0, self.heatmaps_nx - 1)
                        maxindex_list[nPoses, targetKeypointId] = self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses, targetKeypointId], targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses, targetKeypointId], targetKeypointId]
                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses, targetKeypointId], targetKeypointId]

            # Calculate the pose score.
            score = 0
            for k in range(len(KEYPOINTS)):
                y = pose_list[nPoses, k, 0]
                x = pose_list[nPoses, k, 1]
                closekeypoint_exists = False
                for p in range(nPoses):
                    y_exist = pose_list[p, k, 0]
                    x_exist = pose_list[p, k, 1]
                    if (y_exist - y) * (y_exist - y) + (x_exist - x) * (x_exist - x) < self.sqRadius:
                        closekeypoint_exists = True
                        break
                if not closekeypoint_exists:
                    score += score_list[nPoses, k]
            score /= len(KEYPOINTS)

            if score > self.minPoseConfidence:
                pose_score_list[nPoses] = score
                nPoses += 1
            else:
                for k in range(len(KEYPOINTS)):
                    maxindex_list[nPoses, k] = -1

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nPoses):
            keypoint_dict = {}
            for point_i, point in enumerate(pose_list[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    score_list[pose_i, point_i])
                if self._mirror:
                    keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_score_list[pose_i]))
        return poses, inference_time
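
# A minimal usage sketch for the CPU-decoding PoseEngine above; the model
# filename and the exact input size are illustrative assumptions, and the
# image is resized to the model input so the uint8 buffer stays contiguous.
import numpy as np
from PIL import Image

engine = PoseEngine('posenet_mobilenet_v1_quant.tflite')
img = np.asarray(Image.open('person.jpg').convert('RGB')
                 .resize((engine.image_width, engine.image_height)))
poses, inference_ms = engine.DetectPosesInImage(img)
print('%d poses decoded in %.1f ms' % (len(poses), inference_ms))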
class Detector:
    """
    Perform object detection with the given model. The model is a quantized
    tflite file; if the detector cannot find it at the given path, it will
    download it from the neuralet repository automatically.

    :param config: A ConfigEngine instance which provides the necessary parameters.
    """

    def __init__(self, config):
        self.config = config
        # Get the model name from the config
        self.model_name = self.config.get_section_dict('Detector')['Name']
        # Frames per second
        self.fps = None
        self.model_file = 'ped_ssdlite_mobilenet_v2_quantized_edgetpu.tflite'
        self.model_path = 'libs/detectors/edgetpu/data/' + self.model_file

        # Get the model .tflite file path from the config.
        # If there is no .tflite file at the path, it will be downloaded automatically from base_url.
        user_model_path = self.config.get_section_dict('Detector')['ModelPath']
        if len(user_model_path) > 0:
            print('using %s as model' % user_model_path)
            self.model_path = user_model_path
        else:
            base_url = 'https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/'
            url = base_url + self.model_name + '/' + self.model_file
            if not os.path.isfile(self.model_path):
                print('model does not exist under: ', self.model_path, 'downloading from ', url)
                wget.download(url, self.model_path)

        # Load the TFLite model and allocate tensors
        self.interpreter = Interpreter(
            self.model_path,
            experimental_delegates=[load_delegate("libedgetpu.so.1")])
        self.interpreter.allocate_tensors()

        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get the class id and score threshold from the config
        self.class_id = int(self.config.get_section_dict('Detector')['ClassID'])
        self.score_threshold = float(self.config.get_section_dict('Detector')['MinScore'])

    def inference(self, resized_rgb_image):
        """
        Sets the input tensor to the input image and gets the output.
        The interpreter instance provides the corresponding detection output,
        which is used for creating the result.

        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a list of dictionaries like
                [{"id": "1-0", "bbox": [y1, x1, y2, x2], "score": 0.98}, ...]
        """
        input_image = np.expand_dims(resized_rgb_image, axis=0)
        # Fill input tensor with input_image
        self.interpreter.set_tensor(self.input_details[0]["index"], input_image)
        t_begin = time.perf_counter()
        self.interpreter.invoke()
        inference_time = time.perf_counter() - t_begin  # seconds
        self.fps = convert_infr_time_to_fps(inference_time)

        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        labels = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        # TODO: will be used for getting the number of objects
        # num = self.interpreter.get_tensor(self.output_details[3]['index'])

        result = []
        for i in range(boxes.shape[1]):  # number of boxes
            if labels[0, i] == self.class_id and scores[0, i] > self.score_threshold:
                result.append({
                    "id": str(self.class_id) + '-' + str(i),
                    "bbox": boxes[0, i, :],
                    "score": scores[0, i]
                })
        return result
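
# A minimal sketch of driving Detector.inference with a stub standing in for
# ConfigEngine. The stub class, its values, and the zero-image input are
# illustrative assumptions; a Coral Edge TPU and its runtime must be present.
import numpy as np

class StubConfig:
    def get_section_dict(self, section):
        return {'Name': 'ped_ssdlite_mobilenet_v2', 'ModelPath': '',
                'ClassID': '1', 'MinScore': '0.5'}

detector = Detector(StubConfig())
frame = np.zeros((300, 300, 3), dtype=np.uint8)  # input size assumed 300x300
for det in detector.inference(frame):
    print(det['id'], det['score'])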
class Detector(object):
    def __init__(self, label_file, model_file, threshold):
        self._threshold = float(threshold)
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = \
            self.interpreter.get_input_details()[0]['shape']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {
                i: line.strip()
                for i, line in enumerate(f.read().replace('"', '').split(','))
            }

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Runs the model on the image and returns the raw network output."""
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get the first output tensor (the YOLO grid output)
        boxes = self.get_output_tensor(0)
        return boxes

    def detect(self, original_image):
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        image = cv2.resize(original_image, (self.input_width, self.input_height))
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.detect_objects(image)
        elapsed_ms = (time.time() - start_time) * 1000
        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms))

        def _to_original_scale(boxes):
            minmax_boxes = to_minmax(boxes)
            minmax_boxes[:, 0] *= self.output_width
            minmax_boxes[:, 2] *= self.output_width
            minmax_boxes[:, 1] *= self.output_height
            minmax_boxes[:, 3] *= self.output_height
            return minmax_boxes.astype(int)

        boxes, probs = self.run(results)
        print(boxes)
        if len(boxes) > 0:
            boxes = _to_original_scale(boxes)
            original_image = draw_boxes(original_image, boxes, probs, self.labels)
        return cv2.imencode('.jpg', original_image)[1].tobytes()

    def run(self, netout):
        """Converts the YOLO network output to bounding boxes.

        # Args
            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
                YOLO neural network output array

        # Returns
            boxes : array, shape of (N, 4)
                coordinate scale is normalized [0, 1]
            probs : array, shape of (N, nb_classes)
        """
        anchors = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
                   5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
        nms_threshold = 0.2
        grid_h, grid_w, nb_box = netout.shape[:3]
        boxes = []

        # Decode the output of the network
        netout[..., 4] = _sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > self._threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # From the 5th element onwards are the confidence and class probabilities
                    classes = netout[row, col, b, 5:]
                    if np.sum(classes) > 0:
                        # First 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]
                        x = (col + _sigmoid(x)) / grid_w  # center position, unit: image width
                        y = (row + _sigmoid(y)) / grid_h  # center position, unit: image height
                        w = anchors[2 * b + 0] * np.exp(w) / grid_w  # unit: image width
                        h = anchors[2 * b + 1] * np.exp(h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]
                        box = BoundBox(x, y, w, h, confidence, classes)
                        boxes.append(box)

        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)
        boxes, probs = boxes_to_array(boxes)
        return boxes, probs
class ObjectDetectorLite:
    def __init__(self, model_path='detect.tflite', threads_num=4):
        try:
            self.interpreter = Interpreter(model_path=model_path)
            # self.interpreter.set_num_threads(threads_num)
        except Exception:
            self.interpreter = tf.lite.Interpreter(model_path=model_path)
            self.interpreter.set_num_threads(threads_num)

        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def _boxes_coordinates(self, image, boxes, classes, scores,
                           max_boxes_to_draw=20, min_score_thresh=.5):
        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        number_boxes = min(max_boxes_to_draw, boxes.shape[0])
        person_boxes = []
        for i in range(number_boxes):
            if scores is None or scores[i] > min_score_thresh:
                box = tuple(boxes[i].tolist())
                ymin, xmin, ymax, xmax = box
                im_height, im_width, _ = image.shape
                left, right, top, bottom = [
                    int(z) for z in (xmin * im_width, xmax * im_width,
                                     ymin * im_height, ymax * im_height)
                ]
                person_boxes.append([(left, top), (right, bottom), scores[i],
                                     LABELS[classes[i]]])
        return person_boxes

    def detect(self, image, threshold=0.1):
        # Resize and normalize image for network input
        frame = cv2.resize(image, (300, 300))
        frame = np.expand_dims(frame, axis=0)
        frame = frame.astype('uint8')

        # Run the model
        self.interpreter.set_tensor(self.input_details[0]['index'], frame)
        start_time = time.time()
        self.interpreter.invoke()
        stop_time = time.time()
        print("time: ", stop_time - start_time)

        # Get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])

        # Find detected box coordinates
        return self._boxes_coordinates(image,
                                       np.squeeze(boxes[0]),
                                       np.squeeze(classes[0] + 1).astype(np.int32),
                                       np.squeeze(scores[0]),
                                       min_score_thresh=threshold)
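
# A minimal usage sketch for ObjectDetectorLite, assuming a 300x300 SSD
# detect.tflite model and a module-level LABELS mapping; the image file name
# is illustrative.
import cv2

detector = ObjectDetectorLite('detect.tflite')
image = cv2.imread('input.jpg')
for (top_left, bottom_right, score, label) in detector.detect(image, threshold=0.5):
    print(label, score, top_left, bottom_right)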
def obj_center(args, objX, objY, centerX, centerY):
    # Signal trap to handle keyboard interrupt
    signal.signal(signal.SIGINT, signal_handler)

    # Define and parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir',
                        help='Folder the .tflite file is located in',
                        required=True)
    parser.add_argument('--graph',
                        help='Name of the .tflite file, if different than detect.tflite',
                        default='detect.tflite')
    parser.add_argument('--labels',
                        help='Name of the labelmap file, if different than labelmap.txt',
                        default='labelmap.txt')
    parser.add_argument('--threshold',
                        help='Minimum confidence threshold for displaying detected objects',
                        default=0.5)
    parser.add_argument('--resolution',
                        help='Desired webcam resolution in WxH. If the webcam does not '
                             'support the resolution entered, errors may occur.',
                        default='1280x720')
    args = parser.parse_args()

    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    min_conf_threshold = float(args.threshold)
    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)

    # Import the TensorFlow Lite interpreter from tflite_runtime
    from tflite_runtime.interpreter import Interpreter
    print("using tflite_runtime, not tensorflow")

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to the .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to the label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for the label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del labels[0]

    # Load the TensorFlow Lite model.
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    while True:
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()
        print(t1)

        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)
                    and labels[int(classes[i])] == "book"):
                # Get bounding box coordinates and draw box.
                # The interpreter can return coordinates that are outside of image
                # dimensions; force them to be within the image using max() and min().
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))

                # Center coordinates of the found object
                bookX = (xmin + xmax) / 2
                bookY = (ymin + ymax) / 2
                objX.value = bookX
                objY.value = bookY

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
def setup_ssd_edgetpu(modelParas): # Get Args MODEL_NAME = modelParas[0] GRAPH_NAME = modelParas[1] LABELMAP_NAME = modelParas[2] min_conf_threshold = float(modelParas[3]) resW, resH = modelParas[4:6] imW, imH = int(resW), int(resH) use_TPU = modelParas[6] # Import TensorFlow libraries # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 tfParas = [ height, width, floating_model, labels, input_mean, input_std, input_details, min_conf_threshold, imH, imW, interpreter, output_details ] return tfParas
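# setup_ssd_edgetpu returns twelve positional values in a bare list, which is easy to
# mis-index at the call site. A small, hypothetical wrapper (not in the source) that
# names the same fields in the same order makes the hand-off self-documenting:
from collections import namedtuple

TFParas = namedtuple('TFParas', [
    'height', 'width', 'floating_model', 'labels', 'input_mean', 'input_std',
    'input_details', 'min_conf_threshold', 'imH', 'imW', 'interpreter',
    'output_details'])

# tf_paras = TFParas(*setup_ssd_edgetpu(modelParas))
# tf_paras.interpreter.invoke()  # fields are now accessed by name, not position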
class Classifier: """ Perform image classification with the given model. The model is an int8 quantized tflite file which if the classifier can not find it at the path it will download it from neuralet repository automatically. :param config: Is a ConfigEngine instance which provides necessary parameters. """ def __init__(self, config): self.config = config self.model_name = "OFMClassifier_edgetpu.tflite" self.model_path = '/repo/data/edgetpu/' + self.model_name self.fps = None if not os.path.isfile(self.model_path): url = "https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/OFMClassifier/OFMClassifier_edgetpu.tflite" # noqa print("model does not exist under: ", self.model_path, "downloading from ", url) wget.download(url, self.model_path) # Load TFLite model and allocate tensors self.interpreter = Interpreter( self.model_path, experimental_delegates=[load_delegate("libedgetpu.so.1")]) self.interpreter.allocate_tensors() # Get the model input and output tensor details self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() def inference(self, resized_rgb_images) -> list: """ Inference function sets input tensor to input image and gets the output. The interpreter instance provides corresponding class id output which is used for creating result Args: resized_rgb_images: Array of images with shape (no_images, img_height, img_width, channels) Returns: result: List of class id for each input image. ex: [0, 0, 1, 1, 0] scores: The classification confidence for each class. ex: [.99, .75, .80, 1.0] """ if np.shape(resized_rgb_images)[0] == 0: return [], [] resized_rgb_images = (resized_rgb_images * 255).astype("uint8") result = [] net_results = [] for img in resized_rgb_images: img = np.expand_dims(img, axis=0) self.interpreter.set_tensor(self.input_details[0]["index"], img) t_begin = time.perf_counter() self.interpreter.invoke() inference_time = time.perf_counter() - t_begin # Second self.fps = convert_infr_time_to_fps(inference_time) net_output = self.interpreter.get_tensor( self.output_details[0]['index'])[0] net_results.append(net_output) result.append(np.argmax(net_output)) # returns class id # TODO: optimized without for scores = [] for i, itm in enumerate(net_results): scores.append((itm[result[i]] - 1) / 255.0) return result, scores
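# The trailing "TODO: optimized without for" in Classifier.inference can be addressed
# with plain NumPy: take the winning logit per row and apply the same (x - 1) / 255
# rescaling in one vectorized step. This assumes net_results stacks into an
# (N, num_classes) array, as the per-image uint8 outputs above suggest.
import numpy as np

def scores_from_outputs(net_results):
    outputs = np.asarray(net_results, dtype=np.float32)    # (N, num_classes)
    class_ids = np.argmax(outputs, axis=1)                 # winning class per image
    winning = outputs[np.arange(len(outputs)), class_ids]  # gather the max logits
    return class_ids.tolist(), ((winning - 1) / 255.0).tolist()

# Example with two fake 3-class outputs:
print(scores_from_outputs([[10, 200, 30], [250, 5, 5]]))
# ([1, 0], [0.7804..., 0.9765...])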
CWD_PATH = os.getcwd() EDGE_TPU = False if EDGE_TPU: face_model_path = 'model/face-detector-quantized_edgetpu.tflite' face_interpreter = Interpreter(model_path=os.path.join(CWD_PATH, face_model_path), experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) else: face_model_path = 'model/face_detection_front.tflite' face_interpreter = Interpreter(model_path=os.path.join(CWD_PATH, face_model_path)) face_interpreter.allocate_tensors() # Get model details face_input_details = face_interpreter.get_input_details()[0] face_output_details = face_interpreter.get_output_details() height = face_input_details['shape'][1] width = face_input_details['shape'][2] # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW, imH)).start() time.sleep(1) anchors = np.load('anchors.npy') while True: # opencv # Start timer (for calculating frame rate) t1 = cv2.getTickCount()
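# The EDGE_TPU branch above duplicates the interpreter construction. A small helper
# (hypothetical, not from the source) centralizes it and degrades gracefully to CPU
# when the Edge TPU runtime or device is missing:
import os
from tflite_runtime.interpreter import Interpreter, load_delegate

def make_interpreter(model_path, use_edgetpu=False):
    if use_edgetpu:
        try:
            delegate = load_delegate('libedgetpu.so.1.0')
            return Interpreter(model_path=model_path,
                               experimental_delegates=[delegate])
        except (OSError, ValueError) as e:  # missing runtime / unplugged device
            print('Edge TPU unavailable, falling back to CPU:', e)
    return Interpreter(model_path=model_path)

# face_interpreter = make_interpreter(os.path.join(CWD_PATH, face_model_path), EDGE_TPU)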
def start(self, Handler): self.thread = Thread(target=self.update, args=()) self.thread.start() self.startTime = time.time() MODEL_NAME = "Sample_TFLite_model" GRAPH_NAME = 'edgetpu.tflite' LABELMAP_NAME = 'labelmap.txt' min_conf_threshold = float(0.5) resW, resH = ('1080x720').split('x') imW, imH = int(resW), int(resH) pkg = importlib.util.find_spec('tensorflow') if pkg is None: from tflite_runtime.interpreter import Interpreter from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter from tensorflow.lite.python.interpreter import load_delegate CWD_PATH = os.getcwd() PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # BUG of Tensorflow: first label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # To save each photo of every run with a different name. # On second boot the photos will be overwritten, in order to not consume too much space pic_counter = 0 # Every 10 frames captured where you spot a thief, take a picture counter = 0 self.SMS_Flag = 0 while True: # Increment time if the flag was changed to 1 if (Handler.getTimeFlag() == 1): self.startTime = time.time() Handler.setTimeFlag(0) print("Time incremented") # Stops everything after 20 seconds from the last timer if (time.time() >= self.startTime + 20): Handler.setCameraState(0, self.MQTT) print("Camera STOPPED after {}".format(time.time() - self.initialTime)) break # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = self.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. 
if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[ 0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[ 0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) fakeFlag = 1 # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and classes[i] == 0): counter += 1 # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 255), 2) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize( label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 1) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1) # Draw label text # Fakeflag for the promo video if (fakeFlag == 0): names = [ "Frame1.png", "Frame3.png", "Frame4.png", "Frame5.png", "Frame6.png" ] requests.post( 'https://api.telegram.org/YOUR_TOKEN/sendMessage', data={ 'chat_id': 297501031, 'text': "Attention, a human presence has been detected in the house!\nHere are the photos:" }) for i in range(len(names)): requests.post( 'https://api.telegram.org/YOUR_TOKEN/sendPhoto', data={'chat_id': 297501031}, files={ 'photo': open('./{}'.format(names[i]), 'rb') }) time.sleep(2) fakeFlag = 1 # Counter for the frame that recognize a person elif (counter > 5): print("ATTENTION, INTRUSION DETECTED!") if self.SMS_Flag == 0: for number in self.numbers: self.MQTT.publish("SMS_ALERT_CAM", json.dumps({"number": number})) self.SMS_Flag = 1 for ID in self.IDs: if (pic_counter == 0): requests.post( 'https://api.telegram.org/YOUR_TOKEN/sendMessage', data={ 'chat_id': ID, 'text': "ATTENTION, a human presence has been detected in the house\nHere are the photos" }) filename = "me_{}.jpg".format(pic_counter) cv2.imwrite(filename, frame) requests.post( 'https://api.telegram.org/YOUR_TOKEN/sendPhoto', data={'chat_id': ID}, files={ 'photo': open('./{}'.format(filename), 'rb') }) counter = 0 pic_counter += 1 # Draw framerate in corner of frame cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA) # Calculate framerate t2 = cv2.getTickCount() time1 = (t2 - t1) / freq frame_rate_calc = 1 / time1 # 
Clean up self.stop() cv2.destroyAllWindows() print("Window destroyed") Handler.destroyCamera()
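# The alert path above repeats the same Telegram POST several times and passes
# open(...) handles to requests without closing them. A hedged sketch of a reusable
# helper (send_telegram_photo is our name; the YOUR_TOKEN placeholder is kept elided
# exactly as in the source):
import requests

def send_telegram_photo(chat_id, image_path, caption=None,
                        base_url='https://api.telegram.org/YOUR_TOKEN'):
    if caption:
        requests.post(base_url + '/sendMessage',
                      data={'chat_id': chat_id, 'text': caption})
    # The context manager closes the file handle that the inline calls above leak
    with open(image_path, 'rb') as photo:
        requests.post(base_url + '/sendPhoto',
                      data={'chat_id': chat_id}, files={'photo': photo})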
class YOLOV5: def __init__(self, wanted_labels=None, model_file=None, label_file=None, num_threads=None, edgetpu=False, libedgetpu=None, score_threshold=0.25): basedir = os.getenv('DEEPDISHHOME','.') if model_file is None: model_file = os.path.join(basedir, 'detectors/yolov5/yolov5s-int8.tflite') if label_file is None: label_file = os.path.join(basedir, 'detectors/yolov5/coco_classes.txt') self.cfg_file = os.path.join(basedir, 'detectors/yolov5/yolov5s.yaml') if wanted_labels is None: wanted_labels = ['person'] self.wanted_labels = wanted_labels self.label_file = label_file self.score_threshold = score_threshold self.labels = self._get_labels() self.use_edgetpu = edgetpu self.int8 = False if 'saved_model' in model_file: self.mode = 'saved_model' if 'keras' not in sys.modules: print('yolov5: saved_model mode requires keras') sys.exit(1) elif '.tflite' in model_file: self.mode = 'tflite' if 'int8' in model_file: self.int8 = True else: print('unable to determine format of yolov5 model') sys.exit(1) if libedgetpu is None: libedgetpu = edgetpu_lib_name() if self.mode == 'tflite': # Load TFLite model and allocate tensors. self.interpreter = Interpreter( model_path=model_file, num_threads=num_threads, experimental_delegates=[load_delegate(libedgetpu)] if self.use_edgetpu else None) self.interpreter.allocate_tensors() self.num_threads = num_threads # Get input and output tensors. self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() _, self.height, self.width, _ = self.input_details[0]['shape'].tolist() elif self.mode == 'saved_model': self.model = keras.models.load_model(model_file) self.num_threads = 1 _, self.height, self.width, _ = self.model.inputs[0].shape.as_list() yaml_file = Path(self.cfg_file) with open(yaml_file) as f: cfg = yaml.load(f, Loader=yaml.FullLoader) self.anchors = cfg['anchors'] def _get_labels(self): labels_path = os.path.expanduser(self.label_file) with open(labels_path) as f: labels = {i: line.strip() for i, line in enumerate(f.readlines())} return labels def detect_image(self, img): img_size = img.size img_resized = img.convert('RGB').resize((self.width, self.height), Image.ANTIALIAS) input_data = np.expand_dims(img_resized, 0).astype(np.float32) if self.int8: scale, zero_point = self.input_details[0]['quantization'] input_data = (input_data / scale + zero_point).astype(np.uint8) if self.mode == 'tflite': self.interpreter.set_tensor(self.input_details[0]['index'], input_data) self.interpreter.invoke() output_data = self.interpreter.get_tensor(self.output_details[0]['index']) raw = np.copy(output_data) elif self.mode == 'saved_model': input_data /= 255.0 output_data = self.model(input_data).numpy() if self.int8: scale, zero_point = self.output_details[0]['quantization'] output_data = output_data.astype(np.float32) output_data = (output_data - zero_point) * scale x = np.copy(output_data) boxes = np.copy(x[..., :4]) boxes[..., 0] = x[..., 0] - x[..., 2] / 2 boxes[..., 1] = x[..., 1] - x[..., 3] / 2 boxes[..., 2] = x[..., 0] + x[..., 2] / 2 boxes[..., 3] = x[..., 1] + x[..., 3] / 2 x[..., 5:] *= x[..., 4:5] best_classes = np.expand_dims(np.argmax(x[..., 5:], axis=-1), axis=-1) confidences = np.take_along_axis(x, best_classes + 5, axis=-1) y = np.concatenate((boxes, confidences, best_classes.astype(np.float32)), axis=-1) y = y[np.where(y[..., 4] >= self.score_threshold)] y[...,:4] *= np.array([img_size[0], img_size[1], img_size[0], img_size[1]]) return_boxs = [] return_lbls = [] return_scrs = [] for *xyxy, score, 
labelidx in y: label = self.labels[int(labelidx)] if label in self.wanted_labels and score >= self.score_threshold: tlwh = np.copy(xyxy) tlwh[2] = xyxy[2] - xyxy[0] tlwh[3] = xyxy[3] - xyxy[1] return_boxs.append(list(tlwh)) return_lbls.append(label) return_scrs.append(score) return (return_boxs, return_lbls, return_scrs)
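# The int8 path in detect_image applies TFLite's affine quantization in both
# directions: q = x / scale + zero_point on input, x = (q - zero_point) * scale on
# output. A tiny round-trip demo with made-up quantization parameters (not the
# model's real ones):
import numpy as np

scale, zero_point = 0.00390625, 0                      # illustrative values
x = np.array([0.0, 0.25, 0.5], dtype=np.float32)
q = (x / scale + zero_point).astype(np.uint8)          # quantize, as in detect_image
x_back = (q.astype(np.float32) - zero_point) * scale   # dequantize the output
print(q, x_back)  # [  0  64 128] [0.   0.25 0.5 ]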
class WakeWord2: def __init__(self): # Sliding window self.window = np.zeros(int(RECORD_DURATION * RESAMPLE_RATE) * 2) # Load model self.interpreter = Interpreter(WAKEWORD_MODEL_PATH) self.interpreter.allocate_tensors() self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() # Thread and flags self.ON = True self.running = True self.wakeword_flag = False self.wakeword_thread = threading.Thread(target=self.wakeword_run, name="wakeword_thread") self.wakeword_thread.start() print('WakeWord Initialized') def close(self): self.running = False self.wakeword_thread.join() return # Background loop that continuously checks for wake words def wakeword_run(self): with sd.InputStream(channels=NUM_CHANNELS, samplerate=SAMPLE_RATE, blocksize=int(SAMPLE_RATE * RECORD_DURATION), callback=self.wakeword_process): while self.running: pass def __call__(self): if self.wakeword_flag: self.wakeword_flag = False return True return False def wakeword_process(self, rec, frames, time, error): # Start timing for testing start_time = timeit.default_timer() # Notify if errors if error: print("Error: ", error) # Remove 2nd dimension from recording sample and downsample rec = np.squeeze(rec) rec = scipy.signal.decimate(rec, DOWNSAMPLE) # Analyze a sliding window if the sound that overlaps with last window by 50% # to catch wake words that might span time segments self.window[:len(self.window) // 2] = self.window[len(self.window) // 2:] self.window[len(self.window) // 2:] = rec # Process image with MFCC (Mel Frequency Cepstrum) that scales the frequency in order # to match more closely what the human ear can hear mfccs = python_speech_features.base.mfcc(self.window, samplerate=RESAMPLE_RATE, winlen=0.256, winstep=0.050, numcep=NUM_MFCC, nfilt=26, nfft=2048, preemph=0.0, ceplifter=0, appendEnergy=False, winfunc=np.hanning) mfccs = mfccs.transpose() # Make prediction from model in_tensor = np.float32( mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1)) self.interpreter.set_tensor(self.input_details[0]['index'], in_tensor) self.interpreter.invoke() output_data = self.interpreter.get_tensor( self.output_details[0]['index']) val = output_data[0][0] # test for the wake word ('go') if val > WORD_THRESHOLD: print('listening') self.wakeword_flag = True if DEBUG_ACC: # print accuracy of each detection print(val) if DEBUG_TIME: # print processing time for a sound clip print(timeit.default_timer() - start_time)
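# WakeWord2 keeps a buffer twice the capture length and shifts it by half on every
# callback, so each analysis window overlaps the previous one by 50% and a wake word
# that straddles two capture blocks is still seen whole. An isolated sketch of that
# shift (block contents are stand-ins for decimated audio):
import numpy as np

window = np.zeros(8)
for block_id in range(1, 4):
    new_block = np.full(4, block_id, dtype=float)
    window[:4] = window[4:]   # slide: keep the newest half of the old window
    window[4:] = new_block    # append the fresh capture block
    print(window)
# window after each block: [0 0 0 0 1 1 1 1] -> [1 1 1 1 2 2 2 2] -> [2 2 2 2 3 3 3 3]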
class camera_interface(): """ The main interface for using the camera and determining the grip we need to be in. https://www.hackster.io/gatoninja236/scan-qr-codes-in-real-time-with-raspberry-pi-a5268b Attributes: count (int): Count of saved screenshots. File titles are frame'count'.jpg. cap (cv2 VideoCapture): The VideoCapture object. detector (QRCodeDetector): The QR Code detecting object. """ def __init__(self,resolution=(640,480),framerate=30): self.count = 0 # self.cap = cv2.VideoCapture(0) self.vs = VideoStream(resolution=(1280,720),framerate=30).start() # self.stream = cv2.VideoCapture(0) # ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) # ret = self.stream.set(3,resolution[0]) # ret = self.stream.set(4,resolution[1]) #Wait for the camera to startup for one seconds time.sleep(1) print("[INFO] Created video capture object") print("[INFO] loading model...") #Load the tflite model and labelmap # Get path to current working directory GRAPH_NAME = "detect.tflite" MODEL_NAME = "Camera_Interpreter/Coco" LABELMAP_NAME = "labelmap.txt" CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: self.labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if self.labels[0] == '???': del(self.labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument use_TPU = False if use_TPU: self.interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: self.interpreter = Interpreter(model_path=PATH_TO_CKPT) self.interpreter.allocate_tensors() # Get model details self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() self.height = self.input_details[0]['shape'][1] self.width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) self.input_mean = 127.5 self.input_std = 127.5 # QR code detection object # self.detector = cv2.QRCodeDetector() self.cam_data = "" self.object_spotted = False self.test_count = 0 self.killed_thread = False self.cam_image = None self.cam_image_index = 0 self.object_spotted_T0 = 0 self.object_not_spotted_delta_req = 3 #Initialize the paused flag to false self.temp_pause = False def camera_read_threader(self): #Start the read cam thread read_cam = threading.Thread(target=self.read_cam_thread, args=()) read_cam.start() while(self.cam_image_index == 0): time.sleep(0.05) #Start the image decode thread decoder = threading.Thread(target=self.decode_image_thread, args=()) decoder.start() while not self.killed_thread and read_cam.is_alive() and decoder.is_alive(): time.sleep(0.25) #Flag is thrown or error, so ensure flag is thrown and wait for threads to join self.killed_thread = True read_cam.join() decoder.join() def decode_image_thread(self): previous_index = None while not self.killed_thread: #Detect and decode the stored image if it's ready # t = time.time() if(previous_index != self.cam_image_index and (not self.temp_pause)): previous_index = self.cam_image_index # data, _, _ = 
self.detector.detectAndDecode(self.cam_image) Deprecated QR Code reader data, score = self.detect_main_object(self.cam_image) # print("[INFO] Camera objects: " + data) # if(data not in grips._value2member_map_): # data = grips.openGrip.value #If the camera sees an object, skip the time requirement if(data != ""): self.cam_data = data self.object_spotted_T0 = time.time() self.object_spotted = True #If the camera doesn't see an object, require a delay before reporting nothing else: if((time.time() - self.object_spotted_T0) > self.object_not_spotted_delta_req): # print("[DEBUG] Delta Req passed; reporting no object now") self.cam_data = data self.object_spotted = False #####No sleep since detecting/decoding takes significant time, just do it as fast as possible # print("[INFO] Time to decode image: " + (str(time.time() - t))) time.sleep(0.01) def detect_main_object(self, frame1): min_conf_threshold = 0.35 # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (self.width, self.height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if self.floating_model: input_data = (np.float32(input_data) - self.input_mean) / self.input_std # Perform the actual detection by running the model with the image as input self.interpreter.set_tensor(self.input_details[0]['index'],input_data) self.interpreter.invoke() # Retrieve detection results # boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0] # Class index of detected objects scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Confidence of detected objects highest_scoring_label = "" highest_score = 0 for i in range(len(scores)): object_name = self.labels[int(classes[i])] # Look up object name from "labels" array using class index if((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and (scores[i] > highest_score) and (object_name in grips._value2member_map_)): # Draw label highest_scoring_label = object_name highest_score = scores[i] return (highest_scoring_label, highest_score) def read_cam_thread(self): while not self.killed_thread: if(not self.temp_pause): # t = time.time() #Get camera image, rescale, and store in class variable frame = self.vs.read() self.cam_image = imutils.resize(frame, width=400) #Increase index by 1 self.cam_image_index += 1 #Pause temply time.sleep(0.2) # print("Time to save/resize new image: " + (str(time.time() - t))) # def read_cam(self): # # get the image # _, img = self.cap.read() #TODO: #14 Downscale the resolution for faster processing # # get bounding box coords and data # data, bbox, _ = self.detector.detectAndDecode(img) # #Define a parameter we can easily read later if anything is detected # is_object = False # #Update parameter/output the data we found, if any # if data: # #print("data found: ", data) # is_object = True # #return the information we got from the camera # # cv2.imwrite("frame1.jpg", img) # save frame as JPEG file # return data, bbox, img, is_object # def read_cam_display_out(self): # #Call the standard method to get the qr data / bounding box # data, bbox, img, _ = self.read_cam() # # if there is a bounding box, draw one, along with the data # if(bbox is not None): # for i in range(len(bbox)): # cv2.line(img, tuple(bbox[i][0]), 
tuple(bbox[(i+1) % len(bbox)][0]), color=(255, # 0, 255), thickness=2) # cv2.putText(img, data, (int(bbox[0][0][0]), int(bbox[0][0][1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, # 0.5, (0, 255, 0), 2) # #if data: # #print("data found: ", data) # # display the image preview # cv2.imshow("code detector", img) # # save the image # cv2.imwrite("frame1.jpg", img) # save frame as JPEG file # #self.count += 1 def end_camera_session(self): #Stop the camera thread self.killed_thread = True time.sleep(0.1) #Release the camera object self.vs.stop()
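# The decode thread above debounces "nothing seen": a detection updates the state
# immediately, but an empty result only clears it after object_not_spotted_delta_req
# seconds. A minimal standalone version of that hysteresis (class name ours):
import time

class SpottedDebouncer:
    def __init__(self, clear_after=3.0):
        self.clear_after = clear_after
        self.last_seen = 0.0
        self.spotted = False

    def update(self, detection):
        if detection:                      # any hit refreshes the timer at once
            self.last_seen = time.time()
            self.spotted = True
        elif time.time() - self.last_seen > self.clear_after:
            self.spotted = False           # report "gone" only after the grace period
        return self.spotted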
def approximation(limit): detect = 0 MODEL_NAME = 'obj_detection_tflite' GRAPH_NAME = 'detect.tflite' LABELMAP_NAME = 'labelmap.txt' min_conf_threshold = 0.6 imW, imH = 1280, 720 pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter else: from tensorflow.lite.python.interpreter import Interpreter CWD_PATH = os.getcwd() PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] if labels[0] == '???': del (labels[0]) interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 pi_camera = PiCamera(resolution=(imW, imH), framerate=30).start() time.sleep(1) p_height = 0 p_width = 0 detections = 0 approximation_detected = False timer_mark = timer_start = time.time() while timer_mark - timer_start < limit: print(timer_mark - timer_start) frame1 = pi_camera.read() frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects for i in range(len(scores)): if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0): y_min = int(max(1, (boxes[i][0] * imH))) x_min = int(max(1, (boxes[i][1] * imW))) y_max = int(min(imH, (boxes[i][2] * imH))) x_max = int(min(imW, (boxes[i][3] * imW))) object_name = labels[int(classes[i])] if object_name == 'car' or object_name == 'bus' or object_name == 'truck': detections += 1 # Parenthesized explicitly: `and` binds tighter than `or`, so without parentheses the `detections > 1` guard applied only to the width check if ((y_max - y_min) > p_height * 1.15 or (x_max - x_min) > p_width * 1.15) and detections > 1: play_sound_notification("waiting") limit += 3 p_height = y_max - y_min p_width = x_max - x_min timer_mark = time.time() cv2.destroyAllWindows() pi_camera.stop()
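# The growth-ratio guard above is worth isolating so it can be tested on its own.
# A self-contained version of the corrected predicate (function name ours): a vehicle
# counts as approaching when its box grew at least 15% in either dimension and this
# is not the first detection.
def approaching(y_min, y_max, x_min, x_max, p_height, p_width, detections):
    grew_taller = (y_max - y_min) > p_height * 1.15
    grew_wider = (x_max - x_min) > p_width * 1.15
    return (grew_taller or grew_wider) and detections > 1

print(approaching(0, 120, 0, 100, 100, 100, 2))  # True: 20% taller on a repeat detection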
img = cv2.imread('Capture.png') h = img.shape[0] w = img.shape[1] img = cv2.resize(img, (256, 144)) img = np.asarray(img) img = img / 255. img = img.astype(np.float32) img = img[np.newaxis, :, :, :] # Tensorflow Lite interpreter = Interpreter(model_path='model_float16_quant.tflite', num_threads=4) interpreter.allocate_tensors() input_details = interpreter.get_input_details()[0]['index'] output_details = interpreter.get_output_details()[0]['index'] interpreter.set_tensor(input_details, img) interpreter.invoke() output = interpreter.get_tensor(output_details) print(output.shape) out1 = output[0][:, :, 0] out2 = output[0][:, :, 1] # Cast to uint8 before inverting: np.invert on the default int array yields -1/-256 instead of the intended 255/0 masks out1 = np.invert(((out1 > 0.5) * 255).astype(np.uint8)) out2 = np.invert(((out2 > 0.5) * 255).astype(np.uint8)) print('out1:', out1.shape) print('out2:', out2.shape)
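# h and w are captured above but never used; presumably the 144x256 masks are meant
# to be scaled back to the source resolution before saving. A hedged completion of
# that step, continuing the snippet above (the output file name is ours):
out1_full = cv2.resize(out1, (w, h), interpolation=cv2.INTER_NEAREST)
cv2.imwrite('mask1.png', out1_full)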
def detectPenKey(img): # parser = argparse.ArgumentParser() # parser.add_argument('--modeldir', help='Folder the .tflite file is located in', # default='models\\model_objDetec\\penKeyModel') # parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', # default='model-9020516539576614912_tflite_2021-04-01T07_44_31.691148Z_model.tflite') # parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', # default='labels.txt') # parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', # default=0.5) # parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', # default=None) # parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', # default=None) # parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', # action='store_true') # # args = parser.parse_args() listOfObjDetec = [] MODEL_NAME = "models\\model_objDetec\\penKeyModel" GRAPH_NAME = "model-9020516539576614912_tflite_2021-04-01T07_44_31.691148Z_model.tflite" LABELMAP_NAME = "labels.txt" min_conf_threshold = float(0.5) use_TPU = False # # Parse input image name and directory. # IM_NAME = args.image # IM_DIR = args.imagedir # # # If both an image AND a folder are specified, throw an error # if (IM_NAME and IM_DIR): # print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.') # sys.exit() # # # If neither an image or a folder are specified, default to using 'test1.jpg' for image name # if (not IM_NAME and not IM_DIR): # IM_NAME = 'keys11.jpg' # # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Define path to images and grab all image filenames # if IM_DIR: # PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR) # images = glob.glob(PATH_TO_IMAGES + '/*') # # elif IM_NAME: # PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME) # images = glob.glob(PATH_TO_IMAGES) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. 
# If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Loop over every image and perform detection # for image_path in images: # Load image and resize to expected shape [1xHxWx3] image = img image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) imH, imW, _ = image.shape image_resized = cv2.resize(image_rgb, (width, height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: print("hello") input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # getting label/class object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index print("detected:", object_name, ":", int(scores[i] * 100)) listOfObjDetec.append(object_name) # debug ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) # Draw label # object_name = labels[int(classes[i])] # Look up object name from "labels" array using class label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(img, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(img, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text if listOfObjDetec: print(listOfObjDetec) objDict = dict(Counter(listOfObjDetec)) print(objDict) strg = "Detected " for i in objDict: print(i) strg += "" + str(objDict[i]) + " " + i + ", " print(strg) # All the results have been drawn on the image, now display the image cv2.imshow('Object detector', img) cv2.waitKey(0) cv2.destroyAllWindows() # Press any key to continue to next image, or press 'q' to quit return strg else: return "No Objects Detected"
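# The summary string above is built with repeated concatenation and ends with a
# dangling ", ". An equivalent one-liner over the same Counter dict avoids both:
from collections import Counter

objDict = Counter(['pen', 'pen', 'key'])   # stand-in for listOfObjDetec
strg = 'Detected ' + ', '.join('%d %s' % (n, name) for name, n in objDict.items())
print(strg)  # Detected 2 pen, 1 key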
def get_item_dictionary(): # Define and parse input arguments parser = argparse.ArgumentParser() # parser.add_argument('--modeldir', help='Folder the .tflite file is located in', # required=True) parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None) parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None) parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = "Sample_TFLite_model" GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) use_TPU = args.edgetpu # Parse input image name and directory. IM_NAME = args.image IM_DIR = args.imagedir # If both an image AND a folder are specified, throw an error if (IM_NAME and IM_DIR): print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.') sys.exit() # If neither an image or a folder are specified, default to using 'test1.jpg' for image name if (not IM_NAME and not IM_DIR): import picamera # print("about to take a photo") with picamera.PiCamera() as camera: camera.resolution = (1280,720) camera.capture("/home/pi/Desktop/tflite1/test_picam.jpg") # print("taken photo") IM_NAME = '/home/pi/Desktop/tflite1/CS190_P2/test_image.jpg' # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Define path to images and grab all image filenames if IM_DIR: PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR) images = glob.glob(PATH_TO_IMAGES + '/*') elif IM_NAME: PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME) images = glob.glob(PATH_TO_IMAGES) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = '/home/pi/Desktop/tflite1/CS190_P2/src/edge/Sample_TFLite_model/detect.tflite' # Path to label map file PATH_TO_LABELS = '/home/pi/Desktop/tflite1/CS190_P2/src/edge/Sample_TFLite_model/labelmap.txt' # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. 
if labels[0] == '???': del(labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 result_label = [] # Loop over every image and perform detection for image_path in images: # Load image and resize to expected shape [1xHxWx3] image = cv2.imread(image_path) image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) imH, imW, _ = image.shape image_resized = cv2.resize(image_rgb, (width, height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'],input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1,(boxes[i][0] * imH))) xmin = int(max(1,(boxes[i][1] * imW))) ymax = int(min(imH,(boxes[i][2] * imH))) xmax = int(min(imW,(boxes[i][3] * imW))) cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2) # Draw label object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index if object_name == 'potted plant': object_name = 'pineapple' label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%' result_label.append(object_name) labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # All the results have been drawn on the image, now display the image # cv2.imshow('Object detector', image) cv2.imwrite("detected_test.jpg", image) print(Counter(result_label)) # Press any key to continue to next image, or press 'q' to quit # # if cv2.waitKey(0) == ord('q'): # break # Clean up # cv2.destroyAllWindows() return dict(Counter(result_label))
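# get_item_dictionary returns a plain dict of label -> count. A caller-side sketch
# (inventory_diff is our illustrative name, and comparing two snapshots is our
# assumed use case, not stated in the source):
from collections import Counter

def inventory_diff(before, after):
    return dict(Counter(after) - Counter(before))  # only counts that increased

print(inventory_diff({'apple': 2}, {'apple': 3, 'pineapple': 1}))
# {'apple': 1, 'pineapple': 1}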
class ImageDetection: def __init__(self, modeldir): GRAPH_NAME = 'detect.tflite' LABELMAP_NAME = 'labelmap.txt' CWD_PATH = os.getcwd() PATH_TO_CKPT = os.path.join(CWD_PATH, modeldir, GRAPH_NAME) PATH_TO_LABELS = os.path.join(CWD_PATH, modeldir, LABELMAP_NAME) with open(PATH_TO_LABELS, 'r') as f: self.labels = [line.strip() for line in f.readlines()] if self.labels[0] == '???': del (self.labels[0]) self.min_conf_threshold = 0.6 self.input_mean = 127.5 self.input_std = 127.5 self.interpreter = Interpreter(model_path=PATH_TO_CKPT) self.interpreter.allocate_tensors() self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() self.height = self.input_details[0]['shape'][1] self.width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) def detect(self, image_path): image = cv2.imread(image_path) image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) im_h, im_w, _ = image.shape image_resized = cv2.resize(image_rgb, (self.width, self.height)) input_data = np.expand_dims(image_resized, axis=0) if self.floating_model: input_data = (np.float32(input_data) - self.input_mean) / self.input_std self.interpreter.set_tensor(self.input_details[0]['index'], input_data) self.interpreter.invoke() boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] classes = self.interpreter.get_tensor( self.output_details[1]['index'])[0] scores = self.interpreter.get_tensor( self.output_details[2]['index'])[0] detect_text = "" for i in range(len(scores)): if self.min_conf_threshold < scores[i] <= 1.0: ymin = int(max(1, (boxes[i][0] * im_h))) xmin = int(max(1, (boxes[i][1] * im_w))) ymax = int(min(im_h, (boxes[i][2] * im_h))) xmax = int(min(im_w, (boxes[i][3] * im_w))) cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) object_name = self.labels[int(classes[i])] label = '%s: %d%%' % (object_name, int(scores[i] * 100)) label_size, base_line = cv2.getTextSize( label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) label_ymin = max(ymin, label_size[1] + 10) cv2.rectangle( image, (xmin, label_ymin - label_size[1] - 10), (xmin + label_size[0], label_ymin + base_line - 10), (255, 255, 255), cv2.FILLED) cv2.putText(image, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) detect_text = detect_text + " " + object_name cv2.imshow('Detector', image) os.system('echo %s | festival --tts & ' % detect_text) sleep(5) cv2.destroyAllWindows() return
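# The festival call above pipes unescaped text through a shell via os.system. A safer
# equivalent under the same assumption (festival installed and on PATH) feeds the text
# on stdin without invoking a shell (helper name ours):
import subprocess

def speak(text):
    # festival reads the text on stdin and keeps speaking after we return,
    # mirroring the original trailing '&'
    proc = subprocess.Popen(['festival', '--tts'], stdin=subprocess.PIPE)
    proc.stdin.write(text.encode())
    proc.stdin.close()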
def object_detection(): label_out = [] mid_x_out = [] mid_y_out = [] class VideoStream: """Camera object that controls video streaming from the Picamera""" def __init__(self, resolution=(640, 480), framerate=30): # Initialize the PiCamera and the camera image stream self.stream = cv2.VideoCapture(0) ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) ret = self.stream.set(3, resolution[0]) ret = self.stream.set(4, resolution[1]) # Read first frame from the stream (self.grabbed, self.frame) = self.stream.read() # Variable to control when the camera is stopped self.stopped = False def start(self): # Start the thread that reads frames from the video stream Thread(target=self.update, args=()).start() return self def update(self): # Keep looping indefinitely until the thread is stopped while True: # If the camera is stopped, stop the thread if self.stopped: # Close camera resources self.stream.release() return # Otherwise, grab the next frame from the stream (self.grabbed, self.frame) = self.stream.read() def read(self): # Return the most recent frame return self.frame def stop(self): # Indicate that the camera and thread should be stopped self.stopped = True # Define and parse input arguments parser = argparse.ArgumentParser() parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True) parser.add_argument( '--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument( '--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument( '--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument( '--resolution', help= 'Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.', default='1280x720') parser.add_argument( '--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = args.modeldir GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) resW, resH = args.resolution.split('x') imW, imH = int(resW), int(resH) use_TPU = args.edgetpu # Import TensorFlow libraries # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tensorflow') if pkg is None: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. 
if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW, imH), framerate=30).start() time.sleep(1) #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): while True: flag = 0 # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) cv2.circle(frame, (xmin, ymin), 5, (255, 255, 0), cv2.FILLED) cv2.circle(frame, (xmax, ymax), 5, (0, 255, 255), cv2.FILLED) x_diff = xmax - xmin y_diff = ymax - ymin mid_x = x_diff / 2 + xmin mid_x = math.ceil(mid_x) mid_y = ymin + y_diff / 2 mid_y = math.ceil(mid_y) cv2.circle(frame, (0, 0), 5, (0, 0, 255), cv2.FILLED) cv2.circle(frame, (mid_x, mid_y), 5, (255, 255, 255), cv2.FILLED) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, 
(xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text label_out.append(label) mid_x_out.append(mid_x) mid_y_out.append(mid_y) # Draw framerate in corner of frame cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA) # All the results have been drawn on the frame, so it's time to display it. #cv2.imshow('Object detector', frame) # Calculate framerate t2 = cv2.getTickCount() time1 = (t2 - t1) / freq frame_rate_calc = 1 / time1 (h, w) = frame.shape[:2] cv2.waitKey(100) break # Clean up cv2.destroyAllWindows() videostream.stop() return (label_out, mid_x_out, mid_y_out, h / 2, w / 2)
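# object_detection() hands back detection centers plus the frame's half-height and
# half-width, which is exactly the data a pan/tilt loop needs for an error signal.
# A hedged caller sketch (steering_error is our name, not from the source):
def steering_error(mid_x, mid_y, half_w, half_h):
    # positive dx: object right of center; positive dy: object below center
    return mid_x - half_w, mid_y - half_h

# labels_out, xs, ys, half_h, half_w = object_detection()  # needs --modeldir on argv
# if xs:
#     print(labels_out[0], steering_error(xs[0], ys[0], half_w, half_h))
print(steering_error(700, 400, 640.0, 360.0))  # (60.0, 40.0): nudge right and down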
class Detection: def __init__(self): self.MODEL_NAME = "detect" self.GRAPH_NAME = "detect.tflite" self.LABELMAP_NAME = "label_map.txt" self.min_conf_threshold = 0.70 self.resW, self.resH = (1280, 720) self.imW, self.imH = int(self.resW), int(self.resH) # self.use_TPU = (True if 'projects' in str(os.getcwd()) else False) self.use_TPU = False self.frame_rate_calc = None self.item_detected = False self.latest_item = None self.detection_counter = [ { "name": "apple", "counter": 0 }, { "name": "aubergine", "counter": 0 }, { "name": "banana", "counter": 0 }, { "name": "broccoli", "counter": 0 }, { "name": "cucumber", "counter": 0 }, { "name": "orange", "counter": 0 }, { "name": "paprika", "counter": 0 }, { "name": "pear", "counter": 0 } ] # Import TFLite requirements self.pkg = importlib.util.find_spec('tflite_runtime') if self.pkg: from tflite_runtime.interpreter import Interpreter if self.use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if self.use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if self.use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (self.GRAPH_NAME == 'detect.tflite'): self.GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() PATH_TO_CKPT = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(self.GRAPH_NAME) PATH_TO_LABELS = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format( self.LABELMAP_NAME) PATH_TO_OBJ_NAMES = "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names" # Load the label map with open(PATH_TO_LABELS, 'r') as f: self.labels = [line.strip() for line in f.readlines()] # Fix for potential label map issue if self.labels[0] == '???': del (self.labels[0]) if self.use_TPU: self.interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: self.interpreter = Interpreter(model_path=PATH_TO_CKPT) self.interpreter.allocate_tensors() print("Model loaded and tensors allocated") # Get model details self.input_details = self.interpreter.get_input_details() #print("Input details: {}".format(self.input_details)) self.output_details = self.interpreter.get_output_details() #print("Output detais: {}".format(self.output_details)) self.height = self.input_details[0]['shape'][1] self.width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) self.input_mean = 127.5 self.input_std = 127.5 # Initialize frame rate calculation self.frame_rate_calc = 1 self.freq = cv2.getTickFrequency() # Initialize video stream self.videostream = VideoStream(resolution=(self.imW, self.imH)) self.videostream = self.videostream.start() def filter_boxes(self, box_xywh, scores, score_threshold=0.4, input_shape=tf.constant([416, 416])): scores_max = tf.math.reduce_max(scores, axis=-1) mask = scores_max >= score_threshold class_boxes = tf.boolean_mask(box_xywh, mask) pred_conf = tf.boolean_mask(scores, mask) class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]]) pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]]) box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1) input_shape = tf.cast(input_shape, dtype=tf.float32) box_yx = box_xy[..., ::-1] box_hw = box_wh[..., ::-1] box_mins = (box_yx - (box_hw / 2.)) / 
input_shape box_maxes = (box_yx + (box_hw / 2.)) / input_shape boxes = tf.concat([ box_mins[..., 0:1], # y_min box_mins[..., 1:2], # x_min box_maxes[..., 0:1], # y_max box_maxes[..., 1:2] # x_max ], axis=-1) # return tf.concat([boxes, pred_conf], axis=-1) return (boxes, pred_conf) def read_class_names(self, class_file_name): names = {} with open(class_file_name, 'r') as data: for ID, name in enumerate(data): names[ID] = name.strip('\n') return names # TODO: Definde cfg.YOLO.CLASSES def draw_bbox(self, image, bboxes, classes, show_label=True): num_classes = len(classes) image_h, image_w, _ = image.shape hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)] colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) random.seed(0) random.shuffle(colors) random.seed(None) out_boxes, out_scores, out_classes, num_boxes = bboxes for i in range(num_boxes[0]): if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes: continue coor = out_boxes[0][i] coor[0] = int(coor[0] * image_h) coor[2] = int(coor[2] * image_h) coor[1] = int(coor[1] * image_w) coor[3] = int(coor[3] * image_w) fontScale = 0.5 score = out_scores[0][i] class_ind = int(out_classes[0][i]) bbox_color = colors[class_ind] bbox_thick = int(0.6 * (image_h + image_w) / 600) c1, c2 = (coor[1], coor[0]), (coor[3], coor[2]) cv2.rectangle(image, c1, c2, bbox_color, bbox_thick) if show_label: bbox_mess = '%s: %.2f' % (classes[class_ind], score) t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0] c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3) cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) # filled cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA) return image def perform(self): while True: t1 = cv2.getTickCount() frame1 = self.videostream.read() print("Frame read from stream") frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # frame_resized = cv2.resize(frame_rgb, (self.width, self.height)) # input_data = np.expand_dims(frame_resized, axis=0) image_data = cv2.resize(frame, (608, 608)) image_data = image_data / 255. images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) # Normalize pixel values if using a floating model (i.e. 
            # Normalize pixel values if using a floating model (i.e. if the
            # model is non-quantized)
            # if self.floating_model:
            #     input_data = (np.float32(input_data) - self.input_mean) / self.input_std

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], images_data)
            print("Performing detection")
            self.interpreter.invoke()
            print("Detection performed")

            pred = [self.interpreter.get_tensor(self.output_details[i]['index'])
                    for i in range(len(self.output_details))]
            boxes, pred_conf = self.filter_boxes(
                pred[0], pred[1], score_threshold=0.25,
                input_shape=tf.constant([608, 608]))
            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.3,   # TODO: Make var
                score_threshold=0.3  # TODO: Make var
            )
            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(),
                         valid_detections.numpy()]

            class_names = self.read_class_names(
                "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names")
            print("Drawing bounding boxes")
            frame = self.draw_bbox(frame, pred_bbox, class_names)
            # frame = Image.fromarray(frame.astype(np.uint8))
            # cv2.imshow('Object detector', frame.astype(np.uint8))
            time.sleep(5)
            image = cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2RGB)  # kept from the original; currently unused
            if cv2.waitKey(1) == ord('x'):
                break
            if self.item_detected:
                break
        return self.item_detected, self.latest_item

    def run(self, cloud=False):
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = self.videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))

        if cloud:
            # TODO: Send image to cloud and get data back
            content_type = 'image/jpeg'
            headers = {'content-type': content_type}
            _, img_encoded = cv2.imencode('.jpg', frame_rgb)
            request_address = "http://a24dcb00998c.ngrok.io/api/detect"
            # Send HTTP request with image and receive response
            print("Sending image to cloud api and awaiting response")
            response = requests.post(request_address,
                                     data=img_encoded.tobytes(),  # tostring() is deprecated
                                     headers=headers)
            print("Response received:")
            print(json.loads(response.text))
        else:
            input_data = np.expand_dims(frame_resized, axis=0)
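            # SSD-style input handling: a quantized model takes the uint8
            # frame as-is, while a float model needs the values mapped from
            # [0, 255] to [-1, 1], which is what mean = std = 127.5 does below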
            # Normalize pixel values if using a floating model (i.e. if the
            # model is non-quantized)
            if self.floating_model:
                input_data = (np.float32(input_data) - self.input_mean) / self.input_std

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()

            # Retrieve detection results
            boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
            classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
            scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects
            num = self.interpreter.get_tensor(self.output_details[3]['index'])[0]      # Total number of detected objects (inaccurate and not needed)

            # Loop over all detections and draw a detection box if the
            # confidence is above the minimum threshold
            for i in range(len(scores)):
                if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                    # (previous direct latch, superseded by the score counter)
                    # self.item_detected = True
                    # if scores[i] > max_score:
                    #     max_score = scores[i]
                    #     self.latest_item = self.labels[int(classes[i])]

                    # Get bounding box coordinates and draw box. The
                    # interpreter can return coordinates outside of the image
                    # dimensions; force them inside with max() and min()
                    ymin = int(max(1, (boxes[i][0] * self.imH)))
                    xmin = int(max(1, (boxes[i][1] * self.imW)))
                    ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                    xmax = int(min(self.imW, (boxes[i][3] * self.imW)))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                    # Draw label
                    object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                    self.increase_detection_counter(object_name, scores[i])
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                                  (xmin + labelSize[0], label_ymin + baseLine - 10),
                                  (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(self.frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it
        cv2.imshow('Object detector', frame)
        if cv2.waitKey(1) == ord('x'):
            cv2.destroyAllWindows()

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / self.freq
        self.frame_rate_calc = 1 / time1
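        # Score accumulation: every detection above adds its confidence to the
        # per-label counter, and get_object_with_score_five() below confirms a
        # label once its counter reaches 5 (e.g. seven to eight consecutive
        # frames around the 0.70 minimum confidence)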
        self.item_detected, self.latest_item = self.get_object_with_score_five()
        if self.item_detected:
            self.reset_detection_counter()
        return self.item_detected, self.latest_item

    def increase_detection_counter(self, detected_item, score):
        for entry in self.detection_counter:
            if entry["name"] == detected_item:
                entry["counter"] += score

    def get_object_with_score_five(self):
        max_score = 0
        latest_object = "None"
        detected_object = False
        for entry in self.detection_counter:
            if entry["counter"] >= 5 and entry["counter"] > max_score:
                latest_object = entry["name"]
                detected_object = True
                max_score = entry["counter"]
        return detected_object, latest_object

    def reset_detection_counter(self):
        # Reset the accumulated scores in place instead of rebuilding the list
        for entry in self.detection_counter:
            entry["counter"] = 0

    def destroy(self):
        # Clean up
        cv2.destroyAllWindows()
        self.videostream.stop()
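# Usage sketch (not part of the original code; the function name is
# illustrative): drive the SSD pipeline until the score-accumulation
# heuristic confirms an item. Assumes the hard-coded model and label paths
# above exist on the device.
def example_detection_run():
    detector = Detection()
    try:
        item_detected, item = False, None
        while not item_detected:
            # run() processes one frame and returns (detected?, item_name)
            item_detected, item = detector.run(cloud=False)
        print('Confirmed item:', item)
    finally:
        detector.destroy()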
def objectsCount(MODEL_NAME, GRAPH_NAME, LABELMAP_NAME, min_conf_threshold,
                 use_TPU, IM_NAME, IM_DIR):
    import os
    import cv2
    import numpy as np
    import sys
    import glob
    import importlib.util

    # If both an image AND a folder are specified, throw an error
    if IM_NAME and IM_DIR:
        print('Error! Please only use the --image argument or the --imagedir '
              'argument, not both. Issue "python TFLite_detection_image.py -h" '
              'for help.')
        sys.exit()

    # If neither an image nor a folder is specified, default to 'test1.jpg'
    if not IM_NAME and not IM_DIR:
        IM_NAME = 'test1.jpg'

    # Import TensorFlow libraries.
    # If tflite_runtime is installed, import the interpreter from
    # tflite_runtime, else import from regular tensorflow.
    # If using a Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')
    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to the .tflite file, which contains the model used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for the label map if using the COCO "starter
    # model" from https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del labels[0]

    # Load the TensorFlow Lite model.
    # If using an Edge TPU, use the special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Dictionary mapping each image path to the number of cars detected in it
    objects_list = {}
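    # Each image below goes through the same four steps as the live pipelines
    # above: resize to the model input shape, normalize if the model is
    # floating point, invoke the interpreter, then count detections labelled
    # 'car' whose score clears min_conf_threshold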
    # Loop over every image and perform detection
    for image_path in images:
        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        if image is None:
            # Guard added: glob may match non-image files, for which
            # cv2.imread returns None
            continue
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if the model
        # is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]    # Total number of detected objects (inaccurate and not needed)

        objects_count = 0  # detected-car count for this image

        # Loop over all detections and count a detection if its confidence is
        # above the minimum threshold
        for i in range(len(scores)):
            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                # Look up object name from the "labels" array using the class index
                object_name = labels[int(classes[i])]
                if object_name == 'car':
                    objects_count += 1  # count the cars detected in the image

        objects_list[image_path] = objects_count

    return objects_list
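# Usage sketch (not part of the original code): count cars in a folder of
# test images. The directory names 'coco_ssd_mobilenet' and 'test_images' are
# assumptions for the example; the model layout follows the arguments above.
def example_objects_count():
    counts = objectsCount(MODEL_NAME='coco_ssd_mobilenet',
                          GRAPH_NAME='detect.tflite',
                          LABELMAP_NAME='labelmap.txt',
                          min_conf_threshold=0.5,
                          use_TPU=False,
                          IM_NAME=None,
                          IM_DIR='test_images')
    for path, n in counts.items():
        print('{} -> {} car(s)'.format(path, n))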
class CamDetect:
    def __init__(self, camera):
        self.camera = camera
        MODEL_DIR = 'models/coco_ssd_1'
        MODEL_NAME = 'detect.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        PATH_TO_MODEL = os.path.join(MODEL_DIR, MODEL_NAME)
        PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)
        self.min_conf_threshold = .40

        # Load labels
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for the label map if using the COCO "starter
        # model" from https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        # Load model
        self.interpreter = Interpreter(model_path=PATH_TO_MODEL)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        print('MODEL IMAGE SHAPE: ', self.input_details[0]['shape'])
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.obj_flag = False  # True when a new object has been found
        self.new_item = None
        self.ON = True  # A False value ends the detection loop and the thread
        self.running = True
        self.detection_thread = threading.Thread(target=self.detection_loop,
                                                 name="detection_loop")
        self.detection_thread.start()
        print("Object Detection initialized")

    def __call__(self):
        if self.obj_flag:
            self.obj_flag = False
            return True
        return False

    # Get the latest item found by the detection loop
    def get_item(self):
        return self.new_item

    def close(self):
        # Clean up
        self.ON = False
        cv2.destroyAllWindows()
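    # Thread hand-off: detection_loop() (the producer started in __init__)
    # stores the newest item in self.new_item and raises self.obj_flag; the
    # owner polls the instance via __call__(), which consumes the flag, then
    # fetches the item with get_item(). close() drops self.ON so the loop,
    # and with it the thread, exits.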
    def detection_loop(self):
        print("detection loop started")
        # last_name now persists across frames (the original re-initialized it
        # inside the loop, which re-reported the same item on every frame)
        last_name = ' '
        while self.ON:
            # Read frame from camera connection
            frame = self.camera.read_frame()
            in_height, in_width, in_channels = frame.shape
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
            input_data = np.expand_dims(frame_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if the
            # model is non-quantized)
            if self.floating_model:
                input_data = (np.float32(input_data) - 127.5) / 127.5
                print('Floating model')

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()

            # Retrieve detection results
            boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
            classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
            scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects

            # Only consider COCO class indices 51-60 (the food range of the
            # label map)
            ilist = [51, 52, 53, 54, 55, 56, 57, 58, 59, 60]
            object_name = ' '
            top_score = 0
            top_name = ' '
            for i in range(len(scores)):
                if ((int(classes[i]) in ilist)
                        and (scores[i] > self.min_conf_threshold)
                        and (scores[i] <= 1.0)):
                    # Get bounding box coordinates and draw box. The
                    # interpreter can return coordinates outside of the image
                    # dimensions; force them inside with max() and min()
                    ymin = int(max(0, boxes[i][0]) * in_height)
                    xmin = int(max(0, boxes[i][1]) * in_width)
                    ymax = int(min(1, boxes[i][2]) * in_height)
                    xmax = int(min(1, boxes[i][3]) * in_width)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 1)

                    # Draw label into the **input frame**
                    object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                    item_txt = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(item_txt, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)  # Get font size (scale now matches the putText call below; the original measured at 0.7 but drew at 0.5)
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                                  (xmin + labelSize[0], label_ymin + baseLine - 10),
                                  (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, item_txt, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)  # Draw label text

                    # Draw a circle at the box center
                    xcenter = xmin + int(round((xmax - xmin) / 2))
                    ycenter = ymin + int(round((ymax - ymin) / 2))
                    cv2.circle(frame, (xcenter, ycenter), 5, (0, 0, 255), thickness=-1)

                    if scores[i] > top_score:
                        top_score = scores[i]
                        top_name = self.labels[int(classes[i])]

                    # Print info
                    print('Object ', str(classes[i]), ': ', object_name, 'score:', scores[i])

            # SELECT WHICH ITEM GETS SAVED: only report a change to a real
            # detection (the blank-name guard is new, needed now that
            # last_name persists across frames)
            if top_name != last_name and top_name != ' ':
                last_name = top_name
                object_item = parsepy.item()
                object_item.name = top_name
                object_item.upc = ' '
                object_item.imageURL = ' '
                print("writing new object", object_item.name)
                self.new_item = object_item
                self.obj_flag = True

            # All the results have been drawn on the frame, so it's time to display it
            if SHOW_DETECTION_VIDEO and in_height > 0:
                out_scale_fct = 1
                frame = cv2.resize(frame, (int(in_width * out_scale_fct),
                                           int(in_height * out_scale_fct)))
                frame = cv2.normalize(frame, frame, 0, 255, cv2.NORM_MINMAX)
                cv2.imshow('Objects', frame)
                cv2.moveWindow('Objects', 10, 10)
                cv2.waitKey(200)
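# Usage sketch (not part of the original code): poll CamDetect from a main
# loop. The Camera class is hypothetical; any object exposing read_frame()
# (as detection_loop expects) will do.
def example_cam_detect(camera):
    detect = CamDetect(camera)
    try:
        while True:
            if detect():  # True exactly once per newly found object
                item = detect.get_item()
                print('New item:', item.name)
            time.sleep(0.5)
    finally:
        detect.close()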