def approximation(limit):
    detect = 0
    MODEL_NAME = 'obj_detection_tflite'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = 0.6
    imW, imH = 1280, 720

    # Import the TFLite interpreter from tflite_runtime if available, else from regular TensorFlow
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
    else:
        from tensorflow.lite.python.interpreter import Interpreter

    CWD_PATH = os.getcwd()
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]
    if labels[0] == '???':
        del labels[0]

    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    pi_camera = PiCamera(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    p_height = 0
    p_width = 0
    detections = 0
    approximation_detected = False
    timer_mark = timer_start = time.time()

    while timer_mark - timer_start < limit:
        print(timer_mark - timer_start)
        frame1 = pi_camera.read()
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects

        for i in range(len(scores)):
            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                y_min = int(max(1, (boxes[i][0] * imH)))
                x_min = int(max(1, (boxes[i][1] * imW)))
                y_max = int(min(imH, (boxes[i][2] * imH)))
                x_max = int(min(imW, (boxes[i][3] * imW)))
                object_name = labels[int(classes[i])]
                if object_name == 'car' or object_name == 'bus' or object_name == 'truck':
                    detections += 1
                    # Vehicle box grew noticeably since the last detection, so it is approaching
                    if ((y_max - y_min) > p_height * 1.15 or (x_max - x_min) > p_width * 1.15) \
                            and detections > 1:
                        play_sound_notification("waiting")
                        limit += 3
                    p_height = y_max - y_min
                    p_width = x_max - x_min
        timer_mark = time.time()

    cv2.destroyAllWindows()
    pi_camera.stop()
# Make predictions and time them
for i in range(N):
    print(f"On step {i}/{N}...")
    data_tmp = data[(i) * batch_size + 1:(i + 1) * batch_size, :, :, :]
    # data_tmp = data_tmp[np.newaxis, :, :, :]
    print("Shape of data_tmp:", data_tmp.shape)
    data_tmp = np.array(data_tmp, dtype=np.float32)
    if i == 0:
        interpreter.resize_tensor_input(0, [data_tmp.shape[0], 5, 512, 1])
        interpreter.allocate_tensors()
    t0 = time.time()
    interpreter.set_tensor(input_details[0]['index'], data_tmp)
    interpreter.invoke()
    t = time.time() - t0
    # Skip the first iteration (warm-up) when collecting timings
    if i != 0:
        times.append(t)

print(times)
mean = np.mean(times)
std = np.std(times)
times_mean.append(mean)
times_std.append(std)
def humancheck(self):
    human = False
    # Define a name for the snapshot by giving it a number
    img = "/home/pi/Desktop/snapshots/" + str(self.pic_name) + ".jpg"
    self.camera.capture(img)
    self.pic_name = self.pic_name + 1

    # Define and parse input arguments
    MODEL_NAME = 'Sample_TFLite_model'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = 0.5

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
    else:
        from tensorflow.lite.python.interpreter import Interpreter

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    del labels[0]

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(img)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects

    # Loop over all detections and stop at the first 'person' above the minimum threshold
    for i in range(len(scores)):
        if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
            # Look up object name from "labels" array using class index
            object_name = labels[int(classes[i])]
            if object_name == 'person':
                # If the label is person, stop looking and flag it as a human
                human = True
                break

    if human:
        shutil.move(img, img[:27] + "humans/" + img[27:])
        print('Human found!', scores[i] * 100, '%')
    else:
        print('No humans found')
class TFInferenceEngine:
    """Thin wrapper around the TFLite Interpreter.

    The official TFLite API is moving fast and still changes frequently.
    This class intends to abstract out the underlying TF changes to some extent.

    It dynamically detects whether an EdgeTPU is available and uses it.
    Otherwise it falls back to the TFLite runtime.
    """

    def __init__(self,
                 model=None,
                 labels=None,
                 confidence_threshold=0.8,
                 top_k=10):
        """Create an instance of the Tensorflow inference engine.

        :Parameters:
        ----------
        model: dict
            {
                'tflite': path,
                'edgetpu': path,
            }
            Where path is of type string and points to the location of the
            frozen graph file (AI model).
        labels : string
            Location of file with model labels.
        confidence_threshold : float
            Inference confidence threshold.
        top_k : int
            Inference top-k threshold.
        """
        assert model
        assert model['tflite'], 'TFLite AI model path required.'
        model_tflite = model['tflite']
        assert os.path.isfile(model_tflite), \
            'TFLite AI model file does not exist: {}' \
            .format(model_tflite)
        self._model_tflite_path = model_tflite
        model_edgetpu = model.get('edgetpu', None)
        if model_edgetpu:
            assert os.path.isfile(model_edgetpu), \
                'EdgeTPU AI model file does not exist: {}' \
                .format(model_edgetpu)
        self._model_edgetpu_path = model_edgetpu
        assert labels, 'AI model labels path required.'
        assert os.path.isfile(labels), \
            'AI model labels file does not exist: {}' \
            .format(labels)
        self._model_labels_path = labels
        self._confidence_threshold = confidence_threshold
        self._top_k = top_k
        log.debug(
            'Loading AI model:\n'
            'TFLite graph: %r\n'
            'EdgeTPU graph: %r\n'
            'Labels: %r\n'
            'Confidence threshold: %.0f%%\n'
            'top-k: %d',
            model_tflite, model_edgetpu, labels,
            confidence_threshold * 100, top_k)
        # EdgeTPU is not available in testing and other environments,
        # load it dynamically as needed
        # edgetpu_class = 'DetectionEngine'
        # module_object = import_module('edgetpu.detection.engine',
        #                               package=edgetpu_class)
        # target_class = getattr(module_object, edgetpu_class)
        self._tf_interpreter = _get_edgetpu_interpreter(model=model_edgetpu)
        if not self._tf_interpreter:
            log.debug('EdgeTPU not available. Will use TFLite CPU runtime.')
            self._tf_interpreter = Interpreter(model_path=model_tflite)
        assert self._tf_interpreter
        self._tf_interpreter.allocate_tensors()
        # Check the type of the input tensor
        self._tf_input_details = self._tf_interpreter.get_input_details()
        self._tf_output_details = self._tf_interpreter.get_output_details()
        self._tf_is_quantized_model = \
            self.input_details[0]['dtype'] != np.float32

    @property
    def input_details(self):
        return self._tf_input_details

    @property
    def output_details(self):
        return self._tf_output_details

    @property
    def is_quantized(self):
        return self._tf_is_quantized_model

    @property
    def labels_path(self):
        """Location of the labels file.

        :Returns:
        -------
        string
            Path to AI model labels.
        """
        return self._model_labels_path

    @property
    def confidence_threshold(self):
        """Inference confidence threshold.

        :Returns:
        -------
        float
            Confidence threshold for inference results. Only results at or
            above this threshold should be returned by each engine inference.
        """
        return self._confidence_threshold

    @property
    def top_k(self):
        """Inference top-k threshold.

        :Returns:
        -------
        int
            Max number of results to be returned by each inference.
            Ordered by confidence score.
        """
        return self._top_k

    def infer(self):
        """Invoke model inference on the current input tensor."""
        return self._tf_interpreter.invoke()

    def set_tensor(self, index=None, tensor_data=None):
        """Set tensor data at the given reference index."""
        assert isinstance(index, int)
        self._tf_interpreter.set_tensor(index, tensor_data)

    def get_tensor(self, index=None):
        """Return tensor data at the given reference index."""
        assert isinstance(index, int)
        return self._tf_interpreter.get_tensor(index)
class objectRecog():
    def __init__(self):
        self.MODEL_NAME = 'objectRecog/model'
        self.GRAPH_NAME = 'detect.tflite'
        self.LABELMAP_NAME = 'labelmap.txt'
        self.min_conf_threshold = 0.5
        self.resW, self.resH = '1280x720'.split('x')
        self.imW, self.imH = int(self.resW), int(self.resH)
        self.use_TPU = False

        self.pkg = importlib.util.find_spec('tflite_runtime')
        if self.pkg:
            from tflite_runtime.interpreter import Interpreter
            if self.use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if self.use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        if self.use_TPU:
            if (self.GRAPH_NAME == 'detect.tflite'):
                self.GRAPH_NAME = 'edgetpu.tflite'

        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = os.path.join(CWD_PATH, self.MODEL_NAME, self.GRAPH_NAME)
        PATH_TO_LABELS = os.path.join(CWD_PATH, self.MODEL_NAME, self.LABELMAP_NAME)
        print(PATH_TO_LABELS)

        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del self.labels[0]

        if self.use_TPU:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

    def detObjects(self, frame1):
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects

        detectedObjects = []
        for i in range(len(scores)):
            if (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0):
                # Get bounding box coordinates and draw box.
                # Interpreter can return coordinates that are outside of image dimensions,
                # need to force them to be within the image using max() and min()
                ymin = int(max(1, (boxes[i][0] * self.imH)))
                xmin = int(max(1, (boxes[i][1] * self.imW)))
                ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                xmax = int(min(self.imW, (boxes[i][3] * self.imW)))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text
                detectedObjects.append(object_name)
        return detectedObjects, frame
class SsdMobileNet:
    def __init__(self, frozenGraphFilename, device='cpu'):
        self.boxes = None
        self.scores = None
        self.classes = None
        self.num_detections = None
        # self.interpreter = tf.lite.Interpreter(frozenGraphFilename)
        self.interpreter = Interpreter(frozenGraphFilename)
        self.__load_graph(device)
        self.__init_predictor()

    def __load_graph(self, device):
        # TFLite interpreter configuration
        # tf.logging.set_verbosity(tf.logging.DEBUG)
        self.interpreter.allocate_tensors()

    def __init_predictor(self):
        # Obtain the input/output shapes and types
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def predict(self, images, params, thresh=0, batch=1, warmup=0, iterations=1):
        # Warmup
        x_matrix = np.array(images)
        if params[const.PRECISION] == const.FP32:
            x_matrix = np.array(images, dtype=np.float32)
        self.interpreter.set_tensor(self.input_details[0]['index'], x_matrix)
        for i in range(warmup):
            self.interpreter.invoke()
            matrix_0 = self.interpreter.get_tensor(self.output_details[0]['index'])
            matrix_1 = self.interpreter.get_tensor(self.output_details[1]['index'])
            matrix_2 = self.interpreter.get_tensor(self.output_details[2]['index'])
            matrix_3 = self.interpreter.get_tensor(self.output_details[3]['index'])

        # Measure
        times = []
        for i in range(iterations):
            t0 = time.time()
            self.interpreter.invoke()
            boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
            classes = self.interpreter.get_tensor(self.output_details[1]['index'])
            scores = self.interpreter.get_tensor(self.output_details[2]['index'])
            num_detections = self.interpreter.get_tensor(self.output_details[3]['index'])
            t1 = time.time()
            ts = t1 - t0
            times.append(ts)

        # Report
        results = {"seconds": times, "predictions": []}
        for i in range(len(num_detections)):
            thisResult = []
            for d in range(int(num_detections[0])):
                # Note the unusual bbox coordinate order: y1, x1, y2, x2
                box = boxes[i][d].tolist()
                x = {
                    'score': scores[i][d].tolist(),
                    'box': [box[1], box[0], box[3], box[2]],
                    'class': (classes[i][d] + 1).tolist()
                }
                thisResult.append(x)
            results['predictions'].append(thisResult)
        return results

    def predict_runtime(self, images, params, max=5):
        x_matrix = np.array(images)
        if params[const.PRECISION] == const.FP32:
            x_matrix = np.array(images, dtype=np.float32)
        start = time.time()
        self.interpreter.invoke()
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num_detections = self.interpreter.get_tensor(self.output_details[3]['index'])
        end = time.time() - start
        return end
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with the given model.

        Args:
            model_path: String, path to TF-Lite Flatbuffer file.
            mirror: Flip keypoints horizontally.

        Raises:
            ValueError: An error occurred when model output is invalid.
        """
        self._mirror = mirror

        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path,
            experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()

        self._input_tensor_shape = self._interpreter.get_input_details()[0]['shape']
        self._input_details = self._interpreter.get_input_details()

        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self._input_tensor_shape

        # Auto-detect stride size
        def calcStride(h, w, L):
            return int((2 * h * w) /
                       (math.sqrt(h ** 2 + 4 * h * L * w - 2 * h * w + w ** 2) - h - w))

        details = self._interpreter.get_output_details()[4]
        self.heatmap_zero_point = details['quantization_parameters']['zero_points'][0]
        self.heatmap_scale = details['quantization_parameters']['scales'][0]
        heatmap_size = self._interpreter.tensor(details['index'])().nbytes
        self.stride = calcStride(self.image_height, self.image_width, heatmap_size)
        self.heatmap_size = (self.image_width // self.stride + 1,
                             self.image_height // self.stride + 1)

        details = self._interpreter.get_output_details()[5]
        self.parts_zero_point = details['quantization_parameters']['zero_points'][0]
        self.parts_scale = details['quantization_parameters']['scales'][0]

        print("Heatmap size: ", self.heatmap_size)
        print("Stride: ", self.stride, self.heatmap_size)

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

        For ideal results make sure the image fed to this function is close to
        the expected input size - it is the caller's responsibility to resize
        the image accordingly.

        Args:
            img: numpy array containing the image
        """
        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            pads = [[0, max(0, self.image_height - img.shape[0])],
                    [0, max(0, self.image_width - img.shape[1])],
                    [0, 0]]
            img = np.pad(img, pads, mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert (img.shape == tuple(self._input_tensor_shape[1:]))

        # Run the inference (API expects the data to be flattened)
        inference_time, outputs = self.run_inference(img)
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return inference_time, poses, heatmap, bodyparts

    def ParseOutputs(self, outputs):
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return poses, heatmap, bodyparts

    def _parse_poses(self, outputs):
        keypoints = outputs[0].reshape(-1, len(KEYPOINTS), 2)
        keypoint_scores = outputs[1].reshape(-1, len(KEYPOINTS))
        pose_scores = outputs[2].flatten()
        nposes = int(outputs[3][0])

        # Convert the poses to a friendlier format of keypoints with associated scores.
        poses = []
        for pose_i in range(nposes):
            keypoint_dict = {}
            for point_i, point in enumerate(keypoints[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    keypoint_scores[pose_i, point_i])
                if self._mirror:
                    keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_scores[pose_i]))
        return poses

    def softmax(self, y, axis):
        y = y - np.expand_dims(np.max(y, axis=axis), axis)
        y = np.exp(y)
        return y / np.expand_dims(np.sum(y, axis=axis), axis)

    def _parse_heatmaps(self, outputs):
        # Heatmaps are really float32.
        heatmap = (outputs[4].astype(np.float32) - self.heatmap_zero_point) * self.heatmap_scale
        heatmap = np.reshape(heatmap, [self.heatmap_size[1], self.heatmap_size[0]])
        part_heatmap = (outputs[5].astype(np.float32) - self.parts_zero_point) * self.parts_scale
        part_heatmap = np.reshape(part_heatmap,
                                  [self.heatmap_size[1], self.heatmap_size[0], -1])
        part_heatmap = self.softmax(part_heatmap, axis=2)
        return heatmap, part_heatmap

    def run_inference(self, input):
        start_time = time.monotonic()
        self._interpreter.set_tensor(self._input_details[0]['index'],
                                     np.expand_dims(input, axis=0))
        self._interpreter.invoke()
        duration_ms = (time.monotonic() - start_time) * 1000
        output = []
        for details in self._interpreter.get_output_details():
            tensor = self._interpreter.get_tensor(details['index'])
            output.append(tensor)
        return (duration_ms, output)
def startStream(self, modeldir, graph, labels, threshold, resolution, edgetpu):
    MODEL_NAME = modeldir
    GRAPH_NAME = graph
    LABELMAP_NAME = labels
    min_conf_threshold = float(threshold)
    resW, resH = resolution.split('x')
    imW, imH = int(resW), int(resH)
    use_TPU = edgetpu

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del labels[0]

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    # Create window
    cv2.namedWindow('Object detector', cv2.WINDOW_NORMAL)

    # for frame1 in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
    while True:
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]    # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)
                    and (labels[int(classes[i])] == 'person')):
                # Get bounding box coordinates and draw box.
                # Interpreter can return coordinates that are outside of image dimensions,
                # need to force them to be within the image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                # print(self.detect)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

                # Draw circle in center
                xcenter = xmin + (int(round((xmax - xmin) / 2)))
                ycenter = ymin + (int(round((ymax - ymin) / 2)))
                self.detect = setDetect(xcenter, ycenter, imH, imW)
                cv2.circle(frame, (xcenter, ycenter), 5, (0, 0, 255), thickness=-1)

                # Print info
                # print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')')

        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
class camera_interface():
    """
    The main interface for using the camera and determining the grip we need to be in.
    https://www.hackster.io/gatoninja236/scan-qr-codes-in-real-time-with-raspberry-pi-a5268b

    Attributes:
        count (int): Count of saved screenshots. File titles are frame'count'.jpg.
        cap (cv2 VideoCapture): The VideoCapture object.
        detector (QRCodeDetector): The QR Code detecting object.
    """

    def __init__(self, resolution=(640, 480), framerate=30):
        self.count = 0
        # self.cap = cv2.VideoCapture(0)
        self.vs = VideoStream(resolution=(1280, 720), framerate=30).start()
        # self.stream = cv2.VideoCapture(0)
        # ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        # ret = self.stream.set(3, resolution[0])
        # ret = self.stream.set(4, resolution[1])

        # Wait one second for the camera to start up
        time.sleep(1)
        print("[INFO] Created video capture object")
        print("[INFO] loading model...")

        # Load the tflite model and labelmap
        # Get path to current working directory
        GRAPH_NAME = "detect.tflite"
        MODEL_NAME = "Camera_Interpreter/Coco"
        LABELMAP_NAME = "labelmap.txt"
        CWD_PATH = os.getcwd()

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        # Load the Tensorflow Lite model.
        # If using Edge TPU, use special load_delegate argument
        use_TPU = False
        if use_TPU:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

        # QR code detection object
        # self.detector = cv2.QRCodeDetector()
        self.cam_data = ""
        self.object_spotted = False
        self.test_count = 0
        self.killed_thread = False
        self.cam_image = None
        self.cam_image_index = 0
        self.object_spotted_T0 = 0
        self.object_not_spotted_delta_req = 3

        # Initialize the paused flag to false
        self.temp_pause = False

    def camera_read_threader(self):
        # Start the read cam thread
        read_cam = threading.Thread(target=self.read_cam_thread, args=())
        read_cam.start()
        while (self.cam_image_index == 0):
            time.sleep(0.05)

        # Start the image decode thread
        decoder = threading.Thread(target=self.decode_image_thread, args=())
        decoder.start()
        while not self.killed_thread and read_cam.is_alive() and decoder.is_alive():
            time.sleep(0.25)

        # Flag is thrown or error, so ensure flag is thrown and wait for threads to join
        self.killed_thread = True
        read_cam.join()
        decoder.join()

    def decode_image_thread(self):
        previous_index = None
        while not self.killed_thread:
            # Detect and decode the stored image if it's ready
            # t = time.time()
            if (previous_index != self.cam_image_index and (not self.temp_pause)):
                previous_index = self.cam_image_index
                # data, _, _ = self.detector.detectAndDecode(self.cam_image)  # Deprecated QR Code reader
                data, score = self.detect_main_object(self.cam_image)
                # print("[INFO] Camera objects: " + data)
                # if(data not in grips._value2member_map_):
                #     data = grips.openGrip.value

                # If the camera sees an object, skip the time requirement
                if (data != ""):
                    self.cam_data = data
                    self.object_spotted_T0 = time.time()
                    self.object_spotted = True
                # If the camera doesn't see an object, require a delay before reporting nothing
                else:
                    if ((time.time() - self.object_spotted_T0) > self.object_not_spotted_delta_req):
                        # print("[DEBUG] Delta Req passed; reporting no object now")
                        self.cam_data = data
                        self.object_spotted = False
            # No long sleep since detecting/decoding takes significant time; just do it as fast as possible
            # print("[INFO] Time to decode image: " + (str(time.time() - t)))
            time.sleep(0.01)

    def detect_main_object(self, frame1):
        min_conf_threshold = 0.35

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        # boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects

        highest_scoring_label = ""
        highest_score = 0
        for i in range(len(scores)):
            object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and
                    (scores[i] > highest_score) and (object_name in grips._value2member_map_)):
                highest_scoring_label = object_name
                highest_score = scores[i]
        return (highest_scoring_label, highest_score)

    def read_cam_thread(self):
        while not self.killed_thread:
            if (not self.temp_pause):
                # t = time.time()
                # Get camera image, rescale, and store in class variable
                frame = self.vs.read()
                self.cam_image = imutils.resize(frame, width=400)
                # Increase index by 1
                self.cam_image_index += 1
            # Pause temporarily
            time.sleep(0.2)
            # print("Time to save/resize new image: " + (str(time.time() - t)))

    # def read_cam(self):
    #     # get the image
    #     _, img = self.cap.read()  # TODO: #14 Downscale the resolution for faster processing
    #     # get bounding box coords and data
    #     data, bbox, _ = self.detector.detectAndDecode(img)
    #     # Define a parameter we can easily read later if anything is detected
    #     is_object = False
    #     # Update parameter/output the data we found, if any
    #     if data:
    #         # print("data found: ", data)
    #         is_object = True
    #     # return the information we got from the camera
    #     # cv2.imwrite("frame1.jpg", img)  # save frame as JPEG file
    #     return data, bbox, img, is_object

    # def read_cam_display_out(self):
    #     # Call the standard method to get the qr data / bounding box
    #     data, bbox, img, _ = self.read_cam()
    #     # if there is a bounding box, draw one, along with the data
    #     if (bbox is not None):
    #         for i in range(len(bbox)):
    #             cv2.line(img, tuple(bbox[i][0]), tuple(bbox[(i + 1) % len(bbox)][0]),
    #                      color=(255, 0, 255), thickness=2)
    #         cv2.putText(img, data, (int(bbox[0][0][0]), int(bbox[0][0][1]) - 10),
    #                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    #     # if data:
    #     #     print("data found: ", data)
    #     # display the image preview
    #     cv2.imshow("code detector", img)
    #     # save the image
    #     cv2.imwrite("frame1.jpg", img)  # save frame as JPEG file
    #     # self.count += 1

    def end_camera_session(self):
        # Stop the camera thread
        self.killed_thread = True
        time.sleep(0.1)
        # Release the camera object
        self.vs.stop()
class DetectorTFLite:
    def __init__(self, path_to_checkpoint, path_to_labelmap, filter_labels=None):
        self.filter_labels = filter_labels
        with open(path_to_labelmap, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        self.interpreter = Interpreter(model_path=path_to_checkpoint)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.tf_height = self.input_details[0]['shape'][1]
        self.tf_width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

    def ExtractBoxes(self, imH, imW, boxes, classes, scores):
        det_boxes = []
        for i in range(len(scores)):
            # Get bounding box coordinates.
            # Interpreter can return coordinates that are outside of image dimensions,
            # need to force them to be within the image using max() and min()
            miny = int(max(1, (boxes[i][0] * imH)))
            minx = int(max(1, (boxes[i][1] * imW)))
            maxy = int(min(imH, (boxes[i][2] * imH)))
            maxx = int(min(imW, (boxes[i][3] * imW)))
            label = self.labels[int(classes[i])]
            det_boxes.append((minx, miny, maxx, maxy, label, float(scores[i])))
        return det_boxes

    def DetectFromImage(self, img):
        imH, imW, _ = img.shape
        # Acquire frame and resize to expected shape [1xHxWx3]
        frame_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.tf_width, self.tf_height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects

        return self.ExtractBoxes(imH, imW, boxes, classes, scores)

    def DisplayDetection(self, image, box, det_time=None):
        img = image.copy()
        x_min = box[0]
        y_min = box[1]
        x_max = box[2]
        y_max = box[3]
        cls = str(box[4])
        score = str(np.round(box[-1], 2))
        text = cls + ": " + score
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
        cv2.rectangle(img, (x_min, y_min - 20), (x_min, y_min), (255, 255, 255), -1)
        cv2.putText(img, text, (x_min + 5, y_min - 7),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        if det_time is not None:
            fps = round(1000. / det_time, 1)
            fps_txt = str(fps) + " FPS"
            cv2.putText(img, fps_txt, (25, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
        return img
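# A minimal usage sketch for DetectorTFLite (not part of the original class): the model,
# labelmap, and image paths below are hypothetical placeholders.
detector = DetectorTFLite('Sample_TFLite_model/detect.tflite', 'Sample_TFLite_model/labelmap.txt')
test_img = cv2.imread('test.jpg')
for (xmin, ymin, xmax, ymax, label, score) in detector.DetectFromImage(test_img):
    print(label, score, (xmin, ymin, xmax, ymax))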
class YOLOV5:
    def __init__(self, wanted_labels=None, model_file=None, label_file=None,
                 num_threads=None, edgetpu=False, libedgetpu=None, score_threshold=0.25):
        basedir = os.getenv('DEEPDISHHOME', '.')
        if model_file is None:
            model_file = os.path.join(basedir, 'detectors/yolov5/yolov5s-int8.tflite')
        if label_file is None:
            label_file = os.path.join(basedir, 'detectors/yolov5/coco_classes.txt')
        self.cfg_file = os.path.join(basedir, 'detectors/yolov5/yolov5s.yaml')
        if wanted_labels is None:
            wanted_labels = ['person']
        self.wanted_labels = wanted_labels
        self.label_file = label_file
        self.score_threshold = score_threshold
        self.labels = self._get_labels()
        self.use_edgetpu = edgetpu
        self.int8 = False
        if 'saved_model' in model_file:
            self.mode = 'saved_model'
            if 'keras' not in sys.modules:
                print('yolov5: saved_model mode requires keras')
                sys.exit(1)
        elif '.tflite' in model_file:
            self.mode = 'tflite'
            if 'int8' in model_file:
                self.int8 = True
        else:
            print('unable to determine format of yolov5 model')
            sys.exit(1)
        if libedgetpu is None:
            libedgetpu = edgetpu_lib_name()
        if self.mode == 'tflite':
            # Load TFLite model and allocate tensors.
            self.interpreter = Interpreter(
                model_path=model_file,
                num_threads=num_threads,
                experimental_delegates=[load_delegate(libedgetpu)] if self.use_edgetpu else None)
            self.interpreter.allocate_tensors()
            self.num_threads = num_threads
            # Get input and output tensors.
            self.input_details = self.interpreter.get_input_details()
            self.output_details = self.interpreter.get_output_details()
            _, self.height, self.width, _ = self.input_details[0]['shape'].tolist()
        elif self.mode == 'saved_model':
            self.model = keras.models.load_model(model_file)
            self.num_threads = 1
            _, self.height, self.width, _ = self.model.inputs[0].shape.as_list()
        yaml_file = Path(self.cfg_file)
        with open(yaml_file) as f:
            cfg = yaml.load(f, Loader=yaml.FullLoader)
        self.anchors = cfg['anchors']

    def _get_labels(self):
        labels_path = os.path.expanduser(self.label_file)
        with open(labels_path) as f:
            labels = {i: line.strip() for i, line in enumerate(f.readlines())}
        return labels

    def detect_image(self, img):
        img_size = img.size
        img_resized = img.convert('RGB').resize((self.width, self.height), Image.ANTIALIAS)
        input_data = np.expand_dims(img_resized, 0).astype(np.float32)
        if self.int8:
            scale, zero_point = self.input_details[0]['quantization']
            input_data = (input_data / scale + zero_point).astype(np.uint8)
        if self.mode == 'tflite':
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()
            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
            raw = np.copy(output_data)
        elif self.mode == 'saved_model':
            input_data /= 255.0
            output_data = self.model(input_data).numpy()
        if self.int8:
            scale, zero_point = self.output_details[0]['quantization']
            output_data = output_data.astype(np.float32)
            output_data = (output_data - zero_point) * scale
        x = np.copy(output_data)
        boxes = np.copy(x[..., :4])
        boxes[..., 0] = x[..., 0] - x[..., 2] / 2
        boxes[..., 1] = x[..., 1] - x[..., 3] / 2
        boxes[..., 2] = x[..., 0] + x[..., 2] / 2
        boxes[..., 3] = x[..., 1] + x[..., 3] / 2
        x[..., 5:] *= x[..., 4:5]
        best_classes = np.expand_dims(np.argmax(x[..., 5:], axis=-1), axis=-1)
        confidences = np.take_along_axis(x, best_classes + 5, axis=-1)
        y = np.concatenate((boxes, confidences, best_classes.astype(np.float32)), axis=-1)
        y = y[np.where(y[..., 4] >= self.score_threshold)]
        y[..., :4] *= np.array([img_size[0], img_size[1], img_size[0], img_size[1]])
        return_boxs = []
        return_lbls = []
        return_scrs = []
        for *xyxy, score, labelidx in y:
            label = self.labels[int(labelidx)]
            if label in self.wanted_labels and score >= self.score_threshold:
                tlwh = np.copy(xyxy)
                tlwh[2] = xyxy[2] - xyxy[0]
                tlwh[3] = xyxy[3] - xyxy[1]
                return_boxs.append(list(tlwh))
                return_lbls.append(label)
                return_scrs.append(score)
        return (return_boxs, return_lbls, return_scrs)
t1 = cv2.getTickCount()

# Grab frame from video stream
frame1 = videostream.read()

# Acquire frame and resize to expected shape [1xHxWx3]
frame = frame1.copy()
cut_img = np.zeros((640, 640, 3), np.uint8)
cut_img[80:560] = frame
frame_resized = cv2.resize(cut_img, (width, height), cv2.INTER_AREA)

st1 = time.time()
if EDGE_TPU:
    input_data = np.expand_dims(frame_resized, axis=0)
else:
    input_data = np.expand_dims(frame_resized / 127.5 - 1, axis=0).astype(np.float32)
face_interpreter.set_tensor(face_input_details['index'], input_data)
face_interpreter.invoke()
raw_box = face_interpreter.get_tensor(face_output_details[0]['index'])[0]
raw_score = face_interpreter.get_tensor(face_output_details[1]['index'])[0]
st2 = time.time()

# 3. Postprocess the raw predictions:
detections = _tensors_to_detections(raw_box, raw_score, anchors)

# 4. Non-maximum suppression to remove overlapping detections:
faces = _weighted_non_max_suppression(detections)
print('Inference time: ', (st2 - st1) * 1000,
      ' Post-processing: ', (time.time() - st2) * 1000)

for rc in faces:
    cv2.rectangle(frame,
                  (int(rc[1] * imW), int(rc[0] * imW - 80)),
                  (int(rc[3] * imW), int(rc[2] * imW - 80)),
                  (0, 255, 0), 2)
    for i in range(6):
class Detection:
    def __init__(self):
        self.MODEL_NAME = "detect"
        self.GRAPH_NAME = "detect.tflite"
        self.LABELMAP_NAME = "label_map.txt"
        self.min_conf_threshold = 0.70
        self.resW, self.resH = (1280, 720)
        self.imW, self.imH = int(self.resW), int(self.resH)
        # self.use_TPU = (True if 'projects' in str(os.getcwd()) else False)
        self.use_TPU = False
        self.frame_rate_calc = None
        self.item_detected = False
        self.latest_item = None
        self.detection_counter = [
            {"name": "apple", "counter": 0},
            {"name": "aubergine", "counter": 0},
            {"name": "banana", "counter": 0},
            {"name": "broccoli", "counter": 0},
            {"name": "cucumber", "counter": 0},
            {"name": "orange", "counter": 0},
            {"name": "paprika", "counter": 0},
            {"name": "pear", "counter": 0}
        ]

        # Import TFLite requirements
        self.pkg = importlib.util.find_spec('tflite_runtime')
        if self.pkg:
            from tflite_runtime.interpreter import Interpreter
            if self.use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if self.use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        # If using Edge TPU, assign filename for Edge TPU model
        if self.use_TPU:
            # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
            if (self.GRAPH_NAME == 'detect.tflite'):
                self.GRAPH_NAME = 'edgetpu.tflite'

        # Get path to current working directory
        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(self.GRAPH_NAME)
        PATH_TO_LABELS = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(self.LABELMAP_NAME)
        PATH_TO_OBJ_NAMES = "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names"

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Fix for potential label map issue
        if self.labels[0] == '???':
            del self.labels[0]

        if self.use_TPU:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()
        print("Model loaded and tensors allocated")

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        # print("Input details: {}".format(self.input_details))
        self.output_details = self.interpreter.get_output_details()
        # print("Output details: {}".format(self.output_details))
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.input_mean = 127.5
        self.input_std = 127.5

        # Initialize frame rate calculation
        self.frame_rate_calc = 1
        self.freq = cv2.getTickFrequency()

        # Initialize video stream
        self.videostream = VideoStream(resolution=(self.imW, self.imH))
        self.videostream = self.videostream.start()

    def filter_boxes(self, box_xywh, scores, score_threshold=0.4, input_shape=tf.constant([416, 416])):
        scores_max = tf.math.reduce_max(scores, axis=-1)
        mask = scores_max >= score_threshold
        class_boxes = tf.boolean_mask(box_xywh, mask)
        pred_conf = tf.boolean_mask(scores, mask)
        class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]])
        pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]])
        box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1)
        input_shape = tf.cast(input_shape, dtype=tf.float32)
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        box_mins = (box_yx - (box_hw / 2.)) / input_shape
        box_maxes = (box_yx + (box_hw / 2.)) / input_shape
        boxes = tf.concat([
            box_mins[..., 0:1],   # y_min
            box_mins[..., 1:2],   # x_min
            box_maxes[..., 0:1],  # y_max
            box_maxes[..., 1:2]   # x_max
        ], axis=-1)
        # return tf.concat([boxes, pred_conf], axis=-1)
        return (boxes, pred_conf)

    def read_class_names(self, class_file_name):
        names = {}
        with open(class_file_name, 'r') as data:
            for ID, name in enumerate(data):
                names[ID] = name.strip('\n')
        return names

    # TODO: Define cfg.YOLO.CLASSES
    def draw_bbox(self, image, bboxes, classes, show_label=True):
        num_classes = len(classes)
        image_h, image_w, _ = image.shape
        hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))

        random.seed(0)
        random.shuffle(colors)
        random.seed(None)

        out_boxes, out_scores, out_classes, num_boxes = bboxes
        for i in range(num_boxes[0]):
            if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes:
                continue
            coor = out_boxes[0][i]
            coor[0] = int(coor[0] * image_h)
            coor[2] = int(coor[2] * image_h)
            coor[1] = int(coor[1] * image_w)
            coor[3] = int(coor[3] * image_w)

            fontScale = 0.5
            score = out_scores[0][i]
            class_ind = int(out_classes[0][i])
            bbox_color = colors[class_ind]
            bbox_thick = int(0.6 * (image_h + image_w) / 600)
            c1, c2 = (coor[1], coor[0]), (coor[3], coor[2])
            cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

            if show_label:
                bbox_mess = '%s: %.2f' % (classes[class_ind], score)
                t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
                c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
                cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1)  # filled
                cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)),
                            cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0),
                            bbox_thick // 2, lineType=cv2.LINE_AA)
        return image

    def perform(self):
        while True:
            t1 = cv2.getTickCount()
            frame1 = self.videostream.read()
            print("Frame read from stream")
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
            # input_data = np.expand_dims(frame_resized, axis=0)
            image_data = cv2.resize(frame, (608, 608))
            image_data = image_data / 255.
            images_data = []
            for i in range(1):
                images_data.append(image_data)
            images_data = np.asarray(images_data).astype(np.float32)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            # if self.floating_model:
            #     input_data = (np.float32(input_data) - self.input_mean) / self.input_std

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], images_data)
            print("Performing detection")
            self.interpreter.invoke()
            print("Detection performed")
            pred = [self.interpreter.get_tensor(self.output_details[i]['index'])
                    for i in range(len(self.output_details))]
            boxes, pred_conf = self.filter_boxes(pred[0], pred[1],
                                                 score_threshold=0.25,
                                                 input_shape=tf.constant([608, 608]))
            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.3,    # TODO: Make var
                score_threshold=0.3   # TODO: Make var
            )
            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
            class_names = self.read_class_names(
                "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names")
            print("Drawing bounding boxes")
            frame = self.draw_bbox(frame, pred_bbox, class_names)
            # frame = Image.fromarray(frame.astype(np.uint8))
            # cv2.imshow('Object detector', frame.astype(np.uint8))
            time.sleep(5)
            image = cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2RGB)
            if cv2.waitKey(1) == ord('x'):
                break
            if self.item_detected:
                break
        return self.item_detected, self.latest_item

    def run(self, cloud=False):
        # while True:
        # for frame1 in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = self.videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))

        if cloud:
            # TODO: Send image to cloud and get data back
            content_type = 'image/jpeg'
            headers = {'content-type': content_type}
            _, img_encoded = cv2.imencode('.jpg', frame_rgb)
            request_address = "http://a24dcb00998c.ngrok.io/api/detect"
            # send http request with image and receive response
            print("Sending image to cloud api and awaiting response")
            response = requests.post(request_address, data=img_encoded.tostring(), headers=headers)
            print("Response received:")
            print(json.loads(response.text))
        else:
            input_data = np.expand_dims(frame_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if self.floating_model:
                input_data = (np.float32(input_data) - self.input_mean) / self.input_std

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            # print("Detection started")
            self.interpreter.invoke()
            # print("Detection complete")

            # Retrieve detection results
            # print(self.output_details)
            boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
            classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
            scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]   # Confidence of detected objects
            num = self.interpreter.get_tensor(self.output_details[3]['index'])[0]      # Total number of detected objects (inaccurate and not needed)

            max_score = 0
            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                    # Specify that item has been detected
                    # self.item_detected = True
                    # if scores[i] > max_score:
                    #     max_score = scores[i]
                    #     self.latest_item = self.labels[int(classes[i])]

                    # Get bounding box coordinates and draw box.
                    # Interpreter can return coordinates that are outside of image dimensions,
                    # need to force them to be within the image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * self.imH)))
                    xmin = int(max(1, (boxes[i][1] * self.imW)))
                    ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                    xmax = int(min(self.imW, (boxes[i][3] * self.imW)))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                    # Draw label
                    object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                    self.increase_detection_counter(object_name, scores[i])
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                                  (xmin + labelSize[0], label_ymin + baseLine - 10),
                                  (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(self.frame_rate_calc), (30, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

            # All the results have been drawn on the frame, so it's time to display it.
            cv2.imshow('Object detector', frame)
            if cv2.waitKey(1) == ord('x'):
                cv2.destroyAllWindows()
                # break

            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / self.freq
            self.frame_rate_calc = 1 / time1

        self.item_detected, self.latest_item = self.get_object_with_score_five()
        if self.item_detected:
            self.reset_detection_counter()
        return self.item_detected, self.latest_item

    def increase_detection_counter(self, detected_item, score):
        for object in self.detection_counter:
            if object["name"] == detected_item:
                object["counter"] += score

    def get_object_with_score_five(self):
        max_score = 0
        latest_object = "None"
        detected_object = False
        for object in self.detection_counter:
            if object["counter"] >= 5 and object["counter"] > max_score:
                latest_object = object["name"]
                detected_object = True
                max_score = object["counter"]
        return detected_object, latest_object

    def reset_detection_counter(self):
        self.detection_counter = [
            {"name": "apple", "counter": 0},
            {"name": "aubergine", "counter": 0},
            {"name": "banana", "counter": 0},
            {"name": "broccoli", "counter": 0},
            {"name": "cucumber", "counter": 0},
            {"name": "orange", "counter": 0},
            {"name": "paprika", "counter": 0},
            {"name": "pear", "counter": 0}
        ]

    def destroy(self):
        # Clean up
        cv2.destroyAllWindows()
        self.videostream.stop()
class ImageDetection:
    def __init__(self, modeldir):
        GRAPH_NAME = 'detect.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = os.path.join(CWD_PATH, modeldir, GRAPH_NAME)
        PATH_TO_LABELS = os.path.join(CWD_PATH, modeldir, LABELMAP_NAME)
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del self.labels[0]
        self.min_conf_threshold = 0.6
        self.input_mean = 127.5
        self.input_std = 127.5
        self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

    def detect(self, image_path):
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        im_h, im_w, _ = image.shape
        image_resized = cv2.resize(image_rgb, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]
        detect_text = ""
        for i in range(len(scores)):
            if self.min_conf_threshold < scores[i] <= 1.0:
                ymin = int(max(1, (boxes[i][0] * im_h)))
                xmin = int(max(1, (boxes[i][1] * im_w)))
                ymax = int(min(im_h, (boxes[i][2] * im_h)))
                xmax = int(min(im_w, (boxes[i][3] * im_w)))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
                object_name = self.labels[int(classes[i])]
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                label_size, base_line = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                label_ymin = max(ymin, label_size[1] + 10)
                cv2.rectangle(image, (xmin, label_ymin - label_size[1] - 10),
                              (xmin + label_size[0], label_ymin + base_line - 10),
                              (255, 255, 255), cv2.FILLED)
                cv2.putText(image, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                detect_text = detect_text + " " + object_name
        cv2.imshow('Detector', image)
        os.system('echo %s | festival --tts & ' % detect_text)
        sleep(5)
        cv2.destroyAllWindows()
        return
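# A minimal usage sketch for ImageDetection (not part of the original class): the model
# directory and image path are hypothetical; detect() also shells out to festival for text-to-speech.
image_detector = ImageDetection('Sample_TFLite_model')
image_detector.detect('test_images/kitchen.jpg')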
frame = cv2.imread('/home/root/model/test_cat.jpg')  # read the test image from disk
# print(frame)
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
frameo = cv2.resize(frame, (width, height))  # resize frame to the model's expected input size
input_data = np.expand_dims(frameo, axis=0)  # expand the shape of the array to [1, H, W, 3]
if floating_model:
    input_data = (np.float32(input_data) / 218) - 1  # normalize pixel values for a float model
interpreter.set_tensor(input_details[0]['index'], input_data)  # pass data to the model
interpreter.invoke()  # run inference
boundbox = interpreter.get_tensor(output_details[0]['index'])  # bounding boxes
obj_class = interpreter.get_tensor(output_details[1]['index'])  # class indices
score = interpreter.get_tensor(output_details[2]['index'])  # confidence scores
num = interpreter.get_tensor(output_details[3]['index'])[0]  # number of detections (always 10 for this model)
for i in range(int(num)):
    top, left, bottom, right = boundbox[0][i]  # normalized position of the detected object
    classId = int(obj_class[0][i])  # class of the detected object
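The snippet above stops after extracting each box and class index. A minimal sketch of how the normalized [top, left, bottom, right] coordinates could be scaled back to pixels and drawn; the confidence threshold, the `labels` list and the output path are assumptions, not part of the original snippet:

# Sketch: scale normalized boxes back to pixel coordinates and draw them on the frame.
im_h, im_w, _ = frame.shape
for i in range(int(num)):
    if score[0][i] < 0.6:  # assumed confidence threshold
        continue
    top, left, bottom, right = boundbox[0][i]
    y1, x1 = int(top * im_h), int(left * im_w)
    y2, x2 = int(bottom * im_h), int(right * im_w)
    cv2.rectangle(frame, (x1, y1), (x2, y2), (10, 255, 0), 2)
    # `labels` is assumed to be a list loaded from the label map, as in the other snippets
    cv2.putText(frame, labels[int(obj_class[0][i])], (x1, max(y1 - 7, 10)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
cv2.imwrite('/home/root/model/test_cat_out.jpg', frame)  # hypothetical output path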
h = img.shape[0]  # original image height (needed below to resize the masks back)
w = img.shape[1]
img = cv2.resize(img, (256, 144))
img = np.asarray(img)
img = img / 255.
img = img.astype(np.float32)
img = img[np.newaxis, :, :, :]

# TensorFlow Lite
interpreter = Interpreter(model_path='model_float16_quant.tflite', num_threads=4)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]['index']    # index of the input tensor
output_details = interpreter.get_output_details()[0]['index']  # index of the output tensor
interpreter.set_tensor(input_details, img)
interpreter.invoke()
output = interpreter.get_tensor(output_details)
print(output.shape)

out1 = output[0][:, :, 0]
out2 = output[0][:, :, 1]
out1 = np.invert((out1 > 0.5) * 255)  # threshold each channel at 0.5, then invert the 0/255 mask
out2 = np.invert((out2 > 0.5) * 255)
print('out1:', out1.shape)
print('out2:', out2.shape)
out1 = cv2.resize(np.uint8(out1), (w, h))  # resize masks back to the original image size
out2 = cv2.resize(np.uint8(out2), (w, h))
class ButtDetector: def __init__(self): if TPU: self.interpreter = Interpreter(model_path=os.path.join(MODEL_DIR, 'butt_detecter_quantized_edgetpu.tflite'), experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) else: self.interpreter = Interpreter(model_path=os.path.join(MODEL_DIR, 'butt_detecter_quantized.tflite')) self.interpreter.allocate_tensors() self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() self.height = self.input_details[0]['shape'][1] self.width = self.input_details[0]['shape'][2] self.floating_model = (self.input_details[0]['dtype'] == np.float32) with open(os.path.join(MODEL_DIR, "labelmap.txt"), 'r') as f: self.labels = [line.strip() for line in f.readlines()] self.butt_nums = 0 self.detected_status = False def detect_butts(self, frame, count_ret=False): if count_ret: # st_time = time.time() image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) im_h, im_w, _ = frame.shape image_resized = cv2.resize(image_rgb, (self.width, self.height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if self.floating_model: input_data = (np.float32(input_data) - INPUT_MEAN) / INPUT_STD # Perform the actual detection by running the model with the image as input self.interpreter.set_tensor(self.input_details[0]['index'], input_data) self.interpreter.invoke() scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Loop over all detections and draw detection box if confidence is above minimum threshold detected_butts = 0 for i in range(len(scores)): if (scores[i] > THRESHOLD) and (scores[i] <= 1.0): detected_butts += 1 if self.butt_nums == 0 and detected_butts > 0: self.butt_nums = detected_butts self.detected_status = True elif self.butt_nums != 0 and detected_butts != 0 and self.butt_nums != detected_butts: self.butt_nums = detected_butts self.detected_status = True elif self.butt_nums != 0 and detected_butts == 0: self.butt_nums = detected_butts self.detected_status = False elif self.butt_nums != 0 and self.butt_nums == detected_butts: self.detected_status = False # print(f"[INFO] Processing Time: {time.time() - st_time}") cv2.putText(frame, f"The number of Butts: {self.butt_nums}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) return frame
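A minimal usage sketch for the ButtDetector class above, assuming an ordinary OpenCV capture loop and that MODEL_DIR, TPU, THRESHOLD, INPUT_MEAN and INPUT_STD are configured elsewhere in the script (they are referenced but not defined in the snippet):

# Sketch: run ButtDetector on a webcam stream (constants assumed to be set elsewhere).
import cv2

detector = ButtDetector()
cap = cv2.VideoCapture(0)
try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frame = detector.detect_butts(frame, count_ret=True)  # annotates the count on the frame
        if detector.detected_status:
            print(f"Butt count changed to {detector.butt_nums}")
        cv2.imshow("butts", frame)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()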
def process_frame(frame): global entry, lime_count, marker_count, lime_sizes, found_list, total_marker_width, pixel_per_metric interpreter = Interpreter(model_path=PATH_TO_CKPT, num_threads=4) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) #frame_rgb = frame frame_resized = cv2.resize(frame_rgb, (width, height)) #frame_resized = cv2.resize(frame_rgb, (480, 320)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input try: start_time = time.time() interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() elapsed_time.append(time.time() - start_time) except: print('Thread Error: interpreter not reference') # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # counting objects and measure diameter of lime if xmin < LINE2 and xmax > LINE1 and not entry: entry = True if entry and xmax <= LINE1: entry = False if (int(classes[i]) + 1 == 1): lime_found = time.time() - start_total_time try: lime_count += 1 lime_diameter = ( (xmax - xmin) + (ymax - ymin)) / (2 * pixel_per_metric) lime_sizes.append(lime_diameter) found_list.append(lime_found) print( f'lime {lime_count} is found at {lime_found}, Diameter(size): {lime_diameter * 1000:.3f} mm' ) except: # marker must came first for calculating pixel/metric lime_count -= 1 marker_count += 1 total_marker_width += ((xmax - xmin) + 
                        (ymax - ymin)) / 2
                        pixel_per_metric = (total_marker_width / marker_count) / MARKER_DIAMETER
                elif (int(classes[i]) + 1 == 2):
                    marker_count += 1
                    total_marker_width += ((xmax - xmin) + (ymax - ymin)) / 2
                    pixel_per_metric = (total_marker_width / marker_count) / MARKER_DIAMETER

    # insert Lime Count information text
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(frame, 'Lime Count: ' + str(lime_count), (10, 35), font, 0.8,
                (0, 0xFF, 0xFF), 2, cv2.LINE_AA)
    # insert Marker Count information text
    cv2.putText(frame, 'Marker Count: ' + str(marker_count), (10, 55), font, 0.8,
                (0, 0xFF, 0xFF), 2, cv2.LINE_AA)
    # overlay the two counting lines
    pt1 = (LINE1, 0)
    pt2 = (LINE1, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)
    pt1 = (LINE2, 0)
    pt2 = (LINE2, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)
    frame = cv2.resize(frame, (480, 320))
    return frame
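The sizing above relies on a pixels-per-metric ratio: the marker's average box size in pixels divided by its known diameter MARKER_DIAMETER; dividing a lime's average box size by that ratio then gives its real-world diameter. A small worked example with made-up numbers, assuming MARKER_DIAMETER is expressed in metres:

# Worked example of the pixel-per-metric idea (all numbers are illustrative).
MARKER_DIAMETER = 0.05                            # known marker diameter, metres
marker_px = (80 + 82) / 2                         # average of marker box width and height, pixels
pixel_per_metric = marker_px / MARKER_DIAMETER    # = 1620 pixels per metre

lime_px = (95 + 93) / 2                           # average of a lime's box width and height, pixels
lime_diameter = lime_px / pixel_per_metric        # ~= 0.058 m
print(f"Estimated lime diameter: {lime_diameter * 1000:.1f} mm")  # ~= 58.0 mm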
def detection(): # Define and parse input arguments #parser = argparse.ArgumentParser() #parser.add_argument('--modeldir', help='Folder the .tflite file is located in', # required=True) #parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', # default='detect.tflite') #parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', # default='labelmap.txt') #parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', # default=0.5) #parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', # default=None) #parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', # default=None) #parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', # action='store_true') #args = parser.parse_args() MODEL_NAME = 'Sample_TFLite_model' GRAPH_NAME = 'detect.tflite' LABELMAP_NAME = 'labelmap.txt' min_conf_threshold = float(0.5) use_TPU = None # Parse input image name and directory. IM_NAME = None IM_DIR = None # If both an image AND a folder are specified, throw an error if (IM_NAME and IM_DIR): print( 'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.' ) sys.exit() # If neither an image or a folder are specified, default to using 'test1.jpg' for image name if (not IM_NAME and not IM_DIR): IM_NAME = 'test1.jpg' # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Define path to images and grab all image filenames if IM_DIR: PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR) images = glob.glob(PATH_TO_IMAGES + '/*') elif IM_NAME: PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME) images = glob.glob(PATH_TO_IMAGES) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. 
# If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Loop over every image and perform detection for image_path in images: # Load image and resize to expected shape [1xHxWx3] image = cv2.imread(image_path) image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) imH, imW, _ = image.shape image_resized = cv2.resize(image_rgb, (width, height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' score_detection = int(scores[i] * 100) #print("Your object is a ",object_name, " by : ",test_reCup, "%") labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( image, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(image, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text return (object_name == 'cup' and score_detection >= 55) # All the results have been drawn on the image, now display the image cv2.imshow('Object detector', image) # Press any key to continue to next image, or press 'q' to quit if cv2.waitKey(0) == ord('q'): break # Clean up cv2.destroyAllWindows()
def object_detection(): label_out = [] mid_x_out = [] mid_y_out = [] class VideoStream: """Camera object that controls video streaming from the Picamera""" def __init__(self, resolution=(640, 480), framerate=30): # Initialize the PiCamera and the camera image stream self.stream = cv2.VideoCapture(0) ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) ret = self.stream.set(3, resolution[0]) ret = self.stream.set(4, resolution[1]) # Read first frame from the stream (self.grabbed, self.frame) = self.stream.read() # Variable to control when the camera is stopped self.stopped = False def start(self): # Start the thread that reads frames from the video stream Thread(target=self.update, args=()).start() return self def update(self): # Keep looping indefinitely until the thread is stopped while True: # If the camera is stopped, stop the thread if self.stopped: # Close camera resources self.stream.release() return # Otherwise, grab the next frame from the stream (self.grabbed, self.frame) = self.stream.read() def read(self): # Return the most recent frame return self.frame def stop(self): # Indicate that the camera and thread should be stopped self.stopped = True # Define and parse input arguments parser = argparse.ArgumentParser() parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True) parser.add_argument( '--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument( '--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument( '--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument( '--resolution', help= 'Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.', default='1280x720') parser.add_argument( '--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = args.modeldir GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) resW, resH = args.resolution.split('x') imW, imH = int(resW), int(resH) use_TPU = args.edgetpu # Import TensorFlow libraries # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tensorflow') if pkg is None: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. 
if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print(PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW, imH), framerate=30).start() time.sleep(1) #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): while True: flag = 0 # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor( output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor( output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) cv2.circle(frame, (xmin, ymin), 5, (255, 255, 0), cv2.FILLED) cv2.circle(frame, (xmax, ymax), 5, (0, 255, 255), cv2.FILLED) x_diff = xmax - xmin y_diff = ymax - ymin mid_x = x_diff / 2 + xmin mid_x = math.ceil(mid_x) mid_y = ymin + y_diff / 2 mid_y = math.ceil(mid_y) cv2.circle(frame, (0, 0), 5, (0, 0, 255), cv2.FILLED) cv2.circle(frame, (mid_x, mid_y), 5, (255, 255, 255), cv2.FILLED) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, 
                            (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX,
                            0.7, (0, 0, 0), 2)  # Draw label text

                label_out.append(label)
                mid_x_out.append(mid_x)
                mid_y_out.append(mid_y)

        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        #cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        (h, w) = frame.shape[:2]
        cv2.waitKey(100)
        break

    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
    return (label_out, mid_x_out, mid_y_out, h / 2, w / 2)
def frames(): # configure arguments -- begin MODEL_NAME = 'Sample_TFlite_model' GRAPH_NAME = 'objectdetect.tflite' LABELMAP_NAME = 'labelmapobjectdetect.txt' min_conf_threshold = float(0.7) imW, imH = 640, 320 # configure arguments --end # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter else: from tensorflow.lite.python.interpreter import Interpreter # Get path to current working directory CWD_PATH = os.getcwd() # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW, imH), framerate=30).start() time.sleep(1) #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): while True: t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. 
if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'], input_data) interpreter.invoke() # Retrieve detection results boxes = interpreter.get_tensor(output_details[0]['index'])[ 0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[ 0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[ 0] # Confidence of detected objects # boxes: [ymin, xmin, ymax, xmax] # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if (scores[i] > min_conf_threshold) and (scores[i] < 1): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1, (boxes[i][0] * imH))) xmin = int(max(1, (boxes[i][1] * imW))) ymax = int(min(imH, (boxes[i][2] * imH))) xmax = int(min(imW, (boxes[i][3] * imW))) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2) # Draw label object_name = labels[int( classes[i] )] # Look up object name from "labels" array using class index label = '%s: %d%%' % (object_name, int(scores[i] * 100) ) # Example: 'person: 72%' labelSize, baseLine = cv2.getTextSize( label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max( ymin, labelSize[1] + 10 ) # Make sure not to draw label too close to top of window cv2.rectangle( frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text # Draw framerate in corner of frame cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA) # Calculate framerate t2 = cv2.getTickCount() time1 = (t2 - t1) / freq frame_rate_calc = 1 / time1 # encode as a jpeg image and return it yield cv2.imencode('.jpg', frame)[1].tobytes()
def objectsCount(MODEL_NAME, GRAPH_NAME, LABELMAP_NAME, min_conf_threshold, use_TPU, IM_NAME, IM_DIR): import os import cv2 import numpy as np import sys import glob import importlib.util # If both an image AND a folder are specified, throw an error if (IM_NAME and IM_DIR): print( 'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.' ) sys.exit() # If neither an image or a folder are specified, default to using 'test1.jpg' for image name if (not IM_NAME and not IM_DIR): IM_NAME = 'test1.jpg' # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # Get path to current working directory CWD_PATH = os.getcwd() # Define path to images and grab all image filenames if IM_DIR: PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR) images = glob.glob(PATH_TO_IMAGES + '/*') elif IM_NAME: PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME) images = glob.glob(PATH_TO_IMAGES) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del (labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument if use_TPU: interpreter = Interpreter( model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 objects_list = { } #create the dictionary where the traffic names and number of cars detected will be saved # Loop over every image and perform detection for image_path in images: # Load image and resize to expected shape [1xHxWx3] image = cv2.imread(image_path) image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) imH, imW, _ = image.shape image_resized = cv2.resize(image_rgb, (width, height)) input_data = np.expand_dims(image_resized, axis=0) # Normalize pixel values if using a floating model (i.e. 
        # if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]     # Total number of detected objects (inaccurate and not needed)

        objects_count = 0  # reset the per-image count of detected cars

        # Loop over all detections and count cars whose confidence is above the minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
                object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class index
                if (object_name == 'car'):
                    objects_count = objects_count + 1  # count of cars detected in the image

        objects_list[image_path] = objects_count

    return (objects_list)
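A brief usage sketch for objectsCount above; the folder name is a placeholder and the model/label file names simply mirror the defaults used elsewhere in the document:

# Sketch: count cars in every image of a folder (paths and names are hypothetical).
counts = objectsCount(MODEL_NAME='Sample_TFLite_model',
                      GRAPH_NAME='detect.tflite',
                      LABELMAP_NAME='labelmap.txt',
                      min_conf_threshold=0.5,
                      use_TPU=False,
                      IM_NAME=None,
                      IM_DIR='traffic_images')
for path, n_cars in counts.items():
    print(f"{path}: {n_cars} car(s)")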
class Detector: """ Perform object detection with the given model. The model is a quantized tflite file which if the detector can not find it at the path it will download it from neuralet repository automatically. :param config: Is a ConfigEngine instance which provides necessary parameters. """ def __init__(self, config): self.config = config # Get the model name from the config self.model_name = self.config.get_section_dict('Detector')['Name'] # Frames Per Second self.fps = None self.model_file = 'ped_ssd_mobilenet_v2_quantized_edgetpu.tflite' self.model_path = '/repo/data/edgetpu/' + self.model_file # Get the model .tflite file path from the config. # If there is no .tflite file in the path it will be downloaded automatically from base_url user_model_path = self.config.get_section_dict('Detector')['ModelPath'] if len(user_model_path) > 0: print('using %s as model' % user_model_path) self.model_path = user_model_path else: base_url = 'https://media.githubusercontent.com/media/neuralet/neuralet-models/master/edge-tpu/' url = base_url + self.model_name + '/' + self.model_file if not os.path.isfile(self.model_path): print('model does not exist under: ', self.model_path, 'downloading from ', url) wget.download(url, self.model_path) # Load TFLite model and allocate tensors self.interpreter = Interpreter( self.model_path, experimental_delegates=[load_delegate("libedgetpu.so.1")]) self.interpreter.allocate_tensors() # Get the model input and output tensor details self.input_details = self.interpreter.get_input_details() self.output_details = self.interpreter.get_output_details() # Get class id from config self.class_id = int( self.config.get_section_dict('Detector')['ClassID']) self.score_threshold = float( self.config.get_section_dict('Detector')['MinScore']) def inference(self, resized_rgb_image): """ inference function sets input tensor to input image and gets the output. The interpreter instance provides corresponding detection output which is used for creating result Args: resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels) Returns: result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...] """ input_image = np.expand_dims(resized_rgb_image, axis=0) # Fill input tensor with input_image self.interpreter.set_tensor(self.input_details[0]["index"], input_image) t_begin = time.perf_counter() self.interpreter.invoke() inference_time = time.perf_counter() - t_begin # Second self.fps = convert_infr_time_to_fps(inference_time) # The function `get_tensor()` returns a copy of the tensor data. # Use `tensor()` in order to get a pointer to the tensor. boxes = self.interpreter.get_tensor(self.output_details[0]['index']) labels = self.interpreter.get_tensor(self.output_details[1]['index']) scores = self.interpreter.get_tensor(self.output_details[2]['index']) # TODO: will be used for getting number of objects # num = self.interpreter.get_tensor(self.output_details[3]['index']) result = [] for i in range(boxes.shape[1]): # number of boxes if labels[0, i] == self.class_id and scores[0, i] > self.score_threshold: result.append({ "id": str(self.class_id) + '-' + str(i), "bbox": boxes[0, i, :], "score": scores[0, i] }) return result
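A hedged sketch of how the Detector above might be driven, assuming an Edge TPU device is attached, a ConfigEngine instance is available from the host application, and frames are resized to the model's expected input (300x300 is an assumption for the SSD-MobileNet file named above):

# Sketch: feed resized RGB frames to Detector.inference() (config handling is assumed).
import cv2

detector = Detector(config)            # config: a ConfigEngine instance from the host app
cap = cv2.VideoCapture(0)
while True:
    ok, frame_bgr = cap.read()
    if not ok:
        break
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (300, 300))          # assumed model input size
    for det in detector.inference(resized):
        print(det["id"], det["score"], det["bbox"])
    print("FPS:", detector.fps)
cap.release()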
def gen_frames(): # Define VideoStream class to handle streaming of video from webcam in separate processing thread # Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/ class VideoStream(object): """Camera object that controls video streaming from the Picamera""" def __init__(self,resolution=(640,480),framerate=30,target=None,args=()): global capture_image_limit capture_image_limit = 2000 global file_save_id file_save_id =0 # Initialize the PiCamera and the camera image stream self.stream = cv2.VideoCapture(0) #VideoStream Instance instance = VideoStream.__qualname__ print('The class instance is: ',instance) #print('\nVIDEOSTREAM: locals() value inside class\n', locals()) #print(dir(VideoStream)) #Reload reloadClass = os.environ.get('reload') if reloadClass == 'True': print('Delete Self:') del self os.environ['reload'] = 'False' ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) ret = self.stream.set(3,resolution[0]) ret = self.stream.set(4,resolution[1]) # Read first frame from the stream (self.grabbed, self.frame) = self.stream.read() # Variable to control when the camera is stopped self.stopped = False def __del__(self): print ("Object destroyed"); def start(self): # Start the thread that reads frames from the video stream Thread(target=self.update,args=()).start() return self def update(self): # Keep looping indefinitely until the thread is stopped while True: # If the camera is stopped, stop the thread if self.stopped: # Close camera resources self.stream.release() return # Otherwise, grab the next frame from the stream (self.grabbed, self.frame) = self.stream.read() def read(self): # Return the most recent frame this_instance = self return self.frame def stop(self): # Indicate that the camera and thread should be stopped self.stopped = True # Define and parse input arguments parser = argparse.ArgumentParser() parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True) parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite') parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt') parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5) parser.add_argument('--resolution', help='Desired webcam resolution in WxH. 
If the webcam does not support the resolution entered, errors may occur.', default='1280x720') parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true') args = parser.parse_args() MODEL_NAME = args.modeldir print('~~~~ Param Default Model Name: ' + str(MODEL_NAME)) GRAPH_NAME = args.graph LABELMAP_NAME = args.labels min_conf_threshold = float(args.threshold) resW, resH = args.resolution.split('x') imW, imH = int(resW), int(resH) use_TPU = args.edgetpu # Import TensorFlow libraries # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow # If using Coral Edge TPU, import the load_delegate library pkg = importlib.util.find_spec('tflite_runtime') print('TPU Runtime' + str(pkg)) if pkg: from tflite_runtime.interpreter import Interpreter if use_TPU: from tflite_runtime.interpreter import load_delegate else: from tensorflow.lite.python.interpreter import Interpreter if use_TPU: from tensorflow.lite.python.interpreter import load_delegate # If using Edge TPU, assign filename for Edge TPU model if use_TPU: # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite' if (GRAPH_NAME == 'detect.tflite'): GRAPH_NAME = 'edgetpu.tflite' # Get path to current working directory # Multi-Model # Demo90 /home/pi/SensorFusion/Demo90 # Deer: /home/pi/SensorFusion/PreLoadedModels/Model01.Deer # Head: /home/pi/SensorFusion/PreLoadedModels/Model02.Head # Eyes: /home/pi/SensorFusion/PreLoadedModels/Model03.Eyes # Tree: /home/pi/SensorFusion/PreLoadedModels/Model04.Tree # check.id - cd /home/pi/SensorFusion/checkid CWD_PATH = os.getcwd() print("Default Path: "+ CWD_PATH) newModel = str(os.environ.get('run_model')) print("New Model Name: "+ newModel) if newModel == "Demo90": CWD_PATH = "/home/pi/SensorFusion/"+ newModel elif newModel == 'Check.ID': CWD_PATH = "/home/pi/SensorFusion/checkid" else: CWD_PATH = "/home/pi/SensorFusion/PreLoadedModels/"+ newModel print("Current Model Path: "+ CWD_PATH) # Path to .tflite file, which contains the model that is used for object detection PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME) # Path to label map file PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME) print("Current Path to Label Map: "+ PATH_TO_LABELS) # Load the label map with open(PATH_TO_LABELS, 'r') as f: labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if labels[0] == '???': del(labels[0]) # Load the Tensorflow Lite model. 
# If using Edge TPU, use special load_delegate argument #if video_camera_flag:#Using a Flag here - for future use if use_TPU: interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) print('TPU Detected' + PATH_TO_CKPT) else: interpreter = Interpreter(model_path=PATH_TO_CKPT) print('No TPU detected!'+ PATH_TO_CKPT) interpreter.allocate_tensors() # Get model details input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() height = input_details[0]['shape'][1] width = input_details[0]['shape'][2] floating_model = (input_details[0]['dtype'] == np.float32) input_mean = 127.5 input_std = 127.5 # Initialize frame rate calculation frame_rate_calc = 1 freq = cv2.getTickFrequency() # Initialize video stream videostream = VideoStream(resolution=(imW,imH),framerate=30).start() time.sleep(1) global img_counter img_counter = 0 #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True): try: while True: #while video_camera_flag: # Start timer (for calculating frame rate) t1 = cv2.getTickCount() # Grab frame from video stream frame1 = videostream.read() # Acquire frame and resize to expected shape [1xHxWx3] global frame frame = frame1.copy() frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_resized = cv2.resize(frame_rgb, (width, height)) input_data = np.expand_dims(frame_resized, axis=0) # Normalize pixel values if using a floating model (i.e. if model is non-quantized) if floating_model: input_data = (np.float32(input_data) - input_mean) / input_std # Perform the actual detection by running the model with the image as input interpreter.set_tensor(input_details[0]['index'],input_data) interpreter.invoke() # Retrieve detection results person_found = False boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) #Kill TensofFlow while Annotating kill_tensorFlow = os.environ.get('kill_tensorFlow') #print("TensofFlow Status: " + str(kill_tensorFlow)) # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(scores)): if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)): # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1,(boxes[i][0] * imH))) xmin = int(max(1,(boxes[i][1] * imW))) ymax = int(min(imH,(boxes[i][2] * imH))) xmax = int(min(imW,(boxes[i][3] * imW))) #print("Kill TF Flag: "+ str(kill_tensorFlow)) if kill_tensorFlow != 'True': try: cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 3) except: pass # Draw label (object_name) and score (%) object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index #print(labels[int(classes[i])]+": "+str(i)) if labels[int(classes[0])]== 'person':#NOTE - The bar is for one person only #print('Person Found!') person_found = True# used for bar below scores_flag = os.environ.get('scores_flag') labels_flag = os.environ.get('labels_flag') #states state_ = 11 #both on by default if labels_flag == 'labels_off' and scores_flag == 
'scores_off': state_ = 0#00 label = object() if labels_flag == 'labels_on' and scores_flag == 'scores_on': state_ = 11#11 label = '%s: %d%%' % (object_name.capitalize(), int(scores[i]*100)) # Example: 'person: 72%' if labels_flag == 'labels_off' and scores_flag == 'scores_on': label = '%d%%' % (int(scores[i]*100)) # Example: '72%' state_ = 1#01 if labels_flag == 'labels_on' and scores_flag == 'scores_off': state_= 10 #10 label = '%s: ' % (object_name.capitalize()) # Example: 'person: ' #draw the labels, background score and box if state_ != 0: labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window #cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (237,237,237), cv2.FILLED) # Draw white box to put label text in if kill_tensorFlow != 'True': cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (128,128,128), cv2.FILLED) # Draw gray box to put label text in cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) # Draw label text else: if kill_tensorFlow != 'True': cv2.rectangle(frame, (xmin,ymin), (xmin,ymin), (237,237,237), cv2.FILLED) # Draw frame with no label OR score text ! # Draw framerate in corner of frame - use 'F' key to toggle on/off try: if fps_flag: cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA) else: pass except: pass #If Capture Image Draw status text capture_flag = os.environ.get('cap_flag') try: if capture_flag == "True": cv2.putText(frame,'Saving File: '+str(img_counter),(520,50),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,0,255),2) else: pass except: pass # All the results have been drawn on the frame, so it's time to display it. #cv2.imshow('Object detector', frame) ## Commented for the FLASK API #Module widgets.meter() if kill_tensorFlow != 'True': #window_name ='Object detector' top = int(scores[0]*100) color = (0,0,255) if person_found == True: widgets.meter(frame,top)#module #End Module # Displaying the image - DO NOT USE! #cv2.imshow(window_name, image) #SENSOR FUSION Flask VIDEO API #Brute Force Motion JPEG, OpenCV defaults to capture raw images, #so we must encode it into JPEG in order to correctly display the #video stream - NOTE need to work on this cv2.imencode tobytes slows the apparent frame rate by about 50%, plus the UI takes some #See: https://www.pyimagesearch.com/2017/02/06/faster-video-file-fps-with-cv2-videocapture-and-opencv/ ret, buffer = cv2.imencode('.jpg', frame) frame2 = buffer.tobytes() #the image that is saved #Capture Images and save to Annotate Named subdirectory under ~/Pictures #capture_flag = os.environ.get('cap_flag') annotate_name = os.environ.get('annotate_name') if capture_flag == 'True': #Check limit try: print("image limit: " + anno_images) capture_image_limit = int(anno_images) except: pass if capture_flag == 'True' and img_counter < capture_image_limit: #Create new or use existing directory path_to_directory = '../Pictures/' + annotate_name print("Saving to ", path_to_directory) try: os.makedirs(path_to_directory) except FileExistsError: #dir already exists, so overwrite existing (unless we datestamp)! 
pass img_name="../Pictures/"+annotate_name+"/"+annotate_name+"sf-frame_{}.jpg".format(img_counter) cv2.namedWindow("Capture Window") cv2.moveWindow("Capture Window", -500, -500)# push it off screen :) cv2.imwrite(img_name, frame1) print('Wrote Image-'+ img_name) img_counter +=1 #Clear Capture Flag when done grabbing images if capture_flag == 'True' and img_counter >= capture_image_limit: os.environ['cap_flag'] = 'False' img_counter = 0 yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + frame2 + b'\r\n') # concat frame one by one and show result ## End Video Stream API ### # Calculate framerate t2 = cv2.getTickCount() time1 = (t2-t1)/freq frame_rate_calc= 1/time1 # Press 'q' to quit if cv2.waitKey(1) == ord('q'): print("CV2 Break") break # Press 'q' to quit quit_flag = os.environ.get('quit_flag') if quit_flag == 'quit':# os.environ['quit_flag'] = '' print("CV2 Quit " + quit_flag) cv2.destroyAllWindows() if videostream: #videostream.release() videostream.stop() print('Videostream stopped') break #print("quit_flag " + str(quit_flag)) # Clean up cv2.destroyAllWindows() if videostream: #videostream.release() videostream.stop() #os.system("pkill chromium") #webbrowser.open('http://localhost:5000', new=0) except KeyboardInterrupt: pass
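gen_frames() above yields multipart JPEG chunks with a `frame` boundary, which matches the usual Flask MJPEG-streaming pattern hinted at by the "Flask VIDEO API" comments. A minimal route sketch; the app object and URL are assumptions, and note that gen_frames() itself still parses --modeldir from the command line:

# Sketch: serve gen_frames() as an MJPEG stream from Flask (app/route names assumed).
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # Each yielded chunk already carries the --frame boundary and Content-Type header.
    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)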
image_height = image.shape[0] image_width = image.shape[1] # Resize and normalize image for network input t3 = time.perf_counter() frame = cv2.resize(image, (300, 300)) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame = np.expand_dims(frame, axis=0) frame = frame.astype(np.float32) cv2.normalize(frame, frame, -1, 1, cv2.NORM_MINMAX) t4 = time.perf_counter() print("resize and normalize time: ", t4 - t3) # run model t5 = time.perf_counter() interpreter.set_tensor(input_details[0]['index'], frame) interpreter.invoke() t6 = time.perf_counter() print("inference + postprocess time: ", t6 - t5) # get results boxes = interpreter.get_tensor(output_details[0]['index'])[0] classes = interpreter.get_tensor(output_details[1]['index'])[0] scores = interpreter.get_tensor(output_details[2]['index'])[0] count = interpreter.get_tensor(output_details[3]['index'])[0] for i, (box, classidx, score) in enumerate(zip(boxes, classes, scores)): probability = score if probability >= 0.6: if (not math.isnan(box[0]) and not math.isnan(box[1]) and not math.isnan(box[2]) and not math.isnan(box[3])):
class ImageProcessing: import numpy as np import cv2 import math from threading import Thread from multiprocessing import Process from threading import Timer import os def __init__(self, resolution, flag): self.height, self.width = resolution self.resolution = self.height, self.width self.index=0 self.mode = 0 self.flag = flag self.camera = self.cv2.VideoCapture(-1) self.camera.set(self.cv2.CAP_PROP_FRAME_HEIGHT, self.height) self.camera.set(self.cv2.CAP_PROP_FRAME_WIDTH, self.width) self._setROI() self._setCoral() self._getDashboard() self._getIpmMat() self._getMaskBG() def _setROI(self): self.ROI_W = int(self.width*0.2) self.ROI_H = int(self.height*0.35) self.ROI_far_pos = 0.65 # Set along your vehicle velocity and frane rate | Late: far, Fast: near self.ROI_far_rngH = slice(int(self.height*self.ROI_far_pos - self.ROI_H), int(self.height*self.ROI_far_pos)) self.ROI_near_rngH = slice(int(self.height - self.ROI_H), int(self.height)) self.ROI_rngW = slice(int(self.width/2 - self.ROI_W/2),int(self.width/2 + self.ROI_W/2)) self.ROI_lane_rngH = slice(int(self.height*0.7 - self.ROI_H*0.7), int(self.height*0.7)) self.ROI_lane_rngW = slice(int(self.width/2 - self.ROI_W*0.5),int(self.width/2 + self.ROI_W*0.5)) def _getIpmMat(self): # Your camera inner & external parameters alpha = (7-90)*self.np.pi/180 beta = 0 gamma = 0 dist = 300 focal = 500 # Calculating rotational transformation matrix A1 = self.np.array([[1,0,-self.width/2],[0,1,-self.height/2],[0,0,0],[0,0,1]],dtype='f') RX = self.np.array([[1,0,0,0],[0,self.math.cos(alpha), -self.math.sin(alpha),0],[0,self.math.sin(alpha),self.math.cos(alpha),0],[0,0,0,1]],dtype='f') RY = self.np.array([[self.math.cos(beta),0,-self.math.sin(beta),0],[0,1,0,0],[self.math.sin(beta),0,self.math.cos(beta),0],[0,0,0,1]],dtype='f') RZ = self.np.array([[self.math.cos(gamma),-self.math.sin(gamma),0,0],[self.math.sin(gamma),self.math.cos(gamma),0,0],[0,0,1,0],[0,0,0,1]],dtype='f') R = self.np.dot(RX,self.np.dot(RY,RZ)) T = self.np.array([[1,0,0,0],[0,1,0,0],[0,0,1,dist],[0,0,0,1]],dtype='f') K = self.np.array([[focal,0,self.width/2,0],[0,focal,self.height/2,0],[0,0,1,0]],dtype='f') self.IpmMat = self.np.dot(K,self.np.dot(T,self.np.dot(R,A1))) def _getMaskBG(self): # Mask for blank area when images were ipm mapped tmp_blank = self.np.full((self.height, self.width,3), 255, dtype='uint8') tmp_ipm = self.cv2.warpPerspective(tmp_blank, self.IpmMat, (self.width, self.height), flags=self.cv2.INTER_CUBIC|self.cv2.WARP_INVERSE_MAP) self.maskBG = self.cv2.bitwise_not(tmp_ipm) def _getDashboard(self): self.board_size = 320 size = self.board_size self.board = self.np.full((size*2, size*2,3), 255, dtype='uint8') tmp_stopline = self.cv2.imread('dashboard/stopline_red.jpg',self.cv2.IMREAD_COLOR) self.icon_stopline = self.cv2.resize(tmp_stopline, dsize=(size,size), interpolation=self.cv2.INTER_AREA) tmp_blindspot = self.cv2.imread('dashboard/blind_spot_red.jpg',self.cv2.IMREAD_COLOR) self.icon_blindspot = self.cv2.resize(tmp_blindspot, dsize=(size,size), interpolation=self.cv2.INTER_AREA) self.icon_blank = self.np.full((size,size*2,3),255,dtype='uint8') self.icon_schoolzone = self.np.full((size,size*2,3),0,dtype='uint8') self.icon_schoolzone[:,:,2] = 255 self.icon_subs = self.np.full((size,size,3),0,dtype='uint8') self.icon_subs[:,:,2] = 255 def processing(self): # Calibration parameters by experiments CamMat = self.np.array([[314.484, 0, 321.999],[0, 315.110, 259.722],[ 0, 0, 1]],dtype='f') DistMat = self.np.array([ -0.332015, 0.108453, 0.001100, 0.002183],dtype='f') # For inRange 
function in opencv # Modify value along your brightness condition lower_k = self.np.array([0,0,0]) upper_k = self.np.array([180,255,100]) lower_r1 = self.np.array([0,50,50]) upper_r1 = self.np.array([30,255,255]) lower_r2 = self.np.array([150,50,50]) upper_r2 = self.np.array([180,255,255]) ret, frame = self.camera.read(); del(ret) # Now take frame from camera calibration = self.cv2.undistort(frame, CamMat, DistMat, None, CamMat) # Calibration because of wide angle camera tmp_ipm1 = self.cv2.warpPerspective(calibration, self.IpmMat, (self.width,self.height), flags=self.cv2.INTER_CUBIC|self.cv2.WARP_INVERSE_MAP) # Geometrical transform image to Top view perspective tmp_ipm2 = self.cv2.add(tmp_ipm1, self.maskBG) # It just merges ipm image with white background ipm = self.cv2.bilateralFilter(tmp_ipm2,9,50,50) # Just Filter self.result = ipm.copy() hsv = self.cv2.cvtColor(ipm, self.cv2.COLOR_BGR2HSV) gray = self.cv2.cvtColor(ipm, self.cv2.COLOR_BGR2GRAY) #canny = self.cv2.Canny(gray, 100, 200, 3) # If you want to use canny edge algorithm, activate this line threshold_inv = self.cv2.adaptiveThreshold(gray, 255, self.cv2.ADAPTIVE_THRESH_MEAN_C, self.cv2.THRESH_BINARY, 21, 5) threshold = self.cv2.bitwise_not(threshold_inv) #mask_k = self.cv2.inRange(hsv, lower_k, upper_k) #mask_k = canny.copy() mask_k = threshold.copy() self.mask_k = mask_k[self.ROI_far_rngH, self.ROI_rngW]#[self.ROI_far_rngH, self.ROI_rngW] self.mask_lane = mask_k[self.ROI_lane_rngH,self.ROI_lane_rngW] # Now you can get red mask for schoolzone detecting mask_r1 = self.cv2.inRange(hsv, lower_r1, upper_r1) mask_r2 = self.cv2.inRange(hsv, lower_r2, upper_r2) mask_r = self.cv2.add(mask_r1, mask_r2) self.mask_r = mask_r[self.ROI_near_rngH, self.ROI_rngW] def detectingSchoolzone(self): # Just counting red dots if((self.np.sum(self.mask_r)/255) > ((self.ROI_H)*(self.ROI_W)*0.2)): self.flag.schoolzone = True else: self.flag.schoolzone = False def laneDetect(self): # By Jinwon, Lane detecting algorithm. # adaptive detecting method # It need to improve frame = self.cv2.flip(self.mask_lane.copy(),0) H,W = frame.shape[0:2] lane_base = self.np.array(range(0,W)) lane = self.np.full((H,1), int(W/2), dtype='uint32') laneL = self.np.full((H,1), int(W/2), dtype='uint32') laneR = self.np.full((H,1), int(W/2), dtype='uint32') num0 = self.np.sum(frame[0,:] != False) if(num0 != 0): lane[0] = int(self.np.sum(lane_base*frame[0,:])/(255*num0)) else: lane[0] = int(W/2) for j in range(1,H): rangeL = range(0, int(lane[j-1])) rangeR = range(int(lane[j-1]), int(W)) numL = self.np.sum(frame[j,rangeL] != False) numR = self.np.sum(frame[j,rangeR] != False) if(numL == 0)|(numR == 0): lane[j] = lane[j-1] else: laneL[j] = self.np.sum(lane_base[rangeL]*frame[j,rangeL])/(255*numL) laneR[j] = self.np.sum(lane_base[rangeR]*frame[j,rangeR])/(255*numR) lane[j] = (laneR[j] + laneL[j])/2 self.mask_lane[int(H - j),int(lane[j])] = 255 self.flag.lane_err = ((self.np.mean(lane)*2/W) -1) # Return method is various. 
It just return mean value def _setCoral(self, modeldir="Model"): # By github.com/EdjeElectronics & coral.ai CWD_PATH =self.os.getcwd() MODEL_NAME = modeldir GRAPH_NAME = "edgetpu.tflite" #If you don't have coral, "detect.tflite" LABELMAP_NAME = "labelmap.txt" # path to PATH_TO_CKPT = self.os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME) PATH_TO_LABELS = self.os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME) # Load the label map with open(PATH_TO_LABELS, 'r') as f: self.coral_labels = [line.strip() for line in f.readlines()] # Have to do a weird fix for label map if using the COCO "starter model" from # https://www.tensorflow.org/lite/models/object_detection/overview # First label is '???', which has to be removed. if self.coral_labels[0] == '???': del(self.coral_labels[0]) # Load the Tensorflow Lite model. # If using Edge TPU, use special load_delegate argument self.coral_interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')]) self.coral_interpreter.allocate_tensors() # Get model details self.coral_input_details = self.coral_interpreter.get_input_details() self.coral_output_details = self.coral_interpreter.get_output_details() self.coral_height = self.coral_input_details[0]['shape'][1] self.coral_width = self.coral_input_details[0]['shape'][2] self.coral_input_mean = 127.5 self.coral_input_std = 127.5 def detectingStopline(self): # By github.com/EdjeElectronics & coral.ai image = self.mask_k.copy() imH,imW = image.shape[0:2] # Get image & general coral_frame = self.cv2.cvtColor(image, self.cv2.COLOR_GRAY2RGB) frame_rgb = self.cv2.cvtColor(coral_frame, self.cv2.COLOR_BGR2RGB) frame_resized = self.cv2.resize(frame_rgb, (self.coral_width, self.coral_height)) input_data = self.np.expand_dims(frame_resized, axis=0) # Perform the actual detection by running the model with the image as input self.coral_interpreter.set_tensor(self.coral_input_details[0]['index'],input_data) self.coral_interpreter.invoke() # Retrieve detection results self.coral_boxes = self.coral_interpreter.get_tensor(self.coral_output_details[0]['index'])[0] # Bounding box coordinates of detected objects self.coral_classes = self.coral_interpreter.get_tensor(self.coral_output_details[1]['index'])[0] # Class index of detected objects self.coral_scores = self.coral_interpreter.get_tensor(self.coral_output_details[2]['index'])[0] # Confidence of detected objects #num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed) # Threshold self.coral_min_conf_threshold = 0.90 self.flag.stopline = False # Loop over all detections and draw detection box if confidence is above minimum threshold for i in range(len(self.coral_scores)): if ((self.coral_scores[i] > self.coral_min_conf_threshold) and (self.coral_scores[i] <= 1.0)): if(self.coral_labels[int(self.coral_classes[i])] == "stopline"): self.flag.stopline = True # Get bounding box coordinates and draw box # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min() ymin = int(max(1,(self.coral_boxes[i][0] * imH)) + self.ROI_far_rngH.start) xmin = int(max(1,(self.coral_boxes[i][1] * imW)) + self.ROI_rngW.start) ymax = int(min(imH,(self.coral_boxes[i][2] * imH)) + self.ROI_far_rngH.start) xmax = int(min(imW,(self.coral_boxes[i][3] * imW)) + self.ROI_rngW.start) self.cv2.rectangle(self.result, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2) # Draw label object_name = 
            object_name = self.coral_labels[int(self.coral_classes[i])]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(self.coral_scores[i] * 100))  # Example: 'person: 72%'
            labelSize, baseLine = self.cv2.getTextSize(label, self.cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
            self.cv2.rectangle(self.result, (xmin, label_ymin - labelSize[1] - 10),
                               (xmin + labelSize[0], label_ymin + baseLine - 10),
                               (255, 255, 255), self.cv2.FILLED)  # Draw white box to put label text in
            self.cv2.putText(self.result, label, (xmin, label_ymin - 7),
                             self.cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

# Thread
def task(self):
    while True:
        t1 = self.cv2.getTickCount()
        self.processing()
        self.laneDetect()
        self.detectingSchoolzone()
        self.flag.schoolzone = False  # tmp
        if self.mode == 1:
            self.detectingStopline()
        self.board[160:640, :, :] = self.result.copy()
        self.board[0:self.board_size, :, :] = self.icon_blank.copy()
        # Now, Mode Selector
        if self.mode == 0:
            if self.flag.schoolzone == True:
                self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
                self.mode = 1
        elif self.mode == 1:
            self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
            if self.flag.stopline == True:
                self.board[0:self.board_size, 0:self.board_size, :] = self.icon_stopline.copy()
                self.mode = 2
                self.flag.stop = True
        elif self.mode == 2:
            self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
            self.board[0:self.board_size, 0:self.board_size, :] = self.icon_stopline.copy()
            if self.flag.depart == True:
                self.flag.depart = False
                self.flag.powerHandle = True
                self.mode = 3
        elif self.mode == 3:
            self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
            if self.flag.refresh == True:
                self.flag.refresh = False
                self.flag.lidar = True
                self.mode = 4
        elif self.mode == 4:
            self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
            if self.flag.blindspot == True:
                self.mode = 5
                self.flag.slow = True
        elif self.mode == 5:
            self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
            self.board[0:self.board_size, self.board_size:(self.board_size * 2), :] = self.icon_blindspot.copy()
            if self.flag.blindspot == False:
                self.mode = 6
                self.flag.slow = False
        elif self.mode == 6:
            self.board[0:self.board_size, :, :] = self.icon_schoolzone.copy()
            if self.flag.schoolzone == False:
                self.board[0:self.board_size, :, :] = self.icon_blank.copy()
                self.mode = 7
        self.cv2.imshow('Dashboard', self.board)
        t2 = self.cv2.getTickCount()
        freq = self.cv2.getTickFrequency()
        #print(freq/(t2-t1))
        self.cv2.waitKey(1)
        if self.mode == 7:
            self.flag.end = True
            break
    self.cv2.destroyAllWindows()

def start(self):
    self.thread = self.Thread(target=self.task)
    self.thread.start()

def startLane(self):
    self.threadLane = self.Thread(target=self.taskLane)
    self.threadLane.start()
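# --- Illustration, not part of the original source: the mode selector in task()
# --- above is effectively a small state machine driven by boolean flags. The
# --- standalone sketch below mirrors those transitions only; the side effects on
# --- other flags (stop, slow, powerHandle, lidar) and the dashboard drawing are
# --- omitted. The "flag" attribute names are taken from the code above.
def next_mode(mode, flag):
    # Return the next mode given the current mode and the flag object,
    # following the same transitions as the if/elif chain in task().
    if mode == 0 and flag.schoolzone:
        return 1
    if mode == 1 and flag.stopline:
        return 2
    if mode == 2 and flag.depart:
        return 3
    if mode == 3 and flag.refresh:
        return 4
    if mode == 4 and flag.blindspot:
        return 5
    if mode == 5 and not flag.blindspot:
        return 6
    if mode == 6 and not flag.schoolzone:
        return 7
    return mode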
class ObjectDetectorLite:
    def __init__(self, model_path, label_path):
        """Builds the TensorFlow graph, loads the model and the labels."""
        # Load label_map
        self._load_label(label_path)
        # Define the lite graph and load the TensorFlow Lite model into memory
        self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        # Get input size
        input_shape = self.input_details[0]['shape']
        self.size = input_shape[:2] if len(input_shape) == 3 else input_shape[1:3]

    def get_input_size(self):
        return self.size

    def detect(self, image, threshold=0.1):
        """
        Predicts persons in the frame with the given confidence threshold.
        Returns a list of top-left/bottom-right coordinates plus lists of labels and confidences in %.
        """
        # Add a batch dimension
        frame = np.expand_dims(image, axis=0)
        # Run the model
        self.interpreter.set_tensor(self.input_details[0]['index'], frame)
        self.interpreter.invoke()
        # Get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])
        # Find detected boxes coordinates
        return self._boxes_coordinates(image,
                                       np.squeeze(boxes[0]),
                                       np.squeeze(classes[0] + 1).astype(np.int32),
                                       np.squeeze(scores[0]),
                                       min_score_thresh=threshold)

    def close(self):
        pass

    def _boxes_coordinates(self, image, boxes, classes, scores,
                           max_boxes_to_draw=20, min_score_thresh=.5):
        """
        Groups boxes that correspond to the same location and creates a display string for each detection.
        Args:
            image: uint8 numpy array with shape (img_height, img_width, 3)
            boxes: a numpy array of shape [N, 4]
            classes: a numpy array of shape [N]
            scores: a numpy array of shape [N] or None. If scores=None, this function assumes the
                boxes to be plotted are groundtruth boxes and plots all boxes as black with no
                classes or scores.
            max_boxes_to_draw: maximum number of boxes to visualize. If None, draw all boxes.
            min_score_thresh: minimum score threshold for a box to be visualized
        """
        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        number_boxes = min(max_boxes_to_draw, boxes.shape[0])
        detected_boxes = []
        probabilities = []
        categories = []
        for i in range(number_boxes):
            if scores is None or scores[i] > min_score_thresh:
                box = tuple(boxes[i].tolist())
                detected_boxes.append(box)
                probabilities.append(scores[i])
                categories.append(self.category_index[classes[i]])
        return np.array(detected_boxes), probabilities, categories

    def _load_label(self, path):
        """Loads labels."""
        categories = load_labelmap(path)
        self.category_index = create_category_index(categories)
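# --- Usage sketch, not part of the original source: one minimal way the class
# --- above might be driven. The model/label paths and the test image name are
# --- placeholders; load_labelmap and create_category_index are assumed to be
# --- imported elsewhere in this file, as the class itself requires.
if __name__ == '__main__':
    detector = ObjectDetectorLite('detect.tflite', 'labelmap.pbtxt')
    in_h, in_w = detector.get_input_size()
    # detect() expects an image already resized to the model's input size
    # (and uint8 pixels for a quantized model).
    image = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
    resized = cv2.resize(image, (int(in_w), int(in_h)))
    boxes, probabilities, categories = detector.detect(resized, threshold=0.5)
    for box, prob, cat in zip(boxes, probabilities, categories):
        print(cat, prob, box)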
ret, color_image = cap.read()
if not ret:
    break
colw = color_image.shape[1]
colh = color_image.shape[0]
new_w = int(colw * min(w / colw, h / colh))
new_h = int(colh * min(w / colw, h / colh))
resized_image = cv2.resize(color_image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
canvas = np.full((h, w, 3), 128)
canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image
prepimg = canvas.astype(np.float32)
prepimg = prepimg[np.newaxis, :, :, :]  # Add batch size axis
interpreter.set_tensor(input_details[0]['index'], prepimg)
interpreter.invoke()
outputs = interpreter.get_tensor(output_details[0]['index'])  # (1, 32, 32, 57)
outputs = outputs.transpose((0, 3, 1, 2))  # NHWC to NCHW, (1, 57, 32, 32)
detected_keypoints = []
keypoints_list = np.zeros((0, 3))
keypoint_id = 0
for part in range(nPoints):
    probMap = outputs[0, part, :, :]
    probMap = cv2.resize(probMap, (canvas.shape[1], canvas.shape[0]))  # (256, 256)
    keypoints = getKeypoints(probMap, threshold)
    keypoints_with_id = []
    for i in range(len(keypoints)):
while True:
    t1 = cv2.getTickCount()
    frame1 = videostream.read()
    frame = frame1.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
    classes = interpreter.get_tensor(output_details[1]['index'])[0]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]
    #num = interpreter.get_tensor(output_details[3]['index'])[0]
    for i in range(len(scores)):
        if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
class WakeWord2:
    def __init__(self):
        # Sliding window
        self.window = np.zeros(int(RECORD_DURATION * RESAMPLE_RATE) * 2)
        # Load model
        self.interpreter = Interpreter(WAKEWORD_MODEL_PATH)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        # Thread and flags
        self.ON = True
        self.running = True
        self.wakeword_flag = False
        self.wakeword_thread = threading.Thread(target=self.wakeword_run, name="wakeword_thread")
        self.wakeword_thread.start()
        print('WakeWord Initialized')

    def close(self):
        self.running = False
        self.wakeword_thread.join()
        return

    # Background loop that continuously checks for wake words
    def wakeword_run(self):
        with sd.InputStream(channels=NUM_CHANNELS,
                            samplerate=SAMPLE_RATE,
                            blocksize=int(SAMPLE_RATE * RECORD_DURATION),
                            callback=self.wakeword_process):
            while self.running:
                pass

    def __call__(self):
        if self.wakeword_flag:
            self.wakeword_flag = False
            return True
        return False

    def wakeword_process(self, rec, frames, time, error):
        # Start timing for testing
        start_time = timeit.default_timer()
        # Notify if there are errors
        if error:
            print("Error: ", error)
        # Remove the 2nd dimension from the recording sample and downsample
        rec = np.squeeze(rec)
        rec = scipy.signal.decimate(rec, DOWNSAMPLE)
        # Analyze a sliding window of the sound that overlaps with the last window by 50%
        # to catch wake words that might span time segments
        self.window[:len(self.window) // 2] = self.window[len(self.window) // 2:]
        self.window[len(self.window) // 2:] = rec
        # Process the audio with MFCC (Mel Frequency Cepstrum), which scales the frequency
        # to match more closely what the human ear can hear
        mfccs = python_speech_features.base.mfcc(self.window,
                                                 samplerate=RESAMPLE_RATE,
                                                 winlen=0.256,
                                                 winstep=0.050,
                                                 numcep=NUM_MFCC,
                                                 nfilt=26,
                                                 nfft=2048,
                                                 preemph=0.0,
                                                 ceplifter=0,
                                                 appendEnergy=False,
                                                 winfunc=np.hanning)
        mfccs = mfccs.transpose()
        # Make a prediction with the model
        in_tensor = np.float32(mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1))
        self.interpreter.set_tensor(self.input_details[0]['index'], in_tensor)
        self.interpreter.invoke()
        output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
        val = output_data[0][0]
        # Test for the wake word ('go')
        if val > WORD_THRESHOLD:
            print('listening')
            self.wakeword_flag = True
        if DEBUG_ACC:  # print accuracy of each detection
            print(val)
        if DEBUG_TIME:  # print processing time for a sound clip
            print(timeit.default_timer() - start_time)
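# --- Usage sketch, not part of the original source: WakeWord2 starts its own
# --- background thread in __init__ and is polled through __call__(). The constants
# --- it uses (RECORD_DURATION, SAMPLE_RATE, RESAMPLE_RATE, NUM_CHANNELS, NUM_MFCC,
# --- WAKEWORD_MODEL_PATH, WORD_THRESHOLD, DOWNSAMPLE, DEBUG_ACC, DEBUG_TIME) and the
# --- imports (numpy, sounddevice, scipy, python_speech_features, threading, timeit,
# --- time) are assumed to be defined earlier in the file.
if __name__ == '__main__':
    wake = WakeWord2()
    try:
        while True:
            if wake():  # True exactly once per detected wake word
                print('Wake word heard, hand off to the command recognizer here')
            time.sleep(0.1)
    except KeyboardInterrupt:
        wake.close()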