class Detector(object):

    def __init__(self, model_path=DEFAULT_MODEL_PATH, labels_path=DEFAULT_LABEL_PATH):
        self.labels = self.load_labels(labels_path)
        self.interpreter = Interpreter(model_path)
        self.interpreter.allocate_tensors()

    def set_input_tensor(self, image):
        tensor_index = self.interpreter.get_input_details()[0]["index"]
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details["index"]))
        return tensor

    def detect_objects(self, image, threshold=0.5):
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))
        results = []
        for i in range(count):
            if scores[i] >= threshold:
                result = {
                    'bounding_box': boxes[i],
                    # Class indices come back as floats; cast before the label lookup.
                    'class_id': int(classes[i]),
                    'class': self.labels[int(classes[i])],
                    'score': scores[i]
                }
                results.append(result)
        return results

    def load_labels(self, path):
        """Loads the labels file. Supports files with or without index numbers."""
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            labels = {}
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    labels[int(pair[0])] = pair[1].strip()
                else:
                    labels[row_number] = pair[0].strip()
        return labels
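
# A minimal usage sketch for the Detector above. The test image path is
# hypothetical, and it assumes a uint8 SSD-style detection model whose input
# size we query from the interpreter before feeding the frame in.
if __name__ == '__main__':
    from PIL import Image

    detector = Detector()
    _, height, width, _ = detector.interpreter.get_input_details()[0]['shape']
    frame = Image.open('test.jpg').convert('RGB').resize((width, height))
    for obj in detector.detect_objects(np.asarray(frame), threshold=0.6):
        print(obj['class'], obj['score'])
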
class Detector(object):

    def __init__(self, path_to_model, threshold):
        self.threshold = threshold
        self.__camera = picamera.PiCamera(resolution=(640, 480), framerate=30)
        self.__camera.start_preview()
        self.__is_cat_detected = False
        self.__stream = io.BytesIO()
        self.__interpreter = Interpreter(path_to_model)
        self.__interpreter.allocate_tensors()
        # Input shape is [1, height, width, 3]; PIL's resize() wants (width, height).
        input_shape = self.__interpreter.get_input_details()[0]['shape']
        self.__img_input_size = (int(input_shape[2]), int(input_shape[1]))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.__camera.close()

    def check(self):
        result = True
        try:
            self.__stream.seek(0)
            self.__camera.capture(self.__stream, 'jpeg')
            image = Image.open(self.__stream).convert('RGB').resize(
                self.__img_input_size, Image.ANTIALIAS)
            self.__is_cat_detected = self.__is_cat(image)
            self.__stream.seek(0)
            self.__stream.truncate()
        except Exception as err:  # don't swallow KeyboardInterrupt/SystemExit
            print(err)
            result = False
        finally:
            self.__camera.stop_preview()
        return result

    def is_detect(self):
        return self.__is_cat_detected

    def __is_cat(self, image):
        tensor_index = self.__interpreter.get_input_details()[0]['index']
        self.__interpreter.tensor(tensor_index)()[0][:, :] = image
        self.__interpreter.invoke()
        output_details = self.__interpreter.get_output_details()[0]
        output = np.squeeze(self.__interpreter.get_tensor(output_details['index']))
        # A negative threshold flips the comparison direction.
        return output < self.threshold if self.threshold < 0 else output > self.threshold
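
# A minimal sketch of driving the context-managed Detector above. The model
# path and the polling loop are assumptions, not part of the original.
if __name__ == '__main__':
    with Detector('cat_model.tflite', threshold=0.6) as detector:
        while detector.check():
            if detector.is_detect():
                print('cat detected')
            time.sleep(1)
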
class ImageClassifier:

    def __init__(self):
        try:
            self.interpreter = Interpreter(model_path='model.tflite')
            self.interpreter.allocate_tensors()
            _, self.height, self.width, _ = self.interpreter.get_input_details()[0]['shape']
            print('model successfully loaded')
        except Exception as e:
            print("error:", e)
            return

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        # # If the model is quantized (uint8 data), then dequantize the results
        # if output_details['dtype'] == np.uint8:
        #     scale, zero_point = output_details['quantization']
        #     tensor = scale * (tensor - zero_point)
        return tensor

    def set_input_tensor(self, image):
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def __call__(self, stream, top_k=1):
        # start_time = time()
        image = Image.open(stream).convert('RGB').resize(
            (self.width, self.height), Image.ANTIALIAS)
        image = np.array(image).astype('float') / 255.0
        self.set_input_tensor(image)
        self.interpreter.invoke()
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        # print('classes', classes)
        # print('scores', scores)
        index = np.argmax(scores)
        # elapsed_ms = (time() - start_time) * 1000
        return int(classes[index]), scores[index]
class Classifier:

    def __init__(self, label_file, model_file):
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.height, self.width, _ = self.interpreter.get_input_details()[0]['shape']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {i: line.strip() for i, line in enumerate(f.readlines())}

    def set_input_tensor(self, image):
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def classify(self, original_image, top_k=1):
        """Classifies the image and returns the annotated frame as JPEG bytes."""
        start_time = time.time()
        # cv2.resize() takes (width, height), not (height, width).
        image = cv2.resize(original_image, (self.width, self.height))
        self.set_input_tensor(image)
        self.interpreter.invoke()
        output_details = self.interpreter.get_output_details()[0]
        output = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        # If the model is quantized (uint8 data), then dequantize the results
        if output_details['dtype'] == np.uint8:
            scale, zero_point = output_details['quantization']
            output = scale * (output - zero_point)
        ordered = np.argpartition(-output, top_k)
        results = [(i, output[i]) for i in ordered[:top_k]]
        elapsed_ms = (time.time() - start_time) * 1000
        label_id, prob = results[0]
        text = 'Class: %s Confidence: %.2f TIME: %.1fms' % (
            self.labels[label_id], prob, elapsed_ms)
        cv2.putText(original_image, text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX,
                    self.width / 400, (0, 0, 255), 2, True)
        return cv2.imencode('.jpg', original_image)[1].tobytes()
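
# A minimal sketch that feeds one webcam frame through the Classifier above
# and writes the annotated JPEG to disk. Paths and the camera index are
# assumptions for illustration.
if __name__ == '__main__':
    classifier = Classifier('labels.txt', 'model.tflite')
    cap = cv2.VideoCapture(0)
    ok, frame = cap.read()
    if ok:
        with open('annotated.jpg', 'wb') as f:
            f.write(classifier.classify(frame))
    cap.release()
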
def compute(model_path):
    now = time.monotonic()
    intp = Interpreter(model_path)
    x = intp.tensor(intp.get_input_details()[0]['index'])
    iy = intp.get_output_details()[0]['index']
    intp.allocate_tensors()
    t1 = time.monotonic() - now

    now = time.monotonic()
    for i in range(WARMUP):
        # Fill the input tensor in place; rebinding x to a bare
        # np.random.rand() would leave the model input untouched.
        x().fill(np.random.rand())
        intp.invoke()
        y = intp.get_tensor(iy)
    t2 = time.monotonic() - now

    now = time.monotonic()
    for i in range(ITER):
        x().fill(np.random.rand())
        intp.invoke()
        y = intp.get_tensor(iy)
    t3 = time.monotonic() - now

    return t1, t2 / float(WARMUP), t3 / float(ITER)
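
# A minimal sketch of how compute() might be called. WARMUP and ITER are
# module globals in the original (values assumed here); t1 is interpreter
# setup time, t2 and t3 are mean per-invoke latencies in seconds.
WARMUP = 10
ITER = 100

if __name__ == '__main__':
    t_setup, t_warm, t_iter = compute('model.tflite')  # hypothetical path
    print('setup %.1f ms, warmup %.2f ms/iter, steady %.2f ms/iter' %
          (t_setup * 1000, t_warm * 1000, t_iter * 1000))
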
class Classifier:

    def __init__(self, model, labels):
        self.labels = labels
        self.model = Interpreter(model)
        self.model.allocate_tensors()  # required before tensors can be read or written

    def set_input_tensor(self, image):
        tensor_index = self.model.get_input_details()[0]['index']
        input_tensor = self.model.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def classify_image(self, image, top_k=1):
        """Returns a sorted array of classification results."""
        self.set_input_tensor(image)
        self.model.invoke()
        output_details = self.model.get_output_details()[0]
        output = np.squeeze(self.model.get_tensor(output_details['index']))
        # If the model is quantized (uint8 data), then dequantize the results
        if output_details['dtype'] == np.uint8:
            scale, zero_point = output_details['quantization']
            output = scale * (output - zero_point)
        ordered = np.argpartition(-output, top_k)
        return [(i, output[i]) for i in ordered[:top_k]]
class Segnet(object):

    def __init__(self, model_file, label_file, overlay):
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
        self.labels = self.load_labels(label_file)
        self.class_colors = [(random.randint(0, 255), random.randint(0, 255),
                              random.randint(0, 255)) for _ in range(5000)]
        self.legend_img = self.get_legends(self.labels)
        self.overlay = overlay

    def load_labels(self, path):
        with open(path, 'r') as f:
            return [line.strip() for line in f.read().replace('"', '').split(',')]

    def preprocess(self, img):
        # Subtract the per-channel means, then reverse the channel order.
        img = img.astype(np.float32)
        img[:, :, 0] -= 103.939
        img[:, :, 1] -= 116.779
        img[:, :, 2] -= 123.68
        image = img[:, :, ::-1]
        return image

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def get_legends(self, class_names):
        colors = self.class_colors
        n_classes = len(class_names)
        legend = np.zeros(((len(class_names) * 25) + 25, 125, 3), dtype="uint8") + 255
        for (i, (class_name, color)) in enumerate(zip(class_names, colors)):
            color = [int(c) for c in color]
            cv2.putText(legend, class_name, (5, (i * 25) + 17),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)
            cv2.rectangle(legend, (100, (i * 25)), (125, (i * 25) + 25),
                          tuple(color), -1)
        return legend

    def overlay_seg_image(self, inp_img, seg_img):
        original_h = inp_img.shape[0]
        original_w = inp_img.shape[1]
        seg_img = cv2.resize(seg_img, (original_w, original_h))
        fused_img = (inp_img / 2 + seg_img / 2).astype('uint8')
        return fused_img

    def concat_legends(self, seg_img, legend_img):
        new_h = np.maximum(seg_img.shape[0], legend_img.shape[0])
        new_w = seg_img.shape[1] + legend_img.shape[1]
        out_img = np.zeros((new_h, new_w, 3)).astype('uint8') + legend_img[0, 0, 0]
        out_img[:legend_img.shape[0], :legend_img.shape[1]] = np.copy(legend_img)
        out_img[:seg_img.shape[0], legend_img.shape[1]:] = np.copy(seg_img)
        return out_img

    def segment_objects(self, image):
        """Runs inference and returns the raw segmentation output array."""
        # cv2.resize() takes (width, height); feed the network the
        # preprocessed image, not the raw argument.
        img = cv2.resize(image, (self.input_width, self.input_height))
        img = self.preprocess(img)
        self.set_input_tensor(img)
        self.interpreter.invoke()
        # Get all output details
        seg_arr = self.get_output_tensor(0)
        return seg_arr

    def segment(self, original_image):
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        image = cv2.resize(original_image, (self.input_width, self.input_height))
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.segment_objects(image)
        elapsed_ms = (time.time() - start_time) * 1000
        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(
            fps, elapsed_ms))
        seg_arr = results.argmax(axis=2)
        output_height = results.shape[0]
        output_width = results.shape[1]
        seg_img = np.zeros((output_height, output_width, 3))
        for c in range(20):
            seg_img[:, :, 0] += ((seg_arr[:, :] == c) *
                                 (self.class_colors[c][0])).astype('uint8')
            seg_img[:, :, 1] += ((seg_arr[:, :] == c) *
                                 (self.class_colors[c][1])).astype('uint8')
            seg_img[:, :, 2] += ((seg_arr[:, :] == c) *
                                 (self.class_colors[c][2])).astype('uint8')
        seg_img = cv2.resize(seg_img, (self.output_width, self.output_height))
        if self.overlay:
            seg_img = self.overlay_seg_image(original_image, seg_img)
        seg_img = self.concat_legends(seg_img, self.legend_img)
        return cv2.imencode('.jpg', seg_img)[1].tobytes()
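
# A minimal usage sketch for the Segnet wrapper above; file names are
# hypothetical, and the label file is assumed to be a comma-separated list.
if __name__ == '__main__':
    segnet = Segnet('segnet.tflite', 'seg_labels.txt', overlay=True)
    frame = cv2.imread('street.jpg')
    with open('segmented.jpg', 'wb') as f:
        f.write(segnet.segment(frame))
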
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally.

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        self._mirror = mirror
        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path,
            experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()
        self._input_tensor_shape = self._interpreter.get_input_details()[0]['shape']
        self._input_details = self._interpreter.get_input_details()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self._input_tensor_shape

        # Auto-detect stride size
        def calc_stride(h, w, L):
            return int((2 * h * w) /
                       (math.sqrt(h**2 + 4 * h * L * w - 2 * h * w + w**2) - h - w))

        details = self._interpreter.get_output_details()[4]
        self.heatmap_zero_point = details['quantization_parameters']['zero_points'][0]
        self.heatmap_scale = details['quantization_parameters']['scales'][0]
        heatmap_size = self._interpreter.tensor(details['index'])().nbytes
        self.stride = calc_stride(self.image_height, self.image_width, heatmap_size)
        self.heatmap_size = (self.image_width // self.stride + 1,
                             self.image_height // self.stride + 1)
        details = self._interpreter.get_output_details()[5]
        self.parts_zero_point = details['quantization_parameters']['zero_points'][0]
        self.parts_scale = details['quantization_parameters']['scales'][0]
        print("Heatmap size: ", self.heatmap_size)
        print("Stride: ", self.stride, self.heatmap_size)

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

        For ideal results make sure the image fed to this function is close to
        the expected input size - it is the caller's responsibility to resize
        the image accordingly.

        Args:
          img: numpy array containing image
        """
        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            pads = [[0, max(0, self.image_height - img.shape[0])],
                    [0, max(0, self.image_width - img.shape[1])], [0, 0]]
            img = np.pad(img, pads, mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert img.shape == tuple(self._input_tensor_shape[1:])

        # Run the inference (API expects the data to be flattened)
        inference_time, outputs = self.run_inference(img)
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return inference_time, poses, heatmap, bodyparts

    def ParseOutputs(self, outputs):
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return poses, heatmap, bodyparts

    def _parse_poses(self, outputs):
        keypoints = outputs[0].reshape(-1, len(KEYPOINTS), 2)
        keypoint_scores = outputs[1].reshape(-1, len(KEYPOINTS))
        pose_scores = outputs[2].flatten()
        nposes = int(outputs[3][0])

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nposes):
            keypoint_dict = {}
            for point_i, point in enumerate(keypoints[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    keypoint_scores[pose_i, point_i])
                if self._mirror:
                    keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_scores[pose_i]))
        return poses

    def softmax(self, y, axis):
        y = y - np.expand_dims(np.max(y, axis=axis), axis)
        y = np.exp(y)
        return y / np.expand_dims(np.sum(y, axis=axis), axis)

    def _parse_heatmaps(self, outputs):
        # Heatmaps are really float32.
        heatmap = (outputs[4].astype(np.float32) -
                   self.heatmap_zero_point) * self.heatmap_scale
        heatmap = np.reshape(heatmap, [self.heatmap_size[1], self.heatmap_size[0]])
        part_heatmap = (outputs[5].astype(np.float32) -
                        self.parts_zero_point) * self.parts_scale
        part_heatmap = np.reshape(
            part_heatmap, [self.heatmap_size[1], self.heatmap_size[0], -1])
        part_heatmap = self.softmax(part_heatmap, axis=2)
        return heatmap, part_heatmap

    def run_inference(self, input):
        start_time = time.monotonic()
        self._interpreter.set_tensor(self._input_details[0]['index'],
                                     np.expand_dims(input, axis=0))
        self._interpreter.invoke()
        duration_ms = (time.monotonic() - start_time) * 1000
        output = []
        for details in self._interpreter.get_output_details():
            tensor = self._interpreter.get_tensor(details['index'])
            output.append(tensor)
        return (duration_ms, output)
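
# A minimal usage sketch for the delegate-based PoseEngine above. The model
# path and image are hypothetical; the frame should already be close to the
# network input size, as noted in DetectPosesInImage().
if __name__ == '__main__':
    from PIL import Image

    engine = PoseEngine('posenet_edgetpu.tflite')
    img = np.asarray(Image.open('person.jpg').convert('RGB'))
    inference_time, poses, heatmap, bodyparts = engine.DetectPosesInImage(img)
    print('%.1f ms, %d pose(s)' % (inference_time, len(poses)))
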
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self,
                 model_path,
                 mirror=False,
                 offsetRefineStep=2,
                 scoreThreshold=0.8,
                 maxPoseDetections=5,
                 nmsRadius=30,
                 minPoseConfidence=0.15):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally.

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        self.interpreter = Interpreter(model_path)
        self.interpreter.allocate_tensors()
        self._mirror = mirror
        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self.get_input_tensor_shape()
        self.heatmaps_nx = self.interpreter.get_output_details()[0]['shape'][2]
        self.heatmaps_ny = self.interpreter.get_output_details()[0]['shape'][1]
        self.heatmaps_stride_x = self.getStride(self.image_width, self.heatmaps_nx)
        self.heatmaps_stride_y = self.getStride(self.image_height, self.heatmaps_ny)
        self.quant_heatmaps_r, self.quant_heatmaps_off = \
            self.interpreter.get_output_details()[0]['quantization']
        self.quant_offsets_short_r, self.quant_offsets_short_off = \
            self.interpreter.get_output_details()[1]['quantization']
        self.quant_offsets_mid_r, self.quant_offsets_mid_off = \
            self.interpreter.get_output_details()[2]['quantization']
        self.offsetRefineStep = offsetRefineStep
        self.scoreThreshold = scoreThreshold
        self.maxPoseDetections = maxPoseDetections
        self.nmsRadius = nmsRadius
        self.sqRadius = self.nmsRadius * self.nmsRadius
        self.minPoseConfidence = minPoseConfidence

        # The API returns all the output tensors flattened and concatenated. We
        # have to figure out the boundaries from the tensor shapes & sizes.
        offset = 0
        self._output_offsets = [0]
        for size in self.get_all_output_tensors_sizes():
            offset += size
            self._output_offsets.append(offset)

    def getStride(self, l, n):
        strides = (8, 16, 32)
        return strides[np.argmin(np.abs(strides - l / n))]

    def get_input_tensor_shape(self):
        return self.interpreter.get_input_details()[0]['shape']

    def get_all_output_tensors_sizes(self):
        sizes = np.array([], dtype='int32')
        for d in self.interpreter.get_output_details():
            s = np.squeeze(self.interpreter.get_tensor(d['index'])).flatten().size
            sizes = np.append(sizes, int(s))
        return sizes

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

        For ideal results make sure the image fed to this function is close to
        the expected input size - it is the caller's responsibility to resize
        the image accordingly.

        Args:
          img: numpy array containing image
        """
        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            img = np.pad(img,
                         [[0, max(0, self.image_height - img.shape[0])],
                          [0, max(0, self.image_width - img.shape[1])], [0, 0]],
                         mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert img.shape == tuple(self._input_tensor_shape[1:])

        # Run the inference (API expects the data to be flattened)
        return self.ParseOutput(self.run_inference(img))

    def run_inference(self, img):
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            img = np.pad(img,
                         [[0, max(0, self.image_height - img.shape[0])],
                          [0, max(0, self.image_width - img.shape[1])], [0, 0]],
                         mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert img.shape == tuple(self._input_tensor_shape[1:])
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)
        input_tensor()[:, :, :, :] = img
        start_time = time.monotonic()
        self.interpreter.invoke()
        elapsed_ms = (time.monotonic() - start_time) * 1000
        out = np.empty(0)
        for d in self.interpreter.get_output_details():
            o = np.squeeze(self.interpreter.get_tensor(d['index'])).flatten()
            out = np.append(out, o)
        return (elapsed_ms, out)

    def logistic(self, x):
        return 1 / (1 + np.exp(-x))

    def isPeak(self, heatmaps_flat, index):
        maxindex = index // len(KEYPOINTS)
        maxkeypoint = index % len(KEYPOINTS)
        y_index = maxindex // self.heatmaps_nx
        x_index = maxindex % self.heatmaps_nx
        y_index_min = np.max((y_index - 1, 0))
        y_index_max = np.min((y_index + 1, self.heatmaps_ny - 1))
        x_index_min = np.max((x_index - 1, 0))
        x_index_max = np.min((x_index + 1, self.heatmaps_nx - 1))
        for y_current in range(y_index_min, y_index_max + 1):
            for x_current in range(x_index_min, x_index_max + 1):
                index_current = len(KEYPOINTS) * (
                    y_current * self.heatmaps_nx + x_current) + maxkeypoint
                if (heatmaps_flat[index_current] > heatmaps_flat[index]) and \
                        (index_current != index):
                    return False
        return True

    def ParseOutput(self, output):
        inference_time, output = output
        outputs = [
            output[i:j]
            for i, j in zip(self._output_offsets, self._output_offsets[1:])
        ]
        heatmaps = outputs[0].reshape(-1, len(KEYPOINTS))
        offsets_short_y = outputs[1].reshape(
            -1, 2 * len(KEYPOINTS))[:, 0:len(KEYPOINTS)]
        offsets_short_x = outputs[1].reshape(
            -1, 2 * len(KEYPOINTS))[:, len(KEYPOINTS):2 * len(KEYPOINTS)]
        offsets_mid_fwd_y = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 0:len(poseChain)]
        offsets_mid_fwd_x = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, len(poseChain):2 * len(poseChain)]
        offsets_mid_bwd_y = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 2 * len(poseChain):3 * len(poseChain)]
        offsets_mid_bwd_x = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 3 * len(poseChain):4 * len(poseChain)]

        heatmaps = self.logistic(
            (heatmaps - self.quant_heatmaps_off) * self.quant_heatmaps_r)
        heatmaps_flat = heatmaps.flatten()
        offsets_short_y = (offsets_short_y -
                           self.quant_offsets_short_off) * self.quant_offsets_short_r
        offsets_short_x = (offsets_short_x -
                           self.quant_offsets_short_off) * self.quant_offsets_short_r
        offsets_mid_fwd_y = (offsets_mid_fwd_y -
                             self.quant_offsets_mid_off) * self.quant_offsets_mid_r
        offsets_mid_fwd_x = (offsets_mid_fwd_x -
                             self.quant_offsets_mid_off) * self.quant_offsets_mid_r
        offsets_mid_bwd_y = (offsets_mid_bwd_y -
                             self.quant_offsets_mid_off) * self.quant_offsets_mid_r
        offsets_mid_bwd_x = (offsets_mid_bwd_x -
                             self.quant_offsets_mid_off) * self.quant_offsets_mid_r

        # Obtaining the peaks of heatmaps larger than scoreThreshold
        orderedindices = np.argsort(heatmaps_flat)[::-1]
        largeheatmaps_indices = np.empty(0, dtype='int32')
        for i in range(len(orderedindices)):
            if heatmaps_flat[orderedindices[i]] < self.scoreThreshold:
                break
            if self.isPeak(heatmaps_flat, orderedindices[i]):
                largeheatmaps_indices = np.append(largeheatmaps_indices,
                                                  orderedindices[i])

        pose_list = np.full(self.maxPoseDetections * 2 * len(KEYPOINTS), 0.0,
                            dtype='float32').reshape(-1, len(KEYPOINTS), 2)
        maxindex_list = np.full(self.maxPoseDetections * len(KEYPOINTS), -1,
                                dtype='int32').reshape(-1, len(KEYPOINTS))
        score_list = np.full(self.maxPoseDetections * len(KEYPOINTS), 0.0,
                             dtype='float32').reshape(-1, len(KEYPOINTS))
        pose_score_list = np.full(self.maxPoseDetections, 0.0, dtype='float32')
        nPoses = 0

        # obtaining at most maxPoseDetections poses
        for point in range(len(largeheatmaps_indices)):
            if nPoses >= self.maxPoseDetections:
                break
            # obtain a root candidate
            maxindex = largeheatmaps_indices[point] // len(KEYPOINTS)
            maxkeypoint = largeheatmaps_indices[point] % len(KEYPOINTS)
            y = self.heatmaps_stride_y * (maxindex // self.heatmaps_nx)
            x = self.heatmaps_stride_x * (maxindex % self.heatmaps_nx)
            y += offsets_short_y[maxindex, maxkeypoint]
            x += offsets_short_x[maxindex, maxkeypoint]

            # skip keypoint with (x, y) that is close to the existing keypoints
            skip = 0
            for p in range(nPoses):
                y_exist = pose_list[p, maxkeypoint, 0]
                x_exist = pose_list[p, maxkeypoint, 1]
                if (y_exist - y) * (y_exist - y) + \
                        (x_exist - x) * (x_exist - x) < self.sqRadius:
                    skip = 1
                    break
            if skip == 1:
                continue

            # setting the maxkeypoint as root
            pose_list[nPoses, maxkeypoint, 0] = y
            pose_list[nPoses, maxkeypoint, 1] = x
            maxindex_list[nPoses, maxkeypoint] = maxindex
            score_list[nPoses, maxkeypoint] = heatmaps[maxindex, maxkeypoint]

            # backward decoding
            for edge in reversed(range(len(poseChain))):
                sourceKeypointId = parentToChildEdges[edge]
                targetKeypointId = childToParentEdges[edge]
                if maxindex_list[nPoses, sourceKeypointId] != -1 and \
                        maxindex_list[nPoses, targetKeypointId] == -1:
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_bwd_y[maxindex, edge]
                    x += offsets_mid_bwd_x[maxindex, edge]
                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                      self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                      self.heatmaps_nx - 1)
                    maxindex_list[nPoses, targetKeypointId] = \
                        self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                          self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                          self.heatmaps_nx - 1)
                        maxindex_list[nPoses, targetKeypointId] = \
                            self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses, targetKeypointId],
                                             targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses, targetKeypointId],
                                             targetKeypointId]
                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses, targetKeypointId], targetKeypointId]

            # forward decoding
            for edge in range(len(poseChain)):
                sourceKeypointId = childToParentEdges[edge]
                targetKeypointId = parentToChildEdges[edge]
                if maxindex_list[nPoses, sourceKeypointId] != -1 and \
                        maxindex_list[nPoses, targetKeypointId] == -1:
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_fwd_y[maxindex, edge]
                    x += offsets_mid_fwd_x[maxindex, edge]
                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                      self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                      self.heatmaps_nx - 1)
                    maxindex_list[nPoses, targetKeypointId] = \
                        self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                          self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                          self.heatmaps_nx - 1)
                        maxindex_list[nPoses, targetKeypointId] = \
                            self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses, targetKeypointId],
                                             targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses, targetKeypointId],
                                             targetKeypointId]
                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses, targetKeypointId], targetKeypointId]

            # calculate pose score
            score = 0
            for k in range(len(KEYPOINTS)):
                y = pose_list[nPoses, k, 0]
                x = pose_list[nPoses, k, 1]
                closekeypoint_exists = False
                for p in range(nPoses):
                    y_exist = pose_list[p, k, 0]
                    x_exist = pose_list[p, k, 1]
                    if (y_exist - y) * (y_exist - y) + \
                            (x_exist - x) * (x_exist - x) < self.sqRadius:
                        closekeypoint_exists = True
                        break
                if not closekeypoint_exists:
                    score += score_list[nPoses, k]
            score /= len(KEYPOINTS)

            if score > self.minPoseConfidence:
                pose_score_list[nPoses] = score
                nPoses += 1
            else:
                for k in range(len(KEYPOINTS)):
                    maxindex_list[nPoses, k] = -1

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nPoses):
            keypoint_dict = {}
            for point_i, point in enumerate(pose_list[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    score_list[pose_i, point_i])
                if self._mirror:
                    keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_score_list[pose_i]))
        return poses, inference_time
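
# A minimal sketch of driving the pure-Python decoder above; the model path
# and tuning values are assumptions. Lowering scoreThreshold trades decode
# speed for recall, since more heatmap cells survive the peak search.
if __name__ == '__main__':
    engine = PoseEngine('posenet.tflite', scoreThreshold=0.5, maxPoseDetections=10)
    # Stand-in for a real RGB frame at the network's input size.
    frame = np.random.randint(0, 256, (engine.image_height, engine.image_width, 3),
                              dtype=np.uint8)
    poses, inference_time = engine.DetectPosesInImage(frame)
    print('%.1f ms, %d pose(s)' % (inference_time, len(poses)))
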
class PoseEngine():
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally.

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path,
            experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()
        self._mirror = mirror
        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self._input_height, self._input_width, self._input_depth = self.get_input_tensor_shape()
        self._input_type = self._interpreter.get_input_details()[0]['dtype']
        self._inf_time = 0

    def run_inference(self, input_data):
        """Runs inference using the zero-copy feature from pycoral and returns
        the inference time in ms.
        """
        start = time.monotonic()
        edgetpu.run_inference(self._interpreter, input_data)
        self._inf_time = time.monotonic() - start
        return (self._inf_time * 1000)

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

        For ideal results make sure the image fed to this function is close to
        the expected input size - it is the caller's responsibility to resize
        the image accordingly.

        Args:
          img: numpy array containing image
        """
        input_details = self._interpreter.get_input_details()
        image_width, image_height = img.size
        resized_image = img.resize((self._input_width, self._input_height),
                                   Image.NEAREST)
        if self._input_type is np.float32:
            # Floating point versions of posenet take image data in [-1,1] range.
            input_data = np.float32(resized_image) / 128.0 - 1.0
        else:
            # Assumed to be uint8
            input_data = np.asarray(resized_image)
        self.run_inference(input_data.flatten())
        return self.ParseOutput()

    def get_input_tensor_shape(self):
        """Returns input tensor shape."""
        return self._interpreter.get_input_details()[0]['shape']

    def get_output_tensor(self, idx):
        """Returns output tensor view."""
        return np.squeeze(
            self._interpreter.tensor(
                self._interpreter.get_output_details()[idx]['index'])())

    def ParseOutput(self):
        """Parses interpreter output tensors and returns decoded poses."""
        keypoints = self.get_output_tensor(0)
        keypoint_scores = self.get_output_tensor(1)
        pose_scores = self.get_output_tensor(2)
        num_poses = self.get_output_tensor(3)
        poses = []
        for i in range(int(num_poses)):
            pose_score = pose_scores[i]
            pose_keypoints = {}
            for j, point in enumerate(keypoints[i]):
                y, x = point
                # A horizontal flip mirrors the x coordinate about the width.
                if self._mirror:
                    x = self._input_width - x
                pose_keypoints[KeypointType(j)] = Keypoint(
                    Point(x, y), keypoint_scores[i, j])
            poses.append(Pose(pose_keypoints, pose_score))
        return poses, self._inf_time
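
# A minimal usage sketch for the pycoral-based PoseEngine above; the model
# path and image are hypothetical. DetectPosesInImage() expects a PIL image
# here (it calls img.size and img.resize), unlike the numpy-based engines.
if __name__ == '__main__':
    from PIL import Image

    engine = PoseEngine('posenet_edgetpu.tflite')
    poses, inf_time = engine.DetectPosesInImage(Image.open('person.jpg'))
    print('%.1f ms, %d pose(s)' % (inf_time * 1000, len(poses)))
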
class Detector(object):

    def __init__(self, label_file, model_file, threshold):
        self._threshold = float(threshold)
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {
                i: line.strip()
                for i, line in enumerate(f.read().replace('"', '').split(','))
            }

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Runs inference and returns the raw network output tensor."""
        self.set_input_tensor(image)
        self.interpreter.invoke()
        boxes = self.get_output_tensor(0)
        return boxes

    def detect(self, original_image):
        # image.shape is (height, width, channels)
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        image = cv2.resize(original_image, (self.input_width, self.input_height))
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.detect_objects(image)
        elapsed_ms = (time.time() - start_time) * 1000
        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(
            fps, elapsed_ms))

        def _to_original_scale(boxes):
            minmax_boxes = to_minmax(boxes)
            minmax_boxes[:, 0] *= self.output_width
            minmax_boxes[:, 2] *= self.output_width
            minmax_boxes[:, 1] *= self.output_height
            minmax_boxes[:, 3] *= self.output_height
            return minmax_boxes.astype(int)  # np.int was removed in NumPy 1.24

        boxes, probs = self.run(results)
        print(boxes)
        if len(boxes) > 0:
            boxes = _to_original_scale(boxes)
            original_image = draw_boxes(original_image, boxes, probs, self.labels)
        return cv2.imencode('.jpg', original_image)[1].tobytes()

    def run(self, netout):
        """Converts YOLO network output to bounding boxes.

        # Args
            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid,
                5 + n_classes). YOLO neural network output array.

        # Returns
            boxes : array, shape of (N, 4)
                coordinate scale is normalized [0, 1]
            probs : array, shape of (N, nb_classes)
        """
        anchors = [
            0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
            3.52778, 9.77052, 9.16828
        ]
        nms_threshold = 0.2
        grid_h, grid_w, nb_box = netout.shape[:3]
        n_classes = netout.shape[3] - 5
        boxes = []

        # decode the output by the network
        netout[..., 4] = _sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > self._threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # from 4th element onwards are confidence and class probabilities
                    classes = netout[row, col, b, 5:]
                    if np.sum(classes) > 0:
                        # first 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]
                        x = (col + _sigmoid(x)) / grid_w  # center position, unit: image width
                        y = (row + _sigmoid(y)) / grid_h  # center position, unit: image height
                        w = anchors[2 * b + 0] * np.exp(w) / grid_w  # unit: image width
                        h = anchors[2 * b + 1] * np.exp(h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]
                        box = BoundBox(x, y, w, h, confidence, classes)
                        boxes.append(box)

        boxes = nms_boxes(boxes, n_classes, nms_threshold, self._threshold)
        boxes, probs = boxes_to_array(boxes)
        return boxes, probs
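
# A minimal sketch for the YOLO-style Detector above; file names are
# hypothetical, and the helpers it leans on (to_minmax, draw_boxes, _sigmoid,
# _softmax, nms_boxes, boxes_to_array, BoundBox) must come from the same
# project - they are not part of tflite_runtime.
if __name__ == '__main__':
    detector = Detector('labels.txt', 'yolo.tflite', threshold=0.3)
    frame = cv2.imread('input.jpg')
    with open('detections.jpg', 'wb') as f:
        f.write(detector.detect(frame))
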
class detector():

    def __init__(self, modelPath='objectdetect.tflite',
                 classLabelPath='objectlabelmap.txt', threshold=0.7):
        self.modelPath = modelPath
        self.classLabelPath = classLabelPath
        self.inputImg = None
        self.threshold = threshold
        self.labels = {}
        self.load_labels()
        self.interpreter = Interpreter(self.modelPath)
        self.interpreter.allocate_tensors()

    def load_labels(self):
        with open(self.classLabelPath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    self.labels[int(pair[0])] = pair[1].strip()
                else:
                    self.labels[row_number] = pair[0].strip()

    def setTensors(self, Img):
        """Sets the input tensor to Img.

        The Img argument MUST BE RESIZED before being passed to this function.
        """
        self.inputImg = Img
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = self.inputImg

    def get_output_tensor(self, index):
        """Gets output from a tensor given an index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self):
        """Returns a list of detection results, each a dictionary of object info."""
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))
        results = []
        # Only detections whose class id is in this set are kept.
        items = (0, 1, 2, 3, 5, 6, 7, 9, 10, 11)
        for i in range(count):
            if scores[i] >= self.threshold and int(classes[i]) in items:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': int(classes[i]),
                    'score': scores[i]
                }
                results.append(result)
        return results
class ObjDetect:
    _labels = None
    _interpreter = None
    _threshold = 0.5
    _input_width = 0
    _input_height = 0
    _jpeg_quality = 95  # OpenCV default value

    def __init__(self, model_file, label_file, threshold, jpeg_quality):
        self._threshold = threshold
        self._labels = self.load_labels(label_file)
        self._interpreter = Interpreter(model_file)
        self._interpreter.allocate_tensors()
        # Input shape is [1, height, width, 3].
        _, self._input_height, self._input_width, _ = self._interpreter.get_input_details()[0]['shape']
        self._jpeg_quality = jpeg_quality

    def Detect(self, image_bytes, detect_list):
        original_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        image = original_image.resize((self._input_width, self._input_height),
                                      Image.ANTIALIAS)
        results = self.detect_objects(image)
        CAMERA_WIDTH, CAMERA_HEIGHT = original_image.size
        draw = ImageDraw.Draw(original_image)
        has_objects = False
        for obj in results:
            # Is class_name in detect_list?
            class_name = self._labels[obj['class_id']]
            if class_name not in detect_list:
                # Skip drawing the bounding box
                continue
            else:
                has_objects = True
            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution
            ymin, xmin, ymax, xmax = obj['bounding_box']
            xmin = int(xmin * CAMERA_WIDTH)
            xmax = int(xmax * CAMERA_WIDTH)
            ymin = int(ymin * CAMERA_HEIGHT)
            ymax = int(ymax * CAMERA_HEIGHT)
            # Overlay the box, label, and score on the camera preview
            score = obj['score'] * 100
            draw.rectangle([xmin, ymin, xmax, ymax], outline="red")
            draw.text([xmin, ymax],
                      '{}:{:.2f}%'.format(self._labels[obj['class_id']], score),
                      fill="red")
        del draw
        if has_objects:
            image_bytes_array = io.BytesIO()
            original_image.save(image_bytes_array, format='JPEG',
                                quality=self._jpeg_quality, subsampling=0)
            return image_bytes_array.getvalue()
        else:
            return None

    def load_labels(self, path):
        """Loads the labels file. Supports files with or without index numbers."""
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            labels = {}
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    labels[int(pair[0])] = pair[1].strip()
                else:
                    labels[row_number] = pair[0].strip()
        return labels

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self._interpreter.get_input_details()[0]['index']
        input_tensor = self._interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self._interpreter.get_output_details()[index]
        tensor = np.squeeze(self._interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        self.set_input_tensor(image)
        self._interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))
        results = []
        for i in range(count):
            if scores[i] >= self._threshold:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'score': scores[i]
                }
                results.append(result)
        return results
class Detector(object):
    """Detector class which acts as a wrapper for the TensorFlow Lite interpreter API."""

    def __init__(self, model, path_to_label_file, threshold=0.4):
        self.interpreter = Interpreter(model)
        self.interpreter.allocate_tensors()
        self.labels = self.load_labels(path_to_label_file)
        self.threshold = threshold
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']

    @staticmethod
    def load_labels(path):
        """Loads the labels file. Supports files with or without index numbers."""
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            labels = {}
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    labels[int(pair[0])] = pair[1].strip()
                else:
                    labels[row_number] = pair[0].strip()
        return labels

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))
        results = []
        for i in range(count):
            # Keep only detections of class id 0 above the score threshold.
            if scores[i] >= self.threshold and classes[i] == 0:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'score': scores[i]
                }
                results.append(result)
        return results

    def annotate_objects(self, annotator, results):
        """Draws the bounding box and label for each object in the results."""
        for obj in results:
            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution
            ymin, xmin, ymax, xmax = obj['bounding_box']
            xmin = int(xmin * CAMERA_WIDTH)
            xmax = int(xmax * CAMERA_WIDTH)
            ymin = int(ymin * CAMERA_HEIGHT)
            ymax = int(ymax * CAMERA_HEIGHT)
            # Overlay the box, label, and score on the camera preview
            annotator.bounding_box([xmin, ymin, xmax, ymax])
            annotator.text([xmin, ymin],
                           '%s\n%.2f' % (self.labels[obj['class_id']], obj['score']))
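
# A minimal usage sketch for the wrapper above. CAMERA_WIDTH/CAMERA_HEIGHT
# must be module-level constants, and annotate_objects() needs whatever
# object supplies bounding_box()/text(); model and label paths are
# hypothetical.
if __name__ == '__main__':
    detector = Detector('detect.tflite', 'coco_labels.txt', threshold=0.5)
    image = np.zeros((detector.input_height, detector.input_width, 3), np.uint8)
    results = detector.detect_objects(image)
    print('%d object(s) of class 0 found' % len(results))
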
for image_path in images:
    image_name = image_path.split('\\')[-1]

    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]["index"], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]["index"])[0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]["index"])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]["index"])[0]  # Confidence of detected objects
    # Total number of detected objects (inaccurate and not needed)
    num = interpreter.get_tensor(output_details[3]['index'])[0]

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        # The snippet is truncated here in the source; a typical body (an
        # assumed completion, with min_conf_threshold defined elsewhere) clips
        # the box to the image and draws it:
        if scores[i] > min_conf_threshold:
            ymin = int(max(1, boxes[i][0] * imH))
            xmin = int(max(1, boxes[i][1] * imW))
            ymax = int(min(imH, boxes[i][2] * imH))
            xmax = int(min(imW, boxes[i][3] * imW))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
def set_input_tensor(interpreter: tflite.Interpreter, frame: Image) -> None:
    """Sets the input tensor of the model to the current frame."""
    tensor_index = interpreter.get_input_details()[0]['index']
    input_tensor = interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = frame
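
# A minimal sketch of the helper above in context; the model path and frame
# are hypothetical, and the frame must already match the model's input size.
interpreter = tflite.Interpreter(model_path='model.tflite')
interpreter.allocate_tensors()
_, height, width, _ = interpreter.get_input_details()[0]['shape']
frame = Image.new('RGB', (width, height))
set_input_tensor(interpreter, frame)
interpreter.invoke()
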
class ImageCapture(Thread):

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(__name__)
        self.logger.debug('Init image capture')
        self.WIDTH = 640
        self.HEIGHT = 480
        # Initialize the camera
        self.camera = PiCamera()
        # Set the camera resolution
        self.camera.resolution = (self.WIDTH, self.HEIGHT)
        # Set the number of frames per second
        self.camera.framerate = 32
        # Generates a 3D RGB array and stores it in rawCapture
        self.raw_capture = PiRGBArray(self.camera, size=(self.WIDTH, self.HEIGHT))
        # Wait a certain number of seconds to allow the camera time to warm up
        time.sleep(0.1)
        # Load COCO labels
        self.labels = {}
        self.load_labels("./models/coco_labels.txt")
        # Init the TF interpreter
        self.interpreter = Interpreter("./models/detect.tflite")
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
        # Current image
        self.image = None
        # Loop bool
        self.running = True

    def run(self):
        self.logger.debug('starting capture thread')
        while self.running:
            # Process the next camera frame
            self.nextFrame()
            time.sleep(0.05)

    def nextFrame(self):
        self.logger.debug('Capturing next frame')
        # Capture frames continuously from the camera
        self.camera.capture(self.raw_capture, format="bgr", use_video_port=True)
        # Analyse the raw image to detect objects
        self.analyse()
        # Convert raw image to JPEG
        res, self.image = cv2.imencode('.JPEG', self.raw_capture.array)
        # Clear the stream in preparation for the next frame
        self.raw_capture.truncate(0)

    def getEncodedImage(self):
        self.logger.debug('Returning current jpeg image base64 encoded')
        if self.image is not None:
            return base64.b64encode(self.image.tobytes()).decode('utf-8')
        return None

    def analyse(self):
        self.logger.debug('analyse raw frame for common objects')
        # Resize the image
        resized = cv2.resize(self.raw_capture.array,
                             (self.input_width, self.input_height),
                             interpolation=cv2.INTER_AREA)
        results = self.detect_objects(resized, 0.4)
        self.annotate_objects(results)

    def load_labels(self, path):
        """Loads the labels file. Supports files with or without index numbers."""
        self.logger.debug('loading labels from ' + path)
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    self.labels[int(pair[0])] = pair[1].strip()
                else:
                    self.labels[row_number] = pair[0].strip()

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        self.logger.debug('setting input tensor')
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        self.logger.debug('getting output tensor')
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image, threshold):
        """Returns a list of detection results, each a dictionary of object info."""
        self.logger.debug('starting to detect objects')
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))
        results = []
        for i in range(count):
            if scores[i] >= threshold:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'score': scores[i]
                }
                results.append(result)
        return results

    def annotate_objects(self, results):
        """Draws the bounding box and label for each object in the results."""
        self.logger.debug('annotate objects')
        for obj in results:
            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution
            ymin, xmin, ymax, xmax = obj['bounding_box']
            xmin = int(xmin * self.WIDTH)
            xmax = int(xmax * self.WIDTH)
            ymin = int(ymin * self.HEIGHT)
            ymax = int(ymax * self.HEIGHT)
            cv2.rectangle(self.raw_capture.array, (xmin, ymin), (xmax, ymax),
                          (0, 255, 0), 2)
            cv2.putText(self.raw_capture.array, self.labels[obj['class_id']],
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        (0, 255, 0), 2)
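
# A minimal sketch of running the capture thread above; the sleep length and
# the bare print are assumptions for illustration.
if __name__ == '__main__':
    capture = ImageCapture()
    capture.start()
    time.sleep(2)  # let a few frames go through detection
    encoded = capture.getEncodedImage()
    if encoded:
        print(encoded[:60], '...')
    capture.running = False
    capture.join()
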