class Classifier:
    """Bundle a classification engine together with its label table."""

    def __init__(self, using_model: str, label_file: str):
        """Read the labels and build the engine.

        Args:
            using_model: Model path passed to ``ClassificationEngine``.
            label_file: Path passed to ``dataset_utils.read_label_file``.
        """
        # Labels are loaded before the engine, as callers may rely on the
        # label-file error surfacing first.
        label_table = dataset_utils.read_label_file(label_file)
        self.labels = label_table

        inference_engine = ClassificationEngine(using_model)
        self.engine = inference_engine

    def classify(self, image: Image, top_k=3):
        """Return the engine's result for ``image``, limited to ``top_k``."""
        outcome = self.engine.classify_with_image(image, top_k=top_k)
        return outcome
def main():
    """Imprint new categories onto a model, save it, and print top-k accuracy.

    Reads the data set under ``args.data``, trains an ``ImprintingEngine``
    on the training split, writes the retrained model and its labels to
    ``args.output``, then evaluates the saved model on the test split.
    """
    args = _ParseArgs()

    print('--------------- Parsing data set -----------------')
    print('Dataset path:', args.data)
    train_set, test_set = _ReadData(args.data, args.test_ratio)
    print('Image list successfully parsed! Category Num = ', len(train_set))
    shape = _GetRequiredShape(args.model_path)

    print('---------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    train_input = []
    labels_map = _ReadLabel(args.label)
    # New categories are numbered after the labels that were read in.
    class_id = len(labels_map)
    for category, image_list in train_set.items():
        print('Processing category:', category)
        train_input.append(
            _PrepareImages(
                image_list, os.path.join(args.data, category), shape))
        labels_map[class_id] = category
        class_id += 1

    print('---------------- Start training -----------------')
    engine = ImprintingEngine(args.model_path, keep_classes=args.keep_classes)
    engine.TrainAll(train_input)
    print('---------------- Training finished! -----------------')

    engine.SaveModel(args.output)
    print('Model saved as : ', args.output)
    _SaveLabels(labels_map, args.output)

    print('------------------ Start evaluating ------------------')
    engine = ClassificationEngine(args.output)
    top_k = 12
    correct = [0] * top_k
    wrong = [0] * top_k
    for category, image_list in test_set.items():
        print('Evaluating category [', category, ']')
        for img_name in image_list:
            img_path = os.path.join(args.data, category, img_name)
            # Close each image file as soon as it has been classified.
            with Image.open(img_path) as img:
                candidates = engine.ClassifyWithImage(
                    img, threshold=0.01, top_k=top_k)
            recognized = False
            for i in range(top_k):
                if (i < len(candidates)
                        and labels_map[candidates[i][0]] == category):
                    recognized = True
                # A hit at rank i also counts as a hit for every larger k.
                if recognized:
                    correct[i] += 1
                else:
                    wrong[i] += 1

    print('---------------- Evaluation result -----------------')
    for i in range(top_k):
        total = correct[i] + wrong[i]
        if total == 0:
            # No test image was evaluated; avoid dividing by zero.
            print('Top {} : n/a (no test images)'.format(i + 1))
            continue
        print('Top {} : {:.0%}'.format(i + 1, correct[i] / total))
def main():
    """Classify one image and print the top three labels with their scores.

    Command-line arguments: ``--model`` (model path), ``--label`` (label
    file path) and ``--image`` (image to classify); all are required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='File path of Tflite model.', required=True)
    parser.add_argument(
        '--label', help='File path of label file.', required=True)
    parser.add_argument(
        '--image', help='File path of the image to be recognized.',
        required=True)
    args = parser.parse_args()

    # Prepare labels.
    labels = dataset_utils.ReadLabelFile(args.label)
    # Initialize engine.
    engine = ClassificationEngine(args.model)

    # Run inference; the `with` block closes the image file afterwards.
    with Image.open(args.image) as img:
        for result in engine.ClassifyWithImage(img, top_k=3):
            print('---------------------------')
            print(labels[result[0]])
            print('Score : ', result[1])
def main():
    """Imprint new categories onto a model, save it, and print top-k accuracy.

    Reads the data set under ``args.data``, trains an ``ImprintingEngine``
    on the training split, writes the retrained model and its labels to
    ``args.output``, then evaluates the saved model on the test split.
    """
    args = _parse_args()

    print('--------------- Parsing data set -----------------')
    print('Dataset path:', args.data)
    train_set, test_set = _read_data(args.data, args.test_ratio)
    print('Image list successfully parsed! Category Num = ', len(train_set))
    shape = _get_required_shape(args.model_path)

    print('---------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    train_input = []
    labels_map = {}
    for class_id, (category, image_list) in enumerate(train_set.items()):
        print('Processing category:', category)
        train_input.append(
            _prepare_images(
                image_list, os.path.join(args.data, category), shape))
        labels_map[class_id] = category

    print('---------------- Start training -----------------')
    engine = ImprintingEngine(args.model_path)
    engine.train_all(train_input)
    print('---------------- Training finished! -----------------')

    engine.save_model(args.output)
    print('Model saved as : ', args.output)
    _save_labels(labels_map, args.output)

    print('------------------ Start evaluating ------------------')
    engine = ClassificationEngine(args.output)
    top_k = 5
    correct = [0] * top_k
    wrong = [0] * top_k
    for category, image_list in test_set.items():
        print('Evaluating category [', category, ']')
        for img_name in image_list:
            img_path = os.path.join(args.data, category, img_name)
            # Close each image file as soon as it has been classified.
            with Image.open(img_path) as img:
                candidates = engine.classify_with_image(
                    img, threshold=0.1, top_k=top_k)
            recognized = False
            for i in range(top_k):
                if (i < len(candidates)
                        and labels_map[candidates[i][0]] == category):
                    recognized = True
                # A hit at rank i also counts as a hit for every larger k.
                if recognized:
                    correct[i] += 1
                else:
                    wrong[i] += 1

    print('---------------- Evaluation result -----------------')
    for i in range(top_k):
        total = correct[i] + wrong[i]
        if total == 0:
            # No test image was evaluated; avoid dividing by zero.
            print('Top {} : n/a (no test images)'.format(i + 1))
            continue
        print('Top {} : {:.0%}'.format(i + 1, correct[i] / total))
def run(self):
    """Drive the worker loop according to ``app_state.last_state``.

    While ``self.is_running`` is true, each pass reacts to the current
    state: ``shutdown`` stops the loop and kills the parent process,
    ``run`` classifies a camera frame, ``retrain`` imprints new weights
    and reloads the engine, and the two ``collect_*`` states capture
    training pictures. States are checked in sequence, so a state change
    made early in a pass is seen by the later checks of the same pass.
    """
    while self.is_running:
        if app_state.last_state == "shutdown":
            self.is_running = False
            os.system('kill $PPID')

        if (app_state.last_state == "run") and self.is_trained:
            detection = False
            img = camera.returnPIL()
            output = self.engine.ClassifyWithImage(img)
            # Guard against an empty result list before indexing into it.
            if output and output[0][0] == int(self.labels["detection"]):
                detection = True
                logging.info("detection triggered")
            self._detected(detection)

        if app_state.last_state == "retrain":
            logging.info("imprinting weights")
            self.is_trained = retrain()
            self.labels = get_labels()
            if self.is_trained:
                self.engine = ClassificationEngine(
                    "./models/classify.tflite")
                app_state.last_state = "run"
                logging.info("finished imprinting")
            else:
                app_state.last_state = "collect"
                logging.warning(
                    "could not imprint weights. Please provide enough pictures"
                )

        if app_state.last_state == "collect_background":
            camera.collect("background")
            app_state.last_state = "collect"

        if app_state.last_state == "collect_detection":
            camera.collect("detection")
            app_state.last_state = "collect"
class ClassifyEdgeTPU(ImageClassifier):
    """Image classifier backed by an Edge TPU ``ClassificationEngine``."""

    def __init__(self):
        """Set the default label and model paths under the download directory."""
        super().__init__()
        self.label_file = (config.download_directory + os.path.sep +
                           "imagenet_labels.txt")
        self.model_file = (config.download_directory + os.path.sep +
                           "mobilenet_v2_1.0_224_quant_edgetpu.tflite")

    def load_model(self, label_file=None, model_file=None):
        """Load a pretrained model.

        Args:
            label_file: Optional label file path; replaces the default.
            model_file: Optional model file path; replaces the default.
        """
        # Prepare labels
        if label_file is not None:
            self.label_file = label_file
        self.labels = self.read_label_file(self.label_file)

        # Initialize TPU engine
        if model_file is not None:
            self.model_file = model_file
        # Imported here so the edgetpu package is only needed once a model
        # is actually loaded.
        from edgetpu.classification.engine import ClassificationEngine
        self.model = ClassificationEngine(self.model_file)

    def read_label_file(self, file_path):
        """Read labels from a fixed-width text file.

        Each line holds the numeric id in its first four characters and the
        label text from the sixth character on. Only the part of the label
        before the first comma is kept, lower-cased. Blank lines are
        skipped.

        Returns:
            dict mapping ``int`` id to label string.
        """
        ret = {}
        with open(file_path, 'r') as f:
            for line in f:
                if not line.strip():
                    # A trailing empty line would otherwise fail in int('').
                    continue
                num = line[:4].strip()
                label = line[5:].strip().split(',')[0].lower()
                ret[int(num)] = label
        return ret

    def predict(self, image_a, top=5, score=False):
        """Return the best label for ``image_a``.

        Args:
            image_a: Image passed to the engine's ``ClassifyWithImage``.
            top: Accepted for interface compatibility; not used here.
            score: When true, return ``(label, score)`` instead of the label.

        Returns:
            The best label, or ``'other'`` (score ``0.0``) when the engine
            returns no result.
        """
        pred = self.model.ClassifyWithImage(image_a)
        if len(pred) == 0:
            p_label = 'other'
            p_score = 0.0
        else:
            p_n_label, p_score = pred[0]
            p_label = self.labels[p_n_label]
        if score:
            return p_label, p_score
        return p_label

    def predict_file(self, file_path, top=5):
        """Preprocess the file at ``file_path`` and return its best label."""
        image_a = self.preprocess(file_path)
        p_label = self.predict(image_a, top=top)
        return p_label
def main():
    """Creates camera pipeline, and pushes pipeline through
    ClassificationEngine model. Logs results to user-defined storage.

    Runs either in training mode to gather images for custom model
    creation or in deterrent mode that sounds an 'alarm' if a defined
    label is detected.
    """
    args = user_selections()
    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = ClassificationEngine(args.model)
    labels = load_labels(args.labels)
    storage_dir = args.storage

    # Initialize logging file
    logging.basicConfig(filename='%s/results.log' % storage_dir,
                        format='%(asctime)s-%(message)s',
                        level=logging.DEBUG)

    last_time = time.monotonic()
    last_results = [('label', 0)]
    # Label whose presence triggers the deterrent sound.
    deterrent_label = 'fox squirrel, eastern fox squirrel, Sciurus niger'

    def user_callback(image, svg_canvas):
        """Classify one frame, then save and/or react to the results."""
        nonlocal last_time
        nonlocal last_results
        start_time = time.monotonic()
        results = engine.classify_with_image(image,
                                             threshold=args.threshold,
                                             top_k=args.top_k)
        end_time = time.monotonic()
        # Replace numeric class ids with their label text.
        results = [(labels[i], score) for i, score in results]
        if args.print:
            print_results(start_time, last_time, end_time, results)

        if args.training:
            if do_training(results, last_results, args.top_k):
                save_data(image, results, storage_dir)
        else:
            # Custom model mode:
            # The labels can be modified to detect/deter user-selected items
            if results and results[0][0] != 'background':
                # NOTE(review): this call passes (image, storage_dir,
                # results) while the training branch passes (image,
                # results, storage_dir); one of the two orders is wrong --
                # confirm against save_data's signature.
                save_data(image, storage_dir, results)
            # `results` holds (label, score) tuples, so compare the label
            # part; testing the bare string against the list never matches.
            if any(name == deterrent_label for name, _ in results):
                if args.sound is not None:
                    playsound(args.sound)
                    logging.info('Deterrent sounded')

        last_results = results
        last_time = end_time

    # Note: we don't use the framerate parameter because our USB cam
    # doesn't like it
    gstreamer.run_pipeline(user_callback,
                           device='/dev/video1',
                           src_caps='video/x-raw,format=YUY2')
def _run_benchmark_for_model(model_name, image):
    """Measure the mean classification time of one model on one image.

    Args:
      model_name: string, file name of the model.
      image: string, name of the image used for test.

    Returns:
      float, average inference time in milliseconds.
    """
    print('Benchmark for [', model_name, '] on ', image)
    engine = ClassificationEngine(test_utils.test_data_path(model_name))

    # Models with 'edgetpu' in their name get more repetitions.
    if 'edgetpu' in model_name:
        iterations = 200
    else:
        iterations = 10

    with test_utils.test_image(image) as img_obj:

        def _classify_once():
            return engine.classify_with_image(
                img_obj, threshold=0.4, top_k=10)

        benchmark_time = timeit.timeit(_classify_once, number=iterations)

    seconds_each = benchmark_time / iterations
    time_per_inference = seconds_each * 1000
    print(time_per_inference, 'ms (iterations = ', iterations, ')')
    return time_per_inference
def get_frame():
    """Yield annotated camera frames as multipart JPEG chunks.

    Each frame from capture device 0 is classified, annotated with the
    predicted gesture, its score and the frame rate, JPEG-encoded and
    yielded wrapped in a ``--frame`` multipart boundary. The generator
    stops when the camera stops delivering frames, and releases the
    capture when it finishes or is closed.
    """
    # Class ids produced by the model and the text drawn for each.
    label_names = {0: "rock", 1: "paper", 2: "scissors"}

    cap = cv2.VideoCapture(0)
    try:
        engine = ClassificationEngine(modelPath)
        prevTime = 0
        while True:
            grabbed, frame = cap.read()
            if not grabbed:
                # No frame available; stop instead of failing in cvtColor.
                break

            curTime = time.time()
            sec = curTime - prevTime
            prevTime = curTime
            fps = 1 / sec if sec > 0 else 0.0
            fpsText = "FPS : {:.2f}".format(fps)

            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            framePIL = Image.fromarray(frameRGB)
            classify = engine.classify_with_image(framePIL)

            if classify:
                label, raw_score = classify[0]
                # Ids outside the known set get a placeholder rather than
                # leaving labelText unbound (or stale from an earlier frame).
                labelText = label_names.get(label, "unknown")
                score = round(raw_score, 3)
            else:
                labelText = "unknown"
                score = 0.0
            print(labelText, score)

            cv2.putText(frame, labelText + " " + str(score), (0, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(frame, fpsText, (0, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            imgencode = cv2.imencode('.jpg', frame)[1]
            # tobytes() replaces the deprecated ndarray.tostring().
            stringData = imgencode.tobytes()
            # NOTE(review): the part is declared text/plain although the
            # payload is a JPEG; confirm what the consumer expects.
            yield (b'--frame\r\n'
                   b'Content-Type: text/plain\r\n\r\n' + stringData +
                   b'\r\n')
    finally:
        cap.release()
def classify(path_list_, same_list_, THREAD):
    """Return the verification accuracy over pairs of images.

    For each pair, every image is run through the engine and the scores of
    the (sorted) results are collected. The pair is predicted to match
    when the mean squared difference of the two score vectors is below
    ``THREAD``.

    Args:
        path_list_: Sequence of image-path pairs.
        same_list_: Sequence of booleans, the expected answer per pair.
        THREAD: Distance threshold below which a pair counts as a match.

    Returns:
        Fraction of pairs predicted correctly; ``0.0`` for an empty
        ``path_list_``.
    """
    if not path_list_:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    engine = ClassificationEngine(FaceNet_weight)
    correct = 0
    for same_index, pair in enumerate(path_list_):
        picture1_embs = []
        picture2_embs = []
        for k, img_path in enumerate(pair):
            # flatten() copies the pixels, so the file can be closed here.
            with Image.open(img_path) as img:
                tensor = np.asarray(img).flatten()
            result = engine.ClassifyWithInputTensor(
                tensor, top_k=200, threshold=-0.5)
            result.sort(key=takeSecond)
            values = [entry[1] for entry in result]
            if k == 1:
                picture1_embs.extend(values)
            else:
                picture2_embs.extend(values)

        diff = np.mean(
            np.square(np.array(picture1_embs) - np.array(picture2_embs)))
        pred = bool(diff < THREAD)
        if pred == same_list_[same_index]:
            correct += 1

    return correct / len(path_list_)
def read_embedding(path=Embedding_book):
    """Load class names and embeddings from an HDF5 file.

    If the file cannot be opened, ``Create_embeddings`` is run with a new
    face engine and the open is retried once.

    Args:
        path: HDF5 file with ``class_name`` and ``embeddings`` datasets.

    Returns:
        ``(class_arr, emb_arr)``: the decoded class names as a list, and
        the embeddings dataset read fully into memory.
    """
    try:
        f = h5py.File(path, 'r')
    except OSError:
        # NOTE(review): presumably Create_embeddings writes the file that
        # is reopened below -- confirm it targets `path`.
        face_engine = ClassificationEngine(FaceNet_weight)
        Create_embeddings(face_engine)
        f = h5py.File(path, 'r')

    # Both datasets are read with [:] before the file handle is closed.
    with f:
        class_arr = [k.decode() for k in f['class_name'][:]]
        emb_arr = f['embeddings'][:]
    return class_arr, emb_arr
def _test_model(self, model_name, expected_top_1=None, expected_top_5=None):
    """Measure top-1/top-5 accuracy of a model over 50000 validation images.

    Args:
        model_name: File name of the model under the test data directory.
        expected_top_1: If given, asserted against the measured top-1
            accuracy to 4 decimal places.
        expected_top_5: If given, asserted against the measured top-5
            accuracy to 4 decimal places.
    """
    engine = ClassificationEngine(test_utils.test_data_path(model_name))
    with open(test_utils.test_data_path('imagenet/val.txt'), 'r') as gt_file:
        gt = [row.strip().split(' ') for row in gt_file]

    top_1_count = 0
    top_5_count = 0
    print('Running inference for model %s...' % model_name)
    for index in range(50000):
        # Ground-truth ids are shifted by one before comparison.
        label = int(gt[index][1]) + 1
        image_name = ('imagenet/ILSVRC2012_val_%s.JPEG'
                      % str(index + 1).zfill(8))
        with test_utils.test_image(image_name) as image:
            image = self._crop_image(image.convert('RGB'))
            prediction = engine.classify_with_image(
                image, threshold=0.0, top_k=5)

        if prediction[0][0] == label:
            top_1_count += 1
            top_5_count += 1
            continue
        # Not the top answer: look for the label among the other candidates.
        for rank in range(1, len(prediction)):
            if prediction[rank][0] == label:
                top_5_count += 1

    top_1_accuracy = top_1_count / 50000.0
    top_5_accuracy = top_5_count / 50000.0
    print('Top 1 accuracy: %.2f%%' % (top_1_accuracy * 100))
    print('Top 5 accuracy: %.2f%%' % (top_5_accuracy * 100))

    if expected_top_1 is not None:
        self.assertAlmostEqual(top_1_accuracy, expected_top_1, places=4)
    if expected_top_5 is not None:
        self.assertAlmostEqual(top_5_accuracy, expected_top_5, places=4)
def run_two_models_one_tpu(classification_model, detection_model, image_name,
                           num_inferences, batch_size):
    """Time two models that take turns on a single Edge TPU.

    The classification model runs `batch_size` times, then the detection
    model runs `batch_size` times, and this alternation repeats for
    ceil(num_inferences / batch_size) rounds.

    Args:
      classification_model: string, path to classification model
      detection_model: string, path to detection model.
      image_name: string, path to input image.
      num_inferences: int, number of inferences to run for each model.
      batch_size: int, indicates how many inferences to run one model before
        switching to the other one.

    Returns:
      double, wall time it takes to finish the job.
    """
    began = time.perf_counter()

    classifier = ClassificationEngine(classification_model)
    # The detector is created on the device path reported by the classifier.
    detector = DetectionEngine(detection_model, classifier.device_path())

    with open_image(image_name) as image:
        # One pre-built input tensor per engine.
        classifier_input = get_input_tensor(classifier, image)
        detector_input = get_input_tensor(detector, image)

    rounds = (num_inferences + batch_size - 1) // batch_size
    for _round in range(rounds):
        # The *_with_input_tensor calls are used on purpose so that image
        # down-scaling is not part of the measured time.
        for _step in range(batch_size):
            classifier.classify_with_input_tensor(classifier_input, top_k=1)
        for _step in range(batch_size):
            detector.detect_with_input_tensor(detector_input, top_k=1)

    finished = time.perf_counter()
    return finished - began
def main():
    """Classify the Pi camera stream and show the top result on the preview.

    Command-line arguments: ``--model`` (model path) and ``--label``
    (label file path); both are required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='File path of Tflite model.', required=True)
    parser.add_argument(
        '--label', help='File path of label file.', required=True)
    args = parser.parse_args()

    labels = dataset_utils.read_label_file(args.label)
    engine = ClassificationEngine(args.model)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        # Frames are captured directly at the engine's input size.
        _, height, width, _ = engine.get_input_tensor_shape()
        camera.start_preview()
        try:
            stream = io.BytesIO()
            frames = camera.capture_continuous(stream,
                                               format='rgb',
                                               use_video_port=True,
                                               resize=(width, height))
            for _ in frames:
                stream.truncate()
                stream.seek(0)
                input_tensor = np.frombuffer(stream.getvalue(),
                                             dtype=np.uint8)

                started = time.time()
                results = engine.classify_with_input_tensor(input_tensor,
                                                            top_k=1)
                elapsed_seconds = time.time() - started

                if not results:
                    continue
                best = results[0]
                camera.annotate_text = '%s %.2f\n%.2fms' % (
                    labels[best[0]], best[1], elapsed_seconds * 1000.0)
        finally:
            camera.stop_preview()
def main():
    """Classify webcam frames and overlay the predicted surface type.

    Each frame from capture device 0 is resized to 224x224, mirrored,
    classified, and shown with the frame rate, surface name and
    probability drawn on it. Escape ends the loop.
    """
    # Text drawn per class id; any other id is reported as carpet.
    surface_names = {0: 'Wood ', 1: 'Yellow ', 2: 'Marble '}

    # Load your model onto your Coral Edgetpu
    engine = ClassificationEngine(modelPath)
    labels = loadLabels(labelPath)

    cap = cv2.VideoCapture(0)
    prev_time = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            cur_time = time.time()
            sec = cur_time - prev_time
            prev_time = cur_time
            fps = str(round(1 / sec, 1)) + 'fps '

            # Format the image into a PIL Image so it is compatible with
            # Edge TPU
            cv2_im = frame
            cv2_im_input = cv2.resize(frame, (224, 224))
            pil_im = Image.fromarray(cv2_im_input)

            # Flip the image so it matches training. transpose() returns a
            # new image, so the result has to be assigned for the flip to
            # take effect.
            pil_im = pil_im.transpose(Image.FLIP_LEFT_RIGHT)

            # Classify and display image
            results = classifyImage(pil_im, engine)
            class_value = results[0][0]
            probability = str(round(results[0][1] * 100, 1)) + '%'
            result = surface_names.get(class_value, 'Carpet ')
            result = fps + result + probability

            # cv2_im is the same array as frame, so the text is displayed.
            cv2.putText(frame, result, (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0))
            cv2.imshow('frame', cv2_im)
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # Release the camera and windows even if a frame fails to process.
        cap.release()
        cv2.destroyAllWindows()
class image_classification:
    """Classification engine wrapper that remembers its latest results."""

    # Paths of the bundled model files and the shared label file.
    MODEL_EFFICIENT_S = 'models/efficientnet-edgetpu-S_quant_edgetpu.tflite'
    MODEL_EFFICIENT_M = 'models/efficientnet-edgetpu-M_quant_edgetpu.tflite'
    MODEL_EFFICIENT_L = 'models/efficientnet-edgetpu-L_quant_edgetpu.tflite'
    MODEL_MOBILENET_V1 = 'models/mobilenet_v1_1.0_224_quant_edgetpu.tflite'
    MODEL_MOBILENET_V2 = 'models/mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    MODEL_INCEPTION_V1 = 'models/inception_v1_224_quant_edgetpu.tflite'
    MODEL_INCEPTION_V2 = 'models/inception_v2_224_quant_edgetpu.tflite'
    MODEL_INCEPTION_V3 = 'models/inception_v3_299_quant_edgetpu.tflite'
    MODEL_INCEPTION_V4 = 'models/inception_v4_299_quant_edgetpu.tflite'
    LABELS = 'models/imagenet_labels.txt'

    def __init__(self, threshold=0.5, num_results=10,
                 model=MODEL_EFFICIENT_S, labels=LABELS):
        """Create the engine, read the labels and reset the cached results.

        Args:
            threshold: Score threshold passed to the engine on classify.
            num_results: ``top_k`` passed to the engine on classify.
            model: Path of the model file.
            labels: Path of the label file.
        """
        self.engine = ClassificationEngine(model)
        self.model_labels = read_label_file(labels)
        # Results of the most recent classify() call; None until then.
        self.objs = None
        self.scores = None
        self.labels = None
        self.threshold = threshold
        self.num_results = num_results

    def set_threshold(self, num):
        """Store ``num`` as the score threshold for later classify() calls."""
        self.threshold = num

    def set_max_results(self, num):
        """Store ``num`` as the ``top_k`` for later classify() calls."""
        self.num_results = num

    def classify(self, img):
        """Classify an array image and cache scores and label names.

        Returns:
            The engine's raw result sequence.
        """
        pil_image = Image.fromarray(img)
        self.objs = self.engine.classify_with_image(
            pil_image, threshold=self.threshold, top_k=self.num_results)

        collected_scores = []
        for entry in self.objs:
            collected_scores.append(entry[1])
        self.scores = collected_scores

        collected_names = []
        for entry in self.objs:
            collected_names.append(self.model_labels[entry[0]])
        self.labels = collected_names

        return self.objs

    def get_scores(self):
        """Return the scores cached by the last classify() call."""
        return self.scores

    def get_labels(self):
        """Return the label names cached by the last classify() call."""
        return self.labels
def main():
    """Classify frames from a gstreamer pipeline and overlay the results.

    Command-line arguments: ``--model``, ``--labels``, ``--top_k`` and
    ``--threshold``; all are optional and have defaults.
    """
    default_model_dir = "../all_models"
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='.tflite model path',
        default=os.path.join(default_model_dir, default_model))
    parser.add_argument(
        '--labels', help='label file path',
        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument(
        '--top_k', type=int, default=3,
        help='number of classes with highest score to display')
    parser.add_argument(
        '--threshold', type=float, default=0.1,
        help='class score threshold')
    args = parser.parse_args()

    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = ClassificationEngine(args.model)
    labels = load_labels(args.labels)

    last_time = time.monotonic()

    def user_callback(image, svg_canvas):
        """Classify one frame, print a summary line and draw the overlay."""
        nonlocal last_time
        began = time.monotonic()
        results = engine.ClassifyWithImage(
            image, threshold=args.threshold, top_k=args.top_k)
        finished = time.monotonic()

        inference_line = 'Inference: %.2f ms' % ((finished - began) * 1000)
        # The frame rate is measured from the end of the previous inference.
        fps_line = 'FPS: %.2f fps' % (1.0 / (finished - last_time))
        text_lines = [inference_line, fps_line]
        text_lines.extend(
            'score=%.2f: %s' % (score, labels[index])
            for index, score in results)

        print(' '.join(text_lines))
        last_time = finished
        generate_svg(svg_canvas, text_lines)

    gstreamer.run_pipeline(user_callback)
def main():
    """Classify webcam frames and show the result on the LEDs.

    A model file is searched for under ``/media``; if none is found the
    function prints a message and returns. Otherwise each frame from
    capture device 0 is resized, mirrored, classified and passed to the
    LED controller until 'q' is pressed or the camera stops.
    """
    try:
        modelPath = searchModelFile("/media")
        # Load your model onto your Coral Edgetpu
        engine = ClassificationEngine(modelPath)
    except NoModelFile:
        print("No Model File Exception")
        return

    ledCont = LED()
    ledCont.wiggleLEDs(4)

    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Format the image into a PIL Image so it is compatible with
            # Edge TPU
            cv2_im = frame
            pil_im = Image.fromarray(cv2_im)

            # Resize and flip image so it is a square and matches training.
            # Both calls return a new image, so the results have to be
            # assigned for them to take effect.
            pil_im = pil_im.resize((224, 224))
            pil_im = pil_im.transpose(Image.FLIP_LEFT_RIGHT)

            # Classify and display image
            results = classifyImage(pil_im, engine)
            ledCont.setOnlyLED(results)
            cv2.imshow('frame', cv2_im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and windows even if a frame fails to process.
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Compare the average inference time of two models on one image.

    Command-line arguments: ``--model1``, ``--model2``, ``--image`` (all
    required) and ``--num`` (number of measured runs, default 100). One
    extra run is made first and left out of the averages.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model1", help="File path of Tflite model.", required=True)
    parser.add_argument(
        "--model2", help="File path of Tflite model.", required=True)
    parser.add_argument(
        "--image", help="File path of the image to be recognized.",
        required=True)
    parser.add_argument(
        "--num", help="Number of inference executions.", default=100,
        type=int)
    args = parser.parse_args()

    # Initialize engine.
    engine1 = ClassificationEngine(args.model1)
    engine2 = ClassificationEngine(args.model2)

    # Run inference.
    inference_time1 = []
    inference_time2 = []
    # The run count comes from the parsed arguments; a bare `num` is not
    # defined in this function.
    for i in range(args.num + 1):
        with Image.open(args.image) as img:
            engine1.ClassifyWithImage(img, top_k=3)
            engine2.ClassifyWithImage(img, top_k=3)

        # Get Inference time; iteration 0 is skipped as a warm-up.
        if i > 0:
            inference_time1.append(engine1.get_inference_time())
            inference_time2.append(engine2.get_inference_time())

    # Avg
    print("Model1 inference time avg: {0:.4f}".format(
        statistics.mean(inference_time1)))
    print("Model2 inference time avg: {0:.4f}".format(
        statistics.mean(inference_time2)))
class ObjectClassification(object):
    """Classify frames with an Edge TPU engine and return labelled results."""

    def __init__(self, model, labels, threshold, camera_res):
        """Create the engine and read the label file.

        Args:
            model: Path of the model file.
            labels: Path of the label file (``<id> <label>`` per line).
            threshold: Score threshold passed to the engine.
            camera_res: Stored as ``self.camera_res``; not used in this class.
        """
        self.threshold = threshold
        self.engine = ClassificationEngine(model)
        self.labels = self.read_labels(labels)
        self.camera_res = camera_res

    def read_labels(self, file_path):
        """Parse a label file into a dict of ``int`` id to label text.

        Each non-blank line is split once on whitespace into id and label.
        Blank lines are skipped.
        """
        labels = {}
        with open(file_path, 'r') as f:
            for line in f:
                pair = line.strip().split(maxsplit=1)
                if not pair:
                    # Blank line: nothing to parse.
                    continue
                labels[int(pair[0])] = pair[1].strip()
        return labels

    def pre_process(self, frame):
        """Return the array from ``frame.get()`` as a flat tensor."""
        frame_expanded = np.expand_dims(frame.get(), axis=0)
        return frame_expanded.flatten()

    def post_process(self, objects):
        """Convert ``(label_id, score)`` pairs into label/confidence dicts."""
        return [
            {
                "label": str(self.labels[label_id]),
                "confidence": float(score),
            }
            for label_id, score in objects
        ]

    def return_objects(self, frame):
        """Classify ``frame`` and return up to three labelled results."""
        tensor = self.pre_process(frame)
        detected_objects = self.engine.ClassifyWithInputTensor(
            tensor, threshold=self.threshold, top_k=3)
        return self.post_process(detected_objects)
def main(ip_address, port, path, model_path, labels_path):
    """Classify webcam frames and send the best label over OSC.

    Args:
        ip_address: Address of the OSC receiver.
        port: Port of the OSC receiver; converted with ``int``.
        path: OSC address the label is sent to.
        model_path: Path of the model file.
        labels_path: Path of the labels file.
    """
    # Setup the OSC Client
    osc = OSCClient(ip_address, int(port), encoding="utf8")

    # Load your model onto your Coral Edgetpu
    engine = ClassificationEngine(model_path)
    labels = loadLabels(labels_path)

    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Format the image into a PIL Image so it is compatible with
            # Edge TPU
            cv2_im = frame
            pil_im = Image.fromarray(cv2_im)

            # Resize and flip image so it is a square and matches training.
            # Both calls return a new image, so the results have to be
            # assigned for them to take effect.
            pil_im = pil_im.resize((224, 224))
            pil_im = pil_im.transpose(Image.FLIP_LEFT_RIGHT)

            # Classify and display image
            results = classifyImage(pil_im, engine)
            cv2.imshow('frame', cv2_im)

            # Get the label of the best result and send it with OSC
            details_of_best_result = get_details_of_best_result(
                results, labels)
            print(details_of_best_result)
            osc.send_message(path, details_of_best_result["label"])

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and windows even if a frame fails to process.
        cap.release()
        cv2.destroyAllWindows()
def load_classifiers(self, input_string):
    """Load every classifier named in a comma-separated string.

    Names already present in ``self.loaded`` are skipped. For each new
    name, the library entry supplies the label file, model file and
    thresholds, which are stored together under ``self.loaded[name]``.

    Raises:
        KeyError: A name (or one of its attributes) is missing from the
            library.
        FileNotFoundError: The model or label file does not exist.
    """
    for name in input_string.split(","):
        # Nothing to do for classifiers that are already in memory.
        if name in self.loaded:
            logger.debug("Classifier already loaded %s " % (name))
            continue

        logger.debug("Loading classifier %s " % (name))
        # Read attributes from library and initialise
        try:
            attr = self.library[name]
            entry = {
                "labels": dataset_utils.read_label_file(attr["labels"]),
                "model": ClassificationEngine(attr["model"]),
                "thresholds": attr["thresholds"],
            }
            self.loaded[name] = entry
        except KeyError:
            raise KeyError("Classifier name not found in database")
        except FileNotFoundError:
            raise FileNotFoundError(
                "Model or labels not found in models folder")
def main():
    """Run object detection or image classification on the Pi camera feed.

    Command-line arguments: ``--model`` and ``--mode`` (required),
    ``--label`` and ``--camera`` (optional). Each captured frame is
    annotated with the results, the frame rate and the last inference
    time, and shown in a window. Pressing 'q' closes the window and exits
    the process.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument(
        '--mode',
        help='Mode for de detection: OBJECT_DETECTION or IMAGE_CLASSIFICATION',
        required=True)
    parser.add_argument(
        '--camera', help='Camera source (if multiple available)',
        type=int, required=False)
    args = parser.parse_args()

    # Initialize engine.
    if args.mode == "OBJECT_DETECTION":
        engine = DetectionEngine(args.model)
    elif args.mode == "IMAGE_CLASSIFICATION":
        engine = ClassificationEngine(args.model)
    else:
        print(
            "Please insert the mode from OBJECT_DETECTION or IMAGE_CLASSIFICATION"
        )
        raise SystemExit

    labels = read_label_file(args.label) if args.label else None
    label = None

    # NOTE: --camera is parsed but not used; frames always come from the
    # PiCamera below.
    camera = PiCamera()
    try:
        time.sleep(2)
        camera.resolution = (640, 480)

        # Timestamps of the most recent frames, used for the fps figure.
        frame_times = deque(maxlen=40)

        while True:
            # A fresh in-memory stream per frame.
            stream = io.BytesIO()
            camera.capture(stream, format='jpeg', use_video_port=True)
            stream.seek(0)
            pil_im = Image.open(stream)
            cv2_im = np.array(pil_im)
            # Swap the channel order of the PIL array for OpenCV display.
            cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)

            if args.mode == "OBJECT_DETECTION":
                ans = engine.DetectWithImage(pil_im,
                                             threshold=0.05,
                                             keep_aspect_ratio=True,
                                             relative_coord=False,
                                             top_k=10)
                if ans:
                    for obj in ans:
                        if obj.score > 0.4:
                            if labels:
                                label = (labels[obj.label_id]
                                         + " - {0:.2f}".format(obj.score))
                            draw_rectangles(obj.bounding_box, cv2_im,
                                            label=label)
                else:
                    draw_text(cv2_im, 'No object detected!')
            else:
                results = engine.ClassifyWithImage(pil_im, top_k=5)
                if results:
                    for i, result in enumerate(results):
                        # Without a label file, fall back to the class id.
                        if labels:
                            label = labels[result[0]]
                        else:
                            label = str(result[0])
                        draw_text(cv2_im, label, i)
                else:
                    # Shown when the engine returned nothing, mirroring
                    # the detection branch.
                    draw_text(cv2_im, 'No classification detected!')

            lastInferenceTime = engine.get_inference_time()
            frame_times.append(time.time())
            fps = len(frame_times) / float(
                frame_times[-1] - frame_times[0] + 0.001)
            draw_text(cv2_im,
                      "{:.1f} / {:.2f}ms".format(fps, lastInferenceTime))

            # NOTE: the original called cv2.flip() here and discarded the
            # result, so the frame was never flipped; flipping at this
            # point would also mirror the text drawn above.
            cv2.imshow('object detection', cv2_im)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                raise SystemExit
    finally:
        # Release the camera on every exit path.
        camera.close()
def main():
    """Detect faces on the webcam feed and name them by nearest embedding.

    Each frame is run through the detection engine. The detected crops are
    embedded with the face engine and compared against the stored
    embeddings; a crop is labelled with the closest class when the mean
    squared difference is below ``THRED``, otherwise ``'Others'``.
    Pressing 'a' saves the current crops as new classes and rebuilds the
    embeddings; pressing 'q' quits.
    """
    load_time = time.time()
    # Initialize engine.
    engine = DetectionEngine(Model_weight)
    labels = None
    # Face recognize engine
    face_engine = ClassificationEngine(FaceNet_weight)
    # read embedding
    class_arr, emb_arr = read_embedding(Embedding_book)
    load_seconds = time.time() - load_time

    # NOTE(review): the TensorFlow graph/session is opened but not
    # referenced below; presumably a helper relies on it -- confirm before
    # removing.
    with tf.Graph().as_default():
        with tf.compat.v1.Session() as sess:
            cap = cv2.VideoCapture(0)
            try:
                while True:
                    t1 = cv2.getTickCount()
                    print('Load_model: {:.2f} sec'.format(load_seconds))
                    ret, frame = cap.read()
                    if not ret:
                        # No frame delivered; stop instead of failing in
                        # cvtColor.
                        break
                    img = Image.fromarray(
                        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                    # Run inference.
                    ans = engine.DetectWithImage(img,
                                                 threshold=0.05,
                                                 keep_aspect_ratio=False,
                                                 relative_coord=False,
                                                 top_k=10)
                    # Copy into a writable array so OpenCV can draw on it.
                    img = numpy.array(img)

                    # Display result.
                    if ans:
                        crop_img = crop_image(ans, frame)

                        if cv2.waitKey(1) == ord('a'):
                            for k in range(0, len(crop_img)):
                                new_class_name = input(
                                    'Please input your name of class:')
                                new_save = cv2.cvtColor(crop_img[k],
                                                        cv2.COLOR_BGR2RGB)
                                cv2.imwrite(
                                    'pictures/' + str(new_class_name) + '.jpg',
                                    new_save)
                            # Rebuild and reload the embeddings so the new
                            # classes are recognised right away.
                            Create_embeddings(face_engine)
                            class_arr, emb_arr = read_embedding(
                                'embedding_book/embeddings.h5')

                        embs = Tpu_FaceRecognize(face_engine, crop_img)

                        face_num = len(ans)
                        face_class = ['Others'] * face_num
                        for i in range(face_num):
                            diff = np.mean(np.square(embs[i] - emb_arr),
                                           axis=1)
                            min_diff = min(diff)
                            if min_diff < THRED:
                                index = np.argmin(diff)
                                face_class[i] = class_arr[index]

                        print('Face_class:', face_class)
                        print('Classes:', class_arr)

                        for count, obj in enumerate(ans):
                            print('-----------------------------------------')
                            if labels:
                                print(labels[obj.label_id])
                            print('Score = ', obj.score)
                            box = obj.bounding_box.flatten().tolist()

                            # Draw a rectangle and label
                            cv2.rectangle(img,
                                          (int(box[0]), int(box[1])),
                                          (int(box[2]), int(box[3])),
                                          (255, 255, 0), 2)
                            cv2.putText(img, '{}'.format(face_class[count]),
                                        (int(box[0]), int(box[1]) - 5),
                                        cv2.FONT_HERSHEY_PLAIN, 1,
                                        (255, 0, 0), 1, cv2.LINE_AA)

                    t2 = cv2.getTickCount()
                    t = (t2 - t1) / cv2.getTickFrequency()
                    fps = 1.0 / t
                    cv2.putText(img, 'fps: {:.2f}'.format(fps), (5, 20),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                                cv2.LINE_AA)
                    cv2.putText(img, 'A: Add new class', (5, 450),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                                cv2.LINE_AA)
                    cv2.putText(img, 'Q: Quit', (5, 470),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1,
                                cv2.LINE_AA)

                    img_ = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    cv2.imshow('frame', img_)
                    if cv2.waitKey(1) == ord('q'):
                        break
            finally:
                # Release the camera and windows on every exit path.
                cap.release()
                cv2.destroyAllWindows()
ap.add_argument("-i", "--image", required=True, help="path to input image")
args = vars(ap.parse_args())

# initialize the labels dictionary
print("[INFO] parsing class labels...")
labels = {}

# loop over the class labels file; the `with` block closes it once parsed
with open(args["labels"]) as label_file:
    for row in label_file:
        row = row.strip()
        if not row:
            # a blank row has no "<id> <label>" pair to unpack
            continue
        # unpack the row and update the labels dictionary
        (classID, label) = row.split(" ", maxsplit=1)
        labels[int(classID)] = label.strip()

# load the Google Coral classification model
print("[INFO] loading Coral model...")
model = ClassificationEngine(args["model"])

# load the input image
image = cv2.imread(args["image"])
image = imutils.resize(image, width=500)
orig = image.copy()

# prepare the image for classification by converting (1) it from BGR
# to RGB channel ordering and then (2) from a NumPy array to PIL
# image format
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = Image.fromarray(image)

# make predictions on the input image
print("[INFO] making predictions...")
start = time.time()
default= '/home/pi/Desktop/AUTO_ML/models_edge_ICN6216886327266610278_2019-08-26_07-02-41-723_tflite_model.tflite', required=False) parser.add_argument( '--label', help='File path of label file.', default= '/home/pi/Desktop/AUTO_ML/models_edge_ICN6216886327266610278_2019-08-26_07-02-41-723_tflite_dict.txt', required=False) #parser.add_argument( # '--image', help='File path of the image to be tested.', required=False) args = parser.parse_args() # Prepare labels. labels = dataset_utils.ReadLabelFile(args.label) # Initialize engine engine = ClassificationEngine(args.model) print("load model costs %s sec" % (time.time() - start_time)) dir_path = '/home/pi/Desktop/test-dataset/' os.chdir(dir_path) subdir_list = next(os.walk('.'))[1] print(subdir_list) acc = [] for subdir in subdir_list: files = [] count = 0 sub_p = os.path.join(dir_path, subdir) for r, d, f in os.walk(sub_p): for file in f: if file[:2] != "._" and file[-3:] == "jpg":
'set': [], 'truth': [] } for split in ['train', 'validation', 'test']: for label in ['positive', 'negative']: data_paths = glob.glob('data/' + split + '/' + label + '/*.png') predictions['path'].extend(data_paths) predictions['set'].extend([split] * len(data_paths)) predictions['truth'].extend([int(label == 'positive')] * len(data_paths)) inference_times = {} model_paths = glob.glob('models/*/model_edgetpu.tflite') for model_path in model_paths: classifier = ClassificationEngine(model_path) model_name = model_path.split('/')[1] input_shape = [int(dim) for dim in model_name.split('_')[:3]] predictions[model_name] = [] inference_times[model_name] = [] for path in predictions['path']: image = Image.open(path) # Set threshold to smaller than 0 to receive each prediction in range [0, 1] prediction = classifier.classify_with_image(image, threshold=-1) inference_time = classifier.get_inference_time() # Predictions are returned as [(label_id, confidence_score)] predictions[model_name].append(prediction[0][1].astype(float)) inference_times[model_name].append(inference_time) with open('results/predictions_edgetpu.json', 'w') as fp: json.dump(predictions, fp)
def main():
    """Detect birds on a live video stream and follow them with CSRT trackers.

    Parses the command-line options, loads the detection and classification
    models and then loops over camera frames:

    * existing trackers are updated and their boxes drawn in red;
    * the detector is run (``top_k=1``) and a detected "bird" is drawn in
      green together with its score and the processing time;
    * a bird scoring above 0.2 that does not overlap an already tracked box
      gets a new CSRT tracker initialised on the detected box.

    All trackers are dropped 60 seconds after the most recent one was added.
    Press ``q`` in the preview window to quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--classification_model',
        help='Path of classification model.',
        required=False,
        default='all_models/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite')
    parser.add_argument(
        '--detection_model',
        help='Path of detection model.',
        required=False,
        default='all_models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    parser.add_argument('--image', help='Path of the image.', required=False)
    parser.add_argument('--classification_labels',
                        required=False,
                        default='all_models/inat_bird_labels.txt')
    parser.add_argument('--detection_labels',
                        required=False,
                        default='all_models/coco_labels.txt')
    args = parser.parse_args()

    # initialize the video stream and allow the camera sensor to warmup
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0, resolution=(2048, 1536)).start()
    time.sleep(2.0)

    # Everything below runs under try/finally so the capture thread and the
    # preview window are released even when a frame fails to process.
    try:
        detection_model = DetectionEngine(args.detection_model)
        # NOTE(review): the classification model and its labels are loaded
        # but not used anywhere in this function yet.
        classification_model = ClassificationEngine(args.classification_model)
        detection_labels = load_labels(args.detection_labels)
        print("detection_labels : {}".format(len(detection_labels)))
        classification_labels = load_labels(args.classification_labels)

        multi_tracker = cv2.MultiTracker_create()
        tracking_expire = None
        # Score / timing of the most recent detection; used to caption the
        # tracked boxes, which are drawn before the detector runs.
        last_score = 0.0
        last_elapsed = 0.0

        cv2.namedWindow("Frame", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("Frame", 800, 600)

        # loop over the frames from the video stream
        while True:
            frame = vs.read()
            if frame is None:
                print("[INFO] no frame received from video stream, stopping")
                break

            # `orig` is the BGR copy that gets annotated and displayed;
            # `frame` stays untouched so trackers see clean pixels.
            orig = frame.copy()

            # prepare the frame for detection by converting (1) it from
            # BGR to RGB channel ordering and then (2) from a NumPy array to
            # PIL image format
            rgb_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Drop all trackers once the tracking window has expired.  A fresh
            # MultiTracker is the way to clear it; the expiry is reset so the
            # tracker is not rebuilt on every following frame.
            if tracking_expire is not None and time.time() > tracking_expire:
                multi_tracker = cv2.MultiTracker_create()
                tracking_expire = None

            start = time.time()
            success, boxes = multi_tracker.update(orig)
            print('success {}'.format(success))
            print('boxes {}'.format(boxes))

            if success:
                tracked_text = "{}: {:.2f}% ({:.4f} sec)".format(
                    "bird", last_score * 100, last_elapsed)
                for tracked in boxes:
                    x, y, w, h = (int(v) for v in tracked)
                    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
                    cv2.putText(orig, tracked_text, (x, y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            # make predictions on the input frame
            objs = detection_model.detect_with_image(rgb_image, top_k=1)
            end = time.time()
            last_elapsed = end - start

            height, width = orig.shape[:2]
            for obj in objs:
                if detection_labels[obj.label_id] != "bird":
                    continue

                last_score = obj.score
                # bounding_box holds two relative corner points; scale them to
                # pixels and clamp them to the frame.
                (rx0, ry0), (rx1, ry1) = obj.bounding_box
                x0, y0 = max(int(rx0 * width), 0), max(int(ry0 * height), 0)
                x1, y1 = min(int(rx1 * width), width), min(int(ry1 * height), height)

                # draw the predicted class label, probability, and inference
                # time on the output frame
                cv2.rectangle(orig, (x0, y0), (x1, y1), (0, 255, 0), 2)
                text = "{}: {:.2f}% ({:.4f} sec)".format(
                    "bird", obj.score * 100, last_elapsed)
                cv2.putText(orig, text, (x0, y0), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (0, 255, 0), 2)

                if obj.score <= 0.2:
                    continue

                # Skip detections that overlap a box that is already tracked.
                already_tracked = False
                for tracked in boxes:
                    x, y, w, h = (int(v) for v in tracked)
                    if bb_intersection_over_union(
                            [x0, y0, x1, y1], [x, y, x + w, y + h]) > 0:
                        already_tracked = True
                        print("intersect.. already tracking")
                        break
                if already_tracked:
                    continue

                box_w, box_h = x1 - x0, y1 - y0
                if box_w > 0 and box_h > 0:
                    tracking_expire = time.time() + 60
                    tracker = cv2.TrackerCSRT_create()
                    print("add tracker {} {} {} {}".format(
                        x0, y0, box_w, box_h))
                    # Initialise on the clean frame and on the detected box
                    # (x, y, w, h), not on the annotated copy.
                    multi_tracker.add(tracker, frame, (x0, y0, box_w, box_h))

            # show the output frame and wait for a key press
            cv2.imshow("Frame", orig)
            key = cv2.waitKey(1) & 0xFF

            # if the `q` key was pressed, break from the loop
            if key == ord("q"):
                break
    finally:
        # do a bit of cleanup
        cv2.destroyAllWindows()
        vs.stop()
def main():
    """Creates camera pipeline, and pushes pipeline through ClassificationEngine model.

    Logs to ``results.log`` inside the user-defined storage directory. Runs
    either in training mode, where frames are saved whenever ``do_training``
    says so, or in detection mode, where a frame is saved when the top result
    scores at least 0.8 and a tweet with that image is attempted at most once
    every 300 seconds.
    """
    save_threshold = 0.8      # minimum top score to save a frame
    tweet_threshold = 0.9     # minimum top score for follow-up tweets
    tweet_interval = 300      # seconds between two tweets

    args = user_selections()
    print("Loading %s with %s labels." % (args.model, args.labels))
    engine = ClassificationEngine(args.model)
    labels = load_labels(args.labels)
    storage_dir = args.storage

    # Initialize logging files
    logging.basicConfig(filename='%s/results.log' % storage_dir,
                        format='%(asctime)s-%(message)s',
                        level=logging.DEBUG)

    last_time = time.monotonic()
    last_results = [('label', 0)]
    # Monotonic timestamp of the last successful tweet, None if none yet.
    last_tweet = None

    def user_callback(image, svg_canvas):
        """Classify one frame and save / tweet it according to the mode."""
        nonlocal last_time, last_results, last_tweet

        start_time = time.monotonic()
        results = engine.classify_with_image(image,
                                             threshold=args.threshold,
                                             top_k=args.top_k)
        end_time = time.monotonic()
        results = [(labels[i], score) for i, score in results]

        if args.print:
            print_results(start_time, last_time, end_time, results)

        if args.training:
            print("training mode")
            if do_training(results, last_results, args.top_k):
                save_data(image, results, storage_dir)
        else:
            print("looking for birds")
            # Custom model mode:
            # Save the images if the label is one of the targets and its
            # probability is relatively high.  `results` is empty when nothing
            # passed the classifier threshold, so check it before indexing.
            if results and results[0][1] >= save_threshold:
                top_label, top_score = results[0]
                filename = save_data(image, results, storage_dir)
                # NOTE(review): the very first tweet (and a retry after a
                # failed one) only needs the save threshold; later tweets
                # additionally need the interval to have elapsed and the
                # higher tweet threshold.  Kept as in the original.
                # The monotonic clock is used so that wall-clock adjustments
                # cannot shorten or stretch the interval.
                due = (last_tweet is None) or (
                    (time.monotonic() - last_tweet > tweet_interval)
                    and top_score >= tweet_threshold)
                if due:
                    try:
                        status = "I'm %d percent sure this is a %s. #ai" % (
                            top_score * 100, top_label)
                        logging.info('Trying to tweet : %s', status)
                        logging.info('Reading file %s', filename)
                        tweet(status, filename)
                        last_tweet = time.monotonic()
                    except Exception:
                        # Best effort: log the failure and allow a retry on
                        # the next qualifying frame.
                        logging.exception('Failed to send tweet')
                        last_tweet = None

        last_results = results
        last_time = end_time

    gstreamer.run_pipeline(user_callback)
def main():
    """Run classification and object detection on PiCamera frames.

    Every captured frame is saved as ``<count>.png`` in the output directory.
    The detector is run on it and, when at least one object is found, the
    frame with red boxes and label/score captions is saved as
    ``<count>_boxes.png``.  The preview is stopped when the loop ends or
    fails.

    Command-line arguments:
        --model, --label: classification model and label file (required).
        --detection_model, --detection_label, --output_dir: optional
            overrides for the previously hard-coded paths.
    """
    import os  # local import: only needed to build the output file names

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='File path of Tflite model.',
                        required=True)
    parser.add_argument('--label',
                        help='File path of label file.',
                        required=True)
    parser.add_argument(
        '--detection_model',
        help='File path of detection Tflite model.',
        default='/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
    parser.add_argument(
        '--detection_label',
        help='File path of detection label file.',
        default='/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/models/coco_labels.txt')
    parser.add_argument(
        '--output_dir',
        help='Directory the captured images are written to.',
        default='/home/cerbaris/pupper_code/PupperPy/pupperpy/Vision/test_images/')
    args = parser.parse_args()

    # NOTE(review): the classification labels are loaded but never used.
    labels = dataset_utils.read_label_file(args.label)
    engine = ClassificationEngine(args.model)
    detection_engine = DetectionEngine(args.detection_model)
    detection_labels = dataset_utils.read_label_file(args.detection_label)

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        _, height, width, _ = engine.get_input_tensor_shape()
        print(height)
        print(width)
        camera.start_preview()
        try:
            stream = io.BytesIO()
            count = 0
            for _ in camera.capture_continuous(stream,
                                               format='rgb',
                                               use_video_port=True,
                                               resize=(width, height)):
                stream.truncate()
                stream.seek(0)

                # getvalue() copies the buffer, so fetch it once per frame.
                frame_bytes = stream.getvalue()
                input_tensor = np.frombuffer(frame_bytes, dtype=np.uint8)
                print(type(frame_bytes))

                # Explicit raw-decoder arguments keep the row order top-down
                # on every Pillow version.
                image = Image.frombuffer('RGB', (width, height), frame_bytes,
                                         'raw', 'RGB', 0, 1)
                draw = ImageDraw.Draw(image)

                raw_path = os.path.join(args.output_dir, str(count) + '.png')
                with open(raw_path, 'wb') as f:
                    image.save(f)

                # NOTE(review): the classification result is computed but not
                # used for anything yet.
                results = engine.classify_with_input_tensor(input_tensor,
                                                            top_k=1)
                objects = detection_engine.detect_with_image(
                    image,
                    threshold=0.1,
                    keep_aspect_ratio=True,
                    relative_coord=False,
                    top_k=3)

                print('--------------------------')
                for obj in objects:
                    if detection_labels:
                        caption = detection_labels[obj.label_id]
                        print(caption + ' score = ' + str(obj.score))
                    else:
                        # No label table: caption the box with the raw id
                        # instead of failing on the lookup.
                        caption = str(obj.label_id)
                    box = obj.bounding_box.flatten().tolist()
                    print('box = ', box)
                    draw.rectangle(box, outline='red')
                    draw.text((box[0], box[1]),
                              caption + " " + str(obj.score))

                if not objects:
                    print('No objects detected')
                else:
                    boxes_path = os.path.join(args.output_dir,
                                              str(count) + '_boxes.png')
                    with open(boxes_path, 'wb') as f:
                        image.save(f)
                count += 1
        finally:
            camera.stop_preview()