def __init__(self, **kwargs):
    super(YOLO_np, self).__init__()
    self.__dict__.update(self._defaults) # set up default values
    self.__dict__.update(kwargs) # and update with user overrides
    self.class_names = get_classes(self.classes_path)
    self.anchors = get_anchors(self.anchors_path)
    self.colors = get_colors(self.class_names)
    K.set_learning_phase(0)
    self.yolo_model = self._generate_model()
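# Hedged usage sketch for YOLO_np: any key defined in the class-level _defaults dict can be
# overridden via kwargs at construction time. The paths below reuse the config files referenced
# later in this file and are placeholders, not guaranteed to exist.
#
#yolo = YOLO_np(classes_path='yolo_format_training_classes.txt',
#               anchors_path='configs/tiny_yolo_anchors.txt')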
def validate_yolo_model_tflite(model_path, image_file, anchors, class_names, loop_count):
    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    #print(input_details)
    #print(output_details)

    # check the type of the input tensor
    if input_details[0]['dtype'] == np.float32:
        floating_model = True

    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    image_data = preprocess_image(img, (height, width))
    image_shape = img.size

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    start = time.time()
    for i in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    out_list = []
    for output_detail in output_details:
        output_data = interpreter.get_tensor(output_detail['index'])
        out_list.append(output_data)

    start = time.time()
    predictions = yolo_head(out_list, anchors, num_classes=len(class_names), input_dims=(height, width))
    boxes, classes, scores = handle_predictions(predictions, confidence=0.1, iou_threshold=0.4)
    boxes = adjust_boxes(boxes, image_shape, (height, width))
    end = time.time()
    print("PostProcess time: {:.8f}ms".format((end - start) * 1000))

    print('Found {} boxes for {}'.format(len(boxes), image_file))
    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)
    Image.fromarray(image).show()
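# Hedged usage sketch for validate_yolo_model_tflite(): 'model.tflite' and 'test.jpg' are
# hypothetical placeholders; the anchors/classes paths reuse the config section below.
#
#anchors = get_anchors('configs/tiny_yolo_anchors.txt')
#class_names = get_classes('yolo_format_training_classes.txt')
#validate_yolo_model_tflite('model.tflite', 'test.jpg', anchors, class_names, loop_count=10)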
def validate_yolo_model(model, image_file, anchors, class_names, model_image_size, loop_count):
    image = Image.open(image_file)
    image_array = np.array(image, dtype='uint8')
    image_data = preprocess_image(image, model_image_size)
    image_shape = image.size

    # predict once first to bypass the model building time
    model.predict([image_data])

    start = time.time()
    for i in range(loop_count):
        boxes, classes, scores = yolo3_postprocess_np(model.predict([image_data]), image_shape,
                                                      anchors, len(class_names), model_image_size)
    end = time.time()

    print('Found {} boxes for {}'.format(len(boxes), image_file))
    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image_array = draw_boxes(image_array, boxes, classes, scores, class_names, colors)

    print("Average Inference time: {:.8f}s".format((end - start) / loop_count))
    Image.fromarray(image_array).show()
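# Hedged usage sketch for validate_yolo_model(): the Keras .h5 model is assumed to load with
# load_model() as in get_prediction_class_records() below; file paths are placeholders.
#
#model = load_model('model.h5', compile=False)
#validate_yolo_model(model, 'test.jpg', anchors, class_names,
#                    model_image_size=(416, 416), loop_count=10)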
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from tensorflow.keras.utils import multi_gpu_model

# Set up model configuration
model_type = 'darknet'
model_path = 'inference_model/shelf_product_detection_inference_model.h5'
anchors_path = 'configs/tiny_yolo_anchors.txt'
classes_path = 'yolo_format_training_classes.txt'
model_image_size = (416, 416)
gpu_num = 1

# Load classes and anchors
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
colors = get_colors(class_names)
num_anchors = len(anchors)
num_classes = len(class_names)
num_feature_layers = num_anchors // 3

# Initialise the model with the same architecture and load the trained weights given in the config
yolo_model, _ = get_yolo3_model(model_type, num_feature_layers, num_anchors, num_classes,
                                input_shape=model_image_size + (3,))
yolo_model.load_weights(model_path) # make sure model, anchors and classes match
print('{} model, anchors, and classes loaded.'.format(model_path))

# Define the prediction function with config parameters
def predict(image_data, image_shape):
    # trailing arguments completed to match the yolo3_postprocess_np() calls used elsewhere in this file
    out_boxes, out_classes, out_scores = yolo3_postprocess_np(yolo_model.predict(image_data), image_shape,
                                                              anchors, num_classes, model_image_size)
    return out_boxes, out_classes, out_scores
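# Hedged usage sketch for predict(): 'shelf.jpg' is a placeholder test image; preprocess_image()
# and image.size follow the same pattern as the validation helpers above.
#
#image = Image.open('shelf.jpg')
#image_data = preprocess_image(image, model_image_size)
#out_boxes, out_classes, out_scores = predict(image_data, image.size)
#print('Found {} boxes'.format(len(out_boxes)))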
def validate_yolo_model_mnn(model_path, image_file, anchors, class_names, loop_count):
    interpreter = MNN.Interpreter(model_path)
    session = interpreter.createSession()

    # TODO: currently the MNN python API only supports getting input/output tensors by default or
    # by name, so we need to hardcode the output tensor names here to fetch them from the model
    if len(anchors) == 6:
        output_tensor_names = ['conv2d_1/Conv2D', 'conv2d_3/Conv2D']
    elif len(anchors) == 9:
        output_tensor_names = ['conv2d_3/Conv2D', 'conv2d_8/Conv2D', 'conv2d_13/Conv2D']
    else:
        raise ValueError('invalid anchor number')

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)

    # get input shape
    input_shape = input_tensor.getShape()
    if input_tensor.getDimensionType() == MNN.Tensor_DimensionType_Tensorflow:
        batch, height, width, channel = input_shape
    elif input_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
        batch, channel, height, width = input_shape
    else:
        # should be MNN.Tensor_DimensionType_Caffe_C4, unsupported now
        raise ValueError('unsupported input tensor dimension type')

    # prepare input image
    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, (height, width))
    image_shape = img.size

    # use a temp tensor to copy data
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),
                           image_data, input_tensor.getDimensionType())

    # predict once first to bypass the model building time
    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    start = time.time()
    for i in range(loop_count):
        input_tensor.copyFrom(tmp_input)
        interpreter.runSession(session)
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    out_list = []
    for output_tensor_name in output_tensor_names:
        output_tensor = interpreter.getSessionOutput(session, output_tensor_name)
        output_shape = output_tensor.getShape()
        assert output_tensor.getDataType() == MNN.Halide_Type_Float

        # copy output tensor to host, for further postprocess
        tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),
                                np.zeros(output_shape, dtype=float), output_tensor.getDimensionType())
        output_tensor.copyToHostTensor(tmp_output)
        #tmp_output.printTensorData()

        output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)
        # our postprocess code is based on the TF channels-last format, so if the output format
        # doesn't match, we need to transpose
        if output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe:
            output_data = output_data.transpose((0, 2, 3, 1))
        elif output_tensor.getDimensionType() == MNN.Tensor_DimensionType_Caffe_C4:
            raise ValueError('unsupported output tensor dimension type')

        out_list.append(output_data)

    start = time.time()
    predictions = yolo3_head(out_list, anchors, num_classes=len(class_names), input_dims=(height, width))
    boxes, classes, scores = yolo3_handle_predictions(predictions, confidence=0.1, iou_threshold=0.4)
    boxes = yolo3_adjust_boxes(boxes, image_shape, (height, width))
    end = time.time()
    print("PostProcess time: {:.8f}ms".format((end - start) * 1000))

    print('Found {} boxes for {}'.format(len(boxes), image_file))
    for box, cls, score in zip(boxes, classes, scores):
        print("Class: {}, Score: {}".format(class_names[cls], score))

    colors = get_colors(class_names)
    image = draw_boxes(image, boxes, classes, scores, class_names, colors)
    Image.fromarray(image).show()
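# Hedged usage sketch for validate_yolo_model_mnn(): 'model.mnn' is a placeholder for a converted
# MNN model whose output node names must match the hardcoded list above.
#
#validate_yolo_model_mnn('model.mnn', 'test.jpg', anchors, class_names, loop_count=10)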
def get_prediction_class_records(model_path, annotation_records, anchors, class_names, model_image_size, conf_threshold, save_result):
    '''
    Run the YOLO model on annotation images to build a per-class prediction dict.
    Each entry contains image_name, box coordinates and score, sorted by score:
    pred_classes_records = {
        'car': [
                ['00001.jpg', '94,115,203,232', 0.98],
                ['00002.jpg', '82,64,154,128', 0.93],
                ...
               ],
        ...
    }
    '''
    # support of tflite model
    if model_path.endswith('.tflite'):
        from tensorflow.lite.python import interpreter as interpreter_wrapper
        interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
        interpreter.allocate_tensors()
    # support of MNN model
    elif model_path.endswith('.mnn'):
        interpreter = MNN.Interpreter(model_path)
        session = interpreter.createSession()
    # normal keras h5 model
    else:
        model = load_model(model_path, compile=False)

    pred_classes_records = {}
    for (image_name, gt_records) in annotation_records.items():
        image = Image.open(image_name)
        image_array = np.array(image, dtype='uint8')
        image_data = preprocess_image(image, model_image_size)
        image_shape = image.size

        if model_path.endswith('.tflite'):
            pred_boxes, pred_classes, pred_scores = yolo_predict_tflite(
                interpreter, image, anchors, len(class_names), conf_threshold)
        elif model_path.endswith('.mnn'):
            pred_boxes, pred_classes, pred_scores = yolo_predict_mnn(
                interpreter, session, image, anchors, len(class_names), conf_threshold)
        else:
            pred_boxes, pred_classes, pred_scores = yolo3_postprocess_np(
                model.predict([image_data]), image_shape, anchors, len(class_names),
                model_image_size, max_boxes=100, confidence=conf_threshold)
        print('Found {} boxes for {}'.format(len(pred_boxes), image_name))

        if save_result:
            gt_boxes, gt_classes, gt_scores = transform_gt_record(gt_records, class_names)

            result_dir = os.path.join('result', 'detection')
            touchdir(result_dir)
            colors = get_colors(class_names)
            image_array = draw_boxes(image_array, gt_boxes, gt_classes, gt_scores,
                                     class_names, colors=None, show_score=False)
            image_array = draw_boxes(image_array, pred_boxes, pred_classes, pred_scores,
                                     class_names, colors)
            image = Image.fromarray(image_array)
            # here we handle the RGBA image
            if (len(image.split()) == 4):
                r, g, b, a = image.split()
                image = Image.merge("RGB", (r, g, b))
            image.save(os.path.join(result_dir, image_name.split(os.path.sep)[-1]))

        # Nothing detected
        if pred_boxes is None or len(pred_boxes) == 0:
            continue

        for box, cls, score in zip(pred_boxes, pred_classes, pred_scores):
            pred_class_name = class_names[cls]
            xmin, ymin, xmax, ymax = box
            coordinate = "{},{},{},{}".format(xmin, ymin, xmax, ymax)

            # append or add predict class item
            if pred_class_name in pred_classes_records:
                pred_classes_records[pred_class_name].append([image_name, coordinate, score])
            else:
                pred_classes_records[pred_class_name] = list([[image_name, coordinate, score]])

    # sort pred_classes_records for each class according to score
    for pred_class_list in pred_classes_records.values():
        pred_class_list.sort(key=lambda ele: ele[2], reverse=True)

    return pred_classes_records
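# Hedged usage sketch for get_prediction_class_records(): annotation_records is assumed to map
# image paths to ground-truth records consumable by transform_gt_record(); values below are
# illustrative placeholders.
#
#annotation_records = {'images/00001.jpg': gt_records}
#pred_records = get_prediction_class_records('model.h5', annotation_records, anchors,
#                                            class_names, (416, 416), conf_threshold=0.1,
#                                            save_result=True)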