# Shared imports for the four main() variants below. Each variant lives in its
# own script in the repo; this import list is assumed from the calls the code
# makes. `common`, `load_labels`, and `get_output` are project-local helpers
# (pygame is only needed by the last variant).
import argparse
import os
import time
import xml.etree.ElementTree as ET

import cv2
import pygame
from PIL import Image

import tflite_runtime.interpreter as tflite

import common


# Variant 1: 2NN pipeline evaluation on CPU. Runs face detection (1NN) and
# mask classification (2NN) over an annotated test set, writes ground truths
# and detections in the format the mAP tool expects, and reports latency.
def main():
    #default_model_dir = './all_models'

    # Set face detection model
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'  # Coral ver
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess.tflite'  # GPU ver
    default_model = './1NN/quantized/two_nn_nomask.tflite'  # GPU ver
    default_labels = 'face_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path', default=default_model)

    # Set mask classification model
    default_model2 = 'mask_detector_quant.tflite'  # GPU ver
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'  # Coral ver
    parser.add_argument('--model2', help='.tflite model path', default=default_model2)
    parser.add_argument('--labels', help='label file path', default=default_labels)
    #parser.add_argument('--top_k', type=int, default=3,
    #                    help='number of categories with highest score to display')
    #parser.add_argument('--camera_idx', type=int, default=0,
    #                    help='Index of which video source to use.')
    #parser.add_argument('--threshold', type=float, default=0.1,
    #                    help='classifier score threshold')
    args = parser.parse_args()

    # Load 1NN (face detection)
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()

    # Load 2NN (mask classification)
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()

    # Load labels
    labels = load_labels(args.labels)

    # Load test data: ground-truth XML annotations and their images
    test_dir = 'for_evaluation(test_set)/xml'
    test_img_dir = 'for_evaluation(test_set)/image'
    full_filenames = [os.path.join(test_dir, f) for f in os.listdir(test_dir)]

    total_facedetection_time = 0
    face_detection_count = 0
    total_maskdetection_time = 0
    mask_detection_count = 0

    for filename in full_filenames:
        # The five characters before the extension identify the test file
        filenum = filename[-9:-4]
        # filenum = filename.split('/')[2].split('.')[0]

        # Parse the XML annotation and locate the image it describes
        tree = ET.parse(filename)
        root = tree.getroot()
        image_filename = root.find('filename').text
        image_path = os.path.join(test_img_dir, image_filename)

        # Load the image and get its height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Collect ground truths as [label, normalized bbox] pairs
        ground_truths = []
        for object in root.findall('object'):
            test_label = object.find('name').text  # 'mask' or 'nomask'
            bndbox = object.find('bndbox')
            bbox = [int(element.text) for element in bndbox]
            xmin, ymin, xmax, ymax = bbox
            test_bbox = [bbox[0] / width, bbox[1] / height,
                         bbox[2] / width, bbox[3] / height]
            ground_truths.append([test_label, test_bbox])

        # Write ground truths in the format the mAP tool expects
        with open("./mAP/groundtruths/{}.txt".format(filenum), "a+") as file:
            for ground_truth in ground_truths:
                file.write(str(ground_truth[0]) + ' ')
                for item in ground_truth[1]:
                    file.write("%s " % item)
                file.write("\n")

        # Run face detection (1NN) and measure its latency
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        detect_start_time = time.time()
        interpreter.invoke()
        detect_end_time = time.time()
        total_facedetection_time += detect_end_time - detect_start_time
        face_detection_count += 1
        objs = get_output(interpreter)  #, score_threshold=args.threshold, top_k=args.top_k)

        for i in range(len(objs)):
            # Keep only face detections with sane scores and normalized boxes
            if objs[i].id != 0:
                continue
            if objs[i].score > 1:
                continue
            obj_bbox = list(objs[i].bbox)
            if any(edge > 1 for edge in obj_bbox):
                continue
            xmin, ymin, xmax, ymax = obj_bbox
            xmin, ymin, xmax, ymax = (int(xmin * width), int(ymin * height),
                                      int(xmax * width), int(ymax * height))
            unnorm = [xmin, ymin, xmax, ymax]

            # Crop the face and run mask classification (2NN), measuring latency
            pil_im2 = Image.fromarray(cv2_im_rgb[ymin:ymax, xmin:xmax])
            common.set_input2(interpreter2, pil_im2)
            mask_start_time = time.time()
            interpreter2.invoke()
            mask_end_time = time.time()
            output_data = common.output_tensor2(interpreter2)
            total_maskdetection_time += mask_end_time - mask_start_time
            mask_detection_count += 1

            mask = output_data[0]
            withoutMask = output_data[1]
            print('mask_percentage: ', mask, ', nomask_percentage: ', withoutMask)
            if mask > withoutMask:
                label = "mask"
                score = mask * objs[i].score  # combine classifier and detector confidence
            else:
                label = "nomask"
                score = withoutMask * objs[i].score

            # Write detections (pixel coordinates) for the mAP tool
            with open("./mAP/2NN_CPU_8bit_detections/{}.txt".format(filenum), "a+") as file:
                file.write(label + ' ')
                file.write(str(score) + ' ')
                for item in unnorm:
                    file.write("%s " % item)
                file.write("\n")

    # Report average latencies over the whole test set
    avg_face = total_facedetection_time / face_detection_count
    avg_mask = total_maskdetection_time / mask_detection_count
    print('Average Face Detection Time: ', avg_face)
    print('Average Mask Detection Time: ', avg_mask)
    print('Average Total Inference Time: ', avg_face + avg_mask)
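# `get_output` and `load_labels` are project-local helpers that are not part
# of this excerpt. The sketch below shows what they are assumed to do, modeled
# on the Coral examples' common.py (including `common.output_tensor(i)`, which
# is assumed to return the i-th output tensor). The field names (id, score,
# bbox) and the (xmin, ymin, xmax, ymax) bbox order match the attribute access
# in the scripts; the actual repo implementation may differ.
import collections

Object = collections.namedtuple('Object', ['id', 'score', 'bbox'])
BBox = collections.namedtuple('BBox', ['xmin', 'ymin', 'xmax', 'ymax'])

def get_output(interpreter, score_threshold=0.0, top_k=None):
    """Return SSD detections as Object tuples with normalized coordinates."""
    # Standard SSD-postprocess output order: boxes, class ids, scores, count.
    boxes = common.output_tensor(interpreter, 0)
    class_ids = common.output_tensor(interpreter, 1)
    scores = common.output_tensor(interpreter, 2)
    count = int(common.output_tensor(interpreter, 3))
    objs = []
    for i in range(count):
        if scores[i] < score_threshold:
            continue
        ymin, xmin, ymax, xmax = boxes[i]  # the model emits [ymin, xmin, ymax, xmax]
        objs.append(Object(id=int(class_ids[i]), score=float(scores[i]),
                           bbox=BBox(xmin, ymin, xmax, ymax)))
    objs.sort(key=lambda o: o.score, reverse=True)
    return objs[:top_k] if top_k else objs

def load_labels(path):
    """Read one 'index label' pair per line into a dict."""
    labels = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            if len(pair) == 2:
                labels[int(pair[0])] = pair[1]
    return labels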
# Variant 2: live webcam demo on CPU. Detects faces, classifies each crop as
# mask / no mask, and draws labeled boxes on the preview frame.
def main():
    default_model_dir = './all_models'
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path', default=default_model)
                        #default=os.path.join(default_model_dir, default_model))

    #################### Keondo's Modification #########################
    default_model2 = 'mask_detector_quant.tflite'
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'
    parser.add_argument('--model2', help='.tflite model path', default=default_model2)
    #################### Keondo's Modification #########################

    parser.add_argument('--labels', help='label file path', default=default_labels)
                        #default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    # Initialize and configure pyttsx3 for spoken warning messages
    #engine = pyttsx3.init()
    #rate = engine.getProperty('rate')
    #engine.setProperty('rate', rate - 50)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    #interpreter = common.make_interpreter(args.model)
    interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()

    #################### Keondo's Modification #########################
    #interpreter2 = common.make_interpreter(args.model2)
    interpreter2 = tflite.Interpreter(model_path=args.model2)
    interpreter2.allocate_tensors()
    print('Interpreter 2 loaded')
    #################### Keondo's Modification #########################

    labels = load_labels(args.labels)
    cap = cv2.VideoCapture(args.camera_idx)
    frame_no = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        # 1NN: face detection
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        #cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #################### Keondo's Modification #########################
        height, width, channels = cv2_im.shape
        noMaskCount = 0
        mask_data = []

        # First pass: classify every detected face crop with the 2NN
        for i in range(len(objs)):
            x0, y0, x1, y1 = list(objs[i].bbox)
            x0, y0, x1, y1 = (int(x0 * width), int(y0 * height),
                              int(x1 * width), int(y1 * height))
            pil_im2 = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1])
            common.set_input2(interpreter2, pil_im2)
            # invoke() must run before the output tensor is read (the original
            # read the output first and got stale data)
            interpreter2.invoke()
            output_data = common.output_tensor2(interpreter2)
            # Key the result by the object index so the second pass can match
            # it (the original reverse loop keyed len(objs) - 1 - i, pairing
            # boxes with the wrong classifier outputs)
            mask_data.append((i, output_data))

        # Second pass: draw a labeled box for each face
        for j in range(len(objs)):
            x0, y0, x1, y1 = list(objs[j].bbox)
            x0, y0, x1, y1 = (int(x0 * width), int(y0 * height),
                              int(x1 * width), int(y1 * height))
            output = list(filter(lambda x: x[0] == j, mask_data))
            mask, withoutMask = output[0][1]
            if mask > withoutMask:
                labelMask = "Mask (" + str(x0) + "," + str(y0) + ")"
                color = (255, 0, 0)  # blue
            else:
                labelMask = "No Mask (" + str(x0) + "," + str(y0) + ")"
                color = (0, 0, 255)  # red
                noMaskCount += 1
            labelMask = "{}: {:.2f}%".format(labelMask, max(mask, withoutMask) * 100)
            cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), color, 2)
            cv2_im = cv2.putText(cv2_im, labelMask, (x0, y0 - 10),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)

        frame_no += 1
        #if noMaskCount > 0:
        #    engine.say("There are " + str(noMaskCount) +
        #               " people not wearing masks. Please wear a mask")

        # Earlier attempts accessed the interpreter's tensors directly, e.g.
        #   tensor_index = interpreter2.get_input_details()[0]['index']
        #   interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #   output_details = interpreter2.get_output_details()[0]
        #   output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        # which fails with:
        #   "There is at least 1 reference to internal data in the interpreter
        #    in the form of a numpy array or slice. Be sure to only hold the
        #    function returned from tensor() if you are using raw data access."
        # A manual dequantization experiment using the output's
        # quantization_parameters (scales * (output_data - zero_points))
        # was also left here.
        #################### Keondo's Modification #########################

        cv2.imshow('frame', cv2_im)
        #engine.runAndWait()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
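# `common.set_input2` / `common.output_tensor2` are project-local helpers used
# by every variant but not shown in this excerpt. A minimal sketch of what
# they are assumed to do for a quantized classifier with one image input and
# one two-class softmax output; the real common.py may differ.
import numpy as np

def set_input2(interpreter, pil_image):
    """Resize a PIL face crop to the classifier input size and write it."""
    input_details = interpreter.get_input_details()[0]
    _, h, w, _ = input_details['shape']
    resized = pil_image.resize((w, h), Image.NEAREST)
    # Write through tensor() without holding on to the returned reference;
    # keeping it alive is what triggers the "reference to internal data"
    # error quoted in the comments above.
    interpreter.tensor(input_details['index'])()[0][:, :] = np.asarray(resized)

def output_tensor2(interpreter):
    """Return the dequantized (mask, no-mask) scores."""
    output_details = interpreter.get_output_details()[0]
    # get_tensor() returns a copy, so no live reference is kept
    output_data = np.squeeze(interpreter.get_tensor(output_details['index']))
    scale, zero_point = output_details['quantization']
    if scale == 0:  # output is not quantized
        return output_data
    return scale * (output_data.astype(np.float32) - zero_point)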
# Variant 3: 2NN classifier accuracy evaluation on the Edge TPU. Feeds whole
# face crops to the mask classifier and scores them against folder-name
# ground truth.
def main():
    #default_model_dir = './all_models'

    # Set face detection model
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'  # Coral ver
    #default_model = 'mobilenet_ssd_v2_face_quant_postprocess.tflite'  # GPU ver
    default_model = './1NN/quantized/two_nn_nomask.tflite'  # GPU ver
    default_labels = 'face_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path', default=default_model)

    # Set mask classification model
    #default_model2 = 'mask_detector_quant.tflite'  # GPU ver
    default_model2 = 'mask_detector_quant_v2_edgetpu.tflite'  # Coral ver
    parser.add_argument('--model2', help='.tflite model path', default=default_model2)
    parser.add_argument('--labels', help='label file path', default=default_labels)
    args = parser.parse_args()

    # Load 1NN (not used in this classifier-only evaluation)
    #interpreter = tflite.Interpreter(model_path=args.model)
    #interpreter.allocate_tensors()

    # Load 2NN on the Edge TPU
    interpreter2 = tflite.Interpreter(
        model_path=args.model2,
        experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')])
    interpreter2.allocate_tensors()

    # Load labels
    labels = load_labels(args.labels)

    # Load test data: face crops grouped into one folder per ground-truth class
    test_dir = 'for_evaluation(test_set)/xml'  # unused in this script
    #test_img_dir = 'for_evaluation(2NN)/wo_mask'
    test_img_dir = 'for_evaluation(2NN)'
    #test_img_dir = 'temp'
    full_filenames = []
    for folder in os.listdir(test_img_dir):
        full_folder = os.path.join(test_img_dir, folder)
        for filename in os.listdir(full_folder):
            full_filenames.append(os.path.join(full_folder, filename))

    total_maskdetection_time = 0
    mask_detection_count = 0
    correct_mask_classification_count = 0
    #random.shuffle(full_filenames)

    for filename in full_filenames:
        print('---------------------------', filename, '---------------------------')
        # The five characters before the extension identify the test file
        filenum = filename[-9:-4]
        image_path = filename

        # Load the image and get its height and width
        cv2_im = cv2.imread(image_path, 1)
        height, width, channels = cv2_im.shape

        # Run the mask classifier on the whole crop and measure its latency
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input2(interpreter2, pil_im)
        mask_start_time = time.time()
        interpreter2.invoke()
        mask_end_time = time.time()
        output_data = common.output_tensor2(interpreter2)
        total_maskdetection_time += mask_end_time - mask_start_time
        mask_detection_count += 1

        mask = output_data[0]
        withoutMask = output_data[1]
        print('mask_percentage: ', mask, ', nomask_percentage: ', withoutMask)
        if mask > withoutMask:
            label = "mask"
            score = mask
        else:
            label = "nomask"
            score = withoutMask

        # The parent folder name ('w_mask' or 'wo_mask') is the ground truth
        filesplit = filename.split('/')
        print(filesplit[-2])
        gt = 'mask' if filesplit[-2] == 'w_mask' else 'nomask'

        if label == gt:
            print("Correct classification")
            correct_mask_classification_count += 1
        else:
            print("NOT correct classification")
        #if mask_detection_count > 100:
        #    break

    print("Total mask detection count: ", mask_detection_count)
    print("Correct mask classification count: ", correct_mask_classification_count)
    print("Accuracy: ", correct_mask_classification_count / mask_detection_count)
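# Expected layout for the crops consumed above, inferred from the commented
# 'for_evaluation(2NN)/wo_mask' path and the filesplit[-2] == 'w_mask' check:
#
#   for_evaluation(2NN)/
#       w_mask/    face crops with masks
#       wo_mask/   face crops without masks
#
# Each variant lives in its own script, so the usual entry point applies:
if __name__ == '__main__':
    main()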
# Variant 4: live webcam demo on the Edge TPU, with an audible beep while
# unmasked faces are in view.
def main():
    default_model_dir = './all_models'
    #### To run on a laptop, use the tflite files from before the ####
    #### edgetpu compile.                                         ####
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'  # wc amend
    #default_model = 'mobilenet_ssd_v2_coco_quant_postprocess.tflite'
    #default_model2 = 'mask_detector_quant_edgetpu.tflite'
    default_model2 = 'mask_detector_quant_v2_edgetpu.tflite'  # wc amend
    ##################################################################
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path', default=default_model)
                        #default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--model2', help='.tflite model path', default=default_model2)
    parser.add_argument('--labels', help='label file path', default=default_labels)
                        #default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, default=0,
                        help='Index of which video source to use.')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    ### Some functions in common.make_interpreter need the Edge TPU; ###
    ### simply use the tflite.Interpreter constructor on a laptop.   ###
    interpreter = common.make_interpreter(args.model)
    #interpreter = tflite.Interpreter(model_path=args.model)
    interpreter.allocate_tensors()
    interpreter2 = common.make_interpreter(args.model2)
    #interpreter2 = tflite.Interpreter(model_path=args.model2)
    #interpreter2 = tflite.Interpreter(model_path=args.model2,
    #    experimental_delegates=[tflite.load_delegate('libedgetpu.so.1')])  # wc amend
    interpreter2.allocate_tensors()
    #####################################################################
    print('Interpreter 2 loaded')

    labels = load_labels(args.labels)
    cap = cv2.VideoCapture(args.camera_idx)

    # Initialize and configure pygame for audible warning messages
    pygame.init()
    beep = pygame.mixer.Sound("coral.wav")

    frame_no = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        # 1NN: face detection
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)

        height, width, channels = cv2_im.shape
        noMaskCount = 0
        mask_data = []

        # First pass: classify every detected face crop with the 2NN
        for i in range(len(objs)):
            x0, y0, x1, y1 = list(objs[i].bbox)
            x0, y0, x1, y1 = (int(x0 * width), int(y0 * height),
                              int(x1 * width), int(y1 * height))
            pil_im2 = Image.fromarray(cv2_im_rgb[y0:y1, x0:x1])
            common.set_input2(interpreter2, pil_im2)
            interpreter2.invoke()
            output_data = common.output_tensor2(interpreter2)
            # Key the result by the object index so the second pass can match
            # it (the original keyed len(objs) - 1 - i, pairing boxes with the
            # wrong classifier outputs whenever more than one face was in view)
            mask_data.append((i, output_data))

        # Second pass: draw a labeled box for each face
        for j in range(len(objs)):
            x0, y0, x1, y1 = list(objs[j].bbox)
            x0, y0, x1, y1 = (int(x0 * width), int(y0 * height),
                              int(x1 * width), int(y1 * height))
            output = list(filter(lambda x: x[0] == j, mask_data))
            mask, withoutMask = output[0][1]
            if mask > withoutMask:
                labelMask = "Mask (" + str(x0) + "," + str(y0) + ")"
                color = (255, 0, 0)  # blue
            else:
                labelMask = "No Mask (" + str(x0) + "," + str(y0) + ")"
                color = (0, 0, 255)  # red
                noMaskCount += 1
            labelMask = "{}: {:.2f}%".format(labelMask, max(mask, withoutMask) * 100)
            cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), color, 2)
            cv2_im = cv2.putText(cv2_im, labelMask, (x0, y0 - 10),
                                 cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)

        frame_no += 1
        # Beep at most once every 10 frames while unmasked faces are in view
        if noMaskCount > 0 and frame_no % 10 == 0:
            beep.play()

        """
        The code below triggers an error:
        #tensor_index = interpreter2.get_input_details()[0]['index']
        #interpreter2.tensor(tensor_index)()[0][:,:] = pil_im2
        #output_details = interpreter2.get_output_details()[0]
        #output_data = np.squeeze(interpreter2.tensor(output_details['index'])())
        "There is at least 1 reference to internal data in the interpreter in
        the form of a numpy array or slice. Be sure to only hold the function
        returned from tensor() if you are using raw data access."
        """

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
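# Assumed usage for the Edge TPU demo above (the script name is hypothetical):
#   python3 demo_2nn_edgetpu.py --camera_idx 0 --threshold 0.1 --top_k 3
# Press 'q' in the preview window to quit.
if __name__ == '__main__':
    main()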