def main(): """ Read input video and process it, the output video will be exported output_video path which can be set by input arguments. Example: python inference_video.py --config configs/config.json --input_video_path data/video/sample.mov --output_video data/videos/output.avi """ argparse = ArgumentParser() argparse.add_argument('--config', type=str, help='json config file path') argparse.add_argument('--input_video_path', type=str, help='the path of input video', default='') argparse.add_argument('--output_video', type=str, help='the name of output video file', default='face_mask_output.avi') args = argparse.parse_args() config_path = args.config cfg = Config(path=config_path) if args.input_video_path == '': input_path = cfg.APP_VIDEO_PATH else: input_path = args.input_video_path print("INFO: Input video is: ", input_path) output_path = args.output_video file_name_size = len(output_path.split('/')[-1]) output_dir = output_path[:-file_name_size] if not os.path.exists(output_dir): os.makedirs(output_dir) print("INFO: The output video will be exported at: ", output_path) detector_input_size = (cfg.DETECTOR_INPUT_SIZE[0], cfg.DETECTOR_INPUT_SIZE[1], 3) classifier_img_size = (cfg.CLASSIFIER_INPUT_SIZE, cfg.CLASSIFIER_INPUT_SIZE, 3) device = cfg.DEVICE detector = None classifier = None output_vidwriter = None if device == "x86": from libs.detectors.x86.detector import Detector from libs.classifiers.x86.classifier import Classifier elif device == "EdgeTPU": from libs.detectors.edgetpu.detector import Detector from libs.classifiers.edgetpu.classifier import Classifier else: raise ValueError('Not supported device named: ', device) detector = Detector(cfg) classifier_model = Classifier(cfg) input_cap = cv.VideoCapture(input_path) print("INFO: Start inferencing") frame_id = 0 while (input_cap.isOpened()): ret, raw_img = input_cap.read() if output_vidwriter is None: output_vidwriter = cv.VideoWriter(output_path, cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 24, (raw_img.shape[1], raw_img.shape[0])) height, width = raw_img.shape[:2] if ret == False: break _, cv_image = input_cap.read() if np.shape(cv_image) != (): resized_image = cv.resize(cv_image, tuple(detector_input_size[:2])) rgb_resized_image = cv.cvtColor(resized_image, cv.COLOR_BGR2RGB) objects_list = detector.inference(rgb_resized_image) faces = [] cordinates = [] for obj in objects_list: if 'bbox' in obj.keys(): face_bbox = obj['bbox'] # [ymin, xmin, ymax, xmax] xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]], width) ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]], height) croped_face = cv_image[int(ymin):int(ymin) + (int(ymax) - int(ymin)), int(xmin):int(xmin) + (int(xmax) - int(xmin))] # Resizing input image croped_face = cv.resize(croped_face, tuple(classifier_img_size[:2])) croped_face = cv.cvtColor(croped_face, cv.COLOR_BGR2RGB) # Normalizing input image to [0.0-1.0] croped_face = croped_face / 255.0 faces.append(croped_face) cordinates.append([int(xmin), int(ymin), int(xmax), int(ymax)]) faces = np.array(faces) face_mask_results, scores = classifier_model.inference(faces) for i, cor in enumerate(cordinates): if face_mask_results[i] == 1: color = (0, 0, 255) elif face_mask_results[i] == 0: color = (0, 255, 0) else: color = (0, 0, 0) cv.rectangle(raw_img, (cor[0], cor[1]), (cor[2], cor[3]), color, 2) output_vidwriter.write(raw_img) print('{} Frames number are processed. 
{} fps'.format(frame_id, detector.fps)) frame_id = frame_id + 1 else: continue input_cap.release() output_vidwriter.release() print('INFO: Finish:) Output video is exported at: ', output_path)
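
# A minimal sketch of the JSON config these scripts expect. The key names are
# taken from the cfg.* accesses above; the values and the flat layout are
# assumptions, so adapt them to your own models and hardware:
#
# {
#     "DEVICE": "x86",
#     "DETECTOR_INPUT_SIZE": [300, 300],
#     "CLASSIFIER_INPUT_SIZE": 45,
#     "APP_VIDEO_PATH": "data/video/sample.mov",
#     "APP_VIDEO_RESOLUTION": [640, 480]
# }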
def main(): """ Read input images and process them, the output images will be exported output_dir which can be set by input arguments. Example: python inference_images.py --config configs/config.json --input_image_dir data/images --output_image_dir output_images """ argparse = ArgumentParser() argparse.add_argument('--config', type=str, help='json config file path') argparse.add_argument('--input_image_dir', type=str, help='the directory of input images') argparse.add_argument('--output_image_dir', type=str, help='the directory of output images', default='output_images') args = argparse.parse_args() config_path = args.config cfg = Config(path=config_path) input_dir = args.input_image_dir print("INFO: The directory of input images is: ", input_dir) output_dir = args.output_image_dir if not os.path.exists(output_dir): os.makedirs(output_dir) if not os.path.isdir(output_dir): print( '"{} "output directory is not exists please make the directory before running this script.' .format(output_dir)) exit(1) print("INFO: The output images will be exported at: ", output_dir) detector_input_size = (cfg.DETECTOR_INPUT_SIZE[0], cfg.DETECTOR_INPUT_SIZE[1], 3) classifier_img_size = (cfg.CLASSIFIER_INPUT_SIZE, cfg.CLASSIFIER_INPUT_SIZE, 3) device = cfg.DEVICE detector = None classifier = None if device == "x86": from libs.detectors.x86.detector import Detector from libs.classifiers.x86.classifier import Classifier elif device == "EdgeTPU": from libs.detectors.edgetpu.detector import Detector from libs.classifiers.edgetpu.classifier import Classifier elif device == "Jetson": from libs.detectors.jetson.detector import Detector from libs.classifiers.jetson.classifier import Classifier else: raise ValueError('Not supported device named: ', device) detector = Detector(cfg) classifier_model = Classifier(cfg) print("INFO: Start inferencing") for filename in os.listdir(input_dir): image_path = os.path.join(input_dir, filename) raw_img = cv.imread(image_path) if np.shape(raw_img) != (): height, width, _ = raw_img.shape resized_image = cv.resize(raw_img, tuple(detector_input_size[:2])) rgb_resized_image = cv.cvtColor(resized_image, cv.COLOR_BGR2RGB) objects_list = detector.inference(rgb_resized_image) faces = [] cordinates = [] cordinates_head = [] for obj in objects_list: if 'bbox' in obj.keys(): face_bbox = obj['bbox'] # [ymin, xmin, ymax, xmax] xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]], width) ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]], height) croped_face = raw_img[int(ymin):int(ymin) + (int(ymax) - int(ymin)), int(xmin):int(xmin) + (int(xmax) - int(xmin))] # Resizing input image croped_face = cv.resize(croped_face, tuple(classifier_img_size[:2])) croped_face = cv.cvtColor(croped_face, cv.COLOR_BGR2RGB) # Normalizing input image to [0.0-1.0] croped_face = croped_face / 255.0 faces.append(croped_face) cordinates.append( [int(xmin), int(ymin), int(xmax), int(ymax)]) if 'bbox_head' in obj.keys(): head_bbox = obj['bbox_head'] # [ymin, xmin, ymax, xmax] xmin, xmax = np.multiply([head_bbox[1], head_bbox[3]], width) ymin, ymax = np.multiply([head_bbox[0], head_bbox[2]], height) cordinates_head.append( [int(xmin), int(ymin), int(xmax), int(ymax)]) faces = np.array(faces) if np.shape(faces)[0] == 0: print("can not find face at ".image_path) continue face_mask_results, scores = classifier_model.inference(faces) for i, cor in enumerate(cordinates): if face_mask_results[i] == 1: color = (0, 0, 255) elif face_mask_results[i] == 0: color = (0, 255, 0) else: color = (0, 0, 0) cv.rectangle(raw_img, 
(cor[0], cor[1]), (cor[2], cor[3]), color, 2) for cor in cordinates_head: cv.rectangle(raw_img, (cor[0], cor[1]), (cor[2], cor[3]), (200, 200, 200), 2) cv.imwrite(os.path.join(output_dir, filename), raw_img) print('image "{}" are processed. {} fps'.format( image_path, detector.fps)) else: continue print('INFO: Finish:) Output images are exported at: ', output_dir)
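
# Both scripts repeat the same bounding-box math: the detector returns boxes
# as [ymin, xmin, ymax, xmax] normalized to [0, 1], which must be scaled by
# the original frame size before cropping or drawing. A small hypothetical
# helper (not part of this project) that captures the convention:
def scale_bbox(bbox, width, height):
    """Convert a normalized [ymin, xmin, ymax, xmax] box to integer pixel
    coordinates [xmin, ymin, xmax, ymax]."""
    ymin, xmin, ymax, xmax = bbox
    return [int(xmin * width), int(ymin * height), int(xmax * width), int(ymax * height)]

# e.g. scale_bbox([0.1, 0.2, 0.5, 0.6], width=640, height=480) -> [128, 48, 384, 240]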
import cv2 as cv
import numpy as np


class FaceMaskAppEngine:
    """
    Run a detector that finds faces in the input video and a classifier that
    labels each cropped face as face or mask.

    :param config: A Config instance which provides the necessary parameters.
    """

    def __init__(self, config):
        self.config = config
        self.detector = None
        self.classifier_model = None
        self.running_video = False
        self.device = self.config.DEVICE
        if self.device == "x86":
            from libs.detectors.x86.detector import Detector
            from libs.classifiers.x86.classifier import Classifier
        elif self.device == "EdgeTPU":
            from libs.detectors.edgetpu.detector import Detector
            from libs.classifiers.edgetpu.classifier import Classifier
        elif self.device == "Jetson":
            from libs.detectors.jetson.detector import Detector
            from libs.classifiers.jetson.classifier import Classifier
        else:
            raise ValueError('Not supported device named: {}'.format(self.device))
        self.detector = Detector(self.config)
        self.classifier_model = Classifier(self.config)
        self.image_size = (self.config.DETECTOR_INPUT_SIZE[0], self.config.DETECTOR_INPUT_SIZE[1], 3)
        self.classifier_img_size = (self.config.CLASSIFIER_INPUT_SIZE, self.config.CLASSIFIER_INPUT_SIZE, 3)

    def set_ui(self, ui):
        self.ui = ui

    def __process(self, cv_image):
        # Resize the input frame to the configured app resolution
        self.resolution = self.config.APP_VIDEO_RESOLUTION
        cv_image = cv.resize(cv_image, tuple(self.resolution))
        resized_image = cv.resize(cv_image, tuple(self.image_size[:2]))
        rgb_resized_image = cv.cvtColor(resized_image, cv.COLOR_BGR2RGB)
        objects_list = self.detector.inference(rgb_resized_image)
        faces = []
        for obj in objects_list:
            if 'bbox' in obj.keys():
                face_bbox = obj['bbox']  # [ymin, xmin, ymax, xmax], normalized to [0, 1]
                xmin, xmax = np.multiply([face_bbox[1], face_bbox[3]], self.resolution[0])
                ymin, ymax = np.multiply([face_bbox[0], face_bbox[2]], self.resolution[1])
                cropped_face = cv_image[int(ymin):int(ymax), int(xmin):int(xmax)]
                # Resize the cropped face to the classifier input size
                cropped_face = cv.resize(cropped_face, tuple(self.classifier_img_size[:2]))
                cropped_face = cv.cvtColor(cropped_face, cv.COLOR_BGR2RGB)
                # Normalize the input image to [0.0, 1.0]
                cropped_face = cropped_face / 255.0
                faces.append(cropped_face)
        faces = np.array(faces)
        if len(faces) > 0:
            face_mask_results, scores = self.classifier_model.inference(faces)
        else:
            face_mask_results, scores = [], []
        # TODO: this could be optimized by having the detector return a list
        # instead of a dict per object
        idx = 0
        for obj in objects_list:
            if 'bbox' in obj.keys():
                obj['face_label'] = face_mask_results[idx]
                obj['score'] = scores[idx]
                idx = idx + 1
                # Reorder the bbox from [ymin, xmin, ymax, xmax] to [xmin, ymin, xmax, ymax]
                box = obj["bbox"]
                obj["bbox"] = [box[1], box[0], box[3], box[2]]
        return cv_image, objects_list

    def process_video(self, video_uri):
        input_cap = cv.VideoCapture(video_uri)
        if input_cap.isOpened():
            print('opened video ', video_uri)
        else:
            print('failed to load video ', video_uri)
            return
        self.running_video = True
        while input_cap.isOpened() and self.running_video:
            ret, cv_image = input_cap.read()
            if not ret:
                # End of stream; stop instead of spinning on empty reads
                break
            cv_image, objects = self.__process(cv_image)
            self.ui.update(cv_image, objects)
        input_cap.release()
        self.running_video = False
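
# A minimal sketch of how FaceMaskAppEngine is meant to be driven. The engine
# pushes every processed frame and its detections into ui.update(), so any
# object with that method works; PrintUI below is a hypothetical stand-in for
# the project's real UI class, and the config path is an assumption.
class PrintUI:
    def update(self, cv_image, objects):
        # Each object carries 'bbox' as [xmin, ymin, xmax, ymax] (normalized),
        # plus 'face_label' and 'score' when a face was classified.
        print('frame {}x{}: {} objects'.format(cv_image.shape[1], cv_image.shape[0], len(objects)))


if __name__ == '__main__':
    config = Config(path='configs/config.json')
    engine = FaceMaskAppEngine(config)
    engine.set_ui(PrintUI())
    engine.process_video(config.APP_VIDEO_PATH)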