def run_model(img_path, model):
    """Run face detection on one image and save an annotated copy.

    Args:
        img_path: Path of the image, read with ``cv2.imread``.
        model: Callable detector. It is invoked on a batch holding one padded
            RGB float32 image and must return an object exposing ``.numpy()``.

    Returns:
        The path of the annotated image: ``OUTPUT + <name> + '_OUTPUT.png'``,
        where ``<name>`` is the file name up to its first dot.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    img_raw = cv2.imread(img_path)
    if img_raw is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image from {img_path}")
    img_height_raw, img_width_raw, _ = img_raw.shape

    img = cv2.cvtColor(np.float32(img_raw.copy()), cv2.COLOR_BGR2RGB)
    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    outputs = model(img[np.newaxis, ...]).numpy()
    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    name = os.path.basename(img_path).split('.')[0]
    # exist_ok avoids the check-then-create race of the exists()/makedirs() pair.
    os.makedirs('outputs', exist_ok=True)
    # NOTE(review): the file is written under OUTPUT, which is defined outside
    # this function; presumably it points into 'outputs' -- confirm.
    saved_img_path = OUTPUT + name + '_OUTPUT.png'

    for output in outputs:
        draw_bbox_landm(img_raw, output, img_height_raw, img_width_raw)
    cv2.imwrite(saved_img_path, img_raw)
    return saved_img_path
def test_step(inputs, img_name):
    """Detect faces on a single-image batch and return pixel-space boxes.

    Args:
        inputs: Batch tensor whose shape unpacks as ``(_, height, width, _)``;
            only element 0 is evaluated.
        img_name: Not used by the current implementation.

    Returns:
        A float ``np.ndarray`` holding one ``[x, y, w, h, conf]`` row per
        detection, where ``x``/``y`` is the top-left corner in pixels.
    """
    _, raw_height, raw_width, _ = inputs.shape

    # Pad the image so its shape matches what the network expects.
    padded, pad_params = pad_input_image(inputs[0].numpy(),
                                         max_steps=max(cfg['steps']))
    batch = padded[np.newaxis, ...]
    predictions = model(batch, training=False)
    outputs = pred_to_outputs(cfg, predictions, batch.shape).numpy()

    # Undo the effect of the padding on the predicted coordinates.
    outputs = recover_pad_output(outputs, pad_params)

    def to_xywh(box, conf):
        left = int(box[0] * raw_width)
        top = int(box[1] * raw_height)
        width = int(box[2] * raw_width) - left
        height = int(box[3] * raw_height) - top
        return [left, top, width, height, conf]

    rows = [to_xywh(box, conf)
            for box, conf in zip(outputs[:, :4], outputs[:, -1])]
    return np.array(rows).astype('float')
def get_face_value(img_raw, down_scale_factor=0.3):
    """Detect faces in a BGR image and wrap each detection in ``FV.FaceValue``.

    Args:
        img_raw: Image array whose shape unpacks as (height, width, channels).
        down_scale_factor: Scale applied before detection when below 1.0.

    Returns:
        A list with one ``FV.FaceValue`` per detection, each built from the
        detection row and the width and height of ``img_raw``.
    """
    # Silence TensorFlow logging and select GPU 0.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)

    raw_height, raw_width, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    # Shrink the image so that faces can be searched for faster.
    if down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0),
                         fx=down_scale_factor, fy=down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    batch = img[np.newaxis, ...]
    outputs = model(batch).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # Array holding the information about every detected face.
    result = [FV.FaceValue(detection, raw_width, raw_height)
              for detection in outputs]
    print('get face value', result)
    return result
def process_single_image(img_path, img_outputpath, model, cfg, data):
    """Detect faces in one image, save the annotated image, return landmark JSON.

    Args:
        img_path: Path of the input image.
        img_outputpath: Path the annotated image is written to.
        model: Callable detector returning an object exposing ``.numpy()``.
        cfg: Configuration mapping; ``cfg['steps']`` drives the padding.
        data: Passed through unchanged to ``get_json_landmark_data``.

    Returns:
        Whatever ``get_json_landmark_data(data, img_outputpath, landmarks)``
        returns, where ``landmarks`` holds the value ``draw_bbox_landm``
        returned for each detection.

    The process exits (via ``exit()``) when the image is missing or unreadable.
    """
    if not os.path.exists(img_path):
        print(f"cannot find image path from {img_path}")
        exit()

    img_raw = cv2.imread(img_path)
    if img_raw is None:
        # The path exists but OpenCV could not decode it (corrupt file or
        # unsupported format); without this check `.shape` raises below.
        print(f"cannot read image from {img_path}")
        exit()
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    # Run model
    outputs = model(img[np.newaxis, ...]).numpy()

    # Recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw_bbox_landm draws on img_raw in place; its return value is collected.
    landmarks = [
        draw_bbox_landm(img_raw, output, img_height_raw, img_width_raw)
        for output in outputs
    ]
    cv2.imwrite(img_outputpath, img_raw)

    return get_json_landmark_data(data, img_outputpath, landmarks)
def __detect_faces(self, frame):
    """Run ``self.model`` on one frame and return the padding-corrected outputs.

    The frame is copied and converted to float32, padded with a fixed
    ``max_steps`` of 32, and passed to the model as a batch of one.
    """
    # Pad the input so its shape matches what the network expects.
    padded, pad_params = pad_input_image(np.float32(frame.copy()),
                                         max_steps=32)

    # Run the model on a batch containing only this frame.
    batch = padded[np.newaxis, ...]
    raw_outputs = self.model(batch).numpy()

    # Undo the effect of the padding on the predicted coordinates.
    return recover_pad_output(raw_outputs, pad_params)
def main(_argv):
    """Detect faces on ``FLAGS.image`` with a Keras or TensorRT SavedModel.

    ``FLAGS.framework`` selects how ``FLAGS.weights`` is loaded: ``'tf'`` loads
    a Keras model, ``'trt'`` loads a SavedModel and uses its
    ``serving_default`` signature. The annotated image is written to
    ``out.jpg``.

    Raises:
        ValueError: If ``FLAGS.framework`` is neither ``'tf'`` nor ``'trt'``.
        FileNotFoundError: If ``FLAGS.image`` cannot be read.
    """
    cfg = load_yaml(FLAGS.cfg_path)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework == 'tf':
        infer = tf.keras.models.load_model(FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(
            FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']
    else:
        # Previously `infer` stayed undefined and a NameError surfaced later.
        raise ValueError(f"unsupported framework: {FLAGS.framework!r}")
    logging.info('weights loaded')

    img_raw = cv2.imread(FLAGS.image)
    if img_raw is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image from {FLAGS.image}")
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    batched_input = img[np.newaxis, ...]

    if FLAGS.framework == 'tf':
        outputs = infer(batched_input).numpy()
    else:
        # A SavedModel signature returns a dict mapping output names to
        # tensors. The original code stored it in `pred_bbox` and never set
        # `outputs`, so this branch always failed with a NameError.
        # NOTE(review): assumes the signature exposes the detections as its
        # only (first) output -- confirm against the exported model.
        predictions = infer(tf.constant(batched_input))
        outputs = next(iter(predictions.values())).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    save_img_path = 'out.jpg'
    for output in outputs:
        draw_bbox_landm(img_raw, output, img_height_raw, img_width_raw)
    cv2.imwrite(save_img_path, img_raw)
    print(f"[*] save result at {save_img_path}")
def get_face_value(img_raw, down_scale_factor=0.3):
    """Detect faces in a BGR image and describe each one as a plain dict.

    Args:
        img_raw: Image array whose shape unpacks as (height, width, channels).
        down_scale_factor: Scale applied before detection when below 1.0.

    Returns:
        A list with one dict per detection, holding ``'bbox'``
        (``[left, top, right, bottom]`` in pixels of ``img_raw``),
        ``'bbox_size'`` (box area) and ``'landm'`` (five named pixel points).
    """
    # Silence TensorFlow logging and select GPU 0.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)

    raw_height, raw_width, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0),
                         fx=down_scale_factor, fy=down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    batch = img[np.newaxis, ...]
    outputs = model(batch).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    def output_to_fvalue(out, w, h):
        def point(index):
            # Entries come in (x, y) pairs scaled by width and height.
            return (int(out[index] * w), int(out[index + 1] * h))

        left, top = point(0)
        right, bottom = point(2)
        return {
            'bbox': [left, top, right, bottom],
            'bbox_size': (bottom - top) * (right - left),
            'landm': {
                'left_eye': point(4),
                'right_eye': point(6),
                'nose': point(8),
                'mouse_left': point(10),
                'mouse_right': point(12),
            },
        }

    return [output_to_fvalue(detection, raw_width, raw_height)
            for detection in outputs]
def __detect_faces(self, frame):
    """Run ``self.model`` on one BGR frame and return padding-corrected outputs.

    The frame is converted to float32 RGB. When its width is not 640 it is
    resized to width 640 with the height scaled by the same ratio, then padded
    with a fixed ``max_steps`` of 32 and passed to the model as a batch of one.
    """
    img = np.float32(frame.copy())
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img_height, img_width, _ = img.shape
    if img_width != 640:
        # cv2.resize takes dsize as (width, height). The original passed
        # (scaled_height, 640), which swapped the axes and distorted the
        # aspect ratio of the frame fed to the detector.
        scaled_height = max(1, int(img_height * 640 / img_width))
        img = cv2.resize(img, (640, scaled_height),
                         interpolation=cv2.INTER_CUBIC)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=32)

    # run model
    outputs = self.model(img[np.newaxis, ...]).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    return outputs
def main(_argv):
    """Detect faces on a single image or on a video stream.

    With ``FLAGS.webcam`` unset, ``FLAGS.img_path`` is annotated and saved as
    ``out_<basename>``. Otherwise frames from the hard-coded video file are
    annotated and shown until the stream ends or ``q`` is pressed.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        if img_raw is None:
            # The path exists but OpenCV could not decode the file.
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for output in outputs:
            draw_bbox_landm(img_raw, output, img_height_raw, img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture("./videos/Bentall_Centra.MP4")

        start_time = time.time()
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # End of stream or read failure: stop instead of crashing
                    # on `frame.shape` as the original did.
                    print("no cam input")
                    break

                frame_height, frame_width, _ = frame.shape
                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(
                    img, max_steps=max(cfg['steps']))

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw results
                for output in outputs:
                    draw_bbox_landm(frame, output, frame_height, frame_width)

                # calculate fps; clamp the interval so a coarse clock cannot
                # cause a division by zero
                elapsed = max(time.time() - start_time, 1e-6)
                fps_str = "FPS: %.2f" % (1 / elapsed)
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # show frame
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Runs on normal end of stream and on exit() alike.
            cam.release()
            cv2.destroyAllWindows()
def main(_argv):
    """Align the largest detected face of every image in a class-per-folder tree.

    Each sub-directory of ``FLAGS.folder_path`` is treated as one class. For
    every readable image the largest detection is aligned with ``FaceAligner``
    and written under the same relative path in ``FLAGS.destination_dir``.
    Classes already present in the destination are skipped, except the last
    one in sorted order, which is processed again.
    """
    mkdir(FLAGS.destination_dir)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    aligner = FaceAligner(desiredFaceSize=128)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    total = 0
    processed_total = 0
    CLASS_NAMES = np.array(os.listdir(FLAGS.folder_path))
    temp = os.listdir(FLAGS.destination_dir)
    temp.sort()
    CLASS_NAMES.sort()

    for f in CLASS_NAMES:
        processed_image = 0
        # os.path.join keeps the paths valid whether or not the flags end
        # with a path separator.
        src_dir = os.path.join(FLAGS.folder_path, f)
        dst_dir = os.path.join(FLAGS.destination_dir, f)

        if os.path.isfile(src_dir):
            continue
        # Resume support: skip finished classes, redo the last one.
        if f in temp and f != temp[-1]:
            continue

        items = os.listdir(src_dir)
        mkdir(dst_dir)
        for path in items:
            frame = cv2.imread(os.path.join(src_dir, path))
            if frame is None:
                continue
            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            if len(outputs) < 1:
                continue

            # keep only the detection with the largest box area
            ann = max(outputs, key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))

            b_box = (int(ann[0] * frame_width), int(ann[1] * frame_height),
                     int(ann[2] * frame_width), int(ann[3] * frame_height))
            # skip faces whose box touches or leaves the frame
            if (b_box[0] < 0) or (b_box[1] < 0) \
                    or (b_box[2] >= frame_width) \
                    or (b_box[3] >= frame_height):
                continue

            keypoints = {
                'left_eye': (ann[4] * frame_width, ann[5] * frame_height),
                'right_eye': (ann[6] * frame_width, ann[7] * frame_height),
                # The nose was previously left in normalised coordinates
                # while every other landmark was converted to pixels.
                'nose': (ann[8] * frame_width, ann[9] * frame_height),
                'left_mouth': (ann[10] * frame_width, ann[11] * frame_height),
                'right_mouth': (ann[12] * frame_width, ann[13] * frame_height),
            }

            out_frame = aligner.align(frame, keypoints, b_box)

            # cv2.imwrite signals failure through its return value rather
            # than FileExistsError, so count only successful writes.
            if cv2.imwrite(os.path.join(dst_dir, path), out_frame):
                processed_image += 1

        print(f + " Done")
        total += len(items)
        processed_total += processed_image

    # The totals were accumulated but never reported.
    print("Processed total: {} / {}".format(processed_total, total))
def main(_argv):
    """Detect faces on a down-scaled image or on a video, saving the result.

    With ``FLAGS.webcam`` unset, ``FLAGS.img_path`` is scaled so its longer
    side is 320 pixels, annotated and saved as ``out_<basename>``. Otherwise
    the hard-coded video is decoded, detections are drawn on every frame, and
    the frames are shown and written to ``chaochao1.mp4``.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        if img_raw is None:
            # The path exists but OpenCV could not decode the file.
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img = np.float32(img_raw.copy())

        # testing scale: longer side becomes target_size
        target_size = 320
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_max)
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)

        # Unpadded 8-bit canvas with the same geometry as the network input.
        # The original drew on the padded image and used target_size for both
        # axes, which misplaces boxes on any non-square image.
        # NOTE(review): assumes recover_pad_output yields coordinates
        # normalised to the unpadded image -- confirm.
        canvas = cv2.resize(img_raw, None, None, fx=resize, fy=resize,
                            interpolation=cv2.INTER_LINEAR)
        canvas_height, canvas_width = canvas.shape[:2]

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for output in outputs:
            draw_bbox_landm(canvas, output, canvas_height, canvas_width)
        cv2.imwrite(save_img_path, canvas)
        print(f"[*] save result at {save_img_path}")

    else:
        cam = cv2.VideoCapture('./data/lichaochao.mp4')

        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fps = cam.get(cv2.CAP_PROP_FPS)
        # VideoWriter expects frameSize as (width, height); the original
        # passed (height, width), which does not match the written frames.
        out = cv2.VideoWriter('chaochao1.mp4', fourcc, fps=fps,
                              frameSize=(frame_width, frame_height))

        resize = FLAGS.down_scale_factor
        frame_height *= resize
        frame_width *= resize

        # Priors depend only on the padded input size, so build them once.
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps \
            if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps \
            if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        try:
            while cam.isOpened():
                _, frame = cam.read()
                if frame is None:
                    print('no cam')
                    break

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0), fx=resize, fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])
                preds = tf.concat([
                    outputs[0][0],
                    outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)

                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)

                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params,
                                             resize=resize)

                # draw results
                for output in outputs:
                    draw_bbox_landm(frame, output, frame_height, frame_width)

                # save and show frame
                out.write(frame)
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Release on every exit path, including exit().
            cam.release()
            out.release()
            cv2.destroyAllWindows()
def main(_argv):
    """Run the detector over the testing dataset and write per-image results.

    For every image listed in ``label.txt`` under
    ``cfg['testing_dataset_path']`` a text file is written below
    ``FLAGS.save_folder`` containing the image name, the number of boxes and
    one ``x y w h confidence`` line per box. With ``FLAGS.save_image`` set,
    annotated images are also saved below ``./results/<sub_name>/``.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testset_folder = cfg['testing_dataset_path']
    testset_list = os.path.join(testset_folder, 'label.txt')

    img_paths, _ = load_info(testset_list)
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale
        if FLAGS.origin_size:
            if os.path.basename(img_path) == '6_Funeral_Funeral_6_618.jpg':
                resize = 0.5  # this image is too big to avoid OOM problem
            else:
                resize = 1
        else:
            target_size = 1600
            max_size = 2150
            short_side = np.min(img.shape[0:2])
            long_side = np.max(img.shape[0:2])
            resize = float(target_size) / float(short_side)
            # prevent bigger axis from being more than max_size:
            if np.round(resize * long_side) > max_size:
                resize = float(max_size) / float(long_side)

        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        txt_dir = os.path.join(FLAGS.save_folder, sub_dir)
        save_name = os.path.join(FLAGS.save_folder, sub_dir,
                                 img_name.replace('.jpg', '.txt'))

        pathlib.Path(txt_dir).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            confs = outputs[:, -1]

            file.write(img_name + "\n")
            file.write(str(len(bboxs)) + "\n")
            for box, conf in zip(bboxs, confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - x
                h = int(box[3] * img_height_raw) - y
                fields = (x, y, w, h, conf)
                file.write(" ".join(str(field) for field in fields) + " \n")

        # save images
        image_dir = os.path.join('./results', cfg['sub_name'], sub_dir)
        pathlib.Path(image_dir).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for detection in outputs:
                # index 15 is compared against the visualisation threshold
                if detection[15] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, detection,
                                    img_height_raw, img_width_raw)
            cv2.imwrite(
                os.path.join('./results', cfg['sub_name'], sub_dir, img_name),
                img_raw)
def main(_argv):
    """Crop a square around the most prominent face of every PNG under a folder.

    Every sub-directory of ``FLAGS.path`` is mirrored into a directory whose
    name has ``images`` replaced by ``cropped_images``. For each PNG the
    detections with score (index 15) of at least ``FLAGS.threshold_prob`` are
    kept, one face is selected, and a square around it is saved. Images that
    already have a cropped counterpart are skipped.
    """

    # FUNCTIONS FOR CROPPING
    def bounding_box(img, ann, img_height, img_width):
        """Scale the normalised box in ``ann[0:4]`` to integer pixel corners."""
        x1, y1, x2, y2 = int(ann[0] * img_width), int(ann[1] * img_height), \
            int(ann[2] * img_width), int(ann[3] * img_height)
        return x1, y1, x2, y2

    def calc_points(x, y, side):
        """Return ``(x1, x2, y1, y2)`` of a square of ``side`` centred on (x, y)."""
        return int(x - side / 2), int(x + side / 2), \
            int(y - side / 2), int(y + side / 2)

    def adjust_points(x_center, y_center, original_longest, scaling_factor,
                      min_scaling_factor):
        """Find a square crop that lies fully inside the current image.

        Factors from ``scaling_factor`` down to ``min_scaling_factor`` are
        tried in steps of 0.05. For each, the square is shifted back inside
        the image for up to ``FLAGS.max_iter`` rounds.

        Returns:
            ``(x1, x2, y1, y2, ok)``; ``ok`` is False when no factor fits.
        """
        img_h, img_w = img_raw.shape[0], img_raw.shape[1]
        # Defined up front so the failure return is valid even when the
        # factor range is empty.
        x1, x2, y1, y2 = calc_points(
            x_center, y_center, int(original_longest * scaling_factor))

        factors = np.arange(scaling_factor, min_scaling_factor - 0.04, -0.05)
        for factor in factors:
            # calculate next points
            x1, x2, y1, y2 = calc_points(
                x_center, y_center, int(original_longest * factor))
            for _ in range(FLAGS.max_iter):
                if x1 < 0:
                    x2 -= x1
                    x1 = 0
                if y1 < 0:
                    y2 -= y1
                    y1 = 0
                if x2 > img_w:
                    # Shift left by the overflow only. The original
                    # subtracted x2 itself, so a box overflowing the right
                    # edge could never be moved back into the image.
                    x1 -= x2 - img_w
                    x2 = img_w
                if y2 > img_h:
                    # Same correction for the bottom edge.
                    y1 -= y2 - img_h
                    y2 = img_h
                if x1 >= 0 and y1 >= 0 and x2 <= img_w and y2 <= img_h:
                    return x1, x2, y1, y2, True
        print("Not cropping", img_path,
              "due to a problem with a cropping square box")
        return x1, x2, y1, y2, False

    def get_dim(lst):
        """Return the box area of ``[x1, y1, x2, y2]`` as a one-element list."""
        return [(lst[3] - lst[1]) * (lst[2] - lst[0])]

    def get_max(outputs, lst):
        """Pick the detection that is both largest and most probable.

        ``lst`` holds ``[area, probability]`` per detection. When no detection
        maximises both, the largest one is returned.
        """
        area = [i[0] for i in lst]
        prob = [i[1] for i in lst]
        # Compute the maxima once instead of once per element.
        largest_area, highest_prob = max(area), max(prob)
        max_area_index = {i for i, j in enumerate(area) if j == largest_area}
        max_prob_index = {i for i, j in enumerate(prob) if j == highest_prob}
        indecies = max_area_index & max_prob_index
        if indecies:
            return [outputs[min(indecies)]]
        # if there is a mismatch, return the largest element
        if max_area_index:
            return [outputs[min(max_area_index)]]
        # precautionary because there should always be at least one face
        print("Not cropping", img_path,
              "due to a problem with returning the largest element")
        return []

    # MODEL
    # initialisation
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # CROPPING
    # check if the path exists
    if not os.path.exists(FLAGS.path):
        print(f"cannot find the specified path from {FLAGS.path}")
        exit()

    # make a corresponding directory
    try:
        os.mkdir(FLAGS.path.replace("images", "cropped_images"))
    except FileExistsError:
        print(FLAGS.path.replace("images", "cropped_images"), "already exists")

    # get subdirectories within the specified folder
    subdirectories = [FLAGS.path + '/' + i for i in os.listdir(FLAGS.path)
                      if os.path.isdir(FLAGS.path + '/' + i)]

    # loop through each folder
    for subdir in sorted(subdirectories):
        # number of trailing characters of the folder name shown in progress
        x = 3 if 'original' in subdir else 7
        cropped_dir = subdir.replace("images", "cropped_images")

        # create corresponding folders for cropped data and get all images
        try:
            os.mkdir(cropped_dir)
            images_lst = glob.glob(subdir + "/*.png")
            cropped_images_lst = []
            print(subdir[len(subdir) - x:len(subdir)])
        except FileExistsError:
            # count existing images in this subdirectory; skip when complete
            images_lst = glob.glob(subdir + "/*.png")
            cropped_images_lst = glob.glob(cropped_dir + "/*.png")
            # compare by the last 8 characters of each path
            cropped_images_lst = [
                e[len(e) - 8:len(e)] for e in cropped_images_lst
            ]
            if len(images_lst) == len(cropped_images_lst):
                print(subdir[len(subdir) - x:len(subdir)],
                      "has already been generated")
                continue
            else:
                print(subdir[len(subdir) - x:len(subdir)])

        # loop through each image in a given folder
        for img_path in sorted(images_lst):
            if img_path[len(img_path) - 8:len(img_path)] in cropped_images_lst:
                continue

            img_raw = cv2.imread(img_path)
            if img_raw is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of crashing on `.shape`.
                print("cannot read image", img_path)
                continue
            img_height_raw, img_width_raw, _ = img_raw.shape
            img = np.float32(img_raw.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image (unmatched shape problem), run model,
            # recover padding effect
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            outputs = model(img[np.newaxis, ...]).numpy()
            outputs = recover_pad_output(outputs, pad_params)

            # drop detections below the probability threshold
            outputs = [i for i in outputs if i[15] >= FLAGS.threshold_prob]

            # flag any images which have no recognised faces in them
            if len(outputs) == 0:
                print("no faces detected for", img_path)
            # if more than one face detected, select the largest and most
            # definite
            elif len(outputs) > 1:
                f = [list(bounding_box(img_raw, i[0:4], img_height_raw,
                                       img_width_raw)) + [i[15]]
                     for i in outputs]
                f = [get_dim(i[0:4]) + [i[4]] for i in f]
                outputs = get_max(outputs, f)

            # kept as a loop in case multiple faces per frame are used later
            for output in outputs:
                # get the bounding box coordinates
                bb_x1, bb_y1, bb_x2, bb_y2 = bounding_box(
                    img_raw, output, img_height_raw, img_width_raw)

                # the square is sized from the longest side of the box
                original_longest = int(max(bb_x2 - bb_x1, bb_y2 - bb_y1))
                x_center = int((bb_x1 + bb_x2) / 2)
                y_center = int((bb_y1 + bb_y2) / 2)

                x1, x2, y1, y2, save_image = adjust_points(
                    x_center, y_center, original_longest,
                    FLAGS.scaling_factor, FLAGS.min_scaling_factor)

                if save_image:
                    try:
                        save_img_path = cropped_dir + "/" \
                            + img_path.replace(subdir + '/', '')
                        cv2.imwrite(save_img_path, img_raw[y1:y2, x1:x2])
                    except Exception:
                        # Best effort: report and continue, but no longer
                        # swallow KeyboardInterrupt/SystemExit.
                        print(img_path, "is not cropped for unknown reasons")
def main(_argv):
    """Sort images by face/smile detection, or annotate a video stream.

    With ``FLAGS.webcam`` unset, every file in the hard-coded
    ``source_images`` directory is annotated and written to ``smile_face``,
    ``face`` or ``no_face`` depending on the detections. Otherwise the
    hard-coded video is decoded, annotated and shown until it ends or ``q`` is
    pressed.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        file_path = '/Users/lichaochao/Downloads/images_UMU/'
        for file_name in os.listdir(file_path + 'source_images/'):
            image_path = file_path + 'source_images/' + file_name
            if not os.path.exists(image_path):
                print(f"cannot find image path from {image_path}")
                continue

            img_raw = cv2.imread(image_path)
            if img_raw is None:
                # Non-image entries (hidden files, sub-directories) make
                # cv2.imread return None; skip them instead of crashing.
                print(f"cannot read image from {image_path}")
                continue
            img = np.float32(img_raw.copy())
            # NOTE(review): unlike the video branch, the image is fed to the
            # model without BGR->RGB conversion -- confirm this is intended.

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            img_height, img_width, _ch = img.shape

            # run model
            outputs = model(img[np.newaxis, ...])
            preds = tf.concat([
                outputs[0][0],
                outputs[1][0, :, 1][..., tf.newaxis],
                outputs[2][0, :, 1][..., tf.newaxis]
            ], -1)

            priors = prior_box_tf((img_height, img_width), cfg['min_sizes'],
                                  cfg['steps'], cfg['clip'])
            decode_preds = decode_tf(preds, priors, cfg['variances'])

            selected_indices = tf.image.non_max_suppression(
                boxes=decode_preds[:, :4],
                scores=decode_preds[:, -1],
                max_output_size=tf.shape(decode_preds)[0],
                iou_threshold=FLAGS.iou_th,
                score_threshold=FLAGS.score_th)

            outputs = tf.gather(decode_preds, selected_indices).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            has_face = False
            is_smile = False
            for ann in outputs:
                # last column: face score; second to last: smile score
                if ann[-1] >= 0.5:
                    has_face = True
                    x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
                    x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)

                    text = "face: {:.2f}".format(ann[-1] * 100)
                    cv2.putText(img, text, (x1 + 5, y1 - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    if ann[-2] >= 0.5:
                        is_smile = True
                        smile_text = "smile: {:.2f}".format(ann[-2] * 100)
                        cv2.putText(img, smile_text, (x1 + 5, y1 + 30),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                    (255, 255, 255))
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

            if is_smile:
                dst_file_path = file_path + '/smile_face/' + file_name
            elif has_face:
                dst_file_path = file_path + '/face/' + file_name
            else:
                dst_file_path = file_path + '/no_face/' + file_name
            # cv2.imwrite fails silently when the target directory is
            # missing, so make sure it exists.
            os.makedirs(os.path.dirname(dst_file_path), exist_ok=True)
            cv2.imwrite(dst_file_path, img)
            print(dst_file_path)

    else:
        cam = cv2.VideoCapture('./data/linda_umu.mp4')

        resize = FLAGS.down_scale_factor
        frame_height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT) * resize
        frame_width = cam.get(cv2.CAP_PROP_FRAME_WIDTH) * resize

        # Priors depend only on the padded input size, so build them once.
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps \
            if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps \
            if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        start_time = time.time()
        try:
            while cam.isOpened():
                _, frame = cam.read()
                if frame is None:
                    print('no cam')
                    break

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0), fx=resize, fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])
                preds = tf.concat([
                    outputs[0][0],
                    outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)

                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)

                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params,
                                             resize=resize)

                # calculate fps; clamp the interval so a coarse clock cannot
                # cause a division by zero
                elapsed = max(time.time() - start_time, 1e-6)
                fps_str = "FPS: %.2f" % (1 / elapsed)
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 50),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

                # draw results
                for output in outputs:
                    draw_bbox_landm(frame, output, frame_height, frame_width)

                # show frame
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Release on every exit path, including exit().
            cam.release()
            cv2.destroyAllWindows()
def main(_argv):
    """Detect, align and archive the largest face of every image, per class.

    Every sub-directory of ``FLAGS.folder_path`` is treated as one class.
    For each readable image in a class the RetinaFace model is run, the
    detection with the largest bounding-box area is kept, and the face is
    aligned with ``FaceAligner`` using the two eye landmarks. Aligned faces
    (reshaped to ``1x112x112x3``) and a boolean class-mask row are stacked
    and written to ``casia_image_<class>.npz`` / ``casia_label_<class>.npz``
    under ``FLAGS.destination_dir``. Progress is appended to ``log.txt`` in
    the same directory.

    Images that cannot be read, that yield no detection, or whose chosen
    box touches/exceeds the frame border are skipped. Classes that end up
    with no aligned face produce no ``.npz`` files (previously this case
    raised ``ValueError`` from ``np.concatenate`` and aborted the run).

    Args:
        _argv: Unused positional arguments (absl-style entry point).
    """
    mkdir(FLAGS.destination_dir)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    aligner = FaceAligner()

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    max_steps = max(cfg['steps'])

    with open(FLAGS.destination_dir + 'log.txt', 'a+') as log_txt:
        total = 0
        processed_total = 0
        CLASS_NAMES = np.array(os.listdir(FLAGS.folder_path))
        CLASS_NAMES.sort()

        # NOTE(review): the slice start 1978 is hard-coded in the original
        # (which flags this section as needing modification before use);
        # it is kept unchanged here.
        for f in CLASS_NAMES[1978:]:
            processed_image = 0
            images = []
            labels = []

            items = os.listdir(FLAGS.folder_path + f)
            for path in items:
                frame = cv2.imread(FLAGS.folder_path + f + '/' + path)
                if frame is None:
                    continue
                frame_height, frame_width, _ = frame.shape

                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)
                if len(outputs) < 1:
                    continue

                # keep only the detection with the largest box area
                ann = max(outputs,
                          key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))
                b_box = (int(ann[0] * frame_width),
                         int(ann[1] * frame_height),
                         int(ann[2] * frame_width),
                         int(ann[3] * frame_height))
                # skip faces whose box is not strictly inside the frame
                if (b_box[0] < 0 or b_box[1] < 0
                        or b_box[2] >= frame_width
                        or b_box[3] >= frame_height):
                    continue

                keypoints = {
                    'left_eye': (int(ann[4] * frame_width),
                                 int(ann[5] * frame_height)),
                    'right_eye': (int(ann[6] * frame_width),
                                  int(ann[7] * frame_height)),
                }
                out_frame = aligner.align(frame, keypoints, b_box)

                images.append(out_frame.reshape(1, 112, 112, 3))
                labels.append((CLASS_NAMES == f).reshape(1, -1))
                log_txt.write(FLAGS.destination_dir + f + '/' + path + "\n")
                processed_image += 1

            # np.concatenate rejects an empty sequence, so only write the
            # archives when at least one face was aligned for this class.
            if images:
                images_np = np.concatenate(images, axis=0)
                labels_np = np.concatenate(labels, axis=0)
                np.savez_compressed(
                    FLAGS.destination_dir + "casia_image_{}.npz".format(f),
                    images_np)
                np.savez_compressed(
                    FLAGS.destination_dir + "casia_label_{}.npz".format(f),
                    labels_np)

            print(f + " Done")
            log_txt.write(f + " Processed: " + str(processed_image)
                          + ' / ' + str(len(items)) + "\n")
            total += len(items)
            processed_total += processed_image

        log_txt.write("Processed total: " + str(processed_total)
                      + ' / ' + str(total))
def main(_argv):
    """Detect faces in one image and save the first nine crops as a 3x3 grid.

    The image at ``FLAGS.img_path`` is run through the RetinaFace model.
    Up to nine detections are cropped with ``get_bbox_imgs``, resized to
    64x64 and tiled row by row (three per row). Missing slots are filled
    with black tiles. The mosaic is written to ``data/out_<basename>``.

    The process exits if no checkpoint is found, if the image path does not
    exist, or if the file exists but OpenCV cannot decode it.

    Args:
        _argv: Unused positional arguments (absl-style entry point).
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not os.path.exists(FLAGS.img_path):
        print(f"cannot find image path from {FLAGS.img_path}")
        exit()

    print("[*] Processing on single image {}".format(FLAGS.img_path))

    img_raw = cv2.imread(FLAGS.img_path)
    # cv2.imread signals an undecodable file by returning None, not raising
    if img_raw is None:
        print(f"cannot read image from {FLAGS.img_path}")
        exit()
    img_height_raw, img_width_raw, _ = img_raw.shape

    img = np.float32(img_raw.copy())
    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    # run model
    outputs = model(img[np.newaxis, ...]).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    tile_size = 64
    grid_side = 3
    save_img_path = 'data/out_' + os.path.basename(FLAGS.img_path)

    tiles = []
    for prior_index in range(grid_side * grid_side):
        if prior_index < len(outputs):
            crop = get_bbox_imgs(img_raw, outputs[prior_index],
                                 img_height_raw, img_width_raw)
            tiles.append(cv2.resize(crop, (tile_size, tile_size)))
        else:
            # black filler tile for grid slots without a detection
            tiles.append(np.zeros((tile_size, tile_size, 3), dtype=np.uint8))

    # join three tiles side by side per row, then stack the rows vertically
    rows = [
        np.concatenate(tiles[start:start + grid_side], axis=1)
        for start in range(0, len(tiles), grid_side)
    ]
    mosaic = np.concatenate(rows, axis=0)

    cv2.imwrite(save_img_path, mosaic)
    print(f"[*] save result at {save_img_path}")
def main(_argv):
    """Build the RetinaFace model with fixed settings and return it.

    The configuration file, GPU id and detection thresholds are hard-coded
    rather than read from flags. TensorFlow logging is silenced, GPU memory
    growth is enabled, and the latest checkpoint found under
    ``./checkpoints/<cfg['sub_name']>`` is restored into the model.

    Args:
        _argv: Unused positional arguments (absl-style entry point).

    Returns:
        The ``RetinaFaceModel`` instance with checkpoint weights restored.
        If no checkpoint is found the process exits instead of returning.
    """
    # init
    config_path = './configs/retinaface_mbv2.yaml'
    gpu = '0'
    iou_th = 0.4
    score_th = 0.5

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(config_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=iou_th,
                            score_th=score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model
def main(_argv):
    """Run RetinaFace on a single image or on a video, saving the results.

    Image mode (``FLAGS.webcam`` false): detections are drawn on the image
    at ``FLAGS.img_path`` and the result is saved as ``out_<basename>`` in
    the working directory.

    Video mode: frames are read from ``FLAGS.vid_path``, annotated and
    appended to ``./output/output.avi``. Whenever at least
    ``fps * FLAGS.dfps`` frames have passed since the last save, the value
    returned by ``draw_bbox_landm`` for the current detection is written
    to ``FLAGS.dst_path`` as ``<counter>.png``. With ``FLAGS.preview`` the
    annotated frame is also shown. Pressing ``q`` exits.

    The video loop stops cleanly when the stream yields no frame
    (previously this crashed on ``frame.shape``), and the capture and the
    writer are always released so the AVI file is finalised.

    Args:
        _argv: Unused positional arguments (absl-style entry point).
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    max_steps = max(cfg['steps'])

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        # cv2.imread returns None for files it cannot decode
        if img_raw is None:
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img_height_raw, img_width_raw, _ = img_raw.shape

        img = np.float32(img_raw.copy())
        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max_steps)

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for ann in outputs:
            draw_bbox_landm(img_raw, ann, img_height_raw, img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture(FLAGS.vid_path)
        fps = int(cam.get(cv2.CAP_PROP_FPS))

        # Saving video to file: the writer needs the frame size up front.
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if not os.path.exists('./output'):
            print('Creating folder: output/ for saving video.')
            os.makedirs('./output')

        # MJPG-encoded AVI written at a fixed 10 frames per second
        out = cv2.VideoWriter('./output/output.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              10, (frame_width, frame_height))

        start_time = time.time()
        counter = 0
        frameCount = 0
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # end of stream or read failure: stop instead of
                    # crashing on frame.shape below
                    print("no cam input")
                    break

                frameCount = frameCount + 1
                frame_height, frame_width, _ = frame.shape

                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw results
                for ann in outputs:
                    croppedFace = draw_bbox_landm(frame, ann,
                                                  frame_height, frame_width)
                    # save one crop each time the frame budget is reached;
                    # resetting frameCount stops further saves this frame
                    if frameCount >= fps * FLAGS.dfps:
                        fileName = "%d.png" % counter
                        cv2.imwrite(FLAGS.dst_path + fileName, croppedFace)
                        print('Saved:', fileName)
                        counter = counter + 1
                        frameCount = 0

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # show frame
                if FLAGS.preview:
                    cv2.imshow('frame', frame)
                out.write(frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # also runs on exit(): release the source and close the writer
            cam.release()
            out.release()
def main(_argv):
    """Detect, align and classify faces from a live or recorded stream.

    ``FLAGS.input_stream`` selects the source: ``'0'`` opens camera 0,
    ``'rtsp'`` opens ``settings.RTSP_ADDR``, anything else is passed to
    ``cv2.VideoCapture`` unchanged. Each frame goes through the RetinaFace
    model. Every detection lying strictly inside the frame is aligned with
    ``FaceAligner``, embedded by the Keras model loaded from
    ``settings.CHECKPOINT_PATH`` and labelled with ``classify`` against the
    anchor/label arrays. Labels with probability below 0.5 are shown as
    ``"Unknown"``. The annotated frame is displayed. Pressing ``q`` exits.

    The loop stops when the stream yields no frame (previously this crashed
    on ``frame.shape``) and the capture is always released.

    Args:
        _argv: Unused positional arguments (absl-style entry point).
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    aligner = FaceAligner()

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if FLAGS.input_stream == '0':
        input_stream = 0
    elif FLAGS.input_stream == 'rtsp':
        input_stream = settings.RTSP_ADDR
    else:
        input_stream = FLAGS.input_stream

    cam = cv2.VideoCapture(input_stream)

    mbv2 = tf.keras.models.load_model(settings.CHECKPOINT_PATH)
    anchor_dataset = np.load(settings.ANCHOR_PATH)['arr_0']
    label_dataset = np.load(settings.LABEL_PATH)['arr_0']

    max_steps = max(cfg['steps'])
    start_time = time.time()
    try:
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                # end of stream or read failure: stop instead of crashing
                print("no cam input")
                break

            frame_height, frame_width, _ = frame.shape

            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max_steps)

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for ann in outputs:
                b_box = (int(ann[0] * frame_width),
                         int(ann[1] * frame_height),
                         int(ann[2] * frame_width),
                         int(ann[3] * frame_height))
                # skip faces whose box is not strictly inside the frame
                if (b_box[0] < 0 or b_box[1] < 0
                        or b_box[2] >= frame_width
                        or b_box[3] >= frame_height):
                    continue

                keypoints = {
                    'left_eye': (ann[4] * frame_width,
                                 ann[5] * frame_height),
                    'right_eye': (ann[6] * frame_width,
                                  ann[7] * frame_height),
                    # scaled to pixels like every other landmark; the
                    # original passed this one keypoint unscaled
                    'nose': (ann[8] * frame_width, ann[9] * frame_height),
                    'left_mouth': (ann[10] * frame_width,
                                   ann[11] * frame_height),
                    'right_mouth': (ann[12] * frame_width,
                                    ann[13] * frame_height),
                }

                out_frame = aligner.align(frame, keypoints, b_box)
                # NOTE(review): assumes the aligner already returns a
                # settings.IMAGE_SIZE square crop — reshape fails otherwise
                face_batch = out_frame.reshape(
                    -1, settings.IMAGE_SIZE, settings.IMAGE_SIZE, 3)
                embed_vector = mbv2(face_batch / 255.0)

                label, prob = classify(embed_vector, anchor_dataset,
                                       label_dataset)
                if prob < 0.5:
                    label = "Unknown"

                cv2.rectangle(frame, (b_box[0], b_box[1]),
                              (b_box[2], b_box[3]), (0, 255, 0), 2)
                cv2.putText(frame, label, (b_box[0], b_box[1]),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                text = "{:.4f}".format(prob)
                cv2.putText(frame, text, (b_box[0], b_box[1] + 15),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow("frame", frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
    finally:
        # also runs on exit(): free the camera / stream handle
        cam.release()
def main(_argv):
    """Annotate faces in a folder of images, or visualise eye alignment live.

    Image mode (``FLAGS.webcam`` false): ``FLAGS.img_path`` is walked
    recursively. Up to ``FLAGS.img_num`` ``.jpg`` files are run through the
    RetinaFace model, and each annotated result is saved as
    ``out_<basename>`` in the working directory. Files OpenCV cannot decode
    are reported and skipped.

    Webcam mode: frames from camera 0 are run through the model. For each
    decoded prediction the eye line, its angle in degrees and a helper
    right-angle triangle are drawn, and the frame is aligned with
    ``FaceAligner`` from the two eye positions. The annotated frame and the
    last aligned result are displayed. Pressing ``q`` exits.

    Fixes over the previous version: the webcam loop stops when no frame is
    delivered instead of crashing on ``frame.shape``; the FPS overlay now
    covers the whole frame time (the frame timer used to be overwritten by
    the inference timer); the capture is always released.

    Args:
        _argv: Unused positional arguments (absl-style entry point).
    """
    # init
    face_aligner = FaceAligner()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    max_steps = max(cfg['steps'])

    if not FLAGS.webcam:
        save_count = 0
        for path, _subdirs, files in os.walk(FLAGS.img_path):
            for name in files:
                if not name.endswith('.jpg'):
                    continue

                img_path = os.path.join(path, name)
                if not os.path.exists(img_path):
                    print(f"cannot find image path from {img_path}")
                    exit()

                # only the first FLAGS.img_num images are processed
                if save_count >= FLAGS.img_num:
                    continue

                print("[*] Processing on single image {}".format(img_path))

                img_raw = cv2.imread(img_path)
                # cv2.imread returns None for files it cannot decode
                if img_raw is None:
                    print(f"cannot read image from {img_path}")
                    continue
                img_height_raw, img_width_raw, _ = img_raw.shape

                img = np.float32(img_raw.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw and save results
                save_img_path = 'out_' + os.path.basename(img_path)
                for ann in outputs:
                    draw_bbox_landm(img_raw, ann,
                                    img_height_raw, img_width_raw)
                cv2.imwrite(save_img_path, img_raw)
                print(f"[*] save result at {save_img_path}")
                save_count += 1
    else:
        cam = cv2.VideoCapture(0)
        start_time = time.time()
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # no frame delivered: stop instead of crashing below
                    print("no cam input")
                    break

                frame_height, frame_width, _ = frame.shape
                orig_frame = frame.copy()
                rotated = None

                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model; timed with its own clock so the per-frame
                # FPS timer (start_time) is left untouched
                infer_start = time.time()
                outputs = model(img[np.newaxis, ...]).numpy()
                inference_time = f"Inf: {time.time() - infer_start}"
                cv2.putText(frame, inference_time, (25, 50),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw results
                for ann in outputs:
                    # NOTE(review): this decodes ALL outputs once per
                    # detection, as the original did; it looks
                    # loop-invariant and could probably be hoisted —
                    # confirm decode_predictions has no side effects.
                    preds = decode_predictions((frame_width, frame_height),
                                               outputs)
                    for value in preds.values():
                        left_eye = value[0]['left_eye']
                        right_eye = value[0]['right_eye']

                        # anchor for the angle label, up-left of left eye
                        x1_le = left_eye[0] - 25
                        y1_le = left_eye[1] - 25

                        # A is the right-angle corner of the triangle
                        # spanned by the two eyes
                        if left_eye[1] > right_eye[1]:
                            A = (right_eye[0], left_eye[1])
                        else:
                            A = (left_eye[0], right_eye[1])

                        # rotation of the eye line, in degrees
                        delta_x = right_eye[0] - left_eye[0]
                        delta_y = right_eye[1] - left_eye[1]
                        angle = np.arctan(
                            delta_y / (delta_x + 1e-17))  # avoid div by 0
                        angle = (angle * 180) / np.pi

                        cv2.circle(frame, A, 5, (255, 0, 0), -1)
                        cv2.putText(frame, str(int(angle)),
                                    (x1_le - 15, y1_le),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.75,
                                    (0, 255, 0), 2)

                        cv2.line(frame, right_eye, left_eye,
                                 (0, 200, 200), 3)
                        cv2.line(frame, left_eye, A, (0, 200, 200), 3)
                        cv2.line(frame, right_eye, A, (0, 200, 200), 3)

                        rotated = face_aligner.align(orig_frame, left_eye,
                                                     right_eye)

                    draw_bbox_landm(frame, ann, frame_height, frame_width)

                # calculate fps over the full frame-to-frame interval
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # show frame
                cv2.imshow('frame', frame)
                if rotated is not None:
                    cv2.imshow('face aligned', rotated)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # also runs on exit(): free the camera handle
            cam.release()