def run_model(img_path, model):
    """Run face detection on one image and save an annotated copy.

    The image is read with OpenCV, converted to a float32 RGB array, padded
    with ``pad_input_image`` (step size taken from the module-level ``cfg``),
    passed to ``model`` as a batch of one, and the padding effect is undone
    with ``recover_pad_output``. Every detection is then drawn on the
    original BGR image, which is written to disk.

    Args:
        img_path: Path of the image to process.
        model: Callable that accepts a batch holding the single padded image
            and returns an object exposing ``.numpy()``.

    Returns:
        The path of the written image: ``OUTPUT + <name> + '_OUTPUT.png'``,
        where ``<name>`` is the file name up to its first dot.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img_raw = cv2.imread(img_path)
    if img_raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image from {img_path}")
    img_height_raw, img_width_raw, _ = img_raw.shape

    img = cv2.cvtColor(np.float32(img_raw), cv2.COLOR_BGR2RGB)
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    outputs = model(img[np.newaxis, ...]).numpy()
    outputs = recover_pad_output(outputs, pad_params)

    name = os.path.basename(img_path).split('.')[0]
    saved_img_path = OUTPUT + name + '_OUTPUT.png'

    # Kept for backward compatibility: the original always created this folder.
    os.makedirs('outputs', exist_ok=True)
    # Make sure the folder the file is actually written to exists as well;
    # cv2.imwrite does not create missing directories.
    target_dir = os.path.dirname(saved_img_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    for detection in outputs:
        draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)
    cv2.imwrite(saved_img_path, img_raw)
    return saved_img_path
def main(
    image_path,
    config_path,
    export_path,
    ckpt_path,
    score_thres,
    iou_thres,
    detection_width,
    result_save_path,
):
    """Detect faces in one image with a SavedModel and save the annotated result.

    If ``export_path`` does not yet contain ``saved_model.pb`` the model is
    first exported from ``ckpt_path`` via ``export_to_saved_model``.

    Args:
        image_path: Path of the image to process.
        config_path: Path of the YAML config passed to ``load_yaml``.
        export_path: Directory of the SavedModel (created by export if needed).
        ckpt_path: Checkpoint used for the export, or ``None``.
        score_thres: Score threshold forwarded to ``_run_detection``.
        iou_thres: IoU threshold forwarded to ``_run_detection``.
        detection_width: Detection width forwarded to ``_run_detection``.
        result_save_path: Directory the annotated image is written to; it is
            created (with parents) when missing.

    Raises:
        ValueError: If no SavedModel exists and ``ckpt_path`` is ``None``.
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    config = load_yaml(config_path)

    if not Path(export_path).joinpath("saved_model.pb").exists():
        if ckpt_path is None:
            raise ValueError("Must provide a checkpoint to export model.")
        export_to_saved_model(ckpt_path, export_path, config)

    loaded_model = tf.saved_model.load(export_path)
    print("model_loaded")

    img_raw = cv2.imread(image_path)
    if img_raw is None:
        # cv2.imread returns None (no exception) for missing/corrupt files.
        raise FileNotFoundError(f"Cannot read image from {image_path}")
    img_rgb = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
    img_height_raw, img_width_raw, _ = img_rgb.shape

    outputs = _run_detection(loaded_model, img_rgb, score_thres, iou_thres, detection_width)

    # draw and save results
    result_save_path = Path(result_save_path)
    result_save_path.mkdir(exist_ok=True, parents=True)
    save_img_path = result_save_path.joinpath("result_" + Path(image_path).name)

    for detection in outputs:
        draw_bbox_landm(
            img_raw, detection, img_height_raw, img_width_raw, draw_score=True, draw_lm=True
        )
    cv2.imwrite(str(save_img_path), img_raw)
    print(f"Results saved at {save_img_path}")
def main(_argv):
    """Run face detection on ``FLAGS.image`` with a Keras or TF-TRT model.

    ``FLAGS.framework`` selects how ``FLAGS.weights`` is loaded: ``'tf'``
    loads a Keras model, ``'trt'`` loads a SavedModel and uses its
    ``serving_default`` signature. The annotated image is written to
    ``out.jpg``.

    Raises:
        ValueError: If ``FLAGS.framework`` is neither ``'tf'`` nor ``'trt'``.
        FileNotFoundError: If OpenCV cannot read ``FLAGS.image``.
    """
    cfg = load_yaml(FLAGS.cfg_path)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.framework == 'tf':
        infer = tf.keras.models.load_model(FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']
    else:
        # Previously an unknown value surfaced later as a NameError.
        raise ValueError(f"unsupported framework: {FLAGS.framework}")
    logging.info('weights loaded')

    img_raw = cv2.imread(FLAGS.image)
    if img_raw is None:
        # cv2.imread returns None (no exception) for missing/corrupt files.
        raise FileNotFoundError(f"cannot read image from {FLAGS.image}")
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
    batched_input = img[np.newaxis, ...]

    if FLAGS.framework == 'tf':
        outputs = infer(batched_input).numpy()
    else:
        # The original stored this result in an unused variable and then
        # failed with NameError on `outputs`.
        prediction = infer(batched_input)
        if isinstance(prediction, dict):
            # NOTE(review): assumes the serving signature exposes a single
            # output tensor laid out like the Keras model output - confirm.
            prediction = next(iter(prediction.values()))
        outputs = prediction.numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    save_img_path = 'out.jpg'
    for detection in outputs:
        draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)
    cv2.imwrite(save_img_path, img_raw)
    print(f"[*] save result at {save_img_path}")
def process_single_image(img_path, img_outputpath, model, cfg, data):
    """Detect faces in one image, save the annotated image and build JSON data.

    Args:
        img_path: Path of the image to process.
        img_outputpath: Path the annotated image is written to.
        model: Callable that accepts a batch holding the single padded image
            and returns an object exposing ``.numpy()``.
        cfg: Config mapping; ``cfg['steps']`` determines the padding step.
        data: Passed through unchanged to ``get_json_landmark_data``.

    Returns:
        Whatever ``get_json_landmark_data(data, img_outputpath, landmarks)``
        returns, where ``landmarks`` holds the return value of
        ``draw_bbox_landm`` for every detection.

    The process exits (via ``exit()``) when the image is missing or cannot be
    decoded.
    """
    if not os.path.exists(img_path):
        print(f"cannot find image path from {img_path}")
        exit()

    img_raw = cv2.imread(img_path)
    if img_raw is None:
        # The file exists but OpenCV could not decode it; handle it the same
        # way as a missing file instead of crashing on `.shape`.
        print(f"cannot read image from {img_path}")
        exit()
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    # Run model
    outputs = model(img[np.newaxis, ...]).numpy()

    # Recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    landmarks = [
        draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)
        for detection in outputs
    ]
    cv2.imwrite(img_outputpath, img_raw)
    return get_json_landmark_data(data, img_outputpath, landmarks)
def main(_argv):
    """Evaluate the model on the configured test set and dump detections.

    For every image listed in ``<testing_dataset_path>/label.txt`` the model
    is run and a text file is written to
    ``<FLAGS.save_folder>/<sub_dir>/<image stem>.txt`` containing the image
    name, the number of boxes and one ``x y w h confidence`` line per box
    (pixel units of the original image). With ``FLAGS.save_image`` the
    detections whose value at index 15 reaches ``FLAGS.vis_th`` are drawn and
    the image is saved under ``./results/<sub_name>/<sub_dir>/``.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the listed images.
    """
    # init
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = "./checkpoints/" + cfg["sub_name"]
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # testing scale limits
    target_size = 1600
    max_size = 2150

    # evaluation on testing dataset
    testset_folder = cfg["testing_dataset_path"]
    testset_list = os.path.join(testset_folder, "label.txt")
    img_paths, _ = load_info(testset_list)

    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths), img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread returns None (no exception) for missing/corrupt files.
            raise FileNotFoundError(f"cannot read image from {img_path}")
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # Scale the short side to target_size ...
        img_size_min = np.min(img.shape[0:2])
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_min)
        # ... but prevent the bigger axis from being more than max_size.
        if np.round(resize * img_size_max) > max_size:
            resize = float(max_size) / float(img_size_max)
        if FLAGS.origin_size:
            if os.path.basename(img_path) == "6_Funeral_Funeral_6_618.jpg":
                resize = 0.5  # this image is too big to avoid OOM problem
            else:
                resize = 1

        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        # NOTE(review): helper name carries a trailing underscore
        # (`pad_input_image_`); confirm this is the intended function.
        img, pad_params = pad_input_image_(img, max_steps=max(cfg["steps"]))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        result_dir = os.path.join(FLAGS.save_folder, sub_dir)
        # splitext (rather than replacing ".jpg") keeps the result file
        # distinct from the image name for any extension.
        save_name = os.path.join(result_dir, os.path.splitext(img_name)[0] + ".txt")
        pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            confs = outputs[:, -1]

            file.write(img_name + "\n")
            file.write(str(len(bboxs)) + "\n")
            for box, conf in zip(bboxs, confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                file.write(" ".join(map(str, (x, y, w, h, conf))) + " \n")

        # save images
        image_dir = os.path.join("./results", cfg["sub_name"], sub_dir)
        pathlib.Path(image_dir).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for detection in outputs:
                if detection[15] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, detection, img_height_raw,
                                    img_width_raw)
            cv2.imwrite(os.path.join(image_dir, img_name), img_raw)
def capture_face_img():
    """Interactively capture frontal face snapshots from camera 0.

    Each mirrored frame is passed to ``get_fv.get_face_value``. The largest
    detected face must be at least 150x150 px, lie near the frame centre and
    pass the roll/yaw/pitch limits (via ``find_roll``/``find_yaw``/
    ``find_pitch``); once that holds for three seconds the frame is saved as
    ``<n>.jpg`` with an increasing counter. The frame, masked with the guide
    image ``g.png``, is shown in a window until ESC is pressed or the camera
    stops delivering frames.

    Returns:
        The last processed (annotated) frame, or ``None`` if no frame was
        ever read.

    Raises:
        FileNotFoundError: If the guide image ``g.png`` cannot be read.
    """
    gauid = cv2.imread('g.png')
    if gauid is None:
        # cv2.imread returns None instead of raising; fail with a clear error
        # rather than inside cv2.resize.
        raise FileNotFoundError("cannot read guide image 'g.png'")

    capture = cv2.VideoCapture(0)  # open the camera
    capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    start_time = time.time()
    timer = time.time()
    name_count = 0
    frame = None

    try:
        while True:
            ret, grabbed = capture.read()  # read the camera input
            if not ret or grabbed is None:
                print("no cam input")
                break
            frame = cv2.flip(grabbed, 1)  # mirror the image horizontally
            key = cv2.waitKey(33)

            outputs = get_fv.get_face_value(frame)
            print(outputs)

            frame_height, frame_width, _ = frame.shape
            gauid = cv2.resize(gauid, dsize=(frame_width, frame_height),
                               interpolation=cv2.INTER_AREA)

            if len(outputs) == 0:
                cv2.putText(frame, "No Person in Frame", (200, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
            else:
                # at least one person detected
                # landmark coordinates in pixels: five x values, then five y
                lms = np.zeros((len(outputs), 10))
                lms[:, 0:5] = outputs[:, [4, 6, 8, 10, 12]] * frame_width
                lms[:, 5:10] = outputs[:, [5, 7, 9, 11, 13]] * frame_height

                # pick the face with the largest bounding-box area
                box_arr = []
                for det in outputs:
                    x1, y1 = int(det[0] * frame_width), int(det[1] * frame_height)
                    x2, y2 = int(det[2] * frame_width), int(det[3] * frame_height)
                    box_arr.append((x2 - x1) * (y2 - y1))
                max_size = max(box_arr)
                max_idx = box_arr.index(max_size)

                best = outputs[max_idx]
                max_x1, max_y1 = int(best[0] * frame_width), int(best[1] * frame_height)
                max_x2, max_y2 = int(best[2] * frame_width), int(best[3] * frame_height)

                # centre of the face box
                center_x = max_x1 + (max_x2 - max_x1) / 2
                center_y = max_y1 + (max_y2 - max_y1) / 2
                half_w = int(frame_width / 2)
                half_h = int(frame_height / 2)
                is_center_x = half_w - 40 < center_x < half_w + 40
                is_center_y = half_h - 60 < center_y < half_h + 20

                cv2.circle(frame, (half_w, half_h), 1, (255, 255, 0), 1)

                # show coordinates for checking
                cv2.putText(
                    frame,
                    "bbox_center" + "(" + str(center_x) + "," + str(center_y) + ")",
                    (400, 40), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 0), 1)
                cv2.putText(
                    frame,
                    "bbox_center" + "(" + str(is_center_x) + "," + str(is_center_y) + ")",
                    (400, 60), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 0), 1)

                # the face must be large enough and inside the guide area
                # (centre of the frame)
                if not (is_center_x and is_center_y):
                    cv2.putText(frame, "Please come to the center", (200, 25),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
                    timer = time.time()
                elif max_size < 150 * 150:
                    cv2.putText(frame, "Please come closer", (200, 25),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
                    timer = time.time()
                else:
                    draw_bbox_landm(frame, best, frame_height, frame_width)
                    # Show the size of the selected (largest) face; the
                    # original printed the size of the last box in the list.
                    cv2.putText(
                        frame,
                        "(" + str(max_x2 - max_x1) + "," + str(max_y2 - max_y1) + ")",
                        (25, 140), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 0), 1)

                    # three-axis pose filter
                    roll = find_roll(lms[max_idx])
                    yaw = find_yaw(lms[max_idx])
                    pitch = find_pitch(lms[max_idx])
                    if -10 <= roll <= 10 and -15 <= yaw <= 15 and -2 <= pitch <= 2:
                        cv2.putText(frame, "Angle Filter Pass", (200, 25),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 1)

                        # countdown timer
                        elapsed = time.time() - timer
                        if 0 <= elapsed < 3:
                            cv2.putText(frame, str(3 - int(elapsed)) + 'sec ',
                                        (500, 30), cv2.FONT_HERSHEY_DUPLEX,
                                        0.5, (255, 0, 0), 1)
                        if elapsed >= 3:
                            # the face stayed valid for 3 seconds: capture
                            cv2.imwrite('{}.jpg'.format(name_count), frame)
                            name_count += 1
                            timer = time.time()
                    else:
                        cv2.putText(frame, "Angle Filter NO Pass", (200, 25),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 1)
                        timer = time.time()

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 1)

            cv2.imshow('frame', frame & gauid)

            if key == 27:  # ESC
                break
    finally:
        # Always free the camera and windows, also on errors.
        capture.release()
        cv2.destroyAllWindows()

    return frame
def main(_argv):
    """Sort images by face/smile detection, or run detection on a video.

    Without ``FLAGS.webcam`` every file in the hard-coded
    ``source_images`` folder is run through the model, annotated and written
    to a ``smile_face``, ``face`` or ``no_face`` folder depending on whether
    any detection has its last value (face score) and second-to-last value
    (smile score) at or above 0.5. With ``FLAGS.webcam`` the video
    ``./data/linda_umu.mp4`` is processed frame by frame and displayed until
    it ends or ``q`` is pressed.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        file_path = '/Users/lichaochao/Downloads/images_UMU/'
        source_dir = file_path + 'source_images/'
        for file_name in os.listdir(source_dir):
            image_path = source_dir + file_name
            if not os.path.exists(image_path):
                print(f"cannot find image path from {image_path}")
                continue

            img_raw = cv2.imread(image_path)
            if img_raw is None:
                # Directory listings may contain non-image entries; imread
                # returns None for them instead of raising.
                print(f"cannot read image from {image_path}")
                continue
            # NOTE(review): unlike the video branch, no BGR->RGB conversion
            # is applied here; the model receives BGR data - confirm intent.
            img = np.float32(img_raw.copy())

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            img_height, img_width, _ch = img.shape

            # run model
            outputs = model(img[np.newaxis, ...])
            preds = tf.concat([
                outputs[0][0],
                outputs[1][0, :, 1][..., tf.newaxis],
                outputs[2][0, :, 1][..., tf.newaxis]
            ], -1)
            priors = prior_box_tf((img_height, img_width), cfg['min_sizes'],
                                  cfg['steps'], cfg['clip'])
            decode_preds = decode_tf(preds, priors, cfg['variances'])
            selected_indices = tf.image.non_max_suppression(
                boxes=decode_preds[:, :4],
                scores=decode_preds[:, -1],
                max_output_size=tf.shape(decode_preds)[0],
                iou_threshold=FLAGS.iou_th,
                score_threshold=FLAGS.score_th)
            outputs = tf.gather(decode_preds, selected_indices).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            has_face = False
            is_smile = False
            # NOTE(review): boxes are scaled with the padded image size and
            # drawn on the padded float image - confirm this is intended.
            for ann in outputs:
                if ann[-1] < 0.5:
                    continue
                has_face = True
                x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
                x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)

                text = "face: {:.2f}".format(ann[-1] * 100)
                cv2.putText(img, text, (x1 + 5, y1 - 10),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                if ann[-2] >= 0.5:
                    is_smile = True
                    smile_text = "smile: {:.2f}".format(ann[-2] * 100)
                    cv2.putText(img, smile_text, (x1 + 5, y1 + 30),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                else:
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

            if is_smile:
                dst_file_path = file_path + '/smile_face/' + file_name
            elif has_face:
                dst_file_path = file_path + '/face/' + file_name
            else:
                dst_file_path = file_path + '/no_face/' + file_name
            # cv2.imwrite fails silently when the target folder is missing.
            os.makedirs(os.path.dirname(dst_file_path), exist_ok=True)
            cv2.imwrite(dst_file_path, img)
            print(dst_file_path)
    else:
        cam = cv2.VideoCapture('./data/linda_umu.mp4')

        resize = FLAGS.down_scale_factor
        frame_height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT) * resize
        frame_width = cam.get(cv2.CAP_PROP_FRAME_WIDTH) * resize

        # Priors only depend on the (padded) frame size: build them once.
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps \
            if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps \
            if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        start_time = time.time()
        try:
            while cam.isOpened():
                _, frame = cam.read()
                if frame is None:
                    print('no cam')
                    break

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0), fx=resize, fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])
                preds = tf.concat([
                    outputs[0][0],
                    outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)
                decode_preds = decode_tf(preds, priors, cfg['variances'])
                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)
                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params, resize=resize)

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 50),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

                # draw results
                for detection in outputs:
                    draw_bbox_landm(frame, detection, frame_height, frame_width)

                # show frame
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the video source and close the window on any exit path.
            cam.release()
            cv2.destroyAllWindows()
def main(_argv):
    """Run face detection on a single image or on a video file.

    Without ``FLAGS.webcam`` the image ``FLAGS.img_path`` is processed and
    the annotated copy is written to ``out_<basename>`` in the working
    directory. With ``FLAGS.webcam`` the video
    ``./videos/Bentall_Centra.MP4`` is processed frame by frame and shown in
    a window until it ends or ``q`` is pressed.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    max_steps = max(cfg['steps'])

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        if img_raw is None:
            # The file exists but OpenCV could not decode it.
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max_steps)

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for detection in outputs:
            draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture("./videos/Bentall_Centra.MP4")

        start_time = time.time()
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # End of video / read failure: stop instead of crashing
                    # on `frame.shape` below.
                    print("no cam input")
                    break

                frame_height, frame_width, _ = frame.shape
                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw results
                for detection in outputs:
                    draw_bbox_landm(frame, detection, frame_height, frame_width)

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # show frame
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the video source and close the window on any exit path.
            cam.release()
            cv2.destroyAllWindows()
using_flip=True, using_distort=False, using_encoding=using_encoding, priors=priors, match_thresh=match_thresh, ignore_thresh=ignore_thresh, variances=variances, shuffle=False) start_time = time.time() for idx, (inputs, labels) in enumerate(train_dataset.take(num_samples)): print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape) if not visualization: continue img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8) if not using_encoding: # labels includes loc, landm, landm_valid. targets = labels.numpy()[0] for target in targets: draw_bbox_landm(img, target, img_dim, img_dim) else: # labels includes loc, landm, landm_valid, conf. targets = decode_tf(labels[0], priors, variances=variances).numpy() for prior_index in range(len(targets)): if targets[prior_index][-1] == 1: draw_bbox_landm(img, targets[prior_index], img_dim, img_dim) draw_anchor(img, priors[prior_index], img_dim, img_dim) cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) if cv2.waitKey(0) == ord('q'): exit() print("data fps: {:.2f}".format(num_samples / (time.time() - start_time)))
def main(_):
    """Iterate the TFRecord dataset and optionally dump annotated samples.

    The dataset is loaded from ``FLAGS.dataset_path`` / ``FLAGS.split`` with
    flipping and distortion enabled. For every batch the input and label
    shapes are printed; with ``FLAGS.visualization`` the first image of the
    batch is annotated (ground-truth targets, or decoded positive priors and
    their anchors when ``FLAGS.using_encoding`` is set) and written to
    ``<FLAGS.output_path>/<idx>.png``.
    """
    import os  # local import: used only to create the output directory

    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False

    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3
    batch_size = 1
    shuffle = True
    using_flip = True
    using_distort = True
    using_bin = True
    buffer_size = 4000
    number_cycles = 2
    threads = 2

    check_dataset = load_tfrecord_dataset(dataset_root=FLAGS.dataset_path,
                                          split=FLAGS.split,
                                          threads=threads,
                                          number_cycles=number_cycles,
                                          batch_size=batch_size,
                                          hvd=[],
                                          img_dim=img_dim,
                                          using_bin=using_bin,
                                          using_flip=using_flip,
                                          using_distort=using_distort,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=shuffle,
                                          buffer_size=buffer_size)

    if FLAGS.visualization:
        # cv2.imwrite does not create missing folders and fails silently.
        os.makedirs(FLAGS.output_path, exist_ok=True)

    for idx, (inputs, labels, _) in enumerate(check_dataset):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            for target in labels.numpy()[0]:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            targets = decode_tf(labels[0], priors, variances=variances).numpy()
            for prior_index, target in enumerate(targets):
                if target[-1] != 1:
                    continue
                draw_bbox_landm(img, target, img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

        # Reverse the channel axis before writing (img[:, :, ::-1]).
        cv2.imwrite('{}/{}.png'.format(FLAGS.output_path, str(idx)), img[:, :, ::-1])
def main(_argv):
    """Run detection on one image, or annotate a video and save it.

    Without ``FLAGS.webcam`` the image ``FLAGS.img_path`` is scaled so its
    longer side is 320 px, processed, and the annotated (padded) image is
    written to ``out_<basename>``. With ``FLAGS.webcam`` the video
    ``./data/lichaochao.mp4`` is processed frame by frame; annotated frames
    are shown and written to ``chaochao1.mp4`` until the video ends or ``q``
    is pressed.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        if img_raw is None:
            # The file exists but OpenCV could not decode it.
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img = np.float32(img_raw.copy())

        # testing scale: longer side becomes target_size pixels
        target_size = 320
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_max)
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # draw and save results
        # NOTE(review): detections are scaled by target_size on both axes
        # although only the longer side equals target_size - confirm.
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for detection in outputs:
            draw_bbox_landm(img, detection, target_size, target_size)
        cv2.imwrite(save_img_path, img)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture('./data/lichaochao.mp4')

        source_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
        source_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fps = cam.get(cv2.CAP_PROP_FPS)
        # OpenCV expects frameSize as (width, height); the original passed
        # (height, width), which does not match the frames written below.
        out = cv2.VideoWriter('chaochao1.mp4', fourcc, fps=fps,
                              frameSize=(source_width, source_height))

        resize = FLAGS.down_scale_factor
        frame_height = source_height * resize
        frame_width = source_width * resize

        # Priors only depend on the (padded) frame size: build them once.
        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps \
            if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps \
            if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        try:
            while cam.isOpened():
                _, frame = cam.read()
                if frame is None:
                    print('no cam')
                    break

                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0), fx=resize, fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])
                preds = tf.concat([
                    outputs[0][0],
                    outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)
                decode_preds = decode_tf(preds, priors, cfg['variances'])
                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)
                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params, resize=resize)

                # draw results
                for detection in outputs:
                    draw_bbox_landm(frame, detection, frame_height, frame_width)

                # save and show frame
                out.write(frame)
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Releasing the writer finalizes the output file.
            out.release()
            cam.release()
            cv2.destroyAllWindows()
def main(_):
    """Iterate 100 training samples, optionally showing them, and report speed.

    Loads the WIDER FACE training TFRecord (binary or not, per
    ``FLAGS.using_bin``), prints the shapes of every batch and, when
    ``FLAGS.visualization`` is set, displays the first image of each batch
    with its targets drawn. Pressing ``q`` in the window exits. Finally the
    achieved samples-per-second figure is printed.
    """
    img_dim = 640
    anchor_sizes = [[16, 32], [64, 128], [256, 512]]
    anchor_steps = [8, 16, 32]
    priors = prior_box((img_dim, img_dim), anchor_sizes, anchor_steps, False)

    variances = [0.1, 0.2]
    num_samples = 100

    if FLAGS.using_encoding:
        assert FLAGS.batch_size == 1

    tfrecord_name = (
        './data/widerface_train_bin.tfrecord'
        if FLAGS.using_bin
        else './data/widerface_train.tfrecord'
    )

    train_dataset = load_tfrecord_dataset(
        tfrecord_name,
        FLAGS.batch_size,
        img_dim=640,
        using_bin=FLAGS.using_bin,
        using_flip=True,
        using_distort=False,
        using_encoding=FLAGS.using_encoding,
        priors=priors,
        match_thresh=0.45,
        ignore_thresh=0.3,
        variances=variances,
        shuffle=False,
    )

    began_at = time.time()
    for idx, (inputs, labels) in enumerate(train_dataset.take(num_samples)):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        canvas = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)

        if FLAGS.using_encoding:
            # labels hold loc, landm, landm_valid and conf: decode them and
            # draw only the priors marked with conf == 1.
            decoded = decode_tf(labels[0], priors, variances=variances).numpy()
            for prior_index, target in enumerate(decoded):
                if target[-1] == 1:
                    draw_bbox_landm(canvas, target, img_dim, img_dim)
                    draw_anchor(canvas, priors[prior_index], img_dim, img_dim)
        else:
            # labels hold loc, landm and landm_valid.
            for target in labels.numpy()[0]:
                draw_bbox_landm(canvas, target, img_dim, img_dim)

        cv2.imshow('img', cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(0) == ord('q'):
            exit()

    elapsed = time.time() - began_at
    print("data fps: {:.2f}".format(num_samples / elapsed))
def main(_argv):
    """Detect faces in a folder of JPEGs, or align faces from camera 0.

    Without ``FLAGS.webcam`` the tree under ``FLAGS.img_path`` is walked and
    up to ``FLAGS.img_num`` ``.jpg`` files are processed; each annotated
    image is written to ``out_<basename>`` in the working directory. With
    ``FLAGS.webcam`` frames from camera 0 are processed: the eye line, its
    angle and the detections are drawn, the face is aligned with
    ``FaceAligner`` and both windows are shown until ``q`` is pressed or the
    camera stops delivering frames.
    """
    # init
    face_aligner = FaceAligner()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    max_steps = max(cfg['steps'])

    if not FLAGS.webcam:
        save_count = 0
        for path, subdirs, files in os.walk(FLAGS.img_path):
            for name in files:
                if not name.endswith('.jpg'):
                    continue

                img_path = os.path.join(path, name)
                if not os.path.exists(img_path):
                    print(f"cannot find image path from {img_path}")
                    exit()

                if save_count >= FLAGS.img_num:
                    continue

                print("[*] Processing on single image {}".format(img_path))

                img_raw = cv2.imread(img_path)
                if img_raw is None:
                    # Unreadable/corrupt file: skip it instead of crashing.
                    print(f"cannot read image from {img_path}")
                    continue
                img_height_raw, img_width_raw, _ = img_raw.shape
                img = np.float32(img_raw.copy())

                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0),
                                     fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...]).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # draw and save results
                save_img_path = os.path.join('out_' + os.path.basename(img_path))
                for detection in outputs:
                    draw_bbox_landm(img_raw, detection, img_height_raw,
                                    img_width_raw)
                cv2.imwrite(save_img_path, img_raw)
                print(f"[*] save result at {save_img_path}")
                save_count += 1
    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # Stop instead of crashing on `frame.shape` below.
                    print("no cam input")
                    break

                frame_height, frame_width, _ = frame.shape
                orig_frame = frame.copy()
                face = None
                rotated = None

                img = np.float32(frame.copy())
                if FLAGS.down_scale_factor < 1.0:
                    img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                     fy=FLAGS.down_scale_factor,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model; timed separately so the FPS value below still
                # covers the whole frame.
                inference_start = time.time()
                outputs = model(img[np.newaxis, ...]).numpy()
                inference_time = f"Inf: {time.time() - inference_start}"
                cv2.putText(frame, inference_time, (25, 50),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params)

                # The decoded predictions do not depend on the loop index:
                # compute them once per frame.
                preds = (decode_predictions((frame_width, frame_height), outputs)
                         if len(outputs) else {})

                # draw results
                for detection in outputs:
                    for value in preds.values():
                        bbox = value[0]['bbox']
                        left_eye = value[0]['left_eye']
                        right_eye = value[0]['right_eye']

                        # Our face ROI
                        face = orig_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                        # Anchor of the angle label, above-left of the left eye
                        x1_le = left_eye[0] - 25
                        y1_le = left_eye[1] - 25

                        # Third corner of the right triangle on the eye line
                        if left_eye[1] > right_eye[1]:
                            A = (right_eye[0], left_eye[1])
                        else:
                            A = (left_eye[0], right_eye[1])

                        # Calc our rotating degree
                        delta_x = right_eye[0] - left_eye[0]
                        delta_y = right_eye[1] - left_eye[1]
                        angle = np.arctan(
                            delta_y / (delta_x + 1e-17))  # avoid division by zero
                        angle = (angle * 180) / np.pi

                        cv2.circle(frame, A, 5, (255, 0, 0), -1)
                        cv2.putText(frame, str(int(angle)), (x1_le - 15, y1_le),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                        cv2.line(frame, right_eye, left_eye, (0, 200, 200), 3)
                        cv2.line(frame, left_eye, A, (0, 200, 200), 3)
                        cv2.line(frame, right_eye, A, (0, 200, 200), 3)
                        cv2.line(frame, (left_eye[0], left_eye[1]),
                                 (right_eye[0], right_eye[1]), (0, 200, 200), 3)

                        rotated = face_aligner.align(orig_frame, left_eye,
                                                     right_eye)

                    draw_bbox_landm(frame, detection, frame_height, frame_width)

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # show frame
                cv2.imshow('frame', frame)
                if face is not None:
                    cv2.imshow('face aligned', rotated)

                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Release the camera and close the windows on any exit path.
            cam.release()
            cv2.destroyAllWindows()
def main(_argv):
    """Run the detector over the testing dataset and write per-image results.

    The model is built from the YAML config at ``FLAGS.cfg_path`` and the
    latest checkpoint under ``./checkpoints/<cfg['sub_name']>`` is restored;
    the process exits if no checkpoint is found.

    For every image path returned by ``load_info``:

    * the image is read, optionally downscaled so that its longer side is
      320 px (skipped when ``FLAGS.origin_size`` is set), converted to RGB,
      padded and passed through the model;
    * a text file ``<FLAGS.save_folder>/<sub_dir>/<image stem>.txt`` is
      written containing the image name, the number of boxes, and one
      ``x y w h smile_conf face_conf`` line per detection (pixel units in
      the original image);
    * when ``FLAGS.save_image`` is set, detections whose last output column
      is at least ``FLAGS.vis_th`` are drawn and the annotated image is
      saved under ``./results/<cfg['sub_name']>/<sub_dir>/``.

    Images that OpenCV cannot read are reported and skipped instead of
    aborting the whole evaluation.

    Args:
        _argv: Unused.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    # Resolve the checkpoint path once so that the path that is restored is
    # exactly the path that is reported.
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testing_dataset_path = cfg['testing_dataset_path']
    img_paths, _ = load_info(testing_dataset_path,
                             './data/CelebA/train_labels.txt')
    max_steps = max(cfg['steps'])  # loop-invariant padding granularity

    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread signals a missing/corrupt file by returning None.
            print("[*] Cannot read image {}, skipped.".format(img_path))
            continue

        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale: shrink/grow so the longer side becomes target_size
        if not FLAGS.origin_size:
            target_size = 320
            img_size_max = np.max(img.shape[0:2])
            resize = float(target_size) / float(img_size_max)
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max_steps)

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        result_dir = os.path.join(FLAGS.save_folder, sub_dir)
        # splitext instead of str.replace('.jpg', ...): a non-.jpg image would
        # otherwise get a result file named exactly like the image itself.
        save_name = os.path.join(
            result_dir, os.path.splitext(img_name)[0] + '.txt')

        pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            smile_confs = outputs[:, 4]
            face_confs = outputs[:, -1]

            file.write(img_name + "\n")
            file.write(str(len(bboxs)) + "\n")
            for box, smile_conf, face_conf in zip(bboxs, smile_confs,
                                                  face_confs):
                # Box coordinates are multiplied by the raw image size to get
                # pixels; width/height are differences of truncated corners.
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - x
                h = int(box[3] * img_height_raw) - y
                fields = (x, y, w, h, smile_conf, face_conf)
                file.write(" ".join(map(str, fields)) + " \n")

        # save images
        image_dir = os.path.join('./results', cfg['sub_name'], sub_dir)
        pathlib.Path(image_dir).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for detection in outputs:
                if detection[-1] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, detection,
                                    img_height_raw, img_width_raw)
            cv2.imwrite(os.path.join(image_dir, img_name), img_raw)
def main(_argv):
    """Build the RetinaFace model from hard-coded settings and return it.

    The network is created from the YAML config at ``CONFIG_PATH`` and the
    latest checkpoint under ``./checkpoints/<cfg['sub_name']>`` is restored;
    the process exits if no checkpoint is found.

    NOTE(review): the original indentation of this block was lost.  As laid
    out here, every statement after ``return model`` is unreachable, and that
    trailing code reads ``input_image``, which is not bound anywhere in this
    function.  It looks like the image/webcam part was meant to be a separate
    function taking ``input_image`` and ``model`` -- confirm against the
    callers before relying on either half.

    Args:
        _argv: Unused.
    """
    # init
    CONFIG_PATH = './configs/retinaface_mbv2.yaml'
    GPU = '0'
    IOU_TH = 0.4
    SCORE_TH = 0.5
    WEBCAM = False
    DOWN_FACTOR = 1.0  # values below 1.0 shrink the input before inference

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = GPU

    # silence the TensorFlow logger
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(CONFIG_PATH)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=IOU_TH,
                            score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model

    # NOTE(review): nothing below this point appears reachable (see docstring).
    if not WEBCAM:
        # if not os.path.exists(IMG_PATH):
        #     print(f"cannot find image path from {IMG_PATH}")
        #     exit()
        # print("[*] Processing on single image {}".format(IMG_PATH))
        # img_raw = cv2.imread(IMG_PATH)
        # NOTE(review): input_image is not defined in this scope.
        img_raw = input_image
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if DOWN_FACTOR < 1.0:
            img = cv2.resize(img, (0, 0), fx=DOWN_FACTOR,
                             fy=DOWN_FACTOR,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = "result.jpg"
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()
        while True:
            _, frame = cam.read()
            # NOTE(review): a None frame is only reported; the next line would
            # still dereference it.
            if frame is None:
                print("no cam input")

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if DOWN_FACTOR < 1.0:
                img = cv2.resize(img, (0, 0), fx=DOWN_FACTOR,
                                 fy=DOWN_FACTOR,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index],
                                frame_height, frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
    return outputs
def main(_argv):
    """Detect faces on a single image or on a video stream.

    The model is built from the YAML config at ``FLAGS.cfg_path`` and the
    latest checkpoint under ``./checkpoints/<cfg['sub_name']>`` is restored;
    the process exits if no checkpoint is found.

    * ``FLAGS.webcam`` false: ``FLAGS.img_path`` is read, detections are
      drawn on it and the result is written to ``out_<image basename>`` in
      the current directory.
    * ``FLAGS.webcam`` true: frames are read from ``FLAGS.vid_path``,
      annotated, and appended to ``./output/output.avi``.  Whenever the
      number of frames since the last save reaches
      ``fps * FLAGS.dfps``, the value returned by ``draw_bbox_landm`` for a
      detection is written to ``FLAGS.dst_path + "<counter>.png"``.  The
      loop ends when the stream yields no frame; pressing ``q`` exits the
      process.  The capture and the writer are released in both cases.

    Args:
        _argv: Unused.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    max_steps = max(cfg['steps'])

    def _detect(bgr_image):
        """Return the model outputs for one BGR image, with padding undone."""
        img = np.float32(bgr_image.copy())
        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max_steps)

        # run model, then recover padding effect
        outputs = model(img[np.newaxis, ...]).numpy()
        return recover_pad_output(outputs, pad_params)

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        if img_raw is None:
            # The file exists but OpenCV could not decode it.
            print(f"cannot read image from {FLAGS.img_path}")
            exit()
        img_height_raw, img_width_raw, _ = img_raw.shape

        outputs = _detect(img_raw)

        # draw and save results
        save_img_path = 'out_' + os.path.basename(FLAGS.img_path)
        for detection in outputs:
            draw_bbox_landm(img_raw, detection, img_height_raw,
                            img_width_raw)
        cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture(FLAGS.vid_path)
        fps = int(cam.get(cv2.CAP_PROP_FPS))

        # Saving video to file
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Define the codec and create the VideoWriter object.  The output is
        # stored in './output/output.avi'.
        if not os.path.exists('./output'):
            print('Creating folder: output/ for saving video.')
            os.makedirs('./output')
        out = cv2.VideoWriter('./output/output.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              10, (frame_width, frame_height))

        start_time = time.time()
        saved_count = 0
        frames_since_save = 0
        try:
            while True:
                _, frame = cam.read()
                if frame is None:
                    # End of stream or read failure: stop instead of
                    # dereferencing a None frame below.
                    print("no cam input")
                    break

                frames_since_save += 1
                frame_height, frame_width, _ = frame.shape

                outputs = _detect(frame)

                # draw results
                for detection in outputs:
                    cropped_face = draw_bbox_landm(frame, detection,
                                                   frame_height, frame_width)
                    # NOTE(review): the counter is reset inside the loop, so
                    # at most one crop is saved per interval -- confirm this
                    # nesting matches the intended behaviour.
                    if frames_since_save >= fps * FLAGS.dfps:
                        file_name = "%d.png" % saved_count
                        cv2.imwrite(FLAGS.dst_path + file_name, cropped_face)
                        print('Saved:', file_name)
                        saved_count += 1
                        frames_since_save = 0

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 25),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

                # show frame
                if FLAGS.preview:
                    cv2.imshow('frame', frame)
                out.write(frame)
                if cv2.waitKey(1) == ord('q'):
                    exit()
        finally:
            # Runs on normal end-of-stream, on exit() (SystemExit) and on
            # errors, so the AVI container is always finalised.
            cam.release()
            out.release()