def get_model(cfg_path):
    # init
    IOU_TH = 0.5
    SCORE_TH = 0.4
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=IOU_TH, score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        # print("[*] load ckpt from {}.".format(tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model
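# --- usage sketch (editor addition, not from the original script) ---
# A minimal sketch of how get_model() above could be called, assuming the same
# repo layout as the other scripts in this collection (modules.utils helpers, a
# MobileNetV2 config under ./configs/). The image path and the example as a
# whole are hypothetical.
if __name__ == '__main__':
    import cv2
    import numpy as np
    from modules.utils import pad_input_image, recover_pad_output

    detector = get_model('./configs/retinaface_mbv2.yaml')      # assumed config path
    sample = np.float32(cv2.imread('./data/sample.jpg'))         # hypothetical image
    sample, pad_params = pad_input_image(sample, max_steps=32)   # 32 assumed from typical cfg['steps']
    detections = recover_pad_output(detector(sample[np.newaxis, ...]).numpy(), pad_params)
    print('[*] {} faces detected'.format(len(detections)))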
def initialize():
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    # Setup logger
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # Define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # Load checkpoints
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    return model, cfg
def main(_argv):
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = FLAGS.weights
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    model.summary()
    for i in model.layers:
        print(i.output)
    model.save(FLAGS.output)
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load model from weights.h5
    # model.load_weights('./model/mbv2_weights.h5', by_name=True, skip_mismatch=True)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        file_path = '/Users/lichaochao/Downloads/images_UMU/'
        for file_name in os.listdir(file_path + 'source_images/'):
            image_path = file_path + 'source_images/' + file_name
            if not os.path.exists(image_path):
                print(f"cannot find image path from {image_path}")
                continue

            img_raw = cv2.imread(image_path)
            img = np.float32(img_raw.copy())
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            img_height, img_width, _ch = img.shape

            # run model
            outputs = model(img[np.newaxis, ...])
            preds = tf.concat([
                outputs[0][0],
                outputs[1][0, :, 1][..., tf.newaxis],
                outputs[2][0, :, 1][..., tf.newaxis]
            ], -1)
            priors = prior_box_tf((img_height, img_width),
                                  cfg['min_sizes'], cfg['steps'], cfg['clip'])
            decode_preds = decode_tf(preds, priors, cfg['variances'])

            selected_indices = tf.image.non_max_suppression(
                boxes=decode_preds[:, :4],
                scores=decode_preds[:, -1],
                max_output_size=tf.shape(decode_preds)[0],
                iou_threshold=FLAGS.iou_th,
                score_threshold=FLAGS.score_th)
            outputs = tf.gather(decode_preds, selected_indices).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            has_face = False
            is_smile = False
            for prior_index in range(len(outputs)):
                ann = outputs[prior_index]
                if ann[-1] >= 0.5:
                    has_face = True
                    x1, y1 = int(ann[0] * img_width), int(ann[1] * img_height)
                    x2, y2 = int(ann[2] * img_width), int(ann[3] * img_height)
                    text = "face: {:.2f}".format(ann[-1] * 100)
                    cv2.putText(img, text, (x1 + 5, y1 - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                    if ann[-2] >= 0.5:
                        is_smile = True
                        smile_text = "smile: {:.2f}".format(ann[-2] * 100)
                        cv2.putText(img, smile_text, (x1 + 5, y1 + 30),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

            if is_smile:
                dst_file_path = file_path + '/smile_face/' + file_name
            elif has_face:
                dst_file_path = file_path + '/face/' + file_name
            else:
                dst_file_path = file_path + '/no_face/' + file_name
            cv2.imwrite(dst_file_path, img)
            print(dst_file_path)
    else:
        cam = cv2.VideoCapture('./data/linda_umu.mp4')
        # cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        # cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        resize = FLAGS.down_scale_factor
        frame_height = cam.get(cv2.CAP_PROP_FRAME_HEIGHT) * resize
        frame_width = cam.get(cv2.CAP_PROP_FRAME_WIDTH) * resize

        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                print('no cam')
                break

            if frame_index < 5:
                frame_index += 1
                # continue
            else:
                frame_index = 0
                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0), fx=resize, fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])
                preds = tf.concat([
                    outputs[0][0],
                    outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)
                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)
                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params, resize=resize)

                # calculate fps
                fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                start_time = time.time()
                cv2.putText(frame, fps_str, (25, 50),
                            cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            # start_time = time.time()
            # cv2.putText(frame, fps_str, (25, 25),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) == ord('q'):
                exit()
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
            cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture("./videos/Bentall_Centra.MP4")

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break  # stop when the stream ends instead of crashing on frame.shape

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) == ord('q'):
                exit()
import os

import tensorflow as tf

from modules.models import RetinaFaceModel
from modules.utils import (set_memory_growth, load_yaml, draw_bbox_landm,
                           pad_input_image, recover_pad_output)

set_memory_growth()
# tf.debugging.set_log_device_placement(True)

cfg_path = './configs/retinaface_mbv2.yaml'
gpu = '0'
iou_th = 0.4
score_th = 0.5

cfg = load_yaml(cfg_path)
model = RetinaFaceModel(cfg, training=False, iou_th=iou_th, score_th=score_th)

checkpoint_dir = './checkpoints/' + cfg['sub_name']
checkpoint = tf.train.Checkpoint(model=model)
if tf.train.latest_checkpoint(checkpoint_dir):
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    print("[*] load ckpt from {}.".format(
        tf.train.latest_checkpoint(checkpoint_dir)))
else:
    print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
    exit()


def get_face_value(img_raw, down_scale_factor=0.3):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load model from weights.h5
    # model.load_weights('./model/mbv2_weights.h5', by_name=True, skip_mismatch=True)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img = np.float32(img_raw.copy())

        # testing scale
        target_size = 320
        img_size_max = np.max(img.shape[0:2])
        resize = float(target_size) / float(img_size_max)
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img, outputs[prior_index], target_size, target_size)
            cv2.imwrite(save_img_path, img)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture('./data/lichaochao.mp4')
        # cam = cv2.VideoCapture(0)
        frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fps = cam.get(cv2.CAP_PROP_FPS)
        # VideoWriter expects frameSize as (width, height)
        out = cv2.VideoWriter('chaochao1.mp4', fourcc, fps=fps,
                              frameSize=(frame_width, frame_height))

        resize = FLAGS.down_scale_factor
        frame_height *= resize
        frame_width *= resize

        max_steps = max(cfg['steps'])
        img_pad_h = max_steps - frame_height % max_steps if frame_height % max_steps > 0 else 0
        img_pad_w = max_steps - frame_width % max_steps if frame_width % max_steps > 0 else 0
        priors = prior_box_tf(
            (frame_height + img_pad_h, frame_width + img_pad_w),
            cfg['min_sizes'], cfg['steps'], cfg['clip'])

        frame_index = 0
        outputs = []
        start_time = time.time()
        while cam.isOpened():
            _, frame = cam.read()
            if frame is None:
                print('no cam')
                break

            if frame_index < 5:
                frame_index += 1
                # continue
            else:
                frame_index = 0
                img = np.float32(frame.copy())
                if resize < 1:
                    img = cv2.resize(img, (0, 0), fx=resize, fy=resize,
                                     interpolation=cv2.INTER_LINEAR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # pad input image to avoid unmatched shape problem
                img, pad_params = pad_input_image(img, max_steps=max_steps)

                # run model
                outputs = model(img[np.newaxis, ...])
                preds = tf.concat([
                    outputs[0][0],
                    outputs[1][0, :, 1][..., tf.newaxis],
                    outputs[2][0, :, 1][..., tf.newaxis]
                ], -1)
                decode_preds = decode_tf(preds, priors, cfg['variances'])

                selected_indices = tf.image.non_max_suppression(
                    boxes=decode_preds[:, :4],
                    scores=decode_preds[:, -1],
                    max_output_size=tf.shape(decode_preds)[0],
                    iou_threshold=FLAGS.iou_th,
                    score_threshold=FLAGS.score_th)
                outputs = tf.gather(decode_preds, selected_indices).numpy()

                # recover padding effect
                outputs = recover_pad_output(outputs, pad_params, resize=resize)

                # calculate fps
                # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
                # start_time = time.time()
                # cv2.putText(frame, fps_str, (25, 50),
                #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 0, 255), 2)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            # start_time = time.time()
            # cv2.putText(frame, fps_str, (25, 25),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            out.write(frame)
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) == ord('q'):
                exit()
import os
import logging

import tensorflow as tf
from flask import Flask
from flask_ngrok import run_with_ngrok

from modules.models import RetinaFaceModel
from modules.utils import (set_memory_growth, load_yaml, draw_bbox_landm,
                           pad_input_image, recover_pad_output)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

logger = tf.get_logger()
logger.disabled = True
logger.setLevel(logging.FATAL)

set_memory_growth()

cfg_path = './retinaface_mbv2.yaml'
cfg = load_yaml(cfg_path)
model = RetinaFaceModel(cfg, training=False, iou_th=0.4, score_th=0.5)

checkpoint_dir = './checkpoints/retinaface_mbv2'
checkpoint = tf.train.Checkpoint(model=model)
if tf.train.latest_checkpoint(checkpoint_dir):
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    print("[*] load ckpt from {}.".format(
        tf.train.latest_checkpoint(checkpoint_dir)))
else:
    print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
    exit()

app = Flask(__name__)
run_with_ngrok(app)
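# --- serving sketch (editor addition, not from the original script) ---
# The Flask app above is created and wired to ngrok, but no endpoint appears in
# this excerpt. A minimal route might look like the following; the '/detect'
# path, the 'image' form field, and the JSON response shape are all assumptions.
import cv2
import numpy as np
from flask import request, jsonify


@app.route('/detect', methods=['POST'])
def detect():
    # decode the uploaded image ('image' is a hypothetical field name)
    file_bytes = np.frombuffer(request.files['image'].read(), np.uint8)
    img_raw = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)

    img = cv2.cvtColor(np.float32(img_raw.copy()), cv2.COLOR_BGR2RGB)
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    outputs = model(img[np.newaxis, ...]).numpy()
    outputs = recover_pad_output(outputs, pad_params)

    return jsonify({'num_faces': len(outputs),
                    'detections': outputs.tolist()})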
def main(_argv):
    # init
    face_aligner = FaceAligner()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        save_count = 0
        for path, subdirs, files in os.walk(FLAGS.img_path):
            for name in files:
                if name.endswith('.jpg'):
                    img_path = os.path.join(path, name)
                    if not os.path.exists(img_path):
                        print(f"cannot find image path from {img_path}")
                        exit()
                    if save_count < FLAGS.img_num:
                        print("[*] Processing on single image {}".format(img_path))

                        img_raw = cv2.imread(img_path)
                        img_height_raw, img_width_raw, _ = img_raw.shape
                        img = np.float32(img_raw.copy())

                        if FLAGS.down_scale_factor < 1.0:
                            img = cv2.resize(img, (0, 0),
                                             fx=FLAGS.down_scale_factor,
                                             fy=FLAGS.down_scale_factor,
                                             interpolation=cv2.INTER_LINEAR)
                        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                        # pad input image to avoid unmatched shape problem
                        img, pad_params = pad_input_image(
                            img, max_steps=max(cfg['steps']))

                        # run model
                        outputs = model(img[np.newaxis, ...]).numpy()

                        # recover padding effect
                        outputs = recover_pad_output(outputs, pad_params)

                        # draw and save results
                        save_img_path = os.path.join(
                            'out_' + os.path.basename(img_path))
                        for prior_index in range(len(outputs)):
                            draw_bbox_landm(img_raw, outputs[prior_index],
                                            img_height_raw, img_width_raw)
                            cv2.imwrite(save_img_path, img_raw)
                        print(f"[*] save result at {save_img_path}")
                        save_count += 1
    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break  # stop when the stream ends instead of crashing on frame.shape

            frame_height, frame_width, _ = frame.shape
            orig_frame = frame.copy()
            face = None

            img = cv2.resize(frame, (512, 512))
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            start_time = time.time()
            outputs = model(img[np.newaxis, ...]).numpy()
            inference_time = f"Inf: {time.time() - start_time}"
            cv2.putText(frame, inference_time, (25, 50),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                preds = decode_predictions((frame_width, frame_height), outputs)
                for key, value in preds.items():
                    bbox = value[0]['bbox']
                    left_eye = value[0]['left_eye']
                    right_eye = value[0]['right_eye']

                    # Our face ROI
                    face = orig_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                    # Eyes
                    x1_le = left_eye[0] - 25
                    y1_le = left_eye[1] - 25
                    x2_le = left_eye[0] + 25
                    y2_le = left_eye[1] + 25

                    x1_re = right_eye[0] - 25
                    y1_re = right_eye[1] - 25
                    x2_re = right_eye[0] + 25
                    y2_re = right_eye[1] + 25

                    if left_eye[1] > right_eye[1]:
                        A = (right_eye[0], left_eye[1])
                    else:
                        A = (left_eye[0], right_eye[1])

                    # Calc our rotating degree
                    delta_x = right_eye[0] - left_eye[0]
                    delta_y = right_eye[1] - left_eye[1]
                    angle = np.arctan(delta_y / (delta_x + 1e-17))  # avoid division by zero
                    angle = (angle * 180) / np.pi

                    # compute the desired right eye x-coordinate based on the
                    # desired x-coordinate of the left eye
                    desiredRightEyeX = 1.0 - 0.35

                    # determine the scale of the new resulting image by taking
                    # the ratio of the distance between eyes in the *current*
                    # image to the ratio of distance between eyes in the
                    # *desired* image
                    dist = np.sqrt((delta_x**2) + (delta_y**2))
                    desiredDist = (desiredRightEyeX - 0.35)
                    desiredDist *= 256
                    scale = desiredDist / dist

                    eyesCenter = ((left_eye[0] + right_eye[0]) // 2,
                                  (left_eye[1] + right_eye[1]) // 2)

                    cv2.circle(frame, A, 5, (255, 0, 0), -1)
                    cv2.putText(frame, str(int(angle)), (x1_le - 15, y1_le),
                                cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)
                    cv2.line(frame, right_eye, left_eye, (0, 200, 200), 3)
                    cv2.line(frame, left_eye, A, (0, 200, 200), 3)
                    cv2.line(frame, right_eye, A, (0, 200, 200), 3)
                    cv2.line(frame, (left_eye[0], left_eye[1]),
                             (right_eye[0], right_eye[1]), (0, 200, 200), 3)

                    rotated = face_aligner.align(orig_frame, left_eye, right_eye)

                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if face is not None:
                cv2.imshow('face aligned', rotated)

            if cv2.waitKey(1) == ord('q'):
                exit()
def train_retinaface(cfg):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    if cfg['distributed']:
        import horovod.tensorflow as hvd
        # Initialize Horovod
        hvd.init()
    else:
        hvd = []
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    reset_random_seeds()
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth(hvd)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, 'train', hvd)
    if cfg['evaluation_during_training']:
        val_dataset = load_dataset(cfg, priors, 'val', [])

    # define optimizer
    if cfg['distributed']:
        init_lr = cfg['init_lr'] * hvd.size()
        min_lr = cfg['min_lr'] * hvd.size()
        steps_per_epoch = cfg['dataset_len'] // (cfg['batch_size'] * hvd.size())
    else:
        init_lr = cfg['init_lr']
        min_lr = cfg['min_lr']
        steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']

    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=init_lr,
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=min_lr)
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss(num_class=cfg['num_class'])

    # load checkpoint
    checkpoint_dir = os.path.join(cfg['output_path'], 'checkpoints', cfg['sub_name'])
    checkpoint = tf.train.Checkpoint(epoch=tf.Variable(0, name='epoch'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)
    os.makedirs(checkpoint_dir, exist_ok=True)
    with open(os.path.join(checkpoint_dir, 'cfg.pickle'), 'wb') as handle:
        pickle.dump(cfg, handle, protocol=pickle.HIGHEST_PROTOCOL)
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {}'.format(manager.latest_checkpoint))
    else:
        print("[*] training from scratch.")

    # define training step function
    @tf.function
    def train_step(inputs, labels, first_batch, epoch):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            losses = {}
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n([l for l in losses.values()])

        if cfg['distributed']:
            # Horovod: add Horovod Distributed GradientTape.
            tape = hvd.DistributedGradientTape(tape)

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if cfg['distributed'] and first_batch and epoch:
            hvd.broadcast_variables(model.variables, root_rank=0)
            hvd.broadcast_variables(optimizer.variables(), root_rank=0)

        return total_loss, losses

    def test_step(inputs, img_name):
        _, img_height_raw, img_width_raw, _ = inputs.shape

        # pad input image to avoid unmatched shape problem
        img = inputs[0].numpy()
        # if img_name == '6_Funeral_Funeral_6_618':
        #     resize = 0.5  # this image is too big to avoid OOM problem
        #     img = cv2.resize(img, None, None, fx=resize, fy=resize,
        #                      interpolation=cv2.INTER_LINEAR)
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
        input_img = img[np.newaxis, ...]

        predictions = model(input_img, training=False)
        outputs = pred_to_outputs(cfg, predictions, input_img.shape).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        bboxs = outputs[:, :4]
        confs = outputs[:, -1]

        pred_boxes = []
        for box, conf in zip(bboxs, confs):
            x = int(box[0] * img_width_raw)
            y = int(box[1] * img_height_raw)
            w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
            h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
            pred_boxes.append([x, y, w, h, conf])
        pred_boxes = np.array(pred_boxes).astype('float')

        return pred_boxes

    # training loop
    summary_writer = tf.summary.create_file_writer(
        os.path.join(cfg['output_path'], 'logs', cfg['sub_name']))
    prog_bar = ProgressBar(steps_per_epoch, 0)

    if cfg['evaluation_during_training']:
        widerface_eval_hard = WiderFaceEval(split='hard')

    for epoch in range(cfg['epoch']):
        try:
            actual_epoch = epoch + 1
            if cfg['distributed']:
                if hvd.rank() == 0:
                    print("\nStart of epoch %d" % (actual_epoch,))
            else:
                print("\nStart of epoch %d" % (actual_epoch,))
            checkpoint.epoch.assign_add(1)
            start_time = time.time()

            # Iterate over the batches of the dataset.
            for batch, (x_batch_train, y_batch_train, img_name) in enumerate(train_dataset):
                total_loss, losses = train_step(x_batch_train, y_batch_train,
                                                batch == 0, epoch == 0)
                if cfg['distributed']:
                    if hvd.rank() == 0:
                        # prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        #     checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(),
                        #     optimizer._decayed_lr(tf.float32)))
                        if batch % 100 == 0:
                            print("batch={}/{}, epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                                batch, steps_per_epoch, checkpoint.epoch.numpy(),
                                cfg['epoch'], total_loss.numpy(),
                                optimizer._decayed_lr(tf.float32)))
                else:
                    prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(),
                        optimizer._decayed_lr(tf.float32)))

            # Display metrics at the end of each epoch.
            # train_acc = train_acc_metric.result()
            # print("\nTraining loss over epoch: %.4f" % (float(total_loss.numpy()),))

            if cfg['distributed']:
                if hvd.rank() == 0:
                    print("Time taken: %.2fs" % (time.time() - start_time))
                    manager.save()
                    print("\n[*] save ckpt file at {}".format(manager.latest_checkpoint))
            else:
                print("Time taken: %.2fs" % (time.time() - start_time))
                manager.save()
                print("\n[*] save ckpt file at {}".format(manager.latest_checkpoint))

            if cfg['evaluation_during_training']:
                # Run a validation loop at the end of each epoch.
                for batch, (x_batch_val, y_batch_val, img_name) in enumerate(val_dataset.take(500)):
                    if '/' in img_name.numpy()[0].decode():
                        img_name = img_name.numpy()[0].decode().split('/')[1].split('.')[0]
                    else:
                        img_name = []
                    pred_boxes = test_step(x_batch_val, img_name)
                    gt_boxes = labels_to_boxes(y_batch_val)
                    widerface_eval_hard.update(pred_boxes, gt_boxes, img_name)

                ap_hard = widerface_eval_hard.calculate_ap()
                widerface_eval_hard.reset()

                if cfg['distributed']:
                    if hvd.rank() == 0:
                        print("Validation acc: %.4f" % (float(ap_hard),))
                else:
                    print("Validation acc: %.4f" % (float(ap_hard),))

            def tensorboard_writer():
                with summary_writer.as_default():
                    tf.summary.scalar('loss/total_loss', total_loss, step=actual_epoch)
                    for k, l in losses.items():
                        tf.summary.scalar('loss/{}'.format(k), l, step=actual_epoch)
                    tf.summary.scalar('learning_rate',
                                      optimizer._decayed_lr(tf.float32),
                                      step=actual_epoch)
                    if cfg['evaluation_during_training']:
                        tf.summary.scalar('Val AP', ap_hard, step=actual_epoch)

            if cfg['distributed']:
                if hvd.rank() == 0:
                    tensorboard_writer()
            else:
                tensorboard_writer()
        except Exception as E:
            print(E)
            continue

    if cfg['distributed']:
        if hvd.rank() == 0:
            manager.save()
            print("\n[*] training done! save ckpt file at {}".format(
                manager.latest_checkpoint))
    else:
        manager.save()
        print("\n[*] training done! save ckpt file at {}".format(
            manager.latest_checkpoint))
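# --- usage sketch (editor addition, not from the original script) ---
# train_retinaface() above expects a fully loaded config dict, so a thin entry
# point could look like this. The --cfg_path flag name and its default value are
# assumptions; load_yaml is the same helper used by the inference scripts.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg_path', type=str,
                        default='./configs/retinaface_mbv2.yaml')  # hypothetical default
    args = parser.parse_args()
    train_retinaface(load_yaml(args.cfg_path))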
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        # print("[*] load ckpt from {}.".format(tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not os.path.exists(FLAGS.img_path):
        print(f"cannot find image path from {FLAGS.img_path}")
        exit()

    print("[*] Processing on single image {}".format(FLAGS.img_path))

    img_raw = cv2.imread(FLAGS.img_path)
    img_height_raw, img_width_raw, _ = img_raw.shape
    img = np.float32(img_raw.copy())

    if FLAGS.down_scale_factor < 1.0:
        img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                         fy=FLAGS.down_scale_factor,
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

    # run model
    outputs = model(img[np.newaxis, ...]).numpy()

    # recover padding effect
    outputs = recover_pad_output(outputs, pad_params)

    # draw and save results
    imgs = []
    DIM = 64
    save_img_path = os.path.join('data/out_' + os.path.basename(FLAGS.img_path))
    for prior_index in range(9):
        if (prior_index < len(outputs)):
            img = get_bbox_imgs(img_raw, outputs[prior_index],
                                img_height_raw, img_width_raw)
            img = cv2.resize(img, (DIM, DIM))
            imgs.append(img)
        else:
            imgs.append(Image.new('RGB', (DIM, DIM)))

    imga = imgs[0]
    for img in imgs[1:3]:
        imga = np.concatenate((imga, img), axis=1)
    imgb = imgs[3]
    for img in imgs[4:6]:
        imgb = np.concatenate((imgb, img), axis=1)
    imgf = np.concatenate((imga, imgb), axis=0)
    imgc = imgs[6]
    for img in imgs[7:9]:
        imgc = np.concatenate((imgc, img), axis=1)
    imgf = np.concatenate((imgf, imgc), axis=0)

    cv2.imwrite(save_img_path, imgf)
    print(f"[*] save result at {save_img_path}")
def main(_argv):
    # init
    CONFIG_PATH = './configs/retinaface_mbv2.yaml'
    GPU = '0'
    IOU_TH = 0.4
    SCORE_TH = 0.5
    WEBCAM = False
    DOWN_FACTOR = 1.0

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = GPU

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(CONFIG_PATH)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=IOU_TH, score_th=SCORE_TH)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not WEBCAM:
        # if not os.path.exists(IMG_PATH):
        #     print(f"cannot find image path from {IMG_PATH}")
        #     exit()
        # print("[*] Processing on single image {}".format(IMG_PATH))
        # img_raw = cv2.imread(IMG_PATH)
        img_raw = input_image
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if DOWN_FACTOR < 1.0:
            img = cv2.resize(img, (0, 0), fx=DOWN_FACTOR, fy=DOWN_FACTOR,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = "result.jpg"
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
            cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture(0)

        start_time = time.time()
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break  # stop when the stream ends instead of crashing on frame.shape

            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if DOWN_FACTOR < 1.0:
                img = cv2.resize(img, (0, 0), fx=DOWN_FACTOR, fy=DOWN_FACTOR,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                draw_bbox_landm(frame, outputs[prior_index], frame_height,
                                frame_width)

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) == ord('q'):
                exit()

    return outputs
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    if not FLAGS.webcam:
        if not os.path.exists(FLAGS.img_path):
            print(f"cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Processing on single image {}".format(FLAGS.img_path))

        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        if FLAGS.down_scale_factor < 1.0:
            img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                             fy=FLAGS.down_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # draw and save results
        save_img_path = os.path.join('out_' + os.path.basename(FLAGS.img_path))
        for prior_index in range(len(outputs)):
            draw_bbox_landm(img_raw, outputs[prior_index], img_height_raw,
                            img_width_raw)
            cv2.imwrite(save_img_path, img_raw)
        print(f"[*] save result at {save_img_path}")
    else:
        cam = cv2.VideoCapture(FLAGS.vid_path)
        fps = int(cam.get(cv2.CAP_PROP_FPS))

        ### Saving Video to file
        frame_width = int(cam.get(3))
        frame_height = int(cam.get(4))

        # Define the codec and create VideoWriter object. The output is stored in 'outpy.avi' file.
        # import os
        if not os.path.exists('./output'):
            print('Creating folder: output/ for saving video.')
            os.makedirs('./output')
        out = cv2.VideoWriter('./output/output.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))

        start_time = time.time()
        counter = 0
        frameCount = 0
        while True:
            _, frame = cam.read()
            if frame is None:
                print("no cam input")
                break  # stop when the stream ends instead of crashing on frame.shape

            frameCount = frameCount + 1
            frame_height, frame_width, _ = frame.shape
            img = np.float32(frame.copy())
            if FLAGS.down_scale_factor < 1.0:
                img = cv2.resize(img, (0, 0), fx=FLAGS.down_scale_factor,
                                 fy=FLAGS.down_scale_factor,
                                 interpolation=cv2.INTER_LINEAR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image to avoid unmatched shape problem
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

            # run model
            outputs = model(img[np.newaxis, ...]).numpy()

            # recover padding effect
            outputs = recover_pad_output(outputs, pad_params)

            # draw results
            for prior_index in range(len(outputs)):
                croppedFace = draw_bbox_landm(frame, outputs[prior_index],
                                              frame_height, frame_width)
                if frameCount >= fps * FLAGS.dfps:
                    fileName = "%d.png" % counter
                    cv2.imwrite(FLAGS.dst_path + fileName, croppedFace)
                    print('Saved:', fileName)
                    counter = counter + 1
                    frameCount = 0

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start_time))
            start_time = time.time()
            cv2.putText(frame, fps_str, (25, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 0.75, (0, 255, 0), 2)

            # show frame
            if FLAGS.preview:
                cv2.imshow('frame', frame)
            out.write(frame)

            if cv2.waitKey(1) == ord('q'):
                exit()
def main(_argv):
    # FUNCTIONS FOR CROPPING
    #####################################################################################################
    def bounding_box(img, ann, img_height, img_width):
        x1, y1, x2, y2 = int(ann[0] * img_width), int(ann[1] * img_height), \
            int(ann[2] * img_width), int(ann[3] * img_height)
        return x1, y1, x2, y2

    def calc_points(x, y, side):
        return int(x - side / 2), int(x + side / 2), int(y - side / 2), int(y + side / 2)

    def adjust_points(x_center, y_center, original_longest, scaling_factor,
                      min_scaling_factor):
        factors = np.arange(scaling_factor, min_scaling_factor - 0.04, -0.05)
        for factor in factors:
            # calculate new points
            x1, x2, y1, y2 = calc_points(x_center, y_center,
                                         int(original_longest * factor))
            for i in range(FLAGS.max_iter):
                if x1 < 0:
                    x2 -= x1
                    x1 = 0
                if y1 < 0:
                    y2 -= y1
                    y1 = 0
                if x2 > img_raw.shape[1]:
                    x1 -= x2
                    x2 = img_raw.shape[1]
                if y2 > img_raw.shape[0]:
                    y1 -= y2
                    y2 = img_raw.shape[0]
                if x1 >= 0 and y1 >= 0 and x2 <= img_raw.shape[1] and y2 <= img_raw.shape[0]:
                    return x1, x2, y1, y2, True
        print("Not cropping", img_path, "due to a problem with a cropping square box")
        return x1, x2, y1, y2, False

    def get_dim(lst):
        return [(lst[3] - lst[1]) * (lst[2] - lst[0])]

    def get_max(outputs, lst):
        area = [i[0] for i in lst]
        prob = [i[1] for i in lst]
        max_area_index = set([i for i, j in enumerate(area) if j == max(area)])
        max_prob_index = set([i for i, j in enumerate(prob) if j == max(prob)])
        indecies = list(max_area_index.intersection(max_prob_index))
        if len(indecies) >= 1:
            return [outputs[indecies[0]]]
        elif len(indecies) == 0:  # if there is a mismatch, return the largest element
            if len(list(max_area_index)) >= 1:
                return [outputs[list(max_area_index)[0]]]
            else:  # precautionary because there should always be at least one face
                print("Not cropping", img_path,
                      "due to a problem with returning the largest element")
                return []
    #####################################################################################################

    # MODEL
    #####################################################################################################
    # initialisation
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()
    #####################################################################################################

    # CROPPING
    #####################################################################################################
    # check if the path exists
    if not os.path.exists(FLAGS.path):
        print(f"cannot find the specified path from {FLAGS.path}")
        exit()

    # make a corresponding directory
    try:
        os.mkdir(FLAGS.path.replace("images", "cropped_images"))
    except FileExistsError:
        print(FLAGS.path.replace("images", "cropped_images"), "already exists")

    # get subdirectories within the specified folder
    subdirectories = [FLAGS.path + '/' + i for i in os.listdir(FLAGS.path)
                      if os.path.isdir(FLAGS.path + '/' + i)]

    # loop through each folder
    for subdir in sorted(subdirectories):
        # create corresponding folders for cropped data and get all images in a given folder
        if 'original' in subdir:
            x = 3
        else:
            x = 7
        try:
            os.mkdir(subdir.replace("images", "cropped_images"))
            images_lst = glob.glob(subdir + "/*.png")
            cropped_images_lst = []
            print(subdir[len(subdir) - x:len(subdir)])
        except FileExistsError:
            # count number of existing images in this subdirectory, if same as original, skip
            images_lst = glob.glob(subdir + "/*.png")
            cropped_images_lst = glob.glob(
                subdir.replace("images", "cropped_images") + "/*.png")
            cropped_images_lst = [e[len(e) - 8:len(e)] for e in cropped_images_lst]
            if len(images_lst) == len(cropped_images_lst):
                print(subdir[len(subdir) - x:len(subdir)],
                      "has already been generated")
                continue
            else:
                print(subdir[len(subdir) - x:len(subdir)])

        # loop through each image in a given folder
        for img_path in sorted(images_lst):
            if img_path[len(img_path) - 8:len(img_path)] in cropped_images_lst:
                continue
            img_raw = cv2.imread(img_path)
            img_height_raw, img_width_raw, _ = img_raw.shape
            img = np.float32(img_raw.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # pad input image (unmatched shape problem), run model, recover padding effect
            img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
            outputs = model(img[np.newaxis, ...]).numpy()
            outputs = recover_pad_output(outputs, pad_params)

            # get rid of elements which are faces with less than threshold probability
            outputs = [i for i in outputs if i[15] >= FLAGS.threshold_prob]

            # flag any images which have no recognised faces in them
            if len(outputs) == 0:
                print("no faces detected for", img_path)
            # if more than one face detected, select the largest and most definite
            elif len(outputs) > 1:
                f = [list(bounding_box(img_raw, i[0:4], img_height_raw, img_width_raw)) + [i[15]]
                     for i in outputs]
                f = [get_dim(i[0:4]) + [i[4]] for i in f]
                outputs = get_max(outputs, f)

            # keeping as a loop in case we decide to use multiple faces per frame in the future
            # get cropping coordinates and save results
            for prior_index in range(len(outputs)):
                # get the bounding box coordinates
                bb_x1, bb_y1, bb_x2, bb_y2 = bounding_box(
                    img_raw, outputs[prior_index], img_height_raw, img_width_raw)

                # scale up the magnitude of the longest side
                original_longest = int(max(bb_x2 - bb_x1, bb_y2 - bb_y1))
                longest = int(original_longest * FLAGS.scaling_factor)
                x_center = int((bb_x1 + bb_x2) / 2)
                y_center = int((bb_y1 + bb_y2) / 2)
                x1, x2, y1, y2, save_image = adjust_points(
                    x_center, y_center, original_longest,
                    FLAGS.scaling_factor, FLAGS.min_scaling_factor)

                if save_image:
                    try:
                        save_img_path = os.path.join(
                            subdir.replace("images", "cropped_images")
                            + "/" + img_path.replace(subdir + '/', ''))
                        cv2.imwrite(save_img_path, img_raw[y1:y2, x1:x2])
                    except:
                        print(img_path, "is not cropped for unknown reasons")
def main(_):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, shuffle=True)

    # define optimizer
    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=cfg['init_lr'],
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=cfg['min_lr'])
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
                                        momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss()

    # load checkpoint
    checkpoint_dir = '/content/drive/My Drive/Colab/checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0, name='step'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {} at step {}.'.format(
            manager.latest_checkpoint, checkpoint.step.numpy()))
    else:
        print("[*] training from scratch.")

    # define training step function
    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            losses = {}
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n([l for l in losses.values()])

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        return total_loss, losses

    # training loop
    summary_writer = tf.summary.create_file_writer('./logs/' + cfg['sub_name'])
    remain_steps = max(steps_per_epoch * cfg['epoch'] - checkpoint.step.numpy(), 0)
    prog_bar = ProgressBar(steps_per_epoch,
                           checkpoint.step.numpy() % steps_per_epoch)

    for inputs, labels in train_dataset.take(remain_steps):
        checkpoint.step.assign_add(1)
        steps = checkpoint.step.numpy()

        total_loss, losses = train_step(inputs, labels)

        prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
            ((steps - 1) // steps_per_epoch) + 1, cfg['epoch'],
            total_loss.numpy(), optimizer.lr(steps).numpy()))

        if steps % 10 == 0:
            with summary_writer.as_default():
                tf.summary.scalar('loss/total_loss', total_loss, step=steps)
                for k, l in losses.items():
                    tf.summary.scalar('loss/{}'.format(k), l, step=steps)
                tf.summary.scalar('learning_rate', optimizer.lr(steps), step=steps)

        if steps % cfg['save_steps'] == 0:
            manager.save()
            print("\n[*] save ckpt file at {}".format(
                manager.latest_checkpoint))

    manager.save()
    print("\n[*] training done! save ckpt file at {}".format(
        manager.latest_checkpoint))
def main(_argv):
    # init
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = "./checkpoints/" + cfg["sub_name"]
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testset_folder = cfg["testing_dataset_path"]
    testset_list = os.path.join(testset_folder, "label.txt")
    img_paths, _ = load_info(testset_list)

    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths), img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale
        target_size = 1600
        max_size = 2150
        img_shape = img.shape
        img_size_min = np.min(img_shape[0:2])
        img_size_max = np.max(img_shape[0:2])
        resize = float(target_size) / float(img_size_min)

        # prevent bigger axis from being more than max_size:
        if np.round(resize * img_size_max) > max_size:
            resize = float(max_size) / float(img_size_max)
        if FLAGS.origin_size:
            if os.path.basename(img_path) == "6_Funeral_Funeral_6_618.jpg":
                resize = 0.5  # this image is too big to avoid OOM problem
            else:
                resize = 1

        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image_(img, max_steps=max(cfg["steps"]))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        save_name = os.path.join(FLAGS.save_folder, sub_dir,
                                 img_name.replace(".jpg", ".txt"))
        pathlib.Path(os.path.join(FLAGS.save_folder, sub_dir)).mkdir(
            parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            confs = outputs[:, -1]

            file_name = img_name + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            file.write(file_name)
            file.write(bboxs_num)
            for box, conf in zip(bboxs, confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                confidence = str(conf)
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) \
                    + " " + confidence + " \n"
                file.write(line)

        # save images
        pathlib.Path(os.path.join("./results", cfg["sub_name"], sub_dir)).mkdir(
            parents=True, exist_ok=True)
        if FLAGS.save_image:
            for prior_index in range(len(outputs)):
                if outputs[prior_index][15] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, outputs[prior_index],
                                    img_height_raw, img_width_raw)
            cv2.imwrite(os.path.join("./results", cfg["sub_name"], sub_dir,
                                     img_name), img_raw)
def main(_argv):
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    set_memory_growth()
    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    if tf.train.latest_checkpoint(checkpoint_dir):
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
        print("[*] load ckpt from {}.".format(
            tf.train.latest_checkpoint(checkpoint_dir)))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testing_dataset_path = cfg['testing_dataset_path']
    img_paths, _ = load_info(testing_dataset_path,
                             './data/CelebA/train_labels.txt')
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths), img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # testing scale
        if not FLAGS.origin_size:
            target_size = 320
            img_size_max = np.max(img.shape[0:2])
            resize = float(target_size) / float(img_size_max)
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        save_name = os.path.join(FLAGS.save_folder, sub_dir,
                                 img_name.replace('.jpg', '.txt'))
        pathlib.Path(os.path.join(FLAGS.save_folder, sub_dir)).mkdir(
            parents=True, exist_ok=True)

        with open(save_name, "w") as file:
            bboxs = outputs[:, :4]
            smile_confs = outputs[:, 4]
            face_confs = outputs[:, -1]

            file_name = img_name + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            file.write(file_name)
            file.write(bboxs_num)
            for box, smile_conf, face_conf in zip(bboxs, smile_confs, face_confs):
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
                h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) \
                    + " " + str(smile_conf) + " " + str(face_conf) + " \n"
                file.write(line)

        # save images
        pathlib.Path(os.path.join('./results', cfg['sub_name'], sub_dir)).mkdir(
            parents=True, exist_ok=True)
        if FLAGS.save_image:
            for prior_index in range(len(outputs)):
                if outputs[prior_index][-1] >= FLAGS.vis_th:
                    draw_bbox_landm(img_raw, outputs[prior_index],
                                    img_height_raw, img_width_raw)
            cv2.imwrite(os.path.join('./results', cfg['sub_name'], sub_dir,
                                     img_name), img_raw)