def main(_):
    """Train a RetinaFace model on a single GPU with warm-up LR scheduling,
    periodic TensorBoard logging, and step-based checkpointing.

    Reads the run configuration from FLAGS.cfg_path (YAML) and resumes from
    the latest checkpoint when one exists.
    """
    # init: silence TF C++ / Python logging and pin the visible GPU
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box (anchors) for the configured square input size
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, shuffle=True)

    # define optimizer: multi-step decay with linear warm-up, driven by
    # global step (epoch boundaries converted to step counts)
    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=cfg['init_lr'],
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=cfg['min_lr'])
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
                                        momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss()

    # load checkpoint; the global step variable is saved alongside the
    # model/optimizer so training resumes at the correct step
    checkpoint_dir = '/content/drive/My Drive/Colab/checkpoints/' + cfg[
        'sub_name']
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0, name='step'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {} at step {}.'.format(
            manager.latest_checkpoint, checkpoint.step.numpy()))
    else:
        print("[*] training from scratch.")

    # define training step function (compiled into a TF graph)
    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            losses = {}
            # 'reg' is the model's internal regularization losses (e.g. weight decay)
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n([l for l in losses.values()])
        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return total_loss, losses

    # training loop
    summary_writer = tf.summary.create_file_writer('./logs/' + cfg['sub_name'])
    # steps still to run, clamped at 0 in case the checkpoint is past the end
    remain_steps = max(
        steps_per_epoch * cfg['epoch'] - checkpoint.step.numpy(), 0)
    prog_bar = ProgressBar(steps_per_epoch,
                           checkpoint.step.numpy() % steps_per_epoch)

    for inputs, labels in train_dataset.take(remain_steps):
        checkpoint.step.assign_add(1)
        steps = checkpoint.step.numpy()

        total_loss, losses = train_step(inputs, labels)

        prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
            ((steps - 1) // steps_per_epoch) + 1, cfg['epoch'],
            total_loss.numpy(), optimizer.lr(steps).numpy()))

        # log scalars every 10 steps
        if steps % 10 == 0:
            with summary_writer.as_default():
                tf.summary.scalar('loss/total_loss', total_loss, step=steps)
                for k, l in losses.items():
                    tf.summary.scalar('loss/{}'.format(k), l, step=steps)
                tf.summary.scalar('learning_rate', optimizer.lr(steps),
                                  step=steps)

        # periodic checkpoint
        if steps % cfg['save_steps'] == 0:
            manager.save()
            print("\n[*] save ckpt file at {}".format(
                manager.latest_checkpoint))

    # final checkpoint after the loop completes
    manager.save()
    print("\n[*] training done! save ckpt file at {}".format(
        manager.latest_checkpoint))
# Script-level setup for a WIDER FACE tfrecord dataset check
# (visualizes samples / measures loading cost).
import time
import cv2
import numpy as np
from modules.dataset import load_tfrecord_dataset
from modules.anchor import prior_box, decode_tf
from modules.utils import draw_bbox_landm, draw_anchor

using_bin = True
batch_size = 1
# anchor configuration for a 640x640 input
min_sizes = [[16, 32], [64, 128], [256, 512]]
steps = [8, 16, 32]
clip = False
img_dim = 640
priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)
visualization = True  # False for time cost estimation
using_encoding = True  # batch size should be 1 when False
variances = [0.1, 0.2]
match_thresh = 0.45
ignore_thresh = 0.3
num_samples = 100

# pick the pre-encoded (binary) or raw tfrecord variant
if using_bin:
    tfrecord_name = './data/widerface_train_bin.tfrecord'
else:
    tfrecord_name = './data/widerface_train.tfrecord'

# NOTE(review): this call is truncated in the source chunk — the remaining
# keyword arguments (and closing parenthesis) are missing from this view.
train_dataset = load_tfrecord_dataset(
    tfrecord_name, batch_size, img_dim=640, using_bin=using_bin,
def main(_):
    """Sanity-check a WIDER FACE dataset split by iterating batches and,
    optionally, rendering ground-truth boxes/landmarks to PNG files.

    Reads dataset location and options from FLAGS (dataset_path, split,
    using_encoding, visualization, output_path). Images are written as
    '<output_path>/<idx>.png'.
    """
    # anchor configuration for a 640x640 input
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False
    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    # target-encoding parameters
    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3

    # loader options
    batch_size = 1
    shuffle = True
    using_flip = True
    using_distort = True
    using_bin = True
    buffer_size = 4000
    number_cycles = 2
    threads = 2

    # hvd=[] disables Horovod sharding for this single-process check
    check_dataset = load_tfrecord_dataset(dataset_root=FLAGS.dataset_path,
                                          split=FLAGS.split,
                                          threads=threads,
                                          number_cycles=number_cycles,
                                          batch_size=batch_size,
                                          hvd=[],
                                          img_dim=img_dim,
                                          using_bin=using_bin,
                                          using_flip=using_flip,
                                          using_distort=using_distort,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=shuffle,
                                          buffer_size=buffer_size)

    # (removed a stray `time.time()` whose result was discarded — dead code
    # left over from a removed timing measurement)
    for idx, (inputs, labels, _) in enumerate(check_dataset):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)
        if not FLAGS.visualization:
            continue

        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            targets = labels.numpy()[0]
            for target in targets:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            targets = decode_tf(labels[0], priors,
                                variances=variances).numpy()
            for prior_index in range(len(targets)):
                # only draw priors matched to a face (conf == 1)
                if targets[prior_index][-1] != 1:
                    continue
                draw_bbox_landm(img, targets[prior_index], img_dim, img_dim)
                draw_anchor(img, priors[prior_index], img_dim, img_dim)

        # img is RGB; reverse channels so the written file is BGR for OpenCV
        cv2.imwrite('{}/{}.png'.format(FLAGS.output_path, str(idx)),
                    img[:, :, ::-1])
def main(_):
    """Iterate num_samples batches from the WIDER FACE tfrecord, optionally
    display ground-truth annotations with OpenCV, and report the data fps.

    FLAGS controls batch size, bin/raw tfrecord choice, target encoding and
    visualization. Press 'q' in the preview window to abort.
    """
    # anchor configuration for a 640x640 input
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    clip = False
    img_dim = 640
    priors = prior_box((img_dim, img_dim), min_sizes, steps, clip)

    # target-encoding parameters
    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3
    num_samples = 100

    # encoded targets are per-prior, so decoding assumes a single image
    if FLAGS.using_encoding:
        assert FLAGS.batch_size == 1

    tfrecord_name = ('./data/widerface_train_bin.tfrecord'
                     if FLAGS.using_bin
                     else './data/widerface_train.tfrecord')

    train_dataset = load_tfrecord_dataset(tfrecord_name,
                                          FLAGS.batch_size,
                                          img_dim=640,
                                          using_bin=FLAGS.using_bin,
                                          using_flip=True,
                                          using_distort=False,
                                          using_encoding=FLAGS.using_encoding,
                                          priors=priors,
                                          match_thresh=match_thresh,
                                          ignore_thresh=ignore_thresh,
                                          variances=variances,
                                          shuffle=False)

    start_time = time.time()
    for idx, (inputs, labels) in enumerate(train_dataset.take(num_samples)):
        print("{} inputs:".format(idx), inputs.shape, "labels:", labels.shape)

        if not FLAGS.visualization:
            continue

        img = np.clip(inputs.numpy()[0], 0, 255).astype(np.uint8)
        if not FLAGS.using_encoding:
            # labels includes loc, landm, landm_valid.
            for target in labels.numpy()[0]:
                draw_bbox_landm(img, target, img_dim, img_dim)
        else:
            # labels includes loc, landm, landm_valid, conf.
            decoded = decode_tf(labels[0], priors,
                                variances=variances).numpy()
            for anchor_idx, target in enumerate(decoded):
                # skip priors not matched to a face (conf != 1)
                if target[-1] != 1:
                    continue
                draw_bbox_landm(img, target, img_dim, img_dim)
                draw_anchor(img, priors[anchor_idx], img_dim, img_dim)

        cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(0) == ord('q'):
            exit()

    elapsed = time.time() - start_time
    print("data fps: {:.2f}".format(num_samples / elapsed))
# NOTE(review): fragment of a _transform_data debugging script — it begins
# mid-flow (`label` and `img_raw` are defined before this view) and is cut
# off inside the final for-loop.
# convert [x, y, w, h] -> [x1, y1, x2, y2] — presumably; TODO confirm
label[2] += label[0]
label[3] += label[1]
print(label)
labels = [label]
cv2.rectangle(img_raw, (label[0], label[1]), (label[2], label[3]),
              (0, 255, 0), 2)
cv2.imshow('img_raw', img_raw)

# anchor configuration for a 320x320 input
input_size = 320
steps = [8, 16, 32]
min_sizes = [[8, 16], [32, 64], [128, 256]]
match_thresh = 0.5
ignore_thresh = 0.3
variances = [0.1, 0.2]

# define prior box
priors = prior_box((input_size, input_size), min_sizes, steps, True)

# img = np.array(img_raw)
labels = np.array(labels)
# _transform_data returns a callable applied to (img_raw, labels)
img, labels = _transform_data(
    input_size, True, True, True, priors, match_thresh, ignore_thresh,
    variances)(img_raw, labels)
# NOTE(review): np.int8 overflows for pixel values > 127 — likely meant
# np.uint8; left unchanged here
img = np.array(img, np.int8)
img_h, img_w, _ = img.shape

# boxes = _decode_bbox(labels[:, :4], priors)
for i in range(labels.shape[0]):
    # positive match: last label column > 0
    if labels[i][-1] > 0:
        box = priors[i]
        # prior is (cx, cy, w, h) normalized; convert to pixel corner coords
        x1 = int(box[0] * img_w - box[2] * img_w / 2)
        y1 = int(box[1] * img_h - box[3] * img_h / 2)
def train_retinaface(cfg):
    """Train RetinaFace from a config dict, optionally distributed via Horovod.

    When cfg['distributed'] is set, Horovod is imported/initialized, the
    learning rate and steps-per-epoch are scaled by world size, and all
    printing/saving is restricted to rank 0. Optionally runs a WIDER FACE
    'hard' validation pass after each epoch.
    """
    # init: silence TF logging, seed RNGs, configure GPU memory growth
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    if cfg['distributed']:
        import horovod.tensorflow as hvd
        # Initialize Horovod
        hvd.init()
    else:
        # empty list doubles as a "no Horovod" sentinel for helpers below
        hvd = []
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    reset_random_seeds()
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth(hvd)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box (anchors) for the configured square input size
    priors = prior_box((cfg['input_size'], cfg['input_size']),
                       cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset (validation loader never shards: hvd=[])
    train_dataset = load_dataset(cfg, priors, 'train', hvd)
    if cfg['evaluation_during_training']:
        val_dataset = load_dataset(cfg, priors, 'val', [])

    # define optimizer — LR and per-worker steps scale with world size
    if cfg['distributed']:
        init_lr = cfg['init_lr'] * hvd.size()
        min_lr = cfg['min_lr'] * hvd.size()
        steps_per_epoch = cfg['dataset_len'] // (cfg['batch_size'] *
                                                 hvd.size())
    else:
        init_lr = cfg['init_lr']
        min_lr = cfg['min_lr']
        steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=init_lr,
        lr_steps=[e * steps_per_epoch for e in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=min_lr)
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss(num_class=cfg['num_class'])

    # load checkpoint; the config is pickled next to the checkpoints so the
    # run can be reproduced/inspected later
    checkpoint_dir = os.path.join(cfg['output_path'], 'checkpoints',
                                  cfg['sub_name'])
    checkpoint = tf.train.Checkpoint(epoch=tf.Variable(0, name='epoch'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)
    os.makedirs(checkpoint_dir, exist_ok=True)
    with open(os.path.join(checkpoint_dir, 'cfg.pickle'), 'wb') as handle:
        pickle.dump(cfg, handle, protocol=pickle.HIGHEST_PROTOCOL)
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        print('[*] load ckpt from {}'.format(manager.latest_checkpoint))
    else:
        print("[*] training from scratch.")

    # define training step function (graph-compiled)
    @tf.function
    def train_step(inputs, labels, first_batch, epoch):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            losses = {}
            # model-internal regularization losses (e.g. weight decay)
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['landm'], losses['class'] = \
                multi_box_loss(labels, predictions)
            total_loss = tf.add_n([l for l in losses.values()])
        if cfg['distributed']:
            # Horovod: add Horovod Distributed GradientTape.
            tape = hvd.DistributedGradientTape(tape)
        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # broadcast initial state from rank 0 on the very first step
        # (callers pass first_batch=(batch == 0), epoch=(epoch == 0))
        if cfg['distributed'] and first_batch and epoch:
            hvd.broadcast_variables(model.variables, root_rank=0)
            hvd.broadcast_variables(optimizer.variables(), root_rank=0)
        return total_loss, losses

    def test_step(inputs, img_name):
        """Run inference on one validation image and return predicted boxes
        as an (N, 5) float array of [x, y, w, h, conf] in pixels."""
        _, img_height_raw, img_width_raw, _ = inputs.shape
        # pad input image to avoid unmatched shape problem
        img = inputs[0].numpy()
        # if img_name == '6_Funeral_Funeral_6_618':
        #     resize = 0.5  # this image is too big to avoid OOM problem
        #     img = cv2.resize(img, None, None, fx=resize, fy=resize,
        #                      interpolation=cv2.INTER_LINEAR)
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
        input_img = img[np.newaxis, ...]
        predictions = model(input_img, training=False)
        outputs = pred_to_outputs(cfg, predictions, input_img.shape).numpy()
        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)
        bboxs = outputs[:, :4]
        confs = outputs[:, -1]
        pred_boxes = []
        for box, conf in zip(bboxs, confs):
            # normalized corners -> pixel x/y/w/h
            x = int(box[0] * img_width_raw)
            y = int(box[1] * img_height_raw)
            w = int(box[2] * img_width_raw) - int(box[0] * img_width_raw)
            h = int(box[3] * img_height_raw) - int(box[1] * img_height_raw)
            pred_boxes.append([x, y, w, h, conf])
        pred_boxes = np.array(pred_boxes).astype('float')
        return pred_boxes

    # training loop
    summary_writer = tf.summary.create_file_writer(
        os.path.join(cfg['output_path'], 'logs', cfg['sub_name']))
    prog_bar = ProgressBar(steps_per_epoch, 0)
    if cfg['evaluation_during_training']:
        widerface_eval_hard = WiderFaceEval(split='hard')

    for epoch in range(cfg['epoch']):
        # NOTE(review): the whole epoch body is wrapped in a broad
        # try/except that prints and continues — a failure mid-epoch is
        # swallowed silently; consider narrowing or re-raising.
        try:
            actual_epoch = epoch + 1
            if cfg['distributed']:
                if hvd.rank() == 0:
                    print("\nStart of epoch %d" % (actual_epoch,))
            else:
                print("\nStart of epoch %d" % (actual_epoch,))
            checkpoint.epoch.assign_add(1)
            start_time = time.time()

            # Iterate over the batches of the dataset.
            for batch, (x_batch_train, y_batch_train, img_name) in \
                    enumerate(train_dataset):
                total_loss, losses = train_step(x_batch_train, y_batch_train,
                                                batch == 0, epoch == 0)
                if cfg['distributed']:
                    if hvd.rank() == 0:
                        # prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        #     checkpoint.epoch.numpy(), cfg['epoch'], total_loss.numpy(), optimizer._decayed_lr(tf.float32)))
                        if batch % 100 == 0:
                            # NOTE: _decayed_lr is a private Keras API
                            print("batch={}/{}, epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                                batch, steps_per_epoch,
                                checkpoint.epoch.numpy(), cfg['epoch'],
                                total_loss.numpy(),
                                optimizer._decayed_lr(tf.float32)))
                else:
                    prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                        checkpoint.epoch.numpy(), cfg['epoch'],
                        total_loss.numpy(),
                        optimizer._decayed_lr(tf.float32)))

            # Display metrics at the end of each epoch.
            # train_acc = train_acc_metric.result()
            # print("\nTraining loss over epoch: %.4f" % (float(total_loss.numpy()),))

            # per-epoch checkpoint (rank 0 only when distributed)
            if cfg['distributed']:
                if hvd.rank() == 0:
                    print("Time taken: %.2fs" % (time.time() - start_time))
                    manager.save()
                    print("\n[*] save ckpt file at {}".format(
                        manager.latest_checkpoint))
            else:
                print("Time taken: %.2fs" % (time.time() - start_time))
                manager.save()
                print("\n[*] save ckpt file at {}".format(
                    manager.latest_checkpoint))

            if cfg['evaluation_during_training']:
                # Run a validation loop at the end of each epoch.
                for batch, (x_batch_val, y_batch_val, img_name) in \
                        enumerate(val_dataset.take(500)):
                    # derive a bare image id from 'subdir/name.ext';
                    # otherwise fall back to an empty list sentinel
                    if '/' in img_name.numpy()[0].decode():
                        img_name = img_name.numpy()[0].decode() \
                            .split('/')[1].split('.')[0]
                    else:
                        img_name = []
                    pred_boxes = test_step(x_batch_val, img_name)
                    gt_boxes = labels_to_boxes(y_batch_val)
                    widerface_eval_hard.update(pred_boxes, gt_boxes, img_name)
                ap_hard = widerface_eval_hard.calculate_ap()
                widerface_eval_hard.reset()
                if cfg['distributed']:
                    if hvd.rank() == 0:
                        print("Validation acc: %.4f" % (float(ap_hard),))
                else:
                    print("Validation acc: %.4f" % (float(ap_hard),))

            def tensorboard_writer():
                # write epoch-level scalars; closes over the last batch's
                # total_loss/losses and (if evaluated) ap_hard
                with summary_writer.as_default():
                    tf.summary.scalar('loss/total_loss', total_loss,
                                      step=actual_epoch)
                    for k, l in losses.items():
                        tf.summary.scalar('loss/{}'.format(k), l,
                                          step=actual_epoch)
                    tf.summary.scalar('learning_rate',
                                      optimizer._decayed_lr(tf.float32),
                                      step=actual_epoch)
                    if cfg['evaluation_during_training']:
                        tf.summary.scalar('Val AP', ap_hard,
                                          step=actual_epoch)

            if cfg['distributed']:
                if hvd.rank() == 0:
                    tensorboard_writer()
            else:
                tensorboard_writer()
        except Exception as E:
            print(E)
            continue

    # final checkpoint after all epochs
    if cfg['distributed']:
        if hvd.rank() == 0:
            manager.save()
            print("\n[*] training done! save ckpt file at {}".format(
                manager.latest_checkpoint))
    else:
        manager.save()
        print("\n[*] training done! save ckpt file at {}".format(
            manager.latest_checkpoint))