def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path',
                        help='model file to predict',
                        type=str,
                        required=True)
    parser.add_argument('--image_file',
                        help='image file to predict',
                        type=str,
                        required=True)
    parser.add_argument('--anchors_path',
                        help='path to anchor definitions',
                        type=str,
                        required=True)
    parser.add_argument(
        '--classes_path',
        help='path to class definitions, default ../configs/voc_classes.txt',
        type=str,
        default='../configs/voc_classes.txt')
    parser.add_argument('--loop_count',
                        help='loop inference for certain times',
                        type=int,
                        default=1)

    args = parser.parse_args()

    # param parse
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)

    validate_yolo_model_mnn(args.model_path, args.image_file, anchors,
                            class_names, args.loop_count)
Example #2
    def __init__(self, FLAGS):
        self.__dict__.update(self._defaults)  # set up default values
        self.backbone = FLAGS['backbone']
        self.opt = FLAGS['opt']
        self.class_names = get_classes(FLAGS['classes_path'])
        self.anchors = get_anchors(FLAGS['anchors_path'])
        self.input_shape = FLAGS['input_size']
        config = tf.ConfigProto()

        if self.opt == OPT.XLA:
            config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
            sess = tf.Session(config=config)
            tf.keras.backend.set_session(sess)
        elif self.opt == OPT.MKL:
            config.intra_op_parallelism_threads = 4
            config.inter_op_parallelism_threads = 4
            sess = tf.Session(config=config)
            tf.keras.backend.set_session(sess)
        elif self.opt == OPT.DEBUG:
            tf.logging.set_verbosity(tf.logging.DEBUG)
            sess = tf_debug.TensorBoardDebugWrapperSession(
                tf.Session(config=tf.ConfigProto(log_device_placement=True)),
                "localhost:6064")
            tf.keras.backend.set_session(sess)
        else:
            sess = tf.keras.backend.get_session()
        self.sess = sess
        self.generate(FLAGS)
def main(_):
    if MODE.STATUS == FLAGS.mode:
        request = get_model_status_pb2.GetModelStatusRequest()
        request.model_spec.name = 'detection'
        request.model_spec.signature_name = 'serving_default'
    elif MODE.CONFIG == FLAGS.mode:
        request = model_management_pb2.ReloadConfigRequest()
        config = request.config.model_config_list.config.add()
        config.name = 'detection'
        config.base_path = '/models/detection/detection'
        config.model_platform = 'tensorflow'
        config.model_version_policy.specific.versions.append(5)
        config.model_version_policy.specific.versions.append(7)
        config2 = request.config.model_config_list.config.add()
        config2.name = 'pascal'
        config2.base_path = '/models/detection/pascal'
        config2.model_platform = 'tensorflow'
    elif MODE.ZOOKEEPER == FLAGS.mode:
        zk = KazooClient(hosts="10.10.67.225:2181")
        zk.start()
        zk.ensure_path('/serving/cunan')
        zk.set(
            '/serving/cunan',
            get_config('detection', 5, 224, 'serving_default',
                       ','.join(get_classes('model_data/cci.names')),
                       "10.12.102.32:8000"))
        return
    for address in FLAGS.addresses:
        channel = grpc.insecure_channel(address)
        stub = model_service_pb2_grpc.ModelServiceStub(channel)
        if MODE.STATUS == FLAGS.mode:
            result = stub.GetModelStatus(request)
        elif MODE.CONFIG == FLAGS.mode:
            result = stub.HandleReloadConfigRequest(request)
        print(result)
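
The serving-control snippet above relies on several imports that are not shown. A minimal sketch of the modules it would typically need (from grpcio, kazoo, and the tensorflow-serving-api package) is:

import grpc
from kazoo.client import KazooClient
from tensorflow_serving.apis import get_model_status_pb2
from tensorflow_serving.apis import model_management_pb2
from tensorflow_serving.apis import model_service_pb2_grpc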
Example #4
    def __init__(self, **kwargs):
        super(YOLO_np, self).__init__()
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides
        self.class_names = get_classes(self.classes_path)
        self.anchors = get_anchors(self.anchors_path)
        self.colors = get_colors(self.class_names)
        K.set_learning_phase(0)
        self.yolo_model = self._generate_model()
Example #5
    def __init__(self, FLAGS):
        self.backbone = FLAGS.get('backbone', BACKBONE.MOBILENETV2)
        self.class_names = get_classes(
            FLAGS.get('classes_path', 'model_data/voc_classes.txt'))
        self.anchors = get_anchors(
            FLAGS.get('anchors_path', 'model_data/yolo_anchors'))
        self.input_shape = FLAGS.get('input_size', (416, 416))
        self.score = FLAGS.get('score', 0.2)
        self.nms = FLAGS.get('nms', 0.5)
        self.with_classes = FLAGS.get('with_classes', False)

        self.generate(FLAGS)
Example #6
def translated_gt_if_needed():
    if 'class_translation_path' in train_config and train_config[
            'class_translation_path']:
        print('Translating dataset classes...')
        with open(train_config['class_translation_path'], 'r') as stream:
            class_translation_config = yaml.safe_load(stream)

        with open(train_config['test_path']) as f:
            lines = f.readlines()

        class_names = get_classes(train_config['classes_path'])
        lines = translate_classes(lines, class_names, class_translation_config)
        print(
            'Translation is done. Now we want to save the new translated dataset version.'
        )
        annotation_path_translated = train_config['test_path'].replace(
            '.txt', '_' +
            train_config['class_translation_path'].replace('.yml', '.txt'))
        if os.path.exists(annotation_path_translated):
            print('Seems like this translation has already been done before.')
            already_present_translation_on_disk_lines = open(
                annotation_path_translated, 'r').readlines()
            disk_md5 = calc_annot_lines_md5(
                already_present_translation_on_disk_lines)
            current_translated_md5 = calc_annot_lines_md5(lines)
            print('Checking translation version...')
            if disk_md5 == current_translated_md5:
                print(
                    'Disk translation version matches the current generated one. Proceeding to training.'
                )
            else:
                print(
                    'Disk translation version is different from the current one. Seems like the translation code has changed.'
                )
                print(
                    'Back up the translated annotation file on disk, document it properly, and move it to another folder: ',
                    annotation_path_translated)
                raise Exception(
                    'Disk and current class translation versions mismatch. Cannot proceed.'
                )
        else:
            with open(annotation_path_translated, 'w') as output_f:
                print('Writing the new translated annotation file to',
                      annotation_path_translated)
                for annot_line in lines:
                    output_f.write(annot_line + '\n')

        return annotation_path_translated
    else:
        # no translation needed
        return train_config['test_path']
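
calc_annot_lines_md5 is defined elsewhere in this repository. A minimal sketch of what such a helper could look like (an assumption: hashing the stripped annotation lines with hashlib so equal content yields equal digests) is:

import hashlib

def calc_annot_lines_md5(lines):
    # Hash the stripped annotation lines so two translated files with the
    # same content produce the same digest regardless of trailing whitespace.
    digest = hashlib.md5()
    for line in lines:
        digest.update(line.strip().encode('utf-8'))
        digest.update(b'\n')
    return digest.hexdigest()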
Example #7
def main():
    annotation_path = 'create_train_data/train.txt'
    log_dir = os.path.join(
        "trained_model", time.strftime("%Y-%m-%d %H-%M-%S", time.localtime()))
    if os.path.exists("trained_model"):
        os.mkdir(log_dir)
    classes_path = 'model_data/voc_classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    input_shape = (416, 416)  # multiple of 32, hw
    model = create_model(input_shape, anchors, len(class_names))
    train(model,
          annotation_path,
          input_shape,
          anchors,
          len(class_names),
          log_dir=log_dir)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', help='model file to predict', type=str, required=True)
    parser.add_argument('--image_file', help='image file to predict', type=str, required=True)
    parser.add_argument('--anchors_path', help='path to anchor definitions', type=str, required=True)
    parser.add_argument('--classes_path', help='path to class definitions, default ../configs/voc_classes.txt', type=str, default='../configs/voc_classes.txt')
    parser.add_argument('--model_image_size', help='model image input size as <num>x<num>, default 416x416', type=str, default='416x416')
    parser.add_argument('--loop_count', help='loop inference for certain times', type=int, default=1)

    args = parser.parse_args()

    # param parse
    model = load_model(args.model_path, compile=False)
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))

    validate_yolo_model(model, args.image_file, anchors, class_names, model_image_size, args.loop_count)
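
The '<num>x<num>' parsing above does no validation. A small hypothetical helper (not part of the original script) that also enforces the multiple-of-32 constraint asserted elsewhere in this repository could look like:

def parse_model_image_size(value):
    # '416x416' -> (416, 416), raising if either side is not a multiple of 32
    height, width = (int(v) for v in value.split('x'))
    assert height % 32 == 0 and width % 32 == 0, \
        'model_image_size must be multiples of 32'
    return (height, width)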
Example #9
def train(FLAGS):
    """Train yolov3 with different backbone
    """
    prune = FLAGS['prune']
    opt = FLAGS['opt']
    backbone = FLAGS['backbone']
    log_dir = os.path.join(
        'logs',
        str(backbone).split('.')[1].lower() + '_' + str(datetime.date.today()))

    batch_size = FLAGS['batch_size']
    train_dataset_glob = FLAGS['train_dataset']
    val_dataset_glob = FLAGS['val_dataset']
    test_dataset_glob = FLAGS['test_dataset']
    freeze = FLAGS['freeze']
    epochs = FLAGS['epochs'][0] if freeze else FLAGS['epochs'][1]

    class_names = get_classes(FLAGS['classes_path'])
    num_classes = len(class_names)
    anchors = get_anchors(FLAGS['anchors_path'])
    input_shape = FLAGS['input_size']  # multiple of 32, hw
    model_path = FLAGS['model']
    if model_path and not model_path.endswith('.h5'):
        model_path = tf.train.latest_checkpoint(model_path)
    lr = FLAGS['learning_rate']
    tpu_address = FLAGS['tpu_address']
    if tpu_address is not None:
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=tpu_address)
        tf.config.experimental_connect_to_host(cluster_resolver.master())
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)
    else:
        strategy = tf.distribute.MirroredStrategy(devices=FLAGS['gpus'])
    batch_size = batch_size * strategy.num_replicas_in_sync

    train_dataset_builder = Dataset(train_dataset_glob, batch_size, anchors,
                                    num_classes, input_shape)
    train_dataset, train_num = train_dataset_builder.build(epochs)
    val_dataset_builder = Dataset(val_dataset_glob,
                                  batch_size,
                                  anchors,
                                  num_classes,
                                  input_shape,
                                  mode=DATASET_MODE.VALIDATE)
    val_dataset, val_num = val_dataset_builder.build(epochs)
    map_callback = MAPCallback(test_dataset_glob, input_shape, anchors,
                               class_names)
    tensorboard = tf.keras.callbacks.TensorBoard(write_graph=False,
                                                 log_dir=log_dir,
                                                 write_images=True)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                                    monitor='val_loss',
                                                    save_weights_only=True,
                                                    save_best_only=True,
                                                    period=3)
    cos_lr = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch, _: tf.keras.experimental.CosineDecay(lr[1], epochs)
        (epoch).numpy(), 1)
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      min_delta=0,
                                                      patience=epochs // 5,
                                                      verbose=1)

    loss = [
        YoloLoss(idx, anchors, print_loss=False)
        for idx in range(len(anchors) // 3)
    ]

    adv_config = nsl.configs.make_adv_reg_config(multiplier=0.2,
                                                 adv_step_size=0.2,
                                                 adv_grad_norm='infinity')
    train_dataset = strategy.experimental_distribute_dataset(train_dataset)
    val_dataset = strategy.experimental_distribute_dataset(val_dataset)

    with strategy.scope():
        factory = ModelFactory(tf.keras.layers.Input(shape=(*input_shape, 3)),
                               weights_path=model_path)
        if backbone == BACKBONE.MOBILENETV2:
            model = factory.build(mobilenetv2_yolo_body,
                                  155,
                                  len(anchors) // 3,
                                  num_classes,
                                  alpha=FLAGS['alpha'])
        elif backbone == BACKBONE.DARKNET53:
            model = factory.build(darknet_yolo_body, 185,
                                  len(anchors) // 3, num_classes)
        elif backbone == BACKBONE.EFFICIENTNET:
            model = factory.build(efficientnet_yolo_body,
                                  499,
                                  FLAGS['model_name'],
                                  len(anchors) // 3,
                                  batch_norm_momentum=0.9,
                                  batch_norm_epsilon=1e-3,
                                  num_classes=num_classes,
                                  drop_connect_rate=0.2,
                                  data_format="channels_first")

    # Train with frozen layers first, to get a stable loss.
    # Adjust the number of epochs to your dataset. This step is enough to obtain a reasonably good model.
    if freeze is True:
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                             epsilon=1e-8),
                          loss=loss)
        model.fit(epochs, [
            checkpoint, tensorboard,
            tf.keras.callbacks.LearningRateScheduler((lambda _, lr: lr), 1)
        ], train_dataset, val_dataset)
        model.save_weights(
            os.path.join(
                log_dir,
                str(backbone).split('.')[1].lower() +
                '_trained_weights_stage_1.h5'))
    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[1],
                                                             epsilon=1e-8),
                          loss=loss)  # recompile to apply the change
        print('Unfreeze all of the layers.')
        model.fit(epochs, [checkpoint, cos_lr, tensorboard, early_stopping],
                  train_dataset,
                  val_dataset,
                  use_adv=False)
        model.save_weights(
            os.path.join(
                log_dir,
                str(backbone).split('.')[1].lower() +
                '_trained_weights_final.h5'))
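
The cos_lr callback above wraps tf.keras.experimental.CosineDecay in a LearningRateScheduler. A standalone sketch of the same pattern (assuming TF 2.x eager execution, with illustrative values) is:

import tensorflow as tf

initial_lr = 1e-3   # illustrative value
total_epochs = 50   # illustrative value

# CosineDecay returns a schedule object; calling it with the epoch index
# yields the decayed learning rate for that epoch as a tensor.
schedule = tf.keras.experimental.CosineDecay(initial_lr, total_epochs)
cos_lr = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch, _: schedule(epoch).numpy(), verbose=1)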
Example #10
import pymysql
import pymysql.cursors
import os, json, pytz, time, datetime

import numpy as np
from yolo import YOLO
from PIL import Image
from io import BytesIO

from yolo3.utils import get_classes, get_anchors
from imgur_api import upload_photo
from flask import Flask  # needed for the Flask app created below

annotation_path = os.path.join('model_data', 'anno.txt')
classes_path = os.path.join('model_data', 'sp_classes.txt')
anchors_path = os.path.join('model_data', 'yolo_anchors.txt')
class_names = get_classes(classes_path)
num_classes = len(class_names)
anchors = get_anchors(anchors_path)

input_shape = (416, 416)  # multiple of 32, hw

yolo = YOLO(model_path='single_label.h5',
            classes_path=classes_path,
            anchors_path=anchors_path)

# Load the LINE secret key
secretFileContentJson = json.load(
    open("./line_secret_key", "r", encoding="utf8"))

# Configure the server startup details
app = Flask(__name__, static_url_path="/images", static_folder="./images/")
Example #11
def _main():
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    import tensorflow as tf
    from keras import backend as K
    config = tf.ConfigProto()

    # presumably only needed for the GPU build
    # config.gpu_options.allow_growth = True

    sess = tf.Session(config=config)
    K.set_session(sess)

    # annotation_path = 'dataset/WIDER_train.txt'  # dataset
    # annotation_path = 'VOCdevkit/VOC2010/2010_train_label.txt'
    annotation_path = 'data/all.txt'
    classes_path = 'model_data/my_classes.txt'  # class definitions

    log_dir = 'logs/384/'  # log directory

    pretrained_path = 'logs/000/trained_weights_final.h5'  # pretrained weights
    anchors_path = 'model_data/yolo_anchors.txt'  # anchors

    class_names = get_classes(classes_path)  # list of class names
    num_classes = len(class_names)  # number of classes
    anchors = get_anchors(anchors_path)  # list of anchors

    input_shape = (288, 384)  # multiple of 32, input image size (hw)
    model = create_model(
        input_shape,
        anchors,
        num_classes,
        freeze_body=2,
        load_pretrained=0,
        weights_path=pretrained_path)  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3)  # save weights only
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=3,
                                  verbose=1)  # reduce the learning rate when the monitored metric stops improving
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=10,
                                   verbose=1)  # stop before validation performance degrades

    val_split = 0.1  # train/validation split ratio
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(47)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)  # number of validation samples
    num_train = len(lines) - num_val  # number of training samples
    """
    把目标当成一个输入,构成多输入模型,把loss写成一个层,作为最后的输出,搭建模型的时候,
    就只需要将模型的output定义为loss,而compile的时候,
    直接将loss设置为y_pred(因为模型的输出就是loss,所以y_pred就是loss),
    无视y_true,训练的时候,y_true随便扔一个符合形状的数组进去就行了。
    """
    if 0:
        model.compile(
            optimizer=Adam(lr=1e-3),
            loss={
                # use the custom yolo_loss Lambda layer
                'yolo_loss': lambda y_true, y_pred: y_pred
            })  # loss function

        batch_size = 32  # batch size
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        history = model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=500,
            initial_epoch=0,
            callbacks=[logging, checkpoint])
        # Save the final weights; during training, intermediate weights are saved via the callbacks.
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    if 1:  # train all layers
        for i in range(len(model.layers)):
            model.layers[i].trainable = True

        # The learning rate can be somewhat larger early in training and reduced later.
        model.compile(optimizer=Adam(lr=1e-3),
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 4  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))

        history = model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=20,
            initial_epoch=0,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')

    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(loss))
    plt.plot(epochs, loss, label='Training loss')
    plt.plot(epochs, val_loss, label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()

    plt.show()
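
The docstring above describes the keras-yolo3 trick of computing the loss inside a layer and compiling with a pass-through loss. A minimal self-contained sketch of that pattern (a toy model and toy loss, not the real YOLO loss) is:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

# Toy model: predictions and targets are both inputs, a Lambda layer computes
# the loss, and the model's single output *is* the loss value.
pred_in = layers.Input(shape=(4,))
true_in = layers.Input(shape=(4,))
dense = layers.Dense(4)(pred_in)
loss_out = layers.Lambda(
    lambda t: tf.reduce_mean(tf.square(t[0] - t[1]), axis=-1, keepdims=True),
    name='toy_loss')([dense, true_in])
model = Model([pred_in, true_in], loss_out)

# Compile with a pass-through loss: y_pred already is the loss, y_true is ignored.
model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)

# Any array of the right shape works as the dummy y_true.
x = np.random.rand(8, 4).astype('float32')
y = np.random.rand(8, 4).astype('float32')
model.fit([x, y], np.zeros((8, 1), dtype='float32'), epochs=1, verbose=0)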
Example #12
def _main(args):
    global lr_base, total_epochs
    lr_base = args.learning_rate
    total_epochs = args.total_epoch

    annotation_file = args.annotation_file
    log_dir = 'logs/000/'
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    if args.tiny_version:
        anchors_path = 'configs/tiny_yolo_anchors.txt'
    else:
        anchors_path = 'configs/yolo_anchors.txt'
    anchors = get_anchors(anchors_path)
    print("\nanchors = ", anchors)
    print("\nnum_classes = ", num_classes)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1

    if args.freeze_level is not None:
        freeze_level = args.freeze_level
        print("\n\nFREEZE LEVEL  = ", freeze_level)

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir,
                          histogram_freq=0,
                          write_graph=False,
                          write_grads=False,
                          write_images=False,
                          update_freq='batch')
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
        period=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=5,
                                  verbose=1,
                                  cooldown=0,
                                  min_lr=1e-10)
    lr_scheduler = LearningRateScheduler(learning_rate_scheduler)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=30,
                                   verbose=1)
    terminate_on_nan = TerminateOnNaN()

    callbacks = [
        logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan
    ]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(
        np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [
            sparsity.UpdatePruningStep(),
            sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate)

    # get train model
    model = get_yolo3_train_model(args.model_type,
                                  anchors,
                                  num_classes,
                                  weights_path=args.weights_path,
                                  freeze_level=freeze_level,
                                  optimizer=optimizer,
                                  label_smoothing=args.label_smoothing,
                                  model_pruning=args.model_pruning,
                                  pruning_end_step=pruning_end_step)
    # support multi-gpu training
    if args.gpu_num >= 2:
        model = multi_gpu_model(model, gpus=args.gpu_num)
    model.summary()

    # Train some initial epochs with frozen layers first if needed, to get a stable loss.
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0
            and input_shape[1] % 32 == 0), 'Multiples of 32 required'
    batch_size = args.batch_size
    initial_epoch = 0
    epochs = args.init_epoch
    print("Initial training stage")
    print(
        'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
        .format(num_train, num_val, batch_size, input_shape))
    model.fit_generator(data_generator_wrapper(dataset[:num_train], batch_size,
                                               input_shape, anchors,
                                               num_classes),
                        steps_per_epoch=max(1, num_train // batch_size),
                        validation_data=data_generator_wrapper(
                            dataset[num_train:], batch_size, input_shape,
                            anchors, num_classes),
                        validation_steps=max(1, num_val // batch_size),
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        callbacks=callbacks)

    # Apply Cosine learning rate decay only after
    # unfreeze all layers
    if args.cosine_decay_learning_rate:
        callbacks.remove(reduce_lr)
        callbacks.append(lr_scheduler)

    # Unfreeze the whole network for further training
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    model.compile(optimizer=optimizer,
                  loss={
                      'yolo_loss': lambda y_true, y_pred: y_pred
                  })  # recompile to apply the change

    if args.multiscale:
        # prepare multiscale config
        input_shape_list = get_multiscale_list(args.model_type,
                                               args.tiny_version)
        interval = args.rescale_interval

        # Do multi-scale training on different input shape
        # change every "rescale_interval" epochs
        for epoch_step in range(epochs + interval, args.total_epoch, interval):
            # shuffle train/val dataset for cross-validation
            if args.data_shuffle:
                np.random.shuffle(dataset)

            initial_epoch = epochs
            epochs = epoch_step
            # rescale input only from 2nd round, to make sure unfreeze stable
            if initial_epoch != args.init_epoch:
                input_shape = input_shape_list[random.randint(
                    0,
                    len(input_shape_list) - 1)]
            print(
                'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
                .format(num_train, num_val, batch_size, input_shape))
            model.fit_generator(
                data_generator_wrapper(dataset[:num_train], batch_size,
                                       input_shape, anchors, num_classes),
                steps_per_epoch=max(1, num_train // batch_size),
                validation_data=data_generator_wrapper(dataset[num_train:],
                                                       batch_size, input_shape,
                                                       anchors, num_classes),
                validation_steps=max(1, num_val // batch_size),
                epochs=epochs,
                initial_epoch=initial_epoch,
                callbacks=callbacks)
    else:
        # Do single-scale training
        print(
            'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
            .format(num_train, num_val, batch_size, input_shape))
        model.fit_generator(data_generator_wrapper(dataset[:num_train],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
                            steps_per_epoch=max(1, num_train // batch_size),
                            validation_data=data_generator_wrapper(
                                dataset[num_train:], batch_size, input_shape,
                                anchors, num_classes),
                            validation_steps=max(1, num_val // batch_size),
                            epochs=args.total_epoch,
                            initial_epoch=epochs,
                            callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(log_dir + 'trained_final.h5')
Example #13
def main():
    # class YOLO defines the default value, so suppress any default here
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
    '''
    Command line options
    '''
    parser.add_argument('--model_path',
                        type=str,
                        required=True,
                        help='path to model weight file')

    parser.add_argument('--anchors_path',
                        type=str,
                        required=True,
                        help='path to anchor definitions')

    parser.add_argument(
        '--classes_path',
        type=str,
        required=True,
        help='path to class definitions, default configs/voc_classes.txt',
        default='configs/voc_classes.txt')

    parser.add_argument('--annotation_file',
                        type=str,
                        required=True,
                        help='annotation txt file to verify')

    parser.add_argument('--eval_type',
                        type=str,
                        help='evaluation type (VOC/COCO), default=VOC',
                        default='VOC')

    parser.add_argument('--iou_threshold',
                        type=float,
                        help='IOU threshold for PascalVOC mAP, default=0.5',
                        default=0.5)

    parser.add_argument(
        '--conf_threshold',
        type=float,
        help=
        'confidence threshold for filtering box in postprocess, default=0.001',
        default=0.001)

    parser.add_argument(
        '--model_image_size',
        type=str,
        help='model image input size as <num>x<num>, default 416x416',
        default='416x416')

    parser.add_argument(
        '--save_result',
        default=False,
        action="store_true",
        help='Save the detection result image in result/detection dir')

    args = parser.parse_args()

    # param parse
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))

    eval_AP(args.eval_type, args.model_path, args.annotation_file, anchors,
            class_names, args.iou_threshold, args.conf_threshold,
            model_image_size, args.save_result)
Example #14
def _main(train_config):
    annotation_path = train_config['train_path']
    log_dir = train_config['log_dir']
    classes_path = train_config['classes_path']
    anchors_path = train_config['anchors_path']
    model_name = train_config['model_name']
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    print('num_classes', num_classes)
    # Number of YOLO heads that actually predict bboxes (infusion and other auxiliary heads are not counted).
    num_yolo_heads = 3 if model_name in ['yolo', 'yolo_infusion'] else 2
    print('number of yolo heads', num_yolo_heads)
    anchors = get_anchors(anchors_path, num_yolo_heads)
    freeze_body = 1
    pretrained_weights_path = ARGS.pretrained_weights if ARGS.pretrained_weights else train_config[
        'pretrained_weights_path']
    input_shape = (int(train_config['input_height']),
                   int(train_config['input_width']))

    if model_name in ['tiny_yolo', 'tiny_yolo_infusion']:
        model = create_tiny_model(input_shape,
                                  anchors,
                                  num_classes,
                                  load_pretrained=True,
                                  freeze_body=freeze_body,
                                  weights_path=pretrained_weights_path,
                                  model_name=model_name,
                                  num_yolo_heads=num_yolo_heads)
    else:
        model = create_model(input_shape,
                             anchors,
                             num_classes,
                             load_pretrained=True,
                             freeze_body=freeze_body,
                             weights_path=pretrained_weights_path,
                             model_name=model_name,
                             num_yolo_heads=num_yolo_heads
                             )  # make sure you know what you freeze

    # print(model.summary())

    logging = TensorBoard(log_dir=log_dir, write_grads=True, write_images=True)
    checkpoint = ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 save_weights_only=True,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=3,
                                  verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=5,
                                   verbose=1)

    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()

    if 'class_translation_path' in train_config and train_config[
            'class_translation_path']:
        print('Translating dataset classes...')
        with open(train_config['class_translation_path'], 'r') as stream:
            class_translation_config = yaml.safe_load(stream)

        lines = translate_classes(lines, class_names, class_translation_config)
        print(
            'Translation is done. Now we want to save the new translated dataset version.'
        )
        annotation_path_translated = annotation_path.replace(
            '.txt', '_' +
            train_config['class_translation_path'].replace('.yml', '.txt'))
        if os.path.exists(annotation_path_translated):
            print('Seems like this translation has already been done before.')
            already_present_translation_on_disk_lines = open(
                annotation_path_translated, 'r').readlines()
            disk_md5 = calc_annot_lines_md5(
                already_present_translation_on_disk_lines)
            current_translated_md5 = calc_annot_lines_md5(lines)
            print('Checking translation version...')
            if disk_md5 == current_translated_md5:
                print(
                    'Disk translation version matches the current generated one. Proceeding to training.'
                )
            else:
                print(
                    'Disk translation version is different from the current one. Seems like the translation code has changed.'
                )
                print(
                    'Back up the translated annotation file on disk, document it properly, and move it to another folder: ',
                    annotation_path_translated)
                raise Exception(
                    'Disk and current class translation versions mismatch. Cannot proceed.'
                )
        else:
            with open(annotation_path_translated, 'w') as output_f:
                print('Writing the new translated annotation file to',
                      annotation_path_translated)
                for annot_line in lines:
                    output_f.write(annot_line + '\n')

    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # Train with frozen layers first, to get a stable loss.
    # Adjust the number of epochs to your dataset. This step is enough to obtain a reasonably good model.
    batch_size_freezed = train_config['batch_size_freezed']
    epochs_freezed = train_config['epochs_freezed']
    if True and epochs_freezed > 0:
        compile_model(model, model_name)
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size_freezed))
        model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size_freezed,
                                   input_shape, anchors, num_classes,
                                   model_name, num_yolo_heads),
            steps_per_epoch=max(1, num_train // batch_size_freezed),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size_freezed,
                                                   input_shape, anchors,
                                                   num_classes, model_name,
                                                   num_yolo_heads),
            validation_steps=max(1, num_val // batch_size_freezed),
            epochs=epochs_freezed,
            initial_epoch=0,
            callbacks=[logging, checkpoint])
        model.save_weights(os.path.join(log_dir, 'trained_weights_stage_1.h5'))

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    if True:
        #Unfreeze all layers.
        print('Unfreeze all of the layers.')
        for i in range(len(model.layers)):
            model.layers[i].trainable = True

        # recompile the model now that the layers are unfrozen.
        compile_model(model, model_name)

        batch_size_unfreezed = train_config[
            'batch_size_unfreezed']  # note that more GPU memory is required after unfreezing the body
        epochs_unfreezed = train_config['epochs_unfreezed']
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size_unfreezed))
        model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size_unfreezed,
                                   input_shape, anchors, num_classes,
                                   model_name, num_yolo_heads),
            steps_per_epoch=max(1, num_train // batch_size_unfreezed),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size_unfreezed,
                                                   input_shape, anchors,
                                                   num_classes, model_name,
                                                   num_yolo_heads),
            validation_steps=max(1, num_val // batch_size_unfreezed),
            epochs=epochs_freezed + epochs_unfreezed,
            initial_epoch=epochs_freezed,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(os.path.join(log_dir, 'trained_weights_final.h5'))
Example #15
    def train_new_model(data_path):

        # Create the project directories based on the project name
        prefix = "projects"
        project_name = "new_project"
        if not os.path.exists(os.path.join(prefix, project_name)):
            mkdir_for_newProject(project_name=project_name, prefix=prefix)
        print("Directories of Project:{} have been created!".format(
            project_name))

        # Copy the images to the target directory
        if len(
                os.listdir(
                    os.path.join(prefix, project_name, "VOC2007",
                                 "JPEGImages"))) == 0:
            """
            os.path.join(prefix, project_name, "VOC2007", "JPEGImages") is empty, so copy the training data into it
            """
            count = 1
            for file in os.listdir(data_path):
                if len(str(count)) < 5:
                    new_name = "0" * (5 -
                                      len(str(count))) + str(count) + ".jpg"
                    shutil.copy(
                        os.path.join(data_path, file),
                        os.path.join(prefix, project_name, "VOC2007",
                                     "JPEGImages", new_name))
                count += 1
            print("Train_data have been copied from: {} to {} ".format(
                data_path,
                os.path.join(prefix, project_name, "VOC2007", "JPEGImages")))

        # Annotate the data
        if len(os.listdir(os.path.join(prefix, project_name, "VOC2007", "JPEGImages"))) != 0 and \
                len(os.listdir(os.path.join(prefix, project_name, "VOC2007", "Annotations"))) == 0:
            print("Please annotate images of directory: {}".format(
                os.path.join(prefix, project_name, "VOC2007", "JPEGImages")))
        if len(os.listdir(os.path.join(prefix, project_name, "VOC2007", "JPEGImages"))) != 0 and \
                len(os.listdir(os.path.join(prefix, project_name, "VOC2007", "JPEGImages"))) != \
                len(os.listdir(os.path.join(prefix, project_name, "VOC2007", "Annotations"))) :
            a = input("Please verify that the annotations are complete (Y/N): ")
            if a == "N" or a == "n":
                print("Please complete the annotations first.")
                exit()
        else:
            print("Annotations have complete!")

        # Generate VOC2007-format data
        if len(
                os.listdir(
                    os.path.join(prefix, project_name, "VOC2007",
                                 "Annotations"))) != 0:
            voc_xml_to_txt(xmlfilepath=os.path.join(prefix, project_name,
                                                    "VOC2007", "Annotations"),
                           txtsavepath=os.path.join(prefix, project_name,
                                                    "VOC2007", "ImageSets"),
                           trainval_percent=0.8,
                           train_percent=0.8)

        # Convert the VOC2007-format data into txt files that yolo3 can use
        # Get the class information
        sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
        classes = get_classes(classes_path=config["classes_path"])

        for year, image_set in sets:
            image_ids = open('{}/{}/VOC{}/ImageSets/{}.txt'.format(
                prefix, project_name, year, image_set)).read().strip().split()
            list_file = open(
                '{}/{}/{}.txt'.format(prefix, project_name, image_set), 'w')
            for image_id in image_ids:
                list_file.write('{}/{}/VOC{}/JPEGImages/{}.jpg'.format(
                    prefix, project_name, year, image_id))
                convert_annotation(prefix, project_name, year, image_id,
                                   list_file, classes)
                list_file.write('\n')
            list_file.close()

        # Train the model
        if config["train_method"] == 1:
            """
            Fine-tune on top of the pretrained yolov3.weights
            """



        if config["train_method"] == 2:
            """
def train(FLAGS):
    """Train yolov3 with different backbone
    """
    prune = FLAGS['prune']
    opt = FLAGS['opt']
    backbone = FLAGS['backbone']
    log_dir = FLAGS['log_directory'] or os.path.join(
        'logs',
        str(backbone).split('.')[1].lower() + str(datetime.date.today()))
    if not tf.io.gfile.exists(log_dir):
        tf.io.gfile.mkdir(log_dir)
    batch_size = FLAGS['batch_size']
    train_dataset_glob = FLAGS['train_dataset']
    val_dataset_glob = FLAGS['val_dataset']
    test_dataset_glob = FLAGS['test_dataset']
    freeze = FLAGS['freeze']
    freeze_step = FLAGS['epochs'][0]
    train_step = FLAGS['epochs'][1]

    if opt == OPT.DEBUG:
        tf.config.experimental_run_functions_eagerly(True)
        tf.debugging.set_log_device_placement(True)
        tf.get_logger().setLevel(tf.logging.DEBUG)
    elif opt == OPT.XLA:
        config = tf.ConfigProto()
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        tf.keras.backend.set_session(sess)

    class_names = get_classes(FLAGS['classes_path'])
    num_classes = len(class_names)
    anchors = get_anchors(FLAGS['anchors_path'])
    input_shape = FLAGS['input_size']  # multiple of 32, hw
    model_path = FLAGS['model']
    if model_path and not model_path.endswith('.h5'):
        model_path = tf.train.latest_checkpoint(model_path)
    lr = FLAGS['learning_rate']
    tpu_address = FLAGS['tpu_address']
    if tpu_address is not None:
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=tpu_address)
        tf.config.experimental_connect_to_host(cluster_resolver.master())
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)
    else:
        strategy = tf.distribute.MirroredStrategy(devices=FLAGS['gpus'])

    batch_size = batch_size * strategy.num_replicas_in_sync

    train_dataset_builder = Dataset(train_dataset_glob, batch_size, anchors,
                                    num_classes, input_shape)
    train_dataset, train_num = train_dataset_builder.build()
    val_dataset_builder = Dataset(val_dataset_glob,
                                  batch_size,
                                  anchors,
                                  num_classes,
                                  input_shape,
                                  mode=DATASET_MODE.VALIDATE)
    val_dataset, val_num = val_dataset_builder.build()
    map_callback = MAPCallback(test_dataset_glob, input_shape, anchors,
                               class_names)
    logging = tf.keras.callbacks.TensorBoard(write_graph=False,
                                             log_dir=log_dir,
                                             write_images=True)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                                    monitor='val_loss',
                                                    save_weights_only=False,
                                                    save_best_only=False,
                                                    period=1)
    cos_lr = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch, _: tf.keras.experimental.CosineDecay(lr[1], train_step)
        (epoch - freeze_step).numpy(), 1)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=(freeze_step + train_step) // 10,
        verbose=0)
    if tf.version.VERSION.startswith('1.'):
        loss = [
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 0, anchors, print_loss=True)
        ]
    else:
        loss = [
            YoloLoss(idx, anchors, print_loss=False)
            for idx in range(len(anchors) // 3)
        ]

    with strategy.scope():
        #factory = ModelFactory(tf.keras.layers.Input(shape=(*input_shape, 3)),
        #                       weights_path=model_path)
        factory = ModelFactory(tf.keras.layers.Input(shape=(*input_shape, 3)))
        if backbone == BACKBONE.MOBILENETV2:
            model = factory.build(mobilenetv2_yolo_body,
                                  20,
                                  len(anchors) // 1,
                                  num_classes,
                                  alpha=1.0)
        elif backbone == BACKBONE.DARKNET53:
            model = factory.build(darknet_yolo_body, 185,
                                  len(anchors) // 3, num_classes)
        elif backbone == BACKBONE.EFFICIENTNET:
            FLAGS['model_name'] = 'efficientnet-b4'
            model = factory.build(
                efficientnet_yolo_body,
                20,  # todo
                FLAGS['model_name'],
                len(anchors) // 2,
                batch_norm_momentum=0.9,
                batch_norm_epsilon=1e-3,
                num_classes=num_classes,
                drop_connect_rate=0.2,
                data_format="channels_first")

    if prune:
        from tensorflow_model_optimization.python.core.api.sparsity import keras as sparsity
        end_step = np.ceil(1.0 * train_num / batch_size).astype(
            np.int32) * train_step
        new_pruning_params = {
            'pruning_schedule':
            sparsity.PolynomialDecay(initial_sparsity=0.5,
                                     final_sparsity=0.9,
                                     begin_step=0,
                                     end_step=end_step,
                                     frequency=1000)
        }
        pruned_model = sparsity.prune_low_magnitude(model,
                                                    **new_pruning_params)
        pruned_model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                                epsilon=1e-8),
                             loss=loss)
        pruned_model.fit(train_dataset,
                         epochs=train_step,
                         initial_epoch=0,
                         steps_per_epoch=max(1, train_num // batch_size),
                         callbacks=[
                             checkpoint, cos_lr, logging, map_callback,
                             early_stopping
                         ],
                         validation_data=val_dataset,
                         validation_steps=max(1, val_num // batch_size))
        model = sparsity.strip_pruning(pruned_model)
        model.save_weights(
            os.path.join(
                log_dir,
                str(backbone).split('.')[1].lower() +
                '_trained_weights_pruned.h5'))
        with zipfile.ZipFile(os.path.join(
                log_dir,
                str(backbone).split('.')[1].lower() +
                '_trained_weights_pruned.h5.zip'),
                             'w',
                             compression=zipfile.ZIP_DEFLATED) as f:
            f.write(
                os.path.join(
                    log_dir,
                    str(backbone).split('.')[1].lower() +
                    '_trained_weights_pruned.h5'))
        return

    # Train with frozen layers first, to get a stable loss.
    # Adjust the number of epochs to your dataset. This step is enough to obtain a reasonably good model.
    if freeze is True:
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                             epsilon=1e-8),
                          loss=loss)
        model.fit(train_dataset,
                  epochs=freeze_step,
                  initial_epoch=0,
                  steps_per_epoch=max(1, train_num // batch_size),
                  callbacks=[logging, checkpoint],
                  validation_data=val_dataset,
                  validation_steps=max(1, val_num // batch_size))
        model.save_weights(
            os.path.join(
                log_dir,
                str(backbone).split('.')[1].lower() +
                '_trained_weights_stage_1.h5'))
    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    else:
        #if 1:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[1],
                                                             epsilon=1e-8),
                          loss=loss)  # recompile to apply the change
        print('Unfreeze all of the layers.')
        model.fit(
            train_dataset,
            epochs=train_step + freeze_step,
            initial_epoch=freeze_step,
            steps_per_epoch=max(1, train_num // batch_size),
            callbacks=[
                checkpoint,
                cos_lr,
                logging,
                early_stopping  #map_callback
            ],
            validation_data=val_dataset,
            validation_steps=max(1, val_num // batch_size))
        model.save_weights(
            os.path.join(
                log_dir,
                str(backbone).split('.')[1].lower() +
                '_trained_weights_final.h5'))
def _main():

    # These paths can be customized
    fig_path = 'fig/fackoff823.png'
    log_dir = 'logs/fackoff823/'

    # Paths to the training data and model parameter files
    classes_path = 'model_data/my_classes.txt'
    anchors_path = 'model_data/320_224.txt'
    annotation_path = 'data/all.txt'

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    input_shape = (224, 320)  # multiple of 32, hw

    is_tiny_version = len(anchors) == 6  # default setting
    if is_tiny_version:
        model = create_tiny_model(input_shape,
                                  anchors,
                                  num_classes,
                                  freeze_body=2,
                                  load_pretrained=0,
                                  weights_path='others/detect8.5.h5')
    else:
        model = create_model(
            input_shape,
            anchors,
            num_classes,
            freeze_body=2,
            weights_path=
            'logs/yolo_body_mobilenetv2_5_93843/trained_weights_final.h5'
        )  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=3,
                                  verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=10,
                                   verbose=1)

    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(21552)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # Train with frozen layers first, to get a stable loss.
    # Adjust the number of epochs to your dataset. This step is enough to obtain a reasonably good model.
    if 0:
        model.compile(
            optimizer=Adam(lr=1e-3),
            loss={
                # use custom yolo_loss Lambda layer.
                'yolo_loss': lambda y_true, y_pred: y_pred
            })

        batch_size = 32
        model.summary()
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        history = model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=100,
            initial_epoch=0,
            callbacks=[logging, checkpoint])
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    if 1:
        model.summary()
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        # recompile to apply the change
        model.compile(optimizer=Adam(lr=1e-3),
                      loss={
                          'yolo_loss': lambda y_true, y_pred: y_pred
                      })
        print('Unfreeze all of the layers.')

        batch_size = 32  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, batch_size))
        history = model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape,
                                   anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:],
                                                   batch_size, input_shape,
                                                   anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=100,
            initial_epoch=0,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')

    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(loss))
    plt.plot(epochs, loss, label='Training loss')
    plt.plot(epochs, val_loss, label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
    plt.savefig(fig_path)
    plt.show()
    required=True,
    default=None,
    type=str,
    help="The suffix to be appended in the output annotation file.")
ap.add_argument("-t",
                "--run_class_translations",
                required=False,
                action="store_true",
                help="Run class translations.")
ARGS = ap.parse_args()

train_config = None
with open(ARGS.config_path, 'r') as stream:
    train_config = yaml.safe_load(stream)

class_names = get_classes(train_config['classes_path'])

for annotation_path in [train_config['train_path'], train_config['test_path']]:
    print('Checking annotation', annotation_path)
    did_modify_something = False
    output_annotation_path = annotation_path.replace(
        '.txt', '_{}.txt'.format(ARGS.output_suffix))

    if os.path.exists(output_annotation_path):
        raise Exception('The output file already exists:',
                        output_annotation_path)

    with open(annotation_path) as f:
        lines = f.readlines()

    if ARGS.run_class_translations:
def train(FLAGS):
    batch_size = FLAGS['batch_size']
    use_tpu = FLAGS['use_tpu']
    class_names = get_classes(FLAGS['classes_path'])
    epochs = FLAGS['epochs'][0]
    input_size = FLAGS['input_size']
    model_path = FLAGS['model']
    backbone = FLAGS['backbone']
    train_dataset_glob = FLAGS['train_dataset']
    val_dataset_glob = FLAGS['val_dataset']
    log_dir = FLAGS['log_directory'] or os.path.join(
        'logs',
        str(backbone).split('.')[1].lower() + str(datetime.date.today()))
    strategy = tf.distribute.MirroredStrategy()
    batch_size = batch_size * strategy.num_replicas_in_sync
    with strategy.scope():
        factory = ModelFactory(weights_path=model_path)
        if backbone == BACKBONE.MOBILENETV2:
            model = factory.build(mobilenetv2,
                                  0,
                                  alpha=1.4,
                                  classes=len(class_names))
        elif backbone == BACKBONE.DARKNET53:
            model = factory.build(darknet_body, 0, classes=len(class_names))
        elif backbone == BACKBONE.EFFICIENTNET:
            model = factory.build(EfficientNet,
                                  0,
                                  classes=len(class_names),
                                  input_shape=(*input_size, 3))
        model.compile(tf.keras.optimizers.Adam(1e-3),
                      loss=tf.keras.losses.sparse_categorical_crossentropy,
                      metrics=[tf.keras.metrics.sparse_categorical_accuracy])
    if use_tpu:
        tpu = tf.contrib.cluster_resolver.TPUClusterResolver()
        tpu_strategy = tf.contrib.tpu.TPUDistributionStrategy(tpu)
        model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpu_strategy)

    train_dataset, train_num = BackboneDataset(
        train_dataset_glob,
        batch_size,
        num_classes=len(class_names),
        input_shapes=input_size).build()
    val_dataset, val_num = BackboneDataset(val_dataset_glob,
                                           batch_size,
                                           num_classes=len(class_names),
                                           input_shapes=input_size).build()

    cos_lr = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch, _: tf.train.cosine_decay(1e-3, epoch, epochs)().numpy(),
        1)
    logging = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                             write_images=True)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                                    save_weights_only=True,
                                                    verbose=1,
                                                    period=3)
    model.fit(train_dataset,
              epochs=epochs,
              steps_per_epoch=max(1, train_num // batch_size),
              validation_data=val_dataset,
              validation_steps=max(1, val_num // batch_size),
              callbacks=[cos_lr, logging, checkpoint])
    model.save_weights(
        os.path.join(
            log_dir,
            str(backbone).split('.')[1].lower() + '_trained_weights.h5'))