Example #1
def main(_argv):
    model = YoloV3(FLAGS.size, training=True)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            logging.info("{}, train: {}".format(epoch,
                                                avg_loss.result().numpy()))

            avg_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
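Example #1 (like the examples that follow) assumes absl-style flags and logging around main(_argv); none of that boilerplate is shown. A minimal sketch of what it typically looks like; the default values below are illustrative assumptions, only the flag names come from the code above:

import numpy as np
import tensorflow as tf
from absl import app, flags, logging

FLAGS = flags.FLAGS
flags.DEFINE_string('dataset', '', 'path to the training tfrecord')
flags.DEFINE_string('classes', './data/coco.names', 'path to the class names file')
flags.DEFINE_integer('size', 416, 'input image size')
flags.DEFINE_integer('batch_size', 8, 'batch size')
flags.DEFINE_integer('epochs', 2, 'number of epochs')
flags.DEFINE_float('learning_rate', 1e-3, 'learning rate')
flags.DEFINE_enum('mode', 'fit', ['fit', 'eager_fit', 'eager_tf'], 'training mode')

if __name__ == '__main__':
    app.run(main)  # runs the main(_argv) defined above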
Example #2
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
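The transfer-learning branches above rely on a freeze_all helper to stop layers from being updated. A minimal sketch, assuming it simply toggles trainable recursively (this mirrors the usual yolov3-tf2 utility, but is shown here as an assumption rather than the example's own code):

import tensorflow as tf

def freeze_all(model, frozen=True):
    # mark this layer (or model) as non-trainable ...
    model.trainable = not frozen
    # ... and recurse into nested models so every sub-layer is frozen too
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)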
Example #3
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    # Setup
    if FLAGS.multi_gpu:
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
        FLAGS.batch_size = BATCH_SIZE

        with strategy.scope():
            model, optimizer, loss, anchors, anchor_masks = setup_model()
    else:
        model, optimizer, loss, anchors, anchor_masks = setup_model()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        start_time = time.time()
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        training_time = time.time() - start_time
        print(f'Total Training Time: {training_time:.2f} seconds')
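Example #3 calls setup_model() (inside the MirroredStrategy scope when multi-GPU) but does not show it. A plausible sketch, assuming it bundles the model/anchor selection, transfer configuration, and compilation seen in Example #2 and returns the same five values; the details are assumptions:

def setup_model():
    # build the model and pick the matching anchors (as in Example #2)
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors, anchor_masks = yolo_tiny_anchors, yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors, anchor_masks = yolo_anchors, yolo_anchor_masks

    # transfer-learning configuration would go here (see Example #2)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    # compile for the Keras fit path; the eager_tf branch ignores this
    if FLAGS.mode != 'eager_tf':
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
    return model, optimizer, loss, anchors, anchor_masks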
Example #4
def main(_argv):
    if FLAGS.mode == "eager_tf":
        tf.compat.v1.enable_eager_execution()

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.trace:
        run_options = tf.compat.v1.RunOptions(
            output_partition_graphs=True,
            trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
        run_metadata = tf.compat.v1.RunMetadata()
        trace_dir = os.path.join("traces", "training")
        if not os.path.isdir(trace_dir):
            os.makedirs(trace_dir)
        graphs_dir = os.path.join("traces", "training", "graphs")
        if not os.path.isdir(graphs_dir):
            os.makedirs(graphs_dir)
    else:
        run_options = None
        run_metadata = None

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    val_dataset = val_dataset.repeat()

    # TF2 doesn't need this, but we're using TF1.15.
    if FLAGS.mode == "fit":
        sess = tf.keras.backend.get_session()
        sess.run(tf.compat.v1.global_variables_initializer(),
                 options=run_options,
                 run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, f"variables_init.json"),
                      'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(
                        os.path.join(graphs_dir,
                                     f"variables_init_partition_{i}.pbtxt"),
                        'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

        sess.run(tf.compat.v1.tables_initializer(),
                 options=run_options,
                 run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, f"table_init.json"), 'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(
                        os.path.join(graphs_dir,
                                     f"table_init_partition_{i}.pbtxt"),
                        'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      options=run_options,
                      run_metadata=run_metadata)

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
        ]

        class TraceCallback(tf.keras.callbacks.Callback):
            def on_epoch_begin(self, epoch, logs=None):
                self.current_epoch = epoch

            def on_train_batch_end(self, batch, logs=None):
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open(
                        os.path.join(
                            trace_dir,
                            f"training_epoch_{self.current_epoch}_batch_{batch}.json"
                        ), 'w') as f:
                    f.write(chrome_trace)
                # No need to dump graph partitions for every batch; they should be identical.
                if batch == 0:
                    for i in range(len(run_metadata.partition_graphs)):
                        with open(
                                os.path.join(graphs_dir,
                                             f"training_partition_{i}.pbtxt"),
                                'w') as f:
                            f.write(str(run_metadata.partition_graphs[i]))

        if FLAGS.trace:
            callbacks.append(TraceCallback())
        else:
            callbacks.append(TensorBoard(write_graph=False, log_dir="logs"))

        history = model.fit(
            train_dataset,
            epochs=FLAGS.epochs,
            callbacks=callbacks,
            validation_data=val_dataset,
            steps_per_epoch=FLAGS.num_samples // FLAGS.batch_size,
            validation_steps=FLAGS.num_val_samples // FLAGS.batch_size)
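Example #4 profiles a TF 1.15 run with Chrome-trace timelines; the imports it relies on are not shown. They are roughly the following (the timeline module lives in TensorFlow's private client package, so this is an assumption about the original file's imports):

import os

import numpy as np
import tensorflow as tf
from tensorflow.python.client import timeline  # TF 1.x Chrome-trace helper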
Example #5
0
def main(_argv):
    # GPU setup
    physical_devices = tf.config.experimental.list_physical_devices('GPU')    # list all physical GPUs
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)       # enable memory growth

    # Model initialization
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the datasets
    # Training set
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(    # this call already resizes via the same function used below;
            FLAGS.dataset, FLAGS.classes, FLAGS.size)     # the later resize should be removed and this one made aspect-preserving
    num_of_data = 0
    for _ in train_dataset:
        num_of_data += 1
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),          # resizes and scales pixel values to floats; this may need revisiting
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    # Validation set
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Transfer learning
    if FLAGS.transfer == 'none':                            # no transfer learning
        pass
    elif FLAGS.transfer in ['darknet', 'no_output',
                            'no_output_no_freeze']:         # transfer only certain layers' weights and freeze them
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':                         # load the darknet layer weights and freeze them
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':                     # load everything except the output layers and freeze it
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
        else:                                                   # load everything except the output layers without freezing
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())

    else:                                                   # transfer the whole network and freeze selected layers
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':                       # transfer all weights and freeze yolo_darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':                        # transfer all weights and freeze everything
            freeze_all(model)
        elif FLAGS.transfer == 'continue':                      # transfer the whole network and keep training it
            pass

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    # loss: [coarse-scale loss, mid-scale loss, fine-scale loss]
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':

        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)

        # bookkeeping for logging and the summary writer
        num_of_batch = int(np.ceil(num_of_data / FLAGS.batch_size))
        logging.info("num of data: {}, batch size: {}, num of batch: {}".format(
                num_of_data, FLAGS.batch_size, num_of_batch))
        train_summary_writer = tf.summary.create_file_writer('logs/train')

        for epoch in range(FLAGS.epochs):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:

                    # regularization loss
                    regularization_loss = tf.reduce_sum(model.losses)

                    # prediction loss
                    # outputs: (coarse-scale output, mid-scale output, fine-scale output), with shapes:
                    #   ((batch_size, h2, w2, num_anchors0, num_class+5),
                    #    (batch_size, h2*2, w2*2, num_anchors1, num_class+5),
                    #    (batch_size, h2*4, w2*4, num_anchors2, num_class+5))
                    # labels: targets built from the ground-truth boxes, laid out as:
                    #   (coarse-scale target, mid-scale target, fine-scale target)
                    #   (batch_size, grid_size, grid_size, num_anchors, [x1, y1, x2, y2, 1, class])
                    # loss: [coarse-scale loss, mid-scale loss, fine-scale loss]
                    #     loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
                    #             for mask in anchor_masks]
                    # pred_loss: [(batch,), (batch,), (batch,)]
                    pred_loss = []
                    outputs = model(images, training=True)
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    loss_without_reg = tf.reduce_sum(pred_loss)

                    # total loss including the regularization term
                    total_loss = loss_without_reg + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("epoch_{}_batch_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

                # record the training loss once per batch
                with train_summary_writer.as_default():
                    tf.summary.scalar('loss', loss_without_reg.numpy(), step=(epoch * num_of_batch + batch))

                    # periodically save a checkpoint (as written, this saves once per batch)
                    model.save_weights('checkpoints/yolov3_{}_{}.tf'.format(epoch, batch))

                # periodically compute mAP

            avg_loss.reset_states()
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True, save_freq=500),
            TensorBoard(log_dir='logs', update_freq=10)
        ]

        # history.history is a dict holding the training losses and other metrics
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Example #6
def main(_argv):
    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                   'GPU')

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = tf.optimizers.Adam(FLAGS.learning_rate * hvd.size())
    # Horovod: add Horovod DistributedOptimizer.

    ###############################################
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(
                    train_dataset.take(5717 // hvd.size())):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                # Horovod: add Horovod Distributed GradientTape.
                tape = hvd.DistributedGradientTape(tape)

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                # Horovod: broadcast initial variable states from rank 0 to all other processes.
                # This is necessary to ensure consistent initialization of all workers when
                # training is started with random weights or restored from a checkpoint.
                #
                # Note: broadcast should be done after the first gradient step to ensure optimizer
                # initialization.
                if batch == 0:
                    hvd.broadcast_variables(model.variables, root_rank=0)
                    hvd.broadcast_variables(optimizer.variables(), root_rank=0)

                #############################
                if hvd.rank() == 0:
                    logging.info("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                ###########################
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                if hvd.rank() == 0:
                    logging.info("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
            if hvd.rank() == 0:
                logging.info("{}, train: {}, val: {}".format(
                    epoch,
                    avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            if hvd.rank() == 0:
                model.save_weights(
                    'checkpoints/horovod_yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
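In Example #6 the "add Horovod DistributedOptimizer" comment is left unfilled because the eager path wraps the tape with hvd.DistributedGradientTape instead. For the Keras fit branch, the usual Horovod pattern would look roughly like the following; this is a sketch based on Horovod's Keras bindings, not code taken from the example:

import horovod.tensorflow.keras as hvd_keras

# wrap the optimizer so gradients are averaged across workers
optimizer = hvd_keras.DistributedOptimizer(optimizer)
model.compile(optimizer=optimizer, loss=loss,
              run_eagerly=(FLAGS.mode == 'eager_fit'))

callbacks = [
    # broadcast initial variables from rank 0 so every worker starts identically
    hvd_keras.callbacks.BroadcastGlobalVariablesCallback(0),
    ReduceLROnPlateau(verbose=1),
    EarlyStopping(patience=3, verbose=1),
]
# only rank 0 writes checkpoints to avoid workers clobbering each other's files
if hvd.rank() == 0:
    callbacks.append(ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                                     verbose=1, save_weights_only=True))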
Example #7
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # anchors are fixed: each of the three output scales has its own fixed set of 3 anchor boxes
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # placeholder dataset; it is replaced below when FLAGS.dataset is set
    train_dataset = dataset.load_fake_dataset()

    # load the training data as a tf.data pipeline for preprocessing
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)

    # shuffle the training data
    train_dataset = train_dataset.shuffle(buffer_size=512)
    # batch the training data
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # preprocess: resize the images and match the targets to the anchors
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # prefetch batches asynchronously; AUTOTUNE lets tf.data pick the buffer size
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # same as train_dataset
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))


    # Configure the model for transfer learning: reuse the YOLO architecture with pretrained weights
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers: build a pretrained model to copy weights from
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        # load the pretrained weights
        model_pretrained.load_weights(FLAGS.weights)

        # load the darknet backbone weights and freeze them; all layers outside
        # the backbone remain trainable
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        # load everything from the pretrained model except the yolo_output layers;
        # only those output layers remain trainable
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(
                        l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        # keep the darknet backbone frozen and fine-tune the remaining layers
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # load the whole model and freeze all of it; nothing is trained
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Optimizer setup
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    # Loss functions
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    # Eager mode gives immediate feedback, which is handy for watching training behaviour
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # running means of the losses
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # compute gradients from the training loss and update the model parameters
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                # log how the loss evolves
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # validation: same pattern as the training loop above
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            # log the final train and val losses for the whole epoch
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # reset the metrics for the next epoch
            avg_loss.reset_states()
            avg_val_loss.reset_states()

            # save the weights obtained after this epoch
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))

    # Otherwise train through Keras fit (graph mode); per-batch feedback is not printed here
    else:
        # compile the model
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      metrics=['accuracy'])

        # callbacks, including one that saves the model weights
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        # # period=2 means a checkpoint is saved every 2 epochs (newer Keras uses save_freq instead)
        # callbacks = [
        #     ReduceLROnPlateau(verbose=1),
        #     EarlyStopping(patience=3, verbose=1),
        #     ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
        #                     verbose=1, save_weights_only=True, period = 2),
        #     TensorBoard(log_dir='logs')
        # ]

        # train the model with the callbacks above
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            validation_freq=1)
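The commented-out callback block above mentions the older period argument; in current tf.keras the checkpoint cadence is controlled with save_freq, either 'epoch' or an integer number of batches. A brief illustration of both forms:

from tensorflow.keras.callbacks import ModelCheckpoint

# save once per epoch (the callback's default behaviour)
ckpt_every_epoch = ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                                   verbose=1, save_weights_only=True,
                                   save_freq='epoch')

# or save every N training batches, e.g. every 500 batches as in Example #5's fit branch
ckpt_every_500_batches = ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                                         verbose=1, save_weights_only=True,
                                         save_freq=500)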
Example #8
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tf_record(
            tfrecord=FLAGS.dataset,
            mode=tf.estimator.ModeKeys.TRAIN,
            class_file=FLAGS.classes,
            anchors=anchors,
            anchor_masks=anchor_masks,
            batch_size=FLAGS.batch_size,
            max_detections=FLAGS.yolo_max_boxes,
            size=FLAGS.size,
            augmentation=FLAGS.augmentation)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tf_record(
            tfrecord=FLAGS.val_dataset,
            mode=tf.estimator.ModeKeys.EVAL,
            class_file=FLAGS.classes,
            anchors=anchors,
            anchor_masks=anchor_masks,
            batch_size=FLAGS.batch_size,
            max_detections=FLAGS.yolo_max_boxes,
            size=FLAGS.size,
            augmentation=False)

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    train_log_dir = './logs/train'
    val_log_dir = './logs/valid'
    clear_directory(train_log_dir, clear_subdirectories=True)
    clear_directory(val_log_dir, clear_subdirectories=True)
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    val_summary_writer = tf.summary.create_file_writer(val_log_dir)

    # Define checkpoint handler: track macro mAP
    ckpt_handler = BestEpochCheckpoint(model,
                                       './checkpoints/',
                                       10,
                                       min_delta=0.005,
                                       mode='max')

    # Eager mode is great for debugging
    # Non eager graph mode is recommended for real training
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

    # Training
    for epoch in range(1, FLAGS.epochs + 1):
        for batch, (images, labels) in enumerate(train_dataset):
            images, filenames = images
            with tf.GradientTape() as tape:
                outputs = model(images, training=True)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

            grads = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            logging.info("{}_train_{}, {}, {}".format(
                epoch, batch, total_loss.numpy(),
                list(map(lambda x: np.sum(x.numpy()), pred_loss))))
            avg_loss.update_state(total_loss)

        with train_summary_writer.as_default():
            tf.summary.scalar('Avg_loss', avg_loss.result(), step=epoch)

        all_annotations = []
        all_detections = []

        for batch, (images, labels, int_labels) in enumerate(val_dataset):
            images, filenames = images
            outputs = model(images)
            boxes, scores, classes, valid_detections = convert_yolo_output(
                outputs[0],
                outputs[1],
                outputs[2],
                anchors=anchors,
                anchor_masks=anchor_masks,
                num_classes=FLAGS.num_classes,
                max_boxes=FLAGS.yolo_max_boxes,
                iou_threshold=FLAGS.yolo_iou_threshold,
                score_threshold=FLAGS.yolo_score_threshold)
            all_annotations = create_annotations(all_annotations, int_labels,
                                                 FLAGS.num_classes)
            all_detections = create_detections(all_detections, boxes, scores,
                                               classes, valid_detections,
                                               FLAGS.num_classes)
            regularization_loss = tf.reduce_sum(model.losses)
            pred_loss = []
            for output, label, loss_fn in zip(outputs, labels, loss):
                pred_loss.append(loss_fn(label, output))
            total_loss = tf.reduce_sum(pred_loss) + regularization_loss

            logging.info("{}_val_{}, {}, {}".format(
                epoch, batch, total_loss.numpy(),
                list(map(lambda x: np.sum(x.numpy()), pred_loss))))
            avg_val_loss.update_state(total_loss)

        ap_val = average_precisions(all_detections, all_annotations,
                                    FLAGS.num_classes,
                                    FLAGS.yolo_iou_threshold)

        micro_map, macro_map = calculate_map(ap_val)
        logging.info(
            "{}, train: {}, val: {}, micro_map: {}, macro_map:{}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy(), micro_map, macro_map))
        print('micro_map: ', micro_map)
        print('macro_map: ', macro_map)

        with val_summary_writer.as_default():
            tf.summary.scalar('Avg_loss', avg_val_loss.result(), step=epoch)
            tf.summary.scalar('Micro_mAP', micro_map, step=epoch)
            tf.summary.scalar('Macro_mAP', macro_map, step=epoch)

        ckpt_handler.on_epoch_end(epoch=epoch, current_monitor=macro_map)

        avg_loss.reset_states()
        avg_val_loss.reset_states()
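Example #8 tracks macro mAP with a custom BestEpochCheckpoint handler that is not shown. A minimal sketch of what such a handler could look like, assuming it keeps the best monitored value and saves weights only when it improves by at least min_delta; the class name and constructor arguments mirror the call above, everything else (including the meaning of the third positional argument) is an assumption:

class BestEpochCheckpoint:
    """Hypothetical reconstruction: saves weights only when the monitored value improves."""

    def __init__(self, model, ckpt_dir, max_to_keep, min_delta=0.0, mode='max'):
        self.model = model
        self.ckpt_dir = ckpt_dir
        self.max_to_keep = max_to_keep      # meaning of this argument in the call above is assumed
        self.min_delta = min_delta
        self.sign = 1.0 if mode == 'max' else -1.0
        self.best = -float('inf')

    def on_epoch_end(self, epoch, current_monitor):
        # save whenever the monitored metric (macro mAP above) improves by at least min_delta
        if self.sign * current_monitor > self.best + self.min_delta:
            self.best = self.sign * current_monitor
            self.model.save_weights(
                '{}/yolov3_best_{}.tf'.format(self.ckpt_dir, epoch))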
Example #9
def main(_argv):
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=49)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    elif FLAGS.custom:
        model = Custom(FLAGS.size, training=True)
        anchors = custom_anchors
        anchor_masks = custom_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 49)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 49)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            elif FLAGS.custom:
                init_model = Custom(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=49) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=30, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
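
# Sketch of the `freeze_all` helper used above (assumed to be the usual recursive
# freezer; the project's own implementation is not shown in this example).
import tensorflow as tf

def freeze_all_sketch(layer, frozen=True):
    # mark this layer (or model) as non-trainable
    layer.trainable = not frozen
    # recurse into nested models so every sub-layer is frozen as well
    if isinstance(layer, tf.keras.Model):
        for sub_layer in layer.layers:
            freeze_all_sketch(sub_layer, frozen)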
Example #10
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    check_weighs_exist(tiny=FLAGS.tiny)

    if FLAGS.tiny:
        model = YoloV3Tiny(
            FLAGS.size,
            training=True,
            classes=FLAGS.num_classes
        )
        model.summary()
        plot_model(model, to_file='yoloV3Tiny-model-plot.png', show_shapes=True, show_layer_names=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(
            FLAGS.size,
            training=True,
            classes=FLAGS.num_classes
        )
        model.summary()
        plot_model(model, to_file='yoloV3-model-plot.png', show_shapes=True, show_layer_names=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the dataset
    train_dataset = dataset.load_fake_dataset()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            file_pattern=FLAGS.dataset,
            class_file=FLAGS.classes,
            size=FLAGS.size
        )
    # Shuffle the dataset
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size, reshuffle_each_iteration=True)
    train_dataset_length = [i for i, _ in enumerate(train_dataset)][-1] + 1
    print(f"Dataset for training consists of {train_dataset_length} images.")

    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(x, FLAGS.size),
                                                    dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size))).repeat()
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)

    val_dataset_length = [i for i, _ in enumerate(val_dataset)][-1] + 1
    print(f"Dataset for validation consists of {val_dataset_length} images.")
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(x, FLAGS.size),
                                                dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size))).repeat()

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                size=FLAGS.size,
                training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format_tiny)

        else:
            model_pretrained = YoloV3(
                size=FLAGS.size,
                training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format)

        if FLAGS.transfer == 'darknet':
            # Set yolo darknet layer weights to the loaded pretrained model weights
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            # Freeze these layers
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for i in model.layers:
                if not i.name.startswith('yolo_output'):
                    i.set_weights(model_pretrained.get_layer(
                        i.name).get_weights())
                    freeze_all(i)

    else:
        # All other transfer require matching classes
        if FLAGS.tiny:
            model.load_weights(FLAGS.weights_tf_format_tiny)
        else:
            model.load_weights(FLAGS.weights_tf_format)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Use the Adam optimizer with the specified learning rate
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    # YoloLoss function
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        print(f"Mode is: {FLAGS.mode}")
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)

                    regularization_loss = tf.reduce_sum(model.losses)

                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info(f"epoch_{epoch}_train_batch_{batch},"
                             f"{total_loss.numpy()},"
                             f"{list(map(lambda x: np.sum(x.numpy()), pred_loss))}")
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch,
                    batch,
                    total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()),
                             pred_loss)))
                )
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf')
    else:
        print(f"Compiling the model")
        model.compile(
            optimizer=optimizer,
            loss=loss,
            run_eagerly=(FLAGS.mode == 'eager_fit'),
            metrics=['accuracy'])

    callbacks = [
        EarlyStopping(monitor='val_loss',
                      patience=125,
                      verbose=1),
        ReduceLROnPlateau(monitor='val_loss',
                          verbose=1,
                          factor=0.90,
                          min_lr=0,
                          patience=20,
                          mode="auto"),
        ModelCheckpoint(
            str(f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf'),
            verbose=1,
            save_weights_only=True,
            save_best_only=True,
            mode="auto",
        ),
        TensorBoard(log_dir='logs'),
        CSVLogger(f'checkpoints/logs/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}',
                  separator=',')
    ]
    history = model.fit(train_dataset,
                        epochs=FLAGS.epochs,
                        verbose=2,
                        callbacks=callbacks,
                        validation_data=val_dataset,
                        steps_per_epoch=np.ceil(train_dataset_length / FLAGS.batch_size),
                        validation_steps=np.ceil(val_dataset_length / FLAGS.batch_size))
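
# Design note (sketch): counting the dataset by enumerating it, as above, builds a
# Python list of indices just to read its last element. An equivalent count that
# still makes a single pass over the records can be done with tf.data's reduce:
import tensorflow as tf

def dataset_length_sketch(ds):
    # TFRecord datasets report unknown cardinality, so the records must be read
    # once either way; this fold avoids materializing an index list in memory.
    return int(ds.reduce(tf.constant(0, tf.int64), lambda count, _: count + 1).numpy())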
Example #11
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    post_process_outputs = post_process_block(model.outputs,
                                              classes=FLAGS.num_classes)
    post_process_model = Model(model.inputs, post_process_outputs)

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size), y))
    # dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y:
                                  (dataset.transform_images(x, FLAGS.size), y))
    # dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
    # model.outputs shape: [[N, 13, 13, 3, 85], [N, 26, 26, 3, 85], [N, 52, 52, 3, 85]]
    # labels shape: ([N, 13, 13, 3, 6], [N, 26, 26, 3, 6], [N, 52, 52, 3, 6])
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    transf_labels = dataset.transform_targets(
                        labels, anchors, anchor_masks, FLAGS.size)
                    for output, label, loss_fn in zip(outputs, transf_labels,
                                                      loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss,
                                               axis=None) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_loss.update_state(total_loss)

                if batch >= 100:
                    break

            true_pos_total = np.zeros(FLAGS.num_classes)
            false_pos_total = np.zeros(FLAGS.num_classes)
            n_pos_total = np.zeros(FLAGS.num_classes)
            for batch, (images, labels) in enumerate(val_dataset):
                # get losses
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                transf_labels = dataset.transform_targets(
                    labels, anchors, anchor_masks, FLAGS.size)
                for output, label, loss_fn in zip(outputs, transf_labels,
                                                  loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_val_loss.update_state(total_loss)

                # get true positives, false positives, and positive labels
                preds = post_process_model(images)
                true_pos, false_pos, n_pos = batch_true_false_positives(
                    preds.numpy(), labels.numpy(), FLAGS.num_classes)
                true_pos_total += true_pos
                false_pos_total += false_pos
                n_pos_total += n_pos

                if batch >= 20:
                    break

            # precision-recall by class
            precision, recall = batch_precision_recall(true_pos_total,
                                                       false_pos_total,
                                                       n_pos_total)
            for c in range(FLAGS.num_classes):
                print('Class {} - Prec: {}, Rec: {}'.format(
                    c, precision[c], recall[c]))
            # total precision-recall
            print('Total - Prec: {}, Rec: {}'.format(
                calc_precision(np.sum(true_pos_total),
                               np.sum(false_pos_total)),
                calc_recall(np.sum(true_pos_total), np.sum(n_pos_total))))

            # log losses
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # reset loop and save weights
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                os.path.join(FLAGS.checkpoint_dir, 'yolov3_train_{}.tf'\
                    .format(epoch)))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint(os.path.join(FLAGS.checkpoint_dir,
                                         'yolov3_train_{epoch}.tf'),
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir=FLAGS.log_dir)
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
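
# Sketches of the precision/recall helpers referenced above (assumed definitions;
# the project's own `calc_precision`, `calc_recall` and `batch_precision_recall`
# are not shown in this example).
import numpy as np

def calc_precision_sketch(true_pos, false_pos):
    # precision = TP / (TP + FP), guarding against the no-detections case
    denom = true_pos + false_pos
    return true_pos / denom if denom > 0 else 0.0

def calc_recall_sketch(true_pos, n_pos):
    # recall = TP / number of ground-truth positives
    return true_pos / n_pos if n_pos > 0 else 0.0

def batch_precision_recall_sketch(true_pos_total, false_pos_total, n_pos_total):
    # per-class precision/recall over the accumulated counts
    detections = true_pos_total + false_pos_total
    precision = np.divide(true_pos_total, detections,
                          out=np.zeros_like(true_pos_total, dtype=float),
                          where=detections > 0)
    recall = np.divide(true_pos_total, n_pos_total,
                       out=np.zeros_like(true_pos_total, dtype=float),
                       where=n_pos_total > 0)
    return precision, recall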
Example #12
0
def main(_argv):

    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
    except:
        tpu = None
    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy()
    #print("REPLICAS: ", strategy.num_replicas_in_sync)
    FLAGS.batch_size = FLAGS.batch_size * strategy.num_replicas_in_sync
    with strategy.scope():
        if FLAGS.tiny:
            model = YoloV3Tiny(FLAGS.size,
                               training=True,
                               classes=FLAGS.num_classes)
            anchors = yolo_tiny_anchors
            anchor_masks = yolo_tiny_anchor_masks
        else:
            model = YoloV3(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
            anchors = yolo_anchors
            anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size)
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    if FLAGS.optimizer == 'Adam':
        optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'nAdam':
        optimizer = tf.keras.optimizers.Nadam(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(lr=FLAGS.learning_rate,
                                                rho=0.9)

    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        if tpu:
            callbacks = [
                ReduceLROnPlateau(verbose=1),
                EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint(
                    'yolov3_train_{epoch}.h5',
                    save_weights_only=True,
                    verbose=1,
                    period=FLAGS.period
                )  #, monitor='val_loss', mode='min', save_best_only=True), #1000
            ]
        else:
            callbacks = [
                ReduceLROnPlateau(verbose=1),
                #EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('./checkpoints/yolov3_train_{epoch}.tf',
                                verbose=1,
                                save_weights_only=True,
                                period=FLAGS.period),  #1000
                TensorBoard(log_dir='logs')
            ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
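
# Design note (sketch): the optimizer if/elif chain above can also be written as a
# lookup table, which makes an unsupported FLAGS.optimizer value fail loudly. The
# names and the RMSprop rho mirror the branches in this example.
import tensorflow as tf

def build_optimizer_sketch(name, learning_rate):
    optimizers = {
        'Adam': lambda: tf.keras.optimizers.Adam(learning_rate=learning_rate),
        'nAdam': lambda: tf.keras.optimizers.Nadam(learning_rate=learning_rate),
        'Adagrad': lambda: tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
        'RMSprop': lambda: tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9),
    }
    if name not in optimizers:
        raise ValueError('Unsupported optimizer: {}'.format(name))
    return optimizers[name]()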
Example #13
0
def main(_argv):
    # Change flag values
    if FLAGS.height is None:
        FLAGS.height = FLAGS.size
    if FLAGS.width is None:
        FLAGS.width = FLAGS.size
    size = (FLAGS.height, FLAGS.width)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(size,
                           training=True,
                           classes=FLAGS.num_classes,
                           recurrent=FLAGS.recurrent)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(size,
                       training=True,
                       classes=FLAGS.num_classes,
                       recurrent=FLAGS.recurrent)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes, size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=8)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(
        x, size), dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        train_dataset = train_dataset.map(
            lambda x, y: (dataset.get_recurrect_inputs(
                x, y, anchors, anchor_masks, FLAGS.num_classes), y))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(
        x, size), dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        val_dataset = val_dataset.map(
            lambda x, y: (dataset.get_recurrect_inputs(
                x, y, anchors, anchor_masks, FLAGS.num_classes), y))

    # Configure the model for transfer learning
    if FLAGS.transfer != 'none':
        # if we need all weights, no need to create another model
        if FLAGS.transfer == 'all':
            model.load_weights(FLAGS.weights)

        # else, we need only some of the weights
        # create appropriate model_pretrained, load all weights and copy the ones we need
        else:
            if FLAGS.tiny:
                model_pretrained = YoloV3Tiny(size,
                                              training=True,
                                              classes=FLAGS.weights_num_classes
                                              or FLAGS.num_classes,
                                              recurrent=FLAGS.recurrent)
            else:
                model_pretrained = YoloV3(size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes,
                                          recurrent=FLAGS.recurrent)
            # load pretrained weights
            model_pretrained.load_weights(FLAGS.weights)
            # transfer darknet
            darknet = model.get_layer('yolo_darknet')
            darknet.set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            # transfer 'yolo_conv_i' layer weights
            if FLAGS.transfer in [
                    'yolo_conv', 'yolo_output_conv', 'yolo_output'
            ]:
                for l in model.layers:
                    if l.name.startswith('yolo_conv'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())
            # transfer 'yolo_output_i' first conv2d layer
            if FLAGS.transfer == 'yolo_output_conv':
                # transfer tiny output conv2d
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        # get and set the weights of the appropriate layers
                        model.get_layer(l.name).layers[1].set_weights(
                            model_pretrained.get_layer(
                                l.name).layers[1].get_weights())
                        # should I freeze batch_norm as well?
            # transfer 'yolo_output_i' layer weights
            if FLAGS.transfer == 'yolo_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())
    # no transfer learning
    else:
        pass

    # freeze layers, if requested
    if FLAGS.freeze != 'none':
        if FLAGS.freeze == 'all':
            freeze_all(model)
        if FLAGS.freeze in [
                'yolo_darknet',
                'yolo_conv', 'yolo_output_conv', 'yolo_output'
        ]:
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        if FLAGS.freeze in ['yolo_conv', 'yolo_output_conv', 'yolo_output']:
            for l in model.layers:
                if l.name.startswith('yolo_conv'):
                    freeze_all(l)
        if FLAGS.freeze == 'yolo_output_conv':
            if FLAGS.tiny:
                # freeze the appropriate layers
                freeze_all(model.layers[4].layers[1])
                freeze_all(model.layers[5].layers[1])
            else:
                # freeze the appropriate layers
                freeze_all(model.layers[5].layers[1])
                freeze_all(model.layers[6].layers[1])
                freeze_all(model.layers[7].layers[1])
        if FLAGS.freeze == 'yolo_output':
            for l in model.layers:
                if l.name.startswith('yolo_output'):
                    freeze_all(l)
    # freeze nothing
    else:
        pass

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
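
# For reference (sketch): the `yolo_anchors` / `yolo_anchor_masks` used throughout
# these examples typically follow the standard YOLOv3 priors, normalized by the
# 416 input size and grouped by output scale; a given project may use other values.
import numpy as np

yolo_anchors_sketch = np.array(
    [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
     (59, 119), (116, 90), (156, 198), (373, 326)], np.float32) / 416
# one mask per output scale: the 13x13 grid gets the largest anchors,
# the 52x52 grid the smallest
yolo_anchor_masks_sketch = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])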
Example #14
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        # Allocate GPU memory only as needed
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Train either the tiny or the full version of YOLO
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # If no dataset is specified, load a single image as the dataset => fake_dataset
    train_dataset = dataset.load_fake_dataset()

    # Check whether a dataset path was provided
    if FLAGS.dataset:
        # Load the dataset from TFRecord files; train_dataset: (x_train, y_train)
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    # Build the batched training data
    # Shuffle the data order
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # y.shape:train_dataset.as_numpy_iterator().next()[1].shape
    # =>(batch_size, yolo_max_boxes, 5) 5=>(xmin, ymin, xmax, ymax, classlabel)
    train_dataset = train_dataset.map(lambda x, y: (
        # Normalize image data to [0, 1]
        dataset.transform_images(x, FLAGS.size),
        # Use the anchor priors to decide which feature map (13*13, 26*26, 52*52) each bbox belongs to
        # and compute where the bbox center falls on that feature map
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch data to improve latency and throughput
    # tf.data.experimental.AUTOTUNE: dynamically sets the number of parallel calls based on available CPUs
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Load a fake validation set to avoid errors when no validation path is given
    val_dataset = dataset.load_fake_dataset()
    # Load the validation set
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    # Choose the training mode
    # Randomly initialize the weights and train the whole network from scratch
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    # Two ways of doing transfer training
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            # Model network structure
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        # Load the pretrained weights
        model_pretrained.load_weights(FLAGS.weights)

        # Set the darknet weights and freeze them: the backbone is not trained, the remaining parameters are randomly initialized
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        # Set and freeze the weights of everything except the YOLO output layers: only the output layers are trained, starting from random initialization
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    # The fine_tune and frozen transfer modes require the trained class count to match the pretrained weights (80 classes)
    else:
        # All other transfer require matching classes
        # Load all pretrained weight parameters for the network
        model.load_weights(FLAGS.weights)
        # Freeze the darknet (backbone) weights; the remaining parameters keep training from the pretrained weights
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # Freeze all parameters, so training has no effect.
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Define the optimizer: Adam
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # Debug mode (slow): in eager mode, ops execute immediately when called
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # Running average loss on the training set / on the validation set
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        # Iterate over the epochs
        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # GradientTape: automatically computes gradients of the variables
                with tf.GradientTape() as tape:
                    # model(): preferred in eager mode; runs directly without compiling, so it is fast.
                    # model.predict() needs to build the graph first on its initial call
                    outputs = model(images, training=True)
                    # Sum the tensor elements across all dimensions.
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                # Compute gradients
                grads = tape.gradient(total_loss, model.trainable_variables)
                # Apply the optimizer update
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                # Log this batch
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                # Update the running average loss
                avg_loss.update_state(total_loss)

            # Evaluate on the validation set
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
            # .result(): returns the accumulated result
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            # reset_states(): clears the accumulated values
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # Save the model weights once per epoch
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))

    # Training mode
    else:
        # Compile the model
        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=['accuracy'],
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        # Callbacks
        callbacks = [
            # Reduce the learning rate on plateau
            ReduceLROnPlateau(verbose=1),
            # Stop training early when the monitored metric stops improving
            EarlyStopping(patience=3, verbose=1),
            # Save model checkpoints
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            # Visualize training results in TensorBoard
            TensorBoard(log_dir='logs', write_images=True, update_freq='batch')
        ]
        # Run the training loop
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
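
# For completeness (sketch): every example above defines `main(_argv)` and reads
# its configuration from `FLAGS`, which is the absl-py pattern. A minimal entry
# point looks like the following; the flag definitions are illustrative and only a
# subset of what the examples assume.
from absl import app, flags

flags.DEFINE_string('dataset', '', 'path to the training tfrecord(s)')
flags.DEFINE_string('classes', '', 'path to the class names file')
flags.DEFINE_integer('size', 416, 'input image size')
flags.DEFINE_integer('epochs', 2, 'number of training epochs')
flags.DEFINE_integer('batch_size', 8, 'batch size')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    # absl parses the flags and forwards the leftover argv to main(_argv)
    app.run(main)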