def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    training_model = load_model()

    if FLAGS.tiny:
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_dataset(FLAGS.dataset, FLAGS.dataset_labels)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(
        x, FLAGS.size
    ), dataset.transform_targets(y, anchors, anchor_masks, FLAGS.num_classes)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_dataset(FLAGS.val_dataset,
                                       FLAGS.val_dataset_labels)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(
        x, FLAGS.size
    ), dataset.transform_targets(y, anchors, anchor_masks, FLAGS.num_classes)))

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    training_model.compile(optimizer=optimizer, loss=loss)

    callbacks = [
        # ReduceLROnPlateau(monitor='val_loss', factor=0.75, patience=5, verbose=1, min_lr=0.001),
        # EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True,
                        period=1),
        TensorBoard(log_dir='logs')
    ]

    history = training_model.fit(train_dataset,
                                 epochs=FLAGS.epochs,
                                 callbacks=callbacks,
                                 validation_data=val_dataset)
Example #2
def main():
    batch_size = 4
    input_size = 416
    num_classes = 10
    shuffle_buffer_size = 128

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    # load train set
    train_dataset = dataset.load_tfrecord_dataset(
        "C:/Users/14841/Desktop/tf_out/train/yymnist.tfrecord",
        "C:/Users/14841/Desktop/tf_out/yymnist.names", input_size)
    train_dataset = train_dataset.shuffle(buffer_size=shuffle_buffer_size)
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, input_size),
        dataset.transform_targets(y, anchors, anchor_masks, input_size)))
    # Prefetch last so preprocessing overlaps with training
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # load valid set
    val_dataset = dataset.load_tfrecord_dataset(
        "C:/Users/14841/Desktop/tf_out/valid/yymnist.tfrecord",
        "C:/Users/14841/Desktop/tf_out/yymnist.names", input_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, input_size),
        dataset.transform_targets(y, anchors, anchor_masks, input_size)))

    model = YoloV3(input_size, training=True, classes=num_classes)

    learning_rate = 0.001

    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks
    ]

    model.compile(optimizer=optimizer, loss=loss)
    model.summary()
    epochs = 10

    train_size = 1000
    valid_size = 200

    history = model.fit(train_dataset,
                        verbose=True,
                        steps_per_epoch=train_size // batch_size,
                        epochs=epochs,
                        validation_steps=valid_size // batch_size,
                        validation_data=val_dataset)
Example #3
def main():
    model = TreeNet(True)
    model.summary()
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_tree_tfrecord_dataset(
        "gs://zach_schira_bucket/data.tfrecord")

    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(16)
    train_dataset = train_dataset.map(lambda x, y: (
        x, dataset.transform_targets(y, anchors, anchor_masks, 416)))

    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Configure the optimizer and loss functions
    optimizer = tf.keras.optimizers.Adam(lr=10e-3)
    loss = [YoloLoss(anchors[mask], classes=2) for mask in anchor_masks]

    model.compile(optimizer=optimizer, loss=loss, run_eagerly=False)

    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        TensorBoard(log_dir='gs://zach_schira_bucket/logs')
    ]

    history = model.fit(train_dataset, epochs=5000, callbacks=callbacks)
    model.save("gs://zach_schira_bucket/tree_model")
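# Note: ReduceLROnPlateau and EarlyStopping monitor 'val_loss' by default,
# but this fit() call passes no validation data, so both callbacks would only
# emit warnings. Pointing them at the training loss is one workaround
# (a sketch, not the original configuration):
callbacks = [
    ReduceLROnPlateau(monitor='loss', verbose=1),
    EarlyStopping(monitor='loss', patience=3, verbose=1),
    TensorBoard(log_dir='gs://zach_schira_bucket/logs')
]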
Example #4
    def input_fn(train_dataset, val_dataset, classes, batch_size):
        train_dataset = dataset.load_tfrecord_dataset(train_dataset, classes, size)
        # train_dataset = train_dataset.shuffle(buffer_size=10000)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.transform_images(x, size),
            dataset.transform_targets(y, anchors, anchor_masks, 2, size)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)
    
        val_dataset = dataset.load_tfrecord_dataset(val_dataset, classes, size)
        val_dataset = val_dataset.batch(batch_size).repeat(EPOCHS)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, size),
            dataset.transform_targets(y, anchors, anchor_masks, 2, size)))
        
        return train_dataset, val_dataset
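
    # Hypothetical usage of input_fn (the paths are placeholders; `size`,
    # `anchors`, `anchor_masks`, and `EPOCHS` are assumed to be defined in
    # the enclosing scope):
    train_ds, val_ds = input_fn('data/train.tfrecord', 'data/val.tfrecord',
                                'data/classes.names', batch_size=8)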
Example #5
def main(args):
    class_path = args.classes  # Path to classes file
    weights = args.weights  # Path to weight file
    image_size = cfg.IMAGE_SIZE  # Resize images to this size - 416 or 608
    image = ''  # Path to input image
    tfrecord = args.dataset  # tfrecord instead of image or None
    output = args.output  # Path to output image
    num_classes = args.num_classes  # Number of classes in model

    anchors = cfg.YOLO_ANCHORS
    anchor_masks = cfg.YOLO_ANCHOR_MASKS

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    yolo = YoloV3(image_size, training=False, classes=num_classes)
    yolo.load_weights(weights).expect_partial()
    print('weights loaded')

    class_names = [c.strip() for c in open(class_path).readlines()]
    print('classes loaded')

    if tfrecord:
        val_dataset = load_tfrecord_dataset(tfrecord, class_path, image_size)
        # val_dataset = val_dataset.shuffle(512)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            transform_images(x, image_size),
            transform_targets(y, anchors, anchor_masks, image_size)))
        # img_raw, _label = next(iter(dataset.take(1)))
    else:
        img_raw = tf.image.decode_image(open(image, 'rb').read(), channels=3)

    index = 0
    # Note: this loop assumes the tfrecord branch above ran; when a single
    # image is decoded instead, val_dataset is undefined.
    for img_raw, _label in val_dataset.take(25):
        # img = tf.expand_dims(img_raw, 0)
        img = transform_images(img_raw, image_size)
        img = img * 255

        boxes, scores, classes, nums = yolo(img)

        output = '/home/justin/Models/yolov3-tf2/test_images/test_{}.jpg'.format(
            index)
        print('output saved to: {}'.format(output))

        img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums),
                           class_names,
                           thresh=0)
        img = img * 255
        cv2.imwrite(output, img)

        index = index + 1
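# Note on pixel ranges (not from the original): transform_images() returns
# floats in [0, 1], which is what the model expects, while cv2.imwrite wants
# uint8 in [0, 255]. The usual conversion looks like this sketch, where
# `img_float` is a hypothetical [0, 1] RGB array:
img_uint8 = (img_float * 255).astype(np.uint8)
cv2.imwrite(output, cv2.cvtColor(img_uint8, cv2.COLOR_RGB2BGR))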
Example #6
def main(_argv):
    model = YoloV3(FLAGS.size, training=True)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            logging.info("{}, train: {}".format(epoch,
                                                avg_loss.result().numpy()))

            avg_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
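# The eager loop above can be compiled into a graph with tf.function for a
# substantial speed-up. A sketch reusing the example's model, loss, and
# optimizer objects; this is not part of the original script:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        outputs = model(images, training=True)
        regularization_loss = tf.reduce_sum(model.losses)
        pred_loss = [loss_fn(label, output)
                     for output, label, loss_fn in zip(outputs, labels, loss)]
        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss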
Example #7
def main(_argv):

    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
    except ValueError:
        tpu = None
    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy()
    #print("REPLICAS: ", strategy.num_replicas_in_sync)
    FLAGS.batch_size = FLAGS.batch_size * strategy.num_replicas_in_sync
    with strategy.scope():
        if FLAGS.tiny:
            model = YoloV3Tiny(FLAGS.size,
                               training=True,
                               classes=FLAGS.num_classes)
            anchors = yolo_tiny_anchors
            anchor_masks = yolo_tiny_anchor_masks
        else:
            model = YoloV3(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
            anchors = yolo_anchors
            anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size)
    train_dataset = train_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size, drop_remainder=True)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    if FLAGS.optimizer == 'Adam':
        optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'nAdam':
        optimizer = tf.keras.optimizers.Nadam(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(lr=FLAGS.learning_rate,
                                                rho=0.9)
    else:
        raise ValueError('Unsupported optimizer: {}'.format(FLAGS.optimizer))

    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        if tpu:
            callbacks = [
                ReduceLROnPlateau(verbose=1),
                EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint(
                    'yolov3_train_{epoch}.h5',
                    save_weights_only=True,
                    verbose=1,
                    period=FLAGS.period
                )  #, monitor='val_loss', mode='min', save_best_only=True), #1000
            ]
        else:
            callbacks = [
                ReduceLROnPlateau(verbose=1),
                #EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('./checkpoints/yolov3_train_{epoch}.tf',
                                verbose=1,
                                save_weights_only=True,
                                period=FLAGS.period),  #1000
                TensorBoard(log_dir='logs')
            ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
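# For reference, the freeze_all() helper these examples call is defined in
# the yolov3-tf2 utils module roughly as follows: it recursively marks a
# layer and all of its sublayers as non-trainable.
def freeze_all(model, frozen=True):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)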
Example #8
def main(_argv):
    if FLAGS.mode == "eager_tf":
        tf.compat.v1.enable_eager_execution()

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.trace:
        run_options = tf.compat.v1.RunOptions(
            output_partition_graphs=True,
            trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
        run_metadata = tf.compat.v1.RunMetadata()
        trace_dir = os.path.join("traces", "training")
        if not os.path.isdir(trace_dir):
            os.makedirs(trace_dir)
        graphs_dir = os.path.join("traces", "training", "graphs")
        if not os.path.isdir(graphs_dir):
            os.makedirs(graphs_dir)
    else:
        run_options = None
        run_metadata = None

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    val_dataset = val_dataset.repeat()

    # TF2 doesn't need this, but we're using TF1.15.
    if FLAGS.mode == "fit":
        sess = tf.keras.backend.get_session()
        sess.run(tf.compat.v1.global_variables_initializer(),
                 options=run_options,
                 run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, f"variables_init.json"),
                      'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(
                        os.path.join(graphs_dir,
                                     f"variables_init_partition_{i}.pbtxt"),
                        'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

        sess.run(tf.compat.v1.tables_initializer(),
                 options=run_options,
                 run_metadata=run_metadata)
        if FLAGS.trace:
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(os.path.join(trace_dir, f"table_init.json"), 'w') as f:
                f.write(chrome_trace)
            for i in range(len(run_metadata.partition_graphs)):
                with open(
                        os.path.join(graphs_dir,
                                     f"table_init_partition_{i}.pbtxt"),
                        'w') as f:
                    f.write(str(run_metadata.partition_graphs[i]))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      options=run_options,
                      run_metadata=run_metadata)

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
        ]

        class TraceCallback(tf.keras.callbacks.Callback):
            def on_epoch_begin(self, epoch, logs=None):
                self.current_epoch = epoch

            def on_train_batch_end(self, batch, logs=None):
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open(
                        os.path.join(
                            trace_dir,
                            f"training_epoch_{self.current_epoch}_batch_{batch}.json"
                        ), 'w') as f:
                    f.write(chrome_trace)
                # No need to dump graph partitions for every batch; they should be identical.
                if batch == 0:
                    for i in range(len(run_metadata.partition_graphs)):
                        with open(
                                os.path.join(graphs_dir,
                                             f"training_partition_{i}.pbtxt"),
                                'w') as f:
                            f.write(str(run_metadata.partition_graphs[i]))

        if FLAGS.trace:
            callbacks.append(TraceCallback())
        else:
            callbacks.append(TensorBoard(write_graph=False, log_dir="logs"))

        history = model.fit(
            train_dataset,
            epochs=FLAGS.epochs,
            callbacks=callbacks,
            validation_data=val_dataset,
            steps_per_epoch=FLAGS.num_samples // FLAGS.batch_size,
            validation_steps=FLAGS.num_val_samples // FLAGS.batch_size)
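# In TF2 the per-batch timeline tracing above is usually replaced by the
# profiler built into the TensorBoard callback (TF >= 2.2). A sketch of the
# modern equivalent, not what this TF 1.15 script does:
callbacks.append(tf.keras.callbacks.TensorBoard(
    log_dir='logs', profile_batch=(2, 5)))  # profile batches 2 through 5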
Example #9
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    # Anchors are fixed: each of the three output scales has its own fixed set of 3 anchor boxes
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Fallback placeholder dataset; overridden below when FLAGS.dataset is set
    train_dataset = dataset.load_fake_dataset()

    # Load the training data; preprocessing happens below via dataset.map
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)

    # Shuffle the training data
    train_dataset = train_dataset.shuffle(buffer_size=512)
    # Batch the training data
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # Match targets to anchors as a map preprocessing step
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch with AUTOTUNE so preprocessing runs in parallel with training,
    # using as many threads as the machine makes available
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Same as train_dataset above
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))


    # Configure the model for transfer learning: reuse the YOLO network as a pretrained starting point
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers: build a pretrained model to load the weights into
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        # Load the pretrained weights
        model_pretrained.load_weights(FLAGS.weights)

        # Copy the Darknet backbone weights and freeze them; everything outside the backbone stays trainable
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        # Copy every pretrained layer except the output heads; only the output layers are trained
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(
                        l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        # 'fine_tune': freeze the Darknet backbone and train every other layer
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # 'frozen': freeze the entire model; no weights are updated during training
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Optimizer
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    # Loss functions, one per output scale
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    # Eager mode gives immediate feedback, which makes training easy to observe
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # Running means of the train and val losses
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # Compute the loss under GradientTape, then update the model parameters from the gradients
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                # Log the per-batch losses
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Validation loop, mirroring the training loop above
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            # Log the epoch-level train and val losses
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # Reset the running means
            avg_loss.reset_states()
            avg_val_loss.reset_states()

            # Save the weights produced by this epoch
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))

    # Otherwise train in graph mode via model.fit, without per-batch eager feedback
    else:
        # Compile the model
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'),
                      metrics=['accuracy'])

        # Callbacks, including a checkpoint callback that saves the model weights
        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        # The commented-out variant below adds period=2, saving a checkpoint every 2 epochs
        # callbacks = [
        #     ReduceLROnPlateau(verbose=1),
        #     EarlyStopping(patience=3, verbose=1),
        #     ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
        #                     verbose=1, save_weights_only=True, period = 2),
        #     TensorBoard(log_dir='logs')
        # ]

        # Train the model using the callbacks above
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            validation_freq=1)
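# The commented-out block above uses `period`, which newer tf.keras versions
# replaced with `save_freq`, counted in batches rather than epochs. Saving
# every 2 epochs would look roughly like this sketch (`steps_per_epoch` is
# an assumed, known value):
ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                verbose=1, save_weights_only=True,
                save_freq=2 * steps_per_epoch)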
Example #10
def main(args):

    image_size = 416  # 416
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = 1e-3
    num_classes = args.num_classes
    # Number of classes in the `weights` file if different; useful for transfer learning with a different number of classes
    weight_num_classes = args.num_weight_class
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to a text file containing all classes, one per line
    classes = args.classes

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    val_dataset = dataset.load_tfrecord_dataset(valid_path, classes,
                                                image_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, image_size),
        dataset.transform_targets(y, anchors, anchor_masks, image_size)))

    model = YoloV3(image_size, training=True, classes=num_classes)
    # Darknet transfer is a special case that works
    # with incompatible number of classes
    # reset top layers
    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    if args.transfer == 'darknet':
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

    predictions = []

    evaluator = Evaluator(iou_thresh=args.iou)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections, box coords)

    # Full labels shape -> [num_batches, grid scale, imgs]
    # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
    full_labels = np.asarray([label for _, label in val_dataset])

    # Shape -> [num_batches, num_imgs_in_batch, 3]
    # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
    full_labels_trans = full_labels.transpose(0, 2, 1)

    full_labels_flat = []

    for batch in full_labels_trans:
        for img in batch:
            row = []
            for scale in img:
                row.append(scale)
            full_labels_flat.append(row)

    # Shape -> [num_imgs x 3]
    full_labels_flat = np.asarray(full_labels_flat)

    # Remove any labels consisting of all 0's
    filt_labels = []
    # for img in range(len(full_labels_flat)):
    for img in full_labels_flat:
        test = []
        # for scale in full_labels_flat[img]:
        for scale in img:
            lab_list = []
            for g1 in scale:
                for g2 in g1:
                    for anchor in g2:
                        if anchor[0] > 0:
                            temp = [
                                anchor[0] * image_size, anchor[1] * image_size,
                                anchor[2] * image_size, anchor[3] * image_size,
                                anchor[4], anchor[5]
                            ]
                            temp = [float(x) for x in temp]
                            lab_list.append(np.asarray(temp))
            test.append(np.asarray(lab_list))
        filt_labels.append(np.asarray(test))
    filt_labels = np.asarray(
        filt_labels
    )  # Numpy array of shape [num_imgs, 3x[num_boxesx[x1,y1,x2,y2,score,class]]]
    # filt_labels = filt_labels[:, :4] * image_size

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
    # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
    # Box coords should be in format x1 y1 x2 y2

    evaluator(predictions, filt_labels, images)  # Check gts box coords

    # confidence_thresholds = np.linspace(0.1, 1, 15)  # full sweep (disabled)
    confidence_thresholds = [0.5]
    all_tp_rates = []
    all_fp_rates = []

    # Compute ROCs for above range of thresholds
    # Compute one for each class vs. the other classes
    for index, conf in enumerate(confidence_thresholds):
        tp_of_img = []
        fp_of_img = []
        all_classes = []

        tp_rates = {}
        fp_rates = {}

        boxes, scores, classes, num_detections = model.predict(val_dataset)

        # Full labels shape -> [num_batches, grid scale, imgs]
        # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
        full_labels = np.asarray([label for _, label in val_dataset])

        # Shape -> [num_batches, num_imgs_in_batch, 3]
        # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
        full_labels_trans = full_labels.transpose(0, 2, 1)

        full_labels_flat = []

        for batch in full_labels_trans:
            for img in batch:
                row = []
                for scale in img:
                    row.append(scale)
                full_labels_flat.append(row)

        # Shape -> [num_imgs x 3]
        full_labels_flat = np.asarray(full_labels_flat)

        # Remove any labels consisting of all 0's
        filt_labels = []
        # for img in range(len(full_labels_flat)):
        for img in full_labels_flat:
            test = []
            # for scale in full_labels_flat[img]:
            for scale in img:
                lab_list = []
                for g1 in scale:
                    for g2 in g1:
                        for anchor in g2:
                            if anchor[0] > 0:
                                temp = [
                                    anchor[0] * image_size,
                                    anchor[1] * image_size,
                                    anchor[2] * image_size,
                                    anchor[3] * image_size, anchor[4],
                                    anchor[5]
                                ]
                                temp = [float(x) for x in temp]
                                lab_list.append(np.asarray(temp))
                test.append(np.asarray(lab_list))
            filt_labels.append(np.asarray(test))
        filt_labels = np.asarray(
            filt_labels
        )  # Numpy array of shape [num_imgs, 3x[num_boxesx[x1,y1,x2,y2,score,class]]]
        # filt_labels = filt_labels[:, :4] * image_size

        # i is the num_images index
        # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(
                        np.hstack([
                            boxes[img][sc] * image_size, scores[img][sc],
                            classes[img][sc]
                        ]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(
            predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filt_labels, images)  # Check gts box coords

        classes = list(set(r['class_ids']))  # All unique class ids
        for c in classes:
            if c not in all_classes:
                all_classes.append(c)
        complete_classes = dataset_val.class_ids[1:]
        # Need TPR and FPR rates for each class versus the other classes
        # Recall == TPR
        tpr = utils.compute_ap_indiv_class(gt_bbox, gt_class_id, gt_mask,
                                           r["rois"], r["class_ids"],
                                           r["scores"], r['masks'],
                                           complete_classes)
        total_fpr = utils.compute_fpr_indiv_class(gt_bbox, gt_class_id,
                                                  gt_mask, r["rois"],
                                                  r["class_ids"], r["scores"],
                                                  r['masks'], complete_classes)
        # print(f'For Image: TPR: {tpr} -- FPR: {total_fpr}')
        tp_of_img.append(tpr)
        fp_of_img.append(total_fpr)

        all_classes = dataset_val.class_ids[1:]

        # Need to get average TPR and FPR for number of images used
        for c in all_classes:
            tp_s = 0
            for item in tp_of_img:
                if c in item.keys():
                    tp_s += item[c]
                else:
                    tp_s += 0

            tp_rates[c] = tp_s / len(image_ids)
            # tp_rates[c] = tp_s

        # print(tp_rates)

        for c in all_classes:
            fp_s = 0
            for item in fp_of_img:
                if c in item.keys():
                    fp_s += item[c]
                else:
                    fp_s += 0
            fp_rates[c] = fp_s / len(image_ids)
            # fp_rates[c] = fp_s

        all_fp_rates.append(fp_rates)
        all_tp_rates.append(tp_rates)

    print(f'TP Rates: {all_tp_rates}')
    print(f'FP Rates: {all_fp_rates}')
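# The nested g1/g2/anchor loops above drop all-zero label rows one grid cell
# at a time; with NumPy the same filtering can be vectorized per scale.
# A sketch, assuming `scale` is an array of shape [grid, grid, anchors, 6]:
valid = scale[..., 0] > 0  # keep anchors with a positive x coordinate
boxes = scale[valid] * np.array(
    [image_size, image_size, image_size, image_size, 1, 1], np.float32)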
Example #11
def main(_argv):
    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                   'GPU')

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # Horovod: adjust learning rate based on number of GPUs.
    optimizer = tf.optimizers.Adam(FLAGS.learning_rate * hvd.size())
    # Horovod: add Horovod DistributedOptimizer.

    ###############################################
    loss = [YoloLoss(anchors[mask]) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(
                    train_dataset.take(5717 // hvd.size())):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                # Horovod: add Horovod Distributed GradientTape.
                tape = hvd.DistributedGradientTape(tape)

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                # Horovod: broadcast initial variable states from rank 0 to all other processes.
                # This is necessary to ensure consistent initialization of all workers when
                # training is started with random weights or restored from a checkpoint.
                #
                # Note: broadcast should be done after the first gradient step to ensure optimizer
                # initialization.
                if batch == 0:
                    hvd.broadcast_variables(model.variables, root_rank=0)
                    hvd.broadcast_variables(optimizer.variables(), root_rank=0)

                #############################
                if hvd.rank() == 0:
                    logging.info("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                ###########################
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                if hvd.rank() == 0:
                    logging.info("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
            if hvd.rank() == 0:
                logging.info("{}, train: {}, val: {}".format(
                    epoch,
                    avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            if hvd.rank() == 0:
                model.save_weights(
                    'checkpoints/horovod_yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
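# The "add Horovod DistributedOptimizer" comment above is never acted on in
# the model.fit branch. Assuming hvd is horovod.tensorflow.keras, the usual
# wiring would be roughly:
optimizer = hvd.DistributedOptimizer(optimizer)
callbacks.insert(0, hvd.callbacks.BroadcastGlobalVariablesCallback(0))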
Example #12
def main(args):
    tf.config.experimental.list_physical_devices('GPU')
    # tf.device(f'/gpu:{args.gpu_num}')

    train_path = args.train_dataset
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to a text file containing all classes, one per line
    classes_file = args.classes
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = args.mode
    '''
    'fit: model.fit, '
    'eager_fit: model.fit(run_eagerly=True), '
    'eager_tf: custom GradientTape'
    '''

    # Usually darknet
    transfer = args.transfer
    '''
    none: Training from scratch
    darknet: Transfer darknet
    no_output: Transfer all but output
    frozen: Transfer and freeze all
    fine_tune: Transfer all and freeze darknet only
    pre: Use a pre-trained model for validation
    '''
    image_size = cfg.IMAGE_SIZE

    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = cfg.LEARNING_RATE
    num_classes = args.num_classes
    # Number of classes in the `weights` file if different; useful for transfer learning with a different number of classes
    weight_num_classes = args.num_weight_class

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    # saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/trained_{}.tf'.format(num_epochs)
    saved_weights_path = args.saved_weights

    # Original anchors below (normalized to a 608px input), kept for
    # reference; the config values are used instead:
    # anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
    #                     (59, 119), (116, 90), (156, 198), (373, 326)],
    #                    np.float32) / 608

    anchors = cfg.YOLO_ANCHORS

    anchor_masks = cfg.YOLO_ANCHOR_MASKS

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if args.no_train:
        print('Skipping training...')
    else:
        start_time = time.time()
        model = YoloV3(image_size, training=True, classes=num_classes)

        train_dataset = dataset.load_tfrecord_dataset(train_path,
                                                      classes_file,
                                                      image_size)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        # Configure the model for transfer learning
        if transfer == 'none':
            pass  # Nothing to do
        elif transfer in ['darknet', 'no_output']:
            # Darknet transfer is a special case that works
            # with incompatible number of classes
            # reset top layers
            model_pretrained = YoloV3(image_size,
                                      training=True,
                                      classes=weight_num_classes or num_classes)
            model_pretrained.load_weights(weights_path)

            if transfer == 'darknet':
                model.get_layer('yolo_darknet').set_weights(
                    model_pretrained.get_layer('yolo_darknet').get_weights())
                freeze_all(model.get_layer('yolo_darknet'))

            elif transfer == 'no_output':
                for layer in model.layers:
                    if not layer.name.startswith('yolo_output'):
                        layer.set_weights(model_pretrained.get_layer(
                            layer.name).get_weights())
                        freeze_all(layer)
        elif transfer == 'pre':
            model = YoloV3(image_size,
                           training=False,
                           classes=num_classes)
            model.load_weights(weights_path)

        else:
            # All other transfer require matching classes
            model.load_weights(weights_path)
            if transfer == 'fine_tune':
                # freeze darknet and fine tune other layers
                darknet = model.get_layer('yolo_darknet')
                freeze_all(darknet)
            elif transfer == 'frozen':
                # freeze everything
                freeze_all(model)
        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
        loss = [YoloLoss(anchors[mask], classes=num_classes)
                for mask in anchor_masks]  # a dict keyed by output name may be more robust than a list

        if mode == 'eager_tf':
            # Eager mode is great for debugging
            # Non eager graph mode is recommended for real training
            avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
            avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
            for epoch in range(1, num_epochs + 1):
                for batch, (images, labels) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        outputs = model(images, training=True)
                        regularization_loss = tf.reduce_sum(model.losses)
                        pred_loss = []
                        for output, label, loss_fn in zip(outputs, labels, loss):
                            pred_loss.append(loss_fn(label, output))
                        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                    grads = tape.gradient(total_loss, model.trainable_variables)
                    optimizer.apply_gradients(
                        zip(grads, model.trainable_variables))
                    print("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_loss.update_state(total_loss)
                for batch, (images, labels) in enumerate(val_dataset):
                    outputs = model(images)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                    print("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_val_loss.update_state(total_loss)
                print("{}, train: {}, val: {}".format(
                    epoch,
                    avg_loss.result().numpy(),
                    avg_val_loss.result().numpy()))
                avg_loss.reset_states()
                avg_val_loss.reset_states()

                model.save_weights(
                    'checkpoints/yolov3_train_{}.tf'.format(epoch))
        elif mode == 'valid':
            pass  # Pass this step for validation only
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          run_eagerly=(mode == 'eager_fit'))
            callbacks = [
                ReduceLROnPlateau(verbose=1, min_lr=1e-4, patience=50),
                # EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('checkpoints/midpoints/yolov3_train_{epoch}.tf',
                                verbose=1, save_weights_only=True),
                TensorBoard(log_dir=f'logs/{saved_weights_path[:-3]}')
            ]

            history = model.fit(train_dataset,
                                epochs=num_epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
            print(f'Saving weights to: {saved_weights_path}')
            model.save_weights(saved_weights_path)
        finish_time = time.time()
        train_time = finish_time - start_time
        print('Training time elapsed: {}'.format(train_time))

    # Calculate mAP
    if args.validate:
        print('Validating...')
        model = YoloV3(image_size, training=False, classes=num_classes)
        model.load_weights(saved_weights_path).expect_partial()

        batch_size = 1

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(batch_size)

        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        images = []
        for img, labs in val_dataset:
            img = np.squeeze(img)
            images.append(img)

        predictions = []

        evaluator = Evaluator(iou_thresh=args.iou)

        # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
        boxes, scores, classes, num_detections = model.predict(val_dataset)
        print(boxes.shape)
        print(boxes[0])
        # boxes -> (num_imgs, num_detections, box coords)

        filtered_labels = []
        for _, label in val_dataset:
            filt_labels = flatten_labels(label)
            filtered_labels.append(filt_labels)

        # img indexes images; sc indexes detections
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(np.hstack([boxes[img][sc] * image_size, scores[img][sc], classes[img][sc]]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filtered_labels, images, roc=False)  # Check gts box coords

    if args.valid_imgs:  # Predictions
        print('Valid Images...')
        # yolo = YoloV3(classes=num_classes)
        yolo = YoloV3(image_size, training=False, classes=num_classes)
        yolo.load_weights(saved_weights_path).expect_partial()
        print('weights loaded')

        print('Validation Image...')
        # TODO: load class names from classes_file instead of hard-coding cfg.CLASS_DICT
        class_dict = cfg.CLASS_DICT

        class_names = list(class_dict.values())
        print('classes loaded')

        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))


        # boxes, scores, classes, num_detections
        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')

            #img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)
            img = img * 255

            boxes, scores, classes, nums = yolo(img)

            # Replace the predictions with ground-truth labels so the drawn
            # boxes show the annotations rather than the detections
            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)


            output = 'test_images/test_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_{}.jpg'.format(index)

            # print('detections:')
            # for i in range(nums[index]):
            #     print('\t{}, {}, {}'.format(class_names[int(classes[index][i])],
            #                               np.array(scores[index][i]),
            #                               np.array(boxes[index][i])))
            #     if i > 10:
            #         continue

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)

            index = index + 1

    if args.visual_data:
        print('Visual Data...')
        val_dataset = dataset.load_tfrecord_dataset(valid_path,
                                                    classes_file,
                                                    image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            # img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)

            output = 'test_images/test_labels_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_labels_{}.jpg'.format(index)

            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255

            cv2.imwrite(output, img)

            index = index + 1

        return
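flatten_labels is called throughout this example but its definition is not shown. A hypothetical sketch of what it plausibly does, assuming labels arrive as per-scale grids whose last axis is [x1, y1, x2, y2, obj, class] and real boxes are marked by obj > 0:

import numpy as np

def flatten_labels(label):
    # Collapse multi-scale grid labels into one (N, 6) array of real boxes
    boxes = []
    for scale in label:  # each scale has shape (1, grid, grid, anchors, 6)
        flat = np.asarray(scale).reshape(-1, 6)
        boxes.append(flat[flat[:, 4] > 0])  # keep entries with objectness > 0
    return np.concatenate(boxes, axis=0)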
Exemple #13
def main(_argv):
    # Change flag values
    if FLAGS.height is None:
        FLAGS.height = FLAGS.size
    if FLAGS.width is None:
        FLAGS.width = FLAGS.size
    size = (FLAGS.height, FLAGS.width)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(size,
                           training=True,
                           classes=FLAGS.num_classes,
                           recurrent=FLAGS.recurrent)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(size,
                       training=True,
                       classes=FLAGS.num_classes,
                       recurrent=FLAGS.recurrent)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes, size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=8)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(
        x, size), dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        train_dataset = train_dataset.map(
            lambda x, y: (dataset.get_recurrect_inputs(
                x, y, anchors, anchor_masks, FLAGS.num_classes), y))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(
        x, size), dataset.transform_targets(y, anchors, anchor_masks, size)))
    if FLAGS.recurrent:
        val_dataset = val_dataset.map(
            lambda x, y: (dataset.get_recurrect_inputs(
                x, y, anchors, anchor_masks, FLAGS.num_classes), y))

    # Configure the model for transfer learning
    if FLAGS.transfer != 'none':
        # if we need all weights, no need to create another model
        if FLAGS.transfer == 'all':
            model.load_weights(FLAGS.weights)

        # else, we need only some of the weights
        # create appropriate model_pretrained, load all weights and copy the ones we need
        else:
            if FLAGS.tiny:
                model_pretrained = YoloV3Tiny(size,
                                              training=True,
                                              classes=FLAGS.weights_num_classes
                                              or FLAGS.num_classes,
                                              recurrent=FLAGS.recurrent)
            else:
                model_pretrained = YoloV3(size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes,
                                          recurrent=FLAGS.recurrent)
            # load pretrained weights
            model_pretrained.load_weights(FLAGS.weights)
            # transfer darknet
            darknet = model.get_layer('yolo_darknet')
            darknet.set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            # transfer 'yolo_conv_i' layer weights
            if FLAGS.transfer in [
                    'yolo_conv', 'yolo_output_conv', 'yolo_output'
            ]:
                for l in model.layers:
                    if l.name.startswith('yolo_conv'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())
            # transfer 'yolo_output_i' first conv2d layer
            if FLAGS.transfer == 'yolo_output_conv':
                # transfer tiny output conv2d
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        # get and set the weights of the appropriate layers
                        model.get_layer(l.name).layers[1].set_weights(
                            model_pretrained.get_layer(
                                l.name).layers[1].get_weights())
                        # TODO: decide whether batch norm layers should be frozen as well
            # transfer 'yolo_output_i' layer weights
            if FLAGS.transfer == 'yolo_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        model.get_layer(l.name).set_weights(
                            model_pretrained.get_layer(l.name).get_weights())
    # no transfer learning
    else:
        pass

    # freeze layers, if requested
    if FLAGS.freeze != 'none':
        if FLAGS.freeze == 'all':
            freeze_all(model)
        if FLAGS.freeze in [
                'yolo_darknet',
                'yolo_conv', 'yolo_output_conv', 'yolo_output'
        ]:
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        if FLAGS.freeze in ['yolo_conv', 'yolo_output_conv', 'yolo_output']:
            for l in model.layers:
                if l.name.startswith('yolo_conv'):
                    freeze_all(l)
        if FLAGS.freeze == 'yolo_output_conv':
            if FLAGS.tiny:
                # freeze the appropriate layers
                freeze_all(model.layers[4].layers[1])
                freeze_all(model.layers[5].layers[1])
            else:
                # freeze the appropriate layers
                freeze_all(model.layers[5].layers[1])
                freeze_all(model.layers[6].layers[1])
                freeze_all(model.layers[7].layers[1])
        if FLAGS.freeze == 'yolo_output':
            for l in model.layers:
                if l.name.startswith('yolo_output'):
                    freeze_all(l)
    # freeze nothing
    else:
        pass

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
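freeze_all is used in several of these examples without being defined. In the yolov3-tf2 codebase it recursively toggles trainability; a sketch consistent with the freeze_all(l, frozen=False) call that appears in a later example:

import tensorflow as tf

def freeze_all(model, frozen=True):
    # Set `trainable` on this layer/model and recurse into nested models
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)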
Exemple #14
def main(_argv):
    # Use available GPUs
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:  # tiny model
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors  # anchor boxes
        anchor_masks = yolo_tiny_anchor_masks  # indices of the anchors used at each scale
    else:  # full-size model (without this branch, `model` is undefined when --tiny is unset)
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    if FLAGS.dataset:  # load the training dataset
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    # Shuffle the data; samples are drawn from a buffer of the first buffer_size items
    train_dataset = train_dataset.shuffle(buffer_size=512)
    # Batch the data; the final batch may be smaller than batch_size
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # Training data in (x, y) format
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch batches in the background during training
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:  # load the validation dataset
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    # Batch the data; the final batch may be smaller than batch_size
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    # Validation data in (x, y) format
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Optimizer
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    # Loss functions, one per output scale
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # Compile the model
    model.compile(optimizer=optimizer, loss=loss, run_eagerly=False)
    # Callbacks
    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_tiny_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]
    # Train the model
    history = model.fit(train_dataset,
                        epochs=FLAGS.epochs,
                        callbacks=callbacks,
                        validation_data=val_dataset)
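dataset.transform_targets, used in every pipeline here, maps ground-truth boxes onto the per-scale output grids consumed by YoloLoss. A simplified, hypothetical single-scale version for intuition only (the real implementation is vectorised in TensorFlow and handles all scales at once):

import numpy as np

def assign_boxes_to_grid(boxes, anchors, grid_size):
    # boxes: (N, 5) rows of [x1, y1, x2, y2, class] in relative [0, 1] coords
    # anchors: (M, 2) relative anchor (width, height) pairs for this scale
    y_true = np.zeros((grid_size, grid_size, len(anchors), 6), np.float32)
    for x1, y1, x2, y2, cls in boxes:
        w, h = x2 - x1, y2 - y1
        # IoU against each anchor, treating box and anchor as corner-aligned
        inter = np.minimum(w, anchors[:, 0]) * np.minimum(h, anchors[:, 1])
        iou = inter / (w * h + anchors[:, 0] * anchors[:, 1] - inter)
        best = int(np.argmax(iou))
        # Write the box into the grid cell containing its centre
        cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
        i = min(int(cx * grid_size), grid_size - 1)
        j = min(int(cy * grid_size), grid_size - 1)
        y_true[j, i, best] = [x1, y1, x2, y2, 1.0, cls]
    return y_true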
Exemple #15
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    post_process_outputs = post_process_block(model.outputs,
                                              classes=FLAGS.num_classes)
    post_process_model = Model(model.inputs, post_process_outputs)

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size), y))
    # dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y:
                                  (dataset.transform_images(x, FLAGS.size), y))
    # dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
    # model.outputs shape: [[N, 13, 13, 3, 85], [N, 26, 26, 3, 85], [N, 52, 52, 3, 85]]
    # labels shape: ([N, 13, 13, 3, 6], [N, 26, 26, 3, 6], [N, 52, 52, 3, 6])
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    transf_labels = dataset.transform_targets(
                        labels, anchors, anchor_masks, FLAGS.size)
                    for output, label, loss_fn in zip(outputs, transf_labels,
                                                      loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss,
                                               axis=None) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_loss.update_state(total_loss)

                if batch >= 100:
                    break

            true_pos_total = np.zeros(FLAGS.num_classes)
            false_pos_total = np.zeros(FLAGS.num_classes)
            n_pos_total = np.zeros(FLAGS.num_classes)
            for batch, (images, labels) in enumerate(val_dataset):
                # get losses
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                transf_labels = dataset.transform_targets(
                    labels, anchors, anchor_masks, FLAGS.size)
                for output, label, loss_fn in zip(outputs, transf_labels,
                                                  loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                log_batch(logging, epoch, batch, total_loss, pred_loss)
                avg_val_loss.update_state(total_loss)

                # get true positives, false positives, and positive labels
                preds = post_process_model(images)
                true_pos, false_pos, n_pos = batch_true_false_positives(
                    preds.numpy(), labels.numpy(), FLAGS.num_classes)
                true_pos_total += true_pos
                false_pos_total += false_pos
                n_pos_total += n_pos

                if batch >= 20:
                    break

            # precision-recall by class
            precision, recall = batch_precision_recall(true_pos_total,
                                                       false_pos_total,
                                                       n_pos_total)
            for c in range(FLAGS.num_classes):
                print('Class {} - Prec: {}, Rec: {}'.format(
                    c, precision[c], recall[c]))
            # total precision-recall
            print('Total - Prec: {}, Rec: {}'.format(
                calc_precision(np.sum(true_pos_total),
                               np.sum(false_pos_total)),
                calc_recall(np.sum(true_pos_total), np.sum(n_pos_total))))

            # log losses
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # reset loop and save weights
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                os.path.join(FLAGS.checkpoint_dir, 'yolov3_train_{}.tf'\
                    .format(epoch)))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint(os.path.join(FLAGS.checkpoint_dir,
                                         'yolov3_train_{epoch}.tf'),
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir=FLAGS.log_dir)
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
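calc_precision and calc_recall above are not shown; under the standard definitions they reduce to simple ratios over the accumulated counts, sketched here with empty-denominator guards:

def calc_precision(true_pos, false_pos):
    # precision = TP / (TP + FP)
    denom = true_pos + false_pos
    return true_pos / denom if denom > 0 else 0.0

def calc_recall(true_pos, n_pos):
    # recall = TP / number of ground-truth positives
    return true_pos / n_pos if n_pos > 0 else 0.0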
Exemple #16
def main(_argv):
    model = YoloV3(FLAGS.size, training=True, classes=80)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks
    train_dataset = dataset.load_tfrecord_dataset(
        "./data/tfrecord/train.tfrecord", './data/faces.names')
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(
        x, 416), dataset.transform_targets(y, anchors, anchor_masks, 416)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)
    val_dataset = dataset.load_tfrecord_dataset("./data/tfrecord/val.tfrecord",
                                                './data/faces.names')
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(
        x, 416), dataset.transform_targets(y, anchors, anchor_masks, 416)))

    # test_dataset = dataset.load(FLAGS.batch_size, split='test')
    # test_dataset = train_dataset.shuffle(buffer_size=512)
    # test_dataset = train_dataset.batch(FLAGS.batch_size)
    # test_dataset = test_dataset.map(lambda x, y: (
    #     dataset.transform_images(x, FLAGS.size),
    #     dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    model.load_weights(FLAGS.weights)
    darknet = model.get_layer('yolo_darknet')
    freeze_all(darknet)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
    loss = [YoloLoss(anchors[mask], classes=80) for mask in anchor_masks]

    for epoch in range(1, FLAGS.epochs + 1):
        for batch, (images, labels) in train_dataset.enumerate():
            with tf.GradientTape() as tape:
                outputs = model(images, training=True)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

            grads = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            logging.info("{}_train_{}, {}, {}".format(
                epoch, batch, total_loss.numpy(),
                list(map(lambda x: np.sum(x.numpy()), pred_loss))))
            avg_loss.update_state(total_loss)

        for batch, (images, labels) in enumerate(val_dataset):
            outputs = model(images)
            regularization_loss = tf.reduce_sum(model.losses)
            pred_loss = []
            for output, label, loss_fn in zip(outputs, labels, loss):
                pred_loss.append(loss_fn(label, output))
            total_loss = tf.reduce_sum(pred_loss) + regularization_loss

            logging.info("{}_val_{}, {}, {}".format(
                epoch, batch, total_loss.numpy(),
                list(map(lambda x: np.sum(x.numpy()), pred_loss))))
            avg_val_loss.update_state(total_loss)

        logging.info("{}, train: {}, val: {}".format(
            epoch,
            avg_loss.result().numpy(),
            avg_val_loss.result().numpy()))

        avg_loss.reset_states()
        avg_val_loss.reset_states()
        model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
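The loop above runs every batch eagerly. Wrapping the per-batch work in tf.function compiles it into a graph and is usually considerably faster; a minimal sketch of the same train step under that change (model, optimizer, and loss_fns as defined above):

import tensorflow as tf

@tf.function
def train_step(model, optimizer, loss_fns, images, labels):
    with tf.GradientTape() as tape:
        outputs = model(images, training=True)
        regularization_loss = tf.reduce_sum(model.losses)
        pred_loss = [loss_fn(label, output)
                     for output, label, loss_fn in zip(outputs, labels, loss_fns)]
        total_loss = tf.reduce_sum(pred_loss) + regularization_loss
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss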
Exemple #17
def main(_argv):
    set_one_gpu()

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # train_dataset = dataset.load_fake_dataset()
    dataset_name = 'data/' + FLAGS.dataset + '.train.record'
    val_dataset_name = 'data/' + FLAGS.dataset + '.val.record'

    train_dataset = dataset.load_tfrecord_dataset(dataset_name, FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    tf_name = FLAGS.name
    if not tf_name:
        tf_name = 'train' + FLAGS.gpu
    best_tf_name = "checkpoints/%s_best.tf" % tf_name
    last_tf_name = "checkpoints/%s_last.tf" % tf_name

    # val_dataset = dataset.load_fake_dataset()
    val_dataset = dataset.load_tfrecord_dataset(val_dataset_name,
                                                FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size,
                                        training=True,
                                        classes=FLAGS.num_classes)
            else:
                init_model = YoloV3(FLAGS.size,
                                    training=True,
                                    classes=FLAGS.num_classes)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]
    best_val_loss = 0
    history = None

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                # logging.info("{}_train_{}, {}, {}".format(
                #     epoch, batch, total_loss.numpy(),
                #     list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                # logging.info("{}_val_{}, {}, {}".format(
                #     epoch, batch, total_loss.numpy(),
                #     list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            val_loss = avg_val_loss.result().numpy()
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(), val_loss))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(last_tf_name)
            if best_val_loss == 0 or best_val_loss > val_loss:
                best_val_loss = val_loss
                logging.info("saving best val loss: %s" % best_tf_name)
                model.save_weights(best_tf_name)
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint(best_tf_name, verbose=1, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)

    if history is not None:
        print(history.history['val_loss'])
        best_val_loss = min(history.history['val_loss'])
        model.save_weights(best_tf_name)

    print("Best weights are saved as %s" % best_tf_name)
    tiny = 'tiny_' if FLAGS.tiny else ''
    out_name = "%s_d%s_%sm%s_bs%d_s%s_e%d_val%d" % \
         (tf_name, FLAGS.dataset, tiny, FLAGS.transfer, FLAGS.batch_size, FLAGS.size, FLAGS.epochs, best_val_loss)
    mfn = "data/model/%s/" % out_name

    final_tf_name = "%s.tf" % out_name
    copy_tf("%s_best.tf" % tf_name, final_tf_name)
    print("Final checkpoint file saved as: %s" % final_tf_name)
    model.load_weights(best_tf_name)
    tf.saved_model.save(model, mfn)
    print("Model file saved to: %s" % mfn)
Exemple #18
def main(_argv):
    # Setting up the accelerator
    strategy = setup_accelerator(FLAGS.accelerator)

    if 'TPU' in FLAGS.accelerator:
        with strategy.scope():
            if FLAGS.tiny:
                model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes, mixed_precision=FLAGS.mixed_precision)
                anchors = adjust_yolo_anchors(yolo_tiny_anchors, FLAGS.size)
                anchor_masks = yolo_tiny_anchor_masks
            else:
                model = YoloV3(FLAGS.size, training=True, 
                            classes=FLAGS.num_classes, mixed_precision=FLAGS.mixed_precision)
                anchors = adjust_yolo_anchors(yolo_anchors, FLAGS.size)
                anchor_masks = yolo_anchor_masks
    else:
        if FLAGS.tiny:
            model = YoloV3Tiny(FLAGS.size, training=True,
                        classes=FLAGS.num_classes, mixed_precision=FLAGS.mixed_precision)
            anchors = adjust_yolo_anchors(yolo_tiny_anchors, FLAGS.size)
            anchor_masks = yolo_tiny_anchor_masks
        else:
            model = YoloV3(FLAGS.size, training=True, 
                        classes=FLAGS.num_classes, mixed_precision=FLAGS.mixed_precision)
            anchors = adjust_yolo_anchors(yolo_anchors, FLAGS.size)
            anchor_masks = yolo_anchor_masks

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512).batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    
    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['no_darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.pretrained_weights_num_classes or FLAGS.num_classes,
                mixed_precision=FLAGS.pretrained_mixed_precision
            )
        else:
            model_pretrained = YoloV3(
                    FLAGS.size, training=True, classes=FLAGS.pretrained_weights_num_classes or FLAGS.num_classes,
                    mixed_precision=FLAGS.pretrained_mixed_precision
            )
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'no_darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'all':
            # fine_tune the whole model.
            for i, l in enumerate(model.layers):
                print(i, "layer: ", l.name)
                freeze_all(l, frozen=False)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes, ignore_thresh=FLAGS.ignore_thresh, dtype=FLAGS.mixed_precision)
            for mask in anchor_masks]
    
    logging.info("\nloss: {}".format(loss))
    for l in loss:
        print(l)
    logging.info("global policy: {}".format(tf_mixed_precision.global_policy()))
    logging.info("global policy loss: {}\n".format(tf_mixed_precision.global_policy().loss_scale))
    # Print model summary and plot it to .png 
    print_all_layers(model, p_details=FLAGS.pdetails)
    if FLAGS.tiny:
        tf.keras.utils.plot_model(model, to_file='yolov3_tiny.png', show_shapes=True, show_layer_names=False)
    else:
        tf.keras.utils.plot_model(model, to_file='yolov3.png', show_shapes=True, show_layer_names=False)

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training

        #optimizer = tf_mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        y = []
        y_val = []
        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info(GREEN + "{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()) + DEFAULT)

            y.append(avg_loss.result().numpy())
            y_val.append(avg_val_loss.result().numpy())

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            if epoch % 4 == 0:
                model.save_weights(
                    'checkpoints/yolov3_train_{}.tf'.format(epoch))
                logging.info(GREEN + 'checkpoints/yolov3_train_{}.tf weights saved'.format(epoch) + DEFAULT)
        
    else:
        metrics = None
        if FLAGS.metrics:
            thresholds = list(np.linspace(0, 1, 11))
            # One precision metric per output scale; the recall metrics were
            # disabled and could be re-added alongside precision the same way
            metrics = [
                [experimentalYoloPrecision(anchors=anchors[mask],
                                           num_classes=FLAGS.num_classes,
                                           thresholds=thresholds,
                                           name='precision')]
                for mask in anchor_masks
            ]

        if 'TPU' in FLAGS.accelerator:
            with strategy.scope():
                model.compile(optimizer=optimizer, loss=loss,
                              metrics = metrics, run_eagerly=(FLAGS.mode=='eager_fit'))
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          metrics = metrics, run_eagerly=(FLAGS.mode=='eager_fit'))
        
        callbacks = [
            ModelCheckpoint(FLAGS.output_weights, verbose=1, save_best_only=True, save_weights_only=True),
            TensorBoard(log_dir=FLAGS.log_output_fit+'-'+datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), histogram_freq=1)
        ]
        
        print("\n--- START FITTING ---\n")
        training_history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            workers=2)

        logging.info(" > Average loss: \t{}".format(np.average(training_history.history['loss'])))
        logging.info(" > Average val loss: \t{}".format(np.average(training_history.history['val_loss'])))
        if FLAGS.metrics:
            for i, k in enumerate(list(training_history.history.keys())):
                if 'precision' in k:
                    logging.info(" > {0}, mAP={1:.4}%".format(k, np.average(training_history.history[k])*100))
                if 'recall' in k:
                    logging.info(" > {0}, mAR={1:.4}%".format(k, np.average(training_history.history[k])*100))
Exemple #19
def main():

    train_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_train.record-00000-of-00001'
    valid_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_val.record-00000-of-00001'
    weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/checkpoints/yolov3.tf'
    # Path to a text file containing all class names, one per line
    classes = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/shapes/shapes.names'
    # Usually 'fit'. Can be 'fit', 'eager_fit', 'eager_tf', or 'valid':
    #   fit: model.fit
    #   eager_fit: model.fit(run_eagerly=True)
    #   eager_tf: custom GradientTape loop
    mode = 'fit'

    # Usually 'darknet'. Options:
    #   none: train from scratch
    #   darknet: transfer darknet
    #   no_output: transfer all but output layers
    #   frozen: transfer and freeze all layers
    #   fine_tune: transfer all and freeze darknet only
    #   pre: use a pre-trained model for validation
    transfer = 'none'
    image_size = 416
    num_epochs = 1
    batch_size = 8
    learning_rate = 1e-3
    num_classes = 4
    # Number of classes in the `weights` file, if different; useful when transfer learning across class counts
    weight_num_classes = 80
    iou_threshold = 0.5

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/shapes_{}.tf'.format(
        num_epochs)
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    # Training dataset
    #dataset_train = tf.data.TFRecordDataset(train_path)
    #dataset_val = tf.data.TFRecordDataset(valid_path)

    dataset_train = load_tfrecord_dataset(train_path, classes, image_size)
    dataset_train = dataset_train.shuffle(buffer_size=512)
    dataset_train = dataset_train.batch(batch_size)
    #dataset_train = dataset_train.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))
    #dataset_train = dataset_train.prefetch(
    #    buffer_size=tf.data.experimental.AUTOTUNE)

    dataset_val = load_tfrecord_dataset(valid_path, classes, image_size)
    dataset_val = dataset_val.shuffle(buffer_size=512)
    dataset_val = dataset_val.batch(batch_size)
    #dataset_val = dataset_val.map(lambda x, y: (
    #    transform_images(x, image_size),
    #    transform_targets(y, anchors, anchor_masks, image_size)))

    # Create model in training mode
    yolo = models.YoloV3(image_size, training=True, classes=num_classes)

    model_pretrained = YoloV3(image_size,
                              training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    # Which weights to start with?
    print('Loading Weights...')
    #yolo.load_weights(weights_path)

    yolo.get_layer('yolo_darknet').set_weights(
        model_pretrained.get_layer('yolo_darknet').get_weights())
    freeze_all(yolo.get_layer('yolo_darknet'))

    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
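    # Note: newer Keras versions spell this `learning_rate=`; `lr=` is a
    # deprecated alias and may be removed.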
    loss = [
        YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks
    ]  # Passing loss as a list relies on output order; a dict keyed by output name may be more robust

    yolo.compile(optimizer=optimizer,
                 loss=loss,
                 run_eagerly=(mode == 'eager_fit'))
    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    history = yolo.fit(dataset_train,
                       epochs=num_epochs,
                       callbacks=callbacks,
                       validation_data=dataset_val)
    yolo.save_weights(saved_weights_path)

    # Detect/ROC
    model = YoloV3(image_size, training=False, classes=num_classes)
    model.load_weights(saved_weights_path).expect_partial()

    batch_size = 1

    val_dataset = load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)

    val_dataset = val_dataset.map(
        lambda x, y: (transform_images(x, image_size),
                      transform_targets(y, anchors, anchor_masks, image_size)))

    images = []
    for img, labs in val_dataset:
        img = np.squeeze(img)
        images.append(img)

    predictions = []

    evaluator = Evaluator(iou_thresh=iou_threshold)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections (200), box coords (4))
    # scores -> (num_imgs, num_detections)
    # classes -> (num_imgs, num_detections)
    # num_detections -> num_imgs

    # Aim for labels shape (per batch): [num_imgs, 3 x [num_boxes x [x1, y1, x2, y2, score, class]]]
    # full_labels = [label for _, label in val_dataset]

    # Shape : [Num images, 3 scales, grid, grid, anchor, 6 ]

    filtered_labels = []

    for _, label in val_dataset:
        img_labels = []
        # Label has shape [3 scales x[1, grid, grid, 3, 6]]
        for scale in label:
            # Shape [1, grid, grid, 3, 6]
            scale = np.asarray(scale)
            grid = scale.shape[1]

            scale2 = np.reshape(scale, (3, grid * grid, 6))
            # Shape: [3, grid*grid, 6]

            # Collect every box with a positive objectness score across all
            # three anchors of this scale
            filtered_anchors = []
            for anchor in scale2:
                for box in anchor:
                    if box[4] > 0:
                        filtered_anchors.append(np.asarray(box))
            img_labels.append(filtered_anchors)

        img_labels = np.asarray(img_labels)
        filtered_labels.append(img_labels)

    print(len(filtered_labels))
    print(len(filtered_labels[0]))
    print(len(filtered_labels[0][2]))
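    # A vectorized alternative to the nested filtering loops above (a sketch,
    # not part of the original script): boolean masking keeps every box whose
    # objectness score (index 4) is positive, in one NumPy call per scale.
    def filter_valid_boxes(scale_array):
        flat = np.reshape(np.asarray(scale_array), (-1, 6))  # [grid*grid*anchors, 6]
        return flat[flat[:, 4] > 0]                          # [num_valid, 6]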

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]]) for i in range(len(num_detections)) for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(
                    np.hstack([
                        boxes[img][sc] * image_size, scores[img][sc],
                        classes[img][sc]
                    ]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(
        predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # Predictions shape: [num_imgs x num_preds x[box coords(4), conf, classes]]
    # Box coords should be in format x1 y1 x2 y2

    # Labels shape: [num_imgs, 3 x [num_boxes x [x1, y1, x2, y2, score, class]]]
    evaluator(predictions, filtered_labels, images)  # Check gts box coords
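# `freeze_all` is used throughout these examples but never shown. A minimal
# sketch of what it is assumed to do (recursively mark a layer or nested model
# as non-trainable); the actual helper in the repo may differ:
def freeze_all(model, frozen=True):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)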
Exemple #20
def main(_argv):
    # GPU setup
    physical_devices = tf.config.experimental.list_physical_devices('GPU')    # list all physical GPUs
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)       # enable memory growth

    # Model initialization
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the datasets
    # Training set
    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(    # This resizes too, with the same function as the later
            FLAGS.dataset, FLAGS.classes, FLAGS.size)     # resize; the later call should be removed, and this one
                                                          # changed to a resize that does not distort the image
    num_of_data = 0
    for _ in train_dataset:
        num_of_data += 1
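    # Counting records by iterating the whole dataset is O(N); a sketch of a
    # cheaper check via tf.data (an assumption: TFRecord-backed datasets often
    # report unknown cardinality, in which case the counted value above stands):
    cardinality = tf.data.experimental.cardinality(train_dataset)
    if cardinality >= 0:
        num_of_data = int(cardinality.numpy())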
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),          # resizes and scales pixel values; this approach may need changing
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
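    # The comments above ask for a resize that does not distort the image. A
    # minimal letterbox-style sketch (an assumption, not the original code):
    # tf.image.resize_with_pad scales to fit and zero-pads the remainder. Note
    # that box labels would need the same scale/offset applied, which
    # dataset.transform_targets does not do here.
    def letterbox_images(x_train, size):
        return tf.image.resize_with_pad(x_train, size, size) / 255.0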

    # Validation set
    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Transfer learning
    if FLAGS.transfer == 'none':                            # no transfer learning
        pass
    elif FLAGS.transfer in ['darknet', 'no_output',
                            'no_output_no_freeze']:         # transfer only selected layers' weights
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':                         # load and freeze the darknet weights
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':                     # load and freeze everything except the output layers
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)
        else:                                                   # load everything except the output layers, no freezing
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(l.name).get_weights())

    else:                                                   # transfer all weights and freeze selected layers
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':                       # transfer everything, freeze yolo_darknet only
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':                        # transfer everything and freeze it all
            freeze_all(model)
        elif FLAGS.transfer == 'continue':                      # transfer everything and keep training
            pass

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    # loss: [coarse-scale loss, mid-scale loss, fine-scale loss]
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':

        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)

        # For writing log files
        num_of_batch = int(np.ceil(num_of_data / FLAGS.batch_size))
        logging.info("num of data: {}, batch size: {}, num of batch: {}".format(
                num_of_data, FLAGS.batch_size, num_of_batch))
        train_summary_writer = tf.summary.create_file_writer('logs/train')

        for epoch in range(FLAGS.epochs):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:

                    # Regularization loss
                    regularization_loss = tf.reduce_sum(model.losses)

                    # Prediction loss
                    # outputs: (coarse output, mid output, fine output), shaped:
                    #   ((batch_size, h2, w2, num_anchors0, num_class+5),
                    #    (batch_size, h2*2, w2*2, num_anchors1, num_class+5),
                    #    (batch_size, h2*4, w2*4, num_anchors2, num_class+5))
                    # labels: targets built from the GT boxes, shaped:
                    #   (coarse target, mid target, fine target), each
                    #   (batch_size, grid_size, grid_size, num_anchors, [x1, y1, x2, y2, 1, class])
                    # loss: [coarse loss, mid loss, fine loss]
                    #     loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
                    #             for mask in anchor_masks]
                    # pred_loss: [(batch,), (batch,), (batch,)]
                    pred_loss = []
                    outputs = model(images, training=True)
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    loss_without_reg = tf.reduce_sum(pred_loss)

                    # Loss including the regularization term
                    total_loss = loss_without_reg + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info("epoch_{}_batch_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

                # Record the training loss once per batch
                with train_summary_writer.as_default():
                    tf.summary.scalar('loss', loss_without_reg.numpy(), step=(epoch * num_of_batch + batch))

                # Save a checkpoint (as written this runs every batch; saving
                # less frequently may be preferable)
                model.save_weights('checkpoints/yolov3_{}_{}.tf'.format(epoch, batch))

                # Periodically compute mAP (not implemented)

            avg_loss.reset_states()
    else:
        model.compile(optimizer=optimizer, loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1, save_weights_only=True, save_freq=500),
            TensorBoard(log_dir='logs', update_freq=10)
        ]

        # history.history is a dict holding the losses and other metrics from training
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Exemple #21
def main(_argv):
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True, classes=49)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    elif FLAGS.custom:
        model = Custom(FLAGS.size, training=True)
        anchors = custom_anchors
        anchor_masks = custom_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)  # TODO: not 1024
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 49)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, FLAGS.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 49)))

    if FLAGS.transfer != 'none':
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)
        else:
            # reset top layers
            if FLAGS.tiny:  # get initial weights
                init_model = YoloV3Tiny(FLAGS.size, training=True)
            elif FLAGS.custom:
                init_model = Custom(FLAGS.size, training=True)
            else:
                init_model = YoloV3(FLAGS.size, training=True)

            if FLAGS.transfer == 'darknet':
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif FLAGS.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=49) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=30, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Exemple #22
def main(args):

    # 1. Decide whether to train YoloV3Tiny or the full YoloV3
    if args.tiny:
        model = YoloV3Tiny(args.size, training=True, classes=args.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(args.size, training=True, classes=args.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # 2. Load the training data from the given path
    if args.dataset:
        train_dataset = dataset.load_tfrecord_dataset(args.dataset,
                                                      args.classes)
    train_dataset = train_dataset.shuffle(buffer_size=1024)
    train_dataset = train_dataset.batch(args.batch_size)
    train_dataset = train_dataset.map(
        lambda x, y: (dataset.transform_images(x, args.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # 3. Load the validation data from the given path
    if args.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(args.val_dataset,
                                                    args.classes)
    val_dataset = val_dataset.batch(args.batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (dataset.transform_images(x, args.size),
                      dataset.transform_targets(y, anchors, anchor_masks, 80)))

    # 4. Decide whether to use transfer learning
    if args.transfer != 'none':
        # Load the pretrained model, e.g. './data/yolov3.weights'
        model.load_weights(args.weights)
        if args.transfer == 'fine_tune':
            # Freeze darknet
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif args.transfer == 'frozen':
            # Freeze all layers
            freeze_all(model)
        else:
            # Reset the head layers
            if args.tiny:
                init_model = YoloV3Tiny(args.size,
                                        training=True,
                                        classes=args.num_classes)
            else:
                init_model = YoloV3(args.size,
                                    training=True,
                                    classes=args.num_classes)

            # If transferring the darknet backbone
            if args.transfer == 'darknet':
                # Copy the network weights layer by layer
                for l in model.layers:
                    if l.name != 'yolo_darknet' and l.name.startswith('yolo_'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)
            elif args.transfer == 'no_output':
                for l in model.layers:
                    if l.name.startswith('yolo_output'):
                        l.set_weights(
                            init_model.get_layer(l.name).get_weights())
                    else:
                        freeze_all(l)

    # 5. Define the optimizer and the loss functions
    optimizer = tf.keras.optimizers.Adam(lr=args.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=args.num_classes)
        for mask in anchor_masks
    ]

    # 6. Run training in the specified mode
    # Eager mode is easy to debug
    # The Keras model.fit mode is simple and convenient
    if args.mode == 'eager_tf':
        # Define the running-average metrics
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        # Training loop
        for epoch in range(1, args.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    # 1. Compute the model outputs and losses
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        # compute the loss from each output and its label
                        pred_loss.append(loss_fn(label, output))

                    # Total loss = sum of prediction losses + regularization_loss
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                # Compute gradients and apply the update
                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                # Log progress
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Evaluate on the validation set
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                # Compute the losses
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                # Loss from each output and its label
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                # Log the total loss
                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, image_2: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # Save the model weights each epoch
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer, loss=loss)

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=args.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
Exemple #23
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    check_weighs_exist(tiny=FLAGS.tiny)

    if FLAGS.tiny:
        model = YoloV3Tiny(
            FLAGS.size,
            training=True,
            classes=FLAGS.num_classes
        )
        model.summary()
        plot_model(model, to_file='yoloV3Tiny-model-plot.png', show_shapes=True, show_layer_names=True)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(
            FLAGS.size,
            training=True,
            classes=FLAGS.num_classes
        )
        model.summary()
        plot_model(model, to_file='yoloV3-model-plot.png', show_shapes=True, show_layer_names=True)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Load the dataset
    train_dataset = dataset.load_fake_dataset()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            file_pattern=FLAGS.dataset,
            class_file=FLAGS.classes,
            size=FLAGS.size
        )
    # Shuffle the dataset
    train_dataset = train_dataset.shuffle(buffer_size=FLAGS.buffer_size, reshuffle_each_iteration=True)
    train_dataset_length = [i for i, _ in enumerate(train_dataset)][-1] + 1
    print(f"Dataset for training consists of {train_dataset_length} images.")

    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (dataset.transform_images(x, FLAGS.size),
                                                    dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size))).repeat()
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)

    val_dataset_length = [i for i, _ in enumerate(val_dataset)][-1] + 1
    print(f"Dataset for validation consists of {val_dataset_length} images.")
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (dataset.transform_images(x, FLAGS.size),
                                                dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size))).repeat()

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                size=FLAGS.size,
                training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format_tiny)

        else:
            model_pretrained = YoloV3(
                size=FLAGS.size,
                training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
            model_pretrained.load_weights(FLAGS.weights_tf_format)

        if FLAGS.transfer == 'darknet':
            # Set yolo darknet layer weights to the loaded pretrained model weights
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            # Freeze these layers
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for i in model.layers:
                if not i.name.startswith('yolo_output'):
                    i.set_weights(model_pretrained.get_layer(
                        i.name).get_weights())
                    freeze_all(i)

    else:
        # All other transfer require matching classes
        if FLAGS.tiny:
            model.load_weights(FLAGS.weights_tf_format_tiny)
        else:
            model.load_weights(FLAGS.weights_tf_format)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Use the Adam optimizer with the specified learning rate
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    # YoloLoss function
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes) for mask in anchor_masks]

    if FLAGS.mode == 'eager_tf':
        print(f"Mode is: {FLAGS.mode}")
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)

                    regularization_loss = tf.reduce_sum(model.losses)

                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

                logging.info(f"epoch_{epoch}_train_batch_{batch},"
                             f"{total_loss.numpy()},"
                             f"{list(map(lambda x: np.sum(x.numpy()), pred_loss))}")
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch,
                    batch,
                    total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()),
                             pred_loss)))
                )
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf')
    else:
        print(f"Compiling the model")
        model.compile(
            optimizer=optimizer,
            loss=loss,
            run_eagerly=(FLAGS.mode == 'eager_fit'),
            metrics=['accuracy'])

        # Callbacks and fit run only in the compiled (non-eager_tf) modes; in
        # 'eager_tf' mode the model is never compiled, so fit would fail.
        # NOTE: `data_set` below is assumed to be defined elsewhere in the
        # original script.
        callbacks = [
            EarlyStopping(monitor='val_loss',
                          patience=125,
                          verbose=1),
            ReduceLROnPlateau(monitor='val_loss',
                              verbose=1,
                              factor=0.90,
                              min_lr=0,
                              patience=20,
                              mode="auto"),
            ModelCheckpoint(
                str(f'checkpoints/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}.tf'),
                verbose=1,
                save_weights_only=True,
                save_best_only=True,
                mode="auto",
            ),
            TensorBoard(log_dir='logs'),
            CSVLogger(f'checkpoints/logs/{data_set}_tiny_{FLAGS.tiny}_im_size_{FLAGS.size}',
                      separator=',')
        ]
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            verbose=2,
                            callbacks=callbacks,
                            validation_data=val_dataset,
                            steps_per_epoch=int(np.ceil(train_dataset_length / FLAGS.batch_size)),
                            validation_steps=int(np.ceil(val_dataset_length / FLAGS.batch_size)))
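# Because the datasets above call .repeat(), they stream batches indefinitely;
# steps_per_epoch and validation_steps are what bound each epoch. A
# self-contained sketch of that arithmetic (sizes here are illustrative):
import numpy as np
num_images, batch_size = 1000, 8                          # hypothetical sizes
steps_per_epoch = int(np.ceil(num_images / batch_size))   # -> 125 batches/epoch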
Exemple #24
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    # Setup
    if FLAGS.multi_gpu:
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
        FLAGS.batch_size = BATCH_SIZE

        with strategy.scope():
            model, optimizer, loss, anchors, anchor_masks = setup_model()
    else:
        model, optimizer, loss, anchors, anchor_masks = setup_model()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        start_time = time.time()
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        end_time = time.time() - start_time
        print(f'Total Training Time: {end_time}')
Exemple #25
#     logging.info('labels:')
#     for i in range(nums[0]):
#         logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
#                                            np.array(scores[0][i]),
#                                            np.array(boxes[0][i])))

#     img = cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2BGR)
#     img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
#     plt.imshow(img)
#     cv2.imwrite('./OutputTEST.jpg', img)
#     logging.info('output saved to: {}'.format('./OutputTEST.jpg'))
###############

train_dataset = train_dataset.batch(1)
train_dataset = train_dataset.map(lambda x, y: (tf.image.resize(x, (
    832, 832)), dataset.transform_targets(y, anchors, anchor_masks, 832)))
train_dataset = train_dataset.prefetch(
    buffer_size=tf.data.experimental.AUTOTUNE)

val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(1)
val_dataset = val_dataset.map(lambda x, y: (tf.image.resize(x, (
    832, 832)), dataset.transform_targets(y, anchors, anchor_masks, 832)))
val_dataset = val_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

model_pretrained = YoloV3(416, training=True, classes=80)
model_pretrained.load_weights('./checkpoints/yolov3.tf')

model.get_layer('yolo_darknet').set_weights(
    model_pretrained.get_layer('yolo_darknet').get_weights())
freeze_all(model.get_layer('yolo_darknet'))
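# A quick sanity check (a sketch, not part of the original fragment) that the
# transferred backbone is actually frozen after freeze_all:
darknet = model.get_layer('yolo_darknet')
print(darknet.trainable, len(darknet.trainable_variables))  # expect: False 0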
Exemple #26
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    train_dataset = dataset.load_fake_dataset()
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    val_dataset = dataset.load_fake_dataset()
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        model.compile(optimizer=optimizer,
                      loss=loss,
                      run_eagerly=(FLAGS.mode == 'eager_fit'))

        callbacks = [
            ReduceLROnPlateau(verbose=1),
            EarlyStopping(patience=3, verbose=1),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
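# The eager loop above is flexible but slow. A sketch of compiling one training
# step with tf.function for graph-mode speed (an alternative pattern, not part
# of the original script; assumes the `model`, `optimizer`, and `loss` names
# from above are in scope):
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        outputs = model(images, training=True)
        pred_loss = [loss_fn(label, output)
                     for output, label, loss_fn in zip(outputs, labels, loss)]
        total_loss = tf.reduce_sum(pred_loss) + tf.reduce_sum(model.losses)
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss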
Exemple #27
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        # Allocate GPU memory only as needed
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Decide whether to train the tiny or the full version of YOLO
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size,
                           training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # If no dataset is specified, load a single image as the dataset => fake_dataset
    train_dataset = dataset.load_fake_dataset()

    # Check whether a dataset path was given
    if FLAGS.dataset:
        # Load the dataset from TFRecord files; train_dataset: (x_train, y_train)
        train_dataset = dataset.load_tfrecord_dataset(FLAGS.dataset,
                                                      FLAGS.classes,
                                                      FLAGS.size)
    # Build batched training data
    # Shuffle the data
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # y.shape: train_dataset.as_numpy_iterator().next()[1].shape
    # => (batch_size, yolo_max_boxes, 5), 5 => (xmin, ymin, xmax, ymax, classlabel)
    train_dataset = train_dataset.map(lambda x, y: (
        # Normalize image data to [0, 1]
        dataset.transform_images(x, FLAGS.size),
        # Use the anchor priors to decide which feature map (13*13, 26*26, 52*52)
        # each bbox belongs to, and compute where its center falls on that map
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    # Prefetch data to improve latency and throughput
    # tf.data.experimental.AUTOTUNE: set the number of parallel calls
    # dynamically based on available CPU
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Load a fake validation set to avoid errors when no validation path is given
    val_dataset = dataset.load_fake_dataset()
    # Load the validation set
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(FLAGS.val_dataset,
                                                    FLAGS.classes, FLAGS.size)
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    # Configure the model for transfer learning
    # Training-mode selection
    # Randomly initialize weights and train the whole network from scratch
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    # Two ways of transfer training
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes

        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(FLAGS.size,
                                          training=True,
                                          classes=FLAGS.weights_num_classes
                                          or FLAGS.num_classes)
        else:
            # Pretrained model architecture
            model_pretrained = YoloV3(FLAGS.size,
                                      training=True,
                                      classes=FLAGS.weights_num_classes
                                      or FLAGS.num_classes)
        # Load the pretrained weights
        model_pretrained.load_weights(FLAGS.weights)

        # Set the darknet weights and freeze them: the backbone does not train,
        # and the remaining weights stay randomly initialized
        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))

        # Set and freeze every layer except the YOLO output layers: only the
        # output layers train, starting from random initialization
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(
                        model_pretrained.get_layer(l.name).get_weights())
                    freeze_all(l)

    # The fine_tune and frozen transfer modes require the trained class count
    # to match the pretrained weights (80 classes)
    else:
        # All other transfer modes require matching classes
        # Load all pretrained weight parameters for the network
        model.load_weights(FLAGS.weights)
        # Freeze the darknet (backbone) weights; the rest train on top of the
        # pretrained values
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        # Freeze every parameter, so training has no effect
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Define the optimizer: Adam
    optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)

    loss = [
        YoloLoss(anchors[mask], classes=FLAGS.num_classes)
        for mask in anchor_masks
    ]

    # Debug mode (slower): in eager mode, ops run as soon as they are called
    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        # Running-average loss on the training and validation sets
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
        # Iterate over epochs
        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                # GradientTape automatically computes gradients of the variables
                with tf.GradientTape() as tape:
                    # model() is preferred in eager mode: it runs directly,
                    # without compiling, so it is faster here;
                    # model.predict() compiles a graph on its first call
                    outputs = model(images, training=True)
                    # Sum the elements of the regularization-loss tensors
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss
                # Gradients
                grads = tape.gradient(total_loss, model.trainable_variables)
                # Apply the optimizer step
                optimizer.apply_gradients(zip(grads,
                                              model.trainable_variables))
                # Write to the log
                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                # Update the running-average loss
                avg_loss.update_state(total_loss)

            # Evaluate on the validation set
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)
            # .result() returns the accumulated value
            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))
            # reset_states() clears the accumulated values
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            # Save the model weights once per epoch
            model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))

    # Training mode
    else:
        # Compile the model
        model.compile(optimizer=optimizer,
                      loss=loss,
                      metrics=['accuracy'],
                      run_eagerly=(FLAGS.mode == 'eager_fit'))
        # Callbacks
        callbacks = [
            # learning-rate decay on plateau
            ReduceLROnPlateau(verbose=1),
            # stop training when the monitored loss stops improving
            EarlyStopping(patience=3, verbose=1),
            # save the model
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=1,
                            save_weights_only=True),
            # visualize training results
            TensorBoard(log_dir='logs', write_images=True, update_freq='batch')
        ]
        # Run training
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
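# The TensorBoard callback above writes to ./logs; curves can be inspected with
# the standard CLI (shell command, assuming the default log_dir):
#   tensorboard --logdir logs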