Example 1

import os

import numpy as np
from tqdm import trange

# The helpers below are assumed to come from this project's model.py and utils.py
# (the same modules imported explicitly in Examples 2 and 3).
from model import DeepLab
from utils import (DataPreprocessor, Dataset, Iterator,
                   count_label_prediction_matches,
                   mean_intersection_over_union, multiscale_single_validate,
                   save_load_means, subtract_channel_means)

def train(
        network_backbone,
        pre_trained_model=None,
        trainset_filename='data/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt',
        valset_filename='data/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt',
        images_dir='data/datasets/VOCdevkit/VOC2012/JPEGImages/',
        labels_dir='data/datasets/VOCdevkit/VOC2012/SegmentationClass/',
        trainset_augmented_filename='data/datasets/SBD/train_noval.txt',
        images_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/img/',
        labels_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/cls/',
        model_dir=None,
        log_dir='data/logs/deeplab/'):

    if not model_dir:
        model_dir = 'data/models/deeplab/{}_voc2012/'.format(network_backbone)
    num_classes = 21
    ignore_label = 255
    num_epochs = 1000
    minibatch_size = 8  # Unable to do minibatch_size = 12 :(
    random_seed = 0
    learning_rate = 1e-5
    weight_decay = 5e-4
    batch_norm_decay = 0.99
    image_shape = [513, 513]

    # validation_scales = [0.5, 1, 1.5]
    validation_scales = [1]

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Prepare datasets
    train_dataset = Dataset(dataset_filename=trainset_filename,
                            images_dir=images_dir,
                            labels_dir=labels_dir,
                            image_extension='.jpg',
                            label_extension='.png')
    valid_dataset = Dataset(dataset_filename=valset_filename,
                            images_dir=images_dir,
                            labels_dir=labels_dir,
                            image_extension='.jpg',
                            label_extension='.png')

    # Calculate image channel means
    channel_means = save_load_means(
        means_filename='channel_means.npz',
        image_filenames=train_dataset.image_filenames,
        recalculate=False)

    voc2012_preprocessor = DataPreprocessor(channel_means=channel_means,
                                            output_size=image_shape,
                                            min_scale_factor=0.5,
                                            max_scale_factor=2.0)
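    # (Inferred from the arguments above: channel-mean normalization plus random scale
    # jitter in [0.5, 2.0], yielding 513x513 training crops.)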

    # Prepare dataset iterators
    train_iterator = Iterator(dataset=train_dataset,
                              minibatch_size=minibatch_size,
                              process_func=voc2012_preprocessor.preprocess,
                              random_seed=random_seed,
                              scramble=True,
                              num_jobs=1)
    valid_iterator = Iterator(dataset=valid_dataset,
                              minibatch_size=minibatch_size,
                              process_func=voc2012_preprocessor.preprocess,
                              random_seed=None,
                              scramble=False,
                              num_jobs=1)

    # Prepare augmented dataset
    train_augmented_dataset = Dataset(
        dataset_filename=trainset_augmented_filename,
        images_dir=images_augmented_dir,
        labels_dir=labels_augmented_dir,
        image_extension='.jpg',
        label_extension='.mat')

    channel_augmented_means = save_load_means(
        means_filename='channel_augmented_means.npz',
        image_filenames=train_augmented_dataset.image_filenames,
        recalculate=False)

    voc2012_augmented_preprocessor = DataPreprocessor(
        channel_means=channel_augmented_means,
        output_size=image_shape,
        min_scale_factor=0.5,
        max_scale_factor=2.0)
    train_augmented_iterator = Iterator(
        dataset=train_augmented_dataset,
        minibatch_size=minibatch_size,
        process_func=voc2012_augmented_preprocessor.preprocess,
        random_seed=random_seed,
        scramble=True,
        num_jobs=1)

    model = DeepLab(network_backbone,
                    num_classes=num_classes,
                    ignore_label=ignore_label,
                    batch_norm_momentum=batch_norm_decay,
                    pre_trained_model=pre_trained_model,
                    log_dir=log_dir)

    best_mIoU = 0

    for i in range(num_epochs):
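        # Each epoch runs validation first (scoring the weights trained through the previous
        # epoch and saving a checkpoint on a new best mIoU), then trains on VOC2012
        # followed by the SBD augmented set.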

        print('Epoch number: {}'.format(i))

        print('Start validation...')

        valid_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        # Multi-scale inputs prediction
        for _ in trange(valid_iterator.dataset_size):
            image, label = valid_iterator.next_raw_data()
            image = subtract_channel_means(image=image,
                                           channel_means=channel_means)

            output, valid_loss = multiscale_single_validate(
                image=image,
                label=label,
                input_scales=validation_scales,
                validator=model.validate)
            valid_loss_total += valid_loss

            prediction = np.argmax(output, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(
                labels=[np.squeeze(label, axis=-1)],
                predictions=[prediction],
                num_classes=num_classes,
                ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # validation_single_demo(image=image, label=np.squeeze(label, axis=-1), prediction=prediction, demo_dir=os.path.join(results_dir, 'validation_demo'), filename=str(_))

        mean_IOU = mean_intersection_over_union(
            num_pixels_union=num_pixels_union_total,
            num_pixels_intersection=num_pixels_intersection_total)

        valid_loss_ave = valid_loss_total / valid_iterator.dataset_size

        print('Validation loss: {:.4f} | mIoU: {:.4f}'.format(
            valid_loss_ave, mean_IOU))

        if mean_IOU > best_mIoU:
            best_mIoU = mean_IOU
            model_savename = '{}_{:.4f}.ckpt'.format(network_backbone,
                                                     best_mIoU)
            print('New best mIoU achieved, model saved as {}.'.format(
                model_savename))
            model.save(model_dir, model_savename)

        print('Start training...')

        train_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        print('Training using VOC2012...')
        for _ in trange(
                np.ceil(train_iterator.dataset_size /
                        minibatch_size).astype(int)):
            images, labels = train_iterator.next_minibatch()
            # Scale the weight decay by the fraction of pixels that are not ignore_label.
            balanced_weight_decay = weight_decay * np.sum(
                labels != ignore_label) / labels.size
            outputs, train_loss = model.train(
                inputs=images,
                labels=labels,
                target_height=image_shape[0],
                target_width=image_shape[1],
                learning_rate=learning_rate,
                weight_decay=balanced_weight_decay)
            train_loss_total += train_loss

            predictions = np.argmax(outputs, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(
                labels=np.squeeze(labels, axis=-1),
                predictions=predictions,
                num_classes=num_classes,
                ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # validation_demo(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join(results_dir, 'training_demo'), batch_no=_)
        train_iterator.shuffle_dataset()

        print('Training using SBD...')
        for _ in trange(
                np.ceil(train_augmented_iterator.dataset_size /
                        minibatch_size).astype(int)):
            images, labels = train_augmented_iterator.next_minibatch()
            balanced_weight_decay = weight_decay * np.sum(
                labels != ignore_label) / labels.size
            outputs, train_loss = model.train(
                inputs=images,
                labels=labels,
                target_height=image_shape[0],
                target_width=image_shape[1],
                learning_rate=learning_rate,
                weight_decay=balanced_weight_decay)
            train_loss_total += train_loss

            predictions = np.argmax(outputs, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(
                labels=np.squeeze(labels, axis=-1),
                predictions=predictions,
                num_classes=num_classes,
                ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            # validation_demo(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join(results_dir, 'training_demo'), batch_no=_)
        train_augmented_iterator.shuffle_dataset()

        mIoU = mean_intersection_over_union(
            num_pixels_union=num_pixels_union_total,
            num_pixels_intersection=num_pixels_intersection_total)
        train_loss_ave = train_loss_total / (
            train_iterator.dataset_size +
            train_augmented_iterator.dataset_size)
        print('Training loss: {:.4f} | mIoU: {:.4f}'.format(
            train_loss_ave, mIoU))

    model.close()
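
A minimal sketch of how this train() function might be invoked (not part of the original snippet): the 'resnet_101' backbone matches the checkpoints loaded in the later examples, while the pre-trained checkpoint path is a hypothetical placeholder.

if __name__ == '__main__':
    # Hypothetical entry point; the pre-trained checkpoint path below is a placeholder.
    train(network_backbone='resnet_101',
          pre_trained_model='data/models/pretrained/resnet_101/resnet_v2_101.ckpt')
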
Example 2

import os.path as osp
from glob import glob

import numpy as np

from model import DeepLab
from utils import (save_load_means, subtract_channel_means, single_demo,
                   read_image)
import gpu_limit

if __name__ == '__main__':

    demo_dir = 'data/demos/deeplab/hello/'
    models_dir = 'data/models/deeplab/resnet_101_voc2012/'
    model_filename = 'resnet_101_0.0040.ckpt'

    channel_means = save_load_means(means_filename='channel_means.npz',
                                    image_filenames=None,
                                    recalculate=False)

    deeplab = DeepLab('resnet_101', training=False)
    deeplab.load(osp.join(models_dir, model_filename))
    files = glob(demo_dir + '*.jpg')
    for image_filename in files:
        filename = osp.basename(image_filename).split('.')[0]
        image = read_image(image_filename=image_filename)
        image_input = subtract_channel_means(image=image,
                                             channel_means=channel_means)
        output = deeplab.test(inputs=[image_input],
                              target_height=image.shape[0],
                              target_width=image.shape[1])[0]
        single_demo(image, np.argmax(output, axis=-1), demo_dir, filename)
Example 3

import os.path as osp

import numpy as np
from model import DeepLab
from tqdm import trange
from utils import (Dataset, Iterator, save_load_means, subtract_channel_means,
                   validation_single_demo_collage)

if __name__ == '__main__':

    data_dir = '/content/Data_Camera_SanTennis_Labeled/'
    testset_filename = osp.join(data_dir, 'valid.txt')
    images_dir = osp.join(data_dir, 'RGBs/')
    labels_dir = osp.join(data_dir, 'Labels/')
    demo_dir = 'data/demos/deeplab/resnet_101_voc2012/'
    models_dir = '/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/Models/'
    model_filename = 'resnet_101_0.7076.ckpt'

    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=None)

    minibatch_size = 16

    test_dataset = Dataset(dataset_filename=testset_filename, images_dir=images_dir, labels_dir=labels_dir,
                           image_extension='.png', label_extension='.png')
    test_iterator = Iterator(dataset=test_dataset, minibatch_size=minibatch_size, process_func=None, random_seed=None,
                             scramble=False, num_jobs=1)

    deeplab = DeepLab('resnet_101', training=False, num_classes=5)
    deeplab.load(osp.join(models_dir, model_filename))

    n_samples = 8
    for i in trange(n_samples):
        image, label = test_iterator.next_raw_data()
        # image_input = subtract_channel_means(image=image, channel_means=channel_means)
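        # The snippet is truncated here; a plausible completion (assumption), mirroring the
        # deeplab.test(...) call from Example 2 and the validation_single_demo_collage(...)
        # call from Example 4:
        output = deeplab.test(inputs=[image], target_height=image.shape[0],
                              target_width=image.shape[1])[0]
        prediction = np.argmax(output, axis=-1)
        validation_single_demo_collage(image=image, label=np.squeeze(label, axis=-1),
                                       prediction=prediction, demo_dir=demo_dir,
                                       val_no=str(i))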
Example 4

import os

import numpy as np
from tqdm import trange

# Imports assumed from this project's model.py and utils.py helpers; validation_demo_collage
# and validation_single_demo_collage come from the customized utils of this fork.
from model import DeepLab
from utils import (DataPreprocessor, Dataset, Iterator,
                   count_label_prediction_matches,
                   mean_intersection_over_union, multiscale_single_validate,
                   save_load_means, validation_demo_collage,
                   validation_single_demo_collage)

def train(network_backbone, pre_trained_model=None,
          trainset_filename='/content/Data_Camera_SanTennis_Labeled/train.txt',
          valset_filename='/content/Data_Camera_SanTennis_Labeled/valid.txt',
          images_dir='/content/Data_Camera_SanTennis_Labeled/RGBs/',
          labels_dir='/content/Data_Camera_SanTennis_Labeled/Labels/',
          trainset_augmented_filename='data/datasets/SBD/train_noval.txt',
          images_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/img/',
          labels_augmented_dir='data/datasets/SBD/benchmark_RELEASE/dataset/cls/', model_dir=None,
          log_dir='data/logs/deeplab/'):
    if not model_dir:
        model_dir = '/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/Models/'
    num_classes = 5
    ignore_label = 255
    num_epochs = 1000
    minibatch_size = 4  # Unable to do minibatch_size = 12 :(
    random_seed = 0
    learning_rate = 1e-3
    weight_decay = 5e-4
    batch_norm_decay = 0.99
    image_shape = [480, 640]

    # validation_scales = [0.5, 1, 1.5]
    validation_scales = [1]

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Prepare datasets
    train_dataset = Dataset(dataset_filename=trainset_filename, images_dir=images_dir, labels_dir=labels_dir,
                            image_extension='.png', label_extension='.png')
    valid_dataset = Dataset(dataset_filename=valset_filename, images_dir=images_dir, labels_dir=labels_dir,
                            image_extension='.png', label_extension='.png')

    # Calculate image channel means
    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=train_dataset.image_filenames,
                                    recalculate=False)

    voc2012_preprocessor = DataPreprocessor(channel_means=channel_means, output_size=image_shape, min_scale_factor=0.5,
                                            max_scale_factor=2.0)

    # Prepare dataset iterators
    train_iterator = Iterator(dataset=train_dataset, minibatch_size=minibatch_size,
                              process_func=voc2012_preprocessor.preprocess, random_seed=random_seed, scramble=True,
                              num_jobs=1)
    valid_iterator = Iterator(dataset=valid_dataset, minibatch_size=minibatch_size,
                              process_func=voc2012_preprocessor.preprocess, random_seed=None, scramble=False,
                              num_jobs=1)

    # Prepare augmented dataset
    # train_augmented_dataset = Dataset(dataset_filename=trainset_augmented_filename, images_dir=images_augmented_dir, labels_dir=labels_augmented_dir, image_extension='.jpg', label_extension='.mat')
    #
    # channel_augmented_means = save_load_means(means_filename='channel_augmented_means.npz', image_filenames=train_augmented_dataset.image_filenames, recalculate=False)
    #
    # voc2012_augmented_preprocessor = DataPreprocessor(channel_means=channel_augmented_means, output_size=image_shape, min_scale_factor=0.5, max_scale_factor=2.0)
    # train_augmented_iterator = Iterator(dataset=train_augmented_dataset, minibatch_size=minibatch_size, process_func=voc2012_augmented_preprocessor.preprocess, random_seed=random_seed, scramble=True, num_jobs=1)

    model = DeepLab(network_backbone, num_classes=num_classes, ignore_label=ignore_label,
                    batch_norm_momentum=batch_norm_decay, pre_trained_model=pre_trained_model, log_dir=log_dir)

    best_mIoU = 0

    train_loss = ","
    train_mIoU = ","
    valid_loss = ","
    valid_mIoU = ","
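    # Comma-separated history strings, rewritten to mIoU_log.txt every epoch. Note that
    # train_loss and valid_loss are reassigned to scalar losses inside the loops below, so
    # the commented-out loss logging would need separate variable names if re-enabled.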

    for i in range(num_epochs):

        print('Epoch number: {}'.format(i))

        print('Start validation...')

        valid_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        rand = np.random.randint(0, valid_iterator.dataset_size - 1)
        count = 0

        # Multi-scale inputs prediction
        for _ in trange(valid_iterator.dataset_size):
            image, label = valid_iterator.next_raw_data()

            # image = subtract_channel_means(image=image, channel_means=channel_means)

            output, valid_loss = multiscale_single_validate(image=image, label=label, input_scales=validation_scales,
                                                            validator=model.validate)
            valid_loss_total += valid_loss

            prediction = np.argmax(output, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(
                labels=[np.squeeze(label, axis=-1)], predictions=[prediction], num_classes=num_classes,
                ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            if count == rand:
                validation_single_demo_collage(image=image, label=np.squeeze(label, axis=-1), prediction=prediction,
                                               demo_dir=os.path.join(
                                                   "/content/CustomDeeplabv3/data/demos/deeplab/resnet_101_voc2012/",
                                                   'validation_demo'), val_no=str(i))

            count += 1

        mean_IOU = mean_intersection_over_union(num_pixels_union=num_pixels_union_total,
                                                num_pixels_intersection=num_pixels_intersection_total)

        valid_loss_ave = valid_loss_total / valid_iterator.dataset_size

        print('Validation loss: {:.4f} | mIoU: {:.4f}'.format(valid_loss_ave, mean_IOU))

        # valid_loss += str(train_loss_total / train_iterator.dataset_size) + ","
        valid_mIoU += str(mean_IOU) + ","

        if mean_IOU > best_mIoU and mean_IOU > 0.25:
            best_mIoU = mean_IOU
            model_savename = '{}_{:.4f}.ckpt'.format(network_backbone, best_mIoU)
            print('New best mIoU achieved, model saved as {}.'.format(model_savename))
            model.save(model_dir, model_savename)

        print('Start training...')

        train_loss_total = 0
        num_pixels_union_total = np.zeros(num_classes)
        num_pixels_intersection_total = np.zeros(num_classes)

        print('Training using the homemade dataset (Data Nhà làm)...')
        for _ in trange(np.ceil(train_iterator.dataset_size / minibatch_size).astype(int)):
            images, labels = train_iterator.next_minibatch()
            balanced_weight_decay = weight_decay * np.sum(labels != ignore_label) / labels.size
            outputs, train_loss = model.train(inputs=images, labels=labels, target_height=image_shape[0],
                                              target_width=image_shape[1], learning_rate=learning_rate,
                                              weight_decay=balanced_weight_decay)
            train_loss_total += train_loss

            predictions = np.argmax(outputs, axis=-1)
            num_pixels_union, num_pixels_intersection = count_label_prediction_matches(
                labels=np.squeeze(labels, axis=-1), predictions=predictions, num_classes=num_classes,
                ignore_label=ignore_label)

            num_pixels_union_total += num_pixels_union
            num_pixels_intersection_total += num_pixels_intersection

            validation_demo_collage(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions,
                                    demo_dir=os.path.join(
                                        "/content/CustomDeeplabv3/data/demos/deeplab/resnet_101_voc2012/",
                                        'training_demo'), batch_no=i)
        train_iterator.shuffle_dataset()

        # print('Training using SBD...')
        # for _ in trange(np.ceil(train_augmented_iterator.dataset_size / minibatch_size).astype(int)):
        #     images, labels = train_augmented_iterator.next_minibatch()
        #     balanced_weight_decay = weight_decay * sum(labels != ignore_label) / labels.size
        #     outputs, train_loss = model.train(inputs=images, labels=labels, target_height=image_shape[0], target_width=image_shape[1], learning_rate=learning_rate, weight_decay=balanced_weight_decay)
        #     train_loss_total += train_loss
        #
        #     predictions = np.argmax(outputs, axis=-1)
        #     num_pixels_union, num_pixels_intersection = count_label_prediction_matches(labels=np.squeeze(labels, axis=-1), predictions=predictions, num_classes=num_classes, ignore_label=ignore_label)
        #
        #     num_pixels_union_total += num_pixels_union
        #     num_pixels_intersection_total += num_pixels_intersection
        #
        #     # validation_demo(images=images, labels=np.squeeze(labels, axis=-1), predictions=predictions, demo_dir=os.path.join(results_dir, 'training_demo'), batch_no=_)
        # train_augmented_iterator.shuffle_dataset()

        mIoU = mean_intersection_over_union(num_pixels_union=num_pixels_union_total,
                                            num_pixels_intersection=num_pixels_intersection_total)
        # train_loss_ave = train_loss_total / (train_iterator.dataset_size + train_augmented_iterator.dataset_size)
        train_loss_ave = train_loss_total / train_iterator.dataset_size
        print('Training loss: {:.4f} | mIoU: {:.4f}'.format(train_loss_ave, mIoU))

        # train_loss += str(train_loss_total / train_iterator.dataset_size) + ","
        train_mIoU += str(mIoU) + ","

        # loss_log = open("/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/loss_log.txt", "w")
        # loss_log.write(train_loss + "\n" + valid_loss)

        # Rewrite the full mIoU history each epoch, closing the file handle via a context manager.
        with open("/content/drive/MyDrive/Colab Notebooks/RobotNhatBongTennis2021/mIoU_log.txt", "w") as mIoU_log:
            mIoU_log.write(train_mIoU + "\n" + valid_mIoU)

    model.close()
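
A small helper sketch (not part of the original scripts) for reading the mIoU_log.txt written above; it relies only on the format produced by this code: two comma-separated lines, training mIoU first and validation mIoU second, each starting with an empty field because the accumulators are initialized to ",".

import matplotlib.pyplot as plt

def plot_miou_log(log_path='mIoU_log.txt'):
    # Line 1 = training mIoU per epoch, line 2 = validation mIoU per epoch.
    with open(log_path) as f:
        train_line, valid_line = f.read().splitlines()[:2]
    # Drop the empty fields produced by the leading and trailing commas.
    train_miou = [float(v) for v in train_line.split(',') if v]
    valid_miou = [float(v) for v in valid_line.split(',') if v]
    plt.plot(train_miou, label='train mIoU')
    plt.plot(valid_miou, label='valid mIoU')
    plt.xlabel('epoch')
    plt.ylabel('mIoU')
    plt.legend()
    plt.show()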