Example #1
0
#
# 2. Randomly crop the image and resize it to 224x224
# 3. Randomly flip the image horizontally
# 4. Randomly jitter color and add noise
# 5. Transpose the data from height*width*num_channels to num_channels*height*width, and map values from [0, 255] to [0, 1]
# 6. Normalize with the mean and standard deviation from the ImageNet dataset.
#
# Strength of the random color jitter and PCA-lighting augmentation.
jitter_param = 0.4
lighting_param = 0.1

# ImageNet per-channel statistics, shared by the train and test pipelines.
_imagenet_mean = [0.485, 0.456, 0.406]
_imagenet_std = [0.229, 0.224, 0.225]

# Training-time pipeline: randomized crop/flip/color noise, then tensorize
# (HWC uint8 [0, 255] -> CHW float [0, 1]) and normalize.
_train_steps = [
    # Randomly crop an area and resize it to 224x224.
    transforms.RandomResizedCrop(224),
    # Randomly flip the image horizontally.
    transforms.RandomFlipLeftRight(),
    # Jitter brightness, contrast and saturation by the same amount.
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(_imagenet_mean, _imagenet_std),
]
transform_train = transforms.Compose(_train_steps)

# Deterministic evaluation pipeline: resize the shorter side to 256, take the
# 224x224 center crop, tensorize, and normalize with the same statistics.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_imagenet_mean, _imagenet_std),
])

################################################################################
# With the data augmentation functions, we can define our data loaders:
Example #2
0
#             feat4 = self.upsample(F, self.conv4(self.pool(F, x, 4)), h, w)
#             return F.concat(x, feat1, feat2, feat3, feat4, dim=1)
#
# PSPNet model is provided in :class:`gluoncv.model_zoo.PSPNet`. To get
# PSP model using ResNet50 base network for ADE20K dataset:
# Build a PSPNet for semantic segmentation: ResNet50 backbone with the
# ADE20K output head, randomly initialized (pretrained=False).
model = gluoncv.model_zoo.get_psp(dataset='ade20k', backbone='resnet50', pretrained=False)
print(model)

##############################################################################
# Dataset and Data Augmentation
# -----------------------------
#
# image transform for color normalization
from mxnet.gluon.data.vision import transforms
# Tensorize (HWC uint8 [0, 255] -> CHW float [0, 1]) and normalize with the
# ImageNet channel mean/std.
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])

##############################################################################
# We provide semantic segmentation datasets in :class:`gluoncv.data`.
# For example, we can easily get the ADE20K dataset:
trainset = gluoncv.data.ADE20KSegmentation(split='train', transform=input_transform)
print('Training images:', len(trainset))
# set batch_size = 2 for toy example
batch_size = 2
# Create Training Loader; 'rollover' carries the remainder of an epoch
# into the next one instead of dropping or padding it.
train_data = gluon.data.DataLoader(
    trainset, batch_size, shuffle=True, last_batch='rollover',
    num_workers=batch_size)

##############################################################################
Example #3
0
def train_cifar(args, reporter):
    """Train a CIFAR-10 classifier and report validation accuracy per epoch.

    Parameters
    ----------
    args : argparse.Namespace-like
        Expected attributes: batch_size, num_gpus, num_workers, model,
        lr, wd, momentum, epochs.
    reporter : callable
        Invoked once per epoch as ``reporter(epoch=..., accuracy=...)`` so an
        outer hyper-parameter tuner can track progress.
    """
    print('args', args)
    batch_size = args.batch_size

    num_gpus = args.num_gpus
    # Scale the batch size by the number of devices (data-parallel training).
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = args.num_workers

    model_name = args.model
    net = get_model(model_name, classes=10)

    # Training augmentation: pad-and-crop plus random horizontal flip, then
    # tensorize and normalize with per-channel CIFAR-10 statistics.
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    # Evaluation uses only the deterministic tensorize + normalize steps.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    def test(ctx, val_data):
        # Evaluate `net` over the whole validation loader; returns the
        # (name, value) pair from mx.metric.Accuracy.get().
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(epochs, ctx):
        # Full training loop: build loaders, optimizer and LR schedule, then
        # iterate epochs and report validation accuracy after each one.
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.Xavier(), ctx=ctx)

        train_data = gluon.data.DataLoader(gluon.data.vision.CIFAR10(
            train=True).transform_first(transform_train),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           last_batch='discard',
                                           num_workers=num_workers)

        val_data = gluon.data.DataLoader(gluon.data.vision.CIFAR10(
            train=False).transform_first(transform_test),
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)

        # Cosine learning-rate decay over the whole run.
        lr_scheduler = LRScheduler(mode='cosine',
                                   base_lr=args.lr,
                                   nepochs=args.epochs,
                                   iters_per_epoch=len(train_data))
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {
            'lr_scheduler': lr_scheduler,
            'wd': args.wd,
            'momentum': args.momentum
        })
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        iteration = 0
        best_val_score = 0

        start_epoch = 0

        for epoch in range(start_epoch, epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1

            for i, batch in enumerate(train_data):
                # Shard the batch across all devices along the batch axis.
                data = gluon.utils.split_and_load(batch[0],
                                                  ctx_list=ctx,
                                                  batch_axis=0)
                label = gluon.utils.split_and_load(batch[1],
                                                   ctx_list=ctx,
                                                   batch_axis=0)

                with mx.autograd.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1

            # Mean per-sample loss: the inner sum is over per-sample losses,
            # so divide by the total number of samples seen this epoch.
            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            reporter(epoch=epoch, accuracy=val_acc)

    train(args.epochs, context)
Example #4
0
# Output location for training plots (taken from the CLI options).
plot_name = opt.save_plot_dir

# Always log to the console; also log to a file when a logging dir is given.
logging_handlers = [logging.StreamHandler()]
if opt.logging_dir:
    logging_dir = opt.logging_dir
    makedirs(logging_dir)
    logging_handlers.append(
        logging.FileHandler('%s/train_cifar10_%s.log' %
                            (logging_dir, model_name)))

logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
logging.info(opt)

# Training augmentation: pad-and-crop plus random horizontal flip, then
# tensorize and normalize with per-channel CIFAR-10 statistics.
transform_train = transforms.Compose([
    gcv_transforms.RandomCrop(32, pad=4),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

# Evaluation uses only the deterministic tensorize + normalize steps.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])


def label_transform(label, classes):
    """Expand integer class labels into one-hot rows.

    Parameters
    ----------
    label : NDArray
        1-D array of class indices (any numeric dtype; cast to int here).
    classes : int
        Number of columns in the one-hot output.

    Returns
    -------
    NDArray of shape (len(label), classes) with a single 1 per row, on the
    same context as ``label``.
    """
    indices = label.astype('int')
    n = indices.shape[0]
    one_hot = nd.zeros((n, classes), ctx=label.context)
    # Scatter a 1 into column `indices[i]` of every row i.
    one_hot[nd.arange(n, ctx=label.context), indices] = 1
    return one_hot
Example #5
0
from myNet import resnet18
from mxnet import cpu, gpu
from mxnet import ndarray as nd
from mxnet.test_utils import list_gpus
import pandas as pd

# Evaluate one sample at a time so the confusion matrix is easy to update.
BATCH_SIZE = 1
MODEL_PATH = 'resnet18.params'

# Prefer a GPU when one is visible, otherwise fall back to the CPU.
if list_gpus():
    CTX = gpu()
else:
    CTX = cpu()

# Deterministic test-time pipeline with per-channel CIFAR-10 statistics.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

# NOTE(review): `_test_data` and `DataLoader` are defined elsewhere in this
# file/project — confirm `_test_data` is the intended evaluation split.
test_dataloader = DataLoader(_test_data.transform_first(transform_test),
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             last_batch='keep')

# Restore the trained 10-class ResNet-18 weights onto the chosen device.
net = resnet18(10)
net.load_parameters(MODEL_PATH, ctx=CTX)
# net.initialize(ctx=CTX)

# 10x10 class-by-class tally; presumably rows are true labels and columns
# predictions — verify against the (truncated) update loop below.
confusion_matrix = nd.zeros((10, 10))

print("====>make confusion matrix")
for data, label in test_dataloader:
Example #6
0
    def predict(self,
                X,
                input_size=224,
                crop_ratio=0.875,
                set_prob_thresh=0.001,
                plot=False):
        """Predict class-index and associated class probability for each image in a given dataset (or just a single image).

        Parameters
        ----------
        X : str or :class:`autogluon.task.ImageClassification.Dataset` or list of `autogluon.task.ImageClassification.Dataset`
            If str, should be path to the input image (when we just want to predict on single image).
            If class:`autogluon.task.ImageClassification.Dataset`, should be dataset of multiple images in same format as training dataset.
            If list of `autogluon.task.ImageClassification.Dataset`, should be a set of test dataset with different scales of origin images.
        input_size : int
            Size of the images (pixels).
        crop_ratio : float
            Center-crop ratio; images are resized to input_size/crop_ratio
            before the center crop.
        plot : bool
            Whether to plot the image being classified.
        set_prob_thresh: float
            Results with probability below threshold are set to 0 by default.

        Examples
        --------
        >>> import autogluon.core as ag
        >>> from autogluon.vision import ImageClassification as task
        >>> train_data = task.Dataset(train_path='~/data/train')
        >>> classifier = task.fit(train_data,
        >>>                       nets=ag.space.Categorical['resnet18_v1', 'resnet34_v1'],
        >>>                       time_limits=600, ngpus_per_trial=1, num_trials=4)
        >>> test_data = task.Dataset('~/data/test', train=False)
        >>> class_index, class_probability = classifier.predict('example.jpg')
        """

        # Prefer the model's own declared input size when it has one.
        input_size = self.model.input_size if hasattr(
            self.model, 'input_size') else input_size
        resize = int(math.ceil(input_size / crop_ratio))

        # Deterministic test-time pipeline: resize, center crop, tensorize,
        # normalize with ImageNet statistics.
        transform_size = transforms.Compose([
            transforms.Resize(resize),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        def predict_img(img, ensemble=False):
            # Single-image prediction.  With ensemble=True, return the raw
            # probability row so callers can average it across scales.
            proba = self.predict_proba(img)
            if ensemble:
                return proba
            else:
                # Gather the probability of the argmax class for each row.
                ind = mx.nd.argmax(proba, axis=1).astype('int')
                idx = mx.nd.stack(
                    mx.nd.arange(proba.shape[0], ctx=proba.context),
                    ind.astype('float32'))
                probai = mx.nd.gather_nd(proba, idx)
                return ind, probai, proba

        def avg_prediction(different_dataset, threshold=0.001):
            # Average per-image probabilities over the datasets (scales),
            # zero out entries below `threshold`, then take the argmax.
            result = defaultdict(list)
            inds, probas, probals_all = [], [], []
            # Regroup: result[j] collects image j's probabilities from every
            # dataset/scale i.
            for i in range(len(different_dataset)):
                for j in range(len(different_dataset[0])):
                    result[j].append(different_dataset[i][j])

            for c in result.keys():
                proba_all = sum([*result[c]]) / len(different_dataset)
                proba_all = (proba_all >= threshold) * proba_all
                ind = mx.nd.argmax(proba_all, axis=1).astype('int')
                idx = mx.nd.stack(
                    mx.nd.arange(proba_all.shape[0], ctx=proba_all.context),
                    ind.astype('float32'))
                proba = mx.nd.gather_nd(proba_all, idx)
                inds.append(ind.asscalar())
                probas.append(proba.asnumpy())
                probals_all.append(proba_all.asnumpy().flatten())
            return inds, probas, probals_all

        def predict_imgs(X):
            # Predict over a dataset; a list of datasets triggers the
            # multi-scale ensemble path above.
            if isinstance(X, list):
                different_dataset = []
                for i, x in enumerate(X):
                    proba_all_one_dataset = []
                    tbar = tqdm(range(len(x.items)))
                    for j, x_item in enumerate(x):
                        tbar.update(1)
                        proba_all = predict_img(x_item[0], ensemble=True)
                        tbar.set_description(
                            'ratio:[%d],The input picture [%d]' % (i, j))
                        proba_all_one_dataset.append(proba_all)
                    different_dataset.append(proba_all_one_dataset)
                inds, probas, probals_all = avg_prediction(
                    different_dataset, threshold=set_prob_thresh)
            else:
                inds, probas, probals_all = [], [], []
                tbar = tqdm(range(len(X.items)))
                for i, x in enumerate(X):
                    tbar.update(1)
                    ind, proba, proba_all = predict_img(x[0])
                    tbar.set_description(
                        'The input picture [%d] is classified as [%d], with probability %.2f '
                        % (i, ind.asscalar(), proba.asscalar()))
                    inds.append(ind.asscalar())
                    probas.append(proba.asnumpy())
                    probals_all.append(proba_all.asnumpy().flatten())
            return inds, probas, probals_all

        # Single image on disk: load, optionally display, transform, predict.
        if isinstance(X, str) and os.path.isfile(X):
            img = mx.image.imread(filename=X)
            if plot:
                plt.imshow(img.asnumpy())
                plt.show()

            img = transform_size(img)
            return predict_img(img)

        # Lazy dataset object: materialize it, then run batch prediction.
        if isinstance(X, AutoGluonObject):
            X = X.init()
            return predict_imgs(X)

        # List of datasets at different scales: ensemble their predictions.
        # NOTE(review): falls through returning None when X matches none of
        # the cases above (e.g. a non-existent path or a 1-element list).
        if isinstance(X, list) and len(X) > 1:
            X_group = []
            for X_item in X:
                X_item = X_item.init()
                X_group.append(X_item)
            return predict_imgs(X_group)
    gpu = args.gpu_id
    lr = args.lr
    #ctx = mx.cpu()
    ctx = mx.gpu(gpu)

    if not os.path.exists('output/snapshots'):
        os.makedirs('output/snapshots')

    model = hopenet.Hopenet(model_zoo.vision.BottleneckV1, [3, 4, 6, 3], 66)
    
    # ResNet50 structure
    model.hybridize()
    
    print('Loading data.')
    transformations = transforms.Compose([transforms.Resize(240),
            transforms.RandomResizedCrop(224), transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
                                      
    if args.dataset == 'Pose_300W_LP':
        pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
    elif args.dataset == 'Pose_300W_LP_random_ds':
        pose_dataset = datasets.Pose_300W_LP_random_ds(args.data_dir, args.filename_list, transformations)
    elif args.dataset == 'Synhead':
        pose_dataset = datasets.Synhead(args.data_dir, args.filename_list, transformations)
    elif args.dataset == 'AFLW2000':
        pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
    elif args.dataset == 'BIWI':
        pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
    elif args.dataset == 'AFLW':
        pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
    elif args.dataset == 'AFLW_aug':
Example #8
0
def get_dataloader(module_name, module_args, num_label):
    """Build the train/validation data loaders described by ``module_args``.

    Parameters
    ----------
    module_name : str
        Dataset implementation: only 'ImageDataset' and 'LmdbDataset' are
        supported.
    module_args : dict
        Configuration with 'dataset' and 'loader' sections.
    num_label : int
        Number of labels, forwarded to the dataset via ``dataset_args``.

    Returns
    -------
    tuple
        ``(train_loader, val_loader)``; ``val_loader`` is None when no
        validation data is configured.

    Raises
    ------
    Exception
        For an unsupported ``module_name`` or when no training data is found.
    """
    # Training applies color jitter before tensorizing; validation only
    # tensorizes.
    train_transfroms = transforms.Compose(
        [transforms.RandomColorJitter(brightness=0.5),
         transforms.ToTensor()])
    val_transfroms = transforms.ToTensor()

    dataset_args = module_args['dataset']
    dataset_args['num_label'] = num_label

    # Create the datasets (paths are popped so they don't reach the dataset
    # constructor through dataset_args).
    train_data_path = dataset_args.pop('train_data_path')
    train_data_ratio = dataset_args.pop('train_data_ratio')
    val_data_path = dataset_args.pop('val_data_path')

    if module_name == 'ImageDataset':
        train_data_list, val_data_list = get_datalist(
            train_data_path, val_data_path,
            module_args['loader']['validation_split'])
    elif module_name == 'LmdbDataset':
        train_data_list = train_data_path
        val_data_list = val_data_path
    else:
        raise Exception('current only support ImageDataset and LmdbDataset')

    train_dataset_list = [
        get_dataset(data_list=entry,
                    module_name=module_name,
                    phase='train',
                    dataset_args=dataset_args)
        for entry in train_data_list
    ]

    if len(train_dataset_list) > 1:
        # Multiple sources: balance them with the configured sampling ratios.
        train_loader = dataset.Batch_Balanced_Dataset(
            dataset_list=train_dataset_list,
            ratio_list=train_data_ratio,
            module_args=module_args,
            dataset_transfroms=train_transfroms,
            phase='train')
    elif len(train_dataset_list) == 1:
        train_loader = DataLoader(
            dataset=train_dataset_list[0].transform_first(train_transfroms),
            batch_size=module_args['loader']['train_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='rollover',
            num_workers=module_args['loader']['num_workers'])
        train_loader.dataset_len = len(train_dataset_list[0])
    else:
        raise Exception('no images found')

    val_loader = None
    if len(val_data_list):
        val_dataset = get_dataset(data_list=val_data_list,
                                  module_name=module_name,
                                  phase='test',
                                  dataset_args=dataset_args)
        val_loader = DataLoader(
            dataset=val_dataset.transform_first(val_transfroms),
            batch_size=module_args['loader']['val_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='keep',
            num_workers=module_args['loader']['num_workers'])
        val_loader.dataset_len = len(val_dataset)
    return train_loader, val_loader
Example #9
0
# which is equal to normalizing a tensorized image in the range [0, 1)
# with mean value 0.5 = 127.5/255 and std value 128/255.
# The class FaceTypeNormalizeTransform and face_type_normalize are almost equal;
# the tiny difference is due to floating-point rounding.
class FaceTypeNormalizeTransform(nn.HybridBlock):
    """Hybridizable block mapping a [0, 1]-scaled image to (x*255 - 127.5) / 128."""

    def __init__(self):
        super(FaceTypeNormalizeTransform, self).__init__()

    def hybrid_forward(self, F, x):
        # 0.0078125 == 1/128, so this computes (x*255 - 127.5) / 128,
        # i.e. roughly the [-1, 1) range used by the face model.
        return (x*255-127.5)*0.0078125


# Normalize(mean=0.5, std=128/255) applied after ToTensor computes
# (x - 0.5) / (128/255), matching FaceTypeNormalizeTransform up to rounding.
face_type_normalize = transforms.Normalize(0.5, 128/255)

# Test-time: only tensorize (HWC [0, 255] -> CHW [0, 1]); normalization is
# applied separately.
transform_test = transforms.Compose([
    transforms.ToTensor()
])

# Training-time photometric jitter plus horizontal flip, again without
# normalization; wrapped by the transform_train() function below.
_transform_train = transforms.Compose([
    transforms.RandomBrightness(0.3),
    transforms.RandomContrast(0.3),
    transforms.RandomSaturation(0.3),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor()
])


def transform_train(data, label):
    """Apply the training augmentation pipeline to the image; pass the label through."""
    augmented = _transform_train(data)
    return augmented, label
Example #10
0
import mxnet.gluon as gl

from mxnet import autograd, image
from mxnet.gluon import nn
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms, MNIST
from mxnet.gluon.nn import Sequential
from mxnet.ndarray import NDArray

# Scale MNIST pixels to [0, 1], then to roughly [-1, 1] via (x - 0.5) / 0.5.
transform: Sequential = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=0.5, std=0.5)
])

# Download and load the training data
trainSet: MNIST = MNIST('~/.mxnet/MNIST_data/', train=True).transform_first(transform)
trainLoader: DataLoader = DataLoader(trainSet, batch_size=64, shuffle=True)

# Build a feed-forward network
model = nn.Sequential()
# with model.name_scope():
model.add(
    nn.Dense(128, activation='relu'),
    # nn.Activation('relu'),
    nn.Dense(64, activation='relu'),
    nn.Dense(10)
)
model.initialize()

# Softmax cross-entropy over the 10 digit classes, optimized with plain SGD.
criterion = gl.loss.SoftmaxCrossEntropyLoss()
optimizer = gl.Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.01})
def train():
    """Fine-tune a pretrained ImageNet model on one FashionAI attribute task.

    Relies on module-level state: task, model_name, task_num_class, ctx,
    batch_size, num_workers, lr, lr_steps, lr_factor, epochs, f_val, and the
    cutomdataset / validate helpers.  Returns the fine-tuned network with the
    best validation weights re-loaded.
    """
    logging.info('Start Training for Task: %s\n' % (task))

    # Initialize the net with pretrained model: reuse the pretrained feature
    # extractor and train a fresh task-specific output head.
    pretrained_net = gluon.model_zoo.vision.get_model(model_name,
                                                      pretrained=True)

    finetune_net = gluon.model_zoo.vision.get_model(model_name,
                                                    classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Carefully set the 'scale' parameter to make the 'multi-scale train' and 'multi-scale test'
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(448,
                                     scale=(0.76, 1.0),
                                     ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.20),
        #transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
        #                             saturation=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = cutomdataset.custom_dataset2(root='./data2/crop_lapel2',
                                                 filename=os.path.join(
                                                     'data2/',
                                                     task + '_train.txt'))
    train_data = gluon.data.DataLoader(
        train_dataset.transform_first(train_transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        last_batch='discard')

    # Deterministic validation pipeline: resize to 480, center-crop 448.
    val_transform = transforms.Compose([
        transforms.Resize(480),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = cutomdataset.custom_dataset2(root='./data2/crop_lapel2',
                                               filename=os.path.join(
                                                   'data2/',
                                                   task + '_val.txt'))
    val_data = gluon.data.DataLoader(
        val_dataset.transform_first(val_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers)

    # Define Trainer: use Adam to make the model converge quickly
    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam',
                            {'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        train_acc = 0.
        #### Load the best model when going to the next training stage,
        #### then decay the learning rate by lr_factor.
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx=ctx)
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0

        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = []
                ###### Handle 'm' label by soft-softmax function ######
                # Each label row holds up to 4 candidate classes; 99 marks
                # "unused slot".  The loss mixes cross-entropy terms with
                # weights depending on how many candidates are present.
                for yhat, y in zip(outputs[0], label[0]):
                    loss_1 = 0
                    if y[1] == 99:  # only have y [4,0,0,0,0]
                        loss_1 += L(yhat, y[0])
                    elif y[2] == 99:  #have one m [4,1,0,0,0]
                        loss_1 = 0.8 * L(yhat, y[0]) + 0.2 * L(yhat, y[1])
                    elif y[3] == 99:  #have two m [4,1,3,0,0]
                        loss_1 = 0.7 * L(yhat, y[0]) + 0.15 * L(
                            yhat, y[1]) + 0.15 * L(yhat, y[2])
                    else:  # have many m [4,1,3,2,0]
                        loss_1 = 0.6 * L(yhat, y[0]) + 0.13 * L(
                            yhat, y[1]) + 0.13 * L(yhat, y[2]) + 0.13 * L(
                                yhat, y[3])

                    loss += [loss_1]

                #loss = [L(yhat, y) for yhat, y in zip(outputs, label)
            # for l in loss:
            #     l.backward()
            ag.backward(loss)  # for soft-softmax

            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            #train_acc += accuracy(outputs, label)
            # Accuracy is computed against the first (primary) label column.
            metric.update([label[0][:, 0]], outputs)
            #ap, cnt = calculate_ap(label, outputs)
            #AP += ap
            #AP_cnt += cnt
            #progressbar(i, num_batch-1)

        #train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch

        val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))
        f_val.writelines(
            '[Epoch %d] Train-acc: %.3f, , loss: %.3f | Val-acc: %.3f,  loss: %.3f | time: %.1f | learning_rate %.6f\n'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))
        ### Save the best model every stage
        if val_acc > best_acc:
            #best_AP = this_AP
            best_acc = val_acc
            best_path = '/usr/data/fashionai/models/%s_%s_%s_%s.params' % (
                task, model_name, epoch, best_acc)
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    # Reload the best checkpoint before returning the network.
    finetune_net.collect_params().load(best_path, ctx=ctx)
    f_val.writelines(
        'Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n'
        % (epoch, train_acc, train_loss, best_acc, val_loss, time.time() - tic,
           trainer.learning_rate))
    return (finetune_net)
# We can easily tell that they are photos of the same thing.
#
# |image-golden-bridge|
#
# We want to teach this invariance to our model by "augmenting" the
# input image. Our augmentation transforms the image with
# resizing, cropping, flipping and other techniques.
#
# With ``Gluon``, we can create our transform function as following:

# Training-time augmentation pipeline for 32x32 CIFAR images.
transform_train = transforms.Compose([
    # Randomly crop an area and resize it to be 32x32, then pad it to be 40x40
    gcv_transforms.RandomCrop(32, pad=4),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0, 1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

################################################################
# You may have noticed that most of the operations are randomized. This in effect
# increases the number of different images the model sees during training.
# The more data we have, the better our model generalizes over
# unseen images.
#
# On the other hand, when making prediction, we would like to remove all
# random operations in order to get a deterministic result. The transform
# function for prediction is:
Example #13
0
min_random_area = 0.08
jitter_param = 0.4
lighting_param = 0.1

# Training-time augmentation pipeline.  A RandomResizedCrop step (using
# min_random_area and friends) existed here once but is currently disabled.
transform_train = transforms.Compose([
    # Randomly flip the image horizontally.
    transforms.RandomFlipLeftRight(),
    # Independent random photometric jitter.
    transforms.RandomBrightness(brightness=jitter_param),
    transforms.RandomSaturation(saturation=jitter_param),
    transforms.RandomHue(hue=jitter_param),
    # PCA-based lighting noise.
    transforms.RandomLighting(lighting_param),
    # Pad to 40x40, then randomly crop back to 32x32.
    gcv_transforms.RandomCrop(32, pad=4),
    # Transpose HWC [0, 255] -> CHW [0, 1].
    transforms.ToTensor(),
    # Normalize with dataset-wide channel statistics.
    transforms.Normalize(mean_rgb, std_rgb),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_rgb, std_rgb),
Example #14
0
from mxnet.gluon import data as gdata, loss as gloss, Trainer, nn
from mxnet.gluon.data.vision import transforms
import mxnet
from mxnet import autograd, init

from gluoncv import model_zoo, utils
import time

# Training augmentation: random crop-and-resize to 224 plus horizontal flip,
# then tensorize and normalize with ImageNet channel statistics.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Deterministic evaluation pipeline: resize to 256, take a 224 center crop.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Root of the MINC-2500 tiny subset with train/val/test subfolders.
root = r'../resource/minc-2500-tiny/minc-2500-tiny/'

train_path = f'{root}train'
val_path = f'{root}val'
test_path = f'{root}test'

# Training hyper-parameters for this toy run.
batch_size = 8
classes = 23
epochs = 16
Example #15
0
args = parser.parse_args()

# Run everything on the CPU.
ctx = mx.cpu()

num_outputs = 10
# Strength of the random color jitter / lighting augmentation.
jitter_param = 0.4
lighting_param = 0.1

# ImageNet per-channel statistics, shared by both transformers below.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: randomized crop/flip/color noise, then normalize.
training_transformer = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Deterministic validation pipeline: resize to 256, center-crop 224.
validation_transformer = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Precompute 3x224x224 mean/std images (one constant plane per channel).
mean_img = mx.nd.stack(*[mx.nd.full((224, 224), m) for m in mean])
std_img = mx.nd.stack(*[mx.nd.full((224, 224), s) for s in std])
#mx.nd.save('mean_std_224.nd', {"mean_img": mean_img, "std_img": std_img})
Example #16
0
def get_data_loader(opt, batch_size, num_workers, logger):
    """Build train/val DataLoaders and a batch-splitting helper.

    Parameters
    ----------
    opt : argparse.Namespace
        Options providing data dirs, train/val lists, crop/scale settings,
        ``num_segments`` and ``dataset`` ('kinetics400' or 'ucf101').
    batch_size : int
        Global batch size across all devices.
    num_workers : int
        Worker processes per DataLoader.
    logger : logging.Logger
        Logger for progress messages.

    Returns
    -------
    tuple
        ``(train_data, val_data, batch_fn)``.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not a supported dataset.
    """
    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    # ImageNet mean/std applied per frame.
    normalize = video.VideoNormalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    scale_ratios = [float(i) for i in opt.scale_ratios.split(',')]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
        # Split a (data, label) batch across the given contexts.  With
        # temporal-segment networks each sample carries ``num_segments``
        # clips, so the data split needs a multiplier.
        if opt.num_segments > 1:
            data = split_and_load(batch[0],
                                  ctx_list=ctx,
                                  batch_axis=0,
                                  even_split=False,
                                  multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0],
                                  ctx_list=ctx,
                                  batch_axis=0,
                                  even_split=False)
        label = split_and_load(batch[1],
                               ctx_list=ctx,
                               batch_axis=0,
                               even_split=False)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(input_size, input_size),
                                  scale_ratios=scale_ratios),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(), normalize
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=input_size),
        video.VideoToTensor(), normalize
    ])

    if opt.dataset == 'kinetics400':
        train_dataset = kinetics400.classification.Kinetics400(
            setting=opt.train_list,
            root=data_dir,
            train=True,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_train)
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list,
            root=val_data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'ucf101':
        train_dataset = ucf101.classification.UCF101(
            setting=opt.train_list,
            root=data_dir,
            train=True,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_train)
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list,
            root=data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # BUGFIX: previously this branch only logged and fell through,
        # which crashed with a confusing NameError on the undefined
        # ``train_dataset``/``val_dataset`` below.  Fail fast instead.
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    logger.info('Load %d training samples and %d validation samples.' %
                (len(train_dataset), len(val_dataset)))

    # Segment-based samples need a custom batchify function.
    if opt.num_segments > 1:
        train_data = gluon.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers,
                                           batchify_fn=tsn_mp_batchify_fn)
        val_data = gluon.data.DataLoader(val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers,
                                         batchify_fn=tsn_mp_batchify_fn)
    else:
        train_data = gluon.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers)
        val_data = gluon.data.DataLoader(val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)

    return train_data, val_data, batch_fn
Example #17
0
def main():
    """Evaluate a video-classification model on a validation split.

    Parses CLI options, builds the model and test-time transform, loads
    the requested dataset (UCF101 or Kinetics400) and prints top-1/top-5
    accuracy and the total evaluation time.

    Raises
    ------
    ValueError
        If ``opt.dataset`` names an unsupported dataset.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)  # global batch size over all devices
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # BUGFIX: ``is not ''`` tested object identity, not string equality
    # (and emits a SyntaxWarning on modern Python); use ``!=``.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # BUGFIX: the original branch referenced an undefined ``logger``
        # and then fell through to an undefined ``val_dataset``; fail fast.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     batchify_fn=tsn_mp_batchify_fn,
                                     last_batch='discard')
    print('Load %d test samples.' % len(val_dataset))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Example #18
0
def main():
    """Run single-video inference for a video-classification model.

    Builds the model and test-time transform from CLI options, then
    iterates over the videos listed in ``opt.data_list``, optionally
    saving logits/predictions and logging the predicted class per video.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    # Log both to a file in the save dir and to the console.
    filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    # Collect garbage aggressively to cap CPU memory during data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess (ImageNet channel statistics)
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size, mean=image_norm_mean, std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # BUGFIX: ``is not ''`` tested object identity, not string equality
    # (and emits a SyntaxWarning on modern Python); use ``!=``.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        logger.info('Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get classes list, if we are using a pretrained network from the model_zoo
    classes = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            classes = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            classes = UCF101Attr().classes
        elif "hmdb51" in model_name:
            classes = HMDB51Attr().classes
        elif "sthsth" in model_name:
            classes = SomethingSomethingV2Attr().classes

    # get data
    anno_file = opt.data_list
    # BUGFIX: close the annotation file (it was opened and never closed).
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the number.
        if classes:
            pred_label = classes[pred_label]

        logger.info('%04d/%04d: %s is predicted to class %s' % (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' % ((end_time - start_time) / 60))
num_gpu = 1
ctx = [mx.gpu(i) for i in range(num_gpu)]

# Xavier-initialize all parameters on the GPU context(s).
model.initialize(mx.init.Xavier(), ctx=ctx)

# Trace a dummy batch through the network, printing every layer's
# output shape (useful for sanity-checking the architecture).
#x = np.random.uniform(size=(1, 1, 100, 100))
x = np.random.uniform(size=(1, 3, 100, 100), ctx=mx.gpu(0))
for layer in model:
    x = layer(x)
    print(f'Layer : {layer.name}, output shape : {x.shape}')

# Data augmentation, transforms and data loaders.
pixel_mean = [0.5, 0.5, 0.5]
pixel_std = [0.5, 0.5, 0.5]

transform_train = transforms.Compose([
    transforms.RandomBrightness(0.2),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize(pixel_mean, pixel_std),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(pixel_mean, pixel_std),
])

train_folder = gluon.data.vision.ImageFolderDataset(
    'data/fruits/train').transform_first(transform_train)
test_folder = gluon.data.vision.ImageFolderDataset(
    'data/fruits/test').transform_first(transform_test)
train_data = gluon.data.DataLoader(train_folder,
                                   batch_size=batch_size,
                                   shuffle=True)
Example #20
0
def hpatches_val_transform(ds_metainfo):
    """Return the HPatches validation transform (tensor conversion only)."""
    assert ds_metainfo is not None
    pipeline = [transforms.ToTensor()]
    return transforms.Compose(pipeline)
Example #21
0
            cv2.imread(os.path.join(self.image_root,
                                    self.images[item]))[:, :, ::-1])

    def __len__(self):
        # Dataset size: one sample per discovered image file.
        return len(self.images)


if __name__ == '__main__':
    gpu_id = 8
    net = Encoder()
    net.collect_params().reset_ctx(mx.gpu(gpu_id))
    from mxnet.gluon.data.vision import transforms

    transform_fn = transforms.Compose([
        LeftTopPad(dest_shape=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    dataset = DirectoryDataSet(image_root="/data3/zyx/yks/coco2017/train2017",
                               transforms=transform_fn)
    loader = DataLoader(dataset=dataset,
                        batch_size=16,
                        shuffle=True,
                        num_workers=8,
                        pin_memory=True)
    f = h5py.File('output/train2017.h5', 'w')
    for batch in tqdm.tqdm(loader):
        indices, data = batch
        outputs = net(data.as_in_context(mx.gpu(gpu_id))).asnumpy()
        indices = indices.asnumpy()
        for idx, output in zip(indices, outputs):
Example #22
0
    return args


if __name__ == '__main__':
    opt = parse_args()
    # Select the compute context: CPU when gpu_id is '-1', else that GPU.
    ctx = mx.cpu() if opt.gpu_id == '-1' else mx.gpu(int(opt.gpu_id.strip()))

    # Load the pretrained super-resolution generator onto the context.
    netG = SRGenerator()
    netG.load_parameters(opt.pretrained)
    netG.collect_params().reset_ctx(ctx)

    image_list = [x.strip() for x in opt.images.split(',') if x.strip()]
    # Map pixels to [-1, 1] via ToTensor + 0.5/0.5 normalization.
    transform_fn = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    ax = None
    for image_path in image_list:
        img = image.imread(image_path)
        img = transform_fn(img)
        img = img.expand_dims(0).as_in_context(ctx)
        output = netG(img)
        predict = mx.nd.squeeze(output)
        # Undo the [-1, 1] normalization and map back to uint8 pixels.
        chw_to_hwc = predict.transpose([1, 2, 0]).asnumpy()
        predict = ((chw_to_hwc * 0.5 + 0.5) * 255).astype('uint8')
        plt.imshow(predict)
        plt.show()
            net.hybridize(static_alloc=True, static_shape=True)
        else:
            net.hybridize()

    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    """
    Aligning with TF implementation, the default crop-input
    ratio set as 0.875; Set the crop as ceil(input-size/ratio)
    """
    crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
    resize = int(math.ceil(input_size / crop_ratio))

    transform_test = transforms.Compose([
        transforms.Resize(resize, keep_ratio=True),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(), normalize
    ])

    if not opt.benchmark:
        if not opt.rec_dir:
            val_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
                opt.data_dir, train=False).transform_first(transform_test),
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers)
        else:
            imgrec = os.path.join(opt.rec_dir, 'val.rec')
            imgidx = os.path.join(opt.rec_dir, 'val.idx')
            val_data = mx.io.ImageRecordIter(path_imgrec=imgrec,
                                             path_imgidx=imgidx,
Example #24
0
def train_cifar10(config):
    """Fine-tune a model-zoo network on CIFAR-10 for one tuning trial.

    ``config`` must contain an ``args`` namespace plus any hyper-parameter
    overrides, which are merged into ``args``.  After every epoch the
    validation loss/accuracy are reported via ``session.report``.
    """
    args = config.pop("args")
    vars(args).update(config)
    # Seed all RNGs so a trial is reproducible.
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # Set Hyper-params
    batch_size = args.batch_size * max(args.num_gpus, 1)
    ctx = [mx.gpu(i)
           for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()]

    # Define DataLoader (normalization uses CIFAR-10 channel statistics)
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010]),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010]),
    ])

    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch="discard",
        num_workers=args.num_workers,
    )

    test_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )

    # Load model architecture and Initialize the net with pretrained model
    finetune_net = get_model(args.model, pretrained=True)
    with finetune_net.name_scope():
        finetune_net.fc = nn.Dense(args.classes)
    finetune_net.fc.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define trainer
    trainer = gluon.Trainer(
        finetune_net.collect_params(),
        "sgd",
        {
            "learning_rate": args.lr,
            "momentum": args.momentum,
            "wd": args.wd
        },
    )
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()

    def train(epoch):
        # One pass over the training set; gradients are normalized by the
        # global batch size via ``trainer.step(batch_size)``.
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for ls in loss:
                ls.backward()

            trainer.step(batch_size)
        mx.nd.waitall()

    def test():
        # BUGFIX: reset the shared accuracy metric so each epoch reports
        # its own accuracy instead of a running average over all epochs.
        metric.reset()
        test_loss = 0
        for i, batch in enumerate(test_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            outputs = [finetune_net(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]

            test_loss += sum(ls.mean().asscalar() for ls in loss) / len(loss)
            metric.update(label, outputs)

        _, test_acc = metric.get()
        test_loss /= len(test_data)
        return test_loss, test_acc

    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test_loss, test_acc = test()
        session.report({"mean_loss": test_loss, "mean_accuracy": test_acc})
Example #25
0
def main():
    # Training driver for an image-captioning encoder/decoder on COCO-2017.
    # NOTE(review): this function mixes an MXNet data pipeline (gluon
    # transforms, DataLoader, mx.nd-based metrics) with a PyTorch model
    # and optimizer; tensors cross the boundary via ``.asnumpy()`` below.
    epoches = 32
    gpu_id = 7
    ctx_list = [mx.gpu(x) for x in [7, 8]]
    log_interval = 100
    batch_size = 32
    start_epoch = 0
    # trainer_resume = resume + ".states" if resume is not None else None
    trainer_resume = None

    resume = None
    from mxnet.gluon.data.vision import transforms
    # Pad to 256x256, convert to tensor and normalize with ImageNet stats.
    transform_fn = transforms.Compose([
        LeftTopPad(dest_shape=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    # Caption datasets; pre-extracted image features come from HDF5 files.
    dataset = CaptionDataSet(
        image_root="/data3/zyx/yks/coco2017/train2017",
        annotation_path=
        "/data3/zyx/yks/coco2017/annotations/captions_train2017.json",
        transforms=transform_fn,
        feature_hdf5="output/train2017.h5")
    # The validation set shares the training vocabulary mappings.
    val_dataset = CaptionDataSet(
        image_root="/data3/zyx/yks/coco2017/val2017",
        annotation_path=
        "/data3/zyx/yks/coco2017/annotations/captions_val2017.json",
        words2index=dataset.words2index,
        index2words=dataset.index2words,
        transforms=transform_fn,
        feature_hdf5="output/val2017.h5")
    dataloader = DataLoader(dataset=dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True,
                            last_batch="discard")
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True)

    num_words = dataset.words_count

    # set up logger
    save_prefix = "output/res50_"
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    # Build the model; initialize biases to zero and weights from a
    # small normal distribution.
    net = EncoderDecoder(num_words=num_words,
                         test_max_len=val_dataset.max_len).cuda()
    for name, p in net.named_parameters():
        if "bias" in name:
            p.data.zero_()
        else:
            p.data.normal_(0, 0.01)
        print(name)
    net = torch.nn.DataParallel(net)
    if resume is not None:
        # NOTE(review): ``collect_params`` is an MXNet Gluon API; ``net``
        # is a torch DataParallel here, so this resume path looks broken
        # (it is dead code while ``resume is None``) — confirm.
        net.collect_params().load(resume,
                                  allow_missing=True,
                                  ignore_extra=True)
        logger.info("Resumed form checkpoint {}.".format(resume))

    trainer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                             net.parameters()),
                               lr=4e-4)
    criterion = Criterion()
    # Metrics: top-1/top-3 token accuracy, loss trackers, and BLEU scores
    # tracked both per logging window ("batch") and per epoch.
    accu_top3_metric = TopKAccuracy(top_k=3)
    accu_top1_metric = Accuracy(name="batch_accu")
    ctc_loss_metric = Loss(name="ctc_loss")
    alpha_metric = Loss(name="alpha_loss")
    batch_bleu = BleuMetric(name="batch_bleu",
                            pred_index2words=dataset.index2words,
                            label_index2words=dataset.index2words)
    epoch_bleu = BleuMetric(name="epoch_bleu",
                            pred_index2words=dataset.index2words,
                            label_index2words=dataset.index2words)
    btic = time.time()
    logger.info(batch_size)
    logger.info(num_words)
    logger.info(len(dataset.words2index))
    logger.info(len(dataset.index2words))
    logger.info(dataset.words2index["<PAD>"])
    logger.info(val_dataset.words2index["<PAD>"])
    logger.info(len(val_dataset.words2index))
    for nepoch in range(start_epoch, epoches):
        if nepoch > 15:
            # Drop the learning rate after epoch 15.
            # NOTE(review): torch optimizers have no ``set_learning_rate``;
            # presumably a project wrapper provides it — confirm.
            trainer.set_learning_rate(4e-5)
        logger.info("Current lr: {}".format(trainer.param_groups[0]["lr"]))
        accu_top1_metric.reset()
        accu_top3_metric.reset()
        ctc_loss_metric.reset()
        alpha_metric.reset()
        epoch_bleu.reset()
        batch_bleu.reset()
        for nbatch, batch in enumerate(tqdm.tqdm(dataloader)):
            # Bridge MXNet NDArrays to torch tensors on the GPU.
            batch = [
                Variable(torch.from_numpy(x.asnumpy()).cuda()) for x in batch
            ]
            data, label, label_len = batch
            label = label.long()
            label_len = label_len.long()
            max_len = label_len.max().data.cpu().numpy()
            net.train()
            outputs = net(data, label, max_len)
            predictions, alphas = outputs
            ctc_loss = criterion(predictions, label, label_len)
            # Attention regularizer: penalize attention weights whose sum
            # over time deviates from 1.
            loss2 = 1.0 * ((1. - alphas.sum(dim=1))**2).mean()
            ((ctc_loss + loss2) / batch_size).backward()
            # Clip gradients element-wise to [-5, 5].
            for group in trainer.param_groups:
                for param in group['params']:
                    if param.grad is not None:
                        param.grad.data.clamp_(-5, 5)

            trainer.step()
            if nbatch % 10 == 0:
                # Update metrics on every 10th batch only (metric updates
                # involve host round-trips and are relatively expensive).
                for n, l in enumerate(label_len):
                    l = int(l.data.cpu().numpy())
                    la = label[n, 1:l].data.cpu().numpy()
                    pred = predictions[n, :(l - 1)].data.cpu().numpy()
                    accu_top3_metric.update(mx.nd.array(la), mx.nd.array(pred))
                    accu_top1_metric.update(mx.nd.array(la), mx.nd.array(pred))
                    epoch_bleu.update(la, predictions[n, :].data.cpu().numpy())
                    batch_bleu.update(la, predictions[n, :].data.cpu().numpy())
                ctc_loss_metric.update(
                    None,
                    preds=mx.nd.array([ctc_loss.data.cpu().numpy()]) /
                    batch_size)
                alpha_metric.update(None,
                                    preds=mx.nd.array(
                                        [loss2.data.cpu().numpy()]))
                if nbatch % log_interval == 0 and nbatch > 0:
                    msg = ','.join([
                        '{}={:.3f}'.format(*metric.get()) for metric in [
                            epoch_bleu, batch_bleu, accu_top1_metric,
                            accu_top3_metric, ctc_loss_metric, alpha_metric
                        ]
                    ])
                    logger.info(
                        '[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}'.
                        format(
                            nepoch, nbatch,
                            log_interval * batch_size / (time.time() - btic),
                            msg))
                    btic = time.time()
                    # Reset the windowed metrics after each log line.
                    batch_bleu.reset()
                    accu_top1_metric.reset()
                    accu_top3_metric.reset()
                    ctc_loss_metric.reset()
                    alpha_metric.reset()
        net.eval()
        # End-of-epoch validation; the checkpoint name embeds BLEU/accuracy.
        bleu, acc_top1 = validate(net,
                                  gpu_id=gpu_id,
                                  val_loader=val_loader,
                                  train_index2words=dataset.index2words,
                                  val_index2words=val_dataset.index2words)
        save_path = save_prefix + "_weights-%d-bleu-%.4f-%.4f.params" % (
            nepoch, bleu, acc_top1)
        torch.save(net.module.state_dict(), save_path)
        torch.save(trainer.state_dict(), save_path + ".states")
        logger.info("Saved checkpoint to {}.".format(save_path))
Example #26
0
    def __init__(self, args):
        """Set up data, model, loss, optimizer and metrics for training.

        Parameters
        ----------
        args : argparse.Namespace
            Parsed training options (dataset, model, backbone, lr, ctx, ...).
        """
        self.args = args
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
        # dataset and dataloader
        data_kwargs = {'transform': input_transform, 'base_size': args.base_size,
                       'crop_size': args.crop_size}
        trainset = get_segmentation_dataset(
            args.dataset, split=args.train_split, mode='train', root='/mnt/mdisk/xcq/VOCdevkit/',**data_kwargs)
        valset = get_segmentation_dataset(
            args.dataset, split='val', mode='val', root='/mnt/mdisk/xcq/VOCdevkit/',**data_kwargs)
        self.train_data = gluon.data.DataLoader(
            trainset, args.batch_size, shuffle=True, last_batch='rollover',
            num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
            last_batch='rollover', num_workers=args.workers)
        # create network
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                           backbone=args.backbone, norm_layer=args.norm_layer,
                                           norm_kwargs=args.norm_kwargs, aux=args.aux,
                                           crop_size=args.crop_size)
        model.cast(args.dtype)
        print(model)
        # Wrap model and evaluator for multi-device data parallelism.
        self.net = DataParallelModel(model, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)
        # resume checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'" \
                    .format(args.resume))
        # create criterion
        criterion = MixSoftmaxCrossEntropyLoss(args.aux, aux_weight=args.aux_weight)
        self.criterion = DataParallelCriterion(criterion, args.ctx, args.syncbn)
        # optimizer and lr scheduling
        # Polynomial decay of the learning rate over the whole run.
        self.lr_scheduler = LRScheduler(mode='poly', base_lr=args.lr,
                                        nepochs=args.epochs,
                                        iters_per_epoch=len(self.train_data),
                                        power=0.9)
        kv = mx.kv.create(args.kvstore)
        optimizer_params = {'lr_scheduler': self.lr_scheduler,
                            'wd':args.weight_decay,
                            'momentum': args.momentum,
                            'learning_rate': args.lr
                           }
        if args.dtype == 'float16':
            # Keep an FP32 master copy of the weights when training in FP16.
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            # Disable weight decay on normalization and bias parameters.
            for k, v in self.net.module.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                       optimizer_params, kvstore = kv)
        # evaluation metrics
        self.metric = gluoncv.utils.metrics.SegmentationMetric(trainset.num_class)
Example #27
0
# Location of the training split.
train_path = os.path.join(dataset_path, 'train')

ctx = [mx.cpu()]
model_name = 'ResNet50_v2'

# Start from a pretrained backbone, swap in a 2-way output head,
# then load the fine-tuned weights from disk.
tuned_net = get_model(model_name, pretrained=True)
with tuned_net.name_scope():
    tuned_net.output = nn.Dense(2)
tuned_net.output.initialize(init.Xavier(), ctx=ctx)
tuned_net.collect_params().reset_ctx(ctx)
tuned_net.hybridize()
# tuned_net.load_parameters('training_logs/ttl_v4__resnset20/params/two_traffic_lights_v4__resnet20_v2.params')
tuned_net.load_parameters('rejector1_009__resnet20_v2.params')

# ImageNet channel statistics.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Deterministic test-time pipeline.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor(),
    normalize
])

jitter_param = 0.4    # brightness/contrast/saturation jitter strength
lighting_param = 0.1  # lighting noise strength

# Augmented training pipeline.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(input_size),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(), normalize
])
Example #28
0
                    help='Path to save export files.')
parser.add_argument('--dtype', type=str, default='float32',
                    help='data type for training. default is float32')
parser.add_argument('--ctx', type=str, default="0",
                    help='Use GPUs to train.')
parser.add_argument('--hybrid', action='store_true',
                    help='Whether to use hybrid.')
opt = parser.parse_args()

# Validate options: the global batch must divide evenly over the
# requested devices, and only FP16/FP32 are supported.
num_devices = len(opt.ctx.split(","))
assert opt.batch_size % num_devices == 0, "Per batch on each GPU must be same."
assert opt.dtype in ('float32', 'float16'), "Data type only support FP16/FP32."

# Test-time transform: tensor conversion only (no normalization).
transform_test = transforms.Compose([transforms.ToTensor()])


def transform_test_flip(data, isf=False):
    """Return (original, horizontally-flipped) test views of ``data``.

    When ``isf`` is True the views are transposed HWC->CHW manually and
    cast to float32; otherwise both go through ``transform_test``.
    """
    flip_data = nd.flip(data, axis=1)
    if not isf:
        return transform_test(data), transform_test(flip_data)
    data = nd.transpose(data, (2, 0, 1)).astype('float32')
    flip_data = nd.transpose(flip_data, (2, 0, 1)).astype('float32')
    return data, flip_data


# Default the export directory to the folder holding the model params.
export_path = (os.path.dirname(opt.model_params)
               if opt.export_path == '' else opt.export_path)
ctx = [mx.gpu(int(i)) for i in opt.ctx.split(",")]
def transform():
    """Build the evaluation pipeline: tensor conversion + normalization."""
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0.13, 0.31),
    ])
Example #30
0
    save_dir = opt.save_dir
    makedirs(save_dir)
else:
    save_dir = ''
    save_period = 0

plot_path = opt.save_plot_dir

# Echo the parsed options at INFO level.
logging.basicConfig(level=logging.INFO)
logging.info(opt)

# CIFAR-10 channel statistics used for normalization.
cifar_mean = [0.4914, 0.4822, 0.4465]
cifar_std = [0.2023, 0.1994, 0.2010]

# Training pipeline: crop/flip/color-jitter/lighting augmentation.
transform_train = transforms.Compose([
    transforms.Resize(32),
    transforms.RandomResizedCrop(32),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.RandomLighting(0.1),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std)
])

# Deterministic evaluation pipeline.
transform_test = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std)
])

def test(ctx, val_data):
    metric = mx.metric.Accuracy()
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)