# # 2. Randomly crop the image and resize it to 224x224 # 3. Randomly flip the image horizontally # 4. Randomly jitter color and add noise # 5. Transpose the data from height*width*num_channels to num_channels*height*width, and map values from [0, 255] to [0, 1] # 6. Normalize with the mean and standard deviation from the ImageNet dataset. # jitter_param = 0.4 lighting_param = 0.1 transform_train = transforms.Compose([ transforms.RandomResizedCrop( 224), #Randomly crop the image and resize it to 224x224 transforms.RandomFlipLeftRight(), transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param, saturation=jitter_param), transforms.RandomLighting(lighting_param), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) transform_test = transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) ################################################################################ # With the data augmentation functions, we can define our data loaders:
# feat4 = self.upsample(F, self.conv4(self.pool(F, x, 4)), h, w) # return F.concat(x, feat1, feat2, feat3, feat4, dim=1) # # PSPNet model is provided in :class:`gluoncv.model_zoo.PSPNet`. To get # PSP model using ResNet50 base network for ADE20K dataset: model = gluoncv.model_zoo.get_psp(dataset='ade20k', backbone='resnet50', pretrained=False) print(model) ############################################################################## # Dataset and Data Augmentation # ----------------------------- # # image transform for color normalization from mxnet.gluon.data.vision import transforms input_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([.485, .456, .406], [.229, .224, .225]), ]) ############################################################################## # We provide semantic segmentation datasets in :class:`gluoncv.data`. # For example, we can easily get the ADE20K dataset: trainset = gluoncv.data.ADE20KSegmentation(split='train', transform=input_transform) print('Training images:', len(trainset)) # set batch_size = 2 for toy example batch_size = 2 # Create Training Loader train_data = gluon.data.DataLoader( trainset, batch_size, shuffle=True, last_batch='rollover', num_workers=batch_size) ##############################################################################
def train_cifar(args, reporter):
    """Train a CIFAR-10 classifier and report per-epoch validation accuracy.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must provide batch_size, num_gpus, num_workers, model, lr, wd,
        momentum and epochs.
    reporter : callable
        Called as reporter(epoch=..., accuracy=...) after each epoch
        (e.g. an AutoGluon reporter for hyper-parameter search).
    """
    print('args', args)
    batch_size = args.batch_size
    num_gpus = args.num_gpus
    # Scale the global batch size by the number of devices.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = args.num_workers
    model_name = args.model
    net = get_model(model_name, classes=10)

    # CIFAR-10 augmentation: pad-and-crop, horizontal flip, then normalize
    # with the standard CIFAR-10 channel statistics.
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])

    def test(ctx, val_data):
        # Evaluate accuracy of `net` over the whole validation loader.
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(epochs, ctx):
        # Full training loop: cosine LR schedule + SGD with momentum.
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.Xavier(), ctx=ctx)
        train_data = gluon.data.DataLoader(gluon.data.vision.CIFAR10(
            train=True).transform_first(transform_train),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           last_batch='discard',
                                           num_workers=num_workers)
        val_data = gluon.data.DataLoader(gluon.data.vision.CIFAR10(
            train=False).transform_first(transform_test),
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)
        lr_scheduler = LRScheduler(mode='cosine',
                                   base_lr=args.lr,
                                   nepochs=args.epochs,
                                   iters_per_epoch=len(train_data))
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {
            'lr_scheduler': lr_scheduler,
            'wd': args.wd,
            'momentum': args.momentum
        })
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        iteration = 0
        best_val_score = 0  # NOTE(review): written but never updated in this chunk
        start_epoch = 0

        for epoch in range(start_epoch, epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1  # NOTE(review): unused here — presumably a mixup remnant; confirm

            for i, batch in enumerate(train_data):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
                with mx.autograd.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                # step uses the global batch size so gradients are averaged correctly
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1

            # Average the summed per-sample loss over all seen samples.
            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            reporter(epoch=epoch, accuracy=val_acc)

    train(args.epochs, context)
# Where training plots should be written (consumed later in the script).
plot_name = opt.save_plot_dir

# Always log to stderr; additionally log to a file when a directory is given.
logging_handlers = [logging.StreamHandler()]
if opt.logging_dir:
    logging_dir = opt.logging_dir
    makedirs(logging_dir)
    log_file = logging.FileHandler('%s/train_cifar10_%s.log' % (logging_dir, model_name))
    logging_handlers.append(log_file)

logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
logging.info(opt)

# CIFAR-10 channel statistics shared by both pipelines below.
transform_train = transforms.Compose([
    # pad to 40x40 then randomly crop back to 32x32
    gcv_transforms.RandomCrop(32, pad=4),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])


def label_transform(label, classes):
    """Convert a 1-D batch of class indices into a one-hot matrix.

    Returns an NDArray of shape (batch, classes) on the same context
    as ``label``, with a single 1 per row.
    """
    indices = label.astype('int')
    batch = indices.shape[0]
    onehot = nd.zeros((batch, classes), ctx=label.context)
    onehot[nd.arange(batch, ctx=label.context), indices] = 1
    return onehot
# Evaluation script setup: load a trained ResNet-18 and prepare a
# confusion matrix over the 10 classes.
from myNet import resnet18
from mxnet import cpu, gpu
from mxnet import ndarray as nd
from mxnet.test_utils import list_gpus
import pandas as pd

BATCH_SIZE = 1
MODEL_PATH = 'resnet18.params'

# Prefer a GPU context when any GPU is visible.
if list_gpus():
    CTX = gpu()
else:
    CTX = cpu()

# CIFAR-10 normalization statistics.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

# NOTE(review): shuffle=True is unusual for evaluation — results are order-
# independent here, but deterministic order would aid debugging; confirm intent.
test_dataloader = DataLoader(_test_data.transform_first(transform_test),
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             last_batch='keep')

net = resnet18(10)
net.load_parameters(MODEL_PATH, ctx=CTX)
# net.initialize(ctx=CTX)

# rows: true label, columns: predicted label (presumed — TODO confirm against loop body)
confusion_matrix = nd.zeros((10, 10))
print("====>make confusion matrix")
for data, label in test_dataloader:
def predict(self, X, input_size=224, crop_ratio=0.875, set_prob_thresh=0.001, plot=False):
    """Predict class-index and associated class probability for each image in
    a given dataset (or just a single image).

    Parameters
    ----------
    X : str or :class:`autogluon.task.ImageClassification.Dataset` or list of `autogluon.task.ImageClassification.Dataset`
        If str, should be path to the input image (when we just want to predict on single image).
        If class:`autogluon.task.ImageClassification.Dataset`, should be dataset of multiple images in same format as training dataset.
        If list of `autogluon.task.ImageClassification.Dataset`, should be a set of test dataset with different scales of origin images.
    input_size : int
        Size of the images (pixels).
    crop_ratio : float
        Center-crop ratio used to derive the resize size for single-image input.
    plot : bool
        Whether to plot the image being classified.
    set_prob_thresh: float
        Results with probability below threshold are set to 0 by default.

    Examples
    --------
    >>> import autogluon.core as ag
    >>> from autogluon.vision import ImageClassification as task
    >>> train_data = task.Dataset(train_path='~/data/train')
    >>> classifier = task.fit(train_data,
    >>>                       nets=ag.space.Categorical['resnet18_v1', 'resnet34_v1'],
    >>>                       time_limits=600, ngpus_per_trial=1, num_trials=4)
    >>> test_data = task.Dataset('~/data/test', train=False)
    >>> class_index, class_probability = classifier.predict('example.jpg')
    """
    # Prefer the model's own expected input size when it declares one.
    input_size = self.model.input_size if hasattr(
        self.model, 'input_size') else input_size
    resize = int(math.ceil(input_size / crop_ratio))
    # Single-image preprocessing: resize, center-crop, ImageNet normalization.
    transform_size = transforms.Compose([
        transforms.Resize(resize),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    def predict_img(img, ensemble=False):
        # Run the classifier on one (preprocessed) image.
        # ensemble=True returns the raw probability row for later averaging;
        # otherwise returns (argmax index, its probability, full probabilities).
        proba = self.predict_proba(img)
        if ensemble:
            return proba
        else:
            ind = mx.nd.argmax(proba, axis=1).astype('int')
            # gather_nd with stacked (row, col) indices picks the winning prob per row
            idx = mx.nd.stack(
                mx.nd.arange(proba.shape[0], ctx=proba.context),
                ind.astype('float32'))
            probai = mx.nd.gather_nd(proba, idx)
            return ind, probai, proba

    def avg_prediction(different_dataset, threshold=0.001):
        # Average per-image probabilities across the multi-scale datasets,
        # zero out entries below `threshold`, then take the argmax.
        result = defaultdict(list)
        inds, probas, probals_all = [], [], []
        # Regroup: result[j] collects the j-th image's probabilities from every scale.
        for i in range(len(different_dataset)):
            for j in range(len(different_dataset[0])):
                result[j].append(different_dataset[i][j])
        for c in result.keys():
            proba_all = sum([*result[c]]) / len(different_dataset)
            # suppress near-zero probabilities before picking the winner
            proba_all = (proba_all >= threshold) * proba_all
            ind = mx.nd.argmax(proba_all, axis=1).astype('int')
            idx = mx.nd.stack(
                mx.nd.arange(proba_all.shape[0], ctx=proba_all.context),
                ind.astype('float32'))
            proba = mx.nd.gather_nd(proba_all, idx)
            inds.append(ind.asscalar())
            probas.append(proba.asnumpy())
            probals_all.append(proba_all.asnumpy().flatten())
        return inds, probas, probals_all

    def predict_imgs(X):
        # Predict over a whole dataset, or a list of datasets (multi-scale ensemble).
        if isinstance(X, list):
            different_dataset = []
            for i, x in enumerate(X):
                proba_all_one_dataset = []
                tbar = tqdm(range(len(x.items)))
                for j, x_item in enumerate(x):
                    tbar.update(1)
                    proba_all = predict_img(x_item[0], ensemble=True)
                    tbar.set_description(
                        'ratio:[%d],The input picture [%d]' % (i, j))
                    proba_all_one_dataset.append(proba_all)
                different_dataset.append(proba_all_one_dataset)
            inds, probas, probals_all = avg_prediction(
                different_dataset, threshold=set_prob_thresh)
        else:
            inds, probas, probals_all = [], [], []
            tbar = tqdm(range(len(X.items)))
            for i, x in enumerate(X):
                tbar.update(1)
                ind, proba, proba_all = predict_img(x[0])
                tbar.set_description(
                    'The input picture [%d] is classified as [%d], with probability %.2f ' %
                    (i, ind.asscalar(), proba.asscalar()))
                inds.append(ind.asscalar())
                probas.append(proba.asnumpy())
                probals_all.append(proba_all.asnumpy().flatten())
        return inds, probas, probals_all

    # Dispatch on the type of X: single image path / lazy dataset / list of datasets.
    if isinstance(X, str) and os.path.isfile(X):
        img = mx.image.imread(filename=X)
        if plot:
            plt.imshow(img.asnumpy())
            plt.show()
        img = transform_size(img)
        return predict_img(img)
    if isinstance(X, AutoGluonObject):
        X = X.init()
        return predict_imgs(X)
    if isinstance(X, list) and len(X) > 1:
        # materialize each lazy dataset before the multi-scale ensemble path
        X_group = []
        for X_item in X:
            X_item = X_item.init()
            X_group.append(X_item)
        return predict_imgs(X_group)
    # NOTE(review): inputs matching none of the branches above fall through
    # and return None implicitly — confirm this is intended.
# Head-pose training setup (Hopenet on MXNet).
gpu = args.gpu_id
lr = args.lr
#ctx = mx.cpu()
ctx = mx.gpu(gpu)

if not os.path.exists('output/snapshots'):
    os.makedirs('output/snapshots')

# 66 output bins per angle (yaw/pitch/roll binned classification — TODO confirm)
model = hopenet.Hopenet(model_zoo.vision.BottleneckV1, [3, 4, 6, 3], 66)  # ResNet50 structure
model.hybridize()

print('Loading data.')

# Resize then random-crop to 224 with ImageNet normalization.
transformations = transforms.Compose([transforms.Resize(240),
                                      transforms.RandomResizedCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                           std=(0.229, 0.224, 0.225))])

# Select the pose dataset implementation by name.
if args.dataset == 'Pose_300W_LP':
    pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'Pose_300W_LP_random_ds':
    pose_dataset = datasets.Pose_300W_LP_random_ds(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'Synhead':
    pose_dataset = datasets.Synhead(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW2000':
    pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'BIWI':
    pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW':
    pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW_aug':
def get_dataloader(module_name, module_args, num_label):
    """Build train and validation DataLoaders from a config dict.

    Parameters
    ----------
    module_name : str
        Dataset backend: 'ImageDataset' or 'LmdbDataset'.
    module_args : dict
        Config with 'dataset' (paths, ratios) and 'loader'
        (batch sizes, shuffle, workers, validation_split) sections.
    num_label : int
        Number of output labels, injected into the dataset args.

    Returns
    -------
    (train_loader, val_loader) — val_loader is None when no validation data.
    """
    # NOTE(review): 'transfroms' is a recurring typo for 'transforms' (local names only).
    train_transfroms = transforms.Compose(
        [transforms.RandomColorJitter(brightness=0.5),
         transforms.ToTensor()])
    val_transfroms = transforms.ToTensor()

    dataset_args = module_args['dataset']
    dataset_args['num_label'] = num_label
    # Create the datasets.
    # pop() removes the path/ratio keys so dataset_args can be forwarded as-is.
    train_data_path = dataset_args.pop('train_data_path')
    train_data_ratio = dataset_args.pop('train_data_ratio')
    val_data_path = dataset_args.pop('val_data_path')

    if module_name == 'ImageDataset':
        train_data_list, val_data_list = get_datalist(
            train_data_path, val_data_path,
            module_args['loader']['validation_split'])
    elif module_name == 'LmdbDataset':
        train_data_list = train_data_path
        val_data_list = val_data_path
    else:
        raise Exception('current only support ImageDataset and LmdbDataset')

    train_dataset_list = []
    for train_data in train_dataset_list and [] or train_data_list:
        train_dataset_list.append(
            get_dataset(data_list=train_data,
                        module_name=module_name,
                        phase='train',
                        dataset_args=dataset_args))

    if len(train_dataset_list) > 1:
        # Multiple sources: balance them with the configured ratios.
        train_loader = dataset.Batch_Balanced_Dataset(
            dataset_list=train_dataset_list,
            ratio_list=train_data_ratio,
            module_args=module_args,
            dataset_transfroms=train_transfroms,
            phase='train')
    elif len(train_dataset_list) == 1:
        train_loader = DataLoader(
            dataset=train_dataset_list[0].transform_first(train_transfroms),
            batch_size=module_args['loader']['train_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='rollover',
            num_workers=module_args['loader']['num_workers'])
        train_loader.dataset_len = len(train_dataset_list[0])
    else:
        raise Exception('no images found')

    if len(val_data_list):
        val_dataset = get_dataset(data_list=val_data_list,
                                  module_name=module_name,
                                  phase='test',
                                  dataset_args=dataset_args)
        val_loader = DataLoader(
            dataset=val_dataset.transform_first(val_transfroms),
            batch_size=module_args['loader']['val_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='keep',
            num_workers=module_args['loader']['num_workers'])
        val_loader.dataset_len = len(val_dataset)
    else:
        val_loader = None
    return train_loader, val_loader
# which is equal to Normalize a tensorized image in the range [0, 1) # with mean value 0.5=127.5/225 and std value 128/255 # the class FaceTypeNormalizeTransform and face_type_normalize is almost equal # the tiny difference is due to numerical calculation class FaceTypeNormalizeTransform(nn.HybridBlock): def __init__(self): super(FaceTypeNormalizeTransform, self).__init__() def hybrid_forward(self, F, x): return (x*255-127.5)*0.0078125 face_type_normalize = transforms.Normalize(0.5, 128/255) transform_test = transforms.Compose([ transforms.ToTensor() ]) _transform_train = transforms.Compose([ transforms.RandomBrightness(0.3), transforms.RandomContrast(0.3), transforms.RandomSaturation(0.3), transforms.RandomFlipLeftRight(), transforms.ToTensor() ]) def transform_train(data, label): im = _transform_train(data) return im, label
# Minimal MNIST feed-forward classifier setup (MXNet Gluon).
import mxnet.gluon as gl
from mxnet import autograd, image
from mxnet.gluon import nn
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms, MNIST
from mxnet.gluon.nn import Sequential
from mxnet.ndarray import NDArray

# Scale pixels to [0,1] then shift/scale to roughly [-1, 1].
transform: Sequential = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=0.5, std=0.5)
])

# Download and load the training data
trainSet: MNIST = MNIST('~/.mxnet/MNIST_data/', train=True).transform_first(transform)
trainLoader: DataLoader = DataLoader(trainSet, batch_size=64, shuffle=True)

# Build a feed-forward network: 784 -> 128 -> 64 -> 10 (logits; loss applies softmax)
model = nn.Sequential()
# with model.name_scope():
model.add(
    nn.Dense(128, activation='relu'),
    # nn.Activation('relu'),
    nn.Dense(64, activation='relu'),
    nn.Dense(10)
)
model.initialize()

criterion = gl.loss.SoftmaxCrossEntropyLoss()
optimizer = gl.Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.01})
def train():
    """Fine-tune a pretrained model for one FashionAI attribute task.

    Relies on module-level globals: task, model_name, task_num_class, ctx,
    batch_size, num_workers, lr, lr_steps, lr_factor, epochs, f_val, and the
    helpers cutomdataset / validate. Returns the best fine-tuned network.
    """
    logging.info('Start Training for Task: %s\n' % (task))

    # Initialize the net with pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class)
    # Reuse pretrained features; only the output head is freshly initialized.
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Carefully set the 'scale' parameter to make the 'muti-scale train' and 'muti-scale test'
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(448, scale=(0.76, 1.0), ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.20),
        #transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
        #                             saturation=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    train_dataset = cutomdataset.custom_dataset2(root='./data2/crop_lapel2',
                                                 filename=os.path.join(
                                                     'data2/', task + '_train.txt'))
    train_data = gluon.data.DataLoader(
        train_dataset.transform_first(train_transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        last_batch='discard')

    val_transform = transforms.Compose([
        transforms.Resize(480),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = cutomdataset.custom_dataset2(root='./data2/crop_lapel2',
                                               filename=os.path.join(
                                                   'data2/', task + '_val.txt'))
    val_data = gluon.data.DataLoader(
        val_dataset.transform_first(val_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers)

    # Define Trainer use ADam to make mdoel converge quickly
    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam', {'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        train_acc = 0.
        #### Load the best model when go to the next training stage
        # NOTE(review): lr_counter can run past len(lr_steps)-1 on the last stage,
        # and best_path is undefined until val_acc first improves — confirm
        # lr_steps has a sentinel and epoch 0 is never a stage boundary.
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx=ctx)
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1
        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0,
                                               even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = []
                ###### Handle 'm' label by soft-softmax function ######
                # Each label row holds a primary label plus optional 'maybe' labels;
                # 99 marks the end of the valid entries.
                for yhat, y in zip(outputs[0], label[0]):
                    loss_1 = 0
                    if y[1] == 99:  # only have y [4,0,0,0,0]
                        loss_1 += L(yhat, y[0])
                    elif y[2] == 99:  # have one m [4,1,0,0,0]
                        loss_1 = 0.8 * L(yhat, y[0]) + 0.2 * L(yhat, y[1])
                    elif y[3] == 99:  # have two m [4,1,3,0,0]
                        loss_1 = 0.7 * L(yhat, y[0]) + 0.15 * L(
                            yhat, y[1]) + 0.15 * L(yhat, y[2])
                    else:  # have many m [4,1,3,2,0]
                        loss_1 = 0.6 * L(yhat, y[0]) + 0.13 * L(
                            yhat, y[1]) + 0.13 * L(yhat, y[2]) + 0.13 * L(
                                yhat, y[3])
                    loss += [loss_1]
                #loss = [L(yhat, y) for yhat, y in zip(outputs, label)
            # for l in loss:
            #     l.backward()
            ag.backward(loss)  # for soft-softmax
            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            #train_acc += accuracy(outputs, label)
            # accuracy is measured against the primary label only (column 0)
            metric.update([label[0][:, 0]], outputs)
            #ap, cnt = calculate_ap(label, outputs)
            #AP += ap
            #AP_cnt += cnt
            #progressbar(i, num_batch-1)

        #train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch
        val_acc, val_loss = validate(finetune_net, val_data, ctx)
        logging.info(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))
        f_val.writelines(
            '[Epoch %d] Train-acc: %.3f, , loss: %.3f | Val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
            (epoch, train_acc, train_loss, val_acc, val_loss,
             time.time() - tic, trainer.learning_rate))
        ### Save the best model every stage
        if val_acc > best_acc:
            #best_AP = this_AP
            best_acc = val_acc
            best_path = '/usr/data/fashionai/models/%s_%s_%s_%s.params' % (
                task, model_name, epoch, best_acc)
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    # Reload the overall best checkpoint before returning.
    finetune_net.collect_params().load(best_path, ctx=ctx)
    f_val.writelines(
        'Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
        (epoch, train_acc, train_loss, best_acc, val_loss,
         time.time() - tic, trainer.learning_rate))
    return (finetune_net)
# We can easily tell that they are photos of the same thing. # # |image-golden-bridge| # # We want to teach this invariance to our model, by playing "augmenting" # input image. Our augmentation transforms the image with # resizing, cropping, flipping and other techniques. # # With ``Gluon``, we can create our transform function as following: transform_train = transforms.Compose([ # Randomly crop an area and resize it to be 32x32, then pad it to be 40x40 gcv_transforms.RandomCrop(32, pad=4), # Randomly flip the image horizontally transforms.RandomFlipLeftRight(), # Transpose the image from height*width*num_channels to num_channels*height*width # and map values from [0, 255] to [0,1] transforms.ToTensor(), # Normalize the image with mean and standard deviation calculated across all images transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) ################################################################ # You may have noticed that most of the operations are randomized. This in effect # increases the number of different images the model sees during training. # The more data we have, the better our model generalizes over # unseen images. # # On the other hand, when making prediction, we would like to remove all # random operations in order to get a deterministic result. The transform # function for prediction is:
# Augmentation hyper-parameters.
min_random_area = 0.08   # NOTE(review): unused while RandomResizedCrop stays commented out
jitter_param = 0.4
lighting_param = 0.1

transform_train = transforms.Compose([
    # transforms.RandomResizedCrop(resize,
    #                              scale=(min_random_area, max_random_area),
    #                              ratio=(min_aspect_ratio, max_aspect_ratio)),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    transforms.RandomBrightness(brightness=jitter_param),
    transforms.RandomSaturation(saturation=jitter_param),
    transforms.RandomHue(hue=jitter_param),
    transforms.RandomLighting(lighting_param),
    # Randomly crop an area and resize it to be 32x32, then pad it to be 40x40
    gcv_transforms.RandomCrop(32, pad=4),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize(mean_rgb, std_rgb),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_rgb, std_rgb),
# MINC-2500 (tiny) fine-tuning setup.
from mxnet.gluon import data as gdata, loss as gloss, Trainer, nn
from mxnet.gluon.data.vision import transforms
import mxnet
from mxnet import autograd, init
from gluoncv import model_zoo, utils
import time

# Training: random crop/flip + ImageNet normalization.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Evaluation: deterministic resize + center crop.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Dataset layout: <root>/{train,val,test}/<class>/... images.
root = r'../resource/minc-2500-tiny/minc-2500-tiny/'
train_path = f'{root}train'
val_path = f'{root}val'
test_path = f'{root}test'

batch_size = 8
classes = 23
epochs = 16
args = parser.parse_args()

ctx = mx.cpu()
num_outputs = 10

# Augmentation strengths and ImageNet normalization statistics.
jitter_param = 0.4
lighting_param = 0.1
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

training_transformer = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

validation_transformer = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Per-pixel mean/std images (3, 224, 224), e.g. for de-normalizing visualizations.
mean_img = mx.nd.stack(*[mx.nd.full((224, 224), m) for m in mean])
std_img = mx.nd.stack(*[mx.nd.full((224, 224), s) for s in std])
#mx.nd.save('mean_std_224.nd', {"mean_img": mean_img, "std_img": std_img})
def get_data_loader(opt, batch_size, num_workers, logger):
    """Build video-classification train/val DataLoaders for Kinetics400 or UCF101.

    Returns (train_data, val_data, batch_fn) where batch_fn splits a batch
    across the given context list.
    """
    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    normalize = video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    # comma-separated crop scale ratios, e.g. "1.0,0.875,0.75"
    scale_ratios = [float(i) for i in opt.scale_ratios.split(',')]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
        # Split (data, label) across devices; TSN-style segment batches need
        # the multiplier so each segment group stays on one device.
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0,
                                  even_split=False, multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0,
                                  even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0,
                               even_split=False)
        return data, label

    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(input_size, input_size),
                                  scale_ratios=scale_ratios),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=input_size),
        video.VideoToTensor(),
        normalize
    ])

    if opt.dataset == 'kinetics400':
        train_dataset = kinetics400.classification.Kinetics400(
            setting=opt.train_list,
            root=data_dir,
            train=True,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_train)
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list,
            root=val_data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'ucf101':
        train_dataset = ucf101.classification.UCF101(
            setting=opt.train_list,
            root=data_dir,
            train=True,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_train)
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list,
            root=data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            target_width=input_size,
            target_height=input_size,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # NOTE(review): this branch only logs and falls through — the next
        # statements then raise NameError on train_dataset; consider raising here.
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    logger.info('Load %d training samples and %d validation samples.' %
                (len(train_dataset), len(val_dataset)))

    if opt.num_segments > 1:
        # TSN-style multi-segment batches need the custom batchify function.
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True, num_workers=num_workers,
                                           batchify_fn=tsn_mp_batchify_fn)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size,
                                         shuffle=False, num_workers=num_workers,
                                         batchify_fn=tsn_mp_batchify_fn)
    else:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True, num_workers=num_workers)
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size,
                                         shuffle=False, num_workers=num_workers)

    return train_data, val_data, batch_fn
def main():
    """Evaluate a video-classification model (TSN-style) on a validation set.

    Parses CLI options, builds the network and the test-time transform
    (1/3/10-crop), loads the chosen dataset and reports top-1/top-5 accuracy.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more
    # CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # FIX: was `opt.resume_params is not ''` — identity comparison against a
    # string literal is a SyntaxWarning and implementation-dependent; use !=.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data: choose the test-time cropping strategy.
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list, root=opt.data_dir, train=False,
            new_width=opt.new_width, new_height=opt.new_height,
            new_length=opt.new_length, new_step=opt.new_step,
            target_width=opt.input_size, target_height=opt.input_size,
            test_mode=True, num_segments=opt.num_segments,
            transform=transform_test)
    else:
        # FIX: the original called an undefined `logger` here and then fell
        # through to use the also-undefined `val_dataset`. Fail fast instead.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio * num_workers),
                                     batchify_fn=tsn_mp_batchify_fn,
                                     last_batch='discard')
    print('Load %d test samples.' % len(val_dataset))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' % ((end_time - start_time) / 60))
def main():
    """Run single-video inference with a (pretrained) action-recognition model.

    Reads a list of video paths from ``opt.data_list``, runs each clip through
    the network, logs the predicted class and optionally saves the logits /
    prediction indices as ``.npy`` files under ``opt.save_dir``.
    """
    opt = parse_args()

    makedirs(opt.save_dir)

    # Log to both a file under save_dir and the console.
    filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    # Collect garbage more aggressively than the (700, 10, 10) default to
    # release CPU memory during heavy data loading.
    gc.set_threshold(100, 5, 5)

    # Compute context: CPU when gpu_id == -1, otherwise the selected GPU.
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # Preprocessing: ten-crop / three-crop / single-crop evaluation
    # transforms with ImageNet channel statistics.
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # Build the model; a non-empty hashtag selects a specific pretrained
    # snapshot from the model zoo.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # BUG FIX: was `opt.resume_params is not ''` — identity comparison with a
    # string literal (SyntaxWarning on CPython 3.8+, result not guaranteed).
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        logger.info('Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # Class-name list for pretty-printing predictions of model-zoo networks.
    classes = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            classes = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            classes = UCF101Attr().classes
        elif "hmdb51" in model_name:
            classes = HMDB51Attr().classes
        elif "sthsth" in model_name:
            classes = SomethingSomethingV2Attr().classes

    # BUG FIX: the annotation file was opened and never closed; use a context
    # manager so the handle is released even on error.
    anno_file = opt.data_list
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # Build a pseudo dataset instance only to reuse its video-decoding
    # helper methods (lazy_init avoids loading any data here).
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the raw class index.
        if classes:
            pred_label = classes[pred_label]
        logger.info('%04d/%04d: %s is predicted to class %s' % (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' % ((end_time - start_time) / 60))
num_gpu = 1 ctx = [mx.gpu(i) for i in range(num_gpu)] model.initialize(mx.init.Xavier(), ctx=ctx) #print model data flow #x = np.random.uniform(size=(1, 1, 100, 100)) x = np.random.uniform(size=(1, 3, 100, 100), ctx=mx.gpu(0)) for layer in model: x = layer(x) print(f'Layer : {layer.name}, output shape : {x.shape}') #set up data augmentation, transforms and data loaders transform_train = transforms.Compose([ transforms.RandomBrightness(0.2), transforms.RandomFlipLeftRight(), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) ]) train_folder = gluon.data.vision.ImageFolderDataset( 'data/fruits/train').transform_first(transform_train) test_folder = gluon.data.vision.ImageFolderDataset( 'data/fruits/test').transform_first(transform_test) train_data = gluon.data.DataLoader(train_folder, batch_size=batch_size, shuffle=True)
def hpatches_val_transform(ds_metainfo): assert (ds_metainfo is not None) return transforms.Compose([transforms.ToTensor()])
cv2.imread(os.path.join(self.image_root, self.images[item]))[:, :, ::-1]) def __len__(self): return len(self.images) if __name__ == '__main__': gpu_id = 8 net = Encoder() net.collect_params().reset_ctx(mx.gpu(gpu_id)) from mxnet.gluon.data.vision import transforms transform_fn = transforms.Compose([ LeftTopPad(dest_shape=(256, 256)), transforms.ToTensor(), transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) ]) dataset = DirectoryDataSet(image_root="/data3/zyx/yks/coco2017/train2017", transforms=transform_fn) loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, num_workers=8, pin_memory=True) f = h5py.File('output/train2017.h5', 'w') for batch in tqdm.tqdm(loader): indices, data = batch outputs = net(data.as_in_context(mx.gpu(gpu_id))).asnumpy() indices = indices.asnumpy() for idx, output in zip(indices, outputs):
return args if __name__ == '__main__': opt = parse_args() # context list if opt.gpu_id == '-1': ctx = mx.cpu() else: ctx = mx.gpu(int(opt.gpu_id.strip())) netG = SRGenerator() netG.load_parameters(opt.pretrained) netG.collect_params().reset_ctx(ctx) image_list = [x.strip() for x in opt.images.split(',') if x.strip()] transform_fn = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ]) ax = None for image_path in image_list: img = image.imread(image_path) img = transform_fn(img) img = img.expand_dims(0).as_in_context(ctx) output = netG(img) predict = mx.nd.squeeze(output) predict = ((predict.transpose([1, 2, 0]).asnumpy() * 0.5 + 0.5) * 255).astype('uint8') plt.imshow(predict) plt.show()
net.hybridize(static_alloc=True, static_shape=True) else: net.hybridize() normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) """ Aligning with TF implementation, the default crop-input ratio set as 0.875; Set the crop as ceil(input-size/ratio) """ crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875 resize = int(math.ceil(input_size / crop_ratio)) transform_test = transforms.Compose([ transforms.Resize(resize, keep_ratio=True), transforms.CenterCrop(input_size), transforms.ToTensor(), normalize ]) if not opt.benchmark: if not opt.rec_dir: val_data = gluon.data.DataLoader(imagenet.classification.ImageNet( opt.data_dir, train=False).transform_first(transform_test), batch_size=batch_size, shuffle=False, num_workers=num_workers) else: imgrec = os.path.join(opt.rec_dir, 'val.rec') imgidx = os.path.join(opt.rec_dir, 'val.idx') val_data = mx.io.ImageRecordIter(path_imgrec=imgrec, path_imgidx=imgidx,
def train_cifar10(config):
    """Ray Tune trainable: fine-tune a model-zoo network on CIFAR-10.

    `config` must contain an "args" namespace under the key ``"args"``;
    the remaining keys override attributes of that namespace (this is how
    Tune injects sampled hyper-parameters). Reports mean loss / accuracy
    to the Tune session after every epoch.
    """
    args = config.pop("args")
    vars(args).update(config)
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # Set Hyper-params: global batch = per-device batch * #devices.
    batch_size = args.batch_size * max(args.num_gpus, 1)
    ctx = [mx.gpu(i) for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()]

    # Define DataLoader: augmentation for training, normalization-only for
    # testing, both with CIFAR-10 channel statistics.
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
    ])
    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch="discard",
        num_workers=args.num_workers,
    )
    test_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )

    # Load the pretrained backbone and re-initialize the classifier head.
    finetune_net = get_model(args.model, pretrained=True)
    with finetune_net.name_scope():
        finetune_net.fc = nn.Dense(args.classes)
    finetune_net.fc.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define trainer
    trainer = gluon.Trainer(
        finetune_net.collect_params(),
        "sgd",
        {
            "learning_rate": args.lr,
            "momentum": args.momentum,
            "wd": args.wd
        },
    )
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()

    def train(epoch):
        # One pass over the training set (the epoch index is unused but kept
        # for interface symmetry with the outer loop).
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for ls in loss:
                ls.backward()
            trainer.step(batch_size)
        mx.nd.waitall()

    def test():
        # BUG FIX: the shared Accuracy metric was never reset, so each call
        # reported a running average over ALL previous epochs instead of the
        # accuracy of the current epoch alone.
        metric.reset()
        test_loss = 0
        for i, batch in enumerate(test_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            outputs = [finetune_net(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            test_loss += sum(ls.mean().asscalar() for ls in loss) / len(loss)
            metric.update(label, outputs)
        _, test_acc = metric.get()
        test_loss /= len(test_data)
        return test_loss, test_acc

    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test_loss, test_acc = test()
        session.report({"mean_loss": test_loss, "mean_accuracy": test_acc})
def main():
    """Train an image-captioning encoder/decoder on COCO 2017.

    NOTE(review): this function mixes PyTorch (model, optimizer, Variable,
    checkpointing) with MXNet (DataLoader, metrics, nd arrays); batches are
    converted at the boundary via ``x.asnumpy()`` -> ``torch.from_numpy``.
    """
    epoches = 32
    gpu_id = 7
    ctx_list = [mx.gpu(x) for x in [7, 8]]  # NOTE(review): unused in this fragment
    log_interval = 100
    batch_size = 32
    start_epoch = 0
    # trainer_resume = resume + ".states" if resume is not None else None
    trainer_resume = None  # NOTE(review): unused in this fragment
    resume = None
    from mxnet.gluon.data.vision import transforms

    # ImageNet-normalized 256x256 inputs (letterbox-padded, not cropped).
    transform_fn = transforms.Compose([
        LeftTopPad(dest_shape=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    # The training set builds the vocabulary; the validation set reuses it so
    # word indices agree across splits.
    dataset = CaptionDataSet(
        image_root="/data3/zyx/yks/coco2017/train2017",
        annotation_path=
        "/data3/zyx/yks/coco2017/annotations/captions_train2017.json",
        transforms=transform_fn,
        feature_hdf5="output/train2017.h5")
    val_dataset = CaptionDataSet(
        image_root="/data3/zyx/yks/coco2017/val2017",
        annotation_path=
        "/data3/zyx/yks/coco2017/annotations/captions_val2017.json",
        words2index=dataset.words2index,
        index2words=dataset.index2words,
        transforms=transform_fn,
        feature_hdf5="output/val2017.h5")
    dataloader = DataLoader(dataset=dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True,
                            last_batch="discard")
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True)
    num_words = dataset.words_count

    # set up logger (console + file under output/)
    save_prefix = "output/res50_"
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    net = EncoderDecoder(num_words=num_words,
                         test_max_len=val_dataset.max_len).cuda()
    # Manual init: biases to zero, weights ~ N(0, 0.01).
    for name, p in net.named_parameters():
        if "bias" in name:
            p.data.zero_()
        else:
            p.data.normal_(0, 0.01)
        print(name)
    net = torch.nn.DataParallel(net)
    if resume is not None:
        # NOTE(review): `collect_params().load` is MXNet API called on a torch
        # module — this path would fail if `resume` were ever set. Confirm.
        net.collect_params().load(resume,
                                  allow_missing=True,
                                  ignore_extra=True)
        logger.info("Resumed form checkpoint {}.".format(resume))

    trainer = torch.optim.Adam(params=filter(lambda p: p.requires_grad,
                                             net.parameters()),
                               lr=4e-4)
    criterion = Criterion()

    # Metrics: top-1/top-3 accuracy, running losses, and BLEU in both a
    # per-epoch and a per-log-window variant.
    accu_top3_metric = TopKAccuracy(top_k=3)
    accu_top1_metric = Accuracy(name="batch_accu")
    ctc_loss_metric = Loss(name="ctc_loss")
    alpha_metric = Loss(name="alpha_loss")
    batch_bleu = BleuMetric(name="batch_bleu",
                            pred_index2words=dataset.index2words,
                            label_index2words=dataset.index2words)
    epoch_bleu = BleuMetric(name="epoch_bleu",
                            pred_index2words=dataset.index2words,
                            label_index2words=dataset.index2words)
    btic = time.time()
    logger.info(batch_size)
    logger.info(num_words)
    logger.info(len(dataset.words2index))
    logger.info(len(dataset.index2words))
    logger.info(dataset.words2index["<PAD>"])
    logger.info(val_dataset.words2index["<PAD>"])
    logger.info(len(val_dataset.words2index))

    for nepoch in range(start_epoch, epoches):
        # Step the learning rate down after epoch 15.
        # NOTE(review): `set_learning_rate` is not a torch.optim.Adam method —
        # confirm this branch actually works at runtime.
        if nepoch > 15:
            trainer.set_learning_rate(4e-5)
        logger.info("Current lr: {}".format(trainer.param_groups[0]["lr"]))
        accu_top1_metric.reset()
        accu_top3_metric.reset()
        ctc_loss_metric.reset()
        alpha_metric.reset()
        epoch_bleu.reset()
        batch_bleu.reset()
        for nbatch, batch in enumerate(tqdm.tqdm(dataloader)):
            # MXNet batch -> torch CUDA tensors.
            batch = [
                Variable(torch.from_numpy(x.asnumpy()).cuda()) for x in batch
            ]
            data, label, label_len = batch
            label = label.long()
            label_len = label_len.long()
            max_len = label_len.max().data.cpu().numpy()
            net.train()
            outputs = net(data, label, max_len)
            predictions, alphas = outputs
            ctc_loss = criterion(predictions, label, label_len)
            # Attention regularizer: penalizes locations whose attention
            # weights do not sum to 1 across time steps.
            loss2 = 1.0 * ((1. - alphas.sum(dim=1))**2).mean()
            ((ctc_loss + loss2) / batch_size).backward()
            # Clip gradients element-wise to [-5, 5] before the update.
            for group in trainer.param_groups:
                for param in group['params']:
                    if param.grad is not None:
                        param.grad.data.clamp_(-5, 5)
            trainer.step()
            # Update the (expensive) per-sample metrics only every 10 batches.
            if nbatch % 10 == 0:
                for n, l in enumerate(label_len):
                    l = int(l.data.cpu().numpy())
                    la = label[n, 1:l].data.cpu().numpy()
                    pred = predictions[n, :(l - 1)].data.cpu().numpy()
                    accu_top3_metric.update(mx.nd.array(la), mx.nd.array(pred))
                    accu_top1_metric.update(mx.nd.array(la), mx.nd.array(pred))
                    epoch_bleu.update(la, predictions[n, :].data.cpu().numpy())
                    batch_bleu.update(la, predictions[n, :].data.cpu().numpy())
                ctc_loss_metric.update(
                    None,
                    preds=mx.nd.array([ctc_loss.data.cpu().numpy()]) /
                    batch_size)
                alpha_metric.update(None,
                                    preds=mx.nd.array(
                                        [loss2.data.cpu().numpy()]))
            if nbatch % log_interval == 0 and nbatch > 0:
                msg = ','.join([
                    '{}={:.3f}'.format(*metric.get()) for metric in [
                        epoch_bleu, batch_bleu, accu_top1_metric,
                        accu_top3_metric, ctc_loss_metric, alpha_metric
                    ]
                ])
                logger.info(
                    '[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}'.
                    format(
                        nepoch, nbatch,
                        log_interval * batch_size / (time.time() - btic), msg))
                btic = time.time()
                # Reset the windowed metrics after each log line.
                batch_bleu.reset()
                accu_top1_metric.reset()
                accu_top3_metric.reset()
                ctc_loss_metric.reset()
                alpha_metric.reset()
        # End-of-epoch validation and checkpointing.
        net.eval()
        bleu, acc_top1 = validate(net,
                                  gpu_id=gpu_id,
                                  val_loader=val_loader,
                                  train_index2words=dataset.index2words,
                                  val_index2words=val_dataset.index2words)
        save_path = save_prefix + "_weights-%d-bleu-%.4f-%.4f.params" % (
            nepoch, bleu, acc_top1)
        torch.save(net.module.state_dict(), save_path)
        torch.save(trainer.state_dict(), save_path + ".states")
        logger.info("Saved checkpoint to {}.".format(save_path))
    def __init__(self, args):
        """Build data pipelines, model, criterion, optimizer, and metrics for
        semantic-segmentation training from the parsed CLI `args`."""
        self.args = args

        # image transform: to-tensor + ImageNet channel normalization
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
        ])

        # dataset and dataloader
        data_kwargs = {'transform': input_transform, 'base_size': args.base_size,
                       'crop_size': args.crop_size}
        trainset = get_segmentation_dataset(
            args.dataset, split=args.train_split, mode='train',
            root='/mnt/mdisk/xcq/VOCdevkit/', **data_kwargs)
        valset = get_segmentation_dataset(
            args.dataset, split='val', mode='val',
            root='/mnt/mdisk/xcq/VOCdevkit/', **data_kwargs)
        self.train_data = gluon.data.DataLoader(
            trainset, args.batch_size, shuffle=True, last_batch='rollover',
            num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(
            valset, args.test_batch_size,
            last_batch='rollover', num_workers=args.workers)

        # create network: either a pretrained model-zoo net, or one assembled
        # from the (model, backbone, dataset) options
        if args.model_zoo is not None:
            model = get_model(args.model_zoo, pretrained=True)
        else:
            model = get_segmentation_model(model=args.model,
                                           dataset=args.dataset,
                                           backbone=args.backbone,
                                           norm_layer=args.norm_layer,
                                           norm_kwargs=args.norm_kwargs,
                                           aux=args.aux,
                                           crop_size=args.crop_size)
        model.cast(args.dtype)
        print(model)
        # Wrappers for multi-device training and evaluation.
        self.net = DataParallelModel(model, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)

        # resume checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_parameters(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'" \
                    .format(args.resume))

        # create criterion: cross-entropy, optionally mixed with the
        # auxiliary-head loss weighted by aux_weight
        criterion = MixSoftmaxCrossEntropyLoss(args.aux,
                                               aux_weight=args.aux_weight)
        self.criterion = DataParallelCriterion(criterion, args.ctx, args.syncbn)

        # optimizer and lr scheduling: polynomial decay over all iterations
        self.lr_scheduler = LRScheduler(mode='poly', base_lr=args.lr,
                                        nepochs=args.epochs,
                                        iters_per_epoch=len(self.train_data),
                                        power=0.9)
        kv = mx.kv.create(args.kvstore)
        optimizer_params = {'lr_scheduler': self.lr_scheduler,
                            'wd': args.weight_decay,
                            'momentum': args.momentum,
                            'learning_rate': args.lr
                            }
        if args.dtype == 'float16':
            # Keep an FP32 master copy of the weights for FP16 training.
            optimizer_params['multi_precision'] = True

        if args.no_wd:
            # Exempt BatchNorm scale/shift parameters and biases from weight decay.
            for k, v in self.net.module.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                       optimizer_params, kvstore=kv)
        # evaluation metrics
        self.metric = gluoncv.utils.metrics.SegmentationMetric(trainset.num_class)
train_path = os.path.join(dataset_path, 'train') ctx = [mx.cpu()] model_name = 'ResNet50_v2' tuned_net = get_model(model_name, pretrained=True) with tuned_net.name_scope(): tuned_net.output = nn.Dense(2) tuned_net.output.initialize(init.Xavier(), ctx=ctx) tuned_net.collect_params().reset_ctx(ctx) tuned_net.hybridize() # tuned_net.load_parameters('training_logs/ttl_v4__resnset20/params/two_traffic_lights_v4__resnet20_v2.params') tuned_net.load_parameters('rejector1_009__resnet20_v2.params') transform_test = transforms.Compose([ transforms.Resize(256), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) jitter_param = 0.4 lighting_param = 0.1 transform_train = transforms.Compose([ transforms.RandomResizedCrop(input_size), transforms.RandomFlipLeftRight(), transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param, saturation=jitter_param), transforms.RandomLighting(lighting_param), transforms.ToTensor(), normalize ])
help='Path to save export files.') parser.add_argument('--dtype', type=str, default='float32', help='data type for training. default is float32') parser.add_argument('--ctx', type=str, default="0", help='Use GPUs to train.') parser.add_argument('--hybrid', action='store_true', help='Whether to use hybrid.') opt = parser.parse_args() assert opt.batch_size % len( opt.ctx.split(",")) == 0, "Per batch on each GPU must be same." assert opt.dtype in ('float32', 'float16'), "Data type only support FP16/FP32." transform_test = transforms.Compose([transforms.ToTensor()]) def transform_test_flip(data, isf=False): flip_data = nd.flip(data, axis=1) if isf: data = nd.transpose(data, (2, 0, 1)).astype('float32') flip_data = nd.transpose(flip_data, (2, 0, 1)).astype('float32') return data, flip_data return transform_test(data), transform_test(flip_data) export_path = os.path.dirname( opt.model_params) if opt.export_path == '' else opt.export_path ctx = [mx.gpu(int(i)) for i in opt.ctx.split(",")]
def transform(): transformer = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(0.13, 0.31)]) return transformer
save_dir = opt.save_dir makedirs(save_dir) else: save_dir = '' save_period = 0 plot_path = opt.save_plot_dir logging.basicConfig(level=logging.INFO) logging.info(opt) transform_train = transforms.Compose([ transforms.Resize(32), transforms.RandomResizedCrop(32), transforms.RandomFlipLeftRight(), transforms.RandomColorJitter(brightness=0.1, contrast=0.1, saturation=0.1), transforms.RandomLighting(0.1), transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) transform_test = transforms.Compose([ transforms.Resize(32), transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) def test(ctx, val_data): metric = mx.metric.Accuracy() for i, batch in enumerate(val_data): data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)