def get_triplet_train_data(batch_size=8):
    """Build the shuffled training DataLoader for triplet-loss training.

    :param batch_size: batch size
    :return: DataLoader over the triplet dataset with standard augmentation
    """
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    img_folder, img_file = get_data_path()
    # FIX: the original called os.path.join() with a single argument, which is
    # a no-op — the cache path is simply the data file plus the ".tp.npz" suffix.
    img_saved = img_file + ".tp.npz"
    td = TripletDataset(data_folder=img_folder,
                        data_file=img_file,
                        saved_path=img_saved,
                        transform=transform_train)
    train_data = DataLoader(td, batch_size=batch_size, shuffle=True)
    return train_data
def test_transformer():
    """Smoke-test the gluon vision transforms by pushing a dummy image
    through one pipeline containing every transform once."""
    from mxnet.gluon.data.vision import transforms

    # One instance of each transform, in the original pipeline order.
    stages = [
        transforms.Resize(300),
        transforms.Resize(300, keep_ratio=True),
        transforms.CenterCrop(256),
        transforms.RandomCrop(256, pad=16),
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
        transforms.RandomBrightness(0.1),
        transforms.RandomContrast(0.1),
        transforms.RandomSaturation(0.1),
        transforms.RandomHue(0.1),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.RandomRotation([-10., 10.]),
        transforms.Normalize([0, 0, 0], [1, 1, 1]),
    ]
    pipeline = transforms.Compose(stages)
    dummy_image = mx.nd.ones((245, 480, 3), dtype='uint8')
    # wait_to_read() forces the lazy NDArray computation to actually run.
    pipeline(dummy_image).wait_to_read()
def get_transform(jitter_param=0.4, pca_noise=0.2):
    """Build the CIFAR-style train/test transform pipelines.

    See https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/data/data_augmentation.html

    :param jitter_param: strength of brightness/contrast/saturation/hue jitter
    :param pca_noise: alpha for PCA-based lighting noise
    :return: (transform_train, transform_test)
    """
    cifar_mean = [0.4914, 0.4822, 0.4465]
    cifar_std = [0.2023, 0.1994, 0.2010]

    # Training: random crop/flip/color jitter/lighting noise, then normalize.
    augmentation = [
        transforms.Resize(32),
        transforms.RandomResizedCrop((32, 32), scale=(0.8, 1.0), ratio=(0.9, 1.1)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param,
                                     hue=jitter_param),
        transforms.RandomLighting(alpha=pca_noise),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ]
    # Evaluation: deterministic resize + the same normalization.
    evaluation = [
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize(cifar_mean, cifar_std),
    ]
    return transforms.Compose(augmentation), transforms.Compose(evaluation)
def GluonTransformation(data: mx.nd.array):
    """Apply a random augmentation pipeline to a single image.

    :param data: image array-like in (h, w, c) layout
    :return: augmented image as mx.nd.array in (c, h, w) layout
    """
    pipeline = transforms.Compose([
        transforms.RandomResizedCrop(200, (0.8, 1.0)),
        transforms.CenterCrop((300, 300)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomFlipTopBottom(),
        transforms.RandomLighting(0.3),
        transforms.RandomColorJitter(brightness=0.1, contrast=0.1,
                                     saturation=0.1, hue=0.2),
        transforms.Resize(384),
        transforms.ToTensor(),  # (h, w, c) -> (c, h, w)
        transforms.Normalize(0, 1),
    ])
    # Coerce the input to an NDArray before transforming, as the pipeline
    # operates on mx.nd arrays.
    return pipeline(mx.nd.array(data))
def get_data_rec_transfomed(args):
    """Build ImageNet train/val DataLoaders plus a batch-splitting helper.

    :param args: parsed CLI args; must provide data_dir, num_workers,
        batch_size, num_gpus, input_size and crop_ratio.
    :return: (train_data, val_data, batch_fn)
    """
    data_dir = args.data_dir
    num_workers = args.num_workers
    batch_size = args.batch_size * max(1, args.num_gpus)
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1
    # BUGFIX: the original read `opt.input_size` / `opt.crop_ratio` although
    # every other option comes from the `args` parameter — use `args`
    # consistently. (NOTE(review): assumes args carries input_size/crop_ratio,
    # matching the other attributes read above — confirm against the parser.)
    input_size = args.input_size
    crop_ratio = args.crop_ratio if args.crop_ratio > 0 else 0.875
    resize = int(math.ceil(input_size / crop_ratio))

    def batch_fn(batch, ctx):
        # Scatter a (data, label) batch across the given device list.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, label

    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param),
        transforms.RandomLighting(lighting_param),
        transforms.ToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        transforms.Resize(resize, keep_ratio=True),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        normalize
    ])
    train_data = gluon.data.DataLoader(
        imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, last_batch='discard',
        num_workers=num_workers)
    val_data = gluon.data.DataLoader(
        imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_data, val_data, batch_fn
def cifar10_train_transform(ds_metainfo,
                            mean_rgb=(0.4914, 0.4822, 0.4465),
                            std_rgb=(0.2023, 0.1994, 0.2010),
                            jitter_param=0.4,
                            lighting_param=0.1):
    """Standard CIFAR-10 training augmentation pipeline.

    :param ds_metainfo: dataset metadata; its input_image_size[0] must be 32
    :param mean_rgb: per-channel normalization mean
    :param std_rgb: per-channel normalization std
    :param jitter_param: brightness/contrast/saturation jitter strength
    :param lighting_param: PCA lighting noise alpha
    :return: composed transform
    """
    assert (ds_metainfo is not None)
    assert (ds_metainfo.input_image_size[0] == 32)
    stages = [
        RandomCrop(size=32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param),
        transforms.RandomLighting(lighting_param),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_rgb, std=std_rgb),
    ]
    return transforms.Compose(stages)
def __init__(self):
    """Configure batch sizing and the train/test transform pipelines.

    NOTE(review): relies on self.im_size, self.num_gpus and self.jitter_param
    being provided elsewhere (class attributes or a base-class __init__) —
    confirm before reuse.
    """
    self.scale = 1.59
    # Larger per-device batches only fit at the 224 input size.
    if self.im_size == 224:
        self.per_device_batch_size = 16
    else:
        self.per_device_batch_size = 4
    self.batch_size = self.per_device_batch_size * max(self.num_gpus, 1)

    resized = (int(self.im_size * self.scale), self.im_size)
    mean = [0.41432491, 0.41432491, 0.41432491]
    std = [0.04530748, 0.04530748, 0.04530748]

    self.transform_train = transforms.Compose([
        transforms.Resize(resized),
        # transforms.RandomFlipLeftRight(),
        # Normal_Y(),
        transforms.RandomColorJitter(brightness=self.jitter_param,
                                     contrast=self.jitter_param,
                                     saturation=self.jitter_param),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    self.transform_test = transforms.Compose([
        transforms.Resize(resized),
        # Normal_Y(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
def get_dataloader(train_dataset, val_dataset, batch_size, num_workers):
    """Wrap the given datasets into an augmented training loader and a
    deterministic validation loader.

    :return: (train_data, val_data)
    """
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    jitter = 0.4
    lighting = 0.1

    train_pipeline = transforms.Compose([
        transforms.Resize(480),
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter, contrast=jitter, saturation=jitter),
        transforms.RandomLighting(lighting),
        transforms.ToTensor(),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    train_loader = gluon.data.DataLoader(
        train_dataset.transform_first(train_pipeline),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        last_batch='rollover')
    val_loader = gluon.data.DataLoader(
        val_dataset.transform_first(val_pipeline),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        last_batch='keep')
    return train_loader, val_loader
def get_train_data(self, batch_size):
    """Build the augmented training DataLoader for the multilabel dataset.

    :param batch_size: samples per batch
    :return: (train_data, dataset_size)
    """
    augmentation = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    dataset = MultilabelDataset(data_folder=self.train_folder,
                                data_file=self.train_file,
                                transform=augmentation)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
    return loader, len(dataset)
def get_data_loader(data_dir, batch_size, num_workers):
    """ImageNet train/val DataLoaders plus a per-device batch splitter.

    :return: (train_data, val_data, batch_fn)
    :raises ValueError: when a 'sync' kvstore is configured (unsupported here)
    """
    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    jitter = 0.4
    lighting = 0.1

    def batch_fn(batch, ctx):
        # Scatter data/labels across the provided contexts.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, label

    train_pipeline = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter, contrast=jitter, saturation=jitter),
        transforms.RandomLighting(lighting),
        transforms.ToTensor(),
        normalize,
    ])
    val_pipeline = transforms.Compose([
        transforms.Resize(256, keep_ratio=True),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    train_data = gluon.data.DataLoader(
        imagenet.classification.ImageNet(data_dir, train=True).transform_first(train_pipeline),
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=num_workers)
    val_data = gluon.data.DataLoader(
        imagenet.classification.ImageNet(data_dir, train=False).transform_first(val_pipeline),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers)
    # NOTE(review): `opt` is a module-level global here (not a parameter).
    if 'sync' in opt.kvstore:
        raise ValueError("Need to resize iterator for distributed training to not hang at the end")
    return train_data, val_data, batch_fn
def get_train_data_source(dataset_args, batch_size, num_workers):
    """CIFAR-10 training DataLoader with standard augmentation.

    :param dataset_args: object exposing data_dir (CIFAR-10 root)
    :param batch_size: samples per batch
    :param num_workers: loader worker processes
    :return: shuffled DataLoader over augmented CIFAR-10 training data
    """
    mean_rgb = (0.4914, 0.4822, 0.4465)
    std_rgb = (0.2023, 0.1994, 0.2010)
    jitter = 0.4
    lighting = 0.1

    augmentation = transforms.Compose([
        RandomCrop(size=32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter, contrast=jitter, saturation=jitter),
        transforms.RandomLighting(lighting),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_rgb, std=std_rgb),
    ])
    cifar_train = gluon.data.vision.CIFAR10(root=dataset_args.data_dir, train=True)
    return gluon.data.DataLoader(dataset=cifar_train.transform_first(fn=augmentation),
                                 batch_size=batch_size,
                                 shuffle=True,
                                 last_batch='discard',
                                 num_workers=num_workers)
def main():
    """CIFAR-10 training entry point: builds loaders, the model, and runs
    the nested train/test loops, logging to TensorBoard via SummaryWriter."""
    opt = parse_args()
    batch_size = opt.batch_size
    classes = 10
    log_dir = os.path.join(opt.save_dir, "logs")
    model_dir = os.path.join(opt.save_dir, "params")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Init dataloader
    jitter_param = 0.4
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(jitter_param),
        transforms.RandomColorJitter(jitter_param),
        transforms.RandomContrast(jitter_param),
        transforms.RandomSaturation(jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=opt.num_workers)
    val_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)
    num_gpus = opt.num_gpus
    # Effective batch size scales with the number of devices.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    lr_decay = opt.lr_decay
    # Trailing inf sentinel so lr_decay_count never runs past the list.
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]
    model_name = opt.model
    # NOTE(review): duplicated assignment below is redundant but harmless.
    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'
    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    def test(ctx, val_loader):
        """Run the net over the validation loader and return (name, accuracy)."""
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_loader):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                               batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(train_data, val_data, epochs, ctx):
        """Main training loop with LR decay, TensorBoard logging and
        best-model checkpointing."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.hybridize()
        net.initialize(mx.init.Xavier(), ctx=ctx)
        # One dummy forward pass so shapes are inferred before add_graph.
        net.forward(mx.nd.ones((1, 3, 30, 30), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(net)
        trainer = gluon.Trainer(net.collect_params(), optimizer, {
            'learning_rate': opt.lr,
            'wd': opt.wd,
            'momentum': opt.momentum
        })
        # NOTE(review): `metric` is reset each epoch but never updated here;
        # only train_metric and test()'s local metric are used.
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        iteration = 0
        lr_decay_count = 0
        best_val_score = 0
        global_step = 0
        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1
            # Step the learning rate at the configured decay epochs.
            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1
            tbar = tqdm(train_data)
            for i, batch in enumerate(tbar):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                                  batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                                   batch_axis=0)
                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1
                global_step += len(loss)
            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            # Checkpoint whenever validation accuracy improves.
            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('{}/{}-{}-{:04.3f}-best.params'.format(
                    model_dir, model_name, epoch, best_val_score))
            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                # NOTE(review): tag "TrainLos" looks like a typo for
                # "TrainLoss" — left untouched to keep existing logs readable.
                sw.add_scalar(tag="TrainLos",
                              value=train_loss,
                              global_step=global_step)
                sw.add_scalar(tag="TrainAcc",
                              value=acc,
                              global_step=global_step)
                sw.add_scalar(tag="ValAcc",
                              value=val_acc,
                              global_step=global_step)
                sw.add_graph(net)
            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))
            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('{}/{}-{}.params'.format(
                    save_dir, model_name, epoch))
        # Always save the final epoch's parameters when saving is enabled.
        if save_period and save_dir:
            net.save_parameters('{}/{}-{}.params'.format(
                save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(train_data, val_data, opt.num_epochs, context)
def get_dataloader(module_name, module_args, num_label):
    """Build train/val DataLoaders for OCR training from the config dict.

    :param module_name: dataset backend, 'ImageDataset' or 'LmdbDataset'
    :param module_args: config dict with 'dataset' and 'loader' sections;
        its 'dataset' sub-dict is mutated in place (keys are popped below)
    :param num_label: number of labels, injected into the dataset args
    :return: (train_loader, val_loader); val_loader is None when there is
        no validation data
    :raises Exception: for unknown module_name or when no images are found
    """
    train_transfroms = transforms.Compose(
        [transforms.RandomColorJitter(brightness=0.5),
         transforms.ToTensor()])
    val_transfroms = transforms.ToTensor()
    dataset_args = module_args['dataset']
    dataset_args['num_label'] = num_label
    # Create the datasets. The pops below remove path/ratio keys so that
    # dataset_args can be passed straight through to get_dataset().
    train_data_path = dataset_args.pop('train_data_path')
    train_data_ratio = dataset_args.pop('train_data_ratio')
    val_data_path = dataset_args.pop('val_data_path')
    if module_name == 'ImageDataset':
        train_data_list, val_data_list = get_datalist(
            train_data_path, val_data_path,
            module_args['loader']['validation_split'])
    elif module_name == 'LmdbDataset':
        # LMDB datasets take the paths directly.
        train_data_list = train_data_path
        val_data_list = val_data_path
    else:
        raise Exception('current only support ImageDataset and LmdbDataset')
    train_dataset_list = []
    for train_data in train_data_list:
        train_dataset_list.append(
            get_dataset(data_list=train_data,
                        module_name=module_name,
                        phase='train',
                        dataset_args=dataset_args))
    if len(train_dataset_list) > 1:
        # Multiple sources: draw batches from each according to ratio_list.
        train_loader = dataset.Batch_Balanced_Dataset(
            dataset_list=train_dataset_list,
            ratio_list=train_data_ratio,
            module_args=module_args,
            dataset_transfroms=train_transfroms,
            phase='train')
    elif len(train_dataset_list) == 1:
        train_loader = DataLoader(
            dataset=train_dataset_list[0].transform_first(train_transfroms),
            batch_size=module_args['loader']['train_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='rollover',
            num_workers=module_args['loader']['num_workers'])
        # Expose the dataset size on the loader for progress reporting.
        train_loader.dataset_len = len(train_dataset_list[0])
    else:
        raise Exception('no images found')
    if len(val_data_list):
        val_dataset = get_dataset(data_list=val_data_list,
                                  module_name=module_name,
                                  phase='test',
                                  dataset_args=dataset_args)
        val_loader = DataLoader(
            dataset=val_dataset.transform_first(val_transfroms),
            batch_size=module_args['loader']['val_batch_size'],
            shuffle=module_args['loader']['shuffle'],
            last_batch='keep',
            num_workers=module_args['loader']['num_workers'])
        val_loader.dataset_len = len(val_dataset)
    else:
        val_loader = None
    return train_loader, val_loader
def train():
    """Face-recognition training loop: builds the margin-based ResNet-100,
    record-file data loaders, an SGD trainer with multi-factor LR schedule,
    then trains with periodic logging, LFW evaluation and checkpointing.

    NOTE(review): relies on module-level `args`, `ctx`, `logger`, `helper`,
    `eval_lfw`, `ToTensor`, `ImageRecordDataset` — confirm they are defined
    at file scope.
    """
    # Create inference
    inference = resnet100(args.num_classes,
                          emb_size=args.emb_size,
                          s=args.margin_s,
                          a=args.margin_a,
                          m=args.margin_m,
                          b=args.margin_b)
    # Load inference params
    if args.init.lower() == 'xavier':
        init = mx.init.Xavier(rnd_type='gaussian',
                              factor_type='out',
                              magnitude=2)
    else:
        init = mx.initializer.Uniform()
    if args.model:
        # Explicit model file given: load it and start iteration count at 0.
        helper.load_params(inference, args.model, ctx=ctx)
        cur_iter = 0
    else:
        # Otherwise resume from the checkpoint dir (returns last iteration).
        cur_iter = helper.load_params(inference,
                                      args.ckpt_dir,
                                      prefix=args.prefix,
                                      init=init,
                                      ctx=ctx)
    # Hybrid mode --> Symbol mode
    inference.hybridize(static_alloc=True, static_shape=True)
    # Datasets
    if args.color:
        train_transform = transforms.Compose([
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(0.1, 0.1, 0.1),
            ToTensor()
        ])
    else:
        train_transform = transforms.Compose(
            [transforms.RandomFlipLeftRight(),
             ToTensor()])
    train_dataset = ImageRecordDataset(
        args.train_rec).transform_first(train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              last_batch='discard',
                              num_workers=args.num_workers,
                              pin_memory=True)
    test_transform = ToTensor()
    test_dataset = ImageRecordDataset(
        args.test_rec).transform_first(test_transform)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             last_batch='keep',
                             num_workers=args.num_workers,
                             pin_memory=False)
    # Create learning rate scheduler
    iterations_per_epoch = int(len(train_dataset) / args.batch_size)
    lr_steps = [s * iterations_per_epoch for s in args.lr_steps]
    print('Learning rate drops after iterations: {}'.format(lr_steps))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=lr_steps,
                                                        factor=0.1)
    # Create trainer
    trainer = gluon.Trainer(inference.collect_params(),
                            optimizer='sgd',
                            optimizer_params={
                                'learning_rate': args.lr,
                                'wd': args.wd,
                                'lr_scheduler': lr_scheduler,
                                # Gradients are averaged over the devices.
                                'rescale_grad': 1. / len(ctx)
                            })
    # Load trainer from saved states
    helper.load_trainer(trainer, args.ckpt_dir, cur_iter, prefix=args.prefix)
    # Define loss functions
    softmax_cross_entropy = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    # Define metric losses
    metric_ce_loss = mx.metric.Loss('CE-Loss')
    best_acc = 80  # only save the model if the accuracy is better than 80%
    # Start training
    print('Start to train {}...'.format(args.prefix))
    start_epoch = cur_iter // iterations_per_epoch
    for cur_epoch in range(start_epoch + 1, args.max_epoch + 1):
        start_time = timeit.default_timer()
        for batch_idx, (image, label) in enumerate(train_loader):
            if label.ndim > 1:
                label = label[:, 0]  # skip the landmarks
            # if batch_idx > 0: break
            cur_iter += 1
            images = gluon.utils.split_and_load(image, ctx)
            labels = gluon.utils.split_and_load(label, ctx)
            with autograd.record(train_mode=True):
                losses = []
                for x, y in zip(images, labels):
                    # The margin head consumes the label as a second input.
                    fc = inference(x, y)
                    loss_ce = softmax_cross_entropy(fc, y)
                    losses.append(loss_ce)
                    # update metrics
                    metric_ce_loss.update(None, preds=loss_ce)
            for l in losses:
                l.backward()
            trainer.step(image.shape[0])
            # Periodic loss logging.
            if (batch_idx % args.log_interval == 0) or (
                    batch_idx == iterations_per_epoch - 1):
                elapsed_time = timeit.default_timer() - start_time
                scout = helper.print_scalars(
                    OrderedDict([metric_ce_loss.get()]), cur_epoch, batch_idx,
                    elapsed_time)
                logger.info(scout)
                start_time = timeit.default_timer()
                metric_ce_loss.reset()
            # Periodic LFW evaluation; checkpoint on new best accuracy.
            if (batch_idx % args.test_interval == 0) or (
                    batch_idx == iterations_per_epoch - 1):
                # if batch_idx > 0: break
                start_time = timeit.default_timer()
                mu, std, t, _ = eval_lfw(inference.features, args.test_rec,
                                         test_loader, ctx)
                elapsed_time = timeit.default_timer() - start_time
                if mu > best_acc:
                    best_acc = mu
                    # Save trained model
                    logger.info(
                        'Find better model at E: {}, B: {}, I: {}'.format(
                            cur_epoch, batch_idx, cur_iter))
                    helper.save_params(inference,
                                       args.ckpt_dir,
                                       cur_iter,
                                       prefix=args.prefix + '-best')
                scout = helper.print_scalars(
                    OrderedDict([('mu', mu), ('std', std), ('t', t)]),
                    cur_epoch, batch_idx, elapsed_time)
                logger.info(scout)
        # Save trained model
        helper.save_params(inference, args.ckpt_dir, cur_iter,
                           prefix=args.prefix)
        helper.save_trainer(trainer, args.ckpt_dir, cur_iter,
                            prefix=args.prefix)
def get_train_data(rec_train, batch_size, data_nthreads, input_size,
                   crop_ratio, args):
    """Build a sharded ImageNet training DataLoader from a record file,
    optionally with AutoAugment, and a helper that moves batches to a device.

    NOTE(review): `num_workers` and `rank` used for SplitSampler below are
    module-level globals (distributed world size / shard index), distinct
    from the `data_nthreads` loader threads — confirm at file scope.

    :param rec_train: path to the training .rec file
    :param batch_size: samples per batch
    :param data_nthreads: DataLoader worker count
    :param input_size: network input resolution
    :param crop_ratio: crop ratio used to derive the resize size
    :param args: parsed CLI args; args.auto_aug toggles AutoAugment
    :return: (train_data, train_batch_fn)
    """
    def train_batch_fn(batch, ctx):
        # Move a (data, label) batch onto a single context.
        data = batch[0].as_in_context(ctx)
        label = batch[1].as_in_context(ctx)
        return data, label

    jitter_param = 0.4
    lighting_param = 0.1
    # NOTE(review): `resize` is computed but unused in this function.
    resize = int(math.ceil(input_size / crop_ratio))
    train_transforms = []
    if args.auto_aug:
        print('Using AutoAugment')
        from autogluon.utils.augment import AugmentationBlock, autoaug_imagenet_policies
        train_transforms.append(AugmentationBlock(autoaug_imagenet_policies()))
    from gluoncv.utils.transforms import EfficientNetRandomCrop
    from autogluon.utils import pil_transforms
    if input_size >= 320:
        # Large-resolution path: PIL-based EfficientNet-style pipeline.
        train_transforms.extend([
            EfficientNetRandomCrop(input_size),
            pil_transforms.Resize((input_size, input_size),
                                  interpolation=Image.BICUBIC),
            pil_transforms.RandomHorizontalFlip(),
            pil_transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
            pil_transforms.ToNDArray(),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    else:
        # Standard gluon NDArray pipeline for smaller input sizes.
        train_transforms.extend([
            pil_transforms.ToNDArray(),
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    transform_train = transforms.Compose(train_transforms)
    train_set = mx.gluon.data.vision.ImageRecordDataset(
        rec_train).transform_first(transform_train)
    # Each distributed worker reads only its own shard of the dataset.
    train_sampler = SplitSampler(len(train_set),
                                 num_parts=num_workers,
                                 part_index=rank)
    train_data = gluon.data.DataLoader(
        train_set,
        batch_size=batch_size,
        # shuffle=True,
        last_batch='discard',
        num_workers=data_nthreads,
        sampler=train_sampler)
    return train_data, train_batch_fn
def split(X, Y, test_size):
    """Randomly partition (X, Y) into train/test subsets.

    :param X: samples
    :param Y: labels
    :param test_size: fraction (or count) reserved for the test split
    :return: X_train, X_test, Y_train, Y_test
    """
    from sklearn.model_selection import train_test_split
    return train_test_split(X, Y, test_size=test_size, shuffle=True)


# Training augmentation: crop a random region covering 0.08-1.0 of the image
# area with aspect ratio between 3/4 and 4/3, rescale it to 224x224, flip
# horizontally, jitter brightness/contrast/saturation, add PCA lighting
# noise, then normalize each channel with ImageNet statistics.
transform_train = gtf.Compose([
    gtf.RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
    gtf.RandomFlipLeftRight(),
    gtf.RandomColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    gtf.RandomLighting(0.1),
    gtf.ToTensor(),
    gtf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Evaluation pipeline: resize to 256, cut out the central 224x224 square,
# then normalize with the same statistics.
transform_test = gtf.Compose([
    gtf.Resize(256),
    gtf.CenterCrop(224),
    gtf.ToTensor(),
    gtf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
pass balanced_batch_images = nd.concat(*balanced_batch_images, dim=0) balanced_batch_texts = nd.concat(*balanced_batch_texts, dim=0) return balanced_batch_images, balanced_batch_texts if __name__ == '__main__': import os from tqdm import tqdm import anyconfig from mxnet.gluon.data.vision import transforms from utils import parse_config train_transfroms = transforms.Compose( [transforms.RandomColorJitter(brightness=0.5), transforms.ToTensor()]) config = anyconfig.load(open("config/icdar2015.yaml", 'rb')) if 'base' in config: config = parse_config(config) if os.path.isfile(config['dataset']['alphabet']): config['dataset']['alphabet'] = str( np.load(config['dataset']['alphabet'])) dataset_args = config['dataset']['validate']['dataset']['args'] dataset_args['num_label'] = 80 dataset_args['alphabet'] = config['dataset']['alphabet'] dataset = ImageDataset(**dataset_args) data_loader = DataLoader(dataset=dataset.transform_first(train_transfroms), batch_size=1, shuffle=True,
def main():
    """ImageNet training entry point.

    Builds the model (gluoncv or gluoncv2 backend), optional distillation
    teacher, single- or multi-scale data pipelines, then runs the nested
    train/test loops with optional mixup, label smoothing, AMP, gradient
    accumulation and wandb logging.
    """
    opt = parse_args()
    filehandler = logging.FileHandler(opt.logging_file, mode='a+')
    # streamhandler = logging.StreamHandler()
    logger = logging.getLogger('ImageNet')
    logger.setLevel(level=logging.DEBUG)
    logger.addHandler(filehandler)
    # logger.addHandler(streamhandler)
    logger.info(opt)
    if opt.amp:
        amp.init()
    batch_size = opt.batch_size
    classes = 1000
    # Fixed ImageNet-1k split sizes, used only for progress bars / schedules.
    num_training_samples = 1281167
    num_validating_samples = 50000
    num_gpus = opt.num_gpus
    # Effective batch size scales with the number of devices.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    accumulate = opt.accumulate
    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    # Shift decay points left because the warmup scheduler runs first.
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size
    # Linear warmup followed by the configured decay mode.
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=0,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])
    model_name = opt.model
    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
    if opt.use_gn:
        kwargs['norm_layer'] = gcv.nn.GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se
    if opt.last_gamma:
        kwargs['last_gamma'] = True
    optimizer = 'sgd'
    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler,
        # Keeps the scheduler position correct when resuming mid-training.
        'begin_num_update': num_batches * opt.resume_epoch
    }
    # if opt.dtype != 'float32':
    #     optimizer_params['multi_precision'] = True
    # net = get_model(model_name, **kwargs)
    if opt.model_backend == 'gluoncv':
        net = glcv_get_model(model_name, **kwargs)
    elif opt.model_backend == 'gluoncv2':
        net = glcv2_get_model(model_name, **kwargs)
    else:
        raise ValueError(f'Unknown backend: {opt.model_backend}')
    # net.cast(opt.dtype)
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context, cast_dtype=True)
    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        if opt.teacher_backend == 'gluoncv':
            teacher = glcv_get_model(teacher_name, **kwargs)
        elif opt.teacher_backend == 'gluoncv2':
            teacher = glcv2_get_model(teacher_name, **kwargs)
        else:
            raise ValueError(f'Unknown backend: {opt.teacher_backend}')
        # teacher = glcv2_get_model(teacher_name, pretrained=True, ctx=context)
        # teacher.cast(opt.dtype)
        # The teacher is frozen: no gradients are kept for it.
        teacher.collect_params().setattr('grad_req', 'null')
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_val):
        rec_train = os.path.expanduser(rec_train)
        rec_val = os.path.expanduser(rec_val)
        # mean_rgb = [123.68, 116.779, 103.939]
        # std_rgb = [58.393, 57.12, 57.375]
        train_dataset = ImageRecordDataset(filename=rec_train, flag=1)
        val_dataset = ImageRecordDataset(filename=rec_val, flag=1)
        return train_dataset, val_dataset

    def get_data_loader(data_dir):
        train_dataset = ImageNet(data_dir, train=True)
        val_dataset = ImageNet(data_dir, train=False)
        return train_dataset, val_dataset

    def batch_fn(batch, ctx):
        # Scatter a (data, label) batch across the device list.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                          batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                           batch_axis=0)
        return data, label

    if opt.use_rec:
        train_dataset, val_dataset = get_data_rec(opt.rec_train, opt.rec_val)
    else:
        train_dataset, val_dataset = get_data_loader(opt.data_dir)
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1
    if not opt.multi_scale:
        # Single fixed input resolution.
        train_dataset = train_dataset.transform_first(
            transforms.Compose([
                transforms.RandomResizedCrop(opt.input_size),
                transforms.RandomFlipLeftRight(),
                transforms.RandomColorJitter(brightness=jitter_param,
                                             contrast=jitter_param,
                                             saturation=jitter_param),
                transforms.RandomLighting(lighting_param),
                transforms.ToTensor(), normalize
            ]))
        train_data = gluon.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           pin_memory=True,
                                           last_batch='rollover',
                                           num_workers=num_workers)
    else:
        # Multi-scale training: cycle through crop sizes 320..608 (x * 32).
        train_data = RandomTransformDataLoader(
            [
                Transform(
                    transforms.Compose([
                        # transforms.RandomResizedCrop(opt.input_size),
                        transforms.RandomResizedCrop(x * 32),
                        transforms.RandomFlipLeftRight(),
                        transforms.RandomColorJitter(brightness=jitter_param,
                                                     contrast=jitter_param,
                                                     saturation=jitter_param),
                        transforms.RandomLighting(lighting_param),
                        transforms.ToTensor(), normalize
                    ])) for x in range(10, 20)
            ],
            train_dataset,
            interval=10 * opt.accumulate,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=True,
            last_batch='rollover',
            num_workers=num_workers)
    val_dataset = val_dataset.transform_first(
        transforms.Compose([
            transforms.Resize(opt.input_size, keep_ratio=True),
            transforms.CenterCrop(opt.input_size),
            transforms.ToTensor(), normalize
        ]))
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     pin_memory=True,
                                     last_batch='keep',
                                     num_workers=num_workers)
    # Mixup produces soft labels, so accuracy is replaced by RMSE.
    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    train_loss_metric = mx.metric.Loss()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        if opt.wandb:
            save_dir = wandb.run.dir
        else:
            save_dir = opt.save_dir
            makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        """Mix each one-hot label with the batch-reversed labels by `lam`,
        optionally label-smoothed by `eta`."""
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes,
                           on_value=1 - eta + eta / classes,
                           off_value=eta / classes)
            y2 = l[::-1].one_hot(classes,
                                 on_value=1 - eta + eta / classes,
                                 off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        """Label smoothing: one-hot with eta mass spread over all classes."""
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes,
                            on_value=1 - eta + eta / classes,
                            off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        """Evaluate on the validation set; returns (top1-err, top5-err)."""
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in tqdm.tqdm(enumerate(val_data),
                                  desc='Validating',
                                  total=num_validating_samples // batch_size):
            data, label = batch_fn(batch, ctx)
            # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            outputs = [net(X) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return 1 - top1, 1 - top5

    def train(ctx):
        """Full training loop with AMP, accumulation, mixup/smoothing,
        distillation and periodic checkpointing."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if opt.resume_params == '':
            import warnings
            # Suppress initializer warnings for already-initialized params.
            with warnings.catch_warnings(record=True) as w:
                net.initialize(mx.init.MSRAPrelu(), ctx=ctx)
        if opt.no_wd:
            # Exclude norm/bias parameters from weight decay.
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0
        if accumulate > 1:
            logger.info(f'accumulate: {accumulate}, using "add" grad_req')
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.collect_params().setattr('grad_req', 'add')
        trainer = gluon.Trainer(net.collect_params(),
                                optimizer,
                                optimizer_params,
                                update_on_kvstore=False if opt.amp else None)
        if opt.amp:
            amp.init_trainer(trainer)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)
        # Soft labels (smoothing/mixup) require dense-label loss.
        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(
                sparse_label=sparse_label_loss)
        # best_val_score tracks the lowest top-1 error seen so far.
        best_val_score = 1
        err_top1_val, err_top5_val = test(ctx, val_data)
        logger.info('initial validation: err-top1=%f err-top5=%f' %
                    (err_top1_val, err_top5_val))
        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            train_loss_metric.reset()
            btic = time.time()
            pbar = tqdm.tqdm(total=num_batches,
                             desc=f'Training [{epoch}]',
                             leave=True)
            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)
                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    # Disable mixup for the final mixup_off_epoch epochs.
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]
                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)
                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)
                if distillation:
                    # teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature) \
                    #                for X in data]
                    # Resize inputs to the teacher's resolution (NHWC round
                    # trip because nd.image.resize expects channel-last).
                    with ag.predict_mode():
                        teacher_prob = [
                            nd.softmax(
                                teacher(
                                    nd.transpose(
                                        nd.image.resize(
                                            nd.transpose(X, (0, 2, 3, 1)),
                                            size=opt.teacher_imgsize),
                                        (0, 3, 1, 2))) / opt.temperature)
                            for X in data
                        ]
                with ag.record():
                    # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    outputs = [net(X) for X in data]
                    if distillation:
                        # loss = [L(yhat.astype('float32', copy=False),
                        #           y.astype('float32', copy=False),
                        #           p.astype('float32', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob)]
                        # print([outputs, label, teacher_prob])
                        loss = [
                            L(yhat, y, p) for yhat, y, p in zip(
                                outputs, label, teacher_prob)
                        ]
                    else:
                        # loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
                        loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
                if opt.amp:
                    with amp.scale_loss(loss, trainer) as scaled_loss:
                        ag.backward(scaled_loss)
                else:
                    ag.backward(loss)
                if accumulate > 1:
                    # Step only every `accumulate` batches, then clear grads.
                    if (i + 1) % accumulate == 0:
                        trainer.step(batch_size * accumulate)
                        net.collect_params().zero_grad()
                else:
                    trainer.step(batch_size)
                train_loss_metric.update(0, loss)
                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False)) \
                                      for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        # Score against the original hard labels.
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)
                _, loss_score = train_loss_metric.get()
                train_metric_name, train_metric_score = train_metric.get()
                samplers_per_sec = batch_size / (time.time() - btic)
                postfix = f'{samplers_per_sec:.1f} imgs/sec, ' \
                          f'loss: {loss_score:.4f}, ' \
                          f'acc: {train_metric_score * 100:.2f}, ' \
                          f'lr: {trainer.learning_rate:.4e}'
                if opt.multi_scale:
                    postfix += f', size: {data[0].shape[-1]}'
                pbar.set_postfix_str(postfix)
                pbar.update()
                btic = time.time()
                if opt.log_interval and not (i + 1) % opt.log_interval:
                    step = epoch * num_batches + i
                    wandb.log(
                        {
                            'samplers_per_sec': samplers_per_sec,
                            train_metric_name: train_metric_score,
                            'lr': trainer.learning_rate,
                            'loss': loss_score
                        },
                        step=step)
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f'
                        % (epoch, i, samplers_per_sec, train_metric_name,
                           train_metric_score, trainer.learning_rate))
            pbar.close()
            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))
            err_top1_val, err_top5_val = test(ctx, val_data)
            wandb.log({
                'err1': err_top1_val,
                'err5': err_top5_val
            },
                      step=epoch * num_batches)
            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))
            # Checkpoint whenever validation top-1 error improves.
            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters(
                    '%s/%.4f-imagenet-%s-%d-best.params' %
                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states(
                    '%s/%.4f-imagenet-%s-%d-best.states' %
                    (save_dir, best_val_score, model_name, epoch))
            if save_frequency and save_dir and (epoch +
                                                1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))
        # Always save the final model when saving is enabled.
        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=not opt.multi_scale)
        if distillation:
            teacher.hybridize(static_alloc=True,
                              static_shape=not opt.multi_scale)
    train(context)
#net.output=nn.GlobalAvgPool1D(100) softmax_loss = gluon.loss.SoftmaxCrossEntropyLoss() gmloss = L_GM_Loss(10, 10, args.margin, args.lamda, args.mult) gmloss.initialize(mx.init.MSRAPrelu(), ctx=ctx) net.initialize(mx.init.Xavier(), ctx=ctx) params = net.collect_params() params.update(gmloss.collect_params()) #params.update(gmloss.collect_params(select='mean')) transform_train = transforms.Compose([ # Randomly crop an area, and then resize it to be 32x32 transforms.RandomResizedCrop(32), # Randomly flip the image horizontally transforms.RandomFlipLeftRight(), # Randomly jitter the brightness, contrast and saturation of the image transforms.RandomColorJitter(brightness=0.1, contrast=0.1, saturation=0.1), # Randomly adding noise to the image transforms.RandomLighting(0.1), # Transpose the image from height*width*num_channels to num_channels*height*width # and map values from [0, 255] to [0,1] transforms.ToTensor(), # Normalize the image with mean and standard deviation calculated across all images transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ]) transform_test = transforms.Compose([ transforms.Resize(32), transforms.ToTensor(), transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) ])
def main(_argv):
    """Train and evaluate a TennisSet vision model, driven by absl FLAGS.

    Pipeline: normalise FLAG values -> set up logging/TensorBoard ->
    build transforms, datasets and loaders -> build the model (frame CNN,
    two-stream, 3D CNN or CNN+RNN depending on FLAGS) -> optionally dump
    features and exit -> otherwise resume from the latest checkpoint,
    train, then test the best checkpoint and print confusion matrices.
    """
    # Normalise string-valued FLAGS into their real types.
    FLAGS.every = [int(s) for s in FLAGS.every]
    FLAGS.balance = [
        True if s.lower() == 'true' or s.lower() == 't' else False
        for s in FLAGS.balance
    ]
    FLAGS.lr_steps = [int(s) for s in FLAGS.lr_steps]
    if FLAGS.num_workers < 0:
        # Negative worker count means "use every available CPU core".
        FLAGS.num_workers = multiprocessing.cpu_count()
    ctx = [mx.gpu(i) for i in range(FLAGS.num_gpus)
           ] if FLAGS.num_gpus > 0 else [mx.cpu()]

    # Set up logging
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = os.path.join('models', 'vision', 'experiments',
                                 FLAGS.model_id, 'log.txt')
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)
    key_flags = FLAGS.get_key_flags_for_module(sys.argv[0])
    logging.info('\n'.join(f.serialize() for f in key_flags))

    # set up tensorboard summary writer
    tb_sw = SummaryWriter(log_dir=os.path.join(log_dir, 'tb'),
                          comment=FLAGS.model_id)

    feat_sub_dir = None  # NOTE(review): never used below — candidate for removal

    # Data augmentation, will do in dataset incase window>1 and need to be applied image-wise
    jitter_param = 0.4
    lighting_param = 0.1
    transform_train = None
    transform_test = None
    balance_train = True
    if FLAGS.feats_model is None:
        # Working from raw frames (no precomputed features): ImageNet-style
        # augmentation for training, resize+center-crop for evaluation.
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(FLAGS.data_shape),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
        transform_test = transforms.Compose([
            transforms.Resize(FLAGS.data_shape + 32),
            transforms.CenterCrop(FLAGS.data_shape),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
        if bool(FLAGS.flow):
            # Optical-flow input uses a dedicated two-stream normaliser and
            # the same (deterministic) pipeline for train and test.
            transform_test = transforms.Compose([
                transforms.Resize(FLAGS.data_shape + 32),
                transforms.CenterCrop(FLAGS.data_shape),
                TwoStreamNormalize()
            ])
            transform_train = transform_test
    if FLAGS.save_feats:
        # When dumping features we want deterministic, unbalanced passes.
        balance_train = False
        transform_train = transform_test
    if FLAGS.window > 1:
        # Multi-frame windows: augmentation must be identical across the
        # window, so reuse the deterministic test transform.
        transform_train = transform_test

    # Load datasets
    if FLAGS.temp_pool not in ['max', 'mean']:
        train_set = TennisSet(split='train',
                              transform=transform_train,
                              every=FLAGS.every[0],
                              padding=FLAGS.padding,
                              stride=FLAGS.stride,
                              window=FLAGS.window,
                              model_id=FLAGS.model_id,
                              split_id=FLAGS.split_id,
                              balance=balance_train,
                              flow=bool(FLAGS.flow),
                              feats_model=FLAGS.feats_model,
                              save_feats=FLAGS.save_feats)
        logging.info(train_set)
        val_set = TennisSet(split='val',
                            transform=transform_test,
                            every=FLAGS.every[1],
                            padding=FLAGS.padding,
                            stride=FLAGS.stride,
                            window=FLAGS.window,
                            model_id=FLAGS.model_id,
                            split_id=FLAGS.split_id,
                            balance=False,
                            flow=bool(FLAGS.flow),
                            feats_model=FLAGS.feats_model,
                            save_feats=FLAGS.save_feats)
        logging.info(val_set)
    # test_set is needed unconditionally (metrics below use test_set.classes).
    test_set = TennisSet(split='test',
                         transform=transform_test,
                         every=FLAGS.every[2],
                         padding=FLAGS.padding,
                         stride=FLAGS.stride,
                         window=FLAGS.window,
                         model_id=FLAGS.model_id,
                         split_id=FLAGS.split_id,
                         balance=False,
                         flow=bool(FLAGS.flow),
                         feats_model=FLAGS.feats_model,
                         save_feats=FLAGS.save_feats)
    logging.info(test_set)

    # Data Loaders
    if FLAGS.temp_pool not in ['max', 'mean']:
        train_data = gluon.data.DataLoader(train_set,
                                           batch_size=FLAGS.batch_size,
                                           shuffle=True,
                                           num_workers=FLAGS.num_workers)
        val_data = gluon.data.DataLoader(val_set,
                                         batch_size=FLAGS.batch_size,
                                         shuffle=False,
                                         num_workers=FLAGS.num_workers)
    test_data = gluon.data.DataLoader(test_set,
                                      batch_size=FLAGS.batch_size,
                                      shuffle=False,
                                      num_workers=FLAGS.num_workers)

    # Define Model
    model = None
    if FLAGS.feats_model is None:
        if FLAGS.backbone == 'rdnet':
            # R(2+1)D 3D-CNN backbone pretrained on Kinetics-400.
            backbone_net = get_r21d(num_layers=34,
                                    n_classes=400,
                                    t=8,
                                    pretrained=True).features
        else:
            if FLAGS.flow == 'sixc':
                backbone_net = get_model(
                    FLAGS.backbone, pretrained=False
                ).features  # 6 channel input, don't want pretraind
            else:
                backbone_net = get_model(FLAGS.backbone,
                                         pretrained=True).features
        if FLAGS.flow in ['twos', 'only']:
            if FLAGS.flow == 'only':
                # Flow-only: no RGB stream at all.
                backbone_net = None
            flow_net = get_model(
                FLAGS.backbone, pretrained=True
            ).features  # todo orig exp was not pretrained flow
            model = TwoStreamModel(backbone_net, flow_net,
                                   len(train_set.classes))
        elif FLAGS.backbone == 'rdnet':
            model = FrameModel(backbone_net, len(train_set.classes),
                               swap=True)
        else:
            model = FrameModel(backbone_net, len(train_set.classes))
    elif FLAGS.temp_pool in ['max', 'mean']:
        backbone_net = get_model(FLAGS.backbone, pretrained=True).features
        model = FrameModel(backbone_net, len(test_set.classes))

    if FLAGS.window > 1:  # Time Distributed RNN
        if FLAGS.backbone_from_id and model is not None:
            # Warm-start the per-frame backbone from a previous experiment.
            if os.path.exists(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)):
                files = os.listdir(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id))
                files = [f for f in files if f[-7:] == '.params']
                if len(files) > 0:
                    files = sorted(files,
                                   reverse=True)  # put latest model first
                    model_name = files[0]
                    model.load_parameters(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name))
                    logging.info('Loaded backbone params: {}'.format(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name)))
        if FLAGS.freeze_backbone and model is not None:
            # Freeze every backbone weight by disabling its gradients.
            for param in model.collect_params().values():
                param.grad_req = 'null'
        if FLAGS.temp_pool in ['gru', 'lstm']:
            model = CNNRNN(model,
                           num_classes=len(test_set.classes),
                           type=FLAGS.temp_pool,
                           hidden_size=128)
        elif FLAGS.temp_pool in ['mean', 'max']:
            pass  # pooling is wrapped around the model after testing setup
        else:
            assert FLAGS.backbone == 'rdnet'  # ensure 3d net
            assert FLAGS.window in [8, 32]

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        model.initialize()
        num_channels = 3
        if bool(FLAGS.flow):
            num_channels = 6
        # Log a model summary with a dummy input of the right shape:
        # raw frames vs 4096-d precomputed feature vectors, single frame
        # vs a temporal window.
        if FLAGS.feats_model is None:
            if FLAGS.window == 1:
                logging.info(
                    model.summary(
                        mx.nd.ndarray.ones(shape=(1, num_channels,
                                                  FLAGS.data_shape,
                                                  FLAGS.data_shape))))
            else:
                logging.info(
                    model.summary(
                        mx.nd.ndarray.ones(shape=(1, FLAGS.window,
                                                  num_channels,
                                                  FLAGS.data_shape,
                                                  FLAGS.data_shape))))
        else:
            if FLAGS.window == 1:
                logging.info(model.summary(mx.nd.ndarray.ones(shape=(1, 4096))))
            elif FLAGS.temp_pool not in ['max', 'mean']:
                logging.info(
                    model.summary(
                        mx.nd.ndarray.ones(shape=(1, FLAGS.window, 4096))))

    model.collect_params().reset_ctx(ctx)
    model.hybridize()

    if FLAGS.save_feats:
        # Feature-dumping mode: pick the best epoch recorded in scores.txt,
        # load it, save features for all three splits, and exit.
        best_score = -1
        best_epoch = -1
        with open(
                os.path.join('models', 'vision', 'experiments',
                             FLAGS.model_id, 'scores.txt'), 'r') as f:
            lines = f.readlines()
            lines = [line.rstrip().split() for line in lines]
            for ep, sc in lines:
                if float(sc) > best_score:
                    best_epoch = int(ep)
                    best_score = float(sc)
        logging.info('Testing best model from Epoch %d with score of %f' %
                     (best_epoch, best_score))
        model.load_parameters(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id,
                         "{:04d}.params".format(best_epoch)))
        logging.info('Loaded model params: {}'.format(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id,
                         "{:04d}.params".format(best_epoch))))
        for data, sett in zip([train_data, val_data, test_data],
                              [train_set, val_set, test_set]):
            save_features(model, data, sett, ctx)
        return

    # Resume from the latest checkpoint of this experiment, if any.
    start_epoch = 0
    if os.path.exists(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id)):
        files = os.listdir(
            os.path.join('models', 'vision', 'experiments', FLAGS.model_id))
        files = [f for f in files if f[-7:] == '.params']
        if len(files) > 0:
            files = sorted(files, reverse=True)  # put latest model first
            model_name = files[0]
            start_epoch = int(model_name.split('.')[0]) + 1
            model.load_parameters(os.path.join('models', 'vision',
                                               'experiments', FLAGS.model_id,
                                               model_name),
                                  ctx=ctx)
            logging.info('Loaded model params: {}'.format(
                os.path.join('models', 'vision', 'experiments',
                             FLAGS.model_id, model_name)))

    # Setup the optimiser
    trainer = gluon.Trainer(model.collect_params(), 'sgd', {
        'learning_rate': FLAGS.lr,
        'momentum': FLAGS.momentum,
        'wd': FLAGS.wd
    })

    # Setup Metric/s
    # accuracy_no ignores the background class 0; accuracy_o measures only
    # class 0 by ignoring every other label.
    metrics = [
        Accuracy(label_names=test_set.classes),
        mx.metric.TopKAccuracy(5, label_names=test_set.classes),
        Accuracy(name='accuracy_no',
                 label_names=test_set.classes[1:],
                 ignore_labels=[0]),
        Accuracy(name='accuracy_o',
                 label_names=test_set.classes[0],
                 ignore_labels=list(range(1, len(test_set.classes)))),
        PRF1(label_names=test_set.classes)
    ]
    val_metrics = [
        Accuracy(label_names=test_set.classes),
        mx.metric.TopKAccuracy(5, label_names=test_set.classes),
        Accuracy(name='accuracy_no',
                 label_names=test_set.classes[1:],
                 ignore_labels=[0]),
        Accuracy(name='accuracy_o',
                 label_names=test_set.classes[0],
                 ignore_labels=list(range(1, len(test_set.classes)))),
        PRF1(label_names=test_set.classes)
    ]
    test_metrics = [
        Accuracy(label_names=test_set.classes),
        mx.metric.TopKAccuracy(5, label_names=test_set.classes),
        Accuracy(name='accuracy_no',
                 label_names=test_set.classes[1:],
                 ignore_labels=[0]),
        Accuracy(name='accuracy_o',
                 label_names=test_set.classes[0],
                 ignore_labels=list(range(1, len(test_set.classes)))),
        PRF1(label_names=test_set.classes)
    ]

    # Setup Loss/es
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

    if FLAGS.temp_pool not in ['max', 'mean']:
        model = train_model(model, train_set, train_data, metrics, val_set,
                            val_data, val_metrics, trainer, loss_fn,
                            start_epoch, ctx, tb_sw)

    # model training complete, test it
    if FLAGS.temp_pool not in ['max', 'mean']:
        mod_path = os.path.join('models', 'vision', 'experiments',
                                FLAGS.model_id)
    else:
        # Pooling models reuse checkpoints from the features experiment.
        mod_path = os.path.join('models', 'vision', 'experiments',
                                FLAGS.feats_model)
    best_score = -1
    best_epoch = -1
    with open(os.path.join(mod_path, 'scores.txt'), 'r') as f:
        lines = f.readlines()
        lines = [line.rstrip().split() for line in lines]
        for ep, sc in lines:
            if float(sc) > best_score:
                best_epoch = int(ep)
                best_score = float(sc)
    logging.info('Testing best model from Epoch %d with score of %f' %
                 (best_epoch, best_score))
    model.load_parameters(
        os.path.join(mod_path, "{:04d}.params".format(best_epoch)))
    logging.info('Loaded model params: {}'.format(
        os.path.join(mod_path, "{:04d}.params".format(best_epoch))))

    if FLAGS.temp_pool in ['max', 'mean']:
        assert FLAGS.backbone_from_id or FLAGS.feats_model  # if we doing temporal pooling ensure that we have loaded a pretrained net
        model = TemporalPooling(model,
                                pool=FLAGS.temp_pool,
                                num_classes=0,
                                feats=FLAGS.feats_model != None)

    tic = time.time()
    _ = test_model(model, test_data, test_set, test_metrics, ctx,
                   vis=FLAGS.vis)

    # Print tab-separated confusion matrices (PRF1 metric keeps them in .mat).
    if FLAGS.temp_pool not in ['max', 'mean']:
        str_ = 'Train set:'
        for i in range(len(train_set.classes)):
            str_ += '\n'
            for j in range(len(train_set.classes)):
                str_ += str(metrics[4].mat[i, j]) + '\t'
        print(str_)
    str_ = 'Test set:'
    for i in range(len(test_set.classes)):
        str_ += '\n'
        for j in range(len(test_set.classes)):
            str_ += str(test_metrics[4].mat[i, j]) + '\t'
    print(str_)

    # Summarise every test metric on a single log line.
    str_ = '[Finished] '
    for metric in test_metrics:
        result = metric.get()
        if not isinstance(result, list):
            result = [result]
        for res in result:
            str_ += ', Test_{}={:.3f}'.format(res[0], res[1])
        metric.reset()
    str_ += ' # Samples: {}, Time Taken: {:.1f}'.format(
        len(test_set), time.time() - tic)
    logging.info(str_)
# ---------------------------------------------------------------------------
# Script-level setup: CLI args, device, and ImageNet-style data transforms.
# NOTE(review): `parser`, `mx` and `transforms` are defined elsewhere in the
# file; this block assumes a GPU is available (mx.gpu()).
# ---------------------------------------------------------------------------
args = parser.parse_args()
ctx = mx.gpu()
num_outputs = 10
# Augmentation strengths for the training pipeline.
jitter_param = 0.4
lighting_param = 0.1
# ImageNet channel statistics used for normalization.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# Training: random crop/flip/jitter/lighting, then tensorise + normalise.
training_transformer = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
# Validation: deterministic resize + center crop with the same normalisation.
validation_transformer = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
# Per-channel constant images of the mean/std, shape (3, 224, 224) — used to
# de-normalize images elsewhere (image = tensor * std_img + mean_img).
mean_img = mx.nd.stack(*[mx.nd.full((224, 224), m) for m in mean])
std_img = mx.nd.stack(*[mx.nd.full((224, 224), s) for s in std])
def data_augmenting(self, config: Configuration, dataset_path):
    """Build train/val/test DataLoaders for an ImageFolder-style dataset.

    ``dataset_path`` must contain ``train``, ``val`` and ``test``
    sub-directories. When ``config.data_augmenting`` is truthy the training
    pipeline applies random crop / flip / colour-jitter / lighting noise;
    otherwise training images are just resized to 224x224. Val/test always
    use a deterministic resize + center-crop pipeline.

    :param config: run configuration providing jitter_param, lighting_param,
        batch_size, gpus_count, num_workers and the data_augmenting switch.
    :param dataset_path: root directory of the dataset on disk.
    :return: tuple ``(train_data, val_data, test_data)`` of
        ``gluon.data.DataLoader`` objects.
    """
    jitter_param = config.jitter_param
    lighting_param = config.lighting_param
    # Scale the per-device batch size by the number of configured GPUs
    # (falling back to 1 for CPU-only runs).
    batch_size = config.batch_size * max(len(config.gpus_count), 1)
    num_workers = config.num_workers
    if config.data_augmenting:
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    else:
        transform_train = transforms.Compose([
            transforms.Resize(size=(224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    transform_test = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    ################################################################################
    # With the data augmentation functions, we can define our data loaders:
    # BUG FIX: the original left `path = dataset_path` commented out behind a
    # "todo use variable for path" note, so `path` below was undefined (or an
    # unrelated global). Resolve the dataset root from the argument instead.
    path = dataset_path
    train_path = os.path.join(path, 'train')
    val_path = os.path.join(path, 'val')
    test_path = os.path.join(path, 'test')

    train_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(train_path).transform_first(
            transform_train),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    val_data = gluon.data.DataLoader(gluon.data.vision.ImageFolderDataset(
        val_path).transform_first(transform_test),
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers)
    test_data = gluon.data.DataLoader(gluon.data.vision.ImageFolderDataset(
        test_path).transform_first(transform_test),
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=num_workers)

    return train_data, val_data, test_data
def get_data_loader(data_dir, batch_size, num_workers):
    """Build ImageNet train/val DataLoaders plus a batch-splitting helper.

    Transform choice depends on the global ``opt``: optional AutoAugment,
    and an EfficientNet-style PIL pipeline for large inputs (>= 320px)
    versus the standard Gluon pipeline otherwise.

    :param data_dir: root directory of the ImageNet data.
    :param batch_size: total batch size across devices.
    :param num_workers: DataLoader worker process count.
    :return: (train_data, val_data, batch_fn) where batch_fn splits a batch
        across a context list.
    """
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1
    input_size = opt.input_size
    # Default crop ratio of 0.875 when not explicitly configured.
    crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
    resize = int(math.ceil(input_size / crop_ratio))

    def batch_fn(batch, ctx):
        # Split (data, label) across the given context list along the batch axis.
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, label

    transform_train = []
    if opt.auto_aug:
        print('Using AutoAugment')
        from autogluon.utils.augment import AugmentationBlock, autoaug_imagenet_policies
        transform_train.append(
            AugmentationBlock(autoaug_imagenet_policies()))

    # NOTE(review): third-party imports kept function-local, matching the
    # lazy-import style of the auto_aug branch above.
    from gluoncv.utils.transforms import EfficientNetRandomCrop, EfficientNetCenterCrop
    from autogluon.utils import pil_transforms

    if input_size >= 320:
        # Large-input (EfficientNet-style) pipeline built on PIL transforms.
        transform_train.extend([
            EfficientNetRandomCrop(input_size),
            pil_transforms.Resize((input_size, input_size),
                                  interpolation=Image.BICUBIC),
            pil_transforms.RandomHorizontalFlip(),
            pil_transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    else:
        # Standard Gluon augmentation pipeline.
        transform_train.extend([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    transform_train = transforms.Compose(transform_train)

    train_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
        data_dir, train=True).transform_first(transform_train),
                                       batch_size=batch_size,
                                       shuffle=True,
                                       last_batch='discard',
                                       num_workers=num_workers)

    if input_size >= 320:
        transform_test = transforms.Compose([
            pil_transforms.ToPIL(),
            EfficientNetCenterCrop(input_size),
            pil_transforms.Resize((input_size, input_size),
                                  interpolation=Image.BICUBIC),
            pil_transforms.ToNDArray(),
            transforms.ToTensor(), normalize
        ])
    else:
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(), normalize
        ])

    val_data = gluon.data.DataLoader(imagenet.classification.ImageNet(
        data_dir, train=False).transform_first(transform_test),
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers)
    return train_data, val_data, batch_fn
def main(_argv):
    """Train a GNMT-style video-captioning model on TennisSet, via FLAGS.

    Builds the source embedding either from a CNN over raw frames or from a
    trivial pass-through over precomputed features, loads caption data and
    optional word embeddings, constructs the encoder/decoder NMT model and a
    beam-search translator, then runs training.
    """
    os.makedirs(os.path.join('models', 'captioning', 'experiments',
                             FLAGS.model_id),
                exist_ok=True)
    if FLAGS.num_gpus > 0:  # only supports 1 GPU
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()

    # Set up logging
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = os.path.join('models', 'captioning', 'experiments',
                                 FLAGS.model_id, 'log.txt')
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)
    key_flags = FLAGS.get_key_flags_for_module(sys.argv[0])
    logging.info('\n'.join(f.serialize() for f in key_flags))

    # set up tensorboard summary writer
    tb_sw = SummaryWriter(log_dir=os.path.join(log_dir, 'tb'),
                          comment=FLAGS.model_id)

    # are we using features or do we include the CNN?
    if FLAGS.feats_model is None:
        # Raw frames: a pretrained CNN backbone (optionally warm-started
        # from a previous vision experiment) acts as the source embedding.
        backbone_net = get_model(FLAGS.backbone, pretrained=True,
                                 ctx=ctx).features
        cnn_model = FrameModel(backbone_net,
                               11)  # hardcoded the number of classes
        if FLAGS.backbone_from_id:
            if os.path.exists(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)):
                files = os.listdir(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id))
                files = [f for f in files if f[-7:] == '.params']
                if len(files) > 0:
                    files = sorted(files,
                                   reverse=True)  # put latest model first
                    model_name = files[0]
                    cnn_model.load_parameters(os.path.join(
                        'models', 'vision', 'experiments',
                        FLAGS.backbone_from_id, model_name),
                                              ctx=ctx)
                    logging.info('Loaded backbone params: {}'.format(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name)))
            else:
                raise FileNotFoundError('{}'.format(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)))
        if FLAGS.freeze_backbone:
            # Disable gradients on every CNN weight.
            for param in cnn_model.collect_params().values():
                param.grad_req = 'null'
        cnn_model = TimeDistributed(cnn_model.backbone)
        src_embed = cnn_model

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(FLAGS.data_shape),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=0.4,
                                         contrast=0.4,
                                         saturation=0.4),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ])
        transform_test = transforms.Compose([
            transforms.Resize(FLAGS.data_shape + 32),
            transforms.CenterCrop(FLAGS.data_shape),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ])
    else:
        from mxnet.gluon import nn  # need to do this to force no use of Embedding on src
        # Precomputed features: identity-like source embedding (Dropout(0)).
        src_embed = nn.HybridSequential(prefix='src_embed_')
        with src_embed.name_scope():
            src_embed.add(nn.Dropout(rate=0.0))
        transform_train = None
        transform_test = None

    # setup the data
    data_train = TennisSet(split='train',
                           transform=transform_train,
                           captions=True,
                           max_cap_len=FLAGS.tgt_max_len,
                           every=FLAGS.every,
                           feats_model=FLAGS.feats_model)
    data_val = TennisSet(split='val',
                         transform=transform_test,
                         captions=True,
                         vocab=data_train.vocab,
                         every=FLAGS.every,
                         inference=True,
                         feats_model=FLAGS.feats_model)
    data_test = TennisSet(split='test',
                          transform=transform_test,
                          captions=True,
                          vocab=data_train.vocab,
                          every=FLAGS.every,
                          inference=True,
                          feats_model=FLAGS.feats_model)

    # Dump the ground-truth captions next to the experiment for later eval.
    val_tgt_sentences = data_val.get_captions(split=True)
    test_tgt_sentences = data_test.get_captions(split=True)
    write_sentences(
        val_tgt_sentences,
        os.path.join('models', 'captioning', 'experiments', FLAGS.model_id,
                     'val_gt.txt'))
    write_sentences(
        test_tgt_sentences,
        os.path.join('models', 'captioning', 'experiments', FLAGS.model_id,
                     'test_gt.txt'))

    # load embeddings for tgt_embed
    if FLAGS.emb_file:
        word_embs = nlp.embedding.TokenEmbedding.from_file(
            file_path=os.path.join('data', FLAGS.emb_file))
        data_train.vocab.set_embedding(word_embs)
        input_dim, output_dim = data_train.vocab.embedding.idx_to_vec.shape
        tgt_embed = gluon.nn.Embedding(input_dim, output_dim)
        tgt_embed.initialize(ctx=ctx)
        tgt_embed.weight.set_data(data_train.vocab.embedding.idx_to_vec)
    else:
        tgt_embed = None

    # setup the model
    encoder, decoder = get_gnmt_encoder_decoder(
        cell_type=FLAGS.cell_type,
        hidden_size=FLAGS.num_hidden,
        dropout=FLAGS.dropout,
        num_layers=FLAGS.num_layers,
        num_bi_layers=FLAGS.num_bi_layers)
    model = NMTModel(src_vocab=None,
                     tgt_vocab=data_train.vocab,
                     encoder=encoder,
                     decoder=decoder,
                     embed_size=FLAGS.emb_size,
                     prefix='gnmt_',
                     src_embed=src_embed,
                     tgt_embed=tgt_embed)
    model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
    static_alloc = True
    model.hybridize(static_alloc=static_alloc)
    logging.info(model)

    # Resume from the latest epoch checkpoint (skipping 'valid_best.params').
    start_epoch = 0
    if os.path.exists(
            os.path.join('models', 'captioning', 'experiments',
                         FLAGS.model_id)):
        files = os.listdir(
            os.path.join('models', 'captioning', 'experiments',
                         FLAGS.model_id))
        files = [f for f in files if f[-7:] == '.params']
        if len(files) > 0:
            files = sorted(files, reverse=True)  # put latest model first
            model_name = files[0]
            if model_name == 'valid_best.params':
                model_name = files[1]
            start_epoch = int(model_name.split('.')[0]) + 1
            model.load_parameters(os.path.join('models', 'captioning',
                                               'experiments', FLAGS.model_id,
                                               model_name),
                                  ctx=ctx)
            logging.info('Loaded model params: {}'.format(
                os.path.join('models', 'captioning', 'experiments',
                             FLAGS.model_id, model_name)))

    # setup the beam search
    translator = BeamSearchTranslator(model=model,
                                      beam_size=FLAGS.beam_size,
                                      scorer=nlp.model.BeamSearchScorer(
                                          alpha=FLAGS.lp_alpha,
                                          K=FLAGS.lp_k),
                                      max_length=FLAGS.tgt_max_len + 100)
    logging.info('Use beam_size={}, alpha={}, K={}'.format(
        FLAGS.beam_size, FLAGS.lp_alpha, FLAGS.lp_k))

    # setup the loss function
    loss_function = MaskedSoftmaxCELoss()
    loss_function.hybridize(static_alloc=static_alloc)

    # run the training
    train(data_train, data_val, data_test, model, loss_function,
          val_tgt_sentences, test_tgt_sentences, translator, start_epoch,
          ctx, tb_sw)
def main():
    """Train a triplet-loss embedding network on CIFAR-10/100.

    Builds the triplet dataset and loaders, wraps a model-zoo backbone in a
    TripletNet, then trains with TripletLoss while periodically logging an
    embedding projection (with images) to TensorBoard via MXBoard.
    """
    opt = parse_args()
    batch_size = opt.batch_size
    classes = 10

    # Init transformer
    # See https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/data/data_augmentation.html
    jitter_param = 0.4
    transform_train = transforms.Compose([
        transforms.Resize(32),
        transforms.RandomResizedCrop((32, 32),
                                     scale=(0.8, 1.0),
                                     ratio=(0.9, 1.1)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param,
                                     hue=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    transform_test = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    # Un-normalized variant used only to fetch display images for TensorBoard.
    transform_test_viz = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
    ])

    dataset = opt.dataset
    if dataset == 'cifar10':
        dataset_train = gluon.data.vision.CIFAR10(train=True)
        dataset_test = gluon.data.vision.CIFAR10(train=False)
    elif dataset == 'cifar100':
        dataset_train = gluon.data.vision.CIFAR100(train=True,
                                                   fine_label=True)
        dataset_test = gluon.data.vision.CIFAR100(train=False,
                                                  fine_label=True)
    else:
        # NOTE(review): falls through with dataset_train undefined — the
        # next line would raise NameError for an unknown dataset name.
        print("Dataset: {} is unknow".format(dataset))

    triplet_dataset_train = TripletDataset(dataset_train,
                                           transform=transform_train)
    triplet_dataset_train_loader = gluon.data.DataLoader(
        triplet_dataset_train,
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=opt.num_workers)
    dataset_test_loader = gluon.data.DataLoader(
        dataset_test.transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)
    # TODO : Try normalizing but failed so we will loop through val set again to get data without normalization
    dataset_test_loader_2 = gluon.data.DataLoader(
        dataset_test.transform_first(transform_test_viz),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)
    print("Number of train sample: {}".format(len(triplet_dataset_train)))
    print("Number of val sample: {}".format(len(dataset_test)))

    num_gpus = opt.num_gpus
    # Total batch size scales with the number of devices.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {
            'classes': classes,
            'drop_rate': opt.drop_rate,
            'pretrained': False,
            'ctx': context
        }
    else:
        kwargs = {'classes': classes, 'pretrained': False, 'ctx': context}
    net = get_model(model_name, **kwargs)
    # Only the backbone's feature extractor is used for the embedding.
    tripletnet = TripletNet(net.features)
    tripletnet.hybridize()
    tripletnet.initialize(mx.init.Xavier(), ctx=context)
    if opt.resume_from:
        tripletnet.load_parameters(opt.resume_from, ctx=context)
    # Note: Copy parameters from net into siamese. This will make training unconvergeble....
    # else:
    #     net_params = net.collect_params()
    #     siamesenet_params = siamesenet.collect_params()
    #     for p1, p2 in zip(net_params.values(), siamesenet_params.values()):
    #         p2.set_data(p1.data())

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = os.path.join(opt.save_dir, "params")
        log_dir = os.path.join(opt.save_dir, "logs")
    else:
        save_dir = 'params'
        log_dir = 'logs'
        save_period = 0
    makedirs(save_dir)
    makedirs(log_dir)

    def test(val_data, val_data_2, ctx, epoch):
        # Collect embeddings + labels from normalized batches, and raw
        # display images from the un-normalized loader, then write a
        # TensorBoard embedding projection.
        embedding = None
        labels = None
        images = None
        initialized = False
        for i, (data, label) in enumerate(val_data):
            if i >= 20:  # only fetch the first 20 batches of images
                break
            data = gluon.utils.split_and_load(data, ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(label, ctx_list=ctx,
                                               batch_axis=0)
            outputs = [tripletnet.get_feature(X) for X in data]
            outputs = mx.nd.concat(*outputs, dim=0)
            label = mx.nd.concat(*label, dim=0)
            if initialized:
                embedding = mx.nd.concat(*(embedding, outputs), dim=0)
                labels = mx.nd.concat(*(labels, label), dim=0)
            else:
                embedding = outputs
                labels = label
                initialized = True
        # NOTE(review): this loop has no `i >= 20` break, so it gathers
        # display images for the WHOLE val set — confirm this is intended.
        for i, (data, _) in enumerate(val_data_2):
            data = gluon.utils.split_and_load(data, ctx_list=ctx,
                                              batch_axis=0)
            data = mx.nd.concat(*data, dim=0)
            if images is None:
                images = data
            else:
                images = mx.nd.concat(*(images, data), dim=0)
        with SummaryWriter(logdir=log_dir) as sw:
            sw.add_embedding(tag='{}_tripletnet_{}'.format(
                opt.dataset, epoch),
                             embedding=embedding,
                             labels=labels,
                             images=images)

    def train(train_data, val_data, epochs, ctx):
        # Full training loop over the triplet loader; checkpoints and
        # embedding visualisations every `save_period` epochs.
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        # One dummy forward pass so the graph exists before add_graph.
        tripletnet.forward(mx.nd.ones((1, 3, 32, 32), ctx=ctx[0]),
                           mx.nd.ones((1, 3, 32, 32), ctx=ctx[0]),
                           mx.nd.ones((1, 3, 32, 32), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(tripletnet)
        trainer = gluon.Trainer(tripletnet.collect_params(), 'adam',
                                {'learning_rate': 0.001})
        # Init contrastive loss
        loss_fn = gluon.loss.TripletLoss(margin=6)
        global_step = 0
        for epoch in range(epochs):
            train_loss = 0
            num_batch = len(train_data)
            tbar = tqdm(train_data)
            for i, batch in enumerate(tbar):
                batch_loss = 0
                # batch = (anchor, positive, negative) image tensors.
                img = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                                 batch_axis=0)
                img_pos = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                                     batch_axis=0)
                img_neg = gluon.utils.split_and_load(batch[2], ctx_list=ctx,
                                                     batch_axis=0)
                with ag.record():
                    output = [
                        tripletnet(x1, x2, x3)
                        for x1, x2, x3 in zip(img, img_pos, img_neg)
                    ]
                    loss = [loss_fn(x1, x2, x3) for x1, x2, x3 in output]
                for l in loss:
                    l.backward()
                    batch_loss += l.mean().asscalar()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                global_step += batch_size
                with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                    sw.add_scalar(tag="BatchLoss",
                                  value=batch_loss,
                                  global_step=global_step)
            train_loss /= batch_size * num_batch
            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLoss",
                              value=train_loss,
                              global_step=global_step)
            if save_period and save_dir and (epoch + 1) % save_period == 0:
                # Test on first device
                test(val_data, dataset_test_loader_2, ctx, epoch)
                tripletnet.save_parameters('{}/{}-{}.params'.format(
                    save_dir, model_name, epoch))
        if save_period and save_dir:
            tripletnet.save_parameters('{}/{}-{}.params'.format(
                save_dir, model_name, epochs - 1))

    train(triplet_dataset_train_loader, dataset_test_loader, opt.num_epochs,
          context)
def __getitem__(self, index):
    """Returns a single training item from the dataset as a dictionary.

    Values correspond to mxnet NDArray.
    Keys in the dictionary are either strings or tuples:

        ("color", <frame_id>, <scale>)      for raw colour images,
        ("color_aug", <frame_id>, <scale>)  for augmented colour images,
        ("K", scale) or ("inv_K", scale)    for camera intrinsics,
        "stereo_T"                          for camera extrinsics, and
        "depth_gt"                          for ground truth depth maps.

    <frame_id> is either:
        an integer (e.g. 0, -1, or 1) representing the temporal step relative
        to 'index', or "s" for the opposite image in the stereo pair.

    <scale> is an integer representing the scale of the image relative to the
    full-size image:
        -1  images at native resolution as loaded from disk
         0  images resized to (self.width,      self.height     )
         1  images resized to (self.width // 2, self.height // 2)
         2  images resized to (self.width // 4, self.height // 4)
         3  images resized to (self.width // 8, self.height // 8)
    """
    inputs = {}

    # Colour augmentation is deliberately disabled; the commented expression
    # shows the intended 50% coin flip during training.
    do_color_aug = False  # self.is_train and random.random() > 0.5
    do_flip = self.is_train and random.random() > 0.5

    line = self.filenames[index].split()
    folder = line[0]

    # A 3-field line is "<folder> <frame_index> <side>"; otherwise only the
    # folder is given.  (One merged check replaces the two duplicated
    # `len(line) == 3` if/else blocks of the original.)
    if len(line) == 3:
        frame_index = int(line[1])
        side = line[2]
    else:
        frame_index = 0
        side = None

    for i in self.frame_idxs:
        if i == "s":
            # "s" requests the opposite image of the stereo pair.
            # NOTE(review): raises KeyError when side is None — assumes "s"
            # only appears in frame_idxs for 3-field filename lines; confirm.
            other_side = {"r": "l", "l": "r"}[side]
            inputs[("color", i, -1)] = self.get_color(
                folder, frame_index, other_side, do_flip)
        else:
            inputs[("color", i, -1)] = self.get_color(
                folder, frame_index + i, side, do_flip)

    # adjusting intrinsics to match each scale in the pyramid
    for scale in range(self.num_scales):
        K = self.K.copy()
        K[0, :] *= self.width // (2 ** scale)
        K[1, :] *= self.height // (2 ** scale)

        inv_K = np.linalg.pinv(K)

        inputs[("K", scale)] = mx.nd.array(K)
        inputs[("inv_K", scale)] = mx.nd.array(inv_K)

    if do_color_aug:
        color_aug = transforms.RandomColorJitter(
            self.brightness, self.contrast, self.saturation, self.hue)
    else:
        color_aug = (lambda x: x)

    self.preprocess(inputs, color_aug)

    # The native-resolution images are only needed by preprocess(); drop them
    # so the returned dict holds just the pyramid-scale tensors.
    for i in self.frame_idxs:
        del inputs[("color", i, -1)]
        del inputs[("color_aug", i, -1)]

    if self.load_depth:
        depth_gt = self.get_depth(folder, frame_index, side, do_flip)
        inputs["depth_gt"] = np.expand_dims(depth_gt, 0)
        inputs["depth_gt"] = mx.nd.array(inputs["depth_gt"].astype(np.float32))

    if "s" in self.frame_idxs:
        # Extrinsics for the stereo partner: a pure x-translation whose sign
        # tracks both the camera side and the horizontal flip.
        stereo_T = np.eye(4, dtype=np.float32)
        baseline_sign = -1 if do_flip else 1
        side_sign = -1 if side == "l" else 1
        stereo_T[0, 3] = side_sign * baseline_sign * 0.1
        inputs["stereo_T"] = mx.nd.array(stereo_T)

    return inputs
def create_loader(self):
    """
    Overwrite the data loader function
    :return: pairwised data loader, None, eval source loader, test target loader
    """
    workers = cpu_count()

    # Both pipelines begin with the same resize step.
    train_steps = [transforms.Resize(self.args.resize)]
    eval_steps = [transforms.Resize(self.args.resize)]

    # Training crop is either random (with scale jitter) or a plain center
    # crop; evaluation always uses the deterministic center crop.
    if self.args.random_crop:
        train_steps.append(
            transforms.RandomResizedCrop(self.args.size, scale=(0.8, 1.2)))
    else:
        train_steps.append(transforms.CenterCrop(self.args.size))
    eval_steps.append(transforms.CenterCrop(self.args.size))

    if self.args.flip:
        train_steps.append(transforms.RandomFlipLeftRight())

    if self.args.random_color:
        jitter = self.args.color_jitter
        train_steps.append(transforms.RandomColorJitter(jitter, jitter, jitter, 0.1))

    # Shared tail: tensor conversion + normalization.
    train_steps.extend([
        transforms.ToTensor(),
        transforms.Normalize(self.args.mean, self.args.std)
    ])
    eval_steps.extend([
        transforms.ToTensor(),
        transforms.Normalize(self.args.mean, self.args.std)
    ])

    train_tforms = transforms.Compose(train_steps)
    eval_tforms = transforms.Compose(eval_steps)

    # Pick the dataset family from the config name, then build the three
    # splits (train-source, eval-source, test-target) with the transforms.
    if 'digits' in self.args.cfg:
        build = self.create_digits_datasets
    elif 'office' in self.args.cfg:
        build = self.create_office_datasets
    elif 'visda' in self.args.cfg:
        build = self.create_visda_datasets
    else:
        raise NotImplementedError
    trs_set, tes_set, tet_set = build(train_tforms, eval_tforms)

    self.train_src_loader = DataLoader(trs_set, self.args.bs,
                                       shuffle=True, num_workers=workers)
    self.test_src_loader = DataLoader(tes_set, self.args.bs,
                                      shuffle=False, num_workers=workers)
    self.test_tgt_loader = DataLoader(tet_set, self.args.bs,
                                      shuffle=False, num_workers=workers)
def get_dataset(path=None, train=True, name=None,
                input_size=224, crop_ratio=0.875, jitter_param=0.4,
                *args, **kwargs):
    """ Method to produce image classification dataset for AutoGluon, can either be a
    :class:`ImageFolderDataset`, :class:`RecordDataset`, or a
    popular dataset already built into AutoGluon ('mnist', 'cifar10', 'cifar100', 'imagenet').

    Parameters
    ----------
    name : str, optional
        Which built-in dataset to use, will override all other options if specified.
        The options are ('mnist', 'cifar', 'cifar10', 'cifar100', 'imagenet')
    train : bool, default = True
        Whether this dataset should be used for training or validation.
    path : str
        The training data location. If using :class:`ImageFolderDataset`,
        image folder`path/to/the/folder` should be provided.
        If using :class:`RecordDataset`, the `path/to/*.rec` should be provided.
    input_size : int
        The input image size.
    crop_ratio : float
        Center crop ratio (for evaluation only)

    Returns
    -------
    Dataset object that can be passed to `task.fit()`, which is actually an
    :class:`autogluon.space.AutoGluonObject`. To interact with such an object yourself,
    you must first call `Dataset.init()` to instantiate the object in Python.
    """
    # Size images are resized to before the center crop, derived so the crop
    # keeps `crop_ratio` of the resized image.
    resize = int(math.ceil(input_size / crop_ratio))

    # A recognized built-in name overrides every other option.
    if isinstance(name, str) and name.lower() in built_in_datasets:
        return get_built_in_dataset(name, train=train, input_size=input_size,
                                    *args, **kwargs)

    # NOTE(review): `path` must be a string here — passing neither `name` nor
    # `path` raises a TypeError on the membership test below.
    if '.rec' in path:
        transform = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]) if train else transforms.Compose([
            transforms.Resize(resize),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
        dataset = RecordDataset(path, *args, **kwargs)
        # BUG FIX: Gluon's Dataset.transform_first() returns a new, lazily
        # transformed dataset instead of mutating in place; the original code
        # discarded the result, so the transform was never applied.
        dataset = dataset.transform_first(transform)
    else:
        # PIL Data Augmentation for users from Mac OSX
        transform = Compose([
            RandomResizedCrop(input_size),
            RandomHorizontalFlip(),
            ColorJitter(0.4, 0.4, 0.4),
            ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]) if train else Compose([
            Resize(resize),
            CenterCrop(input_size),
            ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
        dataset = ImageFolderDataset(path, transform=transform,
                                     *args, **kwargs)
    return dataset.init()
def train_indoor(args, config, reporter):
    """Fine-tune a pretrained model on an indoor-scene image folder dataset
    and report per-epoch validation loss/accuracy through ``reporter``.

    :param args: argparse-style namespace (data path, model name, seeds,
        epochs, lr schedule, gpu/worker counts, ...)
    :param config: dict of hyper-parameter overrides merged into ``args``
    :param reporter: callback invoked as
        ``reporter(mean_loss=..., mean_accuracy=...)`` after each epoch
    """
    # Merge the sampled config into args so both sources look uniform below.
    vars(args).update(config)
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # Set Hyper-params
    batch_size = args.batch_size * max(args.num_gpus, 1)
    ctx = [mx.gpu(i) for i in range(args.num_gpus)] \
        if args.num_gpus > 0 else [mx.cpu()]

    # Define DataLoader
    train_path = os.path.join(args.data, 'train')
    test_path = os.path.join(args.data, 'val')
    jitter_param = 0.4
    lighting_param = 0.1
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(brightness=jitter_param,
                                     contrast=jitter_param,
                                     saturation=jitter_param),
        transforms.RandomLighting(lighting_param),
        transforms.ToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize
    ])
    train_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(
            train_path).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, num_workers=args.num_workers)
    test_data = gluon.data.DataLoader(
        gluon.data.vision.ImageFolderDataset(
            test_path).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=args.num_workers)

    # Load model architecture and Initialize the net with pretrained model
    finetune_net = get_model(args.model, pretrained=True)
    with finetune_net.name_scope():
        # Replace the classifier head with one sized for our classes.
        finetune_net.fc = nn.Dense(args.classes)
    finetune_net.fc.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define trainer
    trainer = gluon.Trainer(finetune_net.collect_params(), 'sgd', {
        'learning_rate': args.lr,
        'momentum': args.momentum,
        'wd': args.wd
    })
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()

    def train(epoch):
        # One pass over the training set; step the LR once at the scheduled epoch.
        if epoch == args.lr_step:
            trainer.set_learning_rate(trainer.learning_rate * args.lr_factor)
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                              batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                               batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()
            trainer.step(batch_size)
        mx.nd.waitall()

    def test():
        # BUG FIX: reset the accuracy metric so each epoch reports its own
        # accuracy; without this, mx.metric.Accuracy keeps accumulating and
        # the reported value is a running average over all previous epochs.
        metric.reset()
        test_loss = 0
        for i, batch in enumerate(test_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                              batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                               batch_axis=0, even_split=False)
            outputs = [finetune_net(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            test_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            metric.update(label, outputs)
        _, test_acc = metric.get()
        test_loss /= len(test_data)
        reporter(mean_loss=test_loss, mean_accuracy=test_acc)

    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test()
else:
    # No checkpoint to resume from: initialize weights from scratch and
    # start counting epochs at zero.
    # NOTE(review): the matching `if` branch (checkpoint loading, presumably)
    # is above this chunk — confirm against the full file.
    net.initialize(ctx=ctx)
    epoch_start = 0
net.hybridize(static_alloc=True, static_shape=True)  # ZoomZoom!!

# Data augmentation definitions
transform_train = transforms.Compose([
    # Randomly crop an area, and then resize it to be 32x32
    transforms.RandomResizedCrop(opt.crop_size, scale=(0.6, 1.)),  # test also with 0.6
    # Randomly flip the image horizontally/vertically
    transforms.RandomFlipLeftRight(),
    transforms.RandomFlipTopBottom(),
    # Randomly jitter the brightness, contrast and saturation of the image
    transforms.RandomColorJitter(brightness=0.9, contrast=0.9, saturation=0.9),  # NEW hue
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    # transforms.RandomGray(p=0.35),  # Random gray scale NEW
    transforms.ToTensor(),
    transforms.RandomRotation(angle_limits=(-90, 90), zoom_in=True),  # Random rotation
    # Normalize the image with mean and standard deviation calculated across all images
    # NOTE(review): these stats are far outside [0, 1] — presumably computed on
    # raw intensities for this specific dataset; verify before reuse.
    transforms.Normalize([11.663384, 10.260227, 7.65015],
                         [21.421959, 18.044296, 15.494861])
])

# Evaluation pipeline: deterministic resize + the same normalization.
transform_test = transforms.Compose([
    transforms.Resize(opt.crop_size),
    transforms.ToTensor(),
    transforms.Normalize([11.663384, 10.260227, 7.65015],
                         [21.421959, 18.044296, 15.494861])
])