def main():
    """Entry point: optionally enable the builtin profiler, then train either
    through the symbolic Module API or imperatively via Gluon.

    Relies on module-level globals prepared by the enclosing script:
    opt, net, context, kv, dataset, batch_size (not visible in this block).
    """
    if opt.builtin_profiler > 0:
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state('run')
    if opt.mode == 'symbolic':
        data = mx.sym.var('data')
        if opt.dtype == 'float16':
            data = mx.sym.Cast(data=data, dtype=np.float16)
        out = net(data)
        if opt.dtype == 'float16':
            # Cast logits back to fp32 so softmax/loss run in full precision.
            out = mx.sym.Cast(data=out, dtype=np.float32)
        softmax = mx.sym.SoftmaxOutput(out, name='softmax')
        mod = mx.mod.Module(softmax, context=context)
        train_data, val_data = get_data_iters(dataset, batch_size, opt)
        mod.fit(train_data,
                eval_data=val_data,
                num_epoch=opt.epochs,
                kvstore=kv,
                batch_end_callback=mx.callback.Speedometer(batch_size, max(1, opt.log_interval)),
                epoch_end_callback=mx.callback.do_checkpoint('image-classifier-%s' % opt.model),
                optimizer='sgd',
                optimizer_params={'learning_rate': opt.lr, 'wd': opt.wd,
                                  'momentum': opt.momentum, 'multi_precision': True},
                initializer=mx.init.Xavier(magnitude=2))
        # FIX: mx.mod.Module exposes save_params(), not save_parameters()
        # (save_parameters is the Gluon Block API). The original call raised
        # AttributeError after training completed; sibling blocks in this file
        # correctly call mod.save_params().
        mod.save_params('image-classifier-%s-%d-final.params' % (opt.model, opt.epochs))
    else:
        if opt.mode == 'hybrid':
            net.hybridize()
        train(opt, context)
    if opt.builtin_profiler > 0:
        profiler.set_state('stop')
        print(profiler.dumps())
def test_profile_create_domain_dept():
    """Smoke test: create a profiler Domain while the profiler is running,
    then dump the trace and stop."""
    profiler.set_config(profile_symbolic=True, filename='test_profile_create_domain_dept.json')
    profiler.set_state('run')
    domain = profiler.Domain(name='PythonDomain')
    print("Domain created: {}".format(str(domain)))
    # FIX: dump_profile() is the deprecated pre-1.x name for dump(); use
    # dump() for consistency with the other profiler tests in this file.
    profiler.dump()
    profiler.set_state('stop')
def __init__(self, opts, ctx):
    """Capture training options, derive the output directory layout, and set
    up optional profiling plus root logging.

    Parameters
    ----------
    opts : parsed options object — must provide epochs, batch_size,
        chkpt_interval, log_interval, weight_interval, profile, overwrite,
        outdir (assumption from usage below; confirm against the CLI parser).
    ctx : MXNet context(s) the trainer will run on.
    """
    self._opts = opts
    self._epochs = opts.epochs
    self._batch_size = opts.batch_size
    self._ctx = ctx
    self._chkpt_interval = opts.chkpt_interval
    self._log_interval = opts.log_interval
    self._weight_interval = opts.weight_interval
    self._profile = opts.profile
    # Tick counters used to pace periodic work (epoch/batch granularity).
    self._epoch_tick = 0
    self._batch_tick = 0
    self._networks = []
    self._overwrite = opts.overwrite
    # Default output dir encodes model name, epoch count and a timestamp,
    # e.g. "<model>-<N>e-<yy_mm_dd-HH_MM>", unless opts.outdir overrides it.
    self._outdir = opts.outdir or os.path.join(os.getcwd(), '{}-{}e-{}'.format(
        self.model_name(), self._epochs, datetime.now().strftime('%y_%m_%d-%H_%M')))
    self._outdir = os.path.expanduser(self._outdir)
    self._outlogs = os.path.join(self._outdir, 'logs')
    self._outchkpts = os.path.join(self._outdir, 'checkpoints')
    self._outsounds = os.path.join(self._outdir, 'sounds')
    self._prepare_outdir()
    if self._profile:
        # Profiler output lives alongside the other run artifacts.
        self._outprofile = os.path.join(self._outdir, 'profile.json')
        profiler.set_config(profile_all=True, aggregate_stats=True, filename=self._outprofile)
    logging.basicConfig()
    self._logger = logging.getLogger()
    self._logger.setLevel(logging.INFO)
def test_profile_create_domain_dept():
    """Smoke test: create a profiler Domain while profiling is active, dump
    the trace, then stop the profiler."""
    # Only symbolic-operator profiling is enabled for this test.
    profiler.set_config(profile_symbolic=True, filename='test_profile_create_domain_dept.json')
    profiler.set_state('run')
    domain = profiler.Domain(name='PythonDomain')
    profiler.dump()
    profiler.set_state('stop')
def main():
    """Train an image classifier, either via the symbolic Module API or
    imperatively (Gluon), with optional builtin profiling.

    Uses module-level globals (opt, net, dataset, batch_size, num_gpus,
    context, get_data_iters, train) set up by the enclosing script.
    """
    if opt.builtin_profiler > 0:
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state('run')
    if opt.mode == 'symbolic':
        data = mx.sym.var('data')
        out = net(data)
        softmax = mx.sym.SoftmaxOutput(out, name='softmax')
        # One context per visible GPU, falling back to CPU.
        mod = mx.mod.Module(softmax, context=[mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()])
        kv = mx.kv.create(opt.kvstore)
        # Shard the dataset across kvstore workers for distributed runs.
        train_data, val_data = get_data_iters(dataset, batch_size, kv.num_workers, kv.rank)
        mod.fit(train_data,
                eval_data=val_data,
                num_epoch=opt.epochs,
                kvstore=kv,
                batch_end_callback=mx.callback.Speedometer(batch_size, max(1, opt.log_interval)),
                epoch_end_callback=mx.callback.do_checkpoint('image-classifier-%s' % opt.model),
                optimizer='sgd',
                optimizer_params={'learning_rate': opt.lr, 'wd': opt.wd,
                                  'momentum': opt.momentum, 'multi_precision': True},
                initializer=mx.init.Xavier(magnitude=2))
        mod.save_params('image-classifier-%s-%d-final.params' % (opt.model, opt.epochs))
    else:
        if opt.mode == 'hybrid':
            net.hybridize()
        train(opt, context)
    if opt.builtin_profiler > 0:
        profiler.set_state('stop')
        print(profiler.dumps())
def main():
    """Optionally run the builtin profiler around training; hybridize the
    network first when running in hybrid mode."""
    def _profiling_requested():
        # Re-evaluated at each call site, mirroring the original's two reads.
        return opt.builtin_profiler > 0

    if _profiling_requested():
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state('run')

    if opt.mode == 'hybrid':
        net.hybridize()
    train(opt, device)

    if _profiling_requested():
        profiler.set_state('stop')
        print(profiler.dumps())
def main():
    """Optionally profile, hybridize with static allocation, and train."""
    if opt.builtin_profiler > 0:
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state('run')
    if opt.mode == 'hybrid':
        # static_alloc/static_shape let the cached graph reuse buffers and
        # skip repeated shape inference in the steady state.
        net.hybridize(static_alloc=True, static_shape=True)
    train(opt, context)
    if opt.builtin_profiler > 0:
        profiler.set_state('stop')
        print(profiler.dumps())
def enable_profiler(profile_filename, run=True, continuous_dump=False, aggregate_stats=False):
    """Configure the MXNet profiler for symbolic, imperative, memory and API
    events, writing to *profile_filename*; start it when *run* is True."""
    config = dict(
        profile_symbolic=True,
        profile_imperative=True,
        profile_memory=True,
        profile_api=True,
        filename=profile_filename,
        continuous_dump=continuous_dump,
        aggregate_stats=aggregate_stats,
    )
    profiler.set_config(**config)
    if run is True:
        profiler.set_state('run')
def main():
    """Run training (symbolic or imperative), wrapped in the builtin
    profiler when requested."""
    if opt.builtin_profiler > 0:
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state('run')

    # Select the trainer for the requested mode, then invoke it once.
    run_training = train_symbolic if opt.mode == 'symbolic' else train
    run_training(opt, context)

    if opt.builtin_profiler > 0:
        profiler.set_state('stop')
        print(profiler.dumps())
def enable_profiler(profile_filename, run=True, continuous_dump=False, aggregate_stats=False):
    """Configure the MXNet profiler and optionally start it.

    Parameters
    ----------
    profile_filename : str
        Output file for the profile trace.
    run : bool
        When True, switch the profiler to the 'run' state after configuring.
    continuous_dump : bool
        Forwarded to profiler.set_config.
    aggregate_stats : bool
        Forwarded to profiler.set_config; enables in-memory aggregate stats.
    """
    profiler.set_config(profile_symbolic=True,
                        profile_imperative=True,
                        profile_memory=True,
                        profile_api=True,
                        filename=profile_filename,
                        continuous_dump=continuous_dump,
                        aggregate_stats=aggregate_stats
                        )
    print('profile file save to {}'.format(profile_filename))
    if run is True:
        profiler.set_state('run')
def enable_profiler(run=True, continuous_dump=False, aggregate_stats=False):
    """Enable every MXNet profiler event class, writing the trace to
    test_profile.json; start profiling immediately when *run* is True."""
    profile_filename = 'test_profile.json'
    settings = {
        'profile_symbolic': True,
        'profile_imperative': True,
        'profile_memory': True,
        'profile_api': True,
        'filename': profile_filename,
        'continuous_dump': continuous_dump,
        'aggregate_stats': aggregate_stats,
    }
    profiler.set_config(**settings)
    print('profile file save to {}'.format(profile_filename))
    if run is True:
        profiler.set_state('run')
def main():
    """Train via the symbolic Module API (tracking acc, top-5 and CE) or fall
    back to imperative/hybrid Gluon training.

    Uses module-level globals (opt, net, context, dataset, batch_size,
    get_data_iters, train) set up by the enclosing script.
    """
    if opt.builtin_profiler > 0:
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state('run')
    if opt.mode == 'symbolic':
        data = mx.sym.var('data')
        out = net(data)
        softmax = mx.sym.SoftmaxOutput(out, name='softmax')
        mod = mx.mod.Module(softmax, context=context)
        kv = mx.kv.create(opt.kvstore)
        # Evaluate accuracy, top-5 accuracy and cross-entropy during fit().
        eval_metric = []
        eval_metric.append(mx.metric.create('acc'))
        eval_metric.append(mx.metric.create('top_k_accuracy', top_k=5))
        eval_metric.append(mx.metric.create('ce'))
        train_data, val_data = get_data_iters(dataset, batch_size, kv.num_workers, kv.rank)
        mod.fit(train_data,
                eval_data=val_data,
                eval_metric=eval_metric,
                num_epoch=opt.epochs,
                kvstore=kv,
                batch_end_callback=mx.callback.Speedometer(
                    batch_size, max(1, opt.log_interval)),
                epoch_end_callback=mx.callback.do_checkpoint(
                    'image-classifier-%s' % opt.model),
                optimizer='sgd',
                optimizer_params={
                    'learning_rate': opt.lr,
                    'wd': opt.wd,
                    'momentum': opt.momentum,
                    'multi_precision': True
                },
                initializer=mx.init.Xavier(magnitude=2))
        mod.save_params('image-classifier-%s-%d-final.params' % (opt.model, opt.epochs))
    else:
        if opt.mode == 'hybrid':
            net.hybridize()
        train(opt, context)
    if opt.builtin_profiler > 0:
        profiler.set_state('stop')
        print(profiler.dumps())
def main():
    """Entry point: optionally profile, then train symbolically (Module API)
    or imperatively (Gluon), with fp16 casts around the network when asked.

    Relies on module-level globals (opt, net, context, kv, dataset,
    batch_size) prepared by the enclosing script.
    """
    if opt.builtin_profiler > 0:
        profiler.set_config(profile_all=True, aggregate_stats=True)
        profiler.set_state("run")
    if opt.mode == "symbolic":
        data = mx.sym.var("data")
        if opt.dtype == "float16":
            data = mx.sym.Cast(data=data, dtype=np.float16)
        out = net(data)
        if opt.dtype == "float16":
            # Cast logits back to fp32 so softmax/loss run in full precision.
            out = mx.sym.Cast(data=out, dtype=np.float32)
        softmax = mx.sym.SoftmaxOutput(out, name="softmax")
        mod = mx.mod.Module(softmax, context=context)
        train_data, val_data = get_data_iters(dataset, batch_size, opt)
        mod.fit(
            train_data,
            eval_data=val_data,
            num_epoch=opt.epochs,
            kvstore=kv,
            batch_end_callback=mx.callback.Speedometer(
                batch_size, max(1, opt.log_interval)),
            epoch_end_callback=mx.callback.do_checkpoint(
                "image-classifier-%s" % opt.model),
            optimizer="sgd",
            optimizer_params={
                "learning_rate": opt.lr,
                "wd": opt.wd,
                "momentum": opt.momentum,
                "multi_precision": True,
            },
            initializer=mx.init.Xavier(magnitude=2),
        )
        # FIX: mx.mod.Module has save_params(), not save_parameters() (the
        # latter is the Gluon Block API); the original call raised
        # AttributeError once training finished.
        mod.save_params("image-classifier-%s-%d-final.params" % (opt.model, opt.epochs))
    else:
        if opt.mode == "hybrid":
            net.hybridize()
        train(opt, context)
    if opt.builtin_profiler > 0:
        profiler.set_state("stop")
        print(profiler.dumps())
def cpp_profile_it(*args, **kwargs):
    """Wrapper that runs `func` (captured from the enclosing decorator scope)
    under the MXNet profiler and returns (result, parsed profiler output).

    Returns
    -------
    tuple : (res, profiler_output) where res is func's return value and
        profiler_output is the section of the dump for the operator.

    Raises
    ------
    ValueError : when the operator name cannot be determined from either
        args[0] or kwargs['block'].
    """
    # Profile the operation
    profiler.set_config(profile_all=True, aggregate_stats=True)
    profiler.set_state('run')
    res = func(*args, **kwargs)
    profiler.set_state('stop')
    # Prepare the results; reset=True clears aggregate stats for the next call.
    profiler_dump = profiler.dumps(reset=True)
    # args[0] is assumed to be operator name, if not found check for block name.
    # NOTE: This parameter should be removed when we get away from parsing
    # profiler output and start using new profiler APIs - get_summary(), reset()
    if len(args) > 0:
        operator_name = args[0].__name__
    elif 'block' in kwargs:
        operator_name = kwargs['block']._op_name
    else:
        raise ValueError("Unable to identify operator name to extract profiler output!")
    # Get the MXNet profile output
    profiler_output = parse_profiler_dump(operator_name, profiler_dump)
    return res, profiler_output
# Horovod: fetch and broadcast parameters params = model.collect_params() if params is not None: hvd.broadcast_parameters(params, root_rank=0) # Horovod: create DistributedTrainer, a subclass of gluon.Trainer trainer = hvd.DistributedTrainer(params, opt) # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() metric = mx.metric.Accuracy() # Set profiler profiler.set_config(profile_all=True, aggregate_stats=True, filename="profile_mx_mnist.json") # Train model for epoch in range(args.epochs): tic = time.time() train_data.reset() metric.reset() for nbatch, batch in enumerate(train_data, start=1): # Start and pause profiling if nbatch == 100: if epoch == 0: profiler.set_state('run') else: profiler.resume() elif nbatch == 200:
import sys
import os
import math
import mxnet as mx
import time
import psutil
import gc
from mxnet import profiler
from util import *

# Profile everything (ops, memory, API) with aggregate stats, dumping
# continuously to profile_output.json.
profiler.set_config(profile_all=True, aggregate_stats=True,
                    continuous_dump=True, filename='profile_output.json')


def cpuStats():
    """Return this process's resident memory usage in GiB (via psutil)."""
    # print(sys.version)
    # print(psutil.cpu_percent())
    # print(psutil.virtual_memory())  # physical memory usage
    pid = os.getpid()
    py = psutil.Process(pid)
    memoryUse = py.memory_info()[0] / 2. ** 30  # memory use in GB...I think
    # print('memory GB:', memoryUse)
    return memoryUse


# Data-augmentation constants (color jitter / lighting / ImageNet RGB mean).
jitter_param = 0.4
lighting_param = 0.1
mean_rgb = [123.68, 116.779, 103.939]
import mxnet as mx
from mxnet import nd
from mxnet import profiler

# Profile ops, memory and API calls; keep aggregate stats for dumps().
profiler.set_config(profile_all=True, aggregate_stats=True,
                    filename='cpu_gpu_data_copy_profiler_output.json')

# Create a large Tensor on CPU
data1 = nd.random.uniform(shape=(10000, 10000), ctx=mx.cpu())
data2 = nd.random.uniform(shape=(10000, 10000), ctx=mx.cpu())
# Finish the (async) random fills so they are excluded from the profile.
nd.waitall()

# Profiler copying data and operation only
profiler.set_state('run')

# FIX: as_in_context returns a NEW array (and returns self when the context
# already matches) — the original discarded the GPU copies, so the "+" below
# ran on the CPU arrays and the final "copy back to CPU" was a no-op.
# Bind the returned GPU arrays so the add really executes on the GPU.
data1 = data1.as_in_context(context=mx.gpu(0))
data2 = data2.as_in_context(context=mx.gpu(0))

# Do couple of operations on GPU
res = data1 + data2
#res = nd.mean(res)

# Copy result back to CPU
res_cpu = res.as_in_context(context=mx.cpu())
# Drain all pending async work before stopping the profiler.
nd.waitall()

profiler.set_state('stop')
print(profiler.dumps())
profiler.dump()
def train_net(net, config, check_flag, logger, sig_state, sig_pgbar, sig_table):
    """Train `net` on CIFAR-10 driven by a GUI: emits progress/state/table
    signals, supports mixup, AMP, TensorBoard, profiling, checkpointing and
    CSV logging. Output artifacts land under ./<model_name>[-timestamp]/.

    NOTE(review): this body was reconstructed from whitespace-mangled source;
    indentation of a few statements (flagged below) should be verified
    against the original file.
    """
    print(config)
    # config = Configs()
    # matplotlib.use('Agg')
    # import matplotlib.pyplot as plt
    sig_pgbar.emit(-1)
    mx.random.seed(1)
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    classes = 10
    num_epochs = config.train_cfg.epoch
    batch_size = config.train_cfg.batchsize
    optimizer = config.lr_cfg.optimizer
    lr = config.lr_cfg.lr
    num_gpus = config.train_cfg.gpu
    # Scale the batch across devices.
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = config.data_cfg.worker
    warmup = config.lr_cfg.warmup
    # 50000 = CIFAR-10 training-set size, so steps are (50000 // batch_size).
    if config.lr_cfg.decay == 'cosine':
        lr_sch = lr_scheduler.CosineScheduler((50000//batch_size)*num_epochs,
                                              base_lr=lr,
                                              warmup_steps=warmup * (50000//batch_size),
                                              final_lr=1e-5)
    else:
        lr_sch = lr_scheduler.FactorScheduler((50000//batch_size)*config.lr_cfg.factor_epoch,
                                              factor=config.lr_cfg.factor,
                                              base_lr=lr,
                                              warmup_steps=warmup*(50000//batch_size))
    model_name = config.net_cfg.name
    if config.data_cfg.mixup:
        model_name += '_mixup'
    if config.train_cfg.amp:
        model_name += '_amp'
    # Never clobber an existing run directory: append a timestamp suffix.
    base_dir = './'+model_name
    if os.path.exists(base_dir):
        base_dir = base_dir + '-' + time.strftime("%m-%d-%H.%M.%S", time.localtime())
    makedirs(base_dir)
    if config.save_cfg.tensorboard:
        logdir = base_dir+'/tb/'+model_name
        if os.path.exists(logdir):
            logdir = logdir + '-' + time.strftime("%m-%d-%H.%M.%S", time.localtime())
        sw = SummaryWriter(logdir=logdir, flush_secs=5, verbose=False)
        # Convenience launcher for TensorBoard on Windows.
        cmd_file = open(base_dir+'/tb.bat', mode='w')
        cmd_file.write('tensorboard --logdir=./')
        cmd_file.close()
    save_period = 10
    save_dir = base_dir+'/'+'params'
    makedirs(save_dir)
    plot_name = base_dir+'/'+'plot'
    makedirs(plot_name)
    stat_name = base_dir+'/'+'stat.txt'
    csv_name = base_dir+'/'+'data.csv'
    if os.path.exists(csv_name):
        csv_name = base_dir+'/'+'data-' + time.strftime("%m-%d-%H.%M.%S", time.localtime())+'.csv'
    csv_file = open(csv_name, mode='w', newline='')
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Epoch', 'train_loss', 'train_acc',
                         'valid_loss', 'valid_acc', 'lr', 'time'])
    # Route logging to stdout, the GUI logger, and a per-run file.
    logging_handlers = [logging.StreamHandler(), logger]
    logging_handlers.append(logging.FileHandler(
        '%s/train_cifar10_%s.log' % (model_name, model_name)))
    logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
    logging.info(config)
    if config.train_cfg.amp:
        amp.init()
    if config.save_cfg.profiler:
        profiler.set_config(profile_all=True, aggregate_stats=True, continuous_dump=True,
                            filename=base_dir+'/%s_profile.json' % model_name)
        is_profiler_run = False
    # Build the training augmentation pipeline from the data config.
    trans_list = []
    imgsize = config.data_cfg.size
    if config.data_cfg.crop:
        trans_list.append(gcv_transforms.RandomCrop(
            32, pad=config.data_cfg.crop_pad))
    if config.data_cfg.cutout:
        trans_list.append(CutOut(config.data_cfg.cutout_size))
    if config.data_cfg.flip:
        trans_list.append(transforms.RandomFlipLeftRight())
    if config.data_cfg.erase:
        trans_list.append(gcv_transforms.block.RandomErasing(s_max=0.25))
    trans_list.append(transforms.Resize(imgsize))
    trans_list.append(transforms.ToTensor())
    trans_list.append(transforms.Normalize([0.4914, 0.4822, 0.4465],
                                           [0.2023, 0.1994, 0.2010]))
    transform_train = transforms.Compose(trans_list)
    transform_test = transforms.Compose([
        transforms.Resize(imgsize),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    def label_transform(label, classes):
        # One-hot encode integer labels on the label's own device.
        ind = label.astype('int')
        res = nd.zeros((ind.shape[0], classes), ctx=label.context)
        res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
        return res

    def test(ctx, val_data):
        # Full pass over val_data; returns (metric name, accuracy, mean loss).
        metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        num_batch = len(val_data)
        test_loss = 0
        for i, batch in enumerate(val_data):
            data = gluon.utils.split_and_load(
                batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(
                batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            loss = [loss_fn(yhat, y) for yhat, y in zip(outputs, label)]
            metric.update(label, outputs)
            test_loss += sum([l.sum().asscalar() for l in loss])
        test_loss /= batch_size * num_batch
        name, val_acc = metric.get()
        return name, val_acc, test_loss

    def train(epochs, ctx):
        # Inner training loop; reads most configuration from the closure.
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if config.train_cfg.param_init:
            init_func = getattr(mx.init, config.train_cfg.init)
            net.initialize(init_func(), ctx=ctx, force_reinit=True)
        else:
            net.load_parameters(config.train_cfg.param_file, ctx=ctx)
        summary(net, stat_name, nd.uniform(
            shape=(1, 3, imgsize, imgsize), ctx=ctx[0]))
        # net = nn.HybridBlock()
        net.hybridize()
        root = config.dir_cfg.dataset
        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(
                root=root, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True,
            last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(
                root=root, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)
        trainer_arg = {'learning_rate': config.lr_cfg.lr,
                       'wd': config.lr_cfg.wd, 'lr_scheduler': lr_sch}
        # NOTE(review): eval() on a config string — trusted input assumed.
        extra_arg = eval(config.lr_cfg.extra_arg)
        trainer_arg.update(extra_arg)
        trainer = gluon.Trainer(net.collect_params(), optimizer, trainer_arg)
        if config.train_cfg.amp:
            amp.init_trainer(trainer)
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.RMSE()
        # Mixup produces soft labels, so the loss must accept dense labels.
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(
            sparse_label=False if config.data_cfg.mixup else True)
        train_history = TrainingHistory(['training-error', 'validation-error'])
        # acc_history = TrainingHistory(['training-acc', 'validation-acc'])
        loss_history = TrainingHistory(['training-loss', 'validation-loss'])
        iteration = 0
        best_val_score = 0
        # print('start training')
        sig_state.emit(1)
        sig_pgbar.emit(0)
        # signal.emit('Training')
        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1
            for i, batch in enumerate(train_data):
                # Profile / graph-log exactly one early iteration.
                if epoch == 0 and iteration == 1 and config.save_cfg.profiler:
                    profiler.set_state('run')
                    is_profiler_run = True
                if epoch == 0 and iteration == 1 and config.save_cfg.tensorboard:
                    sw.add_graph(net)
                lam = np.random.beta(alpha, alpha)
                # Disable mixup for the last 20 epochs (or when turned off).
                if epoch >= epochs - 20 or not config.data_cfg.mixup:
                    lam = 1
                data_1 = gluon.utils.split_and_load(
                    batch[0], ctx_list=ctx, batch_axis=0)
                label_1 = gluon.utils.split_and_load(
                    batch[1], ctx_list=ctx, batch_axis=0)
                if not config.data_cfg.mixup:
                    data = data_1
                    label = label_1
                else:
                    # Mix each sample with the batch reversed.
                    data = [lam*X + (1-lam)*X[::-1] for X in data_1]
                    label = []
                    for Y in label_1:
                        y1 = label_transform(Y, classes)
                        y2 = label_transform(Y[::-1], classes)
                        label.append(lam*y1 + (1-lam)*y2)
                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                if config.train_cfg.amp:
                    with ag.record():
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                            # scaled_loss.backward()
                else:
                    for l in loss:
                        l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                output_softmax = [nd.SoftmaxActivation(out) for out in output]
                train_metric.update(label, output_softmax)
                metric.update(label_1, output_softmax)
                name, acc = train_metric.get()
                if config.save_cfg.tensorboard:
                    sw.add_scalar(tag='lr', value=trainer.learning_rate,
                                  global_step=iteration)
                if epoch == 0 and iteration == 1 and config.save_cfg.profiler:
                    nd.waitall()
                    profiler.set_state('stop')
                    profiler.dump()
                iteration += 1
                sig_pgbar.emit(iteration)
                if check_flag()[0]:
                    sig_state.emit(2)
                # Pause/stop polling loop driven by the GUI flags.
                # NOTE(review): indentation reconstructed — verify it sits at
                # batch-loop level in the original.
                while(check_flag()[0] or check_flag()[1]):
                    if check_flag()[1]:
                        print('stop')
                        return
                    else:
                        time.sleep(5)
                        print('pausing')
            epoch_time = time.time() - tic
            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            _, train_acc = metric.get()
            name, val_acc, _ = test(ctx, val_data)
            # if config.data_cfg.mixup:
            #     train_history.update([acc, 1-val_acc])
            #     plt.cla()
            #     train_history.plot(save_path='%s/%s_history.png' %
            #                        (plot_name, model_name))
            # else:
            train_history.update([1-train_acc, 1-val_acc])
            plt.cla()
            train_history.plot(save_path='%s/%s_history.png' %
                               (plot_name, model_name))
            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('%s/%.4f-cifar-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name, epoch))
            current_lr = trainer.learning_rate
            name, val_acc, val_loss = test(ctx, val_data)
            logging.info('[Epoch %d] loss=%f train_acc=%f train_RMSE=%f\n val_acc=%f val_loss=%f lr=%f time: %f' %
                         (epoch, train_loss, train_acc, acc, val_acc, val_loss, current_lr, epoch_time))
            loss_history.update([train_loss, val_loss])
            plt.cla()
            loss_history.plot(save_path='%s/%s_loss.png' % (plot_name, model_name),
                              y_lim=(0, 2), legend_loc='best')
            if config.save_cfg.tensorboard:
                sw._add_scalars(tag='Acc',
                                scalar_dict={'train_acc': train_acc, 'test_acc': val_acc},
                                global_step=epoch)
                sw._add_scalars(tag='Loss',
                                scalar_dict={'train_loss': train_loss, 'test_loss': val_loss},
                                global_step=epoch)
            sig_table.emit([epoch, train_loss, train_acc,
                            val_loss, val_acc, current_lr, epoch_time])
            csv_writer.writerow([epoch, train_loss, train_acc,
                                 val_loss, val_acc, current_lr, epoch_time])
            csv_file.flush()
            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/cifar10-%s-%d.params' %
                                    (save_dir, model_name, epoch))
        if save_period and save_dir:
            net.save_parameters('%s/cifar10-%s-%d.params' %
                                (save_dir, model_name, epochs-1))

    train(num_epochs, context)
    if config.save_cfg.tensorboard:
        sw.close()
    for ctx in context:
        ctx.empty_cache()
    csv_file.close()
    # Tear down logging handlers, then re-import so later runs can reconfigure.
    logging.shutdown()
    reload(logging)
    sig_state.emit(0)
model_prefix, load_epoch) # initialize the module mod = mx.module.Module(symbol=sym, context=ctx, data_names=['user', 'item'], label_names=['score']) mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) # get the sparse weight parameter mod.set_params(arg_params=arg_params, aux_params=aux_params) # profile profiler.set_config(profile_all=True, aggregate_stats=True, filename='profile_neumf.json') profiler.set_state('run') if benchmark: logging.info('Evaluating...') (hits, ndcgs) = evaluate_model(mod, testRatings, testNegatives, topK, evaluation_threads) hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean() print('HR = %.4f, NDCG = %.4f' % (hr, ndcg)) logging.info('Evaluating completed') profiler.set_state('stop') else: logging.info('Inference started ...') nbatch = 0 tic = time()
import mxnet as mx
from mxnet import autograd
from mxnet import profiler

#################### Set Profiler Config ######################
# Profile all event classes with aggregate stats; trace goes to JSON.
profiler.set_config(profile_all=True, aggregate_stats=True,
                    filename='cpu_mnist_cnn_profile_output.json')
###############################################################

# Build Network: LeNet-style CNN (2x conv/pool, 2 dense layers, 10 classes).
from mxnet import gluon
net = gluon.nn.HybridSequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(512, activation="relu"))
    net.add(gluon.nn.Dense(10))

# MNIST training loader; ToTensor converts HWC uint8 -> CHW float.
from mxnet.gluon.data.vision import transforms
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True).transform_first(transforms.ToTensor()),
    batch_size=64, shuffle=True)

# Set Context
ctx = mx.cpu()
def train(net, train_data, val_data, eval_metric, batch_size, ctx, args):
    """Training pipeline for a Faster-RCNN-style detector with optional
    Horovod or "perseus" kvstore distribution, AMP, and profiling of the
    first ~100 iterations when args.profiler == "1".

    NOTE(review): body reconstructed from whitespace-mangled source; a few
    indentation details (flagged below) should be verified.
    """
    print("rank:{}, training...".format(
        kv.rank)) if "perseus" in args.kv_store else None
    if args.profiler == "1":
        # profiler config — one output file per perseus rank.
        profiler.set_config(profile_all=True,
                            aggregate_stats=True,
                            continuous_dump=True,
                            filename='profile_output_{}.json'.format(
                                kv.rank if "perseus" in args.kv_store else 0))
    # Freeze everything, then re-enable gradients only for trainable params.
    net.collect_params().setattr('grad_req', 'null')
    net.collect_train_params().setattr('grad_req', 'write')
    optimizer_params = {
        'learning_rate': args.lr,
        'wd': args.wd,
        'momentum': args.momentum
    }
    if args.amp:
        optimizer_params['multi_precision'] = True
    if args.horovod:
        hvd.broadcast_parameters(net.collect_params(), root_rank=0)
        trainer = hvd.DistributedTrainer(
            net.collect_train_params(
            ),  # fix batchnorm, fix first stage, etc...
            'sgd',
            optimizer_params)
    else:
        trainer = gluon.Trainer(
            net.collect_train_params(
            ),  # fix batchnorm, fix first stage, etc...
            'sgd',
            optimizer_params,
            update_on_kvstore=None,
            kvstore=kv)  #(False if args.amp else None), kvstore=kv)
    if args.amp:
        amp.init_trainer(trainer)
    # lr decay policy
    lr_decay = float(args.lr_decay)
    lr_steps = sorted(
        [float(ls) for ls in args.lr_decay_epoch.split(',') if ls.strip()])
    lr_warmup = float(args.lr_warmup)  # avoid int division
    # TODO(zhreshold) losses?
    rpn_cls_loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss(
        from_sigmoid=False)
    rpn_box_loss = mx.gluon.loss.HuberLoss(
        rho=args.rpn_smoothl1_rho)  # == smoothl1
    rcnn_cls_loss = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    rcnn_box_loss = mx.gluon.loss.HuberLoss(
        rho=args.rcnn_smoothl1_rho)  # == smoothl1
    metrics = [
        mx.metric.Loss('RPN_Conf'),
        mx.metric.Loss('RPN_SmoothL1'),
        mx.metric.Loss('RCNN_CrossEntropy'),
        mx.metric.Loss('RCNN_SmoothL1'),
    ]
    rpn_acc_metric = RPNAccMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    rcnn_acc_metric = RCNNAccMetric()
    rcnn_bbox_metric = RCNNL1LossMetric()
    metrics2 = [
        rpn_acc_metric, rpn_bbox_metric, rcnn_acc_metric, rcnn_bbox_metric
    ]
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = args.save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)
    if args.custom_model:
        logger.info(
            'Custom model enabled. Expert Only!! Currently non-FPN model is not supported!!'
            ' Default setting is for MS-COCO.')
    logger.info(args)
    if args.verbose:
        logger.info('Trainable parameters:')
        logger.info(net.collect_train_params().keys())
    logger.info('Start training from [Epoch {}]'.format(args.start_epoch))
    best_map = [0]
    for epoch in range(args.start_epoch, args.epochs):
        mix_ratio = 1.0
        if not args.disable_hybridization:
            net.hybridize(static_alloc=args.static_alloc)
        rcnn_task = ForwardBackwardTask(net,
                                        trainer,
                                        rpn_cls_loss,
                                        rpn_box_loss,
                                        rcnn_cls_loss,
                                        rcnn_box_loss,
                                        mix_ratio=1.0)
        if "perseus" in args.kv_store:
            args.executor_threads = 1
        # Parallel executor is only used for single-process multi-GPU runs.
        executor = Parallel(args.executor_threads, rcnn_task) if (
            not args.horovod and "perseus" not in args.kv_store) else None
        if args.mixup:
            # TODO(zhreshold) only support evenly mixup now, target generator needs to be modified otherwise
            train_data._dataset._data.set_mixup(np.random.uniform, 0.5, 0.5)
            mix_ratio = 0.5
            if epoch >= args.epochs - args.no_mixup_epochs:
                train_data._dataset._data.set_mixup(None)
                mix_ratio = 1.0
        # Apply every due LR decay step.
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = trainer.learning_rate * lr_decay
            lr_steps.pop(0)
            trainer.set_learning_rate(new_lr)
            logger.info("[Epoch {}] Set learning rate to {}".format(
                epoch, new_lr))
        for metric in metrics:
            metric.reset()
        tic = time.time()
        btic = time.time()
        base_lr = trainer.learning_rate
        rcnn_task.mix_ratio = mix_ratio
        if args.profiler == "1":
            # profiler 1
            profiler.set_state('run')
        for i, batch in enumerate(train_data):
            if epoch == 0 and i <= lr_warmup:
                # adjust based on real percentage
                new_lr = base_lr * get_lr_at_iter(i / lr_warmup,
                                                  args.lr_warmup_factor)
                if new_lr != trainer.learning_rate:
                    if i % args.log_interval == 0:
                        logger.info(
                            '[Epoch 0 Iteration {}] Set learning rate to {}'.
                            format(i, new_lr))
                    trainer.set_learning_rate(new_lr)
            batch = split_and_load(batch, ctx_list=ctx)
            metric_losses = [[] for _ in metrics]
            add_losses = [[] for _ in metrics2]
            if executor is not None:
                for data in zip(*batch):
                    executor.put(data)
            for j in range(len(ctx)):
                if executor is not None:
                    result = executor.get()
                else:
                    result = rcnn_task.forward_backward(list(zip(*batch))[0])
                if (not args.horovod) or hvd.rank() == 0:
                    # First len(metrics) entries are losses; the rest feed
                    # the auxiliary metrics.
                    for k in range(len(metric_losses)):
                        metric_losses[k].append(result[k])
                    for k in range(len(add_losses)):
                        add_losses[k].append(result[len(metric_losses) + k])
            for metric, record in zip(metrics, metric_losses):
                metric.update(0, record)
            for metric, records in zip(metrics2, add_losses):
                for pred in records:
                    metric.update(pred[0], pred[1])
            trainer.step(batch_size)
            # update metrics
            if ((not args.horovod) or hvd.rank() == 0) and args.log_interval \
                    and not (i + 1) % args.log_interval:
                msg = ','.join([
                    '{}={:.3f}'.format(*metric.get())
                    for metric in metrics + metrics2
                ])
                cur_rank = kv.rank if "perseus" in args.kv_store else 0
                logger.info(
                    'rank:{}, [Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}'
                    .format(
                        cur_rank, epoch, i,
                        args.log_interval * batch_size / (time.time() - btic),
                        msg))
                btic = time.time()
            # Profiling mode: stop after ~100 iterations and bail out.
            if i >= 100 and args.profiler == "1":
                profiler.set_state('stop')
                print(profiler.dumps())
                break
        if ((not args.horovod) and ("perseus" not in args.kv_store)) or (
                args.horovod and hvd.rank() == 0) or (
                    ("perseus" in args.kv_store) and kv.rank == 0):  # perseus
            #if (not args.horovod) or hvd.rank() == 0:
            msg = ','.join(
                ['{}={:.3f}'.format(*metric.get()) for metric in metrics])
            logger.info('[Epoch {}] Training cost: {:.3f}, {}'.format(
                epoch, (time.time() - tic), msg))
            if not (epoch + 1) % args.val_interval:
                # consider reduce the frequency of validation to save time
                map_name, mean_ap = validate(net, val_data, ctx, eval_metric,
                                             args)
                val_msg = '\n'.join(
                    ['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)])
                logger.info('[Epoch {}] Validation: \n{}'.format(
                    epoch, val_msg))
                current_map = float(mean_ap[-1])
            else:
                current_map = 0.
            save_params(net, logger, best_map, current_map, epoch,
                        args.save_interval, args.save_prefix)
        # NOTE(review): placement reconstructed — appears to run once per epoch.
        mx.nd.waitall()
def train(net, train_data, val_data, eval_metric, ctx, args):
    """Training pipeline (cfg-driven Faster-RCNN variant): FP16 loss rescaling,
    tqdm progress, warmup + stepped LR decay, optional profiling of a slice of
    iterations when args.profile is set.

    NOTE(review): body reconstructed from whitespace-mangled source; a few
    indentation details (flagged below) should be verified.
    """
    # Freeze everything, then re-enable gradients for trainable params only.
    net.collect_params().setattr('grad_req', 'null')
    net.collect_train_params().setattr('grad_req', 'write')
    # FP16 uses loss-weight rescaling paired with rescale_grad below.
    rescale_factor = float(
        cfg.GENERAL.FP16_RESCALE_FACTOR) if cfg.GENERAL.FP16 else None
    trainer = gluon.Trainer(
        net.collect_train_params(),  # fix batchnorm, fix first stage, etc...
        'sgd',
        {
            'learning_rate': cfg.TRAIN.BASE_LR,
            'wd': cfg.TRAIN.WEIGHT_DECAY,
            'momentum': cfg.TRAIN.MOMENTUM,
            'clip_gradient': 5,
            'multi_precision': cfg.GENERAL.FP16,
            'rescale_grad': 1.0 / cfg.GENERAL.FP16_RESCALE_FACTOR
            if cfg.GENERAL.FP16 else 1.0
        })
    # lr decay policy
    lr_steps = cfg.AUTO.LR_DECAY_EPOCH
    lr_warmup = float(cfg.TRAIN.LR_WARMUP)  # avoid int division
    # TODO(zhreshold) losses?
    rpn_cls_loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss(
        from_sigmoid=False, weight=rescale_factor)
    rpn_box_loss = mx.gluon.loss.HuberLoss(
        rho=1 / 9., weight=rescale_factor)  # i.e. smoothl1
    rcnn_cls_loss = mx.gluon.loss.SoftmaxCrossEntropyLoss(
        weight=rescale_factor)
    rcnn_box_loss = mx.gluon.loss.HuberLoss(
        weight=rescale_factor)  # i.e. smoothl1
    metrics = [
        mx.metric.Loss('RPN_Conf'),
        mx.metric.Loss('RPN_SmoothL1'),
        mx.metric.Loss('RCNN_CrossEntropy'),
        mx.metric.Loss('RCNN_SmoothL1'),
    ]
    metrics2 = [
        RPNAccMetric(),
        RPNL1LossMetric(),
        RCNNAccMetric(),
        RCNNL1LossMetric()
    ]
    logger.info("Trainable parameters: ------------------------------------------\n" + \
                pprint.pformat(net.collect_train_params().keys(), indent=1, width=100, compact=True))
    logger.info('LR Schedule [Epochs {} - {}].'.format(
        cfg.AUTO.LR_DECAY_EPOCH, [
            cfg.TRAIN.BASE_LR * cfg.TRAIN.LR_DECAY_FACTOR**i
            for i in range(len(cfg.AUTO.LR_DECAY_EPOCH))
        ]))
    logger.info('Start training from [Epoch {}] to [Epoch {}].'.format(
        cfg.TRAIN.START_EPOCH, cfg.AUTO.END_EPOCH))
    best_map = [0]
    steps_per_epoch = cfg.TRAIN.STEPS_PER_EPOCH if cfg.TRAIN.STEPS_PER_EPOCH else len(
        train_data)
    for epoch in range(cfg.TRAIN.START_EPOCH, cfg.AUTO.END_EPOCH + 1):
        mix_ratio = 1.0
        if cfg.TRAIN.MODE_MIXUP:
            # TODO(zhreshold) only support evenly mixup now, target generator needs to be modified otherwise
            train_data._dataset.set_mixup(np.random.uniform, 0.5, 0.5)
            mix_ratio = 0.5
            if epoch >= (cfg.AUTO.END_EPOCH + 1) - cfg.AUTO.NO_MIXUP_EPOCH:
                train_data._dataset.set_mixup(None)
                mix_ratio = 1.0
        if lr_steps and epoch >= lr_steps[0]:
            # Consume every decay step that is already due.
            while lr_steps and epoch >= lr_steps[0]:
                new_lr = trainer.learning_rate * cfg.TRAIN.LR_DECAY_FACTOR
                lr_steps.pop(0)
                trainer.set_learning_rate(new_lr)
                logger.info("[Epoch {}] Set learning rate to {}".format(
                    epoch, new_lr))
        for metric in metrics:
            metric.reset()
        tic = time.time()
        btic = time.time()
        # Re-hybridize on the first epoch and right after each eval epoch.
        if epoch == cfg.TRAIN.START_EPOCH or (
                epoch - 1) % cfg.TRAIN.EVAL_INTERVAL == 0:
            net.hybridize(static_alloc=True)
        base_lr = trainer.learning_rate
        tbar = tqdm(train_data, total=steps_per_epoch)
        tbar.set_description_str("[ TRAIN ]")
        for i, batch in enumerate(tbar):
            i += 1  # 1-based step index within the epoch
            total_iter = (epoch - 1) * steps_per_epoch + i
            if total_iter <= lr_warmup:
                # adjust based on real percentage
                new_lr = base_lr * get_lr_at_iter(total_iter / lr_warmup)
                if new_lr != trainer.learning_rate:
                    if total_iter % cfg.GENERAL.LOG_INTERVAL == 0:
                        tqdm.write(
                            '[Warm Up] Set learning rate to {}'.format(new_lr))
                    trainer.set_learning_rate(new_lr)
            batch = split_and_load(
                batch, ctx_list=ctx)  # Split data to 1 batch each device.
            batch_size = len(batch[0])
            losses = []
            metric_losses = [[] for _ in metrics]
            add_losses = [[] for _ in metrics2]
            if args.profile and i == 10:
                # Start profiling at step 10 to skip warm-up noise.
                profiler.set_config(profile_all=True,
                                    aggregate_stats=True,
                                    filename='profile_output.json')
                profiler.set_state('run')
            with autograd.record():
                for data, label, rpn_cls_targets, rpn_box_targets, rpn_box_masks in zip(
                        *batch):
                    gt_label = label[:, :, 4:5]
                    gt_box = label[:, :, :4]
                    cls_pred, box_pred, roi, samples, matches, rpn_score, rpn_box, anchors = net(
                        data, gt_box)
                    # losses of rpn
                    if cfg.GENERAL.FP16:
                        rpn_score = rpn_score.astype('float32')
                        rpn_box = rpn_box.astype('float32')
                        rpn_cls_targets = rpn_cls_targets.astype('float32')
                        rpn_box_targets = rpn_box_targets.astype('float32')
                        rpn_box_masks = rpn_box_masks.astype('float32')
                    rpn_score = rpn_score.squeeze(axis=-1)
                    num_rpn_pos = (rpn_cls_targets >= 0).sum()
                    rpn_loss1 = rpn_cls_loss(
                        rpn_score, rpn_cls_targets, rpn_cls_targets >=
                        0) * rpn_cls_targets.size / num_rpn_pos
                    rpn_loss2 = rpn_box_loss(
                        rpn_box, rpn_box_targets,
                        rpn_box_masks) * rpn_box.size / num_rpn_pos
                    # rpn overall loss, use sum rather than average
                    rpn_loss = rpn_loss1 + rpn_loss2
                    # generate targets for rcnn
                    cls_targets, box_targets, box_masks = net.target_generator(
                        roi, samples, matches, gt_label, gt_box)
                    # losses of rcnn
                    if cfg.GENERAL.FP16:
                        cls_pred = cls_pred.astype('float32')
                        box_pred = box_pred.astype('float32')
                        cls_targets = cls_targets.astype('float32')
                        box_targets = box_targets.astype('float32')
                        box_masks = box_masks.astype('float32')
                    num_rcnn_pos = (cls_targets >= 0).sum()
                    rcnn_loss1 = rcnn_cls_loss(
                        cls_pred, cls_targets, cls_targets >= 0
                    ) * cls_targets.size / cls_targets.shape[0] / num_rcnn_pos
                    rcnn_loss2 = rcnn_box_loss(
                        box_pred, box_targets, box_masks
                    ) * box_pred.size / box_pred.shape[0] / num_rcnn_pos
                    rcnn_loss = rcnn_loss1 + rcnn_loss2
                    # overall losses
                    losses.append(rpn_loss.sum() * mix_ratio +
                                  rcnn_loss.sum() * mix_ratio)
                    metric_losses[0].append(rpn_loss1.sum() * mix_ratio)
                    metric_losses[1].append(rpn_loss2.sum() * mix_ratio)
                    metric_losses[2].append(rcnn_loss1.sum() * mix_ratio)
                    metric_losses[3].append(rcnn_loss2.sum() * mix_ratio)
                    add_losses[0].append(
                        [[rpn_cls_targets, rpn_cls_targets >= 0], [rpn_score]])
                    add_losses[1].append([[rpn_box_targets, rpn_box_masks],
                                          [rpn_box]])
                    add_losses[2].append([[cls_targets], [cls_pred]])
                    add_losses[3].append([[box_targets, box_masks],
                                          [box_pred]])
                autograd.backward(losses)
            for metric, record in zip(metrics, metric_losses):
                metric.update(0, record)
            for metric, records in zip(metrics2, add_losses):
                for pred in records:
                    metric.update(pred[0], pred[1])
            trainer.step(batch_size)
            if args.profile:
                # Flush pending async work so the profile covers the step.
                mx.nd.waitall()
                profiler.set_state('stop')
            # update metrics
            if cfg.GENERAL.LOG_INTERVAL and total_iter % cfg.GENERAL.LOG_INTERVAL == 0:
                msg = ','.join([
                    '{}={:.3f}'.format(*metric.get())
                    for metric in metrics + metrics2
                ])
                total_speed = cfg.GENERAL.LOG_INTERVAL * batch_size / (
                    time.time() - btic)
                speed = total_speed / batch_size  # batch size rely on the gpu num.
                epoch_time_left = (steps_per_epoch - i + 1) / speed
                total_time_left = (
                    (cfg.AUTO.END_EPOCH - epoch) * steps_per_epoch - i +
                    1) / speed
                epoch_tl_h, epoch_tl_m, epoch_tl_s = sec_to_time(
                    epoch_time_left)
                total_tl_h, total_tl_m, _ = sec_to_time(total_time_left)
                tqdm.write(
                    '[Epoch {}][Batch {}], {:.3f}/{:0>2}h{:0>2}m{:0>2}s/{:0>2}h{:0>2}m, {}'
                    .format(epoch, total_iter, total_speed, epoch_tl_h,
                            epoch_tl_m, epoch_tl_s, total_tl_h, total_tl_m,
                            msg))
                btic = time.time()
            if cfg.TRAIN.STEPS_PER_EPOCH and i >= cfg.TRAIN.STEPS_PER_EPOCH:
                break
        tbar.close()
        msg = ','.join(
            ['{}={:.3f}'.format(*metric.get()) for metric in metrics])
        logger.info('[Epoch {}] Training cost: {:.3f}s, {}'.format(
            epoch, (time.time() - tic), msg))
        if epoch % cfg.TRAIN.EVAL_INTERVAL == 0:
            # consider reduce the frequency of validation to save time
            map_name, mean_ap = validate(net, val_data, ctx, eval_metric)
            val_msg = '\n'.join(
                ['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)])
            logger.info('[Epoch {}] Validation: \n{}'.format(epoch, val_msg))
            current_map = float(mean_ap[-1])
        else:
            current_map = 0.
        save_params(net, logger, best_map, current_map, epoch,
                    cfg.TRAIN.SAVE_INTERVAL, args.logdir)
import mxnet as mx from mxnet import nd, gluon, init, autograd from mxnet.gluon import nn from mxnet.gluon.data.vision import datasets, transforms import time from mxnet import profiler ctx = mx.gpu(0) profiler.set_config(profile_all=True, aggregate_stats=True, filename='gpu_fashion_mnist_profile_output.json') # Get Data mnist_train = datasets.FashionMNIST(train=True) text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'] # Data preprocessing transformer = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(0.13, 0.31)]) mnist_train = mnist_train.transform_first(transformer) batch_size = 256 train_data = gluon.data.DataLoader( mnist_train, batch_size=batch_size, shuffle=True, num_workers=4) mnist_valid = gluon.data.vision.FashionMNIST(train=False) valid_data = gluon.data.DataLoader( mnist_valid.transform_first(transformer),
def main():
    """Train the SKT_Lite network on CIFAR-10.

    Reads all settings from ``parse_args()`` (batch size, GPU count, mixup,
    AMP, profiling, checkpoint/plot/log directories), builds the data
    pipeline and LR schedule, then runs the nested ``train`` routine.
    Side effects: creates and chdirs into ``./<model_name>``, writes
    TensorBoard events, history plots, log files, checkpoints, and an
    optional profiler dump.
    """
    # matplotlib must select the non-interactive Agg backend before pyplot
    # is imported, since plots are only saved to disk here.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    opt = parse_args()
    batch_size = opt.batch_size
    classes = 10  # CIFAR-10
    num_gpus = opt.num_gpus
    # Global batch is per-device batch times device count (min. 1 for CPU).
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    # Cosine LR decay over the full run with a 5-epoch linear warmup.
    # 50000 is the CIFAR-10 training-set size, so 50000//batch_size is the
    # number of iterations per epoch.
    lr_sch = lr_scheduler.CosineScheduler((50000//batch_size)*opt.num_epochs,
                                          base_lr=opt.lr,
                                          warmup_steps=5*(50000//batch_size),
                                          final_lr=1e-5)
    # lr_sch = lr_scheduler.FactorScheduler((50000//batch_size)*20,
    #                                       factor=0.2, base_lr=opt.lr,
    #                                       warmup_steps=5*(50000//batch_size))
    # lr_sch = LRScheduler('cosine',opt.lr, niters=(50000//batch_size)*opt.num_epochs,)
    model_name = opt.model
    net = SKT_Lite()
    # if model_name.startswith('cifar_wideresnet'):
    #     kwargs = {'classes': classes,
    #               'drop_rate': opt.drop_rate}
    # else:
    #     kwargs = {'classes': classes}
    # net = get_model(model_name, **kwargs)
    # Tag the run name with the enabled training tricks so artifacts from
    # different configurations do not collide.
    if opt.mixup:
        model_name += '_mixup'
    if opt.amp:
        model_name += '_amp'
    # All outputs below are written relative to ./<model_name>.
    makedirs('./'+model_name)
    os.chdir('./'+model_name)
    # NOTE(review): the logdir uses Windows-style backslashes ('.\\tb\\...');
    # presumably this script was run on Windows — confirm before porting.
    sw = SummaryWriter(
        logdir='.\\tb\\'+model_name, flush_secs=5, verbose=False)
    makedirs(opt.save_plot_dir)
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'  # Nesterov accelerated gradient
    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        # Disable periodic checkpointing when either setting is missing.
        save_dir = ''
        save_period = 0
    plot_name = opt.save_plot_dir
    # Always log to stderr; additionally log to a file when requested.
    logging_handlers = [logging.StreamHandler()]
    if opt.logging_dir:
        logging_dir = opt.logging_dir
        makedirs(logging_dir)
        logging_handlers.append(logging.FileHandler(
            '%s/train_cifar10_%s.log' % (logging_dir, model_name)))
    logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
    logging.info(opt)
    if opt.amp:
        amp.init()
    if opt.profile_mode:
        # Profiler is configured here but only switched on around a single
        # early iteration inside train() below.
        profiler.set_config(profile_all=True, aggregate_stats=True,
                            continuous_dump=True,
                            filename='%s_profile.json' % model_name)
    # Training-time augmentation: random crop with padding, CutOut, flips.
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        CutOut(8),
        # gcv_transforms.block.RandomErasing(s_max=0.25),
        transforms.RandomFlipLeftRight(),
        # transforms.RandomFlipTopBottom(),
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    # Evaluation uses only deterministic resize + normalize.
    transform_test = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    def label_transform(label, classes):
        """One-hot encode integer labels into an (N, classes) ndarray on
        the label's own context (needed for mixup's soft labels)."""
        ind = label.astype('int')
        res = nd.zeros((ind.shape[0], classes), ctx=label.context)
        res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
        return res

    def test(ctx, val_data):
        """Evaluate ``net`` on ``val_data``.

        Returns (metric_name, accuracy, mean_loss); loss is averaged over
        batch_size * num_batch samples.
        """
        metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        num_batch = len(val_data)
        test_loss = 0
        for i, batch in enumerate(val_data):
            data = gluon.utils.split_and_load(
                batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(
                batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            loss = [loss_fn(yhat, y) for yhat, y in zip(outputs, label)]
            metric.update(label, outputs)
            test_loss += sum([l.sum().asscalar() for l in loss])
        test_loss /= batch_size * num_batch
        name, val_acc = metric.get()
        return name, val_acc, test_loss

    def train(epochs, ctx):
        """Full training loop: mixup (optional), AMP (optional), per-epoch
        validation, plotting, TensorBoard scalars, and checkpointing."""
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)
        root = os.path.join('..', 'datasets', 'cifar-10')
        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(
                root=root, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True,
            last_batch='discard', num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(
                root=root, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)
        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                {'learning_rate': opt.lr, 'wd': opt.wd,
                                 'momentum': opt.momentum,
                                 'lr_scheduler': lr_sch})
        if opt.amp:
            amp.init_trainer(trainer)
        metric = mx.metric.Accuracy()
        # RMSE is used as the training metric because mixup produces soft
        # (non-one-hot) targets that plain accuracy cannot score.
        train_metric = mx.metric.RMSE()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss(
            sparse_label=False if opt.mixup else True)
        train_history = TrainingHistory(['training-error', 'validation-error'])
        # acc_history = TrainingHistory(['training-acc', 'validation-acc'])
        loss_history = TrainingHistory(['training-loss', 'validation-loss'])
        iteration = 0
        best_val_score = 0
        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1  # Beta(alpha, alpha) mixup coefficient distribution
            for i, batch in enumerate(train_data):
                # Profile exactly one iteration (the second of epoch 0) so
                # warm-up cost is excluded; stopped further below.
                if epoch == 0 and iteration == 1 and opt.profile_mode:
                    profiler.set_state('run')
                lam = np.random.beta(alpha, alpha)
                # Disable mixup for the final 20 epochs (and when off).
                if epoch >= epochs - 20 or not opt.mixup:
                    lam = 1
                data_1 = gluon.utils.split_and_load(
                    batch[0], ctx_list=ctx, batch_axis=0)
                label_1 = gluon.utils.split_and_load(
                    batch[1], ctx_list=ctx, batch_axis=0)
                if not opt.mixup:
                    data = data_1
                    label = label_1
                else:
                    # Mix each sample with the reversed batch ([::-1]) and
                    # build the corresponding soft labels.
                    data = [lam*X + (1-lam)*X[::-1] for X in data_1]
                    label = []
                    for Y in label_1:
                        y1 = label_transform(Y, classes)
                        y2 = label_transform(Y[::-1], classes)
                        label.append(lam*y1 + (1-lam)*y2)
                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y)
                            for yhat, y in zip(output, label)]
                if opt.amp:
                    with ag.record():
                        with amp.scale_loss(loss, trainer) as scaled_loss:
                            ag.backward(scaled_loss)
                            # scaled_loss.backward()
                else:
                    for l in loss:
                        l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                output_softmax = [nd.SoftmaxActivation(out) for out in output]
                train_metric.update(label, output_softmax)
                # Plain accuracy is tracked against the original hard labels.
                metric.update(label_1, output_softmax)
                name, acc = train_metric.get()
                sw.add_scalar(tag='lr', value=trainer.learning_rate,
                              global_step=iteration)
                if epoch == 0 and iteration == 1 and opt.profile_mode:
                    # Drain async ops so the profile covers the whole step.
                    nd.waitall()
                    profiler.set_state('stop')
                iteration += 1
            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            _, train_acc = metric.get()
            name, val_acc, _ = test(ctx, val_data)
            if opt.mixup:
                # With mixup the RMSE value stands in for training error.
                train_history.update([acc, 1-val_acc])
                plt.cla()
                train_history.plot(save_path='%s/%s_history.png' %
                                   (plot_name, model_name))
            else:
                train_history.update([1-train_acc, 1-val_acc])
                plt.cla()
                train_history.plot(save_path='%s/%s_history.png' %
                                   (plot_name, model_name))
            # acc_history.update([train_acc, val_acc])
            # plt.cla()
            # acc_history.plot(save_path='%s/%s_acc.png' %
            #                  (plot_name, model_name), legend_loc='best')
            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('%s/%.4f-cifar-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name,
                                     epoch))
            current_lr = trainer.learning_rate
            # NOTE(review): test() is run a second time here just to obtain
            # val_loss — the validation set is evaluated twice per epoch.
            name, val_acc, val_loss = test(ctx, val_data)
            loss_history.update([train_loss, val_loss])
            plt.cla()
            loss_history.plot(save_path='%s/%s_loss.png' %
                              (plot_name, model_name), y_lim=(0, 2),
                              legend_loc='best')
            logging.info('[Epoch %d] loss=%f train_acc=%f train_RMSE=%f\n     val_acc=%f val_loss=%f lr=%f time: %f' %
                         (epoch, train_loss, train_acc, acc, val_acc,
                          val_loss, current_lr, time.time()-tic))
            sw._add_scalars(tag='Acc',
                            scalar_dict={'train_acc': train_acc,
                                         'test_acc': val_acc},
                            global_step=epoch)
            sw._add_scalars(tag='Loss',
                            scalar_dict={'train_loss': train_loss,
                                         'test_loss': val_loss},
                            global_step=epoch)
            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/cifar10-%s-%d.params' %
                                    (save_dir, model_name, epoch))
        # Always save the final epoch's parameters when checkpointing is on.
        if save_period and save_dir:
            net.save_parameters('%s/cifar10-%s-%d.params' %
                                (save_dir, model_name, epochs-1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(opt.num_epochs, context)
    if opt.profile_mode:
        # finished=False keeps the profiler session open for further dumps.
        profiler.dump(finished=False)
    sw.close()
def run(self):
    """Execute the full differentially-private training run.

    Loads data, builds the network and DP-SGD optimizer, trains for
    ``self._epochs`` worth of lots while accumulating the privacy budget
    with an RDP accountant, optionally profiles with the MXNet profiler,
    and finally evaluates test accuracy.

    Returns:
        (final_eps, test_accuracy): the accumulated privacy epsilon at the
        fixed delta (-1 when privacy accounting is disabled) and the final
        test-set accuracy.
    """
    # Helper methods
    def get_random_lot(data_loader):
        # Each call re-creates the iterator, so this draws one fresh
        # (shuffled) lot per training round.
        return next(iter(data_loader))

    # Data importing, pre-processing, and loading
    num_training_examples, num_testing_examples, train_data_lot_iterator, train_data_eval_iterator, test_data = self._load_data(
    )
    # parameters calculated from loaded data
    self._num_training_examples = num_training_examples
    self._num_testing_examples = num_testing_examples
    # Sampling ratio q = L/N, used by the privacy accountant.
    self._hyperparams[
        'sample_fraction'] = self._lot_size / num_training_examples
    rounds_per_epoch = round(num_training_examples / self._lot_size)
    # Set up privacy accountant
    accountant = rdp_acct.anaRDPacct()  # dpacct.anaCGFAcct()
    eps_sequence = []
    # Network structure creation
    self._create_network_params()
    # Loss function
    loss_func = self._get_loss_func()
    # Optimization procedure
    trainer = self._optimizer(self._hyperparams, self._net, self._params,
                              loss_func, self._model_ctx, accountant)
    # begin profiling if enabled
    if self._enable_mxnet_profiling:
        from mxnet import profiler
        profiler.set_config(profile_all=True, aggregate_stats=True,
                            filename='profile_output.json')
        profiler.set_state('run')
    # Training sequence
    rounds = round(self._epochs * rounds_per_epoch)
    loss_sequence = []
    current_epoch_loss = mx.nd.zeros(1, ctx=self._model_ctx)
    for t in range(1, rounds + 1):
        if self._verbose and self._print_epoch_status:
            # show current epoch progress
            epoch_number = 1 + (t - 1) // rounds_per_epoch
            epoch_progress = 1 + (t - 1) % rounds_per_epoch
            printProgressBar(
                epoch_progress,
                rounds_per_epoch,
                prefix='Epoch {} progress:'.format(epoch_number),
                length=50)
        if self._run_training:
            # prepare random lot of data for DPSGD step
            data, labels = get_random_lot(train_data_lot_iterator)
            # assumes each example flattens to self._input_layer features
            # (reshaped to (lot, 1, input_layer)) — TODO confirm with loader
            data = data.as_in_context(self._model_ctx).reshape(
                (-1, 1, self._input_layer))
            labels = labels.as_in_context(self._model_ctx)
        else:
            # Training was aborted (NaNs); keep stepping so the privacy
            # accounting / bookkeeping below still runs on empty input.
            data, labels = [], []
        # perform DPSGD step
        lot_mean_loss = trainer.step(
            data,
            labels,
            accumulate_privacy=self._accumulate_privacy,
            run_training=self._run_training)
        loss_sequence.append(lot_mean_loss)
        current_epoch_loss += lot_mean_loss
        # no need to continue running training if NaNs are present
        if not np.isfinite(lot_mean_loss):
            self._run_training = False
            if self._verbose:
                print("NaN loss on round {}.".format(t))
        if self._params_not_finite():
            self._run_training = False
            if self._verbose:
                print("Non-finite parameters on round {}.".format(t))
        if self._accumulate_privacy and self._debugging:
            # Track eps per round only while debugging (get_eps is costly).
            eps_sequence.append(accountant.get_eps(self._fixed_delta))
        # print some stats after an "epoch"
        if t % rounds_per_epoch == 0:
            if self._verbose:
                print("Epoch {} (round {}) complete.".format(
                    t / rounds_per_epoch, t))
                if self._run_training:
                    # Rescale the summed lot-mean losses to a per-example
                    # epoch mean.
                    print("mean epoch loss: {}".format(
                        current_epoch_loss.asscalar() * self._lot_size /
                        self._num_training_examples))
                    if self._compute_epoch_accuracy:
                        print("training accuracy: {}".format(
                            self._evaluate_accuracy(
                                train_data_eval_iterator)))
                        print("testing accuracy: {}".format(
                            self._evaluate_accuracy(test_data)))
                if self._accumulate_privacy and self._debugging:
                    print("eps used: {}\n".format(eps_sequence[-1]))
                print()
            current_epoch_loss = mx.nd.zeros(1, ctx=self._model_ctx)
    # end profiling if enabled
    if self._enable_mxnet_profiling:
        # Flush pending async ops so the profile covers the whole run.
        mx.nd.waitall()
        profiler.set_state('stop')
        print(profiler.dumps())
    # Make sure we don't report a bogus number
    if self._accumulate_privacy:
        final_eps = accountant.get_eps(self._fixed_delta)
    else:
        final_eps = -1
    test_accuracy = self._evaluate_accuracy(test_data)
    if self._save_plots or self._debugging:
        self._create_and_save_plots(t, eps_sequence, loss_sequence,
                                    final_eps, test_accuracy)
    return final_eps, test_accuracy
# %% [markdown] # # Train # %% trainer1 = { k: gluon.Trainer(v.collect_params(), 'adagrad', {'clip_gradient': 1.25}) for (k, v) in net1.items() } trainer2 = gluon.Trainer(net2.collect_params(), 'adagrad', {'clip_gradient': 1.25}) loss = gluon.loss.L2Loss() # %% profiler.set_config(profile_all=True, profile_imperative=True, aggregate_stats=True, continuous_dump=True, filename='profile.json') # %% def train_model(dataiter, epoch): train_loss = 0 total_size = 0 for i, batch in enumerate(dataiter): with mx.autograd.record(): # iterate over the left and right question embs = [] data_lists = [] for k in range(2): embedding = [
def main():
    """Train the betago Go-move-prediction model on SGF data.

    Builds the SGF dataset pipeline, trains ``Model()`` on GPU_COUNT GPUs
    for EPOCHS epochs with Adam, logs accuracy/speed to MXBoard, profiles
    the first epoch with the MXNet profiler, checkpoints periodically, and
    finishes with a validation pass over the test iterator.
    """
    data_p = Path('/storage/data/').resolve()
    checkpoint_p = Path('./checkpoints/').resolve()
    checkpoint_p.mkdir(parents=True, exist_ok=True)
    # Remove stale TensorBoard logs so each run starts a fresh event file.
    logs_p = Path('./logs/').resolve()
    shutil.rmtree(logs_p, ignore_errors=True)
    # 7-plane board encoder for a 19x19 Go board.
    encoder = SevenPlaneEncoder((19, 19))
    builder = SGFDatasetBuilder(data_p, encoder=encoder)
    builder.download_and_prepare()
    train_itr = builder.train_dataset(batch_size=BATCH_SIZE,
                                      max_worker=cpu_count(),
                                      factor=FACTOR)
    test_itr = builder.test_dataset(batch_size=BATCH_SIZE,
                                    max_worker=cpu_count(),
                                    factor=FACTOR)
    # build model
    betago = Model()
    # convert to half-presicion floating point FP16
    # NOTE: all NVIDIA GPUs with compute capability 6.1 have a low-rate FP16 performance == FFP16 is not the fast path on these GPUs
    # data passed to split_and_load() must be float16 too
    #betago.cast('float16')
    # hybridize for speed
    betago.hybridize(static_alloc=True, static_shape=True)
    # print graph
    shape = (1, ) + encoder.shape()
    mx.viz.print_summary(betago(mx.sym.var('data')), shape={'data': shape})
    # pin GPUs
    ctx = [mx.gpu(i) for i in range(GPU_COUNT)]
    # optimizer
    opt_params = {
        'learning_rate': 0.001,
        'beta1': 0.9,
        'beta2': 0.999,
        'epsilon': 1e-08
    }
    opt = mx.optimizer.create('adam', **opt_params)
    # initialize parameters
    # MXNet initializes the weight matrices uniformly by drawing from [−0.07,0.07], bias parameters are all set to 0
    # 'Xavier': initializer is designed to keep the scale of gradients roughly the same in all layers
    betago.initialize(mx.init.Xavier(magnitude=2.3), ctx=ctx,
                      force_reinit=True)
    # fetch and broadcast parameters
    params = betago.collect_params()
    # trainer
    trainer = Trainer(params=params, optimizer=opt, kvstore='device')
    # loss function
    loss_fn = SoftmaxCrossEntropyLoss()
    # use accuracy as the evaluation metric
    metric = Accuracy()
    with mxb.SummaryWriter(logdir='./logs') as sw:
        # add graph to MXBoard
        #betago.forward(mx.nd.ones(shape, ctx=ctx[0]))
        #betago.forward(mx.nd.ones(shape, ctx=ctx[1]))
        #sw.add_graph(betago)
        profiler.set_config(profile_all=True,
                            aggregate_stats=True,
                            continuous_dump=True,
                            filename='profile_output.json')
        start = time.perf_counter()
        # train
        for e in range(EPOCHS):
            # Profile only the first epoch (stopped after the batch loop).
            if 0 == e:
                profiler.set_state('run')
            tick = time.time()
            # reset the train data iterator.
            train_itr.reset()
            # loop over the train data iterator
            for i, batch in enumerate(train_itr):
                if 0 == i:
                    tick_0 = time.time()
                # splits train data into multiple slices along batch_axis
                # copy each slice into a context
                data = split_and_load(batch.data[0],
                                      ctx_list=ctx,
                                      batch_axis=0,
                                      even_split=False)
                # splits train label into multiple slices along batch_axis
                # copy each slice into a context
                label = split_and_load(batch.label[0],
                                       ctx_list=ctx,
                                       batch_axis=0,
                                       even_split=False)
                outputs = []
                losses = []
                # inside training scope
                with ag.record():
                    for x, y in zip(data, label):
                        z = betago(x)
                        # computes softmax cross entropy loss
                        l = loss_fn(z, y)
                        outputs.append(z)
                        losses.append(l)
                # backpropagate the error for one iteration
                for l in losses:
                    l.backward()
                # make one step of parameter update.
                # trainer needs to know the batch size of data
                # to normalize the gradient by 1/batch_size
                trainer.step(BATCH_SIZE)
                # updates internal evaluation
                metric.update(label, outputs)
                # Print batch metrics
                if 0 == i % PRINT_N and 0 < i:
                    # checkpointing
                    betago.save_parameters(
                        str(checkpoint_p.joinpath(
                            'betago-{}.params'.format(e))))
                    sw.add_scalar(tag='Accuracy',
                                  value={'naive': metric.get()[1]},
                                  global_step=i - PRINT_N)
                    sw.add_scalar(tag='Speed',
                                  value={
                                      'naive': BATCH_SIZE * (PRINT_N) /
                                      (time.time() - tick)
                                  },
                                  global_step=i - PRINT_N)
                    print(
                        'epoch[{}] batch [{}], accuracy {:.4f}, samples/sec: {:.4f}'
                        .format(e, i, metric.get()[1],
                                BATCH_SIZE * (PRINT_N) /
                                (time.time() - tick)))
                    tick = time.time()
            if 0 == e:
                profiler.set_state('stop')
                profiler.dump()
            # gets the evaluation result
            print('epoch [{}], accuracy {:.4f}, samples/sec: {:.4f}'.format(
                e, metric.get()[1],
                BATCH_SIZE * (i + 1) / (time.time() - tick_0)))
            # reset evaluation result to initial state
            metric.reset()
        elapsed = time.perf_counter() - start
        print('elapsed: {:0.3f}'.format(elapsed))
        # use Accuracy as the evaluation metric
        # (fresh instance so validation is not mixed with training stats)
        metric = Accuracy()
        for batch in test_itr:
            data = split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            for x in data:
                outputs.append(betago(x))
            metric.update(label, outputs)
        print('validation %s=%f' % metric.get())
def train( args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=None, barrier=None, ): assert args.num_proc <= 1, "MXNet KGE does not support multi-process now" assert (args.rel_part == False ), "No need for relation partition in single process for MXNet KGE" logs = [] for arg in vars(args): logging.info("{:20}:{}".format(arg, getattr(args, arg))) if len(args.gpu) > 0: gpu_id = (args.gpu[rank % len(args.gpu)] if args.mix_cpu_gpu and args.num_proc > 1 else args.gpu[0]) else: gpu_id = -1 if args.strict_rel_part: model.prepare_relation(mx.gpu(gpu_id)) if mxprofiler: from mxnet import profiler profiler.set_config( profile_all=True, aggregate_stats=True, continuous_dump=True, filename="profile_output.json", ) start = time.time() for step in range(0, args.max_step): pos_g, neg_g = next(train_sampler) args.step = step if step == 1 and mxprofiler: profiler.set_state("run") with mx.autograd.record(): loss, log = model.forward(pos_g, neg_g, gpu_id) loss.backward() logs.append(log) model.update(gpu_id) if step % args.log_interval == 0: for k in logs[0].keys(): v = sum(l[k] for l in logs) / len(logs) print("[Train]({}/{}) average {}: {}".format( step, args.max_step, k, v)) logs = [] print(time.time() - start) start = time.time() if (args.valid and step % args.eval_interval == 0 and step > 1 and valid_samplers is not None): start = time.time() test(args, model, valid_samplers, mode="Valid") print("test:", time.time() - start) if args.strict_rel_part: model.writeback_relation(rank, rel_parts) if mxprofiler: nd.waitall() profiler.set_state("stop") profiler.dump() print(profiler.dumps()) # clear cache logs = []
from mxnet import profiler import Graph from learners.IterativeLearner import learn_iterative from common import data_ctx, measure_time from feature_transformations import FeatureTransformation from feature_transformations.FeatureScalingTransformation import FeatureScalingTransformation from feature_transformations.KernelTransformation import KernelTransformation from feature_transformations.LinearConvolutionTransformation import LinearConvolutionTransformation from feature_transformations.PcaTransformation import PcaTransformation ############## PARAMETERS ################### from feature_transformations.RealFeatureScalingTransformation import RealFeatureScalingTransformation from learners.IterativeLogisticLearner import learn_iterative_logistic profiler.set_config(aggregate_stats=True, filename='profile_output.json') mx.random.seed(101) random.seed(101) def learn(graph, net_type, training_set, test_set, iterations_per_epoch, batch_size): def try_standard_approach(approach): def get_all_vertices(data_loader: DataLoader): res_X = [] for X, y in data_loader: for x in X: res_X.append(round(float(x.asscalar()))) return res_X
import logging import argparse import os import sys import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import mxnet as mx from models.E3DNet import create_m3d from lib.data import ClipBatchIter from mxnet import profiler train_list = ["fc", "comp_17", "comp_16", "comp_15", "comp_14", "softmax"] tmp_pool_list = ["final_fc", "softmax_label"] profiler.set_config(profile_all=True, aggregate_stats=True, filename='profile_output_m3d.json') def plot_schedule(schedule_fn, iterations=1500): # Iteration count starting at 1 iterations = [i + 1 for i in range(iterations)] lrs = [schedule_fn(i) for i in iterations] plt.scatter(iterations, lrs) plt.xlabel("Iteration") plt.ylabel("Learning Rate") #plt.savefig('learning_rate.png') def train(args): gpus = [int(i) for i in args.gpus.split(',')]