def test_sequence_grad(device_name, translator):
    """Check that compiling ``SequenceGrad`` preserves outputs and gradients.

    The eager model and its compiled counterpart are both fed the same
    inputs through ``_run_fwd_bwd``; the returned outputs and the named
    gradients must agree within ``rtol=1e-4``.
    """
    device = chainer.get_device(device_name)
    device.use()

    seq_length, batch_size, n_units = 4, 2, 3

    eager_model = SequenceGrad(n_units)
    eager_model.to_device(device)

    # The same input list is reused for the eager and the compiled run.
    xs = [device.xp.array(x)
          for x in aranges(device.xp, seq_length, batch_size, n_units)]

    expected_ys, expected_grads = _run_fwd_bwd(eager_model, [xs])

    compiled_model = chainer_compiler.compile(
        eager_model, [xs], translator=translator)
    compiled_model.to_device(device)
    actual_ys, actual_grads = _run_fwd_bwd(compiled_model, [xs])

    # Forward outputs: same count, same device, close values.
    assert len(expected_ys) == len(actual_ys)
    for expected_y, actual_y in zip(expected_ys, actual_ys):
        expected_arr = _array(expected_y)
        actual_arr = _array(actual_y)
        assert _get_device(expected_arr) == _get_device(actual_arr)
        _assert_allclose(expected_arr, actual_arr, rtol=1e-4)

    # Gradients: same names in the same order, none missing, close values.
    assert len(expected_grads) == len(actual_grads)
    for expected_item, actual_item in zip(expected_grads, actual_grads):
        e_name, e_grad = expected_item
        a_name, a_grad = actual_item
        assert e_name == a_name
        assert e_grad is not None, e_name
        assert a_grad is not None, a_name
        _assert_allclose(e_grad, a_grad, rtol=1e-4)
def test_sequence(device_name, translator):
    """Check that compiling ``Sequence`` preserves its outputs.

    The combination of the ``'onnx_chainer'`` translator with the
    ``'native:0'`` or ``'cuda:0'`` device is skipped.
    """
    # NOTE: ``translator`` only gates the skip below; ``compile`` is called
    # without a ``translator`` argument and so uses its default.
    if translator == 'onnx_chainer' and device_name in ('native:0', 'cuda:0'):
        pytest.skip()

    device = chainer.get_device(device_name)
    device.use()

    def make_inputs():
        # Three scalar float32 arrays holding 1, 2 and 3.
        return [device.xp.array(i + 1, dtype=np.float32) for i in range(3)]

    eager_model = Sequence()
    eager_model.to_device(device)
    xs = make_inputs()
    expected = eager_model(xs)

    compiled_model = chainer_compiler.compile(eager_model, [xs])
    compiled_model.to_device(device)
    # Fresh input arrays are built for the compiled run.
    xs = make_inputs()
    actual = compiled_model(xs)

    assert len(expected) == len(actual)
    for expected_out, actual_out in zip(expected, actual):
        expected_arr = _array(expected_out)
        actual_arr = _array(actual_out)
        assert _get_device(expected_arr) == _get_device(actual_arr)
        _assert_allclose(expected_arr, actual_arr)
def test_mnist(device_name, translator):
    """Check that a compiled MLP matches the eager MLP inside ``L.Classifier``.

    Both classifiers are run through ``_run_fwd_bwd`` with the same input and
    target; the returned loss and the named gradients must agree.  The
    combination of the ``'onnx_chainer'`` translator with the ``'native:0'``
    or ``'cuda:0'`` device is skipped.
    """
    if translator == 'onnx_chainer':
        if device_name == 'native:0' or device_name == 'cuda:0':
            pytest.skip()

    # Seed every RNG in use so the random input and target are reproducible.
    np.random.seed(40)
    if has_cupy:
        cupy.random.seed(40)

    batch_size = 3
    in_size = 5
    n_units = 4
    n_out = 10

    device = chainer.get_device(device_name)
    device.use()

    mlp = MLP(n_units, n_out)
    model = L.Classifier(mlp)
    model.to_device(device)

    # Random data is drawn with NumPy first and then moved to the device.
    x = np.random.rand(batch_size, in_size).astype(np.float32)
    x = device.xp.array(x)
    target = device.xp.array(np.random.randint(n_out, size=batch_size))

    expected_loss, expected_grads = _run_fwd_bwd(model, [x, target])

    # Only the MLP is compiled; the classifier wrapper stays a plain link.
    mlp_compiled = chainer_compiler.compile(mlp, [x], translator=translator)
    model = L.Classifier(mlp_compiled)
    model.to_device(device)

    actual_loss, actual_grads = _run_fwd_bwd(model, [x, target])

    _assert_allclose(expected_loss, actual_loss)

    assert len(expected_grads) == len(actual_grads)
    for (e_name, e_grad), (a_name, a_grad) in zip(
            expected_grads, actual_grads):
        assert e_name == a_name
        assert e_grad is not None, e_name
        assert a_grad is not None, a_name
        chainerx.testing.assert_allclose(e_grad, a_grad, rtol=1e-4)
def test_const_mul(device_name, translator):
    """Check that compiling ``ConstMul`` preserves its output and device."""
    device = chainer.get_device(device_name)
    device.use()

    # This checks if the default ChainerX device is set properly by
    # Constant op, whose result will be placed on the default device.
    eager_model = ConstMul()
    eager_model.to_device(device)

    # A single scalar float32 NumPy input, shared by both runs.
    inputs = [np.array(3, dtype=np.float32)]
    expected = eager_model(*inputs)

    compiled_model = chainer_compiler.compile(
        eager_model, inputs, translator=translator)
    compiled_model.to_device(device)
    actual = compiled_model(*inputs)

    expected_arr = _array(expected)
    actual_arr = _array(actual)
    assert _get_device(expected_arr) == _get_device(actual_arr)
    _assert_allclose(expected_arr, actual_arr)
def test_multi_in_outs(device_name, translator):
    """Check that compiling ``MultiInOuts`` preserves every output."""
    device = chainer.get_device(device_name)
    device.use()

    eager_model = MultiInOuts()
    eager_model.to_device(device)

    # Two scalar float32 NumPy inputs, shared by both runs.
    inputs = [np.array(value, dtype=np.float32) for value in (3, 39)]
    expected = eager_model(*inputs)

    compiled_model = chainer_compiler.compile(
        eager_model, inputs, translator=translator)
    compiled_model.to_device(device)
    actual = compiled_model(*inputs)

    assert len(expected) == len(actual)
    for expected_out, actual_out in zip(expected, actual):
        expected_arr = _array(expected_out)
        actual_arr = _array(actual_out)
        assert _get_device(expected_arr) == _get_device(actual_arr)
        _assert_allclose(expected_arr, actual_arr)
def test_sequence(device_name):
    """Check that compiling ``Sequence`` preserves its outputs.

    ``compile`` is called without a ``translator`` argument, so the
    compiler's default translator is used.
    """
    device = chainer.get_device(device_name)
    device.use()

    def make_inputs():
        # Three scalar float32 arrays holding 1, 2 and 3.
        return [device.xp.array(i + 1, dtype=np.float32) for i in range(3)]

    eager_model = Sequence()
    eager_model.to_device(device)
    xs = make_inputs()
    expected = eager_model(xs)

    compiled_model = chainer_compiler.compile(eager_model, [xs])
    compiled_model.to_device(device)
    # Fresh input arrays are built for the compiled run.
    xs = make_inputs()
    actual = compiled_model(xs)

    assert len(expected) == len(actual)
    for expected_out, actual_out in zip(expected, actual):
        expected_arr = _array(expected_out)
        actual_arr = _array(actual_out)
        assert _get_device(expected_arr) == _get_device(actual_arr)
        _assert_allclose(expected_arr, actual_arr)
def test_partially_differentiable(device_name, translator):
    """Check that compiling ``PartiallyDifferentiable`` keeps loss and grads.

    The model receives a list of ``chainer.Variable`` inputs plus a list of
    int32 index arrays.  The loss and the named gradients returned by
    ``_run_fwd_bwd`` must agree between the eager and the compiled model.
    """
    np.random.seed(40)
    device = chainer.get_device(device_name)
    device.use()

    n_units, batch_size, seq_length = 3, 2, 7

    def make_xs():
        # Each run gets its own freshly wrapped Variables.
        return [chainer.Variable(device.xp.array(x))
                for x in aranges(device.xp, seq_length, batch_size, n_units)]

    xs = make_xs()
    indices = [np.array(i, dtype=np.int32) for i in (2, 3, 5, 1)]

    eager_model = PartiallyDifferentiable(n_units)
    eager_model.to_device(device)
    expected_loss, expected_grads = _run_fwd_bwd(eager_model, [xs, indices])

    xs = make_xs()
    compiled_model = chainer_compiler.compile(
        eager_model, [xs, indices], translator=translator)
    compiled_model.to_device(device)
    actual_loss, actual_grads = _run_fwd_bwd(compiled_model, [xs, indices])

    chainerx.testing.assert_allclose(expected_loss, actual_loss, rtol=1e-5)

    assert len(expected_grads) == len(actual_grads)
    for expected_item, actual_item in zip(expected_grads, actual_grads):
        e_name, e_grad = expected_item
        a_name, a_grad = actual_item
        assert e_name == a_name
        assert e_grad is not None, e_name
        assert a_grad is not None, a_name
        _assert_allclose(e_grad, a_grad, rtol=1e-4)
def main():
    """Train an MLP classifier on MNIST, optionally via ``chainer_compiler``.

    Command-line options select the device, batch size, stopping point,
    snapshotting, and whether (and how) the MLP is compiled before training.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    # ``--gpu`` is kept for compatibility and writes into ``args.device``.
    deprecated = parser.add_argument_group('deprecated arguments')
    deprecated.add_argument('--gpu', '-g', dest='device',
                            type=int, nargs='?', const=0,
                            help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--compile', action='store_true',
                        help='Compile the model')
    parser.add_argument('--dump_onnx', action='store_true',
                        help='Dump ONNX model after optimization')
    parser.add_argument('--iterations', '-I', type=int, default=None,
                        help='Number of iterations to train')
    parser.add_argument('--use-fake-data', action='store_true',
                        help='Use fake data')
    parser.add_argument('--computation_order', type=str, default=None,
                        help='Computation order in backpropagation')
    parser.add_argument('--use_unified_memory', dest='use_unified_memory',
                        action='store_true',
                        help='Use unified memory for large model')
    args = parser.parse_args()

    device = chainer.get_device(args.device)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Build the network. The Classifier wrapper reports softmax cross
    # entropy loss and accuracy on every iteration; PrintReport below
    # consumes those values.
    mlp = MLP(args.unit, 10)
    if args.compile:
        # A requested computation order switches the translator.
        translator = (
            'onnx_chainer' if args.computation_order is not None else 'ch2o')

        export_allocator = None
        runtime_allocator = None
        if args.use_unified_memory:
            import cupy
            # unified memory
            export_allocator = cupy.cuda.memory.malloc_managed
            runtime_allocator = cupy.get_default_memory_pool().malloc

        mlp = chainer_compiler.compile(
            mlp,
            dump_onnx=args.dump_onnx,
            translator=translator,
            computation_order=args.computation_order,
            export_allocator=export_allocator,
            runtime_allocator=runtime_allocator)
    model = L.Classifier(mlp)
    model.to_device(device)
    device.use()

    # Optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Dataset: real MNIST unless fake data was requested
    if args.use_fake_data:
        train, test = fake_dataset()
    else:
        train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Trainer: an iteration budget, when given, takes precedence over epochs
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    stop_trigger = (
        (args.iterations, 'iteration') if args.iterations
        else (args.epoch, 'epoch'))
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate on the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Dump a computational graph from the 'loss' variable at the first
    # iteration; "main" is the target link of the "main" optimizer.
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))

    # Snapshot interval: -1 means "once, after the last epoch"
    if args.frequency == -1:
        frequency = args.epoch
    else:
        frequency = max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Log evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save loss and accuracy plots to the result directory
    if args.plot and extensions.PlotReport.available():
        loss_plot = extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch', file_name='loss.png')
        trainer.extend(loss_plot)
        accuracy_plot = extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch', file_name='accuracy.png')
        trainer.extend(accuracy_plot)

    # Print selected log entries to stdout. "main" again refers to the
    # target link of the "main" optimizer and "validation" is the default
    # name of the Evaluator extension. Everything except 'epoch' is reported
    # by the Classifier link, called by the updater or the evaluator.
    report_entries = [
        'epoch',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
        'elapsed_time',
    ]
    trainer.extend(extensions.PrintReport(report_entries))

    # Progress bar on stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a convnet on ILSVRC2012 and print throughput statistics.

    Command-line options select the architecture, device, data loading
    backend (multiprocess iterators or DALI), and whether the model is
    compiled with ``chainer_compiler``.  After training, the trainer log
    written to ``<out>/log`` is read back to print the mean time per
    iteration and the resulting images per second.

    Raises:
        RuntimeError: If ``--dali`` is given but DALI is unavailable, or if
            ``--dali`` is given while the selected device is not a GPU
            device.
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--iterations', '-I', type=int, default=0,
                        help='Number of iterations to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    parser.add_argument('--dali', action='store_true')
    parser.set_defaults(dali=False)
    # ``--gpu`` stores into ``args.device``; there is no ``args.gpu``.
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--compile', action='store_true',
                        help='Compile the model')
    parser.add_argument('--dump_onnx', action='store_true',
                        help='Dump ONNX model after optimization')
    args = parser.parse_args()

    chainer.config.autotune = True
    chainer.config.cudnn_fast_batch_normalization = True

    device = chainer.get_device(args.device)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    if args.iterations:
        print('# iterations: {}'.format(args.iterations))
    else:
        print('# epoch: {}'.format(args.epoch))
    print('')

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    # Read the input size before compilation replaces the model object.
    insize = model.insize
    if args.compile:
        model = chainer_compiler.compile(model, dump_onnx=args.dump_onnx)
    model.to_device(device)
    device.use()

    # Load the mean file
    mean = np.load(args.mean)

    if args.dali:
        if not dali_util._dali_available:
            raise RuntimeError('DALI seems not available on your system.')
        # The DALI pipelines need a GPU id. It is taken from the resolved
        # device because ``--gpu`` is stored under ``args.device``.
        if not isinstance(device, chainer.backend.GpuDevice):
            raise RuntimeError('Using DALI requires GPU device. Please '
                               'specify it with --device option.')
        gpu_id = device.device.id

        num_threads = args.loaderjob
        if num_threads is None or num_threads <= 0:
            num_threads = 1
        ch_mean = list(np.average(mean, axis=(1, 2)))
        ch_std = [255.0, 255.0, 255.0]
        # Setup DALI pipelines
        train_pipe = dali_util.DaliPipelineTrain(
            args.train, args.root, insize, args.batchsize,
            num_threads, gpu_id, True, mean=ch_mean, std=ch_std)
        val_pipe = dali_util.DaliPipelineVal(
            args.val, args.root, insize, args.val_batchsize,
            num_threads, gpu_id, False, mean=ch_mean, std=ch_std)
        train_iter = chainer.iterators.DaliIterator(train_pipe)
        val_iter = chainer.iterators.DaliIterator(val_pipe, repeat=False)
        converter = dali_util.DaliConverter(mean=mean, crop_size=insize)
    else:
        # Load the dataset files
        train = PreprocessedDataset(args.train, args.root, mean, insize)
        val = PreprocessedDataset(args.val, args.root, mean, insize, False)
        # These iterators load the images with subprocesses running in
        # parallel to the training/validation.
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)
        converter = dataset.concat_examples

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    if args.iterations:
        stop_trigger = (args.iterations, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, args.out)

    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = ((1 if args.test else 10 if args.iterations else 1000),
                    'iteration')

    trainer.extend(extensions.Evaluator(val_iter, model, converter=converter,
                                        device=device), trigger=val_interval)
    # TODO(sonots): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    cuda_hook = function_hooks.CUDAProfileHook()
    with cuda_hook:
        trainer.run()

    # Derive throughput from consecutive entries of the trainer log.
    with open('%s/log' % args.out) as f:
        logs = json.load(f)

    elapsed_times = []
    for prev, cur in zip(logs, logs[1:]):
        iters = cur['iteration'] - prev['iteration']
        elapsed = cur['elapsed_time'] - prev['elapsed_time']
        elapsed_times.append(elapsed / iters)

    # A log with fewer than two entries yields no intervals to average.
    if not elapsed_times:
        print('Not enough log entries to compute throughput')
        return

    sec_per_iter = sum(elapsed_times) / len(elapsed_times)
    print(sec_per_iter * 1000, 'msec/iter')
    print(args.batchsize / sec_per_iter, 'images/sec')