import pytest

import chainerx


def test_no_backprop_mode():
    with chainerx.backprop_scope('bp1') as bp1, \
            chainerx.backprop_scope('bp2') as bp2:

        assert chainerx.is_backprop_required()
        assert chainerx.is_backprop_required(bp1)
        assert chainerx.is_backprop_required(bp2)

        with chainerx.no_backprop_mode():
            assert not chainerx.is_backprop_required()
            assert not chainerx.is_backprop_required(bp1)
            assert not chainerx.is_backprop_required(bp2)

        assert chainerx.is_backprop_required()
        assert chainerx.is_backprop_required(bp1)
        assert chainerx.is_backprop_required(bp2)

        with chainerx.no_backprop_mode(chainerx.get_default_context()):
            assert not chainerx.is_backprop_required()
            assert not chainerx.is_backprop_required(bp1)
            assert not chainerx.is_backprop_required(bp2)

        assert chainerx.is_backprop_required()
        assert chainerx.is_backprop_required(bp1)
        assert chainerx.is_backprop_required(bp2)

        with chainerx.no_backprop_mode(bp1):
            assert chainerx.is_backprop_required()
            assert not chainerx.is_backprop_required(bp1)
            assert chainerx.is_backprop_required(bp2)

        assert chainerx.is_backprop_required()
        assert chainerx.is_backprop_required(bp1)
        assert chainerx.is_backprop_required(bp2)

        with chainerx.no_backprop_mode((bp1, bp2)):
            assert chainerx.is_backprop_required()
            assert not chainerx.is_backprop_required(bp1)
            assert not chainerx.is_backprop_required(bp2)

        assert chainerx.is_backprop_required()
        assert chainerx.is_backprop_required(bp1)
        assert chainerx.is_backprop_required(bp2)
def test_is_backprop_required():
    current_context = chainerx.get_default_context()
    another_context = chainerx.Context()

    with chainerx.backprop_scope('bp1') as bp1, \
            chainerx.backprop_scope('bp2') as bp2:

        with chainerx.no_backprop_mode():
            with chainerx.force_backprop_mode(bp1):
                assert not chainerx.is_backprop_required()
                assert chainerx.is_backprop_required(bp1)
                assert not chainerx.is_backprop_required(bp2)
                assert not chainerx.is_backprop_required(
                    context=current_context)
                assert chainerx.is_backprop_required(context=another_context)

        with pytest.raises(TypeError):
            chainerx.is_backprop_required(context='foo')
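# A minimal usage sketch (not part of the test suite above; the function name
# is made up for illustration): disable graph construction globally, then
# selectively re-enable it for one named graph with force_backprop_mode.
# Only functions exercised by the tests above are used.
def backprop_mode_example():
    with chainerx.backprop_scope('main') as bp_main:
        with chainerx.no_backprop_mode():
            # Nothing is recorded on any graph inside this block.
            assert not chainerx.is_backprop_required()
            assert not chainerx.is_backprop_required(bp_main)

            with chainerx.force_backprop_mode(bp_main):
                # Only the 'main' graph records operations again; the
                # default graph stays disabled.
                assert chainerx.is_backprop_required(bp_main)
                assert not chainerx.is_backprop_required()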
def evaluate(model, X_test, Y_test, eval_size, batch_size):
    N_test = X_test.shape[0] if eval_size is None else eval_size

    if N_test > X_test.shape[0]:
        raise ValueError(
            'Test size can be no larger than {}'.format(X_test.shape[0]))

    with chx.no_backprop_mode():
        total_loss = chx.array(0, dtype=chx.float32)
        num_correct = chx.array(0, dtype=chx.int64)
        for i in range(0, N_test, batch_size):
            x = X_test[i:min(i + batch_size, N_test)]
            t = Y_test[i:min(i + batch_size, N_test)]

            y = model.forward(x)
            total_loss += compute_loss(y, t) * batch_size
            num_correct += (
                y.argmax(axis=1).astype(t.dtype) == t).astype(chx.int32).sum()

    mean_loss = float(total_loss) / N_test
    accuracy = int(num_correct) / N_test
    return mean_loss, accuracy
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. note::

        ``chainer.no_backprop_mode()`` implicitly applies ChainerX's
        counterpart :func:`chainerx.no_backprop_mode()`, but not vice versa.
        Also, setting ``enable_backprop`` :ref:`configuration <configuration>`
        does not affect ChainerX.

    .. seealso::

        See :func:`chainer.force_backprop_mode` for details on how to
        override this context.

    """
    c = configuration.using_config('enable_backprop', False)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.no_backprop_mode()))
    return _BackpropModeContext((c,))
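# A minimal sketch of the override mentioned in the ``seealso`` note above:
# chainer.force_backprop_mode() re-enables graph construction inside an
# enclosing no_backprop_mode(), so backward() does populate x.grad here.
import numpy as np
import chainer

x = chainer.Variable(np.array([1.], np.float32))
with chainer.no_backprop_mode():
    with chainer.force_backprop_mode():
        y = x + 1   # recorded on the graph despite the outer context
y.backward()
print(x.grad)       # -> [1.]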
import argparse
import time

import numpy as np

import chainer
import chainer.functions as F
from chainer.dataset import convert
import chainerx as chx

# MLP, MLP_chain, compute_loss and SGD are assumed to be defined elsewhere
# in this script.


def main():
    parser = argparse.ArgumentParser(description='Compare chainer vs chainerx')
    parser.add_argument('--batchsize', '-b', type=int, default=100)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        choices=[-1, 0, 1, 2, 3])
    parser.add_argument('--chxon', '-c', type=int, default=1)
    args = parser.parse_args()

    # setup
    start = time.time()
    chx_available = args.chxon == 1
    batch_size = args.batchsize

    # get MNIST
    train, test = chainer.datasets.get_mnist()

    if chx_available:
        device_name = 'cuda:{}'.format(args.gpu)
        # data
        with chx.using_device(device_name):
            train_images, train_labels = map(chx.asarray, train._datasets)
            test_images, test_labels = map(chx.asarray, test._datasets)
        # model
        chx.set_default_device(device_name)
        model = MLP(n_units=1000, n_out=10)
        optimizer = SGD(lr=0.01)
    else:
        device_name = args.gpu
        # data
        train_iter = chainer.iterators.SerialIterator(train, batch_size)
        test_iter = chainer.iterators.SerialIterator(
            test, batch_size, repeat=False, shuffle=False)
        # model
        chainer.cuda.get_device_from_id(device_name).use()
        model = MLP_chain(n_units=1000, n_out=10)
        model.to_gpu()
        optimizer = chainer.optimizers.SGD(lr=0.01)

    optimizer.setup(model)

    N_train, N_test = len(train), len(test)
    all_indices_np = np.arange(N_train, dtype=np.int64)  # for chainerx

    epoch = 0
    while epoch < args.epoch:
        epoch += 1

        if chx_available:
            np.random.shuffle(all_indices_np)
            all_indices = chx.array(all_indices_np)

        for i in range(0, N_train, batch_size):
            # time 1
            if chx_available:
                indices = all_indices[i:i + batch_size]
                x = train_images.take(indices, axis=0)
                t = train_labels.take(indices, axis=0)
            else:
                batch = train_iter.next()
                x, t = convert.concat_examples(batch, device=device_name)

            y = model.forward(x)
            # time 2

            # time 3
            if chx_available:
                loss = compute_loss(y, t)
            else:
                loss = F.softmax_cross_entropy(y, t)

            model.cleargrads()
            loss.backward()
            # time 4

            optimizer.update()
            # time 5

        if chx_available:
            with chx.no_backprop_mode():
                total_loss = chx.array(0, dtype=chx.float32)
                num_correct = chx.array(0, dtype=chx.int64)
                for i in range(0, N_test, batch_size):
                    x = test_images[i:min(i + batch_size, N_test)]
                    t = test_labels[i:min(i + batch_size, N_test)]

                    y = model.forward(x)
                    total_loss += compute_loss(y, t) * len(t)
                    num_correct += (y.argmax(axis=1).astype(t.dtype)
                                    == t).astype(chx.int32).sum()
        else:
            test_iter.reset()
            with chainer.using_config('enable_backprop', False):
                total_loss = 0
                num_correct = 0
                for batch in test_iter:
                    x, t = convert.concat_examples(batch, device=device_name)

                    y = model.forward(x)
                    total_loss += float(
                        F.softmax_cross_entropy(y, t).array) * len(t)
                    num_correct += float(F.accuracy(y, t).array) * len(t)

        mean_loss = float(total_loss) / N_test
        accuracy = float(num_correct) / N_test

        elapsed_time = time.time() - start
        print('epoch {} ... loss={}, accuracy={}, elapsed_time={}'.format(
            epoch, mean_loss, accuracy, elapsed_time))
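# Hypothetical helper for the '# time 1' .. '# time 5' markers in main():
# not part of the original script, just one way those markers could be turned
# into per-phase wall-clock measurements.
import time
from contextlib import contextmanager

phase_totals = {}


@contextmanager
def timed(name):
    start = time.perf_counter()
    try:
        yield
    finally:
        phase_totals[name] = (
            phase_totals.get(name, 0.0) + time.perf_counter() - start)

# Example use inside the minibatch loop:
#     with timed('forward'):
#         y = model.forward(x)
#     with timed('backward'):
#         model.cleargrads()
#         loss.backward()
#     with timed('update'):
#         optimizer.update()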