def test_selectedrows_gradient1(self):
    places = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda():
        places.append(fluid.CUDAPlace(0))

    for place in places:
        for dtype in ["float32", "float64"]:
            for sort_sum_gradient in [True, False]:
                paddle.disable_static(place)
                fluid.set_flags(
                    {'FLAGS_sort_sum_gradient': sort_sum_gradient})
                # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)

                input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                input = paddle.to_tensor(input_word)

                simplenet = SimpleNet(20, 32, dtype)
                adam = SGDOptimizer(
                    learning_rate=0.001,
                    parameter_list=simplenet.parameters())  # grad_clip=grad_clip
                input_emb, emb = simplenet(input)

                self.assertTrue(emb.weight.gradient() is None)
                self.assertTrue(input_emb.gradient() is None)

                input_emb.backward()
                adam.minimize(input_emb)
                self.assertTrue(emb.weight.gradient() is not None)

                emb.clear_gradients()
                self.assertTrue(emb.weight.gradient() is None)

                input_emb.clear_gradient()
                self.assertTrue(input_emb.gradient() is not None)
                paddle.enable_static()
def test_NoDetachMulti_DetachMulti(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    array_no_detach_multi = self.no_detach_multi()
    array_detach_multi = self.detach_multi()

    assert not np.array_equal(array_no_detach_multi, array_detach_multi)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_selectedrows_gradient2(self):
    places = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda():
        places.append(fluid.CUDAPlace(0))

    for place in places:
        for sort_sum_gradient in [True, False]:
            with fluid.dygraph.guard(place):
                fluid.set_flags(
                    {'FLAGS_sort_sum_gradient': sort_sum_gradient})
                grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)

                input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                input = to_variable(input_word)

                simplenet = SimpleNet(20, 32, "float32")
                adam = SGDOptimizer(
                    learning_rate=0.001,
                    parameter_list=simplenet.parameters(),
                    grad_clip=grad_clip)
                input_emb, emb = simplenet(input)

                self.assertTrue(emb.weight.gradient() is None)
                self.assertTrue(input_emb.gradient() is None)

                input_emb.backward()
                adam.minimize(input_emb)
                self.assertTrue(emb.weight.gradient() is not None)

                emb.clear_gradients()
                self.assertTrue(emb.weight.gradient() is None)

                input_emb.clear_gradient()
                self.assertTrue(input_emb.gradient() is not None)
def test_coo_values_grad(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    with _test_eager_guard():
        indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
        values = [1.0, 2.0, 3.0, 4.0, 5.0]
        sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
            paddle.to_tensor(indices),
            paddle.to_tensor(values),
            shape=[3, 4],
            stop_gradient=False)
        values_tensor = sparse_x.values()
        out_grad = [2.0, 3.0, 5.0, 8.0, 9.0]
        # test coo_values_grad
        values_tensor.backward(paddle.to_tensor(out_grad))
        assert np.array_equal(out_grad, sparse_x.grad.values().numpy())

        indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
        values = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0],
                  [5.0, 5.0]]
        sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
            paddle.to_tensor(indices),
            paddle.to_tensor(values),
            shape=[3, 4, 2],
            stop_gradient=False)
        values_tensor = sparse_x.values()
        out_grad = [[2.0, 2.0], [3.0, 3.0], [5.0, 5.0], [8.0, 8.0],
                    [9.0, 9.0]]
        # test coo_values_grad
        values_tensor.backward(paddle.to_tensor(out_grad))
        assert np.array_equal(out_grad, sparse_x.grad.values().numpy())
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def run_program(enable_addto):
    np.random.seed(10)
    paddle.seed(10)
    paddle.framework.random._manual_program_seed(10)
    if fluid.core.is_compiled_with_cuda():
        fluid.set_flags({"FLAGS_cudnn_deterministic": True})
        fluid.set_flags({"FLAGS_max_inplace_grad_add": 2})
    loss, main, startup, w = create_program(data_format=data_format)
    place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)

    strategy = fluid.BuildStrategy()
    strategy.enable_addto = enable_addto
    compiled = fluid.CompiledProgram(main).with_data_parallel(
        loss_name=loss.name, build_strategy=strategy)

    exe.run(startup)
    img = np.random.uniform(-128, 128,
                            [8, 3, 224, 224]).astype(np.float32)
    for i in range(10):
        res = exe.run(compiled,
                      feed={'img': img},
                      fetch_list=[loss.name, w.name])
    return res
def func_sum_op(self):
    x = np.ones([2, 2], np.float32)
    with fluid.dygraph.guard():
        inputs = []
        for _ in range(10):
            tmp = paddle.to_tensor(x)
            tmp.stop_gradient = False
            inputs.append(tmp)
        ret = paddle.add_n(inputs)
        loss = fluid.layers.reduce_sum(ret)
        loss.backward()
    with fluid.dygraph.guard():
        inputs2 = []
        for _ in range(10):
            tmp = paddle.to_tensor(x)
            tmp.stop_gradient = False
            inputs2.append(tmp)
        ret2 = paddle.add_n(inputs2)
        loss2 = fluid.layers.reduce_sum(ret2)
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})
        loss2.backward()

        self.assertTrue(np.allclose(ret.numpy(), x * 10))
        self.assertTrue(np.allclose(inputs[0].gradient(), x))
        self.assertTrue(np.allclose(ret2.numpy(), x * 10))
        a = inputs2[0].gradient()
        self.assertTrue(np.allclose(inputs2[0].gradient(), x))
def test_coo_to_dense(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    with _test_eager_guard():
        indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
        values = [1.0, 2.0, 3.0, 4.0, 5.0]
        sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
            paddle.to_tensor(indices),
            paddle.to_tensor(values),
            shape=[3, 4],
            stop_gradient=False)
        dense_tensor = sparse_x.to_dense()
        # test to_dense_grad backward
        out_grad = [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                    [9.0, 10.0, 11.0, 12.0]]
        dense_tensor.backward(paddle.to_tensor(out_grad))
        # mask the out_grad by sparse_x.indices()
        correct_x_grad = [2.0, 4.0, 7.0, 9.0, 10.0]
        assert np.array_equal(correct_x_grad,
                              sparse_x.grad.values().numpy())

        paddle.device.set_device("cpu")
        sparse_x_cpu = paddle.incubate.sparse.sparse_coo_tensor(
            paddle.to_tensor(indices),
            paddle.to_tensor(values),
            shape=[3, 4],
            stop_gradient=False)
        dense_tensor_cpu = sparse_x_cpu.to_dense()
        dense_tensor_cpu.backward(paddle.to_tensor(out_grad))
        assert np.array_equal(correct_x_grad,
                              sparse_x_cpu.grad.values().numpy())
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_assign_LoDTensorArray(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    main_program = Program()
    startup_program = Program()
    with program_guard(main_program):
        x = fluid.data(name='x', shape=[100, 10], dtype='float32')
        x.stop_gradient = False
        y = fluid.layers.fill_constant(
            shape=[100, 10], dtype='float32', value=1)
        z = fluid.layers.elementwise_add(x=x, y=y)
        i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
        init_array = fluid.layers.array_write(x=z, i=i)
        array = fluid.layers.assign(init_array)
        sums = fluid.layers.array_read(array=init_array, i=i)
        mean = fluid.layers.mean(sums)
        append_backward(mean)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feed_x = np.random.random(size=(100, 10)).astype('float32')
    ones = np.ones((100, 10)).astype('float32')
    feed_add = feed_x + ones
    res = exe.run(main_program,
                  feed={'x': feed_x},
                  fetch_list=[sums.name, x.grad_name])
    self.assertTrue(np.allclose(res[0], feed_add))
    self.assertTrue(np.allclose(res[1], ones / 1000.0))
def test_in_static_mode_mkldnn(self):
    fluid.set_flags({'FLAGS_use_mkldnn': True})
    try:
        if paddle.fluid.core.is_compiled_with_mkldnn():
            self.resnet_helper.train(to_static=True)
    finally:
        fluid.set_flags({'FLAGS_use_mkldnn': False})
def test_clone(self):
    paddle.disable_static()
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    self.python_api = paddle.clone

    x = paddle.ones([2])
    x.stop_gradient = False
    clone_x = paddle.clone(x)

    y = clone_x**3
    y.backward()

    self.assertTrue(np.array_equal(x, [1, 1]), True)
    self.assertTrue(np.array_equal(clone_x.grad.numpy(), [3, 3]), True)
    self.assertTrue(np.array_equal(x.grad.numpy(), [3, 3]), True)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
    paddle.enable_static()

    with program_guard(Program(), Program()):
        x_np = np.random.randn(2, 3).astype('float32')
        x = paddle.static.data("X", shape=[2, 3])
        clone_x = paddle.clone(x)
        exe = paddle.static.Executor()
        y_np = exe.run(paddle.static.default_main_program(),
                       feed={'X': x_np},
                       fetch_list=[clone_x])[0]

    self.assertTrue(np.array_equal(y_np, x_np), True)
def test_dim2_offset1(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    expected_np = np.array([[2, 1, 2], [2, 2, 1], [2, 2, 2],
                            [2, 2, 2]]).astype('float32')
    expected_grad = np.array([[1, 0, 1], [1, 1, 0], [1, 1, 1],
                              [1, 1, 1]]).astype('float32')

    for idx, p in enumerate(self.places):
        if idx == 0:
            paddle.set_device('cpu')
        else:
            paddle.set_device('gpu')
        for dtype in self.typelist:
            v = paddle.ones((2, ), dtype=dtype)
            var = (np.random.random() + 1)
            x = paddle.ones((4, 3), dtype=dtype)
            x.stop_gradient = False
            y = x * 2
            ny = y.fill_diagonal_tensor(v, offset=1, dim1=0, dim2=1)
            loss = ny.sum()
            loss.backward()

            self.assertEqual(
                (ny.numpy().astype('float32') == expected_np).all(), True)
            self.assertEqual(
                (y.grad.numpy().astype('float32') == expected_grad).all(),
                True)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_environ(self):
    self.input_np = np.random.random([2, 3, 5, 5]).astype("float32")
    for place in [paddle.CPUPlace(), paddle.CUDAPlace(0)]:
        fluid.set_flags({'FLAGS_conv2d_disable_cudnn': False})
        self.run_all(place)
        fluid.set_flags({'FLAGS_conv2d_disable_cudnn': True})
        self.run_all(place)
def func(self, place):
    # the shape of the input variable should be clearly specified and must not include -1.
    shape = [2, 3, 4, 5]
    eps = 0.005
    dtype = np.float64

    x = layers.data('x', shape, False, dtype)
    y = layers.data('y', shape, False, dtype)
    x.persistable = True
    y.persistable = True
    out = layers.elementwise_mul(x, y)
    x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
    y_arr = np.random.uniform(-1, 1, shape).astype(dtype)

    gradient_checker.triple_grad_check([x, y],
                                       out,
                                       x_init=[x_arr, y_arr],
                                       place=place,
                                       eps=eps)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    gradient_checker.triple_grad_check_for_dygraph(self.multiply_wrapper,
                                                   [x, y],
                                                   out,
                                                   x_init=[x_arr, y_arr],
                                                   place=place)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_all_cases(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    self.func_exception()
    self.func_example_with_gradient_and_create_graph()
    with _test_eager_guard():
        self.func_exception()
        self.func_example_with_gradient_and_create_graph()
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def init_dtype_type(self):
    if fluid.is_compiled_with_cuda():
        fluid.set_flags({'FLAGS_cudnn_deterministic': True})
    self.x_type = np.float32
    self.index_type = np.int32
    self.dim = -2
    self.x_shape = (10, 10, 4, 10)
    self.index_size = 10
def load_and_train_dygraph(self):
    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = self.seed
        fluid.default_main_program().random_seed = self.seed
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})

        mnist = fluid.dygraph.static_runner.StaticModelRunner(
            model_dir=self.save_dirname,
            model_filename=self.model_filename,
            params_filename=self.params_filename)

        suffix_varname_dict = mnist._program_holder_dict[
            'forward']._suffix_varname_dict
        dict_old_new = {v: k for k, v in suffix_varname_dict.items()}

        dy_param_init_value = {}
        for param in mnist.parameters():
            dy_param_init_value[param.name] = param.numpy()

        sgd = fluid.optimizer.SGD(learning_rate=0.001,
                                  parameter_list=mnist.parameters())

        train_reader = paddle.batch(
            self.reader_decorator(paddle.dataset.mnist.train()),
            batch_size=self.batch_size,
            drop_last=True)
        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
        train_loader.set_sample_list_generator(train_reader, places=place)

        mnist.train()

        for epoch in range(self.epoch_num):
            for batch_id, data in enumerate(train_loader()):
                img = data[0]
                label = data[1]
                label.stop_gradient = True

                cost = mnist(img)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                sgd.minimize(avg_loss)
                mnist.clear_gradients()

                if batch_id >= self.batch_num:
                    break

        dy_x_data = img.numpy()
        dy_out = avg_loss.numpy()

        dy_param_value = {}
        for param in mnist.parameters():
            dy_param_value[param.name] = param.numpy()

    return dy_x_data, dy_out, dy_param_init_value, dy_param_value, dict_old_new
def test_dygraph_static_same_loss(self):
    if fluid.is_compiled_with_cuda():
        fluid.set_flags({"FLAGS_cudnn_deterministic": True})
    args = parse_args()
    fake_data_reader = FakeDataReader("train", parse_config(args.config))
    dygraph_loss = train(args, fake_data_reader, to_static=False)
    static_loss = train(args, fake_data_reader, to_static=True)
    self.assertTrue(np.allclose(dygraph_loss, static_loss),
                    msg="dygraph_loss: {} \nstatic_loss: {}".format(
                        dygraph_loss, static_loss))
def test_dygraph_static_same_loss(self):
    if fluid.is_compiled_with_cuda():
        fluid.set_flags({"FLAGS_cudnn_deterministic": True})
    conf_dict = create_conf_dict()
    dygraph_loss = train(conf_dict, to_static=False)
    static_loss = train(conf_dict, to_static=True)
    self.assertEqual(len(dygraph_loss), len(static_loss))
    for i in range(len(dygraph_loss)):
        self.assertAlmostEqual(dygraph_loss[i], static_loss[i])
def test_errors(self):
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    with program_guard(Program(), Program()):
        # The type of input must be Variable or numpy.ndarray.
        x1 = fluid.create_lod_tensor(
            np.array([[-1]]), [[1]], fluid.CPUPlace())
        self.assertRaises(TypeError, paddle.assign, x1)
        # When the type of input is numpy.ndarray, the dtype of input must be float32, int32.
        x2 = np.array([[2.5, 2.5]], dtype='uint8')
        self.assertRaises(TypeError, paddle.assign, x2)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def test_mnist_declarative_cpu_vs_mkldnn(self):
    dygraph_loss_cpu = self.train_dygraph()
    fluid.set_flags({'FLAGS_use_mkldnn': True})
    try:
        dygraph_loss_mkldnn = self.train_dygraph()
    finally:
        fluid.set_flags({'FLAGS_use_mkldnn': False})
    self.assertTrue(
        np.allclose(dygraph_loss_cpu, dygraph_loss_mkldnn),
        msg='cpu dygraph is {}\n mkldnn dygraph is \n{}'.format(
            dygraph_loss_cpu, dygraph_loss_mkldnn))
def run_dygraph():
    fluid.set_flags({'FLAGS_sort_sum_gradient': True})
    paddle.seed(seed)
    paddle.framework.random._manual_program_seed(seed)
    ocr_attention = OCRAttention()

    if Config.learning_rate_decay == "piecewise_decay":
        learning_rate = fluid.layers.piecewise_decay(
            [50000], [Config.LR, Config.LR * 0.01])
    else:
        learning_rate = Config.LR
    optimizer = fluid.optimizer.SGD(
        learning_rate=0.001, parameter_list=ocr_attention.parameters())
    dy_param_init_value = {}
    for param in ocr_attention.parameters():
        dy_param_init_value[param.name] = param.numpy()
    for epoch in range(epoch_num):
        for batch_id in range(batch_num):
            label_in = to_variable(label_in_np)
            label_out = to_variable(label_out_np)
            label_out.stop_gradient = True
            img = to_variable(image_np)
            dy_prediction = ocr_attention(img, label_in)
            label_out = fluid.layers.reshape(label_out, [-1, 1],
                                             inplace=False)
            dy_prediction = fluid.layers.reshape(
                dy_prediction, [label_out.shape[0], -1], inplace=False)
            loss = fluid.layers.cross_entropy(input=dy_prediction,
                                              label=label_out)
            avg_loss = fluid.layers.reduce_sum(loss)

            dy_out = avg_loss.numpy()

            if epoch == 0 and batch_id == 0:
                for param in ocr_attention.parameters():
                    if param.name not in dy_param_init_value:
                        dy_param_init_value[param.name] = param.numpy()
            avg_loss.backward()
            dy_grad_value = {}
            for param in ocr_attention.parameters():
                if param.trainable:
                    np_array = np.array(
                        param._grad_ivar().value().get_tensor())
                    dy_grad_value[param.name +
                                  core.grad_var_suffix()] = np_array

            optimizer.minimize(avg_loss)
            ocr_attention.clear_gradients()
            dy_param_value = {}
            for param in ocr_attention.parameters():
                dy_param_value[param.name] = param.numpy()

    return dy_out, dy_param_init_value, dy_param_value
def test_rnn(self):
    np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
                       [10.0, 11.0, 12.0]])
    np_inp = np_inp.reshape((1, 4, 3))
    np_inp = np_inp.astype(np.float32)
    with fluid.dygraph.guard():
        var_inp = fluid.dygraph.base.to_variable(np_inp)
        var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
        simple_rnn = SimpleRNN()
        outs, pre_hiddens = simple_rnn.forward(var_inp)
        dy_out = outs[3].numpy()
        outs[3].backward()
        dy_grad_h2o = simple_rnn._cell._h2o_w.gradient()
        dy_grad_h2h = simple_rnn._cell._h2h_w.gradient()
        dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()

    with fluid.dygraph.guard():
        var_inp2 = fluid.dygraph.base.to_variable(np_inp)
        var_inp2 = fluid.layers.reshape(var_inp2, shape=[1, 4, 3])
        simple_rnn2 = SimpleRNN()
        outs2, pre_hiddens2 = simple_rnn2.forward(var_inp2)
        dy_out2 = outs2[3].numpy()
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})
        outs2[3].backward()
        dy_grad_h2o2 = simple_rnn2._cell._h2o_w.gradient()
        dy_grad_h2h2 = simple_rnn2._cell._h2h_w.gradient()
        dy_grad_i2h2 = simple_rnn2._cell._i2h_w.gradient()

    with new_program_scope():
        inp = fluid.layers.data(name="inp",
                                shape=[1, 4, 3],
                                append_batch_size=False)
        simple_rnn = SimpleRNN()
        outs, pre_hiddens = simple_rnn(inp)
        param_grads = fluid.backward.append_backward(outs[3])
        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(fluid.default_startup_program())
        static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
            feed={inp.name: np_inp},
            fetch_list=[
                outs[3].name, param_grads[0][1].name,
                param_grads[1][1].name, param_grads[2][1].name
            ])

    self.assertTrue(np.allclose(dy_out, static_out))
    self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
    self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
    self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
    self.assertTrue(np.allclose(dy_out2, static_out))
    self.assertTrue(np.allclose(dy_grad_h2o2, static_grad_h2o))
    self.assertTrue(np.allclose(dy_grad_h2h2, static_grad_h2h))
    self.assertTrue(np.allclose(dy_grad_i2h2, static_grad_i2h))
def func_append_activation_in_dygraph_global_use_mkldnn(self):
    a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
    helper = LayerHelper(fluid.unique_name.generate("test"), act="relu")
    func = helper.append_activation
    with fluid.dygraph.guard(fluid.core.CPUPlace()):
        a = paddle.to_tensor(a_np)
        fluid.set_flags({'FLAGS_use_mkldnn': True})
        try:
            res1 = func(a)
        finally:
            fluid.set_flags({'FLAGS_use_mkldnn': False})
        res2 = fluid.layers.relu(a)
    self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))
def __init__(self, cfg):
    paddle.seed(1)
    paddle.framework.random._manual_program_seed(1)
    self.generator = Generator(cfg)
    self.discriminator = Discriminator(cfg)
    self.g_optimizer = build_optimizer(self.generator, cfg)
    self.d_optimizer = build_optimizer(self.discriminator, cfg)
    self.cfg = cfg
    fluid.set_flags({'FLAGS_sort_sum_gradient': cfg.sort_sum_gradient})
def test_mlp(self):
    np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
    with fluid.dygraph.guard():
        var_inp = fluid.dygraph.base.to_variable(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out = out.numpy()
        out.backward()
        dy_grad = mlp._linear1.weight.gradient()

    with fluid.dygraph.guard():
        var_inp2 = fluid.dygraph.base.to_variable(np_inp)
        mlp2 = MLP(input_size=2)
        out2 = mlp2(var_inp2)
        dy_out2 = out2.numpy()
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})
        out2.backward()
        dy_grad2 = mlp2._linear1.weight.gradient()

    with new_program_scope():
        inp = fluid.layers.data(name="inp",
                                shape=[2, 2],
                                append_batch_size=False)
        mlp = MLP(input_size=2)
        out = mlp(inp)
        param_grads = fluid.backward.append_backward(
            out, parameter_list=[mlp._linear1.weight.name])[0]
        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        exe.run(fluid.default_startup_program())

        static_out, static_grad = exe.run(
            feed={inp.name: np_inp},
            fetch_list=[out.name, param_grads[1].name])

    self.assertTrue(np.allclose(dy_out, static_out))
    self.assertTrue(np.allclose(dy_grad, static_grad))
    self.assertTrue(np.allclose(dy_out2, static_out))
    self.assertTrue(np.allclose(dy_grad2, static_grad))

    params = mlp.parameters(True)
    self.assertEqual("linear_0.w_0", params[0].name)
    self.assertEqual("linear_0.b_0", params[1].name)
    self.assertEqual("linear_1.w_0", params[2].name)
    self.assertEqual("linear_1.b_0", params[3].name)
    self.assertEqual(len(params), 4)

    sublayers = mlp.sublayers(True)
    self.assertEqual(mlp._linear1, sublayers[0])
    self.assertEqual(mlp._linear2, sublayers[1])
    self.assertEqual(len(sublayers), 2)
def setUp(self):
    paddle.enable_static()
    if paddle.is_compiled_with_cuda():
        fluid.set_flags({
            'FLAGS_cudnn_deterministic': 1,
            'FLAGS_max_inplace_grad_add': 6,
        })
        self.place = paddle.CUDAPlace(0)
    else:
        self.place = paddle.CPUPlace()
    self.use_cuda = isinstance(self.place, paddle.CUDAPlace)
    self.executor = paddle.static.Executor(self.place)
    self.num_classes = 1000
    self.seed = 1
def test_single_api(sort_sum_gradient):
    fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
    x = paddle.to_tensor(5., stop_gradient=False)
    for i in range(10):
        y = paddle.pow(x, 4.0)
        y.backward()
        # gradients accumulate across backward() calls: d(x**4)/dx = 4 * 5**3 = 500 per call
        self.assertEqual(x.grad.numpy(), (i + 1) * 500)
    x.clear_gradient()
    self.assertEqual(x.grad.numpy(), 0.)
    for i in range(10):
        y = paddle.pow(x, 4.0)
        y.backward()
        self.assertEqual(x.grad.numpy(), (i + 1) * 500)
    x.clear_grad()
    self.assertEqual(x.grad.numpy(), 0.)
def func(self, place):
    shape = [2, 3, 7, 9]
    eps = 0.0005
    dtype = np.float64
    x = layers.data('x', shape, False, dtype=dtype)
    x.persistable = True
    y = layers.tanh(x)
    x_arr = np.random.random(shape).astype(dtype)
    x_arr[np.abs(x_arr) < 0.005] = 0.002

    gradient_checker.triple_grad_check(
        [x], y, x_init=x_arr, place=place, eps=eps)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    gradient_checker.triple_grad_check_for_dygraph(
        self.tanh_wrapper, [x], y, x_init=x_arr, place=place)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
def grad(self,
         outputs,
         inputs,
         grad_outputs=None,
         no_grad_vars=None,
         retain_graph=None,
         create_graph=False,
         allow_unused=False):
    fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
    return fluid.dygraph.grad(outputs=outputs,
                              inputs=inputs,
                              grad_outputs=grad_outputs,
                              no_grad_vars=no_grad_vars,
                              retain_graph=retain_graph,
                              create_graph=create_graph,
                              allow_unused=allow_unused)
def test_api(self):
    flags = {
        'FLAGS_eager_delete_tensor_gb': 1.0,
        'FLAGS_check_nan_inf': True
    }
    fluid.set_flags(flags)
    flags_list = ['FLAGS_eager_delete_tensor_gb', 'FLAGS_check_nan_inf']
    flag = 'FLAGS_eager_delete_tensor_gb'
    res_list = fluid.get_flags(flags_list)
    res = fluid.get_flags(flag)
    # use assertEqual so the returned flag values are actually compared,
    # not just checked for truthiness
    self.assertEqual(res_list['FLAGS_eager_delete_tensor_gb'], 1.0)
    self.assertEqual(res_list['FLAGS_check_nan_inf'], True)
    self.assertEqual(res['FLAGS_eager_delete_tensor_gb'], 1.0)