def test_cond_op_in_condition(self):
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        a = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1.23)
        a.stop_gradient = False
        b = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1.24)
        b.stop_gradient = False
        out = fluid.layers.cond(
            a < b,
            lambda: fluid.layers.cond(
                a - b < -1.0,
                lambda: fluid.layers.elementwise_add(a, b),
                lambda: fluid.layers.elementwise_mul(a, b)),
            lambda: fluid.layers.cond(
                a == b,
                lambda: fluid.layers.elementwise_sub(a, b),
                lambda: fluid.layers.elementwise_pow(a, b)))
        append_backward(out)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    ret = exe.run(main_program, fetch_list=[out, a.grad_name, b.grad_name])
    # Note: fill_constant loses some precision, so we use assertAlmostEqual.
    self.assertAlmostEqual(ret[0][0], 1.5252)
    self.assertAlmostEqual(ret[1][0], 1.24)
    self.assertAlmostEqual(ret[2][0], 1.23)

def test_forward(self):
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False
    cond = layers.ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    x = numpy.random.random(size=(10, 1)).astype('float32')

    outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
    print(outs)
    loss = layers.mean(out)
    append_backward(loss=loss)
    outs = exe.run(
        feed={'X': x},
        fetch_list=[default_main_program().block(0).var(data.name + "@GRAD")])[0]
    print(outs)

def test_grad(self):
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[1], dtype='float32', stop_gradient=False)
        table = layers.lod_rank_table(x, level=0)
        array = layers.lod_tensor_to_array(x, table)
        result = layers.array_to_lod_tensor(array, table)

        mean = layers.mean(result)
        append_backward(mean)

    tensor = core.LoDTensor()
    tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
    tensor.set_lod([[0, 3, 9, 10]])

    g_vars = program.global_block().var(x.name + "@GRAD")

    exe = Executor(place)
    g_out = [
        numpy.array(item).sum()
        for item in exe.run(program,
                            feed={'x': tensor},
                            fetch_list=[g_vars],
                            return_numpy=False)
    ]
    g_out_sum = numpy.array(g_out).sum()

    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

def test_grad(self):
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[1], dtype='float32', stop_gradient=False)
        table = layers.lod_rank_table(x, level=0)
        array = layers.lod_tensor_to_array(x, table)
        result = layers.array_to_lod_tensor(array, table)

        mean = layers.mean(result)
        append_backward(mean)

    tensor = core.LoDTensor()
    tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
    tensor.set_recursive_sequence_lengths([[3, 6, 1]])

    g_vars = program.global_block().var(x.name + "@GRAD")

    exe = Executor(place)
    g_out = [
        numpy.array(item).sum()
        for item in exe.run(program,
                            feed={'x': tensor},
                            fetch_list=[g_vars],
                            return_numpy=False)
    ]
    g_out_sum = numpy.array(g_out).sum()

    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

def build_network(self, only_forward, **kargs):
    x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
    x.stop_gradient = False
    layers.Print(input=x, **kargs)
    loss = layers.mean(x)
    append_backward(loss=loss)
    return loss

def test_extremely_simple_net_with_op_in_condition(self):
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        a = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1.23)
        a.stop_gradient = False
        b = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1.25)
        b.stop_gradient = False
        out = layers.cond(a - b < -1.0, lambda: a, lambda: b)
        append_backward(out)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    ret = exe.run(main_program, fetch_list=[out, b, a.grad_name, b.grad_name])
    # Note: fill_constant loses some precision, so only assertEqual on values
    # that do not lose precision in floating point.
    self.assertEqual(ret[0][0], ret[1][0])
    self.assertEqual(ret[2][0], 0.0)
    self.assertEqual(ret[3][0], 1.0)

def test_assign_LoDTensorArray(self):
    main_program = Program()
    startup_program = Program()
    with program_guard(main_program):
        x = fluid.data(name='x', shape=[100, 10], dtype='float32')
        x.stop_gradient = False
        y = fluid.layers.fill_constant(shape=[100, 10], dtype='float32', value=1)
        z = fluid.layers.elementwise_add(x=x, y=y)
        i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
        init_array = fluid.layers.array_write(x=z, i=i)
        array = fluid.layers.assign(init_array)
        sums = fluid.layers.array_read(array=init_array, i=i)
        mean = fluid.layers.mean(sums)
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feed_x = np.random.random(size=(100, 10)).astype('float32')
    ones = np.ones((100, 10)).astype('float32')
    feed_add = feed_x + ones
    res = exe.run(main_program,
                  feed={'x': feed_x},
                  fetch_list=[sums.name, x.grad_name])
    self.assertTrue(np.allclose(res[0], feed_add))
    self.assertTrue(np.allclose(res[1], ones / 1000.0))

def test_forward_backward_single_tensor_output(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        x.stop_gradient = False  # for testing the gradient
        mask = layers.data(name='mask', shape=[1], dtype='int32')
        out = program.current_block().create_var(
            dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
        select_output(x, out, mask)
        y = select_input(out, mask)
        mean = layers.mean(y)
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = Executor(place)

    feed_x = np.asarray([1.3, -1.4]).astype(np.float32)
    feed_mask = np.asarray([0]).astype(np.int32)
    ret = exe.run(program,
                  feed={'x': feed_x, 'mask': feed_mask},
                  fetch_list=[y.name, x.grad_name])
    x_grad = np.asarray([0.5, 0.5]).astype(np.float32)
    self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x))
    self.assertTrue(np.allclose(np.asarray(ret[1]), x_grad))

def test_forward(self):
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False
    cond = ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    x = numpy.random.random(size=(10, 1)).astype('float32')

    outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
    print(outs)
    loss = layers.mean(out)
    append_backward(loss=loss)
    outs = exe.run(
        feed={'X': x},
        fetch_list=[default_main_program().block(0).var(data.name + "@GRAD")])[0]
    print(outs)

def build_graph(self, only_forward=False):
    x_tensor = fluid.layers.data(
        name='x_tensor',
        shape=[self.x_tensor_dim],
        dtype='float32',
        lod_level=1)
    x_tensor.stop_gradient = False

    static_input_tensor = fluid.layers.data(
        name='static_input_tensor',
        shape=[self.static_input_tensor_dim],
        dtype='float32',
        lod_level=1)
    static_input_tensor.stop_gradient = False

    if only_forward:
        static_input_out_array = self._program.global_block().create_var(
            name='static_input_out_array',
            type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
            dtype='float32')
        static_input_out_array.stop_gradient = True

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        step_x = rnn.step_input(x_tensor)
        step_static_input = rnn.static_input(static_input_tensor)
        if only_forward:
            fluid.layers.array_write(
                x=step_static_input,
                i=rnn.step_idx,
                array=static_input_out_array)
        last = fluid.layers.sequence_pool(
            input=step_static_input, pool_type='last')
        projected = fluid.layers.fc(input=[step_x, last], size=self.output_dim)
        rnn.output(projected)

    if only_forward:
        static_input_step_outs = []
        step_idx = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
        step_idx.stop_gradient = True

        for i in range(self._max_sequence_len):
            step_out = fluid.layers.array_read(static_input_out_array, step_idx)
            step_out.stop_gradient = True
            static_input_step_outs.append(step_out)
            fluid.layers.increment(x=step_idx, value=1.0, in_place=True)

    if only_forward:
        return static_input_step_outs

    last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
    loss = fluid.layers.mean(last)
    append_backward(loss)
    static_input_grad = self._program.global_block().var(
        framework.grad_var_name('static_input_tensor'))
    return static_input_grad, loss

def build_network(self, only_forward, **kargs):
    x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
    x.stop_gradient = False
    printed = layers.Print(input=x, **kargs)
    if only_forward:
        return printed
    loss = layers.mean(printed)
    append_backward(loss=loss)
    return loss

def test_grad(self):
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[1], dtype='float32', stop_gradient=False)
        y = layers.data(name='y', shape=[1], dtype='bool', stop_gradient=False)

        level = 0

        out_true, out_false = split_lod_tensor(input=x, mask=y, level=level)
        out = merge_lod_tensor(
            in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
        mean = layers.mean(out)
        append_backward(mean)

    tensor = core.LoDTensor()
    tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
    tensor.set_recursive_sequence_lengths([[3, 6, 1]])

    mask_np = np.array([0, 1, 0]).astype('bool')
    mask_np = np.expand_dims(mask_np, axis=1)

    mask = core.LoDTensor()
    mask.set(mask_np, place)

    exe = Executor(place)
    scope = core.Scope()

    g_vars = program.global_block().var(x.name + "@GRAD")
    g_out = [
        item.sum()
        for item in map(
            np.array,
            exe.run(program,
                    feed={'x': tensor, 'y': mask},
                    fetch_list=[g_vars],
                    scope=scope,
                    return_numpy=False))
    ]

    g_out_sum = np.array(g_out).sum()

    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

def test_backward(self):
    self.check_forward()
    append_backward(self.output)

    ana_grad = [np.array(x) for x in self.backward()]

    num_grad = self.get_numerical_gradient()
    for idx, name in enumerate(self.data_field):
        self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape)
        self.assertTrue(
            np.isclose(num_grad[idx], ana_grad[idx], rtol=0.1).all())

def test_simple_forward(self):
    d0 = layers.data("d0", shape=[10], append_batch_size=False, dtype='float32')
    d1 = layers.data("d1", shape=[10], append_batch_size=False, dtype='float32')
    d2 = layers.data("d2", shape=[10], append_batch_size=False, dtype='float32')
    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True
    init = layers.zeros(shape=[10], dtype='float32')
    mem_array = layers.array_write(x=init, i=i)
    data_array = layers.array_write(x=d0, i=i)

    i = layers.increment(i)
    layers.array_write(d1, i, array=data_array)

    i = layers.increment(i)
    layers.array_write(d2, i, array=data_array)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True

    array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
    array_len.stop_gradient = True
    cond = layers.less_than(x=i, y=array_len)

    while_op = layers.While(cond=cond)
    with while_op.block():
        d = layers.array_read(array=data_array, i=i)
        prev = layers.array_read(array=mem_array, i=i)
        result = layers.sums(input=[d, prev])

        i = layers.increment(x=i, in_place=True)
        layers.array_write(result, i=i, array=mem_array)
        layers.less_than(x=i, y=array_len, cond=cond)

    sum_result = layers.array_read(array=mem_array, i=i)
    loss = layers.mean(sum_result)

    append_backward(loss)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    d = []

    for i in range(3):
        d.append(numpy.random.random(size=[10]).astype('float32'))

    outs = exe.run(feed={'d0': d[0], 'd1': d[1], 'd2': d[2]},
                   fetch_list=[sum_result])
    self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)

def test_cond_inside_cond(self):
    """
    pseudocode:
    for i in range(1, 10):
        a = 2 * i
        if i < 5:
            if i >= 3:
                return a + a
            else:
                return a - a
        else:
            if i < 8:
                return a * a
            else:
                return a / a
    """

    def less_than_branch(i, a):
        return layers.cond(i >= 3.0,
                           lambda: layers.elementwise_add(a, a),
                           lambda: layers.elementwise_sub(a, a))

    def greater_equal_branch(i, a):
        return layers.cond(i < 8.0,
                           lambda: layers.elementwise_mul(a, a),
                           lambda: layers.elementwise_div(a, a))

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = fluid.data(name="i", shape=[1], dtype='float32')
        a = 2.0 * i
        out = layers.cond(i < 5.0,
                          lambda: less_than_branch(i, a),
                          lambda: greater_equal_branch(i, a))
        mean = layers.mean(out)
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    for feed_i in range(0, 10):
        expected_a = 2.0 * feed_i
        if feed_i < 5:
            expected_ret = expected_a + expected_a if feed_i >= 3 else 0.0
            expected_a_grad = 2.0 if feed_i >= 3 else 0.0
        else:
            expected_ret = expected_a * expected_a if feed_i < 8 else 1.0
            expected_a_grad = 2.0 * expected_a if feed_i < 8 else 0.0
        ret = exe.run(main_program,
                      feed={'i': np.full((1), feed_i, np.float32)},
                      fetch_list=[out.name, a.grad_name])
        self.assertEqual(ret[0][0], expected_ret)
        self.assertEqual(ret[1][0], expected_a_grad)

def test_backward(self, numeric_grad_delta=1e-5, max_relative_error=1e-7):
    self.check_forward()

    with fluid.program_guard(self.main_program, self.startup_program):
        append_backward(self.loss)

    ana_grad = [np.array(x) for x in self.backward()]

    num_grad = self.get_numerical_gradient(delta=numeric_grad_delta)
    self.assert_is_close(num_grad,
                         ana_grad,
                         'x',
                         max_relative_error=max_relative_error,
                         msg_prefix="Gradient Check On %s" % str(self.place))

def test_while_loop_backward(self):

    def cond(i, x):
        return layers.less_than(i, eleven)

    def body(j, x):
        # TODO: In the while block, if a var created in the parent block
        # participates in the gradient calculation, the gradient result is
        # incorrect because each step scope always returns the same value
        # generated by the last step. Here we call `assign` in the while
        # block to avoid this bug; a fix is planned in a follow-up PR.
        i = layers.assign(j)
        x = layers.elementwise_mul(x=i, y=i)
        j = layers.increment(j)
        return [j, x]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = fluid.data(name='i', shape=[1], dtype='float32')
        i.stop_gradient = False
        eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
        one = layers.fill_constant(shape=[1], dtype='float32', value=1)
        x = fluid.data(name='x', shape=[1], dtype='float32')
        x.stop_gradient = False

        out = layers.while_loop(cond, body, [i, x])
        mean = layers.mean(out[1])
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)

    feed_i = np.ones(1).astype('float32')
    feed_x = np.ones(1).astype('float32')
    data = np.asarray([100]).astype('float32')
    i_grad = np.asarray([110]).astype('float32')

    res = exe.run(main_program,
                  feed={'i': feed_i, 'x': feed_x},
                  fetch_list=[mean.name, i.grad_name])
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
    self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(res[1], i_grad))

def test_adam_optimizer(self):
    init_program = framework.Program()
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x",
        optimize_attr={'learning_rate': 1.1})
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x, "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    learning_rate = 0.01
    adam_optimizer = self.MockAdam(
        learning_rate=learning_rate, beta1=0.9, beta2=0.999)
    params_grads = append_backward(mean_out)
    self.assertEqual(len(params_grads), 1)
    self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
    with framework.program_guard(program, init_program):
        opts = adam_optimizer.apply_gradients(params_grads)
    self.assertEqual(len(opts), 2)
    self.assertEqual([op.type for op in opts], ["scale", "adam"])

    # Check accumulators
    accumulators = adam_optimizer.get_accumulators()
    self.assertEqual(len(accumulators), 4)
    self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
    self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
    moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
    moment2_acc = accumulators[adam_optimizer.get_moment2_str()]
    self.assertEqual(len(moment1_acc), 1)
    self.assertEqual(len(moment2_acc), 1)
    self.assertTrue(mul_x.name in moment1_acc)
    self.assertTrue(mul_x.name in moment2_acc)

    # Check init_program
    init_ops = init_program.global_block().ops
    self.assertEqual(len(init_ops), 5)
    self.assertEqual(init_ops[0].type, "fill_constant")
    self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)

def test_l2decay_regularizer(self):
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x",
        regularizer=regularizer.L1DecayRegularizer(0.5))
    self.assertTrue(mul_x.regularizer is not None)
    self.assertTrue(isinstance(mul_x.regularizer, regularizer.L1DecayRegularizer))
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x, "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    params_grads = append_backward(mean_out)
    self.assertEqual(len(params_grads), 1)
    count_ops = len(block.ops)
    params_grads = optimizer.append_regularization_ops(params_grads)
    self.assertEqual(len(params_grads), 1)
    self.assertEqual(len(block.ops), count_ops + 3)
    self.assertEqual(block.ops[-1].type, 'elementwise_add')
    self.assertEqual(block.ops[-2].type, 'scale')
    self.assertEqual(block.ops[-3].type, 'sign')

def _generate_backward(self, main_program, startup_program, loss):
    with program_guard(main_program, startup_program):
        params_grads = append_backward(
            loss, distop_context=self._dist_context.dist_op_context)
    self._completer.complete_backward_annotation(main_program)
    self._dist_context.block_state.parse_backward_blocks(main_program)
    return params_grads

def test_backward(self, rtol=0.01):
    self.check_forward()
    num_grad = self.get_numerical_gradient()

    with fluid.program_guard(self.main_program, self.startup_program):
        append_backward(self.output)

    ana_grad = [np.array(x) for x in self.backward()]
    for idx, name in enumerate(self.data_field):
        self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape)
        self.assertTrue(
            np.isclose(num_grad[idx], ana_grad[idx], rtol=rtol).all(),
            "num_grad (" + name + ") has diff at " + str(self.place) +
            "\nExpect " + str(num_grad[idx]) + "\n" + "But Got" +
            str(ana_grad[idx]) + " in class " + self.__class__.__name__)

def test_vanilla_momentum_optimizer(self):
    init_program = framework.Program()
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x",
        optimize_attr={'learning_rate': 1.1})
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x, "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    learning_rate = 0.01
    momentum_optimizer = self.MockMomentum(
        learning_rate=learning_rate, momentum=0.2)
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    params_grads = append_backward(mean_out)
    self.assertEqual(len(params_grads), 1)
    self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
    opts = momentum_optimizer.create_optimization_pass(
        params_grads, mul_out, init_program)
    self.assertEqual(len(opts), 3)
    sgd_op = opts[-1]
    self.assertEqual([op.type for op in opts],
                     ["fill_constant", "elementwise_mul", "momentum"])
    self.assertFalse(sgd_op.attr('use_nesterov'))

    # Check accumulators
    accumulators = momentum_optimizer.get_accumulators()
    self.assertEqual(len(accumulators), 1)
    self.assertTrue(momentum_optimizer.get_velocity_str() in accumulators)
    velocity_acc = accumulators[momentum_optimizer.get_velocity_str()]
    self.assertEqual(len(velocity_acc), 1)
    self.assertTrue(mul_x.name in velocity_acc)

    # Check init_program
    init_ops = init_program.global_block().ops
    self.assertEqual(len(init_ops), 2)
    self.assertEqual(init_ops[0].type, "fill_constant")
    self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
    self.assertEqual(init_ops[1].type, "fill_constant")
    self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)

def test_decayed_adagrad_optimizer(self):
    init_program = framework.Program()
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x",
        optimize_attr={'learning_rate': 1.1})
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x, "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    learning_rate = 0.01
    decayed_adagrad_optimizer = self.MockDecayedAdagrad(
        learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
    params_grads = append_backward(mean_out)
    self.assertEqual(len(params_grads), 1)
    self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
    opts = decayed_adagrad_optimizer.create_optimization_pass(
        params_grads, mul_out, init_program)
    self.assertEqual(len(opts), 3)
    self.assertEqual([op.type for op in opts],
                     ["fill_constant", "elementwise_mul", "decayed_adagrad"])

    # Check accumulators
    accumulators = decayed_adagrad_optimizer.get_accumulators()
    self.assertEqual(len(accumulators), 1)
    self.assertTrue(decayed_adagrad_optimizer.get_moment_str() in accumulators)
    moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
    self.assertEqual(len(moment_acc), 1)
    self.assertTrue(mul_x.name in moment_acc)

    # Check init_program
    init_ops = init_program.global_block().ops
    self.assertEqual(len(init_ops), 2)
    self.assertEqual(init_ops[0].type, "fill_constant")
    self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
    self.assertEqual(init_ops[1].type, "fill_constant")
    self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)

def _get_gradient(self,
                  input_to_check,
                  place,
                  output_names,
                  no_grad_set,
                  parallel=False):
    prog = Program()
    block = prog.global_block()
    self._append_ops(block)
    loss = append_loss_ops(block, output_names)
    param_grad_list = append_backward(loss=loss,
                                      parameter_list=input_to_check,
                                      no_grad_set=no_grad_set)

    inputs = self._get_inputs(block)
    feed_dict = self.feed_var(inputs, place)

    fetch_list = [g for p, g in param_grad_list]
    if parallel:
        use_cuda = False
        # isinstance expects the CUDAPlace class, not an instance.
        if isinstance(place, fluid.CUDAPlace):
            use_cuda = True
        executor = fluid.ParallelExecutor(use_cuda=use_cuda,
                                          loss_name=loss.name,
                                          main_program=prog)
    else:
        executor = Executor(place)
    return list(
        map(np.array,
            executor.run(prog, feed_dict, fetch_list, return_numpy=False)))

def test_l2decay_regularizer(self):
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x",
        regularizer=regularizer.L1DecayRegularizer(0.5))
    self.assertTrue(mul_x.regularizer is not None)
    self.assertTrue(isinstance(mul_x.regularizer, regularizer.L1DecayRegularizer))
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x, "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    params_grads = append_backward(mean_out)
    self.assertEqual(len(params_grads), 1)
    count_ops = len(block.ops)
    params_grads = optimizer.append_regularization_ops(params_grads)
    self.assertEqual(len(params_grads), 1)
    self.assertEqual(len(block.ops), count_ops + 3)
    self.assertEqual(block.ops[-1].type, 'sum')
    self.assertEqual(block.ops[-2].type, 'scale')
    self.assertEqual(block.ops[-3].type, 'sign')

def test_api(self, use_cuda=False):
    for x_stop_gradient in [False, True]:
        for y_stop_gradient in [False, True]:
            with fluid.program_guard(Program(), Program()):
                cond = fluid.layers.data(name='cond', shape=self.shape, dtype='bool')
                x = fluid.layers.data(name='x', shape=self.shape, dtype='float32')
                y = fluid.layers.data(name='y', shape=self.shape, dtype='float32')
                x.stop_gradient = x_stop_gradient
                y.stop_gradient = y_stop_gradient

                result = paddle.where(cond, x, y)
                append_backward(layers.mean(result))

                for use_cuda in [False, True]:
                    if use_cuda and not fluid.core.is_compiled_with_cuda():
                        break
                    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
                    exe = fluid.Executor(place)
                    fetch_list = [result, result.grad_name]
                    if x_stop_gradient is False:
                        fetch_list.append(x.grad_name)
                    if y_stop_gradient is False:
                        fetch_list.append(y.grad_name)
                    out = exe.run(fluid.default_main_program(),
                                  feed={'cond': self.cond, 'x': self.x, 'y': self.y},
                                  fetch_list=fetch_list)
                    assert np.array_equal(out[0], self.out)
                    if x_stop_gradient is False:
                        assert np.array_equal(out[2], self.ref_x_backward(out[1]))
                        if y.stop_gradient is False:
                            assert np.array_equal(out[3], self.ref_y_backward(out[1]))
                    elif y.stop_gradient is False:
                        assert np.array_equal(out[2], self.ref_y_backward(out[1]))

def test_while_loop_backward2(self):

    def cond(i, x):
        return i < 3

    def body(i, x):
        x = x * i
        i = i + 1
        return [i, x]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = fluid.data(name='i', shape=[1], dtype='float32')
        i.stop_gradient = False
        x = fluid.data(name='x', shape=[1], dtype='float32')
        x.stop_gradient = False

        out = layers.while_loop(cond, body, [i, x])
        mean = layers.mean(out[1])
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)

    feed_i = np.ones(1).astype('float32')
    feed_x = np.ones(1).astype('float32')
    data = np.asarray([2]).astype('float32')
    i_grad = np.asarray([3]).astype('float32')
    x_grad = np.asarray([2]).astype('float32')

    res = exe.run(main_program,
                  feed={'i': feed_i, 'x': feed_x},
                  fetch_list=[mean.name, i.grad_name, x.grad_name])
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
    self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(res[1], i_grad))
    self.assertTrue(np.allclose(np.asarray(res[2]), x_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(res[2], x_grad))

def test_api(self):
    for x_stop_gradient in [False, True]:
        for y_stop_gradient in [False, True]:
            train_prog = fluid.Program()
            startup = fluid.Program()
            with fluid.program_guard(train_prog, startup):
                cond = fluid.data(name='cond', shape=self.shape, dtype='bool')
                x = fluid.data(name='x', shape=self.shape, dtype='float32')
                y = fluid.data(name='y', shape=self.shape, dtype='float32')

                x.stop_gradient = x_stop_gradient
                y.stop_gradient = y_stop_gradient

                result = paddle.where(cond, x, y)
                append_backward(fluid.layers.mean(result))

                exe = fluid.Executor(self.place)
                exe.run(startup)

                fetch_list = [result, result.grad_name]
                if x_stop_gradient is False:
                    fetch_list.append(x.grad_name)
                if y_stop_gradient is False:
                    fetch_list.append(y.grad_name)
                out = exe.run(train_prog,
                              feed={'cond': self.cond, 'x': self.x, 'y': self.y},
                              fetch_list=fetch_list)
                assert np.array_equal(out[0], self.out)
                if x_stop_gradient is False:
                    assert np.array_equal(out[2], self.ref_x_backward(out[1]))
                    if y.stop_gradient is False:
                        assert np.array_equal(out[3], self.ref_y_backward(out[1]))
                elif y.stop_gradient is False:
                    assert np.array_equal(out[2], self.ref_y_backward(out[1]))

def setUp(self):
    self.main_program = Program()
    switch_main_program(self.main_program)
    x = layers.data('x', shape=[100], dtype='float32')
    x.stop_gradient = False
    rank_table_tensor = layers.data(
        'rank_table_tensor', shape=[1], dtype='float32', lod_level=1)
    table = layers.lod_rank_table(x=rank_table_tensor)
    i = layers.zeros(dtype='int64', shape=[1])
    self.mem1 = layers.shrink_memory(x=x, i=i, table=table)
    i = layers.increment(x=i)
    i.stop_gradient = True
    self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table)
    i = layers.increment(x=i)
    i.stop_gradient = True
    self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
    mem3_mean = layers.mean(self.mem3)
    append_backward(loss=mem3_mean)
    self.x_grad = self.main_program.global_block().var('x@GRAD')

def test_while_loop_backward(self):

    def cond(i, x):
        return layers.less_than(i, eleven)

    def body(i, x):
        x = layers.elementwise_mul(x=i, y=i)
        i = layers.increment(i)
        return [i, x]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = fluid.data(name='i', shape=[1], dtype='float32')
        i.stop_gradient = False
        eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
        one = layers.fill_constant(shape=[1], dtype='float32', value=1)
        x = fluid.data(name='x', shape=[1], dtype='float32')
        x.stop_gradient = False

        out = layers.while_loop(cond, body, [i, x])
        mean = layers.mean(out[1])
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)

    feed_i = np.ones(1).astype('float32')
    feed_x = np.ones(1).astype('float32')
    data = np.asarray([100]).astype('float32')
    i_grad = np.asarray([110]).astype('float32')

    res = exe.run(main_program,
                  feed={'i': feed_i, 'x': feed_x},
                  fetch_list=[mean.name, i.grad_name])
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
    self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(res[1], i_grad))

def test_adamax_optimizer(self):
    init_program = framework.Program()
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x",
        optimize_attr={'learning_rate': 1.1})
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x, "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    learning_rate = 0.01
    adamax_optimizer = self.MockAdamax(
        learning_rate=learning_rate, beta1=0.9, beta2=0.999)
    params_grads = append_backward(mean_out)
    self.assertEqual(len(params_grads), 1)
    self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
    opts = adamax_optimizer.create_optimization_pass(
        params_grads, mul_out, init_program)
    self.assertEqual(len(opts), 4)
    self.assertEqual([op.type for op in opts],
                     ["fill_constant", "elementwise_mul", "adamax", "scale"])

    # Check accumulators
    accumulators = adamax_optimizer.get_accumulators()
    self.assertEqual(len(accumulators), 2)
    self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
    self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
    moment_acc = accumulators[adamax_optimizer.get_moment_str()]
    inf_norm_acc = accumulators[adamax_optimizer.get_inf_norm_str()]
    self.assertEqual(len(moment_acc), 1)
    self.assertEqual(len(inf_norm_acc), 1)
    self.assertTrue(mul_x.name in moment_acc)
    self.assertTrue(mul_x.name in inf_norm_acc)

    # Check init_program
    init_ops = init_program.global_block().ops
    self.assertEqual(len(init_ops), 4)
    self.assertEqual(init_ops[0].type, "fill_constant")
    self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)

def test_all_parameters(self):
    x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
    x.stop_gradient = False

    for print_tensor_name in [True, False]:
        for print_tensor_type in [True, False]:
            for print_tensor_shape in [True, False]:
                for print_tensor_lod in [True, False]:
                    layers.Print(
                        input=x,
                        print_tensor_name=print_tensor_name,
                        print_tensor_type=print_tensor_type,
                        print_tensor_shape=print_tensor_shape,
                        print_tensor_lod=print_tensor_lod,
                    )
    loss = layers.mean(x)
    append_backward(loss=loss)

    exe = Executor(self.place)
    outs = exe.run(feed={'x': self.x_tensor},
                   fetch_list=[loss],
                   return_numpy=False)

def test_simple_net(self):
    paddle.enable_static()
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        loss, sum_result = self.simple_net()

        append_backward(loss)

        npu_place = paddle.NPUPlace(0)
        exe = Executor(npu_place)
        d = []

        for i in range(3):
            d.append(numpy.random.random(size=[10]).astype('float32'))

        outs = exe.run(feed={'d0': d[0], 'd1': d[1], 'd2': d[2]},
                       fetch_list=[sum_result])
        self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)

def _generate_backward(self, main_program, startup_program, loss,
                       parameter_list, no_grad_set, callbacks):
    with program_guard(main_program, startup_program):
        params_grads = append_backward(
            loss,
            parameter_list,
            no_grad_set,
            callbacks,
            distop_context=self._dist_context.dist_op_context)
    complete_backward_annotation(main_program, dist_context=self._dist_context)
    return params_grads

def parallelizer(program_func, rank):
    from paddle.distributed.auto_parallel.completion import Completer
    from paddle.distributed.auto_parallel.partitioner import Partitioner
    from paddle.distributed.auto_parallel.dist_context import DistributedContext

    main_program, start_program, loss = program_func()

    dist_context = DistributedContext()
    completer = Completer(dist_context)
    completer.complete_forward_annotation(main_program)
    dist_context.block_state.parse_forward_blocks(main_program)

    with program_guard(main_program, start_program):
        params_grads = append_backward(
            loss, distop_context=dist_context.dist_op_context)
    completer.complete_backward_annotation(main_program)
    dist_context.block_state.parse_backward_blocks(main_program)

    partitioner = Partitioner(dist_context, rank)
    dist_main_prog, _, _ = partitioner.partition(main_program, start_program, [])

    return dist_main_prog, dist_context

def test_read_write(self):
    x = [
        layers.data(name='x0', shape=[100]),
        layers.data(name='x1', shape=[100]),
        layers.data(name='x2', shape=[100])
    ]
    for each_x in x:
        each_x.stop_gradient = False

    tensor = numpy.random.random(size=(100, 100)).astype('float32')
    a_sum, x_sum = _test_read_write(x)

    place = core.CPUPlace()
    exe = Executor(place)
    outs = exe.run(feed={'x0': tensor, 'x1': tensor, 'x2': tensor},
                   fetch_list=[a_sum, x_sum],
                   scope=core.Scope())
    self.assertEqual(outs[0], outs[1])

    total_sum = layers.sums(input=[a_sum, x_sum])
    total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

    append_backward(total_sum_scaled)

    g_vars = list(
        map(default_main_program().global_block().var,
            [each_x.name + "@GRAD" for each_x in x]))
    g_out = [
        item.sum() for item in exe.run(
            feed={'x0': tensor, 'x1': tensor, 'x2': tensor},
            fetch_list=g_vars)
    ]
    g_out_sum = numpy.array(g_out).sum()

    # Since the final gradient is 1 and the network is entirely linear
    # (built from mean ops), the input gradient should also sum to 1.
    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

    with fluid.dygraph.guard(place):
        tensor1 = fluid.dygraph.to_variable(tensor)
        tensor2 = fluid.dygraph.to_variable(tensor)
        tensor3 = fluid.dygraph.to_variable(tensor)
        x_dygraph = [tensor1, tensor2, tensor3]
        for each_x in x_dygraph:
            each_x.stop_gradient = False

        a_sum_dygraph, x_sum_dygraph = _test_read_write(x_dygraph)
        self.assertEqual(a_sum_dygraph, x_sum_dygraph)

        total_sum_dygraph = layers.sums(input=[a_sum_dygraph, x_sum_dygraph])
        total_sum_scaled_dygraph = layers.scale(
            x=total_sum_dygraph, scale=1 / 6.0)
        total_sum_scaled_dygraph.backward()
        g_out_dygraph = [
            item._grad_ivar().numpy().sum() for item in x_dygraph
        ]
        g_out_sum_dygraph = numpy.array(g_out_dygraph).sum()

        self.assertAlmostEqual(1.0, g_out_sum_dygraph, delta=0.1)

def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
    prog = Program()
    block = prog.global_block()
    inputs_with_np = {
        key: value
        for (key, value) in OpTest._create_var_descs_(
            block, getattr(self, 'inputs', {}))
    }
    outputs_with_np = {
        key: val
        for (key, val) in OpTest._create_var_descs_(
            block, getattr(self, 'outputs', {}))
    }
    inputs = {
        k: [item[0] for item in inputs_with_np[k]]
        for k in inputs_with_np
    }
    outputs = {
        k: [item[0] for item in outputs_with_np[k]]
        for k in outputs_with_np
    }

    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=getattr(self, 'attrs', {}))

    # infer variable type and shape at compile time
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)

    # list() so len() works under Python 3, where map() returns an iterator
    mean_inputs = list(map(block.var, output_names))

    if len(mean_inputs) == 1:
        loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1])
        op = block.append_op(
            inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean')
        op.desc.infer_var_type(block.desc)
        op.desc.infer_shape(block.desc)
    else:
        avg_sum = []
        for cur_loss in mean_inputs:
            cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1])
            op = block.append_op(
                inputs={"X": [cur_loss]},
                outputs={"Out": [cur_avg_loss]},
                type="mean")
            op.desc.infer_var_type(block.desc)
            op.desc.infer_shape(block.desc)
            avg_sum.append(cur_avg_loss)

        loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1])
        op_sum = block.append_op(
            inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum')
        op_sum.desc.infer_var_type(block.desc)
        op_sum.desc.infer_shape(block.desc)

        loss = block.create_var(dtype=loss_sum.dtype, shape=[1])
        op_loss = block.append_op(
            inputs={"X": loss_sum},
            outputs={"Out": loss},
            type='scale',
            attrs={'scale': 1.0 / float(len(avg_sum))})
        op_loss.desc.infer_var_type(block.desc)
        op_loss.desc.infer_shape(block.desc)

    param_grad_list = append_backward(
        loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

    feed_dict = {
        item[0].name: OpTest._numpy_to_lod_tensor(item[1], item[2], place)
        for p_name in inputs_with_np for item in inputs_with_np[p_name]
    }

    fetch_list = [g for p, g in param_grad_list]
    executor = Executor(place)
    return list(
        map(np.array,
            executor.run(prog, feed_dict, fetch_list, return_numpy=False)))

def test_read_write(self):
    x = [
        layers.data(name='x0', shape=[100]),
        layers.data(name='x1', shape=[100]),
        layers.data(name='x2', shape=[100])
    ]
    for each_x in x:
        each_x.stop_gradient = False

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    arr = layers.array_write(x=x[0], i=i)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[1], i=i, array=arr)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[2], i=i, array=arr)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    a0 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a1 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a2 = layers.array_read(array=arr, i=i)

    mean_a0 = layers.mean(a0)
    mean_a1 = layers.mean(a1)
    mean_a2 = layers.mean(a2)

    a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

    mean_x0 = layers.mean(x[0])
    mean_x1 = layers.mean(x[1])
    mean_x2 = layers.mean(x[2])

    x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

    scope = core.Scope()
    cpu = core.CPUPlace()

    exe = Executor(cpu)

    tensor = numpy.random.random(size=(100, 100)).astype('float32')

    outs = exe.run(feed={'x0': tensor, 'x1': tensor, 'x2': tensor},
                   fetch_list=[a_sum, x_sum],
                   scope=scope)
    self.assertEqual(outs[0], outs[1])

    total_sum = layers.sums(input=[a_sum, x_sum])
    total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

    append_backward(total_sum_scaled)

    # list() so the map result can be reused as a fetch list under Python 3
    g_vars = list(map(default_main_program().global_block().var,
                      [each_x.name + "@GRAD" for each_x in x]))
    g_out = [
        item.sum() for item in exe.run(
            feed={'x0': tensor, 'x1': tensor, 'x2': tensor},
            fetch_list=g_vars)
    ]
    g_out_sum = numpy.array(g_out).sum()

    # Since the final gradient is 1 and the network is entirely linear
    # (built from mean ops), the input gradient should also sum to 1.
    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)