def test_var_list(self):
    """while_loop with a plain list of loop vars; compares the looped
    memory tensor against a numpy replay of ten additions."""

    def cond(i, mem):
        return layers.less_than(i, ten)

    def body(i, mem):
        mem = layers.elementwise_add(x=mem, y=one)
        i = layers.increment(i)
        return [i, mem]

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
        mem = fluid.data(name='mem', shape=[10], dtype='float32')
        one = layers.fill_constant(shape=[10], dtype='float32', value=1)
        out = layers.while_loop(cond, body, [i, mem])

    data = np.random.rand(10).astype('float32')
    data_one = np.ones(10).astype('float32')

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program, feed={'mem': data}, fetch_list=out)

    # Replay the loop in numpy: ten element-wise additions of ones.
    for _ in range(10):
        data = np.add(data, data_one)
    self.assertTrue(np.allclose(np.asarray(res[1]), data))
def value_error_body_returns_with_mutable_list():
    # Body mutates the structure of a list loop var -> expected ValueError.
    test_list = [
        layers.fill_constant(shape=[2, 2], dtype='int64', value=1)
    ]
    out = layers.while_loop(cond_returns_with_mutable_list,
                            body_returns_with_mutable_list,
                            [data, test_list])
def test_var_dict(self):
    """while_loop with nested (dict / list / list-of-dict) loop vars;
    every tracked tensor should end at 10 after the loop."""

    def cond(i, ten, test_dict, test_list, test_list_dict):
        return layers.less_than(i, ten)

    def body(i, ten, test_dict, test_list, test_list_dict):
        test_dict["test_key"] = i
        test_dict["test_key"] += 1

        test_list[0] = fluid.layers.reshape(test_list[0], [2, -1]) + 1

        test_list_dict[0]["test_key"] += 1
        test_list_dict[0]["test_key"] = fluid.layers.relu(
            test_list_dict[0]["test_key"])

        i = layers.increment(i)
        return [i, ten, test_dict, test_list, test_list_dict]

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
        test_data = layers.fill_constant(shape=[1], dtype='int64', value=0)

        test_dict = {"test_key": test_data}
        test_list = [
            layers.fill_constant(shape=[1, 2], dtype='int64', value=0)
        ]
        test_list_dict = [{
            "test_key": layers.fill_constant(shape=[1],
                                             dtype='float32',
                                             value=0)
        }]

        i, ten, test_dict, test_list, test_list_dict = layers.while_loop(
            cond, body, [i, ten, test_dict, test_list, test_list_dict])

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program,
                  fetch_list=[
                      test_dict["test_key"], test_list[0],
                      test_list_dict[0]["test_key"]
                  ])

    self.assertTrue(
        np.allclose(
            np.asarray(res[0]),
            np.full(shape=(1), fill_value=10, dtype=np.int64)))
    self.assertTrue(
        np.allclose(
            np.asarray(res[1]),
            np.full(shape=(2, 1), fill_value=10, dtype=np.int64)))
    self.assertTrue(
        np.allclose(
            np.asarray(res[2]),
            np.full(shape=(1), fill_value=10, dtype=np.float32)))
def value_error_body_returns_with_mutable_dict():
    # Body mutates the structure of a dict loop var -> expected ValueError.
    test_dict = {
        "int_constant":
        layers.fill_constant(shape=[2, 2], dtype='int64', value=1)
    }
    out = layers.while_loop(cond_returns_with_mutable_dict,
                            body_returns_with_mutable_dict,
                            [data, test_dict])
def test_nested_net(self):
    """A while_loop nested inside another while_loop; the final `sums`
    tensor is checked against a numpy reference computation."""

    def external_cond(i, j, init, sums):
        return layers.less_than(i, loop_len1)

    def external_body(i, j, init, sums):
        def internal_cond(j, init, sums):
            return layers.less_than(j, loop_len2)

        def internal_body(j, init, sums):
            init = layers.elementwise_add(x=init, y=ones)
            sums = layers.elementwise_add(x=init, y=sums)
            j = layers.increment(j)
            return [j, init, sums]

        result = layers.while_loop(internal_cond, internal_body,
                                   [j, init, sums])
        j, init, sums = result[0], result[1], result[2]
        sums = layers.elementwise_add(x=init, y=sums)
        i = layers.increment(i)
        return [i, j, init, sums]

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.zeros(shape=[1], dtype='int64')
        j = layers.zeros(shape=[1], dtype='int64')
        init = fluid.data(name='init', shape=[3, 3], dtype='float32')
        sums = fluid.data(name='sums', shape=[3, 3], dtype='float32')
        loop_len1 = layers.fill_constant(shape=[1], dtype='int64', value=2)
        loop_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)
        ones = layers.fill_constant(shape=[3, 3], dtype='float32', value=1)

        out = layers.while_loop(external_cond, external_body,
                                [i, j, init, sums])

    data = np.random.rand(3, 3).astype('float32')
    data_sums = np.zeros([3, 3]).astype('float32')

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program,
                  feed={'init': data,
                        'sums': data_sums},
                  fetch_list=out)

    # numpy reference computation mirroring the nested loops above.
    for i in range(3):
        data = np.add(data, 1)
        data_sums = np.add(data, data_sums)
    for j in range(2):
        data_sums = np.add(data, data_sums)
    self.assertTrue(np.allclose(np.asarray(res[3]), data_sums))
def test_while_loop_backward(self):
    """Backward through while_loop; checks mean(out) and d(mean)/d(i)."""

    def cond(i, x):
        return layers.less_than(i, eleven)

    def body(j, x):
        # TODO: In while block, if the var created in parent block
        # participates in the calculation of gradient, the result of gradient
        # is incorrect because each step scope always returns the same value
        # generated by last step.
        # Here we call `assign` op in while block to avoid this bug, and
        # working on fixing it in next PR.
        i = layers.assign(j)
        x = layers.elementwise_mul(x=i, y=i)
        j = layers.increment(j)
        return [j, x]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = fluid.data(name='i', shape=[1], dtype='float32')
        i.stop_gradient = False
        eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
        one = layers.fill_constant(shape=[1], dtype='float32', value=1)
        x = fluid.data(name='x', shape=[1], dtype='float32')
        x.stop_gradient = False

        out = layers.while_loop(cond, body, [i, x])
        mean = layers.mean(out[1])
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)

    feed_i = np.ones(1).astype('float32')
    feed_x = np.ones(1).astype('float32')
    data = np.asarray([100]).astype('float32')
    i_grad = np.asarray([110]).astype('float32')

    res = exe.run(main_program,
                  feed={'i': feed_i,
                        'x': feed_x},
                  fetch_list=[mean.name, i.grad_name])
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
    self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(
                        res[1], i_grad))
def external_body(i, j, init, sums):
    """Outer-loop body: run the inner while_loop, then fold `init` into
    `sums` once more and advance the outer counter."""

    def internal_cond(j, init, sums):
        return layers.less_than(j, loop_len2)

    def internal_body(j, init, sums):
        init = layers.elementwise_add(x=init, y=ones)
        sums = layers.elementwise_add(x=init, y=sums)
        j = layers.increment(j)
        return [j, init, sums]

    result = layers.while_loop(internal_cond, internal_body,
                               [j, init, sums])
    j, init, sums = result[0], result[1], result[2]
    sums = layers.elementwise_add(x=init, y=sums)
    i = layers.increment(i)
    return [i, j, init, sums]
def test_while_loop_backward2(self):
    """Backward through while_loop with Python-operator cond/body;
    checks mean(out) plus gradients w.r.t. both loop vars."""

    def cond(i, x):
        return i < 3

    def body(i, x):
        x = x * i
        i = i + 1
        return [i, x]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = fluid.data(name='i', shape=[1], dtype='float32')
        i.stop_gradient = False
        x = fluid.data(name='x', shape=[1], dtype='float32')
        x.stop_gradient = False

        out = layers.while_loop(cond, body, [i, x])
        mean = layers.mean(out[1])
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)

    feed_i = np.ones(1).astype('float32')
    feed_x = np.ones(1).astype('float32')
    data = np.asarray([2]).astype('float32')
    i_grad = np.asarray([3]).astype('float32')
    x_grad = np.asarray([2]).astype('float32')

    res = exe.run(main_program,
                  feed={'i': feed_i,
                        'x': feed_x},
                  fetch_list=[mean.name, i.grad_name, x.grad_name])
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
    self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(
                        res[1], i_grad))
    self.assertTrue(np.allclose(np.asarray(res[2]), x_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(
                        res[2], x_grad))
def test_while_loop_backward(self):
    """Backward through while_loop (no assign workaround in this variant);
    checks mean(out) and d(mean)/d(i)."""

    def cond(i, x):
        return layers.less_than(i, eleven)

    def body(i, x):
        x = layers.elementwise_mul(x=i, y=i)
        i = layers.increment(i)
        return [i, x]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = fluid.data(name='i', shape=[1], dtype='float32')
        i.stop_gradient = False
        eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
        one = layers.fill_constant(shape=[1], dtype='float32', value=1)
        x = fluid.data(name='x', shape=[1], dtype='float32')
        x.stop_gradient = False

        out = layers.while_loop(cond, body, [i, x])
        mean = layers.mean(out[1])
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)

    feed_i = np.ones(1).astype('float32')
    feed_x = np.ones(1).astype('float32')
    data = np.asarray([100]).astype('float32')
    i_grad = np.asarray([110]).astype('float32')

    res = exe.run(main_program,
                  feed={'i': feed_i,
                        'x': feed_x},
                  fetch_list=[mean.name, i.grad_name])
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
    self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                    msg=" \nres = \n{} \n\n ans = \n{}".format(
                        res[1], i_grad))
def test_var_tuple(self):
    """loop_vars passed as a tuple; counter should reach 10."""

    def cond(i):
        return layers.less_than(i, ten)

    def body(i):
        return layers.elementwise_add(x=i, y=one)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        i = layers.fill_constant(shape=[1], dtype='int64', value=0)
        one = layers.fill_constant(shape=[1], dtype='int64', value=1)
        ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
        out = layers.while_loop(cond, body, (i, ))

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program, fetch_list=out)
    self.assertTrue(
        np.allclose(np.asarray(res[0]), np.full((1), 10, np.int64)))
def test_with_switch_case(self):
    """while_loop whose body dispatches through switch_case; starting
    from 1 the trajectory is 1 -> 2 -> 5 -> 25, which ends the loop."""

    def cond(i):
        return layers.less_than(i, ten)

    def body(i):
        def fn_add_three():
            data_add_three = layers.elementwise_add(x=i, y=three)
            return data_add_three

        def fn_square():
            data_mul_data = layers.elementwise_mul(x=i, y=i)
            return data_mul_data

        def fn_add_one():
            data_add_one = layers.elementwise_add(x=i, y=one)
            return data_add_one

        return layers.switch_case(branch_index=i,
                                  branch_fns={2: fn_add_three,
                                              5: fn_square},
                                  default=fn_add_one)

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        i = layers.fill_constant(shape=[1], dtype='int64', value=1)
        ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
        three = layers.fill_constant(shape=[1], dtype='int64', value=3)
        one = layers.fill_constant(shape=[1], dtype='int64', value=1)
        out = layers.while_loop(cond, body, [i])

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)
    res = exe.run(main_program, fetch_list=out)

    data = np.asarray([25]).astype('int64')
    self.assertTrue(np.allclose(np.asarray(res[0]), data))
def external_body(i, j, x, mem_array):
    """Outer-loop body over tensor arrays: accumulate one outer step into
    mem_array, then run the inner while_loop over the remaining entries."""

    def internal_cond(j, x, mem_array):
        return layers.less_than(j, array_len2)

    def internal_body(j, x, mem_array):
        inner_data = layers.array_read(array=data_array, i=j)
        inner_prev = layers.array_read(array=mem_array, i=j)
        inner_sum_0 = layers.elementwise_add(x=inner_data, y=inner_prev)
        inner_sum_1 = layers.elementwise_add(x=x, y=inner_sum_0)
        j = layers.increment(x=j, in_place=True)
        layers.array_write(inner_sum_1, i=j, array=mem_array)
        return [j, x, mem_array]

    outer_data = layers.array_read(array=data_array, i=i)
    outer_prev = layers.array_read(array=mem_array, i=i)
    outer_sum_0 = layers.elementwise_add(x=outer_data, y=outer_prev)
    outer_sum_1 = layers.elementwise_add(x=x, y=outer_sum_0)
    i = layers.increment(x=i, in_place=True)
    layers.array_write(outer_sum_1, i=i, array=mem_array)

    j, x, mem_array = layers.while_loop(internal_cond, internal_body,
                                        [j, x, mem_array])
    return [i, j, x, mem_array]
def test_ignore_var(self):
    """Loop vars the body passes through untouched (`origin_seq`) must
    survive while_loop; `shuffle_batch` reworks `batch_info` each step.

    Fixes: removed leftover debug `print()` calls (test-output noise) and
    switched `numpy.` to the `np` alias used throughout this file.
    """

    def cond(i, ten, temp, y):
        return i < ten

    def body_func(i, ten, batch_info, origin_seq):
        batch_info = fluid.contrib.layers.shuffle_batch(batch_info)
        i = i + 1
        # origin_seq is returned unchanged on purpose: the op under test
        # must not drop untouched loop vars.
        return [i, ten, batch_info, origin_seq]

    x = fluid.layers.data(name='x', shape=[-1, 1, 4])
    y = fluid.layers.data(name='y', shape=[-1, 1, 1])
    temp = layers.concat(input=[x, y], axis=-1)

    i = layers.fill_constant(shape=[1], value=0, dtype='int32')
    num = layers.fill_constant(shape=[1], value=5, dtype='int32')

    i, ten, shuffle_temp, y = layers.while_loop(cond, body_func,
                                                [i, num, temp, y])
    output = shuffle_temp

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    input_x = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])
    input_x = input_x.reshape(3, 1, 4)
    input_y = np.array([[10], [12], [33]])
    input_y = input_y.reshape(3, 1, 1)

    res, = exe.run(fluid.default_main_program(),
                   feed={'x': input_x,
                         'y': input_y},
                   fetch_list=[output])
    # concat of [3,1,4] and [3,1,1] along the last axis -> [3,1,5].
    self.assertListEqual(list(res.shape), [3, 1, 5])
def type_error_cond():
    # `cond` is a Variable, not a callable -> expected TypeError.
    out = layers.while_loop(data, body, [data_1d])
def value_error_body_returns_error_length():
    # Body returns a different number of vars than loop_vars -> ValueError.
    out = layers.while_loop(cond_returns_bool_tensor,
                            body_returns_error_length, [data])
def value_error_body_returns_error_type():
    # Body returns vars of the wrong type -> expected ValueError.
    out = layers.while_loop(cond_receives_two_args,
                            body_returns_error_type, [data, ten])
def type_error_cond_returns_not_boolean():
    # `cond` yields a non-bool tensor -> expected TypeError.
    out = layers.while_loop(cond_returns_not_bool_tensor, body,
                            [data_1d])
def type_error_shape_cond_returns_2d():
    # `cond` must yield a scalar (shape [1]); a 2-D tensor -> TypeError.
    out = layers.while_loop(cond_returns_2d_tensor, body, [data_2d])
def type_error_cond_returns_not_variable():
    # `cond` yields a Python constant, not a Variable -> TypeError.
    out = layers.while_loop(cond_returns_constant, body, [data_1d])
def value_error_loop_vars():
    # Empty loop_vars list -> expected ValueError.
    out = layers.while_loop(cond_returns_bool_tensor, body, [])
def type_error_loop_vars():
    # loop_vars must be a list/tuple, not a bare Variable -> TypeError.
    out = layers.while_loop(cond_returns_bool_tensor, body, data_1d)
def beam_search():
    """Transformer inference beam search driven by layers.while_loop.

    Returns (finished_ids, finished_scores) from beam_search_decode.
    NOTE(review): relies on many enclosing-scope names (start_tokens,
    enc_output, n_head, beam_size, ...) — assumed defined by the caller.
    """
    max_len = layers.fill_constant(shape=[1],
                                   dtype=start_tokens.dtype,
                                   value=max_out_len,
                                   force_cpu=True)
    step_idx = layers.fill_constant(shape=[1],
                                    dtype=start_tokens.dtype,
                                    value=0,
                                    force_cpu=True)
    # array states will be stored for each step.
    ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                             step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwrited at each step.
    # caches contains states of history steps in decoder self-attention
    # and static encoder output projections in encoder-decoder attention
    # to reduce redundant computation.
    caches = [
        {
            "k":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_key],
                dtype=enc_output.dtype,
                value=0),
            "v":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_value],
                dtype=enc_output.dtype,
                value=0),
            "static_k":  # for encoder-decoder attention
            fluid.data(shape=[None, n_head, 0, d_key],
                       dtype=enc_output.dtype,
                       name=("static_k_%d" % i)),
            "static_v":  # for encoder-decoder attention
            fluid.data(shape=[None, n_head, 0, d_value],
                       dtype=enc_output.dtype,
                       name=("static_v_%d" % i)),
        } for i in range(n_layer)
    ]

    def cond_func(step_idx, selected_ids, selected_scores, gather_idx,
                  caches, trg_src_attn_bias):
        # Continue while under the length budget and some beam is alive.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        return layers.logical_and(x=length_cond, y=finish_cond)

    def body_func(step_idx, pre_ids, pre_scores, gather_idx, caches,
                  trg_src_attn_bias):
        # gather cell states corresponding to selected parent
        pre_caches = map_structure(
            lambda x: layers.gather(x, index=gather_idx), caches)
        pre_src_attn_bias = layers.gather(trg_src_attn_bias,
                                          index=gather_idx)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_src_attn_bias,  # can't use lod tensor here
                value=1,
                shape=[-1, 1],
                dtype=pre_ids.dtype),
            y=step_idx,
            axis=0)
        logits = wrap_decoder((pre_ids, pre_pos, None, pre_src_attn_bias),
                              trg_vocab_size,
                              max_in_len,
                              n_layer,
                              n_head,
                              d_key,
                              d_value,
                              d_model,
                              d_inner_hid,
                              prepostprocess_dropout,
                              attention_dropout,
                              relu_dropout,
                              preprocess_cmd,
                              postprocess_cmd,
                              weight_sharing,
                              enc_output=enc_output,
                              caches=pre_caches,
                              bos_idx=bos_idx)
        # intra-beam topK
        topk_scores, topk_indices = layers.topk(
            input=layers.softmax(logits), k=beam_size)
        accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                             y=pre_scores,
                                             axis=0)
        # beam_search op uses lod to differentiate branches.
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        # topK reduction across beams, also contain special handle of
        # end beams and end sentences(batch reduction)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=beam_size,
            end_id=eos_idx,
            return_parent_idx=True)
        step_idx = layers.increment(x=step_idx, value=1.0, in_place=False)
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        return (step_idx, selected_ids, selected_scores, gather_idx,
                pre_caches, pre_src_attn_bias)

    _ = layers.while_loop(cond=cond_func,
                          body=body_func,
                          loop_vars=[
                              step_idx, start_tokens, init_scores,
                              parent_idx, caches, trg_src_attn_bias
                          ],
                          is_test=True)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=beam_size, end_id=eos_idx)
    return finished_ids, finished_scores
def test_nested_net_with_backward_and_lodtensor(self):
    """Nested while_loop over tensor arrays with backward; checks the
    accumulated sum and d(mean)/d(x)."""

    def external_cond(i, j, x, mem_array):
        return layers.less_than(i, array_len)

    def external_body(i, j, x, mem_array):
        def internal_cond(j, x, mem_array):
            return layers.less_than(j, array_len2)

        def internal_body(j, x, mem_array):
            inner_data = layers.array_read(array=data_array, i=j)
            inner_prev = layers.array_read(array=mem_array, i=j)
            inner_sum_0 = layers.elementwise_add(x=inner_data,
                                                 y=inner_prev)
            inner_sum_1 = layers.elementwise_add(x=x, y=inner_sum_0)
            j = layers.increment(x=j, in_place=True)
            layers.array_write(inner_sum_1, i=j, array=mem_array)
            return [j, x, mem_array]

        outer_data = layers.array_read(array=data_array, i=i)
        outer_prev = layers.array_read(array=mem_array, i=i)
        outer_sum_0 = layers.elementwise_add(x=outer_data, y=outer_prev)
        outer_sum_1 = layers.elementwise_add(x=x, y=outer_sum_0)
        i = layers.increment(x=i, in_place=True)
        layers.array_write(outer_sum_1, i=i, array=mem_array)

        j, x, mem_array = layers.while_loop(internal_cond, internal_body,
                                            [j, x, mem_array])
        return [i, j, x, mem_array]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        d0 = fluid.data(name='d0', shape=[10], dtype='float32')
        d1 = fluid.data(name='d1', shape=[10], dtype='float32')
        d2 = fluid.data(name='d2', shape=[10], dtype='float32')
        x = fluid.data(name='x', shape=[10], dtype='float32')
        x.stop_gradient = False

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)
        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)
        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)

        # Reset the counters used as actual loop vars.
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        array_len = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j.stop_gradient = True
        array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)

        out = layers.while_loop(external_cond, external_body,
                                [i, j, x, mem_array])

        sum_result = layers.array_read(array=mem_array, i=j)
        mean = layers.mean(sum_result)
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    exe = fluid.Executor(place)

    d = []
    for i in range(3):
        d.append(np.random.random(size=[10]).astype('float32'))
    feed_x = np.ones(10).astype('float32')
    data_sum = d[0] + d[1] + d[2] + 3 * feed_x
    x_grad = [0.3] * 10

    res = exe.run(main_program,
                  feed={'d0': d[0],
                        'd1': d[1],
                        'd2': d[2],
                        'x': feed_x},
                  fetch_list=[sum_result.name, x.grad_name])
    self.assertTrue(np.allclose(res[0], data_sum))
    self.assertTrue(np.allclose(res[1], x_grad))
def type_error_body():
    # `body` is a Variable, not a callable -> expected TypeError.
    out = layers.while_loop(cond_returns_bool_tensor, data, [data_1d])
import paddle.fluid as fluid
import paddle.fluid.layers as layers


def cond(i, ten):
    # Keep looping while the counter is below the bound.
    return i < ten


def body(i, dummy):
    # Advance the counter; the second loop var is passed through unchanged.
    i = i + 1
    return i, dummy


i = layers.fill_constant(shape=[1], dtype='int64', value=0)
ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
out, ten = layers.while_loop(cond=cond, body=body, loop_vars=[i, ten])

exe = fluid.Executor(fluid.CPUPlace())
res = exe.run(fluid.default_main_program(), feed={}, fetch_list=out)
print(res)