def _apply_weight_decay(self, params_grads):
    """Apply weight decay."""
    for p, g in params_grads:
        if not self.pat.match(p.name):
            with p.block.program._optimized_guard([p, g]):
                # Decoupled weight decay: shrink the parameter in place by
                # (1 - wd * lr), skipping parameters whose names match the
                # exclusion pattern.
                layers.assign(p * (1. - self.wd * self._learning_rate), p)
def __init__(self, layer, param_name="weight", dim=0, power=2):
    super(WeightNormWrapper, self).__init__()
    self.param_name = param_name
    self.dim = dim
    self.power = power
    self.layer = layer

    w_v = param_name + "_v"
    w_g = param_name + "_g"

    # We could also use numpy to compute this; after all, it runs only once
    # at initialization.
    original_weight = getattr(layer, param_name)
    self.add_parameter(
        w_v,
        self.create_parameter(
            shape=original_weight.shape, dtype=original_weight.dtype))
    with dg.no_grad():
        F.assign(original_weight, getattr(self, w_v))
    delattr(layer, param_name)
    temp = norm_except(getattr(self, w_v), self.dim, self.power)
    self.add_parameter(
        w_g, self.create_parameter(shape=temp.shape, dtype=temp.dtype))
    with dg.no_grad():
        F.assign(temp, getattr(self, w_g))

    # Also set the computed weight on the wrapped layer at setup time.
    setattr(
        self.layer, self.param_name,
        compute_weight(
            getattr(self, w_v), getattr(self, w_g), self.dim, self.power))
    self.weight_norm_applied = True
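# A minimal sketch (not from the source) of the two helpers referenced above,
# assuming standard weight normalization w = g * v / ||v||_p with the norm
# taken over every axis except `dim`; the names, signatures, and bodies here
# are assumptions, with F aliasing paddle.fluid.layers as in the wrapper.
def norm_except(v, dim, power):
    # p-norm over all axes except `dim`; the result has v's extent along `dim`.
    reduce_axes = [i for i in range(len(v.shape)) if i != dim]
    summed = F.reduce_sum(F.pow(F.abs(v), factor=power), dim=reduce_axes)
    return F.pow(summed, factor=1. / power)

def compute_weight(v, g, dim, power):
    # Rescale the direction tensor v by g / ||v||, broadcast along `dim`.
    scale = F.elementwise_div(g, norm_except(v, dim, power))
    return F.elementwise_mul(v, scale, axis=dim)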
def test_categorical_name(self):
    name = 'test_categorical'
    categorical1 = Categorical([0.4, 0.6], name=name)
    self.assertEqual(categorical1.name, name)

    categorical2 = Categorical([0.5, 0.5])
    self.assertEqual(categorical2.name, 'Categorical')

    paddle.enable_static()

    sample = categorical1.sample([2])
    self.assertEqual(self.get_prefix(sample.name), name + '_sample')

    entropy = categorical1.entropy()
    self.assertEqual(self.get_prefix(entropy.name), name + '_entropy')

    kl = categorical1.kl_divergence(categorical2)
    self.assertEqual(self.get_prefix(kl.name), name + '_kl_divergence')

    value_npdata = np.array([0], dtype="int64")
    value_tensor = layers.create_tensor(dtype="int64")
    layers.assign(value_npdata, value_tensor)

    p = categorical1.probs(value_tensor)
    self.assertEqual(self.get_prefix(p.name), name + '_probs')

    lp = categorical1.log_prob(value_tensor)
    self.assertEqual(self.get_prefix(lp.name), name + '_log_prob')
def test_normal_name(self):
    name = 'test_normal'
    normal1 = Normal(0.0, 1.0, name=name)
    self.assertEqual(normal1.name, name)

    normal2 = Normal(0.0, 1.0)
    self.assertEqual(normal2.name, 'Normal')

    paddle.enable_static()

    sample = normal1.sample([2])
    self.assertEqual(self.get_prefix(sample.name), name + '_sample')

    entropy = normal1.entropy()
    self.assertEqual(self.get_prefix(entropy.name), name + '_entropy')

    value_npdata = np.array([0.8], dtype="float32")
    value_tensor = layers.create_tensor(dtype="float32")
    layers.assign(value_npdata, value_tensor)

    lp = normal1.log_prob(value_tensor)
    self.assertEqual(self.get_prefix(lp.name), name + '_log_prob')

    p = normal1.probs(value_tensor)
    self.assertEqual(self.get_prefix(p.name), name + '_probs')

    kl = normal1.kl_divergence(normal2)
    self.assertEqual(self.get_prefix(kl.name), name + '_kl_divergence')
def not_test_raw_api(self):
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = split_lod_tensor(input=image, mask=cond)

        true_out = layers.create_tensor(dtype='float32')
        true_cond = ConditionalBlock([cond])

        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        false_out = layers.create_tensor(dtype='float32')
        false_cond = ConditionalBlock([cond])

        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        prob = merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=10)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                return
    self.assertFalse(True)
def __call__(self, batch_C_prime, I_r_size):
    C = self.build_C()
    P = self.build_P(I_r_size)
    inv_delta_C = self.build_inv_delta_C(C).astype('float32')
    P_hat = self.build_P_hat(C, P).astype('float32')

    # The precomputed TPS matrices are constants; freeze their gradients.
    inv_delta_C_tensor = layers.create_tensor(dtype='float32')
    layers.assign(inv_delta_C, inv_delta_C_tensor)
    inv_delta_C_tensor.stop_gradient = True
    P_hat_tensor = layers.create_tensor(dtype='float32')
    layers.assign(P_hat, P_hat_tensor)
    P_hat_tensor.stop_gradient = True

    batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
    # batch_C_ex_part_tensor = create_tmp_var(
    #     fluid.default_main_program(),
    #     name='batch_C_ex_part_tensor',
    #     dtype='float32', shape=[-1, 3, 2])
    # layers.py_func(func=get_batch_C_expand,
    #                x=[batch_C_prime], out=[batch_C_ex_part_tensor])
    batch_C_ex_part_tensor.stop_gradient = True

    # Solve the transform T from the fiducial points, then map the sampling
    # grid P through it.
    batch_C_prime_with_zeros = layers.concat(
        [batch_C_prime, batch_C_ex_part_tensor], axis=1)
    batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
    batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
    return batch_P_prime
def _run_paddle_pop(array, *args):
    if len(args) == 0:
        idx = -1
    else:
        idx = args[0]

    assert isinstance(idx, int)

    def cond(i, new_array):
        return less_than(i, arr_len)

    def body(i, new_array):
        item = array_read(array=array, i=i)
        array_write(item, array_length(new_array), new_array)
        i = increment(i)
        return i, new_array

    arr_len = array_length(array)
    if idx < 0:
        idx = idx + arr_len
    else:
        idx = fill_constant(shape=[1], dtype="int64", value=idx)

    pop_item = array_read(array, idx)

    new_array = _slice_tensor_array(array, 0, idx)
    i = idx + 1
    _, new_array = while_loop(cond, body, [i, new_array])
    assign(input=new_array, output=array)

    return pop_item
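# A hedged usage sketch (not from the source), assuming a static-graph program
# context and the same star-imported fluid helpers used above (create_array,
# array_write, fill_constant):
arr = create_array("float32")
zero = fill_constant(shape=[1], dtype="int64", value=0)
one = fill_constant(shape=[1], dtype="int64", value=1)
array_write(fill_constant(shape=[1], dtype="float32", value=10.), zero, arr)
array_write(fill_constant(shape=[1], dtype="float32", value=20.), one, arr)
popped = _run_paddle_pop(arr)  # like list.pop(): returns the last item and
                               # rewrites `arr` without it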
def test_forward(self):
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False
    cond = layers.ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    x = numpy.random.random(size=(10, 1)).astype('float32')

    outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
    print(outs)
    loss = layers.mean(out)
    append_backward(loss=loss)
    outs = exe.run(
        feed={'X': x},
        fetch_list=[
            default_main_program().block(0).var(data.name + "@GRAD")
        ])[0]
    print(outs)
def test_forward(self):
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        data = layers.data(name='X', shape=[1], dtype='float32')
        data.stop_gradient = False
        cond = ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(startup_program)

        x = np.random.random(size=(10, 1)).astype('float32')
        outs = exe.run(main_program, feed={'X': x}, fetch_list=[out])[0]
        print(outs)
        loss = layers.mean(out)
        append_backward(loss=loss)
        outs = exe.run(
            main_program,
            feed={'X': x},
            fetch_list=[main_program.block(0).var(data.name + "@GRAD")])[0]
        print(outs)
def test_uniform_name(self):
    name = 'test_uniform'
    uniform1 = Uniform(0.0, 1.0, name=name)
    self.assertEqual(uniform1.name, name)

    uniform2 = Uniform(0.0, 1.0)
    self.assertEqual(uniform2.name, 'Uniform')

    paddle.enable_static()

    sample = uniform1.sample([2])
    self.assertEqual(self.get_prefix(sample.name), name + '_sample')

    entropy = uniform1.entropy()
    self.assertEqual(self.get_prefix(entropy.name), name + '_entropy')

    value_npdata = np.array([0.8], dtype="float32")
    value_tensor = layers.create_tensor(dtype="float32")
    layers.assign(value_npdata, value_tensor)

    lp = uniform1.log_prob(value_tensor)
    self.assertEqual(self.get_prefix(lp.name), name + '_log_prob')

    p = uniform1.probs(value_tensor)
    self.assertEqual(self.get_prefix(p.name), name + '_probs')
def batch_scatter(ref, indices, updates, in_place=False, overwrite=False):
    """Scatter updates to ref, according to the corresponding index in indices
    in each batch. Currently it only supports 2-D Tensors.

    Args:
        ref (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1]
        updates (Variable): with shape [batch_size]
        in_place (bool): if True, the scatter result is assigned back to ref.
                         Otherwise a new Tensor is returned. Default is False.
        overwrite (bool): if True, scatter overwrites the corresponding
                          elements instead of adding to them. Default is False.

    Returns:
        Variable: ref itself if in_place is True, otherwise a new Tensor
        holding the scatter result.

    Raises:
        NULL

    Examples:
        ref
            [[1, 1, 1], [1, 1, 1]]
        indices
            [[2], [1]]
        updates
            [2, 3]
        return
            [[1, 1, 2], [1, 3, 1]]
    """
    ref_dtype = ref.dtype
    if ref_dtype not in PaddleVarType.floats:
        ref_in = layers.cast(ref, dtype='float32')
    else:
        ref_in = ref

    if updates.dtype != ref_in.dtype:
        updates = layers.cast(updates, dtype=ref_in.dtype)

    batch_size = layers.cast(layers.shape(ref_in)[0], dtype=indices.dtype)
    zero = layers.fill_constant(shape=[1], dtype=indices.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=indices.dtype, value=1)
    batch_indices = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=indices.dtype), [1])
    coord = layers.concat([batch_indices, indices], axis=1)
    if overwrite:
        # Zero out the target elements first, so the subsequent add
        # effectively overwrites them.
        mask = layers.gather_nd(ref_in, coord)
        mask = layers.elementwise_sub(layers.zeros_like(mask), mask)
        ref_in = layers.scatter_nd_add(ref_in, coord, mask)

    output = layers.scatter_nd_add(ref_in, coord, updates)

    if ref_dtype not in PaddleVarType.floats:
        output = layers.cast(output, dtype=ref_dtype)
    if in_place:
        layers.assign(output, ref)
        return ref
    else:
        return output
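# A hedged usage sketch (not from the source) reproducing the docstring
# example above; `layers` and `np` are assumed to be the same imports
# batch_scatter itself relies on, inside a static-graph program:
ref = layers.fill_constant(shape=[2, 3], dtype='float32', value=1)
indices = layers.create_tensor(dtype='int32')
layers.assign(np.array([[2], [1]]).astype('int32'), indices)
updates = layers.create_tensor(dtype='float32')
layers.assign(np.array([2, 3]).astype('float32'), updates)
out = batch_scatter(ref, indices, updates, overwrite=True)
# After executing the program, out evaluates to [[1, 1, 2], [1, 3, 1]].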
def test_fetch_var(self):
    val = numpy.array([1, 3, 5]).astype(numpy.int32)
    x = layers.create_tensor(dtype="int32", persistable=True, name="x")
    layers.assign(input=val, output=x)
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_main_program(), feed={}, fetch_list=[])
    fetched_x = fluid.fetch_var("x")
    self.assertTrue(
        numpy.array_equal(fetched_x, val),
        "fetch_x=%s val=%s" % (fetched_x, val))
    self.assertEqual(fetched_x.dtype, val.dtype)
def grammar_decode_wrapper(self, fn, decoder, condition, step_gmr_info,
                           gmr_stack_info, finished, outputs_info):
    """Wrapper of grammar decoding that processes each grammar branch.

    Args:
        fn (TYPE): NULL
        decoder (TYPE): NULL
        condition (TYPE): NULL
        step_gmr_info (TYPE): NULL
        gmr_stack_info (TYPE): NULL
        finished (TYPE): NULL
        outputs_info (TYPE): NULL

    Returns:
        tuple: the updated (step_gmr_info, gmr_stack_info, finished,
        outputs_info), assigned back in place.

    Raises:
        NULL
    """
    condition = layers.utils.map_structure(self.merge_batch_beams, condition)
    # Keep intermediate results in new_xx variables, so they can be assigned
    # back to the original variables once processing finishes.
    new_step_gmr_info = layers.utils.map_structure(self.merge_batch_beams,
                                                   step_gmr_info)
    new_gmr_stack_info = layers.utils.map_structure(self.merge_batch_beams,
                                                    gmr_stack_info)
    new_finished = layers.utils.map_structure(self.merge_batch_beams,
                                              finished)
    new_outputs_info = layers.utils.map_structure(self.merge_batch_beams,
                                                  outputs_info)

    new_step_gmr_info, new_gmr_stack_info, new_finished, new_outputs_info = \
        fn(decoder, condition, new_step_gmr_info, new_gmr_stack_info,
           new_finished, new_outputs_info)

    new_step_gmr_info = layers.utils.map_structure(self.split_batch_beams,
                                                   new_step_gmr_info)
    new_gmr_stack_info = layers.utils.map_structure(self.split_batch_beams,
                                                    new_gmr_stack_info)
    new_finished = layers.utils.map_structure(self.split_batch_beams,
                                              new_finished)
    new_outputs_info = layers.utils.map_structure(self.split_batch_beams,
                                                  new_outputs_info)

    # Assign the computed results back to the original variables.
    layers.utils.map_structure(lambda x, y: layers.assign(x, y),
                               new_step_gmr_info, step_gmr_info)
    layers.utils.map_structure(lambda x, y: layers.assign(x, y),
                               new_gmr_stack_info, gmr_stack_info)
    layers.utils.map_structure(lambda x, y: layers.assign(x, y),
                               new_finished, finished)
    layers.utils.map_structure(lambda x, y: layers.assign(x, y),
                               new_outputs_info, outputs_info)
    return step_gmr_info, gmr_stack_info, finished, outputs_info
def test_sequence_slice(self):
    import numpy as np
    program = Program()
    with program_guard(program):
        seqs = layers.data(
            name='x', shape=[10, 5], dtype='float32', lod_level=1)
        offset = layers.assign(input=np.array([[0, 1]]).astype('int32'))
        length = layers.assign(input=np.array([[2, 1]]).astype('int32'))
        out = layers.sequence_slice(
            input=seqs, offset=offset, length=length)
        self.assertIsNotNone(out)
    print(str(program))
def test_assign(self):
    val = (
        -100 + 200 * numpy.random.random(size=(2, 5))).astype(numpy.int32)
    x = layers.create_tensor(dtype="float32")
    layers.assign(input=val, output=x)
    exe = fluid.Executor(fluid.CPUPlace())
    fetched_x = exe.run(fluid.default_main_program(),
                        feed={},
                        fetch_list=[x])[0]
    self.assertTrue(
        numpy.array_equal(fetched_x, val),
        "fetch_x=%s val=%s" % (fetched_x, val))
    self.assertEqual(fetched_x.dtype, val.dtype)
def check_switch(self, value):
    x = layers.fill_constant(shape=[1], dtype='float32', value=value)

    zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
    one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
    two_var = layers.fill_constant(shape=[1], dtype='float32', value=2.0)
    three_var = layers.fill_constant(shape=[1], dtype='float32', value=3.0)

    result = layers.create_global_var(
        shape=[1], value=-1.0, dtype='float32', persistable=True)

    with layers.Switch() as switch:
        with switch.case(layers.less_than(x, zero_var)):
            layers.assign(zero_var, result)
        with switch.case(layers.less_than(x, one_var)):
            layers.assign(one_var, result)
        with switch.case(layers.less_than(x, two_var)):
            layers.assign(two_var, result)
        with switch.default():
            layers.assign(three_var, result)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    out = exe.run(feed={}, fetch_list=[result])[0][0]
    return out
def increment(self):
    """Grow the loss scale by `factor` once `increment_every` good steps
    accumulate; adopt the new scale (and reset the counter) only if it is
    still finite, otherwise keep counting."""
    enough_steps = layers.less_than(self.increment_every,
                                    self.good_steps + 1)
    with layers.Switch() as switch:
        with switch.case(enough_steps):
            new_scale = self.scale * self.factor
            scale_valid = layers.isfinite(new_scale)
            with layers.Switch() as switch2:
                with switch2.case(scale_valid):
                    layers.assign(new_scale, self.scale)
                    layers.assign(layers.zeros_like(self.good_steps),
                                  self.good_steps)
                with switch2.default():
                    layers.increment(self.good_steps)
        with switch.default():
            layers.increment(self.good_steps)
def fibonacci(channel, quit_channel):
    while_op = While(cond=while_cond)
    with while_op.block():
        result2 = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)

        with fluid.Select() as select:
            with select.case(
                    fluid.channel_send, channel, x, is_copy=True):
                assign(input=x, output=x_tmp)
                assign(input=y, output=x)
                assign(elementwise_add(x=x_tmp, y=y), output=y)

            with select.case(fluid.channel_recv, quit_channel, result2):
                # Quit
                helper = layer_helper.LayerHelper('assign')
                helper.append_op(
                    type='assign',
                    inputs={'X': [while_false]},
                    outputs={'Out': [while_cond]})
def test_raw_api(self):
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = layers.split_lod_tensor(
            input=image, mask=cond)

        true_out = layers.create_tensor(dtype='float32')
        true_cond = layers.ConditionalBlock([true_image])

        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        false_out = layers.create_tensor(dtype='float32')
        false_cond = layers.ConditionalBlock([false_image])

        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        prob = layers.merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                return
    self.assertFalse(True)