def check_switch(self, value):
    x = layers.fill_constant(shape=[1], dtype='float32', value=value)

    zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
    one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
    two_var = layers.fill_constant(shape=[1], dtype='float32', value=2.0)
    three_var = layers.fill_constant(shape=[1], dtype='float32', value=3.0)

    result = layers.create_global_var(
        shape=[1], value=-1.0, dtype='float32', persistable=True)

    with layers.Switch() as switch:
        with switch.case(layers.less_than(x, zero_var)):
            layers.assign(zero_var, result)
        with switch.case(layers.less_than(x, one_var)):
            layers.assign(one_var, result)
        with switch.case(layers.less_than(x, two_var)):
            layers.assign(two_var, result)
        with switch.default():
            layers.assign(three_var, result)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    out = exe.run(feed={}, fetch_list=[result])[0][0]
    return out
def test_select(self):
    with framework.program_guard(framework.Program()):
        ch1 = fluid.make_channel(
            dtype=core.VarDesc.VarType.LOD_TENSOR, capacity=1)

        result1 = self._create_tensor('return_value',
                                      core.VarDesc.VarType.LOD_TENSOR,
                                      core.VarDesc.VarType.FP64)

        input_value = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.FP64, value=10)

        with fluid.Select() as select:
            with select.case(fluid.channel_send, ch1, input_value):
                # Execute something.
                pass

            with select.default():
                pass

        # This should not block because we are using a buffered channel.
        result1, status = fluid.channel_recv(ch1, result1)
        fluid.channel_close(ch1)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        result = exe.run(fetch_list=[result1])
        self.assertEqual(result[0][0], 10)
def test_array_length(self):
    tmp = layers.zeros(shape=[10], dtype='int32')
    i = layers.fill_constant(shape=[1], dtype='int64', value=10)
    arr = layers.array_write(tmp, i=i)
    arr_len = layers.array_length(arr)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    result = exe.run(fetch_list=[arr_len])[0]
    self.assertEqual(11, result[0])
def net_profiler(self, state, profile_path='/tmp/profile'):
    enable_if_gpu = state == 'GPU' or state == "All"
    if enable_if_gpu and not core.is_compiled_with_cuda():
        return
    startup_program = fluid.Program()
    main_program = fluid.Program()

    with fluid.program_guard(main_program, startup_program):
        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
        hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
        i = layers.zeros(shape=[1], dtype='int64')
        counter = fluid.layers.zeros(
            shape=[1], dtype='int64', force_cpu=True)
        until = layers.fill_constant([1], dtype='int64', value=10)
        data_arr = layers.array_write(hidden1, i)
        cond = fluid.layers.less_than(x=counter, y=until)
        while_op = fluid.layers.While(cond=cond)
        with while_op.block():
            hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
            layers.array_write(hidden_n, i, data_arr)
            fluid.layers.increment(x=counter, value=1, in_place=True)
            layers.less_than(x=counter, y=until, cond=cond)

        hidden_n = layers.array_read(data_arr, i)
        hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
        predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
        label = fluid.layers.data(name='y', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        batch_size = fluid.layers.create_tensor(dtype='int64')
        batch_acc = fluid.layers.accuracy(
            input=predict, label=label, total=batch_size)
        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

    place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    pass_acc_calculator = fluid.average.WeightedAverage()
    with profiler.profiler(state, 'total', profile_path) as prof:
        for iter in range(10):
            if iter == 2:
                profiler.reset_profiler()
            x = np.random.random((32, 784)).astype("float32")
            y = np.random.randint(0, 10, (32, 1)).astype("int64")
            outs = exe.run(main_program,
                           feed={'x': x,
                                 'y': y},
                           fetch_list=[avg_cost, batch_acc, batch_size])
            acc = np.array(outs[1])
            b_size = np.array(outs[2])
            pass_acc_calculator.add(value=acc, weight=b_size)
            pass_acc = pass_acc_calculator.eval()
def test_simple_forward(self):
    d0 = layers.data(
        "d0", shape=[10], append_batch_size=False, dtype='float32')
    d1 = layers.data(
        "d1", shape=[10], append_batch_size=False, dtype='float32')
    d2 = layers.data(
        "d2", shape=[10], append_batch_size=False, dtype='float32')
    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True
    init = layers.zeros(shape=[10], dtype='float32')
    mem_array = layers.array_write(x=init, i=i)
    data_array = layers.array_write(x=d0, i=i)

    i = layers.increment(i)
    layers.array_write(d1, i, array=data_array)

    i = layers.increment(i)
    layers.array_write(d2, i, array=data_array)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True

    array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
    array_len.stop_gradient = True
    cond = layers.less_than(x=i, y=array_len)

    while_op = layers.While(cond=cond)
    with while_op.block():
        d = layers.array_read(array=data_array, i=i)
        prev = layers.array_read(array=mem_array, i=i)
        result = layers.sums(input=[d, prev])

        i = layers.increment(x=i, in_place=True)
        layers.array_write(result, i=i, array=mem_array)
        layers.less_than(x=i, y=array_len, cond=cond)

    sum_result = layers.array_read(array=mem_array, i=i)
    loss = layers.mean(sum_result)

    append_backward(loss)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    d = []

    for i in xrange(3):
        d.append(numpy.random.random(size=[10]).astype('float32'))

    outs = exe.run(feed={'d0': d[0],
                         'd1': d[1],
                         'd2': d[2]},
                   fetch_list=[sum_result])
    self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
def test_error(self):
    startup_program = Program()
    main_program = Program()
    use_cuda = core.is_compiled_with_cuda()
    with program_guard(main_program, startup_program):

        def fn_1(opt, avg_loss):
            opt.minimize(avg_loss)

        def fn_2(opt, avg_loss):
            opt.minimize(avg_loss)

        x = fluid.layers.data("X", [10], 'float32')
        hidden = layers.fc(x, 5)
        avg_loss = layers.mean(hidden)

        adam = optimizer.Adam(learning_rate=LR)
        sgd = optimizer.SGD(learning_rate=LR)

        cond = layers.fill_constant([1], 'bool', True)

        layers.case([(cond, lambda: fn_1(adam, avg_loss))],
                    lambda: fn_2(sgd, avg_loss))

    cpu_place = fluid.CPUPlace()
    cuda_place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    for place in [cpu_place, cuda_place]:
        exe = fluid.Executor(place)
        exe.run(startup_program)

        np.random.seed(SEED)
        os.environ['CPU_NUM'] = str(2)
        pe_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda,
            main_program=main_program,
            loss_name=avg_loss.name)
        num_devices = pe_exe.device_count

        def not_implemented_error():
            pe_exe.run(feed={
                'X': np.random.random(size=[64, 10]).astype('float32'),
            },
                       fetch_list=[avg_loss.name])

        if num_devices > 1:
            self.assertRaises(NotImplementedError, not_implemented_error)
        else:
            not_implemented_error()
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([2, 2], dtype, 2)

        tmp_0 = layers.elementwise_add(self.feed_vars[0], self.feed_vars[1])
        tmp_1 = layers.fill_constant(shape=[2, 2], dtype=dtype, value=2.0)
        tmp_2 = layers.scale(
            tmp_1, scale=3.0, bias=1.0, bias_after_scale=True)
        tmp_3 = layers.elementwise_mul(tmp_2, tmp_0)

        self.append_gradients(tmp_3)

        self.num_fused_ops = 1
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
def get_model(self, main_prog, startup_program, rank):
    with fluid.program_guard(main_prog, startup_program):
        tindata = layers.data(
            name="tindata",
            shape=[10, 1000],
            dtype='float64',
            append_batch_size=False)
        toutdata = layers.fill_constant(
            shape=[5, 1000], dtype='float64', value=1.0)
        tensor_list = None
        if rank == 1:
            tensor_list = paddle.split(tindata, 2, axis=0)
        paddle.distributed.scatter(toutdata, tensor_list, src=1)
        return [toutdata]
def forward(self, features):
    src_ids, sent_ids, qid = features
    zero = L.fill_constant([1], dtype='int64', value=0)
    input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)),
                        'float32')  # assume pad id == 0
    #input_mask = L.unsqueeze(input_mask, axes=[2])
    d_shape = L.shape(src_ids)
    seqlen = d_shape[1]
    batch_size = d_shape[0]
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True
    input_mask.stop_gradient = True
    task_ids = L.zeros_like(
        src_ids) + self.hparam.task_id  # task ids are unused at the moment
    task_ids.stop_gradient = True

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=self.hparam,
        use_fp16=self.hparam['use_fp16'])

    cls_feats = ernie.get_pooled_output()
    cls_feats = L.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = L.fc(
        input=cls_feats,
        size=self.hparam['num_label'],
        param_attr=F.ParamAttr(
            name="cls_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(
            name="cls_out_b", initializer=F.initializer.Constant(0.)))

    propeller.summary.histogram('pred', logits)

    if self.mode is propeller.RunMode.PREDICT:
        probs = L.softmax(logits)
        return qid, probs
    else:
        return qid, logits
def __init__(self,
             filters,
             filter_size,
             padding,
             forget_bias=1.0,
             name="conv_lstm_2d_uint"):
    self.filters = filters
    self.filter_size = filter_size
    self.padding = padding
    self.forget_bias = layers.fill_constant(
        [1], dtype='float32', value=forget_bias)
    self.forget_bias.stop_gradient = False
def build_program(self):
    def true_func():
        return layers.fill_constant(
            shape=[1, 2], dtype='int32', value=1), layers.fill_constant(
                shape=[2, 3], dtype='bool', value=True)

    def false_func():
        return layers.fill_constant(
            shape=[3, 4], dtype='float32', value=3), layers.fill_constant(
                shape=[4, 5], dtype='int64', value=2)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
        y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
        pred = layers.less_than(x, y)
        out = layers.cond(pred, true_func, false_func)
        # out is a tuple containing 2 tensors
    return main_program, startup_program, out
def sequence_slice(x, index):
    #offset = layers.fill_constant(shape=[1, args.batch_size], value=index, dtype='float32')
    #length = layers.fill_constant(shape=[1, args.batch_size], value=1, dtype='float32')
    #return layers.sequence_slice(x, offset, length)
    idx = layers.fill_constant(shape=[1], value=1, dtype='int32')
    idx.stop_gradient = True
    from paddle.fluid.layers.control_flow import lod_rank_table
    from paddle.fluid.layers.control_flow import lod_tensor_to_array
    from paddle.fluid.layers.control_flow import array_read
    from paddle.fluid.layers.control_flow import array_to_lod_tensor
    table = lod_rank_table(x, level=0)
    table.stop_gradient = True
    array = lod_tensor_to_array(x, table)
    slice_array = array_read(array=array, i=idx)
    return array_to_lod_tensor(slice_array, table)
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 64], dtype, 5)

        one = layers.fill_constant(shape=[1], dtype=dtype, value=1.0)
        tmp_0 = one * self.feed_vars[0]
        # subgraph with 9 op nodes
        tmp_1 = tmp_0 * layers.sigmoid(self.feed_vars[1]) + layers.sigmoid(
            self.feed_vars[2]) * layers.tanh(self.feed_vars[3])
        tmp_2 = layers.tanh(tmp_1) + layers.sigmoid(self.feed_vars[4])

        self.append_gradients(tmp_2)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
def _create_gmr_type_tensor(self, type_id):
    """create grammar type tensor with shape=[batch_size * beam_size]

    Args:
        type_id (TYPE): NULL

    Returns:
        Variable: shape = [batch_size * beam_size], dtype = int64

    Raises:
        NULL
    """
    shape = [self._batch_size, self._beam_size]
    output = layers.fill_constant(shape=shape, value=type_id, dtype='int64')
    return self.merge_batch_beams(output)
def pad_sequence_paddle(sequences, padding_value=0):
    """Fill sequences (variables) into a fixed-length matrix."""
    max_size = sequences[0].shape
    trailing_dims = max_size[1:]
    max_len = max([s.shape[0] for s in sequences])
    out_tensor = []
    for tensor in sequences:
        length = tensor.shape[0]
        pad_tensor = layers.concat(
            (tensor,
             layers.fill_constant(
                 (max_len - length, *trailing_dims),
                 dtype=tensor.dtype,
                 value=padding_value)))
        out_tensor.append(pad_tensor)
    out_tensor = layers.stack(out_tensor)
    return out_tensor
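# A minimal NumPy sketch of the padding semantics above (illustrative data,
# not part of the original code): shorter sequences are extended with
# `padding_value` up to the longest length, then stacked along a new batch
# dimension.
import numpy as np

seqs = [np.ones((2, 4), dtype='float32'), np.ones((3, 4), dtype='float32')]
max_len = max(s.shape[0] for s in seqs)
padded = np.stack([
    np.concatenate(
        [s, np.zeros((max_len - s.shape[0],) + s.shape[1:], dtype=s.dtype)])
    for s in seqs
])  # padded.shape == (2, 3, 4)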
def fn_1(x=1):
    out = layers.switch_case(
        branch_index=layers.fill_constant(shape=[1], dtype='int32', value=x),
        branch_fns={
            1: partial(layers.fill_constant, shape=[1], dtype='int32', value=1),
            x: partial(layers.fill_constant, shape=[2], dtype='int32', value=x)
        })
    return out
def __init__(self, x, y, y_aux, cfg):
    self.program = fluid.default_main_program().clone()
    with fluid.program_guard(self.program):
        model = ACGAN(cfg.latent_size, cfg.num_classes)
        self.fake, self.aux = model.network_d(x, name='d')
        self.fake_loss = layers.sigmoid_cross_entropy_with_logits(
            x=self.fake, label=y)
        self.aux_loss = layers.softmax_with_cross_entropy(
            logits=self.aux, label=y_aux)
        self.unweighted_loss = layers.reduce_sum(self.fake_loss +
                                                 self.aux_loss)
        self.infer_program = self.program.clone(for_test=True)

        # We don't want the discriminator to also maximize the classification
        # accuracy of the auxiliary classifier on generated images, so we
        # don't train the discriminator to produce class labels for generated
        # images (see https://openreview.net/forum?id=rJXTf9Bxg).
        # To preserve the sum of sample weights for the auxiliary classifier,
        # we assign a sample weight of 2 to the real images.
        fake_loss_weight = layers.ones(
            shape=[cfg.batch_size * 2, 1], dtype='float32')
        aux_loss_weight_zeros = layers.zeros(
            shape=[cfg.batch_size, 1], dtype='float32')
        aux_loss_weight_twos = layers.fill_constant(
            shape=[cfg.batch_size, 1], value=2.0, dtype='float32')
        aux_loss_weight = layers.concat(
            [aux_loss_weight_twos, aux_loss_weight_zeros])
        self.fake_loss = layers.elementwise_mul(self.fake_loss,
                                                fake_loss_weight)
        self.aux_loss = layers.elementwise_mul(self.aux_loss,
                                               aux_loss_weight)
        self.loss = layers.reduce_sum(self.fake_loss) + layers.reduce_sum(
            self.aux_loss)

        vars = []
        for var in self.program.list_vars():
            if fluid.io.is_parameter(var) and var.name.startswith("d"):
                vars.append(var.name)
        optimizer = fluid.optimizer.Adam(
            learning_rate=cfg.adam_lr, beta1=cfg.adam_beta_1, name="net_d")
        optimizer.minimize(self.loss, parameter_list=vars)
def __init__(self, beam_size, batch_size, alpha, vocab_size, hidden_size):
    self.beam_size = beam_size
    self.batch_size = batch_size
    self.alpha = alpha
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.gather_top2k_append_index = layers.range(
        0, 2 * self.batch_size * beam_size, 1, 'int64') // \
        (2 * self.beam_size) * (self.beam_size)
    self.gather_topk_append_index = layers.range(
        0, self.batch_size * beam_size, 1, 'int64') // \
        self.beam_size * (2 * self.beam_size)
    self.gather_finish_topk_append_index = layers.range(
        0, self.batch_size * beam_size, 1, 'int64') // \
        self.beam_size * (3 * self.beam_size)
    self.eos_id = layers.fill_constant(
        [self.batch_size, 2 * self.beam_size], 'int64', value=1)
    self.get_alive_index = layers.range(
        0, self.batch_size, 1, 'int64') * self.beam_size
def __init__(self, init_loss_scale=2**15, increment_every=2000, factor=2.):
    super(DynamicLossScale, self).__init__()
    self.scale = layers.create_global_var(
        name=unique_name.generate("loss_scale"),
        shape=[1],
        value=init_loss_scale,
        dtype='float32',
        persistable=True)
    self.good_steps = layers.create_global_var(
        name=unique_name.generate("good_steps"),
        shape=[1],
        value=0,
        dtype='int32',
        persistable=True)
    self.increment_every = layers.fill_constant(
        shape=[1], dtype='int32', value=increment_every)
    self.factor = factor
def test_optimizer_in_case(self):
    BATCH_SIZE = 1
    INPUT_SIZE = 784
    EPOCH_NUM = 2

    x = fluid.data(
        name='x', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32')
    y = fluid.data(
        name='y', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32')

    switch_id = fluid.data(name='switch_id', shape=[1], dtype='int32')

    one = layers.fill_constant(shape=[1], dtype='int32', value=1)
    adam = optimizer.Adam(learning_rate=0.001)
    adagrad = optimizer.Adagrad(learning_rate=0.001)

    def fn_1():
        sum = layers.elementwise_mul(x, y)
        loss = layers.mean(sum, name="f_1_loss")
        adam.minimize(loss)

    def fn_2():
        sum = layers.elementwise_mul(x, y)
        loss = layers.mean(sum, name="f_2_loss")
        adagrad.minimize(loss)

    layers.case(pred_fn_pairs=[(switch_id == one, fn_1)], default=fn_2)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    for epoch in range(EPOCH_NUM):
        np.random.seed(epoch)
        feed_image = np.random.random(
            size=[BATCH_SIZE, INPUT_SIZE]).astype('float32')
        main_program = fluid.default_main_program()
        out = exe.run(main_program,
                      feed={
                          'x': feed_image,
                          'y': feed_image,
                          'switch_id': np.array([epoch]).astype('int32')
                      },
                      fetch_list=[])
def build_program(self, compile_program=True):
    startup_program = fluid.Program()
    main_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
        hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
        i = layers.zeros(shape=[1], dtype='int64')
        counter = fluid.layers.zeros(
            shape=[1], dtype='int64', force_cpu=True)
        until = layers.fill_constant([1], dtype='int64', value=10)
        data_arr = layers.array_write(hidden1, i)
        cond = fluid.layers.less_than(x=counter, y=until)
        while_op = fluid.layers.While(cond=cond)
        with while_op.block():
            hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
            layers.array_write(hidden_n, i, data_arr)
            fluid.layers.increment(x=counter, value=1, in_place=True)
            layers.less_than(x=counter, y=until, cond=cond)

        hidden_n = layers.array_read(data_arr, i)
        hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
        predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
        label = fluid.layers.data(name='y', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        batch_size = fluid.layers.create_tensor(dtype='int64')
        batch_acc = fluid.layers.accuracy(
            input=predict, label=label, total=batch_size)
        optimizer = fluid.optimizer.Momentum(
            learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

    if compile_program:
        # TODO(luotao): profiler tool may have bug with multi-thread parallel executor.
        # https://github.com/PaddlePaddle/Paddle/pull/25200#issuecomment-650483092
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1
        train_program = fluid.compiler.CompiledProgram(
            main_program).with_data_parallel(
                loss_name=avg_cost.name, exec_strategy=exec_strategy)
    else:
        train_program = main_program
    return train_program, startup_program, avg_cost, batch_size, batch_acc
def grow_finished(self, i, finished_seq, finished_scores, finished_flags,
                  curr_seq, curr_scores, curr_finished):
    """grow_finished"""
    finished_seq = layers.concat(
        [
            finished_seq, layers.fill_constant(
                [self.batch_size, self.beam_size, 1], dtype='int64', value=0)
        ],
        axis=2)
    curr_scores = curr_scores + (1.0 - layers.cast(curr_finished,
                                                   'int64')) * -INF
    curr_finished_seq = layers.concat([finished_seq, curr_seq], axis=1)
    curr_finished_scores = layers.concat([finished_scores, curr_scores],
                                         axis=1)
    curr_finished_flags = layers.concat([finished_flags, curr_finished],
                                        axis=1)
    return self.compute_topk_scores_and_seq(
        curr_finished_seq,
        curr_finished_scores,
        curr_finished_scores,
        curr_finished_flags,
        pick_finish=True)
def test_input_type_error(self):
    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        src = layers.data(name='data', shape=[1], dtype='float32')
        const_value = layers.fill_constant(
            [1], dtype='float32', value=123.0)
        ifcond = layers.less_than(x=src, y=const_value)
        with self.assertRaises(TypeError):
            ie = layers.IfElse(set())
        with self.assertRaises(TypeError):
            ie = layers.IfElse(ifcond, set())

        with self.assertRaises(TypeError):
            ie = layers.IfElse(ifcond)
            with ie.true_block():
                true_target = ie.input(src)
                true_target = fluid.layers.exp(true_target)
                ie.output([])
def test_ping_pong(self):
    """
    Mimics Ping Pong example: https://gobyexample.com/channel-directions
    """
    with framework.program_guard(framework.Program()):
        result = self._create_tensor('return_value',
                                     core.VarDesc.VarType.LOD_TENSOR,
                                     core.VarDesc.VarType.FP64)

        ping_result = self._create_tensor('ping_return_value',
                                          core.VarDesc.VarType.LOD_TENSOR,
                                          core.VarDesc.VarType.FP64)

        def ping(ch, message):
            fluid.channel_send(ch, message, is_copy=True)

        def pong(ch1, ch2):
            fluid.channel_recv(ch1, ping_result)
            fluid.channel_send(ch2, ping_result, is_copy=True)

        pings = fluid.make_channel(
            dtype=core.VarDesc.VarType.LOD_TENSOR, capacity=1)
        pongs = fluid.make_channel(
            dtype=core.VarDesc.VarType.LOD_TENSOR, capacity=1)

        msg = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.FP64, value=9)

        ping(pings, msg)
        pong(pings, pongs)

        fluid.channel_recv(pongs, result)

        fluid.channel_close(pings)
        fluid.channel_close(pongs)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        exe_result = exe.run(fetch_list=[result])
        self.assertEqual(exe_result[0][0], 9)
def _dygraph_clip_by_global_norm(self, params_grads):
    params_and_grads = []
    sum_square_list = []
    for p, g in params_grads:
        if g is None:
            continue
        if self._need_clip_func is not None and not self._need_clip_func(p):
            continue
        merge_grad = g
        if g.type == core.VarDesc.VarType.SELECTED_ROWS:
            merge_grad = layers.merge_selected_rows(g)
            merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
        square = layers.square(merge_grad)
        sum_square = layers.reduce_sum(square)
        sum_square_list.append(sum_square)

    # all parameters have been filtered out
    if len(sum_square_list) == 0:
        return params_grads

    global_norm_var = layers.concat(sum_square_list)
    global_norm_var = layers.reduce_sum(global_norm_var)
    global_norm_var = layers.sqrt(global_norm_var)
    max_global_norm = layers.fill_constant(
        shape=[1], dtype='float32', value=self.clip_norm)
    clip_var = layers.elementwise_div(
        x=max_global_norm,
        y=layers.elementwise_max(x=global_norm_var, y=max_global_norm))
    for p, g in params_grads:
        if g is None:
            continue
        if self._need_clip_func is not None and not self._need_clip_func(p):
            params_and_grads.append((p, g))
            continue
        new_grad = layers.elementwise_mul(x=g, y=clip_var)
        params_and_grads.append((p, new_grad))

    return params_and_grads
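# The clipping rule in _dygraph_clip_by_global_norm, restated as a minimal
# NumPy sketch (illustrative, not the Paddle API): every gradient is scaled
# by clip_norm / max(global_norm, clip_norm), which leaves gradients
# unchanged whenever global_norm <= clip_norm.
import numpy as np

def clip_by_global_norm_np(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads]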
def test_pad2d(self):
    program = Program()
    with program_guard(program):
        input = layers.data(
            name="input", shape=[3, 100, 100], dtype="float32")
        paddings = layers.fill_constant(shape=[4], dtype='int32', value=1)
        out = layers.pad2d(
            input,
            paddings=[1, 2, 3, 4],
            mode='reflect',
            data_format='NCHW',
            name="shape")
        out_1 = layers.pad2d(
            input,
            paddings=paddings,
            mode='reflect',
            data_format='NCHW',
            name="shape")
        self.assertIsNotNone(out)
        self.assertIsNotNone(out_1)
    print(str(program))
def forward(self, features):
    src_ids, sent_ids, input_seqlen = features
    zero = L.fill_constant([1], dtype='int64', value=0)
    input_mask = L.cast(L.equal(src_ids, zero),
                        'float32')  # assume pad id == 0
    #input_mask = L.unsqueeze(input_mask, axes=[2])
    d_shape = L.shape(src_ids)
    seqlen = d_shape[1]
    batch_size = d_shape[0]
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True
    input_mask.stop_gradient = True
    task_ids = L.zeros_like(
        src_ids) + self.hparam.task_id  # task ids are unused at the moment
    task_ids.stop_gradient = True

    model = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=self.hparam,
        use_fp16=self.hparam['use_fp16'])

    enc_out = model.get_sequence_output()
    logits = L.fc(
        input=enc_out,
        size=self.num_label,
        num_flatten_dims=2,
        param_attr=F.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(
            name="cls_seq_label_out_b",
            initializer=F.initializer.Constant(0.)))

    propeller.summary.histogram('pred', logits)

    return logits, input_seqlen
def __init__(self,
             name_scope,
             hidden_size,
             param_attr=None,
             bias_attr=None,
             gate_activation=None,
             activation=None,
             forget_bias=1.0,
             dtype='float32'):
    super(BasicLSTMUnit, self).__init__(name_scope, dtype)
    self._name = name_scope
    self._hiden_size = hidden_size
    self._param_attr = param_attr
    self._bias_attr = bias_attr
    self._gate_activation = gate_activation or layers.sigmoid
    self._activation = activation or layers.tanh
    self._forget_bias = layers.fill_constant(
        [1], dtype=dtype, value=forget_bias)
    self._forget_bias.stop_gradient = False
    self._dtype = dtype
def test_return_var_tuple(self):
    def fn_1():
        return layers.fill_constant(
            shape=[1, 2], dtype='int32', value=1), layers.fill_constant(
                shape=[2, 3], dtype='float32', value=2)

    def fn_2():
        return layers.fill_constant(
            shape=[3, 4], dtype='int32', value=3), layers.fill_constant(
                shape=[4, 5], dtype='float32', value=4)

    def fn_3():
        return layers.fill_constant(
            shape=[5], dtype='int32', value=5), layers.fill_constant(
                shape=[5, 6], dtype='float32', value=6)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1)

        out = layers.switch_case(index_1, ((1, fn_1), (2, fn_2)), fn_3)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        ret = exe.run(main_program, fetch_list=out)

        self.assertTrue(
            np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32)))
        self.assertTrue(
            np.allclose(np.asarray(ret[1]), np.full((2, 3), 2, np.float32)))
def increment(cls, x, value, in_place=False):
    """increment each element in x by value

    Args:
        x (Variable): NULL
        value (int/float): NULL
        in_place (TYPE): Default is False

    Returns:
        TODO

    Raises:
        NULL
    """
    if len(x.shape) == 1 and x.shape[0] == 1:
        return layers.increment(x, value, in_place)

    value_tensor = layers.fill_constant(shape=[1], dtype=x.dtype, value=value)
    y = layers.elementwise_add(x, value_tensor)
    if in_place:
        y = layers.assign(y, x)
        return x
    else:
        return y
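# Sketch of the fallback path above in plain NumPy (illustrative data): for
# non-scalar tensors, incrementing is just an elementwise add with a
# broadcast [1]-shaped constant.
import numpy as np

x = np.array([1.0, 2.0], dtype='float32')
y = x + np.full([1], 0.5, dtype='float32')  # array([1.5, 2.5])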
def graph_norm(gw, feature):
    """Implementation of graph normalization

    Reference Paper: BENCHMARKING GRAPH NEURAL NETWORKS

    Each node feature is divided by sqrt(num_nodes) per graph.

    Args:
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)
        feature: A tensor with shape (num_nodes, hidden_size)

    Return:
        A tensor with shape (num_nodes, hidden_size)
    """
    nodes = L.fill_constant([gw.num_nodes, 1], dtype="float32", value=1.0)
    norm = graph_pooling(gw, nodes, pool_type="sum")
    norm = L.sqrt(norm)
    feature_lod = op.nested_lod_reset(feature, gw.graph_lod)
    norm = L.sequence_expand_as(norm, feature_lod)
    norm.stop_gradient = True
    return feature_lod / norm
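# NumPy sketch of graph normalization for a single graph (illustrative data;
# the function above generalizes this to batched graphs via graph_pooling
# and LoD-based broadcasting): each node feature is divided by
# sqrt(num_nodes).
import numpy as np

feature = np.random.rand(5, 16).astype('float32')  # (num_nodes, hidden_size)
normalized = feature / np.sqrt(feature.shape[0])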
def grow_finished(finished_seq, finished_scores, finished_flag, curr_seq,
                  curr_scores, curr_finished):
    finished_seq = layers.concat(
        [
            finished_seq, layers.fill_constant(
                [beam_size, 1], dtype='int64', value=1)
        ],
        axis=1)
    curr_scores += (1.0 - curr_finished) * -INF
    #layers.Print( curr_scores, message="curr scores")
    curr_finished_seq = layers.concat([finished_seq, curr_seq], axis=0)
    curr_finished_scores = layers.concat(
        [finished_scores, curr_scores], axis=0)
    curr_finished_flags = layers.concat(
        [finished_flag, curr_finished], axis=0)

    return compute_topk_scores_and_seq(curr_finished_seq,
                                       curr_finished_scores,
                                       curr_finished_scores,
                                       curr_finished_flags, beam_size)
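# NumPy sketch of the score-masking trick used in grow_finished (illustrative
# values): adding (1 - finished) * -INF pushes still-unfinished candidates to
# a huge negative score, so a subsequent top-k can only select finished
# hypotheses.
import numpy as np

INF = 1e9
curr_scores = np.array([0.5, 0.2], dtype='float32')
curr_finished = np.array([1.0, 0.0], dtype='float32')  # 1.0 = finished
masked = curr_scores + (1.0 - curr_finished) * -INF  # [0.5, ~-1e9]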
def test_simple_routine(self):
    ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)

    # Create LOD_TENSOR<INT64> and put it into the scope. This placeholder
    # variable will be filled in and returned by fluid.channel_recv
    result = self._create_tensor('return_value',
                                 core.VarDesc.VarType.LOD_TENSOR,
                                 core.VarDesc.VarType.INT64)

    with fluid.Go():
        input_value = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.FP64, value=1234)
        fluid.channel_send(ch, input_value)

    result, status = fluid.channel_recv(ch, result)
    fluid.channel_close(ch)

    cpu = core.CPUPlace()
    exe = Executor(cpu)

    outs = exe.run(fetch_list=[result])
    self.assertEqual(outs[0], 1234)
def test_return_var_tuple(self):
    """
    pseudocode:

    if True:
        return 1, True
    else:
        return 3, 2
    """

    def true_func():
        return layers.fill_constant(
            shape=[1, 2], dtype='int32', value=1), layers.fill_constant(
                shape=[2, 3], dtype='bool', value=True)

    def false_func():
        return layers.fill_constant(
            shape=[3, 4], dtype='float32', value=3), layers.fill_constant(
                shape=[4, 5], dtype='int64', value=2)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        pred = layers.fill_constant(shape=[1], dtype='bool', value=True)
        out = layers.cond(pred, true_func, false_func)
        # out is a tuple containing 2 tensors

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    ret = exe.run(main_program, fetch_list=out)

    self.assertTrue(
        np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32)))
    self.assertTrue(
        np.allclose(np.asarray(ret[1]), np.full((2, 3), True, np.bool)))
def test_pass_and_modify_var(self):
    """
    pseudocode:

    for i in range(5):
        a = 7
        if i % 2 == 0:
            a = a * (i + 1)
        else:
            a = a - (i - 1)
    """

    def true_func(a, i):
        a = a * (i + 1)
        return a

    def false_func(a, i):
        a = a - (i - 1)
        return a

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        a = layers.fill_constant(shape=[3, 2, 1], dtype='int32', value=7)
        i = fluid.data(name="i", shape=[1], dtype='int32')
        pred = ((i % 2) == 0)
        a = layers.cond(pred, lambda: true_func(a, i),
                        lambda: false_func(a, i))

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    for feed_i in range(5):
        expected_a = 7 * (feed_i + 1) if feed_i % 2 == 0 else 8 - feed_i
        ret = exe.run(main_program,
                      feed={'i': np.full((1), feed_i, np.int32)},
                      fetch_list=[a])
        self.assertTrue(
            np.allclose(
                np.asarray(ret), np.full((3, 2, 1), expected_a, np.int32)))
def create_coalesce_program(grad_dict):
    coalesce_program = fluid.Program()
    in_vars = []
    out_vars = []
    with fluid.program_guard(coalesce_program):
        grad_out_dict = {}
        for name in grad_dict:
            grad = grad_dict[name]
            grad_in = layers.fill_constant(
                shape=grad.shape, dtype='float32', value=1)
            grad_out = layers.create_global_var(
                name='output_' + grad.name,
                shape=grad.shape,
                value=0,
                dtype='float32',
                persistable=True)
            in_vars.append(grad_in)
            out_vars.append(grad_out)
            grad_out_dict[name] = grad_out
        grad_fused = layers.create_global_var(
            name='fused_output',
            shape=[1],
            value=0,
            dtype='float32',
            persistable=True)
        coalesce_program.global_block().append_op(
            type='coalesce_tensor',
            inputs={'Input': in_vars},
            outputs={'Output': out_vars,
                     'FusedOutput': grad_fused},
            attrs={
                'copy_data': False,
                'dtype': core.VarDesc.VarType.FP32
            })
        fused_shape = layers.shape(grad_fused)
    return coalesce_program, grad_out_dict, grad_fused, fused_shape
def _create_one_dim_tensor(self, value):
    one_dim_tensor = fill_constant(shape=[1], dtype='int', value=value)
    one_dim_tensor.stop_gradient = True
    return one_dim_tensor
def decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same with pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(
            input=[pre_state_expanded, pre_ids_emb],
            size=decoder_size,
            act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(
            input=current_state_with_lod, size=target_dict_dim, act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores

    return translation_ids, translation_scores
def test_fibonacci(self):
    """
    Mimics Fibonacci Go example: https://tour.golang.org/concurrency/5
    """
    with framework.program_guard(framework.Program()):
        quit_ch_input_var = self._create_persistable_tensor(
            'quit_ch_input', core.VarDesc.VarType.LOD_TENSOR,
            core.VarDesc.VarType.INT32)
        quit_ch_input = fill_constant(
            shape=[1],
            dtype=core.VarDesc.VarType.INT32,
            value=0,
            out=quit_ch_input_var)

        result = self._create_persistable_tensor(
            'result', core.VarDesc.VarType.LOD_TENSOR,
            core.VarDesc.VarType.INT32)
        fill_constant(
            shape=[1],
            dtype=core.VarDesc.VarType.INT32,
            value=0,
            out=result)

        x = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)
        y = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=1)

        while_cond = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.BOOL, value=True)
        while_false = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.BOOL, value=False)
        x_tmp = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)

        def fibonacci(channel, quit_channel):
            while_op = While(cond=while_cond)
            with while_op.block():
                result2 = fill_constant(
                    shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)

                with fluid.Select() as select:
                    with select.case(
                            fluid.channel_send, channel, x, is_copy=True):
                        assign(input=x, output=x_tmp)
                        assign(input=y, output=x)
                        assign(elementwise_add(x=x_tmp, y=y), output=y)

                    with select.case(fluid.channel_recv, quit_channel,
                                     result2):
                        # Quit
                        helper = layer_helper.LayerHelper('assign')
                        helper.append_op(
                            type='assign',
                            inputs={'X': [while_false]},
                            outputs={'Out': [while_cond]})

        ch1 = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
        quit_ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)

        with fluid.Go():
            for i in xrange(10):
                fluid.channel_recv(ch1, result)
                Print(result)

            fluid.channel_send(quit_ch, quit_ch_input)

        fibonacci(ch1, quit_ch)

        fluid.channel_close(ch1)
        fluid.channel_close(quit_ch)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        exe_result = exe.run(fetch_list=[result])
        self.assertEqual(exe_result[0][0], 34)