def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0):
    place = self.place()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[10])
        x.persistable = True
        table = layers.lod_rank_table(x, level=level)
        max_len = layers.max_sequence_len(table)
        max_len.persistable = True
        array = layers.lod_tensor_to_array(x, table)
        array.persistable = True

        result = layers.array_to_lod_tensor(array, table)
        result.persistable = True
    exe = Executor(place)
    scope = core.Scope()
    exe.run(program, feed={'x': tensor}, scope=scope)
    var = scope.find_var(array.name)
    array = var.get_lod_tensor_array()
    if expect_array is not None and expect_lod is not None:
        self.check_array_same(array, expect_array, expect_lod)
    self.check_tensor_same(scope.find_var(result.name).get_tensor(), tensor)

    self.assertEqual(
        numpy.array(scope.find_var(max_len.name).get_tensor())[0],
        expect_max_len)

def not_test_raw_api(self):
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = split_lod_tensor(input=image, mask=cond)

        true_out = layers.create_tensor(dtype='float32')
        true_cond = ConditionalBlock([cond])

        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        false_out = layers.create_tensor(dtype='float32')
        false_cond = ConditionalBlock([cond])

        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        prob = merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=10)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                return
    self.assertFalse(True)

def closure(**kwargs):
    role = kwargs['role']

    pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))

    #init_op = fluid.default_main_program().global_block().ops[0]
    #_insert_init_op(program, init_op)

    executor = Executor(place)
    executor.run()
    outs = executor.run(prog,
                        feed=feed_dict,
                        fetch_list=fetch_list,
                        return_numpy=False)
    # append the LoD information at the last position
    lod = []
    for idx in range(len(fetch_list)):
        return_results[idx].append(np.array(outs[idx]))
        lod_i = outs[idx].lod()
        lod_concat = []
        for i in lod_i:
            lod_concat.append(i)
        lod.append(lod_concat)
    return_results[len(fetch_list)].append(lod)

def closure(**kwargs):
    role = kwargs['role']

    pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port))

    loss = append_loss_ops(block, output_names)
    param_grad_list = append_backward(
        loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

    inputs = self._get_inputs(block)
    feed_dict = self.feed_var(inputs, place)

    fetch_list = [g for p, g in param_grad_list]
    fetch_list_len = len(fetch_list)

    executor = Executor(place)
    executor.run()
    outs = executor.run(prog,
                        feed=feed_dict,
                        fetch_list=fetch_list,
                        return_numpy=False)
    # append the LoD information at the last position
    lod = []
    for idx in range(fetch_list_len):
        return_results[idx].append(np.array(outs[idx]))
        lod_i = outs[idx].lod()
        lod_concat = []
        for i in lod_i:
            lod_concat.append(i)
        lod.append(lod_concat)
    return_results[fetch_list_len].append(lod)

def test_forward(self):
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False
    cond = layers.ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    x = numpy.random.random(size=(10, 1)).astype('float32')

    outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
    print(outs)
    loss = layers.mean(out)
    append_backward(loss=loss)
    outs = exe.run(
        feed={'X': x},
        fetch_list=[
            default_main_program().block(0).var(data.name + "@GRAD")
        ])[0]
    print(outs)

def load():
    """ load model for predict """
    config = model_config()
    config.vocab_size = len(open(config.vocab_path).readlines())
    final_score, final_ids, final_index = knowledge_seq2seq(config)

    final_score.persistable = True
    final_ids.persistable = True
    final_index.persistable = True

    main_program = fluid.default_main_program()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(
        executor=exe, dirname=config.model_path, main_program=main_program)

    processors = KnowledgeCorpus(
        data_dir=config.data_dir,
        data_prefix=config.data_prefix,
        vocab_path=config.vocab_path,
        min_len=config.min_len,
        max_len=config.max_len)

    # load dict
    id_dict_array = load_id2str_dict(config.vocab_path)

    model_handle = [exe, place, final_score, final_ids, final_index,
                    processors, id_dict_array]
    return model_handle

def train(config):
    """ model training """
    config.vocab_size = len(open(config.vocab_path).readlines())
    bow_loss, kl_loss, nll_loss, final_loss = knowledge_seq2seq(config)

    bow_loss.persistable = True
    kl_loss.persistable = True
    nll_loss.persistable = True
    final_loss.persistable = True

    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=config.grad_clip))
    optimizer = fluid.optimizer.Adam(learning_rate=config.lr)

    if config.stage == 0:
        print("stage 0")
        optimizer.minimize(bow_loss)
    else:
        print("stage 1")
        optimizer.minimize(final_loss)

    opt_var_name_list = optimizer.get_opti_var_name_list()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    param_list = main_program.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]

    init_model(config, param_name_list, place)

    processors = KnowledgeCorpus(
        data_dir=config.data_dir,
        data_prefix=config.data_prefix,
        vocab_path=config.vocab_path,
        min_len=config.min_len,
        max_len=config.max_len)
    train_generator = processors.data_generator(
        batch_size=config.batch_size, phase="train", shuffle=True)
    valid_generator = processors.data_generator(
        batch_size=config.batch_size, phase="dev", shuffle=False)

    model_handle = [exe, place, bow_loss, kl_loss, nll_loss, final_loss]

    train_loop(config, train_generator, valid_generator, main_program,
               inference_program, model_handle, param_name_list,
               opt_var_name_list)

def test_forward(self):
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False
    cond = ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    x = numpy.random.random(size=(10, 1)).astype('float32')

    outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
    print(outs)
    loss = layers.mean(out)
    append_backward(loss=loss)
    outs = exe.run(
        feed={'X': x},
        fetch_list=[
            default_main_program().block(0).var(data.name + "@GRAD")
        ])[0]
    print(outs)

def check_switch(self, value):
    x = layers.fill_constant(shape=[1], dtype='float32', value=value)

    zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
    one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
    two_var = layers.fill_constant(shape=[1], dtype='float32', value=2.0)
    three_var = layers.fill_constant(shape=[1], dtype='float32', value=3.0)

    result = layers.create_global_var(
        shape=[1], value=-1.0, dtype='float32', persistable=True)

    with layers.Switch() as switch:
        with switch.case(layers.less_than(x, zero_var)):
            layers.assign(zero_var, result)
        with switch.case(layers.less_than(x, one_var)):
            layers.assign(one_var, result)
        with switch.case(layers.less_than(x, two_var)):
            layers.assign(two_var, result)
        with switch.default():
            layers.assign(three_var, result)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    out = exe.run(feed={}, fetch_list=[result])[0][0]
    return out

def test_ifelse(self):
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)
        ie = layers.IfElse(cond)

        with ie.true_block():
            true_image = ie.input(image)
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)

        with ie.false_block():
            false_image = ie.input(image)
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)

        prob = ie()
        loss = layers.cross_entropy(input=prob[0], label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape((y_data.shape[0], 1))

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                return
    self.assertFalse(True)

def test_run(self):
    inputs = fluid.data(
        name='inputs', shape=[None, self.input_size], dtype='float32')
    pre_hidden = fluid.data(
        name='pre_hidden', shape=[None, self.hidden_size], dtype='float32')
    pre_cell = fluid.data(
        name='pre_cell', shape=[None, self.hidden_size], dtype='float32')

    cell = LSTMCell(self.hidden_size)
    lstm_hidden_new, lstm_states_new = cell(inputs, [pre_hidden, pre_cell])

    lstm_unit = contrib.layers.rnn_impl.BasicLSTMUnit(
        "basicLSTM", self.hidden_size, None, None, None, None, 1.0, "float32")
    lstm_hidden, lstm_cell = lstm_unit(inputs, pre_hidden, pre_cell)

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    inputs_np = np.random.uniform(
        -0.1, 0.1, (self.batch_size, self.input_size)).astype('float32')
    pre_hidden_np = np.random.uniform(
        -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32')
    pre_cell_np = np.random.uniform(
        -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32')

    param_names = [
        ["LSTMCell/BasicLSTMUnit_0.w_0", "basicLSTM/BasicLSTMUnit_0.w_0"],
        ["LSTMCell/BasicLSTMUnit_0.b_0", "basicLSTM/BasicLSTMUnit_0.b_0"]
    ]

    # copy the same random weights into both implementations so their
    # outputs can be compared elementwise
    for names in param_names:
        param = np.array(
            fluid.global_scope().find_var(names[0]).get_tensor())
        param = np.random.uniform(
            -0.1, 0.1, size=param.shape).astype('float32')
        fluid.global_scope().find_var(names[0]).get_tensor().set(param,
                                                                 place)
        fluid.global_scope().find_var(names[1]).get_tensor().set(param,
                                                                 place)

    out = exe.run(feed={
        'inputs': inputs_np,
        'pre_hidden': pre_hidden_np,
        'pre_cell': pre_cell_np
    },
                  fetch_list=[lstm_hidden_new, lstm_hidden])

    self.assertTrue(np.allclose(out[0], out[1], rtol=1e-4, atol=0))

def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.vocab_size
    tar_vocab_size = args.vocab_size
    batch_size = args.batch_size
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    attr_init = args.attr_init
    latent_size = 32

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    model = VAE(hidden_size,
                latent_size,
                src_vocab_size,
                tar_vocab_size,
                batch_size,
                num_layers=num_layers,
                init_scale=init_scale,
                attr_init=attr_init)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='sampling', beam_size=beam_size)

    main_program = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    dir_name = args.reload_model
    print("dir name", dir_name)
    dir_name = os.path.join(dir_name, "checkpoint")
    fluid.load(main_program, dir_name, exe)
    vocab, tar_id2vocab = get_vocab(args.dataset_prefix)
    infer_output = np.ones((batch_size, 1), dtype='int64') * BOS_ID

    fetch_outs = exe.run(feed={'tar': infer_output},
                         fetch_list=[trans_res.name],
                         use_program_cache=False)

    with io.open(args.infer_output_file, 'w', encoding='utf-8') as out_file:
        for line in fetch_outs[0]:
            end_id = -1
            if EOS_ID in line:
                end_id = np.where(line == EOS_ID)[0][0]
            new_line = [tar_id2vocab[e[0]] for e in line[1:end_id]]
            out_file.write(space_tok.join(new_line))
            out_file.write(line_tok)

def main():
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word",
        shape=[1],
        dtype='int64',
        lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    # fluid.memory_optimize(fluid.default_main_program())
    fluid.release_memory(fluid.default_main_program())

    # fix the order of training data
    train_data = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    # train_data = paddle.batch(
    #     paddle.reader.shuffle(
    #         paddle.dataset.wmt14.train(dict_size), buf_size=1000),
    #     batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    feed_order = [
        'src_word_id', 'target_language_word', 'target_language_next_word'
    ]

    feed_list = [
        fluid.default_main_program().global_block().var(var_name)
        for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    batch_id = 0
    for pass_id in range(10):
        for data in train_data():
            outs = exe.run(fluid.default_main_program(),
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if batch_id > 2:
                exit(0)
            if math.isnan(float(avg_cost_val)):
                sys.exit("got NaN loss, training failed.")
            batch_id += 1

def test_shape_check_save_load(self):
    program_1 = fluid.Program()
    startup_program_1 = fluid.Program()
    with fluid.program_guard(program_1, startup_program_1):
        input = fluid.layers.data(name="x", shape=[-1, 10], dtype='float32')
        out = fluid.layers.fc(input, 20)
    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program_1)
    fluid.io.save_params(exe, self.model_path, main_program=program_1)
    fluid.io.load_params(exe, self.model_path, main_program=program_1)

def main(args):
    # construct the sample
    input, output, data_size = construct_sample(args)

    # construct the train program
    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        seq2seq_model = SeqModel(args.seq_num, args.batch_size,
                                 args.hidden_size)
        ret_dict = seq2seq_model.build_graph()

    val_program = train_program.clone()
    with fluid.program_guard(train_program, startup_program):
        optimizer = fluid.optimizer.Adam(args.lr)
        optimizer.minimize(ret_dict.loss)

    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()

    train_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    train_loader.set_batch_generator(
        train_reader(input, output, data_size, args.batch_size),
        places=places)

    exe = Executor(places[0])
    exe.run(startup_program)

    # train stage: use data_loader as reader
    for _ in range(args.epoch):
        for data in train_loader():
            results = exe.run(train_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("train process loss:{}".format(results[0]))

    # save the model for inference
    with fluid.program_guard(train_program, startup_program):
        fluid.io.save_inference_model(
            dirname="./model",
            feeded_var_names=['feat', 'lod'],
            target_vars=[ret_dict.last_predict],
            executor=exe,
            export_for_deployment=True)

    # val stage: use data_loader as reader
    val_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    val_loader.set_batch_generator(
        val_reader(input, output, data_size, output.shape[0]),
        places=places)

    for _ in range(1):
        for data in val_loader():
            results = exe.run(val_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("val process loss:{}".format(results[0]))

def closure(**kwargs):
    role = kwargs['role']

    pfl_mpc.init("privc", role, "localhost", self.server, int(self.port))

    executor = Executor(place)
    executor.run()
    op.run(scope, place)

    for name in output_names:
        out = np.array(scope.find_var(name).get_tensor())
        return_results[name].append(out)

def decode_main(use_cuda, is_sparse):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    context = encoder(is_sparse)
    translation_ids, translation_scores = decoder_decode(context, is_sparse)

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64')
    init_scores_data = np.array(
        [1. for _ in range(batch_size)], dtype='float32')
    init_ids_data = init_ids_data.reshape((batch_size, 1))
    init_scores_data = init_scores_data.reshape((batch_size, 1))
    init_recursive_seq_lens = [1] * batch_size
    init_recursive_seq_lens = [
        init_recursive_seq_lens, init_recursive_seq_lens
    ]

    init_ids = fluid.create_lod_tensor(init_ids_data,
                                       init_recursive_seq_lens, place)
    init_scores = fluid.create_lod_tensor(init_scores_data,
                                          init_recursive_seq_lens, place)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    feed_order = ['src_word_id']
    feed_list = [
        framework.default_main_program().global_block().var(var_name)
        for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    for data in train_data():
        feed_dict = feeder.feed([[x[0]] for x in data])
        feed_dict['init_ids'] = init_ids
        feed_dict['init_scores'] = init_scores

        result_ids, result_scores = exe.run(
            framework.default_main_program(),
            feed=feed_dict,
            fetch_list=[translation_ids, translation_scores],
            return_numpy=False)
        print(result_ids.recursive_sequence_lengths())
        break

def main(self, tensor, mask, expect_true, expect_false, expect_out, level=0):
    place = self.place()
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[1])
        x.persistable = True

        y = layers.data(name='y', shape=[1])
        y.persistable = True

        out_true, out_false = layers.split_lod_tensor(
            input=x, mask=y, level=level)
        out_true.persistable = True
        out_false.persistable = True

        out = layers.merge_lod_tensor(
            in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
        out.persistable = True

    exe = Executor(place)
    scope = core.Scope()
    exe.run(program,
            feed={'x': tensor, 'y': mask},
            scope=scope,
            return_numpy=False)

    var_true = scope.find_var(out_true.name).get_tensor()
    var_false = scope.find_var(out_false.name).get_tensor()
    var_out = scope.find_var(out.name).get_tensor()

    self.check_tensor_same(var_true, expect_true)
    self.check_tensor_same(var_false, expect_false)
    self.check_tensor_same(var_out, expect_out)

def test_lod_rank_table(self):
    x = data(name='x', shape=[100])
    cpu = core.CPUPlace()
    rank_table = lod_rank_table(x=x, level=1)
    rank_table.persistable = True
    exe = Executor(cpu)
    scope = core.Scope()

    tensor = core.LoDTensor()
    tensor.set(numpy.random.random(size=(17, 100)), cpu)
    tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]])

    exe.run(scope=scope, feed={'x': tensor})
    var = scope.find_var(rank_table.name)
    table = var.get_lod_rank_table()
    self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items())

def main():
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word",
        shape=[1],
        dtype='int64',
        lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    # fluid.memory_optimize(fluid.default_main_program())
    fluid.release_memory(fluid.default_main_program())

    # fix the order of training data
    train_data = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    # train_data = paddle.batch(
    #     paddle.reader.shuffle(
    #         paddle.dataset.wmt14.train(dict_size), buf_size=1000),
    #     batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    batch_id = 0
    for pass_id in range(10):
        for data in train_data():
            word_data = to_lodtensor([x[0] for x in data], place)
            trg_word = to_lodtensor([x[1] for x in data], place)
            trg_word_next = to_lodtensor([x[2] for x in data], place)
            outs = exe.run(fluid.default_main_program(),
                           feed={
                               'src_word_id': word_data,
                               'target_language_word': trg_word,
                               'target_language_next_word': trg_word_next
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if batch_id > 2:
                exit(0)
            if math.isnan(float(avg_cost_val)):
                sys.exit("got NaN loss, training failed.")
            batch_id += 1

def run(self, input, output, attrs):
    program = Program()
    block = program.global_block()

    op_proto = self.op_proto
    inputs = self.__append_input_output(block, op_proto, input, True)
    outputs = self.__append_input_output(block, op_proto, output, False)
    op = block.append_op(
        type=self.op_name, inputs=inputs, outputs=outputs, attrs=attrs)
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)

    fetch_list = []
    for var_name, var in outputs.items():
        if var_name in outputs:
            if isinstance(var, list):
                for v in var:
                    fetch_list.append(v)
            else:
                fetch_list.append(var)

    feed_map = self.__feed_var(inputs, input)

    exe = Executor(self.place)
    result = exe.run(program,
                     feed=feed_map,
                     fetch_list=fetch_list,
                     return_numpy=False)

    actual_dict = {}
    for i, obj in enumerate(fetch_list):
        actual_dict[obj.name] = np.array(result[i])

    return actual_dict

def test_lod_rank_table(self):
    x = data(name='x', shape=[100])
    cpu = core.CPUPlace()
    rank_table = lod_rank_table(x=x, level=1)
    rank_table.persistable = True
    exe = Executor(cpu)
    scope = core.Scope()

    tensor = core.LoDTensor()
    tensor.set(numpy.random.random(size=(17, 100)), cpu)
    tensor.set_recursive_sequence_lengths(
        [[1, 2], [5, 1, 1], [3, 1, 5, 1, 3, 3, 1]])

    exe.run(scope=scope, feed={'x': tensor})
    var = scope.find_var(rank_table.name)
    table = var.get_lod_rank_table()
    self.assertEqual([(0, 5), (1, 1), (2, 1)], list(table.items()))

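The two test_lod_rank_table variants above encode the same hierarchy in two notations: set_lod takes cumulative offsets, while set_recursive_sequence_lengths takes per-sequence lengths. A minimal sketch of the conversion between them (plain Python, no Paddle required; the helper name offsets_to_lengths is my own, for illustration):

def offsets_to_lengths(offsets):
    # [0, 1, 3] -> [1, 2]: each length is the gap between adjacent offsets
    return [b - a for a, b in zip(offsets, offsets[1:])]

assert offsets_to_lengths([0, 1, 3]) == [1, 2]
assert offsets_to_lengths([0, 5, 6, 7]) == [5, 1, 1]
assert offsets_to_lengths([0, 3, 4, 9, 10, 13, 16, 17]) == [3, 1, 5, 1, 3, 3, 1]
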
def test_backward(self):
    switch_main_program(Program())
    loss = self.build_network(False, print_phase='backward')
    exe = Executor(self.place)
    outs = exe.run(feed={'x': self.x_tensor},
                   fetch_list=[loss],
                   return_numpy=False)

def test_forward_backward_single_tensor_output(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[2], dtype='float32')
        x.stop_gradient = False  # For test gradient
        mask = layers.data(name='mask', shape=[1], dtype='int32')

        out = program.current_block().create_var(
            dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
        select_output(x, out, mask)
        y = select_input(out, mask)
        mean = layers.mean(y)
        append_backward(mean)

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = Executor(place)

    feed_x = np.asarray([1.3, -1.4]).astype(np.float32)
    feed_mask = np.asarray([0]).astype(np.int32)
    ret = exe.run(program,
                  feed={'x': feed_x, 'mask': feed_mask},
                  fetch_list=[y.name, x.grad_name])
    x_grad = np.asarray([0.5, 0.5]).astype(np.float32)
    self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x))
    self.assertTrue(np.allclose(np.asarray(ret[1]), x_grad))

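Why the expected gradient above is [0.5, 0.5]: mean over a length-2 tensor sends d(mean)/dx_i = 1/2 back to each element, and select_output/select_input merely route that gradient through the selected branch. A quick numpy check of this reasoning step (illustrative only, not part of the test):

import numpy as np

x = np.array([1.3, -1.4], dtype=np.float32)
# upstream gradient of mean(y) w.r.t. each element of x is 1 / len(x)
x_grad = np.full_like(x, 1.0 / x.size)
assert np.allclose(x_grad, [0.5, 0.5])
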
def _get_gradient(self,
                  input_to_check,
                  place,
                  output_names,
                  no_grad_set,
                  parallel=False):
    prog = Program()
    block = prog.global_block()
    self._append_ops(block)
    loss = append_loss_ops(block, output_names)
    param_grad_list = append_backward(
        loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

    inputs = self._get_inputs(block)
    feed_dict = self.feed_var(inputs, place)

    fetch_list = [g for p, g in param_grad_list]
    if parallel:
        # run on GPU when the target place is a CUDAPlace
        use_cuda = isinstance(place, fluid.CUDAPlace)
        executor = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=loss.name, main_program=prog)
    else:
        executor = Executor(place)
    return list(
        map(np.array,
            executor.run(prog, feed_dict, fetch_list, return_numpy=False)))

def test_grad(self):
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(
            name='x', shape=[1], dtype='float32', stop_gradient=False)
        table = layers.lod_rank_table(x, level=0)
        array = layers.lod_tensor_to_array(x, table)
        result = layers.array_to_lod_tensor(array, table)

        mean = layers.mean(result)
        append_backward(mean)

    tensor = core.LoDTensor()
    tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
    tensor.set_recursive_sequence_lengths([[3, 6, 1]])

    g_vars = program.global_block().var(x.name + "@GRAD")

    exe = Executor(place)
    g_out = [
        numpy.array(item).sum()
        for item in exe.run(program,
                            feed={'x': tensor},
                            fetch_list=[g_vars],
                            return_numpy=False)
    ]
    g_out_sum = numpy.array(g_out).sum()

    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

def test_grad(self):
    place = core.CPUPlace()
    program = Program()
    with program_guard(program):
        x = layers.data(
            name='x', shape=[1], dtype='float32', stop_gradient=False)
        table = layers.lod_rank_table(x, level=0)
        array = layers.lod_tensor_to_array(x, table)
        result = layers.array_to_lod_tensor(array, table)

        mean = layers.mean(result)
        append_backward(mean)

    tensor = core.LoDTensor()
    tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
    tensor.set_lod([[0, 3, 9, 10]])

    g_vars = program.global_block().var(x.name + "@GRAD")

    exe = Executor(place)
    g_out = [
        numpy.array(item).sum()
        for item in exe.run(program,
                            feed={'x': tensor},
                            fetch_list=[g_vars],
                            return_numpy=False)
    ]
    g_out_sum = numpy.array(g_out).sum()

    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

def test_forward(self):
    switch_main_program(Program())
    printed = self.build_network(True, print_phase='forward')
    exe = Executor(self.place)
    outs = exe.run(feed={'x': self.x_tensor},
                   fetch_list=[printed],
                   return_numpy=False)

def test_mul(self):
    a = data(name='a', shape=[784], dtype='float32')
    b = data(
        name='b', shape=[784, 100], dtype='float32', append_batch_size=False)
    out = mul(x=a, y=b)

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        a_np = numpy.random.random((100, 784)).astype('float32')
        b_np = numpy.random.random((784, 100)).astype('float32')
        self.assertEqual(0, core.get_mem_usage(0))
        exe = Executor(place)
        outs = exe.run(feed={'a': a_np, 'b': b_np}, fetch_list=[out])
        out = outs[0]
        # disable this assert since ctest will ignore the os.environ setting
        # self.assertGreater(core.get_mem_usage(0), 0)
        raised = False
        try:
            core.print_mem_usage()
        except:
            raised = True
        self.assertFalse(raised, 'Exception raised')

def test_simple_net_forward(self):
    paddle.enable_static()
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        self.simple_net()

    npu_place = paddle.NPUPlace(0)
    exe = Executor(npu_place)
    d = []

    for i in range(3):
        d.append(numpy.random.random(size=[10]).astype('float32'))

    for _ in range(2):
        exe.run(main_program, feed={'d0': d[0], 'd1': d[1], 'd2': d[2]})

def _calc_output(self, place):
    op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)

    program = Program()
    block = program.global_block()

    inputs = append_input_output(block, op_proto, self.inputs, True)
    outputs = append_input_output(block, op_proto, self.outputs, False)
    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=self.attrs if hasattr(self, "attrs") else dict())
    # infer variable type and infer shape in compile-time
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)

    fetch_list = []
    for var_name, var in outputs.items():
        if var_name in self.outputs:
            if isinstance(var, list):
                for v in var:
                    fetch_list.append(v)
            else:
                fetch_list.append(var)

    feed_map = self.feed_var(inputs, place)

    exe = Executor(place)
    outs = exe.run(program,
                   feed=feed_map,
                   fetch_list=fetch_list,
                   return_numpy=False)
    return outs, fetch_list

def test_select(self):
    with framework.program_guard(framework.Program()):
        ch1 = fluid.make_channel(
            dtype=core.VarDesc.VarType.LOD_TENSOR, capacity=1)

        result1 = self._create_tensor('return_value',
                                      core.VarDesc.VarType.LOD_TENSOR,
                                      core.VarDesc.VarType.FP64)

        input_value = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.FP64, value=10)

        with fluid.Select() as select:
            with select.case(fluid.channel_send, ch1, input_value):
                # Execute something.
                pass

            with select.default():
                pass

        # This should not block because we are using a buffered channel.
        result1, status = fluid.channel_recv(ch1, result1)
        fluid.channel_close(ch1)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        result = exe.run(fetch_list=[result1])
        self.assertEqual(result[0][0], 10)

def test_square_error_cost(self):
    input_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")
    label_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")

    sub = input_val - label_val
    np_result = sub * sub

    input_var = layers.create_tensor(dtype="float32", name="input")
    label_var = layers.create_tensor(dtype="float32", name="label")
    layers.assign(input=input_val, output=input_var)
    layers.assign(input=label_val, output=label_var)
    output = layers.square_error_cost(input=input_var, label=label_var)

    for use_cuda in ([False, True]
                     if core.is_compiled_with_cuda() else [False]):
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = Executor(place)
        # feed the numpy values, not the graph variables
        result = exe.run(fluid.default_main_program(),
                         feed={"input": input_val, "label": label_val},
                         fetch_list=[output])

        self.assertTrue(np.isclose(np_result, result).all())

def test_simple_net_forward(self):
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        self.simple_net()

    binary = fluid.compiler.CompiledProgram(main_program)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    d = []

    for i in range(3):
        d.append(numpy.random.random(size=[10]).astype('float32'))

    for _ in range(2):
        exe.run(binary, feed={'d0': d[0], 'd1': d[1], 'd2': d[2]})

def decode_main(use_cuda, is_sparse):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    context = encoder(is_sparse)
    translation_ids, translation_scores = decode(context, is_sparse)

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64')
    init_scores_data = np.array(
        [1. for _ in range(batch_size)], dtype='float32')
    init_ids_data = init_ids_data.reshape((batch_size, 1))
    init_scores_data = init_scores_data.reshape((batch_size, 1))
    init_lod = [1] * batch_size
    init_lod = [init_lod, init_lod]

    init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place)
    init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    feed_order = ['src_word_id']
    feed_list = [
        framework.default_main_program().global_block().var(var_name)
        for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    for data in train_data():
        feed_dict = feeder.feed([[x[0]] for x in data])
        feed_dict['init_ids'] = init_ids
        feed_dict['init_scores'] = init_scores

        result_ids, result_scores = exe.run(
            framework.default_main_program(),
            feed=feed_dict,
            fetch_list=[translation_ids, translation_scores],
            return_numpy=False)
        print(result_ids.lod())
        break

def test_array_length(self):
    tmp = layers.zeros(shape=[10], dtype='int32')
    i = layers.fill_constant(shape=[1], dtype='int64', value=10)
    arr = layers.array_write(tmp, i=i)
    arr_len = layers.array_length(arr)

    cpu = core.CPUPlace()
    exe = Executor(cpu)

    result = exe.run(fetch_list=[arr_len])[0]
    self.assertEqual(11, result[0])

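Writing at index 10 grows the tensor array to length 11 (indices 0 through 10), which is what the assertion above checks. A plain-Python list analogy of the same growth rule (illustrative only, not the Paddle implementation):

arr = []
i = 10
# array_write semantics: extend the array so position i exists, then write
arr.extend([None] * (i + 1 - len(arr)))
arr[i] = 'tmp'
assert len(arr) == 11
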
def train(use_cuda, save_dirname=None):
    [avg_cost, prediction] = seq_to_seq_net()

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    feed_order = ['source_sequence', 'target_sequence', 'label_sequence']
    feed_list = [
        framework.default_main_program().global_block().var(var_name)
        for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    batch_id = 0
    for pass_id in range(2):
        for data in train_data():
            outs = exe.run(framework.default_main_program(),
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost])

            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if math.isnan(float(avg_cost_val[0])):
                sys.exit("got NaN loss, training failed.")
            if batch_id > 3:
                if save_dirname is not None:
                    fluid.io.save_inference_model(
                        save_dirname, ['source_sequence', 'target_sequence'],
                        [prediction], exe)
                return

            batch_id += 1

def forward(self):
    self.feed_map = {
        x: create_tensor(getattr(self.py_rnn, x), self.place)
        for x in self.data_field
    }
    exe = Executor(self.place)
    out = exe.run(self.main_program,
                  feed=self.feed_map,
                  fetch_list=[self.output])
    return out[0]

def test_simple_forward(self):
    d0 = layers.data(
        "d0", shape=[10], append_batch_size=False, dtype='float32')
    d1 = layers.data(
        "d1", shape=[10], append_batch_size=False, dtype='float32')
    d2 = layers.data(
        "d2", shape=[10], append_batch_size=False, dtype='float32')
    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True
    init = layers.zeros(shape=[10], dtype='float32')
    mem_array = layers.array_write(x=init, i=i)
    data_array = layers.array_write(x=d0, i=i)

    i = layers.increment(i)
    layers.array_write(d1, i, array=data_array)

    i = layers.increment(i)
    layers.array_write(d2, i, array=data_array)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = True

    array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
    array_len.stop_gradient = True
    cond = layers.less_than(x=i, y=array_len)

    while_op = layers.While(cond=cond)
    with while_op.block():
        d = layers.array_read(array=data_array, i=i)
        prev = layers.array_read(array=mem_array, i=i)
        result = layers.sums(input=[d, prev])

        i = layers.increment(x=i, in_place=True)
        layers.array_write(result, i=i, array=mem_array)
        layers.less_than(x=i, y=array_len, cond=cond)

    sum_result = layers.array_read(array=mem_array, i=i)
    loss = layers.mean(sum_result)

    append_backward(loss)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    d = []

    for i in range(3):
        d.append(numpy.random.random(size=[10]).astype('float32'))

    outs = exe.run(feed={'d0': d[0], 'd1': d[1], 'd2': d[2]},
                   fetch_list=[sum_result])
    self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)

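Each While iteration above reads d[i] and the previous partial sum, then writes their sum at position i+1, so the final array_read returns d0 + d1 + d2; that is why the assertion compares numpy.sum(d) against numpy.sum(outs[0]). A pure-numpy sketch of the same accumulation (illustrative, no Paddle needed):

import numpy as np

d = [np.random.random(10).astype('float32') for _ in range(3)]
acc = np.zeros(10, dtype='float32')
for x in d:
    acc = acc + x  # mirrors layers.sums(input=[d, prev]) per iteration
assert np.isclose(np.sum(d), acc.sum(), atol=0.01)
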
def test_simple_routine(self):
    ch = fluid.make_channel(
        dtype=core.VarDesc.VarType.BOOL, name="CreateChannel")
    with fluid.Go():
        fluid.channel_send(ch, True)

    result = fluid.channel_recv(ch)
    fluid.channel_close(ch)

    cpu = core.CPUPlace()
    exe = Executor(cpu)

    outs = exe.run(fetch_list=[result])
    self.assertEqual(outs[0], True)

def backward(self):
    self.feed_map = {
        x: create_tensor(getattr(self.py_rnn, x), self.place)
        for x in self.data_field
    }
    fetch_list = [
        self.main_program.global_block().var(grad_var_name(x))
        for x in self.data_field
    ]

    exe = Executor(self.place)
    return exe.run(self.main_program,
                   feed=self.feed_map,
                   fetch_list=fetch_list,
                   return_numpy=False)

def test_mul(self):
    a = data(name='a', shape=[784], dtype='float32')
    b = data(
        name='b', shape=[784, 100], dtype='float32', append_batch_size=False)
    out = mul(x=a, y=b)
    place = core.CPUPlace()
    a_np = numpy.random.random((100, 784)).astype('float32')
    b_np = numpy.random.random((784, 100)).astype('float32')
    exe = Executor(place)
    outs = exe.run(feed={'a': a_np, 'b': b_np}, fetch_list=[out])
    out = outs[0]
    self.assertEqual((100, 100), out.shape)
    self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np)))

def test_param(self):
    shape = [784, 100]
    val = 1.0625
    b = main_program.global_block()
    param = b.create_parameter(
        name='fc.w',
        shape=shape,
        dtype='float32',
        initializer=ConstantInitializer(val))
    self.assertIsNotNone(param)
    self.assertEqual('fc.w', param.name)
    self.assertEqual((784, 100), param.shape)
    self.assertEqual(core.VarDesc.VarType.FP32, param.dtype)
    self.assertEqual(0, param.block.idx)
    exe = Executor(core.CPUPlace())
    p = exe.run(main_program, fetch_list=[param])[0]
    self.assertTrue(np.allclose(p, np.ones(shape) * val))
    p = io.get_parameter_value_by_name('fc.w', exe, main_program)
    self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val))

def test_ping_pong(self):
    """
    Mimics Ping Pong example:
    https://gobyexample.com/channel-directions
    """
    with framework.program_guard(framework.Program()):
        result = self._create_tensor('return_value',
                                     core.VarDesc.VarType.LOD_TENSOR,
                                     core.VarDesc.VarType.FP64)

        ping_result = self._create_tensor('ping_return_value',
                                          core.VarDesc.VarType.LOD_TENSOR,
                                          core.VarDesc.VarType.FP64)

        def ping(ch, message):
            fluid.channel_send(ch, message, is_copy=True)

        def pong(ch1, ch2):
            fluid.channel_recv(ch1, ping_result)
            fluid.channel_send(ch2, ping_result, is_copy=True)

        pings = fluid.make_channel(
            dtype=core.VarDesc.VarType.LOD_TENSOR, capacity=1)
        pongs = fluid.make_channel(
            dtype=core.VarDesc.VarType.LOD_TENSOR, capacity=1)

        msg = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.FP64, value=9)

        ping(pings, msg)
        pong(pings, pongs)

        fluid.channel_recv(pongs, result)

        fluid.channel_close(pings)
        fluid.channel_close(pongs)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        exe_result = exe.run(fetch_list=[result])
        self.assertEqual(exe_result[0][0], 9)

def test_no_lod(self):
    cpu = core.CPUPlace()
    x_tensor = core.LoDTensor()
    tensor_np = np.random.random(size=(3, 100)).astype('float32')
    x_tensor.set(tensor_np, cpu)

    rank_table_tensor = core.LoDTensor()
    rank_table_tensor.set_lod([[0, 1, 3, 6]])
    rank_table_tensor.set(
        np.random.random(size=(6, 1)).astype('float32'), cpu)

    exe = Executor(cpu)
    outs = exe.run(
        feed={'x': x_tensor, 'rank_table_tensor': rank_table_tensor},
        fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad],
        return_numpy=False)
    self.assertTrue(np.allclose(tensor_np[0:3], outs[0]))
    self.assertTrue(np.allclose(tensor_np[0:2], outs[1]))
    self.assertTrue(np.allclose(tensor_np[0:1], outs[2]))
    self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), delta=0.01)

def gaussian_random_test(self, place):
    program = fluid.Program()
    block = program.global_block()
    vout = block.create_var(name="Out")
    op = block.append_op(
        type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)

    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)

    fetch_list = []
    for var_name in self.outputs:
        fetch_list.append(block.var(var_name))

    exe = Executor(place)
    outs = exe.run(program, fetch_list=fetch_list)
    tensor = outs[0]

    self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
    self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)

def test_daisy_chain(self):
    '''
    Mimics classic Daisy-chain test:
    https://talks.golang.org/2012/concurrency.slide#39
    '''
    n = 100

    leftmost = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
    left = leftmost

    # TODO(thuan): Use fluid.While() after scope capture is implemented.
    # https://github.com/PaddlePaddle/Paddle/issues/8502
    for i in range(n):
        right = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
        with fluid.Go():
            one_tensor = self._create_one_dim_tensor(1)
            result = self._create_tensor('return_value',
                                         core.VarDesc.VarType.LOD_TENSOR,
                                         core.VarDesc.VarType.INT64)

            result, status = fluid.channel_recv(right, result)
            one_added = fluid.layers.elementwise_add(x=one_tensor, y=result)
            fluid.channel_send(left, one_added)
        left = right

    # Trigger the channel propagation by sending a "1" to rightmost channel
    with fluid.Go():
        one_tensor = self._create_one_dim_tensor(1)
        fluid.channel_send(right, one_tensor)

    leftmost_result = self._create_tensor('return_value',
                                          core.VarDesc.VarType.LOD_TENSOR,
                                          core.VarDesc.VarType.INT64)
    leftmost_result, status = fluid.channel_recv(leftmost, leftmost_result)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    leftmost_data = exe.run(fetch_list=[leftmost_result])

    # The leftmost_data should be equal to the number of channels + 1
    self.assertEqual(leftmost_data[0][0], n + 1)

def test_simple_routine(self):
    ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)

    # Create LOD_TENSOR<INT64> and put it into the scope. This placeholder
    # variable will be filled in and returned by fluid.channel_recv
    result = self._create_tensor('return_value',
                                 core.VarDesc.VarType.LOD_TENSOR,
                                 core.VarDesc.VarType.INT64)

    with fluid.Go():
        input_value = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.FP64, value=1234)
        fluid.channel_send(ch, input_value)

    result, status = fluid.channel_recv(ch, result)
    fluid.channel_close(ch)

    cpu = core.CPUPlace()
    exe = Executor(cpu)

    outs = exe.run(fetch_list=[result])
    self.assertEqual(outs[0], 1234)

def train(use_cuda, save_dirname, is_local=True):
    scale_infer, avg_cost = model()

    # test program
    test_program = fluid.default_main_program().clone(for_test=True)

    sgd_optimizer = SGDOptimizer(learning_rate=0.2)
    sgd_optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    exe = Executor(place)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)

    feed_order = [
        'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id',
        'category_id', 'movie_title', 'score'
    ]

    def train_loop(main_program):
        exe.run(framework.default_startup_program())

        feed_list = [
            main_program.global_block().var(var_name)
            for var_name in feed_order
        ]
        feeder = fluid.DataFeeder(feed_list, place)

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # train a mini-batch
                outs = exe.run(program=main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                out = np.array(outs[0])
                if (batch_id + 1) % 10 == 0:
                    avg_cost_set = []
                    for test_data in test_reader():
                        avg_cost_np = exe.run(program=test_program,
                                              feed=feeder.feed(test_data),
                                              fetch_list=[avg_cost])
                        avg_cost_set.append(avg_cost_np[0])
                        break  # test only 1 segment for speeding up CI

                    # get test avg_cost
                    test_avg_cost = np.array(avg_cost_set).mean()
                    if test_avg_cost < 6.0:
                        # if avg_cost less than 6.0, we think our code is good.
                        if save_dirname is not None:
                            fluid.io.save_inference_model(save_dirname, [
                                "user_id", "gender_id", "age_id", "job_id",
                                "movie_id", "category_id", "movie_title"
                            ], [scale_infer], exe)
                        return

                if math.isnan(float(out[0])):
                    sys.exit("got NaN loss, training failed.")

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())

def test_read_write(self):
    x = [
        layers.data(name='x0', shape=[100]),
        layers.data(name='x1', shape=[100]),
        layers.data(name='x2', shape=[100])
    ]
    for each_x in x:
        each_x.stop_gradient = False

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    arr = layers.array_write(x=x[0], i=i)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[1], i=i, array=arr)
    i = layers.increment(x=i)
    arr = layers.array_write(x=x[2], i=i, array=arr)

    i = layers.zeros(shape=[1], dtype='int64')
    i.stop_gradient = False
    a0 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a1 = layers.array_read(array=arr, i=i)
    i = layers.increment(x=i)
    a2 = layers.array_read(array=arr, i=i)

    mean_a0 = layers.mean(a0)
    mean_a1 = layers.mean(a1)
    mean_a2 = layers.mean(a2)

    a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

    mean_x0 = layers.mean(x[0])
    mean_x1 = layers.mean(x[1])
    mean_x2 = layers.mean(x[2])

    x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

    scope = core.Scope()
    cpu = core.CPUPlace()

    exe = Executor(cpu)

    tensor = numpy.random.random(size=(100, 100)).astype('float32')

    outs = exe.run(feed={'x0': tensor, 'x1': tensor, 'x2': tensor},
                   fetch_list=[a_sum, x_sum],
                   scope=scope)
    self.assertEqual(outs[0], outs[1])

    total_sum = layers.sums(input=[a_sum, x_sum])
    total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

    append_backward(total_sum_scaled)

    g_vars = [
        default_main_program().global_block().var(each_x.name + "@GRAD")
        for each_x in x
    ]
    g_out = [
        item.sum()
        for item in exe.run(feed={'x0': tensor, 'x1': tensor, 'x2': tensor},
                            fetch_list=g_vars)
    ]
    g_out_sum = numpy.array(g_out).sum()

    # since the final gradient is 1 and the network is linear (means and
    # sums only), the input gradients should also sum to 1
    self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
    prog = Program()
    block = prog.global_block()
    inputs_with_np = {
        key: value
        for (key, value) in OpTest._create_var_descs_(
            block, getattr(self, 'inputs', {}))
    }
    outputs_with_np = {
        key: val
        for (key, val) in OpTest._create_var_descs_(
            block, getattr(self, 'outputs', {}))
    }
    inputs = {
        k: [item[0] for item in inputs_with_np[k]]
        for k in inputs_with_np
    }
    outputs = {
        k: [item[0] for item in outputs_with_np[k]]
        for k in outputs_with_np
    }

    op = block.append_op(
        type=self.op_type,
        inputs=inputs,
        outputs=outputs,
        attrs=getattr(self, 'attrs', {}))

    # infer variable type and infer shape in compile-time
    op.desc.infer_var_type(block.desc)
    op.desc.infer_shape(block.desc)

    mean_inputs = list(map(block.var, output_names))

    if len(mean_inputs) == 1:
        loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1])
        op = block.append_op(
            inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean')
        op.desc.infer_var_type(block.desc)
        op.desc.infer_shape(block.desc)
    else:
        avg_sum = []
        for cur_loss in mean_inputs:
            cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1])
            op = block.append_op(
                inputs={"X": [cur_loss]},
                outputs={"Out": [cur_avg_loss]},
                type="mean")
            op.desc.infer_var_type(block.desc)
            op.desc.infer_shape(block.desc)
            avg_sum.append(cur_avg_loss)

        loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1])
        op_sum = block.append_op(
            inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum')
        op_sum.desc.infer_var_type(block.desc)
        op_sum.desc.infer_shape(block.desc)

        loss = block.create_var(dtype=loss_sum.dtype, shape=[1])
        op_loss = block.append_op(
            inputs={"X": loss_sum},
            outputs={"Out": loss},
            type='scale',
            attrs={'scale': 1.0 / float(len(avg_sum))})
        op_loss.desc.infer_var_type(block.desc)
        op_loss.desc.infer_shape(block.desc)

    param_grad_list = append_backward(
        loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

    feed_dict = {
        item[0].name: OpTest._numpy_to_lod_tensor(item[1], item[2], place)
        for p_name in inputs_with_np for item in inputs_with_np[p_name]
    }

    fetch_list = [g for p, g in param_grad_list]
    executor = Executor(place)
    return list(
        map(np.array,
            executor.run(prog, feed_dict, fetch_list, return_numpy=False)))

def test_fibonacci(self):
    """
    Mimics Fibonacci Go example: https://tour.golang.org/concurrency/5
    """
    with framework.program_guard(framework.Program()):
        quit_ch_input_var = self._create_persistable_tensor(
            'quit_ch_input', core.VarDesc.VarType.LOD_TENSOR,
            core.VarDesc.VarType.INT32)
        quit_ch_input = fill_constant(
            shape=[1],
            dtype=core.VarDesc.VarType.INT32,
            value=0,
            out=quit_ch_input_var)

        result = self._create_persistable_tensor(
            'result', core.VarDesc.VarType.LOD_TENSOR,
            core.VarDesc.VarType.INT32)
        fill_constant(
            shape=[1],
            dtype=core.VarDesc.VarType.INT32,
            value=0,
            out=result)

        x = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)
        y = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=1)

        while_cond = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.BOOL, value=True)

        while_false = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.BOOL, value=False)
        x_tmp = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)

        def fibonacci(channel, quit_channel):
            while_op = While(cond=while_cond)
            with while_op.block():
                result2 = fill_constant(
                    shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)

                with fluid.Select() as select:
                    with select.case(
                            fluid.channel_send, channel, x, is_copy=True):
                        assign(input=x, output=x_tmp)
                        assign(input=y, output=x)
                        assign(elementwise_add(x=x_tmp, y=y), output=y)

                    with select.case(fluid.channel_recv, quit_channel,
                                     result2):
                        # Quit
                        helper = layer_helper.LayerHelper('assign')
                        helper.append_op(
                            type='assign',
                            inputs={'X': [while_false]},
                            outputs={'Out': [while_cond]})

        ch1 = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
        quit_ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)

        with fluid.Go():
            for i in range(10):
                fluid.channel_recv(ch1, result)
                Print(result)

            fluid.channel_send(quit_ch, quit_ch_input)

        fibonacci(ch1, quit_ch)

        fluid.channel_close(ch1)
        fluid.channel_close(quit_ch)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        exe_result = exe.run(fetch_list=[result])
        self.assertEqual(exe_result[0][0], 34)

def test_raw_api(self):
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = layers.split_lod_tensor(
            input=image, mask=cond)

        true_out = layers.create_tensor(dtype='float32')
        true_cond = layers.ConditionalBlock([true_image])

        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        false_out = layers.create_tensor(dtype='float32')
        false_cond = layers.ConditionalBlock([false_image])

        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        prob = layers.merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                return
    self.assertFalse(True)