def test_get_places(self):
    program = Program()
    with program_guard(program):
        x = get_places(device_count=4)
        self.assertIsNotNone(x)
        print(str(program))
def test_get_places(self):
    places = get_places()
    cpu = fluid.CPUPlace()
    exe = fluid.Executor(cpu)
    exe.run(fluid.default_main_program())
    self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST)
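# The snippets in this section assume imports along these lines. The module
# paths for get_places and ParallelDo moved between Fluid releases, so the
# exact locations below are assumptions; adjust them to your version.
import math
import os
import sys

import numpy
import six

import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers.device import get_places  # assumption: 1.x path
from paddle.fluid.layers.control_flow import ParallelDo  # assumption: pre-removal path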
def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
    PASS_NUM = 100
    EMBED_SIZE = 32
    HIDDEN_SIZE = 256
    N = 5
    BATCH_SIZE = 32
    IS_SPARSE = is_sparse

    def __network__(words):
        embed_first = fluid.layers.embedding(
            input=words[0],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')
        embed_second = fluid.layers.embedding(
            input=words[1],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')
        embed_third = fluid.layers.embedding(
            input=words[2],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')
        embed_forth = fluid.layers.embedding(
            input=words[3],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')

        concat_embed = fluid.layers.concat(
            input=[embed_first, embed_second, embed_third, embed_forth],
            axis=1)
        hidden1 = fluid.layers.fc(input=concat_embed,
                                  size=HIDDEN_SIZE,
                                  act='sigmoid')
        predict_word = fluid.layers.fc(input=hidden1,
                                       size=dict_size,
                                       act='softmax')
        cost = fluid.layers.cross_entropy(input=predict_word, label=words[4])
        avg_cost = fluid.layers.mean(cost)
        return avg_cost, predict_word

    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
    forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')

    if not is_parallel:
        avg_cost, predict_word = __network__(
            [first_word, second_word, third_word, forth_word, next_word])
    else:
        places = get_places()
        pd = ParallelDo(places)
        with pd.do():
            avg_cost, predict_word = __network__(
                list(
                    map(pd.read_input, [
                        first_word, second_word, third_word, forth_word,
                        next_word
                    ])))
            pd.write_output(avg_cost)

        avg_cost = fluid.layers.mean(pd())

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(
        feed_list=[first_word, second_word, third_word, forth_word, next_word],
        place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_cost_np = exe.run(main_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost])
                if avg_cost_np[0] < 5.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname,
                            ['firstw', 'secondw', 'thirdw', 'forthw'],
                            [predict_word], exe)
                    return
                if math.isnan(float(avg_cost_np[0])):
                    sys.exit("got NaN loss, training failed.")

        raise AssertionError("Cost is too large {0:2.2}".format(
            avg_cost_np[0]))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
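# Illustrative local invocation of the word2vec trainer above. This is a
# sketch, not part of the original suite; the save path is hypothetical.
if __name__ == '__main__':
    train(
        use_cuda=False,
        is_sparse=True,
        is_parallel=False,
        save_dirname="word2vec.inference.model")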
# Fix the random seed (and reuse the same training data) so the loss
# value can be compared accurately between the default and the memory
# optimization version.
fluid.default_startup_program().random_seed = 111

x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

device_type = 'CPU'
use_nccl = False
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
    device_type = 'CUDA'
    use_nccl = False  # note: NCCL reduction stays off even on CUDA here
    place = fluid.CUDAPlace(0)

places = get_places(device_count=0, device_type=device_type)
pd = ParallelDo(places, use_nccl=use_nccl)
with pd.do():
    x_ = pd.read_input(x)
    y_ = pd.read_input(y)
    y_predict = fluid.layers.fc(input=x_, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y_)
    avg_cost = fluid.layers.mean(x=cost)
    pd.write_output(avg_cost)

cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

fluid.memory_optimize(fluid.default_main_program(), print_log=True)
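# A hypothetical driver for the program built above (a sketch, not from the
# original test): feed numpy batches shaped like the declared inputs and
# fetch the averaged cost; ParallelDo splits the batch across places.
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
x_np = numpy.random.random((32, 13)).astype('float32')
y_np = numpy.random.random((32, 1)).astype('float32')
loss, = exe.run(fluid.default_main_program(),
                feed={'x': x_np, 'y': y_np},
                fetch_list=[avg_cost])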
def train(word_dict,
          net_method,
          use_cuda,
          parallel=False,
          save_dirname=None,
          is_local=True):
    BATCH_SIZE = 128
    PASS_NUM = 5
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    if not parallel:
        cost, acc_out, prediction = net_method(
            data, label, input_dim=dict_dim, class_dim=class_dim)
    else:
        places = get_places()
        pd = ParallelDo(places)
        with pd.do():
            cost, acc, _ = net_method(
                pd.read_input(data),
                pd.read_input(label),
                input_dim=dict_dim,
                class_dim=class_dim)
            pd.write_output(cost)
            pd.write_output(acc)

        cost, acc = pd()
        cost = fluid.layers.mean(cost)
        acc_out = fluid.layers.mean(acc)
        prediction = None
        assert save_dirname is None

    adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
    adagrad.minimize(cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ["words"],
                                                      prediction, exe)
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Cost is too large for {0}".format(
            net_method.__name__))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
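# Illustrative call for the sentiment trainer above (a sketch; `conv_net`
# stands in for whichever network builder the suite defines and is an
# assumption here, not a name from the original).
word_dict = paddle.dataset.imdb.word_dict()
train(word_dict, net_method=conv_net, use_cuda=False, parallel=True)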
def train(nn_type,
          use_cuda,
          parallel,
          save_dirname=None,
          save_full_dirname=None,
          model_filename=None,
          params_filename=None,
          is_local=True):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if nn_type == 'mlp':
        net_conf = mlp
    else:
        net_conf = conv_net

    if parallel:
        places = get_places()
        pd = ParallelDo(places)
        with pd.do():
            img_ = pd.read_input(img)
            label_ = pd.read_input(label)
            prediction, avg_loss, acc = net_conf(img_, label_)
            for o in [avg_loss, acc]:
                pd.write_output(o)

        avg_loss, acc = pd()
        # average the loss and accuracy over all devices
        avg_loss = fluid.layers.mean(avg_loss)
        acc = fluid.layers.mean(acc)
    else:
        prediction, avg_loss, acc = net_conf(img, label)

    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_loss)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # BATCH_SIZE is a module-level constant in the original test
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # train a mini-batch, fetch nothing
                exe.run(main_program, feed=feeder.feed(data))
                if (batch_id + 1) % 10 == 0:
                    acc_set = []
                    avg_loss_set = []
                    for test_data in test_reader():
                        acc_np, avg_loss_np = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[acc, avg_loss])
                        acc_set.append(float(acc_np))
                        avg_loss_set.append(float(avg_loss_np))
                    # get test acc and loss
                    acc_val = numpy.array(acc_set).mean()
                    avg_loss_val = numpy.array(avg_loss_set).mean()
                    if float(acc_val) > 0.2:  # Smaller value to increase CI speed
                        if save_dirname is not None:
                            fluid.io.save_inference_model(
                                save_dirname, ["img"], [prediction],
                                exe,
                                model_filename=model_filename,
                                params_filename=params_filename)
                        if save_full_dirname is not None:
                            fluid.io.save_inference_model(
                                save_full_dirname, [], [],
                                exe,
                                model_filename=model_filename,
                                params_filename=params_filename,
                                export_for_deployment=False)
                        return
                    else:
                        print(
                            'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                            format(pass_id, batch_id + 1,
                                   float(avg_loss_val), float(acc_val)))
                        if math.isnan(float(avg_loss_val)):
                            sys.exit("got NaN loss, training failed.")
        raise AssertionError("Loss of recognize digits is too large")

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
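# Illustrative invocation of the MNIST trainer above (a sketch; the save
# path is hypothetical, and `mlp`/`conv_net` are defined elsewhere).
train(
    nn_type='mlp',
    use_cuda=False,
    parallel=True,
    save_dirname="recognize_digits_mlp.inference.model")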
def _run_test_impl_(self,
                    callback,
                    feed,
                    fetch,
                    place,
                    use_parallel=False,
                    use_nccl=False,
                    use_gpu=False):
    """
    Run a single test and return the fetched values.

    Args:
        callback: a generator that first yields the input variable(s),
            then receives the (possibly per-device) inputs via send()
            and yields the loss variable.
        feed(dict): the feed data.
        fetch(str|list): names of the variables to fetch.
        place(Place): the computation place.
        use_parallel(bool): whether to wrap the network in ParallelDo.
        use_nccl(bool): whether ParallelDo reduces gradients with NCCL.
        use_gpu(bool): whether to profile on GPU.

    Returns:
        Fetched numpy arrays.
    """
    if isinstance(fetch, six.string_types):
        fetch = [fetch]
    main = fluid.Program()
    startup = fluid.Program()
    # fix the random seed so runs are comparable
    main.random_seed = 10
    startup.random_seed = 10
    with fluid.program_guard(main, startup):
        generator = callback()
        # automatically insert ParallelDo if use_parallel is True
        if use_parallel:
            thread_num = fluid.core.get_cuda_device_count(
            ) if use_gpu else 8
            places = get_places(thread_num)
            pd = ParallelDo(places, use_nccl=use_nccl)
            data = next(generator)

            if isinstance(data, fluid.framework.Variable):
                data = [data]

            with pd.do():
                ins = list(map(pd.read_input, data))
                if len(ins) == 1:
                    ins = ins[0]
                loss = generator.send(ins)  # patch input
                pd.write_output(loss)

            loss = pd()
        else:
            data = next(generator)
            loss = generator.send(data)
        self.assertIsNotNone(loss)
        avg_loss = fluid.layers.mean(loss)
        fluid.backward.append_backward(loss=avg_loss)

    exe = fluid.Executor(place)
    exe.run(startup)
    if use_gpu:
        profile_type = 'GPU'
    else:
        profile_type = 'CPU'
    with profiler.profiler(profile_type, 'total', '/tmp/profiler'):
        return exe.run(main, feed=feed, fetch_list=fetch)
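# A sketch of the generator-style callback this harness expects: yield the
# data variable(s) first, receive the (per-device) inputs back via send(),
# then yield the loss. The names below are hypothetical.
def simple_fc_callback():
    img = fluid.layers.data(name='img', shape=[784], dtype='float32')
    img = yield img  # harness sends back pd.read_input(img) when parallel
    hidden = fluid.layers.fc(input=img, size=200)
    loss = fluid.layers.mean(hidden)
    yield loss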
def check_get_gpu_places(self):
    places = get_places(device_type='CUDA')
    gpu = fluid.CUDAPlace(0)
    exe = fluid.Executor(gpu)
    exe.run(fluid.default_main_program())
    self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST)
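# `check_get_gpu_places` is presumably invoked behind a CUDA guard so that
# CPU-only builds skip it; a minimal sketch of such a test method:
def test_get_gpu_places(self):
    if fluid.core.is_compiled_with_cuda():
        self.check_get_gpu_places()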