def test_pserver(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
    fleet.init(role)

    batch_size = 128
    is_sparse = True
    is_distribute = False

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = False
    strategy.geo_sgd_mode = True
    strategy.geo_sgd_need_push_nums = 5

    avg_cost, _, _ = train_network(batch_size, is_distribute, is_sparse)

    optimizer = fluid.optimizer.SGD(0.1)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    pserver_startup_program = fleet.startup_program
    pserver_main_program = fleet.main_program
def run_trainer(self, args):
    """Run the trainer process; you don't need to implement it.

    Args:
        args (ArgumentParser): run args to configure the dist fleet.
    """
    from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet

    if args.role.upper() != "TRAINER":
        raise ValueError("args role must be TRAINER")

    role = role_maker.UserDefinedRoleMaker(
        current_id=args.current_id,
        role=role_maker.Role.WORKER,
        worker_num=args.trainers,
        server_endpoints=args.endpoints.split(","))
    fleet.init(role)

    self._set_strategy(args)
    avg_cost = self.net(args)

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, self.strategy)
    optimizer.minimize(avg_cost)

    if args.run_params.get("run_from_dataset", False):
        losses = self.do_training_from_dataset(fleet, args)
    else:
        losses = self.do_training(fleet, args)
    losses = "" if not losses else losses
    print(losses)
def run_pserver(self, args):
    if args.role.upper() != "PSERVER":
        raise ValueError("args role must be PSERVER")

    role = role_maker.UserDefinedRoleMaker(
        current_id=args.current_id,
        role=role_maker.Role.SERVER,
        worker_num=args.trainers,
        server_endpoints=args.endpoints.split(","))
    fleet.init(role)

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = args.sync_mode
    strategy.geo_sgd_mode = args.geo_sgd_mode
    strategy.geo_sgd_need_push_nums = args.geo_sgd_need_push_nums

    avg_cost = self.net()

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    fleet.init_server()
    fleet.run_server()
def distribute_train(args):
    # Determine, from environment variables, the role this machine/process
    # plays in distributed training, then initialize the node with the fleet
    # API's init() method.
    role = role_maker.PaddleCloudRoleMaker()
    fleet.init(role)

    # We can further specify the distributed run mode through
    # DistributeTranspilerConfig. Below we set the run mode to asynchronous
    # (async) and split the parameters so they can be placed on different nodes.
    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = False
    strategy.runtime_split_send_recv = True

    ctr_model = CTR()
    inputs = ctr_model.input_data(args)
    avg_cost, auc_var = ctr_model.net(inputs, args)

    # Configure the distributed optimizer with the strategy we specified
    # and build the programs.
    optimizer = fluid.optimizer.Adam(args.learning_rate)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    # Run different logic depending on the node's role.
    if fleet.is_server():
        # Initialize and run the parameter server node.
        fleet.init_server()
        fleet.run_server()
    elif fleet.is_worker():
        # Initialize the worker node.
        fleet.init_worker()
        exe = fluid.Executor(fluid.CPUPlace())
        # Run fleet.startup_program, which contains the distributed
        # initialization steps.
        exe.run(fleet.startup_program)

        dataset, file_list = get_dataset(inputs, args)
        for epoch in range(args.epochs):
            # Shuffle at file granularity.
            random.shuffle(file_list)
            dataset.set_filelist(file_list)
            # Trainer nodes run fleet.main_program, which has been pruned
            # for distributed execution.
            start_time = time.time()
            exe.train_from_dataset(
                program=fleet.main_program,
                dataset=dataset,
                fetch_list=[auc_var],
                fetch_info=["Epoch {} auc ".format(epoch)],
                print_period=100,
                debug=False)
            end_time = time.time()
            logger.info("epoch %d finished, use time=%d\n" %
                        ((epoch), end_time - start_time))

            # By default, node 0 saves the model.
            if args.save_model and fleet.is_first_worker():
                model_path = os.path.join(
                    str(args.model_path), "epoch_" + str(epoch))
                fleet.save_persistables(executor=exe, dirname=model_path)

        fleet.stop_worker()
        logger.info("Distribute Train Success!")
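# A minimal launch sketch for distribute_train above, assuming the usual
# Paddle 1.x parameter-server environment variables that
# PaddleCloudRoleMaker reads; the variable names and the `args` namespace
# are assumptions here, not defined by this snippet.
import os

def launch_as_pserver(args):
    # Mark this process as the parameter server listening on 36011.
    os.environ["TRAINING_ROLE"] = "PSERVER"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36011,127.0.0.1:36012"
    os.environ["PADDLE_TRAINERS_NUM"] = "2"
    os.environ["POD_IP"] = "127.0.0.1"
    os.environ["PADDLE_PORT"] = "36011"
    distribute_train(args)

def launch_as_trainer(args, trainer_id=0):
    # Mark this process as trainer `trainer_id`.
    os.environ["TRAINING_ROLE"] = "TRAINER"
    os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36011,127.0.0.1:36012"
    os.environ["PADDLE_TRAINERS_NUM"] = "2"
    os.environ["PADDLE_TRAINER_ID"] = str(trainer_id)
    distribute_train(args)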
def test(self):
    endpoints = [
        "127.0.0.1:36004", "127.0.0.1:36005", "127.0.0.1:36006",
        "127.0.0.1:36007"
    ]
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=endpoints)
    fleet.init(role)

    loss, acc, _ = self.net()

    optimizer = fluid.optimizer.Adagrad(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=base_lr,
            decay_steps=500,
            decay_rate=0.969,
            staircase=True))

    strategy = StrategyFactory.create_async_strategy()
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(loss)
def run_pserver(self, role, strategy):
    fleet.init(role)
    avg_cost, x, y = self.net()
    optimizer = fluid.optimizer.SGD(0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    fleet.init_server()
    fleet.run_server()
def test_default_strategy(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.WORKER,
        worker_num=2,
        server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
    fleet.init(role)
    optimizer = fluid.optimizer.SGD(0.0001)
    optimizer = fleet.distributed_optimizer(optimizer)
def set_optimizer(self, FLAGS, net_output):
    """Set the optimizer."""
    optimizer = net_output['optimizer']
    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = (FLAGS.data_reader != "dataset")
    # pslib: strategy = {"use_cvm": True}
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    return optimizer.minimize(net_output['loss'])
def optimize(self, loss, optimizer_type, lr):
    log.info('learning rate:%f' % lr)
    if optimizer_type == "sgd":
        optimizer = F.optimizer.SGD(learning_rate=lr)
    elif optimizer_type == "adam":
        # Don't slice the tensor, to ensure convergence.
        optimizer = F.optimizer.Adam(learning_rate=lr, lazy_mode=True)
    else:
        raise ValueError("Unknown Optimizer %s" % optimizer_type)

    # Create the DistributeTranspiler config.
    self.strategy = StrategyFactory.create_sync_strategy()
    optimizer = tfleet.distributed_optimizer(optimizer, self.strategy)
    optimizer.minimize(loss)
def test_dist_geo_server_transpiler(self):
    num_voc = 128
    embed_dim = 64
    x_shape, x_lod = [16, 10], [[3, 5, 2, 6]]
    x = fluid.data(name='x', shape=x_shape, dtype='int32', lod_level=1)
    hash_embd = fluid.contrib.layers.search_pyramid_hash(
        input=x,
        num_emb=embed_dim,
        space_len=num_voc * embed_dim,
        pyramid_layer=4,
        rand_len=16,
        drop_out_percent=0.5,
        is_training=True,
        use_filter=False,
        white_list_len=6400,
        black_list_len=2800,
        seed=3,
        lr=0.002,
        param_attr=fluid.ParamAttr(
            name="PyramidHash_emb_0",
            learning_rate=0, ),
        param_attr_wl=fluid.ParamAttr(
            name="Filter",
            learning_rate=0, ),
        param_attr_bl=None,
        distribute_update_vars=["PyramidHash_emb_0"],
        name=None)
    cost = fluid.layers.reduce_sum(hash_embd)

    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
    fleet.init(role)

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = False
    strategy.geo_sgd_mode = True
    strategy.geo_sgd_need_push_nums = 5

    optimizer = fluid.optimizer.SGD(0.1)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(cost)

    pserver_startup_program = fleet.startup_program
    pserver_main_program = fleet.main_program
def test_half_async_strategy(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.WORKER,
        worker_num=2,
        server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
    fleet.init(role)

    half_async_config = DistributeTranspilerConfig()
    half_async_config.sync_mode = False
    half_async_config.geo_sgd_mode = False
    half_async_config.runtime_split_send_recv = False

    optimizer = fluid.optimizer.SGD(0.0001)
    optimizer = fleet.distributed_optimizer(optimizer, half_async_config)
def build_optimizer(self, avg_cost, strategy):
    use_grad_clip = int(os.getenv('GRAD_CLIP', 0))
    if use_grad_clip:
        # 1: clip_by_value; 2: clip_by_norm; 3: clip_by_global_norm
        if use_grad_clip == 1:
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(2.0))
        elif use_grad_clip == 2:
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByNorm(2.0))
        elif use_grad_clip == 3:
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(2.0))

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)
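# A usage sketch for build_optimizer above: GRAD_CLIP is read from the
# environment, so the clipping rule is chosen before the program is built.
# Setting it in-process, as shown here, is an assumption about how the
# surrounding test is driven.
import os

os.environ['GRAD_CLIP'] = '3'  # 3 selects GradientClipByGlobalNorm(2.0)
# A subsequent self.build_optimizer(avg_cost, strategy) call would now
# apply global-norm clipping before minimizing avg_cost.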
def test(self):
    endpoints = [
        "127.0.0.1:36004", "127.0.0.1:36005", "127.0.0.1:36006",
        "127.0.0.1:36007"
    ]
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=endpoints)
    fleet.init(role)

    loss, acc, _ = self.net()
    optimizer = fluid.optimizer.SGD(base_lr)
    strategy = StrategyFactory.create_geo_strategy(20)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(loss)
def test_communicator_async(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.WORKER,
        worker_num=2,
        server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
    fleet.init(role)

    avg_cost = self.net()

    optimizer = fluid.optimizer.SGD(0.01)
    strategy = StrategyFactory.create_async_strategy()
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    fleet.init_worker()
    time.sleep(10)
    fleet.stop_worker()
def optimization(base_lr, loss, optimizer='adam'):
    if optimizer == 'sgd':
        optimizer = F.optimizer.SGD(base_lr)
    elif optimizer == 'adam':
        optimizer = F.optimizer.Adam(base_lr, lazy_mode=True)
    else:
        raise ValueError
    log.info('learning rate:%f' % (base_lr))

    # Create the DistributeTranspiler config.
    config = DistributeTranspilerConfig()
    config.sync_mode = False
    # config.runtime_split_send_recv = False
    config.slice_var_up = False

    # Create the distributed optimizer.
    optimizer = fleet.distributed_optimizer(optimizer, config)
    optimizer.minimize(loss)
def optimization(base_lr, loss, train_steps, optimizer='sgd'):
    decayed_lr = L.learning_rate_scheduler.polynomial_decay(
        learning_rate=base_lr,
        decay_steps=train_steps,
        end_learning_rate=0.0001 * base_lr,
        power=1.0,
        cycle=False)
    if optimizer == 'sgd':
        optimizer = F.optimizer.SGD(decayed_lr)
    elif optimizer == 'adam':
        optimizer = F.optimizer.Adam(decayed_lr, lazy_mode=True)
    else:
        raise ValueError
    log.info('learning rate:%f' % (base_lr))

    # Create the DistributeTranspiler config.
    strategy = StrategyFactory.create_async_strategy()
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(loss)
def init(self, context):
    self.model.train_net()
    optimizer = self.model.optimizer()

    strategy = self.build_strategy()
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(self.model.get_cost_op())

    if fleet.is_server():
        context['status'] = 'server_pass'
    else:
        self.fetch_vars = []
        self.fetch_alias = []
        self.fetch_period = self.model.get_fetch_period()

        metrics = self.model.get_metrics()
        if metrics:
            self.fetch_vars = metrics.values()
            self.fetch_alias = metrics.keys()
        context['status'] = 'train_pass'
def run_trainer(self, role, strategy):
    place = fluid.core.CPUPlace()
    exe = fluid.Executor(place)

    fleet.init(role)
    avg_cost, x, y = self.net()
    optimizer = fluid.optimizer.SGD(0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    exe.run(fleet.startup_program)
    fleet.init_worker()

    train_reader = paddle.batch(self.fake_reader(), batch_size=24)
    feeder = fluid.DataFeeder(place=place, feed_list=[x, y])

    for batch_id, data in enumerate(train_reader()):
        exe.run(fleet.main_program, feed=feeder.feed(data), fetch_list=[])

    fleet.stop_worker()
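# A minimal sketch of how the run_pserver/run_trainer pair above could be
# driven from a test: one process per server endpoint plus one per trainer,
# each with its own UserDefinedRoleMaker. `test_cls` stands in for the class
# that owns these methods and is an assumption, as is the strategy choice.
from multiprocessing import Process

def start_cluster(test_cls):
    endpoints = ["127.0.0.1:36001", "127.0.0.1:36002"]
    strategy = StrategyFactory.create_async_strategy()
    procs = []
    # One server process per endpoint; run_server() blocks, so these
    # processes are typically terminated by the caller once trainers finish.
    for server_id in range(len(endpoints)):
        role = role_maker.UserDefinedRoleMaker(
            current_id=server_id,
            role=role_maker.Role.SERVER,
            worker_num=2,
            server_endpoints=endpoints)
        procs.append(
            Process(target=test_cls().run_pserver, args=(role, strategy)))
    # Two trainer processes, one per worker id.
    for trainer_id in range(2):
        role = role_maker.UserDefinedRoleMaker(
            current_id=trainer_id,
            role=role_maker.Role.WORKER,
            worker_num=2,
            server_endpoints=endpoints)
        procs.append(
            Process(target=test_cls().run_trainer, args=(role, strategy)))
    for p in procs:
        p.start()
    return procs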
def test_communicator_init_and_start(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.WORKER,
        worker_num=2,
        server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
    fleet.init(role)

    avg_cost = self.net()

    optimizer = fluid.optimizer.SGD(0.01)

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = True
    strategy.wait_port = False

    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    comm = Communicator(fleet.main_program)
    comm.start()
    time.sleep(10)
    comm.stop()
def run_trainer(self, args):
    """Run the trainer."""
    from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
    import paddle.fluid as fluid
    from paddle.fluid.transpiler.ps_dispatcher import RoundRobin
    from paddle.fluid.transpiler.ps_dispatcher import HashName

    fluid.default_startup_program().random_seed = 1
    fluid.default_main_program().random_seed = 1

    if args.role.upper() != "TRAINER":
        raise ValueError("args role must be TRAINER")

    role = role_maker.UserDefinedRoleMaker(
        current_id=args.current_id,
        role=role_maker.Role.WORKER,
        worker_num=args.trainers,
        server_endpoints=args.endpoints.split(","))
    fleet.init(role)

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = args.run_params["sync_mode"]
    strategy.async_mode = args.run_params["async_mode"]
    strategy.mode = "pserver"
    strategy.slice_var_up = args.run_params['slice_var_up']
    strategy.enable_dc_asgd = args.run_params['enable_dc_asgd']
    if args.run_params['split_method']:
        strategy.split_method = HashName
    else:
        strategy.split_method = RoundRobin
    strategy.wait_port = args.run_params['wait_port']
    strategy.runtime_split_send_recv = args.run_params['runtime_split_send_recv']
    strategy.use_hierarchical_allreduce = args.run_params['use_hierarchical_allreduce']
    # strategy.hierarchical_allreduce_exter_nranks = args.run_params['hierarchical_allreduce_exter_nranks']
    # strategy.hierarchical_allreduce_inter_nranks = args.run_params['hierarchical_allreduce_inter_nranks']
    strategy.geo_sgd_mode = args.run_params['geo_sgd']
    strategy.geo_sgd_need_push_nums = args.run_params['push_nums']

    avg_cost = self.net()

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    losses = self.do_training(fleet, args)
    losses = "" if not losses else losses
    print(losses)
def test_debug_info(self):
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)

    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.WORKER,
        worker_num=2,
        server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
    fleet.init(role)

    optimizer = fluid.optimizer.SGD(0.0001)
    strategy = StrategyFactory.create_sync_strategy()
    strategy.set_debug_opt({
        "dump_param": ["fc_0.tmp_0"],
        "dump_fields": ["fc_0.tmp_0", "fc_0.tmp_0@GRAD"],
        "dump_fields_path": "dump_text/"
    })
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
def test_communicator_async(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.WORKER,
        worker_num=2,
        server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"])
    fleet.init(role)

    avg_cost = self.net()

    optimizer = fluid.optimizer.SGD(0.01)

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = False
    strategy.runtime_split_send_recv = True
    strategy.wait_port = False

    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    fleet.init_worker()
    time.sleep(10)
    fleet.stop_worker()
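# A summary sketch of how the DistributeTranspilerConfig flag combinations
# used across these snippets appear to map onto the parameter-server training
# modes. The helper name is hypothetical; the flag combinations themselves
# are taken from the tests above.
def make_config(mode):
    config = DistributeTranspilerConfig()
    if mode == "sync":            # as in test_communicator_init_and_start
        config.sync_mode = True
    elif mode == "half_async":    # as in test_half_async_strategy
        config.sync_mode = False
        config.geo_sgd_mode = False
        config.runtime_split_send_recv = False
    elif mode == "async":         # as in test_communicator_async
        config.sync_mode = False
        config.runtime_split_send_recv = True
    elif mode == "geo":           # as in test_dist_geo_server_transpiler
        config.sync_mode = False
        config.geo_sgd_mode = True
        config.geo_sgd_need_push_nums = 5
    return config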
def run_pserver(self, args):
    """Run the pserver process; you don't need to implement it.

    Args:
        args (ArgumentParser): run args to configure the dist fleet.
    """
    from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet

    if args.role.upper() != "PSERVER":
        raise ValueError("args role must be PSERVER")

    role = role_maker.UserDefinedRoleMaker(
        current_id=args.current_id,
        role=role_maker.Role.SERVER,
        worker_num=args.trainers,
        server_endpoints=args.endpoints.split(","))
    fleet.init(role)

    self._set_strategy(args)
    avg_cost = self.net(args)

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, self.strategy)
    optimizer.minimize(avg_cost)

    fleet.init_server(model_dir=args.run_params.get("model_dir", ""))
    fleet.run_server()
def test_pserver(self):
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
    fleet.init(role)

    batch_size = 128
    is_sparse = True
    is_distribute = False

    strategy = StrategyFactory.create_geo_strategy(5)

    avg_cost, _, _, _ = train_network(batch_size, is_distribute, is_sparse)

    optimizer = fluid.optimizer.SGD(0.1)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    pserver_startup_program = fleet.startup_program
    pserver_main_program = fleet.main_program
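# For reference: StrategyFactory.create_geo_strategy(5) used in the test
# above appears to be the factory shorthand for the manual geo-SGD setup in
# the first test_pserver of this section; both run the same training setup.
# A sketch of the two equivalent forms:
manual = DistributeTranspilerConfig()
manual.sync_mode = False
manual.geo_sgd_mode = True
manual.geo_sgd_need_push_nums = 5   # push local updates every 5 steps

factory = StrategyFactory.create_geo_strategy(5)  # same settings in one call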
def run_trainer(self, args):
    if args.role.upper() != "TRAINER":
        raise ValueError("args role must be TRAINER")

    role = role_maker.UserDefinedRoleMaker(
        current_id=args.current_id,
        role=role_maker.Role.WORKER,
        worker_num=args.trainers,
        server_endpoints=args.endpoints.split(","))
    fleet.init(role)

    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = args.sync_mode

    avg_cost = self.net()

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    out = self.do_training(fleet)
def init(self, context):
    self.model.train_net()
    optimizer = self.model.optimizer()

    optimizer_name = envs.get_global_env("hyper_parameters.optimizer",
                                         None, "train.model")
    if optimizer_name not in ["", "sgd", "SGD", "Sgd"]:
        os.environ["FLAGS_communicator_is_sgd_optimizer"] = '0'

    strategy = self.build_strategy()
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(self.model.get_avg_cost())

    if fleet.is_server():
        context['status'] = 'server_pass'
    else:
        self.fetch_vars = []
        self.fetch_alias = []
        self.fetch_period = self.model.get_fetch_period()

        metrics = self.model.get_metrics()
        if metrics:
            self.fetch_vars = metrics.values()
            self.fetch_alias = metrics.keys()
        context['status'] = 'startup_pass'
def optimization(base_lr, loss, train_steps, optimizer='sgd'):
    decayed_lr = L.learning_rate_scheduler.polynomial_decay(
        learning_rate=base_lr,
        decay_steps=train_steps,
        end_learning_rate=0.0001 * base_lr,
        power=1.0,
        cycle=False)
    if optimizer == 'sgd':
        optimizer = F.optimizer.SGD(decayed_lr)
    elif optimizer == 'adam':
        optimizer = F.optimizer.Adam(decayed_lr, lazy_mode=True)
    else:
        raise ValueError
    log.info('learning rate:%f' % (base_lr))

    # Create the DistributeTranspiler config.
    config = DistributeTranspilerConfig()
    config.sync_mode = False
    # config.runtime_split_send_recv = False
    config.slice_var_up = False

    # Create the distributed optimizer.
    optimizer = fleet.distributed_optimizer(optimizer, config)
    optimizer.minimize(loss)
def set_program(self, avg_cost, strategy):
    with fluid.scope_guard(fluid.Scope()):
        optimizer = fluid.optimizer.SGD(0.1)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(avg_cost)
def test_dataset_fleet2(self):
    """Testcase for InMemoryDataset from create to run."""
    with open("test_in_memory_dataset2_run2_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_in_memory_dataset2_run2_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet

    with fluid.program_guard(train_program, startup_program):
        slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
        slots_vars = []
        for slot in slots:
            var = fluid.layers.data(
                name=slot, shape=[1], dtype="float32", lod_level=1)
            slots_vars.append(var)
        fake_cost = fluid.layers.elementwise_sub(slots_vars[0],
                                                 slots_vars[-1])
        fake_cost = fluid.layers.mean(fake_cost)

    with fluid.scope_guard(scope):
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        try:
            fleet.init()
        except ImportError as e:
            print("warning: no mpi4py")
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        try:
            adam = fleet.distributed_optimizer(
                adam,
                strategy={
                    "fs_uri": "fs_uri_xxx",
                    "fs_user": "******",
                    "fs_passwd": "fs_passwd_xxx",
                    "fs_hadoop_bin": "fs_hadoop_bin_xxx"
                })
            adam.minimize([fake_cost], [scope])
        except AttributeError as e:
            print("warning: no mpi")
        except ImportError as e:
            print("warning: no mpi4py")
        exe.run(startup_program)

        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
            "InMemoryDataset")
        dataset.set_batch_size(32)
        dataset.set_thread(3)
        dataset.set_filelist([
            "test_in_memory_dataset2_run2_a.txt",
            "test_in_memory_dataset2_run2_b.txt"
        ])
        dataset.set_pipe_command("cat")
        dataset.set_use_var(slots_vars)
        dataset.load_into_memory()
        try:
            dataset.global_shuffle(fleet)
        except:
            print("warning: catch expected error")
        fleet._opt_info = None
        fleet._fleet_ptr = None

        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
            "InMemoryDataset")
        dataset.set_rank_offset("")
        dataset.set_pv_batch_size(1)
        dataset.set_hdfs_config("", "")
        d = paddle.distributed.fleet.DatasetBase()
        try:
            dataset.set_feed_type("MultiSlotInMemoryDataFeed")
        except:
            print("warning: catch expected error")
        dataset.thread_num = 0
        try:
            dataset._prepare_to_run()
        except:
            print("warning: catch expected error")
        dataset.set_parse_logkey(True)
        dataset.set_merge_by_sid(True)
        dataset.set_enable_pv_merge(True)
        try:
            dataset.preprocess_instance()
        except:
            print("warning: catch expected error")
        try:
            dataset.set_current_phase(1)
        except:
            print("warning: catch expected error")
        try:
            dataset.postprocess_instance()
        except:
            print("warning: catch expected error")
        dataset.set_fleet_send_batch_size(1024)
        try:
            dataset.global_shuffle()
        except:
            print("warning: catch expected error")
        dataset.get_pv_data_size()
        dataset.get_memory_data_size()
        dataset.get_shuffle_data_size()

        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
            "QueueDataset")
        try:
            dataset.local_shuffle()
        except:
            print("warning: catch expected error")
        try:
            dataset.global_shuffle()
        except:
            print("warning: catch expected error")

        dataset = paddle.distributed.fleet.FileInstantDataset()
        try:
            dataset.local_shuffle()
        except:
            print("warning: catch expected error")
        try:
            dataset.global_shuffle()
        except:
            print("warning: catch expected error")

    os.remove("./test_in_memory_dataset2_run2_a.txt")
    os.remove("./test_in_memory_dataset2_run2_b.txt")
def test_dataset_fleet(self):
    """Testcase for InMemoryDataset from create to run."""
    self.skipTest("parameter server will add pslib UT later")

    with open("test_in_memory_dataset2_run_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_in_memory_dataset2_run_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet

    with fluid.program_guard(train_program, startup_program):
        slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
        slots_vars = []
        for slot in slots:
            var = fluid.layers.data(
                name=slot, shape=[1], dtype="float32", lod_level=1)
            slots_vars.append(var)
        fake_cost = fluid.layers.elementwise_sub(slots_vars[0],
                                                 slots_vars[-1])
        fake_cost = fluid.layers.mean(fake_cost)

    with fluid.scope_guard(scope):
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        try:
            fleet.init()
        except ImportError as e:
            print("warning: no mpi4py")
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        try:
            adam = fleet.distributed_optimizer(adam)
            adam.minimize([fake_cost], [scope])
        except AttributeError as e:
            print("warning: no mpi")
        except ImportError as e:
            print("warning: no mpi4py")
        exe.run(startup_program)

        dataset = paddle.distributed.fleet.DatasetFactory().create_dataset(
            "InMemoryDataset")
        dataset.set_batch_size(32)
        dataset.set_thread(3)
        dataset.set_filelist([
            "test_in_memory_dataset2_run_a.txt",
            "test_in_memory_dataset2_run_b.txt"
        ])
        dataset.set_pipe_command("cat")
        dataset.set_use_var(slots_vars)
        dataset.load_into_memory()
        fleet._opt_info = None
        fleet._fleet_ptr = None

    os.remove("./test_in_memory_dataset2_run_a.txt")
    os.remove("./test_in_memory_dataset2_run_b.txt")