def do_dataset_training(self, fleet): train_file_list = ctr_dataset_reader.prepare_fake_data() exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) fleet.init_worker() thread_num = int(os.getenv("CPU_NUM", 2)) batch_size = 128 filelist = fleet.util.get_file_shard(train_file_list) print("filelist: {}".format(filelist)) # config dataset dataset = paddle.distributed.QueueDataset() dataset._set_batch_size(batch_size) dataset._set_use_var(self.feeds) pipe_command = 'python ctr_dataset_reader.py' dataset._set_pipe_command(pipe_command) dataset.set_filelist(filelist) dataset._set_thread(thread_num) for epoch_id in range(1): pass_start = time.time() dataset.set_filelist(filelist) exe.train_from_dataset(program=fluid.default_main_program(), dataset=dataset, fetch_list=[self.avg_cost], fetch_info=["cost"], print_period=2, debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start print("do_dataset_training done. using time {}".format(pass_time))
def do_dataset_training(self, fleet): train_file_list = ctr_dataset_reader.prepare_fake_data() exe = self.get_executor() exe.run(fluid.default_startup_program()) fleet.init_worker() thread_num = 2 batch_size = 128 filelist = train_file_list # config dataset dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") dataset.set_use_var(self.feeds) dataset.set_batch_size(128) dataset.set_thread(2) dataset.set_filelist(filelist) dataset.set_pipe_command('python ctr_dataset_reader.py') dataset.load_into_memory() dataset.global_shuffle(fleet, 12) ##TODO: thread configure shuffle_data_size = dataset.get_shuffle_data_size(fleet) local_data_size = dataset.get_shuffle_data_size() data_size_list = fleet.util.all_gather(local_data_size) print('after global_shuffle data_size_list: ', data_size_list) print('after global_shuffle data_size: ', shuffle_data_size) for epoch_id in range(1): pass_start = time.time() exe.train_from_dataset(program=fluid.default_main_program(), dataset=dataset, fetch_list=[self.avg_cost], fetch_info=["cost"], print_period=2, debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start dataset.release_memory() if os.getenv("SAVE_MODEL") == "1": model_dir = tempfile.mkdtemp() fleet.save_inference_model(exe, model_dir, [feed.name for feed in self.feeds], self.avg_cost) self.check_model_right(model_dir) shutil.rmtree(model_dir) dirname = os.getenv("SAVE_DIRNAME", None) if dirname: fleet.save_persistables(exe, dirname=dirname) cache_dirname = os.getenv("SAVE_CACHE_DIRNAME", None) if cache_dirname: fleet.save_cache_model(cache_dirname)
def do_dataset_training_queuedataset(self, fleet): train_file_list = ctr_dataset_reader.prepare_fake_data() exe = self.get_executor() exe.run(fluid.default_startup_program()) fleet.init_worker() thread_num = 2 batch_size = 128 filelist = train_file_list # config dataset dataset = paddle.distributed.QueueDataset() pipe_command = 'python ctr_dataset_reader.py' dataset.init(batch_size=batch_size, use_var=self.feeds, pipe_command=pipe_command, thread_num=thread_num) dataset.set_filelist(filelist) for epoch_id in range(1): pass_start = time.time() dataset.set_filelist(filelist) exe.train_from_dataset(program=fluid.default_main_program(), dataset=dataset, fetch_list=[self.avg_cost], fetch_info=["cost"], print_period=2, debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start if os.getenv("SAVE_MODEL") == "1": model_dir = tempfile.mkdtemp() fleet.save_inference_model(exe, model_dir, [feed.name for feed in self.feeds], self.avg_cost) self.check_model_right(model_dir) shutil.rmtree(model_dir) dirname = os.getenv("SAVE_DIRNAME", None) if dirname: fleet.save_persistables(exe, dirname=dirname)
def do_dataset_training(self, fleet): train_file_list = ctr_dataset_reader.prepare_fake_data() exe = fluid.Executor(fluid.CPUPlace()) fleet.init_worker() exe.run(fluid.default_startup_program()) thread_num = 2 batch_size = 128 filelist = train_file_list # config dataset dataset = paddle.distributed.fleet.DatasetFactory().create_dataset() dataset.set_batch_size(batch_size) dataset.set_use_var(self.feeds) pipe_command = 'python ctr_dataset_reader.py' dataset.set_pipe_command(pipe_command) dataset.set_filelist(filelist) dataset.set_thread(thread_num) for epoch_id in range(1): pass_start = time.time() dataset.set_filelist(filelist) exe.train_from_dataset( program=fluid.default_main_program(), dataset=dataset, fetch_list=[self.avg_cost], fetch_info=["cost"], print_period=2, debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start if os.getenv("SAVE_MODEL") == "1": model_dir = tempfile.mkdtemp() fleet.save_inference_model(exe, model_dir, [feed.name for feed in self.feeds], self.avg_cost) self.check_model_right(model_dir) shutil.rmtree(model_dir) fleet.stop_worker()