def dataloader_by_name(readerclass, dataset_name, yaml_file):
    reader_class = lazy_instance_by_fliename(readerclass, "TrainReader")
    name = "dataset." + dataset_name + "."
    data_path = get_global_env(name + "data_path")

    # Resolve datasets packaged with paddlerec itself.
    if data_path.startswith("paddlerec::"):
        package_base = get_runtime_environ("PACKAGE_BASE")
        assert package_base is not None
        data_path = os.path.join(package_base, data_path.split("::")[1])

    files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]

    reader = reader_class(yaml_file)
    reader.init()

    def gen_reader():
        for file in files:
            with open(file, 'r') as f:
                for line in f:
                    line = line.rstrip('\n')
                    sample_iter = reader.generate_sample(line)
                    for parsed_line in sample_iter():
                        if parsed_line is None:
                            continue
                        # Keep only the values of each (name, value) pair.
                        values = [parsed[1] for parsed in parsed_line]
                        yield values

    def gen_batch_reader():
        return reader.generate_batch_from_trainfiles(files)

    if hasattr(reader, 'generate_batch_from_trainfiles'):
        return gen_batch_reader()
    return gen_reader
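# A minimal sketch of the reader contract that gen_reader() above consumes:
# generate_sample(line) returns a callable that yields lists of
# (slot_name, value) pairs, from which only the values are kept. The class
# and slot names below are illustrative assumptions, not part of this
# codebase.
class ExampleTrainReader(object):
    def __init__(self, yaml_file):
        self.yaml_file = yaml_file

    def init(self):
        pass

    def generate_sample(self, line):
        def iterator():
            tokens = line.strip().split(',')
            # One sample: a label slot and a dense feature slot.
            yield [("label", [int(tokens[0])]),
                   ("feature", [float(x) for x in tokens[1:]])]

        return iterator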
def build_network(self, context): context["model"] = {} for model_dict in context["phases"]: context["model"][model_dict["name"]] = {} train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() dataset_name = model_dict["dataset_name"] with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): with fluid.scope_guard(scope): model_path = envs.os_path_adapter( envs.workspace_adapter(model_dict["model"])) model = envs.lazy_instance_by_fliename( model_path, "Model")(context["env"]) if context["is_infer"]: model._infer_data_var = model.input_data( is_infer=context["is_infer"], dataset_name=model_dict["dataset_name"]) else: model._data_var = model.input_data( dataset_name=model_dict["dataset_name"]) if envs.get_global_env("dataset." + dataset_name + ".type") == "DataLoader": model._init_dataloader( is_infer=context["is_infer"]) data_loader = DataLoader(context) data_loader.get_dataloader(context, dataset_name, model._data_loader) if context["is_infer"]: model.net(model._infer_data_var, context["is_infer"]) else: model.net(model._data_var, context["is_infer"]) optimizer = model.optimizer() optimizer.minimize(model._cost) context["model"][ model_dict["name"]]["main_program"] = train_program context["model"][ model_dict["name"]]["startup_program"] = startup_program context["model"][model_dict["name"]]["scope"] = scope context["model"][model_dict["name"]]["model"] = model context["model"][model_dict["name"]][ "default_main_program"] = train_program.clone() context["model"][model_dict["name"]]["compiled_program"] = None context["dataset"] = {} for dataset in context["env"]["dataset"]: type = envs.get_global_env("dataset." + dataset["name"] + ".type") if type != "DataLoader": dataset_class = QueueDataset(context) context["dataset"][ dataset["name"]] = dataset_class.create_dataset( dataset["name"], context) context["status"] = "startup_pass"
def get_dataloader(self, context, dataset_name, dataloader):
    name = "dataset." + dataset_name + "."
    sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
    dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
    batch_size = envs.get_global_env(name + "batch_size")
    reader_class = envs.get_global_env(name + "data_converter")
    reader_class_name = envs.get_global_env(name + "reader_class_name",
                                            "Reader")

    if sparse_slots == "" and dense_slots == "":
        # A user-defined reader converts each raw line itself.
        reader = dataloader_instance.dataloader_by_name(
            reader_class,
            dataset_name,
            context["config_yaml"],
            context,
            reader_class_name=reader_class_name)
        reader_class = envs.lazy_instance_by_fliename(reader_class,
                                                      reader_class_name)
        reader_ins = reader_class(context["config_yaml"])
    else:
        # Slot-based datasets fall back to the generic SlotReader.
        reader = dataloader_instance.slotdataloader_by_name(
            "", dataset_name, context["config_yaml"], context)
        reader_ins = SlotReader(context["config_yaml"])

    if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
        dataloader.set_sample_list_generator(reader)
    elif hasattr(reader_ins, 'batch_tensor_creator'):
        dataloader.set_batch_generator(reader)
    else:
        dataloader.set_sample_generator(reader, batch_size)
    return dataloader
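# For reference: the three hand-off styles used above map onto Paddle's
# fluid DataLoader API as follows. set_sample_generator takes a generator
# yielding one sample at a time and batches internally using batch_size;
# set_sample_list_generator takes a generator yielding a whole batch (a
# list of samples) per iteration; set_batch_generator takes a generator
# yielding ready-made batch tensors (the batch_tensor_creator path).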
def build_network(self, context): context["model"] = {} if len(context["env"]["phase"]) > 1: warnings.warn("Cluster Train Only Support One Phase.", category=UserWarning, stacklevel=2) model_dict = context["env"]["phase"][0] context["model"][model_dict["name"]] = {} dataset_name = model_dict["dataset_name"] model_path = envs.os_path_adapter( envs.workspace_adapter(model_dict["model"])) model = envs.lazy_instance_by_fliename(model_path, "Model")(context["env"]) model._data_var = model.input_data( dataset_name=model_dict["dataset_name"]) if envs.get_global_env("dataset." + dataset_name + ".type") == "DataLoader": model._init_dataloader(is_infer=False) model.net(model._data_var, False) optimizer = model.optimizer() strategy = self._build_strategy(context) optimizer = context["fleet"].distributed_optimizer(optimizer, strategy) optimizer.minimize(model._cost) context["model"][model_dict["name"]][ "main_program"] = paddle.static.default_main_program() context["model"][model_dict["name"]][ "startup_program"] = paddle.static.default_startup_program() context["model"][ model_dict["name"]]["scope"] = paddle.static.global_scope() context["model"][model_dict["name"]]["model"] = model context["model"][model_dict["name"]][ "default_main_program"] = paddle.static.default_main_program( ).clone() context["model"][model_dict["name"]][ "compiled_program"] = paddle.static.default_main_program() if context["fleet"].is_server(): self._server(context) else: context["dataset"] = {} for phase in context["env"]["phase"]: type = envs.get_global_env("dataset." + phase["dataset_name"] + ".type") if type == "DataLoader": data_loader = DataLoader(context) data_loader.get_dataloader(context, dataset_name, model._data_loader) elif type == "QueueDataset": if context["fleet_mode"] == "COLLECTIVE": raise ValueError( "Collective don't support QueueDataset training, please use DataLoader." ) dataset_class = QueueDataset(context) context["dataset"][ phase["dataset_name"]] = dataset_class.create_dataset( phase["dataset_name"], context) context["status"] = "startup_pass"
def build_network(self, context): context["model"] = {} if len(context["env"]["phase"]) > 1: warnings.warn("Cluster Train Only Support One Phase.", category=UserWarning, stacklevel=2) model_dict = context["env"]["phase"][0] train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() dataset_name = model_dict["dataset_name"] with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): with fluid.scope_guard(scope): context["model"][model_dict["name"]] = {} model_path = envs.os_path_adapter( envs.workspace_adapter(model_dict["model"])) model = envs.lazy_instance_by_fliename( model_path, "Model")(context["env"]) model._data_var = model.input_data( dataset_name=model_dict["dataset_name"]) if envs.get_global_env("dataset." + dataset_name + ".type") == "DataLoader": model._init_dataloader(is_infer=False) data_loader = DataLoader(context) data_loader.get_dataloader(context, dataset_name, model._data_loader) model.net(model._data_var, False) optimizer = model.optimizer() optimizer = context["fleet"].distributed_optimizer( optimizer) optimizer.minimize([model._cost], [fluid.global_scope()]) context["model"][ model_dict["name"]]["main_program"] = train_program context["model"][model_dict["name"]][ "startup_program"] = startup_program context["model"][model_dict["name"]]["scope"] = scope context["model"][model_dict["name"]]["model"] = model context["model"][model_dict["name"]][ "default_main_program"] = train_program.clone() context["model"][ model_dict["name"]]["compile_program"] = None if context["fleet"].is_server(): self._server(context) else: context["dataset"] = {} for dataset in context["env"]["dataset"]: type = envs.get_global_env("dataset." + dataset["name"] + ".type") if type != "DataLoader": dataset_class = QueueDataset(context) context["dataset"][ dataset["name"]] = dataset_class.create_dataset( dataset["name"], context) context["status"] = "startup_pass"
def build_network(self, context): context["model"] = {} if len(context["env"]["phase"]) > 1: print("CollectiveNetwork phase:{}".format(context["env"]["phase"])) warnings.warn("Cluster Train Only Support One Phase.", category=UserWarning, stacklevel=2) model_dict = context["env"]["phase"][0] context["model"][model_dict["name"]] = {} dataset_name = model_dict["dataset_name"] train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): with fluid.scope_guard(scope): model_path = envs.os_path_adapter( envs.workspace_adapter(model_dict["model"])) model = envs.lazy_instance_by_fliename(model_path, "Model")(context["env"]) model._data_var = model.input_data( dataset_name=model_dict["dataset_name"]) if envs.get_global_env("dataset." + dataset_name + ".type") == "DataLoader": model._init_dataloader(is_infer=False) data_loader = DataLoader(context) data_loader.get_dataloader(context, dataset_name, model._data_loader) model.net(model._data_var, False) optimizer = model.optimizer() strategy = self._build_strategy(context) optimizer = context["fleet"].distributed_optimizer( optimizer, strategy) optimizer.minimize(model._cost) context["model"][model_dict["name"]]["main_program"] = context[ "fleet"].main_program context["model"][ model_dict["name"]]["startup_program"] = startup_program context["model"][model_dict["name"]]["scope"] = scope context["model"][model_dict["name"]]["model"] = model context["model"][ model_dict["name"]]["default_main_program"] = train_program context["model"][model_dict["name"]]["compiled_program"] = None context["dataset"] = {} for phase in context["env"]["phase"]: type = envs.get_global_env("dataset." + phase["dataset_name"] + ".type") if type == "QueueDataset": raise ValueError( "Collective don't support QueueDataset training, please use DataLoader." ) dataset_class = QueueDataset(context) context["dataset"][ phase["dataset_name"]] = dataset_class.create_dataset( phase["dataset_name"], context) context["status"] = "startup_pass"
def runner(self, context): runner_class_path = envs.get_global_env(self.runner_env_name + ".runner_class_path", default_value=None) if runner_class_path: runner_class = envs.lazy_instance_by_fliename( runner_class_path, "Runner")(context) else: if self.engine == EngineMode.SINGLE and not context["is_infer"]: runner_class_name = "SingleRunner" else: raise ValueError( "FineTuningTrainer can only support SingleTraining.") runner_path = os.path.join(self.abs_dir, "framework", "runner.py") runner_class = envs.lazy_instance_by_fliename( runner_path, runner_class_name)(context) runner_class.run(context)
def user_define_engine(engine_yaml):
    _config = envs.load_yaml(engine_yaml)
    envs.set_runtime_environs(_config)
    train_location = envs.get_global_env("engine.file")
    train_dirname = os.path.dirname(train_location)
    base_name = os.path.splitext(os.path.basename(train_location))[0]
    sys.path.append(train_dirname)
    trainer_class = envs.lazy_instance_by_fliename(base_name,
                                                   "UserDefineTraining")
    return trainer_class
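# user_define_engine expects the YAML to provide an "engine.file" entry
# pointing at a Python module that defines a UserDefineTraining class.
# An illustrative (hypothetical) engine YAML:
#
#   engine:
#     file: "./my_custom_trainer.py"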
def terminal(self, context):
    terminal_class_path = envs.get_global_env(
        self.runner_env_name + ".terminal_class_path", default_value=None)
    if terminal_class_path:
        terminal_class = envs.lazy_instance_by_fliename(
            terminal_class_path, "Terminal")(context)
        terminal_class.terminal(context)
    else:
        terminal_class_name = "TerminalBase"
        if (self.engine != EngineMode.SINGLE and
                self.fleet_mode != FleetMode.COLLECTIVE):
            terminal_class_name = "PSTerminal"
        terminal_path = os.path.join(self.abs_dir, "framework",
                                     "terminal.py")
        terminal_class = envs.lazy_instance_by_fliename(
            terminal_path, terminal_class_name)(context)
        terminal_class.terminal(context)
    context['is_exit'] = True
def startup(self, context): startup_class_path = envs.get_global_env(self.runner_env_name + ".startup_class_path", default_value=None) if startup_class_path: startup_class = envs.lazy_instance_by_fliename( startup_class_path, "Startup")(context) else: if self.engine == EngineMode.SINGLE and not context["is_infer"]: startup_class_name = "FineTuningStartup" else: raise ValueError( "FineTuningTrainer can only support SingleTraining.") startup_path = os.path.join(self.abs_dir, "framework", "startup.py") startup_class = envs.lazy_instance_by_fliename( startup_path, startup_class_name)(context) startup_class.startup(context)
def network(self, context): network_class_path = envs.get_global_env(self.runner_env_name + ".network_class_path", default_value=None) if network_class_path: network_class = envs.lazy_instance_by_fliename( network_class_path, "Network")(context) else: if self.engine == EngineMode.SINGLE: network_class_name = "FineTuningNetwork" else: raise ValueError( "FineTuningTrainer can only support SingleTraining.") network_path = os.path.join(self.abs_dir, "framework", "network.py") network_class = envs.lazy_instance_by_fliename( network_path, network_class_name)(context) network_class.build_network(context)
def startup(self, context): startup_class_path = envs.get_global_env(self.runner_env_name + ".startup_class_path", default_value=None) if startup_class_path: startup_class = envs.lazy_instance_by_fliename( startup_class_path, "Startup")(context) else: if self.engine == EngineMode.SINGLE: startup_class_name = "SingleStartup" elif self.fleet_mode == FleetMode.PS or self.fleet_mode == FleetMode.PSLIB: startup_class_name = "PSStartup" elif self.fleet_mode == FleetMode.COLLECTIVE: startup_class_name = "CollectiveStartup" else: raise ValueError("Startup Init Error") startup_path = os.path.join(self.abs_dir, "framework", "startup.py") startup_class = envs.lazy_instance_by_fliename( startup_path, startup_class_name)(context) startup_class.startup(context)
def instance(self, context): instance_class_path = envs.get_global_env(self.runner_env_name + ".instance_class_path", default_value=None) if instance_class_path: instance_class = envs.lazy_instance_by_fliename( instance_class_path, "Instance")(context) else: if self.engine == EngineMode.SINGLE: instance_class_name = "SingleInstance" else: raise ValueError( "FineTuningTrainer can only support SingleTraining.") instance_path = os.path.join(self.abs_dir, "framework", "instance.py") instance_class = envs.lazy_instance_by_fliename( instance_path, instance_class_name)(context) instance_class.instance(context)
def instance(self, context): instance_class_path = envs.get_global_env(self.runner_env_name + ".instance_class_path", default_value=None) if instance_class_path: instance_class = envs.lazy_instance_by_fliename( instance_class_path, "Instance")(context) else: if self.engine == EngineMode.SINGLE: instance_class_name = "SingleInstance" elif self.fleet_mode in [ FleetMode.PSLIB, FleetMode.PS, FleetMode.COLLECTIVE ]: instance_class_name = "FleetInstance" else: raise ValueError("Instance Init Error") instance_path = os.path.join(self.abs_dir, "framework", "instance.py") instance_class = envs.lazy_instance_by_fliename( instance_path, instance_class_name)(context) instance_class.instance(context)
def runner(self, context): runner_class_path = envs.get_global_env(self.runner_env_name + ".runner_class_paht", default_value=None) if runner_class_path: runner_class = envs.lazy_instance_by_fliename( runner_class_path, "Runner")(context) else: if self.engine == EngineMode.SINGLE: runner_class_name = "SingleRunner" elif self.fleet_mode == FleetMode.PSLIB: runner_class_name = "PslibRunner" elif self.fleet_mode == FleetMode.PS: runner_class_name = "PSRunner" elif self.fleet_mode == FleetMode.COLLECTIVE: runner_class_name = "CollectiveRunner" else: raise ValueError("Runner Init Error") runner_path = os.path.join(self.abs_dir, "framework", "runner.py") runner_class = envs.lazy_instance_by_fliename( runner_path, runner_class_name)(context) runner_class.run(context)
def runner(self, context): runner_class_path = envs.get_global_env(self.runner_env_name + ".runner_class_path", default_value=None) if runner_class_path: runner_class = envs.lazy_instance_by_fliename( runner_class_path, "Runner")(context) else: if self.engine == EngineMode.SINGLE and context["is_infer"]: runner_class_name = "SingleInferRunner" elif self.engine == EngineMode.SINGLE and not context["is_infer"]: runner_class_name = "SingleRunner" elif self.fleet_mode in [ FleetMode.PSLIB, FleetMode.PS, FleetMode.COLLECTIVE ]: runner_class_name = "FleetRunner" else: raise ValueError("Runner Init Error") runner_path = os.path.join(self.abs_dir, "framework", "runner.py") runner_class = envs.lazy_instance_by_fliename( runner_path, runner_class_name)(context) runner_class.run(context)
def network(self, context): network_class_path = envs.get_global_env(self.runner_env_name + ".network_class_path", default_value=None) if network_class_path: network_class = envs.lazy_instance_by_fliename( network_class_path, "Network")(context) else: if self.engine == EngineMode.SINGLE: network_class_name = "SingleNetwork" elif self.fleet_mode in [ FleetMode.PSLIB, FleetMode.PS, FleetMode.COLLECTIVE ]: network_class_name = "FleetNetwork" else: raise ValueError("NetWork Init Error") network_path = os.path.join(self.abs_dir, "framework", "network.py") network_class = envs.lazy_instance_by_fliename( network_path, network_class_name)(context) network_class.build_network(context)
def startup(self, context): startup_class_path = envs.get_global_env(self.runner_env_name + ".startup_class_path", default_value=None) if startup_class_path: startup_class = envs.lazy_instance_by_fliename( startup_class_path, "Startup")(context) else: if self.engine == EngineMode.SINGLE and context["is_infer"]: startup_class_name = "SingleInferStartup" elif self.engine == EngineMode.SINGLE and not context["is_infer"]: startup_class_name = "SingleStartup" elif self.fleet_mode in [ FleetMode.PSLIB, FleetMode.PS, FleetMode.COLLECTIVE ]: startup_class_name = "FleetStartup" else: raise ValueError("Startup Init Error") startup_path = os.path.join(self.abs_dir, "framework", "startup.py") startup_class = envs.lazy_instance_by_fliename( startup_path, startup_class_name)(context) startup_class.startup(context)
def user_define_engine(engine_yaml):
    with open(engine_yaml, 'r') as rb:
        _config = yaml.load(rb.read(), Loader=yaml.FullLoader)
    assert _config is not None
    envs.set_runtime_environs(_config)
    train_location = envs.get_global_env("engine.file")
    train_dirname = os.path.dirname(train_location)
    base_name = os.path.splitext(os.path.basename(train_location))[0]
    sys.path.append(train_dirname)
    trainer_class = envs.lazy_instance_by_fliename(base_name,
                                                   "UserDefineTraining")
    return trainer_class
def _build_trainer(yaml_path):
    print(envs.pretty_print_envs(envs.get_global_envs()))
    train_mode = envs.get_trainer()
    trainer_abs = trainers.get(train_mode, None)
    if trainer_abs is None:
        if not os.path.isfile(train_mode):
            raise IOError(
                "trainer {} can not be recognized".format(train_mode))
        # An unregistered trainer name is treated as a path to a
        # user-defined trainer file.
        trainer_abs = train_mode
        train_mode = "UserDefineTrainer"
    trainer_class = envs.lazy_instance_by_fliename(trainer_abs, train_mode)
    trainer = trainer_class(yaml_path)
    return trainer
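# Typical use, assuming a standard model config (the path here is
# hypothetical): build the trainer selected by the global env, falling
# back to UserDefineTrainer when the mode names a file on disk.
#
#   trainer = _build_trainer("models/rank/dnn/config.yaml")
#   trainer.run()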
def dataloader_by_name(readerclass, dataset_name, yaml_file, context, reader_class_name="Reader"): reader_class = lazy_instance_by_fliename(readerclass, reader_class_name) name = "dataset." + dataset_name + "." data_path = get_global_env(name + "data_path") if data_path.startswith("paddlerec::"): package_base = get_runtime_environ("PACKAGE_BASE") assert package_base is not None data_path = os.path.join(package_base, data_path.split("::")[1]) files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)] if context["engine"] == EngineMode.LOCAL_CLUSTER: files = split_files(files, context["fleet"].worker_index(), context["fleet"].worker_num()) print("file_list : {}".format(files)) reader = reader_class(yaml_file) reader.init() def gen_reader(): for file in files: with open(file, 'r') as f: for line in f: line = line.rstrip('\n') iter = reader.generate_sample(line) for parsed_line in iter(): if parsed_line is None: continue else: values = [] for pased in parsed_line: values.append(pased[1]) yield values def gen_batch_reader(): return reader.generate_batch_from_trainfiles(files) if hasattr(reader, 'generate_batch_from_trainfiles'): return gen_batch_reader() return gen_reader
def _get_dataloader(self, state="TRAIN"): if state == "TRAIN": dataloader = self.model._data_loader namespace = "train.reader" class_name = "TrainReader" else: dataloader = self.model._infer_data_loader namespace = "evaluate.reader" class_name = "EvaluateReader" sparse_slots = envs.get_global_env("sparse_slots", None, namespace) dense_slots = envs.get_global_env("dense_slots", None, namespace) batch_size = envs.get_global_env("batch_size", None, namespace) print("batch_size: {}".format(batch_size)) if sparse_slots is None and dense_slots is None: reader_class = envs.get_global_env("class", None, namespace) reader = dataloader_instance.dataloader(reader_class, state, self._config_yaml) reader_class = envs.lazy_instance_by_fliename( reader_class, class_name) reader_ins = reader_class(self._config_yaml) else: reader = dataloader_instance.slotdataloader( "", state, self._config_yaml) reader_ins = SlotReader(self._config_yaml) if hasattr(reader_ins, 'generate_batch_from_trainfiles'): dataloader.set_sample_list_generator(reader) else: dataloader.set_sample_generator(reader, batch_size) debug_mode = envs.get_global_env("reader_debug_mode", False, namespace) if debug_mode: print("--- DataLoader Debug Mode Begin , show pre 10 data ---") for idx, line in enumerate(reader()): print(line) if idx >= 9: break print("--- DataLoader Debug Mode End , show pre 10 data ---") exit(0) return dataloader
def init(self, context): for model_dict in self._env["phase"]: self._model[model_dict["name"]] = [None] * 5 train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() dataset_name = model_dict["dataset_name"] opt_name = envs.get_global_env("hyper_parameters.optimizer.class") opt_lr = envs.get_global_env( "hyper_parameters.optimizer.learning_rate") opt_strategy = envs.get_global_env( "hyper_parameters.optimizer.strategy") with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): with fluid.scope_guard(scope): model_path = model_dict["model"].replace( "{workspace}", envs.path_adapter(self._env["workspace"])) model = envs.lazy_instance_by_fliename( model_path, "Model")(self._env) model._infer_data_var = model.input_data( is_infer=True, dataset_name=model_dict["dataset_name"]) if envs.get_global_env("dataset." + dataset_name + ".type") == "DataLoader": model._init_dataloader(is_infer=True) self._get_dataloader(dataset_name, model._data_loader) model.net(model._infer_data_var, True) self._model[model_dict["name"]][0] = train_program self._model[model_dict["name"]][1] = startup_program self._model[model_dict["name"]][2] = scope self._model[model_dict["name"]][3] = model self._model[model_dict["name"]][4] = train_program.clone() for dataset in self._env["dataset"]: if dataset["type"] != "DataLoader": self._dataset[dataset["name"]] = self._create_dataset( dataset["name"]) context['status'] = 'startup_pass'
def _get_dataloader(self, dataset_name, dataloader):
    name = "dataset." + dataset_name + "."
    thread_num = envs.get_global_env(name + "thread_num")
    batch_size = envs.get_global_env(name + "batch_size")
    reader_class = envs.get_global_env(name + "data_converter")
    abs_dir = os.path.dirname(os.path.abspath(__file__))
    sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
    dense_slots = envs.get_global_env(name + "dense_slots", "").strip()

    if sparse_slots == "" and dense_slots == "":
        reader = dataloader_instance.dataloader_by_name(
            reader_class, dataset_name, self._config_yaml)
        reader_class = envs.lazy_instance_by_fliename(reader_class,
                                                      "TrainReader")
        reader_ins = reader_class(self._config_yaml)
    else:
        reader = dataloader_instance.slotdataloader_by_name(
            "", dataset_name, self._config_yaml)
        reader_ins = SlotReader(self._config_yaml)

    if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
        dataloader.set_sample_list_generator(reader)
    else:
        dataloader.set_sample_generator(reader, batch_size)
    return dataloader
import sys

from paddlerec.core.reader import SlotReader
from paddlerec.core.utils import envs
from paddlerec.core.utils.envs import lazy_instance_by_fliename

if len(sys.argv) < 4:
    raise ValueError(
        "reader only accept 3 argument: 1. reader_class "
        "2. train/evaluate/slotreader 3. yaml_abs_path")

reader_package = sys.argv[1]

if sys.argv[2].upper() == "TRAIN":
    reader_name = "TrainReader"
elif sys.argv[2].upper() == "EVALUATE":
    reader_name = "EvaluateReader"
else:
    # The SlotReader mode expects four extra positional arguments.
    reader_name = "SlotReader"
    namespace = sys.argv[4]
    sparse_slots = sys.argv[5].replace("#", " ")
    dense_slots = sys.argv[6].replace("#", " ")
    padding = int(sys.argv[7])

yaml_abs_path = sys.argv[3]

if reader_name != "SlotReader":
    reader_class = lazy_instance_by_fliename(reader_package, reader_name)
    reader = reader_class(yaml_abs_path)
    reader.init()
    reader.run_from_stdin()
else:
    reader = SlotReader(yaml_abs_path)
    reader.init(sparse_slots, dense_slots, padding)
    reader.run_from_stdin()
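# Example invocations of this entry script (script name, paths, and slot
# names are hypothetical). A custom reader parses raw lines fed on stdin;
# the SlotReader form additionally passes a namespace, slot specs ("#"
# stands in for spaces), and a padding value:
#
#   python reader_entry.py my_reader.py TRAIN ./config.yaml
#   python reader_entry.py "" SLOT ./config.yaml dataset.train \
#       "click#user_id#item_id" "dense_feature" 0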
def dataloader_by_name(readerclass, dataset_name, yaml_file, context, reader_class_name="Reader"): reader_class = lazy_instance_by_fliename(readerclass, reader_class_name) name = "dataset." + dataset_name + "." data_path = get_global_env(name + "data_path") if data_path.startswith("paddlerec::"): package_base = get_runtime_environ("PACKAGE_BASE") assert package_base is not None data_path = os.path.join(package_base, data_path.split("::")[1]) hidden_file_list, files = check_filelist(hidden_file_list=[], data_file_list=[], train_data_path=data_path) if (hidden_file_list is not None): print( "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}" .format(hidden_file_list)) files.sort() # for local cluster: discard some files if files cannot be divided equally between GPUs if (context["device"] == "GPU") and "PADDLEREC_GPU_NUMS" in os.environ: selected_gpu_nums = int(os.getenv("PADDLEREC_GPU_NUMS")) discard_file_nums = len(files) % selected_gpu_nums if (discard_file_nums != 0): warnings.warn( "Because files cannot be divided equally between GPUs,discard these files:{}" .format(files[-discard_file_nums:])) files = files[:len(files) - discard_file_nums] need_split_files = False if context["engine"] == EngineMode.LOCAL_CLUSTER: # for local cluster: split files for multi process need_split_files = True elif context["engine"] == EngineMode.CLUSTER and context[ "cluster_type"] == "K8S": # for k8s mount mode, split files for every node need_split_files = True print("need_split_files: {}".format(need_split_files)) if need_split_files: files = split_files(files, context["fleet"].worker_index(), context["fleet"].worker_num()) context["file_list"] = files reader = reader_class(yaml_file) reader.init() def gen_reader(): for file in files: with open(file, 'r') as f: for line in f: line = line.rstrip('\n') iter = reader.generate_sample(line) for parsed_line in iter(): if parsed_line is None: continue else: values = [] for pased in parsed_line: values.append(pased[1]) yield values def gen_batch_reader(): return reader.generate_batch_from_trainfiles(files) if hasattr(reader, 'generate_batch_from_trainfiles'): return gen_batch_reader() if hasattr(reader, "batch_tensor_creator"): return reader.batch_tensor_creator(gen_reader) return gen_reader
def build_network(self, context): context["model"] = {} for model_dict in context["phases"]: context["model"][model_dict["name"]] = {} train_program = fluid.Program() startup_program = fluid.Program() scope = fluid.Scope() dataset_name = model_dict["dataset_name"] with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): with fluid.scope_guard(scope): model_path = envs.os_path_adapter( envs.workspace_adapter(model_dict["model"])) model = envs.lazy_instance_by_fliename( model_path, "Model")(context["env"]) model._data_var = model.input_data( dataset_name=model_dict["dataset_name"]) if envs.get_global_env("dataset." + dataset_name + ".type") == "DataLoader": model._init_dataloader( is_infer=context["is_infer"]) data_loader = DataLoader(context) data_loader.get_dataloader(context, dataset_name, model._data_loader) model.net(model._data_var, context["is_infer"]) finetuning_varnames = envs.get_global_env( "runner." + context["runner_name"] + ".finetuning_aspect_varnames", default_value=[]) if len(finetuning_varnames) == 0: raise ValueError( "nothing need to be fine tuning, you may use other traning mode" ) if len(finetuning_varnames) != 1: raise ValueError( "fine tuning mode can only accept one varname now" ) varname = finetuning_varnames[0] finetuning_vars = train_program.global_block( ).vars[varname] finetuning_vars.stop_gradient = True optimizer = model.optimizer() optimizer.minimize(model._cost) context["model"][ model_dict["name"]]["main_program"] = train_program context["model"][ model_dict["name"]]["startup_program"] = startup_program context["model"][model_dict["name"]]["scope"] = scope context["model"][model_dict["name"]]["model"] = model context["model"][model_dict["name"]][ "default_main_program"] = train_program.clone() context["model"][model_dict["name"]]["compiled_program"] = None context["dataset"] = {} for dataset in context["env"]["dataset"]: type = envs.get_global_env("dataset." + dataset["name"] + ".type") if type == "QueueDataset": dataset_class = QueueDataset(context) context["dataset"][ dataset["name"]] = dataset_class.create_dataset( dataset["name"], context) context["status"] = "startup_pass"
def instance(self, context): models = envs.get_global_env("train.model.models") model_class = envs.lazy_instance_by_fliename(models, "Model") self.model = model_class(None) context['status'] = 'init_pass'