def build_network(self, context):
    """Build one training/inference program per phase and register it.

    For every phase in ``context["phases"]`` this creates a fresh main
    program, startup program and scope, instantiates the phase's Model
    class, wires its input variables (train or infer flavour), optionally
    attaches a DataLoader, and — in training mode only — builds and
    minimizes the optimizer.  Results are stored under
    ``context["model"][<phase name>]``.  Afterwards a QueueDataset is
    created for every configured dataset whose type is not "DataLoader",
    and the context status advances to "startup_pass".
    """
    context["model"] = {}
    for phase_cfg in context["phases"]:
        phase_name = phase_cfg["name"]
        context["model"][phase_name] = {}
        train_program = fluid.Program()
        startup_program = fluid.Program()
        phase_scope = fluid.Scope()
        dataset_name = phase_cfg["dataset_name"]
        # Guard program, unique-name and scope together so variables of
        # different phases never collide.
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                with fluid.scope_guard(phase_scope):
                    model_path = envs.os_path_adapter(
                        envs.workspace_adapter(phase_cfg["model"]))
                    model = envs.lazy_instance_by_fliename(
                        model_path, "Model")(context["env"])
                    if context["is_infer"]:
                        model._infer_data_var = model.input_data(
                            is_infer=context["is_infer"],
                            dataset_name=phase_cfg["dataset_name"])
                    else:
                        model._data_var = model.input_data(
                            dataset_name=phase_cfg["dataset_name"])
                    ds_type = envs.get_global_env("dataset." + dataset_name +
                                                  ".type")
                    if ds_type == "DataLoader":
                        model._init_dataloader(
                            is_infer=context["is_infer"])
                        data_loader = DataLoader(context)
                        data_loader.get_dataloader(context, dataset_name,
                                                   model._data_loader)
                    if context["is_infer"]:
                        model.net(model._infer_data_var,
                                  context["is_infer"])
                    else:
                        # Only the training path needs an optimizer.
                        model.net(model._data_var, context["is_infer"])
                        optimizer = model.optimizer()
                        optimizer.minimize(model._cost)
        phase_entry = context["model"][phase_name]
        phase_entry["main_program"] = train_program
        phase_entry["startup_program"] = startup_program
        phase_entry["scope"] = phase_scope
        phase_entry["model"] = model
        phase_entry["default_main_program"] = train_program.clone()
        phase_entry["compiled_program"] = None

    context["dataset"] = {}
    for dataset_cfg in context["env"]["dataset"]:
        ds_type = envs.get_global_env("dataset." + dataset_cfg["name"] +
                                      ".type")
        if ds_type != "DataLoader":
            queue_dataset = QueueDataset(context)
            context["dataset"][dataset_cfg["name"]] = \
                queue_dataset.create_dataset(dataset_cfg["name"], context)
    context["status"] = "startup_pass"
def build_network(self, context):
    """Build the single-phase network for parameter-server fleet training.

    Only one phase is supported; a warning is emitted if more are
    configured and only the first is used.  The model is built into the
    default static programs, wrapped with the fleet distributed
    optimizer, and registered under ``context["model"][<name>]``.
    Servers hand off to ``self._server``; workers create their datasets
    and advance the status to "startup_pass".
    """
    context["model"] = {}
    if len(context["env"]["phase"]) > 1:
        warnings.warn(
            "Cluster Train Only Support One Phase.",
            category=UserWarning,
            stacklevel=2)
    model_dict = context["env"]["phase"][0]
    context["model"][model_dict["name"]] = {}
    dataset_name = model_dict["dataset_name"]

    model_path = envs.os_path_adapter(
        envs.workspace_adapter(model_dict["model"]))
    model = envs.lazy_instance_by_fliename(model_path,
                                           "Model")(context["env"])
    model._data_var = model.input_data(
        dataset_name=model_dict["dataset_name"])
    if envs.get_global_env("dataset." + dataset_name +
                           ".type") == "DataLoader":
        model._init_dataloader(is_infer=False)
    model.net(model._data_var, False)

    # Wrap the model optimizer with the distributed strategy before
    # minimizing, so fleet can rewrite the programs.
    optimizer = model.optimizer()
    strategy = self._build_strategy(context)
    optimizer = context["fleet"].distributed_optimizer(optimizer, strategy)
    optimizer.minimize(model._cost)

    model_entry = context["model"][model_dict["name"]]
    model_entry["main_program"] = paddle.static.default_main_program()
    model_entry["startup_program"] = paddle.static.default_startup_program()
    model_entry["scope"] = paddle.static.global_scope()
    model_entry["model"] = model
    model_entry["default_main_program"] = \
        paddle.static.default_main_program().clone()
    model_entry["compiled_program"] = paddle.static.default_main_program()

    if context["fleet"].is_server():
        self._server(context)
    else:
        context["dataset"] = {}
        for phase in context["env"]["phase"]:
            ds_type = envs.get_global_env("dataset." +
                                          phase["dataset_name"] + ".type")
            if ds_type == "DataLoader":
                data_loader = DataLoader(context)
                # NOTE(review): passes ``dataset_name`` of phase[0] rather
                # than ``phase["dataset_name"]`` — only correct while a
                # single phase is supported; confirm before enabling
                # multi-phase.
                data_loader.get_dataloader(context, dataset_name,
                                           model._data_loader)
            elif ds_type == "QueueDataset":
                if context["fleet_mode"] == "COLLECTIVE":
                    raise ValueError(
                        "Collective don't support QueueDataset training, please use DataLoader."
                    )
                queue_dataset = QueueDataset(context)
                context["dataset"][phase["dataset_name"]] = \
                    queue_dataset.create_dataset(phase["dataset_name"],
                                                 context)
        context["status"] = "startup_pass"
def build_network(self, context):
    """Build the single-phase network for collective (all-reduce) training.

    Only one phase is supported; a warning is emitted if more are
    configured and only the first is used.  The model is built in its own
    program/scope, wrapped with the fleet distributed optimizer, and the
    fleet-rewritten main program is registered for execution.  Collective
    mode only works with DataLoader datasets; a QueueDataset-typed
    dataset raises ValueError.

    Fix over the original: the original contained unreachable
    QueueDataset-creation statements placed after the ``raise`` inside
    the ``type == "QueueDataset"`` branch; that dead code is removed.
    """
    context["model"] = {}
    if len(context["env"]["phase"]) > 1:
        print("CollectiveNetwork phase:{}".format(context["env"]["phase"]))
        warnings.warn(
            "Cluster Train Only Support One Phase.",
            category=UserWarning,
            stacklevel=2)
    model_dict = context["env"]["phase"][0]
    context["model"][model_dict["name"]] = {}
    dataset_name = model_dict["dataset_name"]

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    with fluid.program_guard(train_program, startup_program):
        with fluid.scope_guard(scope):
            model_path = envs.os_path_adapter(
                envs.workspace_adapter(model_dict["model"]))
            model = envs.lazy_instance_by_fliename(model_path,
                                                   "Model")(context["env"])
            model._data_var = model.input_data(
                dataset_name=model_dict["dataset_name"])
            if envs.get_global_env("dataset." + dataset_name +
                                   ".type") == "DataLoader":
                model._init_dataloader(is_infer=False)
                data_loader = DataLoader(context)
                data_loader.get_dataloader(context, dataset_name,
                                           model._data_loader)
            model.net(model._data_var, False)
            optimizer = model.optimizer()
            strategy = self._build_strategy(context)
            optimizer = context["fleet"].distributed_optimizer(optimizer,
                                                               strategy)
            optimizer.minimize(model._cost)
            # The fleet-rewritten main program is the one that must run.
            context["model"][model_dict["name"]]["main_program"] = context[
                "fleet"].main_program
            context["model"][
                model_dict["name"]]["startup_program"] = startup_program
            context["model"][model_dict["name"]]["scope"] = scope
            context["model"][model_dict["name"]]["model"] = model
            context["model"][
                model_dict["name"]]["default_main_program"] = train_program
            context["model"][model_dict["name"]]["compiled_program"] = None

    context["dataset"] = {}
    for phase in context["env"]["phase"]:
        ds_type = envs.get_global_env("dataset." + phase["dataset_name"] +
                                      ".type")
        if ds_type == "QueueDataset":
            raise ValueError(
                "Collective don't support QueueDataset training, please use DataLoader."
            )
    context["status"] = "startup_pass"
def build_network(self, context):
    """Build the single-phase network for PSLIB-style fleet training.

    Only one phase is supported; a warning is emitted if more are
    configured and only the first is used.  The model is built inside its
    own program/name/scope guards, its optimizer is wrapped by the fleet
    distributed optimizer, and the artifacts are registered under
    ``context["model"][<name>]``.  Servers hand off to ``self._server``;
    workers create QueueDatasets for every non-DataLoader dataset and
    advance the status to "startup_pass".
    """
    context["model"] = {}
    if len(context["env"]["phase"]) > 1:
        warnings.warn(
            "Cluster Train Only Support One Phase.",
            category=UserWarning,
            stacklevel=2)
    model_dict = context["env"]["phase"][0]
    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    dataset_name = model_dict["dataset_name"]
    with fluid.program_guard(train_program, startup_program):
        with fluid.unique_name.guard():
            with fluid.scope_guard(scope):
                context["model"][model_dict["name"]] = {}
                model_path = envs.os_path_adapter(
                    envs.workspace_adapter(model_dict["model"]))
                model = envs.lazy_instance_by_fliename(
                    model_path, "Model")(context["env"])
                model._data_var = model.input_data(
                    dataset_name=model_dict["dataset_name"])
                if envs.get_global_env("dataset." + dataset_name +
                                       ".type") == "DataLoader":
                    model._init_dataloader(is_infer=False)
                    data_loader = DataLoader(context)
                    data_loader.get_dataloader(context, dataset_name,
                                               model._data_loader)
                model.net(model._data_var, False)
                optimizer = model.optimizer()
                optimizer = context["fleet"].distributed_optimizer(
                    optimizer)
                # List-style minimize (losses, scopes).  NOTE(review): it
                # passes fluid.global_scope() even though the program was
                # built under ``scope`` — confirm this is intended.
                optimizer.minimize([model._cost], [fluid.global_scope()])
                model_entry = context["model"][model_dict["name"]]
                model_entry["main_program"] = train_program
                model_entry["startup_program"] = startup_program
                model_entry["scope"] = scope
                model_entry["model"] = model
                model_entry["default_main_program"] = train_program.clone()
                # NOTE(review): key is "compile_program" here, unlike the
                # "compiled_program" key used by sibling builders; readers
                # may depend on this exact spelling, so it is preserved.
                model_entry["compile_program"] = None
    if context["fleet"].is_server():
        self._server(context)
    else:
        context["dataset"] = {}
        for dataset_cfg in context["env"]["dataset"]:
            ds_type = envs.get_global_env("dataset." + dataset_cfg["name"] +
                                          ".type")
            if ds_type != "DataLoader":
                queue_dataset = QueueDataset(context)
                context["dataset"][dataset_cfg["name"]] = \
                    queue_dataset.create_dataset(dataset_cfg["name"],
                                                 context)
        context["status"] = "startup_pass"
def build_network(self, context):
    """Build per-phase programs for fine-tuning training.

    Identical in shape to the plain per-phase builder, except that after
    ``model.net`` it reads exactly one variable name from
    ``runner.<name>.finetuning_aspect_varnames`` and freezes that
    variable (``stop_gradient = True``) before minimizing, so gradients
    do not flow into it.  Raises ValueError when zero or more than one
    varname is configured.
    """
    context["model"] = {}
    for model_dict in context["phases"]:
        context["model"][model_dict["name"]] = {}
        train_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.Scope()
        dataset_name = model_dict["dataset_name"]
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                with fluid.scope_guard(scope):
                    model_path = envs.os_path_adapter(
                        envs.workspace_adapter(model_dict["model"]))
                    model = envs.lazy_instance_by_fliename(
                        model_path, "Model")(context["env"])
                    model._data_var = model.input_data(
                        dataset_name=model_dict["dataset_name"])
                    if envs.get_global_env("dataset." + dataset_name +
                                           ".type") == "DataLoader":
                        model._init_dataloader(
                            is_infer=context["is_infer"])
                        data_loader = DataLoader(context)
                        data_loader.get_dataloader(context, dataset_name,
                                                   model._data_loader)
                    model.net(model._data_var, context["is_infer"])

                    # Exactly one "aspect" variable must be configured;
                    # it gets frozen so fine-tuning leaves it untouched.
                    finetuning_varnames = envs.get_global_env(
                        "runner." + context["runner_name"] +
                        ".finetuning_aspect_varnames",
                        default_value=[])
                    if len(finetuning_varnames) == 0:
                        raise ValueError(
                            "nothing need to be fine tuning, you may use other traning mode"
                        )
                    if len(finetuning_varnames) != 1:
                        raise ValueError(
                            "fine tuning mode can only accept one varname now"
                        )
                    varname = finetuning_varnames[0]
                    finetuning_vars = train_program.global_block().vars[
                        varname]
                    finetuning_vars.stop_gradient = True

                    optimizer = model.optimizer()
                    optimizer.minimize(model._cost)
        phase_entry = context["model"][model_dict["name"]]
        phase_entry["main_program"] = train_program
        phase_entry["startup_program"] = startup_program
        phase_entry["scope"] = scope
        phase_entry["model"] = model
        phase_entry["default_main_program"] = train_program.clone()
        phase_entry["compiled_program"] = None

    context["dataset"] = {}
    for dataset_cfg in context["env"]["dataset"]:
        ds_type = envs.get_global_env("dataset." + dataset_cfg["name"] +
                                      ".type")
        if ds_type == "QueueDataset":
            queue_dataset = QueueDataset(context)
            context["dataset"][dataset_cfg["name"]] = \
                queue_dataset.create_dataset(dataset_cfg["name"], context)
    context["status"] = "startup_pass"