def __init__(self,
             project_dir=None,
             max_training_processes=1,
             response_log=None,
             emulation_mode=None,
             remote_storage=None,
             component_builder=None,
             model_server=None,
             wait_time_between_pulls=None):
    self._training_processes = max(max_training_processes, 1)
    self._current_training_processes = 0
    self.responses = self._create_query_logger(response_log)
    self.project_dir = config.make_path_absolute(project_dir)
    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    self.model_server = model_server
    self.wait_time_between_pulls = wait_time_between_pulls

    if component_builder:
        self.component_builder = component_builder
    else:
        self.component_builder = ComponentBuilder(use_cache=True)

    self.project_store = self._create_project_store(project_dir)

    # tensorflow sessions are not fork-safe,
    # and training processes have to be spawned instead of forked. See
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
    # -258934405
    multiprocessing.set_start_method('spawn', force=True)
    self.pool = ProcessPool(self._training_processes)
def persist(self,
            path: Text,
            persistor: Optional[Persistor] = None,
            project_name: Optional[Text] = None,
            fixed_model_name: Optional[Text] = None) -> Text:
    """Persist all components of the pipeline to the passed path.

    Returns the directory of the persisted model."""

    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    metadata = {
        "language": self.config["language"],
        "adapter": self.config["adapter"],
        "pipeline": [],
    }

    if project_name is None:
        project_name = "default"

    if fixed_model_name:
        model_name = fixed_model_name
    else:
        model_name = "model_" + timestamp

    path = make_path_absolute(path)
    dir_name = os.path.join(path, project_name, model_name)

    create_dir(dir_name)

    if self.training_data:
        metadata.update(self.training_data.persist(dir_name))

    for i, component in enumerate(self.pipeline):
        file_name = self._file_name(i, component.name)
        update = component.persist(file_name, dir_name)
        component_meta = component.component_config
        if update:
            component_meta.update(update)
        component_meta["class"] = utils.module_path_from_object(component)

        metadata["pipeline"].append(component_meta)

    Metadata(metadata, dir_name).persist(dir_name)

    if persistor is not None:
        persistor.persist(dir_name, model_name, project_name)
    logger.info("Successfully saved model into "
                "'{}'".format(os.path.abspath(dir_name)))
    return dir_name
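# Usage sketch (not part of the original source): it assumes the public
# rasa_nlu training API (config.load, training_data.load_data, model.Trainer)
# and hypothetical file paths; adjust them to your project. persist() writes
# the model under <path>/<project_name>/<model_name> and returns that
# directory.
def _example_train_and_persist():
    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    from rasa_nlu.training_data import load_data

    # Build a trainer from a pipeline/language config, train on labelled
    # NLU examples, then persist the resulting model to disk.
    trainer = Trainer(config.load("config.yml"))
    trainer.train(load_data("data/training_data.json"))
    return trainer.persist("./models", project_name="default")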
def __init__(
    self,
    project_dir=None,
    max_worker_processes=1,
    response_log=None,
    emulation_mode=None,
    remote_storage=None,
    component_builder=None,
    model_server=None,
    wait_time_between_pulls=None,
):
    self._worker_processes = max(max_worker_processes, 1)
    self._current_worker_processes = 0
    self.responses = self._create_query_logger(response_log)
    self.project_dir = config.make_path_absolute(project_dir)
    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    self.model_server = model_server
    self.wait_time_between_pulls = wait_time_between_pulls

    if component_builder:
        self.component_builder = component_builder
    else:
        self.component_builder = ComponentBuilder(use_cache=True)

    # TODO: Should be moved to separate method
    loop = asyncio.get_event_loop()
    if loop.is_closed():
        loop = asyncio.new_event_loop()
    self.project_store = loop.run_until_complete(
        self._create_project_store(self.project_dir)
    )
    loop.close()

    # tensorflow sessions are not fork-safe,
    # and training processes have to be spawned instead of forked. See
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
    # -258934405
    multiprocessing.set_start_method("spawn", force=True)
    self.pool = ProcessPoolExecutor(max_workers=self._worker_processes)
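# One possible shape for the helper the TODO above refers to (a sketch, not
# the project's actual implementation): run the async project-store factory
# on a short-lived event loop and return its result, mirroring the inline
# loop handling in __init__.
def _run_until_complete(coro):
    import asyncio

    # Reuse the current loop if it is still usable, otherwise create a new
    # one; close it afterwards so it does not leak into worker processes.
    loop = asyncio.get_event_loop()
    if loop.is_closed():
        loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()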