def run(self, config: Dict[Text, Any]):
    """Stage the local repo in the artifact store and launch a k8s job.

    Creates a tarball of the current ZenML repository, copies it into
    the staging area of the configured artifact store, records the
    staged path under the backend args, and launches the Kubernetes job.

    Args:
        config: a ZenML config dict
    """
    logger.info('Orchestrating pipeline on Kubernetes..')

    repo: Repository = Repository.get_instance()
    tar_file_name = \
        f'{EXTRACTED_TAR_DIR_NAME}_{str(int(time.time()))}.tar.gz'
    path_to_tar = os.path.join(repo.zenml_config.config_dir, tar_file_name)

    # Tar up the repository (the .zenml folder is excluded when present)
    path_utils.create_tarfile(repo.path, path_to_tar)
    logger.info(f'Created tar of current repository at: {path_to_tar}')

    # Stage the tar in the artifact store
    store_path = config[keys.GlobalKeys.ARTIFACT_STORE]
    store_path_to_tar = os.path.join(
        store_path, STAGING_AREA, tar_file_name)
    path_utils.copy(path_to_tar, store_path_to_tar)
    logger.info(f'Copied tar to artifact store at: {store_path_to_tar}')

    # Clean up the local copy now that it is staged
    path_utils.rm_dir(path_to_tar)
    logger.info(f'Removed tar at: {path_to_tar}')

    # Point the backend args at the staged tar so the job can fetch it
    config[keys.GlobalKeys.BACKEND][
        keys.BackendKeys.ARGS][TAR_PATH_ARG] = store_path_to_tar

    # Launch the instance
    self.launch_job(config)
def run(self, config: Dict[Text, Any]):
    """Stage the local repo in the artifact store and launch on GCP.

    Essentially calls an underlying TFX orchestrator run, but as a
    higher-level abstraction with some opinionated decisions taken:
    the current ZenML repository is tarred, copied into the staging
    area of the configured artifact store, and the staged path is
    recorded under the backend args before the instance is launched.

    Args:
        config: a ZenML config dict
    """
    logger.info('Orchestrating pipeline on GCP..')

    repo: Repository = Repository.get_instance()
    tar_file_name = \
        f'{EXTRACTED_TAR_DIR_NAME}_{str(int(time.time()))}.tar.gz'
    path_to_tar = os.path.join(repo.zenml_config.config_dir, tar_file_name)

    # Tar up the repository (the .zenml folder is excluded when present)
    path_utils.create_tarfile(repo.path, path_to_tar)
    logger.info(f'Created tar of current repository at: {path_to_tar}')

    # Stage the tar in the artifact store
    store_path = config[keys.GlobalKeys.ARTIFACT_STORE]
    store_path_to_tar = os.path.join(
        store_path, STAGING_AREA, tar_file_name)
    path_utils.copy(path_to_tar, store_path_to_tar)
    logger.info(f'Copied tar to artifact store at: {store_path_to_tar}')

    # Clean up the local copy now that it is staged
    path_utils.rm_dir(path_to_tar)
    logger.info(f'Removed tar at: {path_to_tar}')

    # Point the backend args at the staged tar so the instance can fetch it
    config[keys.GlobalKeys.BACKEND][
        keys.BackendKeys.ARGS][TAR_PATH_ARG] = store_path_to_tar

    # Launch the instance
    self.launch_instance(config)
def run_pipeline(self, config_b64: str):
    """Decode a staged config, fetch the repo tar and run it locally.

    Counterpart to the launching backends: decodes the base64-encoded
    config, pulls the staged repository tar from the artifact store,
    extracts it, makes the user code importable, and delegates the run
    to the base (local) orchestrator.

    Args:
        config_b64: base64-encoded JSON ZenML config dict.
    """
    # Load config from base64
    config = json.loads(base64.b64decode(config_b64))

    # Pop the (remote) tar path that the launching backend injected
    tar_path = config[keys.GlobalKeys.BACKEND][
        keys.BackendKeys.ARGS].pop(TAR_PATH_ARG)

    # Fetch the tar locally, then unpack the repository from it
    path_utils.copy(tar_path, EXTRACTED_TAR_FILE_PATH)
    path_utils.extract_tarfile(EXTRACTED_TAR_FILE_PATH, EXTRACTED_TAR_DIR)

    # Append to sys to make user code discoverable
    sys.path.append(EXTRACTED_TAR_DIR)

    # Make sure the Repository is initialized at the right path
    Repository.get_instance(EXTRACTED_TAR_DIR)

    # Change orchestrator of pipeline to local
    OrchestratorBaseBackend().run(config)