def run(self, config: Dict[Text, Any]):
        """
        Package the current repository and launch the pipeline as a
        Kubernetes job.

        The repository is archived into a timestamped tarball inside the
        ZenML config directory, copied to the staging area of the artifact
        store named in `config`, and then deleted locally. The location of
        the uploaded tarball is written into the backend args of `config`
        (the dict is modified in place) before `launch_job` is called.

        Args:
            config: a ZenML config dict
        """
        logger.info('Orchestrating pipeline on Kubernetes..')

        repo: Repository = Repository.get_instance()
        tar_file_name = f'{EXTRACTED_TAR_DIR_NAME}_{int(time.time())}.tar.gz'
        path_to_tar = os.path.join(repo.zenml_config.config_dir,
                                   tar_file_name)

        # Resolve the upload destination before anything is written to
        # disk, so that a config without an artifact store fails early and
        # leaves no stray tarball behind.
        store_path = config[keys.GlobalKeys.ARTIFACT_STORE]
        store_path_to_tar = os.path.join(
            store_path, STAGING_AREA, tar_file_name)

        # Create the tarfile of the repository.
        # NOTE(review): the .zenml folder is presumably excluded inside
        # `create_tarfile` itself - confirm, nothing here excludes it.
        path_utils.create_tarfile(repo.path, path_to_tar)
        logger.info(f'Created tar of current repository at: {path_to_tar}')

        try:
            # Upload tar to artifact store
            path_utils.copy(path_to_tar, store_path_to_tar)
            logger.info(
                f'Copied tar to artifact store at: {store_path_to_tar}')
        finally:
            # Remove the local tar even when the upload fails, otherwise
            # every failed run leaves another archive in the config dir.
            path_utils.rm_dir(path_to_tar)
            logger.info(f'Removed tar at: {path_to_tar}')

        # Append path of tar in config orchestrator utils
        backend_args = config[keys.GlobalKeys.BACKEND][keys.BackendKeys.ARGS]
        backend_args[TAR_PATH_ARG] = store_path_to_tar

        # Launch the job
        self.launch_job(config)
    def run(self, config: Dict[Text, Any]):
        """
        This run function essentially calls an underlying TFX orchestrator run.
        However it is meant as a higher level abstraction with some
        opinionated decisions taken.

        The repository is archived into a timestamped tarball inside the
        ZenML config directory, copied to the staging area of the artifact
        store named in `config`, and then deleted locally. The location of
        the uploaded tarball is written into the backend args of `config`
        (the dict is modified in place) before `launch_instance` is called.

        Args:
            config: a ZenML config dict
        """
        logger.info('Orchestrating pipeline on GCP..')

        repo: Repository = Repository.get_instance()
        tar_file_name = f'{EXTRACTED_TAR_DIR_NAME}_{int(time.time())}.tar.gz'
        path_to_tar = os.path.join(repo.zenml_config.config_dir,
                                   tar_file_name)

        # Resolve the upload destination before anything is written to
        # disk, so that a config without an artifact store fails early and
        # leaves no stray tarball behind.
        store_path = config[keys.GlobalKeys.ARTIFACT_STORE]
        store_path_to_tar = os.path.join(
            store_path, STAGING_AREA, tar_file_name)

        # Create the tarfile of the repository.
        # NOTE(review): the .zenml folder is presumably excluded inside
        # `create_tarfile` itself - confirm, nothing here excludes it.
        path_utils.create_tarfile(repo.path, path_to_tar)
        logger.info(f'Created tar of current repository at: {path_to_tar}')

        try:
            # Upload tar to artifact store
            path_utils.copy(path_to_tar, store_path_to_tar)
            logger.info(
                f'Copied tar to artifact store at: {store_path_to_tar}')
        finally:
            # Remove the local tar even when the upload fails, otherwise
            # every failed run leaves another archive in the config dir.
            path_utils.rm_dir(path_to_tar)
            logger.info(f'Removed tar at: {path_to_tar}')

        # Append path of tar in config orchestrator utils
        backend_args = config[keys.GlobalKeys.BACKEND][keys.BackendKeys.ARGS]
        backend_args[TAR_PATH_ARG] = store_path_to_tar

        # Launch the instance
        self.launch_instance(config)
Example #3
0
    def run_pipeline(self, config_b64: str):
        """
        Run a pipeline from a base64-encoded config, using the code that
        was packaged into the tar referenced by that config.

        Args:
            config_b64: base64-encoded JSON serialization of the config dict
        """
        # Decode the base64 payload and parse the JSON it contains
        decoded_config = base64.b64decode(config_b64)
        config = json.loads(decoded_config)

        # The tar path is taken out of the backend args so that it is no
        # longer part of the config passed on below
        backend_args = config[keys.GlobalKeys.BACKEND][keys.BackendKeys.ARGS]
        remote_tar = backend_args.pop(TAR_PATH_ARG)

        # Fetch the tar to the local path (the source is expected to be
        # remote) and unpack it into EXTRACTED_TAR_DIR
        path_utils.copy(remote_tar, EXTRACTED_TAR_FILE_PATH)
        path_utils.extract_tarfile(EXTRACTED_TAR_FILE_PATH, EXTRACTED_TAR_DIR)

        # Put the extracted directory on sys.path so user code is importable
        sys.path.append(EXTRACTED_TAR_DIR)

        # Make sure the Repository is initialized at the extracted path
        Repository.get_instance(EXTRACTED_TAR_DIR)

        # Hand the remaining config to the base orchestrator backend
        orchestrator = OrchestratorBaseBackend()
        orchestrator.run(config)