def __init__(self, *args, **kwargs):
    """
    Constructs an instance of the GlobalConfig singleton.

    Args:
        *args: Positional arguments forwarded to the parent constructor.
        **kwargs: Keyword arguments forwarded to the parent constructor.

    Raises:
        Exception: If a GlobalConfig instance already exists.
    """
    if GlobalConfig.__instance__ is None:
        self.path = os.path.join(
            GlobalConfig.get_config_dir(), 'info.json')

        # Create the global config dir if it does not exist.
        path_utils.create_dir_recursive_if_not_exists(
            GlobalConfig.get_config_dir())

        if path_utils.file_exists(self.path):
            # Load the existing config.
            self.load()
        else:
            # Set up a default config.
            # True by default, but the user is always asked.
            self['analytics_opt_in'] = True

            # Creating the user ID saves the whole config.
            self.user_id = self.create_user_id()

        super(GlobalConfig, self).__init__(*args, **kwargs)
        GlobalConfig.__instance__ = self
    else:
        raise Exception("You cannot create another GlobalConfig class!")
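# A minimal usage sketch of the singleton contract above (hedged; relies only
# on the class-level `__instance__` attribute shown in the constructor):
config = GlobalConfig()        # first call creates and caches the instance
assert GlobalConfig.__instance__ is config
try:
    GlobalConfig()             # any further construction attempt fails
except Exception as e:
    print(e)                   # "You cannot create another GlobalConfig class!"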
def to_config(path: Text,
              artifact_store_path: Optional[Text] = None,
              metadata_store: Optional[Type[ZenMLMetadataStore]] = None,
              pipelines_dir: Optional[Text] = None):
    """
    Creates a default ZenML config at `path`/ZENML_DIR_NAME/ZENML_CONFIG_NAME.

    Args:
        path (str): Path to a directory.
        artifact_store_path (str): Path where to store artifacts.
        metadata_store: Metadata store definition.
        pipelines_dir (str): Path where to store pipeline configs.
    """
    config_dir_path = os.path.join(path, ZENML_DIR_NAME)
    config_path = os.path.join(config_dir_path, ZENML_CONFIG_NAME)

    if path_utils.file_exists(config_path):
        raise AssertionError(f'.zenml file already exists at '
                             f'{config_path}. Cannot replace. Please delete '
                             f'the {config_dir_path} directory first.')

    # Create the config dir.
    path_utils.create_dir_if_not_exists(config_dir_path)

    if artifact_store_path is None:
        artifact_store_path = os.path.join(config_dir_path,
                                           ARTIFACT_STORE_DEFAULT_DIR)
    else:
        # If provided, resolve it to an absolute path.
        artifact_store_path = path_utils.resolve_relative_path(
            artifact_store_path)

    # Create the artifact store dir.
    path_utils.create_dir_if_not_exists(artifact_store_path)

    if metadata_store is None:
        # Default to a SQLite store living inside the artifact store.
        uri = os.path.join(artifact_store_path,
                           ML_METADATA_SQLITE_DEFAULT_NAME)
        from zenml.metadata import SQLiteMetadataStore
        metadata_dict = SQLiteMetadataStore(uri).to_config()
    else:
        metadata_dict = metadata_store.to_config()

    if pipelines_dir is None:
        pipelines_dir = os.path.join(path, PIPELINES_DEFAULT_DIR_NAME)
    else:
        # If provided, still resolve to an absolute path.
        pipelines_dir = path_utils.resolve_relative_path(pipelines_dir)
    path_utils.create_dir_if_not_exists(pipelines_dir)

    config_dict = {
        ARTIFACT_STORE_KEY: artifact_store_path,
        METADATA_KEY: metadata_dict,
        PIPELINES_DIR_KEY: pipelines_dir,
    }

    # Write the initial config.
    yaml_utils.write_yaml(config_path, config_dict)
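# A minimal usage sketch (hedged; the repo paths are hypothetical). With all
# defaults, this lays out the artifact store, a SQLite metadata store, and
# the pipelines dir under the given root:
to_config('/tmp/my_zenml_repo')

# Explicit locations can be passed instead; relative paths get resolved:
to_config('/tmp/my_zenml_repo_2',
          artifact_store_path='artifacts',
          pipelines_dir='pipelines')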
def is_zenml_dir(path: Text) -> bool:
    """
    Checks whether `path` is the root of an initialized ZenML repository.

    Args:
        path (str): Path to the root.

    Returns:
        True if a ZenML config file exists under `path`, else False.
    """
    config_path = os.path.join(path, ZENML_DIR_NAME, ZENML_CONFIG_NAME)
    return path_utils.file_exists(config_path)
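# Hedged sketch of the usual guard built on this check (the error text is
# illustrative, not a quote from the real CLI):
import os

if not is_zenml_dir(os.getcwd()):
    raise Exception('Not a ZenML repository. Initialize one first.')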
def read_files_from_disk(pipeline: beam.Pipeline,
                         base_path: Text) -> beam.pvalue.PCollection:
    """
    The Beam PTransform used to read data from a collection of CSV files
    on a local file system.

    Args:
        pipeline: Input beam.Pipeline object coming from a TFX Executor.
        base_path: Base path pointing either to the directory containing
         the CSV files, or to a (single) CSV file.

    Returns:
        A beam.PCollection of data points. Each row in the collection of
         CSV files represents a single data point.
    """
    wildcard_qualifier = "*"
    file_pattern = os.path.join(base_path, wildcard_qualifier)

    if path_utils.is_dir(base_path):
        csv_files = path_utils.list_dir(base_path)
        if not csv_files:
            raise RuntimeError(
                f'Split pattern {file_pattern} does not match any files.')
    else:
        if path_utils.file_exists(base_path):
            csv_files = [base_path]
        else:
            raise RuntimeError(f'{base_path} does not exist.')

    # Filter out files with disallowed extensions.
    allowed_file_exts = [".csv", ".txt"]  # ".dat"
    csv_files = [
        uri for uri in csv_files
        if os.path.splitext(uri)[1] in allowed_file_exts
    ]
    logger.info(f'Matched {len(csv_files)}: {csv_files}')

    # Always use the header from the first file.
    logger.info(f'Using header from file: {csv_files[0]}.')
    column_names = path_utils.load_csv_header(csv_files[0])
    logger.info(f'Header: {column_names}.')

    # ReadFromText expects a file pattern, not a directory: read from the
    # wildcard pattern when base_path is a directory, otherwise read the
    # single file directly.
    read_pattern = file_pattern if path_utils.is_dir(base_path) else base_path

    parsed_csv_lines = (
        pipeline
        | 'ReadFromText' >> beam.io.ReadFromText(file_pattern=read_pattern,
                                                 skip_header_lines=1)
        | 'ParseCSVLine' >> beam.ParDo(csv_decoder.ParseCSVLine(delimiter=','))
        | 'ExtractParsedCSVLines' >> beam.Map(
            lambda x: dict(zip(column_names, x[0]))))
    return parsed_csv_lines
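# A minimal local-runner sketch (hedged; the directory is hypothetical and
# must hold CSV files sharing one header):
import apache_beam as beam

with beam.Pipeline() as p:
    rows = read_files_from_disk(p, '/data/csv')
    rows | 'PrintRows' >> beam.Map(print)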
def read_json(file_path: Text):
    """
    Reads the JSON file at `file_path` and returns its contents as a dict.

    Args:
        file_path (str): Path to the JSON file.
    """
    if path_utils.file_exists(file_path):
        with open(file_path, 'r') as f:
            return json.load(f)
    else:
        raise Exception(f'{file_path} does not exist.')
def read_yaml(file_path: Text):
    """
    Reads the YAML file at `file_path` and returns its contents as a dict.

    Args:
        file_path (str): Path to the YAML file.
    """
    if path_utils.file_exists(file_path):
        with open(file_path, 'r') as f:
            return yaml.load(f, Loader=yaml.FullLoader)
    else:
        raise Exception(f'{file_path} does not exist.')
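# Hedged round-trip sketch for the two readers above (file names are
# hypothetical; the writing helpers are stdlib/PyYAML, not repo utilities):
import json
import yaml

with open('/tmp/example.json', 'w') as f:
    json.dump({'analytics_opt_in': True}, f)
with open('/tmp/example.yaml', 'w') as f:
    yaml.safe_dump({'pipelines_dir': '/tmp/pipelines'}, f)

assert read_json('/tmp/example.json')['analytics_opt_in'] is True
assert read_yaml('/tmp/example.yaml')['pipelines_dir'] == '/tmp/pipelines'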
def add_gitignore(self, items: List[Text]):
    """
    Adds `items` to the repository's .gitignore file, creating the file
    first if it does not exist.

    Args:
        items (list[str]): Items to add.
    """
    str_items = '\n'.join(items)
    str_items = '\n\n# ZenML\n' + str_items

    gitignore_path = os.path.join(self.repo_path, '.gitignore')
    if not path_utils.file_exists(gitignore_path):
        path_utils.create_file_if_not_exists(gitignore_path, str_items)
    else:
        path_utils.append_file(gitignore_path, str_items)
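# Hedged sketch: `repo` stands in for whatever object carries `repo_path`
# and this method; the ignored entries are illustrative only.
repo.add_gitignore(['.zenml/', 'pipelines/'])
# Appends a block like the following to <repo_path>/.gitignore:
#
#   # ZenML
#   .zenml/
#   pipelines/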