def add_postgresql(
    name: str = typer.Option(..., help="A memorable name for the database"),
    username: str = typer.Option(..., help="Username or role to connect database"),
    password: str = typer.Option(..., help="Password of username or role"),
    database: str = typer.Option(..., help="Database name"),
    uri: str = typer.Option(..., help="Hostname or URI of the database"),
    port: Optional[int] = typer.Option(None, help="Port number of the database"),
):
    # Register a PostgreSQL database as a source in the catalog.
    #
    # The catalog is opened from app_state["catalog_connection"], prepared
    # with init_db(), and closed again once the source has been added.
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    source_settings = {
        "name": name,
        "source_type": "postgresql",
        "username": username,
        "password": password,
        "database": database,
        "uri": uri,
        "port": port,
    }

    with closing(open_catalog(**app_state["catalog_connection"])) as catalog:
        init_db(catalog)
        # The source is added inside the catalog's managed session.
        with catalog.managed_session:
            catalog.add_source(**source_settings)

    confirmation = "Registered Postgres database {}".format(name)
    typer.echo(confirmation)
def scan(
    source_name: Optional[List[str]] = typer.Option(
        None, help="List of names of database and data warehouses"
    ),
    include_schema: Optional[List[str]] = typer.Option(None, help=schema_help_text),
    exclude_schema: Optional[List[str]] = typer.Option(
        None, help=exclude_schema_help_text
    ),
    include_table: Optional[List[str]] = typer.Option(None, help=table_help_text),
    exclude_table: Optional[List[str]] = typer.Option(
        None, help=exclude_table_help_text
    ),
):
    # Scan the selected sources and record what is found in the catalog.
    #
    # The catalog is opened from app_state["catalog_connection"]. The
    # include/exclude options are forwarded to scan_sources() as regex
    # filters. If scan_sources() raises NoMatchesError a hint is printed
    # and the command returns normally.
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    scan_filters = {
        "source_names": source_name,
        "include_schema_regex": include_schema,
        "exclude_schema_regex": exclude_schema,
        "include_table_regex": include_table,
        "exclude_table_regex": exclude_table,
    }

    with closing(open_catalog(**app_state["catalog_connection"])) as catalog:
        init_db(catalog)
        try:
            scan_sources(catalog=catalog, **scan_filters)
        except NoMatchesError:
            typer.echo(
                "No schema or tables scanned. Ensure include/exclude patterns are correct "
                "and database has tables"
            )
def add_snowflake(
    name: str = typer.Option(..., help="A memorable name for the database"),
    username: str = typer.Option(..., help="Username or role to connect database"),
    password: str = typer.Option(..., help="Password of username or role"),
    database: str = typer.Option(..., help="Database name"),
    account: str = typer.Option(..., help="Snowflake Account Name"),
    warehouse: str = typer.Option(..., help="Snowflake Warehouse Name"),
    role: str = typer.Option(..., help="Snowflake Role Name"),
):
    # Register a Snowflake database as a source in the catalog.
    #
    # The catalog is opened from app_state["catalog_connection"], prepared
    # with init_db(), and closed again once the source has been added.
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    source_settings = {
        "name": name,
        "source_type": "snowflake",
        "username": username,
        "password": password,
        "database": database,
        "account": account,
        "warehouse": warehouse,
        "role": role,
    }

    with closing(open_catalog(**app_state["catalog_connection"])) as catalog:
        init_db(catalog)
        # The source is added inside the catalog's managed session.
        with catalog.managed_session:
            catalog.add_source(**source_settings)

    confirmation = "Registered Snowflake database {}".format(name)
    typer.echo(confirmation)
def add_sqlite(
    name: str = typer.Option(..., help="A memorable name for the database"),
    path: Path = typer.Option(..., help="File path to SQLite database"),
):
    # Register a SQLite database file as a source in the catalog.
    #
    # The catalog is opened from app_state["catalog_connection"], prepared
    # with init_db(), and closed again once the source has been added.
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    source_settings = {"name": name, "source_type": "sqlite", "path": path}

    with closing(open_catalog(**app_state["catalog_connection"])) as catalog:
        init_db(catalog)
        # The source is added inside the catalog's managed session.
        with catalog.managed_session:
            catalog.add_source(**source_settings)

    confirmation = "Registered SQLite database {}".format(name)
    typer.echo(confirmation)
def add_athena(
    name: str = typer.Option(..., help="A memorable name for the database"),
    aws_access_key_id: str = typer.Option(..., help="AWS Access Key"),
    aws_secret_access_key: str = typer.Option(..., help="AWS Secret Key"),
    region_name: str = typer.Option(..., help="AWS Region Name"),
    s3_staging_dir: str = typer.Option(..., help="S3 Staging Dir"),
):
    # Register an AWS Athena data source in the catalog.
    #
    # The catalog is opened from app_state["catalog_connection"], prepared
    # with init_db(), and closed again once the source has been added.
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    catalog = open_catalog(**app_state["catalog_connection"])
    with closing(catalog):
        init_db(catalog)
        with catalog.managed_session:
            catalog.add_source(
                name=name,
                # Must be "athena": tagging the source as "snowflake" would
                # record an Athena connection under the wrong source type.
                source_type="athena",
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
                region_name=region_name,
                s3_staging_dir=s3_staging_dir,
            )
    typer.echo("Registered AWS Athena {}".format(name))
def scan(
    source_name: Optional[List[str]] = typer.Option(
        None, help="List of names of database and data warehouses"
    ),
    include_schema: Optional[List[str]] = typer.Option(None, help=schema_help_text),
    exclude_schema: Optional[List[str]] = typer.Option(
        None, help=exclude_schema_help_text
    ),
    include_table: Optional[List[str]] = typer.Option(None, help=table_help_text),
    exclude_table: Optional[List[str]] = typer.Option(
        None, help=exclude_table_help_text
    ),
):
    # Scan the selected sources and record what is found in the catalog.
    #
    # The catalog connection is built from the values in dbcat.settings.
    # The include/exclude options are forwarded to scan_sources() as regex
    # filters. If scan_sources() raises NoMatchesError a hint is printed
    # and the command returns normally.
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    connection_settings = {
        "app_dir": dbcat.settings.APP_DIR,
        "secret": dbcat.settings.CATALOG_SECRET,
        "path": dbcat.settings.CATALOG_PATH,
        "host": dbcat.settings.CATALOG_HOST,
        "port": dbcat.settings.CATALOG_PORT,
        "user": dbcat.settings.CATALOG_USER,
        "password": dbcat.settings.CATALOG_PASSWORD,
        "database": dbcat.settings.CATALOG_DB,
    }
    scan_filters = {
        "source_names": source_name,
        "include_schema_regex": include_schema,
        "exclude_schema_regex": exclude_schema,
        "include_table_regex": include_table,
        "exclude_table_regex": exclude_table,
    }

    with closing(open_catalog(**connection_settings)) as catalog:
        init_db(catalog)
        try:
            scan_sources(catalog=catalog, **scan_filters)
        except NoMatchesError:
            typer.echo(
                "No schema or tables scanned. Ensure include/exclude patterns are correct "
                "and database has tables"
            )
def test_default_catalog(tmpdir):
    """Check the default catalog is SQLite and its file exists after init_db.

    The catalog is opened with only an application directory and the default
    catalog secret; the database file is expected at ``<tmpdir>/catalog.db``.
    """
    expected_db_file = tmpdir / "catalog.db"

    opened = open_catalog(app_dir=tmpdir, secret=settings.DEFAULT_CATALOG_SECRET)
    assert isinstance(opened, SqliteCatalog)

    init_db(opened)
    assert expected_db_file.exists()
def detect(
    source_name: str = typer.Option(..., help="Name of database to scan."),
    scan_type: ScanTypeEnum = typer.Option(
        ScanTypeEnum.metadata,
        help="Choose deep(scan data) or shallow(scan column names only)",
    ),
    incremental: bool = typer.Option(
        True,
        help="Scan columns updated or created since last run",
    ),
    list_all: bool = typer.Option(
        False,
        help="List all columns. By default only columns with PII information is listed",
    ),
    include_schema: Optional[List[str]] = typer.Option(None, help=schema_help_text),
    exclude_schema: Optional[List[str]] = typer.Option(
        None, help=exclude_schema_help_text
    ),
    include_table: Optional[List[str]] = typer.Option(None, help=table_help_text),
    exclude_table: Optional[List[str]] = typer.Option(
        None, help=exclude_table_help_text
    ),
    sample_size: int = typer.Option(
        SMALL_TABLE_MAX, help="Sample size for large tables when running deep scan."
    ),
):
    # Scan one registered source and print the result.
    #
    # The catalog connection is built from the values in dbcat.settings.
    # The source is looked up by name, passed to scan_database() together
    # with the scan options, and the result is rendered with str_output()
    # in dbcat.settings.OUTPUT_FORMAT.
    #
    # If NoMatchesError is raised, its message is printed and the command
    # terminates with exit status 1 (raises typer.Exit).
    #
    # NOTE: documented with comments rather than a docstring because Typer
    # would surface a docstring as the command's --help text.
    catalog = open_catalog(
        app_dir=dbcat.settings.APP_DIR,
        secret=dbcat.settings.CATALOG_SECRET,
        path=dbcat.settings.CATALOG_PATH,
        host=dbcat.settings.CATALOG_HOST,
        port=dbcat.settings.CATALOG_PORT,
        user=dbcat.settings.CATALOG_USER,
        password=dbcat.settings.CATALOG_PASSWORD,
        database=dbcat.settings.CATALOG_DB,
    )

    no_matches = False
    with closing(catalog) as catalog:
        init_db(catalog)
        with catalog.managed_session:
            source = catalog.get_source(source_name)
            try:
                op = scan_database(
                    catalog=catalog,
                    source=source,
                    scan_type=scan_type,
                    incremental=incremental,
                    output_format=dbcat.settings.OUTPUT_FORMAT,
                    list_all=list_all,
                    include_schema_regex=include_schema,
                    exclude_schema_regex=exclude_schema,
                    include_table_regex=include_table,
                    exclude_table_regex=exclude_table,
                    sample_size=sample_size,
                )
                typer.echo(message=str_output(op, dbcat.settings.OUTPUT_FORMAT))
            except NoMatchesError:
                typer.echo(message=NoMatchesError.message)
                no_matches = True

    # typer.Exit is an exception: it only changes the exit status when it is
    # raised. It is raised here, after both context managers have exited, so
    # the session and catalog still go through their normal exit paths.
    if no_matches:
        raise typer.Exit(code=1)
def test_default_catalog(tmpdir):
    """Check the default catalog is SQLite and its file exists after init_db.

    The catalog is opened with only an application directory; the database
    file is expected at ``<tmpdir>/catalog.db``.
    """
    expected_db_file = tmpdir / "catalog.db"

    opened = open_catalog(app_dir=tmpdir)
    assert isinstance(opened, SqliteCatalog)

    init_db(opened)
    assert expected_db_file.exists()
def open_catalog_connection(
    catalog_conf,
) -> Generator[Tuple[Catalog, str], None, None]:
    """Yield an initialised catalog together with the config it was built from.

    The connection is created with ``catalog_connection_yaml(catalog_conf)``
    and prepared with ``init_db`` before being yielded as
    ``(connection, catalog_conf)``. It is closed when the generator is
    resumed or finalised, including when ``init_db`` raises.
    """
    connection = catalog_connection_yaml(catalog_conf)
    try:
        init_db(connection)
        yield connection, catalog_conf
    finally:
        # Same cleanup that contextlib.closing performs on exit.
        connection.close()