def test_load(pytestconfig, filename, golden_config, env, error_type):
    """Check `load_config_file` against a golden config or an expected error.

    The file is resolved relative to the pytest root path and loaded while
    `os.environ` is patched with `env`. A truthy `error_type` means the load
    must raise that exception; otherwise the result must equal `golden_config`.
    """
    target = pytestconfig.rootpath / filename

    with mock.patch.dict(os.environ, env):
        if not error_type:
            # Success case: the loaded config must match the golden value.
            assert load_config_file(target) == golden_config
            return

        # Failure case: loading must raise the expected exception type.
        with pytest.raises(error_type):
            load_config_file(target)
Beispiel #2
0
def run(
    ctx: click.Context,
    config: str,
    dry_run: bool,
    preview: bool,
    strict_warnings: bool,
    preview_workunits: int,
) -> None:
    """Ingest metadata into DataHub."""
    # NOTE(review): the docstring is kept verbatim; if this function is a click
    # command (decorators are not visible here) it is also the CLI help text.
    #
    # Flow: load the config file at `config`, build a Pipeline from it, run it,
    # then exit the process with the status from pretty_print_summary().

    logger.info("DataHub CLI version: %s", datahub_package.nice_version_name())

    recipe = load_config_file(pathlib.Path(config))

    try:
        logger.debug(f"Using config: {recipe}")
        pipeline = Pipeline.create(recipe, dry_run, preview, preview_workunits)
    except ValidationError as validation_error:
        # Invalid config: print the validation message to stderr and exit with 1.
        click.echo(validation_error, err=True)
        sys.exit(1)
    except Exception as cause:
        # The config may contain sensitive information, so any other failure is
        # wrapped in a SensitiveError to keep detailed variable-level
        # information out of the logs.
        raise SensitiveError() from cause

    logger.info("Starting metadata ingestion")
    pipeline.run()
    logger.info("Finished metadata ingestion")

    # The summary is printed before the ingestion stats are logged; its return
    # value becomes the process exit status.
    exit_status = pipeline.pretty_print_summary(warnings_as_failure=strict_warnings)
    pipeline.log_ingestion_stats()
    sys.exit(exit_status)
Beispiel #3
0
def run(
    ctx: click.Context,
    config: str,
    dry_run: bool,
    preview: bool,
    strict_warnings: bool,
    preview_workunits: int,
    suppress_error_logs: bool,
) -> None:
    """Ingest metadata into DataHub."""
    # NOTE(review): the docstring is kept verbatim; if this function is a click
    # command (decorators are not visible here) it is also the CLI help text.
    #
    # Flow: load the config file at `config`, build a Pipeline from it and run
    # it. On success the process exits with the status returned by
    # pretty_print_summary(); on failure the source and sink reports are logged
    # and the error is re-raised (wrapped in SensitiveError when
    # `suppress_error_logs` is set).

    logger.info("DataHub CLI version: %s", datahub_package.nice_version_name())

    config_file = pathlib.Path(config)
    pipeline_config = load_config_file(config_file)

    try:
        # Lazy %-style args: the config is only rendered when DEBUG is enabled.
        logger.debug("Using config: %s", pipeline_config)
        pipeline = Pipeline.create(pipeline_config, dry_run, preview, preview_workunits)
    except ValidationError as e:
        # Invalid config: print the validation message to stderr and exit with 1.
        click.echo(e, err=True)
        sys.exit(1)
    except Exception as e:
        # The pipeline_config may contain sensitive information, so we wrap the exception
        # in a SensitiveError to prevent detailed variable-level information from being logged.
        raise SensitiveError() from e

    logger.info("Starting metadata ingestion")
    try:
        pipeline.run()
    except Exception as e:
        # Emit both reports before propagating so the failure can be diagnosed.
        logger.info(
            f"Source ({pipeline.config.source.type}) report:\n{pipeline.source.get_report().as_string()}"
        )
        logger.info(
            f"Sink ({pipeline.config.sink.type}) report:\n{pipeline.sink.get_report().as_string()}"
        )
        # We don't want to log sensitive information in variables if the pipeline fails due to
        # an unexpected error. Disable printing sensitive info to logs if ingestion is running
        # with `--suppress-error-logs` flag.
        if suppress_error_logs:
            raise SensitiveError() from e
        # A bare `raise` re-raises the active exception as-is, without adding a
        # redundant traceback entry for this line the way `raise e` does.
        raise
    else:
        logger.info("Finished metadata pipeline")
        pipeline.log_ingestion_stats()
        ret = pipeline.pretty_print_summary(warnings_as_failure=strict_warnings)
        sys.exit(ret)
Beispiel #4
0
def ingest(config: str) -> None:
    """Ingest metadata into DataHub."""
    # NOTE(review): the docstring is kept verbatim; if this function is a click
    # command (decorators are not visible here) it is also the CLI help text.
    #
    # Flow: load the config file at `config`, build a Pipeline from it, run it,
    # then exit the process with the status from pretty_print_summary().

    config_file = pathlib.Path(config)
    pipeline_config = load_config_file(config_file)

    try:
        # The loaded config may contain sensitive values (e.g. credentials), so
        # it is logged at DEBUG rather than INFO, with lazy %-style args so it
        # is only rendered when DEBUG logging is enabled.
        logger.debug("Using config: %s", pipeline_config)
        pipeline = Pipeline.create(pipeline_config)
    except ValidationError as e:
        # Invalid config: print the validation message to stderr and exit with 1.
        click.echo(e, err=True)
        sys.exit(1)

    pipeline.run()
    ret = pipeline.pretty_print_summary()
    sys.exit(ret)
def run(config: str) -> None:
    """Ingest metadata into DataHub."""
    # NOTE(review): the docstring is kept verbatim; if this function is a click
    # command (decorators are not visible here) it is also the CLI help text.
    #
    # Flow: load the config file at `config`, build a Pipeline from it, run it,
    # then exit the process with the status from pretty_print_summary().
    logger.debug("DataHub CLI version: %s", datahub_package.nice_version_name())

    recipe = load_config_file(pathlib.Path(config))

    try:
        logger.debug(f"Using config: {recipe}")
        pipeline = Pipeline.create(recipe)
    except ValidationError as validation_error:
        # Invalid config: print the validation message to stderr and exit with 1.
        click.echo(validation_error, err=True)
        sys.exit(1)

    logger.info("Starting metadata ingestion")
    pipeline.run()
    logger.info("Finished metadata ingestion")

    exit_status = pipeline.pretty_print_summary()
    sys.exit(exit_status)
Beispiel #6
0
def run(
    config: str,
    dry_run: bool,
    preview: bool,
    strict_warnings: bool,
) -> None:
    """Ingest metadata into DataHub."""
    # NOTE(review): the docstring is kept verbatim; if this function is a click
    # command (decorators are not visible here) it is also the CLI help text.
    #
    # Flow: load the config file at `config`, build a Pipeline from it, run it,
    # then exit the process with the status from pretty_print_summary().

    logger.debug("DataHub CLI version: %s", datahub_package.nice_version_name())

    recipe = load_config_file(pathlib.Path(config))

    try:
        logger.debug(f"Using config: {recipe}")
        pipeline = Pipeline.create(recipe, dry_run, preview)
    except ValidationError as validation_error:
        # Invalid config: print the validation message to stderr and exit with 1.
        click.echo(validation_error, err=True)
        sys.exit(1)

    logger.info("Starting metadata ingestion")
    pipeline.run()
    logger.info("Finished metadata ingestion")

    # The summary is printed before the ingestion stats are logged; its return
    # value becomes the process exit status.
    exit_status = pipeline.pretty_print_summary(warnings_as_failure=strict_warnings)
    pipeline.log_ingestion_stats()
    sys.exit(exit_status)
Beispiel #7
0
 def load_lineage_config(file_name: str) -> LineageConfig:
     """Load the config file `file_name` and parse it into a LineageConfig."""
     # The loaded object is passed straight to LineageConfig.parse_obj.
     return LineageConfig.parse_obj(load_config_file(file_name))
Beispiel #8
0
 def load_glossary_config(self, file_name: str) -> BusinessGlossaryConfig:
     """Load the config file `file_name` and parse it into a BusinessGlossaryConfig."""
     # The loaded object is passed straight to BusinessGlossaryConfig.parse_obj;
     # no instance state is read.
     return BusinessGlossaryConfig.parse_obj(load_config_file(file_name))
def datahub_recipe():
    """Run the ingestion pipeline described by the recipe at path/to/recipe.yml.

    Loads the recipe file, creates a Pipeline from it, runs it, and then calls
    `raise_from_status()` on the pipeline (presumably raising if the run did
    not succeed; confirm against the Pipeline API).
    """
    pipeline = Pipeline.create(load_config_file("path/to/recipe.yml"))

    pipeline.run()
    pipeline.raise_from_status()