def ingest(config: str):
    """Main command for ingesting metadata into DataHub"""
    # Resolve the recipe path and fail fast if it does not exist.
    recipe_path = pathlib.Path(config)
    if not recipe_path.is_file():
        raise ConfigurationError(f"Cannot open config file {config}")

    # Choose a parser based on the file extension.
    config_mech: ConfigurationMechanism
    suffix = recipe_path.suffix
    if suffix in (".yaml", ".yml"):
        config_mech = YamlConfigurationMechanism()
    elif suffix == ".toml":
        config_mech = TomlConfigurationMechanism()
    else:
        raise ConfigurationError(
            "Only .toml and .yml are supported. Cannot process file type {}".format(
                suffix
            )
        )

    with recipe_path.open() as fp:
        pipeline_config = config_mech.load_config(fp)

    # Build and run the pipeline; validation errors get pretty rendering.
    with nicely_formatted_validation_errors():
        logger.debug(f"Using config: {pipeline_config}")
        pipeline = Pipeline.create(pipeline_config)
        pipeline.run()
def host_colon_port_comma(cls, host_val: str) -> str:
    """Validate a comma-separated list of host[:port] entries.

    Each entry may carry an http(s) scheme prefix and a trailing slash, both of
    which are ignored for validation. Returns the input value unchanged.

    Raises:
        ConfigurationError: if a host has bad characters or a port is non-numeric.
    """
    for raw_entry in host_val.split(","):
        # The port can be provided but is not required.
        port = None
        entry = raw_entry
        # Strip an optional scheme prefix and trailing slash before validating.
        for prefix in ("http://", "https://"):
            if entry.startswith(prefix):
                entry = entry[len(prefix):]
        for suffix in ("/",):
            if entry.endswith(suffix):
                entry = entry[:-len(suffix)]
        head, sep, tail = entry.rpartition(":")
        if sep:
            host, port = head, tail
        else:
            host = entry
        # This regex is quite loose. Many invalid hostnames or IPs will slip through,
        # but it serves as a good first line of validation. We defer to Elastic for the
        # remaining validation.
        if not re.match(r"^[\w\-\.]+$", host):
            raise ConfigurationError(f"host contains bad characters, found {host}")
        if port is not None and not port.isdigit():
            raise ConfigurationError(f"port must be all digits, found {port}")
    return host_val
def ingest(config: str):
    """Main command for ingesting metadata into DataHub"""
    config_file = pathlib.Path(config)
    if not config_file.is_file():
        raise ConfigurationError(f"Cannot open config file {config}")

    # Pick the configuration parser by extension.
    config_mech: ConfigurationMechanism
    if config_file.suffix == ".toml":
        config_mech = TomlConfigurationMechanism()
    elif config_file.suffix in (".yaml", ".yml"):
        config_mech = YamlConfigurationMechanism()
    else:
        raise ConfigurationError(
            "Only .toml and .yml are supported. Cannot process file type {}".format(
                config_file.suffix
            )
        )

    with config_file.open() as fp:
        pipeline_config = config_mech.load_config(fp)

    try:
        logger.debug(f"Using config: {pipeline_config}")
        pipeline = Pipeline.create(pipeline_config)
    except ValidationError as e:
        # Surface pydantic validation problems as a clean CLI error.
        click.echo(e, err=True)
        sys.exit(1)

    pipeline.run()
    ret = pipeline.pretty_print_summary()
    sys.exit(ret)
def validate(self, at_least_one: bool) -> bool:
    """Check every {var} placeholder in self.pattern against self.allowed_vars.

    Side effect: stores the bare variable names (braces stripped) on
    self.variables.

    Raises:
        ConfigurationError: on an unknown variable, or when at_least_one is set
            and the pattern contains no variables at all.
    """
    found = re.findall("({[^}{]+})", self.pattern)
    self.variables = [item[1:-1] for item in found]
    for item in found:
        if item[1:-1] not in self.allowed_vars:
            raise ConfigurationError(
                f"Failed to find {item} in allowed_variables {self.allowed_vars}"
            )
    if at_least_one and not found:
        raise ConfigurationError(
            f"Failed to find any variable assigned to pattern {self.pattern}. Must have at least one. Allowed variables are {self.allowed_vars}"
        )
    return True
def web_service_url_scheme_host_port(cls, val: str) -> str:
    """Validate that val is an http(s) URL with a sane hostname.

    Returns the value with trailing slashes removed.

    Raises:
        ConfigurationError: on a non-http(s) scheme or an invalid hostname.
    """
    # Tokenize the web url
    parsed = urlparse(val)
    if parsed.scheme not in ("http", "https"):
        raise ConfigurationError(
            f"Scheme should be http or https, found {parsed.scheme}"
        )
    if not _is_valid_hostname(str(parsed.hostname)):
        raise ConfigurationError(
            f"Not a valid hostname, hostname contains invalid characters, found {parsed.hostname}"
        )
    return config_clean.remove_trailing_slashes(val)
def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext):
    """Set up the Kafka consumer, schema registry client, and source report.

    Raises:
        ConfigurationError: if stateful ingestion is enabled without a
            platform_instance to key the state on.
    """
    super().__init__(config, ctx)
    self.source_config = config

    if (
        self.is_stateful_ingestion_configured()
        and not self.source_config.platform_instance
    ):
        raise ConfigurationError(
            "Enabling kafka stateful ingestion requires to specify a platform instance."
        )

    # NOTE(review): the consumer group id is hard-coded to "test" — confirm
    # this is intentional for metadata-only consumption.
    consumer_conf = {
        "group.id": "test",
        "bootstrap.servers": self.source_config.connection.bootstrap,
        **self.source_config.connection.consumer_config,
    }
    self.consumer = confluent_kafka.Consumer(consumer_conf)

    # Use the fully qualified name for SchemaRegistryClient to make it mock patchable for testing.
    registry_conf = {
        "url": self.source_config.connection.schema_registry_url,
        **self.source_config.connection.schema_registry_config,
    }
    self.schema_registry_client = (
        confluent_kafka.schema_registry.schema_registry_client.SchemaRegistryClient(
            registry_conf
        )
    )

    self.report = KafkaSourceReport()
    self.known_schema_registry_subjects: List[str] = []
    try:
        self.known_schema_registry_subjects.extend(
            self.schema_registry_client.get_subjects()
        )
    except Exception as e:
        # Best-effort: an unreachable registry should not abort construction.
        logger.warning(f"Failed to get subjects from schema registry: {e}")
def __init__(self, config: AddDatasetOwnershipConfig, ctx: PipelineContext):
    """Store config/context; PATCH semantics additionally requires a graph connection."""
    self.ctx = ctx
    self.config = config
    patch_requested = self.config.semantics == Semantics.PATCH
    if patch_requested and self.ctx.graph is None:
        raise ConfigurationError(
            "With PATCH semantics, AddDatasetOwnership requires a datahub_api to connect to. Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe"
        )
def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> IngestionStateProvider:
    """Build a state provider, preferring the pipeline-level graph over local config.

    Raises:
        ConfigurationError: if no configuration or no datahub_api is available.
    """
    if ctx.graph:
        return cls(ctx.graph)
    if config_dict is None:
        raise ConfigurationError("Missing provider configuration")
    provider_config = DatahubIngestionStateProviderConfig.parse_obj(config_dict)
    if not provider_config.datahub_api:
        raise ConfigurationError(
            "Missing datahub_api. Provide either a global one or under the state_provider."
        )
    return cls(DataHubGraph(provider_config.datahub_api))
def get_access_token(self):
    """Return a cached PowerBi bearer token, acquiring a fresh one via MSAL if needed.

    Raises:
        ConfigurationError: if MSAL does not return an access token.
    """
    # Return the cached token when one was already acquired.
    if self.__access_token != "":
        LOGGER.info("Returning the cached access token")
        return self.__access_token

    LOGGER.info("Generating PowerBi access token")
    auth_response = self.__msal_client.acquire_token_for_client(
        scopes=[self.__config.scope]
    )
    if not auth_response.get("access_token"):
        # FIX: Logger.warn is a deprecated alias; use Logger.warning instead.
        LOGGER.warning(
            "Failed to generate the PowerBi access token. Please check input configuration"
        )
        raise ConfigurationError(
            "Powerbi authorization failed . Please check your input configuration."
        )

    LOGGER.info("Generated PowerBi access token")
    # Cache the token in the "Bearer <token>" form used by the API headers.
    self.__access_token = "Bearer {}".format(auth_response.get("access_token"))
    LOGGER.debug("{}={}".format(Constant.PBIAccessToken, self.__access_token))
    return self.__access_token
def check_either_project_name_or_api_provided(cls, values):
    """Validate that we must either have a project name or an api credential to fetch project names"""
    if not values.get("project_name") and not values.get("api"):
        # FIX: corrected "not" -> "nor" in the user-facing error message.
        raise ConfigurationError(
            "Neither project_name nor an API credential was found. LookML source requires either api credentials for Looker or a project_name to accurately name views and models."
        )
    return values
def type_must_be_supported(cls, v: str) -> str:
    """Reject any entity type outside the currently supported set."""
    allowed_types = ["dataset"]
    if v in allowed_types:
        return v
    raise ConfigurationError(
        f"Type must be one of {allowed_types}, {v} is not yet supported."
    )
def _authenticate(self):
    """Sign in to the Tableau server.

    Prefers username/password auth, falling back to a personal access token.
    Login failures are recorded on the source report rather than raised.

    Raises:
        ConfigurationError: if neither credential pair is configured.
    """
    # https://tableau.github.io/server-client-python/docs/api-ref#authentication
    authentication = None
    if self.config.username and self.config.password:
        authentication = TableauAuth(
            username=self.config.username,
            password=self.config.password,
            site_id=self.config.site,
        )
    elif self.config.token_name and self.config.token_value:
        authentication = PersonalAccessTokenAuth(
            self.config.token_name, self.config.token_value, self.config.site
        )
    else:
        raise ConfigurationError(
            "Tableau Source: Either username/password or token_name/token_value must be set"
        )

    try:
        self.server = Server(self.config.connect_uri, use_server_version=True)
        self.server.auth.sign_in(authentication)
    except ServerResponseError as e:
        logger.error(e)
        self.report.report_failure(
            key="tableau-login",
            # FIX: the adjacent f-strings previously concatenated with no
            # separator, yielding "...providedReason: ..." in the report.
            reason=f"Unable to Login with credentials provided. "
            f"Reason: {str(e)}",
        )
    except Exception as e:
        logger.error(e)
        self.report.report_failure(
            key="tableau-login",
            reason=f"Unable to Login. "
            f"Reason: {str(e)}",
        )
def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
    """Require a state_provider whenever stateful ingestion is enabled."""
    stateful_enabled = values.get("enabled")
    if stateful_enabled and values.get("state_provider") is None:
        raise ConfigurationError(
            "Must specify state_provider configuration if stateful ingestion is enabled."
        )
    return values
def _initialize_state_provider(self) -> None:
    """Instantiate the configured ingestion state provider.

    No-op (leaves self.ingestion_state_provider as None) unless stateful
    ingestion is enabled and a state_provider is configured.

    Raises:
        ConfigurationError: if stateful ingestion is enabled without a
            pipeline_name to key the state on.
    """
    self.ingestion_state_provider: Optional[IngestionStateProvider] = None
    cfg = self.stateful_ingestion_config
    # Guard clause: nothing to do unless stateful ingestion is fully configured.
    if cfg is None or cfg.state_provider is None or not cfg.enabled:
        return
    if self.ctx.pipeline_name is None:
        raise ConfigurationError(
            "pipeline_name must be provided if stateful ingestion is enabled."
        )
    state_provider_class = ingestion_state_provider_registry.get(
        cfg.state_provider.type
    )
    self.ingestion_state_provider = state_provider_class.create(
        cfg.state_provider.dict().get("config", {}),
        self.ctx,
    )
    if cfg.ignore_old_state:
        logger.warning(
            "The 'ignore_old_state' config is True. The old checkpoint state will not be provided."
        )
    if cfg.ignore_new_state:
        logger.warning(
            "The 'ignore_new_state' config is True. The new checkpoint state will not be created."
        )
    logger.debug(
        f"Successfully created {cfg.state_provider.type} state provider."
    )
def __init__(self, graph: DataHubGraph):
    """Wrap a DataHubGraph, refusing servers without stateful ingestion support."""
    self.graph = graph
    server_capable = self._is_server_stateful_ingestion_capable()
    if not server_capable:
        raise ConfigurationError(
            "Datahub server is not capable of supporting stateful ingestion."
            " Please consider upgrading to the latest server version to use this feature."
        )
def platform_validator(cls, v: str) -> str:
    """Accept an empty/unset platform, or one of the VALID_PLATFORMS entries."""
    if v and v not in VALID_PLATFORMS:
        raise ConfigurationError(
            f"'platform' can only take following values: {VALID_PLATFORMS}"
        )
    return v
def __init__(self, ctx: PipelineContext, config: DatahubRestSinkConfig):
    """Create the REST sink: build the emitter, verify GMS connectivity, and
    set up the async emission thread pool.

    Raises:
        ConfigurationError: if the GMS endpoint cannot be reached.
    """
    super().__init__(ctx)
    self.config = config
    self.report = DataHubRestSinkReport()
    self.emitter = DatahubRestEmitter(
        self.config.server,
        self.config.token,
        connect_timeout_sec=self.config.timeout_sec,  # reuse timeout_sec for connect timeout
        read_timeout_sec=self.config.timeout_sec,
        retry_status_codes=self.config.retry_status_codes,
        retry_max_times=self.config.retry_max_times,
        extra_headers=self.config.extra_headers,
        ca_certificate_path=self.config.ca_certificate_path,
    )
    try:
        # Fail fast at construction time if the server is unreachable.
        gms_config = self.emitter.test_connection()
    except Exception as exc:
        raise ConfigurationError(
            f"💥 Failed to connect to DataHub@{self.config.server} (token:{'XXX-redacted' if self.config.token else 'empty'}) over REST",
            exc,
        )
    # Record the server version reported by GMS on the sink report.
    self.report.gms_version = (
        gms_config.get("versions", {})
        .get("linkedin/datahub", {})
        .get("version", "")
    )
    logger.debug("Setting env variables to override config")
    set_env_variables_override_config(self.config.server, self.config.token)
    logger.debug("Setting gms config")
    set_gms_config(gms_config)
    # Thread pool used for asynchronous emission of work units.
    self.executor = concurrent.futures.ThreadPoolExecutor(
        max_workers=self.config.max_threads
    )
def _initialize_checkpointing_state_provider(self) -> None:
    """Create and register the checkpointing state provider.

    No-op unless stateful ingestion is enabled and a state_provider is
    configured. On success the provider is registered on the pipeline context.

    Raises:
        ConfigurationError: if pipeline_name is missing, or the configured
            provider type is not found in the registry.
    """
    self.ingestion_checkpointing_state_provider: Optional[
        IngestionCheckpointingProviderBase
    ] = None
    if (
        self.stateful_ingestion_config is not None
        and self.stateful_ingestion_config.state_provider is not None
        and self.stateful_ingestion_config.enabled
    ):
        if self.ctx.pipeline_name is None:
            # The pipeline name keys the checkpoint state, so it is mandatory.
            raise ConfigurationError(
                "pipeline_name must be provided if stateful ingestion is enabled."
            )
        checkpointing_state_provider_class = (
            ingestion_checkpoint_provider_registry.get(
                self.stateful_ingestion_config.state_provider.type
            )
        )
        if checkpointing_state_provider_class is None:
            raise ConfigurationError(
                f"Cannot find checkpoint provider class of type={self.stateful_ingestion_config.state_provider.type} "
                " in the registry! Please check the type of the checkpointing provider in your config."
            )
        config_dict: Dict[str, Any] = cast(
            Dict[str, Any],
            self.stateful_ingestion_config.state_provider.dict().get("config", {}),
        )
        self.ingestion_checkpointing_state_provider = checkpointing_state_provider_class.create(  # type: ignore
            config_dict=config_dict,
            ctx=self.ctx,
            name=checkpointing_state_provider_class.__name__,
        )
        assert self.ingestion_checkpointing_state_provider
        if self.stateful_ingestion_config.ignore_old_state:
            logger.warning(
                "The 'ignore_old_state' config is True. The old checkpoint state will not be provided."
            )
        if self.stateful_ingestion_config.ignore_new_state:
            logger.warning(
                "The 'ignore_new_state' config is True. The new checkpoint state will not be created."
            )
        # Add the checkpoint state provide to the platform context.
        self.ctx.register_checkpointer(self.ingestion_checkpointing_state_provider)
        logger.debug(
            f"Successfully created {self.stateful_ingestion_config.state_provider.type} state provider."
        )
def check_either_connection_map_or_connection_provided(cls, values):
    """Validate that we must either have a connection map or an api credential"""
    if not values.get("connection_to_platform_map", {}) and not values.get(
        "api", {}
    ):
        # FIX: corrected "not" -> "nor" in the user-facing error message.
        raise ConfigurationError(
            "Neither api nor connection_to_platform_map config was found. LookML source requires either api credentials for Looker or a map of connection names to platform identifiers to work correctly"
        )
    return values
def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext, name: str) -> IngestionCheckpointingProviderBase:
    """Build a checkpointing provider, preferring the pipeline-level graph.

    Raises:
        ConfigurationError: if no configuration or no datahub_api is available.
    """
    # Use the pipeline-level graph if set
    if ctx.graph:
        return cls(ctx.graph, name)
    if config_dict is None:
        raise ConfigurationError("Missing provider configuration.")
    provider_config = DatahubIngestionStateProviderConfig.parse_obj(config_dict)
    if not provider_config.datahub_api:
        raise ConfigurationError(
            "Missing datahub_api. Provide either a global one or under the state_provider."
        )
    return cls(DataHubGraph(provider_config.datahub_api), name)
def ensure_only_issuer_or_token(
    cls, token: Optional[str], values: Dict[str, Optional[str]]
) -> Optional[str]:
    """Reject configs that set both token and issuer_url authentication."""
    both_configured = token is not None and values.get("issuer_url") is not None
    if both_configured:
        raise ConfigurationError(
            "Expected only one authentication method, either issuer_url or token."
        )
    return token
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Load the dbt manifest/catalog files and yield metadata work units.

    Emits dbt-platform entities first (unless disabled), then target-platform
    entities, and finally stale-entity removals when stateful ingestion is on.

    Raises:
        ConfigurationError: if PATCH write semantics is used without a graph.
    """
    if self.config.write_semantics == "PATCH" and not self.ctx.graph:
        raise ConfigurationError(
            "With PATCH semantics, dbt source requires a datahub_api to connect to. "
            "Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe."
        )
    (
        nodes,
        manifest_schema,
        manifest_version,
        catalog_schema,
        catalog_version,
        manifest_nodes_raw,
    ) = loadManifestAndCatalog(
        self.config.manifest_path,
        self.config.catalog_path,
        self.config.sources_path,
        self.config.load_schemas,
        self.config.use_identifiers,
        self.config.tag_prefix,
        self.config.node_type_pattern,
        self.report,
        self.config.node_name_pattern,
    )
    # Schema/version info from the dbt artifacts, attached as custom properties.
    additional_custom_props = {
        "manifest_schema": manifest_schema,
        "manifest_version": manifest_version,
        "catalog_schema": catalog_schema,
        "catalog_version": catalog_version,
    }
    # Drop unset values so we do not emit null-valued properties.
    additional_custom_props_filtered = {
        key: value
        for key, value in additional_custom_props.items()
        if value is not None
    }
    if not self.config.disable_dbt_node_creation:
        yield from self.create_platform_mces(
            nodes,
            additional_custom_props_filtered,
            manifest_nodes_raw,
            DBT_PLATFORM,
        )
    yield from self.create_platform_mces(
        nodes,
        additional_custom_props_filtered,
        manifest_nodes_raw,
        self.config.target_platform,
    )
    if self.is_stateful_ingestion_configured():
        # Clean up stale entities.
        yield from self.gen_removed_entity_workunits()
def validate_that_bigquery_audit_metadata_datasets_is_correctly_configured(
    cls, values: Dict[str, Any]
) -> Dict[str, Any]:
    """Require audit-metadata dataset names when exported audit metadata is used
    without v2 audit metadata."""
    if (
        values.get("use_exported_bigquery_audit_metadata")
        and not values.get("use_v2_audit_metadata")
        and not values.get("bigquery_audit_metadata_datasets")
    ):
        raise ConfigurationError(
            "bigquery_audit_metadata_datasets must be specified if using exported audit metadata. Otherwise set use_v2_audit_metadata to True."
        )
    # FIX: removed a stray no-op `pass` statement before the return.
    return values
def env_must_be_one_of(cls, v: str) -> str:
    """Normalize env to uppercase and check it against FabricTypeClass constants."""
    # Get all the constants from the FabricTypeClass. It's not an enum, so this is a bit hacky but works
    allowed_envs = [
        value
        for name, value in vars(FabricTypeClass).items()
        if not name.startswith("_")
    ]
    normalized = v.upper()
    if normalized not in allowed_envs:
        raise ConfigurationError(f"env must be one of {allowed_envs}, found {v}")
    return normalized
def ensure_client_id_and_secret_for_issuer_url(
    cls, client_secret: Optional[str], values: Dict[str, Optional[str]]
) -> Optional[str]:
    """When issuer_url auth is used, both client_id and client_secret are required."""
    issuer_configured = values.get("issuer_url") is not None
    credentials_missing = client_secret is None or values.get("client_id") is None
    if issuer_configured and credentials_missing:
        raise ConfigurationError(
            "Missing configuration: client_id and client_secret are mandatory when issuer_url is set."
        )
    return client_secret
def __init__(self, config: SQLAlchemyConfig, ctx: PipelineContext, platform: str):
    """Common SQL-source setup; rejects profiling requests when the profiler
    plugin is not installed."""
    super().__init__(ctx)
    self.config = config
    self.platform = platform
    self.report = SQLSourceReport()
    profiling_requested = self.config.profiling.enabled
    if profiling_requested and not self._can_run_profiler():
        raise ConfigurationError(
            "Table profiles requested but profiler plugin is not enabled. "
            f"Try running: pip install '{__package_name__}[sql-profiles]'"
        )
def get(self, key: str) -> Type[T]:
    """Look up a registered class by key, or import a dotted path dynamically.

    Raises:
        KeyError: if the key is not registered.
        ConfigurationError: if the registered entry is a disabled/broken plugin.
    """
    if "." in key:
        # If the key contains a dot, we treat it as a import path and attempt
        # to load it dynamically.
        MyClass = import_key(key)
        self._check_cls(MyClass)
        return MyClass

    if key not in self._mapping:
        raise KeyError(f"Did not find a registered class for {key}")

    tp = self._mapping[key]
    if isinstance(tp, ModuleNotFoundError):
        # The plugin's optional dependency is not installed.
        raise ConfigurationError(
            f"{key} is disabled; try running: pip install '{__package_name__}[{key}]'"
        ) from tp
    if isinstance(tp, Exception):
        raise ConfigurationError(
            f"{key} is disabled due to an error in initialization"
        ) from tp
    # If it's not an exception, then it's a registered type.
    return tp
def from_looker_connection(
    cls, looker_connection: DBConnection
) -> "LookerConnectionDefinition":
    """Dialect definitions are here: https://docs.looker.com/setup-and-management/database-config"""
    if looker_connection.dialect_name is None:
        raise ConfigurationError(
            f"Unable to fetch a fully filled out connection for {looker_connection.name}. Please check your API permissions."
        )
    # Ordered pattern -> extractor table; the ".*" entry acts as a fallback.
    extractors: Dict[str, Any] = {
        "^bigquery": _get_bigquery_definition,
        ".*": _get_generic_definition,
    }
    for pattern, extractor in extractors.items():
        if not re.match(pattern, looker_connection.dialect_name):
            continue
        platform, db, schema = extractor(looker_connection)
        return cls(platform=platform, default_db=db, default_schema=schema)
    raise ConfigurationError(
        f"Could not find an appropriate platform for looker_connection: {looker_connection.name} with dialect: {looker_connection.dialect_name}"
    )
def load_config_file(config_file: Union[pathlib.Path, str]) -> dict:
    """Parse a YAML/TOML recipe file into a dict and resolve env-var references.

    Raises:
        ConfigurationError: if the file is missing or has an unsupported suffix.
    """
    path = pathlib.Path(config_file) if isinstance(config_file, str) else config_file
    if not path.is_file():
        raise ConfigurationError(f"Cannot open config file {path}")

    config_mech: ConfigurationMechanism
    if path.suffix in (".yaml", ".yml"):
        config_mech = YamlConfigurationMechanism()
    elif path.suffix == ".toml":
        config_mech = TomlConfigurationMechanism()
    else:
        raise ConfigurationError(
            "Only .toml and .yml are supported. Cannot process file type {}".format(
                path.suffix
            )
        )

    # Read the raw text first, then parse from an in-memory buffer.
    with path.open() as raw_fp:
        raw_text = raw_fp.read()
    config = config_mech.load_config(io.StringIO(raw_text))
    # NOTE(review): the return value of resolve_env_variables is discarded —
    # assumes it mutates `config` in place; verify against its definition.
    resolve_env_variables(config)
    return config
def test_connection(self) -> dict: response = self._session.get(f"{self._gms_server}/config") if response.status_code == 200: config: dict = response.json() if config.get("noCode") == "true": return config else: # Looks like we either connected to an old GMS or to some other service. Let's see if we can determine which before raising an error # A common misconfiguration is connecting to datahub-frontend so we special-case this check if (config.get("config", {}).get("application") == "datahub-frontend" or config.get( "config", {}).get("shouldShowDatasetLineage") is not None): message = "You seem to have connected to the frontend instead of the GMS endpoint. The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)" else: message = "You have either connected to a pre-v0.8.0 DataHub GMS instance, or to a different server altogether! Please check your configuration and make sure you are talking to the DataHub GMS endpoint." raise ConfigurationError(message) else: auth_message = "Maybe you need to set up authentication? " message = f"Unable to connect to {self._gms_server}/config with status_code: {response.status_code}. {auth_message if response.status_code == 401 else ''}Please check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)." raise ConfigurationError(message)