Beispiel #1
0
    def _get_domains(
        self,
        variables: Optional[ParameterContainer] = None,
    ) -> List[Domain]:
        """
        Return a list holding a single table-type domain.

        The "variables" argument is not used by this implementation.
        """
        table_domain: Domain = Domain(domain_type=MetricDomainTypes.TABLE)
        return [table_domain]
Beispiel #2
0
    def _get_domains(
        self,
        variables: Optional[ParameterContainer] = None,
    ) -> List[Domain]:
        """
        Build one column-type domain for every column reported by the "table.columns" metric.

        The metric is resolved through this builder's validator for the batch identified by
        "get_batch_id"; the resulting domains keep the order in which the metric lists the columns.
        """
        batch_id: str = self.get_batch_id(variables=variables)
        validator = self.get_validator(variables=variables)
        columns_metric: MetricConfiguration = MetricConfiguration(
            metric_name="table.columns",
            metric_domain_kwargs={
                "batch_id": batch_id,
            },
            metric_value_kwargs=None,
            metric_dependencies=None,
        )
        names_of_columns: List[str] = validator.get_metric(metric=columns_metric)

        domains: List[Domain] = []
        for name in names_of_columns:
            domains.append(
                Domain(
                    domain_type=MetricDomainTypes.COLUMN,
                    domain_kwargs={
                        "column": name,
                    },
                )
            )

        return domains
Beispiel #3
0
def column_Description_domain():
    """Return a column-type Domain for the "Description" column of one fixed batch id."""
    description_domain_kwargs = {
        "column": "Description",
        "batch_id": "c260e179bb1bc81d84bba72a8110d8e2",
    }
    description_domain = Domain(
        domain_type=MetricDomainTypes.COLUMN,
        domain_kwargs=description_domain_kwargs,
        details=None,
    )
    return description_domain
Beispiel #4
0
    def _get_domains(
        self,
        variables: Optional[ParameterContainer] = None,
    ) -> List[Domain]:
        """
        Return a column domain for every table column whose name ends with one of the configured suffixes.

        The suffixes are read from "self._column_name_suffixes", which may be a single string or an
        iterable of strings.  Column names come from the "table.columns" metric of the batch identified
        by "get_batch_id", and the returned domains keep that column order.

        :param variables: forwarded to "get_batch_id" and "get_validator"
        :return: one column-type Domain per matching column (empty list if nothing matches)
        :raises ValueError: if the configured suffixes are neither a string nor an iterable
        """
        column_name_suffixes: Union[str, Iterable,
                                    List[str]] = self._column_name_suffixes
        if isinstance(column_name_suffixes, str):
            # A bare string is a single suffix, not a sequence of one-character suffixes.
            column_name_suffixes = [column_name_suffixes]
        elif not isinstance(column_name_suffixes, Iterable):
            raise ValueError(
                "Unrecognized column_name_suffixes directive -- must be a list or a string."
            )

        # Materialize the suffixes exactly once: "str.endswith" requires a tuple, and a one-shot
        # iterable (e.g., a generator) would otherwise be consumed while testing the first column,
        # leaving nothing to match the remaining columns against.
        suffixes: tuple = tuple(column_name_suffixes)

        batch_id: str = self.get_batch_id(variables=variables)
        table_column_names: List[str] = self.get_validator(
            variables=variables).get_metric(metric=MetricConfiguration(
                metric_name="table.columns",
                metric_domain_kwargs={
                    "batch_id": batch_id,
                },
                metric_value_kwargs=None,
                metric_dependencies=None,
            ))

        column_name: str
        domains: List[Domain] = [
            Domain(
                domain_type=MetricDomainTypes.COLUMN,
                domain_kwargs={
                    "column": column_name,
                },
            ) for column_name in table_column_names
            if column_name.endswith(suffixes)
        ]

        return domains
Beispiel #5
0
    def _get_domains(
        self,
        variables: Optional[ParameterContainer] = None,
    ) -> List[Domain]:
        """
        Return a column domain for every table column whose name ends with one of the configured suffixes.

        The suffixes are read from "self._column_name_suffixes".  Column names come from the
        "table.columns" metric of the batch identified by "get_batch_id", and the returned domains keep
        that column order.

        :param variables: forwarded to "get_batch_id" and "get_validator"
        :return: one column-type Domain per matching column (empty list if nothing matches)
        """
        batch_id: str = self.get_batch_id(variables=variables)
        table_column_names: List[str] = self.get_validator(
            variables=variables).get_metric(metric=MetricConfiguration(
                metric_name="table.columns",
                metric_domain_kwargs={
                    "batch_id": batch_id,
                },
                metric_value_kwargs=None,
                metric_dependencies=None,
            ))

        column_name_suffixes = self._column_name_suffixes
        if isinstance(column_name_suffixes, str):
            # Without this wrap, tuple("_id") would become ("_", "i", "d") and any column name ending in
            # one of those single characters would be selected.
            column_name_suffixes = [column_name_suffixes]

        # Convert once, outside of the per-column test; "str.endswith" accepts a tuple of suffixes.
        suffixes: tuple = tuple(column_name_suffixes)

        column_name: str
        domains: List[Domain] = [
            Domain(
                domain_type=MetricDomainTypes.COLUMN,
                domain_kwargs={
                    "column": column_name,
                },
            ) for column_name in table_column_names
            if column_name.endswith(suffixes)
        ]

        return domains
Beispiel #6
0
def table_Users_domain():
    """Return a table-type Domain that carries neither domain kwargs nor details."""
    users_table_domain = Domain(
        domain_type=MetricDomainTypes.TABLE,
        domain_kwargs=None,
        details=None,
    )
    return users_table_domain
Beispiel #7
0
    def _get_domains(
        self,
        variables: Optional[ParameterContainer] = None,
    ) -> List[Domain]:
        """
        Infer a semantic type for every table column and return a column domain for each column whose
        inferred type is among the requested semantic types.

        The requested types are parsed from "self._semantic_types".  Each returned domain records the
        inferred type of its column under the "inferred_semantic_domain_type" key of its details.
        """
        requested_types: List[
            SemanticDomainTypes] = _parse_semantic_domain_type_argument(
                semantic_types=self._semantic_types)

        batch_id: str = self.get_batch_id(variables=variables)

        # Structured column types of the batch (nested columns included).
        column_types_validator = self.get_validator(variables=variables)
        column_types_metric: MetricConfiguration = MetricConfiguration(
            metric_name="table.column_types",
            metric_domain_kwargs={
                "batch_id": batch_id,
            },
            metric_value_kwargs={
                "include_nested": True,
            },
            metric_dependencies=None,
        )
        structured_column_types: List[Dict[str, Any]] = column_types_validator.get_metric(
            metric=column_types_metric)

        # Names of the columns of the same batch.
        column_names_validator = self.get_validator(variables=variables)
        column_names_metric: MetricConfiguration = MetricConfiguration(
            metric_name="table.columns",
            metric_domain_kwargs={
                "batch_id": batch_id,
            },
            metric_value_kwargs=None,
            metric_dependencies=None,
        )
        names_of_columns: List[str] = column_names_validator.get_metric(
            metric=column_names_metric)

        # The semantic type differs from the structured column type: a column whose structured type is
        # "integer", for example, may be inferred to have the semantic type "id".
        inferred_type_by_column: Dict[str, SemanticDomainTypes] = {}
        for name in names_of_columns:
            inference = self.infer_semantic_domain_type_from_table_column_type(
                column_types_dict_list=structured_column_types,
                column_name=name,
            )
            inferred_type_by_column[name] = inference.semantic_domain_type

        domains: List[Domain] = []
        for name in names_of_columns:
            if inferred_type_by_column[name] not in requested_types:
                continue
            domains.append(
                Domain(
                    domain_type=MetricDomainTypes.COLUMN,
                    domain_kwargs={
                        "column": name,
                    },
                    details={
                        "inferred_semantic_domain_type":
                        inferred_type_by_column[name],
                    },
                )
            )

        return domains