Exemple #1
0
    def profiling_column(self, variable_metadata: VariableMetadata,
                         column: pd.Series) -> VariableMetadata:
        """Fill in the missing metadata fields of a single column.

        Fields that already hold a value are left untouched; empty ones are
        computed through ``self.profiler``. The metadata object is modified
        in place.

        Args:
            variable_metadata: the original VariableMetadata instance.
            column: the column to profile.

        Returns:
            The same VariableMetadata instance, after profiling.
        """
        if not variable_metadata.name:
            variable_metadata.name = column.name

        if not variable_metadata.description:
            variable_metadata.description = (
                self.profiler.construct_variable_description(column))

        # Only a None value triggers profiling; any other value (including
        # False) is kept as provided.
        if variable_metadata.named_entity is None:
            variable_metadata.named_entity = (
                self.profiler.profile_named_entity(column))

        # Re-profile the temporal coverage only when one was supplied (truthy)
        # and at least one of its bounds is missing. A coverage that already
        # has both 'start' and 'end' must not be overwritten.
        coverage = variable_metadata.temporal_coverage
        if coverage and (not coverage['start'] or not coverage['end']):
            variable_metadata.temporal_coverage = (
                self.profiler.profile_temporal_coverage(coverage, column))

        return variable_metadata
Exemple #2
0
    def basic_profiling_column(cls, description: dict,
                               variable_metadata: VariableMetadata,
                               column: pd.Series) -> VariableMetadata:
        """Complete the basic metadata fields of one column.

        Each field is profiled only when it has no usable value yet. The
        metadata object is updated in place and returned.

        Args:
            description: description dict about the column; when it is empty,
                named-entity and temporal-coverage detection are also
                attempted for fields explicitly set to False.
            variable_metadata: the original VariableMetadata instance.
            column: the column to profile.

        Returns:
            The same VariableMetadata instance, after profiling.
        """
        meta = variable_metadata

        if not meta.name:
            meta.name = str(column.name)

        if not meta.description:
            meta.description = cls.construct_variable_description(column)

        # Named entity: None always gets profiled; False is profiled only when
        # no description was given and the column is recognized as one.
        if meta.named_entity is None:
            meta.named_entity = cls.profile_named_entity(column)
        elif (meta.named_entity is False and not description
              and cls.named_entity_column_recognize(column)):
            meta.named_entity = cls.profile_named_entity(column)

        # Temporal coverage: False means nothing was supplied, so detection is
        # tried only without a description; otherwise missing bounds are filled.
        coverage = meta.temporal_coverage
        if coverage is False:
            if not description:
                detected = cls.profile_temporal_coverage(column=column)
                if detected:
                    meta.temporal_coverage = detected
        elif not (coverage['start'] and coverage['end']):
            meta.temporal_coverage = cls.profile_temporal_coverage(
                column=column, coverage=coverage)

        if not meta.semantic_type:
            meta.semantic_type = cls.profile_semantic_type(column)

        return meta