Example #1
0
    def profiling_column(self, variable_metadata: VariableMetadata,
                         column: pd.Series) -> VariableMetadata:
        """Fill in the missing fields of a variable's metadata by profiling its column.

        Fields that already carry a value are left untouched; only missing
        ones are computed through ``self.profiler``.

        Args:
            variable_metadata: the original VariableMetadata instance. It is
                updated in place.
            column: the column to profile.

        Returns:
            The same VariableMetadata instance, with missing fields profiled.
        """

        if not variable_metadata.name:
            variable_metadata.name = column.name

        if not variable_metadata.description:
            variable_metadata.description = self.profiler.construct_variable_description(
                column)

        # ``None`` means "not profiled yet"; any other value is kept as is.
        if variable_metadata.named_entity is None:
            variable_metadata.named_entity = self.profiler.profile_named_entity(
                column)

        coverage = variable_metadata.temporal_coverage
        # Profile the temporal coverage only when one of its bounds is
        # missing. The previous check (`start or not end`) was inverted for
        # 'start': it re-profiled a complete coverage and skipped one whose
        # start was missing while the end was set.
        if coverage and not (coverage['start'] and coverage['end']):
            variable_metadata.temporal_coverage = self.profiler.profile_temporal_coverage(
                coverage, column)

        return variable_metadata
Example #2
0
 def setUp(self):
     """Build fresh fixtures for every test.

     Each sample description is deep-copied so a test cannot mutate the
     shared module-level samples, then wrapped in a VariableMetadata.
     """
     first_description = copy.deepcopy(sample_variable_1)
     second_description = copy.deepcopy(sample_variable_2)
     self.variable_1 = first_description
     self.variable_2 = second_description

     self.metadata_1 = VariableMetadata(
         description=first_description, datamart_id=0)
     self.metadata_2 = VariableMetadata(
         description=second_description, datamart_id=10)
Example #3
0
    def construct_variable_metadata(
            self,
            description: dict,
            global_datamart_id: int,
            col_offset: int,
            data: pd.DataFrame = None) -> VariableMetadata:
        """Build the metadata of one variable, profiling it when data is given.

        The variable's datamart id is ``col_offset + global_datamart_id + 1``.

        Args:
            description: description dict of the variable.
            global_datamart_id: integer, the datamart id of the dataset.
            col_offset: integer, the positional index of the column.
            data: dataframe holding the data, or None to skip profiling.

        Returns:
            VariableMetadata instance, profiled via ``self._profiling_column``
            when ``data`` is provided.
        """

        datamart_id = col_offset + global_datamart_id + 1
        metadata = VariableMetadata.construct_variable(
            description, datamart_id=datamart_id)

        # Without data there is nothing to profile.
        if data is None:
            return metadata

        column = data.iloc[:, col_offset]
        return self._profiling_column(description, metadata, column)
 def test_add_variable(self):
     # The metadata starts without any variable.
     self.assertEqual(len(self.metadata.variables), 0)

     # Register one VariableMetadata per described variable; datamart ids
     # are assigned from 1 in description order.
     descriptions = self.global_metadata_description["variables"]
     for datamart_id, variable_description in enumerate(descriptions,
                                                        start=1):
         self.metadata.add_variable_metadata(
             VariableMetadata(variable_description,
                              datamart_id=datamart_id))

     # Every described variable was added, and the resulting value matches
     # the ground truth.
     actual_count = len(self.metadata.variables)
     expected_count = len(sample_global_metadata_description["variables"])
     self.assertEqual(actual_count, expected_count)
     self.assertEqual(self.metadata.value, gt["metadata"])
Example #5
0
 def test_add_variable(self):
     # Announce the running test on stdout.
     class_name = self.__class__.__name__
     print("[Test]{}/test_add_variable".format(class_name))

     # The metadata starts without any variable.
     self.assertEqual(len(self.metadata.variables), 0)

     # Register one VariableMetadata per described variable; datamart ids
     # are assigned from 1 in description order.
     descriptions = self.global_metadata_description["variables"]
     for datamart_id, variable_description in enumerate(descriptions,
                                                        start=1):
         self.metadata.add_variable_metadata(
             VariableMetadata(variable_description,
                              datamart_id=datamart_id))

     # Every described variable was added, and the resulting value matches
     # the ground truth.
     actual_count = len(self.metadata.variables)
     expected_count = len(sample_global_metadata_description["variables"])
     self.assertEqual(actual_count, expected_count)
     self.assertEqual(self.metadata.value, gt["metadata"])

     print(colored('.Done', 'red'))
Example #6
0
    def basic_profiling_column(cls, description: dict,
                               variable_metadata: VariableMetadata,
                               column: pd.Series) -> VariableMetadata:
        """Profile one column to complete the basic fields of its metadata.

        Fields that already carry a value are kept. A ``False`` named entity
        or temporal coverage is only re-examined when ``description`` is
        empty.

        Args:
            description: description dict about the column.
            variable_metadata: the original VariableMetadata instance. It is
                updated in place.
            column: the column to profile.

        Returns:
            The same VariableMetadata instance, after profiling.
        """

        if not variable_metadata.name:
            variable_metadata.name = str(column.name)

        if not variable_metadata.description:
            variable_metadata.description = cls.construct_variable_description(
                column)

        # Named entity: always profile when unset (None); when explicitly
        # False, profile only if no description was supplied and the column
        # is recognized as a named-entity column.
        entity = variable_metadata.named_entity
        should_profile_entity = entity is None or (
            entity is False and not description
            and cls.named_entity_column_recognize(column))
        if should_profile_entity:
            variable_metadata.named_entity = cls.profile_named_entity(column)

        # Temporal coverage: when explicitly False, try to detect one only if
        # no description was supplied; otherwise complete it when either
        # bound is missing.
        coverage = variable_metadata.temporal_coverage
        if coverage is False:
            if not description:
                detected = cls.profile_temporal_coverage(column=column)
                if detected:
                    variable_metadata.temporal_coverage = detected
        elif not (coverage['start'] and coverage['end']):
            variable_metadata.temporal_coverage = cls.profile_temporal_coverage(
                column=column, coverage=coverage)

        if not variable_metadata.semantic_type:
            variable_metadata.semantic_type = cls.profile_semantic_type(column)

        return variable_metadata