Exemple #1
0
    def _test_metadata(self, metadata: metadata_base.DataMetadata,
                       names: typing.Sequence[str]) -> None:
        """
        Assert the expected dimensions and target columns of ``metadata``.

        The dimension length must be 4 at the top level and 5 at the
        ``ALL_ELEMENTS`` level.  Columns 3 and 4 must be named ``names[0]``
        and ``names[1]`` respectively, and each must carry exactly the
        semantic types stored in ``self._target_semantic_types``.

        Args:
            metadata: metadata object under test.
            names: expected names for columns 3 and 4, in that order.
        """
        top_level = metadata.query(())
        self.assertEqual(top_level['dimension']['length'], 4)

        element_level = metadata.query((metadata_base.ALL_ELEMENTS, ))
        self.assertEqual(element_level['dimension']['length'], 5)

        # Both target columns are checked the same way: name first, then
        # the semantic types compared as a set (order-insensitive).
        for position, column_index in enumerate((3, 4)):
            expected_name = names[position]
            column = metadata.query_column(column_index)
            self.assertEqual(expected_name, column['name'])
            self.assertEqual(self._target_semantic_types,
                             set(column['semantic_types']))
Exemple #2
0
    def _get_target_columns_metadata(
            cls, outputs_metadata: metadata_base.DataMetadata,
            hyperparams) -> List[OrderedDict]:
        """
        Build the per-column metadata for the output columns.

        The metadata of every column in ``outputs_metadata`` is copied and the
        semantic type given by ``hyperparams["return_semantic_type"]`` is
        added to the column's existing semantic types.

        Args:
            outputs_metadata: metadata_base.DataMetadata
            hyperparams: d3m.metadata.hyperparams.Hyperparams; must provide
                the key "return_semantic_type".

        Returns:
            List[OrderedDict]: one metadata mapping per column, in column
            order.
        """
        outputs_length = outputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, ))['dimension']['length']

        target_columns_metadata: List[OrderedDict] = []
        for column_index in range(outputs_length):
            column_metadata = OrderedDict(
                outputs_metadata.query_column(column_index))

            # Work on a set so the added type is never duplicated.  (The
            # previous code created a list here and then called ``.add`` on
            # it, which raised AttributeError.)
            semantic_types = set(column_metadata.get('semantic_types', []))
            semantic_types.add(hyperparams["return_semantic_type"])

            # Converted back from a set, so the order of the semantic types
            # in the result is not guaranteed.
            column_metadata['semantic_types'] = list(semantic_types)

            target_columns_metadata.append(column_metadata)

        return target_columns_metadata
Exemple #3
0
    def _get_target_columns_metadata(
            self,
            outputs_metadata: metadata_base.DataMetadata) -> List[OrderedDict]:
        """
        Build per-column metadata that marks every column as a predicted target.

        Each column's metadata is copied; the ``TrueTarget`` semantic type is
        dropped and ``PredictedTarget`` is appended when not already present.
        The relative order of the remaining semantic types is kept.

        Args:
            outputs_metadata: metadata describing the output columns.

        Returns:
            One OrderedDict per column, in column order.
        """
        predicted_target = 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'
        true_target = 'https://metadata.datadrivendiscovery.org/types/TrueTarget'

        column_count = outputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, ))['dimension']['length']

        def as_predicted(index: int) -> OrderedDict:
            # Copy first so the source metadata is never modified.
            entry = OrderedDict(outputs_metadata.query_column(index))
            kept = [
                label for label in entry.get('semantic_types', [])
                if label != true_target
            ]
            if predicted_target not in kept:
                kept.append(predicted_target)
            entry['semantic_types'] = kept
            return entry

        return [as_predicted(index) for index in range(column_count)]
    def _get_target_columns_metadata(
            cls, outputs_metadata: metadata_base.DataMetadata,
            hyperparams) -> List[OrderedDict]:
        """
        Build per-column metadata that marks every column as a predicted target.

        For each column the ``TrueTarget`` and ``SuggestedTarget`` semantic
        types are removed, and ``PredictedTarget`` plus the type given by
        ``hyperparams["return_semantic_type"]`` are added.

        Args:
            outputs_metadata: metadata describing the output columns.
            hyperparams: mapping that provides "return_semantic_type".

        Returns:
            One OrderedDict per column, in column order.  The semantic types
            are stored as a list built from a set, so their order is not
            guaranteed.
        """
        dropped_types = {
            "https://metadata.datadrivendiscovery.org/types/TrueTarget",
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
        }
        column_count = outputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, ))['dimension']['length']

        result: List[OrderedDict] = []
        for index in range(column_count):
            entry = OrderedDict(outputs_metadata.query_column(index))

            current_types = set(entry.get('semantic_types', []))
            extra_types = {
                "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
                hyperparams["return_semantic_type"],
            }
            entry['semantic_types'] = list(
                current_types.difference(dropped_types) | extra_types)

            result.append(entry)

        return result
Exemple #5
0
    def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int],
                                        outputs_metadata: metadata_base.DataMetadata, hyperparams):
        """
        Derive output column metadata from the selected input columns.

        For every index in ``input_indices`` the input column's metadata is
        copied, ``hyperparams["return_semantic_type"]`` is added to its
        semantic types, and its name is stored as a string (falling back to
        ``"output_<index>"`` when the input column has no name).  If the
        outputs report more columns than ``input_indices`` has entries, the
        remaining positions get fresh metadata holding only that semantic
        type and an ``"output_<index>"`` name.

        Args:
            inputs_metadata: metadata of the input columns.
            input_indices: indices of the input columns to copy.
            outputs_metadata: metadata whose ALL_ELEMENTS dimension length
                gives the number of output columns.
            hyperparams: mapping that provides "return_semantic_type".

        Returns:
            A list with one OrderedDict per output column.
        """
        total_outputs = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
        collected: List[OrderedDict] = []

        for index in input_indices:
            name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, index)).get("name")
            if name is None:
                name = "output_{}".format(index)

            entry = OrderedDict(inputs_metadata.query_column(index))
            current_types = set(entry.get('semantic_types', []))
            extra_types = {hyperparams["return_semantic_type"]}
            # Nothing is removed here; the empty difference is kept so the
            # sequence of set operations stays unchanged.
            entry['semantic_types'] = list(current_types.difference(set()) | extra_types)
            entry["name"] = str(name)
            collected.append(entry)

        # Outputs beyond the copied inputs get placeholder metadata.  The
        # range is empty when there are no such columns.
        for index in range(len(input_indices), total_outputs):
            semantic_types = list({hyperparams["return_semantic_type"]})
            placeholder_name = "output_{}".format(index)
            collected.append(OrderedDict([
                ("semantic_types", semantic_types),
                ("name", str(placeholder_name)),
            ]))

        return collected
Exemple #6
0
    def _can_use_column(
        self, inputs_metadata: metadata_base.DataMetadata, column_index: int
    ) -> bool:
        """ originally from from d3m.primitives.schema_discovery.profiler.Common """

        column_metadata = inputs_metadata.query_column(column_index)

        semantic_types = column_metadata.get("semantic_types", [])

        # We detect only on columns which have no semantic types or where it is explicitly set as unknown.
        if (
            not semantic_types
            or "https://metadata.datadrivendiscovery.org/types/UnknownType"
            in semantic_types
        ):
            return True

        # A special case to handle setting "https://metadata.datadrivendiscovery.org/types/TrueTarget".
        if (
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget"
            in semantic_types
        ):
            return True

        return False
Exemple #7
0
    def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata,
                        column_index: int) -> bool:
        column_metadata = inputs_metadata.query_column(column_index)

        semantic_types = column_metadata.get('semantic_types', [])

        # We detect only on columns which have no semantic types or
        # where it is explicitly set as unknown.
        if not semantic_types or 'https://metadata.datadrivendiscovery.org/types/UnknownType' in semantic_types:
            return True

        # A special case to handle setting "https://metadata.datadrivendiscovery.org/types/TrueTarget".
        if 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget' in semantic_types:
            return True

        return False
Exemple #8
0
    def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices, hyperparams) -> List[OrderedDict]:
        outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']

        target_columns_metadata: List[OrderedDict] = []
        for column_index in column_indices:
            column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
            column_metadata = OrderedDict(inputs_metadata.query_column(column_index))
            semantic_types = set(column_metadata.get('semantic_types', []))
            semantic_types_to_remove = set([])
            add_semantic_types = set()
            add_semantic_types.add(hyperparams["return_semantic_type"])
            semantic_types = semantic_types - semantic_types_to_remove
            semantic_types = semantic_types.union(add_semantic_types)
            column_metadata['semantic_types'] = list(semantic_types)

            column_metadata["name"] = str(column_name)
            target_columns_metadata.append(column_metadata)

        return target_columns_metadata