def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams):
        if not hyperparams['use_semantic_types']:
            return data, list(data.columns), []

        metadata = data.metadata

        def can_produce_column(column_index: int) -> bool:
            accepted_semantic_types = set()
            accepted_semantic_types.add(
                "https://metadata.datadrivendiscovery.org/types/TrueTarget")
            column_metadata = metadata.query(
                (metadata_base.ALL_ELEMENTS, column_index))
            semantic_types = set(column_metadata.get('semantic_types', []))
            if len(semantic_types) == 0:
                cls.logger.warning(
                    "No semantic types found in column metadata")
                return False
            # Making sure all accepted_semantic_types are available in semantic_types
            if len(accepted_semantic_types - semantic_types) == 0:
                return True
            return False

        target_column_indices, target_columns_not_to_produce = common_utils.get_columns_to_use(
            metadata,
            use_columns=hyperparams['use_output_columns'],
            exclude_columns=hyperparams['exclude_output_columns'],
            can_use_column=can_produce_column)
        targets = common_utils.select_columns(data, target_column_indices)
        target_column_names = []
        for idx in target_column_indices:
            target_column_names.append(data.columns[idx])
        return targets, target_column_names, target_column_indices
Example #2
0
    def _get_columns(
            cls, inputs_metadata: metadata_base.DataMetadata,
            hyperparams: hyperparams.Hyperparams) -> typing.Sequence[int]:
        def can_use_column(column_index: int) -> bool:
            return cls._can_use_column(inputs_metadata, column_index)

        columns_to_use, columns_not_to_use = utils.get_columns_to_use(
            inputs_metadata, hyperparams['use_columns'],
            hyperparams['exclude_columns'], can_use_column)
        return columns_to_use
Example #3
0
    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: LablerHyperparams):
        if not hyperparams['use_semantic_types']:
            return inputs, list(range(len(inputs.columns)))

        inputs_metadata = inputs.metadata

        def can_produce_column(column_index: int) -> bool:
            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

        columns_to_produce, columns_not_to_produce = common_utils.get_columns_to_use(
            inputs_metadata, use_columns=hyperparams['use_columns'], exclude_columns=hyperparams['exclude_columns'],
            can_use_column=can_produce_column)
        return inputs.iloc[:, columns_to_produce], columns_to_produce