def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams):
    """Pick the target columns out of ``data``.

    When ``hyperparams['use_semantic_types']`` is falsy, ``data`` is handed
    back untouched together with all of its column names and an *empty*
    index list.

    Otherwise a column is a candidate when its metadata lists the
    ``TrueTarget`` semantic type.  The final choice is delegated to
    ``common_utils.get_columns_to_use``, which also receives the
    ``use_output_columns`` and ``exclude_output_columns`` hyperparameters.

    Returns:
        A 3-tuple ``(targets, target_column_names, target_column_indices)``.
    """
    if not hyperparams['use_semantic_types']:
        return data, list(data.columns), []

    metadata = data.metadata

    def is_true_target(column_index: int) -> bool:
        required_types = {
            "https://metadata.datadrivendiscovery.org/types/TrueTarget",
        }
        column_types = set(
            metadata.query(
                (metadata_base.ALL_ELEMENTS, column_index)
            ).get('semantic_types', [])
        )
        if not column_types:
            cls.logger.warning("No semantic types found in column metadata")
            return False
        # The column qualifies only if it carries every required type.
        return required_types.issubset(column_types)

    # The second element of the result is not needed here.
    target_column_indices, _ = common_utils.get_columns_to_use(
        metadata,
        use_columns=hyperparams['use_output_columns'],
        exclude_columns=hyperparams['exclude_output_columns'],
        can_use_column=is_true_target,
    )

    targets = common_utils.select_columns(data, target_column_indices)
    target_column_names = [data.columns[idx] for idx in target_column_indices]
    return targets, target_column_names, target_column_indices
def _get_columns(
    cls,
    inputs_metadata: metadata_base.DataMetadata,
    hyperparams: hyperparams.Hyperparams,
) -> typing.Sequence[int]:
    """Return the column indices selected for use.

    The selection is delegated to ``utils.get_columns_to_use``, which is
    given the ``use_columns`` and ``exclude_columns`` hyperparameters and a
    per-column predicate backed by ``cls._can_use_column``.  Only the first
    element of that helper's result is returned; the second is discarded.
    """
    selected_columns, _ = utils.get_columns_to_use(
        inputs_metadata,
        hyperparams['use_columns'],
        hyperparams['exclude_columns'],
        # Bind the metadata so the helper only has to supply the index.
        lambda column_index: cls._can_use_column(inputs_metadata, column_index),
    )
    return selected_columns
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: LablerHyperparams):
    """Select the columns of ``inputs`` to fit on.

    With ``hyperparams['use_semantic_types']`` enabled, the candidate
    columns come from ``common_utils.get_columns_to_use``, driven by the
    ``use_columns`` / ``exclude_columns`` hyperparameters and by
    ``cls._can_produce_column``.  Otherwise every column is kept.

    Returns:
        A pair ``(frame, indices)``: the selected columns of ``inputs``
        (or ``inputs`` itself when semantic types are not used) and the
        list of positional column indices.
    """
    if hyperparams['use_semantic_types']:
        inputs_metadata = inputs.metadata

        def column_is_producible(column_index: int) -> bool:
            return cls._can_produce_column(
                inputs_metadata, column_index, hyperparams
            )

        # The second element of the result is not needed here.
        chosen_indices, _ = common_utils.get_columns_to_use(
            inputs_metadata,
            use_columns=hyperparams['use_columns'],
            exclude_columns=hyperparams['exclude_columns'],
            can_use_column=column_is_producible,
        )
        return inputs.iloc[:, chosen_indices], chosen_indices

    # Semantic types disabled: keep the frame as-is with all positions.
    every_index = list(range(len(inputs.columns)))
    return inputs, every_index