def _test_metadata(self, metadata: metadata_base.DataMetadata,
                   names: typing.Sequence[str]) -> None:
    """
    Assert that ``metadata`` has the expected dimensions and target columns.

    Checks that the top-level dimension length is 4, that the ALL_ELEMENTS
    dimension length is 5, and that columns 3 and 4 carry the names
    ``names[0]`` and ``names[1]`` together with exactly the semantic types
    stored in ``self._target_semantic_types``.
    """
    top_level_length = metadata.query(())['dimension']['length']
    self.assertEqual(top_level_length, 4)

    all_elements_length = metadata.query(
        (metadata_base.ALL_ELEMENTS,))['dimension']['length']
    self.assertEqual(all_elements_length, 5)

    # Columns 3 and 4 are checked against the first and second expected names.
    for position, column_index in enumerate((3, 4)):
        self.assertEqual(
            names[position],
            metadata.query_column(column_index)['name'])
        self.assertEqual(
            self._target_semantic_types,
            set(metadata.query_column(column_index)['semantic_types']))
def _get_target_columns_metadata(
        cls, outputs_metadata: metadata_base.DataMetadata,
        hyperparams) -> List[OrderedDict]:
    """
    Output metadata of selected columns.

    The metadata of every column in ``outputs_metadata`` is copied, and the
    value of ``hyperparams["return_semantic_type"]`` is added to the copy's
    semantic types.

    Args:
        outputs_metadata: metadata_base.DataMetadata
        hyperparams: d3m.metadata.hyperparams.Hyperparams

    Returns:
        List[OrderedDict]: one metadata mapping per column, in column order.
    """
    outputs_length = outputs_metadata.query(
        (metadata_base.ALL_ELEMENTS,))['dimension']['length']

    target_columns_metadata: List[OrderedDict] = []
    for column_index in range(outputs_length):
        column_metadata = OrderedDict(
            outputs_metadata.query_column(column_index))

        # Update semantic types and prepare it for predicted targets.
        # A set is used so that a type which is already present is not
        # duplicated. (The previous code called ``.add`` on a list, which
        # raised AttributeError for any table with at least one column.)
        semantic_types = set(column_metadata.get('semantic_types', []))
        semantic_types.add(hyperparams["return_semantic_type"])
        column_metadata['semantic_types'] = list(semantic_types)

        target_columns_metadata.append(column_metadata)

    return target_columns_metadata
def _get_target_columns_metadata(
        self, outputs_metadata: metadata_base.DataMetadata) -> List[OrderedDict]:
    """
    Build predicted-target metadata for every output column.

    Each column's metadata is copied. In the copy's semantic types the
    ``TrueTarget`` entry is dropped and ``PredictedTarget`` is appended when it
    is not already present; the order of the remaining entries is kept.

    Returns:
        One ``OrderedDict`` per column, in column order.
    """
    predicted_target = 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'
    true_target = 'https://metadata.datadrivendiscovery.org/types/TrueTarget'

    column_count = outputs_metadata.query(
        (metadata_base.ALL_ELEMENTS,))['dimension']['length']

    columns: List[OrderedDict] = []
    for index in range(column_count):
        entry = OrderedDict(outputs_metadata.query_column(index))

        # Drop the true-target marker first, then make sure the
        # predicted-target marker is present exactly once at the end.
        kept_types = [
            existing for existing in entry.get('semantic_types', [])
            if existing != true_target
        ]
        if predicted_target not in kept_types:
            kept_types.append(predicted_target)

        entry['semantic_types'] = kept_types
        columns.append(entry)

    return columns
def _get_target_columns_metadata(
        cls, outputs_metadata: metadata_base.DataMetadata,
        hyperparams) -> List[OrderedDict]:
    """
    Build predicted-target metadata for every output column.

    Each column's metadata is copied. From the copy's semantic types the
    ``TrueTarget`` and ``SuggestedTarget`` entries are removed, then
    ``PredictedTarget`` and ``hyperparams["return_semantic_type"]`` are added.
    The resulting semantic types are stored as a list built from a set, so
    their order is unspecified.

    Returns:
        One ``OrderedDict`` per column, in column order.
    """
    dropped_types = {
        "https://metadata.datadrivendiscovery.org/types/TrueTarget",
        "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
    }

    column_count = outputs_metadata.query(
        (metadata_base.ALL_ELEMENTS,))['dimension']['length']

    columns: List[OrderedDict] = []
    for index in range(column_count):
        entry = OrderedDict(outputs_metadata.query_column(index))

        current_types = set(entry.get('semantic_types', []))
        added_types = {
            "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
        }
        added_types.add(hyperparams["return_semantic_type"])

        # Removal happens before addition, so a type listed in both sets
        # ends up present.
        entry['semantic_types'] = list((current_types - dropped_types) | added_types)
        columns.append(entry)

    return columns
def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata,
                          input_indices: List[int],
                          outputs_metadata: metadata_base.DataMetadata,
                          hyperparams):
    """
    Build output column metadata by copying metadata of selected input columns.

    For every index in ``input_indices`` the input column's metadata is copied,
    ``hyperparams["return_semantic_type"]`` is added to its semantic types, and
    its name is set to the input column name (or ``output_<index>`` when the
    input column has no name).

    When the outputs have more columns than ``input_indices`` has entries, the
    remaining positions get a fresh entry holding only the return semantic type
    and the name ``output_<position>``.

    Returns:
        A list of ``OrderedDict`` column metadata entries.
    """
    output_column_count = outputs_metadata.query(
        (metadata_base.ALL_ELEMENTS,))['dimension']['length']

    columns: List[OrderedDict] = []

    for source_index in input_indices:
        name = inputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, source_index)).get("name")
        if name is None:
            name = "output_{}".format(source_index)

        entry = OrderedDict(inputs_metadata.query_column(source_index))
        existing_types = set(entry.get('semantic_types', []))
        entry['semantic_types'] = list(
            existing_types | {hyperparams["return_semantic_type"]})
        entry["name"] = str(name)
        columns.append(entry)

    # Outputs beyond the copied inputs get a minimal entry carrying only the
    # return semantic type; the range is empty when there are none.
    for position in range(len(input_indices), output_column_count):
        filler = OrderedDict()
        filler["semantic_types"] = list({hyperparams["return_semantic_type"]})
        filler["name"] = "output_{}".format(position)
        columns.append(filler)

    return columns
def _can_use_column(
    self, inputs_metadata: metadata_base.DataMetadata, column_index: int
) -> bool:
    """
    Decide whether the column at ``column_index`` should be processed.

    Originally from d3m.primitives.schema_discovery.profiler.Common.

    Returns ``True`` when the column has no semantic types, is explicitly
    marked ``UnknownType``, or is marked ``SuggestedTarget``; ``False``
    otherwise.
    """
    unknown_type = "https://metadata.datadrivendiscovery.org/types/UnknownType"
    suggested_target = "https://metadata.datadrivendiscovery.org/types/SuggestedTarget"

    declared_types = inputs_metadata.query_column(column_index).get(
        "semantic_types", []
    )

    # Detection runs only on untyped or explicitly-unknown columns. Suggested
    # targets are a special case, kept to handle setting
    # "https://metadata.datadrivendiscovery.org/types/TrueTarget".
    return (
        not declared_types
        or unknown_type in declared_types
        or suggested_target in declared_types
    )
def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata,
                    column_index: int) -> bool:
    """
    Report whether the column at ``column_index`` is eligible for detection.

    A column is eligible when it has no semantic types at all, or when its
    semantic types include ``UnknownType`` or ``SuggestedTarget``.
    """
    declared_types = inputs_metadata.query_column(column_index).get('semantic_types', [])

    # A column without any semantic types is always eligible.
    if not declared_types:
        return True

    # Otherwise it must be explicitly unknown, or a suggested target (a
    # special case to handle setting
    # "https://metadata.datadrivendiscovery.org/types/TrueTarget").
    eligible_markers = (
        'https://metadata.datadrivendiscovery.org/types/UnknownType',
        'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
    )
    return any(marker in declared_types for marker in eligible_markers)
def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata,
                           column_indices, hyperparams) -> List[OrderedDict]:
    """
    Copy the metadata of selected input columns for use as output metadata.

    For every index in ``column_indices`` the input column's metadata is
    copied, ``hyperparams["return_semantic_type"]`` is added to its semantic
    types, and its ``name`` is set to the input column name as a string.

    Args:
        inputs_metadata: metadata to copy column entries from.
        column_indices: iterable of column indices to copy.
        hyperparams: mapping providing ``"return_semantic_type"``.

    Returns:
        One ``OrderedDict`` per requested column, in the order given.
    """
    target_columns_metadata: List[OrderedDict] = []
    for column_index in column_indices:
        column_name = inputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, column_index)).get("name")
        if column_name is None:
            # Without this fallback ``str(None)`` would store the literal
            # name "None"; use the same placeholder as _copy_inputs_metadata.
            column_name = "output_{}".format(column_index)

        column_metadata = OrderedDict(inputs_metadata.query_column(column_index))

        # A set avoids duplicating the return semantic type when the column
        # already carries it.
        semantic_types = set(column_metadata.get('semantic_types', []))
        semantic_types.add(hyperparams["return_semantic_type"])
        column_metadata['semantic_types'] = list(semantic_types)

        column_metadata["name"] = str(column_name)
        target_columns_metadata.append(column_metadata)

    return target_columns_metadata