    @classmethod
    def _update_predictions_metadata(
            cls, inputs_metadata: metadata_base.DataMetadata,
            outputs: Optional[Outputs],
            target_columns_metadata: List[OrderedDict],
    ) -> metadata_base.DataMetadata:
        # Start from a cleared copy of the inputs' metadata, regenerated for
        # the produced predictions container.
        outputs_metadata = inputs_metadata.clear(for_value=outputs,
                                                 generate_metadata=True)

        # Overwrite each prediction column's metadata with the prepared
        # per-target column metadata.
        for column_index, column_metadata in enumerate(
                target_columns_metadata):
            outputs_metadata = outputs_metadata.update_column(
                column_index, column_metadata)

        return outputs_metadata
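A minimal call sketch for the helper above, assuming it is a classmethod on a primitive class (called `SomePrimitive` here purely for illustration) and that the d3m core package is installed; the column names and metadata values are likewise illustrative.

# Hedged usage sketch; `SomePrimitive` and the example values are
# illustrative, not part of the original snippet.
from collections import OrderedDict

from d3m import container

inputs = container.DataFrame({'sepal_length': [5.1, 6.3]},
                             generate_metadata=True)
outputs = container.DataFrame({'species': ['setosa', 'virginica']},
                              generate_metadata=True)
target_columns_metadata = [OrderedDict(
    name='species',
    semantic_types=(
        'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
    ),
)]

outputs.metadata = SomePrimitive._update_predictions_metadata(
    inputs.metadata, outputs, target_columns_metadata)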
Example #2
    @classmethod
    def _update_metadata(
            cls,
            metadata: metadata_base.DataMetadata,
            resource_id: str,
            target: str,
            features: list,
            update_target: bool = True,
            for_value: typing.Optional[DataFrame] = None,
            source: typing.Any = None) -> metadata_base.DataMetadata:

        if source is None:
            source = cls

        # Copy the resource-level metadata and record the container schema,
        # structural type, and the chosen target column.
        resource_metadata = dict(metadata.query((resource_id, )))

        resource_metadata.update({
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': DataFrame,
            'target_column': target,
        })

        # Reset the metadata with the updated resource-level entries, then
        # carry over the per-element metadata from the original resource.
        new_metadata = metadata.clear(resource_metadata,
                                      for_value=for_value,
                                      source=source)

        new_metadata = cls._copy_elements_metadata(metadata, (resource_id, ),
                                                   (), new_metadata,
                                                   source=source)

        # The column dimension covers the generated features plus the
        # d3mIndex column, and the target column as well when it is kept.
        old_resource_metadata = metadata.query((resource_id, ALL_ELEMENTS))
        resource_metadata = dict(old_resource_metadata)
        # TODO: what if we don't include target? len(features) + 1
        resource_metadata['dimension'] = {'length': len(features) + 1}
        if update_target:
            resource_metadata['dimension'] = {'length': len(features) + 2}

        # Serialize the feature definitions into the resource metadata so
        # they can be recovered later.
        resource_metadata['ft_features'] = cloudpickle.dumps(features)
        new_metadata = new_metadata.update((ALL_ELEMENTS, ), resource_metadata)

        # Per-feature column metadata: a semantic type mapped from the
        # feature's variable type, a structural type, and the feature's name.
        for i, f in enumerate(features):
            resource_metadata = {
                'semantic_types': [
                    D3MMetadataTypes.to_d3m(f.variable_type),
                    D3MMetadataTypes.Attribute,
                ],
                'structural_type':
                    D3MMetadataTypes.to_default_structural_type(
                        f.variable_type),
                'name': f.get_name(),
            }
            # Prefer the type actually observed in the produced DataFrame,
            # when one is available.
            if for_value is not None:
                structural_type = type(for_value[f.get_name()].iloc[0])
                resource_metadata['structural_type'] = structural_type

            new_metadata = new_metadata.update((ALL_ELEMENTS, i),
                                               resource_metadata)

        # Mark the d3mIndex column, placed right after the features, as the
        # primary key.
        resource_metadata = {
            'semantic_types': [D3MMetadataTypes.PrimaryKey],
            'name': 'd3mIndex',
        }

        if for_value is not None:
            structural_type = type(for_value['d3mIndex'].iloc[0])
            resource_metadata['structural_type'] = structural_type

        new_metadata = new_metadata.update((ALL_ELEMENTS, len(features)),
                                           resource_metadata)
        if update_target:
            # The target column goes last and is tagged as both the true and
            # the suggested target.
            resource_metadata = {
                'semantic_types': [
                    D3MMetadataTypes.TrueTarget,
                    D3MMetadataTypes.SuggestedTarget,
                ],
                'name': target,
            }

            if for_value is not None:
                structural_type = type(for_value[target].iloc[0])
                resource_metadata['structural_type'] = structural_type

            new_metadata = new_metadata.update(
                (ALL_ELEMENTS, len(features) + 1), resource_metadata)

        return new_metadata
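For the second helper, a hedged call sketch; `FeaturizePrimitive`, `dataset`, `dataframe`, and `features` are stand-in names for the primitive class, the source d3m Dataset, the produced frame, and the list of feature definitions, none of which appear in the original snippet. Each `update` call above returns a new `DataMetadata` object (d3m metadata is immutable), which is why the helper threads `new_metadata` through every step and returns it at the end for the caller to attach.

# Hedged call sketch; all names below are illustrative.
dataframe.metadata = FeaturizePrimitive._update_metadata(
    dataset.metadata,
    resource_id='learningData',
    target='species',
    features=features,
    update_target=True,
    for_value=dataframe,
    source=FeaturizePrimitive,
)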