예제 #1
0
 def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
     """Return the columns of ``inputs`` selected by ``self._index``.

     The selection is positional (``inputs.iloc[:, self._index]``) and the
     result's metadata is replaced by the value of
     ``utils.select_columns_metadata`` for the same columns.

     NOTE(review): an earlier inline note described ``inputs`` as an
     "m x n numpy array", but the body relies on ``.iloc`` and ``.metadata``,
     so a DataFrame-like container is presumably expected -- confirm.

     Args:
         inputs: Container to select columns from.
         timeout: Not used by this method.
         iterations: Not used by this method.

     Returns:
         ``CallResult`` wrapping the selected columns.

     Raises:
         ValueError: If ``self._fitted`` is falsy.
     """
     # Guard clause: nothing can be selected before fitting.
     if not self._fitted:
         raise ValueError('Model should be fitted first.')

     selected = inputs.iloc[:, self._index]
     selected.metadata = utils.select_columns_metadata(
         inputs.metadata, columns=self._index)
     return CallResult(selected)
예제 #2
0
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> base.CallResult[Outputs]:
        """Convert the selected columns of ``inputs`` to numeric values.

        Every column index returned by ``self._get_columns`` is passed
        through ``pd.to_numeric`` on a copy of ``inputs``. If the column's
        metadata lists the schema.org Integer (checked first) or Float
        semantic type, its ``structural_type`` metadata is updated to ``int``
        or ``float`` respectively. When the ``drop_non_numeric_columns``
        hyperparameter is truthy, only columns whose dtype is not ``object``
        are kept, and the metadata is narrowed to the same columns.

        Args:
            inputs: Frame to convert; all work happens on ``inputs.copy()``.
            timeout: Not used by this method.
            iterations: Not used by this method.

        Returns:
            ``base.CallResult`` wrapping the converted frame.

        Raises:
            ValueError: Propagated from ``pd.to_numeric`` (called with its
                default error handling) when a value cannot be parsed.
        """
        columns_to_use = self._get_columns(inputs.metadata, self.hyperparams)
        _logger.debug(f'converting columns: {columns_to_use}')
        _logger.debug(
            f'converting columns: {inputs.iloc[:, columns_to_use].columns}')

        output = inputs.copy()
        for col in columns_to_use:
            output.iloc[:, col] = pd.to_numeric(output.iloc[:, col])
            column_metadata = output.metadata.query(
                (metadata_base.ALL_ELEMENTS, col))
            # A column may have no 'semantic_types' entry (or a None one);
            # fall back to an empty tuple so the membership tests below
            # cannot raise TypeError.
            semantic_types = column_metadata.get('semantic_types') or ()
            if 'http://schema.org/Integer' in semantic_types:
                output.metadata = output.metadata.update(
                    (metadata_base.ALL_ELEMENTS, col),
                    {'structural_type': int})
            elif 'http://schema.org/Float' in semantic_types:
                output.metadata = output.metadata.update(
                    (metadata_base.ALL_ELEMENTS, col),
                    {'structural_type': float})
            # TODO: decide how missing values should be handled, e.g. by
            # checking pd.isnull(output.iloc[:, col]).sum() > 0.

        if self.hyperparams['drop_non_numeric_columns']:
            # Compute the object-dtype mask once and reuse it for both the
            # log messages and the selection.
            is_object = output.dtypes == object
            object_column_indices = list(np.where(is_object)[0])
            _logger.debug(f'dropping columns: {object_column_indices}')
            _logger.debug(
                f'dropping columns: {output.iloc[:, object_column_indices].columns}'
            )
            # np.where returns int64 instead of int, D3M metadata checks for int
            numeric_column_indices = [
                int(x) for x in np.where(~is_object)[0]
            ]
            output = output.iloc[:, numeric_column_indices]
            output.metadata = utils.select_columns_metadata(
                output.metadata, numeric_column_indices)

        return base.CallResult(output)
예제 #3
0
 def _get_new_df(inputs: container.DataFrame, use_cols: list):
     """Return the ``use_cols`` columns of ``inputs`` with matching metadata.

     Args:
         inputs: Frame to take columns from.
         use_cols: Positional column indices, used both for ``iloc`` and for
             ``common_utils.select_columns_metadata``.

     Returns:
         The positional column selection ``inputs.iloc[:, use_cols]`` whose
         ``metadata`` attribute is set to the result of
         ``common_utils.select_columns_metadata`` for the same columns.
     """
     # Metadata is derived from the full frame before the frame is sliced.
     selected_metadata = common_utils.select_columns_metadata(
         inputs_metadata=inputs.metadata,
         columns=use_cols,
     )
     subset = inputs.iloc[:, use_cols]
     subset.metadata = selected_metadata
     return subset