def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
    """Return only the columns of ``inputs`` picked out by ``self._index``.

    The metadata attached to the result is narrowed to the same column
    selection, so data and metadata describe the same columns.

    Args:
        inputs: Tabular container that supports positional ``.iloc``
            indexing and carries a ``.metadata`` attribute.
        timeout: Not read by this method.
        iterations: Not read by this method.

    Returns:
        A ``CallResult`` wrapping the column subset of ``inputs``.

    Raises:
        ValueError: If ``self._fitted`` is falsy.
    """
    # Guard clause: nothing below is meaningful before fitting.
    if not self._fitted:
        raise ValueError('Model should be fitted first.')

    selected = inputs.iloc[:, self._index]
    selected.metadata = utils.select_columns_metadata(
        inputs.metadata,
        columns=self._index,
    )
    return CallResult(selected)
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
    """Convert the selected columns to numeric values and sync their metadata.

    The columns returned by ``self._get_columns`` are passed through
    ``pd.to_numeric`` on a copy of ``inputs``. For each converted column the
    ``structural_type`` metadata is set to ``int`` or ``float`` when the
    column's semantic types contain the schema.org Integer or Float URI.
    When the ``drop_non_numeric_columns`` hyperparameter is truthy, every
    column whose dtype is still ``object`` is removed from both the data and
    the metadata.

    Args:
        inputs: Tabular container supporting ``.copy()``, ``.iloc`` and a
            ``.metadata`` attribute.
        timeout: Not read by this method.
        iterations: Not read by this method.

    Returns:
        A ``base.CallResult`` wrapping the converted (and possibly reduced)
        copy of ``inputs``.

    Raises:
        Whatever ``pd.to_numeric`` raises for values it cannot parse; it is
        called with its default error handling and nothing is caught here.
    """
    columns_to_use = self._get_columns(inputs.metadata, self.hyperparams)
    _logger.debug('converting columns: %s', columns_to_use)
    _logger.debug('converting columns: %s', inputs.iloc[:, columns_to_use].columns)

    # Work on a copy so the caller's container is left untouched.
    output = inputs.copy()
    for col in columns_to_use:
        # NOTE(review): on recent pandas releases ``.iloc[:, col] = ...`` may
        # write the values in place and keep the original object dtype, in
        # which case the dtype-based drop below would also drop converted
        # columns -- confirm against the pandas version this project pins.
        output.iloc[:, col] = pd.to_numeric(output.iloc[:, col])

        column_metadata = output.metadata.query((metadata_base.ALL_ELEMENTS, col))
        # A column may have no 'semantic_types' entry (or an explicit None);
        # fall back to an empty tuple so the membership tests below cannot
        # raise TypeError.
        semantic_types = column_metadata.get('semantic_types') or ()
        if 'http://schema.org/Integer' in semantic_types:
            output.metadata = output.metadata.update(
                (metadata_base.ALL_ELEMENTS, col), {'structural_type': int})
        elif 'http://schema.org/Float' in semantic_types:
            output.metadata = output.metadata.update(
                (metadata_base.ALL_ELEMENTS, col), {'structural_type': float})
        # TODO: decide what to do with missing values (e.g. detect them via
        # pd.isnull on the converted column).

    if self.hyperparams['drop_non_numeric_columns']:
        # Compute the object-dtype positions once; both debug lines use them.
        object_column_indices = list(np.where(output.dtypes == object)[0])
        _logger.debug('dropping columns: %s', object_column_indices)
        _logger.debug('dropping columns: %s', output.iloc[:, object_column_indices].columns)

        # np.where returns int64 instead of int, D3M metadata checks for int
        numeric_column_indices = [
            int(x) for x in np.where(output.dtypes != object)[0]
        ]
        output = output.iloc[:, numeric_column_indices]
        output.metadata = utils.select_columns_metadata(
            output.metadata, numeric_column_indices)

    return base.CallResult(output)
def _get_new_df(inputs: container.DataFrame, use_cols: list):
    """Build a column subset of ``inputs`` whose metadata matches the subset.

    Args:
        inputs: Source frame; its ``.metadata`` is narrowed with
            ``common_utils.select_columns_metadata``.
        use_cols: Positional column indices to keep, passed both to the
            metadata selection and to ``.iloc``.

    Returns:
        The ``.iloc`` column selection of ``inputs`` with its ``.metadata``
        replaced by the narrowed metadata.
    """
    # Narrow the metadata first, then slice the data (same order as before).
    subset_metadata = common_utils.select_columns_metadata(
        inputs_metadata=inputs.metadata,
        columns=use_cols,
    )
    subset = inputs.iloc[:, use_cols]
    subset.metadata = subset_metadata
    return subset