Esempio n. 1
0
class OrdinalEncoderImpl():
    """Wrapper that defers construction of an ``SKLModel`` until ``fit``.

    ``SKLModel`` is defined outside this block (presumably an alias for
    scikit-learn's ``OrdinalEncoder`` -- confirm against the file's imports).
    The constructor only records the hyperparameters.
    """

    def __init__(self, categories='auto', dtype=None):
        """Store the hyperparameters later forwarded to ``SKLModel``."""
        self._hyperparams = dict(categories=categories, dtype=dtype)

    def fit(self, X, y=None):
        """Build a fresh ``SKLModel`` and fit it on ``X`` (and ``y`` if given).

        Returns ``self`` so calls can be chained.
        """
        estimator = SKLModel(**self._hyperparams)
        # Stored before fitting, so the attribute exists even if fit raises.
        self._sklearn_model = estimator
        if y is None:
            estimator.fit(X)
        else:
            estimator.fit(X, y)
        return self

    def transform(self, X):
        """Delegate to the model created by the last ``fit`` call."""
        fitted = self._sklearn_model
        return fitted.transform(X)
Esempio n. 2
0
class NanOrdinalEncoder(_BaseEncoder):
    """Ordinal encoder that maps the ``nan_replace`` placeholder back to NaN.

    Columns are encoded with a plain ``OrdinalEncoder``; afterwards, every
    cell whose code corresponds to the ``nan_replace`` category is replaced
    by ``np.nan``. ``nan_replace`` is a module-level name defined outside
    this block (presumably the sentinel used to fill missing values before
    encoding -- confirm against the caller).
    """

    def __init__(self):
        self.lbe = OrdinalEncoder()

    def fit(self, X, y=None):
        """Fit the wrapped encoder on ``X``.

        ``y`` is accepted and ignored so the class follows the usual
        ``fit(X, y)`` estimator signature; without it, calling
        ``fit_transform(X, y)`` (directly or through a ``ColumnTransformer``
        fitted with targets) raises ``TypeError``.
        """
        self.lbe.fit(X)
        return self

    def transform(self, X):
        """Encode ``X`` and restore NaN where the placeholder was encoded.

        Returns the encoded array from the wrapped encoder, modified in
        place. Assigning ``np.nan`` requires a float array; this relies on
        the wrapped encoder's output dtype being floating point.
        """
        result = self.lbe.transform(X)
        for col, categories in enumerate(self.lbe.categories_):
            try:
                nan_idx = list(categories).index(nan_replace)
            except ValueError:
                # The placeholder never appeared in this column during fit,
                # so there is nothing to map back to NaN.
                continue
            column = result[:, col]  # view: the assignment edits `result`
            column[column == nan_idx] = np.nan
        return result
Esempio n. 3
0
 def fit(self, X, y=None):
     """Create a new ``SKLModel`` from the stored hyperparameters and fit it.

     ``y`` is forwarded only when it is not ``None``. Returns ``self``.
     """
     estimator = SKLModel(**self._hyperparams)
     # Stored before fitting, so the attribute exists even if fit raises.
     self._sklearn_model = estimator
     if y is None:
         estimator.fit(X)
     else:
         estimator.fit(X, y)
     return self
Esempio n. 4
0
def calculate_metafeatures(dataset, dataset_id=None, data_dir='./', task_type=None):
    """Compute the meta-feature values of a dataset.

    Args:
        dataset: Either a dataset name (``str``), loaded through
            ``load_data`` from ``data_dir``, or a ``DataNode`` whose
            ``data`` holds ``(X, y)`` and which exposes ``feature_types``.
        dataset_id: Name passed on as ``dataset_name``. When ``dataset`` is
            a string, this is overwritten with that string.
        data_dir: Directory forwarded to ``load_data`` (string input only).
        task_type: Forwarded to ``load_data`` and
            ``calculate_all_metafeatures``.

    Returns:
        The result of ``load_values()`` on the computed meta-features.

    Raises:
        ValueError: If ``dataset`` is neither a ``str`` nor a ``DataNode``.
    """
    if isinstance(dataset, str):
        # NOTE(review): X is used as a pandas DataFrame below, so load_data
        # is assumed to return one when datanode_returned=False -- confirm.
        X, y, feature_types = load_data(dataset, data_dir, datanode_returned=False,
                                        preprocess=False, task_type=task_type)
        dataset_id = dataset
    elif isinstance(dataset, DataNode):
        X, y, feature_types = dataset.data[0], dataset.data[1], dataset.feature_types
        import pandas as pd
        X = pd.DataFrame(data=X)
    else:
        raise ValueError('Invalid dataset input!')

    categorical_idx = [i for i, feature_type in enumerate(feature_types)
                       if feature_type == 'categorical']

    # Positions of the columns that contain at least one missing value.
    has_nan = np.asarray(X.isnull()).any(axis=0)
    nan_columns = set(np.flatnonzero(has_nan).tolist())

    # Split the categorical columns by whether they contain NaN, keeping the
    # original column order so the encoded layout is deterministic.
    nan_categorical_idx = [i for i in categorical_idx if i in nan_columns]
    normal_categorical_idx = [i for i in categorical_idx if i not in nan_columns]

    # Fill NaN with the placeholder by explicit assignment. A chained
    # `X[col].fillna(..., inplace=True)` acts on a temporary object and does
    # not update `X` under pandas Copy-on-Write.
    for col in X.columns[nan_categorical_idx]:
        X[col] = X[col].fillna(nan_replace)

    X = np.array(X)
    y = np.array(y)

    lbe = ColumnTransformer([('lbe', OrdinalEncoder(), normal_categorical_idx),
                             ('nan_lbe', NanOrdinalEncoder(), nan_categorical_idx)],
                            remainder="passthrough")
    X = lbe.fit_transform(X).astype('float64')

    # The transformed matrix lists the encoded categorical columns first and
    # the passthrough columns last, so the mask is all True, then all False.
    n_categorical = len(categorical_idx)
    categorical_ = [True] * n_categorical + [False] * (len(feature_types) - n_categorical)

    mf = calculate_all_metafeatures(X=X, y=y,
                                    categorical=categorical_,
                                    dataset_name=dataset_id,
                                    task_type=task_type)
    return mf.load_values()
Esempio n. 5
0
 def __init__(self):
     """Create the wrapped ``OrdinalEncoder`` with its default settings."""
     encoder = OrdinalEncoder()
     self.lbe = encoder
Esempio n. 6
0
 def __init__(self, categories='auto', dtype=None):
     """Record the hyperparameters and build the wrapped ``SKLModel`` eagerly."""
     hyperparams = dict(categories=categories, dtype=dtype)
     self._hyperparams = hyperparams
     self._wrapped_model = SKLModel(**hyperparams)