Example No. 1
import importlib
import inspect

from creme import (base, compat, compose, dummy, ensemble, facto,
                   feature_extraction, feature_selection, impute, linear_model,
                   meta, model_selection, multiclass, multioutput,
                   preprocessing, reco, sampling, stats, time_series, tree)
# Assumption: the sklearn-compatibility base classes live in creme.compat.sklearn;
# the exact import path may differ between creme versions.
from creme.compat.sklearn import Creme2SKLBase, SKL2CremeBase


def get_all_estimators():

    # Estimators skipped by the generic checks: they either wrap other models
    # or need special constructor arguments.
    ignored = (Creme2SKLBase, SKL2CremeBase, compose.FuncTransformer,
               compose.Pipeline, compose.Grouper,
               ensemble.StackingBinaryClassifier, facto.FFMClassifier,
               facto.FFMRegressor, facto.FMClassifier, facto.FMRegressor,
               facto.FwFMClassifier, facto.FwFMRegressor, facto.HOFMClassifier,
               facto.HOFMRegressor, feature_extraction.Agg,
               feature_extraction.TargetAgg,
               feature_selection.PoissonInclusion, impute.PreviousImputer,
               impute.StatImputer, linear_model.SoftmaxRegression,
               meta.PredClipper, meta.TransformedTargetRegressor,
               model_selection.SuccessiveHalvingClassifier,
               model_selection.SuccessiveHalvingRegressor,
               preprocessing.OneHotEncoder, reco.Baseline, reco.BiasedMF,
               reco.FunkMF, reco.RandomNormal, sampling.HardSamplingClassifier,
               sampling.HardSamplingRegressor, sampling.RandomOverSampler,
               sampling.RandomUnderSampler, sampling.RandomSampler,
               time_series.Detrender, time_series.GroupDetrender,
               time_series.SNARIMAX)

    # compat.PyTorch2CremeRegressor is only defined when the optional torch
    # dependency is installed.
    try:
        ignored = (*ignored, compat.PyTorch2CremeRegressor)
    except AttributeError:
        pass

    def is_estimator(obj):
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    # Walk every public creme submodule and yield a ready-to-check instance of
    # each estimator class it exposes.
    for submodule in importlib.import_module('creme').__all__:

        if submodule == 'base':
            continue

        for _, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):

            if issubclass(obj, ignored):
                continue

            elif issubclass(obj, multioutput.RegressorChain):
                inst = obj(model=linear_model.LinearRegression())

            elif issubclass(obj, multioutput.ClassifierChain):
                inst = obj(model=linear_model.LogisticRegression())

            elif issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())

            elif issubclass(obj, tree.RandomForestClassifier):
                inst = obj()

            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())

            elif issubclass(obj, ensemble.AdaBoostClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler()
                    | linear_model.LinearRegression(intercept_lr=.1),
                    preprocessing.StandardScaler()
                    | linear_model.PARegressor(),
                ])

            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorr())

            elif issubclass(obj, linear_model.LinearRegression):
                inst = preprocessing.StandardScaler() | obj(intercept_lr=.1)

            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()

            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(classifier=linear_model.LogisticRegression())

            elif issubclass(obj, multiclass.OneVsOneClassifier):
                inst = obj(classifier=linear_model.LogisticRegression())

            elif issubclass(obj, multiclass.OutputCodeClassifier):
                inst = obj(classifier=linear_model.LogisticRegression(),
                           code_size=10)

            else:
                inst = obj()

            yield inst
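
A minimal usage sketch, assuming the imports above: listing which estimators the generic checks will exercise.

if __name__ == '__main__':
    for est in get_all_estimators():
        print(est)
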
Example No. 2
    # stat is a running statistic and func its batch counterpart; both come
    # from a pytest parametrization that is not shown in this snippet.
    n = stat.window_size
    X = [random.random() for _ in range(30)]

    for i, x in enumerate(X):
        stat.update(x)
        if i >= 1:
            assert math.isclose(stat.get(),
                                func(tail(X[:i + 1], n)),
                                abs_tol=1e-10)


@pytest.mark.parametrize(
    'stat, func',
    [(stats.Cov(), lambda x, y: np.cov(x, y)[0, 1]),
     (stats.PearsonCorr(), lambda x, y: sp_stats.pearsonr(x, y)[0])])
def test_bivariate(stat, func):

    # Silence the warnings that np.cov and pearsonr may raise on the first few
    # observations.
    np.warnings.filterwarnings('ignore')

    X = [random.random() for _ in range(30)]
    Y = [random.random() * x for x in X]

    for i, (x, y) in enumerate(zip(X, Y)):
        stat.update(x, y)
        if i >= 1:
            assert math.isclose(stat.get(),
                                func(X[:i + 1], Y[:i + 1]),
                                abs_tol=1e-10)
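
The univariate test above calls a tail helper that is not shown in the snippet. A minimal sketch of what it presumably does, namely keeping only the last n observations of the stream seen so far:

import collections

def tail(iterable, n):
    # Restrict the batch computation to the same window the rolling statistic
    # sees: the last n items.
    return collections.deque(iterable, maxlen=n)
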
Example No. 3
# Tail end of get_all_estimators() from Example No. 1, repeated here for context.
            else:
                inst = obj()

            yield inst


# Pair every estimator (plus a few hand-built pipelines) with each of its
# generic checks; the estimator and check names form the pytest test id.
@pytest.mark.parametrize('estimator, check', [
    pytest.param(estimator, check, id=f'{estimator}:{check.__name__}')
    for estimator in list(get_all_estimators()) + [
        feature_extraction.TFIDF(),
        linear_model.LogisticRegression(),
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        preprocessing.StandardScaler() | linear_model.PAClassifier(),
        preprocessing.StandardScaler()
        | multiclass.OneVsRestClassifier(linear_model.LogisticRegression()),
        preprocessing.StandardScaler()
        | multiclass.OneVsRestClassifier(linear_model.PAClassifier()),
        naive_bayes.GaussianNB(),
        preprocessing.StandardScaler(),
        cluster.KMeans(n_clusters=5, seed=42),
        preprocessing.MinMaxScaler(),
        preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
        preprocessing.PolynomialExtender(),
        feature_selection.VarianceThreshold(),
        feature_selection.SelectKBest(similarity=stats.PearsonCorr())
    ] for check in utils.estimator_checks.yield_checks(estimator)
])
def test_check_estimator(estimator, check):
    check(copy.deepcopy(estimator))
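
A minimal sketch of running these generic checks by hand for a single model, assuming utils.estimator_checks.yield_checks behaves as it is used above:

model = preprocessing.StandardScaler() | linear_model.LinearRegression()
for check in utils.estimator_checks.yield_checks(model):
    check(copy.deepcopy(model))
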
Example No. 4
    @classmethod
    def _default_params(cls):
        # Default constructor arguments used by the generic estimator checks.
        return {'similarity': stats.PearsonCorr()}
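
This looks like the _default_params hook of feature_selection.SelectKBest, whose constructor takes a similarity argument (see Examples No. 1 and No. 3). A sketch of how such a hook would be consumed, assuming the checks build the instance from these keyword arguments:

params = feature_selection.SelectKBest._default_params()
selector = feature_selection.SelectKBest(**params)  # i.e. SelectKBest(similarity=stats.PearsonCorr())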