Code Example #1
File: base.py, Project: Leo-VK/creme
    @classmethod
    def _unit_test_params(cls):
        yield {
            "models": [
                compose.Pipeline(
                    preprocessing.StandardScaler(),
                    linear_model.LinearRegression(optimizer=optim.SGD(lr=1e-2)),
                ),
                compose.Pipeline(
                    preprocessing.StandardScaler(),
                    linear_model.LinearRegression(optimizer=optim.SGD(lr=1e-1)),
                ),
            ],
            "metric": metrics.MAE(),
        }
        yield {
            "models": [
                compose.Pipeline(
                    preprocessing.StandardScaler(),
                    linear_model.LinearRegression(optimizer=optim.SGD(lr=lr)),
                )
                for lr in [1e-4, 1e-3, 1e-2, 1e-1]
            ],
            "metric": metrics.MAE(),
        }
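
These _unit_test_params hooks let river's generic test suite instantiate every estimator with sensible defaults; Code Example #25 shows the consuming side. A minimal sketch of that consumption, with Model as a hypothetical stand-in for any estimator class defining the hook:

for params in Model._unit_test_params():
    estimator = Model(**params)  # one instance per yielded parameter set
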
Code Example #2
    def __init__(
        self,
        p: int,
        d: int,
        q: int,
        m: int = 1,
        sp: int = 0,
        sd: int = 0,
        sq: int = 0,
        regressor: base.Regressor = None,
    ):

        self.p = p
        self.d = d
        self.q = q
        self.m = m
        self.sp = sp
        self.sd = sd
        self.sq = sq
        self.regressor = (
            regressor
            if regressor is not None
            else preprocessing.StandardScaler() | linear_model.LinearRegression()
        )
        self.differencer = Differencer(d=d, m=1) + Differencer(d=sd, m=1)
        self.y_trues = collections.deque(maxlen=max(p, m * sp))
        self.errors = collections.deque(maxlen=max(p, m * sq))
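
The `|` used to build the default regressor is river's pipeline operator. A minimal equivalence sketch (cf. the explicit compose.Pipeline construction in Code Example #1):

from river import compose, linear_model, preprocessing

# Equivalent to preprocessing.StandardScaler() | linear_model.LinearRegression()
default_regressor = compose.Pipeline(
    preprocessing.StandardScaler(),
    linear_model.LinearRegression(),
)
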
Code Example #3
File: ewa.py, Project: xianghu-xiaokachekkk/river
    @classmethod
    def _default_params(cls):
        return {
            'regressors': [
                pp.StandardScaler() | lm.LinearRegression(intercept_lr=.1),
                pp.StandardScaler() | lm.PARegressor(),
            ]
        }
Code Example #4
File: ewa.py, Project: AdilZouitine/creme
    @classmethod
    def _unit_test_params(cls):
        return {
            "models": [
                pp.StandardScaler() | lm.LinearRegression(intercept_lr=0.1),
                pp.StandardScaler() | lm.PARegressor(),
            ]
        }
Code Example #5
def test_pipeline_add_at_start():
    def a(x):
        pass

    pipeline = preprocessing.StandardScaler() | linear_model.LinearRegression()
    pipeline = a | pipeline
    assert str(pipeline) == "a | StandardScaler | LinearRegression"
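
Piping a plain function prepends it as a transformer named after the function, which is what the assertion checks. An explicit equivalent, wrapping the function by hand with compose.FuncTransformer (used directly in Code Example #28):

pipeline = (
    compose.FuncTransformer(a)
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression()
)
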
Code Example #6
def test_one_many_consistent():
    """Checks that using learn_one or learn_many produces the same result."""

    X = pd.read_csv(datasets.TrumpApproval().path)
    Y = X.pop('five_thirty_eight')

    one = lm.LinearRegression()
    for x, y in stream.iter_pandas(X, Y):
        one.learn_one(x, y)

    many = lm.LinearRegression()
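    # np.array_split(X, len(X)) yields one-row mini-batches, so each
    # learn_many call sees exactly one sample, mirroring learn_one above.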
    for xb, yb in zip(np.array_split(X, len(X)), np.array_split(Y, len(Y))):
        many.learn_many(xb, yb)

    for i in X:
        assert math.isclose(one.weights[i], many.weights[i])
Code Example #7
File: bandit.py, Project: yangmingmath/river
    @classmethod
    def _unit_test_params(cls):
        return {
            "models": [
                compose.Pipeline(
                    preprocessing.StandardScaler(),
                    linear_model.LinearRegression(optimizer=optim.SGD(lr=0.01)),
                ),
                compose.Pipeline(
                    preprocessing.StandardScaler(),
                    linear_model.LinearRegression(optimizer=optim.SGD(lr=0.1)),
                ),
            ],
            "metric": metrics.MAE(),
        }
Code Example #8
    def __init__(
        self,
        grace_period: int = 200,
        max_depth: int = None,
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "model",
        leaf_model: base.Regressor = None,
        model_selector_decay: float = 0.95,
        nominal_attributes: list = None,
        attr_obs: str = "e-bst",
        attr_obs_params: dict = None,
        min_samples_split: int = 5,
        **kwargs,
    ):
        super().__init__(max_depth=max_depth, **kwargs)

        self._split_criterion: str = "vr"
        self.grace_period = grace_period
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.leaf_prediction = leaf_prediction
        self.leaf_model = leaf_model if leaf_model else linear_model.LinearRegression()
        self.model_selector_decay = model_selector_decay
        self.nominal_attributes = nominal_attributes
        self.min_samples_split = min_samples_split

        if attr_obs not in self._VALID_AO:
            raise AttributeError(
                f'Invalid "attr_obs" option. Valid options are: {self._VALID_AO}'
            )
        self.attr_obs = attr_obs
        self.attr_obs_params = attr_obs_params if attr_obs_params is not None else {}
        self.kwargs = kwargs
Code Example #9
def get_model():
    extract_features = compose.TransformerUnion(
        get_ordinal_date,
        get_day_distances,
    )

    model = (
        extract_features
        | time_series.SNARIMAX(
            p=0,
            d=0,
            q=0,
            m=7,
            sp=3,
            sq=0,
            regressor=(
                preprocessing.StandardScaler()
                | linear_model.LinearRegression(
                    intercept_init=0,
                    intercept_lr=0.3,
                    optimizer=optim.SGD(0.01),
                )
            ),
        )
    )
    return model
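
A hedged usage sketch for the model returned above; the forecast-then-learn loop mirrors Code Example #24, and stream_of_xy is a hypothetical iterable of (features, target) pairs:

model = get_model()
for x, y in stream_of_xy:  # hypothetical data stream
    y_pred = model.forecast(horizon=1, xs=[x])  # one-step-ahead forecast
    model.learn_one(x, y)  # then update on the observed value
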
Code Example #10
def test_shuffle_columns():
    """Checks that learn_many works identically whether columns are shuffled or not."""

    X = pd.read_csv(datasets.TrumpApproval().path)
    Y = X.pop('five_thirty_eight')

    normal = lm.LinearRegression()
    for xb, yb in zip(np.array_split(X, 10), np.array_split(Y, 10)):
        normal.learn_many(xb, yb)

    shuffled = lm.LinearRegression()
    for xb, yb in zip(np.array_split(X, 10), np.array_split(Y, 10)):
        cols = np.random.permutation(X.columns)
        shuffled.learn_many(xb[cols], yb)

    for i in X:
        assert math.isclose(normal.weights[i], shuffled.weights[i])
Code Example #11
def test_set_params():

    obj = linear_model.LinearRegression(l2=42)
    obj.learn_one({'x': 3}, 6)

    new = obj._set_params({'l2': 21})
    assert new.l2 == 21
    assert obj.l2 == 42
    assert new.weights == {}
    assert new.weights != obj.weights
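
_set_params returns a fresh, unfitted copy carrying the new hyperparameters, which is why new.weights is empty while obj.weights is not. A conceptual sketch of that cloning behavior (a hand-rolled stand-in, not river's actual implementation; _get_params is assumed to expose the constructor arguments, as river's estimators do):

def set_params_clone(model, overrides):
    params = {**model._get_params(), **overrides}
    return type(model)(**params)  # rebuilt from scratch, so no learned state
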
Code Example #12
def test_set_params_pipeline():

    obj = preprocessing.StandardScaler() | linear_model.LinearRegression(l2=42)
    obj.learn_one({'x': 3}, 6)

    new = obj._set_params({'LinearRegression': {'l2': 21}})
    assert new['LinearRegression'].l2 == 21
    assert obj['LinearRegression'].l2 == 42
    assert new['LinearRegression'].weights == {}
    assert new['LinearRegression'].weights != obj['LinearRegression'].weights
Code Example #13
File: pred_clipper.py, Project: online-ml/river
    @classmethod
    def _unit_test_params(cls):
        import math

        from river import linear_model

        yield {
            "regressor": linear_model.LinearRegression(),
            "y_min": -math.inf,
            "y_max": math.inf,
        }
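
A hedged usage sketch for the PredClipper above; the import path is an assumption (the class lived in river.meta in older releases and has moved between versions):

from river import linear_model
from river.meta import PredClipper  # assumed import path

model = PredClipper(
    regressor=linear_model.LinearRegression(),
    y_min=0.0,  # predictions are clipped into [0.0, 100.0]
    y_max=100.0,
)
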
Code Example #14
def test_add_remove_columns():
    """Checks that no exceptions are raised whenever columns are dropped and/or added."""

    X = pd.read_csv(datasets.TrumpApproval().path)
    Y = X.pop('five_thirty_eight')

    lin_reg = lm.LinearRegression()
    for xb, yb in zip(np.array_split(X, 10), np.array_split(Y, 10)):
        # Pick half of the columns at random
        cols = np.random.choice(X.columns, len(X.columns) // 2, replace=False)
        lin_reg.learn_many(xb[cols], yb)
Code Example #15
def test_set_params_pipeline():

    obj = preprocessing.StandardScaler() | linear_model.LinearRegression(l2=42)
    obj.learn_one({"x": 3}, 6)

    params = {"LinearRegression": {"l2": 21}}
    new = obj._set_params(params)
    assert new["LinearRegression"].l2 == 21
    assert obj["LinearRegression"].l2 == 42
    assert new["LinearRegression"].weights == {}
    assert new["LinearRegression"].weights != obj["LinearRegression"].weights
Code Example #16
    def __init__(
        self,
        grace_period: int = 200,
        max_depth: int = None,
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "model",
        leaf_model: base.Regressor = None,
        model_selector_decay: float = 0.95,
        nominal_attributes: list = None,
        attr_obs: str = "e-bst",
        attr_obs_params: dict = None,
        min_samples_split: int = 5,
        **kwargs,
    ):
        super().__init__(max_depth=max_depth, **kwargs)

        self._split_criterion: str = "vr"
        self.grace_period = grace_period
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.leaf_prediction = leaf_prediction
        self.leaf_model = leaf_model if leaf_model else linear_model.LinearRegression()
        self.model_selector_decay = model_selector_decay
        self.nominal_attributes = nominal_attributes
        self.min_samples_split = min_samples_split

        if attr_obs not in self._VALID_AO:
            raise AttributeError(
                f'Invalid "attr_obs" option. Valid options are: {self._VALID_AO}'
            )
        self.attr_obs = attr_obs
        self.attr_obs_params = attr_obs_params if attr_obs_params is not None else {}
        self.kwargs = kwargs

        if self.attr_obs == self._QO:
            self._qo_std_div = 3
            if "std_div" in self.attr_obs_params:
                # Make sure the passed std_div value is valid
                if (
                    self.attr_obs_params["std_div"] is None
                    or self.attr_obs_params["std_div"] > 0
                ):
                    self._qo_std_div = self.attr_obs_params["std_div"]

            if self._qo_std_div:  # Dynamically evolving radii will be used
                self._feat_var = {}
                self._qo_radii = defaultdict(dict)
            else:  # Static values will be used
                if "radius" in self.attr_obs_params:
                    self._qo_radii = {"radius": self.attr_obs_params["radius"]}
                else:
                    self._qo_radii = {}
Code Example #17
def test_lin_reg_sklearn_coherence(river_params, sklearn_params):
    """Checks that the sklearn and river implementations produce the same results."""

    ss = preprocessing.StandardScaler()
    rv = lm.LinearRegression(**river_params)
    sk = sklm.SGDRegressor(**sklearn_params)

    for x, y in datasets.TrumpApproval().take(100):
        x = ss.learn_one(x).transform_one(x)
        rv.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y])

    for i, w in enumerate(rv.weights.values()):
        assert math.isclose(w, sk.coef_[i])

    assert math.isclose(rv.intercept, sk.intercept_[0])
Code Example #18
    def __init__(
        self,
        grace_period: int = 200,
        max_depth: int = None,
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "model",
        leaf_model: base.Regressor = None,
        model_selector_decay: float = 0.95,
        nominal_attributes: list = None,
        splitter: Splitter = None,
        min_samples_split: int = 5,
        binary_split: bool = False,
        max_size: int = 500,
        memory_estimate_period: int = 1000000,
        stop_mem_management: bool = False,
        remove_poor_attrs: bool = False,
        merit_preprune: bool = True,
    ):
        super().__init__(
            max_depth=max_depth,
            binary_split=binary_split,
            max_size=max_size,
            memory_estimate_period=memory_estimate_period,
            stop_mem_management=stop_mem_management,
            remove_poor_attrs=remove_poor_attrs,
            merit_preprune=merit_preprune,
        )

        self._split_criterion: str = "vr"
        self.grace_period = grace_period
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.leaf_prediction = leaf_prediction
        self.leaf_model = leaf_model if leaf_model else linear_model.LinearRegression()
        self.model_selector_decay = model_selector_decay
        self.nominal_attributes = nominal_attributes
        self.min_samples_split = min_samples_split

        if splitter is None:
            self.splitter = EBSTSplitter()
        else:
            if splitter.is_target_class:
                raise ValueError(
                    "The chosen splitter cannot be used in regression tasks.")
            self.splitter = splitter
Code Example #19
    def __init__(
        self,
        n_min: int = 200,
        delta: float = 1e-7,
        tau: float = 0.05,
        pred_type: str = "adaptive",
        pred_model: base.Regressor = None,
        splitter: Splitter = None,
        drift_detector: base.DriftDetector = None,
        alpha: float = 0.99,
        anomaly_threshold: float = -0.75,
        m_min: int = 30,
        ordered_rule_set: bool = True,
        min_samples_split: int = 5,
    ):
        self.n_min = n_min
        self.delta = delta
        self.tau = tau

        if pred_type not in self._VALID_PRED:
            raise ValueError(f"Invalid 'pred_type': {pred_type}")
        self.pred_type = pred_type
        self.pred_model = pred_model if pred_model else linear_model.LinearRegression()

        if splitter is None:
            self.splitter = tree.splitter.EBSTSplitter()
        else:
            self.splitter = splitter

        self.drift_detector = (drift_detector if drift_detector is not None
                               else drift.PageHinkley())

        self.alpha = alpha
        self.anomaly_threshold = anomaly_threshold
        self.m_min = m_min
        self.ordered_rule_set = ordered_rule_set
        self.min_samples_split = min_samples_split

        self._default_rule = self._new_rule()
        self._rules: typing.Dict[typing.Hashable, RegRule] = {}

        self._n_drifts_detected: int = 0
Code Example #20
def test_lin_reg_sklearn_coherence():
    """Checks that the sklearn and river implementations produce the same results."""
    class SquaredLoss:
        """sklearn removes the leading 2 from the gradient of the squared loss."""
        def gradient(self, y_true, y_pred):
            return y_pred - y_true

    ss = preprocessing.StandardScaler()
    cr = lm.LinearRegression(optimizer=optim.SGD(.01), loss=SquaredLoss())
    sk = sklm.SGDRegressor(learning_rate='constant', eta0=.01, alpha=.0)

    for x, y in datasets.TrumpApproval():
        x = ss.learn_one(x).transform_one(x)
        cr.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
Code Example #21
    def __init__(
        self,
        grace_period: int = 200,
        max_depth: int = None,
        split_confidence: float = 1e-7,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "model",
        leaf_model: base.Regressor = None,
        model_selector_decay: float = 0.95,
        nominal_attributes: list = None,
        splitter: Splitter = None,
        min_samples_split: int = 5,
        **kwargs,
    ):
        super().__init__(max_depth=max_depth, **kwargs)

        self._split_criterion: str = "vr"
        self.grace_period = grace_period
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.leaf_prediction = leaf_prediction
        self.leaf_model = leaf_model if leaf_model else linear_model.LinearRegression()
        self.model_selector_decay = model_selector_decay
        self.nominal_attributes = nominal_attributes
        self.min_samples_split = min_samples_split

        if splitter is None:
            self.splitter = EBSTSplitter()
        else:
            if splitter.is_target_class:
                raise ValueError(
                    "The chosen splitter cannot be used in regression tasks."
                )
            self.splitter = splitter

        self.kwargs = kwargs
Code Example #22
File: hard_sampling.py, Project: AdilZouitine/creme
    @classmethod
    def _unit_test_params(cls):
        return {
            "regressor": linear_model.LinearRegression(),
            "p": 0.1,
            "size": 40,
        }
Code Example #23
import pytest

from river import compose, linear_model, optim, preprocessing, tree, utils


@pytest.mark.parametrize(
    "model, param_grid, count",
    [
        (
            linear_model.LinearRegression(),
            {
                "optimizer": [
                    (optim.SGD, {"lr": [1, 2]}),
                    (
                        optim.Adam,
                        {
                            "beta_1": [0.1, 0.01, 0.001],
                            "lr": [0.1, 0.01, 0.001, 0.0001],
                        },
                    ),
                ]
            },
            2 + 3 * 4,
        ),
        (
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            {
                "LinearRegression": {
                    "optimizer": [
                        (optim.SGD, {"lr": [1, 2]}),
                        (
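
The snippet above is truncated mid-parametrization. Given the imports and the expected counts (2 SGD combinations plus 3 × 4 Adam combinations), the body presumably expands the grid with utils.expand_param_grid; a sketch of that assumed body:

def test_expand_param_grid(model, param_grid, count):
    # Assumed body: expand the hyperparameter grid into concrete models.
    models = utils.expand_param_grid(model, param_grid)
    assert len(models) == count
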
Code Example #24
    def build_model_4snarimax(self):
        if os.path.exists(self.pck_filename):
            # If a model backup exists, load it and update the model from start1 to start2.
            src_bck = pickle.load(open(self.pck_filename, 'rb'))
            model = src_bck.snarimax_model
            metric = src_bck.snarimax_metric
            self.snarimax_para = src_bck.snarimax_para
            self.snarimax_model = model
            self.snarimax_metric = metric

            start1 = src_bck.data.index[-1]
            start2 = self.data.index[-1]  # self.data.index[-self.data.index[-1].weekday()]

        else:  # If no backup exists, rebuild the model from the start.
            p, d, q, m, sp, sd, sq = self.snarimax_para
            extract_features = compose.TransformerUnion(get_ordinal_date)
            model = (
                extract_features
                | time_series.SNARIMAX(
                    p=p,
                    d=d,
                    q=q,
                    m=m,
                    sp=sp,
                    sd=sd,
                    sq=sq,
                    regressor=(
                        # preprocessing.Normalizer() |
                        preprocessing.AdaptiveStandardScaler(alpha=0.1)
                        | preprocessing.StandardScaler()
                        # | preprocessing.RobustScaler(with_scaling=True)
                        | linear_model.LinearRegression(
                            intercept_init=0,
                            optimizer=optim.SGD(0.0001),  # important parameter
                            # optimizer=optim.AdaDelta(0.8, 0.00001),
                            # optimizer=optim.AMSGrad(lr=0.01, beta_1=0.8, beta_2=0.1),
                            intercept_lr=0.001,
                        )
                    ),
                )
            )

            metric = metrics.Rolling(metrics.MSE(), self.dd_historic)
            #metric = metrics.MSE()

            start1 = self.data.index[0]
            start2 = self.data.index[-1]  # self.data.index[-self.data.index[-1].weekday()]

        if start1 < start2:
            for t in pd.date_range(start1, start2, freq='D'):
                x, y = self.snarimax_data.loc[t][['ds', 'temp']].values
                y_pred = model.forecast(horizon=1, xs=[x])
                #print(x,y,y_pred[0],y-y_pred[0])
                model = model.learn_one(x, y)
                metric = metric.update(y, y_pred[0])

            self.snarimax_model = model
            self.snarimax_metric = metric
            with open(self.pck_filename, 'wb') as fh:
                pickle.dump(self, fh)

            #for t in pd.date_range(start1, start2):
            #    x = self.snarimax_data.loc[pd.date_range(t-timedelta(self.dd_historic),t)][['ds']].values
            #    y = self.snarimax_data.loc[pd.date_range(t-timedelta(self.dd_historic),t)][['temp']].values
            #    x = np.hstack(x)
            #    y = np.hstack(y)
            #    y_pred = model.forecast(horizon=self.dd_historic+1, xs=x)
            #    for i in range(0,self.dd_historic):
            #        model = model.learn_one(x[i], y[i])
            #        metric = metric.update(y[i], y_pred[i])

        return
Code Example #25
File: test_.py, Project: renatacgcastanha/river
        for _, obj in inspect.getmembers(importlib.import_module(submodule),
                                         is_estimator):
            if issubclass(obj, ignored):
                continue
            params = obj._unit_test_params()
            yield obj(**params)


@pytest.mark.parametrize(
    "estimator, check",
    [
        pytest.param(estimator, check, id=f"{estimator}:{check.__name__}")
        for estimator in list(get_all_estimators()) + [
            feature_extraction.TFIDF(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            preprocessing.StandardScaler() | linear_model.PAClassifier(),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(
                 linear_model.LogisticRegression())),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(linear_model.PAClassifier())),
            naive_bayes.GaussianNB(),
            preprocessing.StandardScaler(),
            cluster.KMeans(n_clusters=5, seed=42),
            preprocessing.MinMaxScaler(),
            preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
            feature_extraction.PolynomialExtender(),
            (feature_extraction.PolynomialExtender()
             | preprocessing.StandardScaler()
             | linear_model.LinearRegression()),
Code Example #26
File: chain.py, Project: Leo-VK/creme
    @classmethod
    def _unit_test_params(cls):
        yield {"model": linear_model.LinearRegression()}
Code Example #27
File: bagging.py, Project: xianghu-xiaokachekkk/river
    @classmethod
    def _default_params(cls):
        return {'model': linear_model.LinearRegression()}
Code Example #28
File: online_training.py, Project: supermt/ALCHEMIST
                         n_samples=1440)

    def __iter__(self):
        return stream.iter_csv(self.path,
                               target='interval_qps',
                               converters={'interval_qps': int})


def get_ordinal_date(x):
    return {'ordinal_date': int(x['secs_elapsed'])}


model = compose.Pipeline(
    ('ordinal_date', compose.FuncTransformer(get_ordinal_date)),
    ('scale', preprocessing.MinMaxScaler()),
    ('lin_reg', linear_model.LinearRegression()))

from river import metrics
import matplotlib.pyplot as plt

# target_data = "../log_traces/Mixgraph/1000_0.0000073_45000/report.csv"
target_data = "../log_traces/StorageMaterial.NVMeSSD/12CPU/64MB/report.csv_1180"
import os
target_data = os.path.abspath(target_data)


def evaluate_model(model):

    metric = metrics.Rolling(metrics.MAE(), 12)

    # dates = []
Code Example #29
File: chain.py, Project: venuraja79/river
    @classmethod
    def _unit_test_params(cls):
        return {"model": linear_model.LinearRegression()}
Code Example #30
import pytest
from sklearn.utils import estimator_checks
from sklearn import linear_model as sk_linear_model

from river import base
from river import cluster
from river import compat
from river import linear_model
from river import preprocessing


@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(estimator, id=str(estimator)) for estimator in [
            linear_model.LinearRegression(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler(),
            cluster.KMeans(seed=42),
        ]
    ],
)
@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_river_to_sklearn_check_estimator(estimator: base.Estimator):
    skl_estimator = compat.convert_river_to_sklearn(estimator)
    estimator_checks.check_estimator(skl_estimator)


@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")