def __init__(self, optimizer: optim.Optimizer = None,
             loss: optim.losses.Loss = None, l2=0.,
             initializer: optim.initializers.Initializer = None,
             clip_gradient=1e12):
    """Set up the model's optimizers, loss, regularization and bias tables."""

    def _fresh_optimizer():
        # Every slot needs its own optimizer state, hence a separate deep
        # copy per call (plain SGD when the caller supplied nothing).
        return optim.SGD() if optimizer is None else copy.deepcopy(optimizer)

    self.optimizer = _fresh_optimizer()
    self.u_optimizer = _fresh_optimizer()
    self.i_optimizer = _fresh_optimizer()
    self.loss = loss if loss is not None else optim.losses.Squared()
    self.l2 = l2
    self.initializer = (optim.initializers.Zeros()
                        if initializer is None else initializer)
    self.clip_gradient = clip_gradient
    self.global_mean = stats.Mean()
    # Per-user / per-item bias tables; absent keys are filled in lazily by
    # calling the initializer.
    self.u_biases: typing.DefaultDict[
        int, optim.initializers.Initializer] = collections.defaultdict(
            self.initializer)
    self.i_biases: typing.DefaultDict[
        int, optim.initializers.Initializer] = collections.defaultdict(
            self.initializer)
def roll_dataframe_stats(
        frame: pd.DataFrame,
        window=14,
        min_steps: int = 1,
        callback: Optional[Callable] = None,
        model=None,
        metric: metrics.ClassificationReport = None):
    """Step an online classifier over rolling windows of ``frame``.

    Parameters
    ----------
    frame: source data; each window must contain a ``y`` column (the target).
    window: base window size handed to ``Windowing``.
    min_steps: minimum number of window steps for the run to count as done.
    callback: optional per-window hook; its return values are collected.
    model: optional online classifier exposing ``predict_one`` / ``fit_one`` /
        ``predict_proba_one`` (presumably a creme pipeline — confirm with
        callers).
    metric: classification report updated with every prediction. When omitted
        a fresh instance is created per call (the previous mutable-default
        instance silently accumulated counts across unrelated calls).

    Returns
    -------
    (done, history): ``done`` is True when at least ``min_steps`` windows were
    processed; ``history`` holds the callback results.
    """
    if metric is None:
        metric = metrics.ClassificationReport()

    windower = Windowing(frame, window_size=window, adaptive_window=True,
                         adapted_window_size=0)
    step_count = 0
    history = []
    _mean_down = stats.Mean()
    _mean_up = stats.Mean()

    while windower.has_next_observation:
        res = windower.step()
        # "records" is the documented pandas orient; the abbreviated
        # "record" was removed in pandas 2.0.
        x = res.to_dict(orient="records")[0]
        y = x.pop("y")

        # The previous guard tested an unused copy of the model; guard on
        # the model itself since that is what is predicted with and fitted.
        if model is not None:
            y_pred = boolean_flip(model.predict_one(x))
            model.fit_one(x, y)

            if y_pred != y:
                # Track the average predicted probability of each class for
                # misclassified samples.
                prob_up = model.predict_proba_one(x)
                prob_values = list(prob_up.values())
                is_false_pct = _mean_down.update(prob_values[0]).get()
                is_true_pct = _mean_up.update(prob_values[1]).get()
                down_msg = f"Probability going DOWNWARDS for incorrect classifications: {is_false_pct}"
                up_msg = f"Probability going UPWARDS for incorrect classifications: {is_true_pct}"
                logger.error(up_msg)
                logger.warning(down_msg)

            # creme metrics take update(y_true, y_pred); the original call
            # had the arguments swapped.
            metric.update(y, y_pred)
            mod_acc = metric.accuracy
            logger.debug(f"Overall model accuracy: {mod_acc} \n\n")

        if callback is not None:
            history.append(callback(res))
        step_count += 1

    return step_count >= min_steps, history
def main():
    """Benchmark creme GLM regressors (raw vs. detrended) on the bikes data."""

    def add_hour(x):
        # Derive the hour-of-day feature from the timestamp.
        x['hour'] = x['moment'].hour
        return x

    benchmark.benchmark(
        get_X_y=datasets.fetch_bikes,
        n=182470,
        get_pp=lambda: (
            compose.Whitelister('clouds', 'humidity', 'pressure',
                                'temperature', 'wind')
            + (add_hour | feature_extraction.TargetAgg(by=['station', 'hour'],
                                                       how=stats.Mean()))
            | preprocessing.StandardScaler()
        ),
        models=[
            ('creme', 'GLM',
             linear_model.GLMRegressor(optimizer=optim.VanillaSGD(0.01),
                                       l2=0.)),
            # Distinct label so the two configurations can be told apart in
            # the benchmark output (both were previously reported as 'GLM').
            ('creme', 'GLM detrended',
             meta.Detrender(linear_model.GLMRegressor(
                 optimizer=optim.VanillaSGD(0.01), l2=0.))),
        ],
        get_metric=metrics.MSE)
def main():
    """Online evaluation of a bike-count regressor with delayed labels.

    ``stream.simulate_qa`` replays the bikes dataset as a question/answer
    stream: a prediction is made when a sample arrives (label still unknown)
    and is scored only once its label shows up 30 minutes later.
    """
    import datetime as dt
    from creme import compose
    from creme import datasets
    from creme import feature_extraction
    from creme import linear_model
    from creme import metrics  # was aliased as the typo `metricss`
    from creme import preprocessing
    from creme import stats
    from creme import stream

    X_y = datasets.Bikes()
    X_y = stream.simulate_qa(X_y, moment='moment',
                             delay=dt.timedelta(minutes=30))

    def add_time_features(x):
        # Hour of day and weekday capture the daily/weekly seasonality.
        return {**x, 'hour': x['moment'].hour, 'day': x['moment'].weekday()}

    model = add_time_features
    model |= (
        compose.Select('clouds', 'humidity', 'pressure', 'temperature',
                       'wind')
        + feature_extraction.TargetAgg(by=['station', 'hour'],
                                       how=stats.Mean())
        + feature_extraction.TargetAgg(by='station', how=stats.EWMean())
    )
    model |= preprocessing.StandardScaler()
    model |= linear_model.LinearRegression()

    metric = metrics.MAE()
    questions = {}

    for i, x, y in X_y:
        if y is None:
            # Question: predict now, remember the prediction until the
            # label arrives.
            questions[i] = model.predict_one(x)
        else:
            # Answer: score the stored prediction, then learn. Popping the
            # entry keeps the pending-questions dict from growing forever.
            metric.update(y, questions.pop(i))
            model = model.fit_one(x, y)

        if i >= 30000 and i % 30000 == 0:
            print(i, metric)
def load_stats():
    """Yield one instance of every class exposed by ``creme.stats``.

    ``stats.Link`` subclasses need wrapped statistics, so they are built as a
    ``Shift(1)`` → ``Mean`` pipeline; every other class is constructed from
    its signature, substituting ``1`` for each parameter with no default.
    Classes whose signature cannot be introspected are built with no
    arguments.
    """
    for _, obj in inspect.getmembers(importlib.import_module('creme.stats'),
                                     inspect.isclass):
        try:
            if issubclass(obj, stats.Link):
                yield obj(stats.Shift(1), stats.Mean())
                continue
            sig = inspect.signature(obj)
            # Identity comparison against the `empty` sentinel: `!=` would
            # misbehave for defaults whose __eq__ does not return a plain
            # bool (e.g. numpy arrays).
            yield obj(**{
                param.name: (param.default
                             if param.default is not param.empty else 1)
                for param in sig.parameters.values()
            })
        except ValueError:
            # inspect.signature raises ValueError for some C-implemented
            # callables; fall back to a default construction.
            yield obj()
def __init__(self, n_factors=10, bias_optimizer: optim.Optimizer = None,
             latent_optimizer: optim.Optimizer = None,
             loss: optim.losses.Loss = None, l2_bias=0., l2_latent=0.,
             weight_initializer: optim.initializers.Initializer = None,
             latent_initializer: optim.initializers.Initializer = None,
             clip_gradient=1e12, seed: int = None):
    """Set up the factorization model's optimizers, initializers and tables."""

    def _fresh(opt):
        # User and item sides each keep independent optimizer state, so a
        # separate deep copy is made per slot (SGD by default).
        return optim.SGD() if opt is None else copy.deepcopy(opt)

    self.n_factors = n_factors
    self.u_bias_optimizer = _fresh(bias_optimizer)
    self.i_bias_optimizer = _fresh(bias_optimizer)
    self.u_latent_optimizer = _fresh(latent_optimizer)
    self.i_latent_optimizer = _fresh(latent_optimizer)
    self.loss = loss if loss is not None else optim.losses.Squared()
    self.l2_bias = l2_bias
    self.l2_latent = l2_latent

    # Biases start at zero unless told otherwise; latent factors start from
    # a small seeded Gaussian so the factorization can break symmetry.
    self.weight_initializer = (optim.initializers.Zeros()
                               if weight_initializer is None
                               else weight_initializer)
    self.latent_initializer = (optim.initializers.Normal(sigma=.1, seed=seed)
                               if latent_initializer is None
                               else latent_initializer)

    self.clip_gradient = clip_gradient
    self.seed = seed
    self.global_mean = stats.Mean()

    # Lazily-populated per-user / per-item parameter tables.
    self.u_biases = collections.defaultdict(self.weight_initializer)
    self.i_biases = collections.defaultdict(self.weight_initializer)
    random_latents = functools.partial(self.latent_initializer,
                                       shape=self.n_factors)
    self.u_latents = collections.defaultdict(random_latents)
    self.i_latents = collections.defaultdict(random_latents)
def __init__(self, regressor: base.Regressor, window_size: int = None):
    """Store the wrapped regressor and the running mean used for detrending.

    The mean is computed over the entire stream by default; when
    ``window_size`` is given a rolling mean over that window is used instead.
    """
    self.regressor = regressor
    if window_size is None:
        self.mean = stats.Mean()
    else:
        self.mean = stats.RollingMean(window_size)
def get_all_estimators():
    """Yield a ready-to-test instance of every creme estimator.

    Walks every public submodule of ``creme``, finds each concrete
    ``base.Estimator`` subclass, skips the ones that cannot be built
    generically, and supplies sensible constructor arguments to the classes
    that need them.
    """
    # Estimators excluded from generic instantiation (wrappers, estimators
    # requiring models/statistics at construction time, etc.).
    ignored = (Creme2SKLBase, SKL2CremeBase, compat.PyTorch2CremeRegressor,
               compose.FuncTransformer, compose.Pipeline,
               ensemble.StackingBinaryClassifier, feature_extraction.Agg,
               feature_extraction.TargetAgg, feature_extraction.Differ,
               feature_selection.PoissonInclusion,
               imblearn.RandomOverSampler, imblearn.RandomUnderSampler,
               imblearn.RandomSampler, impute.PreviousImputer,
               impute.StatImputer, linear_model.FFMClassifier,
               linear_model.FFMRegressor, linear_model.FMClassifier,
               linear_model.FMRegressor, linear_model.HOFMClassifier,
               linear_model.HOFMRegressor, linear_model.SoftmaxRegression,
               meta.PredClipper, meta.TransformedTargetRegressor,
               multioutput.ClassifierChain, multioutput.RegressorChain,
               preprocessing.OneHotEncoder, reco.Baseline, reco.BiasedMF,
               reco.FunkMF, reco.RandomNormal, time_series.Detrender,
               time_series.GroupDetrender, time_series.SNARIMAX)

    def is_estimator(obj):
        # Only classes deriving from base.Estimator qualify.
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    for submodule in importlib.import_module('creme').__all__:
        if submodule == 'base':
            continue
        for _, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):
            # NOTE: this elif chain is order-sensitive — more specific
            # classes must be matched before their bases.
            if issubclass(obj, ignored):
                continue
            elif issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())
            elif issubclass(obj, meta.BoxCoxRegressor):
                inst = obj(regressor=linear_model.LinearRegression())
            elif issubclass(obj, tree.RandomForestClassifier):
                inst = obj()
            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())
            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())
            elif issubclass(obj, ensemble.AdaBoostClassifier):
                inst = obj(linear_model.LogisticRegression())
            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler()
                    | linear_model.LinearRegression(intercept_lr=.1),
                    preprocessing.StandardScaler()
                    | linear_model.PARegressor(),
                ])
            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorrelation())
            elif issubclass(obj, linear_model.LinearRegression):
                # Linear models are scaled first so the default learning
                # rates behave.
                inst = preprocessing.StandardScaler() | obj(intercept_lr=.1)
            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()
            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())
            else:
                inst = obj()

            yield inst
assert isinstance(pickle.loads(pickle.dumps(stat)), stat.__class__) assert isinstance(copy.deepcopy(stat), stat.__class__) # Check the statistic has a working __str__ and name method assert isinstance(str(stat), str) if isinstance(stat, stats.Univariate): assert isinstance(stat.name, str) @pytest.mark.parametrize( 'stat, func', [(stats.Kurtosis(bias=True), sp_stats.kurtosis), (stats.Kurtosis(bias=False), functools.partial(sp_stats.kurtosis, bias=False)), (stats.Mean(), statistics.mean), (stats.Skew(bias=True), sp_stats.skew), (stats.Skew(bias=False), functools.partial(sp_stats.skew, bias=False)), (stats.Var(ddof=0), np.var), (stats.Var(), functools.partial(np.var, ddof=1))]) def test_univariate(stat, func): # Shut up np.warnings.filterwarnings('ignore') X = [random.random() for _ in range(30)] for i, x in enumerate(X): stat.update(x) try: assert math.isclose(stat.get(), func(X[:i + 1]), abs_tol=1e-10) except AssertionError:
def __init__(self):
    # Running arithmetic mean of the values seen so far.
    self._mean = stats.Mean()
def get_all_estimators():
    """Yield a ready-to-test instance of every creme estimator.

    Walks every public submodule of ``creme``, finds each concrete
    ``base.Estimator`` subclass, skips the ones that cannot be built
    generically, and supplies sensible constructor arguments to the classes
    that need them.
    """
    # Estimators excluded from generic instantiation (wrappers, estimators
    # requiring models/statistics at construction time, etc.).
    ignored = (CremeBaseWrapper, SKLBaseWrapper, base.Wrapper,
               compose.FuncTransformer, ensemble.StackingBinaryClassifier,
               feature_extraction.Agg, feature_extraction.TargetAgg,
               feature_extraction.Differ, linear_model.FMRegressor,
               linear_model.SoftmaxRegression, multioutput.ClassifierChain,
               multioutput.RegressorChain, naive_bayes.BernoulliNB,
               naive_bayes.ComplementNB, preprocessing.OneHotEncoder,
               tree.DecisionTreeClassifier)

    def is_estimator(obj):
        # Only classes deriving from base.Estimator qualify.
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    for submodule in importlib.import_module('creme').__all__:
        if submodule == 'base':
            continue
        for name, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):
            # NOTE: this if/elif chain is order-sensitive — more specific
            # classes must be matched before their bases.
            if issubclass(obj, ignored):
                continue
            if issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())
            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())
            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())
            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler()
                    | linear_model.LinearRegression(intercept_lr=0.1),
                    preprocessing.StandardScaler()
                    | linear_model.PARegressor(),
                ])
            elif issubclass(obj, feature_selection.RandomDiscarder):
                inst = obj(n_to_keep=5)
            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorrelation())
            elif issubclass(obj, linear_model.LinearRegression):
                # Linear models are scaled first so the default learning
                # rates behave.
                inst = preprocessing.StandardScaler() | obj(intercept_lr=0.1)
            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()
            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())
            else:
                inst = obj()

            yield inst
def __init__(self, seed=None):
    """Initialize running moments and a seeded RNG for reproducible draws."""
    super().__init__()
    self.seed = seed
    # Dedicated Random instance: draws are reproducible for a fixed seed and
    # isolated from the module-level RNG.
    self._rng = random.Random(seed)
    self.mean = stats.Mean()
    self.variance = stats.Var()
def roll_dataframe_stats(
        frame: pd.DataFrame,
        window=14,
        min_steps: int = 1,
        callback: Optional[Callable] = None,
        # NOTE(review): mutable default — this single ClassificationReport
        # instance is shared by every call that omits ``metric``.
        metric: metrics.ClassificationReport = metrics.ClassificationReport()):
    """Fit/refit an exact GP regressor over successive windows of ``frame``.

    Each in-bounds window is converted to float32 tensors (``y`` column as
    target). The first window builds an ``ExactGPModel``; later windows fold
    the new observations in via ``get_fantasy_model`` and re-optimize with a
    fresh Adam optimizer for a few iterations.

    Returns ``(step_count >= min_steps, history)``.
    NOTE(review): in this variant ``step_count`` is never incremented and
    ``history`` is never appended to, so the return is always
    ``(0 >= min_steps, [])`` — presumably leftovers from the creme variant of
    this function; confirm intent.
    """
    windower = Windowing(frame, window_size=window, adaptive_window=False,
                         adapted_window_size=0)
    # Commented-out remains of the creme-based variant of this function.
    # while windower.has_next_observation:
    # res = windower.step()
    # x = res.to_dict(orient="record")[0]
    # y = x.pop("y")
    # if model_copy is not None:
    # y_pred = boolean_flip(model.predict_one(x))
    # model.fit_one(x, y)
    # if y_pred != y:
    # prob_up = model.predict_proba_one(x)
    # prob_values = list(prob_up.values())
    # window=14,
    # min_steps: int = 1,
    # callback: Optional[Callable] = None,
    # metric: metrics.ClassificationReport = metrics.ClassificationReport()):
    step_count = 0
    history = []
    # NOTE(review): dead code — ``model`` is overwritten below and
    # ``model_copy`` / ``_mean_down`` / ``_mean_up`` are never used here.
    model = None
    model_copy = copy(model)
    _mean_down = stats.Mean()
    _mean_up = stats.Mean()
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    has_train = False
    # Placeholder model built on empty tensors; replaced on the first
    # trainable window.
    model = ExactGPModel(torch.tensor([[]]), torch.tensor([]), likelihood)
    optimizer = torch.optim.Adam(
        [
            # Includes GaussianLikelihood parameters
            {
                'params': model.parameters()
            },
        ], lr=0.1)
    prior_training = None  # NOTE(review): never read afterwards.
    while windower.has_next_observation:
        # Skip windows outside the usable bounds, only advancing the cursor.
        if not windower.is_between_bounds:
            windower.step(incr_only=True)
            continue
        res = windower.step()
        # NOTE(review): ``train_x`` is built BEFORE the ``y`` column is
        # popped, so the target leaks into the features — confirm whether
        # this is intended.
        train_x = torch.tensor(res.values.astype(np.float32))
        y = res.pop("y")
        train_y = torch.tensor(y.values.astype(np.float32))
        print(train_x.size())
        print(train_y.size())
        if has_train is False:
            # First usable window: build the real GP model and switch both
            # model and likelihood into training mode.
            model = ExactGPModel(train_x, train_y, likelihood)
            model.train()
            likelihood.train()
            has_train = True
        else:
            # Subsequent windows: evaluate, then absorb the new data via a
            # fantasy model and restart training with a fresh optimizer
            # (the old optimizer's state refers to the old parameters).
            model.eval()
            likelihood.eval()
            logger.warning(model)
            predicted = model(train_x)  # NOTE(review): result unused.
            model = model.get_fantasy_model(train_x, train_y)
            model.train()
            likelihood.train()
            optimizer = torch.optim.Adam(
                [
                    # Includes GaussianLikelihood parameters
                    {
                        'params': model.parameters()
                    },
                ], lr=0.1)
            prior_information = train_x  # NOTE(review): never read.
        # Short inner optimization loop per window. Indentation reconstructed
        # from a collapsed source — assumed to run for every window, not only
        # on the refit branch; TODO confirm.
        training_iter = 5
        for i in range(training_iter):
            optimizer.zero_grad()
            output = model(train_x)
            # NOTE(review): the MLL object could be hoisted out of this loop;
            # it is rebuilt every iteration.
            mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
            loss = -mll(output, train_y)
            loss.backward()
            print('Iter %d/%d - Loss: %.3f lengthscale: %.3f noise: %.3f' %
                  (i + 1, training_iter, loss.item(),
                   model.covar_module.base_kernel.lengthscale.item(),
                   model.likelihood.noise.item()))
            optimizer.step()
    return step_count >= min_steps, history
assert isinstance(pickle.loads(pickle.dumps(stat)), stat.__class__) assert isinstance(copy.deepcopy(stat), stat.__class__) # Check the statistic has a working __str__ and name method assert isinstance(str(stat), str) if isinstance(stat, stats.Univariate): assert isinstance(stat.name, str) @pytest.mark.parametrize( 'stat, func', [ (stats.Kurtosis(bias=True), sp_stats.kurtosis), (stats.Kurtosis(bias=False), functools.partial(sp_stats.kurtosis, bias=False)), (stats.Mean(), statistics.mean), (stats.Skew(bias=True), sp_stats.skew), (stats.Skew(bias=False), functools.partial(sp_stats.skew, bias=False)), (stats.Var(ddof=0), np.var), (stats.Var(), functools.partial(np.var, ddof=1)) ] ) def test_univariate(stat, func): # Shut up np.warnings.filterwarnings('ignore') X = [random.random() for _ in range(30)] for i, x in enumerate(X): stat.update(x)
def _default_params(cls):
    """Constructor arguments used when this estimator is built for checks."""
    return dict(statistic=stats.Mean())