def __init__(self):
    self.incremental_kmeans = cluster.KMeans(
        n_clusters=5, halflife=0.4, sigma=3, seed=0)
    self.metric_ssw = metrics.cluster.SSW()
    self.metric_cohesion = metrics.cluster.Cohesion()
    self.metric_separation = metrics.cluster.Separation()
    self.metric_ssb = metrics.cluster.SSB()
    self.metric_bic = metrics.cluster.BIC()
    self.metric_silhouette = metrics.cluster.Silhouette()
    self.metric_xieBeni = metrics.cluster.XieBeni()
def __init__(self, chunk_size=10, n_clusters=2, **kwargs):
    super().__init__()
    self.time_stamp = 0
    self.n_clusters = n_clusters
    self.chunk_size = chunk_size
    self.kwargs = kwargs
    self._kmeans = cluster.KMeans(n_clusters=self.n_clusters, **self.kwargs)
    self._temp_chunk = {}
    self.centers = {}
def predict_one(self, x):
    micro_cluster_centers = {
        i: self._get_micro_clustering_result()[i].center
        for i in range(len(self._get_micro_clustering_result()))
    }
    kmeans = cluster.KMeans(
        n_clusters=self.n_macro_clusters, seed=self.seed, **self.kwargs)
    for center in micro_cluster_centers.values():
        kmeans = kmeans.learn_one(center)
    self.centers = kmeans.centers
    index, _ = self._get_closest_micro_cluster(
        x, self._get_micro_clustering_result())
    y = kmeans.predict_one(micro_cluster_centers[index])
    return y
def predict_one(self, x):
    micro_cluster_centers = {
        i: result.center
        for i, result in self._get_micro_clustering_result().items()
    }
    kmeans = cluster.KMeans(
        n_clusters=self.n_macro_clusters, seed=self.seed, **self.kwargs)
    for center in micro_cluster_centers.values():
        kmeans = kmeans.learn_one(center)
    self.centers = kmeans.centers
    index, _ = self._get_closest_micro_cluster(
        x, self._get_micro_clustering_result())
    try:
        return kmeans.predict_one(micro_cluster_centers[index])
    except KeyError:
        # Guard: the closest micro-cluster index may not be present in the
        # current micro-cluster mapping; default to cluster 0 in that case.
        return 0
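# A minimal usage sketch for the micro-cluster model the two predict_one
# variants above appear to belong to. Assumption: this is river's
# cluster.CluStream (n_macro_clusters, seed and the micro-cluster helpers
# match its API); the class name, parameters, and sample data here are
# illustrative, not confirmed by the snippets themselves.
from river import cluster

clustream = cluster.CluStream(
    n_macro_clusters=2, max_micro_clusters=3, time_window=1, seed=0)
for x in ({'x': 1, 'y': 2}, {'x': 1, 'y': 4}, {'x': -4, 'y': 2}, {'x': -4, 'y': 4}):
    clustream = clustream.learn_one(x)  # this river API returns the model itself
print(clustream.predict_one({'x': 1, 'y': 3}))  # index of the closest macro cluster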
def learn_one(self, x, sample_weight=None):
    self.time_stamp += 1
    index = self.time_stamp % self.chunk_size
    # Buffer the incoming point; index 1 starts a fresh chunk, index 0 means
    # the chunk is now full (time_stamp is a multiple of chunk_size).
    if index == 0:
        self._temp_chunk[self.chunk_size - 1] = x
    elif index == 1:
        self._temp_chunk = {0: x}
    else:
        self._temp_chunk[index - 1] = x
    if index == 0:
        # Cluster the completed chunk, then feed the resulting centers into
        # the global k-means, which is thus trained on chunk summaries only.
        kmeans_i = cluster.KMeans(n_clusters=self.n_clusters, **self.kwargs)
        for point_j in self._temp_chunk.values():
            kmeans_i = kmeans_i.learn_one(point_j)
        for center_j in kmeans_i.centers.values():
            self._kmeans = self._kmeans.learn_one(center_j)
        self.centers = self._kmeans.centers
    return self
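# A minimal usage sketch for the chunked learn_one above. Assumption: the
# surrounding class is river's cluster.STREAMKMeans (chunk_size, n_clusters,
# and the extra **kwargs forwarded to KMeans match its signature); treat the
# class name as an educated guess rather than something the snippet states.
from river import cluster

streamkmeans = cluster.STREAMKMeans(
    chunk_size=3, n_clusters=2, halflife=0.5, sigma=1.5, seed=0)
X = [
    {'x': 1, 'y': 0.5}, {'x': 1, 'y': 0.625}, {'x': 1, 'y': 0.75},
    {'x': 4, 'y': 2.25}, {'x': 4, 'y': 2.5}, {'x': 4, 'y': 3.0},
]
for x in X:
    streamkmeans = streamkmeans.learn_one(x)  # re-clusters after each full chunk
print(streamkmeans.predict_one({'x': 1, 'y': 0.6}))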
"estimator, check", [ pytest.param(estimator, check, id=f"{estimator}:{check.__name__}") for estimator in list(get_all_estimators()) + [ feature_extraction.TFIDF(), linear_model.LogisticRegression(), preprocessing.StandardScaler() | linear_model.LinearRegression(), preprocessing.StandardScaler() | linear_model.PAClassifier(), (preprocessing.StandardScaler() | multiclass.OneVsRestClassifier( linear_model.LogisticRegression())), (preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.PAClassifier())), naive_bayes.GaussianNB(), preprocessing.StandardScaler(), cluster.KMeans(n_clusters=5, seed=42), preprocessing.MinMaxScaler(), preprocessing.MinMaxScaler() + preprocessing.StandardScaler(), feature_extraction.PolynomialExtender(), (feature_extraction.PolynomialExtender() | preprocessing.StandardScaler() | linear_model.LinearRegression()), feature_selection.VarianceThreshold(), feature_selection.SelectKBest(similarity=stats.PearsonCorr()), ] for check in utils.estimator_checks.yield_checks(estimator) if check.__name__ not in estimator._unit_test_skips() ], ) def test_check_estimator(estimator, check): check(copy.deepcopy(estimator))
# sns.relplot(x="monetary_value", y="recency", hue='cluster', # sizes=(50, 500), alpha=.3,palette=sns.color_palette('hls', 5), # height=5, data=rm_cluster5) #------------------------------------------------------------------------------------------------------------------------------------------------------------------------- #------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Incremental k-means from river import cluster from river import metrics X = df_rm_normal.iloc[0:50] incremental_kmeans = cluster.KMeans(n_clusters=5, halflife=0.4, sigma=3, seed=0) metric_ssw = metrics.cluster.SSW() metric_cohesion = metrics.cluster.Cohesion() metric_separation = metrics.cluster.Separation() metric_ssb = metrics.cluster.SSB() metric_bic = metrics.cluster.BIC() metric_silhouette = metrics.cluster.Silhouette() metric_xieBeni = metrics.cluster.XieBeni() for row in X.to_dict('records'): incremental_kmeans = incremental_kmeans.learn_one(row) prediction = incremental_kmeans.predict_one(row) metric_ssw = metric_ssw.update(row, prediction, incremental_kmeans.centers) metric_cohesion = metric_cohesion.update(row, prediction,
from river import base
from river import cluster
from river import compat
from river import linear_model
from river import preprocessing


@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(estimator, id=str(estimator))
        for estimator in [
            linear_model.LinearRegression(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler(),
            cluster.KMeans(seed=42),
        ]
    ],
)
@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_river_to_sklearn_check_estimator(estimator: base.Estimator):
    skl_estimator = compat.convert_river_to_sklearn(estimator)
    estimator_checks.check_estimator(skl_estimator)


@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_sklearn_check_twoway():
    estimator = sk_linear_model.SGDRegressor()
    river_estimator = compat.convert_sklearn_to_river(estimator)
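# A minimal usage sketch of the river-to-sklearn direction tested above: the
# converted estimator follows the scikit-learn fit/predict interface. The
# make_regression data is illustrative only, not part of the test file.
from river import compat, linear_model
from sklearn import datasets

skl_model = compat.convert_river_to_sklearn(linear_model.LinearRegression())
X, y = datasets.make_regression(n_samples=200, n_features=4, random_state=0)
skl_model.fit(X, y)  # fits incrementally, one sample at a time
print(skl_model.predict(X[:5]))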
import functools
import random
import time

import pandas as pd
from streamz import Stream
import hvplot.streamz
from streamz.river import RiverTrain
from river import cluster
import holoviews as hv
from panel.pane.holoviews import HoloViews
import panel as pn

hv.extension('bokeh')

model = cluster.KMeans(n_clusters=3, sigma=0.1, mu=0.5)
centres = [[random.random(), random.random()] for _ in range(3)]
count = [0]


def gen(move_chance=0.05):
    centre = int(random.random() * 3)  # 3x faster than random.randint(0, 2)
    if random.random() < move_chance:
        centres[centre][0] += random.random() / 5 - 0.1
        centres[centre][1] += random.random() / 5 - 0.1
    value = {
        'x': random.random() / 20 + centres[centre][0],
        'y': random.random() / 20 + centres[centre][1]
    }
    count[0] += 1
    return value
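# A plausible continuation wiring gen into the model via streamz, assuming
# streamz's Stream.from_periodic source and the RiverTrain node imported
# above; the pass_model flag and the 0.03 s poll interval are illustrative
# choices, not facts taken from the code above.
source = Stream.from_periodic(gen, 0.03)
km = RiverTrain(model, pass_model=True)
source.map(lambda x: (x,)).connect(km)  # learn_one consumes (x,) tuples
source.start()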