Example #1
    def __init__(self):
        self.incremental_kmeans = cluster.KMeans(
            n_clusters=5, halflife=0.4, sigma=3, seed=0)
        self.metric_ssw = metrics.cluster.SSW()
        self.metric_cohesion = metrics.cluster.Cohesion()
        self.metric_separation = metrics.cluster.Separation()
        self.metric_ssb = metrics.cluster.SSB()
        self.metric_bic = metrics.cluster.BIC()
        self.metric_silhouette = metrics.cluster.Silhouette()
        self.metric_xieBeni = metrics.cluster.XieBeni()
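On its own this `__init__` only wires up the model and the metric objects. A minimal, hedged driver sketch on toy data, assuming the same `update(x, y_pred, centers)` metric signature used in Example #7:

from river import cluster
from river import metrics

model = cluster.KMeans(n_clusters=5, halflife=0.4, sigma=3, seed=0)
silhouette = metrics.cluster.Silhouette()

for x in [{'x': 0.1, 'y': 0.2}, {'x': 0.8, 'y': 0.9}]:  # toy stream
    model = model.learn_one(x)     # learn_one returns the updated model
    y_pred = model.predict_one(x)  # index of the closest center
    silhouette = silhouette.update(x, y_pred, model.centers)

print(silhouette.get())  # current value of the running metric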
Example #2
    def __init__(self, chunk_size=10, n_clusters=2, **kwargs):

        super().__init__()
        self.time_stamp = 0
        self.n_clusters = n_clusters
        self.chunk_size = chunk_size
        self.kwargs = kwargs

        self._kmeans = cluster.KMeans(n_clusters=self.n_clusters,
                                      **self.kwargs)
        self._temp_chunk = {}  # buffer holding the points of the current chunk
        self.centers = {}  # exposed centers of the global k-means
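Together with the `learn_one` in Example #5, this sets up a two-level, chunk-based k-means: `_temp_chunk` buffers incoming points, a fresh k-means is fit on each full chunk, and the global `_kmeans` clusters the per-chunk centers. A hedged instantiation sketch (the class name `ChunkedKMeans` is hypothetical; extra keyword arguments are forwarded to every inner `cluster.KMeans`):

clusterer = ChunkedKMeans(chunk_size=10, n_clusters=2, halflife=0.5, seed=0)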
Example #3
    def predict_one(self, x):

        micro_cluster_centers = {
            i: self._get_micro_clustering_result()[i].center
            for i in range(len(self._get_micro_clustering_result()))
        }

        kmeans = cluster.KMeans(n_clusters=self.n_macro_clusters,
                                seed=self.seed,
                                **self.kwargs)
        for center in micro_cluster_centers.values():
            kmeans = kmeans.learn_one(center)

        self.centers = kmeans.centers

        index, _ = self._get_closest_micro_cluster(
            x, self._get_micro_clustering_result())
        y = kmeans.predict_one(micro_cluster_centers[index])

        return y
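Note that building the center dict with `range(len(...))` assumes the micro-clustering result is keyed 0..n-1 with no gaps; Example #4 below drops that assumption by iterating `.items()`. A toy illustration of the difference (hypothetical keys):

micro = {0: 'c0', 2: 'c2'}  # gap at key 1, e.g. after a micro-cluster is dropped
# Example #3 style probes key 1 and raises KeyError:
# centers = {i: micro[i] for i in range(len(micro))}
# Example #4 style is robust to non-contiguous keys:
centers = {i: m for i, m in micro.items()}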
Example #4
    def predict_one(self, x):

        micro_cluster_centers = {
            i: result.center
            for i, result in self._get_micro_clustering_result().items()
        }

        kmeans = cluster.KMeans(n_clusters=self.n_macro_clusters,
                                seed=self.seed,
                                **self.kwargs)
        for center in micro_cluster_centers.values():
            kmeans = kmeans.learn_one(center)

        self.centers = kmeans.centers

        index, _ = self._get_closest_micro_cluster(
            x, self._get_micro_clustering_result())
        try:
            return kmeans.predict_one(micro_cluster_centers[index])
        except KeyError:
            # The closest micro-cluster may have no center entry;
            # fall back to cluster 0 instead of raising.
            return 0
Example #5
    def learn_one(self, x, sample_weight=None):

        self.time_stamp += 1

        # Position within the current chunk; 0 means the chunk just filled up.
        index = self.time_stamp % self.chunk_size

        if index == 0:
            self._temp_chunk[self.chunk_size - 1] = x
        elif index == 1:
            # First point of a new chunk: reset the buffer.
            self._temp_chunk = {0: x}
        else:
            self._temp_chunk[index - 1] = x

        if index == 0:
            # The chunk is complete: fit a fresh k-means on its points, then
            # feed the resulting centers to the global (second-level) k-means.
            kmeans_i = cluster.KMeans(n_clusters=self.n_clusters,
                                      **self.kwargs)
            for point_j in self._temp_chunk.values():
                kmeans_i = kmeans_i.learn_one(point_j)
            for center_j in kmeans_i.centers.values():
                self._kmeans = self._kmeans.learn_one(center_j)

        self.centers = self._kmeans.centers

        return self
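A hedged end-to-end sketch combining Examples #2 and #5 (the class name `ChunkedKMeans` is again hypothetical): every tenth point completes a chunk, triggering a per-chunk k-means whose centers update the global model.

import random

clusterer = ChunkedKMeans(chunk_size=10, n_clusters=2, seed=0)
for _ in range(100):
    clusterer = clusterer.learn_one({'x': random.random(), 'y': random.random()})
print(clusterer.centers)  # centers of the global (second-level) k-means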
Example #6
@pytest.mark.parametrize(
    "estimator, check",
    [
        pytest.param(estimator, check, id=f"{estimator}:{check.__name__}")
        for estimator in list(get_all_estimators()) + [
            feature_extraction.TFIDF(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            preprocessing.StandardScaler() | linear_model.PAClassifier(),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(
                 linear_model.LogisticRegression())),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(linear_model.PAClassifier())),
            naive_bayes.GaussianNB(),
            preprocessing.StandardScaler(),
            cluster.KMeans(n_clusters=5, seed=42),
            preprocessing.MinMaxScaler(),
            preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
            feature_extraction.PolynomialExtender(),
            (feature_extraction.PolynomialExtender()
             | preprocessing.StandardScaler()
             | linear_model.LinearRegression()),
            feature_selection.VarianceThreshold(),
            feature_selection.SelectKBest(similarity=stats.PearsonCorr()),
        ] for check in utils.estimator_checks.yield_checks(estimator)
        if check.__name__ not in estimator._unit_test_skips()
    ],
)
def test_check_estimator(estimator, check):
    check(copy.deepcopy(estimator))
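The same generic checks can also be run by hand on a single estimator; a minimal sketch using only the calls already shown above:

import copy

from river import cluster
from river import utils

model = cluster.KMeans(n_clusters=5, seed=42)
for check in utils.estimator_checks.yield_checks(model):
    check(copy.deepcopy(model))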
Example #7
# sns.relplot(x="monetary_value", y="recency", hue='cluster',
#             sizes=(50, 500), alpha=.3,palette=sns.color_palette('hls', 5),
#             height=5, data=rm_cluster5)

#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# Incremental k-means

from river import cluster
from river import metrics

X = df_rm_normal.iloc[0:50]

incremental_kmeans = cluster.KMeans(n_clusters=5,
                                    halflife=0.4,
                                    sigma=3,
                                    seed=0)
metric_ssw = metrics.cluster.SSW()
metric_cohesion = metrics.cluster.Cohesion()
metric_separation = metrics.cluster.Separation()
metric_ssb = metrics.cluster.SSB()
metric_bic = metrics.cluster.BIC()
metric_silhouette = metrics.cluster.Silhouette()
metric_xieBeni = metrics.cluster.XieBeni()

for row in X.to_dict('records'):
    incremental_kmeans = incremental_kmeans.learn_one(row)
    prediction = incremental_kmeans.predict_one(row)

    metric_ssw = metric_ssw.update(row, prediction, incremental_kmeans.centers)
    metric_cohesion = metric_cohesion.update(row, prediction,
                                             incremental_kmeans.centers)
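The snippet is truncated here; the remaining metrics are updated the same way. Each river metric exposes its running value via `.get()`, e.g.:

print('SSW:', metric_ssw.get())
print('Silhouette:', metric_silhouette.get())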
Example #8
import pytest
from sklearn import linear_model as sk_linear_model
from sklearn.utils import estimator_checks

from river import base
from river import cluster
from river import compat
from river import linear_model
from river import preprocessing


@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(estimator, id=str(estimator)) for estimator in [
            linear_model.LinearRegression(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler(),
            cluster.KMeans(seed=42),
        ]
    ],
)
@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_river_to_sklearn_check_estimator(estimator: base.Estimator):
    skl_estimator = compat.convert_river_to_sklearn(estimator)
    estimator_checks.check_estimator(skl_estimator)


@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_sklearn_check_twoway():
    estimator = sk_linear_model.SGDRegressor()
    river_estimator = compat.convert_sklearn_to_river(estimator)
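The test body is cut off above. For context, a hedged sketch of what the conversion gives you: the wrapped river estimator exposes the standard scikit-learn interface.

skl_model = compat.convert_river_to_sklearn(linear_model.LinearRegression())
# skl_model.fit(X, y) / skl_model.predict(X)  # scikit-learn style calls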
Example #9
import functools
import random
import time

import pandas as pd

from streamz import Stream
import hvplot.streamz
from streamz.river import RiverTrain
from river import cluster
import holoviews as hv
from panel.pane.holoviews import HoloViews
import panel as pn
hv.extension('bokeh')

model = cluster.KMeans(n_clusters=3, sigma=0.1, mu=0.5)
centres = [[random.random(), random.random()] for _ in range(3)]
count = [0]


def gen(move_chance=0.05):
    centre = int(random.random() * 3)  # 3x faster than random.randint(0, 2)
    if random.random() < move_chance:
        centres[centre][0] += random.random() / 5 - 0.1
        centres[centre][1] += random.random() / 5 - 0.1
    value = {
        'x': random.random() / 20 + centres[centre][0],
        'y': random.random() / 20 + centres[centre][1]
    }
    count[0] += 1
    return value
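A hedged driver sketch: the original script wires `gen()` through a streamz `Stream` into `RiverTrain`, but the model can also be fed directly:

for _ in range(1000):
    model = model.learn_one(gen())
print(model.centers, count[0])  # learned centers and number of points seen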