Esempi in Python per KMeans, esempi in Python per river.cluster.KMeans

Esempio n. 1

0

Mostra file

 def __init__(self):
     """Create the incremental KMeans model and one evaluator per cluster metric."""
     kmeans_params = {"n_clusters": 5, "halflife": 0.4, "sigma": 3, "seed": 0}
     self.incremental_kmeans = cluster.KMeans(**kmeans_params)

     # Every evaluator below is taken from the ``metrics.cluster`` namespace.
     evaluators = metrics.cluster
     self.metric_ssw = evaluators.SSW()
     self.metric_cohesion = evaluators.Cohesion()
     self.metric_separation = evaluators.Separation()
     self.metric_ssb = evaluators.SSB()
     self.metric_bic = evaluators.BIC()
     self.metric_silhouette = evaluators.Silhouette()
     self.metric_xieBeni = evaluators.XieBeni()

Esempio n. 2

0

Mostra file

    def __init__(self, chunk_size=10, n_clusters=2, **kwargs):
        """Initialise the chunk buffer and the wrapped ``cluster.KMeans`` model.

        Args:
            chunk_size: stored unchanged as ``self.chunk_size``.
            n_clusters: stored as ``self.n_clusters`` and passed to
                ``cluster.KMeans``.
            **kwargs: stored as ``self.kwargs`` and forwarded to
                ``cluster.KMeans``.
        """
        super().__init__()

        # Settings exactly as received from the caller.
        self.chunk_size = chunk_size
        self.n_clusters = n_clusters
        self.kwargs = kwargs

        # Counter starts at zero; both containers start empty.
        self.time_stamp = 0
        self._kmeans = cluster.KMeans(n_clusters=n_clusters, **kwargs)
        self._temp_chunk = {}
        self.centers = {}

Esempio n. 3

0

Mostra file

File: clustream.py Progetto: pplonski/river

    def predict_one(self, x):
        """Return the macro-cluster assignment for ``x``.

        A fresh ``cluster.KMeans`` model is trained on the centers of the
        current micro-clusters and its centers are stored in ``self.centers``.
        The result is that model's prediction for the center of the
        micro-cluster that ``self._get_closest_micro_cluster`` selects for
        ``x``.

        Args:
            x: observation, passed through to
                ``self._get_closest_micro_cluster``.

        Returns:
            The value returned by ``KMeans.predict_one`` for the selected
            micro-cluster center.
        """
        # Fetch the micro-clusters once; the previous code re-invoked the
        # helper on every iteration of the comprehension and again below.
        micro_clusters = self._get_micro_clustering_result()

        # Positional indexing (0..n-1) is kept on purpose so that any
        # container supporting ``len`` and integer lookup keeps working.
        micro_cluster_centers = {
            i: micro_clusters[i].center for i in range(len(micro_clusters))
        }

        kmeans = cluster.KMeans(n_clusters=self.n_macro_clusters,
                                seed=self.seed,
                                **self.kwargs)
        for center in micro_cluster_centers.values():
            # The model is rebound to the value returned by ``learn_one``.
            kmeans = kmeans.learn_one(center)

        self.centers = kmeans.centers

        index, _ = self._get_closest_micro_cluster(x, micro_clusters)
        return kmeans.predict_one(micro_cluster_centers[index])

Esempio n. 4

0

Mostra file

    def predict_one(self, x):
        """Return the macro-cluster assignment for ``x``, or 0 on ``KeyError``.

        A fresh ``cluster.KMeans`` model is trained on the centers of the
        current micro-clusters and its centers are stored in ``self.centers``.
        The prediction is made for the center of the micro-cluster selected by
        ``self._get_closest_micro_cluster``.

        Args:
            x: observation, passed through to
                ``self._get_closest_micro_cluster``.

        Returns:
            The value returned by ``KMeans.predict_one``, or ``0`` when the
            center lookup or the prediction raises ``KeyError``.
        """
        micro_cluster_centers = {}
        for key, micro_cluster in self._get_micro_clustering_result().items():
            micro_cluster_centers[key] = micro_cluster.center

        macro_model = cluster.KMeans(n_clusters=self.n_macro_clusters,
                                     seed=self.seed,
                                     **self.kwargs)
        for micro_center in micro_cluster_centers.values():
            macro_model = macro_model.learn_one(micro_center)

        self.centers = macro_model.centers

        nearest_index, _ = self._get_closest_micro_cluster(
            x, self._get_micro_clustering_result())

        try:
            label = macro_model.predict_one(
                micro_cluster_centers[nearest_index])
        except KeyError:
            label = 0
        return label

Esempio n. 5

0

Mostra file

    def learn_one(self, x, sample_weight=None):
        """Buffer ``x`` and merge each completed chunk into ``self._kmeans``.

        Observations are collected in ``self._temp_chunk``. When the running
        counter reaches a multiple of ``self.chunk_size``, a temporary
        ``cluster.KMeans`` is trained on the buffered points and its centers
        are then fed to ``self._kmeans``.

        Args:
            x: the observation to buffer.
            sample_weight: accepted but not used by this method.

        Returns:
            ``self``.
        """
        self.time_stamp += 1
        slot = self.time_stamp % self.chunk_size
        chunk_complete = slot == 0

        if slot == 1:
            # First observation of a new chunk: start a fresh buffer.
            self._temp_chunk = {0: x}
        else:
            # A remainder of 0 means the last position of the chunk.
            position = self.chunk_size - 1 if chunk_complete else slot - 1
            self._temp_chunk[position] = x

        if chunk_complete:
            chunk_model = cluster.KMeans(n_clusters=self.n_clusters,
                                         **self.kwargs)
            for point in self._temp_chunk.values():
                chunk_model = chunk_model.learn_one(point)
            for chunk_center in chunk_model.centers.values():
                self._kmeans = self._kmeans.learn_one(chunk_center)

        self.centers = self._kmeans.centers

        return self

Esempio n. 6

0

Mostra file

File: test_.py Progetto: renatacgcastanha/river

    "estimator, check",
    [
        pytest.param(estimator, check, id=f"{estimator}:{check.__name__}")
        for estimator in list(get_all_estimators()) + [
            feature_extraction.TFIDF(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            preprocessing.StandardScaler() | linear_model.PAClassifier(),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(
                 linear_model.LogisticRegression())),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(linear_model.PAClassifier())),
            naive_bayes.GaussianNB(),
            preprocessing.StandardScaler(),
            cluster.KMeans(n_clusters=5, seed=42),
            preprocessing.MinMaxScaler(),
            preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
            feature_extraction.PolynomialExtender(),
            (feature_extraction.PolynomialExtender()
             | preprocessing.StandardScaler()
             | linear_model.LinearRegression()),
            feature_selection.VarianceThreshold(),
            feature_selection.SelectKBest(similarity=stats.PearsonCorr()),
        ] for check in utils.estimator_checks.yield_checks(estimator)
        if check.__name__ not in estimator._unit_test_skips()
    ],
)
def test_check_estimator(estimator, check):
    """Run a single ``check`` callable against a deep copy of ``estimator``."""
    # The check receives a copy, so the parametrized instance is not modified.
    check(copy.deepcopy(estimator))

Esempio n. 7

0

Mostra file

# sns.relplot(x="monetary_value", y="recency", hue='cluster',
#             sizes=(50, 500), alpha=.3,palette=sns.color_palette('hls', 5),
#             height=5, data=rm_cluster5)

#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Incremental k-means

from river import cluster
from river import metrics

# Work on the first 50 rows (positional slice) of df_rm_normal only.
X = df_rm_normal.iloc[0:50]

# Incremental k-means model from river, seeded for reproducibility.
incremental_kmeans = cluster.KMeans(n_clusters=5,
                                    halflife=0.4,
                                    sigma=3,
                                    seed=0)
# One evaluator object per metric class taken from ``metrics.cluster``.
metric_ssw = metrics.cluster.SSW()
metric_cohesion = metrics.cluster.Cohesion()
metric_separation = metrics.cluster.Separation()
metric_ssb = metrics.cluster.SSB()
metric_bic = metrics.cluster.BIC()
metric_silhouette = metrics.cluster.Silhouette()
metric_xieBeni = metrics.cluster.XieBeni()

for row in X.to_dict('records'):
    incremental_kmeans = incremental_kmeans.learn_one(row)
    prediction = incremental_kmeans.predict_one(row)

    metric_ssw = metric_ssw.update(row, prediction, incremental_kmeans.centers)
    metric_cohesion = metric_cohesion.update(row, prediction,

Esempio n. 8

0

Mostra file

File: test_sklearn.py Progetto: renatacgcastanha/river

from river import base
from river import cluster
from river import compat
from river import linear_model
from river import preprocessing


# One test case per estimator below; each case id is the estimator's str() form.
@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(estimator, id=str(estimator)) for estimator in [
            linear_model.LinearRegression(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler(),
            cluster.KMeans(seed=42),
        ]
    ],
)
# SkipTestWarning from sklearn.utils.estimator_checks is ignored for this test.
@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_river_to_sklearn_check_estimator(estimator: base.Estimator):
    """Convert ``estimator`` to sklearn form and run ``check_estimator`` on it.

    The estimator is converted with ``compat.convert_river_to_sklearn`` and
    the result is handed to ``estimator_checks.check_estimator``.
    """
    skl_estimator = compat.convert_river_to_sklearn(estimator)
    estimator_checks.check_estimator(skl_estimator)


@pytest.mark.filterwarnings(
    "ignore::sklearn.utils.estimator_checks.SkipTestWarning")
def test_sklearn_check_twoway():
    estimator = sk_linear_model.SGDRegressor()
    river_estimator = compat.convert_sklearn_to_river(estimator)

Esempio n. 9

0

Mostra file

File: river_kmeans.py Progetto: vishalbelsare/streamz

import functools
import random
import time

import pandas as pd

from streamz import Stream
import hvplot.streamz
from streamz.river import RiverTrain
from river import cluster
import holoviews as hv
from panel.pane.holoviews import HoloViews
import panel as pn
hv.extension('bokeh')

# NOTE(review): n_clusters and the number of centres below are both 3 —
# presumably meant to match; confirm before changing either.
model = cluster.KMeans(n_clusters=3, sigma=0.1, mu=0.5)
# Three [x, y] pairs, each coordinate drawn from random.random(); gen() reads
# these and may shift them in place.
centres = [[random.random(), random.random()] for _ in range(3)]
# One-element list so that gen() can increment the counter in place.
count = [0]


def gen(move_chance=0.05):
    """Generate one random 2-D sample near a randomly chosen centre.

    A centre is picked uniformly from the module-level ``centres`` list. With
    probability ``move_chance`` that centre is first shifted in place by an
    offset of ``random.random() / 5 - 0.1`` on each axis. The sample is the
    centre plus ``random.random() / 20`` on each axis. The module-level
    ``count[0]`` is incremented on every call.

    Args:
        move_chance: probability that the chosen centre is moved before
            sampling.

    Returns:
        A dict with keys ``'x'`` and ``'y'`` holding the sample coordinates.
    """
    # The index range follows len(centres) rather than a hard-coded 3, so
    # every configured centre can be selected. int(random() * n) is kept
    # because the original author noted it is faster than random.randint.
    centre = centres[int(random.random() * len(centres))]
    if random.random() < move_chance:
        centre[0] += random.random() / 5 - 0.1
        centre[1] += random.random() / 5 - 0.1
    value = {
        'x': random.random() / 20 + centre[0],
        'y': random.random() / 20 + centre[1],
    }
    count[0] += 1
    return value