def main():
    """Run a prequential evaluation of EvolvingClustering on "stream1".

    Loads the "stream1" dataset, builds an ``EvolvingClustering`` model with
    debugging enabled, and hands the model, the data, ``adjusted_rand_score``,
    the training size and the window size to
    ``Benchmarks.prequential_evaluation``. Nothing is returned.
    """
    data, labels = load_dataset.load_dataset("stream1")

    # Experiment parameters; the sample and training sizes scale with the
    # cluster count.
    n_clusters = 4
    n_samples = n_clusters * 2000  # computed for reference; not used below
    n_train = n_clusters * 800
    window = 100

    model = EvolvingClustering.EvolvingClustering(
        macro_cluster_update=1,
        variance_limit=0.001,
        debug=True,
    )
    Benchmarks.prequential_evaluation(
        model, data, labels, adjusted_rand_score, n_train, window
    )
# Example #2
# 0
def main():
    """Fit EvolvingClustering on two batches of "gaussian" data, then predict.

    The first 1000 rows and first 8 columns of the dataset are standardized
    and then rescaled with ``MinMaxScaler``. The model is fitted on rows
    0-99 and then on rows 100-199, after which ``predict`` is called on the
    scaled data. The predictions are neither returned nor used further here.
    """
    features, labels = load_dataset.load_dataset("gaussian")
    # Alternative dataset: load_dataset.load_dataset("s2")

    features = features[:1000, :8]
    labels = labels[:1000]

    # Standardize first, then rescale the standardized values.
    standardized = preprocessing.scale(features)
    scaler = preprocessing.MinMaxScaler().fit(standardized)
    features = scaler.transform(standardized)

    model = EvolvingClustering.EvolvingClustering(variance_limit=0.01,
                                                  debug=True)
    # Alternative model: EvolvingClustering2.EvolvingClustering2(rad=0.04, debug=True)

    # Two consecutive fits on 100-row batches.
    for start in (0, 100):
        model.fit(features[start:start + 100])

    # The slice bound (3000) exceeds the 1000 rows kept above.
    predictions = model.predict(features[:3000])
from sklearn import preprocessing
from evolving import EvolvingClustering
from evolving.util import Metrics, load_dataset
import matplotlib.pyplot as plt
from time import time as time
from benchmarks.denstream.DenStream import DenStream

# Colormap handle. matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and
# removed in 3.9, so go through pyplot.get_cmap, which exists across versions.
cmap = plt.get_cmap('rainbow')

# Alternative datasets:
#X, y = load_dataset.load_dataset("s2")
#X, y = load_dataset.load_dataset("blobs", n_samples=1000, n_features=2)
X, y = load_dataset.load_dataset("gaussian")

# Keep the first 100 rows and (at most) the first 20 columns.
# NOTE(review): y is not truncated to match the 100 rows of X -- confirm
# against whatever consumes y later.
X = X[:100, :20]

# Standardize, then rescale the standardized values with MinMaxScaler.
standardized_X = preprocessing.scale(X)
minmaxscaler = preprocessing.MinMaxScaler()
minmaxscaler.fit(standardized_X)
X = minmaxscaler.transform(standardized_X)

# CLUSTREAM #########################################

#clustream = CluStream(q=100, m=10, radius_factor = 1.8, delta=10, k=5, init_number=100)
#y_pred = clustream.fit_predict(X)
#y_pred[y_pred == -1] = 5

#print("Purity: %10.4f"% (Metrics.purity(y,y_pred)))
#print("Precision: %10.4f"% (Metrics.precision(y,y_pred)))
#print("Recall: %10.4f"% (Metrics.recall(y,y_pred)))

# CLUSTREAM #########################################
from sklearn import preprocessing
from evolving import EvolvingClustering
from evolving.util import Benchmarks, load_dataset
import numpy as np
from sklearn.metrics import adjusted_rand_score

# Load the "s2" dataset, standardize it, then rescale with MinMaxScaler.
X, y = load_dataset.load_dataset("s2")
standardized_X = preprocessing.scale(X)
minmaxscaler = preprocessing.MinMaxScaler().fit(standardized_X)
X = minmaxscaler.transform(standardized_X)

# Keep only the first element of every label entry.
y = np.array([label[0] for label in y])

evol_model = EvolvingClustering.EvolvingClustering(
    macro_cluster_update=1,
    variance_limit=0.01,
    debug=False,
)

# NOTE(review): these two values are not referenced by the call below.
train_size, window_size = 3000, 100

# Evaluate on the first 400 samples with 10 trials.
Benchmarks.monte_carlo_evaluation(
    evol_model,
    adjusted_rand_score,
    X[:400],
    y[:400],
    trials=10,
)