def main():
    """Run a prequential evaluation of EvolvingClustering on the "stream1" dataset.

    Loads the dataset, builds an ``EvolvingClustering`` model and hands both to
    ``Benchmarks.prequential_evaluation`` together with ``adjusted_rand_score``
    and the training/window sizes defined below. Nothing is returned; any
    reporting is done by the benchmark helper.
    """
    X, y = load_dataset.load_dataset("stream1")

    # Experiment parameters
    nclusters = 4
    train_size = 800 * nclusters  # 800 samples per cluster -> 3200
    window_size = 100

    evol_model = EvolvingClustering.EvolvingClustering(
        macro_cluster_update=1,
        variance_limit=0.001,
        debug=True,
    )

    # Positional order matters here: model, data, labels, metric, then sizes.
    Benchmarks.prequential_evaluation(
        evol_model, X, y, adjusted_rand_score, train_size, window_size
    )
def main():
    """Fit EvolvingClustering on two chunks of the "gaussian" dataset and predict.

    Steps, in order:
      1. Load "gaussian" and keep the first 1000 rows and first 8 columns.
      2. Standardize with ``preprocessing.scale`` and then rescale with a
         ``MinMaxScaler`` fitted on the standardized data.
      3. Call ``fit`` on rows 0-99 and then on rows 100-199.
      4. Call ``predict`` on ``X[:3000]``; X was cut to at most 1000 rows in
         step 1, so this slice is presumably the whole array.
    """
    X, y = load_dataset.load_dataset("gaussian")
    # X, y = load_dataset.load_dataset("s2")

    # Restrict the experiment to a manageable subset.
    X = X[:1000, :8]
    y = y[:1000]

    # Two-stage scaling: standardize first, then min-max rescale.
    standardized = preprocessing.scale(X)
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(standardized)
    X = scaler.transform(standardized)

    evol_model = EvolvingClustering.EvolvingClustering(variance_limit=0.01, debug=True)
    # evol_model = EvolvingClustering2.EvolvingClustering2(rad=0.04, debug=True)

    # Feed the model two consecutive 100-sample chunks.
    for start in (0, 100):
        evol_model.fit(X[start:start + 100])

    predictions = evol_model.predict(X[:3000])
from sklearn import preprocessing
from evolving import EvolvingClustering
from evolving.util import Metrics, load_dataset
import matplotlib.pyplot as plt
from time import time as time
from benchmarks.denstream.DenStream import DenStream

# Colormap for plotting. pyplot.get_cmap is used instead of
# matplotlib.cm.get_cmap, which was deprecated in Matplotlib 3.7 and removed
# in 3.9; pyplot.get_cmap works on both older and newer releases.
cmap = plt.get_cmap('rainbow')

# Dataset selection: uncomment exactly one of the alternatives below.
#X, y = load_dataset.load_dataset("s2")
#X, y = load_dataset.load_dataset("blobs", n_samples=1000, n_features=2)
X, y = load_dataset.load_dataset("gaussian")

# Keep the first 100 rows and (up to) the first 20 columns.
# NOTE(review): y is not sliced to match X here - confirm whether later code
# compares y against predictions made on this 100-row X.
X = X[:100, :20]

# Two-stage scaling: standardize, then min-max rescale the standardized data.
standardized_X = preprocessing.scale(X)
minmaxscaler = preprocessing.MinMaxScaler()
minmaxscaler.fit(standardized_X)
X = minmaxscaler.transform(standardized_X)

# CLUSTREAM #########################################
#clustream = CluStream(q=100, m=10, radius_factor = 1.8, delta=10, k=5, init_number=100)
#y_pred = clustream.fit_predict(X)
#y_pred[y_pred == -1] = 5
#print("Purity: %10.4f"% (Metrics.purity(y,y_pred)))
#print("Precision: %10.4f"% (Metrics.precision(y,y_pred)))
#print("Recall: %10.4f"% (Metrics.recall(y,y_pred)))
# CLUSTREAM #########################################
from sklearn import preprocessing
from evolving import EvolvingClustering
from evolving.util import Benchmarks, load_dataset
import numpy as np
from sklearn.metrics import adjusted_rand_score

# Monte Carlo evaluation of EvolvingClustering on the "s2" dataset,
# scored with adjusted_rand_score over 10 trials.

X, y = load_dataset.load_dataset("s2")

# Two-stage scaling: standardize, then min-max rescale the standardized data.
standardized_X = preprocessing.scale(X)
minmaxscaler = preprocessing.MinMaxScaler()
minmaxscaler.fit(standardized_X)
X = minmaxscaler.transform(standardized_X)

# Each entry of y is indexable; keep only its first element as the label.
y = np.array([row[0] for row in y])

evol_model = EvolvingClustering.EvolvingClustering(
    macro_cluster_update=1,
    variance_limit=0.01,
    debug=False,
)

# Not passed to the evaluation call below; kept as module-level settings.
train_size = 3000
window_size = 100

# Only the first 400 samples take part in the evaluation.
Benchmarks.monte_carlo_evaluation(
    evol_model,
    adjusted_rand_score,
    X[:400],
    y[:400],
    trials=10,
)