def test_2d_1cluster(self):
    """Run DBSCAN on an 11x11 unit grid and plot the result.

    With eps=2.0 and minPts=3 every grid point has at least 3 neighbours
    within range, so the whole grid should form a single cluster (no noise).
    """
    a = range(0, 11)
    # BUG FIX: `print a` is Python 2 syntax (a SyntaxError under Python 3).
    # list() keeps the Python 2 output (the values, not the range repr).
    print(list(a))
    # all integer grid points (i, j) for i, j in 0..10, as floats
    d = [[i * 1.0, j * 1.0] for i in a for j in a]
    dbscanner = Dbscan(np.array(d), 3, 2.0)
    dbscanner.run()
    plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
def test_plotting(self):
    """Cluster the 5-cluster/20-noise fixture and hand the result to plotting.

    Smoke test: loads the fixture, runs DBSCAN, and plots clusters plus noise.
    """
    # some dummy data from the fixture file
    number, x_coordinate, y_coordinate = loadtxt(
        'testdata/eps3-minpts5-cluster5-noise20.dat', unpack=True)
    D = [None] * len(x_coordinate)
    for ii in range(len(x_coordinate)):
        D[ii] = [x_coordinate[ii], y_coordinate[ii]]
    # put in the data we want to use
    minNeighbors = 5
    # BUG FIX: epsilon was commented out and never passed to Dbscan, unlike
    # every other test in this file. 3.0 matches the "eps3" fixture filename.
    epsilon = 3.0
    data = array(D)
    # use dbscan
    dbscanner = Dbscan(data, minNeighbors, epsilon)
    dbscanner.run()
    # BUG FIX: plotting.plotting takes (clusters, noise) in the rest of the
    # file, not the return value of run().
    plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
def test_plotting(self):
    """Smoke test: cluster the 5-cluster/20-noise fixture, then plot it."""
    # dummy data from the fixture file
    number, x_coordinate, y_coordinate = loadtxt(
        'testdata/eps3-minpts5-cluster5-noise20.dat', unpack=True)
    # pair up the coordinates as [x, y] points
    points = [[x, y] for x, y in zip(x_coordinate, y_coordinate)]
    minNeighbors = 5
    epsilon = 3.
    data = np.array(points, dtype=np.float64)
    # run dbscan on the fixture
    dbscanner = Dbscan(data, minNeighbors, epsilon)
    dbscanner.run()
    # plot clusters and noise
    plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
def test_dbscan2(self):
    """eps=0.01, minPts=1 on the 100-point noise fixture.

    The points are farther apart than 0.01, so no clusters should form and
    every input point should come back as noise.
    """
    testdata2 = 'testdata/eps0p01-minpts1-cluster0-noise100.dat'
    number, x_coordinate, y_coordinate = np.loadtxt(testdata2, unpack=True)
    D = [None] * len(x_coordinate)
    for ii in range(len(x_coordinate)):
        D[ii] = [x_coordinate[ii], y_coordinate[ii]]
    D = np.array(D)
    epsilon = 0.01
    minNeighbors = 1
    dbscanner = Dbscan(D, minNeighbors, epsilon)
    test1 = dbscanner.run()
    # run() apparently returns the clusters with the noise list last
    datennoise = np.array(test1[-1])
    cluster1 = test1[:-1]
    # presumably run() yields one empty cluster slot when nothing clusters
    # -- TODO confirm against Dbscan.run()
    assert cluster1[0] == []
    # BUG FIX: the original `datennoise.all() == D.all()` reduced both sides
    # to a single truth value and passed for almost any pair of arrays;
    # compare element-wise instead.
    np.testing.assert_array_equal(datennoise, D)
def test_dbscan2(self):
    """With eps=0.01 and minPts=1 nothing clusters: all 100 points are noise."""
    testdata2 = 'testdata/eps0p01-minpts1-cluster0-noise100.dat'
    number, x_coordinate, y_coordinate = np.loadtxt(testdata2, unpack=True)
    # stack the two coordinate columns into an (n, 2) point array
    D = np.array([[x, y] for x, y in zip(x_coordinate, y_coordinate)])
    epsilon = 0.01
    minNeighbors = 1
    scanner = Dbscan(D, minNeighbors, epsilon)
    scanner.run()
    noise = np.array(scanner.getNoise())
    # no clusters at all, and the noise set equals the full input
    np.testing.assert_array_equal(scanner.getClusterList(), [])
    np.testing.assert_array_equal(noise, D)
def test_dbscan1(self):
    """Single-cluster fixture: DBSCAN should produce zero noise points.

    Loads the eps2-minpts3-cluster1-noise0 fixture, runs DBSCAN, and checks
    that no point was classified as noise and all points were clustered.
    """
    testdata = 'testdata/eps2-minpts3-cluster1-noise0.dat'
    number, x_coordinate, y_coordinate = np.loadtxt(testdata, unpack=True)
    D = [None] * len(x_coordinate)
    for ii in range(len(x_coordinate)):
        D[ii] = [x_coordinate[ii], y_coordinate[ii]]
    D = np.array(D)
    # NOTE(review): the fixture filename says eps2 but 4.0 is kept from the
    # original -- confirm which epsilon this test is meant to exercise.
    epsilon = 4.0
    minNeighbors = 3
    dbscanner = Dbscan(D, minNeighbors, epsilon)
    dbscanner.run()
    datennoise = dbscanner.getNoise()
    # no noise expected in this fixture
    np.testing.assert_array_equal(np.array([]), datennoise)
    # FIX: the original built a nested copy of the cluster list (Dresultall/Dr)
    # and never asserted on it; check instead that every input point landed in
    # some cluster (order within clusters is implementation-defined).
    total_clustered = sum(len(cluster) for cluster in dbscanner.getClusterList())
    assert total_clustered == len(D)
def test_dbscan1(self):
    """eps=2, minPts=3 on the single-cluster/zero-noise fixture.

    Expects an empty noise list and every input point assigned to a cluster.
    """
    testdata = 'testdata/eps2-minpts3-cluster1-noise0.dat'
    number, x_coordinate, y_coordinate = np.loadtxt(testdata, unpack=True)
    D = [None] * len(x_coordinate)
    for ii in range(len(x_coordinate)):
        D[ii] = [x_coordinate[ii], y_coordinate[ii]]
    D = np.array(D)
    epsilon = 2
    minNeighbors = 3
    dbscanner = Dbscan(D, minNeighbors, epsilon)
    test1 = dbscanner.run()
    # run() apparently returns the clusters with the noise list last
    datennoise = test1[-1]
    clusters = test1[:-1]
    # the fixture contains no noise
    assert datennoise == []
    # BUG FIX: the original `D.all() == Dr.all()` reduced both arrays to a
    # single truth value and passed regardless of content. Clusters may
    # reorder points, so check that every input point was clustered instead.
    clustered = [item for cluster in clusters for item in cluster]
    assert len(clustered) == len(D)
def test_2d_1cluster(self):
    """Cluster an 11x11 regular grid and plot the outcome.

    Grid spacing is 1.0, so with eps=2.0 and minPts=3 every point is a core
    point and the grid should come out as one cluster with no noise.
    """
    a = range(0, 11)
    # BUG FIX: `print a` is Python 2 syntax and fails to parse on Python 3;
    # print the materialized values as Python 2's range would have shown.
    print(list(a))
    d = []
    for i in a:
        for j in a:
            d.append([i * 1.0, j * 1.0])
    dbscanner = Dbscan(np.array(d), 3, 2.0)
    dbscanner.run()
    plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
def test_dbscan(self):
    """Compare the custom Dbscan classifier against sklearn's DBSCAN on iris.

    Maps the three iris species to the integer labels 2/3/1, trains the custom
    classifier on a train split, and prints the accuracy of both it and the
    sklearn baseline.
    """
    exact_labels = []
    label_1 = "Iris-setosa"
    label_2 = "Iris-versicolor"
    label_3 = "Iris-virginica"  # implicitly mapped to 1 by the else branch
    for item in self.data["label"]:
        if item == label_1:
            exact_labels.append(2)
        elif item == label_2:
            exact_labels.append(3)
        else:
            exact_labels.append(1)
    epsilon = 2
    min_pts = 2
    dbscan = Dbscan(epsilon, min_pts)
    # BUG FIX: the original passed self.exact_labels, ignoring the
    # exact_labels list built above (and raising AttributeError unless that
    # attribute happened to exist elsewhere).
    X_train, X_test, y_train, y_test = train_test_split(
        self.features, exact_labels, test_size=0.33, random_state=42)
    dbscan.load_data(X_train.to_numpy().tolist())
    dbscan.train()
    labels = dbscan.predict(X_test.to_numpy().tolist())
    accurate_sum = 0
    for i in range(len(labels)):
        if labels[i] == y_test[i]:
            accurate_sum += 1
    print("Akurasi DBScan: ", accurate_sum / len(labels))
    # sklearn baseline; +3 shifts its labels (-1 for noise, 0.. for clusters)
    # into the 2/3/1-style range used above
    clustering_labels = DBSCAN(eps=epsilon, min_samples=min_pts).fit_predict(X_train)
    clustering_labels = [c + 3 for c in clustering_labels]
    sklearn_accurate_sum = 0
    # NOTE(review): clustering_labels come from X_train but are compared
    # against y_test -- presumably unintended; confirm what the baseline is
    # supposed to measure before relying on this accuracy figure.
    for i in range(len(labels)):
        if clustering_labels[i] == y_test[i]:
            sklearn_accurate_sum += 1
    print("Akurasi DBScan sklearn: ", sklearn_accurate_sum / len(labels))
def test_plotting(self):
    """Run DBSCAN on the 5-cluster/20-noise fixture and plot the result."""
    # dummy data: two coordinate columns from the fixture file
    number, x_coordinate, y_coordinate = loadtxt(
        'testdata/eps3-minpts5-cluster5-noise20.dat', unpack=True)
    # glue the columns together into an (n, 2) float64 point array
    data = np.column_stack((x_coordinate, y_coordinate)).astype(np.float64)
    minNeighbors = 5
    epsilon = 3.
    # cluster, then hand clusters and noise to the plotting helper
    dbscanner = Dbscan(data, minNeighbors, epsilon)
    dbscanner.run()
    plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
def test_dbscan2(self):
    """eps=0.01, minPts=1: every point of the noise fixture stays noise."""
    path = 'testdata/eps0p01-minpts1-cluster0-noise100.dat'
    number, x_coordinate, y_coordinate = np.loadtxt(path, unpack=True)
    # build the (n, 2) input array from the two coordinate columns
    D = np.column_stack((x_coordinate, y_coordinate))
    dbscanner = Dbscan(D, 1, 0.01)
    dbscanner.run()
    noise = np.array(dbscanner.getNoise())
    # expect no clusters at all ...
    np.testing.assert_array_equal(dbscanner.getClusterList(), [])
    # ... and the noise to be exactly the input
    np.testing.assert_array_equal(noise, D)
def test_dbscan1(self):
    """eps=4.0, minPts=3 on the single-cluster fixture: expect zero noise."""
    testdata = 'testdata/eps2-minpts3-cluster1-noise0.dat'
    number, x_coordinate, y_coordinate = np.loadtxt(testdata, unpack=True)
    # assemble [x, y] points from the coordinate columns
    D = np.array([[x_coordinate[i], y_coordinate[i]]
                  for i in range(len(x_coordinate))])
    dbscanner = Dbscan(D, 3, 4.0)
    dbscanner.run()
    noise = dbscanner.getNoise()
    # nested copy of the cluster list (unused by the assertion below;
    # kept from the original)
    collected = [list(cluster) for cluster in dbscanner.getClusterList()]
    Dr = np.array(collected)
    # the fixture has no noise points
    np.testing.assert_array_equal(np.array([]), noise)
from sklearn import metrics
from sklearn.datasets import make_moons
from dbscan import Dbscan
import numpy as np
import matplotlib.pyplot as plt

# Two interleaving half-moons, fed to the incremental DBSCAN in two batches.
X, labels_true = make_moons(n_samples=200, noise=0.1, random_state=19)
Y = np.array_split(X, 2)

db = Dbscan(eps=0.25, min_samples=12)
for batch in Y:
    # partial_fit returns the estimator (the original chained the calls)
    db = db.partial_fit(batch)

core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels) - {-1})
n_noise_ = sum(1 for lbl in labels if lbl == -1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

# one colour per distinct label for the plot
unique_labels = set(labels)
colors = [plt.cm.Spectral(shade)
          for shade in np.linspace(0, 1, len(unique_labels))]
from sklearn import metrics
from sklearn.datasets import make_moons
from dbscan import Dbscan
import numpy as np
import matplotlib.pyplot as plt

# Two interleaving half-moons clustered in a single fit() call.
X, labels_true = make_moons(n_samples=200, noise=0.1, random_state=19)

db = Dbscan(eps=0.25, min_samples=12)
db = db.fit(X)  # fit returns the estimator (the original chained the call)

core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels) - {-1})
n_noise_ = sum(1 for lbl in labels if lbl == -1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

# one colour per distinct label for the plot
unique_labels = set(labels)
colors = [plt.cm.Spectral(shade)
          for shade in np.linspace(0, 1, len(unique_labels))]