def training(feat_mat):
    models = []

    # K-means with 10 clusters
    k = km.K_means(10)
    k_labels, k_centroids, k_inertia = k.train(feat_mat)

    # DBSCAN clustering model
    d = db.Dbscan(2, 3)
    d_labels = d.train(feat_mat)

    models.append(k)
    models.append(d)

    print("\nMeal Data: k-means clustering results")
    print(k_labels)  # labels
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(feat_mat, k_labels))
    print("BUILT-IN LIB: Sum of squared distances of samples to their closest cluster center: %0.3f" % k_inertia)
    print("Manual SSE: %0.3f" % SSE_kmeans(feat_mat, k_labels, k_centroids))

    print("\nMeal Data: DBSCAN clustering results")
    print(d_labels)  # labels
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(feat_mat, d_labels))
    print("Manual SSE: %0.3f" % SSE_dbscan(feat_mat, d_labels))

    return models
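# The manual SSE helpers called above are not defined in this section. A
# minimal sketch of what they might look like, assuming SSE_kmeans uses the
# fitted centroids while SSE_dbscan falls back to per-cluster means and skips
# noise points (label -1); names and signatures follow the calls above.
import numpy as np

def SSE_kmeans(feat_mat, labels, centroids):
    # Sum of squared distances from each sample to its assigned centroid.
    sse = 0.0
    for x, label in zip(feat_mat, labels):
        sse += np.sum((np.asarray(x) - np.asarray(centroids[label])) ** 2)
    return sse

def SSE_dbscan(feat_mat, labels):
    # DBSCAN has no centroids, so use each cluster's mean; skip noise (-1).
    feat_mat = np.asarray(feat_mat)
    labels = np.asarray(labels)
    sse = 0.0
    for label in set(labels) - {-1}:
        members = feat_mat[labels == label]
        sse += np.sum((members - members.mean(axis=0)) ** 2)
    return sse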
def main():
    if len(sys.argv) > 2:
        if 0 < float(sys.argv[1]) <= 1:
            if int(sys.argv[2]) > 0:
                load = data_input.DataLoad('Dataset')
                load.data_input()
                clustering = dbscan.Dbscan(load.dataset, int(sys.argv[2]), float(sys.argv[1]))
                my_labels = clustering.DBSCAN()
                # Number of clusters, ignoring noise (label -1) if present
                n_clusters_ = len(set(my_labels)) - (1 if -1 in my_labels else 0)
                print('Estimated number of DBSCAN clusters: %d' % n_clusters_)
            else:
                print("Please enter a valid minPts value.")
        else:
            print("Please pass an eps between 0 and 1.")
    else:
        print("Please enter epsilon and minPts values ...")
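# A hedged usage note, assuming this module is also run directly as a script
# (the file name main.py below is illustrative, not confirmed by the source):
if __name__ == '__main__':
    main()

# Example invocation, eps first and then minPts:
#   python main.py 0.5 4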
# i = 0
for line in file:
    temp = line.split('\t')
    temp = list(map(float, temp))
    temp.pop()                       # drop the trailing column
    data.append(tuple.Tuple(temp))
    # i += 1
    # if i == 5000: break            # optional cap for quick test runs
file.close()

''' DBSCAN '''
print("start dbscan")
dbscan = db.Dbscan(data, minPts, eps)
clusters = dbscan.perform()

print("start drawing")
i = 0
print("Number of clusters = " + str(len(clusters)))

''' Print DBSCAN output '''
# Matplotlib format strings: color + marker (e.g. 'ro' = red circles)
color = ['ro', 'go', 'bo', 'co', 'mo', 'yo', 'ko', 'wo',
         'rs', 'gs', 'bs', 'cs', 'ms', 'ys', 'ks', 'ws',
         'r^', 'g^', 'b^', 'c^', 'm^', 'y^', 'k^', 'w^',
         'r*', 'g*', 'b*', 'c*', 'm*', 'y*', 'k*', 'w*']
x = []
y = []
plt.subplot(211)
for d in data:
    x.append(d.values[0])
    y.append(d.values[1])
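# The per-cluster drawing loop is cut off above. A minimal sketch of how the
# `clusters` list and `color` cycle are presumably used; the bottom panel
# index 212 is an assumption paired with the subplot(211) call above.
plt.plot(x, y, 'ko', markersize=2)   # raw data in the top panel
plt.subplot(212)
for c in clusters:
    cx = [t.values[0] for t in c]
    cy = [t.values[1] for t in c]
    plt.plot(cx, cy, color[i % len(color)])
    i += 1
plt.show()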
random.seed(42)
centers = [(random.randrange(-20, 20), random.randrange(-20, 20)) for i in range(13)]
X, labels_true = make_blobs(n_samples=500, centers=centers, cluster_std=1, random_state=42)

# rng = np.random.RandomState(42)
# transformation = rng.normal(size=(2, 2))
# X = np.dot(X, transformation)
# X = StandardScaler().fit_transform(X)
# print(X)

# #############################################################################
# Compute DBSCAN with both the custom implementation and scikit-learn
d = dbscan.Dbscan(x=False)
d.fit(X, 1.2, 6)
db = DBSCAN(eps=1.2, min_samples=6).fit(X)

# The core-sample mask comes from the scikit-learn fit; both label arrays
# cover the same 500 samples, so the shapes match.
core_samples_mask = np.zeros_like(d.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = d.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
# print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
# print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
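# To compare the custom labels against scikit-learn directly, a sketch using
# the adjusted Rand index, which is invariant to cluster relabeling (assumes
# both fits ran on the same X, as above; 1.0 means exact agreement).
from sklearn import metrics

print("Agreement with sklearn DBSCAN (ARI): %0.3f"
      % metrics.adjusted_rand_score(db.labels_, d.labels_))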
# Per-attribute standard deviation (population formula, dividing by N)
sd = []
for a in range(num_of_atributes):
    temp = 0
    for r in range(num_of_records):
        temp += (data[r][a] - means[a]) ** 2
    sd.append(math.sqrt(temp / num_of_records))

# Z-score standardization: (value - mean) / standard deviation
for r in range(num_of_records):
    for a in range(num_of_atributes):
        data[r][a] = (data[r][a] - means[a]) / sd[a]

data_set = []
for r in range(num_of_records):
    data_set.append(tuple.Tuple(data[r]))
print(data_set[0].values)

dbscan = db.Dbscan(data_set, minPts, eps)
clusters = dbscan.perform()

# Matplotlib format strings: color + marker
color = ['ro', 'go', 'bo', 'co', 'mo', 'yo', 'ko', 'wo',
         'rs', 'gs', 'bs', 'cs', 'ms', 'ys', 'ks', 'ws',
         'r^', 'g^', 'b^', 'c^', 'm^', 'y^', 'k^', 'w^',
         'r*', 'g*', 'b*', 'c*', 'm*', 'y*', 'k*', 'w*']
i = 0
print("Number of clusters = " + str(len(clusters)))
for c in clusters:
    x = []
    y = []
    for t in c:
        x.append(t.values[4])
        y.append(t.values[5])
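# Cross-check: the hand-rolled loop above is the population z-score, which is
# exactly what sklearn's StandardScaler computes (it also divides by the
# standard deviation with ddof=0). A sketch, assuming the raw values before
# the loop above form a 2-D numeric array.
import numpy as np
from sklearn.preprocessing import StandardScaler

scaled = StandardScaler().fit_transform(np.asarray(data, dtype=float))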
import dbscan
from sklearn import cluster

d = dbscan.Dbscan()

arr1 = [(1, 2), (2, 3), (3, 4), (4, 5)]          # currently unused
arr2 = [(1, 2), (2, 2), (1, 1), (2, 1), (3, 4), (4, 5), (4, 4), (9, 9)]

# Fit the custom implementation and the scikit-learn reference on the same data.
d.fit(arr2, dist=3, minp=4)
d_true = cluster.DBSCAN(eps=3, min_samples=4)
dbtest = d_true.fit(arr2)

print(d.labels_)
print(dbtest.labels_)
print(type(dbtest.labels_))
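# Raw label arrays can disagree by an arbitrary permutation of cluster IDs, so
# an element-wise comparison of d.labels_ and dbtest.labels_ can fail even when
# the clusterings match. A permutation-proof spot check (assumes the custom
# class marks noise with -1, like scikit-learn):
import numpy as np

mine = np.asarray(d.labels_)
ref = dbtest.labels_
print(np.array_equal(mine == -1, ref == -1))            # same noise points?
print(len(set(mine) - {-1}) == len(set(ref) - {-1}))    # same cluster count?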
import numpy as np
import dbscan
# from pca import pca

def fromFile(filename, length):
    # Read a flat boolean dump and reshape to one row per length*length lattice.
    with open(str(filename), 'rb') as f_handle:   # binary mode for np.fromfile
        x = np.fromfile(f_handle, dtype=bool)
        system = np.reshape(x, (-1, length * length))
    return system

x = fromFile("out_wo_header_16_2000.bin", 16)
print(x.shape)

minNeighbors = 30
epsilon = 1.01   # note: not passed to Dbscan in this call

data = np.array(x, dtype=float)
dbscanner = dbscan.Dbscan(data, minNeighbors)
dbscanner.run()
clusterList = dbscanner.getClusterList()

total = 0   # renamed from `sum` to avoid shadowing the built-in
for cluster in clusterList:
    print(np.array(cluster).shape[0])
    total += np.array(cluster).shape[0]
print(total)
print(np.array(dbscanner.getNoise()).shape)
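# For intuition on the reshape: each 16x16 boolean lattice becomes one
# 256-dimensional feature row, so a file holding 2000 snapshots yields shape
# (2000, 256). A self-contained check on synthetic data (counts illustrative):
import numpy as np

length, n_snapshots = 16, 2000
flat = np.zeros(n_snapshots * length * length, dtype=bool)  # stand-in for the file bytes
system = np.reshape(flat, (-1, length * length))
print(system.shape)   # (2000, 256)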