def scan_points(points):
    """Fit one DBScan model per position and return the fitted models.

    For each key in *points*, looks up per-position parameters via
    ``get_params`` and fits a fresh ``DBScan`` on that position's data.

    Returns a dict mapping each position to its fitted model.
    """
    fitted = {}
    for position in points:
        samples = points[position]
        eps, min_pts = get_params(position)
        model = DBScan(eps, min_pts)
        model.fit(samples)
        fitted[position] = model
    return fitted
def dbscan_estimate(self, pf):
    """Estimate a pose from *pf*'s particle cloud via DBSCAN clustering.

    Side effect: stores the size of the largest cluster found on
    ``self.largestClusterSize``.

    Returns the cluster-based pose guess produced by the DBScan run.
    """
    eps = 0.5
    min_poses = 4

    # Cluster the particle cloud and take the clusterer's best guess.
    clusterer = DBScan(pf.particlecloud, eps, min_poses)
    clusterer.run()
    estimate = clusterer.getguess()

    self.largestClusterSize = clusterer.largestClusterSize
    return estimate
def main():
    """Run DBScan on the iris CSV and print label accuracy.

    Reads ``iris-data.csv`` twice: once as floats for the feature matrix
    and once as ints for the class labels (last column).
    """
    # Feature matrix: every column except the label column (index 4).
    data_iris = genfromtxt('iris-data.csv', delimiter=',')
    data_iris_values = np.delete(data_iris, np.s_[4], 1)

    # Integer labels: drop the four feature columns, then flatten to 1-D.
    data_iris = genfromtxt('iris-data.csv', delimiter=',', dtype=int)
    data_iris_labels = np.delete(data_iris, np.s_[0:4], 1)
    data_iris_labels = np.concatenate(data_iris_labels)

    # NOTE(review): DBScan receives the int-typed full matrix while the
    # float feature matrix `data_iris_values` is never used — confirm
    # whether `data_iris_values` was the intended clustering input.
    labels = DBScan(data_iris, 1, 15)
    print(labels)

    total = len(data_iris_labels)
    if total == 0:
        # Guard: an empty data set would otherwise divide by zero.
        print('Accuracy score (DBScan): no samples')
        return

    # Count positions where the predicted cluster label matches the truth.
    score = sum(1 for truth, pred in zip(data_iris_labels, labels)
                if truth == pred)
    accuracy = score / total
    print('Accuracy score (DBScan): %.2f' % accuracy)
# Elbow plot: run K-means for 2..9 clusters and chart the SSE per run.
plt.figure()
sum_squared_errors = []
for n_clusters in range(2, 10):
    kmeans = KMeans(n_clusters=n_clusters, max_iter=100)
    kmeans.fit(iris_data, normalize=True)
    sse = kmeans.sum_squared_error()
    sum_squared_errors.append(sse)
plt.plot(sum_squared_errors)
plt.xlabel('# of clusters')
plt.ylabel('SSE')
plt.show()

# TODO 7: DBSCAN on the Iris data, plot the results the same way as K-means
dbscan = DBScan(epsilon=0.5, min_points=3)
dbscan.fit(iris_data)
colors = {0: 'red', 1: 'green', 2: 'blue'}
plt.figure()
for idx, cluster in enumerate(dbscan.clusters):
    for datum in cluster.data:  # plot the points
        # Fix: cycle through the palette instead of indexing directly —
        # DBSCAN may find more clusters than there are colors, which
        # previously raised KeyError (the sibling demo already uses modulo).
        plt.scatter(datum[0], datum[1], c=colors[idx % len(colors)])
plt.xlabel('Sepal width')
plt.ylabel('Petal length')
plt.show()
def test_dbscan(db, radius=0.3, min_pts=50):
    """Cluster *db* with DBScan, plot the result, and report the count."""
    clusters = DBScan(db, radius, min_pts).cluster()
    plot_clusters(clusters)
    print('Found %d clusters' % len(clusters))
# Collect both generated point groups into one data set and show the raw plot.
data.append((x1, y1))
data.append((x2, y2))
plt.show()

# TODO 5: K-means on this data
kmeans = KMeans(n_clusters=2, max_iter=100)
kmeans.fit(data)
colors = {0: 'red', 1: 'green'}
plt.figure()
for idx, cluster in enumerate(kmeans.clusters):
    # plot the cluster centers
    plt.scatter(cluster.center[0], cluster.center[1], c=colors[idx], marker='x', s=200)
    for datum in cluster.data:  # plot the points
        plt.scatter(datum[0], datum[1], c=colors[idx])
plt.show()

# TODO 7: DBSCAN on this data
dbscan = DBScan(epsilon=1.2, min_points=3)
dbscan.fit(data)
colors = {0: 'red', 1: 'pink', 2: 'yellow', 3: 'cyan', 4: 'green', 5: 'blue'}
plt.figure()
for idx, cluster in enumerate(dbscan.clusters):
    for datum in cluster.data:  # plot the points
        # Modulo keeps the color lookup valid even if DBSCAN finds
        # more than six clusters.
        plt.scatter(datum[0], datum[1], c=colors[idx % 6])
plt.show()
class Main:
    """Experiment driver for the DBScan model.

    NOTE(review): the experiment code lives directly in the class body, so
    the selected experiment runs as a side effect of defining the class.
    `exp_execute` selects which experiment block executes.
    """

    exp_execute = 4

    ############################################
    # Experiment DBScan 1
    # Verify distances with and without scale
    ############################################
    if exp_execute == 1:
        # Run dataset 0 and dataset 1 each with scaling off and on.
        for d, scale in zip([0, 0, 1, 1], [False, True, False, True]):
            datasets = read_datasets()
            if scale:
                datasets = scale_datasets(datasets)
            datasets = split_data(datasets)
            dist = 1.0
            min_neighb = 3
            model = DBScan(distance=dist, min_neighbors=min_neighb)
            dataset = datasets[d]
            res = model.fit(dataset['train'].to_numpy())
            res = model.get_description_dist()
            print(res)

    ############################################
    # Experiment DBScan 2
    # Execute model for Dataset 1
    ############################################
    if exp_execute == 2:
        datasets = read_datasets()
        datasets = scale_datasets(datasets)
        datasets = split_data(datasets)
        # Grid over distance and min-neighbor hyperparameters.
        for dist in [0.05, 0.1, 0.15]:
            # Seed the merge frame with sentinel rows (-2 / 'remove') so
            # downstream plotting can filter them out.
            df_merge = datasets[0]['train'].copy()
            df_merge['Type'] = -2
            df_merge['Cluster'] = -2
            df_merge['Experiment'] = 'remove'
            for min_neighb in [3, 5, 8]:
                model = DBScan(distance=dist, min_neighbors=min_neighb)
                dataset = datasets[0]
                res = model.fit(dataset['train'].to_numpy())
                pred = model.predict(dataset['train'].to_numpy(), res,
                                     dataset['test'].to_numpy())
                # plot(dataset['train'].copy(), res, file_name=f'dbscan_vardist_{dist}_{min_neighb}.png')
                write_results(datasets, f'dbscan_vardist_{dist}_{min_neighb}_datasets')
                write_results(res, f'dbscan_vardist_{dist}_{min_neighb}_res')
                write_results(pred, f'dbscan_vardist_{dist}_{min_neighb}_pred')
                df_merge = pd.concat([
                    df_merge,
                    merge_result(dataset['train'].copy(), res,
                                 f'Distance: {dist} Neighbor: {min_neighb}')
                ])
            plot_all(df_merge, file_name=f'all_dbscan_{dist}.png')

    ############################################
    # Experiment DBScan 3
    # Execute model for Dataset 1
    ############################################
    if exp_execute == 3:
        datasets = read_datasets()
        datasets = scale_datasets(datasets)
        datasets = split_data(datasets)
        dist = 0.1
        min_neighb = 3
        model = DBScan(distance=dist, min_neighbors=min_neighb)
        dataset = datasets[0]
        res = model.fit(dataset['train'].to_numpy())
        pred = model.predict(dataset['train'].to_numpy(), res,
                             dataset['test'].to_numpy())
        plot_pred(dataset['test'].copy(), pred,
                  file_name=f'dbscan_pred_{dist}_{min_neighb}.png')

    ############################################
    # Experiment DBScan 4
    # Execute model for Dataset 2
    ############################################
    if exp_execute == 4:
        datasets = read_datasets()
        datasets = scale_datasets(datasets)
        datasets = split_data(datasets)
        for dist in [0.05, 0.1, 0.15]:
            df_merge = datasets[1]['train'].copy()
            df_merge['Type'] = -2
            df_merge['Cluster'] = -2
            df_merge['Experiment'] = 'remove'
            for min_neighb in [3, 5, 8]:
                model = DBScan(distance=dist, min_neighbors=min_neighb)
                dataset = datasets[1]
                res = model.fit(dataset['train'].to_numpy())
                pred = model.predict(dataset['train'].to_numpy(), res,
                                     dataset['test'].to_numpy())
                # plot(dataset['train'].copy(), res, file_name=f'dbscan_vardist_{dist}_{min_neighb}.png')
                # Project the training data to 2-D for the merged plot.
                pca = PCA(n_components=2, random_state=42)
                # Fix: the original called fit_transform(['train'].copy()) —
                # a literal one-string list, missing the `dataset` receiver.
                df_pca = pca.fit_transform(dataset['train'].copy())
                write_results(
                    datasets, f'dbscan_vardist_{dist}_{min_neighb}_datasets_2')
                write_results(res, f'dbscan_vardist_{dist}_{min_neighb}_res_2')
                write_results(pred, f'dbscan_vardist_{dist}_{min_neighb}_pred_2')
                df_merge = pd.concat([
                    df_merge,
                    merge_result(df_pca, res,
                                 f'Distance: {dist} Neighbor: {min_neighb}')
                ])
            plot_all(df_merge, file_name=f'all_dbscan_{dist}_2.png')