def test_fit_with_small_eps(self):
    """Check the fit result when ``eps`` is too small to form any cluster.

    With ``eps=0.1`` and ``min_samples=3`` on the two-cluster fixture the
    test expects no core samples, no components, and the label ``-1`` for
    each of the seven points.
    """
    points = self.get_two_clusters()
    model = DBSCAN(eps=0.1, min_samples=3)
    model.fit(points)

    # No point is expected to qualify as a core sample.
    no_core_samples = np.array([])
    np.testing.assert_equal(no_core_samples, model.core_sample_indices_)

    # Consequently the components array is expected to be empty too.
    no_components = np.array([])
    np.testing.assert_equal(no_components, model.components_)

    # Every one of the seven fixture points is expected to be labelled -1.
    all_minus_one = np.array([-1] * 7)
    np.testing.assert_equal(all_minus_one, model.labels_)
class DbscanDialog(QDialog):
    """Dialog that asks for DBSCAN parameters and fits a model on ``data``.

    The dialog shows two text fields (epsilon and min points) and a button.
    Pressing the button parses both fields, builds a ``DBSCAN`` instance,
    fits it on the data given at construction time and accepts the dialog.
    """

    def __init__(self, data):
        """Create the dialog.

        Args:
            data: Dataset passed unchanged to ``DBSCAN.fit`` when the user
                runs the model.
        """
        super().__init__()
        self.setWindowTitle("DBSCAN özellikleri")
        self.data = data
        # Fitted DBSCAN instance; stays None until the model has been run.
        self.model = None
        # Set to True once the model has been fitted successfully.
        self.run = False
        self._configure()

    def _configure(self):
        """Build the widgets: two labelled input rows and the run button."""
        mainLayout = QVBoxLayout()

        # Row 1: epsilon, pre-filled with "0.3".
        hor1 = QHBoxLayout()
        epsilonLabel = QLabel("Epsilon: ")
        self.epsilonLineEdit = QLineEdit("0.3")
        hor1.addWidget(epsilonLabel)
        hor1.addWidget(self.epsilonLineEdit)

        # Row 2: minimum number of points, pre-filled with "10".
        hor2 = QHBoxLayout()
        minPointsLabel = QLabel("Min Points: ")
        self.minPointsLineEdit = QLineEdit("10")
        hor2.addWidget(minPointsLabel)
        hor2.addWidget(self.minPointsLineEdit)

        trainButton = QPushButton("Modeli çalıştır")
        trainButton.clicked.connect(self._run_model)

        mainLayout.addLayout(hor1)
        mainLayout.addLayout(hor2)
        mainLayout.addWidget(trainButton)
        self.setLayout(mainLayout)

    def _run_model(self):
        """Parse the inputs, fit DBSCAN on ``self.data`` and accept the dialog.

        If either field cannot be parsed (epsilon as ``float``, min points as
        ``int``) the dialog stays open, the offending field is focused with
        its text selected, and ``self.model`` / ``self.run`` are left
        untouched. Previously such input raised ``ValueError`` inside the
        button's slot.
        """
        try:
            eps = float(self.epsilonLineEdit.text())
        except ValueError:
            self._flag_invalid(self.epsilonLineEdit)
            return

        try:
            minPoints = int(self.minPointsLineEdit.text())
        except ValueError:
            self._flag_invalid(self.minPointsLineEdit)
            return

        self.model = DBSCAN(epsilon=eps, minPoints=minPoints)
        self.model.fit(self.data)
        self.run = True
        self.accept()
        self.close()

    @staticmethod
    def _flag_invalid(lineEdit):
        """Direct the user to a field whose text could not be parsed."""
        lineEdit.setFocus()
        lineEdit.selectAll()
def main():
    """Run DBSCAN on every dataset and plot the k-dist data and labels.

    For each dataset returned by ``get_datasets`` the function plots the
    k-dist data (used to choose eps), fits a ``DBSCAN`` built from the shared
    ``min_points`` and that dataset's eps, then prints and plots the labels
    returned by ``fit``.
    """
    all_datasets = get_datasets()
    min_points = 5
    # One eps value per dataset, looked up by the dataset's position.
    eps_per_dataset = [20, 17, 11, 4]

    for position, points in enumerate(all_datasets):
        # Plot kdist plot to determine EPS param
        plot_data(get_kdist_data(points, min_points))

        # Cluster the dataset with its own eps value.
        clusterer = DBSCAN(min_points, eps_per_dataset[position])
        point_labels = clusterer.fit(points)

        print_labels(point_labels)
        plot_labeled_data(points, point_labels)
agglo_accuracy_average_group += accuracy print ('Agglomerative - ' + str(linkage_type)) print ('Accuracy\t', accuracy) print ('Format {Real class : cluster}') print ('Dict\t\t', str(dict)) print () # DBSCAN for i in range (0, len(epss)) : eps = epss[i] min_pts = min_ptss[i] dbscan = DBSCAN(eps, min_pts) sk_dbscan = sklearn_DBSCAN(eps=eps, min_samples=min_pts) dbscan.fit(X_train) result = dbscan.predict(X_test) accuracy, dict = clustering_accuracy_score(np.asarray(y_test), np.asarray(result)) dbscan_accuracy += accuracy print ('DBSCAN') print ('Epsilon : {} Min Points : {}'.format(eps, min_pts)) print ('Accuracy\t', accuracy) print ('Format {Real class : cluster}') print ('Dict\t\t', str(dict)) print () k += 1 print ('=== RESULT TRAIN TEST SPLIT ===') print ('KMeans\t\t\t\t', kmeans_accuracy / k) print ('Agglomerative Single\t\t', agglo_accuracy_single / k)
def main():
    """Generate sample data, cluster it with DBSCAN and plot the outcome.

    Data comes from ``generate_data`` (4 clusters requested, ``seed=1``);
    the fitted model's ``labels_`` and ``components_`` are passed to
    ``plot_clusters`` together with the data.
    """
    n_clusters = 4
    points = generate_data(n_clusters, seed=1)

    model = DBSCAN(eps=7, min_samples=5)
    model.fit(points)

    plot_clusters(points, model.labels_, model.components_)
if __name__ == '__main__':
    # Two artificial Gaussian samples of 20 points each, sharing one
    # covariance matrix but centred at different means.
    X1 = create_artificial_gaussiandata(
        np.array([1, 2]), np.array([[2, 1], [1, 2]]), 20)
    X2 = create_artificial_gaussiandata(
        np.array([10, 8]), np.array([[2, 1], [1, 2]]), 20)
    # Concatenate the two ndarrays along the first axis.
    X = np.concatenate([X1, X2], axis=0)

    # Visualize the raw data, one constant label per source sample.
    plotter = PlotUtility()
    plotter.scatter_plot(X1[:, 0], X1[:, 1], [1] * len(X1), size=5)
    plotter.scatter_plot(X2[:, 0], X2[:, 1], [2] * len(X2), size=5)
    plotter.show()

    # Clustering: this DBSCAN is fitted on a precomputed distance matrix.
    dbscan = DBSCAN(2, 3)
    dist_matrix = make_distance_matrix(X)
    cluster = dbscan.fit(dist_matrix)
    print(cluster)

    # Visualize the result: one scatter call per label value in the range
    # spanned by the returned labels.
    plotter = PlotUtility()
    lowest_label = int(min(cluster))
    highest_label = int(max(cluster))
    for label in range(lowest_label, highest_label + 1):
        members = devide(X, cluster, label)
        plotter.scatter_plot(
            members[:, 0], members[:, 1], [label] * len(members), size=5)
    plotter.show()
# Demo script: cluster the two-moons toy dataset with the local DBSCAN
# implementation and plot the outcome.
from dbscan import DBSCAN
from sklearn.datasets import make_moons

# 300 noisy two-moons samples; the generated class labels are discarded.
x, _ = make_moons(n_samples=300, noise=0.1)

radius = 0.2
min_points = 10
print(f'Radius = {radius}, Minpoints = {min_points}')

# This DBSCAN takes the dataset at construction time, so fit() needs no
# arguments.
model = DBSCAN(x, radius, min_points)

# Fitting model to dataset
point_labels, clusters = model.fit()
# The reported count is one less than the value returned by fit().
print(f'Number of clusters: {clusters - 1}')

# Plotting result
model.plot_result(x, point_labels, clusters)
def dbscan_visualization_test(data, eps=0.3, minPoints=10):
    """Fit a DBSCAN model on ``data`` and pass it to ``plot_automation``.

    Args:
        data: Dataset handed to ``DBSCAN.fit``.
        eps: First positional argument of ``DBSCAN`` (default 0.3).
        minPoints: Second positional argument of ``DBSCAN`` (default 10).
    """
    fitted_model = DBSCAN(eps, minPoints)
    fitted_model.fit(data)
    plot_automation(fitted_model)
# Exploratory script: split iris.data per class into test/train rows and
# fit the local DBSCAN implementation on a few training rows.
from data_loader import load_file
from k_means import Kmeans
from dbscan import DBSCAN
from random import shuffle
from utils import calculate_accuracy
from sklearn.cluster import KMeans
from utils import euclidean_distance
import pry

raw_data = load_file('iris.data')

# The last element of every row is treated as its class label.
classes = {row[-1] for row in raw_data}

class_dict = {}   # class label -> shuffled rows of that class (label kept)
test_data = {}    # class label -> first 10 shuffled rows, label stripped
train_data = []   # remaining rows of all classes, label stripped

for kelas in classes:
    members = [row for row in raw_data if row[-1] == kelas]
    shuffle(members)
    class_dict[kelas] = members
    test_data[kelas] = [row[:-1] for row in members[:10]]
    train_data.extend(row[:-1] for row in members[10:])

db_scan = DBSCAN(1, 0.5)
# NOTE(review): pry() looks like a leftover interactive breakpoint - confirm
# whether it is still wanted.
pry()
db_scan.fit(train_data[:10])
# Bare attribute access; its value is not used by the script.
db_scan.clusters
def fit(self):
    """Fit DBSCAN on the two-cluster fixture and return the fitted model.

    The model is configured from ``self.eps`` and ``self.min_samples``.

    Returns:
        The ``DBSCAN`` instance after ``fit`` has been called on the data
        from ``self.get_two_clusters()``.
    """
    points = self.get_two_clusters()
    model = DBSCAN(eps=self.eps, min_samples=self.min_samples)
    model.fit(points)
    return model