class DBscanUnitTest(object):
    """Unit test for the DBscan class.

    Clusters a small fixed set of 2-D points and compares the cluster ID
    assigned to every point with the expected ID.
    """

    # Input points as [x, y] pairs.
    test_array = [[0, 100], [0, 200], [0, 275], [100, 150], [200, 100],
                  [250, 200], [0, 300], [675, 700], [675, 710], [675, 720]]
    # Expected cluster ID for the point at the same index in test_array.
    proper_cluster = [-1, 1, 1, -1, -1, -1, 1, 2, 2, 2]
    # Class-level placeholder; each instance gets its own DBscan in __init__.
    dbscan = 0

    def __init__(self):
        self.dbscan = DBscan()

    def start_test(self):
        """Run the clustering check on the fixed test data.

        Returns:
            True when every point received the expected cluster ID; False
            otherwise (a failure message is printed in that case).
        """
        point_array = [Point(p[0], p[1]) for p in self.test_array]

        # NOTE(review): 100 and 3 are presumably eps and minPts -- confirm
        # against the positional order of DBscan.start.
        self.dbscan.start(point_array, 100, 3)

        # Compare by value. The previous identity check (`is not`) only
        # worked because CPython happens to cache small integers.
        assigned = [p.get_clusterID() for p in point_array]
        if assigned != list(self.proper_cluster):
            print("Clustering algorithm failed")
            return False
        return True
def start_test(self):
    """Benchmark our DBscan against scikit-learn's DBSCAN.

    For each sample count a random data set configuration is drawn, both
    implementations are timed on several freshly generated data sets, and
    the average wall-clock times plus their ratio are printed.

    Returns:
        True once all sample counts have been benchmarked.
    """
    runs_per_size = 3  # timed repetitions averaged for every sample count
    db = DBscan()

    for sample_count in [100, 1000, 3000, 5000, 10000]:
        number_of_centers = randint(2, 5)
        range1 = randint(1, 10)
        scikit_total = 0.0
        our_total = 0.0
        random_generator = RandomDataGenerator(
            number_of_samples=sample_count,
            number_of_centers=number_of_centers,
            range=range1)

        for _ in range(runs_per_size):
            # Float literal keeps true division even under Python 2, where
            # range1/10 would truncate to 0 for range1 < 10.
            EPS = uniform(0.09, range1 / 10.0)
            MIN_SAMPLES = randint(5, 10)

            # Generate sample data; only the coordinates are needed here.
            X, _ = random_generator.generate_data()
            point_array = [Point(x[0], x[1]) for x in X.tolist()]

            # Time scikit-learn's implementation.
            start = timeit.default_timer()
            DBSCAN(eps=EPS, min_samples=MIN_SAMPLES).fit(X)
            end = timeit.default_timer()
            scikit_total += end - start

            # Time our implementation on the same data.
            start = timeit.default_timer()
            db.start(points=point_array, eps=EPS, minPts=MIN_SAMPLES)
            end = timeit.default_timer()
            our_total += end - start

        print("Number of samples: %d" % sample_count)
        print("Scikit average time of computing = %0.5f"
              % (scikit_total / runs_per_size))
        print("Our version average time of computing = %0.5f"
              % (our_total / runs_per_size))
        # The printed value is a ratio of the two totals, hence "times".
        print("Our algorithm is %0.2f times slower than algorithm from scikit"
              % (our_total / scikit_total))

    return True
#################################################################################
# Compute DBSCAN
#################################################################################
# Points come from getTextData(); each entry is indexed as (x, y) below.
xx, test_tab = getTextData()
point_array = [Point(x[0], x[1]) for x in xx]

db = DBscan()
clusters = db.start(points=point_array, eps=EPS, minPts=MIN_SAMPLES)

#################################################################################
# Plot result
#################################################################################
plt.subplot(212)
colors = plt.cm.Spectral(np.linspace(0, 1, len(clusters)))
for p in point_array:
    cluster_id = p.get_clusterID()
    if cluster_id == Point.NOISE:
        col = 'k'
    # get_clusterID must be *called* here: comparing the bound method itself
    # to the constant is always False, which made this branch unreachable.
    elif cluster_id == Point.UNCLASSIFIED:
        col = 'g'
    else:
        col = colors[cluster_id - 1]