Example #1
0
class DBscanUnitTest(object):
    """
    Unit test harness for the DBscan class.

    Clusters a small fixed set of 2-D points and compares the cluster ID
    assigned to every point with a hand-written expected labelling.
    """

    # Input coordinates as [x, y] pairs, in the order they are handed to DBscan.
    test_array = [[0, 100],  [0, 200],  [0, 275],  [100, 150],  [200, 100],  [250, 200],  [0, 300],[675, 700],  [675, 710],  [675, 720]]
    # Expected cluster ID of the point at the same index in test_array.
    proper_cluster = [-1, 1, 1, -1, -1, -1, 1, 2, 2, 2]
    # Class-level placeholder; each instance overwrites it in __init__.
    dbscan = 0

    def __init__(self):
        """Create the DBscan instance that start_test exercises."""
        self.dbscan = DBscan()

    def start_test(self):
        """
        Run the clustering check against the fixed data set.

        Builds one Point per entry of test_array, runs
        ``self.dbscan.start(points, 100, 3)`` on them and compares every
        point's cluster ID with the matching entry of proper_cluster.

        Returns:
            bool: True when every cluster ID matches; False on the first
            mismatch, after printing a failure message.
        """
        point_array = [Point(p[0], p[1]) for p in self.test_array]

        # NOTE(review): the positional arguments presumably are eps=100 and
        # minPts=3 -- confirm against DBscan.start's signature.
        self.dbscan.start(point_array, 100, 3)

        for point, expected in zip(point_array, self.proper_cluster):
            # Compare by value. An identity test ("is not") only appears to
            # work because CPython caches small integers.
            if point.get_clusterID() != expected:
                print("Clustering algorithm failed")
                return False

        return True
Example #2
0
    def start_test(self):
        """
        Benchmark this project's DBscan against scikit-learn's DBSCAN.

        For every sample count a random data-set configuration is drawn.
        Both implementations are then run on the same freshly generated data
        a fixed number of times, with the same randomly chosen eps and
        minimum-samples values, and the average elapsed time of each
        (measured with ``timeit.default_timer``) is printed.

        Returns:
            bool: always True; results are only reported through print.
        """
        repeats = 3  # timed runs per sample count; also the averaging divisor
        db = DBscan()
        sample_counts = [100, 1000, 3000, 5000, 10000]
        for sample_count in sample_counts:
            number_of_centers = randint(2, 5)
            range1 = randint(1, 10)
            scikit_total = 0.0
            our_total = 0.0
            random_generator = RandomDataGenerator(number_of_samples=sample_count,
                                                   number_of_centers=number_of_centers, range=range1)

            for _ in range(repeats):
                # Divide by 10.0 so the upper bound is a true quotient even
                # where "/" on two ints floor-divides (Python 2).
                eps = uniform(0.09, range1 / 10.0)
                min_samples = randint(5, 10)

                #################################################################################
                # Generate sample data
                #################################################################################
                # The second returned value (labels) is not needed for timing.
                X, _labels = random_generator.generate_data()

                # Build the Point list outside the timed sections so only the
                # clustering itself is measured.
                point_array = [Point(row[0], row[1]) for row in X.tolist()]

                #################################################################################
                # sklearn
                #################################################################################
                start = timeit.default_timer()
                DBSCAN(eps=eps, min_samples=min_samples).fit(X)
                scikit_total += timeit.default_timer() - start

                #################################################################################
                # our implementation
                #################################################################################
                start = timeit.default_timer()
                db.start(points=point_array, eps=eps, minPts=min_samples)
                our_total += timeit.default_timer() - start

            print("Number of samples: %d" % sample_count)
            print("Scikit average time of computing = %0.5f" % (scikit_total / repeats))
            print("Our version average time of computing = %0.5f" % (our_total / repeats))
            # The printed figure is the ratio our_time / scikit_time.
            print("Our algorithm is %0.2f slower than algorithm from scikit" % (our_total / scikit_total))

        return True
Example #3
0
 def __init__(self):
     """Create a DBscan instance and store it on the object as ``self.dbscan``."""
     self.dbscan = DBscan()
Example #4
0
import matplotlib.pyplot as plt
#################################################################################
# Compute DBSCAN
#################################################################################
# Load the input coordinates. test_tab is returned alongside them and is not
# used in this section.
xx, test_tab = getTextData()
# NOTE: StandardScaler-based scaling of the input is currently disabled.

# Wrap every coordinate pair in a Point, the element type passed to DBscan.start.
point_array = [Point(coords[0], coords[1]) for coords in xx]

db = DBscan()
clusters = db.start(points=point_array, eps=EPS, minPts=MIN_SAMPLES)

#################################################################################
# Plot result
#################################################################################
plt.subplot(212)
# One Spectral colour per entry of `clusters`, evenly spaced over the colormap.
colors = plt.cm.Spectral(np.linspace(0, 1, len(clusters)))

for p in point_array:
    # Read the ID once and call the getter in every comparison. Comparing the
    # bound method itself (no parentheses) against Point.UNCLASSIFIED would
    # leave the branch below effectively dead.
    cluster_id = p.get_clusterID()
    if cluster_id == Point.NOISE:
        col = 'k'  # noise points are drawn in black
    elif cluster_id == Point.UNCLASSIFIED:
        col = 'g'  # points never assigned to a cluster are drawn in green
    else:
        # NOTE(review): the "- 1" suggests cluster IDs start at 1 -- confirm.
        col = colors[cluster_id - 1]