def startdb(self):
        """Cluster ``self.traindata`` with DDBSCAN and print a text report.

        A model is built from ``self.eps`` and ``self.minpts``; every row of
        ``self.traindata`` is converted to floats and added with ``count=2``.
        After ``compute()`` the method prints, in order: each added point, the
        member indices of every cluster, the cluster assigned to every stored
        point (cluster ``-1`` is flagged as an anomaly), a symbol legend, a
        10x10 ASCII plot and finally the word ``started``.

        The plot only shows points whose two coordinates are whole numbers in
        ``0..9``; the first coordinate selects the printed row and the second
        one the column.

        Returns:
            None. Everything is written to standard output.
        """
        scan = ddbscan.DDBSCAN(self.eps, self.minpts)

        # Add points to the model, coercing every coordinate to float first.
        data = [[float(y) for y in x] for x in self.traindata]
        for point in data:
            # NOTE: print is always called with one parenthesised argument, a
            # form that emits the same text as a Python 2 print statement.
            print(point)
            scan.add_point(point=point, count=2, desc="")

        # Compute clusters
        scan.compute()

        print('Clusters found and its members points index:')
        for cluster_number, cluster in enumerate(scan.clusters):
            print('=== Cluster %d ===' % cluster_number)
            print('Cluster points index: %s' % list(cluster))

        print('\nCluster assigned to each point:')
        for i in range(len(scan.points)):
            print('=== Point: %s ===' % scan.points[i])
            assigned = scan.points_data[i].cluster
            line = 'Cluster: %2d' % assigned
            # If a point cluster is -1, it's an anomaly
            if assigned == -1:
                line += ' \t <== Anomaly found!'
            print(line)

        # Symbols used to plot points; symbols[0] is reserved for anomalies.
        symbols = 'x#=~+-opwbcnkjhf@uyt()987654321asdfghjklpoiuytrewqzxcvbnm0)(*&^'

        def symbol_for(cluster_id):
            # Cycle through the non-anomaly symbols so that a large number of
            # clusters reuses symbols instead of raising IndexError.
            if cluster_id == -1:
                return symbols[0]
            return symbols[1 + cluster_id % (len(symbols) - 1)]

        # Print ASCII plot
        print('\n ===== Data plot =====\n\nLegend => x: anomaly')
        for cluster_number in range(len(scan.clusters)):
            print('          %s: cluster %d' % (symbol_for(cluster_number),
                                                cluster_number))
        print('')

        # Map coordinates to the model's own point index. Adding an already
        # known point does not append a new entry, so positions in ``data``
        # stop matching ``scan.points_data`` as soon as the input contains
        # duplicates; looking up the model's indices avoids that mismatch.
        point_index = {}
        for i in range(len(scan.points)):
            point_index.setdefault(tuple(scan.points[i]), i)

        border = '-----------------------'
        print(border)
        for x in range(0, 10):
            cells = []
            for y in range(0, 10):
                # Integer keys match the stored float coordinates because
                # equal numbers hash alike (1 == 1.0).
                found = point_index.get((x, y))
                if found is None:
                    cells.append(' ')
                else:
                    cells.append(symbol_for(scan.points_data[found].cluster))
            print('| ' + ' '.join(cells) + ' |')
        print(border)

        print("started")
예제 #2
0
def test_add_existing_point():
    # Adding a point that is already stored is expected to update the
    # existing entry (new desc, accumulated count) instead of appending.
    scan = ddbscan.DDBSCAN(5, 10)

    def expect_single_point(desc, total):
        # Exactly one point is stored ...
        assert_equals(len(scan.points), 1)
        assert_equals(len(scan.points_data), 1)

        # ... and it is its own only neighbour, so count and neighbourhood
        # size are the same number.
        assert_equals(scan.points[0], [2, 3])
        entry = scan.points_data[0]
        assert_equals(entry.desc, desc)
        assert_equals(entry.neighbourhood, [0])
        assert_equals(entry.count, total)
        assert_equals(entry.size_neighbourhood, total)

    # First insertion of the point
    scan.add_point([2, 3], 5, "First point.")
    expect_single_point("First point.", 5)

    # Same coordinates again with count = 8: list sizes must stay unchanged
    # while desc, count and size_neighbourhood change.
    scan.add_point([2, 3], 8, "Changed point.")
    expect_single_point("Changed point.", 5 + 8)
예제 #3
0
def test_set_params_simple():
    # The constructor arguments and set_params() must both be reflected in
    # the eps / min_pts attributes.
    scan = ddbscan.DDBSCAN(5, 10)

    def expect_params(eps, min_pts):
        assert_equals(scan.eps, eps)
        assert_equals(scan.min_pts, min_pts)

    expect_params(5, 10)

    scan.set_params(2, 15)
    expect_params(2, 15)
예제 #4
0
def test_add_compute_increment_exception():
    # With compute_increment=True, passing a count lower than the stored one
    # is expected to be rejected with a ValueError.
    scan = ddbscan.DDBSCAN(5, 10)

    # Add one point to ddbscan
    scan.add_point([2, 3], 5, "First point.", compute_increment=True)

    # Try to increment passing a new count less than the actual count.
    # The expectation is checked explicitly: without this the test would
    # error out when the exception is raised and pass when it is not.
    try:
        scan.add_point([2, 3], 4, "First point incremented.", compute_increment=True)
    except ValueError:
        return

    raise AssertionError(
        "add_point() did not raise ValueError for a count below the stored count")
예제 #5
0
def test_add_n_dimensional_point():
    # Points are not limited to two coordinates: store a 3-D point and check
    # that it is kept with all three components.
    model = ddbscan.DDBSCAN(5, 10)

    model.add_point(point=[2,3,5], count=2, desc="First point.")

    # One stored point with three coordinates
    assert_equals(len(model.points), 1)
    assert_equals(len(model.points[0]), 3)

    # One matching entry in points_data
    assert_equals(len(model.points_data), 1)

    # A second 3-D point must be accepted as well (no further assertions)
    model.add_point(point=[1,2,3], count=1, desc="Second point.")
예제 #6
0
def test_add_compute_increment():
    # With compute_increment=True the count passed to add_point() is expected
    # to become the stored count of an existing point, and neighbourhood
    # sizes of its neighbours are expected to follow.
    scan = ddbscan.DDBSCAN(5, 10)

    def expect_point(index, coords, desc, neighbourhood, count, size):
        assert_equals(scan.points[index], coords)
        entry = scan.points_data[index]
        assert_equals(entry.desc, desc)
        assert_equals(entry.neighbourhood, neighbourhood)
        assert_equals(entry.count, count)
        assert_equals(entry.size_neighbourhood, size)

    # Add one point, then the same point again with a higher count
    scan.add_point([2, 3], 5, "First point.", compute_increment=True)
    scan.add_point([2, 3], 8, "First point incremented.", compute_increment=True)
    expect_point(0, [2, 3], "First point incremented.", [0], 8, 8)

    # A second point becomes a neighbour of the first one
    scan.add_point([3, 4], 9, "Second point.", compute_increment=True)
    expect_point(0, [2, 3], "First point incremented.", [0, 1], 8, 17)

    # Raising the second point's count changes both neighbourhood sizes
    scan.add_point([3, 4], 11, "Second point incremented.", compute_increment=True)
    expect_point(0, [2, 3], "First point incremented.", [0, 1], 8, 19)
    expect_point(1, [3, 4], "Second point incremented.", [0, 1], 11, 19)
예제 #7
0
import ddbscan

# Build a DDBSCAN model with eps = 2 and min_pts = 4
scan = ddbscan.DDBSCAN(2, 4)

# Sample points fed to the model, each with a count of 1
data = [[1, 100], [2, 2], [1, 3], [2, 3], [3, 3], [8, 9], [7, 6], [9, 7],
        [6, 9], [6, 8], [5, 5], [7, 8], [10, 10], [0.002, -0.4]]

for point in data:
    scan.add_point(point=point, count=1, desc="")

# Run the clustering
scan.compute()

# NOTE: print is always called with one parenthesised argument, a form that
# emits the same text as the equivalent print statement.
print('Clusters found and its members points index:')
cluster_number = 0
for cluster in scan.clusters:
    print('=== Cluster %d ===' % cluster_number)
    print('Cluster points index: %s' % list(cluster))
    cluster_number += 1

print('\nCluster assigned to each point:')
for i in range(len(scan.points)):
    print('=== Point: %s ===' % scan.points[i])
    report = 'Cluster: %2d' % scan.points_data[i].cluster
    # A cluster of -1 marks the point as an anomaly
    if scan.points_data[i].cluster == -1:
        report += ' \t <== Anomaly found!'
    print(report)
예제 #8
0
def test_add_new_points():
    # Adds three distinct points one by one and checks, after every step,
    # the stored coordinates, descriptions, counts and neighbourhoods.
    scan = ddbscan.DDBSCAN(5, 10)

    def expect_stored(total):
        assert_equals(len(scan.points), total)
        assert_equals(len(scan.points_data), total)

    def expect_point(index, coords, desc, count, neighbourhood, size):
        assert_equals(scan.points[index], coords)
        entry = scan.points_data[index]
        assert_equals(entry.desc, desc)
        assert_equals(entry.count, count)
        assert_equals(entry.neighbourhood, neighbourhood)
        assert_equals(entry.size_neighbourhood, size)

    # One point: it is its own only neighbour
    scan.add_point(point=[2,3], count=5, desc="First point.")
    expect_stored(1)
    expect_point(0, [2, 3], "First point.", 5, [0], 5)

    # Second point lies farther away than eps:
    # Distance = Sqrt((4-2)^2 + (8-3)^2) ~=  5.38
    scan.add_point([4, 8], 9, "Second point.")
    expect_stored(2)
    expect_point(0, [2, 3], "First point.", 5, [0], 5)    # unchanged
    expect_point(1, [4, 8], "Second point.", 9, [1], 9)

    # Third point lies within eps of the second one:
    # Distance = Sqrt((5-4)^2 + (11-8)^2) ~=  3.16
    scan.add_point([5, 11], 2, "Third point.")
    expect_stored(3)
    expect_point(0, [2, 3], "First point.", 5, [0], 5)    # still unchanged
    # Points 1 and 2 are now neighbours of each other
    expect_point(1, [4, 8], "Second point.", 9, [1, 2], 9 + 2)
    expect_point(2, [5, 11], "Third point.", 2, [1, 2], 2 + 9)
예제 #9
0
def test_compute_clusters_repeated_points():
    # Clusters a data set that contains many repeated coordinates and checks
    # the count-weighted number of outliers and the number of clusters.

    data = [[ 3,  2], [ 3,  2], [ 6,  9], [ 0,  9], [ 2,  4], [ 9,  0], [ 0,  1],
            [ 4,  7], [ 4, 10], [ 7,  3], [ 6, 10], [ 6,  6], [ 6,  1], [ 2,  7],
            [ 1,  0], [ 3,  7], [ 3,  5], [ 4,  7], [ 4, 10], [ 1,  6], [ 5,  5],
            [ 1,  6], [ 9,  2], [ 6,  7], [ 4,  4], [ 0,  3], [ 7,  5], [ 7,  4],
            [ 3,  4], [ 3,  9], [10,  3], [ 5,  5], [ 0,  0], [ 6,  8], [ 9,  6],
            [ 9,  8], [ 1,  5], [ 4,  0], [ 7,  8], [ 3,  5], [ 8,  4], [ 5, 10],
            [10,  7], [ 2,  2], [ 3,  6], [ 8,  2], [ 2,  8], [ 8,  3], [ 2,  3],
            [ 1,  2], [ 8,  3], [ 8, 10], [ 5,  9], [10,  6], [ 5,  5], [ 4,  4],
            [ 9,  1], [ 5,  8], [ 2, 10], [ 5,  0], [ 4,  2], [ 1,  6], [ 9,  3],
            [ 3,  6], [10,  2], [ 5,  1], [10,  6], [ 5,  0], [ 9,  3], [ 4,  7],
            [ 4,  5], [ 6,  2], [ 4,  4], [10,  7], [ 5,  3], [ 6,  1], [ 5,  7],
            [ 8,  6], [ 3,  0], [ 2,  9], [ 5,  4], [ 3,  3], [ 1,  6], [ 7,  4],
            [ 2,  7], [10,  5], [ 5,  2], [ 3,  0], [ 6,  8], [ 6,  6], [ 1,  1],
            [ 4,  4], [ 5,  6], [ 4,  1], [ 9,  6], [ 7,  6], [ 2,  7], [ 6,  9],
            [ 8, 10], [ 2,  0], [ 9,  5], [ 0,  7], [ 1,  3], [10,  7], [ 7,  2],
            [ 3,  1], [10,  0], [ 6, 10], [ 2,  5], [ 5,  8], [ 6,  5], [ 9,  2],
            [10,  5], [ 4,  0], [ 5, 10], [ 9,  1], [ 5,  5], [ 0,  5], [ 3,  5],
            [10, 10], [ 8,  2], [ 8,  0], [ 5,  6], [ 4,  9], [ 5,  2], [ 9,  7],
            [ 7,  9], [ 2,  7], [10, 10], [ 1,  3], [ 7,  6], [ 2,  5], [ 5,  8],
            [ 6,  7], [ 5,  6], [ 8,  6], [ 4,  4], [ 6,  8], [ 0,  4], [ 7,  1],
            [ 1,  6], [10,  6], [ 0,  7], [ 7,  0], [ 1,  8], [ 5,  5], [ 0,  3],
            [ 9,  3], [ 6,  5], [ 0,  5], [ 8,  6], [10,  6], [ 6,  1], [ 1,  4],
            [ 3,  8], [ 4, 10], [ 5,  2], [ 2,  6], [ 0,  1], [ 1,  6], [ 9,  3],
            [ 8, 10], [ 6, 10], [ 6,  8], [10,  4], [10,  1], [ 8,  9], [ 1, 10],
            [ 6,  7], [ 5,  9], [ 0,  3], [ 0,  8], [ 7,  4], [ 8,  7], [ 7,  9],
            [ 1,  0], [ 6,  3], [ 7, 10], [ 8,  9], [ 1,  9], [10, 10], [10, 10],
            [ 2,  1], [ 1,  3], [10,  0], [ 6,  5], [ 6,  1], [ 1,  0], [ 9,  8],
            [ 6,  2], [ 6,  3], [ 7,  1], [10,  8], [ 1,  1], [ 3,  1], [ 6,  2],
            [ 1,  0], [ 4,  2], [ 6,  8], [ 1,  3]]

    def cluster_summary(eps, min_pts):
        # A new model is built for every parameter set.
        model = ddbscan.DDBSCAN(eps, min_pts)
        for point in data:
            model.add_point(point, 1, "Desc")
        model.compute()

        # Outliers (cluster -1) are weighted by the stored count of the point.
        outliers = sum(entry.count for entry in model.points_data
                       if entry.cluster == -1)
        return outliers, len(model.clusters)

    # eps=5, min_pts=5: everything ends up in a single cluster
    outliers, clusters = cluster_summary(5, 5)
    assert_equals(outliers, 0)
    assert_equals(clusters, 1)

    # eps=3, min_pts=50: still one cluster, but 31 outliers
    outliers, clusters = cluster_summary(3, 50)
    assert_equals(outliers, 31)
    assert_equals(clusters, 1)
예제 #10
0
def test_compute_clusters():
    # Clusters the same random data set under several (eps, min_pts)
    # settings and compares outlier and cluster totals with reference values.

    # Random generated data
    data = [[ 92, 109], [146, 245], [295,  42], [ 19, 116], [221, 273], [ 12, 122],
            [187,  72], [ 92,  79], [278, 282], [  6, 240], [295,  23], [184, 257],
            [116, 151], [263,   3], [217,  24], [ 87,  12], [170, 143], [148,  91],
            [205, 176], [ 17,  68], [122, 278], [272, 171], [ 33, 169], [ 95, 191],
            [158,  83], [288,  92], [206,  26], [ 94, 193], [ 83, 194], [273, 187],
            [240,  78], [ 55, 152], [ 78, 183], [215,  77], [ 41, 103], [225, 239],
            [ 94,  13], [170, 123], [187,   0], [237, 212], [ 36,   5], [ 69, 141],
            [ 97, 126], [210, 264], [ 12, 151], [151, 225], [ 75,  73], [224, 134],
            [  3, 150], [297, 236], [104,  19], [142,  45], [ 80, 175], [170,  76],
            [176, 281], [153, 223], [147, 181], [ 27,   6], [ 18,  89], [110, 122],
            [131, 170], [299, 275], [257, 134], [ 79, 166], [ 28, 262], [275,  77],
            [231,  14], [174,  65], [170,  43], [292, 228], [ 53,  29], [ 33, 288],
            [ 39, 104], [137,  21], [115, 174], [ 79, 217], [ 77, 195], [ 98, 115],
            [190,  53], [  8, 211], [227,  30], [119, 291], [139, 279], [ 53,  65],
            [120,  35], [112, 269], [188, 152], [ 91,  32], [204,   9], [ 12,  30],
            [125, 137], [167,  79], [135,  62], [ 60, 207], [ 26, 283], [ 48, 265],
            [189, 247], [162, 139], [246, 113], [125, 118]]

    # The expected numbers are based on the sklearn DBSCAN implementation and
    # can be reproduced like this (shown for the first parameter set):
    #
    # from sklearn.cluster import DBSCAN
    # import numpy
    #
    # scan = DBSCAN(eps=40, min_samples=5)
    # predictions = scan.fit_predict(data)
    # count_outliers = 0
    # for p in predictions:
    #     if p == -1:
    #         count_outliers = count_outliers + 1
    # count_clusters = max(predictions) + 1

    # (eps, min_pts, expected outliers, expected clusters)
    cases = (
        (40, 5, 16, 3),
        (30, 2, 2, 16),
        (80, 10, 1, 1),
        (10, 5, 100, 0),
    )

    for eps, min_pts, expected_outliers, expected_clusters in cases:
        # A new model is built for every parameter set.
        model = ddbscan.DDBSCAN(eps, min_pts)
        for point in data:
            model.add_point(point, 1, "Desc")
        model.compute()

        # Outliers (cluster -1) are weighted by the stored count of the point.
        outliers = sum(entry.count for entry in model.points_data
                       if entry.cluster == -1)
        clusters = len(model.clusters)

        assert_equals(outliers, expected_outliers)
        assert_equals(clusters, expected_clusters)
예제 #11
0
def test_set_params_advanced():
    # Changing eps through set_params() is expected to recompute every
    # point's neighbourhood, and switching back must restore earlier values.
    scan = ddbscan.DDBSCAN(5, 10)

    def expect_eps5_state():
        # With eps=5 only the second and third point are neighbours.
        assert_equals(scan.points_data[0].size_neighbourhood, 5)
        for index in (1, 2):
            assert_equals(scan.points_data[index].neighbourhood, [1, 2])
            assert_equals(scan.points_data[index].size_neighbourhood, 9 + 2)

    def expect_first_three(expected):
        # expected: one (neighbourhood, size_neighbourhood) pair per point,
        # checked for the points at index 0, 1 and 2 in that order.
        for index, (neighbourhood, size) in enumerate(expected):
            entry = scan.points_data[index]
            assert_equals(entry.neighbourhood, neighbourhood)
            assert_equals(entry.size_neighbourhood, size)

    everyone = [0, 1, 2]

    # Add three points
    scan.add_point(point=[2, 3], count=5, desc="First point.")
    scan.add_point([4, 8], 9, "Second point.")
    scan.add_point([5, 11], 2, "Third point.")
    expect_eps5_state()

    # Small eps: nobody has neighbours besides itself
    scan.set_params(2, 15)
    expect_first_three([([0], 5), ([1], 9), ([2], 2)])

    # Back to the initial eps: values must be the initial ones again
    scan.set_params(5, 10)
    expect_eps5_state()

    # Large eps: everyone is a neighbour of everyone
    scan.set_params(12, 12)
    expect_first_three([(everyone, 16), (everyone, 16), (everyone, 16)])

    # Back to the initial eps once more
    scan.set_params(5, 10)
    expect_eps5_state()

    # Add a far away point ...
    scan.add_point(point=[18, 15], count=7, desc="Far away point.")

    # ... which must not affect the data of the first three points
    scan.set_params(12, 12)
    expect_first_three([(everyone, 16), (everyone, 16), (everyone, 16)])

    scan.set_params(5, 10)
    expect_eps5_state()