def training(feat_mat):
    """Cluster feat_mat with K-means and DBSCAN, print diagnostics for both.

    Fits a 10-cluster K-means and a Dbscan(2, 3) model on the feature
    matrix, prints labels plus silhouette / SSE metrics for each, and
    returns the two fitted models as a list [kmeans, dbscan].
    """
    kmeans = km.K_means(10)
    kmeans_labels, kmeans_centroids, kmeans_inertia = kmeans.train(feat_mat)

    dbs = db.Dbscan(2, 3)
    dbs_labels = dbs.train(feat_mat)

    models = [kmeans, dbs]

    # K-means diagnostics: labels, silhouette, library inertia, manual SSE.
    print("\nMeal Data: kmeans clustering results")
    print(kmeans_labels)
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(feat_mat, kmeans_labels))
    print("BUILT-IN LIB: Sum of squared distances of samples to their closest cluster center: %0.3f"
          % kmeans_inertia)
    print("Manual SSE: %0.3f"
          % SSE_kmeans(feat_mat, kmeans_labels, kmeans_centroids))

    # DBSCAN diagnostics: labels, silhouette, manual SSE.
    print("\nMeal Data: dbscan cluster results")
    print(dbs_labels)
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(feat_mat, dbs_labels))
    print("Manual SSE: %0.3f"
          % SSE_dbscan(feat_mat, dbs_labels))

    return models
def main():
    """Run project DBSCAN on 'Dataset' with eps/minPts from the command line.

    Expects sys.argv[1] = eps (0 < eps <= 1) and sys.argv[2] = minPts (> 0).
    Prints the estimated cluster count, or a usage message when arguments
    are missing or out of range.
    """
    # Guard clauses replace the original nested if/else pyramid; the
    # original also used Python 2 `print` statements in the error branches,
    # which are syntax errors under Python 3.
    if len(sys.argv) <= 2:
        print("Please enter epsilon and minPts values ...")
        return
    eps = float(sys.argv[1])
    if not (0 < eps <= 1):
        print("Please pass an eps between 0 to 1.")
        return
    # Convert minPts only after eps validated, matching original eval order.
    min_pts = int(sys.argv[2])
    if min_pts <= 0:
        print("Please enter valid minPts value")
        return
    load = data_input.DataLoad('Dataset')
    load.data_input()
    clustering = dbscan.Dbscan(load.dataset, min_pts, eps)
    my_labels = clustering.DBSCAN()
    # Noise points are labelled -1 and do not count as a cluster.
    n_clusters_ = len(set(my_labels)) - (1 if -1 in my_labels else 0)
    print('Estimated number of DBSCAN clusters: %d' % n_clusters_)
# Esempio n. 3
# 0
# Parse tab-separated rows: convert every field to float, drop the
# trailing column, and wrap each row in a project Tuple.
for line in file:
    fields = [float(v) for v in line.split('\t')]
    del fields[-1]  # last column is excluded from the feature vector
    data.append(tuple.Tuple(fields))
file.close()


'''
Dbscan
'''
print("start dbscan")
dbscan = db.Dbscan(data, minPts, eps)
clusters = dbscan.perform()
print("start drawing")
i = 0
print("Liczba klastrow = " + str(len(clusters)))

'''
Print Dbscan output
'''
# One matplotlib style string per cluster: circles, squares, triangles,
# then stars, cycling through eight colours each.
color = ['ro', 'go', 'bo', 'co', 'mo', 'yo', 'ko', 'wo',
         'rs', 'gs', 'bs', 'cs', 'ms', 'ys', 'ks', 'ws',
         'r^', 'g^', 'b^', 'c^', 'm^', 'y^', 'k^', 'w^',
         'r*', 'g*', 'b*', 'c*', 'm*', 'y*', 'k*', 'w*']
x = []
y = []
plt.subplot(211)
# Collect the first two attribute values of every data point as x/y
# scatter coordinates. NOTE(review): the plotting of x/y is not visible
# in this excerpt — the script presumably continues below.
for d in data:
    x.append(d.values[0])
    y.append(d.values[1])
# Esempio n. 4
# 0
random.seed(42)
# 13 cluster centres drawn uniformly from [-20, 20) on both axes.
centers = [(random.randrange(-20, 20), random.randrange(-20, 20))
           for _ in range(13)]
X, labels_true = make_blobs(n_samples=500, centers=centers, cluster_std=1,
                            random_state=42)

# #############################################################################
# Compute DBSCAN with both the project implementation and scikit-learn,
# using the same eps / min_samples so the results are comparable.
d = dbscan.Dbscan(x=False)
d.fit(X, 1.2, 6)
db = DBSCAN(eps=1.2, min_samples=6).fit(X)
core_samples_mask = np.zeros_like(d.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = d.labels_


# Number of clusters in labels, ignoring the noise label (-1) if present.
n_clusters_ = len(set(labels) - {-1})
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
# Esempio n. 5
# 0
# Z-score standardisation: per-attribute population standard deviation,
# then (value - mean) / sd applied in place to every record.
sd = [
    math.sqrt(
        sum((data[r][a] - means[a]) ** 2 for r in range(num_of_records))
        / num_of_records
    )
    for a in range(num_of_atributes)
]
for r in range(num_of_records):
    for a in range(num_of_atributes):
        data[r][a] = (data[r][a] - means[a]) / sd[a]

# Wrap every standardised record in a project Tuple.
data_set = [tuple.Tuple(data[r]) for r in range(num_of_records)]
print(data_set[0].values)

dbscan = db.Dbscan(data_set, minPts, eps)
clusters = dbscan.perform()

# One matplotlib style string per cluster: circles, squares, triangles,
# then stars, cycling through eight colours each.
color = ['ro', 'go', 'bo', 'co', 'mo', 'yo', 'ko', 'wo',
         'rs', 'gs', 'bs', 'cs', 'ms', 'ys', 'ks', 'ws',
         'r^', 'g^', 'b^', 'c^', 'm^', 'y^', 'k^', 'w^',
         'r*', 'g*', 'b*', 'c*', 'm*', 'y*', 'k*', 'w*']
i = 0
print("Liczba klastrow = " + str(len(clusters)))
# Gather per-cluster scatter coordinates from attribute columns 4 and 5.
# NOTE(review): the loop body likely continues past this excerpt with the
# actual plotting calls — confirm against the full file.
for c in clusters:
    x = []
    y = []
    for t in c:
        x.append(t.values[4])
        y.append(t.values[5])
# Esempio n. 6
# 0
import dbscan
from sklearn import cluster

# Sanity-check the project Dbscan against scikit-learn's implementation
# on a small hand-made point set (same eps / min_samples for both).
d = dbscan.Dbscan()
arr1 = [(1, 2), (2, 3), (3, 4), (4, 5)]  # NOTE(review): defined but unused here
arr2 = [(1, 2), (2, 2), (1, 1), (2, 1), (3, 4), (4, 5), (4, 4), (9, 9)]

d.fit(arr2, dist=3, minp=4)

dbtest = cluster.DBSCAN(eps=3, min_samples=4).fit(arr2)

# Show both label assignments plus the sklearn labels' container type.
for out in (d.labels_, dbtest.labels_, type(dbtest.labels_)):
    print(out)
import numpy as np
import dbscan
#from pca import pca
def fromFile(filename, length):
    """Load a flat binary file of raw booleans and reshape it.

    Reads *filename* as a stream of 1-byte booleans and returns a numpy
    array of shape (n, length*length) — each row one flattened
    length x length grid.

    Fix: the file is binary (.bin), so it is opened in 'rb' rather than
    text mode 'r'; text mode can corrupt the byte stream via newline
    translation on some platforms.
    """
    with open(filename, 'rb') as f_handle:
        flat = np.fromfile(f_handle, dtype=bool)
    return np.reshape(flat, (-1, length * length))


# Fix: the original used Python 2 `print` statements (syntax errors under
# Python 3, and inconsistent with the py3 print() calls elsewhere in the
# file) and shadowed the builtin `sum`.
x = fromFile("out_wo_header_16_2000.bin", 16)
print(x.shape)

minNeighbors = 30
# NOTE(review): epsilon is defined but never passed to Dbscan below —
# confirm whether the Dbscan constructor is meant to receive it.
epsilon = 1.01


data = np.array(x, dtype=float)
dbscanner = dbscan.Dbscan(data, minNeighbors)
dbscanner.run()
clusterList = dbscanner.getClusterList()

# Report each cluster's size, the total number of clustered points, and
# the shape of the noise set.
total = 0
for cluster_points in clusterList:
    size = np.array(cluster_points).shape[0]
    print(size)
    total += size

print(total)
print(np.array(dbscanner.getNoise()).shape)