Example #1
def dbscan_run_all():
    pd.set_option('expand_frame_repr', True)
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 250)
    pd.set_option('display.width', 1000)
    np.set_printoptions(precision=3, floatmode='fixed')
    for fn in c.DB_TESTS:
        e, pts = c.e_pts[fn]
        df, class_id = parse_csv(fn)
        clusters, outliers = dbscan(df, e, pts)
        results = evaluate_clusters(clusters,
                                    None,
                                    verbose=False,
                                    outliers=outliers)
        totals = results.loc[:, [MAX, MIN, AVG, PTS, SSE]].sum()
        totals[NUM_DROPPED] = '-'
        totals[PCT_DROPPED] = '-'
        totals.name = TOTALS
        results = pd.concat([results, totals.to_frame().T])  # DataFrame.append was removed in pandas 2.0
        sfn = strip_file_path(fn)
        print(f'\nSummary - {sfn}')
        print(results.round(3))
        centroids = np.array([cluster.mean() for cluster in clusters])
        for idx, (cluster, centroid) in enumerate(zip(clusters, centroids)):
            print(f'\nCluster {idx + 1}')
            print(f'Centroid: {centroid}')
            print(cluster)
        print('\nOutliers')
        print(outliers)
        if 2 <= clusters[0].shape[1] <= 3:
            plot_clusters(clusters, centroids, f'dbscan {sfn}')
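The SSE column that Example #1 sums into its totals row comes from evaluate_clusters, which is not shown here. Below is a minimal sketch of how a per-cluster SSE could be computed from a pandas DataFrame; cluster_sse is a hypothetical helper, not part of the original project.

import pandas as pd

def cluster_sse(cluster: pd.DataFrame) -> float:
    # Sum of squared Euclidean distances from each point to the cluster centroid.
    centroid = cluster.mean()
    return float(((cluster - centroid) ** 2).sum(axis=1).sum())

# Toy 2-D cluster: the centroid is (2, 2), so the SSE is 2 + 0 + 2 = 4.
toy = pd.DataFrame({'x': [1.0, 2.0, 3.0], 'y': [1.0, 2.0, 3.0]})
print(cluster_sse(toy))  # 4.0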
Example #2
    def run(self):
        self.read_input()
        algo = self.options['algo']
        params = self.options['params']
        if algo == 'kmeans':
            model = kmeans(self.doc, params)
        elif algo == 'dbscan':
            model = dbscan(self.doc, params)
        elif algo == 'agglo':
            model = agglo(self.doc, params)
        elif algo == 'minib':
            model = minib(self.doc, params)
        else:
            raise ValueError(f'unknown algo: {algo!r}')
        model.evaluate()
Example #3
def calculate_mbrs(points, epsilon, min_pts, debug=False):
    """
    Find clusters using DBscan and then create a list of bounding rectangles
    to return.
    """
    mbrs = {}
    clusters = dbscan(points,
                      epsilon,
                      min_pts,
                      distance=euclidean,
                      debug=debug)
    extremes = {
        'max_x': float('-inf'),
        'max_y': float('-inf'),
        'min_x': float('inf'),
        'min_y': float('inf')
    }
    """
    Traditional dictionary iteration to populate mbr list
    Does same as below
    """
    for id, cpoints in clusters.items():
        print(id)
        xs = []
        ys = []
        for p in cpoints:
            xs.append(p[0])
            ys.append(p[1])
        max_x = max(xs)
        max_y = max(ys)
        min_x = min(xs)
        min_y = min(ys)

        if max_x > extremes['max_x']:
            extremes['max_x'] = max_x
        if max_y > extremes['max_y']:
            extremes['max_y'] = max_y
        if min_x < extremes['min_x']:
            extremes['min_x'] = min_x
        if min_y < extremes['min_y']:
            extremes['min_y'] = min_y

        mbrs[id] = [(min_x, min_y), (max_x, min_y), (max_x, max_y),
                    (min_x, max_y), (min_x, min_y)]
    mbrs['extremes'] = extremes
    return mbrs
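The dbscan module used by calculate_mbrs above is not shown; the following self-contained sketch uses scikit-learn's DBSCAN as a stand-in and builds the same closed five-corner rectangles per cluster (mbrs_from_labels is an illustrative name, not part of the original code).

import numpy as np
from sklearn.cluster import DBSCAN

def mbrs_from_labels(points, labels):
    # One closed five-corner rectangle per cluster label; noise (-1) is skipped.
    mbrs = {}
    for label in set(labels) - {-1}:
        cluster = points[labels == label]
        min_x, min_y = cluster.min(axis=0)
        max_x, max_y = cluster.max(axis=0)
        mbrs[label] = [(min_x, min_y), (max_x, min_y), (max_x, max_y),
                       (min_x, max_y), (min_x, min_y)]
    return mbrs

pts = np.array([[0, 0], [0, 1], [1, 0], [10, 10], [10, 11], [11, 10]], dtype=float)
labels = DBSCAN(eps=2.0, min_samples=2).fit_predict(pts)
print(mbrs_from_labels(pts, labels))  # two rectangles, one per cluster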
Example #4
def dbscan_hypertuning(fn: str):
    df, class_id = parse_csv(fn)
    # best: (e, pts, SSE)
    best = (0, 0, float('inf'))
    max_dist = df.max().max()
    step = min_dist = max(df.min().min() * 1.01, 1)
    num_steps = int(max_dist // min_dist + 1)
    for e in range(1, num_steps):
        for pts in range(2, df.shape[0] // 2, 2):
            clusters, noise = dbscan(df, e * step, pts)
            if len(clusters) == 0:
                continue
            if check_pct_outliers(df, noise):
                break
            measures = evaluate_clusters(clusters, None, verbose=False)
            if measures[SSE].sum() < best[2]:
                best = (e * step, pts, measures[SSE].sum())
    print(f'{fn}: e: {best[0]}, pts: {best[1]}, sse: {best[2]}')
    return best[0], best[1]
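The helpers in Example #4 (parse_csv, evaluate_clusters, check_pct_outliers) are not shown, so the grid search above cannot be run on its own. Below is a self-contained sketch of the same idea, assuming scikit-learn's DBSCAN, synthetic data, and a total within-cluster SSE score as stand-ins for the project's own functions.

import numpy as np
from sklearn.cluster import DBSCAN

def total_sse(X, labels):
    # Within-cluster sum of squared distances to each cluster centroid (noise ignored).
    sse = 0.0
    for label in set(labels) - {-1}:
        cluster = X[labels == label]
        sse += ((cluster - cluster.mean(axis=0)) ** 2).sum()
    return sse

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 0.3, (30, 2)), rng.normal(5, 0.3, (30, 2))])

best = (None, None, float('inf'))
for eps in np.arange(0.2, 2.0, 0.2):
    for min_samples in range(2, 10, 2):
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
        if (labels == -1).mean() > 0.2 or labels.max() < 0:
            continue  # too many outliers, or no clusters found
        sse = total_sse(X, labels)
        if sse < best[2]:
            best = (eps, min_samples, sse)
print(f'best eps: {best[0]:.1f}, min_samples: {best[1]}, sse: {best[2]:.3f}')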
Example #5
    def calculate_mbrs(self, points, epsilon, min_pts):
        """
        Find clusters using DBscan and then create a list of bounding rectangles
        to return.
        """
        mbrs = []

        clusters = dbscan(points, epsilon, min_pts)
        """
        Traditional dictionary iteration to populate mbr list
        Does same as below
        """
        # for id,cpoints in clusters.items():
        #     xs = []
        #     ys = []
        #     for p in cpoints:
        #         xs.append(p[0])
        #         ys.append(p[1])
        #     max_x = max(xs)
        #     max_y = max(ys)
        #     min_x = min(xs)
        #     min_y = min(ys)
        #     mbrs.append([(min_x,min_y),(max_x,min_y),(max_x,max_y),(min_x,max_y),(min_x,min_y)])
        # return mbrs
        """
        Using list index value to iterate over the clusters dictionary
        Does same as above
        """
        for id in range(len(clusters) - 1):
            xs = []
            ys = []
            for p in clusters[id]:
                xs.append(p[0])
                ys.append(p[1])
            max_x = max(xs)
            max_y = max(ys)
            min_x = min(xs)
            min_y = min(ys)
            mbrs.append([(min_x, min_y), (max_x, min_y), (max_x, max_y),
                         (min_x, max_y), (min_x, min_y)])
            print(mbrs)
        return mbrs
Example #6
def calculate_mbrs(points, epsilon, min_pts):
    """
    Find clusters using DBscan and then create a list of bounding rectangles
    to return.
    """
    mbrs = []
    clusters = dbscan(points, epsilon, min_pts)
    # Use the integer cluster ids to iterate over the clusters dictionary.
    for id in range(len(clusters) - 1):
        xs = []
        ys = []
        for p in clusters[id]:
            xs.append(p[0])
            ys.append(p[1])
        max_x = max(xs)
        max_y = max(ys)
        min_x = min(xs)
        min_y = min(ys)
        mbrs.append([(min_x, min_y), (max_x, min_y), (max_x, max_y),
                     (min_x, max_y), (min_x, min_y)])
    return mbrs
Example #7
import numpy as np
from dbscan import *
from plot3d import *

encodedImgs = np.loadtxt('datasets/temp/encodedImgs', delimiter=',')
encodedImgsTrain = np.loadtxt('datasets/temp/encodedImgsTrain', delimiter=',')
labels = np.loadtxt('datasets/temp/labels', delimiter=',')
labels = labels.astype(int)

epsilon = 0.015
minSamps = 2

print("Training")
dbscan(encodedImgsTrain, labels, epsilon=epsilon, minSamples=minSamps)
print("Training")
dbscan(encodedImgs, labels, epsilon=epsilon, minSamples=minSamps)
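The local dbscan wrapper in Example #7 also receives the ground-truth labels, presumably to score the clustering against them. Below is a minimal sketch of that idea using synthetic data, scikit-learn's DBSCAN, and the adjusted Rand index as an assumed quality measure; none of these stand-ins come from the original project.

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.metrics import adjusted_rand_score

# Synthetic stand-in for the encoded images: two tight 8-dimensional blobs.
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 0.05, (50, 8)), rng.normal(1, 0.05, (50, 8))])
y = np.array([0] * 50 + [1] * 50)

pred = DBSCAN(eps=0.5, min_samples=2).fit_predict(X)
print("adjusted Rand index:", adjusted_rand_score(y, pred))  # ~1.0 for this toy data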
Example #8
                datum['Time'], datum['temparature'], datum['humidity'],
                datum['LPG'], datum['label']
            ])

    if checkFileExist("%s.csv" % name):
        with open(output_file_name, 'a', encoding='utf-8') as output_file:
            csvwriter = csv.writer(output_file)
            csvwriter.writerow([
                datum['Time'], datum['temparature'], datum['humidity'],
                datum['LPG'], datum['label']
            ])

    if count >= 50 and count % 10 == 0:
        label = dbscan(temp_list[count - 50:count], hum_list[count - 50:count],
                       gas_list[count - 50:count],
                       label_list[count - 50:count])
        label = map(str, label)
        data_s = " ".join(label)
        conn.sendall(data_s.encode())
    else:
        reply = "send"

        conn.sendall(reply.encode())

    count += 1

conn.close()
s.close()
print('close')
Example #9
import sys
import utils
from dbscan import *

if __name__ == "__main__":
    filename = sys.argv[1]
    points = utils.read_input(filename)
    dbscan(points, 0.5)  # labels each point in points with its cluster id
    utils.visualize(points)
Example #10
import sys
import utils
from dbscan import *

if __name__ == "__main__":
    filename = sys.argv[1]
    points = utils.read_input(filename)
    dbscan(points, 0.5)  # labels each point in points with its cluster id
    utils.visualize(points)
Example #11
# Build the feature matrix X
X = data[:, :data.shape[1]-1]
#------------------------------------ FEATURE NORMALIZATION ---------------------------------#

#------------------------------------- ZSCORE -----------------------------------#


mu, sigma = muSigma(X)
X = zScore(X, mu, sigma)

#--------------------------------------------------------------------------------#


#------------------------------------ MINMAX ------------------------------------#

# min, diff, max = minmax(X)
# X = Min_Max(X, min, diff)

#--------------------------------------------------------------------------------#


#-------------------------------- FEATURE SCALING -------------------------------#

# min, diff, max = minmax(X)
# X = Feat_Scaling(X, max)

#--------------------------------------------------------------------------------#
epsylon = 2
punti_min = 2
centroid = dbscan(X, epsylon, punti_min)
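The normalization helpers referenced in Example #11 (muSigma, zScore, minmax, Min_Max, Feat_Scaling) are defined elsewhere in that project. The following is a minimal numpy sketch of what z-score and min-max normalization typically look like; the names below are illustrative, not the project's own.

import numpy as np

def mu_sigma(X):
    # Per-feature mean and standard deviation.
    return X.mean(axis=0), X.std(axis=0)

def z_score(X, mu, sigma):
    # Standardize each feature to zero mean and unit variance.
    return (X - mu) / sigma

def min_max(X):
    # Rescale each feature to the [0, 1] range.
    mn, mx = X.min(axis=0), X.max(axis=0)
    return (X - mn) / (mx - mn)

X = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
mu, sigma = mu_sigma(X)
print(z_score(X, mu, sigma))
print(min_max(X))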
Example #12
import arcpy
from dbscan import *
import numpy as np
from matplotlib import pyplot as plt

# curs  = arcpy.SearchCursor("C:\Users\huangyixiu\Documents\Course\gisData\SH_hos\Hospital.shp")
curs  = arcpy.SearchCursor("../Export_Output.shp")
pointx = []
pointy = []
c = 0
for row in curs:
    pointx.append(row.Shape.firstPoint.X)
    pointy.append(row.Shape.firstPoint.Y)
    c += 1
    if c % 1000 == 0:
        print("processing line %d" % c)
m = np.matrix([pointx, pointy])
# 0.005 degrees in WGS84, approximately 500 m
eps = 0.005
min_points = 4

clusterlis = dbscan(m, eps, min_points)
res = open("../cluster2.csv", "w")
res.write("x,y,clusterid\n")
for i in range(0, len(pointy)):
    res.write("%f,%f,%s\n" % (pointx[i], pointy[i], clusterlis[i]))
res.close()
print("done writing results!")