Ejemplo n.º 1
0
    def split(self):
        """Replace every centroid by two perturbed copies, doubling the codebook.

        For each existing centroid two new ``Centroid`` objects are created:
        one with every coordinate scaled by ``1 + self.SPLIT`` and one with
        every coordinate scaled by ``1 - self.SPLIT``.  The resulting list
        replaces ``self.centroids``.
        """
        # Debug output.  Formatted through "%" so that the printed text is the
        # same on Python 2 and Python 3.
        print("%s %s" % ("LEN", len(self.centroids[0].pts)))
        print("%s %s" % ("Size is now ", str(2 * len(self.centroids))))

        temp = self.a1d(len(self.centroids) * 2)

        # Slots i and i + 1 both derive from the parent centroid at i // 2.
        for i in range(0, len(temp), 2):
            # Floor division keeps the index an int on Python 3 as well.
            parent = self.centroids[i // 2]
            tCo = self.a2d(2, self.dimension)

            for j in range(self.dimension):
                tCo[0][j] = parent.getCo(j) * (1 + self.SPLIT)
            temp[i] = Centroid(tCo[0])

            for j in range(self.dimension):
                tCo[1][j] = parent.getCo(j) * (1 - self.SPLIT)
            temp[i + 1] = Centroid(tCo[1])

        self.centroids = temp
Ejemplo n.º 2
0
def reCalculateCentroids(x, y, lables, centroids):
    """Move every centroid halfway towards the mean of its assigned points.

    Args:
        x: x coordinates of the points.
        y: y coordinates of the points, parallel to ``x``.
        lables: for every point, the index into ``centroids`` of the
            centroid it is assigned to (spelling kept for callers).
        centroids: the current centroids; each exposes ``point`` (with
            ``X`` and ``Y``), ``color`` and ``lable``.

    Returns:
        A new list with one ``Centroid`` per input centroid, in the same
        order.  A centroid without any assigned point is copied unchanged;
        otherwise its new position is the midpoint between its old position
        and the mean of its points.

    One pass over the points plus one pass over the centroids.
    """
    clusterCount = len(centroids)
    sumX = [0] * clusterCount
    sumY = [0] * clusterCount
    numberOfPoints = [0] * clusterCount

    # enumerate() instead of the Python-2-only xrange().
    for index, xValue in enumerate(x):
        lable = lables[index]
        sumX[lable] += xValue
        sumY[lable] += y[index]
        numberOfPoints[lable] += 1

    newCentroids = []
    for index, oldCentroid in enumerate(centroids):
        count = numberOfPoints[index]

        # No point was assigned to this centroid: there is no mean to move
        # towards (and dividing by zero must be avoided), so keep it as is.
        if count == 0:
            newCentroids.append(
                Centroid(oldCentroid.point, oldCentroid.color,
                         oldCentroid.lable))
            continue

        # NOTE: "/" follows the interpreter's rules - true division on
        # Python 3, floor division for int operands on Python 2.
        meanX = sumX[index] / count
        meanY = sumY[index] / count
        averageX = (oldCentroid.point.X + meanX) / 2
        averageY = (oldCentroid.point.Y + meanY) / 2
        newCentroids.append(
            Centroid(Point(averageX, averageY), oldCentroid.color,
                     oldCentroid.lable))

    return newCentroids
Ejemplo n.º 3
0
def generateCentroids(points):
    """Create ``nCentroids`` centroids at random positions in [-10, 10] x [-10, 10].

    When ``args.no_dead`` is set, every centroid whose index is reported by
    ``deadCentroids(points, centroids)`` is re-drawn at a new random position,
    and this is repeated until ``deadCentroids`` reports nothing.

    Returns the list of centroids.
    """

    def randomCentroid():
        # Both coordinates are drawn independently, first argument first.
        return Centroid(random.uniform(-10, 10), random.uniform(-10, 10))

    centroids = []
    for _ in range(nCentroids):
        centroids.append(randomCentroid())

    if not args.no_dead:
        return centroids

    dead = deadCentroids(points, centroids)
    while len(dead) > 0:
        for position in dead:
            centroids[position] = randomCentroid()
        dead = deadCentroids(points, centroids)
    return centroids
Ejemplo n.º 4
0
 def get_detection_algoritm(self, name, tweets, vectors):
     """Build the detection algorithm selected by ``name``.

     ``"knn"`` yields ``KNN(tweets, vectors)`` and ``"centroid"`` yields
     ``Centroid(tweets, vectors)``.  Any other name is unsupported and
     yields ``None``.
     """
     detector = None
     if name == "knn":
         detector = KNN(tweets, vectors)
     elif name == "centroid":
         detector = Centroid(tweets, vectors)
     return detector
Ejemplo n.º 5
0
    def __new__(cls, count):
        """Create the list instance and fill it with ``count`` centroids.

        Each centroid gets coordinates ``random() - random()`` for x and y
        (drawn in that order) and an ``inertia`` of ``Vec2(0, 0)``.
        """
        instance = list.__new__(cls)
        for _ in range(count):
            member = Centroid(random() - random(), random() - random())
            member.inertia = Vec2(0, 0)
            instance.append(member)
        return instance
Ejemplo n.º 6
0
 def runKNN(self, k=0, maxIterations=1):
     """Group the documents around ``k`` randomly seeded centroids and print the groups.

     ``self.groups`` is always reset.  For ``k > 0`` the centroids are seeded
     from the ``tfidf`` of randomly chosen entries of ``self.docList``, then
     ``self.kNNiteration()`` is called up to ``maxIterations`` times, stopping
     early as soon as it returns a truthy value.  Afterwards each centroid's
     ``copyOfElements`` becomes one group, and the titles of every group are
     printed.  For ``k <= 0`` nothing is computed or printed.
     """
     self.groups = []
     if k > 0:
         seeds = []
         for _ in range(k):
             seeds.append(Centroid(secrets.choice(self.docList).tfidf))
         self.centroids = seeds

         for _ in range(maxIterations):
             finished = self.kNNiteration()
             if finished:
                 break

         self.groups.extend(c.copyOfElements for c in self.centroids)

     for group in self.groups:
         titles = [member.title for member in group]
         print(titles)
Ejemplo n.º 7
0
    def initialize(self):
        """Build the codebook by repeated splitting and re-grouping.

        Starts from a single centroid at the origin that owns every point in
        ``self.pt``, then, while there are fewer centroids than
        ``self.codebook_size``: splits the centroids, reassigns points with
        ``groupPtoC()`` and runs the update/regroup loop below.
        """
        # Running totals; note they are initialised once here and are not
        # reset anywhere further down.
        distortion_before_update = 0
        distortion_after_update = 0
        self.centroids = []
        # Seed centroid: all coordinates zero, one per dimension.
        origin = Centroid([0] * self.dimension)
        self.centroids.append(origin)
        for i in range(len(self.pt)):
            # Every point is added to the single seed centroid.
            self.centroids[0].add(self.pt[i], 0)

        self.centroids[0].update()
        # The bare string below is disabled debug code; as an expression
        # statement it is evaluated and discarded.
        """
        for i in self.centroids:
        	pprint(vars(i))
        return
        """

        while len(self.centroids) < self.codebook_size:

            # Grow the codebook, then reassign the points to the new centroids.
            self.split()

            self.groupPtoC()

            # Update / regroup loop.
            while True:
                # Accumulate the distortion of every centroid, then update it.
                i = 0
                while i < len(self.centroids):
                    distortion_before_update += self.centroids[
                        i].getDistortion()

                    self.centroids[i].update()

                    i += 1

                self.groupPtoC()

                # Accumulate the distortion again after regrouping.
                i = 0
                while i < len(self.centroids):
                    distortion_after_update += self.centroids[i].getDistortion(
                    )
                    i += 1
                # NOTE(review): the loop is left as soon as the difference is
                # NOT below MIN_DISTORTION, i.e. it keeps iterating only while
                # the change is already small.  Together with the accumulators
                # never being reset this looks inverted - confirm the intended
                # convergence criterion before relying on it.
                if not ((abs(distortion_after_update -
                             distortion_before_update) < self.MIN_DISTORTION)):
                    break
Ejemplo n.º 8
0
# This will be an implementation of a variation on the KMeans clustering algorithm

# 1. Randomly select x amount of centroids
# 2. Select random values for each feature within each centroid
# 3. Further split each cluster (amount of clusters == amount of centroids)

# The number of centroids is the first command-line argument.  sys.argv[0]
# is the script path itself, so reading it produced one centroid per
# character of that path instead of the requested amount.
if len(sys.argv) < 2:
    raise SystemExit("usage: %s NUMBER_OF_CENTROIDS" % sys.argv[0])
number_of_centroids = int(sys.argv[1])

calculator = Calculation()
file_parser = FileParser("headers_clean.dat", "cluster_definitions.dat")

candidate_emails = file_parser.read_file()

centroids = [Centroid() for _ in range(number_of_centroids)]

for candidate_email in candidate_emails:
    # NOTE(review): 12 acts as an upper bound - a centroid is only picked
    # when it is closer than 12; confirm this cut-off is intended.
    minimum_distance = 12
    distance_from_centroid = 12
    centroid_to_be_assigned = None

    candidate_email.classification = calculator.classify(candidate_email)

    # Keep the closest centroid seen so far.
    for centroid in centroids:
        distance_from_centroid = centroid.distance_from(candidate_email)
        if distance_from_centroid < minimum_distance:
            centroid_to_be_assigned = centroid
            minimum_distance = distance_from_centroid
    # NOTE(review): centroid_to_be_assigned is not used after this point in
    # the visible code.
Ejemplo n.º 9
0
    def init_demand(self):
        """Create, fill and index the ``demand`` database table.

        Computes one centroid per demand geography (weighted or geographic,
        depending on ``self.centroid``), stores the result in
        ``self.centroid_df``, (re)creates the ``demand`` table with one extra
        column per ``pop_*`` column, inserts one row per entry of
        ``self.centroid_df`` and finally creates a GIST index.
        """

        if self.centroid == 'weighted':

            # Attach the population columns to the weight geometries first.
            self.demand_geo_weight = self.demand_geo_weight.merge(self.demand_pop, on = 'geouid')
            print(list(self.demand_geo_weight))
            centroid = Centroid(self.demand_geo, self.demand_geo_weight)
            self.centroid_df = centroid.calculate_weighted_centroid()

        else:
            # Attach the population columns to the demand geometries.
            self.demand_geo = self.demand_geo.merge(self.demand_pop, on = 'geouid')
            centroid = Centroid(self.demand_geo)
            self.centroid_df = centroid.calculate_geographic_centroid()

        #self.centroid_df.pop = self.centroid_df['pop_Total'].astype(float)
        self.centroid_df.geouid = self.centroid_df['geouid'].astype(int)

        # Turn the index into an explicit "id" column.
        self.centroid_df.reset_index(inplace = True)
        self.centroid_df.rename(columns = {'index': 'id'}, inplace = True)

        # Work on a deep copy so the WKT conversion below does not modify
        # self.centroid_df.
        centroid_df = self.centroid_df.copy(deep = True)

        centroid_df['centroid'] = [x.wkt for x in centroid_df['centroid']]

        # From here on the local name refers to the OGR data source opened
        # from the JSON export, no longer to the data frame copy.
        centroid_df = osgeo.ogr.Open(centroid_df.to_json())

        layer = centroid_df.GetLayer(0)

        # create demand table
        # The statement is left open on purpose: population columns are
        # appended below before the closing parenthesis is added.
        query_create = """
        	DROP TABLE IF EXISTS demand;
        	CREATE TABLE demand(
        	id serial PRIMARY KEY,
        	geoUID int,
        	centroid geometry,
        	boundary geometry
        """
        req_columns = ['id', 'geouid']
        geo_columns = ['centroid', 'boundary']


        # One table column per "pop_*" data frame column: text for object
        # dtype, float otherwise.
        pop_columns = []
        for col in [col for col in self.centroid_df if col.startswith('pop_')]:
            if self.centroid_df[col].dtype == 'O':
                unit = 'text'
            else:
                unit = 'float'

            pop_columns.append(col)
            query_create = query_create + """,  %s %s""" % ('"' + col + '"', unit)

        # Column order used by every INSERT: required, geometry, population.
        sql_columns = req_columns + geo_columns + pop_columns
        sql_col_string = '"' + '", "'.join(sql_columns) + '"'

        query_create = query_create + """)"""

        self.execute_query(query_create, "created demand")

        # NOTE(review): the INSERT statements below are assembled by string
        # interpolation of data frame values; if execute_query accepts bound
        # parameters, prefer those.
        for i in self.centroid_df.index:
            feature = layer.GetFeature(i)
            values = self.centroid_df.loc[i]

            req_values = "'" + "', '".join(values[req_columns].astype(str).values.flatten().tolist()) + "'"
            # req_values = self.centroid_df[req_columns].loc[i] # .astype(str).values.flatten().tolist()
            geometry = feature.GetGeometryRef().ExportToWkt()
            # The stored centroid is derived from the feature geometry here;
            # this also rebinds the local name "centroid" used above.
            centroid = feature.GetGeometryRef().Centroid().ExportToWkt()

            # Both geometries are tagged with the configured source CRS and
            # transformed to SRID 3347.
            if len(pop_columns) == 0:
                query_insert = """ INSERT into demand(%s) VALUES (%s, ST_Transform(ST_SetSRID(ST_GeomFromText(%s),%s),3347), ST_Transform(ST_SetSRID(ST_GeomFromText(%s),%s),3347));
                """ % (sql_col_string, req_values, "'" + centroid + "'", self.config.demand_geo_crs, "'" + geometry + "'", self.config.demand_geo_crs)

            else:
                pop_values = "'" + "', '".join(values[pop_columns].astype(str).values.flatten().tolist()) + "'"

                query_insert = """ INSERT into demand(%s) VALUES (%s, ST_Transform(ST_SetSRID(ST_GeomFromText(%s),%s),3347), ST_Transform(ST_SetSRID(ST_GeomFromText(%s),%s),3347), %s);
                """ % (sql_col_string, req_values, "'" + centroid + "'", self.config.demand_geo_crs, "'" + geometry + "'", self.config.demand_geo_crs, pop_values)

            self.execute_query(query_insert, "updated demand")

        # (fuid,ST_Transform(ST_SetSRID(ST_GeomFromText(wkt),self.config.demand_geo_crs),3347),ST_Transform(ST_SetSRID(ST_GeomFromText(centroid),self.config.demand_geo_crs),3347),pop)

        # loop through all features
        # The string literal below is a disabled earlier implementation; as a
        # bare expression statement it is evaluated and discarded.
        '''
        for i in range(layer.GetFeatureCount()):
            # import pdb; pdb.set_trace()
            feature = layer.GetFeature(i) # index value
            fuid = feature.GetField('geouid') # id
            centroid = feature.GetField('centroid') # centroid
            if centroid.startswith("POINT (-n") or centroid.startswith("POINT (n"):
            	centroid = feature.GetGeometryRef().Centroid().ExportToWkt()
            pop = feature.GetField('pop') # population / weight ?
            geometry = feature.GetGeometryRef()
            wkt = geometry.ExportToWkt()
            self.execute_query("INSERT INTO demand (geoUID, boundary, centroid, pop) VALUES (%s,ST_Transform(ST_SetSRID(ST_GeomFromText(%s),%s),3347),ST_Transform(ST_SetSRID(ST_GeomFromText(%s),%s),3347),%s);", "updated demand",
             (fuid, wkt, self.config.demand_geo_crs, centroid, self.config.demand_geo_crs, pop))
            self.db_conn.conn.commit()
        '''

        # create index for demand table
        self.execute_query("CREATE INDEX idx_demand ON demand USING GIST(centroid, boundary);", "indexed demand")
Ejemplo n.º 10
0
#!/bin/env python

from Module import Module
from Centroid import Centroid

import matplotlib
matplotlib.use('pdf')
import matplotlib.pyplot as plt
from SDLDisplay import SDLDisplay
from DetectorGeometry import DetectorGeometry

import os

# Centroid database
centroidDB = Centroid("data/centroid_2020_0428.txt")

# (ring, logicalLayer) combinations that are left out of the count.
EXCLUDED_RING_LAYER = {(15, 7), (15, 8), (12, 9), (12, 10)}

# Count the modules in the centroid database that pass all exclusions.
nmod = 0
for key in centroidDB.data:
    # Build the Module once per key instead of once per queried property.
    module = Module(key)
    logical_layer = module.logicalLayer()
    # Logical layers 6 and 11 and modules with isLower() == 0 are skipped.
    if logical_layer == 6 or logical_layer == 11 or module.isLower() == 0:
        continue
    if (module.ring(), logical_layer) in EXCLUDED_RING_LAYER:
        continue
    nmod += 1
print(nmod)
Ejemplo n.º 11
0
def getCentroids(clusterPoints):
    """Wrap every cluster point in a ``Centroid`` with its own colour and index.

    Colours are taken from ``cm.rainbow`` sampled at ``len(clusterPoints)``
    evenly spaced positions in [0, 1]; the n-th point gets the n-th colour
    and ``n`` as its third constructor argument.

    Returns the list of centroids in the order of ``clusterPoints``.
    """
    colors = iter(cm.rainbow(np.linspace(0, 1, len(clusterPoints))))
    # enumerate() instead of the Python-2-only xrange().
    return [
        Centroid(point, next(colors), index)
        for index, point in enumerate(clusterPoints)
    ]
Ejemplo n.º 12
0
import pickle, os, math
from pprint import pprint
from Centroid import Centroid
from Points import Points
from Codebook import Codebook


def pb(ob):
    """Pretty-print the attribute dictionary of ``ob`` (debug helper)."""
    attributes = vars(ob)
    pprint(attributes)


# Sample training vectors for the codebook.
x = [[1, 2, 3], [4, 5, 6], [7, 8, 10], [10, 11, 12], [13, 14, 15],
     [16, 17, 18]]

c = Codebook()

c.genCB(x)

ct = Centroid([1, 2, 3])
# Formatted through "%" so the output matches the former Python 2
# ``print "X is ", x`` statement and also runs on Python 3.
print("%s %s" % ("X is ", x))
Ejemplo n.º 13
0
def test3():
    """Draw the eta-phi view of selected modules of layers 1-6 into ``test3.pdf``.

    A module is selected when ``subdet() == 5``, ``side() == 3``, it belongs
    to the layer in question and the (eta, phi) of its centroid lies within
    0.1 of (0, 0).  Each layer is drawn in its own colour.
    """

    from Centroid import Centroid
    centroidDB = Centroid("data/centroid_2020_0428.txt")

    # figure
    fig, ax = plt.subplots(figsize=(4. * 2, 2. * math.pi))
    dirpath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    det_geom = DetectorGeometry("{}/data/phase2_2020_0428.txt".format(dirpath))
    sdlDisplay = SDLDisplay(det_geom)

    def get_etaphi(point):
        """Convert a Cartesian (x, y, z) point to (eta, phi)."""
        x, y, z = point
        phi = math.atan2(y, x)
        # eta = -ln(tan(theta / 2)) with the sign of z.
        eta = math.copysign(
            -math.log(math.tan(
                math.atan(math.sqrt(y**2 + x**2) / abs(z)) / 2.)), z)
        return (eta, phi)

    def deltaR(point1, point2):
        """Euclidean distance between two points."""
        return np.linalg.norm(np.array(point1) - np.array(point2))

    def near_origin_selector(layer):
        """Return the getDetIds predicate for the given layer.

        ``layer`` is bound through the enclosing call, so every predicate
        keeps its own layer number.
        """

        def selector(x):
            # Build the Module once per candidate instead of once per check.
            module = Module(x[0])
            if module.subdet() != 5:
                return False
            if module.side() != 3:
                return False
            if module.layer() != layer:
                return False
            etaphi = get_etaphi(centroidDB.getCentroid(module.detId()))
            return deltaR(etaphi, (0, 0)) < 0.1

        return selector

    # Layer number -> drawing colour, in drawing order.
    layer_colors = [
        (1, (1, 0, 0)),
        (2, (1, 1, 0)),
        (3, (1, 0, 1)),
        (4, (0, 1, 1)),
        (5, (0, 0, 1)),
        (6, (0, 0, 0)),
    ]

    # All selections are made before anything is drawn.
    selections = [(det_geom.getDetIds(near_origin_selector(layer)), color)
                  for layer, color in layer_colors]

    for detids, color in selections:
        sdlDisplay.set_detector_etaphi_collection(detids)
        sdlDisplay.display_detector_etaphi(ax, color=color)
    fig.savefig("test3.pdf")
    # NOTE(review): everything from here on is the tail of a separate
    # snippet.  These indented statements look like the body of a
    # per-fold evaluation loop whose header is not visible; rangeStart,
    # rangeEnd, jump, numberOfDataitemsInAClass and the CVAvg* accumulators
    # are initialised outside the visible code.

    # Select the rows "rangeStart:rangeEnd" as the test range for this pass.
    rangeTotal = str(rangeStart) + ":" + str(rangeEnd)
    print()
    print("Test range -> ", rangeStart, ":", rangeEnd)
    splitData2TestTrain("pickedClasses.csv", numberOfDataitemsInAClass,
                        rangeTotal)
    #print()

    #print
    # Read train.csv and test.csv for this pass.
    train = pd.read_csv("train.csv")
    test = pd.read_csv("test.csv")
    print()
    # Each classifier returns a score that is added to its running total.
    score = KNN(train, test)
    print()
    CVAvgKNN = CVAvgKNN + score

    score = Centroid(train, test)
    CVAvgCentroid = CVAvgCentroid + score

    score = SVM(train, test)
    CVAvgSVM = CVAvgSVM + score

    # Advance the test range by "jump" rows for the next pass.
    rangeStart = rangeEnd + 1
    rangeEnd = (rangeStart + jump) - 1

# Report the KNN total averaged over 5 folds.
print()
print("*************************************")
print("Average 5-fold CV KNN score: ", CVAvgKNN / 5, "/1.0")
print("*************************************")

print()
print("*************************************")
Ejemplo n.º 15
0
 def __init__(self):
     """Build one ``Centroid`` per training set, for set numbers 1 to 33.

     The centroids are stored in ``self.centcls`` in ascending set order,
     so the centroid for set ``i`` is at index ``i - 1``.
     """
     self.centcls = []
     # range() instead of the Python-2-only xrange().
     for classIndex in range(1, 34):
         trainsets = self.loadTrainData(classIndex)
         self.centcls.append(Centroid(trainsets))
Ejemplo n.º 16
0
 def __init__(self, det_geom,
              centroid_path="/data2/segmentlinking/centroid_2020_0428.txt"):
     """Store the detector geometry and load the centroid database.

     Args:
         det_geom: detector geometry object, kept as ``self.det_geom``.
         centroid_path: file handed to ``Centroid``; defaults to the
             previously hard-coded location so existing callers are
             unaffected.
     """
     self.det_geom = det_geom
     self.centroidDB = Centroid(centroid_path)