def testLostFunctionReference(self):
    """Regression test for bug #1727558.

    Clusters three points with a custom distance callable and checks,
    through ``compare_list``, that the outcome equals the input points.
    """
    def add(x, y):
        return x + y

    clustering = KMeansClustering([(1, 1), (20, 40), (20, 41)], add)
    actual = clustering.getclusters(3)
    wanted = [(1, 1), (20, 40), (20, 41)]
    # Compare first, format afterwards - same evaluation order as a
    # single assertTrue(compare_list(...), message) call.
    same = compare_list(actual, wanted)
    self.assertTrue(same, "Elements differ!\n%s\n%s" % (actual, wanted))
def mk_clusters(self, points, k, clusters):
    """Recursively split ``points`` into clusters collected in ``clusters``.

    ``points`` is divided into ``k`` groups with ``KMeansClustering``.
    Every resulting group is then handled according to its size:

    * up to 2 members: stored as-is;
    * 3 to 9 members: if the group is a list it is tentatively split in
      two.  The two halves are stored separately only when the split
      produced more than one part and their centroids are at least
      0.025 apart; otherwise the group is stored whole;
    * 10 or more members: split in two recursively.

    Args:
        points: the items to cluster.
        k: number of groups requested for this level.
        clusters: list receiving the final clusters (extended in place).

    Returns:
        The list of final clusters.
    """
    cl = KMeansClustering(points, None)
    for group in cl.getclusters(k):
        size = len(group)
        if size <= 2:
            clusters.append(group)
        elif size <= 9:
            # isinstance instead of comparing against types.ListType, so
            # list subclasses are accepted as well.
            if isinstance(group, list):
                halves = self.mk_clusters(group, 2, [])
                if len(halves) > 1:
                    first = Point(halves[0])
                    second = Point(halves[1])
                    gap = first.centroid.distance(second.centroid)
                    if gap >= 0.025:
                        # Far enough apart: keep the parts of the split
                        # and skip storing the undivided group.
                        clusters.extend(halves)
                        continue
            clusters.append(group)
        else:
            clusters = self.mk_clusters(group, 2, clusters)
    return clusters
def getFlickrClusters(self, place='san francisco'):
    """Cluster the photo locations of ``place`` and write the cluster centers.

    Photos matching ``place`` are fetched from ``self.flickr``; their
    ``loc`` coordinates are grouped into ``self.clusterNum`` k-means
    clusters.  The mean point of every cluster is written to
    ``self.response`` as ``[(lat,lon),(lat,lon),...]`` and the time spent
    on fetching and clustering is printed.
    """
    started = time.clock()
    coordinates = [(photo['loc'][0], photo['loc'][1])
                   for photo in self.flickr.find({"place": place})]
    clusters = KMeansClustering(coordinates).getclusters(self.clusterNum)
    elapsed = time.clock() - started
    print("getFlickrCluster finished in %s seconds" % elapsed)

    # Only the center of each cluster is reported, not its members.
    centers = []
    for members in clusters:
        lat_sum = 0
        lon_sum = 0
        for point in members:
            lat_sum += point[0]
            lon_sum += point[1]
        size = len(members)
        centers.append("(%s,%s)" % (lat_sum / size, lon_sum / size))

    # Joining with "," yields the same text as appending "(..)," per
    # cluster and stripping the trailing comma afterwards.
    self.response.write("[" + ",".join(centers) + "]")
def mk_clusters(self, points, k, clusters):
    """Recursively break ``points`` down into clusters stored in ``clusters``.

    ``KMeansClustering`` divides ``points`` into ``k`` groups; each group
    is then treated according to its size:

    * 1-2 members: appended unchanged;
    * 3-9 members: when the group is a list, a trial split into two parts
      is made.  The parts replace the group only if the split returned
      more than one part and the centroids of the first two parts are at
      least 0.025 apart; in every other case the whole group is appended;
    * 10+ members: the group is split in two recursively.

    Args:
        points: the items to cluster.
        k: number of groups requested for this level.
        clusters: accumulator list, extended in place.

    Returns:
        The accumulator with all final clusters.
    """
    clustering = KMeansClustering(points, None)
    for members in clustering.getclusters(k):
        count = len(members)
        if count <= 2:
            clusters.append(members)
            continue
        if count > 9:
            clusters = self.mk_clusters(members, 2, clusters)
            continue

        # 3..9 members.  isinstance replaces the former
        # ``type(c) == types.ListType`` comparison.
        if isinstance(members, list):
            parts = self.mk_clusters(members, 2, [])
            if len(parts) > 1:
                p1 = Point(parts[0])
                p2 = Point(parts[1])
                if p1.centroid.distance(p2.centroid) >= 0.025:
                    # Distinct enough: keep the parts, not the whole group.
                    clusters.extend(parts)
                    continue
        clusters.append(members)
    return clusters
def cluster_stations(stations, empty='empty'):
    """Geographically k-means cluster a filtered subset of ``stations``.

    Expects a list in the format returned by ``prep_stations``.  Stations
    are first filtered on fields 2 and 3:

    * ``empty == 'empty'``: keep those with ``(s[3] - s[2]) / s[3] < 0.2``;
    * anything else: keep those with ``s[2] / s[3] < 0.2``.

    The coordinates of the remaining stations (fields 4 and 5) are grouped
    into 4 clusters.

    Returns:
        A list of ``[s[0], s[1], s[2], s[3], s[4], s[5], cluster_number]``
        rows, with cluster numbers starting at 1.
    """
    if empty == 'empty':
        def selected(station):
            return (station[3] - station[2]) / float(station[3]) < .2
    else:
        def selected(station):
            return (station[2]) / float(station[3]) < .2

    tocluster = [station for station in stations if selected(station)]
    coordinates = [(station[4], station[5]) for station in tocluster]
    clusters = KMeansClustering(coordinates).getclusters(4)

    # The clustering only hands back coordinate tuples, so every point is
    # matched against the filtered stations to recover the other fields.
    clustered = []
    for number, members in enumerate(clusters, 1):
        for point in members:
            wanted = (point[0], point[1])
            clustered.extend(
                [s[0], s[1], s[2], s[3], s[4], s[5], number]
                for s in tocluster
                if (s[4], s[5]) == wanted
            )
    return clustered
def buildKcluster(data, k):
    """Build a k-means clustering of ``data``, name it and persist it.

    Args:
        data: records to cluster, e.g.
            ``[[12, 12], [34, 34], [23, 23], [32, 32], [1, 1]]``.
            According to the original notes the first value of a record
            is a key that is not counted for clustering.
        k: number of clusters.

    Side effects:
        Passes the clusters, their names and centroids to ``writeCluster``
        (original notes: cluster record file ``/searchc/save/K.cluster``)
        and writes a timing log to ``<path>/log/K_<k>.log``.
    """
    print("Clustering...")
    started = datetime.datetime.now()
    clusterK = KMeansClustering(data, distance_function).getclusters(k)
    finished = datetime.datetime.now()

    print("Naming...")
    featureAll = readFeature('all')
    named = nameCluster(clusterK, featureAll)
    name = named[0]
    centroid = named[1]
    writeCluster('K', clusterK, name, centroid, k)

    print("Writting log...")
    header = "".join([
        "KMean Clustering Log\nDate:\t", str(started.date()),
        "\nStart:\t", str(started.time()),
        "\nEnd:\t", str(finished.time()),
        "\nDuration:\t", str(finished - started),
        "\nK:\t", str(k),
        "\nNo. cluster:\t", str(len(clusterK)),
    ])
    with open(path + '/log/K_' + str(k) + '.log', 'w') as outfile:
        outfile.write(header)
        # NOTE(review): the header has no trailing newline, so the first
        # size below lands on the "No. cluster" line - confirm intended.
        # NOTE(review): sizes are logged as len - 2; the reason for the
        # offset is not visible here.
        outfile.writelines(str(len(members) - 2) + "\n" for members in clusterK)
def testNumpyRandom(self):
    """Smoke test: cluster a random 500x2 numpy array into 10 groups.

    ``numpy.array_equal`` is handed over as the third constructor
    argument; the test passes when no exception is raised.
    """
    from cluster import KMeansClustering
    from numpy import random as rnd

    def squared_distance(p0, p1):
        return (p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2

    samples = rnd.rand(500, 2)
    clustering = KMeansClustering(samples, squared_distance, numpy.array_equal)
    clustering.getclusters(10)
def cluster(self, clusters):
    """Group the tags into ``clusters`` k-means clusters.

    The clustered items are the tag positions ``0 .. len(tags) - 1``, with
    ``self.__tagDistance`` supplied as the distance function.

    Args:
        clusters: the final number of clusters.

    Returns:
        The clusters as returned by ``getclusters``; they are also stored
        on the instance.
    """
    # A concrete list of positions.  This replaces an identity
    # ``map(lambda x: (x), ...)`` which is a lazy iterator on Python 3.
    tag_positions = list(range(len(self.__tags)))
    cl = KMeansClustering(tag_positions, self.__tagDistance)
    self.__clusters = cl.getclusters(clusters)
    return self.__clusters
def tod_clusters(self):
    """Return the time-of-day clusters ("timespans"), earliest first.

    ``self.time_of_day_data()`` is grouped into 10 k-means clusters; the
    first component of every data point is used as the time of day.

    Returns:
        A list of dicts sorted by ``tod_min`` with the keys ``len``,
        ``tod_avg``, ``tod_min``, ``start``, ``end``, ``width`` and
        ``left``.  An empty list is returned when the API call fails.
    """
    try:
        # get tod clustering data
        tod_data = self.time_of_day_data()
    except ApiException:
        deauthenticate()
        # Nothing to cluster.  Previously execution fell through and
        # failed on the unbound ``tod_data``.
        return []

    # determine tod clusters
    clusters = KMeansClustering(tod_data).getclusters(10)
    #todo: kmeans is paralelizable, so I could use MapReduce
    #todo: move this to an ajax call

    # One percent of a day in seconds; ``width`` and ``left`` are
    # expressed in these units.
    percent_of_day = 60*60*24/100

    # format tod groups
    groups = []
    for cl in clusters:
        times = [i[0] for i in cl]
        tod_max = max(times)
        tod_min = min(times)
        groups.append(dict(
            len=len(cl),
            tod_avg=sum(times)/len(cl),
            tod_min=tod_min,
            start=pretty_tod(tod_min),
            end=pretty_tod(tod_max),
            width=int(floor((tod_max-tod_min)/percent_of_day)),
            left=int(floor(tod_min/percent_of_day)),
        ))
    # The sorted result used to be computed and then dropped; return it
    # as the docstring promises.
    return sorted(groups, key=lambda k: k['tod_min'])
def testMultidimArray(self):
    """Smoke test: 200 random two-element lists cluster into 10 groups."""
    from random import random

    def squared_distance(p0, p1):
        return (p0[0] - p1[0])**2 + (p0[1] - p1[1])**2

    samples = [[random(), random()] for _ in range(200)]
    clustering = KMeansClustering(samples, squared_distance)
    clustering.getclusters(10)
def testMultidimArray(self):
    """Clustering 200 random 2-element lists into 10 groups must not fail."""
    from random import random

    points = [[random(), random()] for _ in range(200)]

    def distance(a, b):
        dx = a[0] - b[0]
        dy = a[1] - b[1]
        return dx ** 2 + dy ** 2

    KMeansClustering(points, distance).getclusters(10)
def testNumpyRandom(self):
    """Clustering a random 500x2 numpy array into 10 groups must not fail.

    ``numpy.array_equal`` is passed as the third constructor argument.
    """
    from cluster import KMeansClustering
    from numpy import random as rnd

    points = rnd.rand(500, 2)

    def distance(a, b):
        return (a[0] - b[0])**2 + (a[1] - b[1])**2

    KMeansClustering(points, distance, numpy.array_equal).getclusters(10)
def make_cluster():
    """Cluster ``INPUT_SPACE`` and report the clusters and the time taken.

    Groups the module-level ``INPUT_SPACE`` into ``NUMBER_OF_CLUSTERS``
    k-means clusters, pretty-prints them through ``pp`` and prints the
    elapsed wall-clock time together with the input size.
    """
    print("Starting Clustering..")
    began = time.time()
    clusters = KMeansClustering(INPUT_SPACE).getclusters(NUMBER_OF_CLUSTERS)
    ended = time.time()
    print("Clustering Done..")
    pp.pprint(clusters)
    print("total time %s secs for %s element" % (str(ended - began), str(len(INPUT_SPACE))))
def testClustering(self):
    """Twelve 2-D points must fall into the two expected clusters."""
    points = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5),
              (8, 1), (3, 4), (8, 3), (9, 2), (2, 5), (9, 3)]
    wanted = [
        [(8, 2), (8, 1), (8, 3), (7, 3), (9, 2), (9, 3)],
        [(3, 5), (1, 5), (3, 4), (2, 6), (2, 5), (3, 6)],
    ]
    actual = KMeansClustering(points).getclusters(2)
    self.assertEqual(actual, wanted)
def geo_clusters(self, days=None, start_time=None, end_time=None):
    """Return summaries of the geographic clusters of this user's check-ins.

    ``self.geo_data()`` is grouped into 20 k-means clusters.  The first
    two components of every data point are used as latitude and
    longitude.  For each cluster a dict with its size, mean position,
    bounding box and some derived display values is built.

    Args:
        days, start_time, end_time: accepted but not used by this method.

    Returns:
        The list of cluster dicts sorted by ``percent``; an empty list
        when the API call fails.
    """
    try:
        # get geo clustering data
        geo_data = self.geo_data()
    except ApiException:
        deauthenticate()
        # Nothing to cluster.  Previously execution fell through and
        # failed on the unbound ``geo_data``.
        return []

    # determine geo clusters
    clusters = KMeansClustering(geo_data).getclusters(20)
    #todo: kmeans is paralelizable, so I could use MapReduce
    #todo: move this to an ajax call

    areas = []
    # Extreme coordinates seen over all clusters.
    # NOTE(review): computed but not part of the return value - confirm
    # whether callers should receive it as well.
    most = dict(north=None, south=None, east=None, west=None)
    for cl in clusters:
        lats = [i[0] for i in cl]
        lngs = [i[1] for i in cl]
        lat_max = max(lats)
        lat_min = min(lats)
        lng_max = max(lngs)
        lng_min = min(lngs)

        # Test against None explicitly: a real bound of 0 is falsy and
        # would be overwritten by a plain truthiness check.
        if most['north'] is None or lat_max > most['north']:
            most['north'] = lat_max
        if most['south'] is None or lat_min < most['south']:
            most['south'] = lat_min
        if most['east'] is None or lng_max > most['east']:
            most['east'] = lng_max
        if most['west'] is None or lng_min < most['west']:
            most['west'] = lng_min

        areas.append(dict(
            len=len(cl),
            # NOTE(review): total / cluster size / 100 looks inverted for
            # a percentage - confirm the intended formula.
            percent=len(geo_data)/len(cl)/100,
            opacity=float(len(geo_data))/100.0/float(len(cl)),
            avg_lat=sum(lats)/len(cl),
            avg_lng=sum(lngs)/len(cl),
            lat_max=lat_max,
            lat_min=lat_min,
            lat_mid=lat_max-(lat_max-lat_min) / 2,
            lng_max=lng_max,
            lng_min=lng_min,
            lng_mid=lng_max-(lng_max-lng_min) / 2,
            radius=(lat_max - lat_min + lng_max - lng_min) / 2,
        ))
    # The sorted result used to be computed and then dropped; return it
    # as the docstring promises.
    return sorted(areas, key=lambda k: k['percent'])
def testUnmodifiedData(self):
    """The clusters taken together must contain exactly the input points."""
    data = [(8, 2), (7, 3), (2, 6), (3, 5), (3, 6), (1, 5),
            (8, 1), (3, 4), (8, 3), (9, 2), (2, 5), (9, 3)]
    clustering = KMeansClustering(data)
    flattened = []
    for members in clustering.getclusters(2):
        flattened.extend(members)
    self.assertEqual(sorted(flattened), sorted(data))
def testLostFunctionReference(self):
    """Check for bug #1727558: a custom distance callable must stay usable.

    Three points are clustered into three groups and the outcome is
    compared with the input points via ``compare_list``.
    """
    points = [(1, 1), (20, 40), (20, 41)]
    expected = [(1, 1), (20, 40), (20, 41)]

    def distance(x, y):
        return x + y

    clusters = KMeansClustering(points, distance).getclusters(3)
    # The comparison runs before the message is formatted, exactly as in
    # a single assertTrue(compare_list(...), message) call.
    matches = compare_list(clusters, expected)
    message = "Elements differ!\n%s\n%s" % (clusters, expected)
    self.assertTrue(matches, message)
def clusterSet(traingingStart, traningEndDate, clu_num):
    """Cluster each stock's change percentages and persist the trend types.

    For every index in the built-in stock list, the rows of
    ``t_enriched_bloomberg_prices`` with ``post_date`` between
    ``traingingStart`` and ``traningEndDate`` (both inclusive) are loaded.
    The distinct ``change_percent`` values are grouped into ``clu_num``
    k-means clusters, numbered from 1, and every row is tagged with the
    number of the cluster that contains its change value.

    Args:
        traingingStart: lower bound for ``post_date``.
        traningEndDate: upper bound for ``post_date``.
        clu_num: number of clusters per stock.

    Side effects:
        * each tagged row is handed to ``UpdateEnrichedData``; the
          connection is committed once per stock;
        * the ``[min, max]`` change range of every cluster, per stock, is
          written as JSON to the configured ``TREND_RANGE_FILE``;
        * all tagged rows are written as JSON to the configured
          ``TRAINING_TREND_RECORDS`` file;
        * the database connection is closed, also when an error occurs.
    """
    stockList = ["MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP",
                 "CRSMBCT", "IBOV", "IGBVL", "IBVC"]
    sql = ("select embers_id,post_date,current_value,previous_close_value,"
           "one_day_change,change_percent,name "
           "from t_enriched_bloomberg_prices "
           "where name=? and post_date<=? and post_date>=? "
           "order by post_date asc")
    # The number of rows to be committed for each interval (handed
    # through to UpdateEnrichedData unchanged).
    committedInterval = 0

    finalClusterRecord = []
    finalOrderCluster = {}

    con = common.getDBConnection()
    try:
        cur = con.cursor()
        for stock in stockList:
            cur.execute(sql, (stock, traningEndDate, traingingStart))
            rows = cur.fetchall()
            changes = [row[5] for row in rows]
            fdist = nltk.FreqDist(changes)
            # The clustering works on tuples, so each distinct change
            # value x becomes the point (0, x).
            clusterS = [(0, x) for x in fdist.keys()]
            cluster = KMeansClustering(clusterS).getclusters(clu_num)
            # Shape of the KMeans result: a list of clusters, each a list
            # of (0, change) points, e.g.
            #   [[(0, 0.0862), (0, 0.088), ...],
            #    [(0, -0.0001), (0, 0.0), ...], ...]

            namedCluster = []
            orderCluster = {}
            for number, clu in enumerate(cluster, 1):
                # Sets give constant-time membership tests for the row
                # lookup below instead of scanning a list per row.
                namedCluster.append((number, set(clu)))
                orderCluster[number] = [min(clu)[1], max(clu)[1]]

            for row in rows:
                point = (0, row[5])
                for number, members in namedCluster:
                    if point in members:
                        newRow = list(row)
                        newRow.append(number)
                        # update the trend type into Database
                        UpdateEnrichedData(con, committedInterval, newRow)
                        finalClusterRecord.append(newRow)
            con.commit()
            finalOrderCluster[stock] = orderCluster
            print("%s  Done" % stock)

        # Write the type range into a file
        trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE")
        with open(trendRangeFile, "w") as output:
            output.write(json.dumps(finalOrderCluster))

        # Write the training data into file
        trendSetRecordFile = common.get_configuration("training", "TRAINING_TREND_RECORDS")
        with open(trendSetRecordFile, "w") as output:
            output.write(json.dumps(finalClusterRecord))
    finally:
        # Release the connection on both the success and the error path.
        if con:
            con.close()
def get_data(inf='ders1.txt', nclusters=3):
    """Load ``(root, vals)`` records from ``inf`` and cluster the ``vals``.

    Every line of the file is evaluated as a Python expression that must
    yield a ``(root, vals)`` pair.  ``vals`` is used as a dictionary key
    and therefore has to be hashable.

    Args:
        inf: path of the input file.
        nclusters: number of k-means clusters to build.

    Returns:
        A ``(clusters, dct)`` tuple: the clusters of all ``vals`` and a
        dict mapping each ``vals`` back to its ``root``.
    """
    dct = {}
    data = []
    # The with-block closes the file even when a line fails to parse.
    with open(inf) as infile:
        for line in infile:
            # SECURITY: eval() runs arbitrary code found in the file.
            # Only use trusted input; consider ast.literal_eval if the
            # lines are plain literals.
            root, vals = eval(line)
            dct[vals] = root
            data.append(vals)
    cl = KMeansClustering(data)
    return cl.getclusters(nclusters), dct
def testPointDoubling(self):
    """Regression test for bug #1604868.

    Sixteen points are split into two clusters; the outcome is compared
    with the expected grouping through ``compare_list``.
    """
    upper = [(18, 13), (15, 12), (17, 12), (18, 12),
             (19, 12), (16, 11), (18, 11), (19, 10)]
    points = upper + [(0, 0), (1, 4), (1, 2), (2, 3),
                      (4, 1), (4, 3), (5, 2), (6, 1)]
    wanted = [
        list(upper),
        [(0, 0), (1, 4), (1, 2), (2, 3), (4, 1), (5, 2), (6, 1), (4, 3)],
    ]
    actual = KMeansClustering(points).getclusters(2)
    # Compare first, format afterwards - same evaluation order as a
    # single assertTrue(compare_list(...), message) call.
    same = compare_list(actual, wanted)
    self.assertTrue(same, "Elements differ!\n%s\n%s" % (actual, wanted))
def doClusterByWords(k, question):
    """Cluster the ideas of ``question`` by their bag-of-words vectors.

    The vectors from ``computeBagsOfWords`` are grouped into ``k``
    clusters.  All existing clusters of the question are deleted, one
    ``Cluster`` per group is created and every idea is assigned to its
    cluster.  Existing cluster tags and cluster assignments are removed
    afterwards because the clusters have been reformed.

    Args:
        k: number of clusters.
        question: the question whose ideas are clustered.

    Returns:
        A list of ``{"name": cluster text, "ideas": [idea dicts]}``.

    Raises:
        Any exception raised while clustering or storing propagates to
        the caller unchanged.
    """
    # The former bare ``except: ...; raise`` only re-raised, so errors
    # are simply left to propagate.
    vectors, _texts, _phrases, ids = computeBagsOfWords(question)
    clusterData = KMeansClustering(vectors).getclusters(k)
    clusters = clusterData["clusters"]
    ideaIndices = clusterData["indices"]

    # Delete existing clusters from database (including those created by other algorithms)
    Cluster.deleteAllClusters(question)

    clusteredIdeas = []
    for clusterNum, cluster in enumerate(clusters):
        clusterObj = Cluster.createCluster(
            "Cluster #" + str(clusterNum + 1), clusterNum, question, CLUSTER_BY_WORDS
        )
        if isinstance(cluster, tuple):
            # Cluster may only have a single tuple instead of a collection of them
            positions = [0]
        else:
            positions = [position for position, _vector in enumerate(cluster)]

        ideas = []
        for position in positions:
            idea_id = ids[ideaIndices[clusterNum][position]]
            idea = Idea.assignCluster(idea_id, clusterObj)
            ideas.append(idea.toDict())
        clusteredIdeas.append({"name": clusterObj.text, "ideas": ideas})

    # Clean up any existing tags and cluster assignments since clusters have been reformed
    ClusterTag.deleteAllTags(question)
    ClusterAssignment.deleteAllClusterAssignments(question)
    return clusteredIdeas
def testPointDoubling(self):
    """Check for bug #1604868: two clusters must match the expected grouping."""
    data = [(18, 13), (15, 12), (17, 12), (18, 12),
            (19, 12), (16, 11), (18, 11), (19, 10),
            (0, 0), (1, 4), (1, 2), (2, 3),
            (4, 1), (4, 3), (5, 2), (6, 1)]
    first_group = [(18, 13), (15, 12), (17, 12), (18, 12),
                   (19, 12), (16, 11), (18, 11), (19, 10)]
    second_group = [(0, 0), (1, 4), (1, 2), (2, 3),
                    (4, 1), (5, 2), (6, 1), (4, 3)]
    expected = [first_group, second_group]

    clusters = KMeansClustering(data).getclusters(2)
    # The comparison runs before the message is formatted, exactly as in
    # a single assertTrue(compare_list(...), message) call.
    matches = compare_list(clusters, expected)
    message = "Elements differ!\n%s\n%s" % (clusters, expected)
    self.assertTrue(matches, message)
def Kmeanscluster(self, frontiers, no_robots):
    """Cluster ``frontiers`` into one cluster per robot.

    For each of the first ``no_robots`` clusters a centroid is computed
    with ``centroidnp`` and wrapped in an unoccupied ``Cluster.Cluster``;
    these objects replace ``self.freeclusters``.

    Returns:
        ``(self.freeclusters, clusters)`` - the new cluster objects and
        the raw clusters.
    """
    from cluster import KMeansClustering

    self.freeclusters = []
    clusters = KMeansClustering(frontiers).getclusters(no_robots)
    for robot in range(no_robots):
        members = clusters[robot]
        center = centroidnp(len(members), members)
        free_cluster = Cluster.Cluster(center[0], center[1])
        free_cluster.occupied = False
        self.freeclusters.append(free_cluster)
    return self.freeclusters, clusters
def testNonsenseCluster(self):
    """Requesting more clusters than there are data items must raise
    ``ClusteringError``.
    """
    def absolute_difference(x, y):
        return abs(x - y)

    clustering = KMeansClustering([876, 123], distance=absolute_difference)
    self.assertRaises(ClusteringError, clustering.getclusters, 5)
def get_clusters(self, input_list, number_of_clusters=3):
    """Group the values of ``input_list`` into k-means clusters.

    Every value ``x`` is clustered as the point ``(0, x)``.

    Returns:
        A list of clusters holding the plain values again.  Each cluster
        is sorted from highest to lowest, and the clusters themselves are
        ordered by their highest value, largest first.
    """
    # 1.) Reshape for KMeans
    points = [(0, value) for value in input_list]

    # 2.) Cluster
    raw_clusters = KMeansClustering(points).getclusters(number_of_clusters)

    # 3.) Reshape back to plain values and sort highest to lowest
    ordered = [sorted([point[1] for point in members], reverse=True)
               for members in raw_clusters]
    ordered.sort(key=lambda values: values[0], reverse=True)
    return ordered
def Kmeanscluster(self, frontiers, no_robots):
    """Cluster ``frontiers`` into one cluster per robot.

    The centroid of each of the first ``no_robots`` clusters is computed
    with ``centroidnp`` and stored in an unoccupied ``Cluster.Cluster``;
    these objects replace ``self.freeclusters``.  A cluster that comes
    back as a bare tuple is converted, in place, to a one-element list.

    Returns:
        ``(self.freeclusters, clusters)``.
    """
    from cluster import KMeansClustering

    self.freeclusters = []

    # Perform clustering
    clusters = KMeansClustering(frontiers).getclusters(no_robots)

    for index in range(no_robots):
        # Cheap hack: centroidnp only takes a list, so a cluster returned
        # as a single tuple is wrapped into a list first.
        if type(clusters[index]) is tuple:
            clusters[index] = [clusters[index]]
        center = centroidnp(clusters[index])

        # Store each centroid in its corresponding Cluster object
        cluster_object = Cluster.Cluster(center[0], center[1])
        cluster_object.occupied = False
        self.freeclusters.append(cluster_object)
    return self.freeclusters, clusters
def cluster_trajectories():
    """Cluster DJs by their yearly ranking trajectories and print the groups.

    Reads ``static/datasets/dj-mag-top-100.json``, which is indexed by
    year (as a string) for the years 1997 to 2013.  For every DJ a tuple
    with one entry per year is built: the position of the DJ in that
    year's list, or -999 when the DJ does not appear that year.  The
    tuples are grouped into 10 k-means clusters and the DJ names per
    cluster are printed as JSON.
    """
    # The with-block closes the file even when loading fails.
    with open('static/datasets/dj-mag-top-100.json') as json_data:
        data = json.load(json_data)

    years = [str(year) for year in range(1997, 2014)]

    uniques_djs = set()
    for year in years:
        uniques_djs.update(data[year])

    dj_vectors = []
    # Several DJs can share one trajectory, so every vector maps to a
    # list of names rather than to a single, overwritten name.
    djs_by_vector = {}
    for dj in uniques_djs:
        trajectory = tuple(
            data[year].index(dj) if dj in data[year] else -999
            for year in years
        )
        dj_vectors.append(trajectory)
        djs_by_vector.setdefault(trajectory, []).append(dj)

    clusters = KMeansClustering(dj_vectors).getclusters(10)

    dj_clusters = []
    for cluster in clusters:
        dj_group = []
        for vector in cluster:
            names = djs_by_vector[vector]
            # Hand out a different name per occurrence of a shared
            # vector; the last remaining name is kept available.
            dj_group.append(names.pop() if len(names) > 1 else names[0])
        dj_clusters.append(dj_group)
    print(json.dumps(dj_clusters))
def testClusterLen0(self):
    """Clustering an empty data set must give an empty result."""
    clustering = KMeansClustering([])
    for requested in (2, 7):
        self.assertEqual([], clustering.getclusters(requested))
def testClusterLen1(self):
    """A search space of length 1 must come back as the single cluster."""
    clustering = KMeansClustering([876])
    for requested in (2, 5):
        self.assertEqual([876], clustering.getclusters(requested))
def testNumpyRandom(self):
    """Smoke test: a random 500x2 numpy array clusters into 10 groups.

    ``numpy.array_equal`` is passed as the third constructor argument.
    """
    def squared_distance(p0, p1):
        dx = p0[0] - p1[0]
        dy = p0[1] - p1[1]
        return dx ** 2 + dy ** 2

    samples = numpy.random.rand(500, 2)
    clustering = KMeansClustering(samples, squared_distance, numpy.array_equal)
    clustering.getclusters(10)
((lat, lon), f) = coords_freqs[label] expanded_coords.append((label, [(lon, lat)] * f)) # Flip lat/lon for Google Earth # No need to clutter the map with unnecessary placemarks... kml_items = [{'label': label, 'coords': '%s,%s' % coords[0]} for (label, coords) in expanded_coords] # It could also be interesting to include names of your contacts on the map for display for item in kml_items: item['contacts'] = '\n'.join(['%s %s.' % (ec.first_name, ec.last_name[0]) for ec in extended_connections if ec.location == item['label']]) cl = KMeansClustering([coords for (label, coords_list) in expanded_coords for coords in coords_list]) centroids = [{'label': 'CENTROID', 'coords': '%s,%s' % centroid(c)} for c in cl.getclusters(K)] kml_items.extend(centroids) kml = createKML(kml_items) if not os.path.isdir('out'): os.mkdir('out') f = open("out/" + OUT, 'w') f.write(kml) f.close() print >> sys.stderr, 'Data pickled to out/' + OUT
def doClusterBySimilarity(k, question, includeUnclustered=False):
    """Cluster a question's ideas with k-means over pairwise similarity marks.

    Builds one 0/1 vector per idea from createSimilarityDict(question),
    clusters the vectors into ``k`` groups, replaces the question's stored
    clusters with the new ones and returns a list of
    ``{"name": ..., "ideas": [...]}`` dicts.  When ``includeUnclustered`` is
    true, ideas that have no entry in the similarity dict are appended as an
    extra "Unclustered" group.  A ClusteringError is re-raised to the caller.

    NOTE(review): the source text was whitespace-collapsed; the unclustered
    section is laid out at function level here -- confirm against the
    original indentation.
    """
    clusteredIdeas = []
    similarityDict = createSimilarityDict(question)

    if similarityDict:
        # One row per idea (in sorted key order); each row holds a 0/1 flag
        # per idea saying whether the pair was ever marked as similar.
        sortedKeys = sorted(similarityDict.iterkeys())
        countVectors = []
        rowIds = []
        for rowKey in sortedKeys:
            pairCounts = similarityDict[rowKey]["counts"]
            flags = []
            for colKey in sortedKeys:
                # Recorded count if present; otherwise 1 on the diagonal and
                # 0 for pairs never marked as similar.
                # TODO: for k-means clustering, what value should be used
                # when rowKey == colKey
                if colKey in pairCounts:
                    raw = pairCounts[colKey]
                elif rowKey == colKey:
                    raw = 1
                else:
                    raw = 0
                flags.append(1 if raw > 0 else 0)
            rowIds.append(similarityDict[rowKey]["idea"]["id"])
            countVectors.append(tuple(flags))

        try:
            clusterData = KMeansClustering(countVectors).getclusters(k)
            clusters = clusterData["clusters"]
            ideaIndices = clusterData["indices"]

            # Delete existing clusters from database (including those
            # created by other algorithms)
            Cluster.deleteAllClusters(question)

            for clusterNum, cluster in enumerate(clusters):
                clusterObj = Cluster.createCluster(
                    "Cluster #" + str(clusterNum + 1),
                    clusterNum,
                    question,
                    CLUSTER_BY_SIMILARITY
                )
                ideas = []
                for position, _vector in enumerate(cluster):
                    # Map the member's position back to its row, then to the
                    # idea id recorded for that row.
                    ideaId = rowIds[ideaIndices[clusterNum][position]]
                    assigned = Idea.assignCluster(ideaId, clusterObj)
                    ideas.append(assigned.toDict())
                clusteredIdeas.append({"name": clusterObj.text, "ideas": ideas})

            # Clean up any existing tags and cluster assignments since
            # clusters have been reformed
            ClusterTag.deleteAllTags(question)
            ClusterAssignment.deleteAllClusterAssignments(question)

        except ClusteringError:
            clusteredIdeas = []
            raise

    # TODO: need more efficient way to get unclustered ideas
    unclusteredIdeas = []
    if includeUnclustered:
        compared = dict(
            (ideaKey, similarityDict[ideaKey]["idea"])
            for ideaKey in similarityDict
        )
        for idea in Idea.all().filter("question =", question):
            if str(idea.key().id()) not in compared:
                unclusteredIdeas.append(idea.toDict())
        if unclusteredIdeas:
            clusteredIdeas.append({"name": "Unclustered", "ideas": unclusteredIdeas})

    return clusteredIdeas
def clusterSet(traningEndDate):
    """Cluster each stock index's daily change ratios and persist the result.

    For every index in the fixed stock list, the rows dated on or before
    ``traningEndDate`` are read from ``t_daily_stockindex``; the distinct
    rounded change ratios are grouped into 20 k-means clusters; every row is
    tagged with the number (1-based) of each cluster containing its ratio and
    inserted into ``t_daily_enrichedIndex``.  Afterwards the per-index
    ``[min, max]`` range of every cluster and all tagged rows are written as
    JSON to the files configured under ``model/TREND_RANGE_FILE`` and
    ``model/TRAINING_TREND_RECORDS``.

    traningEndDate -- value bound to the ``date<=?`` filter of the query.

    The database connection is now closed on every exit path; it used to be
    closed only when the whole run succeeded.
    """
    con = common.getDBConnection()
    try:
        cur = con.cursor()
        finalClusterRecord = []
        stockList = [
            "MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS",
            "COLCAP", "CRSMBCT", "IBOV", "IGBVL"
        ]
        finalOrderCluster = {}

        # Statement texts and the timestamp format do not depend on the
        # stock, so they are built once.
        sql = "select embers_id,sub_sequence,date,last_price,one_day_change,round(one_day_change/(last_price-one_day_change),4),stock_index from t_daily_stockindex where stock_index=? and date<=?"
        insertSql = "insert into t_daily_enrichedIndex (embers_id,derived_from,sub_sequence,stock_index,date,last_price,one_day_change,change_percent,trend_type)values (?,?,?,?,?,?,?,?,?)"
        timeFormat = "%Y-%m-%d %H:%M:%S"

        for stock in stockList:
            cur.execute(sql, (stock, traningEndDate))
            rows = cur.fetchall()

            # Column 5 is the rounded change ratio; only its distinct values
            # are clustered, each wrapped as a (0, value) point.
            changes = [row[5] for row in rows]
            fdist = nltk.FreqDist(changes)
            clusterS = [(0, x) for x in fdist.keys()]

            # Each progress line is passed as one pre-formatted string so the
            # output is byte-identical to the former multi-argument print
            # statements while also being valid for the print function.
            print("StartTime:  %s" % datetime.strftime(datetime.now(), timeFormat))
            c1 = KMeansClustering(clusterS)
            print("MiddleTime:  %s" % datetime.strftime(datetime.now(), timeFormat))
            cluster = c1.getclusters(20)
            print("EndTime:  %s" % datetime.strftime(datetime.now(), timeFormat))

            # Number the clusters from 1.  Members are kept as sets so the
            # per-row lookup below is a hash probe instead of a list scan.
            clusterMembers = {}
            orderCluster = {}
            for i, clu in enumerate(cluster, 1):
                clusterMembers[i] = set(clu)
                orderCluster[i] = [min(clu)[1], max(clu)[1]]

            # Diagnostic only: report clusters whose value ranges overlap.
            for m in orderCluster:
                min1, max1 = orderCluster[m]
                for n in orderCluster:
                    min2, max2 = orderCluster[n]
                    if (min2 < min1 < max2) or (min2 < max1 < max2):
                        print("%s  intersect with  %s  values:  %s %s %s %s"
                              % (m, n, min1, max1, min2, max2))

            # Tag every row with the number of each cluster holding its ratio.
            clusterR = []
            for row in rows:
                point = (0, row[5])
                for nc in clusterMembers:
                    if point in clusterMembers[nc]:
                        newRow = list(row)
                        newRow.append(nc)
                        clusterR.append(newRow)
                        finalClusterRecord.append(newRow)

            # Insert the tagged rows into the database, committing in batches
            # of 1000 plus once at the end for the remainder.
            for m, j in enumerate(clusterR, 1):
                # The new id is the SHA-1 of the JSON form of the tagged row.
                contentStr = json.dumps(j)
                embersId = hashlib.sha1(contentStr).hexdigest()
                # A tagged row is the 7 selected columns plus the cluster id.
                (sourceId, subsequenceId, postDate, lastPrice,
                 oneDayChange, changePercent, stockIndex, trendType) = j
                derivedFrom = "[" + str(sourceId) + "]"
                cur.execute(
                    insertSql,
                    (embersId, derivedFrom, subsequenceId, stockIndex,
                     postDate, lastPrice, oneDayChange, changePercent,
                     trendType))
                if m % 1000 == 0:
                    con.commit()
            con.commit()

            finalOrderCluster[stock] = orderCluster

        # Write the type range into a file
        trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE")
        dataStr = json.dumps(finalOrderCluster)
        with open(trendRangeFile, "w") as output:
            output.write(dataStr)

        # Write the training data into file
        trendSetRecordFile = common.get_configuration("model", "TRAINING_TREND_RECORDS")
        dataStr = json.dumps(finalClusterRecord)
        with open(trendSetRecordFile, "w") as output:
            output.write(dataStr)
    finally:
        if con:
            con.close()
CLUSTER_COUNT = 3

# Running count of tweets that carried a user location, printed as progress.
i = 0
for trend in mainTrend:
    location_list = []
    for tweet in tweepy.Cursor(api.search, q=trend['query']).items(TWEET_SAMPLE_SIZE):
        if tweet.user.location:
            print(i)
            i += 1
            try:
                location = geocoder.geocode(tweet.user.location)
                location_list.append({
                    "lat": location.latitude,
                    "lon": location.longitude
                })
            except Exception as e:
                # Best effort: a location that cannot be geocoded is reported
                # and skipped so the remaining tweets are still processed.
                print("An exception occurred: ")
                print(e)

    # NOTE(review): out.txt is rewritten for every trend, so it ends up
    # holding only the last trend's locations -- confirm this is intended.
    with open('out.txt', 'w') as f:
        print(location_list, file=f)

    cluster = KMeansClustering([(loc['lat'], loc['lon']) for loc in location_list])
    centroids = [centroid(c) for c in cluster.getclusters(CLUSTER_COUNT)]

    kml_clusters = simplekml.Kml()
    # A separate index name is used here: reusing `i` overwrote the progress
    # counter above, so it restarted from a small value on the next trend.
    for cluster_no, c in enumerate(centroids):
        # Points are (lat, lon); the KML coords are written as (lon, lat).
        kml_clusters.newpoint(name='Cluster {}'.format(cluster_no),
                              coords=[(c[1], c[0])])
    kml_clusters.save('{}.kml'.format(trend['query']))
# NOTE(review): `label`, `coords_freqs` and `expanded_coords` are bound before
# this excerpt begins -- presumably by an enclosing loop over labels; confirm
# the nesting of the next two statements when merging.
((lat, lon), f) = coords_freqs[label]
expanded_coords.append((label, [(lon, lat)] * f))  # Flip lat/lon for Google Earth

# No need to clutter the map with unnecessary placemarks: each label gets a
# single item built from the first of its repeated coordinates.
kml_items = [
    {'label': label, 'coords': '%s,%s' % coords[0]}
    for (label, coords) in expanded_coords
]

# It could also be interesting to include names of your contacts on the map
# for display ("First L." per contact, one per line).
# NOTE(review): `ec` is read both by attribute (first_name, last_name) and by
# key ('location') -- confirm the connection objects support both.
for item in kml_items:
    item['contacts'] = '\n'.join([
        '%s %s.' % (ec.first_name, ec.last_name[0])
        for ec in extended_connections
        if ec['location'] == item['label']
    ])

# Cluster over the expanded list so that a location repeated `f` times
# contributes `f` points.
cl = KMeansClustering([
    coords
    for (label, coords_list) in expanded_coords
    for coords in coords_list
])

centroids = [
    {'label': 'CENTROID', 'coords': '%s,%s' % centroid(c)}
    for c in cl.getclusters(K)
]
kml_items.extend(centroids)

kml = createKML(kml_items)

if not os.path.isdir('out'):
    os.mkdir('out')

# The context manager closes the file even if the write fails; the previous
# open/write/close sequence leaked the handle in that case.
with open("out/" + OUT, 'w') as f:
    f.write(kml)

# Same bytes on stderr as the former `print >> sys.stderr, ...` statement.
sys.stderr.write('Data pickled to out/' + OUT + '\n')
num = 0 # Collecting attrbutes of each result dir for item in os.listdir(path): if os.path.isdir(item): data = item +"/cluster_attr.pacc" content = eval(open(data,'r').readline()) clst_i.append(content) if num > LTT_NU: clst_i_2.append(content) num = num +1 table[item] = content # Starting cluster analysis process cl = KMeansClustering(clst_i) if cl_param != "" and cl_param.isdigit() : clusters = cl.getclusters(int(cl_param)) else: clusters = cl.getclusters(2) # Starting cluster analysis process -- syscall only cl_2 = KMeansClustering(clst_i_2) if cl_param != "" and cl_param.isdigit() : clusters_2 = cl_2.getclusters(int(cl_param)) else: clusters_2 = cl_2.getclusters(2) # Cluster Density def add(x,y):
# Load the category names (one per line) and the per-category JSON records.
# Context managers close both input files, which were previously left open.
with open("cat_less.txt", "r") as categories_file:
    f_categories = categories_file.read()
with open("data_less.txt", "r") as data_file:
    f_json = data_file.read()

json_data = json.loads(f_json)
categories = f_categories.split("\n")

# NOTE(review): this list is never reset, so each category is clustered
# together with the points of all categories processed before it -- confirm
# that the accumulation is intended.
k_means_list = []

for category in categories:
    try:
        if json_data[category]:
            for cat in json_data[category]:
                # "latlon" holds "<lat>,<lon>" as text.
                parts = cat["latlon"].split(",")
                k_means_list.append((float(parts[0]), float(parts[1])))

            cl = KMeansClustering(k_means_list)
            clusters = cl.getclusters(12)

            # One output line per cluster, its points written as "lat,lon "
            # pairs.  The file is closed even if a write fails.
            with open("./Output/" + category, "w") as cluster_file:
                for cluster in clusters:
                    for tup in cluster:
                        cluster_file.write(str(tup[0]) + "," + str(tup[1]) + " ")
                    cluster_file.write("\n")
            print(category + " Done")
    except KeyError:
        # Category (e.g. a blank trailing line) or field missing from the
        # data: skipped quietly, as before.
        continue
    except Exception as e:
        # Still best effort -- the remaining categories are processed -- but
        # the failure is reported instead of being silently discarded.
        print("%s failed: %s" % (category, e))
def clusterSet(traningEndDate):
    """Cluster each stock index's daily change percentages and persist them.

    For every index in the hard-coded stock list, the rows of
    ``t_daily_stockindex`` dated on or before ``traningEndDate`` are loaded,
    the distinct change percentages are clustered with k-means (k=20), each
    row is tagged with the 1-based number of the cluster that contains its
    change percentage, and the tagged rows are inserted into
    ``t_daily_enrichedIndex``.

    Afterwards two JSON files are written, at the paths configured under the
    "model" section (``TREND_RANGE_FILE`` and ``TRAINING_TREND_RECORDS``):
    the per-index ``{cluster number: [min change, max change]}`` ranges and
    the list of all tagged rows.

    Args:
        traningEndDate: inclusive upper bound for the ``date`` column.
    """
    stockList = ["MERVAL", "MEXBOL", "CHILE65", "BVPSBVPS", "COLCAP", "CRSMBCT", "IBOV", "IGBVL"]
    sql = "select embers_id,sub_sequence,date,last_price,one_day_change,round(one_day_change/(last_price-one_day_change),4),stock_index from t_daily_stockindex where stock_index=? and date<=?"
    insertSql = "insert into t_daily_enrichedIndex (embers_id,derived_from,sub_sequence,stock_index,date,last_price,one_day_change,change_percent,trend_type)values (?,?,?,?,?,?,?,?,?)"
    timeFormat = "%Y-%m-%d %H:%M:%S"

    finalClusterRecord = []
    finalOrderCluster = {}

    con = common.getDBConnection()
    try:
        cur = con.cursor()
        for stock in stockList:
            cur.execute(sql, (stock, traningEndDate))
            rows = cur.fetchall()

            # Cluster the distinct change percentages (column 5) as 2-D
            # points whose first coordinate is always 0.
            changes = [row[5] for row in rows]
            fdist = nltk.FreqDist(changes)
            clusterS = [(0, x) for x in fdist.keys()]

            print("StartTime:  %s" % datetime.now().strftime(timeFormat))
            c1 = KMeansClustering(clusterS)
            print("MiddleTime:  %s" % datetime.now().strftime(timeFormat))
            # getclusters yields a list of clusters, each a list of the
            # (0, change) points passed in above.
            cluster = c1.getclusters(20)
            print("EndTime:  %s" % datetime.now().strftime(timeFormat))

            namedCluster = {}
            orderCluster = {}
            for i, clu in enumerate(cluster, 1):
                namedCluster[i] = clu
                orderCluster[i] = [min(clu)[1], max(clu)[1]]

            # Diagnostic only: report clusters whose value ranges overlap.
            for m in orderCluster:
                min1, max1 = orderCluster[m]
                for n in orderCluster:
                    min2, max2 = orderCluster[n]
                    if (min2 < min1 < max2) or (min2 < max1 < max2):
                        print("%s  intersect with  %s  values:  %s %s %s %s"
                              % (m, n, min1, max1, min2, max2))

            # Map every clustered point to its cluster number once, instead
            # of scanning every cluster list for every row.
            clusterOfPoint = {}
            for nc in namedCluster:
                for point in namedCluster[nc]:
                    clusterOfPoint[point] = nc

            clusterR = []
            for row in rows:
                nc = clusterOfPoint.get((0, row[5]))
                if nc is None:
                    continue
                newRow = list(row)
                newRow.append(nc)
                clusterR.append(newRow)
                finalClusterRecord.append(newRow)

            # Insert the tagged rows, committing in batches of 1000.
            for count, j in enumerate(clusterR, 1):
                contentStr = json.dumps(j)
                embersId = hashlib.sha1(contentStr).hexdigest()
                derivedFrom = "[" + str(j[0]) + "]"
                subsequenceId = j[1]
                postDate = j[2]
                lastPrice = j[3]
                oneDayChange = j[4]
                changePercent = j[5]
                stockIndex = j[6]
                trendType = j[7]
                cur.execute(insertSql, (embersId, derivedFrom, subsequenceId, stockIndex, postDate,
                                        lastPrice, oneDayChange, changePercent, trendType))
                if count % 1000 == 0:
                    con.commit()
            con.commit()

            finalOrderCluster[stock] = orderCluster
    finally:
        # Release the connection even when a query or the clustering fails.
        if con:
            con.close()

    # Write the type range into a file
    trendRangeFile = common.get_configuration("model", "TREND_RANGE_FILE")
    dataStr = json.dumps(finalOrderCluster)
    with open(trendRangeFile, "w") as output:
        output.write(dataStr)

    # Write the training data into file
    trendSetRecordFile = common.get_configuration("model", "TRAINING_TREND_RECORDS")
    dataStr = json.dumps(finalClusterRecord)
    with open(trendSetRecordFile, "w") as output:
        output.write(dataStr)
# Cluster the lat/lon points of the categories listed in cat_less.txt and
# write the clusters to ./Output/<category>, one cluster per line, each point
# rendered as "lat,lon " (first,second value of the "latlon" field).
with open("cat_less.txt", "r") as categories_in:
    f_categories = categories_in.read()
with open("data_less.txt", "r") as json_in:
    f_json = json_in.read()

json_data = json.loads(f_json)
categories = f_categories.split("\n")

# NOTE(review): this list is never reset, so every category is clustered
# together with the points of all categories processed before it. That is the
# original behaviour and is kept here -- confirm whether it is intended.
k_means_list = []

for category in categories:
    try:
        entries = json_data[category]
    except (KeyError, TypeError):
        # Category not present in the data (e.g. the empty string produced by
        # a trailing newline in cat_less.txt): nothing to cluster.
        continue
    if not entries:
        continue

    try:
        for cat in entries:
            parts = cat["latlon"].split(",")
            k_means_list.append((float(parts[0]), float(parts[1])))

        cl = KMeansClustering(k_means_list)
        clusters = cl.getclusters(12)

        # The with-block guarantees the file is closed even if a write fails.
        with open("./Output/" + category, "w") as cluster_file:
            for cluster in clusters:
                for tup in cluster:
                    cluster_file.write(str(tup[0]) + "," + str(tup[1]) + " ")
                cluster_file.write("\n")
        print(category + " Done")
    except Exception as e:
        # Stay best-effort (one bad category must not stop the run), but make
        # the failure visible instead of swallowing it silently.
        print("Skipping " + category + ": " + str(e))
'NetIncomeLoss', 'OperatingIncomeLoss', 'PropertyPlantAndEquipmentNet', 'RetainedEarningsAccumulatedDeficit', 'StockholdersEquity', ] # Importing the dataset dataset_path = os.path.join(os.path.abspath(os.getcwd()), 'output', 'xbrl_dataset', '2017.csv') dataset = pd.read_csv(dataset_path, usecols=cols) dataset.fillna(0, inplace=True) # dataset = dataset.transpose() # 'rotate' 90 degrees # print(dataset) # cor = dataset.corr() # Correlation of columns # # sns.heatmap(cor, square=True) # Plot the correlation as heat map # plt.subplots_adjust(bottom=0.2, top=1, left=0.07, right=0.87) # plt.show() wh1 = dataset.head(100) ss = StandardScaler() ss.fit_transform(wh1) wh1 = [tuple(x) for x in wh1.values] cl = KMeansClustering(wh1) clusters = cl.getclusters(3) print(clusters)
# Expand every labelled coordinate by its frequency, cluster the expanded
# points with k-means and write the cluster centroids to out/<OUT>.
expanded_coords = []
for label in coords_freqs:
    ((lat, lon), freq) = coords_freqs[label]
    # Flip lat/lon for Google Earth; repeat the point once per occurrence so
    # frequent locations weigh more in the clustering.
    expanded_coords.append((label, [(lon, lat)] * freq))

# No need to clutter the map with unnecessary placemarks...
kml_items = [{'label': label, 'coords': '%s,%s' % coords[0]}
             for (label, coords) in expanded_coords]

# It could also be interesting to include names of your contacts on the map for display
for item in kml_items:
    item['contacts'] = '\n'.join(['%s %s.' % (ec.first_name, ec.last_name[0])
                                  for ec in extended_connections
                                  if ec.location == item['label']])

cl = KMeansClustering([coords for (label, coords_list) in expanded_coords
                       for coords in coords_list])

centroids = [{'label': 'CENTROID', 'coords': '%s,%s' % centroid(c)}
             for c in cl.getclusters(K)]

# KML generation is currently disabled; only the centroids are exported.
#kml_items.extend(centroids)
#kml=createKML(kml_items)

if not os.path.isdir('out'):
    os.mkdir('out')

# file.write() only accepts a string, so passing the list of centroid dicts
# directly raised TypeError; write its textual representation instead.
with open("out/" + OUT, 'w') as f:
    f.write(repr(centroids))

sys.stderr.write("Data pickled to out/" + OUT + "\n")
# MAIN
#
####################################################
# Sweep the number of clusters from 5 to 50 in steps of 5; for each setting,
# cluster a fresh batch of generated profiles and record the resulting SSE.
sses = [0 for _ in range(10)]  # one SSE slot per cluster count (5, 10, ..., 50)
num_users = 100
numsse = 0
numclusters = 5  # first cluster count of the sweep
max_iterations = 10

start_time = datetime.datetime.now()
while numclusters <= 50:
    # Users are the items of this example; regenerate them on every round.
    users = [createProfile() for _ in range(num_users)]

    print(" inicializing kmeans...")
    cl = KMeansClustering(users, HDdistItems, HDequals)

    print(" executing...", numclusters)
    st = datetime.datetime.now()
    print(st)
    solution = cl.HDgetclusters(numclusters, max_iterations)

    # Show the centroid of each of the first `numclusters` clusters.
    for idx in range(numclusters):
        print(util.HDcentroid(solution[idx]), ",")
    st = datetime.datetime.now()

    sses[numsse] = HDcomputeSSE(solution, numclusters)
    numsse = numsse + 1
    numclusters = numclusters + 5

end_time = datetime.datetime.now()
print("start_time:", start_time)
from cluster import KMeansClustering
import random
import time

# Time KMeansClustering.getclusters(9) on random 2-D point sets of growing
# size and print the elapsed wall-clock time for each size.
sample_space = [11, 21, 31, 41, 51, 61, 71, 81, 91, 101,
                201, 301, 401, 501, 601, 701, 801, 901, 1001,
                2001, 3001, 4001, 5001, 6001, 7001, 8001, 9001, 10001]

for value in sample_space:
    # value - 1 points; point i is drawn from [0, i) on both axes.
    points = [(i * random.random(), i * random.random())
              for i in range(1, value)]

    start_time = time.time()
    cl = KMeansClustering(points)
    clusters = cl.getclusters(9)
    elapsed = time.time() - start_time

    print("total time " + str(elapsed) + " secs for " + str(value - 1) + " element")
def testClusterCount(self):
    """Test that asking for less than 2 clusters raises an error"""
    def gap(x, y):
        # 1-D distance between two plain numbers.
        return abs(x - y)

    cl = KMeansClustering([876, 123, 344, 676], distance=gap)
    # Both 0 and 1 are below the minimum cluster count and must be rejected.
    for too_few in (0, 1):
        self.assertRaises(ClusteringError, cl.getclusters, too_few)