def pcaanalysis(self, catfile, outfilename):
    """Plot and export a PCA-based colouring of per-photo predictions.

    Builds a matrix with photos on rows and categories in columns, takes
    its log10, reduces it with ``self.pca`` and converts the first three
    scaled components into one colour triple per photo.  The photos are
    then shown as a matplotlib scatter plot (longitude on x, latitude on
    y) and written as a point feature collection to
    ``sample-geojson.js``.

    Args:
        catfile: Path to a text file; the first whitespace-separated
            token of every non-blank line is used as a category name.
        outfilename: Currently unused -- the output file name is
            hard-coded below.  Kept so existing callers keep working.
    """
    # The context manager closes the category file promptly.  Blank lines
    # are skipped because "".split()[0] would raise IndexError.
    with open(catfile) as handle:
        categories = [line.split()[0] for line in handle if line.strip()]

    cat_matrix = []
    latlon = []
    for col in self.cols:
        # Only documents whose 'prediction' field exists and is not 0.
        # NOTE(review): the operators suggest `col` is a MongoDB
        # collection and `timeout=False` the legacy pymongo keyword --
        # confirm against the driver version in use.
        docs = col.find(
            {"$and": [{'prediction': {"$ne": 0}},
                      {'prediction': {"$exists": True}}]},
            timeout=False)
        for doc in docs:
            cat_matrix.append([doc['prediction'][cat] for cat in categories])
            latlon.append([doc['longitude'], doc['latitude']])

    matrix = np.log10(np.asarray(cat_matrix))
    # NOTE(review): the result of this call is not used.  The call is kept
    # in case scaleData modifies `matrix` in place -- confirm and remove.
    self.scaleData(matrix, 3)
    coords = np.asarray(latlon)

    # NOTE(review): 5 is presumably the number of components -- confirm
    # against self.pca.
    pca = self.pca(matrix, 5)
    fc = self.scaleData(pca, 3)
    fc = np.power(10, fc - 1.0)
    rgb = fc[:, (0, 1, 2)]

    # pyplot: column 0 of coords is longitude, column 1 is latitude.
    plt.scatter(coords[:, 0], coords[:, 1], s=16,
                facecolors=rgb, edgecolors='none')
    plt.show()

    # geojson
    g = gj.gjson()
    g.initFile('sample-geojson.js')
    try:
        g.writePointsFeatureCollection(coords, rgb, 'data')
    finally:
        # Close the output even when writing fails.
        g.closeFile()
def pcaanalysis(self, catfile, outfilename):
    """Plot and export a PCA-based colouring of per-photo predictions.

    Builds a matrix with photos on rows and categories in columns, takes
    its log10, reduces it with ``self.pca`` and converts the first three
    scaled components into one colour triple per photo.  The photos are
    then shown as a matplotlib scatter plot (longitude on x, latitude on
    y) and written as a point feature collection to
    ``sample-geojson.js``.

    Args:
        catfile: Path to a text file; the first whitespace-separated
            token of every non-blank line is used as a category name.
        outfilename: Currently unused -- the output file name is
            hard-coded below.  Kept so existing callers keep working.
    """
    # The context manager closes the category file promptly.  Blank lines
    # are skipped because "".split()[0] would raise IndexError.
    with open(catfile) as handle:
        categories = [line.split()[0] for line in handle if line.strip()]

    cat_matrix = []
    latlon = []
    for col in self.cols:
        # Only documents whose 'prediction' field exists and is not 0.
        # NOTE(review): the operators suggest `col` is a MongoDB
        # collection and `timeout=False` the legacy pymongo keyword --
        # confirm against the driver version in use.
        docs = col.find(
            {"$and": [{'prediction': {"$ne": 0}},
                      {'prediction': {"$exists": True}}]},
            timeout=False)
        for doc in docs:
            cat_matrix.append([doc['prediction'][cat] for cat in categories])
            latlon.append([doc['longitude'], doc['latitude']])

    matrix = np.log10(np.asarray(cat_matrix))
    # NOTE(review): the result of this call is not used.  The call is kept
    # in case scaleData modifies `matrix` in place -- confirm and remove.
    self.scaleData(matrix, 3)
    coords = np.asarray(latlon)

    # NOTE(review): 5 is presumably the number of components -- confirm
    # against self.pca.
    pca = self.pca(matrix, 5)
    fc = self.scaleData(pca, 3)
    fc = np.power(10, fc - 1.0)
    rgb = fc[:, (0, 1, 2)]

    # pyplot: column 0 of coords is longitude, column 1 is latitude.
    plt.scatter(coords[:, 0], coords[:, 1], s=16,
                facecolors=rgb, edgecolors='none')
    plt.show()

    # geojson
    g = gj.gjson()
    g.initFile('sample-geojson.js')
    try:
        g.writePointsFeatureCollection(coords, rgb, 'data')
    finally:
        # Close the output even when writing fails.
        g.closeFile()
def clusterCoordinates(self, eps, min_samples):
    """Cluster ``self.coordinates`` and export one coloured polygon per cluster.

    Runs ``self.dbscan`` on ``self.coordinates`` / ``self.predictions``,
    builds an outline polygon for every cluster except the first sorted
    label, colours the clusters from a PCA of their per-category maximum
    predictions and derives a fill opacity from a kernel density estimate
    in that colour space.

    Side effects: shows a histogram of the opacities, writes a subsample
    of ``self.coordinates`` to ``sample-geojson-cluster2.js`` and writes
    the polygons to ``sample-geojson-poly.js``.

    Args:
        eps: Forwarded unchanged to ``self.dbscan``.
        min_samples: Forwarded unchanged to ``self.dbscan``.
    """
    r = self.dbscan(self.coordinates, self.predictions, eps, min_samples)

    # sorted() accepts the dict directly; keys() followed by .sort() only
    # works where keys() returns a list (Python 2).
    labels = sorted(r['data_active'])

    predictions = []
    polygons = []
    # NOTE(review): the first sorted label is skipped -- presumably the
    # noise label of the clustering; confirm against self.dbscan.
    for cl in labels[1:]:
        coords = np.array([a.tolist() for a in r['data_active'][cl]])
        preds = np.array([a.tolist() for a in r['data_inert'][cl]])
        # Column-wise maximum over all members of the cluster.
        predictions.append(np.max(preds, 0))

        try:
            # Columns are swapped before outlining.
            polygon = [alphashape.alpha_shape_wrapper(coords[:, (1, 0)], 50.0)]
        except Exception:
            # Best-effort fallback: an axis-aligned ellipse around the
            # cluster mean with one standard deviation per axis.  The
            # angle steps by 2*pi/n so the vertices go all the way round;
            # the previous pi/n step only covered half of the ellipse.
            center = np.mean(coords, 0)
            radius = np.std(coords, 0)
            n_vertices = 4
            angles = [2.0 * np.pi * k / n_vertices for k in range(n_vertices)]
            polygon = [[[center[1] + radius[1] * np.cos(t),
                         center[0] + radius[0] * np.sin(t)]
                        for t in angles]]
        polygons.append(polygon)

    nppreds = np.log10(np.asarray(predictions))
    pcapred = self.pca(nppreds, 6)
    fc = preprocessing.MinMaxScaler().fit_transform(pcapred)

    # Density of every cluster in the scaled PCA space.  After the two
    # min-max passes the lowest-density cluster gets opacity 0 and the
    # highest-density cluster gets opacity 1.
    cols = (0, 1, 2, 3, 4, 5)
    kde = KernelDensity(kernel='gaussian', bandwidth=0.05).fit(fc[:, cols])
    kdescores = kde.score_samples(fc[:, cols])
    ss = preprocessing.MinMaxScaler()
    # The scaler expects a 2-D array, so the scores go in as one column
    # and come back out flattened to one float per cluster.
    scaled_scores = ss.fit_transform(kdescores.reshape(-1, 1))
    datan = np.power(10.0, 0.01 * scaled_scores)
    opacity = ss.fit_transform(datan).ravel().tolist()
    plt.hist(opacity, 50)
    plt.show()

    # '%x' needs integers, hence the explicit int() truncation.
    rows = fc.tolist()
    hxcolor1 = ["#%02x%02x%02x" % tuple(int(255 * v) for v in row[0:3])
                for row in rows]
    hxcolor2 = ["#%02x%02x%02x" % tuple(int(255 * v) for v in row[3:6])
                for row in rows]

    # Export every 17th coordinate as a subsample.
    stride = 17
    self.makePointsToMultiPointFeatureCollection(
        self.coordinates[::stride, (1, 0)],
        self.coordinates[::stride, 0],
        self.coordinates[::stride, 1],
        'sample-geojson-cluster2.js')

    g = gj.gjson()
    g.initFile('sample-geojson-poly.js')
    try:
        g.writeMultiPolygonFeatureCollection_(
            polygons, fillColor=hxcolor1, color=hxcolor2, fillOpacity=opacity)
    finally:
        # Close the output even when writing fails.
        g.closeFile()
def clusterCoordinates(self, eps, min_samples):
    """Cluster ``self.coordinates`` and export one coloured polygon per cluster.

    Runs ``self.dbscan`` on ``self.coordinates`` / ``self.predictions``,
    builds an outline polygon for every cluster except the first sorted
    label, colours the clusters from a PCA of their per-category maximum
    predictions and derives a fill opacity from a kernel density estimate
    in that colour space.

    Side effects: shows a histogram of the opacities, writes a subsample
    of ``self.coordinates`` to ``sample-geojson-cluster2.js`` and writes
    the polygons to ``sample-geojson-poly.js``.

    Args:
        eps: Forwarded unchanged to ``self.dbscan``.
        min_samples: Forwarded unchanged to ``self.dbscan``.
    """
    r = self.dbscan(self.coordinates, self.predictions, eps, min_samples)

    # sorted() accepts the dict directly; keys() followed by .sort() only
    # works where keys() returns a list (Python 2).
    labels = sorted(r['data_active'])

    predictions = []
    polygons = []
    # NOTE(review): the first sorted label is skipped -- presumably the
    # noise label of the clustering; confirm against self.dbscan.
    for cl in labels[1:]:
        coords = np.array([a.tolist() for a in r['data_active'][cl]])
        preds = np.array([a.tolist() for a in r['data_inert'][cl]])
        # Column-wise maximum over all members of the cluster.
        predictions.append(np.max(preds, 0))

        try:
            # Columns are swapped before outlining.
            polygon = [alphashape.alpha_shape_wrapper(coords[:, (1, 0)], 50.0)]
        except Exception:
            # Best-effort fallback: an axis-aligned ellipse around the
            # cluster mean with one standard deviation per axis.  The
            # angle steps by 2*pi/n so the vertices go all the way round;
            # the previous pi/n step only covered half of the ellipse.
            center = np.mean(coords, 0)
            radius = np.std(coords, 0)
            n_vertices = 4
            angles = [2.0 * np.pi * k / n_vertices for k in range(n_vertices)]
            polygon = [[[center[1] + radius[1] * np.cos(t),
                         center[0] + radius[0] * np.sin(t)]
                        for t in angles]]
        polygons.append(polygon)

    nppreds = np.log10(np.asarray(predictions))
    pcapred = self.pca(nppreds, 6)
    fc = preprocessing.MinMaxScaler().fit_transform(pcapred)

    # Density of every cluster in the scaled PCA space.  After the two
    # min-max passes the lowest-density cluster gets opacity 0 and the
    # highest-density cluster gets opacity 1.
    cols = (0, 1, 2, 3, 4, 5)
    kde = KernelDensity(kernel='gaussian', bandwidth=0.05).fit(fc[:, cols])
    kdescores = kde.score_samples(fc[:, cols])
    ss = preprocessing.MinMaxScaler()
    # The scaler expects a 2-D array, so the scores go in as one column
    # and come back out flattened to one float per cluster.
    scaled_scores = ss.fit_transform(kdescores.reshape(-1, 1))
    datan = np.power(10.0, 0.01 * scaled_scores)
    opacity = ss.fit_transform(datan).ravel().tolist()
    plt.hist(opacity, 50)
    plt.show()

    # '%x' needs integers, hence the explicit int() truncation.
    rows = fc.tolist()
    hxcolor1 = ["#%02x%02x%02x" % tuple(int(255 * v) for v in row[0:3])
                for row in rows]
    hxcolor2 = ["#%02x%02x%02x" % tuple(int(255 * v) for v in row[3:6])
                for row in rows]

    # Export every 17th coordinate as a subsample.
    stride = 17
    self.makePointsToMultiPointFeatureCollection(
        self.coordinates[::stride, (1, 0)],
        self.coordinates[::stride, 0],
        self.coordinates[::stride, 1],
        'sample-geojson-cluster2.js')

    g = gj.gjson()
    g.initFile('sample-geojson-poly.js')
    try:
        g.writeMultiPolygonFeatureCollection_(
            polygons, fillColor=hxcolor1, color=hxcolor2, fillOpacity=opacity)
    finally:
        # Close the output even when writing fails.
        g.closeFile()