Example #1
0
    def pcaanalysis(self,catfile,outfilename):
        """Colour every predicted photo by a PCA of its category scores.

        Category names are the first whitespace-separated token of each
        non-blank line of ``catfile``. For every document in ``self.cols``
        whose ``prediction`` field exists and is not 0, the per-category
        prediction values and the [longitude, latitude] pair are collected.
        The log10 of the predictions is reduced with ``self.pca`` and the
        first three rescaled components are used as the colour of each
        point, both in a pyplot scatter plot and in the GeoJSON written to
        'sample-geojson.js'.

        catfile     -- path of the text file listing the categories
        outfilename -- NOTE(review): not used; the output path is hard-coded
        """
        # The context manager closes the file as soon as it has been read;
        # blank lines are skipped because they have no first token.
        with open(catfile) as catfh:
            categories = [line.split()[0] for line in catfh if line.strip()]

        # build a matrix with photos on rows, categories in columns
        query = {"$and":[{'prediction':{"$ne": 0}},{'prediction': {"$exists":True}}]}
        cat_matrix = []
        latlon = []
        for col in self.cols:
            for doc in col.find(query,timeout=False):
                pca_row = [doc['prediction'][cat] for cat in categories]
                ll_row = [doc['longitude'],doc['latitude']]
                cat_matrix.append(pca_row)
                latlon.append(ll_row)

        matrix = np.log10(np.asarray(cat_matrix))
        # The result of this call is not used. It is kept in case scaleData
        # modifies its argument in place -- TODO confirm, then drop it.
        self.scaleData(matrix,3)
        npcoords = np.asarray(latlon)
        pca = self.pca(matrix,5)
        fc = self.scaleData(pca,3)
        # Each scaled value v becomes 10**(v - 1).
        fc = np.power(10,fc-1.0)
        rgb = fc[:,(0,1,2)]

        # pyplot: longitude on x, latitude on y
        plt.scatter(npcoords[:,0],npcoords[:,1],s=16,facecolors=rgb,edgecolors='none')
        plt.show()

        # geojson
        g = gj.gjson()
        g.initFile('sample-geojson.js')
        g.writePointsFeatureCollection(npcoords,rgb,'data')
        g.closeFile()
Example #2
0
    def pcaanalysis(self, catfile, outfilename):
        """Plot and export photo locations coloured by PCA of predictions.

        Steps, in order:
          1. Read the category names: the first whitespace-separated token
             of every non-blank line in ``catfile``.
          2. From each collection in ``self.cols`` fetch the documents whose
             ``prediction`` field exists and is not 0, keeping the
             per-category prediction values and [longitude, latitude].
          3. Take log10 of the prediction matrix, reduce it with
             ``self.pca`` and rescale the result with ``self.scaleData``.
          4. Use the first three components as the colour of each point in
             a pyplot scatter plot and in 'sample-geojson.js'.

        catfile     -- path of the text file listing the categories
        outfilename -- NOTE(review): not used; the output path is hard-coded
        """
        # Close the category file deterministically and ignore blank lines,
        # which have no first token to extract.
        with open(catfile) as category_file:
            categories = [
                line.split()[0] for line in category_file if line.strip()
            ]

        query = {
            "$and": [
                {'prediction': {"$ne": 0}},
                {'prediction': {"$exists": True}},
            ]
        }

        # build a matrix with photos on rows, categories in columns
        cat_matrix = []
        latlon = []
        for col in self.cols:
            for doc in col.find(query, timeout=False):
                pca_row = [doc['prediction'][cat] for cat in categories]
                ll_row = [doc['longitude'], doc['latitude']]
                cat_matrix.append(pca_row)
                latlon.append(ll_row)

        matrix = np.log10(np.asarray(cat_matrix))
        # The return value is not used. The call is kept in case scaleData
        # modifies its argument in place -- TODO confirm, then drop it.
        self.scaleData(matrix, 3)
        npcoords = np.asarray(latlon)
        pca = self.pca(matrix, 5)
        fc = self.scaleData(pca, 3)
        # Each scaled value v becomes 10**(v - 1).
        fc = np.power(10, fc - 1.0)
        rgb = fc[:, (0, 1, 2)]

        # pyplot: longitude on x, latitude on y
        plt.scatter(npcoords[:, 0],
                    npcoords[:, 1],
                    s=16,
                    facecolors=rgb,
                    edgecolors='none')
        plt.show()

        # geojson
        g = gj.gjson()
        g.initFile('sample-geojson.js')
        g.writePointsFeatureCollection(npcoords, rgb, 'data')
        g.closeFile()
Example #3
0
    def clusterCoordinates(self,eps,min_samples):
        """Cluster ``self.coordinates`` and export one coloured polygon per cluster.

        Runs ``self.dbscan`` on ``self.coordinates`` / ``self.predictions``
        and, for every cluster key except the first one in sorted order,
        builds an outline polygon and records the element-wise maximum of
        the cluster's predictions. A 6-component PCA of the log10 of those
        maxima, min-max scaled, provides two hex colours per cluster
        (components 0-2 and 3-5). A kernel density estimate over the same
        scaled components provides the fill opacity, whose histogram is
        shown with pyplot. Every 17th row of ``self.coordinates`` is written
        to 'sample-geojson-cluster2.js' and the polygons are written to
        'sample-geojson-poly.js'.

        eps, min_samples -- forwarded unchanged to ``self.dbscan``
        """
        r = self.dbscan(self.coordinates,self.predictions,eps,min_samples)
        # sorted() accepts both a Python 2 key list and a Python 3 key view.
        rks = sorted(r['data_active'])

        predictions = []
        polygons = []

        # The first sorted key is skipped -- presumably the label of the
        # noise points; TODO confirm against self.dbscan.
        for cl in rks[1:]:
            coords = np.array([a.tolist() for a in r['data_active'][cl]])
            preds = np.array([a.tolist() for a in r['data_inert'][cl]])

            predictions.append(np.max(preds,0))
            center = np.mean(coords,0)
            radius = np.std(coords,0)

            try:
                # The two coordinate columns are swapped before outlining.
                polygon = [alphashape.alpha_shape_wrapper(coords[:,(1,0)],50.0),]
            except Exception:
                # Best-effort fallback: 4 points on an ellipse of one
                # standard deviation around the cluster mean. Exception (not
                # a bare except) so Ctrl-C and SystemExit still propagate.
                # NOTE(review): the angles i*3.1415/n only cover 0..3/4*pi,
                # i.e. half of the ellipse; 2*pi*i/n may have been intended.
                # Left unchanged to keep the exported geometry identical.
                n = 4
                polygon = [[[center[1]+radius[1]*np.cos(i*3.1415/n),center[0]+radius[0]*np.sin(i*3.1415/n)] for i in range(n)]]
            polygons.append(polygon)

        nppreds = np.log10(np.asarray(predictions))
        pcapred = self.pca(nppreds,6)
        ms = preprocessing.MinMaxScaler()
        fc = ms.fit_transform(pcapred)

        # Opacity from density: the KDE score of every row is min-max
        # scaled, mapped through 10**(0.01*v) and min-max scaled again, so
        # rows with a lower density score get a lower opacity value.
        cols = (0,1,2,3,4,5)
        kde = KernelDensity(kernel='gaussian',bandwidth=0.05).fit(fc[:,cols])
        kdescores = kde.score_samples(fc[:,cols])
        ss = preprocessing.MinMaxScaler()
        # NOTE(review): kdescores is 1-D; recent scikit-learn releases expect
        # 2-D input for fit_transform -- confirm with the installed version.
        datan = np.power(10.0,(0.01*ss.fit_transform(kdescores)))
        opacity = ss.fit_transform(datan).tolist()
        plt.hist(opacity,50)
        plt.show()

        # int() keeps the truncating behaviour of "%x" on Python 2 floats and
        # avoids the TypeError Python 3 raises when "%x" is given a float.
        rows = fc.tolist()
        hxcolor1 = ["#%02x%02x%02x"%tuple(int(255*v) for v in row[0:3]) for row in rows]
        hxcolor2 = ["#%02x%02x%02x"%tuple(int(255*v) for v in row[3:6]) for row in rows]

        # Export a thinned copy of the raw points: every 17th row.
        step = 17
        self.makePointsToMultiPointFeatureCollection(self.coordinates[::step,(1,0)],self.coordinates[::step,0],self.coordinates[::step,1],'sample-geojson-cluster2.js')

        g = gj.gjson()
        g.initFile('sample-geojson-poly.js')
        g.writeMultiPolygonFeatureCollection_(polygons,fillColor=hxcolor1,color=hxcolor2,fillOpacity=opacity)
        g.closeFile()
Example #4
0
    def clusterCoordinates(self, eps, min_samples):
        """Cluster the stored coordinates and write the clusters as polygons.

        Steps, in order:
          1. Run ``self.dbscan`` on ``self.coordinates`` and
             ``self.predictions``.
          2. For every cluster key except the first one in sorted order,
             build an outline polygon and keep the element-wise maximum of
             the cluster's predictions.
          3. Reduce the log10 of those maxima to 6 components with
             ``self.pca`` and min-max scale them; components 0-2 and 3-5
             become two hex colours per cluster.
          4. Derive a fill opacity from a kernel density estimate over the
             scaled components and show its histogram with pyplot.
          5. Write every 17th row of ``self.coordinates`` to
             'sample-geojson-cluster2.js' and the polygons to
             'sample-geojson-poly.js'.

        eps, min_samples -- forwarded unchanged to ``self.dbscan``
        """
        r = self.dbscan(self.coordinates, self.predictions, eps, min_samples)
        # sorted() accepts both a Python 2 key list and a Python 3 key view.
        rks = sorted(r['data_active'])

        predictions = []
        polygons = []

        # The first sorted key is skipped -- presumably the label of the
        # noise points; TODO confirm against self.dbscan.
        for cl in rks[1:]:
            coords = np.array([a.tolist() for a in r['data_active'][cl]])
            preds = np.array([a.tolist() for a in r['data_inert'][cl]])

            predictions.append(np.max(preds, 0))
            center = np.mean(coords, 0)
            radius = np.std(coords, 0)

            try:
                # The two coordinate columns are swapped before outlining.
                polygon = [
                    alphashape.alpha_shape_wrapper(coords[:, (1, 0)], 50.0),
                ]
            except Exception:
                # Best-effort fallback: 4 points on an ellipse of one
                # standard deviation around the cluster mean. Exception (not
                # a bare except) so Ctrl-C and SystemExit still propagate.
                # NOTE(review): the angles i*3.1415/n only cover 0..3/4*pi,
                # i.e. half of the ellipse; 2*pi*i/n may have been intended.
                # Left unchanged to keep the exported geometry identical.
                n = 4
                polygon = [[[
                    center[1] + radius[1] * np.cos(i * 3.1415 / n),
                    center[0] + radius[0] * np.sin(i * 3.1415 / n)
                ] for i in range(n)]]
            polygons.append(polygon)

        nppreds = np.log10(np.asarray(predictions))
        pcapred = self.pca(nppreds, 6)
        ms = preprocessing.MinMaxScaler()
        fc = ms.fit_transform(pcapred)

        # Opacity from density: the KDE score of every row is min-max
        # scaled, mapped through 10**(0.01*v) and min-max scaled again, so
        # rows with a lower density score get a lower opacity value.
        cols = (0, 1, 2, 3, 4, 5)
        kde = KernelDensity(kernel='gaussian', bandwidth=0.05).fit(fc[:, cols])
        kdescores = kde.score_samples(fc[:, cols])
        ss = preprocessing.MinMaxScaler()
        # NOTE(review): kdescores is 1-D; recent scikit-learn releases expect
        # 2-D input for fit_transform -- confirm with the installed version.
        datan = np.power(10.0, (0.01 * ss.fit_transform(kdescores)))
        opacity = ss.fit_transform(datan).tolist()
        plt.hist(opacity, 50)
        plt.show()

        # int() keeps the truncating behaviour of "%x" on Python 2 floats and
        # avoids the TypeError Python 3 raises when "%x" is given a float.
        rows = fc.tolist()
        hxcolor1 = [
            "#%02x%02x%02x" % tuple(int(255 * v) for v in row[0:3])
            for row in rows
        ]
        hxcolor2 = [
            "#%02x%02x%02x" % tuple(int(255 * v) for v in row[3:6])
            for row in rows
        ]

        # Export a thinned copy of the raw points: every 17th row.
        step = 17
        self.makePointsToMultiPointFeatureCollection(
            self.coordinates[::step, (1, 0)], self.coordinates[::step, 0],
            self.coordinates[::step, 1], 'sample-geojson-cluster2.js')

        g = gj.gjson()
        g.initFile('sample-geojson-poly.js')
        g.writeMultiPolygonFeatureCollection_(polygons,
                                              fillColor=hxcolor1,
                                              color=hxcolor2,
                                              fillOpacity=opacity)
        g.closeFile()