def extract_diff_image(self, data):
    """Write the cell-by-cell difference of two stored rasters as a GeoTIFF.

    Both rasters are read from the database of the bundle's first
    partition. Cells equal to 0 in an input are masked before the
    subtraction; masked cells are written as -1, which is also passed to
    the writer as the nodata value.

    Args:
        data: mapping with the keys 'type1' and 'type2' (the names handed
            to the database's get_geo()) and 'name' (used to build the
            output path under the 'extracts' directory).

    Returns:
        The path of the GeoTIFF file that was written.
    """
    from databundles.geo.array import statistics
    from osgeo.gdalconst import GDT_Float32
    from numpy import ma

    partition = self.partitions.all[0]  # There is only one
    hdf = partition.database
    hdf.open()

    # Everything after open() runs under try/finally so the database is
    # closed even when reading, subtracting or writing raises.
    try:
        i1, aa = hdf.get_geo(data['type1'])
        i2, aa = hdf.get_geo(data['type2'])

        file_name = self.filesystem.path('extracts', format(data['name']))

        self.log("Extracting difference, {} - {} ".format(data['type1'], data['type2']))

        # After subtraction, 0 is a valid value, so 0 can no longer stand
        # for "no data": mask the zero cells of each input first and use
        # -1 as the fill / nodata value of the result instead.
        # [...] converts to a numpy array.
        a1 = ma.masked_equal(i1[...], 0)
        a2 = ma.masked_equal(i2[...], 0)

        # The raw difference is written as-is (it is not normalized).
        diff = a1 - a2
        diff.set_fill_value(-1)

        self.log("Stats: \n{}".format(statistics(diff)))

        aa.write_geotiff(file_name, ma.filled(diff), data_type=GDT_Float32, nodata=-1)

        self.log("Wrote Difference TIFF {}".format(file_name))
    finally:
        hdf.close()

    return file_name
def demo(self): '''A commented demonstration of how to create crime data extracts as GeoTIFF images Run with: python bundle.py run demo ''' from databundles.geo.analysisarea import get_analysis_area, draw_edges from databundles.geo.util import create_bb from databundles.geo import Point from databundles.geo.kernel import GaussianKernel from databundles.geo.array import statistics, unity_norm, std_norm from osgeo.gdalconst import GDT_Float32, GDT_Byte, GDT_Int16 from numpy import ma import random # Get the San Diego analysis area from the GEOID ( Defined by the US Census) # you can look up geoids in clarinova.com-extents-2012-7ba4/meta/san-diego-places.csv, # or query the places table in clarinova.com-extents-2012-7ba4.db aa = get_analysis_area(self.library, geoid = 'CG0666000') # Get a function to translate coodinates from the default lat/lon, WGS84, # into the cordinate system of the AnalysisArea, which in this case # is 20m square cells in an area based on a California StatePlane Zone trans = aa.get_translator() print "\n---- Display Analysis Area ----" print aa # This should print a small value, something close to (0,0). # It won't be exactly (0,0), since the analysis area envelope must be # larger than the envelop of the place to account for rotation from # re-projection print "Origin", trans(aa.lonmin, aa.latmin) # At the Sandiego latitude, 1/5000 of a degree, .0002, is about 20 meters, # So incrementing by that amount should advance our cell position by one print "\n---- Check translation function ----" import numpy as np for i,x in enumerate(np.arange(0,.002,.0002)): print i,x,trans(aa.lonmin+x, aa.latmin+x) # Now we can load in the crime incident data, translate the lat/lon points # to our array coordinates, and produce an image. # Get a reference to the bundle named as "crime" in the bundle.yaml configuration # file. 
crime = spotcrime.com-us_crime_incidents-orig-7ba4 r = self.library.dep('crime') # Fill in the values for the extents of the analysis area into the # query template. q = self.config.build.incident_query.format(**aa.__dict__) q += " AND type = 'Theft' " # A 'Kernel' is a matrix in a process called 'convolution'. We're doing something # somewhat different, but are re-using the name. This kernel is added # onto the output array for each crime incident, and represents a Normal # distribution, so it spreads out the influence over a larger area than # a single cell. # The matrix is square, 9 cells to a side. The function has 1/2 of its # maximun ( Full-Width-Half Maximum, FWHM) three cells from the center. kernel = GaussianKernel(33,11) # We're going to need an output array. This creates a numpy array that # has the correct size a = aa.new_array() # Main array ar = aa.new_array() # Array with random perturbation rs = 4 print "Array shape: ",a.shape for i,row in enumerate(r.bundle.database.connection.execute(q)): if i > 0 and i%1000 == 0: print "Processed {} rows".format(i) if i > 5000: break point = trans(row['longitude'], row['latitude']) kernel.apply_add(a,point) # The source data is coded to the 'hundred block' address, # such as: 12XX Main Street. This make the points quantized, so # add a little randomness for a smoother map. rpoint = Point(point.x+random.randint(-rs, rs), point.y+random.randint(-rs, rs)) kernel.apply_add(ar,rpoint) # make a helper to store files in the extracts directory ed = lambda f: self.filesystem.path('extracts','demo',f+'.tiff') print "\n--- Statistics, Before Normalizing ---" print statistics(a) aa.write_geotiff(ed('orig'), a, type_=GDT_Float32) print "\n--- Statistics, After Masking Normalizing ---" # # Masking marks some values as invalid, so they don't get used in statistics. # I this case, we are making 0 invalid, which will keep it from being # considered in the std deviation later in std_norm. 
a = ma.masked_equal(a,0) print statistics(a) aa.write_geotiff(ed('masked'), a, type_=GDT_Float32) print "\n--- Statistics, After StdDev Normalizing ---" o = std_norm(a) print statistics(o) aa.write_geotiff(ed('stddev'), o, type_=GDT_Float32) print "\n--- Statistics, After Unity Normalizing ---" o = unity_norm(a) print statistics(o) aa.write_geotiff(ed('unity'), o, type_=GDT_Float32) # Write the array with randomness ar = ma.masked_equal(ar,0) aa.write_geotiff('/tmp/random.tiff', std_norm(ar), type_=GDT_Float32)