/
heatmapper.py
executable file
·157 lines (129 loc) · 8.26 KB
/
heatmapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os, sys
import ConfigParser #TODO lol
from osgeo import gdal
from osgeo import osr
import numpy as np
import numpy.lib.recfunctions as rf
from scipy.ndimage.filters import gaussian_filter
def retain_relevant_fields(data):
    '''
    Sums the age columns of data into six coarser age-group columns and drops every other
    column except asyht, ruots, ekoord and nkoord.

    data -- numpy record array containing all the ika* columns listed below plus
            asyht, ruots, ekoord and nkoord.

    Returns a record array holding the four retained columns (in their original order)
    followed by the aggregate columns in the order they are listed below.
    '''
    #TODO read these from a conf file
    # An ordered list instead of a dict: the column order decides the band numbering of the
    # output raster, so it must not depend on dict/hash ordering. It also hands real lists to
    # rec_append_fields, which does not accept dict views.
    # NOTE: 'aggr64_' sums the columns from ika65_69 upwards; the name is left as it is because
    # column names are reused in the names of the generated style files.
    age_groups = [
        ('aggr0_6', ['ika%d' % age for age in range(0, 7)]),
        ('aggr7_12', ['ika%d' % age for age in range(7, 13)]),
        ('aggr13_17', ['ika%d' % age for age in range(13, 18)]),
        ('aggr18_29', ['ika%d' % age for age in range(18, 25)] + ['ika25_29']),
        ('aggr30_64', ['ika30_34', 'ika35_39', 'ika40_44', 'ika45_49', 'ika50_54', 'ika55_59', 'ika60_64']),
        ('aggr64_', ['ika65_69', 'ika70_74', 'ika75_79', 'ika80_84', 'ika85_89', 'ika90_94', 'ika95_']),
    ]
    aggregate_names = [name for name, _ in age_groups]
    aggregate_values = [sum(data[column] for column in columns) for _, columns in age_groups]

    #add the fields to data rec array
    augmented_data = rf.rec_append_fields(data, aggregate_names, aggregate_values)

    # These already exist in data, so they must not be appended again; they only need to
    # survive the pruning below.
    retained = set(aggregate_names)
    retained.update(['asyht', 'ruots', 'ekoord', 'nkoord'])

    # drop all original fields whose names are not retained
    fields2drop = [name for name in data.dtype.names if name not in retained]
    return rf.rec_drop_fields(augmented_data, fields2drop)
def read_file_prune_fields_clean_values(infile_name, x_name, y_name):
    '''
    Loads a comma separated file, prunes its columns with retain_relevant_fields and removes
    the rows whose y_name value is -1.

    infile_name -- path of the csv file to read
    x_name, y_name -- names of the coordinate columns

    Returns (y column, x column, record array of the remaining columns without the two
    coordinate columns).
    '''
    records = retain_relevant_fields(np.recfromcsv(infile_name, delimiter=','))
    # rows with -1 in the y column are garbage rows
    is_valid = records[y_name] != -1
    records = records[is_valid]
    attributes = rf.rec_drop_fields(records, [y_name, x_name])
    return records[y_name], records[x_name], attributes
def compute_geotransform(x, y, binsize=1):
    '''
    Derives the raster dimensions and the geotransform from the point coordinates.

    x, y -- coordinate arrays
    binsize -- edge length of one raster cell in coordinate units, must be positive

    Returns (ncols, nrows, geotransform) where ncols and nrows are plain ints (whole bins that
    fit into the x and y extent) and geotransform is the 6 element list
    [min_x, x resolution, 0, max_y, 0, -y resolution].

    Raises ValueError if binsize is not positive.
    '''
    if binsize <= 0:
        raise ValueError('binsize must be positive, got %r' % (binsize,))

    #take these into account for lattice transformation
    min_y = np.min(y)
    max_y = np.max(y)
    min_x = np.min(x)
    max_x = np.max(x)
    pextent = max_y - min_y
    iextent = max_x - min_x

    #output raster image dimensions & resolutions
    # Floor division + int() so the counts are usable as array/raster dimensions no matter
    # whether the coordinates are ints or floats (plain "/" yields floats for float input and
    # under true division).
    nrows = int(pextent // binsize)
    ncols = int(iextent // binsize)

    # An extent smaller than one bin gives a count of 0; fall back to binsize as the
    # resolution instead of dividing by zero.
    nres = float(pextent) / nrows if nrows else float(binsize)
    eres = float(iextent) / ncols if ncols else float(binsize)

    #...which is basically the geotransform
    geotransform = [float(min_x), eres, 0, float(max_y), 0, -nres]
    return ncols, nrows, geotransform
def createRaster(outfilename, nrows, ncols, geotransform, EPSG, n_bands):
    '''
    Creates a Float32 GeoTIFF at ./output_tiffs/<outfilename>.tif and sets its geotransform
    and projection.

    outfilename -- file name without directory and extension
    nrows, ncols -- bin counts; the raster is created one cell larger in both directions
    geotransform -- 6 element geotransform passed to SetGeoTransform
    EPSG -- EPSG code of the coordinate system (anything int() accepts)
    n_bands -- number of raster bands

    Returns the opened GDAL dataset.
    Raises IOError if GDAL does not return a dataset.
    '''
    path = './output_tiffs/'+outfilename+'.tif'
    # +1 because the heatmap lattice written into this raster is (nrows+1) x (ncols+1)
    outputraster = gdal.GetDriverByName('GTiff').Create(path, int(ncols)+1, int(nrows)+1, n_bands, gdal.GDT_Float32)
    if outputraster is None:
        # without this check the failure would surface as an AttributeError on None below
        raise IOError('could not create raster '+path+' (does the output_tiffs folder exist?)')
    outputraster.SetGeoTransform(geotransform)

    srs = osr.SpatialReference()
    # ETRS89-GK25FIN maps to 3879
    # before 2012 everything is in KKJ2, maps to 2392
    # source: http://www.maanmittauslaitos.fi/sites/default/files/tiedostolataukset/kartat/koordinaatit/epsg_koodit.pdf
    # use the EPSG argument; the previous code read a module-level name that only exists when
    # the file is run as a script
    srs.ImportFromEPSG(int(EPSG))
    outputraster.SetProjection( srs.ExportToWkt() )
    return outputraster
def heatmap(x, y, weights, nrows, ncols, cutoff=0, noise=0.0, binsize=1, windowscale=1, windowarea_squareroot=100):
    '''
    Generates a lattice and adds weights[i] to cell ((y-min(y))/binsize, (x-min(x))/binsize); points that
    fall into the same cell are summed. Reverts all cells with value < cutoff to 0 and adds noise to the
    freq at each cell. Then generates the heatmap by smoothing with a gaussian kernel which has area
    windowarea_squareroot(default=100)**2 coordinate units.

    Binsize affects this, as we need the window area to be defined in terms of the original coordinate units.
    E.g. we expect the x and y to correspond to meters and want a kernel with an area of 100m*100m (i.e. 1ha).
    However, a lattice where each cell is 1m*1m is too fine grained, so we can set binsize=10, meaning that
    each cell represents 10m*10m. This way we will still be able to make sure that the area of the kernel
    window is windowarea_squareroot**2 regardless of the "size" of the cells in the lattice where the
    weights are aggregated.

    x, y -- coordinate arrays, same length as weights
    weights -- value of each point
    nrows, ncols -- bin counts; the lattice has shape (nrows+1, ncols+1)
    cutoff -- cells below this value are set to 0 before noise and smoothing
    noise -- relative noise amplitude, 0 adds nothing, 0.1 adds up to 10%
    windowscale -- extra multiplier for the kernel size

    Returns the smoothed lattice as a 2d float array.
    '''
    lattice = np.zeros((int(nrows)+1, int(ncols)+1))
    weights = np.asarray(weights)
    if weights.size:
        x = np.asarray(x)
        y = np.asarray(y)
        # minima computed once instead of once per point; floor division + int cast keeps the
        # indices integral for float coordinates as well
        ycell = ((y - np.min(y)) // binsize).astype(int)
        xcell = ((x - np.min(x)) // binsize).astype(int)
        # unbuffered add so that several points in one cell accumulate, as the docstring
        # promises, instead of the last point overwriting the others
        np.add.at(lattice, (ycell, xcell), weights)

    highpassed = np.copy(lattice)
    highpassed[highpassed < cutoff] = 0

    # for each element: add or subtract noise that's up to (noise*100)% of the value, then floor
    amplitude = np.ceil(highpassed * noise)
    noised = np.floor(highpassed + np.random.uniform(-amplitude, amplitude))

    #FUNCTION GIVES SKEWED RESULTS because of ndi.gaussian_filter's truncation algorithm. it always adds one unit after truncation to make the function nicely trail to 0
    # this skews results quite quickly ESPECIALLY with large bins because the one unit is squared when computing the area
    #...
    # so it would go something like this:
    # (sigma * truncate) + 1 = r_window
    # sigma = (r_window - 1) / truncate
    # float() so that the scale is not truncated by integer division
    scale = windowscale*windowarea_squareroot/float(binsize)
    # radius of the circle whose area is scale**2
    r_window = np.sqrt(scale*scale / np.pi)
    #parameter for controlling when the filter is truncated, and thus effects our window size. default is 4, but just explicating it here
    truncate = 4.0
    # a window radius below one cell would give a negative sigma; clamp to 0
    bandwidth_sigma = max((r_window - 1) / truncate, 0.0)
    smoothed = gaussian_filter(noised, bandwidth_sigma, truncate=truncate)
    return smoothed
if __name__ == '__main__':
    # Script entry point: turns every csv file in the current folder into a multi-band GeoTIFF
    # heatmap (one band per remaining data column) and writes one SLD style file per column.
    if len(sys.argv) != 4:
        print('Usage: python heatmapper.py source_srs binsize windowscale')
        sys.exit(0)

    # Check requisite xml existence
    try:
        with open('styletemplate.xml') as template_file:
            styletemplate = template_file.read()
    except IOError:
        print("need to have the style template named styletemplate.xml in the same folder")
        sys.exit(1)

    epsg = sys.argv[1]
    binsize = int(sys.argv[2])
    windowscale = int(sys.argv[3]) # for visual purposes

    # do the stuff for all csv-files in this folder
    infiles = [f for f in os.listdir('.') if 'csv' in f.split('.')[-1]]
    # reverse to process latest file last
    # NOTE(review): this relies on the order os.listdir happens to return -- confirm
    for infile_name in infiles[::-1]:
        print('processing file '+infile_name)

        # discard all those sensitive and useless columns
        yarr, xarr, datarec = read_file_prune_fields_clean_values(infile_name, 'ekoord', 'nkoord')

        # get some simple parameters
        ncols, nrows, geotransform = compute_geotransform(xarr, yarr, binsize)

        outfilename = infile_name.split('.')[0]
        raster = createRaster(outfilename, nrows, ncols, geotransform, epsg, len(datarec.dtype.names))

        # for all columns we have left
        for i,field in enumerate(datarec.dtype.names):
            # compute heatmap and save it
            data = datarec[field]
            heatmap_lattice = heatmap(xarr, yarr, data, nrows, ncols, cutoff=5, noise=0.1, binsize=binsize, windowscale=windowscale, windowarea_squareroot=100) #array like
            # lattice row 0 holds the smallest y values, so flip it before writing
            raster.GetRasterBand(i+1).WriteArray(np.flipud(heatmap_lattice))

            # The style limits are computed before the style file is opened, so a failure
            # here cannot leave an empty, truncated file behind.
            positive = heatmap_lattice[heatmap_lattice > 0]
            if positive.size == 0:
                print('column '+field+' has no positive cells, skipping its style file')
                continue
            lowlimit1, lowlimit2, lowlimit3, lowlimit4, highlimit = np.percentile(positive, [1, 2, 3, 10, 95])
            midlimit = lowlimit4 + ((highlimit - lowlimit4) * 0.75) #we want the midlimit to be between min & max.
            print('column %s, low4: %s  mid: %s  high: %s' % (field, lowlimit4, midlimit, highlimit))

            # save corresponding style file
            with open('./sld/popdensity_%s.xml' % (field), 'w') as f:
                f.write(styletemplate % {'stratum':field, 'band_n':(i+1), 'minlimit':np.min(positive), 'lowlimit1':lowlimit1, 'lowlimit2':lowlimit2, 'lowlimit3':lowlimit3, 'lowlimit4':lowlimit4, 'midlimit':midlimit, 'highlimit':highlimit})

        # flush and release the dataset explicitly so the tif is complete on disk before the
        # next file is processed
        raster.FlushCache()
        raster = None