/
dataset_old.py
426 lines (364 loc) · 15.5 KB
/
dataset_old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 27 14:43:21 2019
@author: cordolo
Functions to create the 'old' dataset
"""
import os
import random
import csv
import gc
import numpy as np
from osgeo import gdal
from utils import list_dir
from dataset import csv_to_patch
def tile_to_patch(inputpath, coordspath, coordsfilename, dtmpath, patchespath,
                  patch_size, patches_per_tile, classes, resolution):
    """Run the two-stage patch extraction for all tiles.

    Stage one samples patch origins and appends them to a csv-file, stage
    two turns that csv-file into patches on disk.

    Parameters
    ----------
    inputpath: string
        path to folders with tiles. Each tile should be in a separate folder
    coordspath: string
        path to the output folder of the coordinates file. It is joined
        with coordsfilename by plain string concatenation, so it has to end
        with a path separator.
    coordsfilename: string
        name of the coordinates file, extension should be '.csv'
    dtmpath: string
        path to the folder containing the dtm, forwarded to csv_to_patch()
    patchespath: string
        path to the output folder for the patches, forwarded to
        csv_to_patch()
    patch_size: int
        size of patch to extract, forwarded to both stages
    patches_per_tile: int
        number of patches to sample per tile, forwarded to tile_to_csv()
    classes: list
        list with classes to be predicted
    resolution: int
        forwarded unchanged to csv_to_patch() (documented by the original
        author as 1 for 1m or 20 for 20cm)

    Calls
    -----
    tile_to_csv()
    csv_to_patch()

    Output
    ------
    Nothing is returned. The coordinates file is written by tile_to_csv();
    the patches are written by csv_to_patch().
    """
    # stage 1: sample patch origins per tile and store them in the csv-file
    tile_to_csv(
        inputpath=inputpath,
        coordspath=coordspath,
        coordsfilename=coordsfilename,
        patches_per_tile=patches_per_tile,
        patch_size=patch_size,
        classes=classes,
    )

    print("\n")

    # stage 2: cut out the patches listed in the csv-file and save them
    coords_csv = coordspath + coordsfilename
    csv_to_patch(
        inputpath=inputpath,
        dtmpath=dtmpath,
        coordsfile=coords_csv,
        patchespath=patchespath,
        patch_size=patch_size,
        classes=classes,
        resolution=resolution,
    )
def tile_to_csv(inputpath, coordspath, coordsfilename, patches_per_tile, patch_size, classes):
    """Save the location of patches in a csv-file.

    Locations of the top-left corner pixel of patches are saved. These
    pixels are chosen at random, however the percentual class division is
    respected (see sample_patches_of_class()).

    Parameters
    ----------
    inputpath: string
        path to folders with tiles. Each tile should be in a separate folder
        and contain the ground truth raster 'tare.tif'.
    coordspath: string
        path to the output folder of the coordinates file; created when it
        does not exist. Joined with coordsfilename by plain string
        concatenation, so it has to end with a path separator.
    coordsfilename: string
        output filename, extension should be '.csv'
    patches_per_tile: int
        number of patches to extract per tile. Final number can be lower if
        the classes cover very few pixels.
    patch_size: int
        size of patch to extract, in pixels of the original raster. It is
        divided by 5 (downsampled grid) and multiplied by 3 (padding) before
        sampling.
    classes: list
        list with classes to be predicted; the first five entries are used,
        in this order: tara0, tara20, tara50, woods, not cultivable.

    Calls
    -----
    sample_patches_of_class()

    Output
    ------
    outputfile: csv
        rows are appended to coordspath + coordsfilename; each row contains
        the tile name and row + column of the top-left pixel of a patch on
        the downsampled grid:
            tile,row,column
    """
    # init
    dirs = list_dir(inputpath)
    patch_size = patch_size // 5    # because downsample from 20cmX20cm to 1mx1m
    patch_size_padded = int(patch_size * 3)

    os.makedirs(coordspath, exist_ok=True)

    for i_lap, d in enumerate(dirs):

        # ground truth, resampled to 1m resolution (a fifth of the original
        # width and height; resampleAlg=0 is nearest neighbour in GDAL)
        src_ds = gdal.Open(inputpath + d + '/tare.tif', gdal.GA_ReadOnly)
        warped_ds = gdal.Warp('', [src_ds], format='MEM',
                              width=src_ds.RasterXSize // 5,
                              height=src_ds.RasterYSize // 5,
                              resampleAlg=0)
        band = warped_ds.GetRasterBand(1)
        gt = np.int16(band.ReadAsArray())
        del band, warped_ds, src_ds

        # take care of classes: 656 is merged into classes[3] (woods) and
        # 780 into classes[4] (not cultivable); everything else becomes 0
        tara0_mask = gt == classes[0]
        tara20_mask = gt == classes[1]
        tara50_mask = gt == classes[2]
        woods_mask = np.logical_or(gt == classes[3], gt == 656)
        no_coltivable_mask = np.logical_or(gt == classes[4], gt == 780)
        gt[woods_mask] = classes[3]
        gt[no_coltivable_mask] = classes[4]

        class_masks = (tara0_mask, tara20_mask, tara50_mask,
                       woods_mask, no_coltivable_mask)
        classes_mask = np.logical_or.reduce(class_masks)
        gt[np.logical_not(classes_mask)] = 0

        # nothing to sample if none of the classes occurs in this tile
        if not classes_mask.any():
            continue

        # candidate origins: the last patch_size_padded rows/columns are
        # excluded so that a full patch always fits inside the tile
        rc_UPAS = np.argwhere(gt[0:-patch_size_padded, 0:-patch_size_padded] != 0)

        # sample patches and write coordinate of origin to output csv-file
        outputfile = coordspath + coordsfilename
        for cl, mask in zip(classes, class_masks):
            rc_class = np.argwhere(mask[0:-patch_size_padded, 0:-patch_size_padded])
            sample_patches_of_class(rc_class, rc_UPAS, patches_per_tile, cl,
                                    gt, patch_size_padded, outputfile, d)

        del gt
        gc.collect()

        # progress report every 10 tiles. The parentheses matter:
        # `i_lap+1 % 10` is parsed as `i_lap + (1 % 10)`, which never is 0.
        if (i_lap + 1) % 10 == 0:
            print('\r {}/{}'.format(i_lap+1, len(dirs)),end='')
def sample_patches_of_class(population, population_all, patches_per_tile, cl, gt,
                            patch_size_padded, outputfile, d,
                            min_class_pixels=1000, max_rejects=100):
    """Sample origin pixels (row + col) of patches and append them to a csv-file.

    The number of requested patches is patches_per_tile multiplied by the
    share of this class among all candidate pixels. Candidates are drawn at
    random without replacement. A candidate is accepted when its patch

    * has the full size patch_size_padded x patch_size_padded,
    * contains at least min_class_pixels pixels of class cl, and
    * contains no pixel of the null-class (0).

    Sampling stops when enough patches are accepted, when no more than the
    number of still requested candidates is left, or after max_rejects
    consecutive rejections. The final number of patches can therefore be
    lower than requested.

    Parameters
    ----------
    population: numpy.ndarray
        (row, col) pairs of pixels containing class cl
    population_all: numpy.ndarray
        (row, col) pairs of pixels containing any class (except null-class)
    patches_per_tile: int
        number of samples to extract for the whole tile
    cl: int or string
        ground truth class (e.g. classes[4])
    gt: numpy.ndarray
        ground truth image
    patch_size_padded: int
        size of patch (including padding) to be extracted in number of pixels
        (patch will be squared)
    outputfile: string
        path + filename + extension of the coordinates file; opened in
        append mode, so existing rows are kept
    d: string
        name of the tile, written to the first column
    min_class_pixels: int, optional
        minimum number of pixels of class cl in an accepted patch
    max_rejects: int, optional
        number of consecutive rejected candidates after which sampling for
        this class is given up

    Output
    ------
    outputfile: csv
        each row contains tile-name and row + column of top-left pixel of
        patch:
            tile,row,column
    """
    # share of this class among all candidate pixels; an empty candidate set
    # simply means there is nothing to sample (previously ZeroDivisionError)
    n_all = len(population_all)
    p_class = len(population) / n_all if n_all else 0.0

    # number of samples as plain int: np.uint8 silently wrapped around when
    # more than 255 patches were requested
    n_samples = int(patches_per_tile * p_class)
    n_rejected = 0

    # newline='' is what the csv module asks for, otherwise rows get an
    # extra blank line on platforms that translate line endings
    with open(outputfile, 'a', newline='') as file:
        writer = csv.writer(file, delimiter=',')
        while n_samples > 0 and len(population) > n_samples:
            # draw a batch of candidates and remove them from the population
            idx = random.sample(range(len(population)), n_samples)
            candidates = population[idx]
            population = np.delete(population, idx, axis=0)
            for r, c in candidates:
                patch = gt[r:(r + patch_size_padded), c:(c + patch_size_padded)]
                accepted = (
                    patch.shape[0] == patch_size_padded
                    and patch.shape[1] == patch_size_padded
                    and np.count_nonzero(patch == cl) >= min_class_pixels
                    and not np.any(patch == 0)
                )
                if accepted:
                    writer.writerow([d, r, c])
                    n_samples -= 1
                    n_rejected = 0
                    continue
                n_rejected += 1
                if n_rejected >= max_rejects:
                    # give up on this class. This has to be an assignment:
                    # the former `n_samples==0` was a no-op comparison, so
                    # the outer loop went on until invalid patches were
                    # accepted.
                    n_samples = 0
                    break
def read_patch_1m(inputRGB, inputNIR, inputDTMfile, inputGT, coords_df, patch_size_padded, idx, classes):
    """Load one patch and its ground truth from already opened rasters.

    Parameters
    ----------
    inputRGB: GDAL dataset
        opened dataset; every band is read into the first channels of the
        patch (band 1 -> channel 0, ...)
    inputNIR: GDAL dataset
        opened dataset; band 1 is read into channel 3
    inputDTMfile: string
        path to the dtm raster; opened here, band 1 is read into channel 4
    inputGT: GDAL dataset
        opened dataset with the ground truth in band 1
    coords_df: pandas.dataframe
        dataframe with coordinates; row idx is unpacked as (r, c, folder).
        NOTE(review): sample_patches_of_class() in this file writes rows as
        tile,row,column - confirm that the column order of coords_df matches
        this unpacking.
    patch_size_padded: int
        size of patch (including padding) to be extracted in number of pixels
        (patch will be squared)
    idx: int
        index of row in coords_df to be read
    classes: list
        list with classes to be predicted

    Calls
    -----
    to_categorical_classes
        NOTE(review): this name is not imported in the visible part of this
        file - confirm it is in scope.

    Returns
    -------
    patch: numpy.ndarray
        float16 array of size (patch_size_padded, patch_size_padded, 5) with
        RGB, NIR and DTM, all channels divided by 255
    gt: numpy.ndarray
        ground truth patch converted by to_categorical_classes (one-hot
        encoded according to the original documentation)
    """
    n_channels = 5  # R, G, B, NIR, DTM

    # the third entry of the row is not needed, the rasters are passed in
    row_off, col_off, _tile = coords_df.iloc[idx]
    row_off = int(row_off)
    col_off = int(col_off)

    patch = np.zeros([patch_size_padded, patch_size_padded, n_channels], dtype=np.float16)

    # RGB: GDAL bands are 1-based, patch channels 0-based
    for band_no in range(1, inputRGB.RasterCount + 1):
        rgb_band = inputRGB.GetRasterBand(band_no)
        patch[:, :, band_no - 1] = rgb_band.ReadAsArray(
            col_off, row_off, int(patch_size_padded), int(patch_size_padded))

    # NIR
    nir_band = inputNIR.GetRasterBand(1)
    patch[:, :, 3] = nir_band.ReadAsArray(col_off, row_off, patch_size_padded, patch_size_padded)

    # DTM
    dtm_ds = gdal.Open(inputDTMfile, gdal.GA_ReadOnly)
    dtm_band = dtm_ds.GetRasterBand(1)
    patch[:, :, 4] = dtm_band.ReadAsArray(col_off, row_off, patch_size_padded, patch_size_padded)

    # normalization
    patch = patch / 255

    # load ground truth
    gt_band = inputGT.GetRasterBand(1)
    gt = gt_band.ReadAsArray(col_off, row_off, patch_size_padded, patch_size_padded)

    # take care of classes: merge 656 into 650 and 780 into 770, everything
    # that is not in classes becomes the null-class 0
    gt[gt == 656] = 650
    gt[gt == 780] = 770
    gt[~np.isin(gt, classes)] = 0

    gt = to_categorical_classes(gt, classes)

    return patch, gt
def read_patch(inputpath, dtmpath, coords_df, patch_size_padded, idx, classes):
    """Load one patch and its ground truth from the rasters of a tile folder.

    Parameters
    ----------
    inputpath: string
        path to the folder containing the dataset. For a tile <folder> the
        files <folder>/<folder>_RGB.tif, <folder>/<folder>_NIR.tif and
        <folder>/tare.tif are read.
    dtmpath: string
        path to the folder containing the resampled dtm; the file
        <folder>/dtm135_20cm.tif is read.
    coords_df: pandas.dataframe
        dataframe with coordinates; row idx is unpacked as (r, c, folder).
        NOTE(review): sample_patches_of_class() in this file writes rows as
        tile,row,column - confirm that the column order of coords_df matches
        this unpacking.
    patch_size_padded: int
        size of patch (including padding) to be extracted in number of pixels
        (patch will be squared)
    idx: int
        index of row in coords_df to be read
    classes: list
        list with classes to be predicted

    Calls
    -----
    to_categorical_classes
        NOTE(review): this name is not imported in the visible part of this
        file - confirm it is in scope.

    Returns
    -------
    patch: numpy.ndarray
        float16 array of size (patch_size_padded, patch_size_padded, 5) with
        RGB, NIR and DTM, all channels divided by 255
    gt: numpy.ndarray
        ground truth patch converted by to_categorical_classes (one-hot
        encoded according to the original documentation)
    """
    n_channels = 5  # R, G, B, NIR, DTM

    row_off, col_off, folder = coords_df.iloc[idx]
    # stored coordinates are multiplied by 5 - presumably because they were
    # sampled on the grid that tile_to_csv() downsamples by a factor 5
    row_off = int(row_off * 5)
    col_off = int(col_off * 5)

    patch = np.zeros([patch_size_padded, patch_size_padded, n_channels], dtype=np.float16)

    # RGB: GDAL bands are 1-based, patch channels 0-based
    rgb_ds = gdal.Open(inputpath + folder + '/' + folder + '_RGB.tif', gdal.GA_ReadOnly)
    for band_no in range(1, rgb_ds.RasterCount + 1):
        rgb_band = rgb_ds.GetRasterBand(band_no)
        patch[:, :, band_no - 1] = rgb_band.ReadAsArray(
            col_off, row_off, patch_size_padded, patch_size_padded)

    # NIR
    nir_ds = gdal.Open(inputpath + folder + '/' + folder + '_NIR.tif', gdal.GA_ReadOnly)
    nir_band = nir_ds.GetRasterBand(1)
    patch[:, :, 3] = nir_band.ReadAsArray(col_off, row_off, patch_size_padded, patch_size_padded)

    # DTM
    dtm_ds = gdal.Open(dtmpath + folder + '/dtm135_20cm.tif', gdal.GA_ReadOnly)
    dtm_band = dtm_ds.GetRasterBand(1)
    patch[:, :, 4] = dtm_band.ReadAsArray(col_off, row_off, patch_size_padded, patch_size_padded)

    # normalization
    patch = patch / 255

    # load ground truth
    gt_ds = gdal.Open(inputpath + folder + '/tare.tif', gdal.GA_ReadOnly)
    gt_band = gt_ds.GetRasterBand(1)
    gt = gt_band.ReadAsArray(col_off, row_off, patch_size_padded, patch_size_padded)

    # take care of classes: merge 656 into 650 and 780 into 770, everything
    # that is not in classes becomes the null-class 0
    gt[gt == 656] = 650
    gt[gt == 780] = 770
    gt[~np.isin(gt, classes)] = 0

    gt = to_categorical_classes(gt, classes)

    return patch, gt
# =============================================================================
# def csv_to_patch(inputpath, dtmpath, patchespath, coordsfile, patch_size, classes):
# """ extract the patches from the original images, normalize and save.
#
# Ground truth is converted to one-hot labels.
#
# Parameters
# ----------
# inputpath: string
# path to folders with tiles. Each tile should be in separate folder
# dtmpath: string
# path to folder containing a dtm (will be resampled to same resolution)
# patchespath: string
# path to outputfolder to save the patches
# coordsfile: csv-file
# path to file where the coordinates are saved
# patch_size: int
# size of patch to extract. Final extracted patches will include padding
# to be able to predict full image.
# classes: list
# list with classes to be predicted
#
# Calls
# -----
# read_patch()
# to_categorical_classes()
#
# Output
# ------
# patches saved at patchespath in two folders:
# images and labels.
# """
# imagespath = patchespath + 'images/'
# labelspath = patchespath + 'labels/'
#
# if not os.path.isdir(imagespath):
# os.makedirs(imagespath)
# if not os.path.isdir(labelspath):
# os.makedirs(labelspath)
#
# dirs = list_dir(inputpath)
# coords = pd.read_csv(coordsfile, sep=',',header=None)
# patch_size_padded = int(patch_size * 3)
#
# # resample dtm to 20cm x 20cm
# for d in dirs:
# if not os.path.isdir(dtmpath + d + '/'):
# os.makedirs(dtmpath + d + '/')
# input_file = inputpath + d + '/dtm135.tif'
# shadow_file = inputpath + d + '/' + d + '_NIR.tif'
# dtm_file = dtmpath + d + '/dtm135_20cm.tif'
#
# ds = gdal.Open(shadow_file)
# width = ds.RasterXSize
# height = ds.RasterYSize
# ds = gdal.Warp(dtm_file, input_file, format='GTiff', width=width, height=height, resampleAlg=1)
# ds = None
#
# # extract patches
# for idx in range(len(coords)):
# im, gt = read_patch(inputpath, dtmpath, coords, patch_size_padded, idx, classes)
# np.save(imagespath + str(idx)+'.npy', im)
# np.save(labelspath+str(idx) + '.npy', gt)
# if idx % 500 == 0:
# print('\r {}/{}'.format(idx, len(coords)),end='')
# =============================================================================