-
Notifications
You must be signed in to change notification settings - Fork 3
/
Sample_Creation_LMDB.py
321 lines (275 loc) · 12.5 KB
/
Sample_Creation_LMDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import os, math
from sys.exc_traceback import tb_lineno
from osgeo import gdal
import numpy as np
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from scipy.ndimage.interpolation import rotate
import xdibias
import lmdb
import caffe
import cv2
from random import shuffle
# Only needed for HDF5 creation
# import h5py
# How many degrees the cookie cutter should be rotated
degrees = 10
# The size which the squares are cliped in pixels
clipSize = 900
# How far the center points of the rotation should be
stepSize = 500
# Input Images. Special case for labelPath with Xdibias files
rasterPath = '/space/export/data/zwie_th/Files/eichenau_4k'
labelPath = rasterPath
# Where the output is
path_dst = '/space/export/data/zwie_th/DLR/TrainingData/dilation_all'
# prefix of the datasets that will be created
prefix = 'dataset'
def main(degrees, clipSize, stepSize, rasterpath, labelpath, HDF5, path_dst):
#gets driver information
#loops through layer, gets angle from attributes and writes file with degree information
index = 0
#this is important because the programm uses not the distance from the middle to the side
#but to the corners and the plus 2 are added, because afterwards it will get cropped again to the actual size the NN is using. Is only important for the coordinates creation
clipSizeData = int(clipSize * math.cos(math.radians(45)))+2
# needs to be 372 pixels smaller. because thats what the neural networks gives as an ouput
margin = 186
clipSizeLabel = clipSize - 2*margin
dim = (clipSizeLabel/8, clipSizeLabel/8)
# creates a fille list of both the raster data and the ground truth
fileList = make_list(rasterpath, labelpath)
#Iterates over the list
for path in fileList['f']:
# Loads the image infos, plus the Image
imInfo0 = get_image_info(path[0])
imInfo1 = get_image_info(path[1])
# allpoints holds all coordinates of the squares that will be used for cropping
allpoints = calculates_coord(imInfo0['xSize'], imInfo0['ySize'], degrees, \
clipSizeData, stepSize)
# the squares get shuffled. Otherwise it would be needed to hold the data in cache and shuffle it afterwards. I have not found a wy to shuffle the LMDB databases with the same seed.
shuffle(allpoints)
# iterates over points of the squares and clips the image acordingly
for points in allpoints:
#clips the label and resizes it to the output dimensions and changes the class house from 255 to one
label = clip(imInfo1['image'], points, clipSizeLabel, True).astype('int')
label = cv2.resize(label, dim, interpolation=cv2.INTER_NEAREST)
label[label == 255] = 1
# Prepares the label so it can be read by caffe
newlabel = np.empty((1,dim[0],dim[1]))
newlabel[0,:,:] = label
average = np.average(label)
#leaves out the imagees where there is less then 10% of the class 1
if average > 0.1:
# clips data
data = clip(imInfo0['image'][:,:,::-1], points, clipSize, False)\
.astype('float')
#prepares the data for the NN dilation
img_dat = pre_dilation(data)
img_dat = caffe.io.array_to_datum(img_dat)
# writes the data and the labels in two different DB
with lmdb.open(path_dst + 'Data', map_size=int(1e12)).begin(write=True) as dat_in:
dat_in.put('{:0>10d}'.format(index), img_dat.SerializeToString())
lab_dat = caffe.io.array_to_datum(newlabel)
with lmdb.open(path_dst + 'Label', map_size=int(1e12)).begin(write=True) as lab_in:
lab_in.put('{:0>10d}'.format(index), lab_dat.SerializeToString())
index += 1
print 'finished', path
return
def pre_dilation(image):
''' just transposes the input, to make it accesible for the NN '''
caffe_in = image.transpose([2, 0, 1])
return caffe_in
def make_list(rasterpath, labelpath):
""" creates a list of files that are in the paths rasterpath and labelpath"""
f = []
numberOfFiles = 0
for subdir, dirs, files in os.walk(rasterpath):
for file in files:
if file.endswith('.jpg'):
if os.path.exists(os.path.join(subdir, 'groundtruth', file[:-4] + '.roof')):
f.append((os.path.join(subdir, file), (os.path.join(subdir, 'groundtruth', file[:-4] + '.roof'))))
numberOfFiles += 1
print ('files are read into array', numberOfFiles)
return {'f':f, 'n' :numberOfFiles}
def get_image_info(dataFile):
""" gets image and finds all kind of information as well as returns a n-dimensional np array
normalizes imgae between 0 and 1
Usually uses GDAL to get Inforamtion of the data, but in the case of an error
it will try to open the file with the help of xdibias. Maybe there is a better way
but this was the easiest one"""
try:
dsI = gdal.Open(dataFile)
sizeX = dsI.RasterXSize
sizeY = dsI.RasterYSize
rCount = dsI.RasterCount
im = np.float32(dsI.GetRasterBand(1).ReadAsArray())
#is only needed when the vlaues have to be between 0 and 1
#im = im / im.max()
#if image has more than one channel
if dsI.RasterCount > 1:
for (bandnew) in range(2, (rCount+1)):
new = np.float32(dsI.GetRasterBand(bandnew).ReadAsArray())
# is the same aswith im
#new = new / new.max()
im = np.dstack((im, new))
except:
info = xdibias.Image(dataFile)
im = info.readImageData()
#im = im / im.max()
sizeX = im.shape[1]
sizeY = im.shape[0]
try:
rCount = im.shape[2]
except:
rCount = 1
return {'xSize':sizeX, 'ySize':sizeY, 'rasterCount':rCount, 'image':im}
def calculates_coord(x, y, degrees, clipSize, stepSize):
'''calculates squares that are equally distributed within the image.
additionally it adds
x = image Size x
y = image Size Y
degrees = rotation angle
clipSize = the size of the clips
stepSize = number of pixels the squares should be separted from each other'''
points = []
for coordX in range((0+clipSize), (x-clipSize), stepSize):
for coordY in range(0, y, stepSize):
for i in range(0, 360, degrees):
#calculates length of X and Y
X1 = int(coordX + math.cos(math.radians(i)) * clipSize)
Y1 = int(coordY - math.sin(math.radians(i)) * clipSize)
X2 = int(coordX - math.cos(math.radians(i+90)) * clipSize)
Y2 = int(coordY + math.sin(math.radians(i+90)) * clipSize)
X3 = int(coordX - math.cos(math.radians(i)) * clipSize)
Y3 = int(coordY + math.sin(math.radians(i)) * clipSize)
X4 = int(coordX + math.cos(math.radians(i+90)) * clipSize)
Y4 = int(coordY - math.sin(math.radians(i+90)) * clipSize)
singlePointsX = [X1, X2, X3, X4]
singlePointsY = [Y1, Y2, Y3, Y4]
if all(0 <= item <= x for item in singlePointsX) and \
all(0 <= item <= y for item in singlePointsY):
#more efficent way
points.append([[(X1,Y1), (X2,Y2), (X3,Y3), (X4, Y4)],\
[45-i],[coordX, coordY]])
#points.append([singlePointsX, singlePointsY])
print 'finished getting points. Nr. of created squares: ', len(points)
return points
def clip(ar, points, clipSize, if_Label):
'''
clips the input array to the given points. Because how the images are saved, a black border is created which can be removed by the function crop_around_center
'''
img = Image.new('F', (ar.shape[1], ar.shape[0]), 0)
#ImageDraw.Draw(img).polygon([X1, Y1, X2, Y2, X3, Y3, X4, Y4], outline=1, fill=1)
ImageDraw.Draw(img).polygon(points[0], outline=1, fill=1)
mask = np.array(img)
#create new image
newImArray = np.empty(ar.shape, dtype=np.float32)
#newImArray = ar * mask[:,:, np.newaxis]
if len(ar.shape) == 3:
newImArray = ar * mask[:,:,np.newaxis]
else:
newImArray = ar * mask[:,:]
croped_image = crop(newImArray, points[0], ar.shape)
newImArray = rotate(croped_image, points[1][0],\
reshape=False, order=0, mode='constant', cval=0.0)
if if_Label:
final = crop_around_center(newImArray, clipSize, points[1][0])
else:
final = crop_around_center(newImArray, clipSize, points[1][0])
#counter for fileIndex
#save_2_HDF5(inputFile, numberOfFiles, prefix, outputDir, channel, width, height, batch):
return (final)
def crop_around_center(image, clipSize, angle):
"""
Given a NumPy / OpenCV 2 image, crops it to the given width and height,
around it's centre point
"""
width = clipSize
height = clipSize
image_size = (image.shape[1], image.shape[0])
image_center = (int(image_size[0] * 0.5), int(image_size[1] * 0.5))
if(width > image_size[0]):
width = image_size[0]
if(height > image_size[1]):
height = image_size[1]
x1 = int(image_center[0] - width * 0.5)
x2 = int(image_center[0] + width * 0.5)
y1 = int(image_center[1] - height * 0.5)
y2 = int(image_center[1] + height * 0.5)
return image[y1:y2, x1:x2]
def crop(image, points, image_size):
# find biggest and smallest X and Y value and crops the image
X = []
Y = []
for i in points: X.append(i[0])
for i in points: Y.append(i[1])
maxX = max(X)
minX = min(X)
maxY = max(Y)
minY = min(Y)
return image[minY:maxY, minX:maxX]
def save_2_HDF5(dataAr, labelAr, prefix, outputDir, batch):
'''
saves the input to HDF5.
'''
try:
hdfFileList=[]
widthData = dataAr.shape[0]
heightData = dataAr.shape[1]
channelData = dataAr.shape[2]
widthLabel = labelAr.shape[0]
heightLabel = labelAr.shape[1]
global index
global fileIndex
if (fileIndex % batch) == 0:
# open and create hdf5 file output directory
outputHDFFile = prefix + "_" + str(fileIndex) + ".h5"
#print "file name: " + outputHDFFile
outputHDFPath = os.path.join(outputDir, outputHDFFile)
#print "hdf5 file: ", outputHDFPath
if os.path.exists(outputHDFPath):
fileOut = h5py.File(outputHDFPath, 'r+')
else:
fileOut = h5py.File(outputHDFPath, 'w')
hdfFileList.append(outputHDFPath)
data = fileOut.create_dataset("data", (batch,channelData, (widthData), heightData), dtype=np.float32)
label = fileOut.create_dataset("label", (batch, widthLabel, heightLabel), dtype=np.float32)
# image data matrix
#labelStack = np.empty((batch,width,height))
# initialize index at every batch
index=0
plt.imshow(dataAr)
#this one here cost me around a mont
#WICHTIG
dataAr = dataAr.transpose(2,0,1)
data[index,:,:,:]=dataAr
label[index,:,:]=labelAr.astype('float32')
if (fileIndex % batch) == 0:
# close the last file
dataAr.__init__()
#labelStack.__init__()
fileOut.close()
outputHDFListFile = prefix + '.txt'
outputHDFListPath = os.path.join(outputDir, outputHDFListFile)
if os.path.exists(outputHDFListPath):
outputHDFListFile = prefix + '-list.txt'
outputHDFListPath = os.path.join(outputDir, outputHDFListFile)
with open(outputHDFListPath, 'a') as trainOut:
for hdfFile in hdfFileList:
writeOut=hdfFile + "\n"
trainOut.write(writeOut)
except Exception, e:
print 'Data already exists or '
print str(e)
print tb_lineno
return
return(index)
def reset_globals():
global fileIndex
global index
fileIndex = 0
index=0
return
for_HDF5_creation = (prefix, path_dst, batchSize)
if __name__ == '__main__':
main(degrees, clipSize, stepSize, rasterPath, labelPath, for_HDF5_creation, path_dst)