-
Notifications
You must be signed in to change notification settings - Fork 1
/
process_folder.py
72 lines (57 loc) · 2.15 KB
/
process_folder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
from collections import namedtuple
import random
from create_dataset import DatasetCreator
# A split rule. `cond_function` is a predicate called with the number of image
# files found in a folder; `train` is the training share used when the
# predicate holds. process_folder treats a `train` value >= 1 as an absolute
# number of files and a value below 1 as a fraction of the files.
Condition = namedtuple('Condition', ('cond_function', 'train'))

# Default rule set: any non-empty folder puts 70% of its files into training.
large_train = [
    Condition(cond_function=lambda count: count > 0, train=0.7),
]

# Rule set for small training sets. Rules are tried in order and the first
# one whose predicate holds wins, so the second rule effectively covers
# folders with 11..99 files.
small_train = [
    Condition(cond_function=lambda count: count <= 10, train=0.7),
    Condition(cond_function=lambda count: count < 100, train=10),
    Condition(cond_function=lambda count: count >= 100, train=0.1),
]
def getTagCoordinates(folder, YX=True):
    """Parse the tag rectangle encoded at the end of a folder name.

    The text after the last underscore in *folder* must consist of four
    integers joined by ``x``, for example ``shots_10x20x110x220``. A trailing
    path separator on *folder* is ignored.

    Args:
        folder: Folder name or path whose suffix carries the coordinates.
        YX: When true (the default), the two values of each pair are swapped
            before normalisation, i.e. ``AxBxCxD`` is read as ``[B, A, D, C]``.

    Returns:
        A list of four ints, ordered so that element 0 <= element 2 and
        element 1 <= element 3.

    Raises:
        ValueError: If the folder name does not end in a parsable tag
            position.
    """
    tagPosition = None

    # Tolerate "dir_1x2x3x4/": without this the last part would be "4/".
    tagInformationParts = folder.rstrip('/\\').rsplit('_', 1)
    if len(tagInformationParts) > 1:
        tagCoords = tagInformationParts[1].split('x')
        if len(tagCoords) == 4:
            try:
                # Build a real list: on Python 3 ``map`` returns an iterator,
                # which supports neither indexing nor item assignment.
                tagPosition = [int(coord) for coord in tagCoords]
            except ValueError:
                # Non-numeric parts are reported as a format error below.
                tagPosition = None

    if tagPosition is None:
        raise ValueError("Incorrect folder name format. Folder MUST contain tag position information")

    if YX:
        first, second, third, fourth = tagPosition
        tagPosition = [second, first, fourth, third]

    # Normalise the two corners so the smaller value of each axis comes first.
    if tagPosition[0] > tagPosition[2]:
        tagPosition[0], tagPosition[2] = tagPosition[2], tagPosition[0]
    if tagPosition[1] > tagPosition[3]:
        tagPosition[1], tagPosition[3] = tagPosition[3], tagPosition[1]
    return tagPosition
def process_folder(folder, rules=large_train, negativeMultiplicator=3, interestingWindowsFolder=None, datasetCreator=None):
    """Split the images of a folder into train/test sets and hand them to a DatasetCreator.

    Image files (``.jpg``, ``.jpeg``, ``.png``, case-insensitive) directly
    inside *folder* are collected, shuffled and split according to the first
    rule in *rules* whose predicate accepts the number of files found. The
    tag position is parsed from the folder name with ``getTagCoordinates``.

    Args:
        folder: Directory to scan; its name must encode the tag position.
        rules: Iterable of ``Condition`` rules. A rule's ``train`` value >= 1
            is used as an absolute number of training files, a value below 1
            as a fraction of the files found. If no rule matches, every file
            goes to the test set.
        negativeMultiplicator: Passed through to
            ``DatasetCreator.prepareImageProcessing`` unchanged.
        interestingWindowsFolder: Passed through to
            ``DatasetCreator.prepareImageProcessing`` unchanged.
        datasetCreator: Existing creator to feed; a new ``DatasetCreator`` is
            instantiated when this is None.

    Returns:
        The ``DatasetCreator`` that received the files.

    Raises:
        ValueError: If the folder name carries no tag position (raised by
            ``getTagCoordinates``).
    """
    # The leading dot matters: without it names such as "notesjpg" would be
    # treated as images.
    acceptableExtensions = ('.jpg', '.jpeg', '.png')
    files = [
        os.path.join(folder, filename)
        for filename in os.listdir(folder)
        if filename.lower().endswith(acceptableExtensions)
    ]

    trainAmount = 0
    for c in rules:
        if c.cond_function(len(files)):
            if c.train >= 1:
                # Absolute count; cast so a float such as 10.0 can be used
                # as a slice bound.
                trainAmount = int(c.train)
            else:
                trainAmount = int(len(files) * c.train)
            break  # only the first matching rule applies

    # Random split: the first trainAmount files train, the remainder test.
    random.shuffle(files)
    trainFiles = set(files[:trainAmount])
    testFiles = set(files[trainAmount:])

    tagPosition = getTagCoordinates(folder)

    if datasetCreator is None:
        datasetCreator = DatasetCreator()
    datasetCreator.prepareImageProcessing(trainFiles, testFiles, tagPosition, negativeMultiplicator, interestingWindowsFolder)
    return datasetCreator