def process_base_folder(folder, negativeMultiplicator=3, rulesType='large', jobs=-1, prefix=None,
                        interestingWindowsFolder=None, onlyFirstTagSymbol=False,
                        positiveWindowNeighboursAmount=7):
    """Run process_folder over every sub-directory of *folder* and process the result.

    A single DatasetCreator is shared by all sub-directories; once every
    matching sub-directory has been handed to process_folder,
    ``processPrepared(jobs)`` is called on it.

    Args:
        folder: Directory whose immediate sub-directories are processed.
        negativeMultiplicator: Forwarded unchanged to process_folder.
        rulesType: ``'large'`` selects ``large_train``; any other value
            selects ``small_train``.
        jobs: Forwarded to ``DatasetCreator.processPrepared``.
        prefix: When truthy, only sub-directories whose name starts with
            this string are processed; otherwise all are.
        interestingWindowsFolder: Forwarded unchanged to process_folder.
        onlyFirstTagSymbol: Forwarded unchanged to process_folder.
        positiveWindowNeighboursAmount: Forwarded unchanged to process_folder.

    Returns:
        The shared DatasetCreator instance.
    """
    creator = DatasetCreator()

    if rulesType == 'large':
        selectedRules = large_train
    else:
        selectedRules = small_train

    # Options that are identical for every sub-directory.
    sharedOptions = dict(
        rules=selectedRules,
        negativeMultiplicator=negativeMultiplicator,
        datasetCreator=creator,
        interestingWindowsFolder=interestingWindowsFolder,
        onlyFirstTagSymbol=onlyFirstTagSymbol,
        positiveWindowNeighboursAmount=positiveWindowNeighboursAmount,
    )

    for entryName in os.listdir(folder):
        entryPath = os.path.join(folder, entryName)
        if not os.path.isdir(entryPath):
            continue
        if prefix and not entryName.startswith(prefix):
            continue
        process_folder(entryPath, **sharedOptions)

    creator.processPrepared(jobs)
    return creator
def process_folder(folder, rules=large_train, negativeMultiplicator=3, interestingWindowsFolder=None,
                   datasetCreator=None, onlyFirstTagSymbol=False, positiveWindowNeighboursAmount=7):
    """Split the images in *folder* into train/test sets and register them with a DatasetCreator.

    Args:
        folder: Directory containing the image files (jpg / jpeg / png,
            matched case-insensitively).
        rules: Iterable of rule objects exposing ``cond_function(fileCount)``
            and ``train``. The first rule whose ``cond_function`` accepts the
            number of images decides the train size: ``train >= 1`` is taken
            as an absolute file count, a smaller value as a fraction of the
            images. If no rule matches, every image goes to the test set.
        negativeMultiplicator: Forwarded to ``prepareImageProcessing``.
        interestingWindowsFolder: Forwarded to ``prepareImageProcessing``.
        datasetCreator: Existing DatasetCreator to add to; a new one is
            created when ``None``.
        onlyFirstTagSymbol: When true, the fourth tag coordinate is replaced
            by ``tp[1] + (tp[2] - tp[0])``.
        positiveWindowNeighboursAmount: Forwarded to ``prepareImageProcessing``.

    Returns:
        The DatasetCreator that received the prepared files.
    """
    # Dotted suffixes so that only genuine extensions match; a bare 'png'
    # would also accept names such as 'backuppng'.
    acceptableExtensions = ('.jpg', '.jpeg', '.png')

    # os.listdir yields entries in arbitrary order; sorting first makes the
    # shuffle below reproducible when the caller seeds `random`.
    files = [
        os.path.join(folder, filename)
        for filename in sorted(os.listdir(folder))
        if filename.lower().endswith(acceptableExtensions)
    ]

    trainAmount = 0
    for c in rules:
        if c.cond_function(len(files)):
            if c.train >= 1:
                # Absolute count; int() keeps the slice bound valid even if
                # the rule stores the count as a float.
                trainAmount = int(c.train)
            else:
                trainAmount = int(len(files) * c.train)
            break

    random.shuffle(files)
    # A trainAmount larger than len(files) simply leaves the test set empty.
    trainFiles = set(files[:trainAmount])
    testFiles = set(files[trainAmount:])

    tagPosition = getTagCoordinates(folder)
    if onlyFirstTagSymbol:
        tp = tagPosition
        # NOTE(review): presumably shrinks the box to a square covering only
        # the first symbol -- confirm the coordinate order of getTagCoordinates.
        tagPosition = [tp[0], tp[1], tp[2], tp[1] + (tp[2] - tp[0])]

    if datasetCreator is None:
        datasetCreator = DatasetCreator()

    datasetCreator.prepareImageProcessing(
        trainFiles,
        testFiles,
        tagPosition,
        negativeMultiplicator,
        interestingWindowsFolder,
        positiveWindowNeighboursAmount,
    )
    return datasetCreator