def process_folder(folder, rules=large_train, negativeMultiplicator=3, interestingWindowsFolder=None, datasetCreator=None):
    """Split the images found in ``folder`` into train/test sets and queue them.

    The image files directly inside ``folder`` are collected, shuffled with
    the global ``random`` state and split into a training part and a test
    part. Both parts, together with the tag position returned by
    ``getTagCoordinates(folder)``, are handed to
    ``datasetCreator.prepareImageProcessing``.

    Args:
        folder: Directory that is listed (non-recursively) for image files.
        rules: Iterable of rule objects exposing ``cond_function(file_count)``
            and ``train``. The first rule whose ``cond_function`` returns a
            truthy value decides the size of the training set: a ``train``
            value >= 1 is used as-is as the number of files, a value below 1
            is treated as a fraction of the number of files found.
        negativeMultiplicator: Forwarded unchanged to
            ``prepareImageProcessing``.
        interestingWindowsFolder: Forwarded unchanged to
            ``prepareImageProcessing``.
        datasetCreator: Existing creator to add this folder to; a new
            ``DatasetCreator`` is instantiated when ``None``.

    Returns:
        The dataset creator that received the prepared file sets.
    """
    # Dotted suffixes: without the dot, any name that merely ends in these
    # letters (e.g. "notes_png") would be mistaken for an image.
    acceptableExtensions = ('.jpg', '.jpeg', '.png')
    files = [
        os.path.join(folder, filename)
        for filename in os.listdir(folder)
        if filename.lower().endswith(acceptableExtensions)
    ]

    # When no rule matches, nothing is used for training and every file ends
    # up in the test set.
    trainAmount = 0
    for c in rules:
        if c.cond_function(len(files)):
            if c.train >= 1:
                trainAmount = c.train
            else:
                trainAmount = int(len(files) * c.train)
            break

    random.shuffle(files)
    # Slicing clamps automatically, so a trainAmount larger than the number
    # of files simply yields an empty test set.
    trainFiles = set(files[:trainAmount])
    testFiles = set(files[trainAmount:])

    tagPosition = getTagCoordinates(folder)
    if datasetCreator is None:
        datasetCreator = DatasetCreator()
    datasetCreator.prepareImageProcessing(trainFiles, testFiles, tagPosition, negativeMultiplicator, interestingWindowsFolder)
    return datasetCreator
def process_base_folder(folder, negativeMultiplicator=3, rulesType='large', jobs=-1, prefix=None, interestingWindowsFolder=None):
    """Prepare every matching sub-directory of ``folder`` and process the result.

    Each direct sub-directory of ``folder`` (optionally restricted to names
    starting with ``prefix``) is passed to ``process_folder`` with one shared
    ``DatasetCreator``. Once all sub-directories are prepared,
    ``processPrepared(jobs)`` is called on that creator.

    Args:
        folder: Directory whose direct sub-directories are scanned.
        negativeMultiplicator: Forwarded unchanged to ``process_folder``.
        rulesType: ``'large'`` selects ``large_train``; any other value
            selects ``small_train``.
        jobs: Forwarded unchanged to ``DatasetCreator.processPrepared``.
        prefix: When truthy, only sub-directories whose name starts with it
            are processed; otherwise all sub-directories are.
        interestingWindowsFolder: Forwarded unchanged to ``process_folder``.

    Returns:
        The shared ``DatasetCreator`` after ``processPrepared`` has run.
    """
    creator = DatasetCreator()
    # The rule set depends only on rulesType, so pick it once up front.
    if rulesType == 'large':
        selectedRules = large_train
    else:
        selectedRules = small_train

    for entry in os.listdir(folder):
        entryPath = os.path.join(folder, entry)
        if not os.path.isdir(entryPath):
            continue
        if prefix and not entry.startswith(prefix):
            continue
        process_folder(
            entryPath,
            rules=selectedRules,
            negativeMultiplicator=negativeMultiplicator,
            datasetCreator=creator,
            interestingWindowsFolder=interestingWindowsFolder,
        )

    creator.processPrepared(jobs)
    return creator