Example #1
0
def process_folder(folder, rules=large_train, negativeMultiplicator=3, interestingWindowsFolder=None, datasetCreator=None):
    """Split the images of one folder into train/test sets and queue them on a dataset creator.

    Args:
        folder: Directory whose entries ending in ``.jpg``, ``.jpeg`` or
            ``.png`` (case-insensitive) are collected.
        rules: Iterable of rule objects. The first rule whose
            ``cond_function(len(files))`` is truthy decides the train amount:
            a ``train`` value >= 1 is used as an absolute number of files,
            a smaller value as a fraction of the number of collected files.
            If no rule matches, no file goes to the train set.
        negativeMultiplicator: Forwarded unchanged to
            ``datasetCreator.prepareImageProcessing``.
        interestingWindowsFolder: Forwarded unchanged to
            ``datasetCreator.prepareImageProcessing``.
        datasetCreator: Creator to queue the files on; a new
            ``DatasetCreator()`` is built when this is ``None``.

    Returns:
        The dataset creator the files were queued on.
    """
    # Match on the real extension, dot included, so that names which merely
    # end in the same letters (e.g. "notapng") are not picked up as images.
    acceptableExtensions = ('.jpg', '.jpeg', '.png')
    files = [
        os.path.join(folder, filename)
        for filename in os.listdir(folder)
        if filename.lower().endswith(acceptableExtensions)
    ]

    trainAmount = 0
    for c in rules:
        if c.cond_function(len(files)):
            # train >= 1 is an absolute count, anything below is a ratio.
            trainAmount = c.train if c.train >= 1 else int(len(files) * c.train)
            break

    # Shuffle before slicing so the train/test membership is random.
    # A trainAmount larger than len(files) simply leaves the test set empty.
    random.shuffle(files)
    trainFiles = set(files[:trainAmount])
    testFiles = set(files[trainAmount:])

    tagPosition = getTagCoordinates(folder)

    if datasetCreator is None:
        datasetCreator = DatasetCreator()

    datasetCreator.prepareImageProcessing(trainFiles, testFiles, tagPosition, negativeMultiplicator, interestingWindowsFolder)

    return datasetCreator
def process_base_folder(folder, negativeMultiplicator=3, rulesType='large', jobs=-1, prefix=None, interestingWindowsFolder=None):
    """Run ``process_folder`` on the sub-directories of ``folder`` and process the result.

    Args:
        folder: Base directory whose direct sub-directories are visited.
        negativeMultiplicator: Forwarded unchanged to ``process_folder``.
        rulesType: ``'large'`` selects ``large_train``; any other value
            selects ``small_train``.
        jobs: Forwarded unchanged to ``DatasetCreator.processPrepared``.
        prefix: When truthy, only sub-directories whose name starts with
            this prefix are processed; otherwise all of them are.
        interestingWindowsFolder: Forwarded unchanged to ``process_folder``.

    Returns:
        The single ``DatasetCreator`` shared by all processed sub-directories,
        after ``processPrepared(jobs)`` has been called on it.
    """
    creator = DatasetCreator()

    for entry in os.listdir(folder):
        subfolder = os.path.join(folder, entry)

        # Only directories are candidates; plain files are ignored.
        if not os.path.isdir(subfolder):
            continue
        # An empty/None prefix disables the name filter.
        if prefix and not entry.startswith(prefix):
            continue

        if rulesType == 'large':
            selectedRules = large_train
        else:
            selectedRules = small_train

        process_folder(
            subfolder,
            rules=selectedRules,
            negativeMultiplicator=negativeMultiplicator,
            interestingWindowsFolder=interestingWindowsFolder,
            datasetCreator=creator,
        )

    creator.processPrepared(jobs)
    return creator