Esempio n. 1
0
def process_base_folder(folder,
                        negativeMultiplicator=3,
                        rulesType='large',
                        jobs=-1,
                        prefix=None,
                        interestingWindowsFolder=None,
                        onlyFirstTagSymbol=False,
                        positiveWindowNeighboursAmount=7):
    """Prepare every matching sub-directory of ``folder`` and process them together.

    Each immediate sub-directory of ``folder`` (optionally restricted to names
    starting with ``prefix``) is handed to ``process_folder`` with one shared
    ``DatasetCreator``. Once all sub-directories have been prepared,
    ``processPrepared(jobs)`` is invoked on that creator.

    Args:
        folder: Directory whose immediate sub-directories are scanned.
        negativeMultiplicator: Forwarded unchanged to ``process_folder``.
        rulesType: ``'large'`` selects ``large_train``; any other value
            selects ``small_train``.
        jobs: Forwarded to ``DatasetCreator.processPrepared``.
            NOTE(review): -1 presumably means "use all workers" - confirm
            against DatasetCreator.
        prefix: When truthy, only sub-directories whose name starts with it
            are processed; when falsy, all sub-directories are processed.
        interestingWindowsFolder: Forwarded unchanged to ``process_folder``.
        onlyFirstTagSymbol: Forwarded unchanged to ``process_folder``.
        positiveWindowNeighboursAmount: Forwarded unchanged to
            ``process_folder``.

    Returns:
        The ``DatasetCreator`` that accumulated all prepared folders.
    """
    creator = DatasetCreator()

    if rulesType == 'large':
        split_rules = large_train
    else:
        split_rules = small_train

    # Options that are identical for every sub-directory.
    shared_options = {
        'rules': split_rules,
        'negativeMultiplicator': negativeMultiplicator,
        'datasetCreator': creator,
        'interestingWindowsFolder': interestingWindowsFolder,
        'onlyFirstTagSymbol': onlyFirstTagSymbol,
        'positiveWindowNeighboursAmount': positiveWindowNeighboursAmount,
    }

    for entry in os.listdir(folder):
        entry_path = os.path.join(folder, entry)
        if not os.path.isdir(entry_path):
            continue
        if prefix and not entry.startswith(prefix):
            continue
        process_folder(entry_path, **shared_options)

    creator.processPrepared(jobs)
    return creator
Esempio n. 2
0
def process_folder(folder,
                   rules=large_train,
                   negativeMultiplicator=3,
                   interestingWindowsFolder=None,
                   datasetCreator=None,
                   onlyFirstTagSymbol=False,
                   positiveWindowNeighboursAmount=7):
    """Split the images of ``folder`` into train/test sets and queue them.

    Image files (``.jpg``, ``.jpeg``, ``.png``, case-insensitive) directly
    inside ``folder`` are collected, shuffled, and split according to the
    first entry of ``rules`` whose ``cond_function`` accepts the image count.
    The split, together with the tag coordinates obtained from
    ``getTagCoordinates(folder)``, is passed to
    ``datasetCreator.prepareImageProcessing``.

    Args:
        folder: Directory containing the images.
        rules: Iterable of rule objects exposing ``cond_function(count)`` and
            ``train``. A ``train`` value >= 1 is used as an absolute number of
            training files; a value < 1 is used as a fraction of the image
            count. If no rule matches, every file goes to the test set.
        negativeMultiplicator: Forwarded to ``prepareImageProcessing``.
        interestingWindowsFolder: Forwarded to ``prepareImageProcessing``.
        datasetCreator: Creator to queue the work on; a new
            ``DatasetCreator`` is made when ``None``.
        onlyFirstTagSymbol: When true, the fourth tag coordinate is replaced
            so that the box extends from ``tp[1]`` by the same amount as
            ``tp[2] - tp[0]``.
            NOTE(review): presumably this yields a square box around the
            first symbol - confirm the coordinate order returned by
            getTagCoordinates.
        positiveWindowNeighboursAmount: Forwarded to
            ``prepareImageProcessing``.

    Returns:
        The ``DatasetCreator`` the work was queued on.
    """
    # The leading dot matters: without it, names that merely end in these
    # letters (e.g. "notesjpg") would be treated as images.
    acceptableExtensions = ('.jpg', '.jpeg', '.png')
    files = [os.path.join(folder, filename)
             for filename in os.listdir(folder)
             if filename.lower().endswith(acceptableExtensions)]

    trainAmount = 0
    for c in rules:
        if c.cond_function(len(files)):
            if c.train >= 1:
                # Absolute count; coerce so a float such as 5.0 is still a
                # valid slice bound below.
                trainAmount = int(c.train)
            else:
                # Fraction of the available files, rounded down.
                trainAmount = int(len(files) * c.train)
            break  # only the first matching rule applies

    random.shuffle(files)
    # Slicing clamps automatically, so trainAmount > len(files) simply puts
    # every file in the train set and leaves the test set empty.
    trainFiles = set(files[:trainAmount])
    testFiles = set(files[trainAmount:])

    tagPosition = getTagCoordinates(folder)
    if onlyFirstTagSymbol:
        tp = tagPosition
        tagPosition = [tp[0], tp[1], tp[2], tp[1] + (tp[2] - tp[0])]

    if datasetCreator is None:
        datasetCreator = DatasetCreator()

    datasetCreator.prepareImageProcessing(trainFiles,
                                          testFiles,
                                          tagPosition,
                                          negativeMultiplicator,
                                          interestingWindowsFolder,
                                          positiveWindowNeighboursAmount)

    return datasetCreator