Ejemplo n.º 1
0
def matchVideosToMarks(marks, videos):
    """Match mark entries against video entries via ``matchLists``.

    Each argument is used as-is when it is already a ``list``; any other
    value is handed to ``os.listdir`` to obtain the entries.

    Returns:
        Whatever ``matchLists`` returns for ``master=marks`` and
        ``slave=videos``.
    """
    if not isinstance(marks, list):
        marks = os.listdir(marks)
    if not isinstance(videos, list):
        videos = os.listdir(videos)

    def toJsonName(entry):
        # JSON file name derived from the entry's basename.
        return makeJSONname(extractBasename(entry))

    return matchLists(master=marks,
                      slave=videos,
                      transformer=toJsonName,
                      showMessages=True)
Ejemplo n.º 2
0
def updateCategoriesIndices(datasetPath, categories):
    """Recompute the category index stored in every marks file of a dataset.

    Marks files are located with ``walk``; each hit is a sequence of path
    components relative to ``datasetPath`` whose two components before the
    file name are taken as category and subcategory. Files whose full
    category is not in ``categories`` are skipped. For the rest, every
    entry's ``const.ctgIdx`` is set to the position of the entry's own
    ``const.fullCategory`` in ``categories`` and the file is rewritten.

    Args:
        datasetPath: Root directory that is walked for marks files.
        categories: Sequence of full category names; the position of a name
            is the index written into the marks.

    Any exception raised while handling one file is printed and processing
    continues with the next file (best effort).
    """
    from utils import walk, makeJSONname
    from verifier import getFullCategory

    marksFiles = walk(datasetPath, targetFiles=makeJSONname(const.marks)).get("files")

    for mrk in marksFiles:
        try:
            marksPath = os.path.join(datasetPath, *mrk)

            category, subcategory = mrk[-3:-1]
            fullCategory = getFullCategory(category, subcategory)

            if fullCategory not in categories:
                continue

            marks = openJsonSafely(marksPath)

            for value in marks.values():
                # Each entry carries its own full category; an unknown one
                # raises ValueError and the file is left untouched.
                value[const.ctgIdx] = categories.index(value[const.fullCategory])

            # Context manager guarantees the file is flushed and closed.
            with open(marksPath, "w") as marksFile:
                json.dump(marks, marksFile, indent=3)
            print(f"{Fore.BLUE}JSON file {marksPath} has been fixed{Style.RESET_ALL}")
        except Exception as e:
            print(e)
0
def extractCrops(categoryDir,
                 extractionPath=None,
                 extension=Extensions.png,
                 params=None,
                 globalIdx=0):
    """Cut the marked bounding box out of every frame of one category.

    Reads the marks JSON of ``categoryDir``, and for every entry whose frame
    image exists writes the padded crop into the output directory. Crops
    that already exist on disk are not regenerated.

    Args:
        categoryDir: Directory holding the marks JSON and the frames folder.
        extractionPath: Output directory; defaults to the ``const.cut``
            folder inside ``categoryDir``.
        extension: Extension passed to ``extendName`` for the crop file name.
        params: Forwarded to ``cv2.imwrite`` as its third argument.
        globalIdx: Starting value of the running index used as crop name
            prefix; it advances once per marks entry that has an image
            name, whether or not a crop is written.

    Returns:
        The value of the running index after the last entry, or ``None``
        when the marks file does not exist.
    """
    marksPath = os.path.join(categoryDir, makeJSONname(const.marks))
    framesDir = os.path.join(categoryDir, const.frames)
    cutDir = os.path.join(
        categoryDir, const.cut) if extractionPath is None else extractionPath

    os.makedirs(cutDir, exist_ok=True)

    try:
        with open(marksPath, "r") as marksFile:
            marks = json.load(marksFile)
    except FileNotFoundError:
        return

    print(
        f"{Fore.GREEN}Processing crop operation for {marksPath} {Style.RESET_ALL}"
    )
    time.sleep(0.5)

    total = len(marks)
    for frameIdx, frameMarks in enumerate(marks.values()):
        framePath = os.path.join(framesDir, frameMarks[const.image])

        if not os.path.exists(framePath):
            # The index still advances so numbering stays aligned with the
            # marks order.
            globalIdx += 1
            continue

        y1, x1, y2, x2 = frameMarks[const.coords]
        fullCategory = frameMarks[const.fullCategory]
        h, w = frameMarks[const.imageShape]

        cutName = f"{globalIdx}_{extendName(fullCategory, extension)}"
        globalIdx += 1

        if not checkBoundingBoxIsCorrect(x2 - x1, y2 - y1):
            continue

        # Pad the box by 10 pixels on each side, then let fitCoords adjust
        # it against the (h, w) image shape.
        y1, x1, y2, x2 = fitCoords((y1 - 10, x1 - 10, y2 + 10, x2 + 10),
                                   (h, w))

        cutPath = os.path.join(cutDir, cutName)
        if not os.path.exists(cutPath):
            frame = cv2.imread(framePath)
            if frame is None:
                # cv2.imread signals an unreadable image by returning None.
                print(
                    f"\n{Fore.RED}Couldn't read frame {framePath} {Style.RESET_ALL}"
                )
                continue

            cv2.imwrite(cutPath, frame[y1:y2, x1:x2, ...], params)

        print("\r{:.1f}% of work has been done for {} category".format(
            (frameIdx + 1) / total * 100, fullCategory),
              end="")

    return globalIdx
Ejemplo n.º 4
0
def xml2json(xmlPath, wpath=None, overwrite=False):
    """Convert an XML annotation file into the JSON marks layout.

    The XML is parsed with ``xmltodict`` and the images are read from
    ``dataset -> images -> image``. Every image that has a ``@frame``
    attribute yields one ``frame_<n>`` entry holding the category, the
    subcategory (``@category`` attribute, ``const.merged`` when absent) and
    the box as ``[y1, x1, y2, x2]``.

    Args:
        xmlPath: Path of the XML file; category and basename are derived
            from it with ``extractCategory``.
        wpath: Directory to write the JSON file into; nothing is written
            when ``None``.
        overwrite: Whether an existing JSON file in ``wpath`` is replaced.

    Returns:
        The dict of frame entries. ``None`` when the target JSON already
        exists and ``overwrite`` is false. The tuple ``({}, None)`` when the
        XML cannot be read or parsed (kept as-is for existing callers).
    """
    jsonData = {}
    category, basename = extractCategory(xmlPath)
    jsonName = makeJSONname(basename)

    if wpath is not None:
        os.makedirs(wpath, exist_ok=True)
        alreadyExists = os.path.exists(os.path.join(wpath, jsonName))
        if alreadyExists and not overwrite:
            print(
                f"{Fore.RED}JSON {jsonName} already exists in {wpath} {Style.RESET_ALL}"
            )
            return
        elif alreadyExists:
            print(
                f"{Fore.RED}JSON {jsonName} will be overwritten in {wpath} {Style.RESET_ALL}"
            )
        else:
            print(
                f"{Fore.GREEN}JSON {jsonName} will be written to {wpath} {Style.RESET_ALL}"
            )

    try:
        with open(xmlPath, "r") as file:
            o = xmltodict.parse(file.read())
        imgList = o["dataset"]["images"]["image"]
    except Exception:
        # Reset the colour so the red does not leak into later output.
        print(f"{Fore.RED} Couldn't parse {xmlPath} {Style.RESET_ALL}")
        return {}, None

    # xmltodict yields a single mapping (not a list) when the element occurs
    # only once; normalise so the loop always sees image mappings.
    if not isinstance(imgList, list):
        imgList = [imgList]

    for image in imgList:
        if "@frame" not in image:
            continue

        imgIdx = image['@frame']
        x1 = int(image['box']['@left'])
        y1 = int(image['box']['@top'])
        x2 = x1 + int(image['box']['@width'])
        y2 = y1 + int(image['box']['@height'])

        subCategory = image.get("@category", const.merged)

        jsonData[f"frame_{imgIdx}"] = {
            const.category: category,
            const.subcategory: subCategory,
            const.coords: [y1, x1, y2, x2]
        }

    if wpath is not None:
        with open(os.path.join(wpath, jsonName), "w") as jsonFile:
            json.dump(jsonData, jsonFile, indent=3)

    return jsonData
Ejemplo n.º 5
0
def summarizeInfo(rawPath=Path.raw, summarizedPath=Path.summarizedRaw, allowedCategories=None, allowedSubCtgList=None,
                  overwrite=True):
    """Build or update the summary JSON describing the raw videos.

    Every video file in the raw videos folder is assigned to a category via
    ``extractCategory``; its marks (from ``getVideoMarks``) are grouped per
    subcategory. A subcategory seen for the first time receives the next
    free index, which is tracked under ``const.maxIdx`` in the summary.

    Args:
        rawPath: Directory containing the ``const.videos`` and
            ``const.json`` folders.
        summarizedPath: Path of the summary JSON that is written.
        allowedCategories: Categories to process; ``None`` means no
            category filtering.
        allowedSubCtgList: Subcategories to process; ``None`` means no
            subcategory filtering.
        overwrite: When true the summary is rebuilt from scratch, otherwise
            the existing file is loaded and extended.
    """
    summarized = openJsonSafely(summarizedPath) if not overwrite else {}

    rawVideosPath = os.path.join(rawPath, const.videos)
    rawJsonsPath = os.path.join(rawPath, const.json)

    rawVideos = sorted(j for j in os.listdir(rawVideosPath) if j.endswith(Extensions.videos()))

    maxIdx = summarized.get(const.maxIdx, 0)
    for i, video in enumerate(rawVideos):
        print(f"\rProcessing {video} ({i + 1} out of {len(rawVideos)})", end="")

        category, name = extractCategory(video)

        # None means "no filter", mirroring the subcategory filter below;
        # testing membership in None would raise TypeError.
        if allowedCategories is not None and category not in allowedCategories:
            continue

        categoryInfo = summarized.get(category, {})

        videoJson = os.path.join(rawJsonsPath, makeJSONname(name))
        videoMarks = getVideoMarks(os.path.join(rawVideosPath, video), videoJson)

        for subctg, subctgMarks in videoMarks.items():
            if allowedSubCtgList is not None and subctg not in allowedSubCtgList:
                continue

            if subctg not in categoryInfo:
                subctgIdx = maxIdx
                maxIdx += 1

                curSubctgMarks = {
                    const.overall: 0,
                    const.ctgIdx: subctgIdx,
                    const.videos: {},
                    const.parent: category
                }
            else:
                curSubctgMarks = categoryInfo[subctg]

            # A video already present in the summary is not counted twice.
            if video not in curSubctgMarks[const.videos]:
                curSubctgMarks[const.videos][video] = subctgMarks
                curSubctgMarks[const.overall] += len(subctgMarks)

            categoryInfo[subctg] = curSubctgMarks

        if categoryInfo:
            summarized[category] = categoryInfo
            summarized[const.maxIdx] = maxIdx

    with open(summarizedPath, "w") as summarizedFile:
        json.dump(summarized, summarizedFile, indent=3)
    print(f"\n{Fore.GREEN}Summarized info file {summarizedPath} has been updated{Style.RESET_ALL}")
Ejemplo n.º 6
0
def makeNegativesMarks(rpath):
    """Write a marks JSON for a folder of negative images.

    Every entry of the ``const.frames`` folder inside ``rpath`` gets a
    record with an all-zero box, category index 0 and image shape
    ``[1, 1]``. The result is written to the marks JSON inside ``rpath``.

    Args:
        rpath: Directory that contains the frames folder and receives the
            marks file.
    """
    negatives = os.listdir(os.path.join(rpath, const.frames))

    marks = {
        nimage: {
            const.coords: [0, 0, 0, 0],
            const.ctgIdx: 0,
            const.imageShape: [1, 1],
            const.image: nimage
        }
        for nimage in negatives
    }

    # Context manager guarantees the file is flushed and closed.
    with open(os.path.join(rpath, makeJSONname(const.marks)), "w") as marksFile:
        json.dump(marks, marksFile, indent=3)
Ejemplo n.º 7
0
def extractMarks(categoryDir):
    """Write one text annotation file per existing frame of a category.

    For every marks entry whose image exists in the frames folder, a text
    file with the image's base name is written next to the image. It holds
    one line ``<ctgIdx> <xc> <yc> <bw> <bh>``: box centre and box size
    divided by the image width/height. The file is written empty when
    ``checkBoundingBoxIsCorrect`` rejects the box.

    Args:
        categoryDir: Directory holding the marks JSON and the frames folder.

    Returns:
        ``None``; returns early when the marks file does not exist.
    """
    marksPath = os.path.join(categoryDir, makeJSONname(const.marks))
    framesDir = os.path.join(categoryDir, const.frames)

    try:
        with open(marksPath, "r") as marksFile:
            marks = json.load(marksFile)
    except FileNotFoundError:
        return

    print(
        f"\n{Fore.GREEN}Processing extraction marks for {marksPath} {Style.RESET_ALL}"
    )

    total = len(marks)
    for frameIdx, frameMarks in enumerate(marks.values()):
        # Same key for locating the image and for naming the text file.
        imageName = frameMarks[const.image]
        framePath = os.path.join(framesDir, imageName)

        if not os.path.exists(framePath):
            continue

        y1, x1, y2, x2 = frameMarks[const.coords]
        ctgIdx = frameMarks[const.ctgIdx]
        h, w = frameMarks[const.imageShape]

        # Centre and size relative to the image dimensions.
        xc = (x2 + x1) / (2 * w)
        yc = (y2 + y1) / (2 * h)
        bw = (x2 - x1) / w
        bh = (y2 - y1) / h

        # NOTE(review): the check receives normalised sizes here, while
        # extractCrops passes pixel sizes - confirm the helper handles both.
        if checkBoundingBoxIsCorrect(bw, bh):
            darknetString = f"{ctgIdx} {xc} {yc} {bw} {bh}\n"
        else:
            darknetString = ""

        txtName = os.path.splitext(imageName)[0]
        with open(os.path.join(framesDir, extendName(txtName, Extensions.txt)),
                  "w") as f:
            f.write(darknetString)

        print("\r{:.1f}% of work has been done".format(
            (frameIdx + 1) / total * 100),
              end="")
Ejemplo n.º 8
0
def extractCropsThroughDataset(datasetPath,
                               extractionPath=None,
                               categories=None,
                               subcategories=None,
                               extension=Extensions.png,
                               params=None,
                               parallel=True,
                               threads=16):
    """Run ``extractCrops`` for every frames folder found under a dataset.

    Frames folders are located with ``walk`` and narrowed with
    ``filterFolders``. One ``extractCrops`` job per folder is submitted to
    a ``multiprocessing`` pool; each job's ``globalIdx`` is the summed
    length of the marks of all previously submitted folders.

    Args:
        datasetPath: Root directory of the dataset.
        extractionPath: Forwarded to ``extractCrops``.
        categories: Forwarded to ``filterFolders``.
        subcategories: Forwarded to ``filterFolders``.
        extension: Forwarded to ``extractCrops``.
        params: Forwarded to ``extractCrops``.
        parallel: When false a single-worker pool is used.
        threads: Upper bound for the pool size; also capped by the CPU count.
    """
    frameDirs = walk(datasetPath, targetDirs=const.frames).get("dirs")
    frameDirs = filterFolders(frameDirs, categories, subcategories)

    workers = min(threads, mp.cpu_count()) if parallel else 1

    nextIdx = 0
    pending = []
    with mp.Pool(workers) as pool:
        for dirsSet in frameDirs:
            # Drop the trailing frames component to get the category folder.
            categoryDir = os.path.join(datasetPath, *dirsSet[:-1])

            marksFile = os.path.join(categoryDir, makeJSONname(const.marks))
            marksCount = len(openJsonSafely(marksFile))

            jobKwargs = {
                "extractionPath": extractionPath,
                "extension": extension,
                "params": params,
                "globalIdx": nextIdx
            }
            job = pool.apply_async(extractCrops,
                                   args=(categoryDir, ),
                                   kwds=jobKwargs)
            pending.append(job)

            nextIdx += marksCount

        # Wait for all jobs; get() re-raises any worker exception.
        for job in pending:
            job.get()
Ejemplo n.º 9
0
def actualizeInfoWithJsons(datasetPath):
    """Rebuild the actual-info JSON from the marks files of a dataset.

    For every frames folder found by ``walk`` the neighbouring marks JSON
    is loaded; the number of its entries is stored with ``putNested`` under
    the folder's path components and passed to ``updateNested`` for the
    sibling ``const.overall`` key. The result is written to
    ``Path.actualInfo``.

    Args:
        datasetPath: Root directory of the dataset.
    """
    print("\nActualizing info...")
    actualInfo = {}
    os.makedirs(os.path.dirname(Path.actualInfo), exist_ok=True)

    frames = walk(datasetPath, targetDirs=const.frames)
    frames = frames.get("dirs")

    for idx, dirsList in enumerate(frames):
        # Drop the trailing frames component to get the category folder.
        dirsList = dirsList[:-1]

        fullpath = os.path.join(datasetPath, *dirsList, makeJSONname(const.marks))
        with open(fullpath, "r") as marksFile:
            marks = json.load(marksFile)

        putNested(dictionary=actualInfo, keys=dirsList, value=len(marks))
        # Reuse the key list with its last component replaced by the
        # overall key.
        dirsList[-1] = const.overall
        updateNested(dictionary=actualInfo, keys=dirsList, value=len(marks))

        print("\r{:.1f}% of work has been done".format((idx + 1) / len(frames) * 100), end="")

    print()
    with open(Path.actualInfo, "w") as infoFile:
        json.dump(actualInfo, infoFile, indent=3)
Ejemplo n.º 10
0
def augmentCategoryWithRepeats(categoryPath,
                               fullCategory,
                               augmentPath,
                               augmentations,
                               extension=Extensions.jpg,
                               repeats=1,
                               params=None):
    """Augment every marked frame of a category a fixed number of times.

    Each frame listed in the category's marks JSON is read and passed to
    ``augmentImageRepeated`` together with its box. Every returned
    (image, box) pair is written to the augmented frames folder and
    recorded in a new marks JSON under the augmented category directory.

    Args:
        categoryPath: Directory holding the marks JSON and the frames folder.
        fullCategory: Full category name; used in output names and split
            with ``splitFullCategory`` to build the output directory.
        augmentPath: Root directory for augmented output.
        augmentations: Forwarded to ``augmentImageRepeated``.
        extension: Extension passed to ``extendName`` for output images.
        repeats: Forwarded to ``augmentImageRepeated``; ``0`` aborts.
        params: Forwarded to ``cv2.imwrite`` as its third argument.

    Returns:
        ``None``. Returns early when ``repeats`` is 0 or the marks file
        cannot be read.
    """
    print(f"Category {fullCategory} is being augmented")
    if repeats == 0:
        print(
            f"{Fore.RED}Too many original images for {categoryPath}, aborting augmentation {Style.RESET_ALL}"
        )
        return

    marksName = makeJSONname(const.marks)
    marksPath = os.path.join(categoryPath, marksName)
    framesPath = os.path.join(categoryPath, const.frames)

    augmentedCategoryPath = os.path.join(augmentPath,
                                         *splitFullCategory(fullCategory))

    try:
        with open(marksPath, "r") as marksFile:
            marks = json.load(marksFile)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid JSON.
        print(
            f"{Fore.RED}There is no marks {marksPath} for frames in {categoryPath} {Style.RESET_ALL}"
        )
        return

    # Created once up front so the final marks dump also succeeds when the
    # marks are empty.
    augmentedFramesPath = os.path.join(augmentedCategoryPath, const.frames)
    os.makedirs(augmentedFramesPath, exist_ok=True)

    idx = 0
    total = len(marks)
    augmentedMarks = {}
    for i, (name, frameData) in enumerate(marks.items()):
        print("\r{:.1f}% of work has been done".format((i + 1) / total * 100),
              end="")

        frameName = frameData[const.image]
        box = frameData[const.coords]
        ctgIdx = frameData[const.ctgIdx]
        shape = frameData[const.imageShape]

        frameID = name.split(const.separator)[1]

        image = cv2.imread(os.path.join(framesPath, frameName))
        if image is None:
            # cv2.imread signals an unreadable image by returning None.
            print(
                f"\n{Fore.RED}Couldn't read frame {frameName} in {framesPath} {Style.RESET_ALL}"
            )
            continue

        augmented = augmentImageRepeated(image=image,
                                         augmentations=augmentations,
                                         repeats=repeats,
                                         boxes=[box])

        for augImage, augBox in augmented:
            augmentedName = f"{fullCategory}{const.separator}{frameID}_{idx}{const.separator}{const.augmented}"
            augmentedFileName = extendName(augmentedName, extension)
            augmentedMarks[augmentedName] = {
                const.image: augmentedFileName,
                const.coords: augBox,
                const.fullCategory: fullCategory,
                const.ctgIdx: ctgIdx,
                const.imageShape: shape
            }

            cv2.imwrite(os.path.join(augmentedFramesPath, augmentedFileName),
                        augImage, params)
            idx += 1

    print()
    with open(os.path.join(augmentedCategoryPath, marksName), "w") as outFile:
        json.dump(augmentedMarks, outFile, indent=3)
    print(
        f"{Fore.GREEN}Category {fullCategory} has been successfully augmented. "
        f"Results in {augmentedCategoryPath} {Style.RESET_ALL}")
Ejemplo n.º 11
0
def augmentCategoryWithGenerator(categoryPath,
                                 fullCategory,
                                 augmentPath,
                                 augmentations,
                                 augmentationsNumber,
                                 extension=Extensions.jpg,
                                 params=None):
    """Augment a category by consuming ``augmentationGenerator``.

    Every (frame, frame data) pair produced by the generator is written to
    the augmented frames folder and recorded in a new marks JSON under the
    augmented category directory.

    Args:
        categoryPath: Directory holding the marks JSON and the frames folder.
        fullCategory: Full category name; split with ``splitFullCategory``
            to build the output directory.
        augmentPath: Root directory for augmented output.
        augmentations: Forwarded to the generator; ``customAugmentations``
            is used when ``None``.
        augmentationsNumber: Forwarded to the generator and used for the
            progress percentage; ``0`` aborts.
        extension: Extension passed to ``extendName`` for output images.
        params: Forwarded to ``cv2.imwrite`` as its third argument.

    Returns:
        ``None``. Returns early when ``augmentationsNumber`` is 0 or the
        marks file cannot be read.
    """
    print('category: {:>50} \t process_id: {:>10} \t process_name: {}'.format(
        fullCategory, os.getpid(), mp.current_process()))
    time.sleep(0.5)

    # Hard-coded fallback so the function can be launched via multiprocessing.
    augmentations = customAugmentations if augmentations is None else augmentations
    if augmentationsNumber == 0:
        print(
            f"{Fore.RED}No augmentations for {categoryPath}{Style.RESET_ALL}")
        return

    marksName = makeJSONname(const.marks)
    marksPath = os.path.join(categoryPath, marksName)
    framesPath = os.path.join(categoryPath, const.frames)

    augmentedCategoryPath = os.path.join(augmentPath,
                                         *splitFullCategory(fullCategory))

    try:
        with open(marksPath, "r") as marksFile:
            marks = json.load(marksFile)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid JSON.
        print(
            f"{Fore.RED}There is no marks {marksPath} for frames in {categoryPath} {Style.RESET_ALL}"
        )
        return

    augGenerator = augmentationGenerator(framesPath, marks, augmentations,
                                         augmentationsNumber)

    augmentedFramesPath = os.path.join(augmentedCategoryPath, const.frames)
    os.makedirs(augmentedFramesPath, exist_ok=True)

    augmentedMarks = {}
    for i, (augFrame, augFrameData) in enumerate(augGenerator):
        print("\r{} {:.1f} is ready".format(fullCategory,
                                            i / augmentationsNumber * 100),
              end="")

        # The generator stores the extension-less name under the image key;
        # replace it with the real file name, keep the bare name as the key.
        augmentedName = augFrameData.pop(const.image)
        augmentedFileName = extendName(augmentedName, extension)
        augFrameData[const.image] = augmentedFileName
        cv2.imwrite(os.path.join(augmentedFramesPath, augmentedFileName),
                    augFrame, params)

        augmentedMarks[augmentedName] = augFrameData

    print()
    with open(os.path.join(augmentedCategoryPath, marksName), "w") as outFile:
        json.dump(augmentedMarks, outFile, indent=3)
    print(
        f"\n{Fore.GREEN}Category {fullCategory} has been successfully augmented. "
        f"Results in {augmentedCategoryPath} {Style.RESET_ALL}")
Ejemplo n.º 12
0
def frameVideo(filePath,
               marksPath,
               datasetPath,
               actualInfo,
               overwrite=False,
               extension=Extensions.jpg,
               params=None,
               ctgLimit=None):
    """Split a video into frames and store the marked ones in the dataset.

    Frames come from ``generateFrames``; a frame is kept only when
    ``getFrameMarks`` returns marks for it. Kept frames are written under
    ``<datasetPath>/<const.original>/<category>/<subcategory>/<frames>`` and
    their marks are merged into the marks JSON of that subcategory folder.
    New full categories are appended to the categories file.

    Args:
        filePath: Path of the video file.
        marksPath: Directory containing the video's marks JSON.
        datasetPath: Root directory of the dataset.
        actualInfo: Nested counter dict; read with ``getNested`` for the
            starting frame number and passed to ``updateNested`` with
            ``value=1`` for every marked frame.
        overwrite: Whether existing frame images are rewritten.
        extension: Extension passed to ``extendName`` for frame images.
        params: Forwarded to ``cv2.imwrite`` as its third argument.
        ctgLimit: Stop as soon as a frame number equals this value.

    Returns:
        ``None``. Returns early when the marks JSON cannot be read.
    """
    categories = readLines(Path.categories)
    basename = extractBasename(filePath)
    jsonName = makeJSONname(basename)

    try:
        with open(os.path.join(marksPath, jsonName), "r") as marksFile:
            marks = json.load(marksFile)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid JSON.
        print(
            f"{Fore.RED}There is no json file {marksPath} for {filePath} {Style.RESET_ALL}"
        )
        return

    framesGenerator = generateFrames(filePath)
    offset = getKeysOffset(marks.keys())
    marksSeparated = {}
    total = 0
    globalIdx = None
    for idx, frame in enumerate(framesGenerator):
        frameMarks = getFrameMarks(idx, marks, offset)
        if not frameMarks:
            continue

        category = frameMarks[const.category]
        subcategory = frameMarks[const.subcategory]

        countKeys = [const.original, category, subcategory]
        if globalIdx is None:
            # Taken at the first *marked* frame, before any counter update,
            # so it is defined even when frame 0 carries no marks.
            globalIdx = getNested(dictionary=actualInfo,
                                  keys=countKeys,
                                  default=0)

        localIdx = idx + globalIdx
        if ctgLimit is not None and localIdx == ctgLimit:
            break

        frameID = f"frame_{localIdx}"
        fullCategory = getFullCategory(category, subcategory)

        if fullCategory not in categories:
            categories.append(fullCategory)

        ctgIdx = categories.index(fullCategory)
        frameName = f"{fullCategory}{const.separator}{frameID}{const.separator}{const.original}"
        frameFileName = extendName(frameName, extension)

        dirPath = os.path.join(datasetPath, const.original, category,
                               subcategory)
        framesPath = os.path.join(dirPath, const.frames)
        framePath = os.path.join(framesPath, frameFileName)

        updateNested(dictionary=actualInfo, keys=countKeys, value=1)
        if not overwrite and os.path.exists(framePath):
            print("\rFrame #{} has been passed".format(idx), end="")
            continue

        os.makedirs(framesPath, exist_ok=True)

        frameInfo = {
            const.image: frameFileName,
            const.coords: fitCoords(frameMarks[const.coords], frame.shape[:2]),
            const.fullCategory: fullCategory,
            const.ctgIdx: ctgIdx,
            const.imageShape: frame.shape[:2]
        }

        # Key path: ["original", category, subcategory, frameName]
        keySet = countKeys + [frameName]
        putNested(dictionary=marksSeparated, keys=keySet, value=frameInfo)

        cv2.imwrite(framePath, frame, params)
        total += 1

        print("\rFrame #{} has been added".format(idx), end="")

    # Nothing is stored when no frame was written; avoid a KeyError then.
    marksSeparated = marksSeparated.get(const.original, {})
    print()
    for ctg, value in marksSeparated.items():
        for subctg, subctgMarks in value.items():
            subctgMarksJson = os.path.join(
                datasetPath, const.original, ctg, subctg,
                extendName(const.marks, Extensions.json))

            # Merge new marks over the ones already on disk.
            oldMarks = openJsonSafely(subctgMarksJson)
            oldMarks.update(subctgMarks)

            with open(subctgMarksJson, "w") as outFile:
                json.dump(oldMarks, outFile, indent=3)

            print(
                f"{Fore.GREEN}Added marks to {subctgMarksJson} {Style.RESET_ALL}"
            )

    writeLines(categories, Path.categories)
    print(
        f"{Fore.GREEN}Updated categories file {Path.categories} {Style.RESET_ALL}"
    )
    print(f"{Fore.GREEN}Added {total} frames in total {Style.RESET_ALL}")
Ejemplo n.º 13
0
def extract(ctg,
            ctgInfo,
            videosPath=Path.rawVideos,
            extractionPath=Path.original,
            extension=Extensions.jpg,
            limit=None,
            augmentFunc=None,
            augmentations=None,
            augmentationName=const.augmented,
            augmentationPath=None,
            overwriteOriginal=False,
            overwriteAugmented=True):
    """Extract the frames of one category and optionally augment them.

    Frames come from ``createGenerator``. Each frame is written to the
    category's frames folder (unless it already exists and
    ``overwriteOriginal`` is false) and its marks are merged into the
    category's marks JSON. When ``augmentFunc`` is given, augmented copies
    are produced per frame and stored, with their own marks JSON, under
    the augmentation path.

    Args:
        ctg: Category name; last component of the output directory.
        ctgInfo: Mapping read for ``const.parent`` (optional),
            ``const.videos``, ``const.overall`` and ``const.ctgIdx``.
        videosPath: Forwarded to ``createGenerator``.
        extractionPath: Root directory for the extracted frames.
        extension: Extension passed to ``extendName`` for image names.
        limit: Forwarded to ``createGenerator``; defaults to the
            category's overall count.
        augmentFunc: Augmentation callable; ``None`` disables augmentation.
        augmentations: Requested number of augmented frames; defaults to
            ``min(limit, overall)`` when augmentation is enabled.
        augmentationName: Suffix of augmented frame names; also replaces
            ``const.original`` in ``extractionPath`` to derive the default
            augmentation path.
        augmentationPath: Root directory for augmented frames.
        overwriteOriginal: Whether existing original frames are rewritten.
        overwriteAugmented: Whether existing augmented frames are rewritten.

    Any exception is printed and swallowed, so the function never raises.
    """
    try:
        parent = ctgInfo.get(const.parent, "")
        fullExtractionPath = os.path.join(extractionPath, parent, ctg)
        os.makedirs(os.path.join(fullExtractionPath, const.frames),
                    exist_ok=True)

        videos = ctgInfo[const.videos]

        overall = ctgInfo[const.overall]

        limit = limit if limit is not None else overall

        # Defined up front so the summary at the end also works when
        # augmentation is disabled.
        totalAugs = 0

        if augmentFunc is not None:
            augmentFunc = proxifyAugmentFunc(augmentFunc)

            augmentations = int(
                augmentations) if augmentations is not None else min(
                    limit, overall)
            augmentations = max(augmentations, augmentations + limit - overall)

            augRepeats = ceil(augmentations / min(limit, overall))

            augmentationPath = augmentationPath if augmentationPath is not None \
                else extractionPath.replace(const.original, augmentationName)

            fullAugmentationPath = os.path.join(augmentationPath, parent, ctg)
            os.makedirs(os.path.join(fullAugmentationPath, const.frames),
                        exist_ok=True)

            existingAugs = len(
                os.listdir(os.path.join(fullAugmentationPath, const.frames)))

            augMarks = {}

        fullCategory = getFullCategory(parent, ctg)

        # None cannot be rendered with the "{:>10}" spec (TypeError), which
        # is the value seen here when augmentation is disabled by default.
        expectedAugs = 0 if augmentations is None else augmentations
        print(
            "Cutting videos: {:>50} \t expected orig frames {:>10} \t expected aug frames \t {:>10} process id: {:>10}"
            .format(fullCategory, min(limit, overall), expectedAugs,
                    os.getpid()))
        sleep(0.5)

        generator = createGenerator(videosPath, videos, overall, limit)

        marks = {}
        total = 0
        for idx, genInfo in enumerate(generator):
            frame, frameName, coords = genInfo

            fullFrameName = const.separator.join(
                (fullCategory, frameName, const.original))
            frameFileName = extendName(fullFrameName, extension)
            framePath = os.path.join(fullExtractionPath, const.frames,
                                     frameFileName)

            coords = fitCoords(coords, frame.shape[:2])

            status = "passed"
            if not os.path.exists(framePath) or overwriteOriginal:
                status = "added"
                frameMarks = {
                    const.fullCategory: fullCategory,
                    const.ctgIdx: ctgInfo[const.ctgIdx],
                    const.image: frameFileName,
                    const.coords: coords,
                    const.imageShape: frame.shape[:2]
                }

                cv2.imwrite(framePath, frame)
                marks[frameName] = frameMarks

                total += 1

            # Reset per frame so the progress line below is always valid.
            frameAugments = 0
            if augmentFunc is not None:
                for i in range(augRepeats):
                    augFrameName = f"{fullCategory}{const.separator}{frameName}_{i}{const.separator}{augmentationName}"
                    augFileName = extendName(augFrameName, extension)
                    augFramePath = os.path.join(fullAugmentationPath,
                                                const.frames, augFileName)

                    # Stop once the quota is reached, or when enough
                    # augmentations already exist and must not be replaced.
                    if totalAugs >= augmentations or (
                            existingAugs >= augmentations
                            and not overwriteAugmented):
                        break

                    if os.path.exists(augFramePath) and not overwriteAugmented:
                        continue

                    augFrame, augCoords = augmentFunc(frame, coords)

                    augFrameMarks = {
                        const.fullCategory: fullCategory,
                        const.image: augFileName,
                        const.ctgIdx: ctgInfo[const.ctgIdx],
                        const.coords: fitCoords(augCoords, augFrame.shape[:2]),
                        const.imageShape: augFrame.shape[:2]
                    }

                    cv2.imwrite(augFramePath, augFrame)

                    augMarks[augFrameName] = augFrameMarks

                    frameAugments += 1
                totalAugs += frameAugments

            print("\rFrame #{} has been {} with {} augmentations".format(
                idx + 1, status, frameAugments),
                  end="")

        marksPath = os.path.join(fullExtractionPath, makeJSONname(const.marks))
        oldMarks = openJsonSafely(marksPath)
        # Merge before opening for writing so a failing merge cannot leave
        # a truncated marks file behind.
        mergedMarks = updateMarks(oldMarks, marks, overwriteOriginal)
        with open(marksPath, "w") as marksFile:
            json.dump(mergedMarks, marksFile, indent=3, sort_keys=True)
        print(
            f"\n{Fore.GREEN}Added marks to {fullExtractionPath} {Style.RESET_ALL}"
        )

        if augmentFunc is not None:
            augMarksPath = os.path.join(fullAugmentationPath,
                                        makeJSONname(const.marks))
            oldAugMarks = openJsonSafely(augMarksPath)
            mergedAugMarks = updateMarks(oldAugMarks, augMarks,
                                         overwriteAugmented)
            with open(augMarksPath, "w") as augMarksFile:
                json.dump(mergedAugMarks,
                          augMarksFile,
                          indent=3,
                          sort_keys=True)
            print(
                f"{Fore.GREEN}Added marks to {fullAugmentationPath} {Style.RESET_ALL}"
            )

        print(
            f"{Fore.GREEN}Added {total} pure frames and {totalAugs} augmented frames in total {Style.RESET_ALL}"
        )

    except Exception as e:
        print(e)