def matchVideosToMarks(marks, videos):
    """Match video files against mark files through ``matchLists``.

    Args:
        marks: Either a list of mark file names or a directory path whose
            entries are obtained with ``os.listdir``.
        videos: Either a list of video file names or a directory path whose
            entries are obtained with ``os.listdir``.

    Returns:
        The result of ``matchLists`` called with the marks as ``master`` and
        the videos as ``slave``.
    """

    def toJsonName(videoName):
        # Videos are compared to marks by the JSON name built from the basename.
        return makeJSONname(extractBasename(videoName))

    markNames = marks if isinstance(marks, list) else os.listdir(marks)
    videoNames = videos if isinstance(videos, list) else os.listdir(videos)

    return matchLists(
        master=markNames,
        slave=videoNames,
        transformer=toJsonName,
        showMessages=True,
    )
def updateCategoriesIndices(datasetPath, categories):
    """Recompute the category index stored in every marks file of a dataset.

    Marks files are located with ``walk``. A file is processed only when the
    full category derived from its path is present in ``categories``. Inside a
    processed file every entry gets ``const.ctgIdx`` set to the position of its
    own ``const.fullCategory`` value in ``categories``, and the file is
    rewritten in place.

    Errors raised while handling a single file are printed and do not stop the
    remaining files from being processed.

    Args:
        datasetPath: Root directory that is walked for marks files.
        categories: List of full category names; the list position is the index
            written into the marks.
    """
    # Imported locally, as in the original code.
    # NOTE(review): presumably this avoids a circular import - confirm.
    from utils import walk, makeJSONname
    from verifier import getFullCategory

    marksFiles = walk(datasetPath, targetFiles=makeJSONname(const.marks)).get("files")

    for mrk in marksFiles:
        try:
            marksPath = os.path.join(datasetPath, *mrk)
            # The two path components preceding the file name.
            category, subcategory = mrk[-3:-1]

            if getFullCategory(category, subcategory) not in categories:
                continue

            fileMarks = openJsonSafely(marksPath)
            for value in fileMarks.values():
                value[const.ctgIdx] = categories.index(value[const.fullCategory])

            # Context manager guarantees the file is flushed and closed.
            with open(marksPath, "w") as f:
                json.dump(fileMarks, f, indent=3)

            print(f"{Fore.BLUE}JSON file {marksPath} has been fixed{Style.RESET_ALL}")
        except Exception as e:
            print(e)
def extractCrops(categoryDir, extractionPath=None, extension=Extensions.png, params=None, globalIdx=0):
    """Cut the marked bounding boxes of one category out of its frames.

    The marks JSON of ``categoryDir`` is read, and for every entry whose frame
    file exists and whose box passes ``checkBoundingBoxIsCorrect`` the box,
    padded by 10 pixels on each side and fitted to the image shape with
    ``fitCoords``, is written as a separate image. Crops that already exist on
    disk are not rewritten.

    Args:
        categoryDir: Directory holding the marks JSON and the frames folder.
        extractionPath: Output directory; defaults to the ``const.cut`` folder
            inside ``categoryDir``.
        extension: Extension passed to ``extendName`` for the crop file names.
        params: Passed through to ``cv2.imwrite``.
        globalIdx: Starting value of the running index used as the crop file
            name prefix. It is advanced once per marks entry, including
            entries that are skipped.

    Returns:
        The running index after the last entry, or ``None`` when the marks
        file does not exist.
    """
    marksPath = os.path.join(categoryDir, makeJSONname(const.marks))
    framesDir = os.path.join(categoryDir, const.frames)
    cutDir = os.path.join(categoryDir, const.cut) if extractionPath is None else extractionPath
    os.makedirs(cutDir, exist_ok=True)

    try:
        with open(marksPath, "r") as f:
            marks = json.load(f)
    except FileNotFoundError:
        return

    print(f"{Fore.GREEN}Processing crop operation for {marksPath} {Style.RESET_ALL}")
    time.sleep(0.5)

    total = len(marks)

    def reportProgress(frameIdx, fullCategory):
        print("\r{:.1f}% of work has been done for {} category".format(
            (frameIdx + 1) / total * 100, fullCategory), end="")

    for frameIdx, frameName in enumerate(marks):
        frameMarks = marks[frameName]
        framePath = os.path.join(framesDir, frameMarks[const.image])

        if not os.path.exists(framePath):
            globalIdx += 1
            continue

        y1, x1, y2, x2 = frameMarks[const.coords]
        fullCategory = frameMarks[const.fullCategory]
        h, w = frameMarks[const.imageShape]

        cutName = f"{globalIdx}_{extendName(fullCategory, extension)}"
        cutPath = os.path.join(cutDir, cutName)
        globalIdx += 1

        if not checkBoundingBoxIsCorrect(x2 - x1, y2 - y1):
            continue

        # Pad the box by 10 px on every side, then clamp it to the image.
        y1, x1, y2, x2 = fitCoords((y1 - 10, x1 - 10, y2 + 10, x2 + 10), (h, w))

        if os.path.exists(cutPath):
            reportProgress(frameIdx, fullCategory)
            continue

        frame = cv2.imread(framePath)
        if frame is None:
            # cv2.imread returns None for unreadable images; slicing None
            # would raise, so the frame is skipped instead.
            reportProgress(frameIdx, fullCategory)
            continue

        cut = frame[y1:y2, x1:x2, ...]
        cv2.imwrite(cutPath, cut, params)
        reportProgress(frameIdx, fullCategory)

    return globalIdx
def xml2json(xmlPath, wpath=None, overwrite=False):
    """Convert an XML annotation file into the JSON marks structure.

    Every ``image`` element that has a ``@frame`` attribute becomes an entry
    ``frame_<idx>`` holding the category, the subcategory (``@category``
    attribute, ``const.merged`` when absent) and the box as
    ``[y1, x1, y2, x2]``.

    Args:
        xmlPath: Path of the XML file. The category and the base name are
            derived from it with ``extractCategory``.
        wpath: Directory to write the JSON into. When ``None`` nothing is
            written.
        overwrite: Whether an existing JSON in ``wpath`` may be replaced.

    Returns:
        The dictionary of converted marks on success.
        ``None`` when the JSON already exists in ``wpath`` and ``overwrite``
        is false.
        The tuple ``({}, None)`` when the XML cannot be read or parsed.
        NOTE(review): the three result shapes are kept as they were for
        backward compatibility; callers should be checked before unifying.
    """
    jsonData = {}
    category, basename = extractCategory(xmlPath)
    jsonName = makeJSONname(basename)

    if wpath is not None:
        os.makedirs(wpath, exist_ok=True)
        alreadyExists = os.path.exists(os.path.join(wpath, jsonName))

        if alreadyExists and not overwrite:
            print(
                f"{Fore.RED}JSON {jsonName} already exists in {wpath} {Style.RESET_ALL}"
            )
            return

        if alreadyExists:
            print(
                f"{Fore.RED}JSON {jsonName} will be overwritten in {wpath} {Style.RESET_ALL}"
            )
        else:
            print(
                f"{Fore.GREEN}JSON {jsonName} will be written to {wpath} {Style.RESET_ALL}"
            )

    try:
        with open(xmlPath, "r") as file:
            o = xmltodict.parse(file.read())
        imgList = o["dataset"]["images"]["image"]
    except Exception:
        # Reset the colour so the terminal does not stay red afterwards.
        print(f"{Fore.RED} Couldn't parse {xmlPath} {Style.RESET_ALL}")
        return {}, None

    # xmltodict yields a single mapping (not a list) when the element occurs
    # only once; normalise so the loop always sees image mappings.
    if not isinstance(imgList, list):
        imgList = [imgList]

    for image in imgList:
        if "@frame" not in image:
            continue

        imgIdx = image['@frame']
        box = image['box']

        x1 = int(box['@left'])
        y1 = int(box['@top'])
        x2 = x1 + int(box['@width'])
        y2 = y1 + int(box['@height'])

        jsonData[f"frame_{imgIdx}"] = {
            const.category: category,
            const.subcategory: image.get("@category", const.merged),
            const.coords: [y1, x1, y2, x2]
        }

    if wpath is not None:
        with open(os.path.join(wpath, jsonName), "w") as f:
            json.dump(jsonData, f, indent=3)

    return jsonData
def summarizeInfo(rawPath=Path.raw, summarizedPath=Path.summarizedRaw, allowedCategories=None,
                  allowedSubCtgList=None, overwrite=True):
    """Build the summary JSON describing the marks of all raw videos.

    For every video in the raw videos folder the marks returned by
    ``getVideoMarks`` are grouped per category and subcategory. Each new
    subcategory receives the next free index; each video is counted once per
    subcategory. The result, including the next free index under
    ``const.maxIdx``, is written to ``summarizedPath``.

    Args:
        rawPath: Directory containing the ``const.videos`` and ``const.json``
            folders.
        summarizedPath: Path of the summary JSON to write.
        allowedCategories: Collection of categories to include. ``None`` means
            no filtering by category.
        allowedSubCtgList: Collection of subcategories to include. ``None``
            means no filtering by subcategory.
        overwrite: When true the summary is rebuilt from scratch, otherwise
            the existing file is loaded with ``openJsonSafely`` and extended.
    """
    summarized = openJsonSafely(summarizedPath) if not overwrite else {}

    rawVideosPath = os.path.join(rawPath, const.videos)
    rawJsonsPath = os.path.join(rawPath, const.json)

    videoExtensions = Extensions.videos()
    rawVideos = sorted(j for j in os.listdir(rawVideosPath) if j.endswith(videoExtensions))

    maxIdx = summarized.get(const.maxIdx, 0)

    for i, video in enumerate(rawVideos):
        print(f"\rProcessing {video} ({i + 1} out of {len(rawVideos)})", end="")

        category, name = extractCategory(video)

        # The default None previously raised TypeError on the "in" test.
        if allowedCategories is not None and category not in allowedCategories:
            continue

        categoryInfo = summarized.get(category, {})

        videoJson = os.path.join(rawJsonsPath, makeJSONname(name))
        videoMarks = getVideoMarks(os.path.join(rawVideosPath, video), videoJson)

        for subctg, subctgMarks in videoMarks.items():
            if allowedSubCtgList is not None and subctg not in allowedSubCtgList:
                continue

            if subctg not in categoryInfo:
                curSubctgMarks = {
                    const.overall: 0,
                    const.ctgIdx: maxIdx,
                    const.videos: {},
                    const.parent: category
                }
                maxIdx += 1
            else:
                curSubctgMarks = categoryInfo[subctg]

            # A video already present in the summary is not counted twice.
            if video not in curSubctgMarks[const.videos]:
                curSubctgMarks[const.videos][video] = subctgMarks
                curSubctgMarks[const.overall] += len(subctgMarks)

            categoryInfo[subctg] = curSubctgMarks

        if categoryInfo:
            summarized[category] = categoryInfo

    summarized[const.maxIdx] = maxIdx

    with open(summarizedPath, "w") as f:
        json.dump(summarized, f, indent=3)

    print(f"\n{Fore.GREEN}Summarized info file {summarizedPath} has been updated{Style.RESET_ALL}")
def makeNegativesMarks(rpath):
    """Write a marks JSON for a folder of negative (object-free) images.

    Every entry of the ``const.frames`` folder inside ``rpath`` gets a mark
    with zero coordinates, category index 0 and image shape ``[1, 1]``. The
    marks are saved to the marks JSON inside ``rpath``.

    Args:
        rpath: Directory that contains the frames folder and receives the
            marks file.
    """
    negatives = os.listdir(os.path.join(rpath, const.frames))

    marks = {
        nimage: {
            const.coords: [0, 0, 0, 0],
            const.ctgIdx: 0,
            const.imageShape: [1, 1],
            const.image: nimage
        }
        for nimage in negatives
    }

    # Context manager guarantees the file is flushed and closed.
    with open(os.path.join(rpath, makeJSONname(const.marks)), "w") as f:
        json.dump(marks, f, indent=3)
def extractMarks(categoryDir):
    """Write a per-frame annotation text file for every marked frame.

    For each marks entry whose frame file exists, a text file with the same
    base name is written next to the frame. It contains one line
    ``<ctgIdx> <xc> <yc> <bw> <bh>`` where the box centre and size are divided
    by the stored image width and height. When ``checkBoundingBoxIsCorrect``
    rejects the box, an empty file is written instead.

    Does nothing when the marks file is missing.

    Args:
        categoryDir: Directory holding the marks JSON and the frames folder.
    """
    marksPath = os.path.join(categoryDir, makeJSONname(const.marks))
    framesDir = os.path.join(categoryDir, const.frames)

    try:
        with open(marksPath, "r") as f:
            marks = json.load(f)
    except FileNotFoundError:
        return

    print(f"\n{Fore.GREEN}Processing extraction marks for {marksPath} {Style.RESET_ALL}")

    total = len(marks)
    for frameIdx, frameName in enumerate(marks):
        frameMarks = marks[frameName]

        # Read once through const.image instead of a second, hard-coded key.
        imageName = frameMarks[const.image]
        framePath = os.path.join(framesDir, imageName)
        if not os.path.exists(framePath):
            continue

        y1, x1, y2, x2 = frameMarks[const.coords]
        ctgIdx = frameMarks[const.ctgIdx]
        h, w = frameMarks[const.imageShape]

        xc = (x2 + x1) / (2 * w)
        yc = (y2 + y1) / (2 * h)
        bw = (x2 - x1) / w
        bh = (y2 - y1) / h

        # NOTE(review): the check receives normalised sizes here, while
        # extractCrops passes pixel sizes - confirm which one it expects.
        if checkBoundingBoxIsCorrect(bw, bh):
            darknetString = f"{ctgIdx} {xc} {yc} {bw} {bh}\n"
        else:
            darknetString = ""

        txtName = os.path.splitext(imageName)[0]
        with open(os.path.join(framesDir, extendName(txtName, Extensions.txt)), "w") as f:
            f.write(darknetString)

        print("\r{:.1f}% of work has been done".format((frameIdx + 1) / total * 100), end="")
def extractCropsThroughDataset(datasetPath, extractionPath=None, categories=None, subcategories=None,
                               extension=Extensions.png, params=None, parallel=True, threads=16):
    """Run ``extractCrops`` for every frames folder of a dataset in a pool.

    Frames folders are found with ``walk`` and filtered with
    ``filterFolders``. Each category directory is submitted to a
    multiprocessing pool together with a starting index equal to the number
    of marks in all previously submitted directories.

    Args:
        datasetPath: Root directory of the dataset.
        extractionPath: Forwarded to ``extractCrops``.
        categories: Forwarded to ``filterFolders``.
        subcategories: Forwarded to ``filterFolders``.
        extension: Forwarded to ``extractCrops``.
        params: Forwarded to ``extractCrops``.
        parallel: When false a single worker process is used.
        threads: Upper bound for the number of worker processes; also capped
            by ``mp.cpu_count()``.
    """
    frameDirs = walk(datasetPath, targetDirs=const.frames).get("dirs")
    frameDirs = filterFolders(frameDirs, categories, subcategories)

    workers = min(threads, mp.cpu_count()) if parallel else 1

    nextIdx = 0
    pending = []

    with mp.Pool(workers) as pool:
        for dirsSet in frameDirs:
            # Drop the trailing frames folder to get the category directory.
            categoryDir = os.path.join(datasetPath, *dirsSet[:-1])
            marksCount = len(
                openJsonSafely(os.path.join(categoryDir, makeJSONname(const.marks))))

            task = pool.apply_async(
                extractCrops,
                args=(categoryDir, ),
                kwds=dict(
                    extractionPath=extractionPath,
                    extension=extension,
                    params=params,
                    globalIdx=nextIdx,
                ),
            )
            pending.append(task)
            nextIdx += marksCount

        # Wait for the results while the pool is still alive.
        for task in pending:
            task.get()
def actualizeInfoWithJsons(datasetPath):
    """Rebuild the actual-info JSON from the marks files of a dataset.

    For every frames folder found by ``walk`` the marks JSON next to it is
    loaded. The number of marks is stored in a nested dictionary under the
    folder's path components, and is also passed to ``updateNested`` for the
    sibling ``const.overall`` key. The result is written to
    ``Path.actualInfo``.

    Args:
        datasetPath: Root directory of the dataset.

    Raises:
        FileNotFoundError: If a frames folder has no marks JSON beside it.
    """
    print("\nActualizing info...")
    actualInfo = {}

    # os.makedirs("") raises, so only create the folder when there is one.
    infoDir = os.path.dirname(Path.actualInfo)
    if infoDir:
        os.makedirs(infoDir, exist_ok=True)

    frames = walk(datasetPath, targetDirs=const.frames).get("dirs")
    total = len(frames)

    for idx, dirsList in enumerate(frames):
        # Slicing copies the list, so the edit below leaves walk()'s data alone.
        dirsList = dirsList[:-1]
        fullpath = os.path.join(datasetPath, *dirsList, makeJSONname(const.marks))

        with open(fullpath, "r") as f:
            marksCount = len(json.load(f))

        putNested(dictionary=actualInfo, keys=dirsList, value=marksCount)

        # Same parent path with the last component replaced by the overall key.
        dirsList[-1] = const.overall
        updateNested(dictionary=actualInfo, keys=dirsList, value=marksCount)

        print("\r{:.1f}% of work has been done".format((idx + 1) / total * 100), end="")

    print()
    with open(Path.actualInfo, "w") as f:
        json.dump(actualInfo, f, indent=3)
def augmentCategoryWithRepeats(categoryPath, fullCategory, augmentPath, augmentations,
                               extension=Extensions.jpg, repeats=1, params=None):
    """Augment every frame of a category a fixed number of times.

    Each marked frame is read and passed to ``augmentImageRepeated`` together
    with its box. Every returned image is saved into the augmented category's
    frames folder and described in a new marks JSON written next to it.

    Args:
        categoryPath: Directory holding the source marks JSON and frames.
        fullCategory: Full category name; used in file names and split with
            ``splitFullCategory`` to build the output path.
        augmentPath: Root directory for augmented data.
        augmentations: Forwarded to ``augmentImageRepeated``.
        extension: Extension passed to ``extendName`` for saved images.
        repeats: Forwarded to ``augmentImageRepeated``; ``0`` aborts.
        params: Passed through to ``cv2.imwrite``.
    """
    print(f"Category {fullCategory} is being augmented")

    if repeats == 0:
        print(
            f"{Fore.RED}Too many original images for {categoryPath}, aborting augmentation {Style.RESET_ALL}"
        )
        return

    marksName = makeJSONname(const.marks)
    marksPath = os.path.join(categoryPath, marksName)
    framesPath = os.path.join(categoryPath, const.frames)
    augmentedCategoryPath = os.path.join(augmentPath, *splitFullCategory(fullCategory))

    try:
        with open(marksPath, "r") as f:
            marks = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON (JSONDecodeError is a ValueError).
        print(
            f"{Fore.RED}There is no marks {marksPath} for frames in {categoryPath} {Style.RESET_ALL}"
        )
        return

    # Created up front so the final marks dump succeeds even for empty marks.
    augmentedFramesPath = os.path.join(augmentedCategoryPath, const.frames)
    os.makedirs(augmentedFramesPath, exist_ok=True)

    idx = 0
    augmentedMarks = {}
    total = len(marks)

    for i, name in enumerate(marks):
        print("\r{:.1f}% of work has been done".format((i + 1) / total * 100), end="")

        frameData = marks[name]
        frameName = frameData[const.image]
        box = frameData[const.coords]
        ctgIdx = frameData[const.ctgIdx]
        shape = frameData[const.imageShape]

        frameID = name.split(const.separator)[1]

        image = cv2.imread(os.path.join(framesPath, frameName))
        if image is None:
            # cv2.imread returns None for missing or unreadable files.
            continue

        augmented = augmentImageRepeated(image=image, augmentations=augmentations,
                                         repeats=repeats, boxes=[box])

        for augImage, augBox in augmented:
            # idx runs across the whole category so names stay unique.
            augmentedName = f"{fullCategory}{const.separator}{frameID}_{idx}{const.separator}{const.augmented}"
            augmentedFileName = extendName(augmentedName, extension)

            augmentedMarks[augmentedName] = {
                const.image: augmentedFileName,
                const.coords: augBox,
                const.fullCategory: fullCategory,
                const.ctgIdx: ctgIdx,
                const.imageShape: shape
            }

            cv2.imwrite(os.path.join(augmentedFramesPath, augmentedFileName), augImage, params)
            idx += 1

    print()
    with open(os.path.join(augmentedCategoryPath, marksName), "w") as f:
        json.dump(augmentedMarks, f, indent=3)

    print(
        f"{Fore.GREEN}Category {fullCategory} has been successfully augmented. "
        f"Results in {augmentedCategoryPath} {Style.RESET_ALL}")
def augmentCategoryWithGenerator(categoryPath, fullCategory, augmentPath, augmentations,
                                 augmentationsNumber, extension=Extensions.jpg, params=None):
    """Augment a category using ``augmentationGenerator``.

    Every item yielded by the generator is a pair of an image and its mark
    data. The image is saved into the augmented category's frames folder and
    the mark data, with the image name extended by ``extension``, is collected
    into a marks JSON written next to that folder.

    Args:
        categoryPath: Directory holding the source marks JSON and frames.
        fullCategory: Full category name; split with ``splitFullCategory`` to
            build the output path.
        augmentPath: Root directory for augmented data.
        augmentations: Forwarded to ``augmentationGenerator``; when ``None``
            the module-level ``customAugmentations`` is used.
        augmentationsNumber: Forwarded to ``augmentationGenerator`` and used
            as the progress denominator; ``0`` aborts.
        extension: Extension passed to ``extendName`` for saved images.
        params: Passed through to ``cv2.imwrite``.
    """
    print('category: {:>50} \t process_id: {:>10} \t process_name: {}'.format(
        fullCategory, os.getpid(), mp.current_process()))
    time.sleep(0.5)

    # Hard-coded fallback needed to run under multiprocessing.
    augmentations = customAugmentations if augmentations is None else augmentations

    if augmentationsNumber == 0:
        print(f"{Fore.RED}No augmentations for {categoryPath}{Style.RESET_ALL}")
        return

    marksName = makeJSONname(const.marks)
    marksPath = os.path.join(categoryPath, marksName)
    framesPath = os.path.join(categoryPath, const.frames)
    augmentedCategoryPath = os.path.join(augmentPath, *splitFullCategory(fullCategory))

    try:
        with open(marksPath, "r") as f:
            marks = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON (JSONDecodeError is a ValueError).
        print(
            f"{Fore.RED}There is no marks {marksPath} for frames in {categoryPath} {Style.RESET_ALL}"
        )
        return

    augGenerator = augmentationGenerator(framesPath, marks, augmentations, augmentationsNumber)

    augmentedFramesPath = os.path.join(augmentedCategoryPath, const.frames)
    os.makedirs(augmentedFramesPath, exist_ok=True)

    augmentedMarks = {}
    for i, (augFrame, augFrameData) in enumerate(augGenerator):
        # i + 1 items are done at this point, so the last one reports 100.0.
        print("\r{} {:.1f} is ready".format(fullCategory, (i + 1) / augmentationsNumber * 100), end="")

        # The stored image name has no extension; it doubles as the marks key.
        augmentedName = augFrameData.pop(const.image)
        augmentedFileName = extendName(augmentedName, extension)
        augFrameData[const.image] = augmentedFileName

        cv2.imwrite(os.path.join(augmentedFramesPath, augmentedFileName), augFrame, params)
        augmentedMarks[augmentedName] = augFrameData

    print()
    with open(os.path.join(augmentedCategoryPath, marksName), "w") as f:
        json.dump(augmentedMarks, f, indent=3)

    print(
        f"\n{Fore.GREEN}Category {fullCategory} has been successfully augmented. "
        f"Results in {augmentedCategoryPath} {Style.RESET_ALL}")
def frameVideo(filePath, marksPath, datasetPath, actualInfo, overwrite=False,
               extension=Extensions.jpg, params=None, ctgLimit=None):
    """Split a video into marked frames and store them with their marks.

    Frames produced by ``generateFrames`` that have marks (looked up with
    ``getFrameMarks``) are saved under
    ``<datasetPath>/<original>/<category>/<subcategory>/<frames>``. Their marks
    are merged into the marks JSON of the corresponding subcategory, new full
    categories are appended to the categories file, and ``actualInfo`` is
    incremented through ``updateNested`` for every marked frame.

    Args:
        filePath: Path of the video file.
        marksPath: Directory containing the JSON marks of the video.
        datasetPath: Root directory of the dataset being filled.
        actualInfo: Nested counters dictionary; read once for the starting
            frame index and updated in place.
        overwrite: Whether frames already present on disk are rewritten.
        extension: Extension passed to ``extendName`` for saved frames.
        params: Passed through to ``cv2.imwrite``.
        ctgLimit: Frame index at which processing stops; ``None`` disables
            the limit.
    """
    categories = readLines(Path.categories)
    basename = extractBasename(filePath)
    jsonName = makeJSONname(basename)

    try:
        with open(os.path.join(marksPath, jsonName), "r") as f:
            marks = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file or invalid JSON (JSONDecodeError is a ValueError).
        print(
            f"{Fore.RED}There is no json file {marksPath} for {filePath} {Style.RESET_ALL}"
        )
        return

    framesGenerator = generateFrames(filePath)
    offset = getKeysOffset(marks.keys())

    marksSeparated = {}
    total = 0

    # Resolved at the first marked frame. Binding it only at idx == 0 left it
    # undefined whenever the first video frame had no marks.
    globalIdx = None

    for idx, frame in enumerate(framesGenerator):
        frameMarks = getFrameMarks(idx, marks, offset)
        if not frameMarks:
            continue

        category = frameMarks[const.category]
        subcategory = frameMarks[const.subcategory]

        countKeys = [const.original, category, subcategory]
        if globalIdx is None:
            globalIdx = getNested(dictionary=actualInfo, keys=countKeys, default=0)

        localIdx = idx + globalIdx
        # ">=" also stops when the starting index is already past the limit.
        if ctgLimit is not None and localIdx >= ctgLimit:
            break

        frameID = f"frame_{localIdx}"
        fullCategory = getFullCategory(category, subcategory)

        if fullCategory not in categories:
            categories.append(fullCategory)

        ctgIdx = categories.index(fullCategory)
        frameName = f"{fullCategory}{const.separator}{frameID}{const.separator}{const.original}"
        frameFileName = extendName(frameName, extension)

        framesPath = os.path.join(datasetPath, const.original, category, subcategory, const.frames)
        framePath = os.path.join(framesPath, frameFileName)

        updateNested(dictionary=actualInfo, keys=countKeys, value=1)

        if not overwrite and os.path.exists(framePath):
            print("\rFrame #{} has been passed".format(idx), end="")
            continue

        os.makedirs(framesPath, exist_ok=True)

        frameInfo = {
            const.image: frameFileName,
            const.coords: fitCoords(frameMarks[const.coords], frame.shape[:2]),
            const.fullCategory: fullCategory,
            const.ctgIdx: ctgIdx,
            const.imageShape: frame.shape[:2]
        }

        # Key path: [original, category, subcategory, frameName]
        keySet = countKeys + [frameName]
        putNested(dictionary=marksSeparated, keys=keySet, value=frameInfo)

        cv2.imwrite(framePath, frame, params)
        total += 1

        print("\rFrame #{} has been added".format(idx), end="")

    # Empty when no frame was added; indexing would raise KeyError then.
    marksSeparated = marksSeparated.get(const.original, {})
    print()

    for ctg, value in marksSeparated.items():
        for subctg, subctgMarks in value.items():
            subctgMarksJson = os.path.join(
                datasetPath, const.original, ctg, subctg,
                extendName(const.marks, Extensions.json))

            oldMarks = openJsonSafely(subctgMarksJson)
            oldMarks.update(subctgMarks)

            with open(subctgMarksJson, "w") as f:
                json.dump(oldMarks, f, indent=3)

            print(
                f"{Fore.GREEN}Added marks to {subctgMarksJson} {Style.RESET_ALL}"
            )

    writeLines(categories, Path.categories)
    print(
        f"{Fore.GREEN}Updated categories file {Path.categories} {Style.RESET_ALL}"
    )
    print(f"{Fore.GREEN}Added {total} frames in total {Style.RESET_ALL}")
def extract(ctg, ctgInfo, videosPath=Path.rawVideos, extractionPath=Path.original,
            extension=Extensions.jpg, limit=None, augmentFunc=None, augmentations=None,
            augmentationName=const.augmented, augmentationPath=None,
            overwriteOriginal=False, overwriteAugmented=True):
    """Extract the frames of one (sub)category and optionally augment them.

    Frames come from ``createGenerator``. Each frame is saved under the
    extraction path unless it already exists and ``overwriteOriginal`` is
    false. When ``augmentFunc`` is given, up to ``augRepeats`` augmented
    copies per frame are saved under the augmentation path until the
    augmentation budget is used up. Marks of both sets are merged into their
    marks JSON files with ``updateMarks``.

    Any exception is caught and printed; the function never raises.

    Args:
        ctg: Name of the (sub)category being extracted.
        ctgInfo: Summary entry of the category; ``const.videos``,
            ``const.overall`` and ``const.ctgIdx`` are read, ``const.parent``
            is optional.
        videosPath: Forwarded to ``createGenerator``.
        extractionPath: Root directory for original frames.
        extension: Extension passed to ``extendName`` for saved images.
        limit: Frame limit; defaults to the category's overall count.
        augmentFunc: Augmentation callable, wrapped with
            ``proxifyAugmentFunc``; ``None`` disables augmentation.
        augmentations: Requested number of augmented frames; defaults to
            ``min(limit, overall)``.
        augmentationName: Suffix of augmented frame names, also used to derive
            the default augmentation path.
        augmentationPath: Root directory for augmented frames; derived from
            ``extractionPath`` when ``None``.
        overwriteOriginal: Whether existing original frames are rewritten.
        overwriteAugmented: Whether existing augmented frames are rewritten.
    """
    try:
        parent = ctgInfo.get(const.parent, "")

        fullExtractionPath = os.path.join(extractionPath, parent, ctg)
        os.makedirs(os.path.join(fullExtractionPath, const.frames), exist_ok=True)

        videos = ctgInfo[const.videos]
        overall = ctgInfo[const.overall]

        limit = limit if limit is not None else overall
        expectedFrames = min(limit, overall)

        if augmentFunc is not None:
            augmentFunc = proxifyAugmentFunc(augmentFunc)

            augmentations = int(augmentations) if augmentations is not None else expectedFrames
            # Raise the budget by the shortfall when limit exceeds overall.
            augmentations = max(augmentations, augmentations + limit - overall)
            # No source frames means nothing to repeat (avoids division by zero).
            augRepeats = ceil(augmentations / expectedFrames) if expectedFrames > 0 else 0

            augmentationPath = augmentationPath if augmentationPath is not None \
                else extractionPath.replace(const.original, augmentationName)

            fullAugmentationPath = os.path.join(augmentationPath, parent, ctg)
            os.makedirs(os.path.join(fullAugmentationPath, const.frames), exist_ok=True)

            existingAugs = len(os.listdir(os.path.join(fullAugmentationPath, const.frames)))
        elif augmentations is None:
            # None cannot be formatted with "{:>10}" in the banner below.
            augmentations = 0

        augMarks = {}
        totalAugs = 0

        fullCategory = getFullCategory(parent, ctg)

        print(
            "Cutting videos: {:>50} \t expected orig frames {:>10} \t expected aug frames \t {:>10} process id: {:>10}"
            .format(fullCategory, expectedFrames, augmentations, os.getpid()))
        sleep(0.5)

        generator = createGenerator(videosPath, videos, overall, limit)

        marks = {}
        total = 0

        for idx, (frame, frameName, coords) in enumerate(generator):
            fullFrameName = const.separator.join((fullCategory, frameName, const.original))
            frameFileName = extendName(fullFrameName, extension)
            framePath = os.path.join(fullExtractionPath, const.frames, frameFileName)

            coords = fitCoords(coords, frame.shape[:2])

            status = "passed"
            if not os.path.exists(framePath) or overwriteOriginal:
                status = "added"

                marks[frameName] = {
                    const.fullCategory: fullCategory,
                    const.ctgIdx: ctgInfo[const.ctgIdx],
                    const.image: frameFileName,
                    const.coords: coords,
                    const.imageShape: frame.shape[:2]
                }

                cv2.imwrite(framePath, frame)
                total += 1

            # Defined for every frame so the progress line below always works.
            frameAugments = 0
            if augmentFunc is not None:
                for i in range(augRepeats):
                    # Count this frame's own augmentations too, otherwise the
                    # budget can be exceeded within a single frame.
                    budgetSpent = totalAugs + frameAugments >= augmentations
                    if budgetSpent or (existingAugs >= augmentations and not overwriteAugmented):
                        break

                    augFrameName = f"{fullCategory}{const.separator}{frameName}_{i}{const.separator}{augmentationName}"
                    augFileName = extendName(augFrameName, extension)
                    augFramePath = os.path.join(fullAugmentationPath, const.frames, augFileName)

                    if os.path.exists(augFramePath) and not overwriteAugmented:
                        continue

                    augFrame, augCoords = augmentFunc(frame, coords)

                    augMarks[augFrameName] = {
                        const.fullCategory: fullCategory,
                        const.image: augFileName,
                        const.ctgIdx: ctgInfo[const.ctgIdx],
                        const.coords: fitCoords(augCoords, augFrame.shape[:2]),
                        const.imageShape: augFrame.shape[:2]
                    }

                    cv2.imwrite(augFramePath, augFrame)
                    frameAugments += 1

                totalAugs += frameAugments

            print("\rFrame #{} has been {} with {} augmentations".format(
                idx + 1, status, frameAugments), end="")

        marksPath = os.path.join(fullExtractionPath, makeJSONname(const.marks))
        # Merge before opening for writing so a failure cannot truncate the file.
        mergedMarks = updateMarks(openJsonSafely(marksPath), marks, overwriteOriginal)
        with open(marksPath, "w") as f:
            json.dump(mergedMarks, f, indent=3, sort_keys=True)

        print(
            f"\n{Fore.GREEN}Added marks to {fullExtractionPath} {Style.RESET_ALL}"
        )

        if augmentFunc is not None:
            augMarksPath = os.path.join(fullAugmentationPath, makeJSONname(const.marks))
            mergedAugMarks = updateMarks(openJsonSafely(augMarksPath), augMarks, overwriteAugmented)
            with open(augMarksPath, "w") as f:
                json.dump(mergedAugMarks, f, indent=3, sort_keys=True)

            print(
                f"{Fore.GREEN}Added marks to {fullAugmentationPath} {Style.RESET_ALL}"
            )

        print(
            f"{Fore.GREEN}Added {total} pure frames and {totalAugs} augmented frames in total {Style.RESET_ALL}"
        )

    except Exception as e:
        # Best-effort worker: report the problem instead of propagating it.
        print(e)