def findSpark(logger=None, verbose=True):
    """
    Point `findspark` at a Spark installation found under the home directory.

    The candidates are the entries matching `<homeDir()>/lib/spark-*2*`; the
    last element returned by `sortedGlob` is the one handed to
    `findspark.init`. The selected path is reported through `log` with the
    given `logger`.

    :param logger: logger object forwarded to `log` (may be None).
    :param verbose: accepted for interface compatibility; not used here.
    """
    # Imports are local so the dependencies are only required when the
    # function is actually called.
    from systemtools.logger import log
    from systemtools.location import sortedGlob, homeDir
    import findspark

    sparkGlob = homeDir() + "/lib/spark-*2*"
    candidates = sortedGlob(sparkGlob)
    # Raises IndexError when nothing matches the pattern.
    chosenPath = candidates[-1]
    log("Spark path: " + str(chosenPath), logger)
    findspark.init(chosenPath)
def testFileToMultiParts():
    """
    Manual smoke test for `fileToMultiParts`.

    Takes the first entry of the `testdata` folder located under
    `getExecDir(__file__)`, extracts it into the `vectors-test` temporary
    directory, splits the extracted result (with line-count checking and
    compression enabled) and prints the resulting output directory.
    """
    dataDir = getExecDir(__file__) + "/testdata"
    # Raises IndexError when the test data folder is empty.
    sample = sortedGlob(dataDir + "/*")[0]
    extracted = extract(sample, destinationDir=tmpDir("vectors-test"))
    print(fileToMultiParts(extracted, checkLineCount=True, compress=True))
def getSize(path, unit='b', humanReadable=False, decimal=2):
    """
    Return the size of a file, or the cumulated size of a directory.

    A directory is measured by recursively summing the sizes of the entries
    returned by `sortedGlob(path + "/*")`.

    :param path: path of the file or directory to measure.
    :param unit: unit of the result (case-insensitive, powers of 1024):
        'k'/'ko'/'kilo', 'm'/'mo'/'mega', 'g'/'go'/'giga'; any other string
        (e.g. 'b', 'bytes') means bytes. 'a', 'auto' or None selects the
        unit automatically among 'k', 'm' and 'g', falling back to bytes
        ('b') when none of them fits.
    :param humanReadable: when True, return a string made of the size
        truncated with `truncateFloat(size, decimal)` followed by the unit.
    :param decimal: number of decimals forwarded to `truncateFloat`.
    :return: the size as a number (or as a string when `humanReadable` is
        True). When `path` is neither a file nor a directory the size is
        None.
    """
    def _convertSize(size, unit):
        # Number of successive divisions by 1024 for each unit alias;
        # unknown units are left in bytes.
        unit = unit.lower()
        if unit in ['k', 'ko', 'kilo']:
            steps = 1
        elif unit in ['m', 'mo', 'mega']:
            steps = 2
        elif unit in ['g', 'go', 'giga']:
            steps = 3
        else:  # unit in ['b', 'bytes']
            steps = 0
        for _ in range(steps):
            size = size / 1024
        return size

    # None is a documented alias of the automatic mode, so it must be
    # detected before any string method is called on `unit`.
    isAuto = unit is None or unit.lower() in ['a', 'auto']
    # In automatic mode the raw byte count is computed first and scaled below.
    baseUnit = 'b' if isAuto else unit

    size = None
    if isFile(path):
        size = _convertSize(os.path.getsize(path), baseUnit)
    elif isDir(path):
        totalSize = 0
        for current in sortedGlob(path + "/*"):
            currentSize = getSize(current, unit='b')
            # The recursive call returns None for an entry that is neither
            # a file nor a directory: skip it instead of failing on the sum.
            if currentSize is not None:
                totalSize += currentSize
        size = _convertSize(totalSize, baseUnit)

    if isAuto:
        # Default to bytes so the suffix is never the literal "auto"/None
        # when no larger unit fits (e.g. a size of 0).
        unit = 'b'
        if size is not None:
            tempSize = size
            for u in ['k', 'm', 'g']:
                tempSize = tempSize / 1024
                if tempSize < 1024 and tempSize > 0:
                    size = tempSize
                    unit = u
                    break

    if humanReadable:
        return str(truncateFloat(size, decimal)) + unit
    return size
def extract(filePath, destinationDir=None, upIfUnique=True, doDoubleExtract=True):
    """
    Extract the archive `filePath` with `xtract.xtract` and return the path
    of the extracted content.

    :param filePath: path of the archive. If it is not a file, a message is
        printed and None is returned.
    :param destinationDir: when not None, the extracted element is moved
        into this directory (keeping its own name).
    :param upIfUnique: when True and the extracted directory contains a
        single entry, that entry is moved next to the archive (with a
        timestamp suffix if the name is already taken) and the now-empty
        extracted directory is removed.
    :param doDoubleExtract: when True and the first extraction yields a path
        ending with ".tar", it is extracted a second time and the
        intermediate element is removed.
    :return: the final path of the extracted element, or None.
    """
    if not isFile(filePath):
        print(filePath + " does not exist")
        return None

    # Directory containing the archive:
    (parentDir, _, _, _) = decomposePath(filePath)

    # First extraction pass:
    outputPath = xtract.xtract(filePath)

    # Second pass when the first one produced a ".tar":
    if doDoubleExtract and outputPath[-4:] == ".tar":
        intermediate = outputPath
        outputPath = xtract.xtract(intermediate)
        # Drop the intermediate element, whatever its kind:
        if isDir(intermediate) or isFile(intermediate):
            remove(intermediate, minSlashCount=4)

    # Lift a lone child out of the extracted directory:
    if upIfUnique:
        children = sortedGlob(outputPath + "/*")
        if len(children) == 1:
            onlyChild = children[0]
            target = parentDir + "/" + onlyChild.split("/")[-1]
            # Avoid clobbering an existing element with the same name:
            if isFile(target) or isDir(target):
                target += time.strftime("-%Y.%m.%d-%H.%M.%S")
            shutil.move(onlyChild, target)
            # The extracted directory is no longer needed:
            remove(outputPath, minSlashCount=4)
            outputPath = target

    # Relocate the result when a destination was requested:
    if destinationDir is not None:
        finalPath = destinationDir + "/" + decomposePath(outputPath)[3]
        shutil.move(outputPath, finalPath)
        outputPath = finalPath

    return outputPath
def normalizeNumericalFilePaths(globRegex):
    """
    Zero-pad the first integer of every matching filename so that sorting
    the folder by name also sorts it numerically.

    For example file1.json file2.json ... file20.json are renamed to
    file01.json file02.json ... file20.json.

    :param globRegex: glob pattern (given to `sortedGlob`) selecting the
        files to rename.
    :return: True when all files were processed (including when nothing
        matched the pattern); False, without renaming anything, when a
        filename contains no number or when its first number is not an int.
    """
    allPaths = sortedGlob(globRegex)

    # First pass: collect the first integer of each filename. Nothing is
    # renamed until every filename has been validated.
    allNumbers = []
    for path in allPaths:
        (_, filename, _, _) = decomposePath(path)
        currentNumbers = getAllNumbers(filename)
        if currentNumbers is None or len(currentNumbers) == 0:
            print("A filename has no number.")
            return False
        firstNumber = currentNumbers[0]
        if not isinstance(firstNumber, int):
            print("A filename has no int as first number.")
            return False
        allNumbers.append(firstNumber)

    # Nothing matched: there is nothing to normalize (and max() would fail
    # on an empty list).
    if not allNumbers:
        return True

    # Every number is padded to the width of the largest one.
    digitCountHasToBe = len(str(max(allNumbers)))

    # Second pass: rename.
    for currentPath, currentInt in zip(allPaths, allNumbers):
        (dirPath, filename, ext, _) = decomposePath(currentPath)
        paddedInt = str(currentInt).rjust(digitCountHasToBe, "0")
        # "0*" also swallows zeros already present in front of the number,
        # so an already padded name is not padded twice.
        newFilename = re.sub("0*" + str(currentInt), paddedInt, filename, count=1)
        # NOTE(review): assumes decomposePath returns the directory part
        # with its trailing "/" -- confirm against decomposePath.
        newPath = dirPath + newFilename
        # Only append the extension when there is one, to avoid producing a
        # name ending with a dangling ".".
        if ext:
            newPath += "." + ext
        if currentPath != newPath:
            os.rename(currentPath, newPath)
            print(newPath + " done.")
    return True
def cleanDir(
    path,
    startsWith=None,
    endsWith=None,
    olderHour=4,
    onlyOwner=True,
    verbose=False,
    logger=None,
    dryRun=False,
    removeKwargs=None,
    pathContains="/tmp"  # For security purpose
):
    """
    Remove the direct children of `path` that satisfy every active filter.

    :param path: directory whose entries (`path + "/*"`) are examined.
    :param startsWith: when not None, only entries whose name starts with
        this string are removed.
    :param endsWith: when not None, only entries whose name ends with this
        string are removed.
    :param olderHour: when not None, entries whose
        `getLastModifiedTimeSpent` value (in hours) is below this threshold
        are kept.
    :param onlyOwner: when True, entries not owned by the current user are
        kept.
    :param verbose: when True, report each removed element through
        `logger.log`, or `print` when no logger is given.
    :param logger: optional logger.
    :param dryRun: when True, nothing is removed (the elements are still
        reported when `verbose` is True).
    :param removeKwargs: extra keyword arguments forwarded to `remove`;
        "secure" defaults to False when absent. The given dict is not
        modified.
    :param pathContains: safety marker; an element is only removed if its
        path contains this string.
    """
    # Work on a private copy so that neither the caller's dict nor a shared
    # default is mutated when the "secure" default is filled in.
    removeKwargs = dict(removeKwargs) if removeKwargs else {}
    removeKwargs.setdefault("secure", False)

    me = getpass.getuser()

    # Selection pass: collect every element that passes all the filters.
    elementsToDelete = []
    for element in sortedGlob(path + "/*"):
        if onlyOwner and owner(element) != me:
            continue
        if olderHour is not None and getLastModifiedTimeSpent(element, timeSpentUnit=TIMESPENT_UNIT.HOURS, logger=logger, verbose=False) < olderHour:
            continue
        if startsWith is not None and not decomposePath(element)[3].startswith(startsWith):
            continue
        if endsWith is not None and not decomposePath(element)[3].endswith(endsWith):
            continue
        elementsToDelete.append(element)

    # Removal pass.
    for element in elementsToDelete:
        # Safety net: never touch a path that lacks the expected marker.
        if pathContains not in element:
            continue
        try:
            if not dryRun:
                remove(element, **removeKwargs)
            if verbose:
                msg = "We removed " + element
                if logger is not None:
                    try:
                        logger.log(msg)
                    except Exception:
                        # Logging is best-effort: fall back to stdout
                        # rather than losing the message.
                        print(msg)
                else:
                    print(msg)
        except Exception as e:
            # A failure on one element must not stop the cleaning of the
            # remaining ones.
            print(e)
def purgeOldFiles(pattern, maxTimeSpent, timeSpentUnit=TIMESPENT_UNIT.SECONDS):
    """
    Remove the files matching `pattern` that are considered too old.

    For each path returned by `sortedGlob(pattern)`, the value given by
    `getLastModifiedTimeSpent(path, timeSpentUnit)` is compared with
    `maxTimeSpent`; the path is passed to `removeFile` when that value is
    strictly greater.

    :param pattern: glob pattern selecting the candidate files.
    :param maxTimeSpent: threshold, expressed in `timeSpentUnit`.
    :param timeSpentUnit: unit forwarded to `getLastModifiedTimeSpent`.
    """
    for candidate in sortedGlob(pattern):
        if getLastModifiedTimeSpent(candidate, timeSpentUnit) > maxTimeSpent:
            removeFile(candidate)
def globRemove(globPattern):
    """
    Hand every path matched by `globPattern` (through `sortedGlob`) to
    `removeFiles`.

    :param globPattern: glob pattern selecting the paths to remove.
    """
    removeFiles(sortedGlob(globPattern))