Example #1
0
def findSpark(logger=None, verbose=True):
    """
        Locate a local Spark installation and register it with `findspark`.

        Candidates are searched with the glob `<home>/lib/spark-*2*`, where
        `<home>` is the value returned by `homeDir()`. The last entry of the
        list returned by `sortedGlob` is the one handed to `findspark.init`.

        :param logger: forwarded to `log` when the selected path is reported
        :param verbose: when False, the selected path is not reported
        :raises IndexError: if nothing matches the glob
    """
    from systemtools.logger import log
    from systemtools.location import sortedGlob, homeDir
    import findspark
    pattern = homeDir() + "/lib/spark-*2*"
    candidates = sortedGlob(pattern)
    if not candidates:
        # Same exception type as the former bare `[-1]`, but with a message
        # that tells where Spark was expected:
        raise IndexError("No Spark installation found with " + pattern)
    sparkPath = candidates[-1]
    if verbose:
        log("Spark path: " + str(sparkPath), logger)
    findspark.init(sparkPath)
Example #2
0
def testFileToMultiParts():
    """
        Manual check of `fileToMultiParts`: take the first entry of the
        "testdata" folder located next to this file, extract it into the
        "vectors-test" temporary directory, split the result and print the
        value returned by `fileToMultiParts`.
    """
    testDataPattern = getExecDir(__file__) + "/testdata" + "/*"
    # Only the first entry of the test data folder is used:
    samplePath = sortedGlob(testDataPattern)[0]
    extracted = extract(samplePath, destinationDir=tmpDir("vectors-test"))
    print(fileToMultiParts(extracted, checkLineCount=True, compress=True))
Example #3
0
def getSize(path, unit='b', humanReadable=False, decimal=2):
    """
        Return the size of a file, or the cumulated size of a directory.

        Directories are walked recursively through `sortedGlob(path + "/*")`.

        :param path: path of a file or of a directory
        :param unit: 'k'/'ko'/'kilo', 'm'/'mo'/'mega' or 'g'/'go'/'giga'
            (case-insensitive, 1024-based), or 'a'/'auto'/None to let the
            function choose between 'k', 'm' and 'g' ('b' for an empty
            element). Any other value ('b', 'bytes', ...) means bytes.
        :param humanReadable: if True, return a string made of the size
            passed through `truncateFloat` followed by the unit
        :param decimal: forwarded to `truncateFloat` when `humanReadable`
        :return: the size as a number, a string when `humanReadable` is True,
            or None when `path` is neither a file nor a directory
    """
    divisors = {
        'k': 1024, 'ko': 1024, 'kilo': 1024,
        'm': 1024 ** 2, 'mo': 1024 ** 2, 'mega': 1024 ** 2,
        'g': 1024 ** 3, 'go': 1024 ** 3, 'giga': 1024 ** 3,
    }
    # None must be tested before calling `lower()` on the unit:
    autoUnit = unit is None or unit.lower() in ('a', 'auto')
    # First the raw size in bytes:
    if isFile(path):
        byteCount = os.path.getsize(path)
    elif isDir(path):
        byteCount = 0
        for current in sortedGlob(path + "/*"):
            currentSize = getSize(current, unit='b')
            # A child that is neither a file nor a directory yields None
            # and must not break the sum:
            if currentSize is not None:
                byteCount += currentSize
    else:
        # Nothing to measure:
        return None
    if autoUnit:
        if byteCount > 0:
            # Smallest unit among k, m, g giving a value under 1024; the
            # loop ends on 'g' for anything bigger so the unit is always a
            # real one:
            size = byteCount
            for unit in ('k', 'm', 'g'):
                size = size / 1024
                if size < 1024:
                    break
        else:
            size, unit = byteCount, 'b'
    else:
        divisor = divisors.get(unit.lower())
        # Unknown units are handled as bytes:
        size = byteCount if divisor is None else byteCount / divisor
    if humanReadable:
        return str(truncateFloat(size, decimal)) + unit
    return size
Example #4
0
def extract(filePath, destinationDir=None, upIfUnique=True, doDoubleExtract=True):
    """
        Extract `filePath` with `xtract.xtract` and return the path of the
        extracted element.

        :param filePath: path of the file to extract
        :param destinationDir: if not None, the extracted element is finally
            moved into this directory (keeping its name)
        :param upIfUnique: if exactly one entry is found under the extracted
            path, move that entry into the directory of `filePath` and remove
            the extracted path; a timestamp suffix is appended when an
            element with the same name already exists there
        :param doDoubleExtract: if the first extraction gives a path ending
            with ".tar", extract that path too and remove the intermediate
            element
        :return: the final path of the extracted element, or None if
            `filePath` is not a file
    """
    if not isFile(filePath):
        print(filePath + " does not exist")
        return None
    # We get the dir of the file to extract:
    dirPath = decomposePath(filePath)[0]
    # We extract it:
    extractedPath = xtract.xtract(filePath)
    # A ".tar" result (e.g. coming from a ".tar.gz") is extracted again:
    if doDoubleExtract and extractedPath.endswith(".tar"):
        intermediatePath = extractedPath
        extractedPath = xtract.xtract(intermediatePath)
        # We remove the intermediate element, whatever its kind:
        if isDir(intermediatePath) or isFile(intermediatePath):
            remove(intermediatePath, minSlashCount=4)
    # If there is only one folder or file under extractedPath, we up it:
    if upIfUnique:
        children = sortedGlob(extractedPath + "/*")
        if len(children) == 1:
            elementPath = children[0]
            # We make the dst path next to the original file:
            dst = dirPath + "/" + elementPath.split("/")[-1]
            # We do not overwrite an element already present in the parent dir:
            if isFile(dst) or isDir(dst):
                dst += time.strftime("-%Y.%m.%d-%H.%M.%S")
            shutil.move(elementPath, dst)
            # The wrapping dir is now empty, we remove it:
            remove(extractedPath, minSlashCount=4)
            extractedPath = dst
    # We move the element to the requested destination:
    if destinationDir is not None:
        newDestFilePath = destinationDir + "/" + decomposePath(extractedPath)[3]
        shutil.move(extractedPath, newDestFilePath)
        extractedPath = newDestFilePath
    # Finally we return the new path:
    return extractedPath
Example #5
0
def normalizeNumericalFilePaths(globRegex):
    """
        This function gets a glob path and renames all file1.json file2.json
        ... file20.json to file01.json file02.json ... file20.json so the
        folder is better sorted by file names.

        Only the first number of each file name is padded; the width is the
        number of digits of the greatest first number. Nothing is renamed if
        one of the file names has no number or has a first number which is
        not an int.

        :param globRegex: glob pattern given to `sortedGlob`
        :return: True when all files were handled (also when nothing matches
            the pattern), False when a file name cannot be handled
    """
    # We get all paths:
    allPaths = sortedGlob(globRegex)
    if not allPaths:
        # Nothing to rename (and `max` below would fail on an empty list):
        return True
    # We get the first int of each filename:
    allNumbers = []
    for path in allPaths:
        # Get the filename without extension:
        filename = decomposePath(path)[1]
        currentNumbers = getAllNumbers(filename)
        if currentNumbers is None or len(currentNumbers) == 0:
            print("A filename has no number.")
            return False
        firstNumber = currentNumbers[0]
        if not isinstance(firstNumber, int):
            print("The first number of a filename is not an int.")
            return False
        allNumbers.append(firstNumber)
    # Number of digits every first number must have:
    digitCountHasToBe = len(str(max(allNumbers)))
    # Replace all:
    for currentPath, currentInt in zip(allPaths, allNumbers):
        (dirPath, filename, ext, _) = decomposePath(currentPath)
        digits = str(currentInt)
        # The regex also swallows existing leading zeros, so a number which
        # is already padded (even too much) ends with the right width:
        padded = digits.rjust(digitCountHasToBe, "0")
        newFilename = re.sub("0*" + digits, padded, filename, count=1)
        # NOTE(review): assumes `dirPath` ends with a path separator, as the
        # original concatenation did; confirm against decomposePath
        newPath = dirPath + newFilename
        # No trailing "." for files without extension (presumably an empty
        # `ext`; confirm against decomposePath):
        if ext:
            newPath += "." + ext
        if currentPath != newPath:
            os.rename(currentPath, newPath)
            print(newPath + " done.")
    return True
Example #6
0
def cleanDir(
    path,
    startsWith=None,
    endsWith=None,
    olderHour=4,
    onlyOwner=True,
    verbose=False,
    logger=None,
    dryRun=False,
    removeKwargs=None,
    pathContains="/tmp" # For security purpose
):
    """
        Remove the elements found directly under `path` which pass all the
        given filters.

        :param path: directory to clean (its direct children are candidates)
        :param startsWith: if not None, keep only names starting with it
        :param endsWith: if not None, keep only names ending with it
        :param olderHour: if not None, skip the elements whose
            `getLastModifiedTimeSpent` value (in hours) is lower than it
        :param onlyOwner: if True, skip the elements not owned by the
            current user
        :param verbose: report each handled element
        :param logger: if not None, messages go to `logger.log`, else they
            are printed
        :param dryRun: if True, nothing is removed
        :param removeKwargs: extra keyword arguments for `remove`; "secure"
            is set to False when absent. The given dict is not modified.
        :param pathContains: an element is removed only if its path contains
            this string (for security purpose)
    """
    me = getpass.getuser()
    # We work on a copy so neither the caller's dict nor a shared default
    # gets the "secure" key:
    removeKwargs = dict(removeKwargs) if removeKwargs else {}
    removeKwargs.setdefault("secure", False)
    # First we select all elements, then we delete them:
    elementsToDelete = []
    for element in sortedGlob(path + "/*"):
        if onlyOwner and owner(element) != me:
            continue
        if olderHour is not None and getLastModifiedTimeSpent(element, timeSpentUnit=TIMESPENT_UNIT.HOURS, logger=logger, verbose=False) < olderHour:
            continue
        if startsWith is not None and not decomposePath(element)[3].startswith(startsWith):
            continue
        if endsWith is not None and not decomposePath(element)[3].endswith(endsWith):
            continue
        elementsToDelete.append(element)
    for element in elementsToDelete:
        # Security: never touch something outside the expected location
        if pathContains not in element:
            continue
        try:
            if not dryRun:
                remove(element, **removeKwargs)
        except Exception as e:
            print(e)
            continue
        if verbose:
            # A dry run must not pretend that something was removed:
            msg = ("We would remove " if dryRun else "We removed ") + element
            if logger is None:
                print(msg)
            else:
                try:
                    logger.log(msg)
                except Exception:
                    # Logging stays best-effort but the message is not lost:
                    print(msg)
Example #7
0
def purgeOldFiles(pattern, maxTimeSpent, timeSpentUnit=TIMESPENT_UNIT.SECONDS):
    """
        Call `removeFile` on every path matching the glob `pattern` whose
        `getLastModifiedTimeSpent` value, expressed in `timeSpentUnit`, is
        strictly greater than `maxTimeSpent`.
    """
    for candidate in sortedGlob(pattern):
        if getLastModifiedTimeSpent(candidate, timeSpentUnit) > maxTimeSpent:
            removeFile(candidate)
Example #8
0
def globRemove(globPattern):
    """
        Give all the paths matching `globPattern` (as listed by `sortedGlob`)
        to `removeFiles`.
    """
    removeFiles(sortedGlob(globPattern))