コード例 #1
0
def convertLidarDataToCsv(lidarFolder):
    '''Make sure all lidar data is available in a readable text format'''

    logger = logging.getLogger(__name__)
    logger.info('Converting LIDAR files...')

    # Loop through all lidar files in the folder
    lidarFiles = os.listdir(lidarFolder)
    for f in lidarFiles:
        extension = icebridge_common.fileExtension(f)

        # Only interested in a few file types
        if (extension != '.qi') and (extension != '.hdf5') and (extension !=
                                                                '.h5'):
            continue

        # Handle paths
        fullPath = os.path.join(lidarFolder, f)
        outputPath = os.path.join(lidarFolder, os.path.splitext(f)[0] + '.csv')
        if os.path.exists(outputPath):
            continue

        # Call the conversion
        extract_icebridge_ATM_points.main([fullPath])
        if not os.path.exists(outputPath):
            raise Exception('Failed to parse LIDAR file: ' + fullPath)
コード例 #2
0
def doFetch(options, outputFolder):
    '''The main fetch function.
       Returns the number of failures.'''

    # Verify that required files exist
    home = os.path.expanduser("~")
    if not (os.path.exists(home + '/.netrc')
            and os.path.exists(home + '/.urs_cookies')):
        logger.error(
            'Missing a required authentication file!  See instructions here:\n'
            +
            '    https://nsidc.org/support/faq/what-options-are-available-bulk-'
            + 'downloading-data-https-earthdata-login-enabled')
        return -1

    curlPath = asp_system_utils.which("curl")
    curlOpts = ' -n -L '
    cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies '
    baseCurlCmd = curlPath + curlOpts + cookiePaths

    logger.info('Creating output folder: ' + outputFolder)
    os.system('mkdir -p ' + outputFolder)

    isSouth = (options.site == 'AN')

    if options.type == 'nav':  # Nav fetching is much less complicated
        return fetchNavData(options, outputFolder)

    parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                             outputFolder)
    if not icebridge_common.fileNonEmpty(parsedIndexPath):
        # Some dirs are weird, both images, fireball dems, and ortho.
        # Just accept whatever there is, but with a warning.
        logger.info('Warning: Missing index file: ' + parsedIndexPath)

    # Store file information in a dictionary
    # - Keep track of the earliest and latest frame
    logger.info('Reading file list from ' + parsedIndexPath)
    try:
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)
    except:
        # We probably ran into old format index file. Must refetch.
        logger.info('Could not read index file. Try again.')
        options.refetchIndex = True
        parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                                 outputFolder)
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)

    if options.stopAfterIndexFetch:
        return 0

    isLidar = (options.type in LIDAR_TYPES)

    allFrames = sorted(frameDict.keys())

    if not isLidar:
        # The lidar frames use a totally different numbering than the image/ortho/dem frames
        firstFrame = icebridge_common.getLargestFrame()  # start big
        lastFrame = icebridge_common.getSmallestFrame()  # start small
        for frameNumber in allFrames:
            if frameNumber < firstFrame:
                firstFrame = frameNumber
            if frameNumber > lastFrame:
                lastFrame = frameNumber

        if options.allFrames:
            options.startFrame = firstFrame
            options.stopFrame = lastFrame

    if isLidar:
        # Based on image frames, determine which lidar frames to fetch.
        if options.ignoreMissingLidar and len(frameDict.keys()) == 0:
            # Nothing we can do if this run has no lidar and we are told to continue
            logger.info("Warning: missing lidar, but continuing.")
            lidarsToFetch = set()
        else:
            lidarsToFetch = lidarFilesInRange(frameDict, outputFolder,
                                              options.startFrame,
                                              options.stopFrame)

    # There is always a chance that not all requested frames are available.
    # That is particularly true for Fireball DEMs. Instead of failing,
    # just download what is present and give a warning.
    if options.startFrame not in frameDict and not isLidar:
        logger.info("Warning: Frame " + str(options.startFrame) +
                    " is not found in this flight.")

    if options.stopFrame and (options.stopFrame
                              not in frameDict) and not isLidar:
        logger.info("Warning: Frame " + str(options.stopFrame) +
                    " is not found in this flight.")

    allFilesToFetch = [
    ]  # Files that we will fetch, relative to the current dir.
    allUrlsToFetch = []  # Full url of each file.

    # Loop through all found frames within the provided range
    currentFileCount = 0
    lastFrame = ""
    if len(allFrames) > 0:
        lastFrame = allFrames[len(allFrames) - 1]

    hasTfw = (options.type == 'fireball')
    hasXml = (isLidar or (options.type == 'ortho') or hasTfw)
    numFetched = 0
    skipCount = 0
    for frame in allFrames:

        # Skip frame outside of range
        if isLidar:
            if frameDict[frame] not in lidarsToFetch:
                continue
        else:
            if ((frame < options.startFrame) or (frame > options.stopFrame)):
                continue

        # Handle the frame skip option
        if options.frameSkip > 0:
            if skipCount < options.frameSkip:
                skipCount += 1
                continue
            skipCount = 0

        filename = frameDict[frame]

        # Some files have an associated xml file. Fireball DEMs also have a tfw file.
        currFilesToFetch = [filename]
        if hasXml:
            currFilesToFetch.append(icebridge_common.xmlFile(filename))
        if hasTfw:
            currFilesToFetch.append(icebridge_common.tfwFile(filename))

        for filename in currFilesToFetch:
            url = os.path.join(urlDict[frame], filename)
            outputPath = os.path.join(outputFolder, filename)
            allFilesToFetch.append(outputPath)
            allUrlsToFetch.append(url)

    # Restrict lidar fetch amount according to the parameter
    if (isLidar and options.maxNumLidarToFetch > 0
            and len(allFilesToFetch) > options.maxNumLidarToFetch):

        # Ensure an even number, to fetch both the lidar file and its xml
        if options.maxNumLidarToFetch % 2 == 1:
            options.maxNumLidarToFetch += 1

        allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch]
        allUrlsToFetch = allUrlsToFetch[0:options.maxNumLidarToFetch]

    icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL,
                                         options.dryRun, outputFolder,
                                         allFilesToFetch, allUrlsToFetch,
                                         logger)

    # Fetch from disk the set of already validated files, if any
    validFilesList = icebridge_common.validFilesList(
        os.path.dirname(outputFolder), options.startFrame, options.stopFrame)
    validFilesSet = set()
    validFilesSet = icebridge_common.updateValidFilesListFromDisk(
        validFilesList, validFilesSet)
    numInitialValidFiles = len(validFilesSet)

    # Verify that all files were fetched and are in good shape
    failedFiles = []
    for outputPath in allFilesToFetch:

        if options.skipValidate:
            continue

        if not icebridge_common.fileNonEmpty(outputPath):
            logger.info('Missing file: ' + outputPath)
            failedFiles.append(outputPath)
            continue

        if icebridge_common.hasImageExtension(outputPath):
            if False:
                # This check is just so slow. Turn it off for now.
                # This will impact only the validation of jpegs,
                # as the other files can be validated via the checksum.
                # Jpegs will be validated when converting them to 1 band images
                if outputPath in validFilesSet and os.path.exists(outputPath):
                    #logger.info('Previously validated: ' + outputPath)   # verbose
                    continue
                else:
                    if not icebridge_common.isValidImage(outputPath):
                        logger.info('Found an invalid image. Will wipe it: ' +
                                    outputPath)
                        if os.path.exists(outputPath): os.remove(outputPath)
                        failedFiles.append(outputPath)
                        continue
                    else:
                        logger.info('Valid image: ' + outputPath)
                        validFilesSet.add(outputPath)  # mark it as validated

        # Sanity check: XML files must have the right latitude.
        if icebridge_common.fileExtension(outputPath) == '.xml':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) #verbose
                continue
            else:
                if os.path.exists(outputPath):
                    try:
                        latitude = icebridge_common.parseLatitude(outputPath)
                        logger.info('Valid file: ' + outputPath)
                        validFilesSet.add(outputPath)  # mark it as validated
                    except:
                        # Corrupted file
                        logger.info("Failed to parse latitude, will wipe: " +
                                    outputPath)
                        if os.path.exists(outputPath): os.remove(outputPath)
                        failedFiles.append(outputPath)

                    # On a second thought, don't wipe files with wrong latitude, as
                    # next time we run fetch we will have to fetch them again.
                    # Hopefully they will be ignored.
                    #isGood = hasGoodLat(latitude, isSouth)
                    #if not isGood:
                    #    logger.info("Wiping XML file " + outputPath + " with bad latitude " + \
                    #                str(latitude))
                    #    os.remove(outputPath)
                    #    imageFile = icebridge_common.xmlToImage(outputPath)
                    #    if os.path.exists(imageFile):
                    #        logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \
                    #                    str(latitude))
                    #        os.remove(imageFile)

        # Verify the chcksum
        if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \
               and outputPath[-4:] != '.tfw':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) # verbose
                continue
            else:
                isGood = icebridge_common.hasValidChkSum(outputPath, logger)
                if not isGood:
                    xmlFile = icebridge_common.xmlFile(outputPath)
                    logger.info('Found invalid data. Will wipe: ' +
                                outputPath + ' ' + xmlFile)
                    if os.path.exists(outputPath): os.remove(outputPath)
                    if os.path.exists(xmlFile): os.remove(xmlFile)
                    failedFiles.append(outputPath)
                    failedFiles.append(xmlFile)
                    continue
                else:
                    logger.info('Valid file: ' + outputPath)
                    validFilesSet.add(outputPath)

        if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath)
                continue
            else:
                isGood = icebridge_common.isValidTfw(outputPath, logger)
                if not isGood:
                    xmlFile = icebridge_common.xmlFile(outputPath)
                    logger.info('Found invalid tfw. Will wipe: ' + outputPath +
                                ' ' + xmlFile)
                    if os.path.exists(outputPath): os.remove(outputPath)
                    if os.path.exists(xmlFile): os.remove(xmlFile)
                    failedFiles.append(outputPath)
                    failedFiles.append(xmlFile)
                    continue
                else:
                    logger.info('Valid tfw file: ' + outputPath)
                    validFilesSet.add(outputPath)

    # Write to disk the list of validated files, but only if new
    # validations happened.  First re-read that list, in case a
    # different process modified it in the meantime, such as if two
    # managers are running at the same time.
    numFinalValidFiles = len(validFilesSet)
    if numInitialValidFiles != numFinalValidFiles:
        validFilesSet = \
                      icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet)
        icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    numFailed = len(failedFiles)
    if numFailed > 0:
        logger.info("Number of files that could not be processed: " +
                    str(numFailed))

    return numFailed
コード例 #3
0
def pairLidarFiles(lidarFolder, skipValidate, logger):
    '''For each pair of lidar files generate a double size point cloud.
       We can use these later since they do not have any gaps between adjacent files.'''

    logger.info('Generating lidar pairs...')

    # Create the output folder
    pairedFolder = icebridge_common.getPairedLidarFolder(lidarFolder)
    os.system('mkdir -p ' + pairedFolder)

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)
    if not os.path.exists(convLidarFile):
        raise Exception("Missing file: " + convLidarFile)

    (lidarDict, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
    lidarExt = ''
    for frame in lidarDict:
        lidarExt = icebridge_common.fileExtension(lidarDict[frame])

    numLidarFiles = len(lidarDict.keys())

    pairedDict = {}

    # Loop through all pairs of csv files in the folder
    badFiles = False
    lidarKeys = sorted(lidarDict.keys())
    for i in range(len(lidarKeys) - 1):

        thisFile = lidarDict[lidarKeys[i]]
        nextFile = lidarDict[lidarKeys[i + 1]]

        date2, time2 = icebridge_common.parseTimeStamps(nextFile)

        # Record the name with the second file
        # - More useful because the time for the second file represents the middle of the file.
        outputName = icebridge_common.lidar_pair_prefix(
        ) + date2 + '_' + time2 + lidarExt

        pairedDict[lidarKeys[i]] = outputName

        # Handle paths
        path1 = os.path.join(lidarFolder, thisFile)
        path2 = os.path.join(lidarFolder, nextFile)
        outputPath = os.path.join(pairedFolder, outputName)

        if not os.path.exists(path1) or not os.path.exists(path2):
            logger.info("Cannot create " + outputPath +
                        " as we are missing its inputs")
            # If the inputs are missing, but the output is there, most likely it is corrupt.
            # Wipe it. Hopefully a subsequent fetch and convert step will bring it back.
            if os.path.exists(outputPath):
                logger.info("Wiping: " + outputPath)
                os.system('rm -f ' + outputPath)  # will not throw
                badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Concatenate the two files
        cmd1 = 'cat ' + path1 + ' > ' + outputPath
        cmd2 = 'tail -n +2 -q ' + path2 + ' >> ' + outputPath
        logger.info(cmd1)
        p = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True)
        out, err = p.communicate()
        logger.info(cmd2)
        p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
        out, err = p.communicate()

        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to generate merged LIDAR file, will wipe: ' +
                         outputPath)
            os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True

    pairedLidarFile = icebridge_common.getPairedIndexFile(pairedFolder)

    willWritePairedFile = False
    if not os.path.exists(pairedLidarFile):
        willWritePairedFile = True
    else:
        # Bugfix: Sometimes the written converted file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn,
         dummyUrlDict) = icebridge_common.readIndexFile(pairedLidarFile)
        if lidarDictIn != pairedDict:
            willWritePairedFile = True

    if willWritePairedFile:
        logger.info("Writing: " + pairedLidarFile)
        icebridge_common.writeIndexFile(pairedLidarFile, pairedDict, {})

    return (not badFiles)
コード例 #4
0
def convertLidarDataToCsv(lidarFolder, startFrame, stopFrame, skipValidate,
                          logger):
    '''Make sure all lidar data is available in a readable text format.
       Returns false if any files failed to convert.'''

    logger.info('Converting LIDAR files...')

    lidarIndexPath = icebridge_common.csvIndexFile(lidarFolder)
    (frameDict, urlDict) = icebridge_common.readIndexFile(lidarIndexPath)

    if not skipValidate:
        validFilesList = icebridge_common.validFilesList(
            os.path.dirname(lidarFolder), startFrame, stopFrame)
        validFilesSet = set()
        validFilesSet = icebridge_common.updateValidFilesListFromDisk(
            validFilesList, validFilesSet)
        numInitialValidFiles = len(validFilesSet)

    convDict = {}

    # Loop through all files in the folder
    badFiles = False
    for frame in sorted(frameDict.keys()):

        f = frameDict[frame]
        extension = icebridge_common.fileExtension(f)

        # Only interested in a few file types
        if (extension != '.qi') and (extension != '.hdf5') and (extension !=
                                                                '.h5'):
            convDict[frame] = f  # these are already in plain text
            continue

        convDict[frame] = os.path.splitext(f)[0] + '.csv'
        outputPath = os.path.join(lidarFolder, convDict[frame])

        # Handle paths
        fullPath = os.path.join(lidarFolder, f)
        if not os.path.exists(fullPath):
            logger.info("Cannot convert missing file: " + fullPath)
            continue

        # If the input is invalid, wipe both it, its xml, and the output
        # Hopefully there will be a subsquent fetch step where it will get
        # refetched.
        if not icebridge_common.hasValidChkSum(fullPath, logger):
            logger.info("Will wipe invalid file: " + fullPath)
            xmlFile = icebridge_common.xmlFile(fullPath)
            os.system('rm -f ' + fullPath)  # will not throw
            os.system('rm -f ' + xmlFile)  # will not throw
            os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) # verbose
                continue
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Call the conversion
        logger.info("Process " + fullPath)
        extract_icebridge_ATM_points.main([fullPath])

        # Check the result
        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to parse LIDAR file, will wipe: ' +
                         outputPath)
            os.system('rm -f ' + outputPath)  # will not throw
            badFiles = True
        else:
            if not skipValidate:
                validFilesSet.add(outputPath)  # mark it as validated

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)

    willWriteConvFile = False
    if not os.path.exists(convLidarFile):
        willWriteConvFile = True
    else:
        # Bugfix: Sometimes the written converted file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn,
         dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
        if lidarDictIn != convDict:
            willWriteConvFile = True

    if willWriteConvFile:
        logger.info("Writing: " + convLidarFile)
        icebridge_common.writeIndexFile(convLidarFile, convDict, {})

    if not skipValidate:
        # Write to disk the list of validated files, but only if new
        # validations happened.  First re-read that list, in case a
        # different process modified it in the meantime, such as if two
        # managers are running at the same time.
        numFinalValidFiles = len(validFilesSet)
        if numInitialValidFiles != numFinalValidFiles:
            validFilesSet = icebridge_common.updateValidFilesListFromDisk(
                validFilesList, validFilesSet)
            icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    return (not badFiles)
コード例 #5
0
def doFetch(options, outputFolder):
    '''The main fetch function.
       Returns the number of failures.'''
    
    # Verify that required files exist
    home = os.path.expanduser("~")
    if not (os.path.exists(home+'/.netrc') and os.path.exists(home+'/.urs_cookies')):
        logger.error('Missing a required authentication file!  See instructions here:\n' +
                     '    https://nsidc.org/support/faq/what-options-are-available-bulk-' +
                     'downloading-data-https-earthdata-login-enabled')
        return -1
    
    curlPath = asp_system_utils.which("curl")
    curlOpts    = ' -n -L '
    cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies '
    baseCurlCmd = curlPath + curlOpts + cookiePaths

    logger.info('Creating output folder: ' + outputFolder)
    os.system('mkdir -p ' + outputFolder)  

    isSouth = (options.site == 'AN')
    
    if options.type == 'nav': # Nav fetching is much less complicated
        return fetchNavData(options, outputFolder)
    
    parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder)
    if not icebridge_common.fileNonEmpty(parsedIndexPath):
        # Some dirs are weird, both images, fireball dems, and ortho.
        # Just accept whatever there is, but with a warning.
        logger.info('Warning: Missing index file: ' + parsedIndexPath)

    # Store file information in a dictionary
    # - Keep track of the earliest and latest frame
    logger.info('Reading file list from ' + parsedIndexPath)
    try:
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)
    except:
        # We probably ran into old format index file. Must refetch.
        logger.info('Could not read index file. Try again.')
        options.refetchIndex = True
        parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder)
        (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath)

    if options.stopAfterIndexFetch:
        return 0
    
    isLidar = (options.type in LIDAR_TYPES)

    allFrames  = sorted(frameDict.keys())
    
    if not isLidar:
        # The lidar frames use a totally different numbering than the image/ortho/dem frames
        firstFrame = icebridge_common.getLargestFrame()    # start big
        lastFrame  = icebridge_common.getSmallestFrame()   # start small
        for frameNumber in allFrames:
            if frameNumber < firstFrame:
                firstFrame = frameNumber
            if frameNumber > lastFrame:
                lastFrame = frameNumber

        if options.allFrames:
            options.startFrame = firstFrame
            options.stopFrame  = lastFrame

    if isLidar:
        # Based on image frames, determine which lidar frames to fetch.
        if options.ignoreMissingLidar and len(frameDict.keys()) == 0:
            # Nothing we can do if this run has no lidar and we are told to continue
            logger.info("Warning: missing lidar, but continuing.")
            lidarsToFetch = set()
        else:
            lidarsToFetch = lidarFilesInRange(frameDict, outputFolder,
                                              options.startFrame, options.stopFrame)
        
    # There is always a chance that not all requested frames are available.
    # That is particularly true for Fireball DEMs. Instead of failing,
    # just download what is present and give a warning. 
    if options.startFrame not in frameDict and not isLidar:
        logger.info("Warning: Frame " + str(options.startFrame) +
                    " is not found in this flight.")
                    
    if options.stopFrame and (options.stopFrame not in frameDict) and not isLidar:
        logger.info("Warning: Frame " + str(options.stopFrame) +
                    " is not found in this flight.")

    allFilesToFetch = [] # Files that we will fetch, relative to the current dir. 
    allUrlsToFetch  = [] # Full url of each file.
    
    # Loop through all found frames within the provided range
    currentFileCount = 0
    lastFrame = ""
    if len(allFrames) > 0:
        lastFrame = allFrames[len(allFrames)-1]

    hasTfw = (options.type == 'fireball')
    hasXml = ( isLidar or (options.type == 'ortho') or hasTfw )
    numFetched = 0
    skipCount  = 0
    for frame in allFrames:

        # Skip frame outside of range
        if isLidar:
            if frameDict[frame] not in lidarsToFetch:
                continue
        else:       
            if ((frame < options.startFrame) or (frame > options.stopFrame) ):
                continue
                
        # Handle the frame skip option
        if options.frameSkip > 0: 
            if skipCount < options.frameSkip:
                skipCount += 1
                continue
            skipCount = 0

        filename = frameDict[frame]
        
        # Some files have an associated xml file. Fireball DEMs also have a tfw file.
        currFilesToFetch = [filename]
        if hasXml: 
            currFilesToFetch.append(icebridge_common.xmlFile(filename))
        if hasTfw: 
            currFilesToFetch.append(icebridge_common.tfwFile(filename))

        for filename in currFilesToFetch:    
            url        = os.path.join(urlDict[frame], filename)
            outputPath = os.path.join(outputFolder, filename)
            allFilesToFetch.append(outputPath)
            allUrlsToFetch.append(url)

    # Restrict lidar fetch amount according to the parameter
    if (isLidar and options.maxNumLidarToFetch > 0 and 
           len(allFilesToFetch) > options.maxNumLidarToFetch):

        # Ensure an even number, to fetch both the lidar file and its xml
        if options.maxNumLidarToFetch % 2 == 1:
            options.maxNumLidarToFetch += 1
        
        allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch]
        allUrlsToFetch  = allUrlsToFetch [0:options.maxNumLidarToFetch]
                
    icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun,
                                         outputFolder,
                                         allFilesToFetch, allUrlsToFetch, logger)

    # Fetch from disk the set of already validated files, if any
    validFilesList = icebridge_common.validFilesList(os.path.dirname(outputFolder),
                                                     options.startFrame, options.stopFrame)
    validFilesSet = set()
    validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet)
    numInitialValidFiles = len(validFilesSet)
    
    # Verify that all files were fetched and are in good shape
    failedFiles = []
    for outputPath in allFilesToFetch:

        if options.skipValidate:
            continue
        
        if not icebridge_common.fileNonEmpty(outputPath):
            logger.info('Missing file: ' + outputPath)
            failedFiles.append(outputPath)
            continue

        if icebridge_common.hasImageExtension(outputPath):
            if False:
                # This check is just so slow. Turn it off for now.
                # This will impact only the validation of jpegs,
                # as the other files can be validated via the checksum.
                # Jpegs will be validated when converting them to 1 band images
                if outputPath in validFilesSet and os.path.exists(outputPath):
                    #logger.info('Previously validated: ' + outputPath)   # verbose
                    continue
                else:
                    if not icebridge_common.isValidImage(outputPath):
                        logger.info('Found an invalid image. Will wipe it: ' + outputPath)
                        if os.path.exists(outputPath): os.remove(outputPath)
                        failedFiles.append(outputPath)
                        continue
                    else:
                        logger.info('Valid image: ' + outputPath)
                        validFilesSet.add(outputPath) # mark it as validated

        # Sanity check: XML files must have the right latitude.
        if icebridge_common.fileExtension(outputPath) == '.xml':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) #verbose
                continue
            else:
                if os.path.exists(outputPath):
                    try:
                        latitude = icebridge_common.parseLatitude(outputPath)
                        logger.info('Valid file: ' + outputPath)
                        validFilesSet.add(outputPath) # mark it as validated
                    except:
                        # Corrupted file
                        logger.info("Failed to parse latitude, will wipe: " + outputPath)
                        if os.path.exists(outputPath): os.remove(outputPath)
                        failedFiles.append(outputPath)

                    # On a second thought, don't wipe files with wrong latitude, as
                    # next time we run fetch we will have to fetch them again.
                    # Hopefully they will be ignored.
                    #isGood = hasGoodLat(latitude, isSouth)
                    #if not isGood:
                    #    logger.info("Wiping XML file " + outputPath + " with bad latitude " + \
                    #                str(latitude))
                    #    os.remove(outputPath)
                    #    imageFile = icebridge_common.xmlToImage(outputPath)
                    #    if os.path.exists(imageFile):
                    #        logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \
                    #                    str(latitude))
                    #        os.remove(imageFile)
                    
        # Verify the chcksum    
        if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \
               and outputPath[-4:] != '.tfw':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) # verbose
                continue
            else:
                isGood = icebridge_common.hasValidChkSum(outputPath, logger)
                if not isGood:
                    xmlFile = icebridge_common.xmlFile(outputPath)
                    logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile)
                    if os.path.exists(outputPath): os.remove(outputPath)
                    if os.path.exists(xmlFile):    os.remove(xmlFile)
                    failedFiles.append(outputPath)
                    failedFiles.append(xmlFile)
                    continue
                else:
                    logger.info('Valid file: ' + outputPath)
                    validFilesSet.add(outputPath)

        if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw':
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath)
                continue
            else:
                isGood = icebridge_common.isValidTfw(outputPath, logger)
                if not isGood:
                    xmlFile = icebridge_common.xmlFile(outputPath)
                    logger.info('Found invalid tfw. Will wipe: ' + outputPath + ' ' + xmlFile)
                    if os.path.exists(outputPath): os.remove(outputPath)
                    if os.path.exists(xmlFile):    os.remove(xmlFile)
                    failedFiles.append(outputPath)
                    failedFiles.append(xmlFile)
                    continue
                else:
                    logger.info('Valid tfw file: ' + outputPath)
                    validFilesSet.add(outputPath)

    # Write to disk the list of validated files, but only if new
    # validations happened.  First re-read that list, in case a
    # different process modified it in the meantime, such as if two
    # managers are running at the same time.
    numFinalValidFiles = len(validFilesSet)
    if numInitialValidFiles != numFinalValidFiles:
        validFilesSet = \
                      icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet)
        icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    numFailed = len(failedFiles)
    if numFailed > 0:
        logger.info("Number of files that could not be processed: " + str(numFailed))
        
    return numFailed
コード例 #6
0
def doFetch(options, outputFolder):

    # Verify that required files exist
    home = os.path.expanduser("~")
    if not (os.path.exists(home + '/.netrc')
            and os.path.exists(home + '/.urs_cookies')):
        logger.error(
            'Missing a required authentication file!  See instructions here:\n'
            +
            '    https://nsidc.org/support/faq/what-options-are-available-bulk-downloading-data-https-earthdata-login-enabled'
        )
        return -1

    curlPath = asp_system_utils.which("curl")
    curlOpts = ' -n -L '
    cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies '
    baseCurlCmd = curlPath + curlOpts + cookiePaths

    logger.info('Creating output folder: ' + outputFolder)
    os.system('mkdir -p ' + outputFolder)

    isSouth = (options.site == 'AN')
    parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                             outputFolder)
    if not icebridge_common.fileNonEmpty(parsedIndexPath):
        # Some dirs are weird, both images, dems, and ortho.
        # Just accept whatever there is, but with a warning.
        logger.info('Warning: Missing index file: ' + parsedIndexPath)

    # Store file information in a dictionary
    # - Keep track of the earliest and latest frame
    logger.info('Reading file list from ' + parsedIndexPath)
    try:
        (frameDict, urlDict) = readIndexFile(parsedIndexPath)
    except:
        # We probably ran into old format index file. Must refetch.
        logger.info('Could not read index file. Try again.')
        options.refetchIndex = True
        parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd,
                                                 outputFolder)
        (frameDict, urlDict) = readIndexFile(parsedIndexPath)

    allFrames = sorted(frameDict.keys())
    firstFrame = icebridge_common.getLargestFrame()  # start big
    lastFrame = icebridge_common.getSmallestFrame()  # start small
    for frameNumber in allFrames:
        if frameNumber < firstFrame:
            firstFrame = frameNumber
        if frameNumber > lastFrame:
            lastFrame = frameNumber

    if options.allFrames:
        options.startFrame = firstFrame
        options.stopFrame = lastFrame

    # There is always a chance that not all requested frames are available.
    # That is particularly true for Fireball DEMs. Instead of failing,
    # just download what is present and give a warning.
    if options.startFrame not in frameDict:
        logger.info("Warning: Frame " + str(options.startFrame) + \
                    " is not found in this flight.")

    if options.stopFrame and (options.stopFrame not in frameDict):
        logger.info("Warning: Frame " + str(options.stopFrame) + \
                    " is not found in this flight.")

    allFilesToFetch = [
    ]  # Files that we will fetch, relative to the current dir.
    allUrlsToFetch = []  # Full url of each file.

    # Loop through all found frames within the provided range
    currentFileCount = 0
    lastFrame = ""
    if len(allFrames) > 0:
        lastFrame = allFrames[len(allFrames) - 1]

    hasTfw = (options.type == 'dem')
    hasXml = ((options.type in LIDAR_TYPES) or (options.type == 'ortho')
              or hasTfw)
    numFetched = 0
    for frame in allFrames:
        if (frame >= options.startFrame) and (frame <= options.stopFrame):

            filename = frameDict[frame]

            # Some files have an associated xml file. DEMs also have a tfw file.
            currFilesToFetch = [filename]
            if hasXml:
                currFilesToFetch.append(icebridge_common.xmlFile(filename))
            if hasTfw:
                currFilesToFetch.append(icebridge_common.tfwFile(filename))

            for filename in currFilesToFetch:
                url = os.path.join(urlDict[frame], filename)
                outputPath = os.path.join(outputFolder, filename)
                allFilesToFetch.append(outputPath)
                allUrlsToFetch.append(url)

    if options.maxNumToFetch > 0 and len(
            allFilesToFetch) > options.maxNumToFetch:
        allFilesToFetch = allFilesToFetch[0:options.maxNumToFetch]
        allUrlsToFetch = allUrlsToFetch[0:options.maxNumToFetch]

    icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL,
                                         options.dryRun, outputFolder,
                                         allFilesToFetch, allUrlsToFetch,
                                         logger)

    # Verify that all files were fetched and are in good shape
    failedFiles = []
    for outputPath in allFilesToFetch:

        if options.skipValidate: continue

        if not icebridge_common.fileNonEmpty(outputPath):
            logger.info('Missing file: ' + outputPath)
            failedFiles.append(outputPath)
            continue

        if icebridge_common.hasImageExtension(outputPath):
            if not icebridge_common.isValidImage(outputPath):
                logger.info('Found an invalid image. Will wipe it: ' +
                            outputPath)
                if os.path.exists(outputPath): os.remove(outputPath)
                failedFiles.append(outputPath)
                continue
            else:
                logger.info('Valid image: ' + outputPath)

        # Sanity check: XML files must have the right latitude.
        if icebridge_common.fileExtension(outputPath) == '.xml':
            if os.path.exists(outputPath):
                latitude = icebridge_common.parseLatitude(outputPath)
                isGood = hasGoodLat(latitude, isSouth)
                if not isGood:
                    logger.info("Wiping XML file " + outputPath + " with bad latitude " + \
                                str(latitude))
                    os.remove(outputPath)
                    imageFile = icebridge_common.xmlToImage(outputPath)
                    if os.path.exists(imageFile):
                        logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \
                                    str(latitude))
                        os.remove(imageFile)

        # Verify the chcksum
        if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \
               and outputPath[-4:] != '.tfw':
            isGood = icebridge_common.hasValidChkSum(outputPath)
            if not isGood:
                xmlFile = icebridge_common.xmlFile(outputPath)
                logger.info('Found invalid data. Will wipe it: ' + outputPath +
                            ' ' + xmlFile)
                if os.path.exists(outputPath): os.remove(outputPath)
                if os.path.exists(xmlFile): os.remove(xmlFile)
                failedFiles.append(outputPath)
                failedFiles.append(xmlFile)
                continue
            else:
                logger.info('Valid chksum: ' + outputPath)

        if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw':
            isGood = icebridge_common.isValidTfw(outputPath)
            if not isGood:
                xmlFile = icebridge_common.xmlFile(outputPath)
                logger.info('Found invalid data. Will wipe it: ' + outputPath +
                            ' ' + xmlFile)
                if os.path.exists(outputPath): os.remove(outputPath)
                if os.path.exists(xmlFile): os.remove(xmlFile)
                failedFiles.append(outputPath)
                failedFiles.append(xmlFile)
                continue
            else:
                logger.info('Valid tfw file: ' + outputPath)

    numFailed = len(failedFiles)
    if numFailed > 0:
        logger.info("Number of files that could not be processed: " +
                    str(numFailed))

    return numFailed
コード例 #7
0
def pairLidarFiles(lidarFolder, skipValidate, logger):
    '''For each pair of lidar files generate a double size point cloud.
       We can use these later since they do not have any gaps between adjacent files.'''
    
    logger.info('Generating lidar pairs...')

    # Create the output folder
    pairedFolder = icebridge_common.getPairedLidarFolder(lidarFolder)
    os.system('mkdir -p ' + pairedFolder)

    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)
    if not os.path.exists(convLidarFile):
        raise Exception("Missing file: " + convLidarFile)

    (lidarDict, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
    lidarExt = ''
    for frame in lidarDict:
        lidarExt = icebridge_common.fileExtension(lidarDict[frame])

    numLidarFiles = len(lidarDict.keys())

    pairedDict = {}
    
    # Loop through all pairs of csv files in the folder    
    badFiles = False
    lidarKeys = sorted(lidarDict.keys())
    for i in range(len(lidarKeys)-1):
        
        thisFile = lidarDict[lidarKeys[i  ]]
        nextFile = lidarDict[lidarKeys[i+1]]

        date2, time2 = icebridge_common.parseTimeStamps(nextFile)
        
        # Record the name with the second file
        # - More useful because the time for the second file represents the middle of the file.
        outputName = icebridge_common.lidar_pair_prefix() + date2 +'_'+ time2 + lidarExt

        pairedDict[lidarKeys[i]] = outputName
        
        # Handle paths
        path1      = os.path.join(lidarFolder, thisFile)
        path2      = os.path.join(lidarFolder, nextFile)
        outputPath = os.path.join(pairedFolder, outputName)

        if not os.path.exists(path1) or not os.path.exists(path2):
            logger.info("Cannot create " + outputPath + " as we are missing its inputs")
            # If the inputs are missing, but the output is there, most likely it is corrupt.
            # Wipe it. Hopefully a subsequent fetch and convert step will bring it back.
            if os.path.exists(outputPath):
                logger.info("Wiping: " + outputPath)
                os.system('rm -f ' + outputPath) # will not throw
                badFiles = True
            continue
        
        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue

        # Concatenate the two files
        cmd1 = 'cat ' + path1 + ' > ' + outputPath
        cmd2 = 'tail -n +2 -q ' + path2 + ' >> ' + outputPath
        logger.info(cmd1)
        p        = subprocess.Popen(cmd1, stdout=subprocess.PIPE, shell=True,
                                    universal_newlines=True)
        out, err = p.communicate()
        logger.info(cmd2)
        p        = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True,
                                    universal_newlines=True)
        out, err = p.communicate()

        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to generate merged LIDAR file, will wipe: ' + outputPath)
            os.system('rm -f ' + outputPath) # will not throw
            badFiles = True

    pairedLidarFile = icebridge_common.getPairedIndexFile(pairedFolder)
    
    willWritePairedFile = False
    if not os.path.exists(pairedLidarFile):
        willWritePairedFile = True
    else: 
        # Bugfix: Sometimes the written converted file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(pairedLidarFile)
        if lidarDictIn != pairedDict:
            willWritePairedFile = True
    
    if willWritePairedFile:
        logger.info("Writing: " + pairedLidarFile)
        icebridge_common.writeIndexFile(pairedLidarFile, pairedDict, {})

    return (not badFiles)
コード例 #8
0
def convertLidarDataToCsv(lidarFolder, startFrame, stopFrame, 
                          skipValidate, logger):
    '''Make sure all lidar data is available in a readable text format.
       Returns false if any files failed to convert.'''

    logger.info('Converting LIDAR files...')

    lidarIndexPath = icebridge_common.csvIndexFile(lidarFolder)
    (frameDict, urlDict) = icebridge_common.readIndexFile(lidarIndexPath)

    if not skipValidate:
        validFilesList = icebridge_common.validFilesList(os.path.dirname(lidarFolder),
                                                         startFrame, stopFrame)
        validFilesSet = set()
        validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet)
        numInitialValidFiles = len(validFilesSet)

    convDict = {}
    
    # Loop through all files in the folder
    badFiles = False
    for frame in sorted(frameDict.keys()):

        f = frameDict[frame]
        extension = icebridge_common.fileExtension(f)
        
        # Only interested in a few file types
        if (extension != '.qi') and (extension != '.hdf5') and (extension != '.h5'):
            convDict[frame] = f # these are already in plain text
            continue

        convDict[frame] = os.path.splitext(f)[0] + '.csv'
        outputPath = os.path.join(lidarFolder, convDict[frame])

        # Handle paths
        fullPath = os.path.join(lidarFolder, f)
        if not os.path.exists(fullPath):
            logger.info("Cannot convert missing file: " + fullPath)
            continue

        # If the input is invalid, wipe both it, its xml, and the output
        # Hopefully there will be a subsquent fetch step where it will get
        # refetched.
        if not icebridge_common.hasValidChkSum(fullPath, logger):
            logger.info("Will wipe invalid file: " + fullPath)
            xmlFile = icebridge_common.xmlFile(fullPath)
            os.system('rm -f ' + fullPath) # will not throw
            os.system('rm -f ' + xmlFile) # will not throw
            os.system('rm -f ' + outputPath) # will not throw
            badFiles = True
            continue

        # Skip existing valid files
        if skipValidate:
            if os.path.exists(outputPath):
                logger.info("File exists, skipping: " + outputPath)
                continue
        else:
            if outputPath in validFilesSet and os.path.exists(outputPath):
                #logger.info('Previously validated: ' + outputPath) # verbose
                continue
            if icebridge_common.isValidLidarCSV(outputPath):
                #logger.info("File exists and is valid, skipping: " + outputPath)
                continue
        
        # Call the conversion
        logger.info("Process " + fullPath)
        extract_icebridge_ATM_points.main([fullPath])
        
        # Check the result
        if not icebridge_common.isValidLidarCSV(outputPath):
            logger.error('Failed to parse LIDAR file, will wipe: ' + outputPath)
            os.system('rm -f ' + outputPath) # will not throw            
            badFiles = True
        else:
            if not skipValidate:
                validFilesSet.add(outputPath) # mark it as validated
            
    convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder)

    willWriteConvFile = False
    if not os.path.exists(convLidarFile):
        willWriteConvFile = True
    else: 
        # Bugfix: Sometimes the written converted file has the wrong size, maybe
        # something got interrupted earlier.
        (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile)
        if lidarDictIn != convDict:
            willWriteConvFile = True
            
    if willWriteConvFile:
        logger.info("Writing: " + convLidarFile)
        icebridge_common.writeIndexFile(convLidarFile, convDict, {})
        
    if not skipValidate:
        # Write to disk the list of validated files, but only if new
        # validations happened.  First re-read that list, in case a
        # different process modified it in the meantime, such as if two
        # managers are running at the same time.
        numFinalValidFiles = len(validFilesSet)
        if numInitialValidFiles != numFinalValidFiles:
            validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList,
                                                                          validFilesSet)
            icebridge_common.writeValidFilesList(validFilesList, validFilesSet)

    return (not badFiles)