def validateOrthosAndFireball(options, fileType, logger): '''Validate ortho and fireball files within the current frame range. This is expected to be in called in parallel for smaller chunks. Lidar files will be validated serially. Jpegs get validated when converted to tif. Return True if all is good.''' badFiles = False logger.info("Validating files of type: " + fileType) if fileType == 'ortho': dataFolder = icebridge_common.getOrthoFolder(options.outputFolder) elif fileType == 'fireball': dataFolder = icebridge_common.getFireballFolder(options.outputFolder) else: raise Exception("Unknown file type: " + fileType) indexPath = icebridge_common.csvIndexFile(dataFolder) if not os.path.exists(indexPath): # The issue of what to do when the index does not exist should # have been settled by now. return (not badFiles) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList(options.outputFolder, options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) (frameDict, urlDict) = icebridge_common.readIndexFile(indexPath, prependFolder = True) for frame in frameDict.keys(): if frame < options.startFrame or frame > options.stopFrame: continue outputPath = frameDict[frame] xmlFile = icebridge_common.xmlFile(outputPath) if outputPath in validFilesSet and os.path.exists(outputPath) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + outputPath + ' ' + xmlFile) continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) os.system('rm -f ' + outputPath) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) validFilesSet.add(xmlFile) if fileType != 'fireball': continue # Also validate tfw tfwFile = icebridge_common.tfwFile(outputPath) xmlFile = icebridge_common.xmlFile(tfwFile) if tfwFile in validFilesSet and os.path.exists(tfwFile) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + tfwFile + ' ' + xmlFile) continue else: isGood = icebridge_common.isValidTfw(tfwFile, logger) if not isGood: logger.info('Found invalid tfw. Will wipe: ' + tfwFile + ' ' + xmlFile) os.system('rm -f ' + tfwFile) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid tfw file: ' + tfwFile) validFilesSet.add(tfwFile) validFilesSet.add(xmlFile) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
def validateOrthosAndFireball(options, fileType, logger): '''Validate ortho and fireball files within the current frame range. This is expected to be in called in parallel for smaller chunks. Lidar files will be validated serially. Jpegs get validated when converted to tif. Return True if all is good.''' badFiles = False logger.info("Validating files of type: " + fileType) if fileType == 'ortho': dataFolder = icebridge_common.getOrthoFolder(options.outputFolder) elif fileType == 'fireball': dataFolder = icebridge_common.getFireballFolder(options.outputFolder) else: raise Exception("Unknown file type: " + fileType) indexPath = icebridge_common.csvIndexFile(dataFolder) if not os.path.exists(indexPath): # The issue of what to do when the index does not exist should # have been settled by now. return (not badFiles) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList(options.outputFolder, options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) (frameDict, urlDict) = icebridge_common.readIndexFile(indexPath, prependFolder=True) for frame in frameDict.keys(): if frame < options.startFrame or frame > options.stopFrame: continue outputPath = frameDict[frame] xmlFile = icebridge_common.xmlFile(outputPath) if outputPath in validFilesSet and os.path.exists(outputPath) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + outputPath + ' ' + xmlFile) continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) os.system('rm -f ' + outputPath) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) validFilesSet.add(xmlFile) if fileType != 'fireball': continue # Also validate tfw tfwFile = icebridge_common.tfwFile(outputPath) xmlFile = icebridge_common.xmlFile(tfwFile) if tfwFile in validFilesSet and os.path.exists(tfwFile) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + tfwFile + ' ' + xmlFile) continue else: isGood = icebridge_common.isValidTfw(tfwFile, logger) if not isGood: logger.info('Found invalid tfw. Will wipe: ' + tfwFile + ' ' + xmlFile) os.system('rm -f ' + tfwFile) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid tfw file: ' + tfwFile) validFilesSet.add(tfwFile) validFilesSet.add(xmlFile) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
def doFetch(options, outputFolder): '''The main fetch function. Returns the number of failures.''' # Verify that required files exist home = os.path.expanduser("~") if not (os.path.exists(home+'/.netrc') and os.path.exists(home+'/.urs_cookies')): logger.error('Missing a required authentication file! See instructions here:\n' + ' https://nsidc.org/support/faq/what-options-are-available-bulk-' + 'downloading-data-https-earthdata-login-enabled') return -1 curlPath = asp_system_utils.which("curl") curlOpts = ' -n -L ' cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies ' baseCurlCmd = curlPath + curlOpts + cookiePaths logger.info('Creating output folder: ' + outputFolder) os.system('mkdir -p ' + outputFolder) isSouth = (options.site == 'AN') if options.type == 'nav': # Nav fetching is much less complicated return fetchNavData(options, outputFolder) parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) if not icebridge_common.fileNonEmpty(parsedIndexPath): # Some dirs are weird, both images, fireball dems, and ortho. # Just accept whatever there is, but with a warning. logger.info('Warning: Missing index file: ' + parsedIndexPath) # Store file information in a dictionary # - Keep track of the earliest and latest frame logger.info('Reading file list from ' + parsedIndexPath) try: (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) except: # We probably ran into old format index file. Must refetch. logger.info('Could not read index file. Try again.') options.refetchIndex = True parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) if options.stopAfterIndexFetch: return 0 isLidar = (options.type in LIDAR_TYPES) allFrames = sorted(frameDict.keys()) if not isLidar: # The lidar frames use a totally different numbering than the image/ortho/dem frames firstFrame = icebridge_common.getLargestFrame() # start big lastFrame = icebridge_common.getSmallestFrame() # start small for frameNumber in allFrames: if frameNumber < firstFrame: firstFrame = frameNumber if frameNumber > lastFrame: lastFrame = frameNumber if options.allFrames: options.startFrame = firstFrame options.stopFrame = lastFrame if isLidar: # Based on image frames, determine which lidar frames to fetch. if options.ignoreMissingLidar and len(frameDict.keys()) == 0: # Nothing we can do if this run has no lidar and we are told to continue logger.info("Warning: missing lidar, but continuing.") lidarsToFetch = set() else: lidarsToFetch = lidarFilesInRange(frameDict, outputFolder, options.startFrame, options.stopFrame) # There is always a chance that not all requested frames are available. # That is particularly true for Fireball DEMs. Instead of failing, # just download what is present and give a warning. if options.startFrame not in frameDict and not isLidar: logger.info("Warning: Frame " + str(options.startFrame) + " is not found in this flight.") if options.stopFrame and (options.stopFrame not in frameDict) and not isLidar: logger.info("Warning: Frame " + str(options.stopFrame) + " is not found in this flight.") allFilesToFetch = [] # Files that we will fetch, relative to the current dir. allUrlsToFetch = [] # Full url of each file. # Loop through all found frames within the provided range currentFileCount = 0 lastFrame = "" if len(allFrames) > 0: lastFrame = allFrames[len(allFrames)-1] hasTfw = (options.type == 'fireball') hasXml = ( isLidar or (options.type == 'ortho') or hasTfw ) numFetched = 0 skipCount = 0 for frame in allFrames: # Skip frame outside of range if isLidar: if frameDict[frame] not in lidarsToFetch: continue else: if ((frame < options.startFrame) or (frame > options.stopFrame) ): continue # Handle the frame skip option if options.frameSkip > 0: if skipCount < options.frameSkip: skipCount += 1 continue skipCount = 0 filename = frameDict[frame] # Some files have an associated xml file. Fireball DEMs also have a tfw file. currFilesToFetch = [filename] if hasXml: currFilesToFetch.append(icebridge_common.xmlFile(filename)) if hasTfw: currFilesToFetch.append(icebridge_common.tfwFile(filename)) for filename in currFilesToFetch: url = os.path.join(urlDict[frame], filename) outputPath = os.path.join(outputFolder, filename) allFilesToFetch.append(outputPath) allUrlsToFetch.append(url) # Restrict lidar fetch amount according to the parameter if (isLidar and options.maxNumLidarToFetch > 0 and len(allFilesToFetch) > options.maxNumLidarToFetch): # Ensure an even number, to fetch both the lidar file and its xml if options.maxNumLidarToFetch % 2 == 1: options.maxNumLidarToFetch += 1 allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch] allUrlsToFetch = allUrlsToFetch [0:options.maxNumLidarToFetch] icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun, outputFolder, allFilesToFetch, allUrlsToFetch, logger) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList(os.path.dirname(outputFolder), options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Verify that all files were fetched and are in good shape failedFiles = [] for outputPath in allFilesToFetch: if options.skipValidate: continue if not icebridge_common.fileNonEmpty(outputPath): logger.info('Missing file: ' + outputPath) failedFiles.append(outputPath) continue if icebridge_common.hasImageExtension(outputPath): if False: # This check is just so slow. Turn it off for now. # This will impact only the validation of jpegs, # as the other files can be validated via the checksum. # Jpegs will be validated when converting them to 1 band images if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: if not icebridge_common.isValidImage(outputPath): logger.info('Found an invalid image. Will wipe it: ' + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) continue else: logger.info('Valid image: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated # Sanity check: XML files must have the right latitude. if icebridge_common.fileExtension(outputPath) == '.xml': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) #verbose continue else: if os.path.exists(outputPath): try: latitude = icebridge_common.parseLatitude(outputPath) logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated except: # Corrupted file logger.info("Failed to parse latitude, will wipe: " + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) # On a second thought, don't wipe files with wrong latitude, as # next time we run fetch we will have to fetch them again. # Hopefully they will be ignored. #isGood = hasGoodLat(latitude, isSouth) #if not isGood: # logger.info("Wiping XML file " + outputPath + " with bad latitude " + \ # str(latitude)) # os.remove(outputPath) # imageFile = icebridge_common.xmlToImage(outputPath) # if os.path.exists(imageFile): # logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \ # str(latitude)) # os.remove(imageFile) # Verify the chcksum if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \ and outputPath[-4:] != '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) continue else: isGood = icebridge_common.isValidTfw(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid tfw. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid tfw file: ' + outputPath) validFilesSet.add(outputPath) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) numFailed = len(failedFiles) if numFailed > 0: logger.info("Number of files that could not be processed: " + str(numFailed)) return numFailed
def doFetch(options, outputFolder): '''The main fetch function. Returns the number of failures.''' # Verify that required files exist home = os.path.expanduser("~") if not (os.path.exists(home + '/.netrc') and os.path.exists(home + '/.urs_cookies')): logger.error( 'Missing a required authentication file! See instructions here:\n' + ' https://nsidc.org/support/faq/what-options-are-available-bulk-' + 'downloading-data-https-earthdata-login-enabled') return -1 curlPath = asp_system_utils.which("curl") curlOpts = ' -n -L ' cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies ' baseCurlCmd = curlPath + curlOpts + cookiePaths logger.info('Creating output folder: ' + outputFolder) os.system('mkdir -p ' + outputFolder) isSouth = (options.site == 'AN') if options.type == 'nav': # Nav fetching is much less complicated return fetchNavData(options, outputFolder) parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) if not icebridge_common.fileNonEmpty(parsedIndexPath): # Some dirs are weird, both images, fireball dems, and ortho. # Just accept whatever there is, but with a warning. logger.info('Warning: Missing index file: ' + parsedIndexPath) # Store file information in a dictionary # - Keep track of the earliest and latest frame logger.info('Reading file list from ' + parsedIndexPath) try: (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) except: # We probably ran into old format index file. Must refetch. logger.info('Could not read index file. Try again.') options.refetchIndex = True parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) if options.stopAfterIndexFetch: return 0 isLidar = (options.type in LIDAR_TYPES) allFrames = sorted(frameDict.keys()) if not isLidar: # The lidar frames use a totally different numbering than the image/ortho/dem frames firstFrame = icebridge_common.getLargestFrame() # start big lastFrame = icebridge_common.getSmallestFrame() # start small for frameNumber in allFrames: if frameNumber < firstFrame: firstFrame = frameNumber if frameNumber > lastFrame: lastFrame = frameNumber if options.allFrames: options.startFrame = firstFrame options.stopFrame = lastFrame if isLidar: # Based on image frames, determine which lidar frames to fetch. if options.ignoreMissingLidar and len(frameDict.keys()) == 0: # Nothing we can do if this run has no lidar and we are told to continue logger.info("Warning: missing lidar, but continuing.") lidarsToFetch = set() else: lidarsToFetch = lidarFilesInRange(frameDict, outputFolder, options.startFrame, options.stopFrame) # There is always a chance that not all requested frames are available. # That is particularly true for Fireball DEMs. Instead of failing, # just download what is present and give a warning. if options.startFrame not in frameDict and not isLidar: logger.info("Warning: Frame " + str(options.startFrame) + " is not found in this flight.") if options.stopFrame and (options.stopFrame not in frameDict) and not isLidar: logger.info("Warning: Frame " + str(options.stopFrame) + " is not found in this flight.") allFilesToFetch = [ ] # Files that we will fetch, relative to the current dir. allUrlsToFetch = [] # Full url of each file. # Loop through all found frames within the provided range currentFileCount = 0 lastFrame = "" if len(allFrames) > 0: lastFrame = allFrames[len(allFrames) - 1] hasTfw = (options.type == 'fireball') hasXml = (isLidar or (options.type == 'ortho') or hasTfw) numFetched = 0 skipCount = 0 for frame in allFrames: # Skip frame outside of range if isLidar: if frameDict[frame] not in lidarsToFetch: continue else: if ((frame < options.startFrame) or (frame > options.stopFrame)): continue # Handle the frame skip option if options.frameSkip > 0: if skipCount < options.frameSkip: skipCount += 1 continue skipCount = 0 filename = frameDict[frame] # Some files have an associated xml file. Fireball DEMs also have a tfw file. currFilesToFetch = [filename] if hasXml: currFilesToFetch.append(icebridge_common.xmlFile(filename)) if hasTfw: currFilesToFetch.append(icebridge_common.tfwFile(filename)) for filename in currFilesToFetch: url = os.path.join(urlDict[frame], filename) outputPath = os.path.join(outputFolder, filename) allFilesToFetch.append(outputPath) allUrlsToFetch.append(url) # Restrict lidar fetch amount according to the parameter if (isLidar and options.maxNumLidarToFetch > 0 and len(allFilesToFetch) > options.maxNumLidarToFetch): # Ensure an even number, to fetch both the lidar file and its xml if options.maxNumLidarToFetch % 2 == 1: options.maxNumLidarToFetch += 1 allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch] allUrlsToFetch = allUrlsToFetch[0:options.maxNumLidarToFetch] icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun, outputFolder, allFilesToFetch, allUrlsToFetch, logger) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList( os.path.dirname(outputFolder), options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Verify that all files were fetched and are in good shape failedFiles = [] for outputPath in allFilesToFetch: if options.skipValidate: continue if not icebridge_common.fileNonEmpty(outputPath): logger.info('Missing file: ' + outputPath) failedFiles.append(outputPath) continue if icebridge_common.hasImageExtension(outputPath): if False: # This check is just so slow. Turn it off for now. # This will impact only the validation of jpegs, # as the other files can be validated via the checksum. # Jpegs will be validated when converting them to 1 band images if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: if not icebridge_common.isValidImage(outputPath): logger.info('Found an invalid image. Will wipe it: ' + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) continue else: logger.info('Valid image: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated # Sanity check: XML files must have the right latitude. if icebridge_common.fileExtension(outputPath) == '.xml': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) #verbose continue else: if os.path.exists(outputPath): try: latitude = icebridge_common.parseLatitude(outputPath) logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated except: # Corrupted file logger.info("Failed to parse latitude, will wipe: " + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) # On a second thought, don't wipe files with wrong latitude, as # next time we run fetch we will have to fetch them again. # Hopefully they will be ignored. #isGood = hasGoodLat(latitude, isSouth) #if not isGood: # logger.info("Wiping XML file " + outputPath + " with bad latitude " + \ # str(latitude)) # os.remove(outputPath) # imageFile = icebridge_common.xmlToImage(outputPath) # if os.path.exists(imageFile): # logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \ # str(latitude)) # os.remove(imageFile) # Verify the chcksum if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \ and outputPath[-4:] != '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) continue else: isGood = icebridge_common.isValidTfw(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid tfw. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid tfw file: ' + outputPath) validFilesSet.add(outputPath) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) numFailed = len(failedFiles) if numFailed > 0: logger.info("Number of files that could not be processed: " + str(numFailed)) return numFailed
def doFetch(options, outputFolder): # Verify that required files exist home = os.path.expanduser("~") if not (os.path.exists(home + '/.netrc') and os.path.exists(home + '/.urs_cookies')): logger.error( 'Missing a required authentication file! See instructions here:\n' + ' https://nsidc.org/support/faq/what-options-are-available-bulk-downloading-data-https-earthdata-login-enabled' ) return -1 curlPath = asp_system_utils.which("curl") curlOpts = ' -n -L ' cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies ' baseCurlCmd = curlPath + curlOpts + cookiePaths logger.info('Creating output folder: ' + outputFolder) os.system('mkdir -p ' + outputFolder) isSouth = (options.site == 'AN') parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) if not icebridge_common.fileNonEmpty(parsedIndexPath): # Some dirs are weird, both images, dems, and ortho. # Just accept whatever there is, but with a warning. logger.info('Warning: Missing index file: ' + parsedIndexPath) # Store file information in a dictionary # - Keep track of the earliest and latest frame logger.info('Reading file list from ' + parsedIndexPath) try: (frameDict, urlDict) = readIndexFile(parsedIndexPath) except: # We probably ran into old format index file. Must refetch. logger.info('Could not read index file. Try again.') options.refetchIndex = True parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) (frameDict, urlDict) = readIndexFile(parsedIndexPath) allFrames = sorted(frameDict.keys()) firstFrame = icebridge_common.getLargestFrame() # start big lastFrame = icebridge_common.getSmallestFrame() # start small for frameNumber in allFrames: if frameNumber < firstFrame: firstFrame = frameNumber if frameNumber > lastFrame: lastFrame = frameNumber if options.allFrames: options.startFrame = firstFrame options.stopFrame = lastFrame # There is always a chance that not all requested frames are available. # That is particularly true for Fireball DEMs. Instead of failing, # just download what is present and give a warning. if options.startFrame not in frameDict: logger.info("Warning: Frame " + str(options.startFrame) + \ " is not found in this flight.") if options.stopFrame and (options.stopFrame not in frameDict): logger.info("Warning: Frame " + str(options.stopFrame) + \ " is not found in this flight.") allFilesToFetch = [ ] # Files that we will fetch, relative to the current dir. allUrlsToFetch = [] # Full url of each file. # Loop through all found frames within the provided range currentFileCount = 0 lastFrame = "" if len(allFrames) > 0: lastFrame = allFrames[len(allFrames) - 1] hasTfw = (options.type == 'dem') hasXml = ((options.type in LIDAR_TYPES) or (options.type == 'ortho') or hasTfw) numFetched = 0 for frame in allFrames: if (frame >= options.startFrame) and (frame <= options.stopFrame): filename = frameDict[frame] # Some files have an associated xml file. DEMs also have a tfw file. currFilesToFetch = [filename] if hasXml: currFilesToFetch.append(icebridge_common.xmlFile(filename)) if hasTfw: currFilesToFetch.append(icebridge_common.tfwFile(filename)) for filename in currFilesToFetch: url = os.path.join(urlDict[frame], filename) outputPath = os.path.join(outputFolder, filename) allFilesToFetch.append(outputPath) allUrlsToFetch.append(url) if options.maxNumToFetch > 0 and len( allFilesToFetch) > options.maxNumToFetch: allFilesToFetch = allFilesToFetch[0:options.maxNumToFetch] allUrlsToFetch = allUrlsToFetch[0:options.maxNumToFetch] icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun, outputFolder, allFilesToFetch, allUrlsToFetch, logger) # Verify that all files were fetched and are in good shape failedFiles = [] for outputPath in allFilesToFetch: if options.skipValidate: continue if not icebridge_common.fileNonEmpty(outputPath): logger.info('Missing file: ' + outputPath) failedFiles.append(outputPath) continue if icebridge_common.hasImageExtension(outputPath): if not icebridge_common.isValidImage(outputPath): logger.info('Found an invalid image. Will wipe it: ' + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) continue else: logger.info('Valid image: ' + outputPath) # Sanity check: XML files must have the right latitude. if icebridge_common.fileExtension(outputPath) == '.xml': if os.path.exists(outputPath): latitude = icebridge_common.parseLatitude(outputPath) isGood = hasGoodLat(latitude, isSouth) if not isGood: logger.info("Wiping XML file " + outputPath + " with bad latitude " + \ str(latitude)) os.remove(outputPath) imageFile = icebridge_common.xmlToImage(outputPath) if os.path.exists(imageFile): logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \ str(latitude)) os.remove(imageFile) # Verify the chcksum if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \ and outputPath[-4:] != '.tfw': isGood = icebridge_common.hasValidChkSum(outputPath) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid data. Will wipe it: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid chksum: ' + outputPath) if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw': isGood = icebridge_common.isValidTfw(outputPath) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid data. Will wipe it: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid tfw file: ' + outputPath) numFailed = len(failedFiles) if numFailed > 0: logger.info("Number of files that could not be processed: " + str(numFailed)) return numFailed