def validateOrthosAndFireball(options, fileType, logger): '''Validate ortho and fireball files within the current frame range. This is expected to be in called in parallel for smaller chunks. Lidar files will be validated serially. Jpegs get validated when converted to tif. Return True if all is good.''' badFiles = False logger.info("Validating files of type: " + fileType) if fileType == 'ortho': dataFolder = icebridge_common.getOrthoFolder(options.outputFolder) elif fileType == 'fireball': dataFolder = icebridge_common.getFireballFolder(options.outputFolder) else: raise Exception("Unknown file type: " + fileType) indexPath = icebridge_common.csvIndexFile(dataFolder) if not os.path.exists(indexPath): # The issue of what to do when the index does not exist should # have been settled by now. return (not badFiles) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList(options.outputFolder, options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) (frameDict, urlDict) = icebridge_common.readIndexFile(indexPath, prependFolder = True) for frame in frameDict.keys(): if frame < options.startFrame or frame > options.stopFrame: continue outputPath = frameDict[frame] xmlFile = icebridge_common.xmlFile(outputPath) if outputPath in validFilesSet and os.path.exists(outputPath) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + outputPath + ' ' + xmlFile) continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) os.system('rm -f ' + outputPath) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) validFilesSet.add(xmlFile) if fileType != 'fireball': continue # Also validate tfw tfwFile = icebridge_common.tfwFile(outputPath) xmlFile = icebridge_common.xmlFile(tfwFile) if tfwFile in validFilesSet and os.path.exists(tfwFile) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + tfwFile + ' ' + xmlFile) continue else: isGood = icebridge_common.isValidTfw(tfwFile, logger) if not isGood: logger.info('Found invalid tfw. Will wipe: ' + tfwFile + ' ' + xmlFile) os.system('rm -f ' + tfwFile) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid tfw file: ' + tfwFile) validFilesSet.add(tfwFile) validFilesSet.add(xmlFile) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
def doFetch(options, outputFolder): '''The main fetch function. Returns the number of failures.''' # Verify that required files exist home = os.path.expanduser("~") if not (os.path.exists(home + '/.netrc') and os.path.exists(home + '/.urs_cookies')): logger.error( 'Missing a required authentication file! See instructions here:\n' + ' https://nsidc.org/support/faq/what-options-are-available-bulk-' + 'downloading-data-https-earthdata-login-enabled') return -1 curlPath = asp_system_utils.which("curl") curlOpts = ' -n -L ' cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies ' baseCurlCmd = curlPath + curlOpts + cookiePaths logger.info('Creating output folder: ' + outputFolder) os.system('mkdir -p ' + outputFolder) isSouth = (options.site == 'AN') if options.type == 'nav': # Nav fetching is much less complicated return fetchNavData(options, outputFolder) parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) if not icebridge_common.fileNonEmpty(parsedIndexPath): # Some dirs are weird, both images, fireball dems, and ortho. # Just accept whatever there is, but with a warning. logger.info('Warning: Missing index file: ' + parsedIndexPath) # Store file information in a dictionary # - Keep track of the earliest and latest frame logger.info('Reading file list from ' + parsedIndexPath) try: (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) except: # We probably ran into old format index file. Must refetch. logger.info('Could not read index file. Try again.') options.refetchIndex = True parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) if options.stopAfterIndexFetch: return 0 isLidar = (options.type in LIDAR_TYPES) allFrames = sorted(frameDict.keys()) if not isLidar: # The lidar frames use a totally different numbering than the image/ortho/dem frames firstFrame = icebridge_common.getLargestFrame() # start big lastFrame = icebridge_common.getSmallestFrame() # start small for frameNumber in allFrames: if frameNumber < firstFrame: firstFrame = frameNumber if frameNumber > lastFrame: lastFrame = frameNumber if options.allFrames: options.startFrame = firstFrame options.stopFrame = lastFrame if isLidar: # Based on image frames, determine which lidar frames to fetch. if options.ignoreMissingLidar and len(frameDict.keys()) == 0: # Nothing we can do if this run has no lidar and we are told to continue logger.info("Warning: missing lidar, but continuing.") lidarsToFetch = set() else: lidarsToFetch = lidarFilesInRange(frameDict, outputFolder, options.startFrame, options.stopFrame) # There is always a chance that not all requested frames are available. # That is particularly true for Fireball DEMs. Instead of failing, # just download what is present and give a warning. if options.startFrame not in frameDict and not isLidar: logger.info("Warning: Frame " + str(options.startFrame) + " is not found in this flight.") if options.stopFrame and (options.stopFrame not in frameDict) and not isLidar: logger.info("Warning: Frame " + str(options.stopFrame) + " is not found in this flight.") allFilesToFetch = [ ] # Files that we will fetch, relative to the current dir. allUrlsToFetch = [] # Full url of each file. # Loop through all found frames within the provided range currentFileCount = 0 lastFrame = "" if len(allFrames) > 0: lastFrame = allFrames[len(allFrames) - 1] hasTfw = (options.type == 'fireball') hasXml = (isLidar or (options.type == 'ortho') or hasTfw) numFetched = 0 skipCount = 0 for frame in allFrames: # Skip frame outside of range if isLidar: if frameDict[frame] not in lidarsToFetch: continue else: if ((frame < options.startFrame) or (frame > options.stopFrame)): continue # Handle the frame skip option if options.frameSkip > 0: if skipCount < options.frameSkip: skipCount += 1 continue skipCount = 0 filename = frameDict[frame] # Some files have an associated xml file. Fireball DEMs also have a tfw file. currFilesToFetch = [filename] if hasXml: currFilesToFetch.append(icebridge_common.xmlFile(filename)) if hasTfw: currFilesToFetch.append(icebridge_common.tfwFile(filename)) for filename in currFilesToFetch: url = os.path.join(urlDict[frame], filename) outputPath = os.path.join(outputFolder, filename) allFilesToFetch.append(outputPath) allUrlsToFetch.append(url) # Restrict lidar fetch amount according to the parameter if (isLidar and options.maxNumLidarToFetch > 0 and len(allFilesToFetch) > options.maxNumLidarToFetch): # Ensure an even number, to fetch both the lidar file and its xml if options.maxNumLidarToFetch % 2 == 1: options.maxNumLidarToFetch += 1 allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch] allUrlsToFetch = allUrlsToFetch[0:options.maxNumLidarToFetch] icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun, outputFolder, allFilesToFetch, allUrlsToFetch, logger) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList( os.path.dirname(outputFolder), options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Verify that all files were fetched and are in good shape failedFiles = [] for outputPath in allFilesToFetch: if options.skipValidate: continue if not icebridge_common.fileNonEmpty(outputPath): logger.info('Missing file: ' + outputPath) failedFiles.append(outputPath) continue if icebridge_common.hasImageExtension(outputPath): if False: # This check is just so slow. Turn it off for now. # This will impact only the validation of jpegs, # as the other files can be validated via the checksum. # Jpegs will be validated when converting them to 1 band images if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: if not icebridge_common.isValidImage(outputPath): logger.info('Found an invalid image. Will wipe it: ' + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) continue else: logger.info('Valid image: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated # Sanity check: XML files must have the right latitude. if icebridge_common.fileExtension(outputPath) == '.xml': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) #verbose continue else: if os.path.exists(outputPath): try: latitude = icebridge_common.parseLatitude(outputPath) logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated except: # Corrupted file logger.info("Failed to parse latitude, will wipe: " + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) # On a second thought, don't wipe files with wrong latitude, as # next time we run fetch we will have to fetch them again. # Hopefully they will be ignored. #isGood = hasGoodLat(latitude, isSouth) #if not isGood: # logger.info("Wiping XML file " + outputPath + " with bad latitude " + \ # str(latitude)) # os.remove(outputPath) # imageFile = icebridge_common.xmlToImage(outputPath) # if os.path.exists(imageFile): # logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \ # str(latitude)) # os.remove(imageFile) # Verify the chcksum if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \ and outputPath[-4:] != '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) continue else: isGood = icebridge_common.isValidTfw(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid tfw. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid tfw file: ' + outputPath) validFilesSet.add(outputPath) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) numFailed = len(failedFiles) if numFailed > 0: logger.info("Number of files that could not be processed: " + str(numFailed)) return numFailed
def validateOrthosAndFireball(options, fileType, logger): '''Validate ortho and fireball files within the current frame range. This is expected to be in called in parallel for smaller chunks. Lidar files will be validated serially. Jpegs get validated when converted to tif. Return True if all is good.''' badFiles = False logger.info("Validating files of type: " + fileType) if fileType == 'ortho': dataFolder = icebridge_common.getOrthoFolder(options.outputFolder) elif fileType == 'fireball': dataFolder = icebridge_common.getFireballFolder(options.outputFolder) else: raise Exception("Unknown file type: " + fileType) indexPath = icebridge_common.csvIndexFile(dataFolder) if not os.path.exists(indexPath): # The issue of what to do when the index does not exist should # have been settled by now. return (not badFiles) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList(options.outputFolder, options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) (frameDict, urlDict) = icebridge_common.readIndexFile(indexPath, prependFolder=True) for frame in frameDict.keys(): if frame < options.startFrame or frame > options.stopFrame: continue outputPath = frameDict[frame] xmlFile = icebridge_common.xmlFile(outputPath) if outputPath in validFilesSet and os.path.exists(outputPath) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + outputPath + ' ' + xmlFile) continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) os.system('rm -f ' + outputPath) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) validFilesSet.add(xmlFile) if fileType != 'fireball': continue # Also validate tfw tfwFile = icebridge_common.tfwFile(outputPath) xmlFile = icebridge_common.xmlFile(tfwFile) if tfwFile in validFilesSet and os.path.exists(tfwFile) and \ xmlFile in validFilesSet and os.path.exists(xmlFile): #logger.info('Previously validated: ' + tfwFile + ' ' + xmlFile) continue else: isGood = icebridge_common.isValidTfw(tfwFile, logger) if not isGood: logger.info('Found invalid tfw. Will wipe: ' + tfwFile + ' ' + xmlFile) os.system('rm -f ' + tfwFile) # will not throw os.system('rm -f ' + xmlFile) # will not throw badFiles = True else: logger.info('Valid tfw file: ' + tfwFile) validFilesSet.add(tfwFile) validFilesSet.add(xmlFile) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
def correctFireballDems(fireballFolder, corrFireballFolder, startFrame, stopFrame, isNorth, skipValidate, logger): '''Fix the header problem in Fireball DEMs''' logger.info('Correcting Fireball DEMs ...') # Read the existing DEMs fireballIndexPath = icebridge_common.csvIndexFile(fireballFolder) if not os.path.exists(fireballIndexPath): raise Exception("Error: Missing fireball index file: " + fireballIndexPath + ".") (fireballFrameDict, fireballUrlDict) = \ icebridge_common.readIndexFile(fireballIndexPath, prependFolder = True) if not skipValidate: validFilesList = icebridge_common.validFilesList( os.path.dirname(fireballFolder), startFrame, stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Loop through all the input images os.system('mkdir -p ' + corrFireballFolder) badFiles = False for frame in sorted(fireballFrameDict.keys()): # Skip if outside the frame range if not ((frame >= startFrame) and (frame <= stopFrame)): continue inputPath = fireballFrameDict[frame] if not icebridge_common.isDEM(inputPath): continue outputPath = os.path.join(corrFireballFolder, os.path.basename(inputPath)) # Skip existing valid files if skipValidate: if os.path.exists(outputPath): logger.info("File exists, skipping: " + outputPath) continue else: if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # very vebose continue if icebridge_common.isValidImage(outputPath): #logger.info("File exists and is valid, skipping: " + outputPath) validFilesSet.add(outputPath) # mark it as validated continue # Run the correction script execPath = asp_system_utils.which('correct_icebridge_l3_dem') cmd = (('%s %s %s %d') % (execPath, inputPath, outputPath, isNorth)) logger.info(cmd) # TODO: Run this as a subprocess and check the return code os.system(cmd) # Check if the output file is good if not icebridge_common.isValidImage(outputPath): logger.error('Failed to convert dem file, wiping: ' + inputPath + ' ' + outputPath) os.system('rm -f ' + inputPath) # will not throw os.system('rm -f ' + outputPath) # will not throw badFiles = True else: if not skipValidate: validFilesSet.add(outputPath) # mark it as validated if not skipValidate: # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
def convertLidarDataToCsv(lidarFolder, startFrame, stopFrame, skipValidate, logger): '''Make sure all lidar data is available in a readable text format. Returns false if any files failed to convert.''' logger.info('Converting LIDAR files...') lidarIndexPath = icebridge_common.csvIndexFile(lidarFolder) (frameDict, urlDict) = icebridge_common.readIndexFile(lidarIndexPath) if not skipValidate: validFilesList = icebridge_common.validFilesList( os.path.dirname(lidarFolder), startFrame, stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) convDict = {} # Loop through all files in the folder badFiles = False for frame in sorted(frameDict.keys()): f = frameDict[frame] extension = icebridge_common.fileExtension(f) # Only interested in a few file types if (extension != '.qi') and (extension != '.hdf5') and (extension != '.h5'): convDict[frame] = f # these are already in plain text continue convDict[frame] = os.path.splitext(f)[0] + '.csv' outputPath = os.path.join(lidarFolder, convDict[frame]) # Handle paths fullPath = os.path.join(lidarFolder, f) if not os.path.exists(fullPath): logger.info("Cannot convert missing file: " + fullPath) continue # If the input is invalid, wipe both it, its xml, and the output # Hopefully there will be a subsquent fetch step where it will get # refetched. if not icebridge_common.hasValidChkSum(fullPath, logger): logger.info("Will wipe invalid file: " + fullPath) xmlFile = icebridge_common.xmlFile(fullPath) os.system('rm -f ' + fullPath) # will not throw os.system('rm -f ' + xmlFile) # will not throw os.system('rm -f ' + outputPath) # will not throw badFiles = True continue # Skip existing valid files if skipValidate: if os.path.exists(outputPath): logger.info("File exists, skipping: " + outputPath) continue else: if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue if icebridge_common.isValidLidarCSV(outputPath): #logger.info("File exists and is valid, skipping: " + outputPath) continue # Call the conversion logger.info("Process " + fullPath) extract_icebridge_ATM_points.main([fullPath]) # Check the result if not icebridge_common.isValidLidarCSV(outputPath): logger.error('Failed to parse LIDAR file, will wipe: ' + outputPath) os.system('rm -f ' + outputPath) # will not throw badFiles = True else: if not skipValidate: validFilesSet.add(outputPath) # mark it as validated convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder) willWriteConvFile = False if not os.path.exists(convLidarFile): willWriteConvFile = True else: # Bugfix: Sometimes the written converted file has the wrong size, maybe # something got interrupted earlier. (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile) if lidarDictIn != convDict: willWriteConvFile = True if willWriteConvFile: logger.info("Writing: " + convLidarFile) icebridge_common.writeIndexFile(convLidarFile, convDict, {}) if not skipValidate: # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
badFiles = True logger.error("Wiping bad files: " + inputPath + " and " + outputPath + '\n' + output) os.system('rm -f ' + inputPath) # will not throw os.system('rm -f ' + outputPath) # will not throw if not skipValidate: # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = icebridge_common.updateValidFilesListFromDisk( validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) if badFiles: logger.error( "Converstion of JPEGs failed. If any files were corrupted, " + "they were removed, and need to be re-fetched.") return (not badFiles) def correctFireballDems(fireballFolder, corrFireballFolder, startFrame, stopFrame, isNorth, skipValidate, logger): '''Fix the header problem in Fireball DEMs''' logger.info('Correcting Fireball DEMs ...')
def doFetch(options, outputFolder): '''The main fetch function. Returns the number of failures.''' # Verify that required files exist home = os.path.expanduser("~") if not (os.path.exists(home+'/.netrc') and os.path.exists(home+'/.urs_cookies')): logger.error('Missing a required authentication file! See instructions here:\n' + ' https://nsidc.org/support/faq/what-options-are-available-bulk-' + 'downloading-data-https-earthdata-login-enabled') return -1 curlPath = asp_system_utils.which("curl") curlOpts = ' -n -L ' cookiePaths = ' -b ~/.urs_cookies -c ~/.urs_cookies ' baseCurlCmd = curlPath + curlOpts + cookiePaths logger.info('Creating output folder: ' + outputFolder) os.system('mkdir -p ' + outputFolder) isSouth = (options.site == 'AN') if options.type == 'nav': # Nav fetching is much less complicated return fetchNavData(options, outputFolder) parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) if not icebridge_common.fileNonEmpty(parsedIndexPath): # Some dirs are weird, both images, fireball dems, and ortho. # Just accept whatever there is, but with a warning. logger.info('Warning: Missing index file: ' + parsedIndexPath) # Store file information in a dictionary # - Keep track of the earliest and latest frame logger.info('Reading file list from ' + parsedIndexPath) try: (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) except: # We probably ran into old format index file. Must refetch. logger.info('Could not read index file. Try again.') options.refetchIndex = True parsedIndexPath = fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder) (frameDict, urlDict) = icebridge_common.readIndexFile(parsedIndexPath) if options.stopAfterIndexFetch: return 0 isLidar = (options.type in LIDAR_TYPES) allFrames = sorted(frameDict.keys()) if not isLidar: # The lidar frames use a totally different numbering than the image/ortho/dem frames firstFrame = icebridge_common.getLargestFrame() # start big lastFrame = icebridge_common.getSmallestFrame() # start small for frameNumber in allFrames: if frameNumber < firstFrame: firstFrame = frameNumber if frameNumber > lastFrame: lastFrame = frameNumber if options.allFrames: options.startFrame = firstFrame options.stopFrame = lastFrame if isLidar: # Based on image frames, determine which lidar frames to fetch. if options.ignoreMissingLidar and len(frameDict.keys()) == 0: # Nothing we can do if this run has no lidar and we are told to continue logger.info("Warning: missing lidar, but continuing.") lidarsToFetch = set() else: lidarsToFetch = lidarFilesInRange(frameDict, outputFolder, options.startFrame, options.stopFrame) # There is always a chance that not all requested frames are available. # That is particularly true for Fireball DEMs. Instead of failing, # just download what is present and give a warning. if options.startFrame not in frameDict and not isLidar: logger.info("Warning: Frame " + str(options.startFrame) + " is not found in this flight.") if options.stopFrame and (options.stopFrame not in frameDict) and not isLidar: logger.info("Warning: Frame " + str(options.stopFrame) + " is not found in this flight.") allFilesToFetch = [] # Files that we will fetch, relative to the current dir. allUrlsToFetch = [] # Full url of each file. # Loop through all found frames within the provided range currentFileCount = 0 lastFrame = "" if len(allFrames) > 0: lastFrame = allFrames[len(allFrames)-1] hasTfw = (options.type == 'fireball') hasXml = ( isLidar or (options.type == 'ortho') or hasTfw ) numFetched = 0 skipCount = 0 for frame in allFrames: # Skip frame outside of range if isLidar: if frameDict[frame] not in lidarsToFetch: continue else: if ((frame < options.startFrame) or (frame > options.stopFrame) ): continue # Handle the frame skip option if options.frameSkip > 0: if skipCount < options.frameSkip: skipCount += 1 continue skipCount = 0 filename = frameDict[frame] # Some files have an associated xml file. Fireball DEMs also have a tfw file. currFilesToFetch = [filename] if hasXml: currFilesToFetch.append(icebridge_common.xmlFile(filename)) if hasTfw: currFilesToFetch.append(icebridge_common.tfwFile(filename)) for filename in currFilesToFetch: url = os.path.join(urlDict[frame], filename) outputPath = os.path.join(outputFolder, filename) allFilesToFetch.append(outputPath) allUrlsToFetch.append(url) # Restrict lidar fetch amount according to the parameter if (isLidar and options.maxNumLidarToFetch > 0 and len(allFilesToFetch) > options.maxNumLidarToFetch): # Ensure an even number, to fetch both the lidar file and its xml if options.maxNumLidarToFetch % 2 == 1: options.maxNumLidarToFetch += 1 allFilesToFetch = allFilesToFetch[0:options.maxNumLidarToFetch] allUrlsToFetch = allUrlsToFetch [0:options.maxNumLidarToFetch] icebridge_common.fetchFilesInBatches(baseCurlCmd, MAX_IN_ONE_CALL, options.dryRun, outputFolder, allFilesToFetch, allUrlsToFetch, logger) # Fetch from disk the set of already validated files, if any validFilesList = icebridge_common.validFilesList(os.path.dirname(outputFolder), options.startFrame, options.stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Verify that all files were fetched and are in good shape failedFiles = [] for outputPath in allFilesToFetch: if options.skipValidate: continue if not icebridge_common.fileNonEmpty(outputPath): logger.info('Missing file: ' + outputPath) failedFiles.append(outputPath) continue if icebridge_common.hasImageExtension(outputPath): if False: # This check is just so slow. Turn it off for now. # This will impact only the validation of jpegs, # as the other files can be validated via the checksum. # Jpegs will be validated when converting them to 1 band images if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: if not icebridge_common.isValidImage(outputPath): logger.info('Found an invalid image. Will wipe it: ' + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) continue else: logger.info('Valid image: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated # Sanity check: XML files must have the right latitude. if icebridge_common.fileExtension(outputPath) == '.xml': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) #verbose continue else: if os.path.exists(outputPath): try: latitude = icebridge_common.parseLatitude(outputPath) logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) # mark it as validated except: # Corrupted file logger.info("Failed to parse latitude, will wipe: " + outputPath) if os.path.exists(outputPath): os.remove(outputPath) failedFiles.append(outputPath) # On a second thought, don't wipe files with wrong latitude, as # next time we run fetch we will have to fetch them again. # Hopefully they will be ignored. #isGood = hasGoodLat(latitude, isSouth) #if not isGood: # logger.info("Wiping XML file " + outputPath + " with bad latitude " + \ # str(latitude)) # os.remove(outputPath) # imageFile = icebridge_common.xmlToImage(outputPath) # if os.path.exists(imageFile): # logger.info("Wiping TIF file " + imageFile + " with bad latitude " + \ # str(latitude)) # os.remove(imageFile) # Verify the chcksum if hasXml and len(outputPath) >= 4 and outputPath[-4:] != '.xml' \ and outputPath[-4:] != '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue else: isGood = icebridge_common.hasValidChkSum(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid data. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid file: ' + outputPath) validFilesSet.add(outputPath) if hasTfw and icebridge_common.fileExtension(outputPath) == '.tfw': if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) continue else: isGood = icebridge_common.isValidTfw(outputPath, logger) if not isGood: xmlFile = icebridge_common.xmlFile(outputPath) logger.info('Found invalid tfw. Will wipe: ' + outputPath + ' ' + xmlFile) if os.path.exists(outputPath): os.remove(outputPath) if os.path.exists(xmlFile): os.remove(xmlFile) failedFiles.append(outputPath) failedFiles.append(xmlFile) continue else: logger.info('Valid tfw file: ' + outputPath) validFilesSet.add(outputPath) # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = \ icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) numFailed = len(failedFiles) if numFailed > 0: logger.info("Number of files that could not be processed: " + str(numFailed)) return numFailed
def convertLidarDataToCsv(lidarFolder, startFrame, stopFrame, skipValidate, logger): '''Make sure all lidar data is available in a readable text format. Returns false if any files failed to convert.''' logger.info('Converting LIDAR files...') lidarIndexPath = icebridge_common.csvIndexFile(lidarFolder) (frameDict, urlDict) = icebridge_common.readIndexFile(lidarIndexPath) if not skipValidate: validFilesList = icebridge_common.validFilesList(os.path.dirname(lidarFolder), startFrame, stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) convDict = {} # Loop through all files in the folder badFiles = False for frame in sorted(frameDict.keys()): f = frameDict[frame] extension = icebridge_common.fileExtension(f) # Only interested in a few file types if (extension != '.qi') and (extension != '.hdf5') and (extension != '.h5'): convDict[frame] = f # these are already in plain text continue convDict[frame] = os.path.splitext(f)[0] + '.csv' outputPath = os.path.join(lidarFolder, convDict[frame]) # Handle paths fullPath = os.path.join(lidarFolder, f) if not os.path.exists(fullPath): logger.info("Cannot convert missing file: " + fullPath) continue # If the input is invalid, wipe both it, its xml, and the output # Hopefully there will be a subsquent fetch step where it will get # refetched. if not icebridge_common.hasValidChkSum(fullPath, logger): logger.info("Will wipe invalid file: " + fullPath) xmlFile = icebridge_common.xmlFile(fullPath) os.system('rm -f ' + fullPath) # will not throw os.system('rm -f ' + xmlFile) # will not throw os.system('rm -f ' + outputPath) # will not throw badFiles = True continue # Skip existing valid files if skipValidate: if os.path.exists(outputPath): logger.info("File exists, skipping: " + outputPath) continue else: if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # verbose continue if icebridge_common.isValidLidarCSV(outputPath): #logger.info("File exists and is valid, skipping: " + outputPath) continue # Call the conversion logger.info("Process " + fullPath) extract_icebridge_ATM_points.main([fullPath]) # Check the result if not icebridge_common.isValidLidarCSV(outputPath): logger.error('Failed to parse LIDAR file, will wipe: ' + outputPath) os.system('rm -f ' + outputPath) # will not throw badFiles = True else: if not skipValidate: validFilesSet.add(outputPath) # mark it as validated convLidarFile = icebridge_common.getConvertedLidarIndexFile(lidarFolder) willWriteConvFile = False if not os.path.exists(convLidarFile): willWriteConvFile = True else: # Bugfix: Sometimes the written converted file has the wrong size, maybe # something got interrupted earlier. (lidarDictIn, dummyUrlDict) = icebridge_common.readIndexFile(convLidarFile) if lidarDictIn != convDict: willWriteConvFile = True if willWriteConvFile: logger.info("Writing: " + convLidarFile) icebridge_common.writeIndexFile(convLidarFile, convDict, {}) if not skipValidate: # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)
def convertJpegs(jpegFolder, imageFolder, startFrame, stopFrame, skipValidate, cameraMounting, logger): '''Convert jpeg images from RGB to single channel. Returns false if any files failed.''' badFiles = False logger.info('Converting input images to grayscale...') os.system('mkdir -p ' + imageFolder) # Loop through all the input images jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder) if not os.path.exists(jpegIndexPath): raise Exception("Error: Missing jpeg index file: " + jpegIndexPath + ".") (jpegFrameDict, jpegUrlDict) = icebridge_common.readIndexFile(jpegIndexPath, prependFolder = True) # Need the orthos to get the timestamp orthoFolder = icebridge_common.getOrthoFolder(os.path.dirname(jpegFolder)) orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder) if not os.path.exists(orthoIndexPath): raise Exception("Error: Missing ortho index file: " + orthoIndexPath + ".") (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath, prependFolder = True) if not skipValidate: validFilesList = icebridge_common.validFilesList(os.path.dirname(jpegFolder), startFrame, stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Fast check for missing images. This is fragile, as maybe it gets # the wrong file with a similar name, but an honest check is very slow. imageFiles = icebridge_common.getTifs(imageFolder, prependFolder = True) imageFrameDict = {} for imageFile in imageFiles: frame = icebridge_common.getFrameNumberFromFilename(imageFile) if frame < startFrame or frame > stopFrame: continue imageFrameDict[frame] = imageFile for frame in sorted(jpegFrameDict.keys()): inputPath = jpegFrameDict[frame] # Only deal with frames in range if not ( (frame >= startFrame) and (frame <= stopFrame) ): continue if frame in imageFrameDict.keys() and skipValidate: # Fast, hackish check continue if frame not in orthoFrameDict: logger.info("Error: Could not find ortho image for jpeg frame: " + str(frame)) # Don't want to throw here. Just ignore the missing ortho continue # Make sure the timestamp and frame number are in the output file name try: outputPath = icebridge_common.jpegToImageFile(inputPath, orthoFrameDict[frame]) except Exception as e: logger.info(str(e)) logger.info("Removing bad file: " + inputPath) os.system('rm -f ' + inputPath) # will not throw badFiles = True continue # Skip existing valid files if skipValidate: if os.path.exists(outputPath): logger.info("File exists, skipping: " + outputPath) continue else: if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # very verbose validFilesSet.add(inputPath) # Must have this continue if icebridge_common.isValidImage(outputPath): #logger.info("File exists and is valid, skipping: " + outputPath) # verbose if not skipValidate: # Mark both the input and the output as validated validFilesSet.add(inputPath) validFilesSet.add(outputPath) continue # Use ImageMagick tool to convert from RGB to grayscale # - Some image orientations are rotated to make stereo processing easier. rotateString = '' if cameraMounting == 2: # Flight direction towards top of image rotateString = '-rotate 90 ' if cameraMounting == 3: # Flight direction towards bottom of image rotateString = '-rotate -90 ' cmd = ('%s %s -colorspace Gray %s%s') % \ (asp_system_utils.which('convert'), inputPath, rotateString, outputPath) logger.info(cmd) # Run command and fetch its output p = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) output, error = p.communicate() if p.returncode != 0: badFiles = True logger.error("Command failed.") logger.error("Wiping bad files: " + inputPath + " and " + outputPath + '\n' + output) os.system('rm -f ' + inputPath) # will not throw os.system('rm -f ' + outputPath) # will not throw if not os.path.exists(outputPath): badFiles = True logger.error('Failed to convert jpeg file: ' + inputPath) logger.error("Wiping bad files: " + inputPath + " and " + outputPath + '\n' + output) os.system('rm -f ' + inputPath) # will not throw os.system('rm -f ' + outputPath) # will not throw # Check for corrupted files if error is not None: output += error m = re.match("^.*?premature\s+end", output, re.IGNORECASE|re.MULTILINE|re.DOTALL) if m: badFiles = True logger.error("Wiping bad files: " + inputPath + " and " + outputPath + '\n' + output) os.system('rm -f ' + inputPath) # will not throw os.system('rm -f ' + outputPath) # will not throw if not skipValidate: # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) if badFiles: logger.error("Converstion of JPEGs failed. If any files were corrupted, " + "they were removed, and need to be re-fetched.") return (not badFiles)
def correctFireballDems(fireballFolder, corrFireballFolder, startFrame, stopFrame, isNorth, skipValidate, logger): '''Fix the header problem in Fireball DEMs''' logger.info('Correcting Fireball DEMs ...') # Read the existing DEMs fireballIndexPath = icebridge_common.csvIndexFile(fireballFolder) if not os.path.exists(fireballIndexPath): raise Exception("Error: Missing fireball index file: " + fireballIndexPath + ".") (fireballFrameDict, fireballUrlDict) = \ icebridge_common.readIndexFile(fireballIndexPath, prependFolder = True) if not skipValidate: validFilesList = icebridge_common.validFilesList(os.path.dirname(fireballFolder), startFrame, stopFrame) validFilesSet = set() validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) numInitialValidFiles = len(validFilesSet) # Loop through all the input images os.system('mkdir -p ' + corrFireballFolder) badFiles = False for frame in sorted(fireballFrameDict.keys()): # Skip if outside the frame range if not ( (frame >= startFrame) and (frame <= stopFrame) ): continue inputPath = fireballFrameDict[frame] if not icebridge_common.isDEM(inputPath): continue outputPath = os.path.join(corrFireballFolder, os.path.basename(inputPath)) # Skip existing valid files if skipValidate: if os.path.exists(outputPath): logger.info("File exists, skipping: " + outputPath) continue else: if outputPath in validFilesSet and os.path.exists(outputPath): #logger.info('Previously validated: ' + outputPath) # very vebose continue if icebridge_common.isValidImage(outputPath): #logger.info("File exists and is valid, skipping: " + outputPath) validFilesSet.add(outputPath) # mark it as validated continue # Run the correction script execPath = asp_system_utils.which('correct_icebridge_l3_dem') cmd = (('%s %s %s %d') % (execPath, inputPath, outputPath, isNorth)) logger.info(cmd) # TODO: Run this as a subprocess and check the return code os.system(cmd) # Check if the output file is good if not icebridge_common.isValidImage(outputPath): logger.error('Failed to convert dem file, wiping: ' + inputPath + ' ' + outputPath) os.system('rm -f ' + inputPath) # will not throw os.system('rm -f ' + outputPath) # will not throw badFiles = True else: if not skipValidate: validFilesSet.add(outputPath) # mark it as validated if not skipValidate: # Write to disk the list of validated files, but only if new # validations happened. First re-read that list, in case a # different process modified it in the meantime, such as if two # managers are running at the same time. numFinalValidFiles = len(validFilesSet) if numInitialValidFiles != numFinalValidFiles: validFilesSet = icebridge_common.updateValidFilesListFromDisk(validFilesList, validFilesSet) icebridge_common.writeValidFilesList(validFilesList, validFilesSet) return (not badFiles)