def _removeFileIfPresent(path):
    '''Best-effort removal of a single file, without going through a shell.'''
    try:
        os.remove(path)
    except OSError:
        # A missing file is the normal case and is ignored. Anything else is
        # only reported, to stay as forgiving as the 'rm -f' this replaces.
        if os.path.lexists(path):
            logger.info('WARNING: Could not remove ' + path)


def _fetchLatitude(baseCurlCmd, folderUrl, filename):
    '''Download the xml file that goes with filename and return its latitude.'''
    xmlFile = icebridge_common.xmlFile(filename)
    url = os.path.join(folderUrl, xmlFile)

    # Download the file. The shell is needed for the output redirection.
    curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
    logger.info(curlCmd)
    p = subprocess.Popen(curlCmd, shell=True)
    p.wait()

    try:
        return icebridge_common.parseLatitude(xmlFile)
    finally:
        # Wipe the temporary xml file even when it could not be parsed.
        if os.path.exists(xmlFile):
            os.remove(xmlFile)


def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.

    If a non-empty parsed (csv) index already exists for outputFolder, and
    options.refetchIndex is not set, that index is kept and nothing is fetched.
    Otherwise the html index is fetched for the flight day, and in some cases
    for the next day as well, and a single combined csv index is written.

    Arguments:
      options      -- The fields read here are type, yyyymmdd, year, month,
                      day, site, refetchIndex and ignoreMissingLidar. For
                      lidar, options.allFrames is set to True and options.type
                      is overwritten with the lidar type which was picked.
      isSouth      -- Passed on to fetchAndParseIndexFileAux and hasGoodLat.
      baseCurlCmd  -- The start of the curl command line, as a string. The url
                      to fetch is appended to it.
      outputFolder -- The folder for which the index is created.

    Returns the path to the parsed (csv) index file.

    Raises an Exception if several lidar URLs exist, none of them has a good
    latitude, and options.ignoreMissingLidar is not set.
    '''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code.
    separateByLat = (options.type == 'ortho' and
                     isInSeparateByLatTable(options.yyyymmdd))
    orthoOrFireball = options.type in ('ortho', 'fireball')

    # If we separate by latitude within a given day, we won't fetch the next
    # day. Otherwise, normally we fetch for next day only for ortho or
    # fireball. However, for one single special flight, we do it for jpeg too,
    # as then the jpegs are also split.
    fetchNextDay = (not separateByLat) and \
                   (orthoOrFireball or
                    (options.type == 'jpeg' and
                     twoFlightsInOneDay(options.site, options.yyyymmdd)))

    # If we need to parse the next flight day as well, as expected in some runs,
    # we will fetch two html files, but create a single index out of them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)
    multiDay = len(dayVals) > 1

    indexPath = icebridge_common.htmlIndexFile(outputFolder)
    currIndexPath = indexPath
    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        _removeFileIfPresent(indexPath)
        _removeFileIfPresent(parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath + ', keeping it.')
        return parsedIndexPath

    frameDict = {}
    urlDict = {}

    # We need the list of jpeg frames. Sometimes when fetching ortho images,
    # and we have to fetch from the next day, don't fetch unless
    # in the jpeg index.
    if multiDay and options.type != 'jpeg':
        jpegFolder = icebridge_common.getJpegFolder(os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict, _) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also same timestamp as the ortho.
        orthoFolder = icebridge_common.getOrthoFolder(os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict, _) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            [_, imageTimeString] = \
                icebridge_common.parseTimeStamps(orthoFrameDict[frame])
            orthoStamp[frame] = imageTimeString

    for dayVal in dayVals:

        if multiDay:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                _removeFileIfPresent(currIndexPath)

        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True # For lidar, always get all the frames!

            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                         options.day, dayVal, # note here the dayVal
                                         options.site, lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                # NOTE(review): a later definition of this function in this
                # file also passes baseCurlCmd to checkIfUrlExists. Confirm
                # which signature is the current one.
                if checkIfUrlExists(folderUrl):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                # NOTE(review): folderUrl still holds the last URL probed
                # above, and the fetch below is attempted with it.
                logger.info('WARNING: Could not find any lidar data for the given date!')

            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl = folderUrls[0]
                options.type = lidar_types[0]

            else:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiples URLs to search: " + " ".join(folderUrls))
                isGood = False
                # Keep folderUrl as the loop variable, its last value is
                # the one used for the fetch below.
                for (folderUrl, lidarType) in zip(folderUrls, lidar_types):
                    (localFrameDict, localUrlDict) = \
                        fetchAndParseIndexFileAux(isSouth,
                                                  separateByLat, dayVal,
                                                  baseCurlCmd, folderUrl,
                                                  currIndexPath, lidarType)

                    # Look at the first file only, no matter what
                    frames = sorted(localFrameDict.keys())
                    if not frames:
                        continue

                    latitude = _fetchLatitude(baseCurlCmd, folderUrl,
                                              localFrameDict[frames[0]])
                    if hasGoodLat(latitude, isSouth):
                        isGood = True
                        options.type = lidarType
                        logger.info("Good latitude " + str(latitude) + ", will use " +
                                    folderUrl + " of type " + lidarType)
                        break

                    logger.info("Bad latitude " + str(latitude) + ", will not use " +
                                folderUrl + " of type " + lidarType)

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))

        else: # Other cases are simpler
            folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                     options.day, dayVal, # note here the dayVal
                                     options.site, options.type)

        logger.info('Fetching from URL: ' + folderUrl)
        (localFrameDict, localUrlDict) = \
            fetchAndParseIndexFileAux(isSouth,
                                      separateByLat, dayVal,
                                      baseCurlCmd, folderUrl,
                                      currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also same timestamp as the ortho.
                # Otherwise we may accidentally be getting one from next day.
                [_, imageTimeString] = \
                    icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    # Missing ortho for this fireball DEM
                    continue

                # Apparently a tolerance is needed. Use 10 seconds, so the number 1000.
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Will not use a fireball DEM whose timestamp differs from ortho
                    continue

            # Fetch from next day, unless already have a value. And don't fetch
            # frames not in the jpeg index.
            if multiDay and options.type != 'jpeg':
                if frame not in jpegFrameDict:
                    continue
                if frame in frameDict:
                    continue

            frameDict[frame] = localFrameDict[frame]
            urlDict[frame] = localUrlDict[frame]

    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)

    return parsedIndexPath
def _removeFileIfPresent(path):
    '''Best-effort removal of a single file, without going through a shell.'''
    try:
        os.remove(path)
    except OSError:
        # A missing file is the normal case and is ignored. Anything else is
        # only reported, to stay as forgiving as the 'rm -f' this replaces.
        if os.path.lexists(path):
            logger.info('WARNING: Could not remove ' + path)


def _fetchLatitude(baseCurlCmd, folderUrl, filename):
    '''Download the xml file that goes with filename and return its latitude.'''
    xmlFile = icebridge_common.xmlFile(filename)
    url = os.path.join(folderUrl, xmlFile)

    # Download the file. The shell is needed for the output redirection.
    curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
    logger.info(curlCmd)
    p = subprocess.Popen(curlCmd, shell=True, universal_newlines=True)
    p.wait()

    try:
        return icebridge_common.parseLatitude(xmlFile)
    finally:
        # Wipe the temporary xml file even when it could not be parsed.
        if os.path.exists(xmlFile):
            os.remove(xmlFile)


def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.

    If a non-empty parsed (csv) index already exists for outputFolder, and
    options.refetchIndex is not set, that index is kept and nothing is fetched.
    Otherwise the html index is fetched for the flight day, and in some cases
    for the next day as well, and a single combined csv index is written.

    Arguments:
      options      -- The fields read here are type, yyyymmdd, year, month,
                      day, site, refetchIndex and ignoreMissingLidar. For
                      lidar, options.allFrames is set to True and options.type
                      is overwritten with the lidar type which was picked.
      isSouth      -- Passed on to fetchAndParseIndexFileAux and hasGoodLat.
      baseCurlCmd  -- The start of the curl command line, as a string. The url
                      to fetch is appended to it.
      outputFolder -- The folder for which the index is created.

    Returns the path to the parsed (csv) index file.

    Raises an Exception if several lidar URLs exist, none of them has a good
    latitude, and options.ignoreMissingLidar is not set.
    '''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code.
    separateByLat = (options.type == 'ortho' and
                     isInSeparateByLatTable(options.yyyymmdd))
    orthoOrFireball = options.type in ('ortho', 'fireball')

    # If we separate by latitude within a given day, we won't fetch the next
    # day. Otherwise, normally we fetch for next day only for ortho or
    # fireball. However, for one single special flight, we do it for jpeg too,
    # as then the jpegs are also split.
    fetchNextDay = (not separateByLat) and \
                   (orthoOrFireball or
                    (options.type == 'jpeg' and
                     twoFlightsInOneDay(options.site, options.yyyymmdd)))

    # If we need to parse the next flight day as well, as expected in some runs,
    # we will fetch two html files, but create a single index out of them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)
    multiDay = len(dayVals) > 1

    indexPath = icebridge_common.htmlIndexFile(outputFolder)
    currIndexPath = indexPath
    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        _removeFileIfPresent(indexPath)
        _removeFileIfPresent(parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath + ', keeping it.')
        return parsedIndexPath

    frameDict = {}
    urlDict = {}

    # We need the list of jpeg frames. Sometimes when fetching ortho images,
    # and we have to fetch from the next day, don't fetch unless
    # in the jpeg index.
    if multiDay and options.type != 'jpeg':
        jpegFolder = icebridge_common.getJpegFolder(os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict, _) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also same timestamp as the ortho.
        orthoFolder = icebridge_common.getOrthoFolder(os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict, _) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            [_, imageTimeString] = \
                icebridge_common.parseTimeStamps(orthoFrameDict[frame])
            orthoStamp[frame] = imageTimeString

    for dayVal in dayVals:

        if multiDay:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                _removeFileIfPresent(currIndexPath)

        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True # For lidar, always get all the frames!

            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                         options.day, dayVal, # note here the dayVal
                                         options.site, lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                if checkIfUrlExists(folderUrl, baseCurlCmd):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                # NOTE(review): folderUrl still holds the last URL probed
                # above, and the fetch below is attempted with it.
                logger.info('WARNING: Could not find any lidar data for the given date!')

            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl = folderUrls[0]
                options.type = lidar_types[0]

            else:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiples URLs to search: " + " ".join(folderUrls))
                isGood = False
                # Keep folderUrl as the loop variable, its last value is
                # the one used for the fetch below.
                for (folderUrl, lidarType) in zip(folderUrls, lidar_types):
                    (localFrameDict, localUrlDict) = \
                        fetchAndParseIndexFileAux(isSouth,
                                                  separateByLat, dayVal,
                                                  baseCurlCmd, folderUrl,
                                                  currIndexPath, lidarType)

                    # Look at the first file only, no matter what
                    frames = sorted(localFrameDict.keys())
                    if not frames:
                        continue

                    latitude = _fetchLatitude(baseCurlCmd, folderUrl,
                                              localFrameDict[frames[0]])
                    if hasGoodLat(latitude, isSouth):
                        isGood = True
                        options.type = lidarType
                        logger.info("Good latitude " + str(latitude) + ", will use " +
                                    folderUrl + " of type " + lidarType)
                        break

                    logger.info("Bad latitude " + str(latitude) + ", will not use " +
                                folderUrl + " of type " + lidarType)

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))

        else: # Other cases are simpler
            folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                     options.day, dayVal, # note here the dayVal
                                     options.site, options.type)

        logger.info('Fetching from URL: ' + folderUrl)
        (localFrameDict, localUrlDict) = \
            fetchAndParseIndexFileAux(isSouth,
                                      separateByLat, dayVal,
                                      baseCurlCmd, folderUrl,
                                      currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also same timestamp as the ortho.
                # Otherwise we may accidentally be getting one from next day.
                [_, imageTimeString] = \
                    icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    # Missing ortho for this fireball DEM
                    continue

                # Apparently a tolerance is needed. Use 10 seconds, so the number 1000.
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Will not use a fireball DEM whose timestamp differs from ortho
                    continue

            # Fetch from next day, unless already have a value. And don't fetch
            # frames not in the jpeg index.
            if multiDay and options.type != 'jpeg':
                if frame not in jpegFrameDict:
                    continue
                if frame in frameDict:
                    continue

            frameDict[frame] = localFrameDict[frame]
            urlDict[frame] = localUrlDict[frame]

    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)

    return parsedIndexPath