예제 #1
0
def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.'''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code.
    fetchNextDay = True

    separateByLat = (options.type == 'ortho'
                     and isInSeparateByLatTable(options.yyyymmdd))
    if separateByLat:
        # Here we won't fetch the next day, we will just separate by latitude within
        # a given day
        fetchNextDay = False

    orthoOrFireball = ((options.type == 'ortho')
                       or (options.type == 'fireball'))

    if fetchNextDay:
        # Normally we fetch for next day only for ortho or fireball. However,
        # for one single special flight, we do it for jpeg too, as then
        # the jpegs are also split.
        if orthoOrFireball or \
           ((options.type == 'jpeg') and twoFlightsInOneDay(options.site, options.yyyymmdd)):
            fetchNextDay = True
        else:
            fetchNextDay = False

    # If we need to parse the next flight day as well, as expected in some runs,
    # we will fetch two html files, but create a single index out of them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)

    indexPath = icebridge_common.htmlIndexFile(outputFolder)

    currIndexPath = indexPath
    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        os.system('rm -f ' + indexPath)
        os.system('rm -f ' + parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath +
                    ', keeping it.')
        return parsedIndexPath

    frameDict = {}
    urlDict = {}

    # We need the list of jpeg frames. Sometimes when fetching ortho images,
    # and we have to fetch from the next day, don't fetch unless
    # in the jpeg index.
    if len(dayVals) > 1 and options.type != 'jpeg':
        jpegFolder = icebridge_common.getJpegFolder(
            os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict,
         jpegUrlDict) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also same timestamp as the ortho.
        orthoFolder = icebridge_common.getOrthoFolder(
            os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict,
         orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            filename = orthoFrameDict[frame]
            [imageDateString,
             imageTimeString] = icebridge_common.parseTimeStamps(filename)
            orthoStamp[frame] = imageTimeString

    for dayVal in dayVals:

        if len(dayVals) > 1:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                os.system('rm -f ' + currIndexPath)

        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True  # For lidar, always get all the frames!

            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(
                    options.yyyymmdd,
                    options.year,
                    options.month,
                    options.day,
                    dayVal,  # note here the dayVal
                    options.site,
                    lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                if checkIfUrlExists(folderUrl):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                logger.info(
                    'WARNING: Could not find any lidar data for the given date!'
                )

            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl = folderUrls[0]
                options.type = lidar_types[0]

            elif len(folderUrls) >= 2:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiples URLs to search: " +
                            " ".join(folderUrls))
                count = -1
                isGood = False
                for folderUrl in folderUrls:
                    count += 1
                    (localFrameDict, localUrlDict) = \
                                     fetchAndParseIndexFileAux(isSouth,
                                                               separateByLat, dayVal,
                                                               baseCurlCmd, folderUrl,
                                                               currIndexPath,
                                                               lidar_types[count])
                    for frame in sorted(localFrameDict.keys()):
                        filename = localFrameDict[frame]
                        xmlFile = icebridge_common.xmlFile(filename)
                        url = os.path.join(folderUrl, xmlFile)

                        # Download the file
                        curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
                        logger.info(curlCmd)
                        p = subprocess.Popen(curlCmd, shell=True)
                        os.waitpid(p.pid, 0)

                        latitude = icebridge_common.parseLatitude(xmlFile)
                        if os.path.exists(xmlFile): os.remove(xmlFile)

                        if hasGoodLat(latitude, isSouth):
                            isGood = True
                            options.type = lidar_types[count]
                            logger.info("Good latitude " + str(latitude) +
                                        ", will use " + folderUrl +
                                        " of type " + lidar_types[count])
                        else:
                            logger.info("Bad latitude " + str(latitude) +
                                        ", will not use " + folderUrl +
                                        " of type " + lidar_types[count])

                        # Stop at first file no matter what
                        break

                    if isGood:
                        break

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))

        else:  # Other cases are simpler
            folderUrl = getFolderUrl(
                options.yyyymmdd,
                options.year,
                options.month,
                options.day,
                dayVal,  # note here the dayVal
                options.site,
                options.type)

        logger.info('Fetching from URL: ' + folderUrl)
        (localFrameDict, localUrlDict) = \
                         fetchAndParseIndexFileAux(isSouth,
                                                   separateByLat, dayVal,
                                                   baseCurlCmd, folderUrl,
                                                   currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also same timestamp as the ortho.
                # Otherwise we may accidentally getting one from next day.
                [imageDateString, imageTimeString] = \
                                  icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    #logger.info("Missing ortho for fireball: " + localFrameDict[frame])
                    continue
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Apparently a tolerance is needed. Use 10 seconds, so the number 1000.
                    #logger.info("Will not use fireball DEM whose timestamp differs from ortho.")
                    #logger.info("Fireball is: " + localFrameDict[frame])
                    #logger.info("Ortho is:    " + orthoFrameDict[frame])
                    continue

            # Fetch from next day, unless already have a value. And don't fetch
            # frames not in the jpeg index.
            if len(dayVals) > 1 and options.type != 'jpeg':
                if not frame in jpegFrameDict.keys(): continue
                if frame in frameDict.keys(): continue

            frameDict[frame] = localFrameDict[frame]
            urlDict[frame] = localUrlDict[frame]

    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)

    return parsedIndexPath
def fetchAndParseIndexFile(options, isSouth, baseCurlCmd, outputFolder):
    '''Create a list of all files that must be fetched unless done already.'''

    # For AN 20091112, etc, some of the ortho images are stored at the
    # beginning of the next day's flight. Need to sort this out, and
    # it is tricky. More comments within the code. 
    fetchNextDay = True
    
    separateByLat = (options.type == 'ortho' and isInSeparateByLatTable(options.yyyymmdd))
    if separateByLat:
        # Here we won't fetch the next day, we will just separate by latitude within
        # a given day
        fetchNextDay = False

    orthoOrFireball = ( (options.type == 'ortho') or (options.type == 'fireball') )

    if fetchNextDay:
        # Normally we fetch for next day only for ortho or fireball. However,
        # for one single special flight, we do it for jpeg too, as then
        # the jpegs are also split. 
       if orthoOrFireball or \
          ((options.type == 'jpeg') and twoFlightsInOneDay(options.site, options.yyyymmdd)):
           fetchNextDay = True
       else:
           fetchNextDay = False
           
    # If we need to parse the next flight day as well, as expected in some runs,
    # we will fetch two html files, but create a single index out of them.
    dayVals = [0]
    if fetchNextDay:
        dayVals.append(1)

    indexPath       = icebridge_common.htmlIndexFile(outputFolder)
    
    currIndexPath   = indexPath
    parsedIndexPath = icebridge_common.csvIndexFile(outputFolder)

    if options.refetchIndex:
        os.system('rm -f ' + indexPath)
        os.system('rm -f ' + parsedIndexPath)

    if icebridge_common.fileNonEmpty(parsedIndexPath):
        logger.info('Already have the index file ' + parsedIndexPath + ', keeping it.')
        return parsedIndexPath
    
    frameDict  = {}
    urlDict    = {}

    # We need the list of jpeg frames. Sometimes when fetching ortho images,
    # and we have to fetch from the next day, don't fetch unless
    # in the jpeg index.
    if len(dayVals) > 1 and options.type != 'jpeg':
        jpegFolder = icebridge_common.getJpegFolder(os.path.dirname(outputFolder))
        jpegIndexPath = icebridge_common.csvIndexFile(jpegFolder)
        (jpegFrameDict, jpegUrlDict) = icebridge_common.readIndexFile(jpegIndexPath)

    orthoStamp = {}
    if options.type == 'fireball':
        # This is a bugfix. Ensure that the fireball DEM has not just
        # the same frame number, but also same timestamp as the ortho.
        orthoFolder = icebridge_common.getOrthoFolder(os.path.dirname(outputFolder))
        orthoIndexPath = icebridge_common.csvIndexFile(orthoFolder)
        (orthoFrameDict, orthoUrlDict) = icebridge_common.readIndexFile(orthoIndexPath)
        for frame in sorted(orthoFrameDict.keys()):
            filename = orthoFrameDict[frame]
            [imageDateString, imageTimeString] = icebridge_common.parseTimeStamps(filename)
            orthoStamp[frame] = imageTimeString
            
    for dayVal in dayVals:

        if len(dayVals) > 1:
            currIndexPath = indexPath + '.day' + str(dayVal)
            if options.refetchIndex:
                os.system('rm -f ' + currIndexPath)
            
        # Find folderUrl which contains all of the files
        if options.type in LIDAR_TYPES:
            options.allFrames = True # For lidar, always get all the frames!
            
            # For lidar, the data can come from one of three sources.
            # Unfortunately sometimes there is more than one source, and then
            # we need to pick by latitude.
            folderUrls = []
            lidar_types = []
            for lidar in LIDAR_TYPES:
                folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                         options.day, dayVal, # note here the dayVal
                                         options.site, lidar)
                logger.info('Checking lidar URL: ' + folderUrl)
                if checkIfUrlExists(folderUrl, baseCurlCmd):
                    logger.info('Found match with lidar type: ' + lidar)
                    folderUrls.append(folderUrl)
                    lidar_types.append(lidar)

            if len(folderUrls) == 0:
                logger.info('WARNING: Could not find any lidar data for the given date!')

            elif len(folderUrls) == 1:
                # Unique solution
                folderUrl = folderUrls[0]
                options.type = lidar_types[0]

            elif len(folderUrls) >= 2:
                # Multiple solutions. Pick the good one by latitude.
                logger.info("Multiples URLs to search: " + " ".join(folderUrls))
                count = -1
                isGood = False
                for folderUrl in folderUrls:
                    count += 1
                    (localFrameDict, localUrlDict) = \
                                     fetchAndParseIndexFileAux(isSouth,
                                                               separateByLat, dayVal,
                                                               baseCurlCmd, folderUrl,
                                                               currIndexPath,
                                                               lidar_types[count])
                    for frame in sorted(localFrameDict.keys()):
                        filename = localFrameDict[frame]
                        xmlFile  = icebridge_common.xmlFile(filename)
                        url      = os.path.join(folderUrl, xmlFile)
                                        
                        # Download the file
                        curlCmd = baseCurlCmd + ' ' + url + ' > ' + xmlFile
                        logger.info(curlCmd)
                        p = subprocess.Popen(curlCmd, shell=True, universal_newlines=True)
                        os.waitpid(p.pid, 0)
                        
                        latitude = icebridge_common.parseLatitude(xmlFile)
                        if os.path.exists(xmlFile): os.remove(xmlFile)

                        if hasGoodLat(latitude, isSouth):
                            isGood = True
                            options.type = lidar_types[count]
                            logger.info("Good latitude " + str(latitude) + ", will use " +
                                        folderUrl + " of type " + lidar_types[count])
                        else:
                            logger.info("Bad latitude " + str(latitude) + ", will not use " +
                                        folderUrl + " of type " + lidar_types[count])
                            
                        # Stop at first file no matter what
                        break

                    if isGood:
                        break

                if not isGood:
                    if options.type in LIDAR_TYPES and options.ignoreMissingLidar:
                        logger.info("No lidar. None of these URLs are good: " +
                                    " ".join(folderUrls))
                    else:
                        raise Exception("None of these URLs are good: " +
                                        " ".join(folderUrls))
                    
        else: # Other cases are simpler
            folderUrl = getFolderUrl(options.yyyymmdd, options.year, options.month,
                                     options.day, dayVal, # note here the dayVal
                                     options.site, options.type)

        logger.info('Fetching from URL: ' + folderUrl)
        (localFrameDict, localUrlDict) = \
                         fetchAndParseIndexFileAux(isSouth,
                                                   separateByLat, dayVal,
                                                   baseCurlCmd, folderUrl,
                                                   currIndexPath, options.type)

        # Append to the main index
        for frame in sorted(localFrameDict.keys()):

            if options.type == 'fireball':
                # This is a bugfix. Ensure that the fireball DEM has not just
                # the same frame number, but also same timestamp as the ortho.
                # Otherwise we may accidentally getting one from next day.
                [imageDateString, imageTimeString] = \
                                  icebridge_common.parseTimeStamps(localFrameDict[frame])
                if frame not in orthoStamp:
                    #logger.info("Missing ortho for fireball: " + localFrameDict[frame])
                    continue
                if abs(int(imageTimeString) - int(orthoStamp[frame])) > 1000:
                    # Apparently a tolerance is needed. Use 10 seconds, so the number 1000.
                    #logger.info("Will not use fireball DEM whose timestamp differs from ortho.")
                    #logger.info("Fireball is: " + localFrameDict[frame])
                    #logger.info("Ortho is:    " + orthoFrameDict[frame])
                    continue
                
            # Fetch from next day, unless already have a value. And don't fetch
            # frames not in the jpeg index.
            if len(dayVals) > 1 and options.type != 'jpeg':
                if not frame in jpegFrameDict.keys(): continue
                if frame in frameDict.keys(): continue
                
            frameDict[frame] = localFrameDict[frame]
            urlDict[frame]   = localUrlDict[frame]
        
    # Write the combined index file
    icebridge_common.writeIndexFile(parsedIndexPath, frameDict, urlDict)
            
    return parsedIndexPath