def getMapunitFeaturesForBoundingBox(config, outputDir, bbox, tileBbox=False, t_srs='EPSG:4326', 
                                     tileDivisor=SSURGO_BBOX_TILE_DIVISOR,
                                     keepOriginals=False,
                                     overwrite=True,
                                     nprocesses=None):
    """ Query USDA Soil Data Mart for SSURGO MapunitPolyExtended features with a given bounding box.
        When tiling is requested and yields more than one tile, the tiles are fetched in parallel
        worker processes and merged into a single shapefile; otherwise the single tile is fetched
        in the current process and converted to a shapefile. The shapefile is stored in the
        specified output directory and its filename is returned as a string.
        Will fetch SSURGO tabular data (see ssurgolib.attributequery.ATTRIBUTE_LIST for a list
        of attributes) and join those data to the features in the final shapefile.
    
        @note Will silently exit if features already exist.
        @note In the single-tile case the intermediate file returned by the tile fetch is always
              deleted after conversion, regardless of keepOriginals.
    
        @param config ConfigParser containing the section 'GDAL/OGR' and option 'PATH_OF_OGR2OGR'
        @param outputDir String representing the absolute/relative path of the directory into which features should be written
        @param bbox A dict containing keys: minX, minY, maxX, maxY, srs, where srs='EPSG:4326'
        @param tileBbox True if bounding box should be tiled if extent exceeds featurequery.MAX_SSURGO_EXTENT
        @param t_srs String representing the spatial reference system of the output shapefiles, of the form 'EPSG:XXXX'
        @param tileDivisor Float representing amount by which to divide tile sides.
        @param keepOriginals Boolean, if True original feature layers will be retained (otherwise they will be deleted);
               passed to mergeFeatureLayers when tiles are merged
        @param overwrite Boolean, if True any existing files will be overwritten; passed to mergeFeatureLayers
               when tiles are merged
        @param nprocesses Integer representing number of processes to use for fetching SSURGO tiles in parallel (used only if bounding box needs to be tiled).
               if None, multiprocessing.cpu_count() will be used.
        
        @return A string representing the name of the shapefile to which the mapunit features were saved.
        
        @exception IOError if output directory is not a directory
        @exception IOError if output directory is not writable
        @exception Exception if tileDivisor is not a positive float > 0.0.
        @exception Exception if bounding box area is greater than MAX_SSURGO_EXTENT
        @exception Exception if no MUKEYs were returned
    """
    if not isinstance(tileDivisor, float) or tileDivisor <= 0.0:
        raise Exception("Tile divisor must be a float > 0.0.")
    if not os.path.isdir(outputDir):
        raise IOError(errno.ENOTDIR, "Output directory %s is not a directory" % (outputDir,))
    if not os.access(outputDir, os.W_OK):
        raise IOError(errno.EACCES, "Not allowed to write to output directory %s" % (outputDir,))
    outputDir = os.path.abspath(outputDir)

    typeName = 'MapunitPolyExtended'

    if tileBbox:
        bboxes = tileBoundingBox(bbox, MAX_SSURGO_EXTENT, t_srs, tileDivisor)
        sys.stderr.write("Dividing bounding box %s into %d tiles\n" % (str(bbox), len(bboxes)))
    else:
        bboxArea = calculateBoundingBoxArea(bbox, t_srs)
        if bboxArea > MAX_SSURGO_EXTENT:
            raise Exception("Bounding box area %.2f sq. km is greater than %.2f sq. km.  You must tile the bounding box." % (bboxArea/1000/1000, MAX_SSURGO_EXTENT/1000/1000,))
        bboxes = [bbox]
    
    numTiles = len(bboxes)
    assert(numTiles >= 1)
    
    outFiles = []
    if numTiles == 1:
        # No tiling, fetch SSURGO features for bbox in the current process
        outFiles.append(_getMapunitFeaturesForBoundingBoxTile(config, outputDir, bboxes[0], typeName, 1, numTiles))
    else:
        # Fetch SSURGO feature tiles in parallel
        if nprocesses is None:
            nprocesses = multiprocessing.cpu_count()
        assert(type(nprocesses) == int)
        assert(nprocesses > 0)
        
        pool = multiprocessing.Pool(nprocesses)
        try:
            # One task per tile; tiles are numbered from 1 for the helper
            results = [pool.apply_async(_getMapunitFeaturesForBoundingBoxTile,
                                        (config, outputDir, bboxTile, typeName, tileNum, numTiles))
                       for tileNum, bboxTile in enumerate(bboxes, 1)]
            
            # Collect resulting filenames in tile order; get() re-raises any worker exception
            for result in results:
                outFiles.append(result.get())
            pool.close()
        except BaseException:
            # A tile failed (or we were interrupted): stop remaining workers
            # instead of leaving them running behind the propagating error
            pool.terminate()
            raise
        finally:
            # Valid after either close() or terminate(); reaps worker processes
            pool.join()
    
    # Join tiled data if necessary
    if len(outFiles) > 1:
        sys.stderr.write('Merging tiled features to single shapefile...')
        sys.stderr.flush()
        shpFilepath = mergeFeatureLayers(config, outputDir, outFiles, typeName,
                                         outFormat=OGR_SHAPEFILE_DRIVER_NAME,
                                         keepOriginals=keepOriginals,
                                         t_srs=t_srs,
                                         overwrite=overwrite)
        shpFilename = os.path.basename(shpFilepath)
        sys.stderr.write('done\n')
    else:
        # Convert GeoJSON to shapefile
        filepath = outFiles[0]
        sys.stderr.write('Converting SSURGO features from GeoJSON to shapefile format...')
        sys.stderr.flush()
        shpFilename = convertGeoJSONToShapefile(config, outputDir, filepath, typeName, t_srs=t_srs)
        os.unlink(filepath)
        sys.stderr.write('done\n')   
    
    return shpFilename
# Example #2
0
def getMapunitFeaturesForBoundingBox(config, outputDir, bbox, tileBbox=False, t_srs='EPSG:4326'):
    """ Query USDA Soil Data Mart for SSURGO MapunitPolyExtended features with a given bounding box.
        Features will be written to one or more shapefiles, one file for each bboxTile tile,
        stored in the specified output directory. The filenames will be returned as a list.
        Will fetch SSURGO tabular data (see ssurgolib.attributequery.ATTRIBUTE_LIST for a list
        of attributes) and join those data to the features in the final shapefiles(s).
    
        @note A tile whose '<typeName>_bbox_<bounds>-attr.gml' file already exists in the output
              directory is skipped and contributes no entry to the returned list.
    
        @param config ConfigParser containing the section 'GDAL/OGR' and option 'PATH_OF_OGR2OGR'
        @param outputDir String representing the absolute/relative path of the directory into which features should be written
        @param bbox A dict containing keys: minX, minY, maxX, maxY, srs, where srs='EPSG:4326'
        @param tileBbox True if bounding box should be tiled if extent exceeds featurequery.MAX_SSURGO_EXTENT
        @param t_srs String representing the spatial reference system of the output shapefiles, of the form 'EPSG:XXXX'
        
        @return A list of strings representing the name of the shapefile(s) to which the mapunit features were saved.
        
        @exception IOError if output directory is not a directory
        @exception IOError if output directory is not writable
        @exception Exception if bounding box area is greater than MAX_SSURGO_EXTENT
        @exception Exception if no MUKEYs were returned
    """
    if not os.path.isdir(outputDir):
        raise IOError(errno.ENOTDIR, "Output directory %s is not a directory" % (outputDir,))
    if not os.access(outputDir, os.W_OK):
        raise IOError(errno.EACCES, "Not allowed to write to output directory %s" % (outputDir,))
    outputDir = os.path.abspath(outputDir)

    typeName = 'MapunitPolyExtended'

    if tileBbox:
        bboxes = tileBoundingBox(bbox, MAX_SSURGO_EXTENT)
        sys.stderr.write("Dividing bounding box %s into %d tiles\n" % (str(bbox), len(bboxes)))
    else:
        if calculateBoundingBoxArea(bbox, t_srs) > MAX_SSURGO_EXTENT:
            raise Exception("Bounding box area is greater than %f sq. meters" % (MAX_SSURGO_EXTENT,))
        bboxes = [bbox]
    
    outFiles = []
    
    for bboxTile in bboxes:
        minX = bboxTile['minX']
        minY = bboxTile['minY']
        maxX = bboxTile['maxX']
        maxY = bboxTile['maxY']
        bboxLabel = "_".join([str(minX), str(minY), str(maxX), str(maxY)])
    
        gmlFilename = "%s_bbox_%s-attr.gml" % (typeName, bboxLabel)
        gmlFilepath = os.path.join(outputDir, gmlFilename)
    
        if os.path.exists(gmlFilepath):
            # Features for this tile already exist.  No shapefile name is known
            # for it here, so skip the tile rather than append an undefined (or
            # the previous tile's) filename.
            continue
        
        sys.stderr.write("Fetching SSURGO data for sub bboxTile %s\n" % bboxLabel)
        
        wfs = WebFeatureService(WFS_URL, version='1.0.0')
        bboxFilter = "<Filter><BBOX><PropertyName>Geometry</PropertyName> <Box srsName='EPSG:4326'><coordinates>%f,%f %f,%f</coordinates> </Box></BBOX></Filter>" % (minX, minY, maxX, maxY)
        gml = wfs.getfeature(typename=(typeName,), filter=bboxFilter, propertyname=None)
    
        # Write intermediate GML to a file
        intGmlFilename = "%s_bbox_%s.gml" % (typeName, bboxLabel)
        intGmlFilepath = os.path.join(outputDir, intGmlFilename)
        with open(intGmlFilepath, 'w') as out:
            out.write(gml.read())
        
        # Parse GML to get list of MUKEYs
        ssurgoFeatureHandler = SSURGOFeatureHandler()
        with open(intGmlFilepath, 'r') as gmlFile:
            xml.sax.parse(gmlFile, ssurgoFeatureHandler)
        mukeys = ssurgoFeatureHandler.mukeys
        
        if len(mukeys) < 1:
            raise Exception("No SSURGO features returned from WFS query.  SSURGO GML format may have changed.\nPlease contact the developer.")
        
        # Get attributes (ksat, texture, %clay, %silt, and %sand) for all components in MUKEYS
        attributes = getParentMatKsatTexturePercentClaySiltSandForComponentsInMUKEYs(mukeys)
        
        # Compute weighted average of soil properties across all components in each map unit
        avgAttributes = computeWeightedAverageKsatClaySandSilt(attributes)
        
        # Convert GML to GeoJSON so that we can add fields easily (GDAL 1.10+ validates GML schema 
        #   and won't let us add fields)
        tmpGeoJSONFilename = convertGMLToGeoJSON(config, outputDir, intGmlFilepath, typeName)
        tmpGeoJSONFilepath = os.path.join(outputDir, tmpGeoJSONFilename)
        
        # Join map unit component-averaged soil properties to the GeoJSON features
        with open(tmpGeoJSONFilepath, 'r') as tmpGeoJSONFile:
            geojson = json.load(tmpGeoJSONFile)
        joinSSURGOAttributesToFeaturesByMUKEY_GeoJSON(geojson, typeName, avgAttributes)
        
        # Write Joined GeoJSON to a file
        with open(tmpGeoJSONFilepath, 'w') as out:
            json.dump(geojson, out)
        
        # Convert GeoJSON to shapefile
        filename = os.path.splitext(intGmlFilename)[0]
        shpFilename = convertGeoJSONToShapefile(config, outputDir, tmpGeoJSONFilepath, filename, t_srs=t_srs)
        
        # Delete intermediate files
        os.unlink(intGmlFilepath)
        os.unlink(tmpGeoJSONFilepath)
        
        outFiles.append(shpFilename)
    
    # TODO: join tiled data if tileBbox
        
    return outFiles
    
    
def getMapunitFeaturesForBoundingBox(config, outputDir, bbox, tileBbox=False, t_srs='EPSG:4326', 
                                     tileDivisor=SSURGO_BBOX_TILE_DIVISOR,
                                     keepOriginals=False,
                                     overwrite=True,
                                     nprocesses=None):
    """ Query USDA Soil Data Mart for SSURGO MapunitPolyExtended features with a given bounding box.
        When tiling is requested and yields more than one tile, the tiles are fetched in parallel
        worker processes and merged into a single shapefile; otherwise the single tile is fetched
        in the current process and converted to a shapefile. The shapefile is stored in the
        specified output directory and its filename is returned as a string.
        Will fetch SSURGO tabular data (see ssurgolib.attributequery.ATTRIBUTE_LIST for a list
        of attributes) and join those data to the features in the final shapefile.
    
        @note Will silently exit if features already exist.
        @note In the single-tile case the intermediate file returned by the tile fetch is always
              deleted after conversion, regardless of keepOriginals.
    
        @param config ConfigParser containing the section 'GDAL/OGR' and option 'PATH_OF_OGR2OGR'
        @param outputDir String representing the absolute/relative path of the directory into which features should be written
        @param bbox A dict containing keys: minX, minY, maxX, maxY, srs, where srs='EPSG:4326'
        @param tileBbox True if bounding box should be tiled if extent exceeds featurequery.MAX_SSURGO_EXTENT
        @param t_srs String representing the spatial reference system of the output shapefiles, of the form 'EPSG:XXXX'
        @param tileDivisor Float representing amount by which to divide tile sides.
        @param keepOriginals Boolean, if True original feature layers will be retained (otherwise they will be deleted);
               passed to mergeFeatureLayers when tiles are merged
        @param overwrite Boolean, if True any existing files will be overwritten; passed to mergeFeatureLayers
               when tiles are merged
        @param nprocesses Integer representing number of processes to use for fetching SSURGO tiles in parallel (used only if bounding box needs to be tiled).
               if None, multiprocessing.cpu_count() will be used.
        
        @return A string representing the name of the shapefile to which the mapunit features were saved.
        
        @exception IOError if output directory is not a directory
        @exception IOError if output directory is not writable
        @exception Exception if tileDivisor is not a positive float > 0.0.
        @exception Exception if bounding box area is greater than MAX_SSURGO_EXTENT
        @exception Exception if no MUKEYs were returned
    """
    if not isinstance(tileDivisor, float) or tileDivisor <= 0.0:
        raise Exception("Tile divisor must be a float > 0.0.")
    if not os.path.isdir(outputDir):
        raise IOError(errno.ENOTDIR, "Output directory %s is not a directory" % (outputDir,))
    if not os.access(outputDir, os.W_OK):
        raise IOError(errno.EACCES, "Not allowed to write to output directory %s" % (outputDir,))
    outputDir = os.path.abspath(outputDir)

    typeName = 'MapunitPolyExtended'

    if tileBbox:
        bboxes = tileBoundingBox(bbox, MAX_SSURGO_EXTENT, t_srs, tileDivisor)
        sys.stderr.write("Dividing bounding box %s into %d tiles\n" % (str(bbox), len(bboxes)))
    else:
        bboxArea = calculateBoundingBoxArea(bbox, t_srs)
        if bboxArea > MAX_SSURGO_EXTENT:
            raise Exception("Bounding box area %.2f sq. km is greater than %.2f sq. km.  You must tile the bounding box." % (bboxArea/1000/1000, MAX_SSURGO_EXTENT/1000/1000,))
        bboxes = [bbox]
    
    numTiles = len(bboxes)
    assert(numTiles >= 1)
    
    outFiles = []
    if numTiles == 1:
        # No tiling, fetch SSURGO features for bbox in the current process
        outFiles.append(_getMapunitFeaturesForBoundingBoxTile(config, outputDir, bboxes[0], typeName, 1, numTiles))
    else:
        # Fetch SSURGO feature tiles in parallel
        if nprocesses is None:
            nprocesses = multiprocessing.cpu_count()
        assert(type(nprocesses) == int)
        assert(nprocesses > 0)
        
        pool = multiprocessing.Pool(nprocesses)
        try:
            # One task per tile; tiles are numbered from 1 for the helper
            results = [pool.apply_async(_getMapunitFeaturesForBoundingBoxTile,
                                        (config, outputDir, bboxTile, typeName, tileNum, numTiles))
                       for tileNum, bboxTile in enumerate(bboxes, 1)]
            
            # Collect resulting filenames in tile order; get() re-raises any worker exception
            for result in results:
                outFiles.append(result.get())
            pool.close()
        except BaseException:
            # A tile failed (or we were interrupted): stop remaining workers
            # instead of leaving them running behind the propagating error
            pool.terminate()
            raise
        finally:
            # Valid after either close() or terminate(); reaps worker processes
            pool.join()
    
    # Join tiled data if necessary
    if len(outFiles) > 1:
        sys.stderr.write('Merging tiled features to single shapefile...')
        sys.stderr.flush()
        shpFilepath = mergeFeatureLayers(config, outputDir, outFiles, typeName,
                                         outFormat=OGR_SHAPEFILE_DRIVER_NAME,
                                         keepOriginals=keepOriginals,
                                         t_srs=t_srs,
                                         overwrite=overwrite)
        shpFilename = os.path.basename(shpFilepath)
        sys.stderr.write('done\n')
    else:
        # Convert GeoJSON to shapefile
        filepath = outFiles[0]
        sys.stderr.write('Converting SSURGO features from GeoJSON to shapefile format...')
        sys.stderr.flush()
        shpFilename = convertGeoJSONToShapefile(config, outputDir, filepath, typeName, t_srs=t_srs)
        os.unlink(filepath)
        sys.stderr.write('done\n')   
    
    return shpFilename