Exemple #1
0
def test_shp_read_and_write():
    """Round-trip DataFrames through df2shp/shp2df and check what comes back.

    Three cases are exercised: an attribute-only table written to a .dbf,
    a table with point geometries written to a .shp, and a table whose
    datetime index is written out as a column (``index=True``).
    """
    # Every file below is written under 'temp', so make sure that folder
    # exists alongside 'output'.
    for folder in ('output', 'temp'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # test without geometry
    df = pd.DataFrame({'reach': np.arange(10000001, 10000100, dtype=int), 'value': np.arange(1, 100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
                       'isTrue': [True, False] * 49 + [True]})
    df2shp(df, 'temp/junk.dbf')
    df = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    # dtypes are expected in the order isTrue, name, reach, value
    assert [d.name for d in df.dtypes] == ['bool', 'object', 'int64', 'float64']
    assert df.isTrue.sum() == 50

    # test with geometry
    df = pd.DataFrame({'reach': np.arange(1, 101, dtype=int), 'value': np.arange(100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(100)],
                       'geometry': [Point([i, i]) for i in range(100)]})
    original_columns = df.columns.tolist()
    df2shp(df, 'temp/junk.shp')
    df = shp2df('temp/junk.shp')
    assert df.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(df.index.values, np.arange(100))  # check ordering of rows
    assert df.columns.tolist() == original_columns  # check column order

    # test datetime handling and retention of index
    # (100 one-second steps, one per row of the frame read back above)
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:39', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.shp', index=True)
    df = shp2df('temp/junk.shp')
    assert 'datetime' in df.columns
    # the timestamp is compared as text, i.e. it is expected back as a string
    assert df.datetime[0] == '2016-01-01 01:00:00'
Exemple #2
0
def test_shp_read_and_write():
    """Round-trip DataFrames through df2shp/shp2df and check what comes back.

    Three cases are exercised: an attribute-only table written to a .dbf
    with ``retain_order=True``, a table with point geometries written to a
    .shp, and a table whose datetime index is written out as a column
    (``index=True``). Write and read times of the first case are printed.
    """
    # Every file below is written under 'temp', so make sure that folder
    # exists alongside 'output'.
    for folder in ('output', 'temp'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # test without geometry
    df = pd.DataFrame({
        'reach': np.arange(10000001, 10000100, dtype=int),
        'value': np.arange(1, 100, dtype=float),
        'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
        'isTrue': [True, False] * 49 + [True]
    })
    cols = ['reach', 'value', 'name', 'isTrue']
    df1 = df[cols]  # designate a column order
    ta = time.time()
    df2shp(df1, 'temp/junk.dbf', retain_order=True)
    print("wrote shapefile in {:.6f}s\n".format(time.time() - ta))
    ta = time.time()
    df2 = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    print("read shapefile in {:.6f}s\n".format(time.time() - ta))
    # NOTE: the column names themselves are not compared here, only the
    # dtypes, which are expected in the order given by ``cols``.
    assert [d.name
            for d in df2.dtypes] == ['int64', 'float64', 'object', 'bool']
    assert df2.isTrue.sum() == 50

    # test with geometry
    df1 = pd.DataFrame({
        'reach': np.arange(1, 101, dtype=int),
        'value': np.arange(100, dtype=float),
        'name': ['stuff{}'.format(i) for i in np.arange(100)],
        'geometry': [Point([i, i]) for i in range(100)]
    })
    cols = ['reach', 'value', 'name', 'geometry'
            ]  # geometry is placed in last column when shp is read in
    df1 = df1[cols]
    df2shp(df1, 'temp/junk.shp', retain_order=True)
    df2 = shp2df('temp/junk.shp')
    assert df2.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(df2.index.values,
                          np.arange(100))  # check ordering of rows
    assert df2.columns.tolist() == cols  # check column order

    # test datetime handling and retention of index
    # (99 one-second steps, one per row of the first, geometry-free frame)
    df.index = pd.date_range('2016-01-01 1:00:00',
                             '2016-01-01 1:01:38',
                             freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.dbf', index=True)
    df = shp2df('temp/junk.dbf')
    assert 'datetime' in df.columns
    # the timestamp is compared as text, i.e. it is expected back as a string
    assert df.datetime[0] == '2016-01-01 01:00:00'
Exemple #3
0
    def _load_shapefile(self, shp, index_field, convert_coordinates, remove_offset, simplify):
        """Read a shapefile into a dataframe and prepare its geometries.

        Parameters
        ----------
        shp : path passed to shp2df and get_proj4
        index_field : column to use as the dataframe index, or None
        convert_coordinates : multiplier applied to x and y of every geometry
        remove_offset : if true, shift geometries by minus the first two
            values of ``self.extent_proj``
        simplify : tolerance handed to each geometry's ``simplify``;
            skipped unless greater than zero

        Returns
        -------
        df : the dataframe, with the 'geometry' column replaced by the
            processed geometries
        """
        df = shp2df(shp)
        if index_field is not None:
            df.index = df[index_field]

        # bring the geometries into this object's projection when they differ
        shp_proj4 = get_proj4(shp)
        if shp_proj4 != self.proj4:
            df['geometry'] = projectdf(df, shp_proj4, self.proj4)

        def scale_xy(x, y, z=None):
            # z is accepted but never returned, so any z values are dropped
            return (x * convert_coordinates,
                    y * convert_coordinates)

        # convert projected coordinate units and/or get rid of z values;
        # only the first geometry is inspected for z values
        needs_rescale = convert_coordinates != 1 or df.iloc[0]['geometry'].has_z
        if needs_rescale:
            df['geometry'] = [transform(scale_xy, geom) for geom in df.geometry]

        # remove model offset from projected coordinates (llcorner = 0,0)
        if remove_offset:
            shifted = [translate(geom,
                                 -1 * self.extent_proj[0],
                                 -1 * self.extent_proj[1])
                       for geom in df.geometry]
            df['geometry'] = shifted

        if simplify > 0:
            simplified = [geom.simplify(simplify) for geom in df.geometry]
            df['geometry'] = simplified
        return df
Exemple #4
0
    def _load_shapefile(self, shp, index_field, convert_coordinates,
                        remove_offset, simplify):
        """Load a shapefile with shp2df and post-process its geometry column.

        The steps, in order: optionally index the frame by ``index_field``,
        reproject to ``self.proj4`` when the shapefile's proj4 string
        differs, scale x/y by ``convert_coordinates`` (which also drops z
        values), optionally subtract the first two values of
        ``self.extent_proj``, and simplify with tolerance ``simplify`` when
        it is greater than zero. The processed dataframe is returned.
        """
        df = shp2df(shp)

        if index_field is not None:
            df.index = df[index_field]

        file_proj4 = get_proj4(shp)
        if file_proj4 != self.proj4:
            df['geometry'] = projectdf(df, file_proj4, self.proj4)

        # convert projected coordinate units and/or get rid of z values if
        # the shapefile has them (judged from the first geometry only)
        if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:

            def to_2d_scaled(x, y, z=None):
                return (x * convert_coordinates, y * convert_coordinates)

            df['geometry'] = list(
                map(lambda shape_: transform(to_2d_scaled, shape_),
                    df.geometry))

        # remove model offset from projected coordinates (llcorner = 0,0)
        if remove_offset:
            df['geometry'] = list(
                map(
                    lambda shape_: translate(shape_,
                                             -1 * self.extent_proj[0],
                                             -1 * self.extent_proj[1]),
                    df.geometry))

        if simplify > 0:
            df['geometry'] = list(
                map(lambda shape_: shape_.simplify(simplify), df.geometry))
        return df
    def __init__(self, shapefile='', shapefile_hru_col='nhru'):
        """Read a shapefile and keep its geometries indexed by an HRU column.

        Parameters
        ----------
        shapefile : str
            Path handed to shp2df.
        shapefile_hru_col : str
            Column used to sort the features and to index ``self.geometry``.

        Notes
        -----
        Problems while sorting or indexing are reported on stdout and not
        re-raised.
        """
        shp = shp2df(shapefile)
        try:
            # DataFrame.sort was removed from pandas; use sort_values when it
            # exists and only fall back to sort on versions that predate it.
            sort_inplace = getattr(shp, 'sort_values', None) or shp.sort
            sort_inplace(shapefile_hru_col, inplace=True)
            self.geometry = shp['geometry']
            self.geometry.index = shp[shapefile_hru_col].values
        except Exception as e:
            # single-argument print calls behave the same on Python 2 and 3
            print('{} {}'.format(Exception, e))
            print('\nPlease supply an index field relating shapefile geometries '
                  'to columns in GDP data (e.g. hru number)')
Exemple #6
0
def test_integer_dtypes():
    """Check integer handling when a DataFrame is written with df2shp.

    The first part confirms that pandas hands back python ints (numpy ints
    are invalid for shapefiles) when an object-typed frame is converted to
    records. The second part only requires that writing and re-reading an
    integer table does not raise.
    """
    frame = pd.DataFrame(np.ones((3, 3)), dtype=int)
    records = frame.astype(object).to_dict(orient='records')
    for record in records:
        assert isinstance(record[0], int)

    ints = pd.DataFrame({'r': np.arange(100), 'c': np.arange(100)})
    df2shp(ints, 'temp/ints.dbf')
    shp2df('temp/ints.dbf')
Exemple #7
0
def test_shp_read_and_write():
    """Round-trip DataFrames through df2shp/shp2df and check what comes back.

    Three cases are exercised: an attribute-only table written to a .dbf
    with ``retain_order=True``, a table with point geometries written to a
    .shp, and a table whose datetime index is written out as a column
    (``index=True``). Write and read times of the first case are printed.
    """
    # Every file below is written under 'temp', so make sure that folder
    # exists alongside 'output'.
    for folder in ('output', 'temp'):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # test without geometry
    df = pd.DataFrame({'reach': np.arange(10000001, 10000100, dtype=int), 'value': np.arange(1, 100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(1, 100)],
                       'isTrue': [True, False] * 49 + [True]})
    cols = ['reach', 'value', 'name', 'isTrue']
    df1 = df[cols]  # designate a column order
    ta = time.time()
    df2shp(df1, 'temp/junk.dbf', retain_order=True)
    print("wrote shapefile in {:.6f}s\n".format(time.time() - ta))
    ta = time.time()
    df2 = shp2df('temp/junk.dbf', true_values='True', false_values='False')
    print("read shapefile in {:.6f}s\n".format(time.time() - ta))
    # NOTE: the column names themselves are not compared here, only the
    # dtypes, which are expected in the order given by ``cols``.
    assert [d.name for d in df2.dtypes] == ['int64', 'float64', 'object', 'bool']
    assert df2.isTrue.sum() == 50

    # test with geometry
    df1 = pd.DataFrame({'reach': np.arange(1, 101, dtype=int), 'value': np.arange(100, dtype=float),
                       'name': ['stuff{}'.format(i) for i in np.arange(100)],
                       'geometry': [Point([i, i]) for i in range(100)]})
    cols = ['reach', 'value', 'name', 'geometry']  # geometry is placed in last column when shp is read in
    df1 = df1[cols]
    df2shp(df1, 'temp/junk.shp', retain_order=True)
    df2 = shp2df('temp/junk.shp')
    assert df2.geometry[0] == Point([0.0, 0.0])
    assert np.array_equal(df2.index.values, np.arange(100))  # check ordering of rows
    assert df2.columns.tolist() == cols  # check column order

    # test datetime handling and retention of index
    # (99 one-second steps, one per row of the first, geometry-free frame)
    df.index = pd.date_range('2016-01-01 1:00:00', '2016-01-01 1:01:38', freq='s')
    df.index.name = 'datetime'
    df2shp(df, 'temp/junk.dbf', index=True)
    df = shp2df('temp/junk.dbf')
    assert 'datetime' in df.columns
    # the timestamp is compared as text, i.e. it is expected back as a string
    assert df.datetime[0] == '2016-01-01 01:00:00'
Exemple #8
0
def test_integer_dtypes():
    """Exercise integer columns on their way into and out of a .dbf file."""
    # verify that pandas is recasting numpy ints as python ints when
    # converting to dict (numpy ints invalid for shapefiles)
    as_objects = pd.DataFrame(np.ones((3, 3)), dtype=int).astype(object)
    rows = as_objects.to_dict(orient='records')
    for row_number in (0, 1, 2):
        first_value = rows[row_number][0]
        assert isinstance(first_value, int)

    # the round trip itself is the check: it passes if nothing raises
    counts = np.arange(100)
    table = pd.DataFrame({'r': counts, 'c': np.arange(100)})
    df2shp(table, 'temp/ints.dbf')
    shp2df('temp/ints.dbf')
    def __init__(self, dir, variable='tmin', scenarios=['20c3m', 'early', 'late'],
                 shapefile='', shapefile_hru_col='nhru'):
        """List the data files for a variable and load shapefile geometries.

        Parameters
        ----------
        dir : str
            Folder that is listed for data files.
        variable : str
            Only file names containing this text are collected.
        scenarios : list of str
            Not used by the statements in this method.
        shapefile : str or None
            Path handed to shp2df; pass None to skip loading geometries.
        shapefile_hru_col : str
            Column used to sort the shapefile features.

        Notes
        -----
        Problems while sorting or indexing are reported on stdout and not
        re-raised.
        """
        tminfiles = [os.path.join(dir, f) for f in os.listdir(dir) if variable in f]

        if shapefile is not None:
            shp = shp2df(shapefile)
            try:
                # DataFrame.sort was removed from pandas; use sort_values when
                # it exists and only fall back to sort on older versions.
                sort_inplace = getattr(shp, 'sort_values', None) or shp.sort
                sort_inplace(shapefile_hru_col, inplace=True)
                self.geometry = shp[['geometry']]
                # geometries are numbered 1..n in sorted order
                self.geometry.index = np.arange(len(shp)) + 1
            except Exception as e:
                # single-argument print calls behave the same on Python 2 and 3
                print('{} {}'.format(Exception, e))
                print('\nPlease supply an index field relating shapefile geometries '
                      'to columns in GDP data (e.g. hru number)')
Exemple #10
0
    def __init__(
        self,
        NHDFlowline,
        PlusFlowlineVAA,
        PlusFlow,
        mf_grid=None,
        mf_grid_node_col=None,
        nrows=None,
        ncols=None,
        mfdis=None,
        xul=None,
        yul=None,
        rot=0,
        model_domain=None,
        flowlines_proj4=None,
        mfgrid_proj4=None,
        domain_proj4=None,
        mf_units_mult=1,
    ):
        """Class for working with information from NHDPlus v2.
        See the user's guide for more information:
        <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

        Parameters
        ----------
        NHDFlowline : str, list of strings or dataframe
            Shapefile, list of shapefiles, or dataframe defining SFR network;
            assigned to the Flowline attribute.
        PlusFlowlineVAA : str, list of strings or dataframe
            DBF file, list of DBF files with NHDPlus attribute information;
            assigned to PlusFlowlineVAA attribute.
        PlusFlow : str, list of strings or dataframe
            DBF file, list of DBF files with routing information;
            assigned to PlusFlow attribute.
        mf_grid : str or dataframe
            Shapefile or dataframe containing MODFLOW grid
        mf_grid_node_col : str
            Column in grid shapefile or dataframe with unique node numbers.
            In case the grid isn't sorted!
            (which will result in mixup if rows and columns are assigned later using the node numbers)
        nrows : int
            (structured grids) Number of model rows
        ncols : int
            (structured grids) Number of model columns
        mfdis : str
            MODFLOW discretization file (not yet supported for this class)
        xul : float, optional
            x offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
        yul : float, optional
            y offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
        rot : float, optional (default 0)
            Grid rotation; only needed if using mfdis instead of shapefile.
        model_domain : str (shapefile) or shapely polygon, optional
            Polygon defining area in which to create SFR cells.
            Default is to create SFR at all intersections between the model grid and NHD flowlines.
        flowlines_proj4 : str, optional
            Proj4 string for coordinate system of NHDFlowlines.
            Only needed if flowlines are supplied in a dataframe.
        mfgrid_proj4 : str, optional
            Proj4 string for coordinate system of the model grid.
            When omitted and mf_grid is not a dataframe, it is read from mf_grid.
        domain_proj4 : str, optional
            Proj4 string for coordinate system of model_domain.
            Only needed if model_domain is supplied as a polygon.
        mf_units_mult : float
            multiplier to convert GIS units to MODFLOW units

        Raises
        ------
        ProjectionError
            If the model grid projection is geographic ("longlat").
        """
        self.Flowline = NHDFlowline
        self.PlusFlowlineVAA = PlusFlowlineVAA

        self.PlusFlow = PlusFlow
        self.fl_cols = [
            "COMID",
            "FCODE",
            "FDATE",
            "FLOWDIR",
            "FTYPE",
            "GNIS_ID",
            "GNIS_NAME",
            "LENGTHKM",
            "REACHCODE",
            "RESOLUTION",
            "WBAREACOMI",
            "geometry",
        ]
        self.pfvaa_cols = ["ArbolateSu", "Hydroseq", "DnHydroseq", "LevelPathI", "StreamOrde"]

        self.mf_grid = mf_grid
        self.model_domain = model_domain
        self.nrows = nrows
        self.ncols = ncols
        self.mfdis = mfdis
        self.xul = xul
        self.yul = yul
        self.rot = rot
        self.mf_units_mult = mf_units_mult
        self.GISunits = None
        self.to_km = None  # converts GIS units to km for arbolate sum

        self.fl_proj4 = flowlines_proj4
        self.mf_grid_proj4 = mfgrid_proj4
        self.domain_proj4 = domain_proj4

        print("Reading input...")
        # handle dataframes or shapefiles as arguments:
        # dataframes are stored as given, anything else is read with shp2df
        sources = {"fl": NHDFlowline, "pf": PlusFlow, "pfvaa": PlusFlowlineVAA, "grid": mf_grid}
        for attr, source in sources.items():
            if isinstance(source, pd.DataFrame):
                self.__dict__[attr] = source
            else:
                self.__dict__[attr] = shp2df(source)

        # model_domain is optional, so only read it when one was supplied
        if model_domain is None:
            self.domain = None
        elif isinstance(model_domain, Polygon):
            self.domain = model_domain
        else:
            # the first feature of the shapefile is taken as the domain
            with fiona.open(model_domain) as domain_src:
                self.domain = shape(next(iter(domain_src))["geometry"])
            self.domain_proj4 = get_proj4(model_domain)

        # sort and pare down the grid
        if mf_grid_node_col is not None:
            # DataFrame.sort was removed from pandas; use sort_values when it
            # exists and only fall back to sort on versions that predate it.
            sort_inplace = getattr(self.grid, "sort_values", None) or self.grid.sort
            sort_inplace(mf_grid_node_col, inplace=True)
            self.grid.index = self.grid[mf_grid_node_col].values
        self.grid = self.grid[["geometry"]]

        # get projections
        if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
            self.mf_grid_proj4 = get_proj4(mf_grid)
        if self.fl_proj4 is None:
            if isinstance(NHDFlowline, list):
                # with several flowline files, the first one supplies the projection
                self.fl_proj4 = get_proj4(NHDFlowline[0])
            elif not isinstance(NHDFlowline, pd.DataFrame):
                self.fl_proj4 = get_proj4(NHDFlowline)

        # set the indices
        for attr, index in {"fl": "COMID", "pfvaa": "ComID"}.items():
            table = self.__dict__[attr]
            if table.index.name != index:
                table.index = table[index]

        # first check that grid is in projected units
        if self.mf_grid_proj4.split("proj=")[1].split()[0].strip() == "longlat":
            raise ProjectionError(self.mf_grid)

        # reproject the NHD Flowlines and model domain to model grid if they aren't
        # (prob a better way to check for same projection)

        # set GIS units from modflow grid projection (used for arbolate sum computation)
        # assumes either m or ft!
        self.GISunits = parse_proj4_units(self.mf_grid_proj4)
        self.to_km = 0.001 if self.GISunits == "m" else 0.001 / 0.3048

        if different_projections(self.fl_proj4, self.mf_grid_proj4):
            print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
            self.fl["geometry"] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

        if model_domain is not None and different_projections(self.domain_proj4, self.mf_grid_proj4):
            print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
            self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)
Exemple #11
0
    def make_collection(self, shp, index_field=None,
                        s=20, fc='0.8', ec='k', lw=0.5, alpha=0.5,
                        color_field=None,
                        cbar=False, clim=(), cmap='jet', cbar_label=None,
                        simplify_patches=100,
                        zorder=5,
                        convert_coordinates=1,
                        remove_offset=True,
                        collection_name=None,
                        **kwargs):
        """Build a matplotlib collection from a shapefile and register it.

        The geometry type of the first feature decides what is built: a
        PatchCollection for polygons, a LineCollection for linestrings, and
        a scatter plot on ``self.ax`` for anything else (treated as points).

        Parameters
        ----------
        shp : str
            Path to the shapefile.
        index_field : str, optional
            Column to use as the dataframe index.
        s : scatter marker size (points only)
        fc, ec, lw, alpha : face colour, edge colour, line width and opacity.
            For lines, ``fc`` may also name a column whose values are set as
            the collection's array.
        color_field, cbar, clim, cbar_label : accepted but not used by this
            method.
        cmap : colormap passed to the PatchCollection.
        simplify_patches : simplification tolerance; skipped unless > 0.
        zorder : drawing order for lines and points.
        convert_coordinates : multiplier applied to x and y.
        remove_offset : if true, shift geometries by minus the first two
            values of ``self.extent_proj``.
        collection_name : key under which the results are stored; defaults
            to the shapefile name without folder or extension.
        **kwargs : passed on to LineCollection or scatter.

        Returns
        -------
        collection : the PatchCollection, LineCollection or scatter result.
            The dataframe, the collection and the feature index of every
            collection member are also stored in ``self.layers``,
            ``self.collections`` and ``self.collection_inds``.
        """
        if collection_name is None:
            collection_name = os.path.split(shp)[-1].split('.')[0]
        df = shp2df(shp)

        if index_field is not None:
            df.index = df[index_field]

        proj4 = get_proj4(shp)

        if proj4 != self.proj4:
            df['geometry'] = projectdf(df, proj4, self.proj4)

        # convert projected coordinate units and/or get rid z values if the shapefile has them
        if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:
            df['geometry'] = [transform(lambda x, y, z=None: (x * convert_coordinates,
                                                              y * convert_coordinates), g)
                              for g in df.geometry]

        # remove model offset from projected coordinates (llcorner = 0,0)
        if remove_offset:
            df['geometry'] = [translate(g,
                                        -1 * self.extent_proj[0],
                                        -1 * self.extent_proj[1]) for g in df.geometry]

        if simplify_patches > 0:
            df['geometry'] = [g.simplify(simplify_patches) for g in df.geometry]

        # Look at the first feature by position: once index_field has
        # replaced the index, a label lookup of 0 is not reliable.
        first_type = df.geometry.iloc[0].geom_type

        if 'Polygon' in first_type:
            print("building PatchCollection...")
            inds = []
            patches = []
            for i, g in df.geometry.items():
                if g.geom_type != 'MultiPolygon':
                    inds.append(i)
                    patches.append(PolygonPatch(g))
                else:
                    # one patch per part, all mapped back to the same feature
                    for part in g.geoms:
                        inds.append(i)
                        patches.append(PolygonPatch(part))

            # NOTE: zorder and **kwargs are not applied to patches
            collection = PatchCollection(patches, cmap=cmap,
                                         facecolor=fc, linewidth=lw, edgecolor=ec, alpha=alpha,
                                         )

        elif 'LineString' in first_type:
            print("building LineCollection...")
            inds = []
            lines = []
            for i, g in df.geometry.items():
                if 'Multi' not in g.geom_type:
                    x, y = g.xy
                    inds.append(i)
                    lines.append(list(zip(x, y)))
                # plot each line in a multilinestring
                else:
                    # go through .geoms, as for multipolygons above
                    for line in g.geoms:
                        x, y = line.xy
                        inds.append(i)
                        lines.append(list(zip(x, y)))

            collection = LineCollection(lines, colors=ec, linewidths=lw, alpha=alpha, zorder=zorder, **kwargs)

            # set the color scheme (could set line thickness by same procedure)
            if fc in df.columns:
                colors = np.array([df[fc][ind] for ind in inds])
                collection.set_array(colors)

        else:
            print("plotting points...")
            x = np.array([g.x for g in df.geometry])
            y = np.array([g.y for g in df.geometry])

            collection = self.ax.scatter(x, y, s=s, c=fc, ec=ec, lw=lw, alpha=alpha, zorder=zorder, **kwargs)
            # points are tracked by position rather than by index label
            inds = list(range(len(x)))

        self.layers[collection_name] = df
        self.collections[collection_name] = collection
        self.collection_inds[collection_name] = inds

        return collection
Exemple #12
0
def get_upstream_area(points,
                      PlusFlow,
                      NHDFlowlines,
                      NHDCatchments,
                      nearfield=None):
    """For each point in points, get upstream drainage area in km2, using
    NHDPlus PlusFlow routing table and NHDPlus Catchment areas. Upstream area
    within the containing catchment is estimated as a fraction proportional
    to the distance of the measurement point along the NHDPlus Flowline
    associated with the catchment.

    Parameters
    ----------
    points : list of shapely Point objects
        Locations of streamflow measurements. Must be in same coordinate system as NHDCatchments
    PlusFlow : str or list of strings
        Path(s) to PlusFlow routing tables
    NHDFlowlines : str or list of strings
        Path(s) to Flowlines shapefiles
    NHDCatchments : str or list of strings
        Path(s) to Catchment shapefiles
    nearfield : shapefile or shapely Polygon
        Nearfield area of model. Used to filter NHDPlus flowlines and catchments to
        greatly speed reading them in and finding the COMIDs associated with points.
        Must be in same coordinate system as points and NHDPlus shapefiles.
        When omitted, no bounding box is passed to the readers.

    Returns
    -------
    upstream_area : list
        List of areas in km2, one entry for every (point, containing
        catchment) match, in the order of points.

    Raises
    ------
    ImportError
        If fiona, shapely or GISio cannot be imported.
    """
    try:
        import fiona
        from shapely.geometry import Polygon, shape
        from GISio import shp2df
    except ImportError:
        # Nothing below can run without these, so stop here with a clear
        # message instead of failing later on an undefined name.
        raise ImportError('This method requires fiona, shapely and GIS_utils.')

    if isinstance(nearfield, Polygon):
        bbox = nearfield.bounds
    elif isinstance(nearfield, str):
        # the first feature of the shapefile defines the nearfield;
        # bounds is an attribute, not a method
        with fiona.open(nearfield) as src:
            bbox = shape(next(iter(src))['geometry']).bounds
    else:
        bbox = None

    if bbox is not None:
        # dilate the bounding box by half of its width and height on every
        # side, so that features aren't missed.
        x = 0.5 * (bbox[2] - bbox[0])
        y = 0.5 * (bbox[3] - bbox[1])
        bbox = (bbox[0] - x, bbox[1] - y, bbox[2] + x, bbox[3] + y)

    # NOTE(review): filter=None is assumed to mean "read everything" --
    # confirm against shp2df.
    pf = shp2df(PlusFlow)
    fl = shp2df(NHDFlowlines, index='COMID', filter=bbox)
    cmt = shp2df(NHDCatchments, index='FEATUREID', filter=bbox)

    # find the catchment containing each point in points; each match keeps
    # its point so the two stay paired even when a point falls in no
    # catchment or in more than one
    matches = []
    for p in points:
        inside = np.array([p.within(g) for g in cmt.geometry], dtype=bool)
        matches += [(p, comid) for comid in cmt.FEATUREID[inside].tolist()]

    upstream_area = []
    for point, comid in matches:
        # walk up the routing table, collecting every comid that drains to
        # this one (0 is excluded; the walk is capped at 1000 steps)
        network = {comid}
        upstream = [comid]
        for _ in range(1000):
            upstream = set(pf.loc[pf.TOCOMID.isin(upstream),
                                  'FROMCOMID']).difference({0})
            if len(upstream) == 0:
                break
            network.update(upstream)

        # comids without a catchment record (e.g. outside the filtered
        # area) simply contribute nothing to the sum
        in_network = cmt.index.isin(list(network))
        total_upstream_area = cmt.loc[in_network, 'AreaSqKM'].sum()

        # estimate fraction of containing catchment that is upstream by
        # projecting the point onto the flowline and dividing the distance
        # along the line by the total length
        flowline = fl.loc[comid, 'geometry']
        frac = flowline.project(point) / flowline.length

        # NOTE(review): the containing catchment is already part of
        # `network`, so its full area is in the sum above and the fractional
        # area is added on top of it -- confirm this is intended.
        upstream_in_catchment = cmt.loc[comid, 'AreaSqKM'] * frac
        total_upstream_area += upstream_in_catchment
        upstream_area.append(total_upstream_area)

    return upstream_area
Exemple #13
0
from flopy.utils.reference import SpatialReference

# basic grid parameters: a 20 x 20 grid of square cells, 5280 * .3048 on a
# side, positioned by its upper left corner
name = 'map_test'
epsg = 5070
xul = 520487.3
yul = 1194668.3
nrow = 20
ncol = 20
dxy = 5280 * .3048
buf = 1e4
# grid extent padded by buf on every side: (xmin, ymin, xmax, ymax)
bounds = (xul - buf,
          yul - dxy * nrow - buf,
          xul + dxy * ncol + buf,
          yul + buf)

# make version of preprocessed flowlines filtered to bounding box
df = shp2df(
    '/Users/aleaf/Documents/MAP/repos/sfr_output/preprocessed/flowlines_gt20km/flowlines_gt20km_edited.shp',
    filter=bounds,
)
df2shp(df, 'data/{}_flowlines.shp'.format(name), epsg=epsg)

# make a spatial reference object defining the grid
sr = SpatialReference(
    delr=np.ones(ncol, dtype=float) * dxy,
    delc=np.ones(nrow, dtype=float) * dxy,
    xul=xul,
    yul=yul,
    epsg=epsg,
)

# export sr info to json file: start from the object's own attributes and
# add the grid dimensions, cell sizes (first value of each) and epsg code
model_info = sr.attribute_dict
model_info.update([
    ('nrow', sr.nrow),
    ('ncol', sr.ncol),
    ('delr', sr.delr[0]),
    ('delc', sr.delc[0]),
    ('epsg', sr.epsg),
])

with open('data/{}_grid.json'.format(name), 'w') as output:
Exemple #14
0
    def make_collection(self,
                        shp,
                        index_field=None,
                        s=20,
                        fc='0.8',
                        ec='k',
                        lw=0.5,
                        alpha=0.5,
                        color_field=None,
                        cbar=False,
                        clim=(),
                        cmap='jet',
                        cbar_label=None,
                        simplify_patches=100,
                        zorder=5,
                        convert_coordinates=1,
                        remove_offset=True,
                        collection_name=None,
                        **kwargs):
        """Build a matplotlib collection from a shapefile and register it.

        The geometry type of the first feature decides what is built: a
        PatchCollection for polygons, a LineCollection for linestrings, and
        a scatter plot on ``self.ax`` for anything else (treated as points).

        Parameters
        ----------
        shp : str
            Path to the shapefile.
        index_field : str, optional
            Column to use as the dataframe index.
        s : scatter marker size (points only)
        fc, ec, lw, alpha : face colour, edge colour, line width and opacity.
            For lines, ``fc`` may also name a column whose values are set as
            the collection's array.
        color_field, cbar, clim, cbar_label : accepted but not used by this
            method.
        cmap : colormap passed to the PatchCollection.
        simplify_patches : simplification tolerance; skipped unless > 0.
        zorder : drawing order for lines and points.
        convert_coordinates : multiplier applied to x and y.
        remove_offset : if true, shift geometries by minus the first two
            values of ``self.extent_proj``.
        collection_name : key under which the results are stored; defaults
            to the shapefile name without folder or extension.
        **kwargs : passed on to LineCollection or scatter.

        Returns
        -------
        collection : the PatchCollection, LineCollection or scatter result.
            The dataframe, the collection and the feature index of every
            collection member are also stored in ``self.layers``,
            ``self.collections`` and ``self.collection_inds``.
        """
        if collection_name is None:
            collection_name = os.path.split(shp)[-1].split('.')[0]
        df = shp2df(shp)

        if index_field is not None:
            df.index = df[index_field]

        proj4 = get_proj4(shp)

        if proj4 != self.proj4:
            df['geometry'] = projectdf(df, proj4, self.proj4)

        # convert projected coordinate units and/or get rid z values if the shapefile has them
        if convert_coordinates != 1 or df.iloc[0]['geometry'].has_z:
            df['geometry'] = [
                transform(lambda x, y, z=None:
                          (x * convert_coordinates, y * convert_coordinates),
                          g) for g in df.geometry
            ]

        # remove model offset from projected coordinates (llcorner = 0,0)
        if remove_offset:
            df['geometry'] = [
                translate(g, -1 * self.extent_proj[0],
                          -1 * self.extent_proj[1]) for g in df.geometry
            ]

        if simplify_patches > 0:
            df['geometry'] = [
                g.simplify(simplify_patches) for g in df.geometry
            ]

        # Look at the first feature by position: once index_field has
        # replaced the index, a label lookup of 0 is not reliable.
        first_type = df.geometry.iloc[0].geom_type

        if 'Polygon' in first_type:
            print("building PatchCollection...")
            inds = []
            patches = []
            for i, g in df.geometry.items():
                if g.geom_type != 'MultiPolygon':
                    inds.append(i)
                    patches.append(PolygonPatch(g))
                else:
                    # one patch per part, all mapped back to the same feature
                    for part in g.geoms:
                        inds.append(i)
                        patches.append(PolygonPatch(part))

            # NOTE: zorder and **kwargs are not applied to patches
            collection = PatchCollection(
                patches,
                cmap=cmap,
                facecolor=fc,
                linewidth=lw,
                edgecolor=ec,
                alpha=alpha,
            )

        elif 'LineString' in first_type:
            print("building LineCollection...")
            inds = []
            lines = []
            for i, g in df.geometry.items():
                if 'Multi' not in g.geom_type:
                    x, y = g.xy
                    inds.append(i)
                    lines.append(list(zip(x, y)))
                # plot each line in a multilinestring
                else:
                    # go through .geoms, as for multipolygons above
                    for line in g.geoms:
                        x, y = line.xy
                        inds.append(i)
                        lines.append(list(zip(x, y)))

            collection = LineCollection(lines,
                                        colors=ec,
                                        linewidths=lw,
                                        alpha=alpha,
                                        zorder=zorder,
                                        **kwargs)

            # set the color scheme (could set line thickness by same procedure)
            if fc in df.columns:
                colors = np.array([df[fc][ind] for ind in inds])
                collection.set_array(colors)

        else:
            print("plotting points...")
            x = np.array([g.x for g in df.geometry])
            y = np.array([g.y for g in df.geometry])

            collection = self.ax.scatter(x,
                                         y,
                                         s=s,
                                         c=fc,
                                         ec=ec,
                                         lw=lw,
                                         alpha=alpha,
                                         zorder=zorder,
                                         **kwargs)
            # points are tracked by position rather than by index label
            inds = list(range(len(x)))

        self.layers[collection_name] = df
        self.collections[collection_name] = collection
        self.collection_inds[collection_name] = inds

        return collection
Exemple #15
0
    def __init__(self, NHDFlowline, PlusFlowlineVAA, PlusFlow,
                 mf_grid=None, mf_grid_node_col=None,
                 nrows=None, ncols=None,
                 mfdis=None, xul=None, yul=None, rot=0,
                 model_domain=None,
                 flowlines_proj4=None, mfgrid_proj4=None, domain_proj4=None,
                 mf_units_mult=1):
        """Class for working with information from NHDPlus v2.
        See the user's guide for more information:
        <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

        Parameters
        ==========
        NHDFlowline : str, list of strings or dataframe
            Shapefile, list of shapefiles, or dataframe defining SFR network;
            assigned to the Flowline attribute.
        PlusFlowlineVAA : str, list of strings or dataframe
            DBF file, list of DBF files with NHDPlus attribute information;
            assigned to PlusFlowlineVAA attribute.
        PlusFlow : str, list of strings or dataframe
            DBF file, list of DBF files with routing information;
            assigned to PlusFlow attribute.
        mf_grid : str or dataframe
            Shapefile or dataframe containing MODFLOW grid
        mf_grid_node_col : str
            Column in grid shapefile or dataframe with unique node numbers.
            In case the grid isn't sorted!
            (which will result in mixup if rows and columns are assigned later using the node numbers)
        nrows : int
            (structured grids) Number of model rows
        ncols : int
            (structured grids) Number of model columns
        mfdis : str
            MODFLOW discretization file (not yet supported for this class)
        xul : float, optional
            x offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
        yul : float, optional
            y offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
        rot : float, optional (default 0)
            Grid rotation; only needed if using mfdis instead of shapefile.
        model_domain : str (shapefile) or shapely polygon, optional
            Polygon defining area in which to create SFR cells.
            Default is to create SFR at all intersections between the model grid and NHD flowlines.
        flowlines_proj4 : str, optional
            Proj4 string for coordinate system of NHDFlowlines.
            Only needed if flowlines are supplied in a dataframe.
        domain_proj4 : str, optional
            Proj4 string for coordinate system of model_domain.
            Only needed if model_domain is supplied as a polygon.
        mf_units_mult : float
            multiplier to convert GIS units to MODFLOW units
        """
        self.Flowline = NHDFlowline
        self.PlusFlowlineVAA = PlusFlowlineVAA

        self.PlusFlow = PlusFlow
        self.fl_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR',
                          'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
                          'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
        self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq',
                      'LevelPathI', 'StreamOrde']

        self.mf_grid = mf_grid
        self.model_domain = model_domain
        self.nrows = nrows
        self.ncols = ncols
        self.mfdis = mfdis
        self.xul = xul
        self.yul = yul
        self.rot = rot
        self.mf_units_mult = mf_units_mult
        self.GISunits = None
        self.to_km = None # converts GIS units to km for arbolate sum

        self.fl_proj4 = flowlines_proj4
        self.mf_grid_proj4 = mfgrid_proj4
        self.domain_proj4 = domain_proj4

        print "Reading input..."
        # handle dataframes or shapefiles as arguments
        # get proj4 for any shapefiles that are submitted
        for attr, input in {'fl': NHDFlowline,
                            'pf': PlusFlow,
                            'pfvaa': PlusFlowlineVAA,
                            'grid': mf_grid}.iteritems():
            if isinstance(input, pd.DataFrame):
                self.__dict__[attr] = input
            else:
                self.__dict__[attr] = shp2df(input)
        if isinstance(model_domain, Polygon):
            self.domain = model_domain
        elif isinstance(model_domain, str):
            self.domain = shape(fiona.open(model_domain).next()['geometry'])
            self.domain_proj4 = get_proj4(model_domain)
        else:
            #print 'setting model domain to extent of grid...'
            #self.domain = unary_union(self.grid.geometry.tolist())

        # sort and pair down the grid
        if mf_grid_node_col is not None:
            self.grid.sort(mf_grid_node_col, inplace=True)
            self.grid.index = self.grid[mf_grid_node_col].values
        self.grid = self.grid[['geometry']]

        # get projections
        if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
            self.mf_grid_proj4 = get_proj4(mf_grid)
        if self.fl_proj4 is None:
            if isinstance(NHDFlowline, list):
                self.fl_proj4 = get_proj4(NHDFlowline[0])
            elif not isinstance(NHDFlowline, pd.DataFrame):
                self.fl_proj4 = get_proj4(NHDFlowline)

        # set the indices
        for attr, index in {'fl': 'COMID', 'pfvaa': 'ComID'}.iteritems():
            if not self.__dict__[attr].index.name == index:
                self.__dict__[attr].index = self.__dict__[attr][index]

        # first check that grid is in projected units
        if self.mf_grid_proj4.split('proj=')[1].split()[0].strip() == 'longlat':
            raise ProjectionError(self.mf_grid)

        # reproject the NHD Flowlines and model domain to model grid if they aren't
        # (prob a better way to check for same projection)

        # set GIS units from modflow grid projection (used for arbolate sum computation)
        # assumes either m or ft!
        self.GISunits = parse_proj4_units(self.mf_grid_proj4)
        self.to_km = [0.001 if self.GISunits == 'm' else 0.001/0.3048][0]

        if different_projections(self.fl_proj4, self.mf_grid_proj4):
            print "reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4)
            self.fl['geometry'] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

        if model_domain is not None \
                and different_projections(self.domain_proj4, self.mf_grid_proj4):
            print "reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4)
            self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)



    def list_updown_comids(self):
        """Add lists of downstream and upstream COMIDs to ``self.df``.

        Routing comes from the PlusFlow table (``self.pf``), culled to the
        entries whose FROMCOMID or TOCOMID is in ``self.df.index``. A nonzero
        TOCOMID that is absent from ``self.df`` is replaced by the value that
        ``find_next`` returns for it; a FROMCOMID that is absent from
        ``self.df`` is set to 0. ``self.pf`` itself is not modified.

        Two columns are written to ``self.df``, each holding one list per row:

        dncomids : TOCOMID of every routing entry leaving that COMID
        upcomids : FROMCOMID of every routing entry arriving at that COMID
        """
        comids = self.df.index.tolist()

        # cull plusflow table to comids in model
        # (.loc instead of .ix, which was removed in pandas 1.0)
        touches_model = self.pf.FROMCOMID.isin(comids) | self.pf.TOCOMID.isin(comids)
        pf = self.pf.loc[touches_model].copy()

        # subset PlusFlow entries for comids that are not in flowlines dataset
        # comids may be missing because they are outside of the model
        # or if the flowlines dataset was edited (resulting in breaks in the routing)
        missing_tocomids = ~pf.TOCOMID.isin(comids) & (pf.TOCOMID != 0)
        if missing_tocomids.any():
            missing = pf.loc[missing_tocomids, ['FROMCOMID', 'TOCOMID']].copy()
            # crawl the PlusFlow table to try to find a downstream comid
            # that is in the flowlines dataset
            # (presumably find_next returns 0 when none exists -- TODO confirm)
            missing['nextCOMID'] = [find_next(tc, self.pf, comids) for tc in missing.TOCOMID]
            pf.loc[missing_tocomids, 'TOCOMID'] = missing.nextCOMID

        # set any remaining upstream comids not in model to zero
        # (inlets from outside model; downstream ones are handled above)
        pf.loc[~pf.FROMCOMID.isin(comids), 'FROMCOMID'] = 0

        tocomid = pf.TOCOMID.values
        fromcomid = pf.FROMCOMID.values
        self.df['dncomids'] = [tocomid[fromcomid == c].tolist() for c in comids]
        self.df['upcomids'] = [fromcomid[tocomid == c].tolist() for c in comids]

    def assign_segments(self):
        """Number the segments and translate COMID routing into segment routing.

        Expects ``self.df`` to be indexed by COMID and to carry the columns
        'COMID', 'LevelPathI', 'dncomids' and 'upcomids'.

        Writes to ``self.df``:

        segment : 1-based segment number, assigned in order of COMID
        dncomids : for rows with more than one downstream COMID, reduced to
            the downstream COMIDs that share the row's LevelPathI
            (or to the first listed downstream COMID if none match)
        upsegs, dnsegs : segment numbers for upcomids / dncomids (0 stays 0)
        outseg : first entry of dnsegs

        On return ``self.df`` is sorted by segment.
        """
        # create segment numbers
        # (sort_values replaces DataFrame.sort, which was removed from pandas)
        self.df.sort_values(by='COMID', inplace=True)
        self.df['segment'] = np.arange(len(self.df)) + 1

        # reduce dncomids to 1 per segment
        # Each downstream COMID is looked up individually. Masking the dncomids
        # list with level paths taken in dataframe row order picks the wrong
        # COMID whenever the list is not in the same order as the rows, and
        # fails outright when an entry (e.g. 0) has no row at all.
        levelpath_of = dict(zip(self.df.COMID, self.df.LevelPathI))
        dncomids = []
        for downstream, levelpath in zip(self.df.dncomids, self.df.LevelPathI):
            if len(downstream) > 1:
                # select the dncomid(s) that have a matching levelpath
                matching = [c for c in downstream if levelpath_of.get(c) == levelpath]
                # if none match, select the first dncomid
                downstream = matching if matching else [downstream[0]]
            dncomids.append(downstream)
        self.df['dncomids'] = dncomids

        # assign upsegs and outsegs based on NHDPlus routing
        # (index label -> segment; the index is expected to be the COMID)
        segment_of = self.df.segment.to_dict()
        self.df['upsegs'] = [[segment_of[c] if c != 0 else 0 for c in comids]
                             for comids in self.df.upcomids]
        self.df['dnsegs'] = [[segment_of[c] if c != 0 else 0 for c in comids]
                             for comids in self.df.dncomids]

        # make a column of outseg integers
        self.df['outseg'] = [d[0] for d in self.df.dnsegs]
        self.df.sort_values(by='segment', inplace=True)

    def to_sfr(self, roughness=0.037, streambed_thickness=1, streambedK=1,
               icalc=1,
               iupseg=0, iprior=0, nstrpts=0, flow=0, runoff=0, etsw=0, pptsw=0,
               roughch=0, roughbk=0, cdepth=0, fdepth=0, awdth=0, bwdth=0):
        """Build the SFR reach table (``self.m1``) and segment table (``self.m2``).

        Joins the flowline and PlusFlowlineVAA attributes into ``self.df``,
        keeps the flowlines that intersect ``self.domain``, intersects them
        with the grid cells, numbers the segments, and then fills in the
        reach properties (length, width, roughness, streambed values and,
        for structured grids, row and column).

        Parameters
        ----------
        roughness : float
            Value written to the 'roughness' column of Mat1.
        streambed_thickness : float
            Value written to the 'sbthick' column of Mat1.
        streambedK : float
            Value written to the 'sbK' column of Mat1.
        icalc : int
            Value written to the 'icalc' column of Mat2.
        iupseg, iprior, nstrpts, flow, runoff, etsw, pptsw,
        roughch, roughbk, cdepth, fdepth, awdth, bwdth
            Accepted but not used by this method.

        Raises
        ------
        ValueError
            If ``nrows`` was given without ``ncols``; rows cannot be derived
            from node numbers without the number of columns.
        """
        # fail before the expensive intersection rather than after it
        if self.nrows is not None and self.ncols is None:
            raise ValueError('ncols is required to compute rows from node numbers.')

        # create a working dataframe
        self.df = self.fl[self.fl_cols].join(self.pfvaa[self.pfvaa_cols], how='inner')

        print('\nclipping flowlines to active area...')
        inside = [g.intersects(self.domain) for g in self.df.geometry]
        # .loc / sort_values replace .ix / DataFrame.sort (removed from pandas)
        self.df = self.df.loc[inside].copy()
        self.df.sort_values(by='COMID', inplace=True)
        flowline_geoms = self.df.geometry.tolist()
        grid_geoms = self.grid.geometry.tolist()

        print("intersecting flowlines with grid cells...")
        grid_intersections = GISops.intersect_rtree(grid_geoms, flowline_geoms)

        print("setting up segments...")
        self.list_updown_comids()
        self.assign_segments()
        fl_segments = self.df.segment.tolist()
        fl_comids = self.df.COMID.tolist()

        m1 = make_mat1(flowline_geoms, fl_segments, fl_comids, grid_intersections, grid_geoms)

        print("computing widths...")
        m1['length'] = np.array([g.length for g in m1.geometry])
        lengths = m1[['segment', 'length']].copy()
        # within each segment: length from the start of each reach to the segment end
        # NOTE(review): concatenating per-segment results assumes m1 rows are
        # already ordered by segment -- confirm against make_mat1
        reach_asums = np.concatenate([np.cumsum(grp.length.values[::-1])[::-1]
                                      for _, grp in lengths.groupby('segment')])
        # segment numbers are 1-based positions in self.df (sorted by segment)
        segment_asums = np.array([self.df.ArbolateSu.values[s-1] for s in m1.segment.values])
        reach_asums = -1 * self.to_km * reach_asums + segment_asums # arbolate sums are computed in km
        width = width_from_arbolate(reach_asums) # widths are returned in m
        if self.GISunits != 'm':
            width = width / 0.3048
        m1['width'] = width * self.mf_units_mult
        m1['length'] = m1.length * self.mf_units_mult

        m1['roughness'] = roughness
        m1['sbthick'] = streambed_thickness
        m1['sbK'] = streambedK
        m1['sbtop'] = 0

        if self.nrows is not None:
            # node numbers are 1-based (see the column logic below), so shift by one
            # before dividing; otherwise nodes in the last column land in the next row
            m1['row'] = np.floor((m1.node - 1) / self.ncols) + 1
        if self.ncols is not None:
            column = m1.node.values % self.ncols
            column[column == 0] = self.ncols # last column has remainder of 0
            m1['column'] = column
        m1['layer'] = 1

        self.m1 = m1

        print("setting up Mat2...")
        # copy so that adding icalc does not write into a slice of self.df
        self.m2 = self.df[['segment', 'outseg']].copy()
        self.m2['icalc'] = icalc
        self.m2.index = self.m2.segment
        print('Done')

    def write_tables(self, basename='SFR'):
        """Write tables with SFR reach (Mat1) and segment (Mat2) information out to csv files.

        Reads ``self.m1`` and ``self.m2``. A 'row' column is included in Mat1
        when ``self.nrows`` is set, and a 'column' column when ``self.ncols``
        is set. Both files are written without the dataframe index.

        Parameters
        ----------
        basename: string
            e.g. Mat1 is written to <basename>Mat1.csv
        """
        m1_cols = ['node', 'layer', 'segment', 'reach', 'sbtop', 'width', 'length',
                   'sbthick', 'sbK', 'roughness', 'reachID']
        m2_cols = ['segment', 'icalc', 'outseg']
        if self.nrows is not None:
            m1_cols.insert(1, 'row')
        if self.ncols is not None:
            m1_cols.insert(2, 'column')

        # print() call form works on both Python 2 and Python 3
        print("writing Mat1 to {0}{1}, Mat2 to {0}{2}".format(basename, 'Mat1.csv', 'Mat2.csv'))
        self.m1[m1_cols].to_csv(basename + 'Mat1.csv', index=False)
        self.m2[m2_cols].to_csv(basename + 'Mat2.csv', index=False)

    def write_linework_shapefile(self, basename='SFR'):
        """Write a shapefile containing linework for each SFR reach,
        with segment, reach, model node number, and NHDPlus COMID attribute information

        The 'reachID', 'node', 'segment', 'reach', 'comid' and 'geometry'
        columns of ``self.m1`` are passed to ``df2shp`` together with the
        model grid projection (``self.mf_grid_proj4``).

        Parameters
        ----------
        basename: string
            Output will be written to <basename>.shp
        """
        shapefile = basename + '.shp'
        # print() call form works on both Python 2 and Python 3
        print("writing reach geometries to {}".format(shapefile))
        df2shp(self.m1[['reachID', 'node', 'segment', 'reach', 'comid', 'geometry']],
               shapefile, proj4=self.mf_grid_proj4)
Exemple #16
0
    def __init__(self, NHDFlowline=None, PlusFlowlineVAA=None, PlusFlow=None, NHDFcode=None,
                 elevslope=None,
                 mf_grid=None, mf_grid_node_col=None,
                 nrows=None, ncols=None,
                 mfdis=None, xul=None, yul=None, rot=0,
                 model_domain=None,
                 flowlines_proj4=None, mfgrid_proj4=None, domain_proj4=None,
                 mf_units='feet'):
        """Class for working with information from NHDPlus v2.
        See the user's guide for more information:
        <http://www.horizon-systems.com/NHDPlus/NHDPlusV2_documentation.php#NHDPlusV2 User Guide>

        Parameters
        ==========
        NHDFlowline : str, list of strings or dataframe
            Shapefile, list of shapefiles, or dataframe defining SFR network;
            assigned to the Flowline attribute.
        PlusFlowlineVAA : str, list of strings or dataframe
            DBF file, list of DBF files with NHDPlus attribute information;
            assigned to PlusFlowlineVAA attribute.
        PlusFlow : str, list of strings or dataframe
            DBF file, list of DBF files with routing information;
            assigned to PlusFlow attribute.
        NHDFcode : optional
            Accepted but not used by this method.
        elevslope : str, list of strings or dataframe
            DBF file, list of DBF files, or dataframe with COMID, MAXELEVSMO
            and MINELEVSMO columns; assigned to the elevslope attribute and
            read into the elevs attribute.
        mf_grid : str or dataframe
            Shapefile or dataframe containing MODFLOW grid
        mf_grid_node_col : str
            Column in grid shapefile or dataframe with unique node numbers.
            In case the grid isn't sorted!
            (which will result in mixup if rows and columns are assigned later using the node numbers)
        nrows : int
            (structured grids) Number of model rows
        ncols : int
            (structured grids) Number of model columns
        mfdis : str
            MODFLOW discretization file (not yet supported for this class)
        xul : float, optional
            x offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
        yul : float, optional
            y offset of upper left corner of grid. Only needed if using mfdis instead of shapefile
        rot : float, optional (default 0)
            Grid rotation; only needed if using mfdis instead of shapefile.
        model_domain : str (shapefile) or shapely polygon, optional
            Polygon defining area in which to create SFR cells.
            Default is to create SFR at all intersections between the model grid and NHD flowlines.
        flowlines_proj4 : str, optional
            Proj4 string for coordinate system of NHDFlowlines.
            Only needed if flowlines are supplied in a dataframe.
        mfgrid_proj4 : str, optional
            Proj4 string for coordinate system of the model grid.
            Only needed if the grid is supplied in a dataframe.
        domain_proj4 : str, optional
            Proj4 string for coordinate system of model_domain.
            Only needed if model_domain is supplied as a polygon.
        mf_units : str, 'feet' or 'meters'
            Length units of MODFLOW model

        Raises
        ======
        ValueError
            If no projection is available for the model grid.
        ProjectionError
            If the model grid projection is geographic ('longlat').
        """
        self.Flowline = NHDFlowline
        self.PlusFlowlineVAA = PlusFlowlineVAA

        self.PlusFlow = PlusFlow
        self.elevslope = elevslope
        self.fl_cols = ['COMID', 'FCODE', 'FDATE', 'FLOWDIR',
                          'FTYPE', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
                          'REACHCODE', 'RESOLUTION', 'WBAREACOMI', 'geometry']
        self.pfvaa_cols = ['ArbolateSu', 'Hydroseq', 'DnHydroseq',
                      'LevelPathI', 'StreamOrde']

        self.mf_grid = mf_grid
        self.model_domain = model_domain
        self.nrows = nrows
        self.ncols = ncols
        self.mfdis = mfdis
        self.xul = xul
        self.yul = yul
        self.rot = rot

        # unit conversions (set below after grid projection is verified)
        self.mf_units = mf_units
        self.mf_units_mult = 1.0 # go from GIS units to model units
        self.GISunits = None # set from the grid projection below
        self.to_km = None # converts GIS units to km for arbolate sum

        self.fl_proj4 = flowlines_proj4
        self.mf_grid_proj4 = mfgrid_proj4
        self.domain_proj4 = domain_proj4

        print("Reading input...")
        # handle dataframes or shapefiles as arguments
        # (loop variable is not called `input`, which would shadow the builtin)
        for attr, source in {'fl': NHDFlowline,
                             'pf': PlusFlow,
                             'pfvaa': PlusFlowlineVAA,
                             'elevs': elevslope,
                             'grid': mf_grid}.items():
            if isinstance(source, pd.DataFrame):
                self.__dict__[attr] = source
            else:
                self.__dict__[attr] = shp2df(source)
        if isinstance(model_domain, Polygon):
            self.domain = model_domain
        elif isinstance(model_domain, str):
            # read the first feature and close the file again;
            # next(iter(...)) replaces the deprecated Collection.next()
            with fiona.open(model_domain) as domain_src:
                self.domain = shape(next(iter(domain_src))['geometry'])
            self.domain_proj4 = get_proj4(model_domain)
        else:
            print('setting model domain to extent of grid ' \
                  'by performing unary union of grid cell geometries...\n' \
                  '(may take a few minutes for large grids)')
            # add tiny buffer to overcome floating point errors in gridcell geometries
            # (otherwise a multipolygon feature may be returned)
            geoms = [g.buffer(0.001) for g in self.grid.geometry.tolist()]
            self.domain = unary_union(geoms)

        # sort and pare down the grid
        if mf_grid_node_col is not None:
            self.grid.sort_values(by=mf_grid_node_col, inplace=True)
            self.grid.index = self.grid[mf_grid_node_col].values
        else:
            # adjacent literals instead of backslash continuations inside one
            # literal, which embedded the source indentation in the message
            print('Warning: Node field for grid shape file not supplied. '
                  'Node numbers will be assigned using index. '
                  'This may result in incorrect location of SFR reaches.')
        self.grid = self.grid[['geometry']]

        # get projections
        if self.mf_grid_proj4 is None and not isinstance(mf_grid, pd.DataFrame):
            self.mf_grid_proj4 = get_proj4(mf_grid)
        if self.fl_proj4 is None:
            if isinstance(NHDFlowline, list):
                self.fl_proj4 = get_proj4(NHDFlowline[0])
            elif not isinstance(NHDFlowline, pd.DataFrame):
                self.fl_proj4 = get_proj4(NHDFlowline)

        # set the indices
        for attr, index in {'fl': 'COMID',
                            'pfvaa': 'ComID',
                            'elevs': 'COMID'}.items():
            table = self.__dict__[attr]
            if not table.index.name == index:
                table.index = table[index]

        # everything below needs the grid projection; say so explicitly instead of
        # failing with "'NoneType' object has no attribute 'split'"
        if self.mf_grid_proj4 is None:
            raise ValueError('No projection available for the model grid; '
                             'supply mfgrid_proj4 when mf_grid is a dataframe.')

        # first check that grid is in projected units
        if self.mf_grid_proj4.split('proj=')[1].split()[0].strip() == 'longlat':
            raise ProjectionError(self.mf_grid)

        # reproject the NHD Flowlines and model domain to model grid if they aren't
        # (prob a better way to check for same projection)

        # set GIS units from modflow grid projection (used for arbolate sum computation)
        # assumes either m or ft!
        self.GISunits = parse_proj4_units(self.mf_grid_proj4)
        gis_in_meters = self.GISunits == 'm'
        if gis_in_meters and self.mf_units == 'feet':
            self.mf_units_mult = 1/0.3048
        elif not gis_in_meters and self.mf_units == 'meters':
            self.mf_units_mult = 0.3048
        else:
            self.mf_units_mult = 1.0
        self.to_km = 0.001 if gis_in_meters else 0.001/0.3048

        # convert the elevations from elevslope table
        # (the conversion table is defined elsewhere on the class and keyed by mf_units)
        elev_mult = self.convert_elevslope_to_model_units[self.mf_units]
        self.elevs['Max'] = self.elevs.MAXELEVSMO * elev_mult
        self.elevs['Min'] = self.elevs.MINELEVSMO * elev_mult

        if different_projections(self.fl_proj4, self.mf_grid_proj4):
            print("reprojecting NHDFlowlines from\n{}\nto\n{}...".format(self.fl_proj4, self.mf_grid_proj4))
            self.fl['geometry'] = projectdf(self.fl, self.fl_proj4, self.mf_grid_proj4)

        if model_domain is not None \
                and different_projections(self.domain_proj4, self.mf_grid_proj4):
            print("reprojecting model domain from\n{}\nto\n{}...".format(self.domain_proj4, self.mf_grid_proj4))
            self.domain = project(self.domain, self.domain_proj4, self.mf_grid_proj4)