def check_grid_intersection(self, sfr_linework_shapefile=None):
    '''
    check that each SFR cell in ``self.m1`` intersects the linework feature at the same position

    The 'segment', 'reach' and 'geometry' columns of the linework shapefile
    are stored in ``self.lines``. Both ``self.lines`` and ``self.m1`` are
    sorted in place by segment and reach, and the two geometry columns are
    then compared pairwise. Rows of ``self.m1`` whose cell does not intersect
    its line are written to 'bad_intersections.csv' in the working directory.

    ``sfr_linework_shapefile``: (string) linework shapefile with segment and
    reach attributes; if None, a message is printed and nothing is checked.
    '''
    # single-argument print(...) behaves the same as the Python 2 print statement
    print('\nChecking SFR cell geometries for consistancy with linework...')
    if sfr_linework_shapefile is None:
        print('Need linework to check SFR cell geometries.')
        return

    import GISio
    df = GISio.shp2df(sfr_linework_shapefile)
    try:
        self.lines = df[['segment', 'reach', 'geometry']].copy()
    except (KeyError, IndexError):
        # selecting a missing column raises KeyError; without the columns
        # there is nothing to compare, so stop here instead of failing later
        print('Linework shapfile must have segment and reach information!')
        return
    self.lines.sort_values(['segment', 'reach'], inplace=True)

    self._get_sfr_cell_geoms()
    self.m1.sort_values(['segment', 'reach'], inplace=True)

    # NOTE(review): the comparison is positional, so it presumably expects
    # one linework feature per SFR reach -- confirm against the caller
    lines, cells = self.lines.geometry.tolist(), self.m1.geometry.tolist()
    # dtype=bool keeps the mask invertible when there are no cells at all
    intersections = np.array([lines[i].intersects(cell) for i, cell in enumerate(cells)],
                             dtype=bool)

    mismatched = self.m1[~intersections]
    nmismatch = len(mismatched)
    if nmismatch > 0:
        reportfile = 'bad_intersections.csv'
        print("{} SFR reaches don't coincide with linework in {}!"
              "see {}.".format(nmismatch, sfr_linework_shapefile, reportfile))
        mismatched.to_csv(reportfile, index=False)
    else:
        print('passed.')
def check_grid_intersection(self, sfr_linework_shapefile=None):
    '''
    check that each SFR cell in ``self.m1`` intersects the linework feature at the same position

    The 'segment', 'reach' and 'geometry' columns of the linework shapefile
    are stored in ``self.lines``. Both ``self.lines`` and ``self.m1`` are
    sorted in place by segment and reach, and the two geometry columns are
    then compared pairwise. Rows of ``self.m1`` whose cell does not intersect
    its line are written to 'bad_intersections.csv' in the working directory.

    ``sfr_linework_shapefile``: (string) linework shapefile with segment and
    reach attributes; if None, a message is printed and nothing is checked.
    '''
    print('\nChecking SFR cell geometries for consistancy with linework...')
    if sfr_linework_shapefile is None:
        print('Need linework to check SFR cell geometries.')
        return

    import GISio
    df = GISio.shp2df(sfr_linework_shapefile)
    try:
        self.lines = df[['segment', 'reach', 'geometry']].copy()
    except (KeyError, IndexError):
        # selecting a missing column raises KeyError; without the columns
        # there is nothing to compare, so stop here instead of failing later
        print('Linework shapfile must have segment and reach information!')
        return
    self.lines.sort_values(['segment', 'reach'], inplace=True)

    self._get_sfr_cell_geoms()
    self.m1.sort_values(['segment', 'reach'], inplace=True)

    # NOTE(review): the comparison is positional, so it presumably expects
    # one linework feature per SFR reach -- confirm against the caller
    lines, cells = self.lines.geometry.tolist(), self.m1.geometry.tolist()
    # dtype=bool keeps the mask invertible when there are no cells at all
    intersections = np.array([lines[i].intersects(cell) for i, cell in enumerate(cells)],
                             dtype=bool)

    mismatched = self.m1[~intersections]
    nmismatch = len(mismatched)
    if nmismatch > 0:
        reportfile = 'bad_intersections.csv'
        print("{} SFR reaches don't coincide with linework in {}!"
              "see {}.".format(nmismatch, sfr_linework_shapefile, reportfile))
        mismatched.to_csv(reportfile, index=False)
    else:
        print('passed.')
def dissolve(inshp, outshp, dissolve_attribute=None):
    '''
    dissolve the features of a shapefile and write the result to a new shapefile

    inshp: shapefile to read
    outshp: shapefile to write
    dissolve_attribute: passed through to dissolve_df (default None)

    The projection file handed to the writer is the input path with its last
    four characters replaced by '.prj'.
    '''
    features = GISio.shp2df(inshp)
    dissolved = dissolve_df(features, dissolve_attribute)

    # write dissolved polygons to new shapefile
    projection_file = inshp[:-4] + '.prj'
    GISio.df2shp(dissolved, outshp, prj=projection_file)
def dissolve(inshp, outshp, dissolve_attribute):
    '''
    dissolve the features of a shapefile and write the result to a new shapefile

    inshp: shapefile to read (with geometries)
    outshp: shapefile to write
    dissolve_attribute: passed through to dissolve_df

    The projection file handed to the writer is the input path with its last
    four characters replaced by '.prj'.
    '''
    # read the shapefile named by the inshp argument; the previous code
    # referenced an undefined name here
    df = GISio.shp2df(inshp, geometry=True)
    df_out = dissolve_df(df, dissolve_attribute)

    # write dissolved polygons to new shapefile
    GISio.df2shp(df_out, outshp, 'geometry', inshp[:-4] + '.prj')
def join_csv2shp(shapefile, shp_joinfield, csvfile, csv_joinfield, out_shapefile, how='outer'):
    '''
    add attribute information to shapefile from csv file

    shapefile: shapefile to add attributes to
    shp_joinfield: attribute name in shapefile on which to make join
    csvfile: csv file with information to be added to shapefile
    csv_joinfield: column in csv with entries matching those in shp_joinfield
    out_shapefile: output; original shapefile is not modified
    how: pandas join type; see
    http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.join.html
    NOTE(review): this argument is accepted but not used -- the join below
    is always performed with how='inner'.

    Columns present in both tables get the suffixes 'L' (shapefile) and 'R' (csv).
    '''
    shape_attributes = GISio.shp2df(shapefile, index=shp_joinfield, geometry=True)
    csv_attributes = pd.read_csv(csvfile, index_col=csv_joinfield)

    print('joining to {}...'.format(csvfile))
    joined = shape_attributes.join(csv_attributes, how='inner', lsuffix='L', rsuffix='R')

    # write to shapefile, reusing the projection file that sits next to the input
    projection_file = shapefile[:-4] + '.prj'
    GISio.df2shp(joined, out_shapefile, 'geometry', projection_file)
def dissolve(inshp, outshp, dissolve_attribute=None):
    '''
    read a shapefile, dissolve its features with dissolve_df, and save the result

    inshp: path of the shapefile to read
    outshp: path of the shapefile to write
    dissolve_attribute: forwarded unchanged to dissolve_df (default None)
    '''
    # projection file: input path with the last four characters swapped for '.prj'
    prj_path = inshp[:-4] + '.prj'

    source_df = GISio.shp2df(inshp)
    dissolved_df = dissolve_df(source_df, dissolve_attribute)

    # write dissolved polygons to new shapefile
    GISio.df2shp(dissolved_df, outshp, prj=prj_path)
def get_drainage_areas(comids, CumulativeArea_file, units='mi2', areatype='TotDASqKM'):
    '''
    look up drainage areas for a list of COMIDs

    comids: iterable of COMIDs to look up
    CumulativeArea_file: file read with GISio.shp2df; must have a ComID
    column and the column named by areatype
    units: 'mi2' (values are multiplied by 0.386102) or 'km2' (values are
    returned as stored)
    areatype: name of the column holding the area values

    returns: dictionary of area keyed by COMID
    raises: ValueError if units is neither 'mi2' nor 'km2' (previously an
    empty dictionary was returned without any warning)
    '''
    if units not in ('mi2', 'km2'):
        raise ValueError("units must be 'mi2' or 'km2', not {!r}".format(units))

    CA = GISio.shp2df(CumulativeArea_file)
    CA.index = CA.ComID

    areas_dict = {}
    for comid in comids:
        area = CA.loc[comid, areatype]
        if units == 'mi2':
            area = area * 0.386102
        areas_dict[comid] = area
    return areas_dict
def check_outlets(self, model_domain=None, buffer=100):
    '''
    report SFR outlets that lie in the interior of the model domain

    The first geometry in the model_domain shapefile is stored in
    ``self.domain``. For each unique value in the 'Outlet' column of
    ``self.m1`` (generated with ``self.map_outsegs()`` if the column is
    missing), the geometry of the last reach of that segment is buffered and
    tested against the domain; outlets whose buffered cell is entirely within
    the domain are printed. ``self.m1`` is sorted in place by segment and reach.

    ``model_domain``: (string) shapefile of the model domain edge; if None, a
    message is printed and nothing is checked.
    ``buffer``: distance passed to the geometry ``buffer()`` call before the
    within-domain test.
    '''
    # single-argument print(...) behaves the same as the Python 2 print statement
    print("\nChecking for outlets in the model interior...")
    if model_domain is None:
        print('Need a shapefile of the model domain edge to check for interior outlets.\n'
              '(rotated coordinate systems not supported; '
              'for rotated grids submit shapefile of unrotated domain in coordinates consistent with model cells.)')
        return

    import GISio
    df = GISio.shp2df(model_domain)
    self.domain = df.iloc[0].geometry
    self._get_sfr_cell_geoms()

    print('\nChecking for breaks in routing (outlets) within the SFR network')
    if 'Outlet' not in self.m1.columns:
        circular_routing = self.map_outsegs()
        if circular_routing is not None:
            print(circular_routing)
            print('\nCannot evaluate interior outlets until circular routing is fixed.')
            return

    outlets = np.unique(self.m1.Outlet.values)
    self.m1.sort_values(['segment', 'reach'], inplace=True)

    # after sorting, the last row of each outlet segment is its last reach
    interior_outlets = []
    for o in outlets:
        in_segment = self.m1.segment == o
        node = self.m1.loc[in_segment, 'node'].values[-1]
        geom = self.m1.loc[in_segment, 'geometry'].values[-1]
        if geom.buffer(buffer).within(self.domain):
            interior_outlets.append(node)

    if len(interior_outlets) > 0:
        print('Interior outlets found at the following nodes:\n')
        print(' '.join('{} '.format(n) for n in interior_outlets))
        print('\n')
    elif any(g.within(self.domain) for g in self.m1.geometry):
        # make sure that at least 1 SFR cell is inside the domain
        print('passed.')
    else:
        print("No SFR cells were inside of the supplied domain! Check domain shapefile coordinates,\n"
              "and that the correct model origin was supplied.")
def check_outlets(self, model_domain=None, buffer=100):
    '''
    report SFR outlets that lie in the interior of the model domain

    The first geometry in the model_domain shapefile is stored in
    ``self.domain``. For each unique value in the 'Outlet' column of
    ``self.m1`` (generated with ``self.map_outsegs()`` if the column is
    missing), the geometry of the last reach of that segment is buffered and
    tested against the domain; outlets whose buffered cell is entirely within
    the domain are printed. ``self.m1`` is sorted in place by segment and reach.

    ``model_domain``: (string) shapefile of the model domain edge; if None, a
    message is printed and nothing is checked.
    ``buffer``: distance passed to the geometry ``buffer()`` call before the
    within-domain test.
    '''
    print("\nChecking for outlets in the model interior...")
    if model_domain is None:
        print('Need a shapefile of the model domain edge to check for interior outlets.\n'
              '(rotated coordinate systems not supported; '
              'for rotated grids submit shapefile of unrotated domain in coordinates consistent with model cells.)')
        return

    import GISio
    df = GISio.shp2df(model_domain)
    self.domain = df.iloc[0].geometry
    self._get_sfr_cell_geoms()

    print('\nChecking for breaks in routing (outlets) within the SFR network')
    if 'Outlet' not in self.m1.columns:
        circular_routing = self.map_outsegs()
        if circular_routing is not None:
            print(circular_routing)
            print('\nCannot evaluate interior outlets until circular routing is fixed.')
            return

    outlets = np.unique(self.m1.Outlet.values)
    self.m1.sort_values(['segment', 'reach'], inplace=True)

    # after sorting, the last row of each outlet segment is its last reach
    interior_outlets = []
    for o in outlets:
        in_segment = self.m1.segment == o
        node = self.m1.loc[in_segment, 'node'].values[-1]
        geom = self.m1.loc[in_segment, 'geometry'].values[-1]
        if geom.buffer(buffer).within(self.domain):
            interior_outlets.append(node)

    if len(interior_outlets) > 0:
        print('Interior outlets found at the following nodes:\n')
        for i in interior_outlets:
            print('{} '.format(i), end=' ')
        print('\n')
    elif any(g.within(self.domain) for g in self.m1.geometry):
        # make sure that at least 1 SFR cell is inside the domain
        print('passed.')
    else:
        print("No SFR cells were inside of the supplied domain! Check domain shapefile coordinates,\n"
              "and that the correct model origin was supplied.")
def portion_perennial(self, bounds=None):
    '''
    returns new dataframe of COMIDs classified as perennial (True) or ephemeral (False)

    Only rows of ``self.df`` with FCODE 46006 or 46003 are kept; the added
    'perennial' column is True where FCODE is 46006. ``self.df`` itself is
    not modified.

    ``bounds (optional)``: (polygon shapefile) encompassing streams to evaluate;
    only streams intersecting the first geometry in this shapefile are returned
    '''
    streams = self.df[self.df.FCODE.isin([46006, 46003])].copy()
    # vectorized comparison; .values sidesteps index alignment on assignment
    streams['perennial'] = (streams.FCODE == 46006).values

    if bounds:
        bound = GISio.shp2df(bounds, geometry=True).iloc[0].geometry
        intersected = [g.intersects(bound) for g in streams.geometry]
        streams = streams.loc[intersected]
    return streams
def check_outlets(self, model_domain=None, buffer=100):
    '''
    report SFR outlets that lie in the interior of the model domain

    The first geometry in the model_domain shapefile is stored in
    ``self.domain``. If ``self.m1`` has no 'geometry' column,
    ``self.get_cell_geometries()`` is called to create it. For each unique
    value in the 'Outlet' column of ``self.m1`` (generated with
    ``self.map_outsegs()`` if the column is missing), the geometry of the last
    reach of that segment is buffered and tested against the domain; outlets
    whose buffered cell is entirely within the domain are printed.
    ``self.m1`` is sorted in place by segment and reach.

    ``model_domain``: (string) shapefile of the model domain edge; if None, a
    message is printed and nothing is checked.
    ``buffer``: distance passed to the geometry ``buffer()`` call before the
    within-domain test.
    '''
    # single-argument print(...) behaves the same as the Python 2 print statement
    if model_domain is None:
        print('Need a shapefile of the model domain edge to check for interior outlets.')
        return

    import GISio
    df = GISio.shp2df(model_domain)
    self.domain = df.iloc[0].geometry

    if 'geometry' not in self.m1.columns:
        try:
            self.get_cell_geometries()
        except Exception:
            # nothing below can run without cell geometries, so stop here
            print("No geometry column in attribute m1; couldn't generate geometries from dis attribute.\n"
                  "Read in the DIS file by running read_dis2().")
            return
    if self.xll == 0 and self.yll == 0:
        print('Warning, model origin for SFR object is 0, 0.')

    print('Checking for breaks in routing (outlets) within the SFR network')
    if 'Outlet' not in self.m1.columns:
        self.map_outsegs()

    outlets = np.unique(self.m1.Outlet.values)
    self.m1.sort_values(['segment', 'reach'], inplace=True)

    # after sorting, the last row of each outlet segment is its last reach
    interior_outlets = []
    for o in outlets:
        in_segment = self.m1.segment == o
        node = self.m1.loc[in_segment, 'node'].values[-1]
        geom = self.m1.loc[in_segment, 'geometry'].values[-1]
        if geom.buffer(buffer).within(self.domain):
            interior_outlets.append(node)

    if len(interior_outlets) > 0:
        print('Interior outlets found at the following nodes:\n')
        print(' '.join('{} '.format(n) for n in interior_outlets))
    elif any(g.within(self.domain) for g in self.m1.geometry):
        # make sure that at least 1 SFR cell is inside the domain
        print('passed.')
    else:
        print("No SFR cells were inside of the supplied domain! Check domain shapefile coordinates,\n"
              "and that the correct model origin was supplied.")
def portion_perennial(self, bounds=None):
    '''
    returns new dataframe of COMIDs classified as perennial (True) or ephemeral (False)

    Only rows of ``self.df`` with FCODE 46006 or 46003 are kept; the added
    'perennial' column is True where FCODE is 46006. ``self.df`` itself is
    not modified.

    ``bounds (optional)``: (polygon shapefile) encompassing streams to evaluate;
    only streams intersecting the first geometry in this shapefile are returned
    '''
    streams = self.df[self.df.FCODE.isin([46006, 46003])].copy()
    # vectorized comparison; .values sidesteps index alignment on assignment
    streams['perennial'] = (streams.FCODE == 46006).values

    if bounds:
        bound = GISio.shp2df(bounds, geometry=True).iloc[0].geometry
        intersected = [g.intersects(bound) for g in streams.geometry]
        streams = streams.loc[intersected]
    return streams
def join_csv2shp(shapefile, shp_joinfield, csvfile, csv_joinfield, out_shapefile, how='outer'):
    '''
    add attribute information to shapefile from csv file

    shapefile: shapefile to add attributes to
    shp_joinfield: attribute name in shapefile on which to make join
    csvfile: csv file with information to be added to shapefile
    csv_joinfield: column in csv with entries matching those in shp_joinfield
    out_shapefile: output; original shapefile is not modified
    how: pandas join type; see
    http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.join.html
    NOTE(review): this argument is accepted but not used -- the join below
    is always performed with how='inner'.

    Columns present in both tables get the suffixes 'L' (shapefile) and 'R' (csv).
    '''
    left = GISio.shp2df(shapefile, index=shp_joinfield, geometry=True)
    right = pd.read_csv(csvfile, index_col=csv_joinfield)

    print('joining to {}...'.format(csvfile))
    join_options = dict(how='inner', lsuffix='L', rsuffix='R')
    joined = left.join(right, **join_options)

    # write to shapefile, reusing the projection file that sits next to the input
    prj_path = shapefile[:-4] + '.prj'
    GISio.df2shp(joined, out_shapefile, 'geometry', prj_path)
__author__ = 'aleaf'
'''
retrieve daily values for a list of NWIS sites
'''
import os
import pandas as pd
from shapely.geometry import Point
import flux_targets as ft
import sys
# GISio lives outside the package path, so its folder must be added before the import
sys.path.append('D:\\ATLData\\Documents\\GitHub\\GIS_utils\\')
import GISio

NWIS_daily_values_sites_file = 'D:/ATLData/GFL files/Great_Divide/flux_targets/NWIS_daily_values_sites.txt'
output_folder = 'D:/ATLData/GFL files/Great_Divide/flux_targets/daily'
model_domain_polygon = 'D:/ATLData/CNNF_Great_Divide/GIS/shps/GD_nearfield.shp'  # must be in geographic coordinates!

# first geometry in the domain shapefile, taken by position rather than by index label
bounds = GISio.shp2df(model_domain_polygon, geometry=True).geometry.iloc[0]

# read in info from daily values sites file
with open(NWIS_daily_values_sites_file) as sites_file:
    header_text = sites_file.readlines()
columns, header_rows = ft.NWIS_header(header_text)

df = pd.read_csv(NWIS_daily_values_sites_file, sep='\t', names=columns, skiprows=header_rows)

for n in df.site_no:
    # first test if the site is in the model domain
    site_location = Point(df[df.site_no == n][['dec_long_va', 'dec_lat_va']].values[0])
arcpy.Clip_analysis(os.path.join(os.getcwd(), 'temp2.shp'), MFdomain, os.path.join(os.getcwd(), 'catchments.shp')) print 'performing spatial join of catchments to SFR cells...' # intersect is way faster than spatial join arcpy.SpatialJoin_analysis(SFR_shapefile, os.path.join(os.getcwd(), 'catchments.shp'), os.path.join(os.getcwd(), 'catchments_joined.shp')) print 'and to model grid (this may take awhile)...' arcpy.SpatialJoin_analysis(MFgrid, os.path.join(os.getcwd(), 'catchments.shp'), os.path.join(os.getcwd(), 'MFgrid_catchments.shp')) # now figure out which SFR segment each catchment should drain to print 'reading {} into pandas dataframe...'.format( os.path.join(os.getcwd(), 'catchments_joined.shp')) SFRcatchments = GISio.shp2df(os.path.join(os.getcwd(), 'catchments_joined.shp')) print 'assigning an SFR segment to each catchment... (this may take awhile)' intersected_catchments = list(np.unique(SFRcatchments.FEATUREID)) segments_dict = {} for cmt in intersected_catchments: try: segment = SFRcatchments[SFRcatchments.FEATUREID == cmt].segment.mode()[0] except: # pandas crashes if mode is called on df of length 1 segment = SFRcatchments[SFRcatchments.FEATUREID == cmt].segment[0] segments_dict[cmt] = segment # can also use values_count() to get a frequency table for segments (reaches) in each catchment print 'building UZF package IRUNBND array from {}'.format(MFgrid) MFgrid_joined = GISio.shp2df(os.path.join(os.getcwd(),
# single-argument print(...) behaves the same as the Python 2 print statement
print('clipping to {}'.format(MFdomain))

# intermediate shapefiles, all written to the current working directory
_temp_shp = os.path.join(os.getcwd(), 'temp2.shp')
_catchments_shp = os.path.join(os.getcwd(), 'catchments.shp')
_catchments_joined_shp = os.path.join(os.getcwd(), 'catchments_joined.shp')
_MFgrid_catchments_shp = os.path.join(os.getcwd(), 'MFgrid_catchments.shp')

arcpy.Clip_analysis(_temp_shp, MFdomain, _catchments_shp)

print('performing spatial join of catchments to SFR cells...')
# intersect is way faster than spatial join
arcpy.SpatialJoin_analysis(SFR_shapefile, _catchments_shp, _catchments_joined_shp)

print('and to model grid (this may take awhile)...')
arcpy.SpatialJoin_analysis(MFgrid, _catchments_shp, _MFgrid_catchments_shp)

# now figure out which SFR segment each catchment should drain to
print('reading {} into pandas dataframe...'.format(_catchments_joined_shp))
SFRcatchments = GISio.shp2df(_catchments_joined_shp)

print('assigning an SFR segment to each catchment... (this may take awhile)')
intersected_catchments = list(np.unique(SFRcatchments.FEATUREID))
segments_dict = {}
for cmt, cmt_rows in SFRcatchments.groupby('FEATUREID'):
    cmt_segments = cmt_rows.segment
    # use the most frequent segment; mode() is skipped for a single row
    # (the original comment notes that pandas crashed in that case) and the
    # first value is taken by position when mode() returns nothing
    modes = cmt_segments.mode() if len(cmt_segments) > 1 else cmt_segments
    if len(modes) > 0:
        segments_dict[cmt] = modes.iloc[0]
    else:
        segments_dict[cmt] = cmt_segments.iloc[0]
# can also use value_counts() to get a frequency table for segments (reaches) in each catchment

print('building UZF package IRUNBND array from {}'.format(MFgrid))
MFgrid_joined = GISio.shp2df(_MFgrid_catchments_shp, geometry=True)
MFgrid_joined.index = MFgrid_joined.node
def __init__(self, filename):
    '''
    load one or more shapefiles into ``self.df``, indexed by COMID, with geometries

    ``filename``: (string or list) shapefile(s) to load
    '''
    self.filename = filename
    loaded = GISio.shp2df(self.filename, index='COMID', geometry=True)
    self.df = loaded
def add_fcode(self, fcodefile):
    '''
    read a file with GISio.shp2df and store the resulting dataframe as ``self.fcode``

    ``fcodefile``: file to read
    '''
    fcode_table = GISio.shp2df(fcodefile)
    self.fcode = fcode_table
__author__ = 'aleaf'
'''
retrieve daily values for a list of NWIS sites
'''
import os
import pandas as pd
from shapely.geometry import Point
import flux_targets as ft
import sys
# GISio lives outside the package path, so its folder must be added before the import
sys.path.append('D:\\ATLData\\Documents\\GitHub\\GIS_utils\\')
import GISio

NWIS_daily_values_sites_file = 'D:/ATLData/GFL files/Great_Divide/flux_targets/NWIS_daily_values_sites.txt'
output_folder = 'D:/ATLData/GFL files/Great_Divide/flux_targets/daily'
model_domain_polygon = 'D:/ATLData/CNNF_Great_Divide/GIS/shps/GD_nearfield.shp'  # must be in geographic coordinates!

# first geometry in the domain shapefile, taken by position rather than by index label
bounds = GISio.shp2df(model_domain_polygon, geometry=True).geometry.iloc[0]

# read in info from daily values sites file
with open(NWIS_daily_values_sites_file) as sites_file:
    header_text = sites_file.readlines()
columns, header_rows = ft.NWIS_header(header_text)

df = pd.read_csv(NWIS_daily_values_sites_file, sep='\t', names=columns, skiprows=header_rows)

for n in df.site_no:
    # first test if the site is in the model domain
    site_location = Point(df[df.site_no == n][['dec_long_va', 'dec_lat_va']].values[0])
    if site_location.within(bounds):
        text = ft.get_nwis(n, '00060')
        # NOTE(review): the code that writes to and closes this handle is
        # beyond the visible fragment, so the open() call is left as it was
        ofp = open(os.path.join(output_folder, '{}.txt'.format(n)), 'w')
def add_PlusFlowVAA(self, pfvaafile, **kwargs):
    '''
    read a table indexed by COMID and join it onto ``self.df``

    The table is kept as ``self.pfvaa``; ``self.df`` is replaced by the
    joined dataframe.

    ``pfvaafile``: file to read with GISio.shp2df
    ``**kwargs``: passed through to the pandas DataFrame.join call
    '''
    vaa_table = GISio.shp2df(pfvaafile, index='COMID')
    self.pfvaa = vaa_table
    self.df = self.df.join(self.pfvaa, **kwargs)
def check_4gaps_in_routing(self, model_domain=None, tol=0):
    '''
    report segments whose last reach is farther than tol from the first reach of their outseg

    For every segment in ``self.m2`` the distance is measured between the
    centroid of the cell of its last reach and the centroid of the cell of
    reach 1 of its outseg (looked up by segment number). Segments with an
    outseg outside 1..999998 get a distance of 0. Gaps larger than ``tol``
    are written to 'routing_gaps.csv' in the working directory; otherwise
    'passed.' is printed. Works on copies of ``self.m1`` and ``self.m2``.

    ``model_domain``: (string) shapefile of the model domain; when given, its
    first geometry is stored in ``self.domain`` and only gaps whose end-reach
    cell is within the domain are reported.
    ``tol``: distance above which a gap is reported. If None, the smallest
    value in ``self.dis.delr`` and ``self.dis.delc`` is used, falling back to
    0 when that cannot be computed.
    '''
    print("\nChecking for gaps in routing between segments...")
    if model_domain is None:
        print('No model_domain supplied. '
              'Routing gaps for segments intersecting model domain boundary will not be considered.\n'
              '(rotated coordinate systems not supported; '
              'for rotated grids submit shapefile of unrotated domain in coordinates consistent with model cells.)')
    else:
        import GISio
        df = GISio.shp2df(model_domain)
        self.domain = df.iloc[0].geometry

    self._get_sfr_cell_geoms()
    m1 = self.m1.copy()
    m2 = self.m2.copy()

    if tol is None:
        try:
            tol = np.min([np.min(self.dis.delr), np.min(self.dis.delc)])
        except Exception:
            # best effort: no usable dis attribute, so report every nonzero gap
            tol = 0

    # cell of the last reach, and centroid of the first reach, for each segment
    end_geoms = []
    start_centroids = {}
    for seg in m2.segment.tolist():
        in_segment = m1.segment == seg
        last_reach = np.max(m1.reach[in_segment])
        end_geoms.append(m1.geometry[in_segment & (m1.reach == last_reach)].values[0])
        start_centroids[seg] = m1.geometry[in_segment & (m1.reach == 1)].values[0].centroid

    # compute distances between end reach cell centroid, and cell centroid of outseg reach 1;
    # the outseg is looked up by segment number, so m2 need not be ordered 1..n
    distances = [end_geom.centroid.distance(start_centroids[outseg]) if 0 < outseg < 999999 else 0
                 for end_geom, outseg in zip(end_geoms, m2.outseg.astype(int).tolist())]

    m2['routing_distance'] = distances
    m2['end_reach_geom'] = end_geoms

    routing = m2.loc[m2.routing_distance > tol,
                     ['segment', 'outseg', 'routing_distance', 'end_reach_geom']]
    routing = routing.sort_values('routing_distance', ascending=False)

    reportfile = 'routing_gaps.csv'
    if model_domain is not None:
        # identify routing gaps that do not occur along the model boundary
        # (HWR code in sfr_classes may route segments along the model boundary to each other)
        interior_gaps = np.array([g.within(self.domain) for g in routing.end_reach_geom],
                                 dtype=bool)
        routing = routing[interior_gaps]

    if len(routing) == 0:
        print('passed.')
        return

    if model_domain is not None:
        print('{:.0f} gaps in routing greater than {} (default is minimum model cell size)'
              '\nfound that do not coincide with {}. See {}'.format(len(routing), tol, model_domain, reportfile))
    else:
        print('{:.0f} gaps in routing greater than {} found, '
              'but these may coincide with model domain boundary. See {}'.format(len(routing), tol, reportfile))
    routing.drop('end_reach_geom', axis=1).to_csv(reportfile, index=False)
for line in text: if line.strip().split('\t')[0] == 'agency_cd': columns = line.strip().split('\t') knt += 2 break else: knt += 1 return columns, knt # read in NWIS site information and study area boundary header_text = open(NWIS_site_info_file).readlines() columns, header_rows = NWIS_header(header_text) df = pd.read_csv(NWIS_site_info_file, sep='\t', names=columns, skiprows=header_rows) bounds = GISio.shp2df(model_domain_polygon, geometry=True).geometry[0] # make geomtries for each station, and drop stations not in the study area df['geometry'] = df.apply(lambda x: Point(x['dec_long_va'], x['dec_lat_va']), axis=1) GISio.df2shp(df, 'D:/ATLData/GFL files/Great_Divide/flux_targets/NWIS_sites_all.shp', prj='epsg:4269') within = [p.within(bounds) for p in df.geometry] df = df[within] GISio.df2shp(df, NWIS_site_info_file[:-4]+'.shp', prj='epsg:4269') # now do spatial join of NWIS locations to NHD comids arcpy.SpatialJoin_analysis(NWIS_site_info_file[:-4]+'.shp', flowlines_clipped, NWIS_site_info_file[:-4]+'_joined.shp', "JOIN_ONE_TO_ONE", "KEEP_ALL", '', "WITHIN_A_DISTANCE", .001) # now read back in and make a csv file for input into flux_targets.py df = GISio.shp2df(NWIS_site_info_file[:-4]+'_joined.shp')
def check_4gaps_in_routing(self, model_domain=None, tol=0):
    '''
    report segments whose last reach is farther than tol from the first reach of their outseg

    For every segment in ``self.m2`` the distance is measured between the
    centroid of the cell of its last reach and the centroid of the cell of
    reach 1 of its outseg (looked up by segment number). Segments with an
    outseg outside 1..999998 get a distance of 0. Gaps larger than ``tol``
    are written to 'routing_gaps.csv' in the working directory; otherwise
    'passed.' is printed. Works on copies of ``self.m1`` and ``self.m2``.

    ``model_domain``: (string) shapefile of the model domain; when given, its
    first geometry is stored in ``self.domain`` and only gaps whose end-reach
    cell is within the domain are reported.
    ``tol``: distance above which a gap is reported. If None, the smallest
    value in ``self.dis.delr`` and ``self.dis.delc`` is used, falling back to
    0 when that cannot be computed.
    '''
    # single-argument print(...) behaves the same as the Python 2 print statement
    print("\nChecking for gaps in routing between segments...")
    if model_domain is None:
        print('No model_domain supplied. '
              'Routing gaps for segments intersecting model domain boundary will not be considered.\n'
              '(rotated coordinate systems not supported; '
              'for rotated grids submit shapefile of unrotated domain in coordinates consistent with model cells.)')
    else:
        import GISio
        df = GISio.shp2df(model_domain)
        self.domain = df.iloc[0].geometry

    self._get_sfr_cell_geoms()
    m1 = self.m1.copy()
    m2 = self.m2.copy()

    if tol is None:
        try:
            tol = np.min([np.min(self.dis.delr), np.min(self.dis.delc)])
        except Exception:
            # best effort: no usable dis attribute, so report every nonzero gap
            tol = 0

    # cell of the last reach, and centroid of the first reach, for each segment
    end_geoms = []
    start_centroids = {}
    for seg in m2.segment.tolist():
        in_segment = m1.segment == seg
        last_reach = np.max(m1.reach[in_segment])
        end_geoms.append(m1.geometry[in_segment & (m1.reach == last_reach)].values[0])
        start_centroids[seg] = m1.geometry[in_segment & (m1.reach == 1)].values[0].centroid

    # compute distances between end reach cell centroid, and cell centroid of outseg reach 1;
    # the outseg is looked up by segment number, so m2 need not be ordered 1..n
    distances = [end_geom.centroid.distance(start_centroids[outseg]) if 0 < outseg < 999999 else 0
                 for end_geom, outseg in zip(end_geoms, m2.outseg.astype(int).tolist())]

    m2['routing_distance'] = distances
    m2['end_reach_geom'] = end_geoms

    routing = m2.loc[m2.routing_distance > tol,
                     ['segment', 'outseg', 'routing_distance', 'end_reach_geom']]
    routing = routing.sort_values('routing_distance', ascending=False)

    reportfile = 'routing_gaps.csv'
    if model_domain is not None:
        # identify routing gaps that do not occur along the model boundary
        # (HWR code in sfr_classes may route segments along the model boundary to each other)
        interior_gaps = np.array([g.within(self.domain) for g in routing.end_reach_geom],
                                 dtype=bool)
        routing = routing[interior_gaps]

    if len(routing) == 0:
        print('passed.')
        return

    if model_domain is not None:
        print('{:.0f} gaps in routing greater than {} (default is minimum model cell size)'
              '\nfound that do not coincide with {}. See {}'.format(len(routing), tol, model_domain, reportfile))
    else:
        print('{:.0f} gaps in routing greater than {} found, '
              'but these may coincide with model domain boundary. See {}'.format(len(routing), tol, reportfile))
    routing.drop('end_reach_geom', axis=1).to_csv(reportfile, index=False)
knt += 2 break else: knt += 1 return columns, knt # read in NWIS site information and study area boundary header_text = open(NWIS_site_info_file).readlines() columns, header_rows = NWIS_header(header_text) df = pd.read_csv(NWIS_site_info_file, sep='\t', names=columns, skiprows=header_rows) bounds = GISio.shp2df(model_domain_polygon, geometry=True).geometry[0] # make geomtries for each station, and drop stations not in the study area df['geometry'] = df.apply(lambda x: Point(x['dec_long_va'], x['dec_lat_va']), axis=1) GISio.df2shp( df, 'D:/ATLData/GFL files/Great_Divide/flux_targets/NWIS_sites_all.shp', prj='epsg:4269') within = [p.within(bounds) for p in df.geometry] df = df[within] GISio.df2shp(df, NWIS_site_info_file[:-4] + '.shp', prj='epsg:4269') # now do spatial join of NWIS locations to NHD comids arcpy.SpatialJoin_analysis(NWIS_site_info_file[:-4] + '.shp',