def update_reach_number_data(self):
    """
    Update reach-count parameters (IS_riv_tot, IS_max_up, IS_riv_bas)
    from the rapid_connect and riv_bas_id input files.
    """
    # Connectivity file: one row per reach; column 3 holds the number
    # of upstream reaches for that row.
    connect_rows = csv_to_list(self.rapid_connect_file)
    self.IS_riv_tot = len(connect_rows)
    self.IS_max_up = max(int(float(row[2])) for row in connect_rows)

    # Basin river-id file: one row per reach in the basin.
    self.IS_riv_bas = len(csv_to_list(self.riv_bas_id_file))
def _copy_streamflow_values(self, input_flow_var_name):
    """
    Copies streamflow values from raw output to CF file

    Parameters:
        input_flow_var_name -- name of the discharge variable in the raw
                               RAPID output netCDF file(s)
    """
    q_var = self.cf_nc.createVariable(self.output_flow_var_name, "f4",
                                      (self.output_id_dim_name, "time"))
    q_var.long_name = "Discharge"
    q_var.units = "m^3/s"
    q_var.coordinates = "time lat lon z"
    q_var.grid_mapping = "crs"
    q_var.source = ("Generated by the Routing Application for Parallel " +
                    "computatIon of Discharge (RAPID) river routing model.")
    q_var.references = "http://rapid-hub.org/"
    q_var.comment = ("lat, lon, and z values taken at midpoint of river " +
                     "reach feature")

    log("Copying streamflow values", "INFO")
    # time index 0 is reserved for the initial flow; raw values start at 1
    begin_time_step_index = 1
    end_time_step_index = -1
    for raw_nc_index, raw_nc in enumerate(self.raw_nc_list):
        if raw_nc_index == 0:
            # NOTE(review): for the first file the slice [1:time_len] has one
            # fewer slot than the raw array provides -- presumably matches the
            # raw output layout with the reserved t=0 slot; confirm upstream
            end_time_step_index = self.time_len_array[raw_nc_index]
        else:
            end_time_step_index = begin_time_step_index + self.time_len_array[raw_nc_index]
        q_var[:, begin_time_step_index:end_time_step_index] = \
            raw_nc.variables[input_flow_var_name][:].transpose()
        begin_time_step_index = end_time_step_index

    # add initial flow to RAPID output file
    if self.qinit_file and self.rapid_connect_file:
        lookup_table = csv_to_list(self.rapid_connect_file)
        lookup_comids = np.array([int(float(row[0])) for row in lookup_table])
        init_flow_table = csv_to_list(self.qinit_file)
        for index, comid in enumerate(self.cf_nc.variables[self.output_id_dim_name][:]):
            try:
                lookup_index = np.where(lookup_comids == comid)[0][0]
            except Exception:
                log("COMID %s missing in rapid_connect file" % comid, "ERROR")
                # BUGFIX: skip this COMID; previously execution fell through
                # and used a stale (or undefined) lookup_index whenever log()
                # did not raise on ERROR
                continue
            q_var[index, 0] = float(init_flow_table[lookup_index][0])
    else:
        # no qinit information available; zero-fill the initial time step
        for index, comid in enumerate(self.cf_nc.variables[self.output_id_dim_name][:]):
            q_var[index, 0] = 0
def create_jurisdiction_dict(directory):
    """Build a dict mapping jurisdiction abbreviation -> jurisdiction object,
    loaded from jurisdictions.csv in the given directory."""
    rows = helper_functions.csv_to_list(directory, 'jurisdictions.csv', 1, 0)
    jurisdictions = (row_to_jurisdiction(row) for row in rows)
    return {jur.abbrev: jur for jur in jurisdictions}
def generate_qinit_from_past_qout(self, qinit_file): """ Generate qinit from qout file """ print "Generating qinit file from qout file ..." print "Extracting data ..." #get information from datasets data_nc = Dataset(self.Qout_file, mode="r") riv_bas_id_array = data_nc.variables['COMID'][:] qout_dimensions = data_nc.variables['Qout'].dimensions if qout_dimensions[0].lower() == 'time' and qout_dimensions[1].lower() == 'comid': #data is raw rapid output data_values = data_nc.variables['Qout'][-1,:] elif qout_dimensions[1].lower() == 'time' and qout_dimensions[0].lower() == 'comid': #the data is CF compliant and has time=0 added to output data_values = data_nc.variables['Qout'][:,-1] else: data_nc.close() raise Exception( "Invalid ECMWF forecast file %s" % self.Qout_file) data_nc.close() print "Reordering data..." rapid_connect_array = csv_to_list(self.rapid_connect_file) stream_id_array = np.array([int(float(row[0])) for row in rapid_connect_array]) init_flows_array = np.zeros(len(rapid_connect_array)) for riv_bas_index, riv_bas_id in enumerate(riv_bas_id_array): try: data_index = np.where(stream_id_array==riv_bas_id)[0][0] init_flows_array[data_index] = data_values[riv_bas_index] except Exception: raise Exception ('riv bas id %s not found in connectivity list.' % riv_bas_id) print "Writing to file ..." with open(qinit_file, 'wb') as qinit_out: for init_flow in init_flows_array: qinit_out.write('{}\n'.format(init_flow)) self.Qinit_file = qinit_file self.BS_opt_Qinit = True print "Initialization Complete!"
def create_edge_sublist(court_name, master_cited_as_key): """ Given a court name (and a corresponding consolidation file) and a master edge list, create_edge_sublist will create a citations.csv file in the court's directory, representing the subset of edges in which both nodes are in court_name's court. """ court_dir = data_dir + r'/%s' % court_name court_data = helper_functions.csv_to_list(court_dir, 'consolidation.csv', 1, 0) print 'finding IDs in court...' citation_ids_in_court = [] for row in court_data: opinion_id = int(row[0]) citation_ids_in_court.append(opinion_id) edge_sublist = [['citing', 'cited']] num_ids = len(citation_ids_in_court) id = 0 for opinion_id in citation_ids_in_court: try: list_of_citers = master_cited_as_key[opinion_id] except KeyError: list_of_citers = [] for citer in list_of_citers: if citer in citation_ids_in_court: edge_sublist.append([citer, opinion_id]) id += 1 if id % 1000 == 0: print '%s of %s IDs checked (%s)' % (id, num_ids, float(id) / num_ids) helper_functions.list_to_csv(court_dir + r'/citations_sublist.csv', edge_sublist)
import networkx as nx
import matplotlib.pyplot as plt
import datetime
from operator import itemgetter
import numpy as np
import os

import helper_functions  # BUGFIX: used below for CSV loading but was never imported

proj_cwd = os.path.dirname(os.getcwd())
data_dir = proj_cwd + r'/data'

###############
# BUILD A GRAPH
###############

# Load data from the CSVs
edgelist_data = helper_functions.csv_to_list(data_dir + r'/scotus', 'citations_sublist.csv', 1, 0)
node_data = helper_functions.csv_to_list(data_dir + r'/scotus', 'consolidation.csv', 1, 0)

# Instantiate a directed graph object, D
D = nx.DiGraph()

# Add our nodes to D
for row in node_data:
    # It is really easy to add arbitrary info about each node or edge. For example, here, I load each node with a
    # date, judges and citation_id attribute.
    case_number = int(row[0])
    # BUGFIX: compare strings with ==, not `is` -- identity with '' only works
    # because of CPython string interning and is not guaranteed
    month, day, year = ['', '', ''] if row[3] == '' else [int(element) for element in row[3].rsplit('/')]
    file_date = '' if month == '' else datetime.date(year=year, month=month, day=day)
def generate_usgs_avg_daily_flows_opt(self, reach_id_gage_id_file, start_datetime, end_datetime, out_streamflow_file, out_stream_id_file): """ Generate streamflow file and stream id file required for optimization based on usgs gage ids associated with stream ids """ print "Generating avg streamflow file and stream id file required for optimization ..." reach_id_gage_id_list = csv_to_list(reach_id_gage_id_file) if start_datetime.tzinfo is None or start_datetime.tzinfo.utcoffset(start_datetime) is None: start_datetime = start_datetime.replace(tzinfo=utc) if end_datetime.tzinfo is None or end_datetime.tzinfo.utcoffset(end_datetime) is None: end_datetime = end_datetime.replace(tzinfo=utc) gage_data_matrix = [] valid_comid_list = [] num_days_needed = (end_datetime-start_datetime).days gage_id_list = [] for row in reach_id_gage_id_list[1:]: station_id = row[1] if len(row[1]) == 7: station_id = '0' + row[1] gage_id_list.append(station_id) num_gage_id_list = np.array(gage_id_list, dtype=np.int32) print "Querying Server for Data ..." 
#print station_id query_params = { 'format': 'json', 'sites': ",".join(gage_id_list), 'startDT': start_datetime.astimezone(tzoffset(None, -18000)).strftime("%Y-%m-%d"), 'endDT': end_datetime.astimezone(tzoffset(None, -18000)).strftime("%Y-%m-%d"), 'parameterCd': '00060', #streamflow 'statCd': '00003' #average } response = get("http://waterservices.usgs.gov/nwis/dv", params=query_params) if response.ok: data_valid = True try: requested_data = response.json()['value']['timeSeries'] except IndexError: data_valid = False pass if data_valid: for time_series in enumerate(requested_data): usgs_station_full_name = time_series[1]['name'] usgs_station_id = usgs_station_full_name.split(":")[1] gage_data = [] for time_step in time_series[1]['values'][0]['value']: local_datetime = parse(time_step['dateTime']) if local_datetime > end_datetime: break if local_datetime >= start_datetime: if not time_step['value']: print "MISSING DATA", station_id, local_datetime, time_step['value'] gage_data.append(float(time_step['value'])/35.3146667) try: #get where streamids assocated with USGS sation id is streamid_index = np.where(num_gage_id_list==int(float(usgs_station_id)))[0][0]+1 except Exception: print "USGS Station", usgs_station_id, "not found in list ..." raise if len(gage_data) == num_days_needed: gage_data_matrix.append(gage_data) valid_comid_list.append(reach_id_gage_id_list[streamid_index][0]) else: print "StreamID", reach_id_gage_id_list[streamid_index][0], "USGS Station", \ usgs_station_id, "MISSING", num_days_needed-len(gage_data), "DATA VALUES" if gage_data_matrix and valid_comid_list: print "Writing Output ..." 
np_array = np.array(gage_data_matrix).transpose() with open(out_streamflow_file, 'wb') as gage_data: wf = csvwriter(gage_data) for row in np_array: wf.writerow(row) with open(out_stream_id_file, 'wb') as comid_data: cf = csvwriter(comid_data) for row in valid_comid_list: cf.writerow([int(float(row))]) #set parameters for RAPID run self.IS_obs_tot = len(valid_comid_list) self.obs_tot_id_file = out_stream_id_file self.Qobs_file = out_streamflow_file self.IS_obs_use = len(valid_comid_list) self.obs_use_id_file = out_stream_id_file else: print "No valid data returned ..." else: print "USGS query error ..."
def _write_comid_lat_lon_z(self):
    """Add latitude, longitude, and z values for each netCDF feature

    Remarks:
        The lookup table (self.comid_lat_lon_z_file) is a CSV file with
        COMID, Lat, Lon, and Elev_m columns. Columns must be in that order
        and these must be the first four columns. The first row is a header.
        Also updates the geospatial_* bounds attributes on the CF file.
    """
    if not self.comid_lat_lon_z_file:
        log("No comid_lat_lon_z file. Not adding values ...", "INFO")
        return

    # get list of COMIDS (skip the header row)
    lookup_table = csv_to_list(self.comid_lat_lon_z_file)
    lookup_comids = np.array([int(float(row[0])) for row in lookup_table[1:]])

    # Get relevant arrays while we update them
    nc_comids = self.cf_nc.variables[self.output_id_dim_name][:]
    lats = self.cf_nc.variables["lat"][:]
    lons = self.cf_nc.variables["lon"][:]
    zs = self.cf_nc.variables["z"][:]

    lat_min = lat_max = None
    lon_min = lon_max = None
    z_min = z_max = None

    # Process each feature, pulling its coordinates from the lookup table
    for nc_index, nc_comid in enumerate(nc_comids):
        try:
            # +1 skips the header row when indexing lookup_table
            lookup_index = np.where(lookup_comids == nc_comid)[0][0] + 1
        except Exception:
            log("COMID %s missing in comid_lat_lon_z file" % nc_comid, "ERROR")
            # BUGFIX: skip this feature; previously execution fell through and
            # used a stale (or undefined) lookup_index whenever log() did not
            # raise on ERROR
            continue

        lat = float(lookup_table[lookup_index][1])
        lats[nc_index] = lat
        if lat_min is None or lat < lat_min:
            lat_min = lat
        if lat_max is None or lat > lat_max:
            lat_max = lat

        lon = float(lookup_table[lookup_index][2])
        lons[nc_index] = lon
        if lon_min is None or lon < lon_min:
            lon_min = lon
        if lon_max is None or lon > lon_max:
            lon_max = lon

        z = float(lookup_table[lookup_index][3])
        zs[nc_index] = z
        if z_min is None or z < z_min:
            z_min = z
        if z_max is None or z > z_max:
            z_max = z

    # Overwrite netCDF variable values
    self.cf_nc.variables["lat"][:] = lats
    self.cf_nc.variables["lon"][:] = lons
    self.cf_nc.variables["z"][:] = zs

    # Update geospatial bounds metadata
    if lat_min is not None:
        self.cf_nc.geospatial_lat_min = lat_min
    if lat_max is not None:
        self.cf_nc.geospatial_lat_max = lat_max
    if lon_min is not None:
        self.cf_nc.geospatial_lon_min = lon_min
    if lon_max is not None:
        self.cf_nc.geospatial_lon_max = lon_max
    if z_min is not None:
        self.cf_nc.geospatial_vertical_min = z_min
    if z_max is not None:
        self.cf_nc.geospatial_vertical_max = z_max