def build_connections(supernetwork_parameters, dt):
    # TODO: Remove the dependence on dt in this function
    """Read the supernetwork geometry file and assemble the channel network.

    Parameters
    ----------
    supernetwork_parameters : dict
        Configuration describing the geo file path, the column-name mapping,
        an optional domain mask, and the waterbody null code.
    dt : float
        Routing timestep; stored as a constant "dt" column on the table.

    Returns
    -------
    tuple
        (connections, wbodies, param_df): the downstream-connection map, the
        segment-to-waterbody map, and the float32 channel-parameter table
        indexed by segment key.
    """
    column_map = supernetwork_parameters["columns"]

    # Load only the configured columns and index the table by the segment key.
    geo_df = nhd_io.read(supernetwork_parameters["geo_file_path"])
    geo_df = geo_df[list(column_map.values())].set_index(column_map["key"])

    # Optionally restrict the domain to the segment IDs listed in a mask file.
    if "mask_file_path" in supernetwork_parameters:
        mask_df = nhd_io.read_mask(
            supernetwork_parameters["mask_file_path"],
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        geo_df = geo_df.filter(
            mask_df.iloc[:, supernetwork_parameters["mask_key"]], axis=0
        )

    geo_df = geo_df.sort_index()
    # Normalize terminal/off-domain downstream pointers to 0.
    geo_df = nhd_io.replace_downstreams(geo_df, column_map["downstream"], 0)

    connections = nhd_network.extract_connections(geo_df, column_map["downstream"])

    # TODO: reorganize this so the wbodies object doesn't use the par-final param_df
    # This could mean doing something different to get the final param_df,
    # or changing the wbodies call to use the final param_df as it stands.
    wbodies = nhd_network.extract_waterbodies(
        geo_df,
        column_map["waterbody"],
        supernetwork_parameters["waterbody_null_code"],
    )

    geo_df["dt"] = dt
    # Switch to the standard internal column names and compact the dtypes.
    geo_df = geo_df.rename(columns=reverse_dict(column_map)).astype("float32")
    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
    return connections, wbodies, geo_df
def build_connections(supernetwork_parameters, dt):
    # TODO: Remove the dependence on dt in this function
    """Assemble channel connectivity and the routing-parameter table.

    Parameters
    ----------
    supernetwork_parameters : dict
        Configuration with the geo file path, column-name mapping, and an
        optional domain mask.
    dt : float
        Routing timestep; stored as a constant "dt" column.

    Returns
    -------
    tuple
        (connections, param_df): the downstream-connection map and the
        float32 channel-parameter table indexed by segment key.
    """
    column_map = supernetwork_parameters["columns"]

    # Read the configured columns only, indexed by the segment key.
    frame = nhd_io.read(pathlib.Path(supernetwork_parameters["geo_file_path"]))
    frame = frame[list(column_map.values())].set_index(column_map["key"])

    # Optionally trim the domain to the IDs listed in the mask file.
    if "mask_file_path" in supernetwork_parameters:
        mask_frame = nhd_io.read_mask(
            pathlib.Path(supernetwork_parameters["mask_file_path"]),
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        frame = frame.filter(
            mask_frame.iloc[:, supernetwork_parameters["mask_key"]], axis=0
        )

    frame = frame.sort_index()
    # Normalize terminal/off-domain downstream pointers to 0.
    frame = nhd_io.replace_downstreams(frame, column_map["downstream"], 0)

    connections = nhd_network.extract_connections(frame, column_map["downstream"])

    frame["dt"] = dt
    # Switch to the standard internal column names and compact the dtypes.
    frame = frame.rename(columns=reverse_dict(column_map)).astype("float32")
    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
    return connections, frame
def build_connections(supernetwork_parameters):
    """Build channel connectivity, parameters, waterbodies, and gages.

    Parameters
    ----------
    supernetwork_parameters : dict
        Configuration with the geo file path, column-name mapping, and an
        optional domain mask.

    Returns
    -------
    tuple
        (connections, param_df, wbodies, gages): the downstream-connection
        map, the float32 channel-parameter table, the waterbody map (empty
        dict when no "waterbody" column is configured), and the gage map
        (empty dict when no "gages" column is configured).
    """
    cols = supernetwork_parameters["columns"]
    # NOTE(review): a `terminal_code = supernetwork_parameters.get("terminal_code", 0)`
    # assignment was removed here: the value was never used in this function
    # (terminal-code handling lives in the newer build_connections variant).

    param_df = nhd_io.read(pathlib.Path(supernetwork_parameters["geo_file_path"]))
    param_df = param_df[list(cols.values())]
    param_df = param_df.set_index(cols["key"])

    if "mask_file_path" in supernetwork_parameters:
        data_mask = nhd_io.read_mask(
            pathlib.Path(supernetwork_parameters["mask_file_path"]),
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        param_df = param_df.filter(
            data_mask.iloc[:, supernetwork_parameters["mask_key"]], axis=0
        )

    param_df = param_df.rename(columns=reverse_dict(cols))
    # Rename parameter columns to standard names: from route-link names
    #   key: "link"
    #   downstream: "to"
    #   dx: "Length"
    #   n: "n"  # TODO: rename to `manningn`
    #   ncc: "nCC"  # TODO: rename to `mannningncc`
    #   s0: "So"  # TODO: rename to `bedslope`
    #   bw: "BtmWdth"  # TODO: rename to `bottomwidth`
    #   waterbody: "NHDWaterbodyComID"
    #   gages: "gages"
    #   tw: "TopWdth"  # TODO: rename to `topwidth`
    #   twcc: "TopWdthCC"  # TODO: rename to `topwidthcc`
    #   alt: "alt"
    #   musk: "MusK"
    #   musx: "MusX"
    #   cs: "ChSlp"  # TODO: rename to `sideslope`
    param_df = param_df.sort_index()
    # BUGFIX: a second, identical rename(columns=reverse_dict(cols)) call was
    # removed here — after the rename above, the route-link names are gone, so
    # the repeat was a no-op.

    wbodies = {}
    if "waterbody" in cols:
        wbodies = build_waterbodies(
            param_df[["waterbody"]], supernetwork_parameters, "waterbody"
        )
        param_df = param_df.drop("waterbody", axis=1)

    gages = {}
    if "gages" in cols:
        gages = build_gages(param_df[["gages"]])
        param_df = param_df.drop("gages", axis=1)

    connections = nhd_network.extract_connections(param_df, "downstream")
    # The downstream pointer is consumed by extract_connections and is not a
    # routing parameter, so drop it before the float32 cast.
    param_df = param_df.drop("downstream", axis=1)
    param_df = param_df.astype("float32")
    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
    return connections, param_df, wbodies, gages
def test_build_connections():
    """Connections extracted from the test frame must match the expected map."""
    # There can be an externally determined terminal code -- that's the seed
    # value below. But there may also be off-domain nodes that are not
    # explicitly identified, yet are terminal (i.e., off-domain) because a
    # mask or other interior-domain truncation left an otherwise valid node
    # value pointed to while it was masked out or intentionally separated
    # into another domain.
    dangling = set(
        test_param_df.loc[
            ~test_param_df["downstream"].isin(test_param_df.index), "downstream"
        ].values
    )
    codes = {test_terminal_code} | dangling

    result = nhd_network.extract_connections(test_param_df, "downstream", codes)

    assert result == expected_connections
def build_connections(supernetwork_parameters):
    """Build channel connectivity, parameters, waterbodies, and gages.

    Besides the externally configured terminal code, any downstream value
    that does not resolve to a segment in the (possibly masked) domain is
    treated as terminal when extracting connections.

    Parameters
    ----------
    supernetwork_parameters : dict
        Configuration with the geo file path, column-name mapping, an
        optional domain mask, and an optional "terminal_code" (default 0).

    Returns
    -------
    tuple
        (connections, param_df, wbodies, gages): the downstream-connection
        map, the float32 channel-parameter table, the waterbody map (empty
        dict when no "waterbody" column is configured), and the gage map
        (empty dict when no "gages" column is configured).
    """
    cols = supernetwork_parameters["columns"]
    terminal_code = supernetwork_parameters.get("terminal_code", 0)

    param_df = nhd_io.read(pathlib.Path(supernetwork_parameters["geo_file_path"]))
    param_df = param_df[list(cols.values())]
    param_df = param_df.set_index(cols["key"])

    if "mask_file_path" in supernetwork_parameters:
        data_mask = nhd_io.read_mask(
            pathlib.Path(supernetwork_parameters["mask_file_path"]),
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        param_df = param_df.filter(
            data_mask.iloc[:, supernetwork_parameters["mask_key"]], axis=0
        )

    param_df = param_df.rename(columns=nhd_network.reverse_dict(cols))
    # Rename parameter columns to standard names: from route-link names
    #   key: "link"
    #   downstream: "to"
    #   dx: "Length"
    #   n: "n"  # TODO: rename to `manningn`
    #   ncc: "nCC"  # TODO: rename to `mannningncc`
    #   s0: "So"  # TODO: rename to `bedslope`
    #   bw: "BtmWdth"  # TODO: rename to `bottomwidth`
    #   waterbody: "NHDWaterbodyComID"
    #   gages: "gages"
    #   tw: "TopWdth"  # TODO: rename to `topwidth`
    #   twcc: "TopWdthCC"  # TODO: rename to `topwidthcc`
    #   alt: "alt"
    #   musk: "MusK"
    #   musx: "MusX"
    #   cs: "ChSlp"  # TODO: rename to `sideslope`
    param_df = param_df.sort_index()
    # BUGFIX (resolves the old TODO): a second, identical
    # rename(columns=nhd_network.reverse_dict(cols)) call was removed here —
    # after the rename above, the route-link names are gone, so it was a no-op.

    wbodies = {}
    if "waterbody" in cols:
        wbodies = build_waterbodies(
            param_df[["waterbody"]], supernetwork_parameters, "waterbody"
        )
        param_df = param_df.drop("waterbody", axis=1)

    gages = {}
    if "gages" in cols:
        gages = build_gages(param_df[["gages"]])
        param_df = param_df.drop("gages", axis=1)

    # There can be an externally determined terminal code -- that's this first value
    terminal_codes = set()
    terminal_codes.add(terminal_code)
    # ... but there may also be off-domain nodes that are not explicitly
    # identified, yet are terminal (i.e., off-domain) because a mask or some
    # other interior-domain truncation left an otherwise valid node value
    # being pointed to while it was masked out or intentionally separated
    # into another domain.
    terminal_codes = terminal_codes | set(
        param_df[~param_df["downstream"].isin(param_df.index)]["downstream"].values
    )

    connections = nhd_network.extract_connections(
        param_df, "downstream", terminal_codes=terminal_codes
    )
    # The downstream pointer is consumed by extract_connections and is not a
    # routing parameter, so drop it before the float32 cast.
    param_df = param_df.drop("downstream", axis=1)
    param_df = param_df.astype("float32")
    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
    return connections, param_df, wbodies, gages
def main():
    """Route NextGen catchment lateral flows through an NHD channel network.

    Reads the NextGen hydrofabric connectivity, joins route-link channel
    parameters via the crosswalk, decomposes the network into reaches, runs
    the Muskingum-Cunge kernel per tailwater network, and writes the
    flow/velocity/depth results to a CSV next to the supernetwork file.
    """
    args = _handle_args()

    next_gen_input_folder = test_folder.joinpath("input", "next_gen")
    if args.input:
        next_gen_input_folder = pathlib.Path(args.input)

    # The following 2 values are currently hard coded for this test domain
    nts = 720  # number of timestep = 1140 * 60(model timestep) = 86400 = day
    dt_mc = 300.0  # time interval for MC

    # Currently tested on the Sugar Creek domain
    ngen_network_df = nhd_io.read_geopandas(args.supernetwork)
    if args.subset:
        ngen_network_df = ngen_network_df[
            ngen_network_df['realized_catchment'].isin(args.subset)
        ]

    # Create dictionary mapping each connection ID
    ngen_network_dict = dict(zip(ngen_network_df.id, ngen_network_df.toid))
    # ngen_network_dict = dict(zip(ngen_network_df.ID, ngen_network_df.toID))

    def node_key_func(x):
        # Drop the 3-character ID prefix and keep the integer part.
        return int(x[3:])

    # Extract the ID integer values
    waterbody_connections = {
        node_key_func(k): node_key_func(v) for k, v in ngen_network_dict.items()
    }

    # Convert dictionary connections to data frame and make ID column the index
    waterbody_df = pd.DataFrame.from_dict(
        waterbody_connections, orient='index', columns=['to']
    )

    # Sort ID index column
    waterbody_df = waterbody_df.sort_index()

    waterbody_df = nhd_io.replace_downstreams(waterbody_df, "to", 0)

    connections = nhd_network.extract_connections(waterbody_df, "to")

    # Read and convert catchment lateral flows to format that can be
    # processed by compute_network
    qlats = next_gen_io.read_catchment_lateral_flows(next_gen_input_folder)
    print(qlats)

    rconn = nhd_network.reverse_network(connections)
    subnets = nhd_network.reachable_network(rconn, check_disjoint=False)

    # read the routelink file
    nhd_routelink = nhd_io.read_netcdf("data/RouteLink_NHDPLUS.nc")
    nhd_routelink['dt'] = 300.0
    nhd_routelink.set_index("link", inplace=True)

    routelink_cols = {
        "downstream": "to",
        "dx": "Length",
        "n": "n",
        "ncc": "nCC",
        "s0": "So",
        "bw": "BtmWdth",
        "tw": "TopWdth",
        "twcc": "TopWdthCC",
        "waterbody": "NHDWaterbodyComID",
        "musk": "MusK",
        "musx": "MusX",
        "cs": "ChSlp",
    }
    # Invert the mapping (route-link name -> standard name) with a dict
    # comprehension instead of dict([(value, key) ...]).
    routelink_cols = {value: key for key, value in routelink_cols.items()}
    nhd_routelink.rename(columns=routelink_cols, inplace=True)

    # Map each catchment to its outlet COMID so route-link parameters can be
    # joined onto the NextGen network.
    with open(next_gen_input_folder / 'coarse/crosswalk.json') as f:
        crosswalk_data = json.load(f)
    waterbody_df['comid'] = waterbody_df.apply(
        lambda x: crosswalk_data['cat-' + str(x.name)]['outlet_COMID'], axis=1
    )
    waterbody_df = waterbody_df.join(nhd_routelink, on='comid', how='left')
    del nhd_routelink  # release the full route-link table

    # initial conditions, assume to be zero
    # TO DO: Allow optional reading of initial conditions from WRF
    q0 = pd.DataFrame(
        0, index=waterbody_df.index, columns=["qu0", "qd0", "h0"], dtype="float32"
    )

    # Set types as float32
    waterbody_df = waterbody_df.astype(
        {
            "dt": "float32",
            "bw": "float32",
            "tw": "float32",
            "twcc": "float32",
            "dx": "float32",
            "n": "float32",
            "ncc": "float32",
            "cs": "float32",
            "s0": "float32",
        }
    )

    # Decompose each tailwater network into junction-delimited reaches.
    subreaches = {}
    for tw, net in subnets.items():
        path_func = partial(nhd_network.split_at_junction, net)
        subreaches[tw] = nhd_network.dfs_decomposition(net, path_func)

    results = []
    for twi, (tw, reach) in enumerate(subreaches.items(), 1):
        r = list(chain.from_iterable(reach))
        data_sub = waterbody_df.loc[
            r, ['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']
        ].sort_index()
        # data_sub = waterbody_df.loc[r, ['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
        qlat_sub = qlats.loc[r].sort_index()
        q0_sub = q0.loc[r].sort_index()
        results.append(
            mc_reach.compute_network(
                nts,
                reach,
                subnets[tw],
                data_sub.index.values,
                data_sub.columns.values,
                data_sub.values,
                qlat_sub.values,
                q0_sub.values,
            )
        )

    fdv_columns = pd.MultiIndex.from_product(
        [range(nts), ['q', 'v', 'd']]
    ).to_flat_index()
    flowveldepth = pd.concat(
        [pd.DataFrame(d, index=i, columns=fdv_columns) for i, d in results],
        copy=False,
    )
    flowveldepth = flowveldepth.sort_index()

    # BUGFIX: the previous `args.supernetwork.split(".")[0]` truncated at the
    # FIRST dot, mangling paths whose directory names contain dots; strip only
    # the final extension instead.
    outfile_base_name = str(pathlib.Path(args.supernetwork).with_suffix(""))
    flowveldepth.to_csv(f"{outfile_base_name}_mc_results.csv")
    print(flowveldepth)
def main():
    """Drive a Muskingum-Cunge routing run over an NHD supernetwork.

    STEP 1 builds the supernetwork connections and parameter table; STEP 2
    organizes the network into reaches; the compute phase runs the routing
    kernel per tailwater network, either in parallel (joblib threading) or
    serially, then optionally writes/prints the flow/velocity/depth results.
    """
    args = _handle_args()

    nts = args.nts
    debuglevel = -1 * args.debuglevel
    verbose = args.verbose
    showtiming = args.showtiming
    supernetwork = args.supernetwork
    break_network_at_waterbodies = args.break_network_at_waterbodies
    csv_output_folder = args.csv_output_folder
    assume_short_ts = args.assume_short_ts

    test_folder = pathlib.Path(root, "test")
    geo_input_folder = test_folder.joinpath("input", "geo")

    # TODO: Make these commandline args
    """##NHD Subset (Brazos/Lower Colorado)"""
    # supernetwork = 'Brazos_LowerColorado_Named_Streams'
    # supernetwork = 'Brazos_LowerColorado_ge5'
    # supernetwork = 'Pocono_TEST1'
    """##NHD CONUS order 5 and greater"""
    # supernetwork = 'CONUS_ge5'
    """These are large -- be careful"""
    # supernetwork = 'Mainstems_CONUS'
    # supernetwork = 'CONUS_FULL_RES_v20'
    # supernetwork = 'CONUS_Named_Streams' #create a subset of the full resolution by reading the GNIS field
    # supernetwork = 'CONUS_Named_combined' #process the Named streams through the Full-Res paths to join the many hanging reaches

    if verbose:
        print("creating supernetwork connections set")
    if showtiming:
        start_time = time.time()

    # STEP 1
    network_data = nnu.set_supernetwork_data(
        supernetwork=args.supernetwork,
        geo_input_folder=geo_input_folder,
        verbose=False,
        debuglevel=debuglevel,
    )

    cols = network_data["columns"]
    param_df = nhd_io.read(network_data["geo_file_path"])
    param_df = param_df[list(cols.values())]
    param_df = param_df.set_index(cols["key"])

    if "mask_file_path" in network_data:
        data_mask = nhd_io.read_mask(
            network_data["mask_file_path"],
            layer_string=network_data["mask_layer_string"],
        )
        param_df = param_df.filter(data_mask.iloc[:, network_data["mask_key"]], axis=0)

    param_df = param_df.sort_index()
    # Normalize terminal/off-domain downstream pointers to 0.
    param_df = nhd_io.replace_downstreams(param_df, cols["downstream"], 0)

    if args.ql:
        qlats = nhd_io.read_qlat(args.ql)
    else:
        qlats = constant_qlats(param_df, nts, 10.0)

    # initial conditions, assume to be zero
    # TO DO: Allow optional reading of initial conditions from WRF
    q0 = pd.DataFrame(
        0, index=param_df.index, columns=["qu0", "qd0", "h0"], dtype="float32"
    )

    connections = nhd_network.extract_connections(param_df, cols["downstream"])
    wbodies = nhd_network.extract_waterbodies(
        param_df, cols["waterbody"], network_data["waterbody_null_code"]
    )

    if verbose:
        print("supernetwork connections set complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))

    # STEP 2
    if showtiming:
        start_time = time.time()
    if verbose:
        print("organizing connections into reaches ...")

    rconn = nhd_network.reverse_network(connections)
    independent_networks = nhd_network.reachable_network(rconn)
    reaches_bytw = {}
    for tw, net in independent_networks.items():
        path_func = partial(nhd_network.split_at_junction, net)
        reaches_bytw[tw] = nhd_network.dfs_decomposition(net, path_func)

    if verbose:
        print("reach organization complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))
    if showtiming:
        start_time = time.time()

    param_df["dt"] = 300.0
    param_df = param_df.rename(columns=nnu.reverse_dict(cols))
    param_df = param_df.astype("float32")
    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]

    parallel_compute_method = args.parallel_compute_method
    cpu_pool = args.cpu_pool
    compute_method = args.compute_method

    # BUGFIX: the previous `if compute_method == "standard cython compute
    # network"` selected mc_reach.compute_network on BOTH branches — a dead
    # conditional. It is the only kernel currently available; reinstate the
    # branch once an alternative compute function exists.
    compute_func = mc_reach.compute_network

    # Channel-parameter columns fed to the compute kernel (hoisted so the
    # parallel and serial paths cannot drift apart).
    routing_columns = ["dt", "bw", "tw", "twcc", "dx", "n", "ncc", "cs", "s0"]

    if parallel_compute_method == "by-network":
        with Parallel(n_jobs=cpu_pool, backend="threading") as parallel:
            jobs = []
            for twi, (tw, reach_list) in enumerate(reaches_bytw.items(), 1):
                r = list(chain.from_iterable(reach_list))
                param_df_sub = param_df.loc[r, routing_columns].sort_index()
                qlat_sub = qlats.loc[r].sort_index()
                q0_sub = q0.loc[r].sort_index()
                jobs.append(
                    delayed(compute_func)(
                        nts,
                        reach_list,
                        independent_networks[tw],
                        param_df_sub.index.values,
                        param_df_sub.columns.values,
                        param_df_sub.values,
                        qlat_sub.values,
                        q0_sub.values,
                    )
                )
            results = parallel(jobs)
    else:  # Execute in serial
        results = []
        for twi, (tw, reach_list) in enumerate(reaches_bytw.items(), 1):
            r = list(chain.from_iterable(reach_list))
            param_df_sub = param_df.loc[r, routing_columns].sort_index()
            qlat_sub = qlats.loc[r].sort_index()
            q0_sub = q0.loc[r].sort_index()
            results.append(
                compute_func(
                    nts,
                    reach_list,
                    independent_networks[tw],
                    param_df_sub.index.values,
                    param_df_sub.columns.values,
                    param_df_sub.values,
                    qlat_sub.values,
                    q0_sub.values,
                )
            )

    if (debuglevel <= -1) or csv_output_folder:
        qvd_columns = pd.MultiIndex.from_product(
            [range(nts), ["q", "v", "d"]]
        ).to_flat_index()
        flowveldepth = pd.concat(
            [pd.DataFrame(d, index=i, columns=qvd_columns) for i, d in results],
            copy=False,
        )

        if csv_output_folder:
            flowveldepth = flowveldepth.sort_index()
            output_path = pathlib.Path(csv_output_folder).resolve()
            flowveldepth.to_csv(output_path.joinpath(f"{args.supernetwork}.csv"))

        if debuglevel <= -1:
            print(flowveldepth)

    if verbose:
        print("ordered reach computation complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))