def test_xy_to_gpd():
    pts_df = pts[[sites_col_name, 'geometry']].copy()
    pts_df['x'] = pts_df.geometry.x
    pts_df['y'] = pts_df.geometry.y
    pts_df.drop('geometry', axis=1, inplace=True)
    pts3 = vector.xy_to_gpd(sites_col_name, 'x', 'y', pts_df)

    assert (len(pts3) == 2) & isinstance(pts3, gpd.GeoDataFrame)
def rd_niwa_rcp(base_path, mtypes, poly,
                vcsn_sites_csv=r'\\fileservices02\ManagedShares\Data\VirtualClimate\GIS\niwa_vcsn_wgs84.csv',
                id_col='Network', x_col='deg_x', y_col='deg_y',
                output_fun=None, export_path='output'):
    """
    Function to read in the NIWA RCP netcdf files and output the data in a specified format.
    """
    mtype_name = {'precip': 'TotalPrecipCorr', 'T_max': 'MaxTempCorr', 'T_min': 'MinTempCorr',
                  'P_atmos': 'MSLP', 'PET': 'PE', 'RH_mean': 'RelHum', 'R_s': 'SurfRad',
                  'U_z': 'WindSpeed'}

    ### Import and reorganize data
    vcsn_sites = pd.read_csv(vcsn_sites_csv)[[id_col, x_col, y_col]]
    sites_gpd = vector.xy_to_gpd(id_col, x_col, y_col, vcsn_sites, 4326)
    poly1 = gpd.read_file(poly)
    sites_gpd2 = sites_gpd.to_crs(poly1.crs)
    mtypes1 = [mtype_name[i] for i in mtypes]

    ### Select sites
    sites_gpd3 = vector.sel_sites_poly(sites_gpd2, poly1)[id_col]
    site_loc1 = vcsn_sites[vcsn_sites[id_col].isin(sites_gpd3)]
    site_loc1.columns = ['id', 'x', 'y']

    ### Read and extract data from netcdf files
    for root, dirs, files in os.walk(base_path):
        files2 = [i for i in files if i.endswith('.nc')]
        files3 = [j for j in files2 if any(j.startswith(i) for i in mtypes1)]
        file_paths1 = [os.path.join(root, i) for i in files3]
        if len(file_paths1) > 0:
            ds = rd_niwa_rcp_dir(file_paths1, site_loc1, mtypes)
            if callable(output_fun):
                new_base_path = root.replace(base_path, export_path)
                base_file_name = file_paths1[0].split('VCSN_')[1]
                if not os.path.exists(new_base_path):
                    os.makedirs(new_base_path)
                output_fun(ds, new_base_path, base_file_name)
                print(base_file_name)
            else:
                raise ValueError('Must have an output function.')
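
# --- Hedged usage sketch (not from the source): the RCP base path, the polygon path, the
# --- export directory, and the CSV-writing callback below are illustrative assumptions.
def write_csv(ds, out_dir, base_file_name):
    # Assumes rd_niwa_rcp_dir returns an xarray Dataset
    df = ds.to_dataframe().reset_index()
    df.to_csv(os.path.join(out_dir, base_file_name.replace('.nc', '.csv')), index=False)

rd_niwa_rcp(r'\\fileservices02\ManagedShares\Data\VirtualClimate\NIWA_RCP', ['precip', 'PET'],
            'my_catchment.shp', output_fun=write_csv, export_path=r'C:\temp\rcp_extracts')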
def process_sites(self, input_sites):
    """
    Function to process the sites.

    Parameters
    ----------
    input_sites : str, int, or list
        Flow sites (either recorder or gauging) to be naturalised.

    Returns
    -------
    DataFrame
    """
    ## Checks
    if isinstance(input_sites, (str, int)):
        input_sites = [input_sites]
    elif not isinstance(input_sites, list):
        raise ValueError('input_sites must be a str, int, or list')

    ## Convert sites to gdf
    sites_gdf = vector.xy_to_gpd(['ExtSiteID', 'CollectionType'], 'NZTMX', 'NZTMY', self.summ.drop_duplicates('ExtSiteID'))
    input_summ1 = self.summ[self.summ.ExtSiteID.isin(input_sites)].copy()

    bad_sites = [s for s in input_sites if s not in input_summ1.ExtSiteID.unique()]
    if bad_sites:
        print(', '.join(bad_sites) + ' sites are not available for naturalisation')

    flow_sites_gdf = sites_gdf[sites_gdf.ExtSiteID.isin(input_sites)].copy()

    ## Save if required
    if hasattr(self, 'output_path'):
        run_time = pd.Timestamp.today().strftime('%Y-%m-%dT%H%M')
        flow_sites_shp = param['output']['flow_sites_shp'].format(run_date=run_time)
        flow_sites_gdf.to_file(os.path.join(self.output_path, flow_sites_shp))

    setattr(self, 'sites_gdf', sites_gdf)
    setattr(self, 'flow_sites_gdf', flow_sites_gdf)
    setattr(self, 'input_summ', input_summ1)

    ## Remove existing attributes if they exist
    for attr in ('catch_gdf', 'waps_gdf', 'flow', 'usage_rate', 'nat_flow'):
        if hasattr(self, attr):
            delattr(self, attr)

    return input_summ1
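
# --- Hedged usage sketch (assumes this method sits on the FlowNat class used elsewhere in
# --- this repo; the site IDs below are placeholders):
# f1 = FlowNat(output_path=output_path, load_rec=True)
# input_summ = f1.process_sites(['69607', '69602'])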
def upstream_takes(self):
    """
    Function to determine the upstream water abstraction sites from the catchment delineation.

    Returns
    -------
    DataFrame
        allocation data
    """
    if hasattr(self, 'catch_gdf'):
        catch_gdf = self.catch_gdf.copy()
    else:
        catch_gdf = self.catch_del()

    ### WAP selection
    wap1 = mssql.rd_sql(self.permit_server, param['input']['permit_database'], param['input']['crc_wap_table'], ['ExtSiteID'], where_in={'ConsentStatus': param['input']['crc_status']}).ExtSiteID.unique()

    sites3 = self.sites[self.sites.ExtSiteID.isin(wap1)].copy()
    sites3.rename(columns={'ExtSiteID': 'Wap'}, inplace=True)
    sites4 = vector.xy_to_gpd('Wap', 'NZTMX', 'NZTMY', sites3)
    sites4 = sites4.merge(sites3.drop(['NZTMX', 'NZTMY'], axis=1), on='Wap')

    waps_gdf, poly1 = vector.pts_poly_join(sites4, catch_gdf, 'ExtSiteID')
    waps_gdf.dropna(subset=['SwazName', 'SwazGroupName'], inplace=True)

    ### Get crc data
    if waps_gdf.empty:
        print('No WAPs were found in the polygon')
        allo_wap = pd.DataFrame()
    else:
        allo1 = AlloUsage(crc_filter={'ExtSiteID': waps_gdf.Wap.unique().tolist(), 'ConsentStatus': param['input']['crc_status']}, from_date=self.from_date, to_date=self.to_date)

        allo_wap1 = allo1.allo.copy()
        allo_wap = pd.merge(allo_wap1.reset_index(), waps_gdf[['Wap', 'ExtSiteID']], on='Wap')

    ## Save if required
    if hasattr(self, 'output_path'):
        run_time = pd.Timestamp.today().strftime('%Y-%m-%dT%H%M')

        waps_shp = param['output']['waps_shp'].format(run_date=run_time)
        waps_gdf.to_file(os.path.join(self.output_path, waps_shp))

        allo_data_csv = param['output']['allo_data_csv'].format(run_date=run_time)
        allo_wap.to_csv(os.path.join(self.output_path, allo_data_csv), index=False)

    ## Return
    setattr(self, 'waps_gdf', waps_gdf)
    setattr(self, 'allo_wap', allo_wap)

    return allo_wap
pkl2c = 'catchment_delineation_rec_2019-07-17c.pkl.xz'
pkl2f = 'rec_2-4b.pkl.xz'
pkl2g = 'rec_catch_2-4b.pkl.xz'

rec_rivers_path = r'P:\WaterDataProgramme\Source Data\GDBs\river-environment-classification-canterbury-2010\river-environment-classification-canterbury-2010.gpkg'
rec_catch_path = r'P:\WaterDataProgramme\Source Data\GDBs\river-environment-classification-watershed-canterbury-2010\river-environment-classification-watershed-canterbury-2010.gpkg'

###################################
###

f1 = FlowNat(output_path=output_path, load_rec=True)

sites1 = f1.summ[['ExtSiteID', 'NZTMX', 'NZTMY']].copy()

sites_gdf = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', sites1)

## Catch del
catch_gdf = catch_delineate(sites_gdf, f1.rec_rivers, f1.rec_catch)
catch_gdf.to_file(os.path.join(output_path, shp1))

c2 = catch_gdf.drop(['NZREACH', 'area'], axis=1).copy()
c2['geometry'] = catch_gdf.simplify(30)

c2.plot()
catch_gdf.plot()

c2.to_file(os.path.join(output_path, gpkg1), driver='GPKG')

# ### Save if required
def getCorrelations(self):
    '''
    Calculate correlations (fits) for flow sites that do not have a recorder.

    Returns:
        - dataframe with the 6 best correlations for each flow site
        - dataframe with flow time-series for all sites (recorder and manual gaugings)
        - geopandas dataframe with locations of recorder sites within a buffer distance from the manual sites
    Writes:
        - csv-file with the 6 best correlations for each flow site
        - csv-file with flow time-series for all sites (recorder and manual gaugings)
        - shapefile with locations of recorder sites within a buffer distance from the manual sites
    '''
    dataset_type_table = 'DatasetType'
    ts_summ_table = 'TSDataNumericDailySumm'
    ts_table = 'TSDataNumericDaily'
    site_table = 'ExternalSite'

    self.min_flow_obs = self.config.getint('FLOW_CORRELATIONS', 'min_flow_obs')
    self.buf_dist = self.config.getint('FLOW_CORRELATIONS', 'buf_dist')
    self.filter_winter_flow = self.config.getint('FLOW_CORRELATIONS', 'filter_winter_flow')

    #-lists of stations and corresponding dates for which flow records should be removed before correlation is calculated
    remove_stat_dates = self.config.get('FLOW_CORRELATIONS', 'remove_stat_dates').split(',')
    if len(remove_stat_dates) > 1:
        remove_stats = remove_stat_dates[0:-1:2]
        remove_dates = remove_stat_dates[1::2]
    else:
        remove_stats = False
        remove_dates = False
        remove_stat_dates = None

    #-Get lowflow sites
    flow_sites = self.flow_sites_gdf.flow_site.tolist()

    ## Read in datasettypes
    datasets = mssql.rd_sql(self.server, self.database, dataset_type_table, ['DatasetTypeID', 'CTypeID'], where_in={'FeatureID': [1], 'MTypeID': [2], 'CTypeID': [1, 2], 'DataCodeID': [1]})

    #-Get datasetTypes for recorded data and manual data
    rec_datasetTypes = datasets[datasets.CTypeID == 1].DatasetTypeID.tolist()
    man_datasetTypes = datasets[datasets.CTypeID == 2].DatasetTypeID.tolist()
    all_datasetTypes = rec_datasetTypes.copy()
    all_datasetTypes.extend(man_datasetTypes)

    #-Get summary table for the lowflow sites
    site_summ = mssql.rd_sql(self.server, self.database, ts_summ_table, where_in={'DatasetTypeID': all_datasetTypes, 'ExtSiteID': flow_sites})
    site_summ.FromDate = pd.to_datetime(site_summ.FromDate)
    site_summ.ToDate = pd.to_datetime(site_summ.ToDate)
    site_summ.drop('ModDate', axis=1, inplace=True)

    #-Throw out sites from summary table and lowflow sites that have less records than 'min_flow_obs'
    too_short = site_summ.loc[site_summ.Count < self.min_flow_obs, ['ExtSiteID', 'Count']]
    if len(too_short) > 0:
        for j in too_short.iterrows():
            print('ExtSiteID %s is not used because it only has %s records, which is less than %s.' % (j[1]['ExtSiteID'], int(j[1]['Count']), self.min_flow_obs))
            site_summ = site_summ.loc[site_summ['ExtSiteID'] != j[1]['ExtSiteID']]
            flow_sites.remove(j[1]['ExtSiteID'])

    #-Get site ids for recorder and manual sites of the lowflow sites and create geodataframe for manually recorded lowflow sites
    rec_sites = site_summ.loc[site_summ.DatasetTypeID.isin(rec_datasetTypes), 'ExtSiteID'].tolist()
    man_sites = site_summ.loc[site_summ.DatasetTypeID.isin(man_datasetTypes), 'ExtSiteID'].tolist()
    man_sites_gdf = self.flow_sites_gdf[self.flow_sites_gdf.flow_site.isin(man_sites)].copy()

    #-get all recorder flow sites within a buffer distance from the manually recorded lowflow sites
    man_sites_buffer_gdf = man_sites_gdf.copy()
    man_sites_buffer_gdf['geometry'] = man_sites_gdf.buffer(self.buf_dist)
    all_recorder_flowsites = mssql.rd_sql(self.server, self.database, ts_summ_table, ['ExtSiteID'], where_in={'DatasetTypeID': rec_datasetTypes})
    all_recorder_flowsites = pd.unique(all_recorder_flowsites.ExtSiteID).tolist()
    all_recorder_flowsites = mssql.rd_sql(self.server, self.database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': all_recorder_flowsites})
    all_recorder_flowsites_gdf = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', all_recorder_flowsites)
    self.rec_sites_buffer_gdf = vector.sel_sites_poly(all_recorder_flowsites_gdf, man_sites_buffer_gdf)

    #-write recorder sites within buffer to a shapefile
    self.rec_sites_buffer_gdf.to_file(os.path.join(self.results_path, self.config.get('FLOW_CORRELATIONS', 'rec_sites_shp')))
    all_recorder_flowsites = None; all_recorder_flowsites_gdf = None; del all_recorder_flowsites, all_recorder_flowsites_gdf

    #-merge list of lowflow sites with list of recorder sites in buffer to create list of all sites for which to extract data
    all_flow_sites = flow_sites.copy()
    all_flow_sites.extend(pd.unique(self.rec_sites_buffer_gdf.ExtSiteID).tolist())
    all_flow_sites = pd.DataFrame(columns=all_flow_sites)
    all_flow_sites = pd.unique(all_flow_sites.columns).tolist()

    #-Get time-series of all_flow_sites
    ts_df = mssql.rd_sql(self.server, self.database, ts_table, ['ExtSiteID', 'DatasetTypeID', 'DateTime', 'Value', 'QualityCode'], where_in={'DatasetTypeID': all_datasetTypes, 'ExtSiteID': all_flow_sites})
    ts_df = ts_df.loc[ts_df.QualityCode > 100]  #-everything other than missing data
    ts_df.DateTime = pd.to_datetime(ts_df.DateTime)
    ts_df_sites = pd.unique(ts_df.ExtSiteID).tolist()
    all_flow_sites = None; del all_flow_sites

    #-Check if there is data for all lowflow sites
    for j in flow_sites:
        if j not in ts_df_sites:
            print('There is zero flow data for site %s, or record of observations for this site was too short.' % j)

    #-Minimum and maximum date of all data
    min_date = ts_df.DateTime.min()
    max_date = ts_df.DateTime.max()

    #-Fill dataframe with data for min_date through max_date
    df_final = pd.DataFrame(index=pd.date_range(min_date, max_date, freq='D'), columns=ts_df_sites)
    df_final.rename_axis('DateTime', inplace=True)
    for j in ts_df_sites:
        df_short = ts_df.loc[ts_df.ExtSiteID == j, ['DateTime', 'Value', 'QualityCode']]
        #-keep only the records with the highest qualitycode
        df_short_group = df_short.groupby(['DateTime', 'QualityCode']).mean()
        df_short_group.reset_index(inplace=True)
        df_short_group.sort_values(by=['DateTime', 'QualityCode'], inplace=True)
        df_short_group.drop_duplicates(subset='DateTime', keep='last', inplace=True)
        df_short_group.drop('QualityCode', inplace=True, axis=1)
        df_short_group.set_index('DateTime', inplace=True)
        df_short_group.rename(columns={'Value': j}, inplace=True)
        df_final[[j]] = df_short_group
    ts_df = None; df_short_group = None; del ts_df, df_short_group
    df_final.dropna(how='all', inplace=True)

    #-create a copy of all gauged and recorded flow (before filtering) to make it accessible in the main class
    self.flow_ts_df = df_final.copy()
    self.flow_ts_df.to_csv(os.path.join(self.results_path, self.config.get('FLOW_CORRELATIONS', 'flow_ts_csv')))

    #-remove zero records
    df_final[df_final == 0] = np.nan

    #-remove unreliable values using lists of stations and dates
    if remove_stats and remove_dates:
        df_final.reset_index(inplace=True)
        i = 0
        for s in remove_stats:
            stat_date = remove_dates[i]
            df_final.loc[(df_final['DateTime'] == stat_date), s] = np.nan
            i += 1
        df_final.set_index('DateTime', inplace=True)

    #-keep only winter flows for estimating correlations
    if self.filter_winter_flow:
        df_final.reset_index(inplace=True)
        df_final['Month'] = df_final['DateTime'].dt.strftime('%m').astype(int)
        df_final.set_index('DateTime', inplace=True)
        df_final = df_final.loc[(df_final.Month > 4) & (df_final.Month < 10)]
        df_final.dropna(how='all', inplace=True)

    #-loop over the lowflow sites and calculate regressions
    df_regression = pd.DataFrame(columns=['y', 'x', 'mean_y', 'mean_x', 'nobs', 'rmse', 'Adj. R2-squared', 'p-value', 'slope', 'intercept', 'power', 'fittype'])
    i = 0
    for j in flow_sites:
        for s in self.rec_sites_buffer_gdf.ExtSiteID.tolist():
            sel_df = df_final[[j, s]]
            sel_df.replace(0., np.nan, inplace=True)
            sel_df.dropna(inplace=True)
            #-only fit for sites that have the minimum number of observations
            if len(sel_df) >= self.min_flow_obs:
                try:
                    #-linear regression
                    #-set x and y
                    x = sel_df[[s]]
                    x = sm.add_constant(x)  #-needed for intercept
                    y = sel_df[[j]]

                    #-linear fit
                    model = sm.OLS(y, x).fit()
                    #-predict using the model
                    predictions = model.predict(x)  # make the predictions by the model
                    rmse_val = np.sqrt(np.mean((predictions.to_numpy() - y.to_numpy())**2))

                    #-fill dataframe with stats for linear fit
                    df_regression.loc[i, 'y'] = j
                    df_regression.loc[i, 'x'] = s
                    df_regression.loc[i, 'mean_y'] = np.nanmean(y)
                    df_regression.loc[i, 'mean_x'] = np.nanmean(x[s].to_numpy())
                    df_regression.loc[i, 'nobs'] = model.nobs
                    df_regression.loc[i, 'rmse'] = rmse_val
                    df_regression.loc[i, 'Adj. R2-squared'] = model.rsquared_adj
                    df_regression.loc[i, 'p-value'] = model.f_pvalue
                    df_regression.loc[i, 'slope'] = model.params[1]
                    df_regression.loc[i, 'intercept'] = model.params[0]
                    df_regression.loc[i, 'fittype'] = 'linear'
                    i += 1
                except:
                    print('Could not establish linear fit for %s and %s...' % (j, s))
                try:
                    #-power fit regression
                    #-set x and y
                    x = sel_df[[s]]
                    x = sm.add_constant(x)  #-needed for intercept
                    y = sel_df[[j]]
                    #-***********************************************************
                    #- y = ax^b
                    #- log(y) = log(a) + b*log(x)
                    #-***********************************************************
                    logY = np.log(y)
                    logX = np.log(x[[s]])
                    logX = sm.add_constant(logX)
                    model = sm.OLS(logY, logX).fit()
                    x = x[s].to_numpy()
                    #-predictions of the exact x values for calculating rmse
                    predictions = np.exp(model.params[0]) * x**model.params[1]
                    rmse_val = np.sqrt(np.mean((predictions - y.to_numpy())**2))

                    #-fill dataframe with stats for power fit
                    df_regression.loc[i, 'y'] = j
                    df_regression.loc[i, 'x'] = s
                    df_regression.loc[i, 'mean_y'] = np.nanmean(y)
                    df_regression.loc[i, 'mean_x'] = np.nanmean(x)
                    df_regression.loc[i, 'nobs'] = model.nobs
                    df_regression.loc[i, 'rmse'] = rmse_val
                    df_regression.loc[i, 'Adj. R2-squared'] = model.rsquared_adj
                    df_regression.loc[i, 'p-value'] = model.f_pvalue
                    df_regression.loc[i, 'slope'] = np.exp(model.params[0])
                    df_regression.loc[i, 'power'] = model.params[1]
                    df_regression.loc[i, 'fittype'] = 'power'
                    i += 1
                except:
                    print('Could not establish power fit for %s and %s...' % (j, s))
            sel_df = None; del sel_df

    #-remove negative correlations
    df_regression.loc[df_regression['Adj. R2-squared'] < 0., :] = np.nan
    df_regression.dropna(how='all', inplace=True)

    #-loop over lowflow sites, and select best six fits by sorting on R2, rmse, nobs
    self.best_regressions_df = pd.DataFrame(columns=df_regression.columns)
    for j in flow_sites:
        sel_df = df_regression.loc[df_regression.y == j]
        sel_df.sort_values(by=['Adj. R2-squared', 'rmse', 'nobs'], ascending=[False, True, False], inplace=True)
        sel_df = sel_df.iloc[0:6, :]
        self.best_regressions_df = pd.concat([self.best_regressions_df, sel_df])
    sel_df = None; del sel_df
    self.best_regressions_df.to_csv(os.path.join(self.results_path, self.config.get('FLOW_CORRELATIONS', 'correlations_csv')), index=False)

    #-make 6 plots for each site to visually pick the best
    unique_y = pd.unique(self.best_regressions_df['y'])
    unique_y = list(unique_y)
    for j in flow_sites:
        if j in unique_y:
            #-get regression results for site j
            sel_df_regression = self.best_regressions_df.loc[self.best_regressions_df.y == j]

            #-plot the best 6 fits
            fig = plt.figure(figsize=(11, 8))
            kk = 1
            for k in sel_df_regression.iterrows():
                try:
                    ax1 = plt.subplot(3, 2, kk)
                    xSite = k[1]['x']
                    ySite = k[1]['y']
                    xy = df_final[[xSite, ySite]]
                    xy.dropna(inplace=True)
                    x = xy[xSite].to_numpy()
                    y = xy[ySite].to_numpy()

                    xmin = np.nanmin(x)
                    xmax = np.nanmax(x)
                    dx = (xmax - xmin) / 100
                    xvals = np.arange(xmin, xmax, dx)
                    ymin = np.nanmin(y)
                    ymax = np.nanmax(y)

                    #-Get the stats from the table
                    fit_type = k[1]['fittype']
                    if fit_type == 'linear':
                        eq_str = 'y = %.3fx + %.3f' % (k[1]['slope'], k[1]['intercept'])
                        predictions = k[1]['slope'] * xvals + k[1]['intercept']
                    else:
                        eq_str = 'y = %.3fx$^{%.3f}$' % (k[1]['slope'], k[1]['power'])
                        predictions = k[1]['slope'] * (xvals ** k[1]['power'])
                    r2 = k[1]['Adj. R2-squared']
                    rmse_val = k[1]['rmse']
                    p_val = k[1]['p-value']
                    nobs = k[1]['nobs']

                    plt.plot(xvals, predictions, label='fit')
                    plt.scatter(x, y, color='red', label='data')
                    ax1.grid(True)
                    ax1.set_xlabel('Flow at %s [m$^3$ s$^{-1}$]' % xSite)
                    ax1.set_ylabel('Flow at %s [m$^3$ s$^{-1}$]' % ySite)
                    ax1.legend(loc='lower right')

                    dy = (ymax - ymin) / 10
                    dx = (xmax - xmin) / 10
                    #-plot the stats in the plot
                    plt.text(xmin, ymax, eq_str)
                    plt.text(xmin, ymax - dy, 'R$^2$ %.2f' % r2)
                    plt.text(xmin, ymax - 2 * dy, 'RMSE %.2f' % rmse_val)
                    plt.text(xmin, ymax - 3 * dy, 'p-value %.2f' % p_val)
                    plt.text(xmin, ymax - 4 * dy, 'nobs %.0f' % nobs)
                    ax1.set_xlim([xmin - dx, xmax + dx])
                    ax1.set_ylim([ymin - dy, ymax + dy])
                    kk += 1
                except:
                    pass
            fig.tight_layout()
            if self.filter_winter_flow:
                plt.savefig(os.path.join(self.results_path, '%s_correlations_winteronly.png' % ySite), dpi=300)
            else:
                plt.savefig(os.path.join(self.results_path, '%s_correlations.png' % ySite), dpi=300)
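
# --- Illustration (not part of the class above): the power fit relies on linearising
# --- y = a*x**b as log(y) = log(a) + b*log(x), fitting OLS on the logs, and recovering
# --- a = exp(intercept) and b = slope. Synthetic data only.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = rng.uniform(0.5, 10.0, 200)
y = 2.5 * x**0.8 * np.exp(rng.normal(0.0, 0.05, 200))  # true a=2.5, b=0.8 plus noise

X = sm.add_constant(np.log(x))
log_model = sm.OLS(np.log(y), X).fit()

a = np.exp(log_model.params[0])  # stored as 'slope' in df_regression
b = log_model.params[1]          # stored as 'power' in df_regression
print(round(a, 2), round(b, 2))  # ~2.5 and ~0.8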
    })

sites_buffer_gdf = flow_sites_gdf.copy()
sites_buffer_gdf['geometry'] = sites_buffer_gdf.buffer(60000)

all_gwl_sites = mssql.rd_sql(server, database, ts_summ_table, ['ExtSiteID'], where_in={'DatasetTypeID': datasets.DatasetTypeID.tolist()})
all_gwl_sites = pd.unique(all_gwl_sites.ExtSiteID).tolist()
all_gwl_sites = mssql.rd_sql(server, database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': all_gwl_sites})
all_gwl_sites_gdf = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', all_gwl_sites)

sel_gwl_sites_gdf = vector.sel_sites_poly(all_gwl_sites_gdf, sites_buffer_gdf)
all_gwl_sites = None
all_gwl_sites_gdf = None

#-Get summary table for the gwl sites and filter period of interest
site_summ = mssql.rd_sql(server, database, ts_summ_table, where_in={'DatasetTypeID': datasets.DatasetTypeID.tolist(), 'ExtSiteID': sel_gwl_sites_gdf.ExtSiteID.tolist()})
site_summ.FromDate = pd.to_datetime(site_summ.FromDate)
site_summ.ToDate = pd.to_datetime(site_summ.ToDate)
site_summ.drop('ModDate', axis=1, inplace=True)
def sel_xy_nc(bound_shp, nc_path, x_col='longitude', y_col='latitude', time_col='time', nc_vars=None, buffer_dis=0, from_date=None, to_date=None, nc_crs=4326, out_crs=None, out_type='pandas'):
    """
    Function to select space and time data from a netcdf file using a polygon shapefile.

    bound_shp -- Polygon shapefile used to spatially clip the netcdf data.\n
    nc_path -- Path to the netcdf file.\n
    nc_vars -- Variable name(s) to extract; None extracts all variables.\n
    buffer_dis -- Buffer distance applied to the polygon (in the units of the shapefile's crs).\n
    from_date/to_date -- Optional date strings to limit the time dimension.\n
    nc_crs -- The crs of the netcdf coordinates.\n
    out_crs -- Optional output crs; if given, the x/y columns are reprojected.\n
    out_type -- 'pandas' or 'xarray' output (only used when out_crs is None).
    """
    ### Process the boundary layer
    bound = gpd.read_file(bound_shp).buffer(buffer_dis).to_crs(convert_crs(nc_crs))
    x_min, y_min, x_max, y_max = bound.unary_union.bounds

    ### Read and extract data from netcdf files
    ds1 = xr.open_dataset(nc_path)
    time1 = pd.to_datetime(ds1[time_col].values)
    if isinstance(from_date, str):
        time1 = time1[time1 >= from_date]
    if isinstance(to_date, str):
        time1 = time1[time1 <= to_date]
    lat1 = ds1[y_col].values
    lon1 = ds1[x_col].values
    lat2 = lat1[(lat1 >= y_min) & (lat1 <= y_max)]
    lon2 = lon1[(lon1 >= x_min) & (lon1 <= x_max)]
    ds2 = ds1.loc[{x_col: lon2, time_col: time1.values, y_col: lat2}]
    # coords1 = ds2.coords.keys()
    # dims1 = ds2.dims.keys()

    ## Select mtypes
    if isinstance(nc_vars, str):
        ds3 = ds2[[nc_vars]]
    elif isinstance(nc_vars, (list, np.ndarray, pd.Series)):
        ds3 = ds2[nc_vars]
    elif nc_vars is None:
        ds3 = ds2

    ### Convert to different crs if needed
    if out_crs is not None:
        df1 = ds3.to_dataframe().reset_index()
        xy1 = ds3[[x_col, y_col]].copy()
        xy2 = xy1.to_dataframe().reset_index()
        crs1 = util.convert_crs(out_crs)
        new_gpd1 = vector.xy_to_gpd(xy2.index, x_col, y_col, xy2, nc_crs)
        new_gpd2 = new_gpd1.to_crs(crs1)
        site_loc2 = xy2.copy()
        site_loc2['x_new'] = new_gpd2.geometry.apply(lambda j: j.x)
        site_loc2['y_new'] = new_gpd2.geometry.apply(lambda j: j.y)

        df2 = pd.merge(df1, site_loc2[[x_col, y_col, 'x_new', 'y_new']], on=[x_col, y_col], how='left')
        df3 = df2.drop([x_col, y_col], axis=1).rename(columns={'x_new': x_col, 'y_new': y_col})
        ds1.close()
        return df3
    elif out_type == 'pandas':
        df1 = ds3.to_dataframe().reset_index()
        ds1.close()
        return df1
    elif out_type == 'xarray':
        return ds3
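
# --- Hedged usage sketch (the boundary shapefile name is an assumption; 'rain'/'pe' are the
# --- VCSN variable names used elsewhere in this repo; buffer_dis is in the shapefile's crs units).
df_precip = sel_xy_nc(bound_shp='catchment_boundary.shp',
                      nc_path=r'\\fileservices02\ManagedShares\Data\VirtualClimate\vcsn_precip_et_2016-06-06.nc',
                      nc_vars=['rain', 'pe'], buffer_dis=5000,
                      from_date='2000-01-01', to_date='2010-12-31',
                      out_crs=2193)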
def rd_niwa_vcsn(mtypes, sites,
                 nc_path=r'\\fileservices02\ManagedShares\Data\VirtualClimate\vcsn_precip_et_2016-06-06.nc',
                 vcsn_sites_csv=r'\\fileservices02\ManagedShares\Data\VirtualClimate\GIS\niwa_vcsn_wgs84.csv',
                 id_col='Network', x_col='deg_x', y_col='deg_y',
                 buffer_dis=0, include_sites=False, from_date=None, to_date=None,
                 out_crs=None, netcdf_out=None):
    """
    Function to read in the NIWA vcsn netcdf file and output the data as a dataframe.

    mtypes -- A string or list of the measurement types (either 'precip', or 'PET').\n
    sites -- Either a list of vcsn site names or a polygon of the area of interest.\n
    nc_path -- The path to the vcsn nc file.\n
    vcsn_sites_csv -- The csv file that relates the site name to coordinates.\n
    id_col -- The site name column in vcsn_sites_csv.\n
    x_col -- The x column name in vcsn_sites_csv.\n
    y_col -- The y column name in vcsn_sites_csv.\n
    include_sites -- Should the site names be added to the output?\n
    out_crs -- The crs epsg number for the output coordinates if different than the default WGS84 (e.g. 2193 for NZTM).
    """
    mtype_name = {'precip': 'rain', 'PET': 'pe'}

    ### Import and reorganize data
    vcsn_sites = pd.read_csv(vcsn_sites_csv)[[id_col, x_col, y_col]]

    if isinstance(sites, str):
        if sites.endswith('.shp'):
            sites_gpd = vector.xy_to_gpd(id_col, x_col, y_col, vcsn_sites, 4326)
            poly1 = gpd.read_file(sites)

            sites_gpd2 = sites_gpd.to_crs(poly1.crs)

            ### Select sites
            sites2 = vector.sel_sites_poly(sites_gpd2, poly1, buffer_dis)[id_col]
    elif isinstance(sites, (list, pd.Series, np.ndarray)):
        sites2 = sites

    ### Select locations
    site_loc1 = vcsn_sites[vcsn_sites[id_col].isin(sites2)]
    site_loc1.columns = ['id', 'x', 'y']

    ### Select mtypes
    if isinstance(mtypes, str):
        mtypes1 = [mtype_name[mtypes]]
    else:
        mtypes1 = [mtype_name[i] for i in mtypes]

    if include_sites:
        mtypes1.extend(['site'])

    ### Read and extract data from netcdf files
    ds1 = xr.open_dataset(nc_path)
    time1 = pd.to_datetime(ds1.time.values)
    if isinstance(from_date, str):
        time1 = time1[time1 >= from_date]
    if isinstance(to_date, str):
        time1 = time1[time1 <= to_date]
    lat1 = ds1.latitude.values
    lon1 = ds1.longitude.values
    lat2 = lat1[np.in1d(lat1, site_loc1.y.unique())]
    lon2 = lon1[np.in1d(lon1, site_loc1.x.unique())]
    ds2 = ds1.loc[{'longitude': lon2, 'time': time1.values, 'latitude': lat2}]
    ds3 = ds2[mtypes1]

    ### Convert to DataFrame
    df1 = ds3.to_dataframe().reset_index()
    df1.rename(columns={'latitude': 'y', 'longitude': 'x'}, inplace=True)
    df1 = df1.dropna()

    ### Convert to different crs if needed
    if out_crs is not None:
        crs1 = util.convert_crs(out_crs)
        new_gpd1 = vector.xy_to_gpd('id', 'x', 'y', site_loc1, 4326)
        new_gpd2 = new_gpd1.to_crs(crs1)
        site_loc2 = site_loc1.copy()
        site_loc2['x_new'] = new_gpd2.geometry.apply(lambda j: j.x)
        site_loc2['y_new'] = new_gpd2.geometry.apply(lambda j: j.y)

        df2 = pd.merge(df1, site_loc2[['x', 'y', 'x_new', 'y_new']], on=['x', 'y'])
        df3 = df2.drop(['x', 'y'], axis=1).rename(columns={'x_new': 'x', 'y_new': 'y'})
        col_order = ['y', 'x', 'time']
        col_order.extend(mtypes1)
        df4 = df3[col_order]
    else:
        df4 = df1

    ds1.close()
    ds3.close()

    ### Return
    if isinstance(netcdf_out, str):
        ds3.to_netcdf(netcdf_out)
    return df4
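
# --- Hedged usage sketches (the catchment shapefile and site names are placeholders):
# extract precip and PET inside a catchment polygon, with output coordinates in NZTM
df_poly = rd_niwa_vcsn(['precip', 'PET'], 'catchment.shp', buffer_dis=5000, out_crs=2193)

# or pass an explicit list of VCSN site names and keep the site ids in the output
df_sites = rd_niwa_vcsn('precip', ['P123456', 'P123457'], include_sites=True)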
def _usage_estimation(self, freq, buffer_dis=40000, min_months=36):
    """
    Estimate usage for permits/waps without metered usage by applying the monthly
    usage-to-allocation ratios of nearby metered waps of the same use type.
    """
    ### Get the necessary data
    # a1 = AlloUsage()
    # a1.permits = self.permits.copy()
    # a1.waps = self.waps.copy()
    # a1.from_date = self.from_date
    # a1.to_date = self.to_date

    # if hasattr(self, 'total_allo_ts'):
    #     delattr(self, 'total_allo_ts')

    allo_use1 = self.get_ts(['allo', 'metered_allo', 'usage'], 'M', ['permit_id', 'wap'])

    permits = self.permits.copy()

    ### Create Wap locations
    waps1 = vector.xy_to_gpd('wap', 'lon', 'lat', self.waps.drop('permit_id', axis=1).drop_duplicates('wap'), 4326)
    waps2 = waps1.to_crs(2193)

    ### Determine which Waps need to be estimated
    allo_use_mis1 = allo_use1[allo_use1['total_metered_allo'] == 0].copy().reset_index()
    allo_use_with1 = allo_use1[allo_use1['total_metered_allo'] > 0].copy().reset_index()

    mis_waps1 = allo_use_mis1.groupby(['permit_id', 'wap'])['total_allo'].count().copy()
    with_waps1 = allo_use_with1.groupby(['permit_id', 'wap'])['total_allo'].count()
    with_waps2 = with_waps1[with_waps1 >= min_months]

    with_waps3 = pd.merge(with_waps2.reset_index()[['permit_id', 'wap']], permits[['permit_id', 'use_type']], on='permit_id')

    with_waps4 = pd.merge(waps2, with_waps3['wap'], on='wap')

    mis_waps2 = pd.merge(mis_waps1.reset_index(), permits[['permit_id', 'use_type']], on='permit_id')
    mis_waps3 = pd.merge(waps2, mis_waps2['wap'], on='wap')
    mis_waps3['geometry'] = mis_waps3['geometry'].buffer(buffer_dis)
    # mis_waps3.rename(columns={'wap': 'mis_wap'}, inplace=True)

    mis_waps4, poly1 = vector.pts_poly_join(with_waps4.rename(columns={'wap': 'good_wap'}), mis_waps3, 'wap')

    ## Calc ratios
    allo_use_with2 = pd.merge(allo_use_with1, permits[['permit_id', 'use_type']], on='permit_id')
    allo_use_with2['month'] = allo_use_with2['date'].dt.month
    allo_use_with2['usage_allo'] = allo_use_with2['total_usage'] / allo_use_with2['total_allo']

    allo_use_ratio1 = allo_use_with2.groupby(['permit_id', 'wap', 'use_type', 'month'])['usage_allo'].mean().reset_index()

    allo_use_ratio2 = pd.merge(allo_use_ratio1.rename(columns={'wap': 'good_wap'}), mis_waps4[['good_wap', 'wap']], on='good_wap')

    ## Combine with the missing ones
    allo_use_mis2 = pd.merge(allo_use_mis1[['permit_id', 'wap', 'date']], permits[['permit_id', 'use_type']], on='permit_id')
    allo_use_mis2['month'] = allo_use_mis2['date'].dt.month

    allo_use_mis3 = pd.merge(allo_use_mis2, allo_use_ratio2[['use_type', 'month', 'usage_allo', 'wap']], on=['use_type', 'wap', 'month'])

    allo_use_mis4 = allo_use_mis3.groupby(['permit_id', 'wap', 'date'])['usage_allo'].mean().reset_index()

    allo_use_mis5 = pd.merge(allo_use_mis4, allo_use_mis1[['permit_id', 'wap', 'date', 'total_allo', 'sw_allo', 'gw_allo']], on=['permit_id', 'wap', 'date'])

    allo_use_mis5['total_usage_est'] = (allo_use_mis5['usage_allo'] * allo_use_mis5['total_allo']).round()
    allo_use_mis5['sw_allo_usage_est'] = (allo_use_mis5['usage_allo'] * allo_use_mis5['sw_allo']).round()
    allo_use_mis5['gw_allo_usage_est'] = (allo_use_mis5['usage_allo'] * allo_use_mis5['gw_allo']).round()

    allo_use_mis6 = allo_use_mis5[['permit_id', 'wap', 'date', 'total_usage_est', 'sw_allo_usage_est', 'gw_allo_usage_est']].copy()

    ### Convert to daily if required
    if freq == 'D':
        days1 = allo_use_mis6.date.dt.daysinmonth
        days2 = pd.to_timedelta((days1 / 2).round().astype('int32'), unit='D')

        allo_use_mis6['total_usage_est'] = allo_use_mis6['total_usage_est'] / days1
        allo_use_mis6['sw_allo_usage_est'] = allo_use_mis6['sw_allo_usage_est'] / days1
        allo_use_mis6['gw_allo_usage_est'] = allo_use_mis6['gw_allo_usage_est'] / days1

        usage_rate0 = allo_use_mis6.copy()
        usage_rate0['date'] = usage_rate0['date'] - days2

        grp1 = allo_use_mis6.groupby(['permit_id', 'wap'])
        first1 = grp1.first()
        last1 = grp1.last()

        first1.loc[:, 'date'] = pd.to_datetime(first1.loc[:, 'date'].dt.strftime('%Y-%m') + '-01')

        usage_rate1 = pd.concat([first1, usage_rate0.set_index(['permit_id', 'wap']), last1], sort=True).reset_index().sort_values(['permit_id', 'wap', 'date'])

        usage_rate1.set_index('date', inplace=True)

        usage_daily_rate1 = usage_rate1.groupby(['permit_id', 'wap']).apply(lambda x: x.resample('D').interpolate(method='pchip')[['total_usage_est', 'sw_allo_usage_est', 'gw_allo_usage_est']]).round(2)
    else:
        usage_daily_rate1 = allo_use_mis6.set_index(['permit_id', 'wap', 'date'])

    ## Put the actual usage back into the estimate
    act_use1 = self.get_ts(['usage'], freq, ['permit_id', 'wap'])

    combo1 = pd.concat([usage_daily_rate1, act_use1], axis=1).sort_index()
    combo1.loc[combo1['total_usage'].notnull(), 'total_usage_est'] = combo1.loc[combo1['total_usage'].notnull(), 'total_usage']
    combo1.loc[combo1['sw_allo_usage'].notnull(), 'sw_allo_usage_est'] = combo1.loc[combo1['sw_allo_usage'].notnull(), 'sw_allo_usage']
    combo1.loc[combo1['gw_allo_usage'].notnull(), 'gw_allo_usage_est'] = combo1.loc[combo1['gw_allo_usage'].notnull(), 'gw_allo_usage']
    combo1.drop(['total_usage', 'sw_allo_usage', 'gw_allo_usage'], axis=1, inplace=True)

    # combo1 = combo1.loc[slice(None), slice(None), self.from_date:self.to_date]

    setattr(self, 'usage_est', combo1)

    return combo1
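
# --- Standalone sketch (synthetic numbers, not ECan data) of the daily disaggregation used
# --- above: monthly totals become daily rates, each rate is anchored near mid-month with the
# --- first/last months pinned, and a PCHIP interpolation fills the days in between.
import pandas as pd  # the PCHIP method also requires scipy

mon = pd.DataFrame({'date': pd.to_datetime(['2019-01-31', '2019-02-28', '2019-03-31']),
                    'usage': [3100.0, 2800.0, 1550.0]})  # hypothetical monthly totals for one wap
mon['rate'] = mon['usage'] / mon['date'].dt.daysinmonth  # average daily rate per month

mid = mon.copy()  # shift each rate to roughly the middle of its month
mid['date'] = mid['date'] - pd.to_timedelta((mid['date'].dt.daysinmonth / 2).round().astype(int), unit='D')

anchors = pd.concat([mon.head(1).assign(date=mon['date'].iloc[0].replace(day=1)), mid, mon.tail(1)])
daily = anchors.set_index('date')[['rate']].resample('D').interpolate(method='pchip').round(2)
print(daily.head())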
## Resample in 2D
pe1 = interp2d.points_to_grid(both3.reset_index(), 'time', 'x', 'y', 'pe', 1000, 4326, 2193)
rain1 = interp2d.points_to_grid(both3.reset_index(), 'time', 'x', 'y', 'rain', 1000, 4326, 2193)

pe2 = pe1.to_dataframe().dropna()
rain2 = rain1.to_dataframe().dropna()

## Combine datasets
both4 = pd.concat([rain2, pe2], axis=1).reset_index()

## Aggregate by catchment
both5 = vector.xy_to_gpd(['time', 'rain', 'pe'], 'x', 'y', both4)

#pts0 = both4[both4.time == '1982-07-31'].copy()
#pts0.index.name = 'index'
#pts1 = vector.xy_to_gpd(pts0.index, 'x', 'y', pts0)

catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)

pts2, poly1 = vector.pts_poly_join(both5, catch_del, 'site')

catch_agg1 = pts2.groupby(['site', 'time'])[['rain', 'pe']].mean()

## Adjust the vcsn according to the precip gauge 404810
ts1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID',
results_path = os.path.join(project_path, 'results')

catch_del_shp = 'catch_del_66401_{}.shp'.format(today1)
allo_csv = 'allo_66401_{}.csv'.format(today1)
allo_wap_csv = 'allo_wap_66401_{}.csv'.format(today1)
wap_shp = 'wap_66401_{}.shp'.format(today1)

######################################
### Read in data

sites1 = mssql.rd_sql(server, database, sites_table, ['ExtSiteID', 'NZTMX', 'NZTMY'])
sites0 = sites1[sites1.ExtSiteID == site].copy()
sites2 = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', sites0)

sql1 = sql_arg()

rec_rivers_dict = sql1.get_dict(rec_rivers_sql)
rec_catch_dict = sql1.get_dict(rec_catch_sql)

###################################
### Catchment delineation and WAPs

catch1 = rec.catch_delineate(sites2, rec_rivers_dict, rec_catch_dict)
catch1.to_file(os.path.join(results_path, catch_del_shp))

wap1 = mssql.rd_sql(server, database, crc_wap_table, ['wap']).wap.unique()

sites3 = sites1[sites1.ExtSiteID.isin(wap1)].copy()
def process_waps(param):
    """
    Process the consented WAP sites: attach stream depletion values, aquifer test storativity,
    GW zone and nearest SW reach, then write the result to the waps table.
    """
    run_time_start = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M:%S')
    print(run_time_start)

    ### Read in source data and update accela tables in ConsentsReporting db
    print('--Reading in source data...')

    ## Make object to contain the source data
    db = types.SimpleNamespace()

    for t in param['misc']['WapProcessing']['tables']:
        p = param['source data'][t]
        stmt = 'select * from "{table}"'.format(table=p['table'])
        setattr(db, t, sf.read_table(p['username'], p['password'], p['account'], p['database'], p['schema'], stmt))

    # Spatial data
    gw_dict = param['source data']['gw_zones']
    setattr(db, 'gw_zones', mssql.rd_sql(gw_dict['server'], gw_dict['database'], gw_dict['table'], gw_dict['col_names'], username=gw_dict['username'], password=gw_dict['password'], geo_col=True, rename_cols=gw_dict['rename_cols']))

    sw_dict = param['source data']['sw_reaches']
    # Note: reuses the gw_zones credentials for the sw_reaches query
    setattr(db, 'sw_reaches', mssql.rd_sql(sw_dict['server'], sw_dict['database'], sw_dict['table'], sw_dict['col_names'], username=gw_dict['username'], password=gw_dict['password'], geo_col=True))

    ##################################################
    ### Waps
    print('--Process Waps')

    sites1 = vector.xy_to_gpd('Wap', 'NzTmX', 'NzTmY', db.sites.drop('EffectiveFromDate', axis=1))
    waps1 = sites1.merge(db.wap_sd.drop('EffectiveFromDate', axis=1), on='Wap')
    waps1.loc[waps1['SD1_7Day'].isnull(), 'SD1_7Day'] = 0
    waps1.loc[waps1['SD1_30Day'].isnull(), 'SD1_30Day'] = 0
    waps1.loc[waps1['SD1_150Day'].isnull(), 'SD1_150Day'] = 0
    waps1[['SD1_7Day', 'SD1_30Day', 'SD1_150Day']] = waps1[['SD1_7Day', 'SD1_30Day', 'SD1_150Day']].round().astype(int)

    ## Aquifer tests
    aq1 = db.wap_aquifer_test.dropna(subset=['Storativity']).drop('EffectiveFromDate', axis=1).copy()
    aq2 = aq1.groupby('Wap')['Storativity'].mean().dropna().reset_index()
    aq2.Storativity = True

    waps2 = waps1.merge(aq2, on='Wap', how='left')
    waps2.loc[waps2.Storativity.isnull(), 'Storativity'] = False

    ## Add spatial info
    # GW
    gw_zones = db.gw_zones.copy()
    gw_zones.rename(columns={'SpatialUnitID': 'GwSpatialUnitId'}, inplace=True)

    waps3, poly1 = vector.pts_poly_join(waps2, gw_zones, 'GwSpatialUnitId')
    waps3.drop_duplicates('Wap', inplace=True)
    waps3['Combined'] = waps3.apply(lambda x: 'CWAZ' in x['GwSpatialUnitId'], axis=1)

    # SW
    sw1 = db.sw_reaches.copy()
    sw1.rename(columns={'SpatialUnitID': 'SwSpatialUnitId'}, inplace=True)

    lst1 = []
    for index, row in sw1.iterrows():
        for j in list(row['geometry'].coords):
            lst1.append([row['SwSpatialUnitId'], Point(j)])
    df1 = pd.DataFrame(lst1, columns=['SwSpatialUnitId', 'geometry'])
    sw2 = gpd.GeoDataFrame(df1, geometry='geometry')

    waps3b = vector.kd_nearest(waps3, sw2, 'SwSpatialUnitId')

    ## Prepare output
    waps3b['NzTmX'] = waps3b.geometry.x
    waps3b['NzTmY'] = waps3b.geometry.y

    waps4 = pd.DataFrame(waps3b.drop(['geometry'], axis=1))
    waps4[['NzTmX', 'NzTmY']] = waps4[['NzTmX', 'NzTmY']].round().astype(int)
    waps4.rename(columns={'Name': 'SpatialUnitName', 'distance': 'DistanceToSw'}, inplace=True)

    ## Check for differences
    print('Save results')

    wap_dict = param['source data']['waps']

    # old_stmt = 'select * from "{table}"'.format(table=wap_dict['table'])
    # old1 = sf.read_table(wap_dict['username'], wap_dict['password'], wap_dict['account'], wap_dict['database'], wap_dict['schema'], old_stmt).drop('EffectiveFromDate', axis=1)
    #
    # change1 = compare_dfs(old1, waps4, ['Wap'])
    # new1 = change1['new']
    # diff1 = change1['diff']

    ## Save data
    waps4['EffectiveFromDate'] = run_time_start

    sf.to_table(waps4, wap_dict['table'], wap_dict['username'], wap_dict['password'], wap_dict['account'], wap_dict['database'], wap_dict['schema'], True)

    return waps4
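
# --- Rough sketch of what vector.kd_nearest does above (nearest SW reach vertex per WAP),
# --- written with scipy's cKDTree; the WAP ids, reach ids, and coordinates are placeholders.
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

waps = pd.DataFrame({'Wap': ['J36/0001', 'J36/0002'], 'NzTmX': [1365000, 1366200], 'NzTmY': [5175000, 5174100]})
reach_pts = pd.DataFrame({'SwSpatialUnitId': ['R1', 'R1', 'R2'], 'NzTmX': [1364900, 1365500, 1366800], 'NzTmY': [5175050, 5174800, 5174000]})

tree = cKDTree(reach_pts[['NzTmX', 'NzTmY']].to_numpy())
dist, idx = tree.query(waps[['NzTmX', 'NzTmY']].to_numpy(), k=1)

waps['SwSpatialUnitId'] = reach_pts['SwSpatialUnitId'].to_numpy()[idx]
waps['DistanceToSw'] = dist.round().astype(int)
print(waps)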
precip_mon_plot = 'station_precip_mon.png'
precip_yr_plot = 'station_precip_yr.png'
reg_plot = 'mon_reg_404810.png'

#####################################
### Process VCSN data

both1 = rd_niwa_vcsn(mtypes, catch_shp_path, buffer_dis=5000)

## Aggregate by month
both2 = both1.groupby(['x', 'y', pd.Grouper(key='time', freq='M')]).sum()
both3 = both2.loc[(slice(None), slice(None), slice(from_date, to_date)), :].copy()

## Aggregate by catchment
both4 = vector.xy_to_gpd(['time', 'rain', 'pe'], 'x', 'y', both3.reset_index(), 4326)
both5 = both4.to_crs(epsg=2193)

catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)
catch_del['site'] = catch_del['site'].astype(str)

pts2, poly1 = vector.pts_poly_join(both5, catch_del, 'site')

catch_agg0 = pts2.groupby(['site', 'time'])[['rain', 'pe']].mean()

# Only above Huts
catch_agg1 = catch_agg0.loc[rec_site].copy()
catch_agg1.rename(columns={
    'rain': 'VCSN Precip',
    'pe': 'VCSN PET'
start_date1 = now1 - pd.DateOffset(months=121) - pd.DateOffset(days=now1.day - 1)
start_date2 = now1 - pd.DateOffset(months=1) - pd.DateOffset(days=now1.day - 1)

sites1 = sites[sites.site.isin(count1[(count1 >= 120) & (end_date1 >= start_date2) & (start_date1 <= start_date1)].index)]
uw1 = sites[sites.CwmsName.isin(['Upper Waitaki']) & sites.site.isin(count1[(count1 >= 80) & (end_date1 >= start_date2) & (start_date1 <= start_date1)].index)]

sites2 = pd.concat([sites1, uw1]).drop_duplicates()

gw_sites = xy_to_gpd(['site', 'CwmsName'], 'NZTMX', 'NZTMY', sites2)

gw2 = gw1[gw1.site.isin(gw_sites.site)].copy()

### Extract Site locations
gw_sites.to_file(os.path.join(param.base_dir, param.output_dir, param.gw_sites_shp))

### Combine the sites with the polygons
gw_site_zone = gw_sites.drop(['geometry'], axis=1)
gw_site_zone.rename(columns={'CwmsName': 'zone'}, inplace=True)

### Monthly interpolations
if param.interp:
    ## Estimate monthly means through interpolation
    day1 = grp_ts_agg(gw2, 'site', 'time', 'D').mean().unstack('site')
catch_shp_path = get_path(catch_shp)

sites_col_name = 'SITENUMBER'
poly_col_name = 'Catchmen_1'
line_site_col = 'NZREACH'

#######################################
### Examples

pts = util.load_geo_data(sites_shp_path)
pts['geometry'] = pts.geometry.simplify(1)

## Selecting points from within a polygon
pts1 = vector.sel_sites_poly(sites_shp_path, rec_catch_shp_path, buffer_dis=10)

## Spatial join attributes of polygons to points
pts2, poly2 = vector.pts_poly_join(sites_shp_path, catch_shp_path, poly_col_name)

## Create a GeoDataFrame from x and y data
pts_df = pts[[sites_col_name, 'geometry']].copy()
pts_df['x'] = pts_df.geometry.x
pts_df['y'] = pts_df.geometry.y
pts_df.drop('geometry', axis=1, inplace=True)
pts3 = vector.xy_to_gpd(sites_col_name, 'x', 'y', pts_df)

## Find the closest line to points
line1 = vector.closest_line_to_pts(sites_shp_path, rec_streams_shp_path, line_site_col, buffer_dis=100)
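
## For reference: a minimal sketch of what xy_to_gpd is doing in the x/y step above, written
## directly with geopandas (the NZTM EPSG code 2193 is an assumption here).
pts3_manual = gpd.GeoDataFrame(pts_df[[sites_col_name]],
                               geometry=gpd.points_from_xy(pts_df['x'], pts_df['y']),
                               crs=2193)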
    print('-> loaded from local files')
except:
    print('-> Processing data from the databases')

    sites1 = mssql.rd_sql(ts_server, ts_database, sites_table, ['ExtSiteID', 'NZTMX', 'NZTMY', 'SwazGroupName', 'SwazName'])

    input_sites1 = pd.read_csv(os.path.join(param['inputs_path'], site_csv)).site.astype(str)

    sites0 = sites1[sites1.ExtSiteID.isin(input_sites1)].copy()
    sites0.rename(columns={'ExtSiteID': 'FlowSite'}, inplace=True)

    flow_sites_gdf = vector.xy_to_gpd('FlowSite', 'NZTMX', 'NZTMY', sites0)
    flow_sites_gdf.to_file(os.path.join(results_path, flow_sites_shp))

###################################
### Catchment delineation

try:
    catch_del_shp = [p for p in os.listdir(results_path) if (catch_del_base in p) and ('.shp' in p)][-1]
    catch_gdf = gpd.read_file(os.path.join(results_path, catch_del_shp))
except:
    sql1 = sql_arg()

    rec_rivers_dict = sql1.get_dict(rec_rivers_sql)
catch_del_shp_path = os.path.join(base_path, shp_dir, catch_del_shp)

rec_site = 70105

usage_output = 'pareora_huts_usage_mon.csv'

############################################
### Extract data

site_filter = {'CatchmentGroupName': catch_group}

a1 = AlloUsage(from_date, to_date, site_filter=site_filter)

sites0 = a1.sites.reset_index().copy()

sites = vector.xy_to_gpd('wap', 'NZTMX', 'NZTMY', sites0)

catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)
catch_del1 = catch_del[catch_del.site == rec_site]

sites1 = vector.sel_sites_poly(sites, catch_del1)

## Usage data
usage1 = pd.read_hdf(os.path.join(source_path, usage_hdf))

usage2 = usage1[usage1.wap.isin(sites1.wap)].copy()
usage2['time'] = pd.to_datetime(usage2['time'])
base_dir = r'E:\ecan\shared\base_data\metservice\2018-09-11'
nc1 = 'wrf_hourly_precip_nz4kmN-NCEP_2018090918.nc'
nc1 = 'wrf_hourly_precip_nz8kmN-NCEP_2018090918.nc'

ms_nc = os.path.join(base_dir, nc1)

ms1 = xr.open_dataset(os.path.join(base_dir, nc1))

ms2 = ms1.sel(time='2018-09-12 6:00:00').drop('time').precipitation_amount

df1 = ms2.to_dataframe().reset_index(drop=True)

#ms_proj = to_proj4(nc)

df2 = df1[df1.longitude > 0]

#gdf1 = vector.xy_to_gpd(df2.precipitation_amount, 'longitude', 'latitude', df2, ms_proj)
gdf1 = vector.xy_to_gpd(df2.precipitation_amount, 'longitude', 'latitude', df2, 4326)
gdf1.columns = ['precip', 'geometry']

#gdf2 = gdf1.to_crs(epsg=2193)

gdf1.to_file(os.path.join(base_dir, 'test_ms_df.shp'))

from_crs = '+proj=lcc +lat_1=-30.0 +lat_2=-60.0 +lat_0=-50.288826 +lon_0=161.85754 +R=6370000.0 +no_defs'
from_crs = '+proj=lcc +lat_1=-60 +lat_2=-30 +lat_0=-60 +lon_0=167.5 +x_0=211921 +y_0=-1221320 +a=6367470 +b=6367470 +no_defs'
to_crs = 4326

from_crs1 = Proj(convert_crs(from_crs, pass_str=True), preserve_units=True)
to_crs1 = Proj(convert_crs(to_crs, pass_str=True), preserve_units=True)
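
# Sketch (not in the original script): with pyproj 2+ the same LCC -> WGS84 conversion can be
# written with a Transformer; the sample coordinates below are placeholders.
from pyproj import Transformer

transformer = Transformer.from_crs(from_crs, 'EPSG:4326', always_xy=True)
lon, lat = transformer.transform([211921.0], [-1221320.0])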
rec_datasets = datasets[datasets.CTypeID == 1].DatasetTypeID.tolist()
man_datasets = datasets[datasets.CTypeID == 2].DatasetTypeID.tolist()

rec_summ1 = site_summ1[site_summ1.DatasetTypeID.isin(rec_datasets) & (site_summ1.FromDate <= param['from_date']) & (site_summ1.ToDate >= param['to_date'])].sort_values('ToDate', ascending=False).drop_duplicates('ExtSiteID').copy()

flow_sites_gdf = takes.flow_sites_gdf.copy()

sites_rec_bool = flow_sites_gdf.FlowSite.isin(rec_summ1.ExtSiteID.unique())

sites_rec1 = flow_sites_gdf[sites_rec_bool].copy()
sites_man1 = flow_sites_gdf[~sites_rec_bool].copy()

flow_rec_sites1 = mssql.rd_sql(server, database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': rec_summ1.ExtSiteID.unique().tolist()})

flow_rec_sites2 = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', flow_rec_sites1)

## Estimate flow where recorder doesn't exist
sites_man2 = sites_man1.copy()
sites_man2['geometry'] = sites_man1.buffer(buffer_dis)

rec_sites2 = vector.sel_sites_poly(flow_rec_sites2, sites_man2)

rec_ts_data1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value', from_date=param['from_date'], to_date=param['to_date'], where_in={'ExtSiteID': rec_sites2.ExtSiteID.tolist(), 'DatasetTypeID': rec_summ1.DatasetTypeID.unique().tolist()})
rec_ts_data2 = rec_ts_data1.Value.unstack(0).interpolate('time', limit=10).dropna(axis=1)

rec_flow1 = rec_ts_data2.loc[:, rec_ts_data2.columns.isin(sites_rec1.FlowSite)].copy()

man_ts_data1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value', from_date=param['from_date'], to_date=param['to_date'], where_in={'ExtSiteID': sites_man1.FlowSite.tolist(), 'DatasetTypeID': man_datasets})