def test_xy_to_gpd():
    pts_df = pts[[sites_col_name, 'geometry']].copy()
    pts_df['x'] = pts_df.geometry.x
    pts_df['y'] = pts_df.geometry.y
    pts_df.drop('geometry', axis=1, inplace=True)
    pts3 = vector.xy_to_gpd(sites_col_name, 'x', 'y', pts_df)

    assert (len(pts3) == 2) & isinstance(pts3, gpd.GeoDataFrame)
def rd_niwa_rcp(base_path, mtypes, poly,
                vcsn_sites_csv=r'\\fileservices02\ManagedShares\Data\VirtualClimate\GIS\niwa_vcsn_wgs84.csv',
                id_col='Network', x_col='deg_x', y_col='deg_y',
                output_fun=None, export_path='output'):
    """
    Function to read in the NIWA RCP netcdf files and output the data in a specified format.
    """
    mtype_name = {'precip': 'TotalPrecipCorr', 'T_max': 'MaxTempCorr', 'T_min': 'MinTempCorr',
                  'P_atmos': 'MSLP', 'PET': 'PE', 'RH_mean': 'RelHum', 'R_s': 'SurfRad',
                  'U_z': 'WindSpeed'}

    ### Import and reorganize data
    vcsn_sites = pd.read_csv(vcsn_sites_csv)[[id_col, x_col, y_col]]
    sites_gpd = vector.xy_to_gpd(id_col, x_col, y_col, vcsn_sites, 4326)
    poly1 = gpd.read_file(poly)
    sites_gpd2 = sites_gpd.to_crs(poly1.crs)
    mtypes1 = [mtype_name[i] for i in mtypes]

    ### Select sites
    sites_gpd3 = vector.sel_sites_poly(sites_gpd2, poly1)[id_col]
    site_loc1 = vcsn_sites[vcsn_sites[id_col].isin(sites_gpd3)]
    site_loc1.columns = ['id', 'x', 'y']

    ### Read and extract data from netcdf files
    for root, dirs, files in os.walk(base_path):
        files2 = [i for i in files if i.endswith('.nc')]
        files3 = [j for j in files2 if any(j.startswith(i) for i in mtypes1)]
        file_paths1 = [os.path.join(root, i) for i in files3]
        if len(file_paths1) > 0:
            ds = rd_niwa_rcp_dir(file_paths1, site_loc1, mtypes)
            if callable(output_fun):
                new_base_path = root.replace(base_path, export_path)
                base_file_name = file_paths1[0].split('VCSN_')[1]
                if not os.path.exists(new_base_path):
                    os.makedirs(new_base_path)
                output_fun(ds, new_base_path, base_file_name)
                print(base_file_name)
            else:
                raise ValueError('Must have an output function.')
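
# --- Hedged usage sketch (not from the source): the RCP base path, the polygon path, the
# --- export directory, and the CSV-writing callback below are illustrative assumptions.
def write_csv(ds, out_dir, base_file_name):
    # Assumes rd_niwa_rcp_dir returns an xarray Dataset
    df = ds.to_dataframe().reset_index()
    df.to_csv(os.path.join(out_dir, base_file_name.replace('.nc', '.csv')), index=False)

rd_niwa_rcp(r'\\fileservices02\ManagedShares\Data\VirtualClimate\NIWA_RCP', ['precip', 'PET'],
            'my_catchment.shp', output_fun=write_csv, export_path=r'C:\temp\rcp_extracts')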
def process_sites(self, input_sites):
    """
    Function to process the sites.

    Parameters
    ----------
    input_sites : str, int, or list
        Flow sites (either recorder or gauging) to be naturalised.

    Returns
    -------
    DataFrame
    """
    ## Checks
    if isinstance(input_sites, (str, int)):
        input_sites = [input_sites]
    elif not isinstance(input_sites, list):
        raise ValueError('input_sites must be a str, int, or list')

    ## Convert sites to gdf
    sites_gdf = vector.xy_to_gpd(['ExtSiteID', 'CollectionType'], 'NZTMX', 'NZTMY', self.summ.drop_duplicates('ExtSiteID'))
    input_summ1 = self.summ[self.summ.ExtSiteID.isin(input_sites)].copy()

    bad_sites = [s for s in input_sites if s not in input_summ1.ExtSiteID.unique()]
    if bad_sites:
        print(', '.join(bad_sites) + ' sites are not available for naturalisation')

    flow_sites_gdf = sites_gdf[sites_gdf.ExtSiteID.isin(input_sites)].copy()

    ## Save if required
    if hasattr(self, 'output_path'):
        run_time = pd.Timestamp.today().strftime('%Y-%m-%dT%H%M')
        flow_sites_shp = param['output']['flow_sites_shp'].format(run_date=run_time)
        flow_sites_gdf.to_file(os.path.join(self.output_path, flow_sites_shp))

    setattr(self, 'sites_gdf', sites_gdf)
    setattr(self, 'flow_sites_gdf', flow_sites_gdf)
    setattr(self, 'input_summ', input_summ1)

    ## Remove existing attributes if they exist
    for attr in ('catch_gdf', 'waps_gdf', 'flow', 'usage_rate', 'nat_flow'):
        if hasattr(self, attr):
            delattr(self, attr)

    return input_summ1
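
# --- Hedged usage sketch (assumes this method sits on the FlowNat class used elsewhere in
# --- this repo; the site IDs below are placeholders):
# f1 = FlowNat(output_path=output_path, load_rec=True)
# input_summ = f1.process_sites(['69607', '69602'])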
def upstream_takes(self):
    """
    Function to determine the upstream water abstraction sites from the catchment delineation.

    Returns
    -------
    DataFrame
        allocation data
    """
    if hasattr(self, 'catch_gdf'):
        catch_gdf = self.catch_gdf.copy()
    else:
        catch_gdf = self.catch_del()

    ### WAP selection
    wap1 = mssql.rd_sql(self.permit_server, param['input']['permit_database'], param['input']['crc_wap_table'], ['ExtSiteID'], where_in={'ConsentStatus': param['input']['crc_status']}).ExtSiteID.unique()

    sites3 = self.sites[self.sites.ExtSiteID.isin(wap1)].copy()
    sites3.rename(columns={'ExtSiteID': 'Wap'}, inplace=True)
    sites4 = vector.xy_to_gpd('Wap', 'NZTMX', 'NZTMY', sites3)
    sites4 = sites4.merge(sites3.drop(['NZTMX', 'NZTMY'], axis=1), on='Wap')

    waps_gdf, poly1 = vector.pts_poly_join(sites4, catch_gdf, 'ExtSiteID')
    waps_gdf.dropna(subset=['SwazName', 'SwazGroupName'], inplace=True)

    ### Get crc data
    if waps_gdf.empty:
        print('No WAPs were found in the polygon')
        allo_wap = pd.DataFrame()
    else:
        allo1 = AlloUsage(crc_filter={'ExtSiteID': waps_gdf.Wap.unique().tolist(), 'ConsentStatus': param['input']['crc_status']}, from_date=self.from_date, to_date=self.to_date)

        allo_wap1 = allo1.allo.copy()
        allo_wap = pd.merge(allo_wap1.reset_index(), waps_gdf[['Wap', 'ExtSiteID']], on='Wap')

    ## Save if required
    if hasattr(self, 'output_path'):
        run_time = pd.Timestamp.today().strftime('%Y-%m-%dT%H%M')

        waps_shp = param['output']['waps_shp'].format(run_date=run_time)
        waps_gdf.to_file(os.path.join(self.output_path, waps_shp))

        allo_data_csv = param['output']['allo_data_csv'].format(run_date=run_time)
        allo_wap.to_csv(os.path.join(self.output_path, allo_data_csv), index=False)

    ## Return
    setattr(self, 'waps_gdf', waps_gdf)
    setattr(self, 'allo_wap', allo_wap)

    return allo_wap
pkl2c = 'catchment_delineation_rec_2019-07-17c.pkl.xz'
pkl2f = 'rec_2-4b.pkl.xz'
pkl2g = 'rec_catch_2-4b.pkl.xz'

rec_rivers_path = r'P:\WaterDataProgramme\Source Data\GDBs\river-environment-classification-canterbury-2010\river-environment-classification-canterbury-2010.gpkg'
rec_catch_path = r'P:\WaterDataProgramme\Source Data\GDBs\river-environment-classification-watershed-canterbury-2010\river-environment-classification-watershed-canterbury-2010.gpkg'

###################################
###

f1 = FlowNat(output_path=output_path, load_rec=True)

sites1 = f1.summ[['ExtSiteID', 'NZTMX', 'NZTMY']].copy()

sites_gdf = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', sites1)

## Catch del
catch_gdf = catch_delineate(sites_gdf, f1.rec_rivers, f1.rec_catch)
catch_gdf.to_file(os.path.join(output_path, shp1))

c2 = catch_gdf.drop(['NZREACH', 'area'], axis=1).copy()
c2['geometry'] = catch_gdf.simplify(30)

c2.plot()
catch_gdf.plot()

c2.to_file(os.path.join(output_path, gpkg1), driver='GPKG')

# ### Save if required
def getCorrelations(self):
    '''
    Calculate correlations (fits) for flow sites that do not have a recorder.

    Returns:
        - dataframe with the 6 best correlations for each flow site
        - dataframe with flow time-series for all sites (recorder and manual gaugings)
        - geopandas dataframe with locations of recorder sites within a buffer distance from the manual sites
    Writes:
        - csv-file with the 6 best correlations for each flow site
        - csv-file with flow time-series for all sites (recorder and manual gaugings)
        - shapefile with locations of recorder sites within a buffer distance from the manual sites
    '''
    dataset_type_table = 'DatasetType'
    ts_summ_table = 'TSDataNumericDailySumm'
    ts_table = 'TSDataNumericDaily'
    site_table = 'ExternalSite'

    self.min_flow_obs = self.config.getint('FLOW_CORRELATIONS', 'min_flow_obs')
    self.buf_dist = self.config.getint('FLOW_CORRELATIONS', 'buf_dist')
    self.filter_winter_flow = self.config.getint('FLOW_CORRELATIONS', 'filter_winter_flow')

    #-lists of stations and corresponding dates for which flow records should be removed before correlation is calculated
    remove_stat_dates = self.config.get('FLOW_CORRELATIONS', 'remove_stat_dates').split(',')
    if len(remove_stat_dates) > 1:
        remove_stats = remove_stat_dates[0:-1:2]
        remove_dates = remove_stat_dates[1::2]
    else:
        remove_stats = False
        remove_dates = False
        remove_stat_dates = None

    #-Get lowflow sites
    flow_sites = self.flow_sites_gdf.flow_site.tolist()

    ## Read in datasettypes
    datasets = mssql.rd_sql(self.server, self.database, dataset_type_table, ['DatasetTypeID', 'CTypeID'], where_in={'FeatureID': [1], 'MTypeID': [2], 'CTypeID': [1, 2], 'DataCodeID': [1]})

    #-Get datasetTypes for recorded data and manual data
    rec_datasetTypes = datasets[datasets.CTypeID == 1].DatasetTypeID.tolist()
    man_datasetTypes = datasets[datasets.CTypeID == 2].DatasetTypeID.tolist()
    all_datasetTypes = rec_datasetTypes.copy()
    all_datasetTypes.extend(man_datasetTypes)

    #-Get summary table for the lowflow sites
    site_summ = mssql.rd_sql(self.server, self.database, ts_summ_table, where_in={'DatasetTypeID': all_datasetTypes, 'ExtSiteID': flow_sites})
    site_summ.FromDate = pd.to_datetime(site_summ.FromDate)
    site_summ.ToDate = pd.to_datetime(site_summ.ToDate)
    site_summ.drop('ModDate', axis=1, inplace=True)

    #-Throw out sites from summary table and lowflow sites that have less records than 'min_flow_obs'
    too_short = site_summ.loc[site_summ.Count < self.min_flow_obs, ['ExtSiteID', 'Count']]
    if len(too_short) > 0:
        for j in too_short.iterrows():
            print('ExtSiteID %s is not used because it only has %s records, which is less than %s.' % (j[1]['ExtSiteID'], int(j[1]['Count']), self.min_flow_obs))
            site_summ = site_summ.loc[site_summ['ExtSiteID'] != j[1]['ExtSiteID']]
            flow_sites.remove(j[1]['ExtSiteID'])

    #-Get site ids for recorder and manual sites of the lowflow sites and create geodataframe for manually recorded lowflow sites
    rec_sites = site_summ.loc[site_summ.DatasetTypeID.isin(rec_datasetTypes), 'ExtSiteID'].tolist()
    man_sites = site_summ.loc[site_summ.DatasetTypeID.isin(man_datasetTypes), 'ExtSiteID'].tolist()
    man_sites_gdf = self.flow_sites_gdf[self.flow_sites_gdf.flow_site.isin(man_sites)].copy()

    #-get all recorder flow sites within a buffer distance from the manually recorded lowflow sites
    man_sites_buffer_gdf = man_sites_gdf.copy()
    man_sites_buffer_gdf['geometry'] = man_sites_gdf.buffer(self.buf_dist)
    all_recorder_flowsites = mssql.rd_sql(self.server, self.database, ts_summ_table, ['ExtSiteID'], where_in={'DatasetTypeID': rec_datasetTypes})
    all_recorder_flowsites = pd.unique(all_recorder_flowsites.ExtSiteID).tolist()
    all_recorder_flowsites = mssql.rd_sql(self.server, self.database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': all_recorder_flowsites})
    all_recorder_flowsites_gdf = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', all_recorder_flowsites)
    self.rec_sites_buffer_gdf = vector.sel_sites_poly(all_recorder_flowsites_gdf, man_sites_buffer_gdf)

    #-write recorder sites within buffer to a shapefile
    self.rec_sites_buffer_gdf.to_file(os.path.join(self.results_path, self.config.get('FLOW_CORRELATIONS', 'rec_sites_shp')))
    all_recorder_flowsites = None; all_recorder_flowsites_gdf = None; del all_recorder_flowsites, all_recorder_flowsites_gdf

    #-merge list of lowflow sites with list of recorder sites in buffer to create list of all sites for which to extract data
    all_flow_sites = flow_sites.copy()
    all_flow_sites.extend(pd.unique(self.rec_sites_buffer_gdf.ExtSiteID).tolist())
    all_flow_sites = pd.DataFrame(columns=all_flow_sites)
    all_flow_sites = pd.unique(all_flow_sites.columns).tolist()

    #-Get time-series of all_flow_sites
    ts_df = mssql.rd_sql(self.server, self.database, ts_table, ['ExtSiteID', 'DatasetTypeID', 'DateTime', 'Value', 'QualityCode'], where_in={'DatasetTypeID': all_datasetTypes, 'ExtSiteID': all_flow_sites})
    ts_df = ts_df.loc[ts_df.QualityCode > 100]  #-everything other than missing data
    ts_df.DateTime = pd.to_datetime(ts_df.DateTime)
    ts_df_sites = pd.unique(ts_df.ExtSiteID).tolist()
    all_flow_sites = None; del all_flow_sites

    #-Check if there is data for all lowflow sites
    for j in flow_sites:
        if j not in ts_df_sites:
            print('There is zero flow data for site %s, or record of observations for this site was too short.' % j)

    #-Minimum and maximum date of all data
    min_date = ts_df.DateTime.min()
    max_date = ts_df.DateTime.max()

    #-Fill dataframe with data for min_date through max_date
    df_final = pd.DataFrame(index=pd.date_range(min_date, max_date, freq='D'), columns=ts_df_sites)
    df_final.rename_axis('DateTime', inplace=True)
    for j in ts_df_sites:
        df_short = ts_df.loc[ts_df.ExtSiteID == j, ['DateTime', 'Value', 'QualityCode']]
        #-keep only the records with the highest qualitycode
        df_short_group = df_short.groupby(['DateTime', 'QualityCode']).mean()
        df_short_group.reset_index(inplace=True)
        df_short_group.sort_values(by=['DateTime', 'QualityCode'], inplace=True)
        df_short_group.drop_duplicates(subset='DateTime', keep='last', inplace=True)
        df_short_group.drop('QualityCode', inplace=True, axis=1)
        df_short_group.set_index('DateTime', inplace=True)
        df_short_group.rename(columns={'Value': j}, inplace=True)
        df_final[[j]] = df_short_group
    ts_df = None; df_short_group = None; del ts_df, df_short_group
    df_final.dropna(how='all', inplace=True)

    #-create a copy of all gauged and recorded flow (before filtering) to make it accessible in the main class
    self.flow_ts_df = df_final.copy()
    self.flow_ts_df.to_csv(os.path.join(self.results_path, self.config.get('FLOW_CORRELATIONS', 'flow_ts_csv')))

    #-remove zero records
    df_final[df_final == 0] = np.nan

    #-remove unreliable values using lists of stations and dates
    if remove_stats and remove_dates:
        df_final.reset_index(inplace=True)
        i = 0
        for s in remove_stats:
            stat_date = remove_dates[i]
            df_final.loc[(df_final['DateTime'] == stat_date), s] = np.nan
            i += 1
        df_final.set_index('DateTime', inplace=True)

    #-keep only winter flows for estimating correlations
    if self.filter_winter_flow:
        df_final.reset_index(inplace=True)
        df_final['Month'] = df_final['DateTime'].dt.strftime('%m').astype(int)
        df_final.set_index('DateTime', inplace=True)
        df_final = df_final.loc[(df_final.Month > 4) & (df_final.Month < 10)]
        df_final.dropna(how='all', inplace=True)

    #-loop over the lowflow sites and calculate regressions
    df_regression = pd.DataFrame(columns=['y', 'x', 'mean_y', 'mean_x', 'nobs', 'rmse', 'Adj. R2-squared', 'p-value', 'slope', 'intercept', 'power', 'fittype'])
    i = 0
    for j in flow_sites:
        for s in self.rec_sites_buffer_gdf.ExtSiteID.tolist():
            sel_df = df_final[[j, s]]
            sel_df.replace(0., np.nan, inplace=True)
            sel_df.dropna(inplace=True)
            #-only fit for sites that have the minimum number of observations
            if len(sel_df) >= self.min_flow_obs:
                try:
                    #-linear regression
                    #-set x and y
                    x = sel_df[[s]]
                    x = sm.add_constant(x)  #-needed for intercept
                    y = sel_df[[j]]

                    #-linear fit
                    model = sm.OLS(y, x).fit()
                    #-predict using the model
                    predictions = model.predict(x)  # make the predictions by the model
                    rmse_val = np.sqrt(np.mean((predictions.to_numpy() - y.to_numpy())**2))

                    #-fill dataframe with stats for linear fit
                    df_regression.loc[i, 'y'] = j
                    df_regression.loc[i, 'x'] = s
                    df_regression.loc[i, 'mean_y'] = np.nanmean(y)
                    df_regression.loc[i, 'mean_x'] = np.nanmean(x[s].to_numpy())
                    df_regression.loc[i, 'nobs'] = model.nobs
                    df_regression.loc[i, 'rmse'] = rmse_val
                    df_regression.loc[i, 'Adj. R2-squared'] = model.rsquared_adj
                    df_regression.loc[i, 'p-value'] = model.f_pvalue
                    df_regression.loc[i, 'slope'] = model.params[1]
                    df_regression.loc[i, 'intercept'] = model.params[0]
                    df_regression.loc[i, 'fittype'] = 'linear'
                    i += 1
                except:
                    print('Could not establish linear fit for %s and %s...' % (j, s))
                try:
                    #-power fit regression
                    #-set x and y
                    x = sel_df[[s]]
                    x = sm.add_constant(x)  #-needed for intercept
                    y = sel_df[[j]]
                    #-***********************************************************
                    #- y = ax^b
                    #- log(y) = log(a) + b*log(x)
                    #-***********************************************************
                    logY = np.log(y)
                    logX = np.log(x[[s]])
                    logX = sm.add_constant(logX)
                    model = sm.OLS(logY, logX).fit()
                    x = x[s].to_numpy()
                    #-predictions of the exact x values for calculating rmse
                    predictions = np.exp(model.params[0]) * x**model.params[1]
                    rmse_val = np.sqrt(np.mean((predictions - y.to_numpy())**2))

                    #-fill dataframe with stats for power fit
                    df_regression.loc[i, 'y'] = j
                    df_regression.loc[i, 'x'] = s
                    df_regression.loc[i, 'mean_y'] = np.nanmean(y)
                    df_regression.loc[i, 'mean_x'] = np.nanmean(x)
                    df_regression.loc[i, 'nobs'] = model.nobs
                    df_regression.loc[i, 'rmse'] = rmse_val
                    df_regression.loc[i, 'Adj. R2-squared'] = model.rsquared_adj
                    df_regression.loc[i, 'p-value'] = model.f_pvalue
                    df_regression.loc[i, 'slope'] = np.exp(model.params[0])
                    df_regression.loc[i, 'power'] = model.params[1]
                    df_regression.loc[i, 'fittype'] = 'power'
                    i += 1
                except:
                    print('Could not establish power fit for %s and %s...' % (j, s))
            sel_df = None; del sel_df

    #-remove negative correlations
    df_regression.loc[df_regression['Adj. R2-squared'] < 0., :] = np.nan
    df_regression.dropna(how='all', inplace=True)

    #-loop over lowflow sites, and select best six fits by sorting on R2, rmse, nobs
    self.best_regressions_df = pd.DataFrame(columns=df_regression.columns)
    for j in flow_sites:
        sel_df = df_regression.loc[df_regression.y == j]
        sel_df.sort_values(by=['Adj. R2-squared', 'rmse', 'nobs'], ascending=[False, True, False], inplace=True)
        sel_df = sel_df.iloc[0:6, :]
        self.best_regressions_df = pd.concat([self.best_regressions_df, sel_df])
    sel_df = None; del sel_df
    self.best_regressions_df.to_csv(os.path.join(self.results_path, self.config.get('FLOW_CORRELATIONS', 'correlations_csv')), index=False)

    #-make 6 plots for each site to visually pick the best
    unique_y = pd.unique(self.best_regressions_df['y'])
    unique_y = list(unique_y)
    for j in flow_sites:
        if j in unique_y:
            #-get regression results for site j
            sel_df_regression = self.best_regressions_df.loc[self.best_regressions_df.y == j]

            #-plot the best 6 fits
            fig = plt.figure(figsize=(11, 8))
            kk = 1
            for k in sel_df_regression.iterrows():
                try:
                    ax1 = plt.subplot(3, 2, kk)
                    xSite = k[1]['x']
                    ySite = k[1]['y']
                    xy = df_final[[xSite, ySite]]
                    xy.dropna(inplace=True)
                    x = xy[xSite].to_numpy()
                    y = xy[ySite].to_numpy()

                    xmin = np.nanmin(x)
                    xmax = np.nanmax(x)
                    dx = (xmax - xmin) / 100
                    xvals = np.arange(xmin, xmax, dx)
                    ymin = np.nanmin(y)
                    ymax = np.nanmax(y)

                    #-Get the stats from the table
                    fit_type = k[1]['fittype']
                    if fit_type == 'linear':
                        eq_str = 'y = %.3fx + %.3f' % (k[1]['slope'], k[1]['intercept'])
                        predictions = k[1]['slope'] * xvals + k[1]['intercept']
                    else:
                        eq_str = 'y = %.3fx$^{%.3f}$' % (k[1]['slope'], k[1]['power'])
                        predictions = k[1]['slope'] * (xvals ** k[1]['power'])
                    r2 = k[1]['Adj. R2-squared']
                    rmse_val = k[1]['rmse']
                    p_val = k[1]['p-value']
                    nobs = k[1]['nobs']

                    plt.plot(xvals, predictions, label='fit')
                    plt.scatter(x, y, color='red', label='data')
                    ax1.grid(True)
                    ax1.set_xlabel('Flow at %s [m$^3$ s$^{-1}$]' % xSite)
                    ax1.set_ylabel('Flow at %s [m$^3$ s$^{-1}$]' % ySite)
                    ax1.legend(loc='lower right')

                    dy = (ymax - ymin) / 10
                    dx = (xmax - xmin) / 10
                    #-plot the stats in the plot
                    plt.text(xmin, ymax, eq_str)
                    plt.text(xmin, ymax - dy, 'R$^2$ %.2f' % r2)
                    plt.text(xmin, ymax - 2 * dy, 'RMSE %.2f' % rmse_val)
                    plt.text(xmin, ymax - 3 * dy, 'p-value %.2f' % p_val)
                    plt.text(xmin, ymax - 4 * dy, 'nobs %.0f' % nobs)
                    ax1.set_xlim([xmin - dx, xmax + dx])
                    ax1.set_ylim([ymin - dy, ymax + dy])
                    kk += 1
                except:
                    pass
            fig.tight_layout()
            if self.filter_winter_flow:
                plt.savefig(os.path.join(self.results_path, '%s_correlations_winteronly.png' % ySite), dpi=300)
            else:
                plt.savefig(os.path.join(self.results_path, '%s_correlations.png' % ySite), dpi=300)
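
# --- Illustration (not part of the class above): the power fit relies on linearising
# --- y = a*x**b as log(y) = log(a) + b*log(x), fitting OLS on the logs, and recovering
# --- a = exp(intercept) and b = slope. Synthetic data only.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = rng.uniform(0.5, 10.0, 200)
y = 2.5 * x**0.8 * np.exp(rng.normal(0.0, 0.05, 200))  # true a=2.5, b=0.8 plus noise

X = sm.add_constant(np.log(x))
log_model = sm.OLS(np.log(y), X).fit()

a = np.exp(log_model.params[0])  # stored as 'slope' in df_regression
b = log_model.params[1]          # stored as 'power' in df_regression
print(round(a, 2), round(b, 2))  # ~2.5 and ~0.8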
    })

sites_buffer_gdf = flow_sites_gdf.copy()
sites_buffer_gdf['geometry'] = sites_buffer_gdf.buffer(60000)

all_gwl_sites = mssql.rd_sql(server, database, ts_summ_table, ['ExtSiteID'], where_in={'DatasetTypeID': datasets.DatasetTypeID.tolist()})
all_gwl_sites = pd.unique(all_gwl_sites.ExtSiteID).tolist()
all_gwl_sites = mssql.rd_sql(server, database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': all_gwl_sites})
all_gwl_sites_gdf = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', all_gwl_sites)

sel_gwl_sites_gdf = vector.sel_sites_poly(all_gwl_sites_gdf, sites_buffer_gdf)
all_gwl_sites = None
all_gwl_sites_gdf = None

#-Get summary table for the gwl sites and filter period of interest
site_summ = mssql.rd_sql(server, database, ts_summ_table, where_in={'DatasetTypeID': datasets.DatasetTypeID.tolist(), 'ExtSiteID': sel_gwl_sites_gdf.ExtSiteID.tolist()})
site_summ.FromDate = pd.to_datetime(site_summ.FromDate)
site_summ.ToDate = pd.to_datetime(site_summ.ToDate)
site_summ.drop('ModDate', axis=1, inplace=True)
def sel_xy_nc(bound_shp, nc_path, x_col='longitude', y_col='latitude', time_col='time', nc_vars=None, buffer_dis=0, from_date=None, to_date=None, nc_crs=4326, out_crs=None, out_type='pandas'):
    """
    Function to select space and time data from a netcdf file using a polygon shapefile.

    bound_shp -- Polygon shapefile used to spatially clip the netcdf data.\n
    nc_path -- Path to the netcdf file.\n
    nc_vars -- Variable name(s) to extract; None extracts all variables.\n
    buffer_dis -- Buffer distance applied to the polygon (in the units of the shapefile's crs).\n
    from_date/to_date -- Optional date strings to limit the time dimension.\n
    nc_crs -- The crs of the netcdf coordinates.\n
    out_crs -- Optional output crs; if given, the x/y columns are reprojected.\n
    out_type -- 'pandas' or 'xarray' output (only used when out_crs is None).
    """
    ### Process the boundary layer
    bound = gpd.read_file(bound_shp).buffer(buffer_dis).to_crs(convert_crs(nc_crs))
    x_min, y_min, x_max, y_max = bound.unary_union.bounds

    ### Read and extract data from netcdf files
    ds1 = xr.open_dataset(nc_path)
    time1 = pd.to_datetime(ds1[time_col].values)
    if isinstance(from_date, str):
        time1 = time1[time1 >= from_date]
    if isinstance(to_date, str):
        time1 = time1[time1 <= to_date]
    lat1 = ds1[y_col].values
    lon1 = ds1[x_col].values
    lat2 = lat1[(lat1 >= y_min) & (lat1 <= y_max)]
    lon2 = lon1[(lon1 >= x_min) & (lon1 <= x_max)]
    ds2 = ds1.loc[{x_col: lon2, time_col: time1.values, y_col: lat2}]
    # coords1 = ds2.coords.keys()
    # dims1 = ds2.dims.keys()

    ## Select mtypes
    if isinstance(nc_vars, str):
        ds3 = ds2[[nc_vars]]
    elif isinstance(nc_vars, (list, np.ndarray, pd.Series)):
        ds3 = ds2[nc_vars]
    elif nc_vars is None:
        ds3 = ds2

    ### Convert to different crs if needed
    if out_crs is not None:
        df1 = ds3.to_dataframe().reset_index()
        xy1 = ds3[[x_col, y_col]].copy()
        xy2 = xy1.to_dataframe().reset_index()
        crs1 = util.convert_crs(out_crs)
        new_gpd1 = vector.xy_to_gpd(xy2.index, x_col, y_col, xy2, nc_crs)
        new_gpd2 = new_gpd1.to_crs(crs1)
        site_loc2 = xy2.copy()
        site_loc2['x_new'] = new_gpd2.geometry.apply(lambda j: j.x)
        site_loc2['y_new'] = new_gpd2.geometry.apply(lambda j: j.y)

        df2 = pd.merge(df1, site_loc2[[x_col, y_col, 'x_new', 'y_new']], on=[x_col, y_col], how='left')
        df3 = df2.drop([x_col, y_col], axis=1).rename(columns={'x_new': x_col, 'y_new': y_col})
        ds1.close()
        return df3
    elif out_type == 'pandas':
        df1 = ds3.to_dataframe().reset_index()
        ds1.close()
        return df1
    elif out_type == 'xarray':
        return ds3
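
# --- Hedged usage sketch (the boundary shapefile name is an assumption; 'rain'/'pe' are the
# --- VCSN variable names used elsewhere in this repo; buffer_dis is in the shapefile's crs units).
df_precip = sel_xy_nc(bound_shp='catchment_boundary.shp',
                      nc_path=r'\\fileservices02\ManagedShares\Data\VirtualClimate\vcsn_precip_et_2016-06-06.nc',
                      nc_vars=['rain', 'pe'], buffer_dis=5000,
                      from_date='2000-01-01', to_date='2010-12-31',
                      out_crs=2193)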
def rd_niwa_vcsn(mtypes, sites,
                 nc_path=r'\\fileservices02\ManagedShares\Data\VirtualClimate\vcsn_precip_et_2016-06-06.nc',
                 vcsn_sites_csv=r'\\fileservices02\ManagedShares\Data\VirtualClimate\GIS\niwa_vcsn_wgs84.csv',
                 id_col='Network', x_col='deg_x', y_col='deg_y',
                 buffer_dis=0, include_sites=False, from_date=None, to_date=None,
                 out_crs=None, netcdf_out=None):
    """
    Function to read in the NIWA vcsn netcdf file and output the data as a dataframe.

    mtypes -- A string or list of the measurement types (either 'precip', or 'PET').\n
    sites -- Either a list of vcsn site names or a polygon of the area of interest.\n
    nc_path -- The path to the vcsn nc file.\n
    vcsn_sites_csv -- The csv file that relates the site name to coordinates.\n
    id_col -- The site name column in vcsn_sites_csv.\n
    x_col -- The x column name in vcsn_sites_csv.\n
    y_col -- The y column name in vcsn_sites_csv.\n
    include_sites -- Should the site names be added to the output?\n
    out_crs -- The crs epsg number for the output coordinates if different than the default WGS84 (e.g. 2193 for NZTM).
    """
    mtype_name = {'precip': 'rain', 'PET': 'pe'}

    ### Import and reorganize data
    vcsn_sites = pd.read_csv(vcsn_sites_csv)[[id_col, x_col, y_col]]

    if isinstance(sites, str):
        if sites.endswith('.shp'):
            sites_gpd = vector.xy_to_gpd(id_col, x_col, y_col, vcsn_sites, 4326)
            poly1 = gpd.read_file(sites)

            sites_gpd2 = sites_gpd.to_crs(poly1.crs)

            ### Select sites
            sites2 = vector.sel_sites_poly(sites_gpd2, poly1, buffer_dis)[id_col]
    elif isinstance(sites, (list, pd.Series, np.ndarray)):
        sites2 = sites

    ### Select locations
    site_loc1 = vcsn_sites[vcsn_sites[id_col].isin(sites2)]
    site_loc1.columns = ['id', 'x', 'y']

    ### Select mtypes
    if isinstance(mtypes, str):
        mtypes1 = [mtype_name[mtypes]]
    else:
        mtypes1 = [mtype_name[i] for i in mtypes]

    if include_sites:
        mtypes1.extend(['site'])

    ### Read and extract data from netcdf files
    ds1 = xr.open_dataset(nc_path)
    time1 = pd.to_datetime(ds1.time.values)
    if isinstance(from_date, str):
        time1 = time1[time1 >= from_date]
    if isinstance(to_date, str):
        time1 = time1[time1 <= to_date]
    lat1 = ds1.latitude.values
    lon1 = ds1.longitude.values
    lat2 = lat1[np.in1d(lat1, site_loc1.y.unique())]
    lon2 = lon1[np.in1d(lon1, site_loc1.x.unique())]
    ds2 = ds1.loc[{'longitude': lon2, 'time': time1.values, 'latitude': lat2}]
    ds3 = ds2[mtypes1]

    ### Convert to DataFrame
    df1 = ds3.to_dataframe().reset_index()
    df1.rename(columns={'latitude': 'y', 'longitude': 'x'}, inplace=True)
    df1 = df1.dropna()

    ### Convert to different crs if needed
    if out_crs is not None:
        crs1 = util.convert_crs(out_crs)
        new_gpd1 = vector.xy_to_gpd('id', 'x', 'y', site_loc1, 4326)
        new_gpd2 = new_gpd1.to_crs(crs1)
        site_loc2 = site_loc1.copy()
        site_loc2['x_new'] = new_gpd2.geometry.apply(lambda j: j.x)
        site_loc2['y_new'] = new_gpd2.geometry.apply(lambda j: j.y)

        df2 = pd.merge(df1, site_loc2[['x', 'y', 'x_new', 'y_new']], on=['x', 'y'])
        df3 = df2.drop(['x', 'y'], axis=1).rename(columns={'x_new': 'x', 'y_new': 'y'})
        col_order = ['y', 'x', 'time']
        col_order.extend(mtypes1)
        df4 = df3[col_order]
    else:
        df4 = df1

    ds1.close()
    ds3.close()

    ### Return
    if isinstance(netcdf_out, str):
        ds3.to_netcdf(netcdf_out)
    return df4
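
# --- Hedged usage sketches (the catchment shapefile and site names are placeholders):
# extract precip and PET inside a catchment polygon, with output coordinates in NZTM
df_poly = rd_niwa_vcsn(['precip', 'PET'], 'catchment.shp', buffer_dis=5000, out_crs=2193)

# or pass an explicit list of VCSN site names and keep the site ids in the output
df_sites = rd_niwa_vcsn('precip', ['P123456', 'P123457'], include_sites=True)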
def _usage_estimation(self, freq, buffer_dis=40000, min_months=36):
    """
    Estimate usage for permits/waps without metered usage by applying the monthly
    usage-to-allocation ratios of nearby metered waps of the same use type.
    """
    ### Get the necessary data
    # a1 = AlloUsage()
    # a1.permits = self.permits.copy()
    # a1.waps = self.waps.copy()
    # a1.from_date = self.from_date
    # a1.to_date = self.to_date

    # if hasattr(self, 'total_allo_ts'):
    #     delattr(self, 'total_allo_ts')

    allo_use1 = self.get_ts(['allo', 'metered_allo', 'usage'], 'M', ['permit_id', 'wap'])

    permits = self.permits.copy()

    ### Create Wap locations
    waps1 = vector.xy_to_gpd('wap', 'lon', 'lat', self.waps.drop('permit_id', axis=1).drop_duplicates('wap'), 4326)
    waps2 = waps1.to_crs(2193)

    ### Determine which Waps need to be estimated
    allo_use_mis1 = allo_use1[allo_use1['total_metered_allo'] == 0].copy().reset_index()
    allo_use_with1 = allo_use1[allo_use1['total_metered_allo'] > 0].copy().reset_index()

    mis_waps1 = allo_use_mis1.groupby(['permit_id', 'wap'])['total_allo'].count().copy()
    with_waps1 = allo_use_with1.groupby(['permit_id', 'wap'])['total_allo'].count()
    with_waps2 = with_waps1[with_waps1 >= min_months]

    with_waps3 = pd.merge(with_waps2.reset_index()[['permit_id', 'wap']], permits[['permit_id', 'use_type']], on='permit_id')

    with_waps4 = pd.merge(waps2, with_waps3['wap'], on='wap')

    mis_waps2 = pd.merge(mis_waps1.reset_index(), permits[['permit_id', 'use_type']], on='permit_id')
    mis_waps3 = pd.merge(waps2, mis_waps2['wap'], on='wap')
    mis_waps3['geometry'] = mis_waps3['geometry'].buffer(buffer_dis)
    # mis_waps3.rename(columns={'wap': 'mis_wap'}, inplace=True)

    mis_waps4, poly1 = vector.pts_poly_join(with_waps4.rename(columns={'wap': 'good_wap'}), mis_waps3, 'wap')

    ## Calc ratios
    allo_use_with2 = pd.merge(allo_use_with1, permits[['permit_id', 'use_type']], on='permit_id')
    allo_use_with2['month'] = allo_use_with2['date'].dt.month
    allo_use_with2['usage_allo'] = allo_use_with2['total_usage'] / allo_use_with2['total_allo']

    allo_use_ratio1 = allo_use_with2.groupby(['permit_id', 'wap', 'use_type', 'month'])['usage_allo'].mean().reset_index()

    allo_use_ratio2 = pd.merge(allo_use_ratio1.rename(columns={'wap': 'good_wap'}), mis_waps4[['good_wap', 'wap']], on='good_wap')

    ## Combine with the missing ones
    allo_use_mis2 = pd.merge(allo_use_mis1[['permit_id', 'wap', 'date']], permits[['permit_id', 'use_type']], on='permit_id')
    allo_use_mis2['month'] = allo_use_mis2['date'].dt.month

    allo_use_mis3 = pd.merge(allo_use_mis2, allo_use_ratio2[['use_type', 'month', 'usage_allo', 'wap']], on=['use_type', 'wap', 'month'])

    allo_use_mis4 = allo_use_mis3.groupby(['permit_id', 'wap', 'date'])['usage_allo'].mean().reset_index()

    allo_use_mis5 = pd.merge(allo_use_mis4, allo_use_mis1[['permit_id', 'wap', 'date', 'total_allo', 'sw_allo', 'gw_allo']], on=['permit_id', 'wap', 'date'])

    allo_use_mis5['total_usage_est'] = (allo_use_mis5['usage_allo'] * allo_use_mis5['total_allo']).round()
    allo_use_mis5['sw_allo_usage_est'] = (allo_use_mis5['usage_allo'] * allo_use_mis5['sw_allo']).round()
    allo_use_mis5['gw_allo_usage_est'] = (allo_use_mis5['usage_allo'] * allo_use_mis5['gw_allo']).round()

    allo_use_mis6 = allo_use_mis5[['permit_id', 'wap', 'date', 'total_usage_est', 'sw_allo_usage_est', 'gw_allo_usage_est']].copy()

    ### Convert to daily if required
    if freq == 'D':
        days1 = allo_use_mis6.date.dt.daysinmonth
        days2 = pd.to_timedelta((days1 / 2).round().astype('int32'), unit='D')

        allo_use_mis6['total_usage_est'] = allo_use_mis6['total_usage_est'] / days1
        allo_use_mis6['sw_allo_usage_est'] = allo_use_mis6['sw_allo_usage_est'] / days1
        allo_use_mis6['gw_allo_usage_est'] = allo_use_mis6['gw_allo_usage_est'] / days1

        usage_rate0 = allo_use_mis6.copy()
        usage_rate0['date'] = usage_rate0['date'] - days2

        grp1 = allo_use_mis6.groupby(['permit_id', 'wap'])
        first1 = grp1.first()
        last1 = grp1.last()

        first1.loc[:, 'date'] = pd.to_datetime(first1.loc[:, 'date'].dt.strftime('%Y-%m') + '-01')

        usage_rate1 = pd.concat([first1, usage_rate0.set_index(['permit_id', 'wap']), last1], sort=True).reset_index().sort_values(['permit_id', 'wap', 'date'])

        usage_rate1.set_index('date', inplace=True)

        usage_daily_rate1 = usage_rate1.groupby(['permit_id', 'wap']).apply(lambda x: x.resample('D').interpolate(method='pchip')[['total_usage_est', 'sw_allo_usage_est', 'gw_allo_usage_est']]).round(2)
    else:
        usage_daily_rate1 = allo_use_mis6.set_index(['permit_id', 'wap', 'date'])

    ## Put the actual usage back into the estimate
    act_use1 = self.get_ts(['usage'], freq, ['permit_id', 'wap'])

    combo1 = pd.concat([usage_daily_rate1, act_use1], axis=1).sort_index()
    combo1.loc[combo1['total_usage'].notnull(), 'total_usage_est'] = combo1.loc[combo1['total_usage'].notnull(), 'total_usage']
    combo1.loc[combo1['sw_allo_usage'].notnull(), 'sw_allo_usage_est'] = combo1.loc[combo1['sw_allo_usage'].notnull(), 'sw_allo_usage']
    combo1.loc[combo1['gw_allo_usage'].notnull(), 'gw_allo_usage_est'] = combo1.loc[combo1['gw_allo_usage'].notnull(), 'gw_allo_usage']
    combo1.drop(['total_usage', 'sw_allo_usage', 'gw_allo_usage'], axis=1, inplace=True)

    # combo1 = combo1.loc[slice(None), slice(None), self.from_date:self.to_date]

    setattr(self, 'usage_est', combo1)

    return combo1
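
# --- Standalone sketch (synthetic numbers, not ECan data) of the daily disaggregation used
# --- above: monthly totals become daily rates, each rate is anchored near mid-month with the
# --- first/last months pinned, and a PCHIP interpolation fills the days in between.
import pandas as pd  # the PCHIP method also requires scipy

mon = pd.DataFrame({'date': pd.to_datetime(['2019-01-31', '2019-02-28', '2019-03-31']),
                    'usage': [3100.0, 2800.0, 1550.0]})  # hypothetical monthly totals for one wap
mon['rate'] = mon['usage'] / mon['date'].dt.daysinmonth  # average daily rate per month

mid = mon.copy()  # shift each rate to roughly the middle of its month
mid['date'] = mid['date'] - pd.to_timedelta((mid['date'].dt.daysinmonth / 2).round().astype(int), unit='D')

anchors = pd.concat([mon.head(1).assign(date=mon['date'].iloc[0].replace(day=1)), mid, mon.tail(1)])
daily = anchors.set_index('date')[['rate']].resample('D').interpolate(method='pchip').round(2)
print(daily.head())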
## Resample in 2D
pe1 = interp2d.points_to_grid(both3.reset_index(), 'time', 'x', 'y', 'pe', 1000, 4326, 2193)
rain1 = interp2d.points_to_grid(both3.reset_index(), 'time', 'x', 'y', 'rain', 1000, 4326, 2193)

pe2 = pe1.to_dataframe().dropna()
rain2 = rain1.to_dataframe().dropna()

## Combine datasets
both4 = pd.concat([rain2, pe2], axis=1).reset_index()

## Aggregate by catchment
both5 = vector.xy_to_gpd(['time', 'rain', 'pe'], 'x', 'y', both4)

#pts0 = both4[both4.time == '1982-07-31'].copy()
#pts0.index.name = 'index'
#pts1 = vector.xy_to_gpd(pts0.index, 'x', 'y', pts0)

catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)

pts2, poly1 = vector.pts_poly_join(both5, catch_del, 'site')

catch_agg1 = pts2.groupby(['site', 'time'])[['rain', 'pe']].mean()

## Adjust the vcsn according to the precip gauge 404810
ts1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID',
results_path = os.path.join(project_path, 'results')

catch_del_shp = 'catch_del_66401_{}.shp'.format(today1)
allo_csv = 'allo_66401_{}.csv'.format(today1)
allo_wap_csv = 'allo_wap_66401_{}.csv'.format(today1)
wap_shp = 'wap_66401_{}.shp'.format(today1)

######################################
### Read in data

sites1 = mssql.rd_sql(server, database, sites_table, ['ExtSiteID', 'NZTMX', 'NZTMY'])
sites0 = sites1[sites1.ExtSiteID == site].copy()
sites2 = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', sites0)

sql1 = sql_arg()

rec_rivers_dict = sql1.get_dict(rec_rivers_sql)
rec_catch_dict = sql1.get_dict(rec_catch_sql)

###################################
### Catchment delineation and WAPs

catch1 = rec.catch_delineate(sites2, rec_rivers_dict, rec_catch_dict)
catch1.to_file(os.path.join(results_path, catch_del_shp))

wap1 = mssql.rd_sql(server, database, crc_wap_table, ['wap']).wap.unique()

sites3 = sites1[sites1.ExtSiteID.isin(wap1)].copy()
def process_waps(param):
    """
    Process the consented WAP sites: attach stream depletion values, aquifer test storativity,
    GW zone and nearest SW reach, then write the result to the waps table.
    """
    run_time_start = pd.Timestamp.today().strftime('%Y-%m-%d %H:%M:%S')
    print(run_time_start)

    ### Read in source data and update accela tables in ConsentsReporting db
    print('--Reading in source data...')

    ## Make object to contain the source data
    db = types.SimpleNamespace()

    for t in param['misc']['WapProcessing']['tables']:
        p = param['source data'][t]
        stmt = 'select * from "{table}"'.format(table=p['table'])
        setattr(db, t, sf.read_table(p['username'], p['password'], p['account'], p['database'], p['schema'], stmt))

    # Spatial data
    gw_dict = param['source data']['gw_zones']
    setattr(db, 'gw_zones', mssql.rd_sql(gw_dict['server'], gw_dict['database'], gw_dict['table'], gw_dict['col_names'], username=gw_dict['username'], password=gw_dict['password'], geo_col=True, rename_cols=gw_dict['rename_cols']))

    sw_dict = param['source data']['sw_reaches']
    # Note: reuses the gw_zones credentials for the sw_reaches query
    setattr(db, 'sw_reaches', mssql.rd_sql(sw_dict['server'], sw_dict['database'], sw_dict['table'], sw_dict['col_names'], username=gw_dict['username'], password=gw_dict['password'], geo_col=True))

    ##################################################
    ### Waps
    print('--Process Waps')

    sites1 = vector.xy_to_gpd('Wap', 'NzTmX', 'NzTmY', db.sites.drop('EffectiveFromDate', axis=1))
    waps1 = sites1.merge(db.wap_sd.drop('EffectiveFromDate', axis=1), on='Wap')
    waps1.loc[waps1['SD1_7Day'].isnull(), 'SD1_7Day'] = 0
    waps1.loc[waps1['SD1_30Day'].isnull(), 'SD1_30Day'] = 0
    waps1.loc[waps1['SD1_150Day'].isnull(), 'SD1_150Day'] = 0
    waps1[['SD1_7Day', 'SD1_30Day', 'SD1_150Day']] = waps1[['SD1_7Day', 'SD1_30Day', 'SD1_150Day']].round().astype(int)

    ## Aquifer tests
    aq1 = db.wap_aquifer_test.dropna(subset=['Storativity']).drop('EffectiveFromDate', axis=1).copy()
    aq2 = aq1.groupby('Wap')['Storativity'].mean().dropna().reset_index()
    aq2.Storativity = True

    waps2 = waps1.merge(aq2, on='Wap', how='left')
    waps2.loc[waps2.Storativity.isnull(), 'Storativity'] = False

    ## Add spatial info
    # GW
    gw_zones = db.gw_zones.copy()
    gw_zones.rename(columns={'SpatialUnitID': 'GwSpatialUnitId'}, inplace=True)

    waps3, poly1 = vector.pts_poly_join(waps2, gw_zones, 'GwSpatialUnitId')
    waps3.drop_duplicates('Wap', inplace=True)
    waps3['Combined'] = waps3.apply(lambda x: 'CWAZ' in x['GwSpatialUnitId'], axis=1)

    # SW
    sw1 = db.sw_reaches.copy()
    sw1.rename(columns={'SpatialUnitID': 'SwSpatialUnitId'}, inplace=True)

    lst1 = []
    for index, row in sw1.iterrows():
        for j in list(row['geometry'].coords):
            lst1.append([row['SwSpatialUnitId'], Point(j)])
    df1 = pd.DataFrame(lst1, columns=['SwSpatialUnitId', 'geometry'])
    sw2 = gpd.GeoDataFrame(df1, geometry='geometry')

    waps3b = vector.kd_nearest(waps3, sw2, 'SwSpatialUnitId')

    ## Prepare output
    waps3b['NzTmX'] = waps3b.geometry.x
    waps3b['NzTmY'] = waps3b.geometry.y

    waps4 = pd.DataFrame(waps3b.drop(['geometry'], axis=1))
    waps4[['NzTmX', 'NzTmY']] = waps4[['NzTmX', 'NzTmY']].round().astype(int)
    waps4.rename(columns={'Name': 'SpatialUnitName', 'distance': 'DistanceToSw'}, inplace=True)

    ## Check for differences
    print('Save results')

    wap_dict = param['source data']['waps']

    # old_stmt = 'select * from "{table}"'.format(table=wap_dict['table'])
    # old1 = sf.read_table(wap_dict['username'], wap_dict['password'], wap_dict['account'], wap_dict['database'], wap_dict['schema'], old_stmt).drop('EffectiveFromDate', axis=1)
    #
    # change1 = compare_dfs(old1, waps4, ['Wap'])
    # new1 = change1['new']
    # diff1 = change1['diff']

    ## Save data
    waps4['EffectiveFromDate'] = run_time_start

    sf.to_table(waps4, wap_dict['table'], wap_dict['username'], wap_dict['password'], wap_dict['account'], wap_dict['database'], wap_dict['schema'], True)

    return waps4
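
# --- Rough sketch of what vector.kd_nearest does above (nearest SW reach vertex per WAP),
# --- written with scipy's cKDTree; the WAP ids, reach ids, and coordinates are placeholders.
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

waps = pd.DataFrame({'Wap': ['J36/0001', 'J36/0002'], 'NzTmX': [1365000, 1366200], 'NzTmY': [5175000, 5174100]})
reach_pts = pd.DataFrame({'SwSpatialUnitId': ['R1', 'R1', 'R2'], 'NzTmX': [1364900, 1365500, 1366800], 'NzTmY': [5175050, 5174800, 5174000]})

tree = cKDTree(reach_pts[['NzTmX', 'NzTmY']].to_numpy())
dist, idx = tree.query(waps[['NzTmX', 'NzTmY']].to_numpy(), k=1)

waps['SwSpatialUnitId'] = reach_pts['SwSpatialUnitId'].to_numpy()[idx]
waps['DistanceToSw'] = dist.round().astype(int)
print(waps)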
precip_mon_plot = 'station_precip_mon.png'
precip_yr_plot = 'station_precip_yr.png'
reg_plot = 'mon_reg_404810.png'

#####################################
### Process VCSN data

both1 = rd_niwa_vcsn(mtypes, catch_shp_path, buffer_dis=5000)

## Aggregate by month
both2 = both1.groupby(['x', 'y', pd.Grouper(key='time', freq='M')]).sum()
both3 = both2.loc[(slice(None), slice(None), slice(from_date, to_date)), :].copy()

## Aggregate by catchment
both4 = vector.xy_to_gpd(['time', 'rain', 'pe'], 'x', 'y', both3.reset_index(), 4326)
both5 = both4.to_crs(epsg=2193)

catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)
catch_del['site'] = catch_del['site'].astype(str)

pts2, poly1 = vector.pts_poly_join(both5, catch_del, 'site')

catch_agg0 = pts2.groupby(['site', 'time'])[['rain', 'pe']].mean()

# Only above Huts
catch_agg1 = catch_agg0.loc[rec_site].copy()
catch_agg1.rename(columns={
    'rain': 'VCSN Precip',
    'pe': 'VCSN PET'
start_date1 = now1 - pd.DateOffset(months=121) - pd.DateOffset(days=now1.day - 1)
start_date2 = now1 - pd.DateOffset(months=1) - pd.DateOffset(days=now1.day - 1)

sites1 = sites[sites.site.isin(count1[(count1 >= 120) & (end_date1 >= start_date2) & (start_date1 <= start_date1)].index)]
uw1 = sites[sites.CwmsName.isin(['Upper Waitaki']) & sites.site.isin(count1[(count1 >= 80) & (end_date1 >= start_date2) & (start_date1 <= start_date1)].index)]

sites2 = pd.concat([sites1, uw1]).drop_duplicates()

gw_sites = xy_to_gpd(['site', 'CwmsName'], 'NZTMX', 'NZTMY', sites2)

gw2 = gw1[gw1.site.isin(gw_sites.site)].copy()

### Extract Site locations
gw_sites.to_file(os.path.join(param.base_dir, param.output_dir, param.gw_sites_shp))

### Combine the sites with the polygons
gw_site_zone = gw_sites.drop(['geometry'], axis=1)
gw_site_zone.rename(columns={'CwmsName': 'zone'}, inplace=True)

### Monthly interpolations
if param.interp:
    ## Estimate monthly means through interpolation
    day1 = grp_ts_agg(gw2, 'site', 'time', 'D').mean().unstack('site')
catch_shp_path = get_path(catch_shp)

sites_col_name = 'SITENUMBER'
poly_col_name = 'Catchmen_1'
line_site_col = 'NZREACH'

#######################################
### Examples

pts = util.load_geo_data(sites_shp_path)
pts['geometry'] = pts.geometry.simplify(1)

## Selecting points from within a polygon
pts1 = vector.sel_sites_poly(sites_shp_path, rec_catch_shp_path, buffer_dis=10)

## Spatial join attributes of polygons to points
pts2, poly2 = vector.pts_poly_join(sites_shp_path, catch_shp_path, poly_col_name)

## Create a GeoDataFrame from x and y data
pts_df = pts[[sites_col_name, 'geometry']].copy()
pts_df['x'] = pts_df.geometry.x
pts_df['y'] = pts_df.geometry.y
pts_df.drop('geometry', axis=1, inplace=True)
pts3 = vector.xy_to_gpd(sites_col_name, 'x', 'y', pts_df)

## Find the closest line to points
line1 = vector.closest_line_to_pts(sites_shp_path, rec_streams_shp_path, line_site_col, buffer_dis=100)
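
## For reference: a minimal sketch of what xy_to_gpd is doing in the x/y step above, written
## directly with geopandas (the NZTM EPSG code 2193 is an assumption here).
pts3_manual = gpd.GeoDataFrame(pts_df[[sites_col_name]],
                               geometry=gpd.points_from_xy(pts_df['x'], pts_df['y']),
                               crs=2193)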
    print('-> loaded from local files')
except:
    print('-> Processing data from the databases')

    sites1 = mssql.rd_sql(ts_server, ts_database, sites_table, ['ExtSiteID', 'NZTMX', 'NZTMY', 'SwazGroupName', 'SwazName'])

    input_sites1 = pd.read_csv(os.path.join(param['inputs_path'], site_csv)).site.astype(str)

    sites0 = sites1[sites1.ExtSiteID.isin(input_sites1)].copy()
    sites0.rename(columns={'ExtSiteID': 'FlowSite'}, inplace=True)

    flow_sites_gdf = vector.xy_to_gpd('FlowSite', 'NZTMX', 'NZTMY', sites0)
    flow_sites_gdf.to_file(os.path.join(results_path, flow_sites_shp))

###################################
### Catchment delineation

try:
    catch_del_shp = [p for p in os.listdir(results_path) if (catch_del_base in p) and ('.shp' in p)][-1]
    catch_gdf = gpd.read_file(os.path.join(results_path, catch_del_shp))
except:
    sql1 = sql_arg()

    rec_rivers_dict = sql1.get_dict(rec_rivers_sql)
catch_del_shp_path = os.path.join(base_path, shp_dir, catch_del_shp)

rec_site = 70105

usage_output = 'pareora_huts_usage_mon.csv'

############################################
### Extract data

site_filter = {'CatchmentGroupName': catch_group}

a1 = AlloUsage(from_date, to_date, site_filter=site_filter)

sites0 = a1.sites.reset_index().copy()

sites = vector.xy_to_gpd('wap', 'NZTMX', 'NZTMY', sites0)

catch_del = gpd.read_file(catch_del_shp_path)
catch_del.rename(columns={'SITENUMBER': 'site'}, inplace=True)
catch_del1 = catch_del[catch_del.site == rec_site]

sites1 = vector.sel_sites_poly(sites, catch_del1)

## Usage data
usage1 = pd.read_hdf(os.path.join(source_path, usage_hdf))

usage2 = usage1[usage1.wap.isin(sites1.wap)].copy()
usage2['time'] = pd.to_datetime(usage2['time'])
base_dir = r'E:\ecan\shared\base_data\metservice\2018-09-11'
nc1 = 'wrf_hourly_precip_nz4kmN-NCEP_2018090918.nc'
nc1 = 'wrf_hourly_precip_nz8kmN-NCEP_2018090918.nc'

ms_nc = os.path.join(base_dir, nc1)

ms1 = xr.open_dataset(os.path.join(base_dir, nc1))

ms2 = ms1.sel(time='2018-09-12 6:00:00').drop('time').precipitation_amount

df1 = ms2.to_dataframe().reset_index(drop=True)

#ms_proj = to_proj4(nc)

df2 = df1[df1.longitude > 0]

#gdf1 = vector.xy_to_gpd(df2.precipitation_amount, 'longitude', 'latitude', df2, ms_proj)
gdf1 = vector.xy_to_gpd(df2.precipitation_amount, 'longitude', 'latitude', df2, 4326)
gdf1.columns = ['precip', 'geometry']

#gdf2 = gdf1.to_crs(epsg=2193)

gdf1.to_file(os.path.join(base_dir, 'test_ms_df.shp'))

from_crs = '+proj=lcc +lat_1=-30.0 +lat_2=-60.0 +lat_0=-50.288826 +lon_0=161.85754 +R=6370000.0 +no_defs'
from_crs = '+proj=lcc +lat_1=-60 +lat_2=-30 +lat_0=-60 +lon_0=167.5 +x_0=211921 +y_0=-1221320 +a=6367470 +b=6367470 +no_defs'
to_crs = 4326

from_crs1 = Proj(convert_crs(from_crs, pass_str=True), preserve_units=True)
to_crs1 = Proj(convert_crs(to_crs, pass_str=True), preserve_units=True)
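
# Sketch (not in the original script): with pyproj 2+ the same LCC -> WGS84 conversion can be
# written with a Transformer; the sample coordinates below are placeholders.
from pyproj import Transformer

transformer = Transformer.from_crs(from_crs, 'EPSG:4326', always_xy=True)
lon, lat = transformer.transform([211921.0], [-1221320.0])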
rec_datasets = datasets[datasets.CTypeID == 1].DatasetTypeID.tolist()
man_datasets = datasets[datasets.CTypeID == 2].DatasetTypeID.tolist()

rec_summ1 = site_summ1[site_summ1.DatasetTypeID.isin(rec_datasets) & (site_summ1.FromDate <= param['from_date']) & (site_summ1.ToDate >= param['to_date'])].sort_values('ToDate', ascending=False).drop_duplicates('ExtSiteID').copy()

flow_sites_gdf = takes.flow_sites_gdf.copy()

sites_rec_bool = flow_sites_gdf.FlowSite.isin(rec_summ1.ExtSiteID.unique())

sites_rec1 = flow_sites_gdf[sites_rec_bool].copy()
sites_man1 = flow_sites_gdf[~sites_rec_bool].copy()

flow_rec_sites1 = mssql.rd_sql(server, database, site_table, ['ExtSiteID', 'NZTMX', 'NZTMY'], where_in={'ExtSiteID': rec_summ1.ExtSiteID.unique().tolist()})

flow_rec_sites2 = vector.xy_to_gpd('ExtSiteID', 'NZTMX', 'NZTMY', flow_rec_sites1)

## Estimate flow where recorder doesn't exist
sites_man2 = sites_man1.copy()
sites_man2['geometry'] = sites_man1.buffer(buffer_dis)

rec_sites2 = vector.sel_sites_poly(flow_rec_sites2, sites_man2)

rec_ts_data1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value', from_date=param['from_date'], to_date=param['to_date'], where_in={'ExtSiteID': rec_sites2.ExtSiteID.tolist(), 'DatasetTypeID': rec_summ1.DatasetTypeID.unique().tolist()})
rec_ts_data2 = rec_ts_data1.Value.unstack(0).interpolate('time', limit=10).dropna(axis=1)

rec_flow1 = rec_ts_data2.loc[:, rec_ts_data2.columns.isin(sites_rec1.FlowSite)].copy()

man_ts_data1 = mssql.rd_sql_ts(server, database, ts_table, 'ExtSiteID', 'DateTime', 'Value', from_date=param['from_date'], to_date=param['to_date'], where_in={'ExtSiteID': sites_man1.FlowSite.tolist(), 'DatasetTypeID': man_datasets})