Example 1
def rd_waps_geo(sites=None):
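    """
    Return well locations from the Wells/WELL_DETAILS table as a GeoDataFrame
    indexed by site (WELL_NO), restricted to a plausible NZTM bounding box.
    """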
    if sites is not None:
        site_geo = rd_sql('SQL2012PROD05',
                          'Wells',
                          'WELL_DETAILS', ['WELL_NO', 'NZTMX', 'NZTMY'],
                          where_col='WELL_NO',
                          where_val=sites)
    else:
        site_geo = rd_sql('SQL2012PROD05', 'Wells', 'WELL_DETAILS',
                          ['WELL_NO', 'NZTMX', 'NZTMY'])

    site_geo.rename(columns={'WELL_NO': 'site'}, inplace=True)
    index1 = (site_geo.NZTMX > 1300000) & (site_geo.NZTMX < 1700000) & (
        site_geo.NZTMY > 5000000) & (site_geo.NZTMY < 5400000)
    site_geo0 = site_geo[index1]
    site_geo2 = xy_to_gpd(df=site_geo0,
                          id_col='site',
                          x_col='NZTMX',
                          y_col='NZTMY')
    #    site_geo2.loc[:, 'site'] = site_geo2.loc[:, 'site'].str.upper().str.replace(' ', '')
    #    site_geo2 = site_geo2.drop_duplicates()
    site_geo2.loc[:, 'site'] = to_numeric(site_geo2.loc[:, 'site'],
                                          errors='ignore')

    return (site_geo2.set_index('site'))
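A hypothetical usage sketch for the function above, assuming rd_waps_geo and its rd_sql/xy_to_gpd helpers are importable and the internal SQL2012PROD05 server is reachable; the well numbers are made-up placeholders.

# Hypothetical usage (placeholder well numbers; internal DB required).
waps_geo = rd_waps_geo(sites=['BX23/0123', 'M35/0456'])
print(waps_geo.head())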
Example 2
def rd_sw_rain_geo(sites=None):
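    """
    Return surface water and rainfall gauging site locations from the
    Bgauging/RSITES table as a GeoDataFrame indexed by site number.
    """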
    if sites is not None:
        site_geo = rd_sql('SQL2012PROD05',
                          'Bgauging',
                          'RSITES',
                          col_names=['SiteNumber', 'NZTMX', 'NZTMY'],
                          where_col='SiteNumber',
                          where_val=sites)
    else:
        site_geo = rd_sql('SQL2012PROD05',
                          'Bgauging',
                          'RSITES',
                          col_names=['SiteNumber', 'NZTMX', 'NZTMY'])

    site_geo.columns = ['site', 'NZTMX', 'NZTMY']
    site_geo.loc[:, 'site'] = to_numeric(site_geo.loc[:, 'site'],
                                         errors='ignore')

    site_geo2 = xy_to_gpd(df=site_geo,
                          id_col='site',
                          x_col='NZTMX',
                          y_col='NZTMY')
    site_geo3 = site_geo2.loc[site_geo2.site > 0, :]
    site_geo3.loc[:, 'site'] = site_geo3.loc[:, 'site'].astype('int32')
    return (site_geo3.set_index('site'))
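A hypothetical usage sketch, assuming rd_sw_rain_geo is importable and the Bgauging database on SQL2012PROD05 is reachable; the site numbers are placeholders.

# Hypothetical usage (placeholder site numbers; internal DB required).
gauge_geo = rd_sw_rain_geo(sites=[66401, 69505])
print(gauge_geo.head())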
Example 3
def metconnect_id_loc(sites=None,
                      mc_server='SQL2012PROD03',
                      mc_db='MetConnect',
                      mc_site_table='RainFallPredictionSites',
                      mc_cols=['MetConnectID', 'SiteString', 'TidedaID'],
                      gis_server='SQL2012PROD05'):
    """
    Function to extract the metconnect id table with geometry location.

    Parameters
    ----------
    sites : list of int or None
        The site numbers to extract from the table, or None for all.

    Returns
    -------
    GeoDataFrame
    """

    ### Input parameters
    #    hy_server = 'SQL2012PROD05'
    #    hy_db = 'Hydrotel'
    #    pts_table = 'Points'
    #    objs_table = 'Objects'
    #    sites_table = 'Sites'
    #
    #    pts_cols = ['Point', 'Object']
    #    objs_cols = ['Object', 'Site']
    #    sites_cols = ['Site', 'ExtSysId']

    loc_db = 'Bgauging'
    loc_table = 'RSITES'

    loc_cols = ['SiteNumber', 'NZTMX', 'NZTMY']

    ## Import tables
    mc1 = rd_sql(mc_server, mc_db, mc_site_table, mc_cols)
    mc2 = mc1[~mc1.SiteString.str.startswith('M')]
    mc2.columns = ['MetConnectID', 'site_name', 'ExtSysId']
    mc2 = mc2[(mc2.MetConnectID != 7) & mc2.ExtSysId.notnull()]
    mc2.loc[:, 'ExtSysId'] = mc2.loc[:, 'ExtSysId'].astype(int)

    #    hy_pts = rd_sql(hy_server, hy_db, pts_table, pts_cols, 'Point', mc2.Point.tolist())
    #    hy_objs = rd_sql(hy_server, hy_db, objs_table, objs_cols, 'Object', hy_pts.Object.tolist())
    #    hy_sites = rd_sql(hy_server, hy_db, sites_table, sites_cols, 'Site', hy_objs.Site.tolist())
    #    hy_sites['ExtSysId'] = to_numeric(hy_sites['ExtSysId'])
    hy_loc = rd_sql(gis_server, loc_db, loc_table, loc_cols, 'SiteNumber',
                    mc2.ExtSysId.tolist())
    hy_loc.columns = ['ExtSysId', 'x', 'y']

    #    t1 = merge(mc2, hy_pts, on='Point')
    #    t2 = merge(t1, hy_objs, on='Object')
    #    t3 = merge(t2, hy_sites, on='Site')
    t4 = merge(mc2, hy_loc, on='ExtSysId')

    hy_xy = xy_to_gpd('MetConnectID', 'x', 'y', t4)

    return (hy_xy)
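A hypothetical usage sketch; the default arguments point at the internal MetConnect and Bgauging databases, so the call below only works inside that environment.

# Hypothetical usage (internal MetConnect/Bgauging databases required).
mc_loc = metconnect_id_loc()
print(mc_loc.head())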
Example 4
def input_processing(precip_et,
                     crs,
                     irr1,
                     paw1,
                     bound_shp,
                     rain_name,
                     pet_name,
                     grid_res,
                     buffer_dis,
                     interp_fun,
                     agg_ts_fun,
                     time_agg,
                     irr_eff_dict,
                     irr_trig_dict,
                     min_irr_area_ratio=0.01,
                     irr_mons=[10, 11, 12, 1, 2, 3, 4],
                     precip_correction=1.1):
    """
    Function to process the input data for the lsrm. Outputs a DataFrame of the variables for the lsrm.
    """
    seterr(invalid='ignore')

    ## Load and resample precip and et
    bound = read_file(bound_shp)

    new_rain = poly_interp_agg(precip_et,
                               crs,
                               bound_shp,
                               rain_name,
                               'time',
                               'x',
                               'y',
                               buffer_dis,
                               grid_res,
                               grid_res,
                               interp_fun=interp_fun,
                               agg_ts_fun=agg_ts_fun,
                               period=time_agg) * precip_correction
    new_rain.name = 'precip'

    new_et = poly_interp_agg(precip_et,
                             crs,
                             bound_shp,
                             pet_name,
                             'time',
                             'x',
                             'y',
                             buffer_dis,
                             grid_res,
                             grid_res,
                             interp_fun=interp_fun,
                             agg_ts_fun=agg_ts_fun,
                             period=time_agg)
    new_et.name = 'pet'

    new_rain_et = concat([new_rain, new_et], axis=1)

    ## convert new point locations to geopandas
    time1 = new_rain_et.index.levels[0][0]
    grid1 = new_rain_et.loc[time1].reset_index()[['x', 'y']]
    grid2 = xy_to_gpd(grid1.index, 'x', 'y', grid1, bound.crs)
    grid2.columns = ['site', 'geometry']

    all_times = new_rain_et.index.levels[0]
    new_rain_et.loc[:, 'site'] = tile(grid1.index, len(all_times))

    ## Convert points to polygons
    sites_poly = points_grid_to_poly(grid2, 'site')

    ## process polygon data
    # Select polygons within boundary

    sites_poly_union = sites_poly.unary_union
    irr2 = irr1[irr1.intersects(sites_poly_union)]
    irr3 = irr2[irr2.irr_type.notnull()]
    paw2 = paw1[paw1.intersects(sites_poly_union)]
    paw3 = paw2[paw2.paw.notnull()]

    # Overlay intersection
    sites_poly1 = spatial_overlays(sites_poly, bound,
                                   how='intersection')[['site', 'geometry']]
    sites_poly2 = sites_poly1.dissolve('site')
    sites_poly2.crs = sites_poly.crs
    sites_poly_area = sites_poly2.area.round(2)
    sites_poly3 = sites_poly2.reset_index()

    irr4 = spatial_overlays(irr3, sites_poly3, how='intersection')
    paw4 = spatial_overlays(paw3, sites_poly3, how='intersection')

    irr4['area'] = irr4.geometry.area.round()
    # Use the rounded 'area' column explicitly (attribute access would hit the
    # GeoDataFrame .area property instead).
    irr5 = irr4[irr4['area'] >= 1].drop(['idx1', 'idx2'], axis=1).copy()

    paw4['area'] = paw4.geometry.area.round()
    paw5 = paw4.loc[paw4['area'] >= 1].drop(['idx1', 'idx2'], axis=1).copy()
    paw5.loc[paw5.paw <= 0, 'paw'] = 1

    # Add in missing PAW values - Change later to something more useful if needed
    mis_sites_index = ~sites_poly3.site.isin(paw5.site)
    sites_poly3['area'] = sites_poly3.area.round()

    paw6 = concat([paw5, sites_poly3[mis_sites_index]])
    paw6.loc[paw6.paw.isnull(), 'paw'] = 1

    # Aggregate by site weighted by area to estimate a volume
    paw_area1 = paw6[['paw', 'site', 'area']].copy()
    paw_area1.loc[:, 'paw_vol'] = paw_area1['paw'] * paw_area1['area']
    paw7 = ((paw_area1.groupby('site')['paw_vol'].sum() /
             paw_area1.groupby('site')['area'].sum()) * sites_poly_area *
            0.001).round(2)

    site_irr_area = irr5.groupby('site')['area'].sum()
    irr_eff1 = irr5.replace({'irr_type': irr_eff_dict})
    irr_eff1.loc[:, 'irr_eff'] = irr_eff1['irr_type'] * irr_eff1['area']
    irr_eff2 = (irr_eff1.groupby('site')['irr_eff'].sum() /
                site_irr_area).round(3)

    irr_trig1 = irr5.replace({'irr_type': irr_trig_dict})
    irr_trig1.loc[:, 'irr_trig'] = irr_trig1['irr_type'] * irr_trig1['area']
    irr_trig2 = (irr_trig1.groupby('site')['irr_trig'].sum() /
                 site_irr_area).round(3)

    irr_area_ratio1 = (site_irr_area / sites_poly_area).round(3)

    poly_data1 = concat(
        [paw7, sites_poly_area, irr_eff2, irr_trig2, irr_area_ratio1], axis=1)
    poly_data1.columns = [
        'paw', 'site_area', 'irr_eff', 'irr_trig', 'irr_area_ratio'
    ]
    poly_data1.loc[poly_data1['irr_area_ratio'] < min_irr_area_ratio,
                   ['irr_eff', 'irr_trig', 'irr_area_ratio']] = nan

    ## Combine time series with polygon data
    new_rain_et1 = new_rain_et[new_rain_et['site'].isin(sites_poly2.index)]

    input1 = merge(new_rain_et1.reset_index(),
                   poly_data1.reset_index(),
                   on='site',
                   how='left')

    ## Convert precip and et to volumes
    input1.loc[:, ['precip', 'pet']] = (input1.loc[:, ['precip', 'pet']].mul(
        input1.loc[:, 'site_area'], axis=0) * 0.001).round(2)

    ## Remove irrigation parameters during non-irrigation times
    input1.loc[~input1.time.dt.month.isin(irr_mons),
               ['irr_eff', 'irr_trig']] = nan

    ## Run checks on the input data

    #    print('Running checks on the prepared input data')

    null_time = input1.loc[input1.time.isnull(), 'time']
    null_x = input1.loc[input1.x.isnull(), 'x']
    null_y = input1.loc[input1.y.isnull(), 'y']
    null_pet = input1.loc[input1['pet'].isnull(), 'pet']
    null_rain = input1.loc[input1['precip'].isnull(), 'precip']
    null_paw = input1.loc[input1.paw.isnull(), 'paw']
    not_null_irr_eff = input1.loc[input1.irr_eff.notnull(), 'irr_eff']

    if not null_time.empty:
        raise ValueError('Null values in the time variable')
    if not null_x.empty:
        raise ValueError('Null values in the x variable')
    if not null_y.empty:
        raise ValueError('Null values in the y variable')
    if not null_pet.empty:
        raise ValueError('Null values in the pet variable')
    if not null_rain.empty:
        raise ValueError('Null values in the rain variable')
    if not null_paw.empty:
        raise ValueError('Null values in the paw variable')
    if not_null_irr_eff.empty:
        raise ValueError('No values for irrigation variables')

    if input1['time'].dtype.name != 'datetime64[ns]':
        raise ValueError('time variable must be a datetime64[ns] dtype')
    if input1['x'].dtype != float:
        raise ValueError('x variable must be a float dtype')
    if input1['y'].dtype != float:
        raise ValueError('y variable must be a float dtype')
    if input1['pet'].dtype != float:
        raise ValueError('pet variable must be a float dtype')
    if input1['precip'].dtype != float:
        raise ValueError('precip variable must be a float dtype')
    if input1['paw'].dtype != float:
        raise ValueError('paw variable must be a float dtype')
    if input1['irr_eff'].dtype != float:
        raise ValueError('irr_eff variable must be a float dtype')
    if input1['irr_trig'].dtype != float:
        raise ValueError('irr_trig variable must be a float dtype')
    if input1['irr_area_ratio'].dtype != float:
        raise ValueError('irr_area_ratio variable must be a float dtype')

    ## Return the processed input table and the site polygons
    return (input1, sites_poly2)
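The PAW aggregation above is an area-weighted mean per site (paw_vol summed over area). A minimal, self-contained sketch of that pattern with plain pandas and made-up numbers, independent of the lsrm inputs:

from pandas import DataFrame

# Two intersected polygons per site, each with a paw value and an area.
paw_demo = DataFrame({
    'site': [0, 0, 1, 1],
    'paw': [60.0, 120.0, 90.0, 90.0],
    'area': [2500.0, 7500.0, 5000.0, 5000.0]
})
paw_demo['paw_vol'] = paw_demo['paw'] * paw_demo['area']
paw_weighted = (paw_demo.groupby('site')['paw_vol'].sum() /
                paw_demo.groupby('site')['area'].sum())
print(paw_weighted)  # site 0 -> 105.0, site 1 -> 90.0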
Example 5
def rd_squalarc(sites,
                mtypes=None,
                from_date=None,
                to_date=None,
                convert_dtl=False,
                dtl_method=None,
                export=None):
    """
    Function to read in "squalarc" data, which is actually stored in the MSSQL database.

    Parameters
    ----------
    sites: ndarray, list, or str
        The site names as a list, array, csv with the first column as the site names, or a polygon shapefile of the area of interest.
    mtypes: list or None
        A list of measurement type names to be in the output. Leaving it empty returns all mtypes.
    from_date: str
        A start date string in the form '2010-01-01'.
    to_date: str
        An end date string in the form '2011-01-01'.
    convert_dtl: bool
        Should values under the detection limit be converted to numeric?
    dtl_method: str
        The method to use to convert values under a detection limit to numeric. None or 'standard' takes half of the detection limit. 'trend' is meant as an output for trend analysis and includes an additional column, dtl_ratio, referring to the ratio of values under the detection limit.
    export: str or None
        Either None or a string path to a csv file.
    """

    #### Read in sites
    sites1 = select_sites(sites)

    #### Extract by polygon
    if isinstance(sites1, gpd.GeoDataFrame):
        ## Surface water sites
        sw_sites_tab = rd_sql('SQL2012PROD05',
                              'Squalarc',
                              'SITES',
                              col_names=['SITE_ID', 'NZTMX', 'NZTMY'])
        sw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_sw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', sw_sites_tab)
        sites1a = sites1.to_crs(gdf_sw_sites.crs)
        sw_sites2 = sel_sites_poly(gdf_sw_sites, sites1a).drop('geometry',
                                                               axis=1)

        ## Groundwater sites
        gw_sites_tab = rd_sql('SQL2012PROD05',
                              'Wells',
                              'WELL_DETAILS',
                              col_names=['WELL_NO', 'NZTMX', 'NZTMY'])
        gw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_gw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', gw_sites_tab)
        gw_sites2 = sel_sites_poly(gdf_gw_sites, sites1a).drop('geometry',
                                                               axis=1)

        sites2 = sw_sites2.site.append(gw_sites2.site).astype(str).tolist()
    else:
        sites2 = pd.Series(sites1, name='site').astype(str).tolist()

    #### Extract the rest of the data
    if len(sites2) > 10000:
        n_chunks = int(np.ceil(len(sites2) * 0.0001))
        sites3 = [sites2[i::n_chunks] for i in range(n_chunks)]
        samples_tab = pd.DataFrame()
        for i in sites3:
            samples_tab1 = rd_sql('SQL2012PROD05',
                                  'Squalarc',
                                  '"SQL_SAMPLE_METHODS+"',
                                  col_names=[
                                      'Site_ID', 'SAMPLE_NO', 'ME_TYP',
                                      'Collect_Date', 'Collect_Time',
                                      'PA_NAME', 'PARAM_UNITS', 'SRESULT'
                                  ],
                                  where_col='Site_ID',
                                  where_val=i)
            samples_tab1.columns = [
                'site', 'sample_id', 'source', 'date', 'time', 'parameter',
                'units', 'val'
            ]
            samples_tab1['source'] = samples_tab1['source'].str.lower()
            samples_tab = pd.concat([samples_tab, samples_tab1])
    else:
        samples_tab = rd_sql('SQL2012PROD05',
                             'Squalarc',
                             '"SQL_SAMPLE_METHODS+"',
                             col_names=[
                                 'Site_ID', 'SAMPLE_NO', 'ME_TYP',
                                 'Collect_Date', 'Collect_Time', 'PA_NAME',
                                 'PARAM_UNITS', 'SRESULT'
                             ],
                             where_col='Site_ID',
                             where_val=sites2)
        samples_tab.columns = [
            'site', 'sample_id', 'source', 'date', 'time', 'parameter',
            'units', 'val'
        ]
        samples_tab.loc[:, 'source'] = samples_tab.loc[:, 'source'].str.lower()

    samples_tab2 = samples_tab.copy()
    num_test = pd.to_numeric(samples_tab2.loc[:, 'time'], errors='coerce')
    samples_tab2.loc[num_test.isnull(), 'time'] = '0000'
    # Strip literal '.' characters from the time strings (regex=False so '.' is
    # not treated as a regex wildcard).
    samples_tab2['time'] = samples_tab2['time'].str.replace('.', '', regex=False)
    samples_tab2 = samples_tab2[samples_tab2.date.notnull()]
    #    samples_tab2.loc[:, 'time'] = samples_tab2.loc[:, 'time'].str.replace('9999', '0000')
    time1 = pd.to_datetime(samples_tab2.time, format='%H%M', errors='coerce')
    time1[time1.isnull()] = pd.Timestamp('2000-01-01 00:00:00')
    datetime1 = pd.to_datetime(
        samples_tab2.date.dt.date.astype(str) + ' ' +
        time1.dt.time.astype(str))
    samples_tab2.loc[:, 'date'] = datetime1
    samples_tab2 = samples_tab2.drop('time', axis=1)
    samples_tab2.loc[samples_tab2.val.isnull(), 'val'] = np.nan
    samples_tab2.loc[samples_tab2.val == 'N/A', 'val'] = np.nan

    #### Select within time range
    if isinstance(from_date, str):
        samples_tab2 = samples_tab2[samples_tab2['date'] >= from_date]
    if isinstance(to_date, str):
        samples_tab2 = samples_tab2[samples_tab2['date'] <= to_date]

    if mtypes is not None:
        mtypes1 = select_sites(mtypes)
        data = samples_tab2[samples_tab2.parameter.isin(mtypes1)].reset_index(
            drop=True)
    else:
        data = samples_tab2.reset_index(drop=True)

    #### Correct poorly typed in site names
    data.loc[:, 'site'] = data.loc[:, 'site'].str.upper().str.replace(' ', '')

    #### Convert detection limit values
    if convert_dtl:
        less1 = data['val'].str.match('<')
        if less1.sum() > 0:
            less1.loc[less1.isnull()] = False
            data2 = data.copy()
            data2.loc[less1,
                      'val'] = pd.to_numeric(
                          data.loc[less1, 'val'].str.replace('<', ''),
                          errors='coerce') * 0.5
            if dtl_method in (None, 'standard'):
                data3 = data2
            if dtl_method == 'trend':
                df1 = data2.loc[less1]
                count1 = data.groupby('parameter')['val'].count()
                count1.name = 'tot_count'
                count_dtl = df1.groupby('parameter')['val'].count()
                count_dtl.name = 'dtl_count'
                count_dtl_val = df1.groupby('parameter')['val'].nunique()
                count_dtl_val.name = 'dtl_val_count'
                combo1 = pd.concat([count1, count_dtl, count_dtl_val],
                                   axis=1,
                                   join='inner')
                combo1['dtl_ratio'] = (combo1['dtl_count'] /
                                       combo1['tot_count']).round(2)

                ## conditionals
                #            param1 = combo1[(combo1['dtl_ratio'] <= 0.4) | (combo1['dtl_ratio'] == 1)]
                #            under_40 = data['parameter'].isin(param1.index)
                param2 = combo1[(combo1['dtl_ratio'] > 0.4)
                                & (combo1['dtl_val_count'] != 1)]
                over_40 = data['parameter'].isin(param2.index)

                ## Calc detection limit values
                data3 = pd.merge(data,
                                 combo1['dtl_ratio'].reset_index(),
                                 on='parameter',
                                 how='left')
                data3.loc[:, 'val_dtl'] = data2['val']

                max_dtl_val = data2[over_40 & less1].groupby(
                    'parameter')['val'].transform('max')
                max_dtl_val.name = 'dtl_val_max'
                data3.loc[over_40 & less1, 'val_dtl'] = max_dtl_val
        else:
            data3 = data
    else:
        data3 = data

    #### Return and export
    if isinstance(export, str):
        data3.to_csv(export, encoding='utf-8', index=False)
    return data3
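With convert_dtl=True, censored results such as '<0.01' are replaced by half the detection limit. A minimal, self-contained sketch of that substitution using only pandas (made-up values):

import pandas as pd

vals = pd.Series(['<0.01', '0.05', '<0.002', '1.3'])
less1 = vals.str.match('<').fillna(False)
converted = vals.copy()
converted[less1] = pd.to_numeric(vals[less1].str.replace('<', '', regex=False),
                                 errors='coerce') * 0.5
print(converted.tolist())  # [0.005, '0.05', 0.001, '1.3']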
Example 6
def rd_ht_wq_data(hts,
                  sites=None,
                  mtypes=None,
                  start=None,
                  end=None,
                  dtl_method=None,
                  output_site_data=False,
                  mtype_params=None,
                  sample_params=None):
    """
    Function to read data from an hts file and optionally select specific sites and aggregate the data.

    Parameters
    ----------
    hts : str
        Path to the hts file.
    sites : list
        A list of site names within the hts file.
    mtypes : list
        A list of measurement types that should be returned.
    start : str
        The start date to retrieve from the data in ISO format (e.g. '2011-11-30 00:00').
    end : str
        The end date to retrieve from the data in ISO format (e.g. '2011-11-30 00:00').
    dtl_method : None, 'standard', 'trend'
        The method to use to convert values under a detection limit to numeric. None does no conversion. 'standard' takes half of the detection limit. 'trend' is meant as an output for trend analysis and includes an additional column, dtl_ratio, referring to the ratio of values under the detection limit.
    output_site_data : bool
        Should the site data be output?

    Returns
    -------
    DataFrame
    """

    #    agg_unit_dict = {'l/s': 1, 'm3/s': 1, 'm3/hour': 1, 'mm': 1, 'm3': 4}
    #    unit_convert = {'l/s': 0.001, 'm3/s': 1, 'm3/hour': 1, 'mm': 1, 'm3': 4}

    sites1 = select_sites(sites)

    #### Extract by polygon
    if isinstance(sites1, GeoDataFrame):
        ## Surface water sites
        sw_sites_tab = rd_sql('SQL2012PROD05',
                              'Squalarc',
                              'SITES',
                              col_names=['SITE_ID', 'NZTMX', 'NZTMY'])
        sw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_sw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', sw_sites_tab)
        sites1a = sites1.to_crs(gdf_sw_sites.crs)
        sw_sites2 = sel_sites_poly(gdf_sw_sites, sites1a).drop('geometry',
                                                               axis=1)

        ## Groundwater sites
        gw_sites_tab = rd_sql('SQL2012PROD05',
                              'Wells',
                              'WELL_DETAILS',
                              col_names=['WELL_NO', 'NZTMX', 'NZTMY'])
        gw_sites_tab.columns = ['site', 'NZTMX', 'NZTMY']
        gdf_gw_sites = xy_to_gpd('site', 'NZTMX', 'NZTMY', gw_sites_tab)
        gw_sites2 = sel_sites_poly(gdf_gw_sites, sites1a).drop('geometry',
                                                               axis=1)

        sites2 = sw_sites2.site.append(gw_sites2.site).astype(str).tolist()
    else:
        sites2 = sites1

    ### First read all of the sites in the hts file and select the ones to be read
    sites_df = rd_hilltop_sites(hts,
                                sites=sites2,
                                mtypes=mtypes,
                                rem_wq_sample=False)

    ### Open the hts file
    wqr = Dispatch("Hilltop.WQRetrieval")
    dfile = Dispatch("Hilltop.DataFile")
    try:
        dfile.Open(hts)
    except ValueError:
        print(dfile.errmsg)

    ### Iterate through the hts file
    df_lst = []
    for i in sites_df.index:
        site = sites_df.loc[i, 'site']
        mtype = sites_df.loc[i, 'mtype']
        if mtype == 'WQ Sample':
            continue
        wqr = dfile.FromWQSite(site, mtype)

        ## Set up start and end times and aggregation initiation
        if (start is None):
            start1 = wqr.DataStartTime
        else:
            start1 = start
        if end is None:
            end1 = wqr.DataEndTime
        else:
            end1 = end

        wqr.FromTimeRange(start1, end1)

        ## Extract data
        data = []
        time = []
        # Initialise the parameter lists so the `if sample_p:` check further
        # down is always defined, even when the else branch below runs.
        sample_p = []
        mtype_p = []

        test_params = sites_df[sites_df.site == site].mtype.unique()
        if ('WQ Sample' in test_params) & (isinstance(mtype_params, list)
                                           | isinstance(sample_params, list)):
            sample_p = []
            mtype_p = []
            while wqr.GetNext:
                data.append(wqr.value)
                time.append(str(pytime_to_datetime(wqr.time)))
                sample_p.append({
                    sp: wqr.params(sp).encode('ascii', 'ignore')
                    for sp in sample_params
                })
                mtype_p.append({
                    mp: wqr.params(mp).encode('ascii', 'ignore')
                    for mp in mtype_params
                })
        else:
            while wqr.GetNext:
                data.append(wqr.value)
                time.append(str(pytime_to_datetime(wqr.time)))

        if data:
            df_temp = DataFrame({
                'time': time,
                'data': data,
                'site': site,
                'mtype': mtype
            })
            if sample_p:
                df_temp = concat(
                    [df_temp, DataFrame(sample_p),
                     DataFrame(mtype_p)], axis=1)
            df_lst.append(df_temp)

    dfile.Close()
    wqr.close()
    if df_lst:
        data = concat(df_lst)
        data.loc[:, 'time'] = to_datetime(data.loc[:, 'time'])
        data1 = to_numeric(data.loc[:, 'data'], errors='coerce')
        data.loc[data1.notnull(), 'data'] = data1[data1.notnull()]
        #        data.loc[:, 'data'].str.replace('*', '')
        data = data.reset_index(drop=True)

        #### Convert detection limit values
        if dtl_method is not None:
            less1 = data['data'].str.match('<')
            if less1.sum() > 0:
                less1.loc[less1.isnull()] = False
                data2 = data.copy()
                data2.loc[less1, 'data'] = to_numeric(
                    data.loc[less1, 'data'].str.replace('<', ''),
                    errors='coerce') * 0.5
                if dtl_method == 'standard':
                    data3 = data2
                if dtl_method == 'trend':
                    df1 = data2.loc[less1]
                    count1 = data.groupby('mtype')['data'].count()
                    count1.name = 'tot_count'
                    count_dtl = df1.groupby('mtype')['data'].count()
                    count_dtl.name = 'dtl_count'
                    count_dtl_val = df1.groupby('mtype')['data'].nunique()
                    count_dtl_val.name = 'dtl_val_count'
                    combo1 = concat([count1, count_dtl, count_dtl_val],
                                    axis=1,
                                    join='inner')
                    combo1['dtl_ratio'] = (combo1['dtl_count'] /
                                           combo1['tot_count']).round(2)

                    ## conditionals
                    param2 = combo1[(combo1['dtl_ratio'] > 0.4)
                                    & (combo1['dtl_val_count'] != 1)]
                    over_40 = data['mtype'].isin(param2.index)

                    ## Calc detection limit values
                    data3 = merge(data,
                                  combo1['dtl_ratio'].reset_index(),
                                  on='mtype',
                                  how='left')
                    data3.loc[:, 'data_dtl'] = data2['data']

                    max_dtl_val = data2[over_40 & less1].groupby(
                        'mtype')['data'].transform('max')
                    max_dtl_val.name = 'dtl_data_max'
                    data3.loc[over_40 & less1, 'data_dtl'] = max_dtl_val
            else:
                data3 = data
        else:
            data3 = data

        if output_site_data:
            sites_df = sites_df[~(sites_df.mtype == 'WQ Sample')]
            return (data3, sites_df)
        else:
            return (data3)
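A hypothetical usage sketch, assuming a Windows machine with the Hilltop COM libraries installed and a local .hts file; the path, site, and measurement names are placeholders.

# Hypothetical usage (Windows + Hilltop COM required; placeholder names).
wq_data, wq_sites = rd_ht_wq_data(r'C:\data\WQualityData.hts',
                                  sites=['SQ30147'],
                                  mtypes=['Nitrate Nitrogen'],
                                  dtl_method='standard',
                                  output_site_data=True)
print(wq_data.head())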
Example 7
def poly_interp_agg(precip,
                    precip_crs,
                    poly,
                    data_col,
                    time_col,
                    x_col,
                    y_col,
                    interp_buffer_dis=10000,
                    poly_buffer_dis=0,
                    grid_res=None,
                    interp_fun='cubic',
                    agg_ts_fun=None,
                    period=None,
                    digits=2,
                    agg_xy=False,
                    nfiles='many',
                    output_path=None):
    """
    Function to select the precip sites within a polygon with a certain buffer distance, then interpolate/resample the data at a specific resolution, then output the results.
    precip -- dataframe of time, x, y, and precip.\n
    precip_crs -- The crs of the x and y coordinates of the precip dataframe.\n
    poly -- str path of a shapefile polygon or a polygon GeoDataFrame.\n
    data_col -- The column in precip containing the data to be interpolated.\n
    time_col -- The column in precip containing the times.\n
    x_col -- The column in precip containing the x coordinates.\n
    y_col -- The column in precip containing the y coordinates.\n
    interp_buffer_dis -- Buffer distance of the polygon selection when performing the interpolation.\n
    poly_buffer_dis -- Buffer distance of the polygon selection when outputting the results.\n
    grid_res -- The resulting grid resolution in meters (or the unit of the final projection).\n
    interp_fun -- The scipy griddata interpolation function to be applied (see https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.interpolate.griddata.html).\n
    agg_ts_fun -- The pandas time series resampling function to resample the data in time (either 'mean' or 'sum'). If None, then no time resampling.\n
    period -- The pandas time series code to resample the data in time (i.e. '2H' for two hours).\n
    digits -- the number of digits to round to (int).\n
    agg_xy -- Should all of the interpolated points within the polygon area be aggregated (mean) to a single time series?\n
    nfiles -- If output_path is a geotiff, then 'one' or 'many' geotiffs to be created.\n
    output_path -- Full path string where the output should be stored. The file extension should be one of '.tif' for geotiff, '.nc' for netcdf, or '.csv' for csv.
    """

    ### Convert x and y of precip to geodataframe
    sites0 = precip[[x_col, y_col]].drop_duplicates().reset_index(drop=True)
    sites = xy_to_gpd(sites0.index,
                      sites0[x_col],
                      sites0[y_col],
                      crs=precip_crs)
    sites.columns = ['site', 'geometry']

    ### Select the locations within the polygon
    if isinstance(poly, (GeoDataFrame, GeoSeries)):
        poly1 = poly.copy()
    elif isinstance(poly, str):
        poly1 = read_file(poly)
    sites1 = sites.to_crs(poly1.crs)
    sites_sel = sel_sites_poly(sites1, poly1, interp_buffer_dis)
    sites2 = sites0.loc[sites_sel['site']]

    ### Determine the grid resolution if not set
    if not isinstance(grid_res, (int, float)):
        bounds = poly1.unary_union.bounds
        x_range = bounds[2] - bounds[0]
        y_range = bounds[3] - bounds[1]
        min1 = min([x_range, y_range])
        grid_res = int(ceil(min1 / 20))

    ### Select the precip data from the sites
    precip2 = merge(precip, sites2, on=['x', 'y']).dropna()

    ### Interpolate grid
    poly_crs = ['+' + str(i) + '=' + str(poly1.crs[i]) for i in poly1.crs]
    poly_crs1 = ' '.join(poly_crs)
    new_precip = grid_interp_ts(precip2,
                                time_col,
                                x_col,
                                y_col,
                                data_col,
                                grid_res,
                                sites.crs,
                                poly_crs1,
                                interp_fun=interp_fun,
                                agg_ts_fun=agg_ts_fun,
                                period=period,
                                digits=digits)

    ### Create new sites list
    time = new_precip[time_col].sort_values().unique()
    sites_new_df = new_precip.loc[new_precip[time_col] == time[0],
                                  [x_col, y_col, data_col]]
    sites_new = xy_to_gpd(sites_new_df.index.values, x_col, y_col,
                          sites_new_df, poly_crs1)
    sites_new.columns = ['site', 'geometry']
    new_precip['site'] = tile(sites_new_df.index.values, len(time))

    ### Select sites from polygon
    sites_sel2 = sel_sites_poly(sites_new, poly1, poly_buffer_dis)
    new_precip2 = new_precip.loc[new_precip.site.isin(sites_sel2.site),
                                 [time_col, x_col, y_col, data_col]]

    ### Agg to polygon if required
    if agg_xy:
        new_precip3 = new_precip2.groupby(time_col)[data_col].mean().round(
            digits)
        time_col = None
    else:
        new_precip3 = new_precip2.set_index([time_col, x_col, y_col])[data_col]

    ### Save results
    if isinstance(output_path, str):
        path1 = path.splitext(output_path)[0]
        if '.csv' in output_path:
            new_precip3.to_csv(path1 + '.csv', header=True)

        if '.tif' in output_path:
            df = new_precip3.reset_index()
            save_geotiff(df=df,
                         data_col=data_col,
                         crs=poly_crs1,
                         x_col=x_col,
                         y_col=y_col,
                         time_col=time_col,
                         nfiles=nfiles,
                         export_path=path1 + '.tif')

        if '.nc' in output_path:
            ds1 = new_precip3.to_xarray().to_dataset()
            ds1.attrs['spatial_ref'] = poly_crs1
            ds1.to_netcdf(path1 + '.nc')

    return (new_precip3)
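A hypothetical usage sketch, assuming precip_df is a long-format DataFrame with time, x, y and rain columns in NZTM coordinates and that a catchment shapefile exists at the given path; all names and the EPSG-style crs value are placeholders.

# Hypothetical usage (placeholder inputs; precip_df defined elsewhere).
daily_rain = poly_interp_agg(precip=precip_df,
                             precip_crs=2193,  # assumed NZTM EPSG code accepted by xy_to_gpd
                             poly='catchment_boundary.shp',
                             data_col='rain',
                             time_col='time',
                             x_col='x',
                             y_col='y',
                             grid_res=1000,
                             agg_ts_fun='sum',
                             period='D',
                             agg_xy=True)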