예제 #1
0
def main():
    """Fit variograms to several representations of a precipitation record.

    Changes the working directory to ``main_dir``, reads the station series
    and the station coordinates, keeps the configured years (dropping the last
    record) and discards every station that has a missing value. The series
    are converted to normal scores and Fourier transformed. For each name in
    ``vg_vars`` the matching table is written to ``out_dir / f'{part}.csv'``
    and handed to a ``FitVariograms`` instance whose outputs are directed to
    ``out_dir / part``.

    Side effects: changes the working directory and writes CSV files,
    including ``<coords stem>_subset.csv`` in the working directory.

    Raises:
        ValueError: If ``vg_vars`` contains a name that is not handled.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr')

    os.chdir(main_dir)

    in_data_file = Path(r'precipitation.csv')
    in_crds_file = Path(r'precipitation_coords.csv')

    sep = ';'
    time_fmt = '%Y-%m-%d'

    beg_year = 1991
    end_year = 1991  # NOTE: skips last odd step.

    out_dir = Path(r'precipitation_kriging')

    min_valid_stns = 10

    mdr = 0.5
    perm_r_list = [1, 2]
    fit_vgs = ['Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 20
    figs_flag = True

    vg_vars = ['orig', 'mag', 'phs', 'data', ]  # 'cos', 'sin',

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)

    data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    # The trailing record is dropped unconditionally.
    data_df = data_df.loc[f'{beg_year}':f'{end_year}'].iloc[:-1]

    # Keep only the stations that are complete over the whole period.
    data_df.dropna(axis=1, how='any', inplace=True)

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y', 'Z']]

    crds_df = crds_df.loc[data_df.columns]

    crds_df.to_csv(Path(in_crds_file.stem + '_subset.csv'), sep=sep)

    # Rank-based probabilities in (0, 1), then their standard normal scores.
    probs_df = data_df.rank(axis=0) / (data_df.shape[0] + 1)

    norms_df = pd.DataFrame(
        data=norm.ppf(probs_df.values), columns=data_df.columns)

    ft_df = pd.DataFrame(
        data=np.fft.rfft(norms_df, axis=0),
        columns=data_df.columns)

    mag_df = pd.DataFrame(data=np.abs(ft_df), columns=data_df.columns)

    phs_df = pd.DataFrame(data=np.angle(ft_df), columns=data_df.columns)

    # Shift negative phases by 2*pi so that all of them are non-negative.
    phs_le_idxs = phs_df < 0

    phs_df[phs_le_idxs] = (2 * np.pi) + phs_df[phs_le_idxs]

    for part in vg_vars:

        (out_dir / part).mkdir(exist_ok=True)

        if part == 'mag':
            part_df = mag_df

        elif part == 'phs':
            part_df = phs_df

        elif part == 'cos':
            part_df = pd.DataFrame(
                data=np.cos(phs_df), columns=data_df.columns)

        elif part == 'sin':
            part_df = pd.DataFrame(
                data=np.sin(phs_df), columns=data_df.columns)

        elif part == 'data':
            # Each station's values sorted independently, on the original
            # index. A new frame is built rather than writing through
            # ``.values``: that array is only a writable view for
            # single-dtype frames and is read-only under Copy-on-Write.
            part_df = pd.DataFrame(
                data=np.sort(data_df.values, axis=0),
                index=data_df.index,
                columns=data_df.columns)

        elif part == 'orig':
            part_df = data_df.copy()

        else:
            raise ValueError(f'Undefined: {part}!')

        part_df.to_csv(out_dir / f'{part}.csv', sep=sep)

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type='obj')

        fit_vg_cls.set_vg_fitting_parameters(
            mdr,
            perm_r_list,
            fil_nug_vg,
            ngp,
            fit_vgs,
            n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next iteration creates a new fitter.
        fit_vg_cls = None
    return
def main():
    """Fit variograms to the real and imaginary Fourier parts of the data.

    Changes into the working directory, loads the precipitation series and
    station coordinates, restricts them to the configured years (minus the
    last record) and to stations without missing values, removes the
    validation stations, and Fourier transforms the remaining series. The
    ``real`` and ``imag`` parts are each written to CSV and passed to a
    ``FitVariograms`` instance whose outputs go to ``results_dir / part``.

    Raises:
        AssertionError: If a validation station is absent from the subset
            coordinates.
    """

    work_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\ft_spatio_temporal_interps'
    )
    os.chdir(work_dir)

    # Inputs and output location, relative to the working directory.
    data_path = Path(r'precipitation.csv')
    crds_path = Path(r'precipitation_coords.csv')
    results_dir = Path(r'precipitation_interpolation_validation')

    delim = ';'
    date_fmt = '%Y-%m-%d'

    # The last record of the selected period is dropped further below.
    first_year, last_year = 1989, 1992

    # Stations held out for validation; checked after subsetting.
    held_out_stns = ['P3733', 'P3315', 'P3713', 'P3454']

    # Variogram fitting configuration.
    mdr = 0.7
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Nug'
    n_best = 1
    ngp = 5
    figs_flag = True

    n_cpus = 8
    min_valid_stns = 10

    results_dir.mkdir(exist_ok=True)

    data_df = pd.read_csv(data_path, sep=delim, index_col=0)
    data_df.index = pd.to_datetime(data_df.index, format=date_fmt)

    period_df = data_df.loc[str(first_year):str(last_year)]
    data_df = period_df.iloc[:-1]

    # Only stations with a complete record are kept.
    data_df.dropna(axis=1, how='any', inplace=True)

    all_crds_df = pd.read_csv(crds_path, sep=delim, index_col=0)
    crds_df = all_crds_df[['X', 'Y', 'Z']].loc[data_df.columns]

    crds_df.to_csv(Path(f'{crds_path.stem}_subset.csv'), sep=delim)

    if held_out_stns:
        assert all(stn in crds_df.index for stn in held_out_stns)

        kept_stns = crds_df.index.difference(pd.Index(held_out_stns))

        crds_df = crds_df.loc[kept_stns]
        data_df = data_df[crds_df.index]

    spectrum = np.fft.rfft(data_df, axis=0)
    ft_df = pd.DataFrame(data=spectrum, columns=data_df.columns)

    for part in ('real', 'imag'):
        fitter = FitVariograms()

        # ``real`` / ``imag`` attribute of the complex spectrum array.
        component = getattr(ft_df.values, part)
        part_df = pd.DataFrame(component, columns=data_df.columns)

        part_df.to_csv(results_dir / f'{part}.csv', sep=delim)

        fitter.set_data(part_df, crds_df, index_type='obj')
        fitter.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)
        fitter.set_misc_settings(n_cpus, min_valid_stns)
        fitter.set_output_settings(results_dir / part, figs_flag)

        fitter.verify()
        fitter.fit_vgs()
        fitter.save_fin_vgs_df()

        # Release the fitter before the next part is processed.
        fitter = None
예제 #3
0
def main():
    """Fit variograms to 5-minute precipitation data and derived tables.

    Changes the working directory to ``main_dir``, loads the series from a
    CSV or pickle file, restricts them to ``beg_time``..``end_time``, drops
    the last record when the number of records is odd and discards stations
    with any missing value. The series are converted to normal scores and
    Fourier transformed. For each name in ``vg_vars`` the matching table is
    written to ``out_dir / part / f'{part}.csv'`` and handed to a
    ``FitVariograms`` instance whose outputs go to ``out_dir / part``.

    Raises:
        NotImplementedError: If the data file is neither ``.csv`` nor
            ``.pkl``.
        ValueError: If ``vg_vars`` contains a name that is not handled.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr\5min\v7_long_range'
    )

    os.chdir(main_dir)

    in_data_file = Path(
        r'../neckar_1min_ppt_data_20km_buff_Y2009__RR5min_RTsum.pkl')
    in_crds_file = Path(r'../metadata_ppt_gkz3_crds.csv')  # has X, Y cols

    sep = ';'
    time_fmt = '%Y-%m-%d %H:%M:%S'

    beg_time = '2009-01-01 00:00:00'
    end_time = '2009-03-31 23:59:00'

    out_dir = main_dir

    min_valid_stns = 10

    mdr = 0.5
    perm_r_list = [1, 2]
    fit_vgs = ['Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 20
    figs_flag = True

    vg_vars = [
        'orig',
        'data',
    ]  # 'phs', 'mag', 'sin', 'cos',

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    if in_data_file.suffix == '.csv':
        data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)
        data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    elif in_data_file.suffix == '.pkl':
        data_df = pd.read_pickle(in_data_file)

    else:
        raise NotImplementedError(
            f'Unknown extension of in_data_file: {in_data_file.suffix}!')

    data_df = data_df.loc[beg_time:end_time]

    # Keep an even number of records.
    if data_df.shape[0] % 2:
        data_df = data_df.iloc[:-1, :]
        print('Dropped last record in data_df!')

    # Keep only the stations that are complete over the whole period.
    data_df.dropna(axis=1, how='any', inplace=True)

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y']]

    crds_df = crds_df.loc[data_df.columns]

    # Rank-based probabilities in (0, 1), then their standard normal scores.
    probs_df = data_df.rank(axis=0) / (data_df.shape[0] + 1)

    norms_df = pd.DataFrame(data=norm.ppf(probs_df.values),
                            columns=data_df.columns)

    ft_df = pd.DataFrame(data=np.fft.rfft(norms_df, axis=0),
                         columns=data_df.columns)

    mag_df = pd.DataFrame(data=np.abs(ft_df), columns=data_df.columns)

    phs_df = pd.DataFrame(data=np.angle(ft_df), columns=data_df.columns)

    # Shift negative phases by 2*pi so that all of them are non-negative.
    phs_le_idxs = phs_df < 0

    phs_df[phs_le_idxs] = (2 * np.pi) + phs_df[phs_le_idxs]

    for part in vg_vars:

        (out_dir / part).mkdir(exist_ok=True)

        # Tables built from the spectrum carry a plain positional index;
        # the two data-based tables keep the time index.
        index_type = 'obj'

        if part == 'mag':
            part_df = mag_df

        elif part == 'phs':
            part_df = phs_df

        elif part == 'cos':
            part_df = pd.DataFrame(data=np.cos(phs_df),
                                   columns=data_df.columns)

        elif part == 'sin':
            part_df = pd.DataFrame(data=np.sin(phs_df),
                                   columns=data_df.columns)

        elif part == 'data':
            # Each station's values sorted independently, on the original
            # index. A new frame is built rather than writing through
            # ``.values``: that array is only a writable view for
            # single-dtype frames and is read-only under Copy-on-Write.
            part_df = pd.DataFrame(
                data=np.sort(data_df.values, axis=0),
                index=data_df.index,
                columns=data_df.columns)

            index_type = 'date'

        elif part == 'orig':
            part_df = data_df.copy()
            index_type = 'date'

        else:
            raise ValueError(f'Undefined: {part}!')

        part_df.to_csv(out_dir / f'{part}/{part}.csv', sep=sep)

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type=index_type)

        fit_vg_cls.set_vg_fitting_parameters(mdr, perm_r_list, fil_nug_vg, ngp,
                                             fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next iteration creates a new fitter.
        fit_vg_cls = None
    return
예제 #4
0
def main():
    """Prepare kriging inputs: fitted or fixed variograms per variable.

    Changes the working directory to ``main_dir``, reads the station series
    and coordinates, keeps the configured years (dropping the last record),
    discards stations with any missing value and optionally removes the
    validation stations. The series are converted to normal scores and
    Fourier transformed.

    For ``mag``, ``cos`` and ``sin`` a predefined variogram string is written
    for every row to ``out_dir / part / 'vg_strs.csv'`` and no fitting is
    done. For ``data``, ``probs`` and ``norms`` the column-wise sorted values
    are passed to a ``FitVariograms`` instance whose outputs go to
    ``out_dir / part``. Every table is also written to
    ``out_dir / f'{part}.csv'``.

    Raises:
        AssertionError: If a validation station is absent from the subset
            coordinates.
        ValueError: If the loop encounters a name that is not handled.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr'
    )

    os.chdir(main_dir)

    in_data_file = Path(r'precipitation.csv')
    in_crds_file = Path(r'precipitation_coords.csv')

    sep = ';'
    time_fmt = '%Y-%m-%d'

    beg_year = 1991
    end_year = 1991  # NOTE: skips last odd step.

    out_dir = Path(r'precipitation_kriging')

    min_valid_stns = 10

    # Selected post subsetting.
    validation_cols = []  # ['T3705', 'T1875', 'T5664', 'T1197']
    #     validation_cols = ['P3733', 'P3315', 'P3713', 'P3454']

    mdr = 0.7
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 10
    figs_flag = True

    # Predefined variogram strings used instead of fitting.
    mag_cftn = '0.02154 Sph(8998.9) + 0.91539 Exp(100566492.6) + 0.05894 Sph(73391.2)'
    cos_sin_cftn = '0.89903 Sph(848353919.8) + 0.38155 Exp(112531.1) + 0.11241 Sph(6980.2)'

    n_cpus = 8

    out_dir.mkdir(exist_ok=True)

    data_df = pd.read_csv(in_data_file, sep=sep, index_col=0)

    data_df.index = pd.to_datetime(data_df.index, format=time_fmt)

    # The trailing record is dropped unconditionally.
    data_df = data_df.loc[f'{beg_year}':f'{end_year}'].iloc[:-1]

    # Keep only the stations that are complete over the whole period.
    data_df.dropna(axis=1, how='any', inplace=True)

    crds_df = pd.read_csv(in_crds_file, sep=sep, index_col=0)[['X', 'Y', 'Z']]

    crds_df = crds_df.loc[data_df.columns]

    crds_df.to_csv(Path(in_crds_file.stem + '_subset.csv'), sep=sep)

    if validation_cols:
        assert all([
            validation_col in crds_df.index
            for validation_col in validation_cols
        ])

        crds_df = crds_df.loc[crds_df.index.difference(
            pd.Index(validation_cols))]

        data_df = data_df[crds_df.index]

    # Rank-based probabilities in (0, 1), then their standard normal scores.
    probs_df = data_df.rank(axis=0) / (data_df.shape[0] + 1)

    norms_df = pd.DataFrame(data=norm.ppf(probs_df.values),
                            columns=data_df.columns)

    ft_df = pd.DataFrame(data=np.fft.rfft(norms_df, axis=0),
                         columns=data_df.columns)

    mag_df = pd.DataFrame(data=np.abs(ft_df), columns=data_df.columns)

    phs_df = pd.DataFrame(data=np.angle(ft_df), columns=data_df.columns)

    # Parts whose variogram is fitted on the column-wise sorted values.
    sorted_sources = {'data': data_df, 'probs': probs_df, 'norms': norms_df}

    # Parts that get a predefined variogram string and are not fitted.
    fixed_cftns = {'mag': mag_cftn, 'cos': cos_sin_cftn, 'sin': cos_sin_cftn}

    for part in [
            'data',
            'mag',
            'cos',
            'sin',
    ]:  # 'probs', 'norms',

        (out_dir / part).mkdir(exist_ok=True)

        if part == 'mag':
            part_df = mag_df

        elif part == 'cos':
            part_df = pd.DataFrame(data=np.cos(phs_df),
                                   columns=data_df.columns)

        elif part == 'sin':
            part_df = pd.DataFrame(data=np.sin(phs_df),
                                   columns=data_df.columns)

        elif part in sorted_sources:
            src_df = sorted_sources[part]

            # Each column sorted independently, on the source's own index.
            # A new frame is built rather than writing through ``.values``:
            # that array is only a writable view for single-dtype frames
            # and is read-only under Copy-on-Write.
            part_df = pd.DataFrame(
                data=np.sort(src_df.values, axis=0),
                index=src_df.index,
                columns=src_df.columns)

        else:
            raise ValueError(f'Undefined: {part}!')

        if part in fixed_cftns:
            # One copy of the predefined variogram string per row.
            out_ser = pd.Series(index=part_df.index, dtype=object)
            out_ser[:] = fixed_cftns[part]

            out_ser.to_csv(out_dir / f'{part}/vg_strs.csv', sep=sep)

        part_df.to_csv(out_dir / f'{part}.csv', sep=sep)

        if part in fixed_cftns:
            continue

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(part_df, crds_df, index_type='obj')

        fit_vg_cls.set_vg_fitting_parameters(mdr, perm_r_list, fil_nug_vg, ngp,
                                             fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir / part, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next iteration creates a new fitter.
        fit_vg_cls = None
    return
예제 #5
0
def main():
    """Fit variograms for each configured variable over a date window.

    Changes into the working directory and, for every entry of ``vg_vars``,
    obtains the input/output locations from the matching ``get_*_paths``
    helper, loads the values and station coordinates, removes the stations
    listed in ``drop_stns`` and the time steps without any value, and runs a
    ``FitVariograms`` fit whose outputs go to the returned output directory.

    Raises:
        RuntimeError: If ``vg_vars`` contains an unknown variable name.
    """

    work_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr')
    os.chdir(work_dir)

    vg_vars = ['mean_temp']

    # Time window (both ends included by the label slice below).
    strt_date, end_date = '1991-01-01', '1991-12-30'

    # Stations removed from both the values and the coordinates.
    drop_stns = []  # ['T3705', 'T1875', 'T5664', 'T1197']

    # Variogram fitting configuration.
    mdr = 0.7
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Sph'
    n_best = 1
    ngp = 5
    figs_flag = False

    n_cpus = 8
    min_valid_stns = 10

    delim = ';'

    for vg_var in vg_vars:
        # Each helper returns (values file, coordinates file, output dir).
        if vg_var == 'mean_temp':
            vals_path, crds_path, out_dir = get_mean_temp_paths()

        elif vg_var == 'min_temp':
            vals_path, crds_path, out_dir = get_min_temp_paths()

        elif vg_var == 'max_temp':
            vals_path, crds_path, out_dir = get_max_temp_paths()

        elif vg_var == 'ppt':
            vals_path, crds_path, out_dir = get_ppt_paths()

        else:
            raise RuntimeError(f'Unknown vg_var: {vg_var}!')

        vals_df = pd.read_csv(
            vals_path, sep=delim, index_col=0, encoding='utf-8')
        vals_df.index = pd.to_datetime(vals_df.index, format='%Y-%m-%d')

        vals_df = vals_df.loc[strt_date:end_date, :]

        if drop_stns:
            vals_df.drop(labels=drop_stns, axis=1, inplace=True)

        # Time steps without a single value are removed.
        vals_df.dropna(how='all', axis=0, inplace=True)

        crds_df = pd.read_csv(
            crds_path, sep=delim, index_col=0, encoding='utf-8')

        # Station labels as strings.
        crds_df.index = [str(label) for label in crds_df.index]

        if drop_stns:
            crds_df.drop(labels=drop_stns, axis=0, inplace=True)

        crds_df = crds_df[['X', 'Y', 'Z']].astype(float)

        fitter = FitVariograms()

        fitter.set_data(vals_df, crds_df)
        fitter.set_vg_fitting_parameters(
            mdr, perm_r_list, fil_nug_vg, ngp, fit_vgs, n_best)
        fitter.set_misc_settings(n_cpus, min_valid_stns)
        fitter.set_output_settings(out_dir, figs_flag)

        fitter.verify()
        fitter.fit_vgs()
        fitter.save_fin_vgs_df()

        # Release the fitter before the next variable is processed.
        fitter = None
예제 #6
0
def main():
    """Fit variograms for each configured variable, optionally on extremes.

    Changes the working directory to ``main_dir`` and, for every entry of
    ``vg_vars``, obtains the input/output locations from the matching
    ``get_*_paths`` helper, loads the values and station coordinates and
    applies the configured filters:

    * ``use_netatmo_good_stns``: keep only the stations listed in the
      good-stations file.
    * ``drop_stns``: remove these stations from values and coordinates.
    * ``fit_for_extreme_events``: keep only the time steps that are also
      present in the extremes file.
    * ``DWD_stations``: left-pad the coordinate station ids with zeros to
      five characters.

    Values and coordinates are then reduced to their common stations and
    passed to a ``FitVariograms`` instance whose outputs go to the returned
    output directory.

    Raises:
        RuntimeError: If ``vg_vars`` contains an unknown variable name, if
            an enabled option needs a path the selected variable does not
            provide, or if values and coordinates share no station.
    """

    main_dir = Path(
        r'X:\hiwi\ElHachem\Prof_Bardossy\Extremes\kriging_ppt_netatmo')

    os.chdir(main_dir)

    vg_vars = ['ppt']

    strt_date = '2017-01-01'
    end_date = '2019-12-31'
    min_valid_stns = 5

    drop_stns = []
    mdr = 0.5
    perm_r_list = [1, 2]
    fit_vgs = ['Sph', 'Exp']
    fil_nug_vg = 'Nug'
    n_best = 2
    ngp = 5
    figs_flag = True

    fit_for_extreme_events = True

    use_netatmo_good_stns = False

    DWD_stations = True

    n_cpus = 4

    sep = ';'

    for vg_var in vg_vars:
        # Only the 'ppt' helper supplies these two paths; binding them here
        # turns a NameError for the other variables into the explicit
        # errors raised below.
        path_to_netatmo_gd_stns_file = None
        path_to_dwd_ppt_extreme = None

        if vg_var == 'mean_temp':
            (in_vals_df_loc, in_stn_coords_df_loc,
             out_dir) = get_mean_temp_paths()

        elif vg_var == 'min_temp':
            (in_vals_df_loc, in_stn_coords_df_loc,
             out_dir) = get_min_temp_paths()

        elif vg_var == 'max_temp':
            (in_vals_df_loc, in_stn_coords_df_loc,
             out_dir) = get_max_temp_paths()

        elif vg_var == 'ppt':
            (in_vals_df_loc, in_stn_coords_df_loc, out_dir,
             path_to_netatmo_gd_stns_file, path_to_netatmo_ppt_extreme,
             path_to_dwd_ppt_extreme) = get_ppt_paths()

        else:
            raise RuntimeError(f'Unknown vg_var: {vg_var}!')

        if use_netatmo_good_stns and path_to_netatmo_gd_stns_file is None:
            raise RuntimeError(
                f'No good-stations file available for vg_var: {vg_var}!')

        if fit_for_extreme_events and path_to_dwd_ppt_extreme is None:
            raise RuntimeError(
                f'No extremes file available for vg_var: {vg_var}!')

        if use_netatmo_good_stns:
            in_df_stns = pd.read_csv(path_to_netatmo_gd_stns_file,
                                     index_col=0,
                                     sep=';')
            good_netatmo_stns = list(in_df_stns.values.ravel())

        # NOTE(review): infer_datetime_format is deprecated in
        # pandas >= 2.0 -- confirm the pandas version this runs on.
        in_vals_df = pd.read_csv(in_vals_df_loc,
                                 sep=sep,
                                 index_col=0,
                                 encoding='utf-8',
                                 parse_dates=True,
                                 infer_datetime_format=True,
                                 engine='c')

        in_vals_df = in_vals_df.loc[strt_date:end_date, :]

        if use_netatmo_good_stns:
            in_vals_df = in_vals_df.loc[:, good_netatmo_stns]

        if drop_stns:
            in_vals_df.drop(labels=drop_stns, axis=1, inplace=True)

        # Time steps without a single value are removed.
        in_vals_df.dropna(how='all', axis=0, inplace=True)

        in_coords_df = pd.read_csv(in_stn_coords_df_loc,
                                   sep=';',
                                   index_col=0,
                                   encoding='utf-8')

        if fit_for_extreme_events:
            df_extremes = pd.read_csv(
                path_to_dwd_ppt_extreme,
                sep=';',
                index_col=0,
                parse_dates=True,
                infer_datetime_format=True).dropna(how='all')

            in_vals_df = in_vals_df.loc[
                in_vals_df.index.intersection(df_extremes.index), :]

        if DWD_stations:
            # Left-pad the ids with zeros to five characters. The index
            # keeps one label per row here; stations without data are
            # removed by the alignment below. Assigning an already
            # filtered list would fail with a length mismatch.
            in_coords_df.index = [
                str(stn_id).rjust(5, '0') for stn_id in in_coords_df.index
            ]

        in_coords_df.index = list(map(str, in_coords_df.index))

        if drop_stns:
            in_coords_df.drop(labels=drop_stns, axis=0, inplace=True)

        # Align both tables on the stations they share, in the column order
        # of the values. Comparing only the station counts and selecting
        # with .loc raises KeyError as soon as the two sets differ.
        common_stns = in_vals_df.columns[
            in_vals_df.columns.isin(in_coords_df.index)]

        if not len(common_stns):
            raise RuntimeError(
                f'No common stations in values and coordinates for '
                f'vg_var: {vg_var}!')

        in_vals_df = in_vals_df.loc[:, common_stns]
        in_coords_df = in_coords_df.loc[common_stns, :]

        fit_vg_cls = FitVariograms()

        fit_vg_cls.set_data(in_vals_df, in_coords_df)

        fit_vg_cls.set_vg_fitting_parameters(mdr, perm_r_list, fil_nug_vg, ngp,
                                             fit_vgs, n_best)

        fit_vg_cls.set_misc_settings(n_cpus, min_valid_stns)

        fit_vg_cls.set_output_settings(out_dir, figs_flag)

        fit_vg_cls.verify()

        fit_vg_cls.fit_vgs()

        fit_vg_cls.save_fin_vgs_df()

        # Drop the reference before the next iteration creates a new fitter.
        fit_vg_cls = None

    return