Exemple #1
0
def obj_ftn(new_crds, crds, evgs, n_extra_dims, n_lags, fit_vg_str, calls_ctr):
    """Return the sum of squared differences between the given empirical
    values and theoretical variogram values for a candidate vector.

    Parameters
    ----------
    new_crds : 1D numpy array
        Candidate vector. Elements 0 and 1 are formatted into the variogram
        string as ``'<new_crds[0]> <fit_vg_str>(<new_crds[1]>)'``. The
        remaining elements are reshaped to ``(crds.shape[0], n_extra_dims)``
        and used as the last ``n_extra_dims`` columns of ``crds``.
    crds : 2D numpy array
        Coordinates. The last ``n_extra_dims`` columns are overwritten for
        the duration of the call and restored before returning, also when
        an exception is raised.
    evgs : numpy array
        Values the theoretical ones are compared to. Must be compatible with
        the concatenation of the theoretical values of all lags.
    n_extra_dims : int
        Number of trailing columns of ``crds`` taken from ``new_crds``.
    n_lags : int
        Row offsets 1..n_lags are used to form pairs of rows of ``crds``.
    fit_vg_str : str
        Variogram name placed between the two leading parameters.
    calls_ctr : mutable sequence
        ``calls_ctr[0]`` is incremented by one on every successful call.

    Returns
    -------
    The sum of squared differences. It is also printed together with the
    call counter.
    """

    fit_vg_str = f'{new_crds[0]:0.5f} {fit_vg_str}({new_crds[1]:0.1f})'

    old_crds = crds[:, -n_extra_dims:].copy()

    # The caller's array is modified in place, so the original columns have
    # to be put back even if one of the helpers below fails.
    try:
        crds[:, -n_extra_dims:] = new_crds[2:].reshape(
            crds.shape[0], n_extra_dims)

        tvgs = []
        for i in range(1, n_lags + 1):
            dists = get_l2_norm(crds[i:, :], crds[:-i, :])

            tvgs.extend(get_theo_vg_vals(fit_vg_str, dists).tolist())

        tvgs = np.array(tvgs)

        obj_val = ((evgs - tvgs)**2).sum()

    finally:
        crds[:, -n_extra_dims:] = old_crds

    calls_ctr[0] += 1
    print(f'{obj_val:0.5E}', calls_ctr[0])

    return obj_val
Exemple #2
0
def main():
    """Print the theoretical variogram values of a fixed variogram string
    at ten evenly spaced distances between 0 and 1e6."""

    # Changing into the current working directory leaves it as it is.
    work_dir = Path(os.getcwd())
    os.chdir(work_dir)

    n_dists = int(1e1)
    dists = np.linspace(0, 1e6, n_dists)

    theo_vals = get_theo_vg_vals('100 Sph(10000) + 10 Exp(1000000)', dists)
    print(theo_vals)
Exemple #3
0
def get_lags_evgs_tvgs(ts_vals, crds, n_lags, fit_vg_str):
    """Collect distances, squared differences and theoretical variogram
    values for row offsets 1..n_lags.

    For every offset, each row is paired with the row that many positions
    before it. The distance between the paired rows of ``crds`` comes from
    ``get_l2_norm``, the squared difference from the paired entries of
    ``ts_vals`` and the theoretical value from evaluating ``fit_vg_str`` at
    the distances.

    Returns
    -------
    Three lists (distances, squared differences, theoretical values) with
    the results of all offsets appended one after the other.
    """

    all_dists, all_sq_diffs, all_theo_vals = [], [], []

    for step in range(1, n_lags + 1):
        sq_diffs = (ts_vals[step:] - ts_vals[:-step]) ** 2
        step_dists = get_l2_norm(crds[step:, :], crds[:-step, :])

        all_dists += step_dists.tolist()
        all_sq_diffs += sq_diffs.tolist()
        all_theo_vals += get_theo_vg_vals(fit_vg_str, step_dists).tolist()

    return all_dists, all_sq_diffs, all_theo_vals
def get_lags_evgs_tvgs(ts_vals, crds, fit_vg_str):
    """Compute distances, squared differences and theoretical variogram
    values for every unordered pair of rows.

    Each index ``i`` is paired with every ``j > i``. For a pair, the
    distance between ``crds[i, :]`` and ``crds[j, :]`` comes from
    ``get_l2_norm`` and the squared difference from ``ts_vals[i]`` and
    ``ts_vals[j]``.

    Returns
    -------
    lags : numpy array of the pair distances.
    evgs : list of the squared differences, in the same pair order.
    tvgs : list with ONE element, which is the list of theoretical values
        of ``fit_vg_str`` evaluated at ``lags``.
    """

    n_vals = ts_vals.shape[0]

    pair_dists, sq_diffs = [], []
    for first in range(n_vals):
        first_val = ts_vals[first]
        first_crds = crds[first, :]

        # Starting at first + 1 visits every pair exactly once.
        for second in range(first + 1, n_vals):
            sq_diffs.append((first_val - ts_vals[second]) ** 2)
            pair_dists.append(get_l2_norm(first_crds, crds[second, :]))

    lags = np.array(pair_dists)

    # NOTE(review): the values are wrapped in an outer list, so tvgs is
    # nested while evgs is flat. Kept as is; confirm against the callers.
    tvgs = [get_theo_vg_vals(fit_vg_str, lags).tolist()]

    return lags, sq_diffs, tvgs
Exemple #5
0
def get_mean_vg(vg_strs_ser, dists):
    """Return one variogram string that represents all strings in a series.

    A series with a single entry yields that entry unchanged. Otherwise
    every string is evaluated at ``dists``, the values are averaged per
    distance and a new variogram is fitted to the averages with ``get_vg``.
    The fit may use every distinct variogram name found in the inputs and
    combination sizes from one up to the largest number of components seen
    in any input string.

    Parameters
    ----------
    vg_strs_ser : pandas Series of variogram strings. Must not be empty.
    dists : 1D numpy array of distances at which the strings are evaluated.
        Its last value plus one is passed on as the maximum fitting
        distance.

    Returns
    -------
    The variogram string.
    """

    assert vg_strs_ser.size

    if vg_strs_ser.size == 1:
        return vg_strs_ser.iloc[0]

    theo_vals = np.zeros((vg_strs_ser.size, dists.size))
    uniq_vg_names = []
    comb_sizes = []

    for row, vg_str in enumerate(vg_strs_ser):
        theo_vals[row, :] = get_theo_vg_vals(vg_str, dists)

        comp_names = disagg_vg_str(vg_str)[1]
        for pos, comp_name in enumerate(comp_names, start=1):
            if pos not in comb_sizes:
                comb_sizes.append(pos)

            if comp_name not in uniq_vg_names:
                uniq_vg_names.append(comp_name)

    # The mean is used on purpose, a median might be a problem if the
    # variograms don't have the same rise rate.
    mean_vals = theo_vals.mean(axis=0)

    assert dists.size == mean_vals.size

    fit_args = (
        dists,
        mean_vals,
        'mean_vg',
        uniq_vg_names,
        comb_sizes,
        1000,
        False,
        False,
        None,
        None,
        False,
        dists[-1] + 1)

    return get_vg(fit_args)[1]
Exemple #6
0
def _plot_vg_fit(
        dists,
        vg_vals,
        theo_dists,
        theo_vg_vals,
        dist_lab,
        vg_str,
        fig_size,
        out_dir):
    """Save a figure comparing the empirical and the fitted variogram."""

    plt.figure(figsize=fig_size)

    plt.plot(
        dists,
        vg_vals,
        label='empirical',
        lw=3,
        alpha=0.4,
        color='red')

    plt.plot(
        theo_dists,
        theo_vg_vals,
        label='theoretical',
        lw=1,
        alpha=0.6,
        color='blue')

    plt.legend()
    plt.xlabel('Distance')
    plt.ylabel('Semi-variogram')
    plt.title(f'{dist_lab}\n{vg_str}')

    plt.grid()
    plt.gca().set_axisbelow(True)

    plt.savefig(str(out_dir / f'{dist_lab}_vg.png'), bbox_inches='tight')

    plt.close()
    return


def get_vg(args):
    """Fit a (possibly nested) theoretical variogram to empirical values.

    Every combination of the allowed variogram names, for every requested
    combination size, is calibrated with differential evolution. The
    combination with the lowest AIC is kept, provided it improves on the
    best one so far by at least one percent.

    Parameters
    ----------
    args : tuple with the following items, in this order
        dists : 1D numpy array of distances.
        vg_vals : 1D numpy array of empirical variogram values at dists.
        dist_lab : label that is printed, used in the figure title and file
            name and returned unchanged.
        mix_vg_list : sequence of variogram names to combine. Known names
            are 'Nug', 'Sph', 'Exp', 'Lin', 'Gau', 'Pow' and 'Hol'.
        perm_r_list : combination sizes to try. Sizes larger than
            len(mix_vg_list) are dropped.
        opt_iters : maxiter of the optimizer.
        wt_by_dist_flag : passed on to the calibration function.
        plot_flag : whether to save a figure of the fit.
        fig_size : figure size, used only when plotting.
        out_dir : output directory (path object), used only when plotting.
        plt_at_zero_dist_flag : whether to prepend distance zero to the
            distances at which the fitted variogram is evaluated.
        max_fit_dist : only points with dists <= max_fit_dist are passed to
            the optimizer.

    Returns
    -------
    tuple of (dist_lab, vg_str, theo_dists, theo_vg_vals) where vg_str has
    the form '<sill> <name>(<range>) + ...'.

    Raises
    ------
    AssertionError
        If an optimization does not report success or no variogram could
        be fitted.
    """

    (dists,
     vg_vals,
     dist_lab,
     mix_vg_list,
     perm_r_list,
     opt_iters,
     wt_by_dist_flag,
     plot_flag,
     fig_size,
     out_dir,
     plt_at_zero_dist_flag,
     max_fit_dist) = args

    fit_idxs = dists <= max_fit_dist

    dists_fit = dists[fit_idxs]
    vg_val_fit = vg_vals[fit_idxs]

    perm_r_list = np.array(np.unique(perm_r_list), dtype=int)
    perm_r_list = perm_r_list[perm_r_list <= len(mix_vg_list)]

    all_mix_vg_ftns = {
       'Nug': nug_vg,
       'Sph': sph_vg,
       'Exp': exp_vg,
       'Lin': lin_vg,
       'Gau': gau_vg,
       'Pow': pow_vg,
       'Hol': hol_vg
       }

    min_obj = np.inf
    best_vg_name = []
    best_vg_param = []
    lb_thresh = 1e-8  # lower bound (used instead of zero)
    max_dist_thresh = max(1e6, dists.max())
    var_multpr = 2

    # Same for every variogram, hence computed once.
    # NOTE(review): the lower bound of the range is dists.min(), which may
    # be zero. Confirm that the variogram functions can handle that.
    rng_bounds = (dists.min(), max_dist_thresh)
    sill_bounds = (lb_thresh, var_multpr * vg_vals.max())

    for perm_r in perm_r_list:
        skip_perm_list = []

        for vg_strs in combinations(mix_vg_list, int(perm_r)):
            if vg_strs in skip_perm_list:
                # if a given permutation exists then don't run further
                continue

            mix_vg_names = []  # to hold the variogram names and ftns
            bounds = []

            for vg_name in vg_strs:
                mix_vg_names.append((vg_name, all_mix_vg_ftns[vg_name]))

                # Two parameters per variogram. The first one of 'Pow' is
                # bounded by 2 instead of a distance.
                if vg_name == 'Pow':
                    bounds.append((lb_thresh, 2))

                else:
                    bounds.append(rng_bounds)

                bounds.append(sill_bounds)

            opt = differential_evolution(
                vg_calib,
                tuple(bounds),
                args=(mix_vg_names, dists_fit, vg_val_fit, wt_by_dist_flag),
                maxiter=opt_iters,
                popsize=len(bounds) * 50,
                polish=False)

            assert opt.success, 'Optimization did not succeed!'

            rngs = opt.x[0::2].copy()
            sills = opt.x[1::2].copy()

            #  using Akaike Information Criterion (AIC) to select a model
            # NOTE(review): the sample size is vg_vals.size although only
            # the points up to max_fit_dist are fitted. Confirm.
            curr_AIC = (
                (vg_vals.size * np.log(opt.fun)) + (2 * opt.x.shape[0]))

            # A new model has to improve the AIC by one percent. The AIC is
            # negative when the objective value is below one. Scaling a
            # negative value by 0.99 moves the threshold towards zero,
            # which would accept worse models, so the percent is applied
            # in the other direction then.
            if min_obj >= 0:
                aic_thresh = min_obj * (1. - 1e-2)

            else:
                aic_thresh = min_obj * (1. + 1e-2)

            is_better = curr_AIC < aic_thresh

            if np.any(np.ediff1d(rngs) < 0):
                # flipping ranges and sills into correct (ascending) order
                sort_idxs = np.argsort(rngs)
                rngs = rngs[sort_idxs]
                sills = sills[sort_idxs]

                skip_perm_list.append(tuple(vg_strs[k] for k in sort_idxs))

                mix_vg_names = [mix_vg_names[k] for k in sort_idxs]

            if is_better:
                prms = np.zeros((2 * rngs.shape[0]), dtype=np.float64)
                prms[0::2] = rngs
                prms[1::2] = sills

                min_obj = curr_AIC
                best_vg_name = mix_vg_names
                best_vg_param = prms

    # final nested variogram string, parameters are stored as
    # (range, sill) pairs in the order of the names.
    vg_str = ' + '.join(
        '%0.5f %s(%0.1f)' % (
            best_vg_param[2 * i + 1], vg_name, best_vg_param[2 * i])
        for i, (vg_name, _) in enumerate(best_vg_name))

    print(dist_lab, vg_str)

    assert vg_str, 'No vg fitted!'

    if plt_at_zero_dist_flag:
        theo_dists = np.concatenate(([0.0], dists))

    else:
        theo_dists = dists

    theo_vg_vals = get_theo_vg_vals(vg_str, theo_dists)

    if plot_flag:
        _plot_vg_fit(
            dists,
            vg_vals,
            theo_dists,
            theo_vg_vals,
            dist_lab,
            vg_str,
            fig_size,
            out_dir)

    return (dist_lab, vg_str, theo_dists, theo_vg_vals)
Exemple #7
0
def main():
    """Cluster the variograms of a file, refit one per cluster and save.

    Reads variogram strings from 'vgs.csv' in the working directory,
    groups them with ``get_clustered_vgs``, fits one mean variogram per
    group with ``get_mean_vg``, writes the refitted string of every input
    label to 'clustered_vgs.csv' and saves a figure that compares the
    original and the refitted variograms to 'clustered_vgs.png'.

    Raises
    ------
    ValueError
        If max_rng is neither None nor a number.
    """

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\variograms\comb_vg\temp_1961_2015_with_zeros\vgs_CP'
    )

    os.chdir(main_dir)

    # Something needed with an actual range.
    allowed_vgs = ['Sph', 'Exp']  # , 'Gau']

    in_vg_strs_file = Path('vgs.csv')

    sep = ';'

    # max_rng can be None or a float.
    # When None, then maximum range from all vgs is taken.
    max_rng = 250e3
    n_fit_dists = 50
    max_nbr_dist = 50e3

    n_rnd_pts = int(1e2)
    n_sims = int(1e2)

    ks_alpha = 0.99
    n_sel_thresh = 1000

    abs_thresh_wt = (1e-2)  # * n_rnd_pts

    out_fig_name = 'clustered_vgs.png'
    fig_size = (10, 7)
    out_vgs_sers_name = 'clustered_vgs.csv'

    # The squeeze keyword of read_csv does not exist in pandas >= 2.0,
    # squeezing the columns of the resulting frame gives the same Series.
    vg_strs_ser_main = pd.read_csv(
        in_vg_strs_file, sep=sep, index_col=0).squeeze('columns')

    if max_rng is None:
        max_rng = -np.inf
        for vg_str in vg_strs_ser_main:

            _, vgs, rngs = disagg_vg_str(vg_str)

            assert all(vg in allowed_vgs for vg in vgs)

            max_rng = max(max_rng, max(rngs))

    elif isinstance(max_rng, (int, float)):
        max_rng = float(max_rng)

    else:
        raise ValueError(f'Invalid max_rng: {max_rng!r}')

    print('max_rng:', max_rng)

    cluster_args = (vg_strs_ser_main, max_rng, n_fit_dists, n_sims, n_rnd_pts,
                    abs_thresh_wt, ks_alpha, n_sel_thresh, max_nbr_dist)

    vg_clusters = get_clustered_vgs(cluster_args)

    print('Done fitting.')
    print('Refitting...')
    theo_dists = np.linspace(0, max_rng, n_fit_dists)
    refit_vgs = []

    out_clustered_ser = pd.Series(index=vg_strs_ser_main.index, dtype=object)

    for vg_cluster in vg_clusters:
        print(vg_cluster)

        # The second item of a cluster holds the labels of its members.
        refit_vg_str = get_mean_vg(vg_strs_ser_main.loc[vg_cluster[1]],
                                   theo_dists)

        refit_vgs.append(refit_vg_str)

        print(vg_cluster[0], refit_vg_str)

        for vg_label in vg_cluster[1]:
            out_clustered_ser.loc[vg_label] = refit_vg_str

    out_clustered_ser.to_csv(out_vgs_sers_name, sep=sep)

    plt.figure(figsize=fig_size)

    # Only the first curve of each group gets a label, so that the legend
    # has a single entry per group.
    for k, vg_str in enumerate(vg_strs_ser_main):
        label = f'old(n={vg_strs_ser_main.size})' if k == 0 else None

        plt.plot(theo_dists,
                 get_theo_vg_vals(vg_str, theo_dists),
                 label=label,
                 alpha=0.5,
                 c='red')

    for k, vg_str in enumerate(refit_vgs):
        label = f'new(n={len(refit_vgs)})' if k == 0 else None

        plt.plot(theo_dists,
                 get_theo_vg_vals(vg_str, theo_dists),
                 label=label,
                 alpha=0.5,
                 c='blue')

    plt.legend()

    plt.grid()
    plt.gca().set_axisbelow(True)

    plt.xlabel('Distance')
    plt.ylabel('Semi-variogram')

    plt.savefig(out_fig_name, bbox_inches='tight')
    plt.close()

    return