Beispiel #1
0
def _collect_result(test_function: str, N: int, noise_std: float, random: bool, gps: Tuple[str, ...], M: int):
    """Collate the per-fold CSV outputs of each GP into ``<store>/results``.

    For every GP name in ``gps`` and every collected CSV name, the ``K``
    per-fold tables are stacked with a leading ``fold`` column, followed by
    their average over the folds with ``fold`` set to ``'mean'``. The stacked
    table is handed to ``data.Frame`` under ``<gp>.<csv name>`` in the results
    directory. The matrix returned by ``linear_transformation(store)`` is
    handed to ``data.Frame`` as ``<gp>.True_Theta.csv``.

    Args:
        test_function: Forwarded to ``store_path`` to locate the store.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        random: Forwarded to ``store_path``.
        gps: Names of the GP sub-directories inside each ``fold.<k>`` directory.
        M: Forwarded to ``store_path``.
    """
    store = store_path(test_function, N, noise_std, random, M)
    destination = store / "results"
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    sobol_params = ("Theta.csv", "S.csv", "S1.csv")
    kernel_params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
    for gp in gps:
        for sobol in (True, False):
            if sobol:
                true_theta = linear_transformation(store)
                # NOTE(review): constructing data.Frame with a DataFrame presumably
                # persists it to the given CSV path; the instance itself is not needed.
                data.Frame(destination / "{0}.{1}".format(gp, "True_Theta.csv"), DataFrame(true_theta))
                params = sobol_params
            else:
                params = kernel_params
            for param in params:
                fold_frames = []
                avg = None
                for k in range(K):
                    source = (store / "fold.{0:d}".format(k)) / gp
                    # Sobol outputs live in "sobol"; lengthscale.csv lives in "kernel";
                    # everything else sits directly in the GP directory.
                    if sobol:
                        source = source / "sobol"
                    elif param == "lengthscale.csv":
                        source = source / "kernel"
                    result = data.Frame(source / param, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                    result.insert(0, "fold", full(result.shape[0], k), True)
                    fold_frames.append(result)
                    if avg is None:
                        avg = result / K
                    else:
                        avg += result / K
                # The fold column was averaged along with the data; overwrite it with a label.
                avg.loc[:, 'fold'] = 'mean'
                results = concat(fold_frames + [avg], axis=0, ignore_index=True, sort=False)
                data.Frame(destination / "{0}.{1}".format(gp, param), results)
Beispiel #2
0
def _test_stats(k: int, gp_path: Path) -> data.Frame:
    """Summarise the test predictions stored in ``gp_path / "__test__.csv"``.

    Args:
        k: Fold number, recorded in the ``fold`` column of the summary.
        gp_path: Directory containing ``__test__.csv``; the summary is handed
            to ``data.Frame`` under ``test_stats.csv`` in the same directory.

    Returns:
        A ``data.Frame`` wrapping a one-row table with columns ``fold``,
        ``RMSE``, ``Prediction Std`` and ``Outliers``.
    """
    predictions = data.Frame(gp_path / "__test__.csv").df.copy()
    observed = predictions['Y'].values
    predicted = predictions['Predictive Mean'].values
    predicted_std = predictions['Predictive Std'].values
    abs_error = abs(observed - predicted)
    # floor(err / (2 * std)) is non-zero exactly where the error reaches two
    # predictive standard deviations (assuming std is positive).
    sigma_multiples = floor_divide(abs_error, 2 * predicted_std)
    stats = {
        'fold': k,
        # NOTE(review): the RMSE is scaled by 1/4; the reason is not visible here.
        'RMSE': sqrt(mean(abs_error ** 2)) / 4,
        'Prediction Std': mean(predicted_std),
        'Outliers': count_nonzero(sigma_multiples) / len(predicted_std),
    }
    return data.Frame(gp_path / "test_stats.csv", DataFrame(stats, index=[0]))
Beispiel #3
0
def _collect_test_stats(M, N, function_name, random, noisy):
    """Compute test statistics for the ``"ard"`` GP of every fold.

    For each of the ``FOLDS`` folds in the store located by ``store_dir``,
    reads ``ard/__test__.csv``, passes a copy of its table to ``_test_stats``
    and hands the result to ``data.Frame`` under ``ard/test_stats.csv``.

    Args:
        M, N, function_name, random, noisy: Forwarded unchanged to
            ``store_dir`` to locate the store.
    """
    source_store = store_dir(M, N, function_name, random, noisy)
    for k in range(FOLDS):
        gp_dir = data.Fold(source_store, k).dir / "ard"
        predictions = data.Frame(gp_dir / "__test__.csv").df.copy()
        # NOTE(review): constructing data.Frame with data presumably persists it
        # to the given CSV path; the instance itself is not needed.
        data.Frame(gp_dir / "test_stats.csv", _test_stats(k, predictions))
Beispiel #4
0
def _collect_test_stats(test_function: str,
                        N: int,
                        noise_std: float,
                        random: bool,
                        gp: str,
                        M: int = 5):
    """Compute test statistics for one GP in every fold.

    For each of the ``K`` folds in the store located by ``store_path``, reads
    ``<gp>/__test__.csv``, passes a copy of its table to ``_test_stats`` and
    hands the result to ``data.Frame`` under ``<gp>/test_stats.csv``.

    Args:
        test_function: Forwarded to ``store_path``.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        random: Forwarded to ``store_path``.
        gp: Name of the GP sub-directory inside each fold directory.
        M: Forwarded to ``store_path``.
    """
    source_store = store_path(test_function, N, noise_std, random, M)
    for k in range(K):
        gp_dir = data.Fold(source_store, k).dir / gp
        predictions = data.Frame(gp_dir / "__test__.csv").df.copy()
        # NOTE(review): constructing data.Frame with data presumably persists it
        # to the given CSV path; the instance itself is not needed.
        data.Frame(gp_dir / "test_stats.csv", _test_stats(k, predictions))
Beispiel #5
0
def _collect_result(M: int, N: int, function_name: str, random: bool,
                    noisy: bool):
    """Collate per-fold CSV outputs of the three GPs into the results tree.

    For each of ``"ard"``, ``"rom.optimized"`` and ``"rom.reduced"`` and for
    each collected CSV name, the ``FOLDS`` per-fold tables are stacked with a
    leading ``fold`` column, followed by their average over folds with
    ``fold`` set to ``'mean'``. Output names are prefixed with
    ``NORMAL_CDF_DIR`` or ``NOISELESS_DIR`` depending on ``noisy``. The matrix
    returned by ``linear_transformation`` is handed to ``data.Frame`` as
    ``<prefix>.<gp>.True_Theta.csv``.

    Args:
        M, N, function_name, random, noisy: Forwarded unchanged to
            ``store_dir`` to locate the source store; ``noisy`` also selects
            the output file-name prefix.
    """
    noisy_str = NORMAL_CDF_DIR if noisy else NOISELESS_DIR
    source_store = store_dir(M, N, function_name, random, noisy)
    destination = (BASE_PATH / "results") / source_store.name
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    kernel_params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
    for gp in ("ard", "rom.optimized", "rom.reduced"):
        for sobol in (True, False):
            if sobol:
                lin_trans = linear_transformation(source_store)
                # NOTE(review): constructing data.Frame with a DataFrame presumably
                # persists it to the given CSV path; the instance itself is not needed.
                data.Frame(
                    destination /
                    "{0}.{1}.{2}".format(noisy_str, gp, "True_Theta.csv"),
                    DataFrame(lin_trans))
                # Transposed copy is used below for the Theta sign correction.
                lin_trans = transpose(lin_trans)
                params = ("Theta.csv", "S.csv", "S1.csv")
            elif gp == "rom.optimized":
                # test_stats.csv is not collected for "rom.optimized".
                params = kernel_params
            else:
                params = kernel_params + ("test_stats.csv", )
            for param in params:
                fold_frames = []
                avg = None
                for k in range(FOLDS):
                    source = (source_store / "fold.{0:d}".format(k)) / gp
                    # Sobol outputs live in "sobol"; lengthscale.csv lives in
                    # "kernel"; everything else sits in the GP directory.
                    if sobol:
                        source = source / "sobol"
                    elif param == "lengthscale.csv":
                        source = source / "kernel"
                    result = data.Frame(
                        source / param,
                        **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                    if param == "Theta.csv":  # TODO: May not need this
                        # Scale each row by the sign of the matching diagonal
                        # entry of Theta @ lin_trans.
                        signs = sign(diag(result.values @ lin_trans))
                        result *= signs.reshape(-1, 1)
                    result.insert(0, "fold", full(result.shape[0], k), True)
                    fold_frames.append(result)
                    if avg is None:
                        avg = result / FOLDS
                    else:
                        avg += result / FOLDS
                # The fold column was averaged along with the data; overwrite
                # it with a label.
                avg.loc[:, 'fold'] = 'mean'
                results = concat(fold_frames + [avg],
                                 axis=0,
                                 ignore_index=True)
                data.Frame(
                    destination / "{0}.{1}.{2}".format(noisy_str, gp, param),
                    results)
Beispiel #6
0
def _collect_result(test_function: str,
                    N: int,
                    noise_std: float,
                    random: bool,
                    gp: str,
                    M: int = 5):
    """Collate the per-fold kernel-level CSV outputs of one GP.

    For each collected CSV name, the ``K`` per-fold tables are stacked with a
    leading ``fold`` column, followed by their average over folds with
    ``fold`` set to ``'mean'``. The stacked table is handed to ``data.Frame``
    under ``RESULTS_PATH / <store name> / <gp>.<csv name>``. Only the
    kernel-level outputs are collected here; ``test_stats.csv`` is skipped
    when ``gp`` is ``"rom.optimized"``.

    Args:
        test_function: Forwarded to ``store_path``.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        random: Forwarded to ``store_path``.
        gp: Name of the GP sub-directory inside each ``fold.<k>`` directory.
        M: Forwarded to ``store_path``.
    """
    source_store = store_path(test_function, N, noise_std, random, M)
    destination = RESULTS_PATH / source_store.name
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
    if gp != "rom.optimized":
        params += ("test_stats.csv", )
    for param in params:
        fold_frames = []
        avg = None
        for k in range(K):
            source = (source_store / "fold.{0:d}".format(k)) / gp
            # lengthscale.csv lives in the "kernel" sub-directory; the other
            # files sit directly in the GP directory.
            if param == "lengthscale.csv":
                source = source / "kernel"
            result = data.Frame(
                source / param,
                **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
            result.insert(0, "fold", full(result.shape[0], k), True)
            fold_frames.append(result)
            if avg is None:
                avg = result / K
            else:
                avg += result / K
        # The fold column was averaged along with the data; overwrite it with
        # a label.
        avg.loc[:, 'fold'] = 'mean'
        results = concat(fold_frames + [avg], axis=0, ignore_index=True)
        # NOTE(review): constructing data.Frame with a DataFrame presumably
        # persists it to the given CSV path; the instance itself is not needed.
        data.Frame(destination / "{0}.{1}".format(gp, param), results)
Beispiel #7
0
def summarise_results(test_functions: Tuple[str, ...], Ns: Tuple[int, ...], noise_stds: Tuple[float, ...], randoms: Tuple[bool, ...],
                      gps: Tuple[str, ...], Ms: Tuple[int, ...] = (5, )):
    """Merge the fold-mean rows of collected results across N and noise level.

    For every ``(M, test_function, random)`` a directory
    ``RESULTS_PATH / "<test_function>.<M>.<random str>"`` is created. For
    every GP and CSV name, the rows whose ``fold`` is ``'mean'`` (or the whole
    table for ``True_Theta.csv``) are read from each store's ``results``
    directory, tagged with leading ``Noise`` and ``N`` columns, and stacked.
    The stack is written twice: as ``<gp>.<csv name>`` and, with floats
    formatted as ``%.4f``, as ``formatted.<gp>.<csv name>``.

    Nothing is written for a GP/CSV pair when ``Ns`` or ``noise_stds`` is
    empty.

    Args:
        test_functions: Test function names, forwarded to ``store_path``.
        Ns: Values of N forwarded to ``store_path``; ``int(N / 2)`` is
            recorded in the ``N`` column.
        noise_stds: Noise levels forwarded to ``store_path``; each is divided
            by the value loaded from the store's ``results/std.csv`` before
            being recorded in the ``Noise`` column.
        randoms: Flags forwarded to ``store_path`` and ``_random_str``.
        gps: GP names used as file-name prefixes.
        Ms: Values of M forwarded to ``store_path``.
    """
    kernel_params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv", "test_stats.csv")
    for M in Ms:
        for test_function in test_functions:
            # NOTE(review): the return value is unused; the call is kept in case
            # choose_Mu validates the name -- confirm and remove if it is pure.
            choose_Mu(test_function)
            for random in randoms:
                destination = RESULTS_PATH / "{0}.{1:d}.{2}".format(test_function, M, _random_str(random))
                destination.mkdir(mode=0o777, parents=True, exist_ok=True)
                for gp in gps:
                    if random and gp == "rom.optimized.reduced":
                        sobol_params = ("S.csv", "S1.csv", "Theta.csv", "True_Theta.csv", "Theta_Analyzed.csv")
                    else:
                        sobol_params = ("S.csv", "S1.csv", "Theta.csv")
                    for param in sobol_params + kernel_params:
                        frames = []
                        for N in Ns:
                            for noise_std in noise_stds:
                                results_path = store_path(test_function, N, noise_std, random, M) / "results"
                                std = loadtxt(results_path / 'std.csv')
                                source = results_path / "{0}.{1}".format(gp, param)
                                result = data.Frame(source, **model.base.Model.CSV_PARAMETERS).df.copy(deep=True)
                                if param != "True_Theta.csv":
                                    # Keep only the fold-averaged rows.
                                    result = result.loc[result['fold'] == 'mean'].drop('fold', axis=1).copy(deep=True)
                                result.insert(0, "N", full(result.shape[0], int(N/2), dtype=int), True)
                                result.insert(0, "Noise", full(result.shape[0], noise_std/std), True)
                                frames.append(result)
                        if not frames:
                            continue
                        results = concat(frames, axis=0, ignore_index=True, sort=False)
                        results.to_csv(destination / "{0}.{1}".format(gp, param), index=False)
                        results.to_csv(destination / "formatted.{0}.{1}".format(gp, param), float_format='%.4f', index=False)
Beispiel #8
0
def synopsise(test_functions: Tuple[str, ...], randoms: Tuple[bool, ...],
              gps: Tuple[str, ...]):
    """Stack the summary CSVs of all test functions, rotations and GPs.

    For each of the kernel-level CSV names, reads
    ``SUMMARY_PATH / "<test_function>.<random str>" / "<gp>.<csv name>"`` for
    every combination of the arguments, tags each table with leading
    ``Test Function``, ``Random Rotation`` and ``GP`` columns, and writes the
    stacked table to ``SUMMARY_PATH / "synopsis" / <csv name>``.

    Nothing is written when any of the argument tuples is empty.

    Args:
        test_functions: Test function names.
        randoms: Flags passed to ``_random_str`` to build the directory name.
        gps: GP names used as file-name prefixes.
    """
    destination = SUMMARY_PATH / "synopsis"
    destination.mkdir(mode=0o777, parents=True, exist_ok=True)
    params = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv",
              "test_stats.csv")
    for param in params:
        frames = []
        for test_function in test_functions:
            for random in randoms:
                source_path = SUMMARY_PATH / (test_function + '.' +
                                              _random_str(random))
                for gp in gps:
                    result = data.Frame(source_path /
                                        "{0}.{1}".format(gp, param),
                                        header=0,
                                        index_col=False).df.copy(deep=True)
                    # Each insert goes to position 0, so the final column
                    # order is Test Function, Random Rotation, GP, ...
                    result.insert(0, "GP", full(result.shape[0], gp), True)
                    result.insert(0, "Random Rotation",
                                  full(result.shape[0], random), True)
                    result.insert(0, "Test Function",
                                  full(result.shape[0], test_function), True)
                    frames.append(result)
        if not frames:
            continue
        results = concat(frames, axis=0, ignore_index=True, sort=False)
        results.to_csv(destination / "{0}".format(param), index=False)
Beispiel #9
0
def _analyze_theta(M: int, N: int, function_name: str,
                   noisy: bool) -> DataFrame:
    """Compare the Theta matrices of the ``.rom`` and ``.random`` results.

    For each fold ``k`` the product
    ``theta_rom @ theta_true @ theta_random.T`` is formed, its leading
    ``Mu x Mu`` block and its trailing block (from row and column ``Mu``
    onwards) are each passed to ``_singular_values``, and the two results are
    joined column-wise. The per-fold tables and their average over ``FOLDS``
    (with ``fold`` set to ``'mean'``) are written to the ``.random`` results
    directory, both as-is and with floats formatted as ``%.4f``.

    Args:
        M: Used in the results directory names.
        N: Used in the results directory names.
        function_name: Prefix of the results directory names; also passed to
            ``choose_Mu``.
        noisy: Selects the ``NORMAL_CDF_DIR`` or ``NOISELESS_DIR`` file prefix.

    Returns:
        The fold-averaged table, with its ``fold`` column set to ``'mean'``.
    """
    Mu = choose_Mu(function_name)
    noisy_str = NORMAL_CDF_DIR if noisy else NOISELESS_DIR
    random_path = ((BASE_PATH / "results") /
                   (function_name + ".{0:d}.{1:d}.random").format(N, M))
    true_theta_file = random_path / (noisy_str + ".rom.optimized.True_Theta.csv")
    theta_true = data.Frame(true_theta_file,
                            **model.base.Model.CSV_PARAMETERS).df.values
    theta_csv = (noisy_str + ".rom.optimized.Theta.csv")
    rom_path = ((BASE_PATH / "results") /
                (function_name + ".{0:d}.{1:d}.rom").format(N, M))
    theta_rom_df = data.Frame(rom_path / theta_csv,
                              **model.base.Model.CSV_PARAMETERS).df
    theta_random_df = data.Frame(random_path / theta_csv,
                                 **model.base.Model.CSV_PARAMETERS).df
    for k in range(FOLDS):
        # The fold column is compared as a string and then dropped as column 0.
        fold_label = str(k)
        rom_rows = theta_rom_df.loc[theta_rom_df['fold'] == fold_label]
        theta_rom = rom_rows.values[:, 1:].copy().astype(float)
        random_rows = theta_random_df.loc[theta_random_df['fold'] == fold_label]
        theta_random = random_rows.values[:, 1:].transpose().copy().astype(float)
        h = theta_rom @ theta_true @ theta_random
        leading = _singular_values(h[:Mu, :Mu])
        trailing = _singular_values(h[Mu:, Mu:])
        joined = concatenate((leading, trailing), axis=1)
        fold_df = DataFrame(joined)
        fold_df.insert(0, "fold", full(joined.shape[0], k, dtype=int), True)
        if k:
            results = concat([results, fold_df], axis=0, ignore_index=True)
            fold_mean += fold_df / FOLDS
        else:
            results = DataFrame(fold_df)
            fold_mean = results.copy(deep=True) / FOLDS
    # The fold column was averaged along with the data; overwrite it with a label.
    fold_mean.loc[:, 'fold'] = 'mean'
    results = concat([results, fold_mean], axis=0, ignore_index=True)
    plain_name = "{0}.{1}.Theta_Analyzed.csv".format(noisy_str, "rom.optimized")
    results.to_csv(random_path / plain_name)
    formatted_name = "{0}.{1}.Theta_Analyzed.formatted.csv".format(
        noisy_str, "rom.optimized")
    results.to_csv(random_path / formatted_name, float_format='%.4f')
    return fold_mean
Beispiel #10
0
def _analyze_theta(test_function: str, N: int, noise_std: float, M: int) -> DataFrame:
    """Compare the Theta matrices of the random and non-random results.

    For each fold ``k`` the product
    ``theta[random=False] @ theta_true @ theta[random=True].T`` is formed, its
    leading ``Mu x Mu`` block and its trailing block (from row and column
    ``Mu`` onwards) are each passed to ``_singular_values``, and the two
    results are joined column-wise. The per-fold tables and their average over
    ``K`` folds (with ``fold`` set to ``'mean'``) are written to
    ``rom.optimized.reduced.Theta_Analyzed.csv`` in the random results
    directory.

    Args:
        test_function: Forwarded to ``store_path`` and ``choose_Mu``.
        N: Forwarded to ``store_path``.
        noise_std: Forwarded to ``store_path``.
        M: Forwarded to ``store_path``.

    Returns:
        The fold-averaged table, with its ``fold`` column set to ``'mean'``.
    """
    Mu = choose_Mu(test_function)
    csv_kwargs = model.base.Model.CSV_PARAMETERS
    random_results = store_path(test_function, N, noise_std, True, M) / "results"
    plain_results = store_path(test_function, N, noise_std, False, M) / "results"
    random_df = data.Frame(random_results / "rom.optimized.Theta.csv", **csv_kwargs).df
    plain_df = data.Frame(plain_results / "rom.optimized.Theta.csv", **csv_kwargs).df
    theta_true = data.Frame(random_results / "rom.optimized.True_Theta.csv", **csv_kwargs).df.values
    for k in range(K):
        # The fold column is compared as a string and then dropped as column 0.
        fold_label = str(k)
        theta_with_rotation = random_df.loc[random_df['fold'] == fold_label].values[:, 1:].copy().astype(float)
        theta_without_rotation = plain_df.loc[plain_df['fold'] == fold_label].values[:, 1:].copy().astype(float)
        h = theta_without_rotation @ theta_true @ theta_with_rotation.transpose()
        leading = _singular_values(h[:Mu, :Mu])
        trailing = _singular_values(h[Mu:, Mu:])
        joined = concatenate((leading, trailing), axis=1)
        fold_df = DataFrame(joined)
        fold_df.insert(0, "fold", full(joined.shape[0], k, dtype=int), True)
        if k:
            results = concat([results, fold_df], axis=0, ignore_index=True, sort=False)
            fold_mean += fold_df / K
        else:
            results = DataFrame(fold_df)
            fold_mean = results.copy(deep=True) / K
    # The fold column was averaged along with the data; overwrite it with a label.
    fold_mean.loc[:, 'fold'] = 'mean'
    results = concat([results, fold_mean], axis=0, ignore_index=True, sort=False)
    results.to_csv(random_results / "rom.optimized.reduced.Theta_Analyzed.csv")
    return fold_mean
Beispiel #11
0
def _run_test(M, N, function_name, random, noisy):
    """Build, optimise and test a ``"rom.reduced"`` GP in every fold.

    For each of the ``FOLDS`` folds, the fold's ``rom.optimized`` directory is
    copied over a fresh ``rom.reduced`` directory (any existing one is removed
    first), a GP named ``"rom.reduced"`` is constructed on the fold with
    ``Mu = choose_Mu(function_name)``, optimised with the default optimiser
    options, and tested. The output of ``_test_stats`` on the test table is
    handed to ``data.Frame`` under ``test_stats.csv`` in the GP's directory.

    Args:
        M, N, function_name, random, noisy: Forwarded unchanged to
            ``store_dir`` to locate the store; ``function_name`` is also
            passed to ``choose_Mu``.
    """
    source_store = store_dir(M, N, function_name, random, noisy)
    Mu = choose_Mu(function_name)
    # One lengthscale per retained dimension, all starting at 0.2.
    kernel_parameters = model.gpy_.Kernel.ExponentialQuadratic.Parameters(
        lengthscale=full((1, Mu), 0.2, dtype=float))
    parameters = model.gpy_.GP.DEFAULT_PARAMETERS._replace(
        kernel=kernel_parameters, e_floor=1E-5, e=1E-10)
    for k in range(FOLDS):
        fold = data.Fold(source_store, k, Mu)
        dst = fold.dir / "rom.reduced"
        # copytree requires that the destination does not exist yet.
        if dst.exists():
            shutil.rmtree(dst)
        shutil.copytree(src=fold.dir / "rom.optimized", dst=dst)
        gp = model.gpy_.GP(fold, "rom.reduced", parameters)
        gp.optimize(model.gpy_.GP.DEFAULT_OPTIMIZER_OPTIONS)
        # NOTE(review): constructing data.Frame with data presumably persists it
        # to the given CSV path; the instance itself is not needed.
        data.Frame(gp.dir / "test_stats.csv",
                   _test_stats(k, gp.test().df.copy()))
Beispiel #12
0
def summarise_results(Ms: Tuple[int, ...], Ns: Tuple[int, ...],
                      function_names: Tuple[str, ...]):
    """Merge the fold-mean rows of collected results across N and noise.

    For every ``(M, function_name, random)`` a directory
    ``BASE_PATH / "results" / "<function_name>.random.<M>"`` (or
    ``".rom.<M>"``) is created. For each of the GPs ``"ard"``,
    ``"rom.optimized"`` and ``"rom.reduced"`` and each applicable CSV name,
    the rows whose ``fold`` is ``'mean'`` (or the whole table for
    ``True_Theta.csv``) are read for every N and for the noiseless and noisy
    cases, tagged with leading ``Noise`` and ``N`` columns, and stacked. The
    stack is written as ``<gp>.<csv name>`` and, with floats formatted as
    ``%.4f``, as ``<gp>.formatted.<csv name>``.

    Nothing is written for a GP/CSV pair when ``Ns`` is empty.

    Args:
        Ms: Values of M forwarded to ``store_dir``.
        Ns: Values of N forwarded to ``store_dir``; ``int(N / 2)`` is recorded
            in the ``N`` column.
        function_names: Test function names.
    """
    sobol_base = ("S.csv", "S1.csv", "Theta.csv", "True_Theta.csv")
    kernel_base = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
    for M in Ms:
        for function_name in function_names:
            # NOTE(review): the return value is unused; the call is kept in
            # case choose_Mu validates the name -- confirm and remove if pure.
            choose_Mu(function_name)
            for random in (True, False):
                destination = (BASE_PATH / "results") / (
                    function_name +
                    (".random.{0:d}" if random else ".rom.{0:d}").format(M))
                destination.mkdir(mode=0o777, parents=True, exist_ok=True)
                for gp in ("ard", "rom.optimized", "rom.reduced"):
                    # Sobol-level files: none for "rom.reduced";
                    # Theta_Analyzed.csv only for random, non-"ard" GPs.
                    if gp == "rom.reduced":
                        sobol_params = ()
                    elif random and gp != "ard":
                        sobol_params = sobol_base + ("Theta_Analyzed.csv", )
                    else:
                        sobol_params = sobol_base
                    # test_stats.csv is not summarised for "rom.optimized".
                    if gp == "rom.optimized":
                        kernel_params = kernel_base
                    else:
                        kernel_params = kernel_base + ("test_stats.csv", )
                    for param in sobol_params + kernel_params:
                        frames = []
                        for N in Ns:
                            for noisy in (False, True):
                                noisy_str = NORMAL_CDF_DIR if noisy else NOISELESS_DIR
                                noise = 0.025 if noisy else 0
                                store_name = store_dir(M, N, function_name,
                                                       random, noisy).name
                                source = ((BASE_PATH / "results") / store_name
                                          ) / "{0}.{1}.{2}".format(
                                              noisy_str, gp, param)
                                result = data.Frame(
                                    source, **model.base.Model.CSV_PARAMETERS
                                ).df.copy(deep=True)
                                if param != "True_Theta.csv":
                                    # Keep only the fold-averaged rows.
                                    result = result.loc[
                                        result['fold'] == 'mean'].drop(
                                            'fold', axis=1).copy(deep=True)
                                result.insert(
                                    0, "N",
                                    full(result.shape[0],
                                         int(N / 2),
                                         dtype=int), True)
                                result.insert(0, "Noise",
                                              full(result.shape[0], noise),
                                              True)
                                frames.append(result)
                        if not frames:
                            continue
                        results = concat(frames, axis=0, ignore_index=True)
                        results.to_csv(destination /
                                       "{0}.{1}".format(gp, param),
                                       index=False)
                        results.to_csv(
                            destination /
                            "{0}.formatted.{1}".format(gp, param),
                            float_format='%.4f',
                            index=False)
Beispiel #13
0
def summarise_results(test_functions: Tuple[str, ...],
                      Ns: Tuple[int, ...],
                      noise_stds: Tuple[float, ...],
                      randoms: Tuple[bool, ...],
                      gps: Tuple[str, ...],
                      Ms: Tuple[int, ...] = (5, )):
    """Merge the fold-mean rows of collected results across M, N and noise.

    For every ``(test_function, random)`` a directory
    ``SUMMARY_PATH / "<test_function>.<random str>"`` is created. For every GP
    and kernel-level CSV name, the rows whose ``fold`` is ``'mean'`` are read
    from ``RESULTS_PATH / <store name> / "<gp>.<csv name>"`` for every
    ``(M, N, noise_std)``, tagged with leading ``M``, ``Noise`` and ``N``
    columns, stacked, and written to ``<gp>.<csv name>``. ``test_stats.csv``
    is skipped when the GP is ``"rom.optimized"``.

    Nothing is written for a GP/CSV pair when ``Ms``, ``Ns`` or
    ``noise_stds`` is empty.

    Args:
        test_functions: Test function names, forwarded to ``store_path``.
        Ns: Values of N forwarded to ``store_path``; ``int(N / 2)`` is
            recorded in the ``N`` column.
        noise_stds: Noise levels forwarded to ``store_path`` and recorded in
            the ``Noise`` column.
        randoms: Flags forwarded to ``store_path`` and ``_random_str``.
        gps: GP names used as file-name prefixes.
        Ms: Values of M forwarded to ``store_path`` and recorded in the ``M``
            column.
    """
    kernel_base = ("lengthscale.csv", "e.csv", "f.csv", "log_likelihood.csv")
    for test_function in test_functions:
        for random in randoms:
            destination = SUMMARY_PATH / (test_function + '.' +
                                          _random_str(random))
            destination.mkdir(mode=0o777, parents=True, exist_ok=True)
            for gp in gps:
                if gp == "rom.optimized":
                    params = kernel_base
                else:
                    params = kernel_base + ("test_stats.csv", )
                for param in params:
                    frames = []
                    for M in Ms:
                        for N in Ns:
                            for noise_std in noise_stds:
                                source_store = store_path(
                                    test_function, N, noise_std, random, M)
                                source = (RESULTS_PATH / source_store.name
                                          ) / "{0}.{1}".format(gp, param)
                                result = data.Frame(
                                    source, **model.base.Model.CSV_PARAMETERS
                                ).df.copy(deep=True)
                                # Keep only the fold-averaged rows.
                                result = result.loc[
                                    result['fold'] == 'mean'].drop(
                                        'fold', axis=1).copy(deep=True)
                                # Each insert goes to position 0, so the final
                                # column order is M, Noise, N, ...
                                result.insert(
                                    0, "N",
                                    full(result.shape[0],
                                         int(N / 2),
                                         dtype=int), True)
                                result.insert(
                                    0, "Noise",
                                    full(result.shape[0], noise_std), True)
                                result.insert(0, "M",
                                              full(result.shape[0], M),
                                              True)
                                frames.append(result)
                    if not frames:
                        continue
                    results = concat(frames,
                                     axis=0,
                                     ignore_index=True,
                                     sort=False)
                    results.to_csv(destination /
                                   "{0}.{1}".format(gp, param),
                                   index=False)