def _dump_current_state(self):
    """Pickle the results collected so far to ``self.results_pickle_path``.

    Wraps ``self.gof_single_res_collection`` in a ``GoodnessOfFitResults``
    object and writes it with ``io.dump_as_pickle``. Any existing file at
    that path is overwritten.

    NOTE(review): ``io`` must be the project's I/O helper module here (the
    standard-library ``io`` has no ``dump_as_pickle``) - confirm the import.
    """
    #if self.export_csv:
    #  self._export_results(task=task, gof_result=gof_single_result, file_handle_results=self.file_handle_results_csv)
    #if self.export_pickle:

    # Build the payload *before* opening the file: mode "wb" truncates the
    # target immediately, so a failure while constructing the results object
    # would otherwise wipe the previously dumped state.
    intermediate_gof_results = GoodnessOfFitResults(
        single_results_dict=self.gof_single_res_collection)

    with open(self.results_pickle_path, "wb") as f:
        io.dump_as_pickle(f, intermediate_gof_results, verbose=False)
    def __init__(self,
                 exp_prefix,
                 est_params,
                 sim_params,
                 observations,
                 keys_of_interest,
                 n_mc_samples=10**7,
                 n_x_cond=5,
                 n_seeds=5,
                 use_gpu=True,
                 tail_measures=True):
        """Set up an experiment run: store settings, then load or create state.

        The logger is configured under ``config.DATA_DIR`` with ``exp_prefix``.
        If a config pickle (``EXP_CONFIG_FILE``) already exists in the logger's
        directory it is loaded, otherwise the configuration variants are
        generated from ``est_params`` and the seeded ``sim_params`` and dumped.
        Likewise, an existing results pickle (``RESULTS_FILE``) is loaded to
        continue a previous run; otherwise an empty result collection is used.

        Args:
          exp_prefix: experiment prefix, used as logger prefix (must be truthy)
          est_params: estimator parameters for the config generation (must be truthy)
          sim_params: simulator parameters; expanded by ``n_seeds`` seeds (must be truthy)
          observations: object providing ``.all()``, which must evaluate truthy
          keys_of_interest: stored on the instance (must be truthy)
          n_mc_samples: stored on the instance
          n_x_cond: stored on the instance
          n_seeds: passed to ``_add_seeds_to_sim_params``
          use_gpu: stored on the instance
          tail_measures: stored on the instance

        Raises:
          AssertionError: if one of the sanity checks on the arguments fails.
        """
        assert est_params and exp_prefix and sim_params and keys_of_interest
        assert observations.all()

        # each simulator configuration is replicated once per randomness seed
        seeded_sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

        self.exp_prefix = exp_prefix
        self.keys_of_interest = keys_of_interest
        self.observations = observations
        self.n_mc_samples = n_mc_samples
        self.n_x_cond = n_x_cond
        self.use_gpu = use_gpu
        self.tail_measures = tail_measures

        logger.configure(log_directory=config.DATA_DIR,
                         prefix=exp_prefix,
                         color='green')

        # two left-aligned columns: message (70 chars) and path (30 chars)
        message_template = "{:<70s} {:<30s}"

        # ---------- Either load or generate the configs ----------
        config_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                       EXP_CONFIG_FILE)
        if not os.path.isfile(config_pkl_path):
            logger.log(message_template.format(
                "Generating and storing experiment configs under: ",
                config_pkl_path))
            self.configs = self._generate_configuration_variants(
                est_params, seeded_sim_params)
            logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)
        else:
            logger.log(message_template.format(
                "Loading experiment previous configs from file: ",
                config_pkl_path))
            self.configs = logger.load_pkl(EXP_CONFIG_FILE)

        # ---------- Either load already existing results or start a new result collection ----------
        results_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                        RESULTS_FILE)
        if not os.path.isfile(results_pkl_path):
            # nothing stored yet -> start from scratch
            self.gof_single_res_collection = {}
        else:
            logger.log_line(
                message_template.format("Continue with: ", results_pkl_path))
            previous_results = logger.load_pkl_log(RESULTS_FILE)
            self.gof_single_res_collection = dict(previous_results)

        self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
Ejemplo n.º 3
0
def get_density_plots(estimators_list,
                      simulators_dict,
                      path_to_results,
                      exp_prefix="question1_noise_reg_x",
                      task_ids=None):
    """Load dumped estimators and return 3d plots of their fitted densities.

    Two modes are currently available:
      1) ``task_ids is None``: for every estimator name in ``estimators_list``
         one stored result is selected (via ``take_of_type``) among the
         results whose ``probabilistic_model_params`` equal ``simulators_dict``
         and for which ``ndim_x + ndim_y == 2``.
      2) ``task_ids`` given as list: the dumped estimators for exactly these
         tasks are loaded.

    Args:
      estimators_list: a list containing strings of estimators to be evaluated,
        e.g. ['KernelMixtureNetwork', 'MixtureDensityNetwork']
      simulators_dict: a dict containing specifications of a simulator under
        which the estimators shall be compared, e.g.
        {'heteroscedastic': True, 'random_seed': 20, 'std': 1, 'simulator': 'EconDensity'}
      path_to_results: absolute path to where the dumped model files are stored
      exp_prefix: specifies the task question
      task_ids: optional list of task ids; if given, it must have the same
        length as ``estimators_list``

    Returns:
      A list of figures of the fitted densities, one per loaded model. Plots of
      the true densities are not implemented yet.

    Raises:
      AssertionError: if ``task_ids`` is given but is not a list or its length
        differs from ``len(estimators_list)``.
    """

    if task_ids is not None:
        assert isinstance(task_ids, list)
        assert len(task_ids) == len(estimators_list)

    RESULTS_FILE = 'results.pkl'
    logger.configure(path_to_results, exp_prefix)

    results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
    gof_result = GoodnessOfFitResults(
        single_results_dict=results_from_pkl_file)
    # The returned dataframe is not needed here; the call is kept because it
    # may populate state on gof_result - TODO confirm and drop if it does not.
    gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST)

    # load model's estimators
    if task_ids is None:
        models_of_interest = {
            k: v
            for k, v in gof_result.single_results_dict.items()
            if v.probabilistic_model_params == simulators_dict
            and v.ndim_x + v.ndim_y == 2
        }

        models = [
            ConfigRunner.load_dumped_estimator(
                take_of_type(1, estimator_str, models_of_interest))
            for estimator_str in estimators_list
        ]
    else:
        # NOTE(review): this wraps the single return value of
        # load_dumped_estimators in a one-element list, while the loop below
        # expects objects exposing `.estimator` - verify what is returned here.
        models = [
            ConfigRunner.load_dumped_estimators(gof_result, task_id=task_ids)
        ]

    # load model's simulators
    # todo: implement when simulator dumps exist

    figs = []

    for model in models:
        # fresh session on the estimator's own graph
        sess = tf.Session(graph=model.estimator.sess.graph)

        # the context manager closes the session on exit, so no explicit
        # sess.close() is required inside the block
        with sess:
            sess.run(tf.global_variables_initializer())
            model.estimator.sess = sess

            # fitted density figures
            plt.suptitle(model.estimator.name)
            figs.append(model.estimator.plot3d())

            # true density figures
            # todo: use newly dumped simulators

    return figs
            'y_noise_std': [0.1],
        },
    }

    simulators_params = {'LinearStudentT': {'ndim_x': [10]}}

    observations = 100 * np.logspace(2, 6, num=8, base=2.0, dtype=np.int32)

    return estimator_params, simulators_params, observations


# Script entry point: launch the experiment defined by question4() and, if the
# launcher returns a truthy value, load the stored results for inspection.
if __name__ == '__main__':
    estimator_params, simulators_params, observations = question4()
    load = base_experiment.launch_experiment(estimator_params,
                                             simulators_params,
                                             observations,
                                             EXP_PREFIX,
                                             n_mc_samples=N_MC_SAMPLES,
                                             tail_measures=False)

    if load:
        logger.configure(config.DATA_DIR, EXP_PREFIX)

        # rebuild the results object and its dataframe from the results pickle
        results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
        gof_result = GoodnessOfFitResults(
            single_results_dict=results_from_pkl_file)
        results_df = gof_result.generate_results_dataframe(
            base_experiment.KEYS_OF_INTEREST)

        # gof_result is rebound to whatever load_dumped_estimators returns
        gof_result = ConfigRunner.load_dumped_estimators(gof_result)
Ejemplo n.º 5
0
import os

# Experiment identifiers and data locations (machine-specific absolute paths).
EXP_PREFIX = "question6_noise_schedules"
RESULTS_FILE = "results.pkl"
CLUSTER_DIR = "/local/rojonas/cde/data/local"
# NOTE(review): LOCATION is built from CLUSTER_DIR but is not used in the
# visible part of this script; the logger below points at DATA_DIR_LOCAL.
LOCATION = "{}/{}/{}".format(CLUSTER_DIR, EXP_PREFIX, RESULTS_FILE)
DATA_DIR_LOCAL = "/home/jonasrothfuss/Dropbox/Eigene_Dateien/ETH/02_Projects/02_Noise_Regularization/02_Code_Conditional_Density_Estimation/data/cluster"

logger.configure(
    #"/local/rojonas/cde/data/local",
    DATA_DIR_LOCAL,
    EXP_PREFIX,
)

# Load the stored results and build the results dataframe.
results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST_LOGPROB)
# Replace None entries by the string "None" (in place).
results_df.replace(to_replace=[None], value="None", inplace=True)

# Estimator and simulator names used for the plots below.
estimators = [
    "MixtureDensityNetwork",
    "KernelMixtureNetwork",
    "NormalizingFlowEstimator"
]
simulators = ["EconDensity", "GaussianMixture", "SkewNormal"]

# rules of thumb
for estimator in estimators:
    plot_dict = dict(
        [
            (
Ejemplo n.º 6
0
import pickle
import os

# Experiment identifiers and data locations (machine-specific absolute paths).
EXP_PREFIX = "question5_benchmark"
RESULTS_FILE = "results.pkl"
CLUSTER_DIR = "/home/simon/Documents/KIT/Informatik/Bachelorarbeit/Conditional_Density_Estimation/data/cluster"
LOCATION = "{}/{}/{}".format(CLUSTER_DIR, EXP_PREFIX, RESULTS_FILE)

# The literal path passed here is the same string as CLUSTER_DIR.
logger.configure(
    "/home/simon/Documents/KIT/Informatik/Bachelorarbeit/Conditional_Density_Estimation/data/cluster",
    EXP_PREFIX,
)

# Results are read directly with pickle from LOCATION.
# NOTE(review): pickle.load executes arbitrary code from the file - only use
# on result files from a trusted source.
with open(LOCATION, "rb") as fh:
    results_from_pkl_file = pickle.load(fh)
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(
    base_experiment.KEYS_OF_INTEREST)
# Replace None entries by the string "None" (in place).
results_df.replace(to_replace=[None], value="None", inplace=True)

# Estimator and simulator names used for the plots below.
estimators = [
    "MixtureDensityNetwork",
    "KernelMixtureNetwork",
    "NormalizingFlowEstimator",
]
simulators = ["ArmaJump", "EconDensity", "GaussianMixture", "SkewNormal"]

plot_dict = dict([
    (
        simulator,
        {
Ejemplo n.º 7
0

#
def _resize_plots(fig):
    """Set fixed y-limits on the first and the fourth axes of ``fig``.

    Args:
      fig: figure-like object whose ``axes`` sequence has at least four
        entries, each providing ``set_ylim``.
    """
    # (axes index, y-limits); axes[1] with limits (-7, -4.5) is deliberately
    # left out, as in the disabled call of the previous version
    fixed_ylims = (
        (0, (-3, -1.9)),
        (3, (1.0, 1.63)),
    )
    for axes_index, ylim in fixed_ylims:
        fig.axes[axes_index].set_ylim(ylim)


# results_df.to_csv('/home/jonasrothfuss/Dropbox/Eigene_Dateien/ETH/02_Projects/02_Noise_Regularization/02_Code_Conditional_Density_Estimation/'
#                   'data/local/question7_regularization_logprob/results.csv')
# Load the previously exported results table from a machine-specific csv path.
results_df = pd.read_csv(
    '/home/jonasrothfuss/Dropbox/Eigene_Dateien/ETH/02_Projects/02_Noise_Regularization/02_Code_Conditional_Density_Estimation/data/local/question7_regularization_logprob/results.csv'
)

# Create a results object without single results and attach the dataframe
# read from csv directly.
# NOTE(review): an empty list is passed as single_results_dict, whereas the
# other call sites pass a dict - confirm this is accepted.
gof_result = GoodnessOfFitResults(single_results_dict=[])
gof_result.results_df = results_df

# rules of thumb
# for estimator in estimators:
#     plot_dict = dict(
#         [
#             (
#                 simulator,
#                 {
#                     "rule_of_thumb_1.0": {
#                         "simulator": simulator,
#                         "estimator": estimator,
#                         "adaptive_noise_fn": "rule_of_thumb_1.00"
#                     },
#                     "rule_of_thumb_0.7": {
    }

    observations = 100 * np.logspace(0, 6, num=7, base=2.0, dtype=np.int32)

    return estimator_params, simulators_params, observations


if __name__ == '__main__':
    estimator_params, simulators_params, observations = question2()
    load = base_experiment.launch_experiment(estimator_params,
                                             simulators_params, observations,
                                             EXP_PREFIX)

    if load:
        results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
        gof_result = GoodnessOfFitResults(
            single_results_dict=results_from_pkl_file)
        results_df = gof_result.generate_results_dataframe(
            base_experiment.KEYS_OF_INTEREST)

        graph_dicts = [{
            "estimator": "KernelMixtureNetwork",
            "entropy_reg_coef": 0.001,
            "n_centers": 20
        }, {
            "estimator": "KernelMixtureNetwork",
            "entropy_reg_coef": 0.01,
            "n_centers": 20
        }, {
            "estimator": "KernelMixtureNetwork",
            "entropy_reg_coef": 0.1,
            "n_centers": 20
plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize

## SKEW Normal

EXP_PREFIX = 'question4_benchmark_skew_NF'
RESULTS_FILE = 'results.pkl'

# Creates a directory named after the experiment relative to the current
# working directory (the logger below is pointed at an absolute path instead).
if not os.path.isdir(EXP_PREFIX):
    os.makedirs(EXP_PREFIX)

logger.configure(
  '/home/simon/Documents/KIT/Informatik/Bachelorarbeit/Conditional_Density_Estimation/data/cluster',
  EXP_PREFIX)

# Load the stored results and build the results dataframe, additionally
# including the 'bandwidth' key.
results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST + ['bandwidth'])


# For each simulator: map a short label to the key/value selection that
# identifies the corresponding estimator configuration.
plot_dict = dict([(simulator,
                   {"MDN": {"simulator": simulator, "estimator": "MixtureDensityNetwork", "x_noise_std": 0.1},
                     "KMN": {"simulator": simulator, "estimator": "KernelMixtureNetwork", "x_noise_std": 0.1},
                     "LSCDE": {"simulator": simulator, "estimator": "LSConditionalDensityEstimation"},
                     "CKDE": {"simulator": simulator, "estimator": "ConditionalKernelDensityEstimation", "bandwidth": "normal_reference"},
                     "CKDE_CV": {"simulator": simulator, "estimator": "ConditionalKernelDensityEstimation", "bandwidth": "cv_ml"},
                     "NKDE": {"simulator": simulator, "estimator": "NeighborKernelDensityEstimation"},
                    "NF": {"simulator": simulator, "estimator": "NormalizingFlowEstimator",}
                    }) for simulator in ["EconDensity", "ArmaJump", "SkewNormal"]])

# Plot the Hellinger distance for the selections defined above.
fig = gof_result.plot_metric(plot_dict, metric="hellinger_distance", figsize=(15, 5.5))
from cde.model_fitting.GoodnessOfFitResults import GoodnessOfFitResults
from cde.evaluation.simulation_eval import base_experiment
import cde.model_fitting.ConfigRunner as ConfigRunner
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

EXP_PREFIX = 'question2_entropy_reg'
RESULTS_FILE = 'results.pkl'

# Point the logger at the (machine-specific) directory holding the results.
logger.configure(
    '/home/jonasrothfuss/Dropbox/Eigene_Dateien/Uni/WS17_18/Density_Estimation/Nonparametric_Density_Estimation/data/cluster',
    EXP_PREFIX)

# Load the stored results and build the results dataframe, additionally
# including the 'entropy_reg_coef' key.
results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(
    base_experiment.KEYS_OF_INTEREST + ['entropy_reg_coef'])

#gof_result = ConfigRunner.load_dumped_estimators(gof_result, task_id=[5])

# Font sizes for the matplotlib defaults set below.
# NOTE(review): TITLE_SIZE and LINEWIDTH are not used in the visible part of
# this script.
SMALL_SIZE = 11
MEDIUM_SIZE = 12
LARGE_SIZE = 16
TITLE_SIZE = 20

LINEWIDTH = 6

plt.rc('font', size=SMALL_SIZE)  # controls default text sizes
plt.rc('axes', titlesize=LARGE_SIZE)  # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
Ejemplo n.º 11
0
import cde.model_fitting.ConfigRunner as ConfigRunner
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.pyplot import cm
import pandas as pd

EXP_PREFIX = 'question3_KDE'
RESULTS_FILE = 'results.pkl'

# Point the logger at the (machine-specific) directory holding the results.
logger.configure(
    '/home/jonasrothfuss/Dropbox/Eigene_Dateien/Uni/WS17_18/Density_Estimation/Nonparametric_Density_Estimation/data/cluster',
    EXP_PREFIX)

# Load the stored results and build the results dataframe, additionally
# including the 'bandwidth_selection' key.
results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(
    base_experiment.KEYS_OF_INTEREST + ['bandwidth_selection'])

#gof_result = ConfigRunner.load_dumped_estimators(gof_result, task_id=[5])

# Font sizes for the matplotlib defaults set below.
# NOTE(review): TITLE_SIZE and LINEWIDTH are not used in the visible part of
# this script.
SMALL_SIZE = 11
MEDIUM_SIZE = 12
LARGE_SIZE = 16
TITLE_SIZE = 20

LINEWIDTH = 6

plt.rc('font', size=SMALL_SIZE)  # controls default text sizes
plt.rc('axes', titlesize=LARGE_SIZE)  # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
Ejemplo n.º 12
0
# Experiment identifiers and data locations (machine-specific absolute paths).
EXP_PREFIX = "question8_benchmark"
RESULTS_FILE = "results.pkl"
CLUSTER_DIR = "/local/rojonas/cde/data/local"
# NOTE(review): LOCATION is built from CLUSTER_DIR but is not used in the
# visible part of this script; the logger below points at DATA_DIR_LOCAL.
LOCATION = "{}/{}/{}".format(CLUSTER_DIR, EXP_PREFIX, RESULTS_FILE)
DATA_DIR_LOCAL = "/home/jonasrothfuss/Dropbox/Eigene_Dateien/ETH/02_Projects/02_Noise_Regularization/02_Code_Conditional_Density_Estimation/data/cluster"

logger.configure(
    #"/local/rojonas/cde/data/local",
    DATA_DIR_LOCAL,
    #CLUSTER_DIR,
    EXP_PREFIX,
)

# Load the stored results and build the results dataframe, additionally
# including the "bandwidth" and "param_selection" keys.
results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST_LOGPROB + ["bandwidth", "param_selection"])
# Replace None entries by the string "None" (in place).
results_df.replace(to_replace=[None], value="None", inplace=True)

# separate 2d and 4d GMM
# Reset the index to 0..n-1 so that the row labels used with `.at` below are
# unique positions.
results_df.index = list(range(len(results_df)))

# Rename 'GaussianMixture' rows to 'GaussianMixture_<ndim_y>d'.
for i, row in results_df[['simulator', 'ndim_y']].iterrows():
    if row['simulator'] == 'GaussianMixture':
        results_df.at[i, 'simulator'] = '%s_%id'%(row['simulator'], row['ndim_y'])

estimators = [
    "MixtureDensityNetwork",
    "KernelMixtureNetwork",
    "NormalizingFlowEstimator",
    "ConditionalKernelDensityEstimation",