def __init__(self, exp_prefix, est_params, sim_params, observations, keys_of_interest,
             n_mc_samples=10**7, n_x_cond=5, n_seeds=5, use_gpu=True, tail_measures=True):
    assert est_params and exp_prefix and sim_params and keys_of_interest
    assert observations.all()

    # every simulator configuration will be run multiple times with different randomness seeds
    sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

    self.observations = observations
    self.n_mc_samples = n_mc_samples
    self.n_x_cond = n_x_cond
    self.keys_of_interest = keys_of_interest
    self.exp_prefix = exp_prefix
    self.use_gpu = use_gpu
    self.tail_measures = tail_measures

    logger.configure(log_directory=config.DATA_DIR, prefix=exp_prefix, color='green')

    ''' ---------- Either load or generate the configs ---------- '''
    config_pkl_path = os.path.join(logger.log_directory, logger.prefix, EXP_CONFIG_FILE)

    if os.path.isfile(config_pkl_path):
        logger.log("{:<70s} {:<30s}".format("Loading experiment previous configs from file: ", config_pkl_path))
        self.configs = logger.load_pkl(EXP_CONFIG_FILE)
    else:
        logger.log("{:<70s} {:<30s}".format("Generating and storing experiment configs under: ", config_pkl_path))
        self.configs = self._generate_configuration_variants(est_params, sim_params)
        logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)

    ''' ---------- Either load already existing results or start a new result collection ---------- '''
    results_pkl_path = os.path.join(logger.log_directory, logger.prefix, RESULTS_FILE)

    if os.path.isfile(results_pkl_path):
        logger.log_line("{:<70s} {:<30s}".format("Continue with: ", results_pkl_path))
        self.gof_single_res_collection = dict(logger.load_pkl_log(RESULTS_FILE))
    else:  # start from scratch
        self.gof_single_res_collection = {}

    self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
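
# Illustrative usage sketch (not part of the original module): shows how the constructor
# above is typically driven, mirroring the pipeline test further below. question1() and
# EXP_PREFIX are assumed to be importable as in that test; the keys_of_interest subset is
# arbitrary and only for illustration.
def _example_config_runner_usage():
    conf_est, conf_sim, observations = question1()
    runner = ConfigRunner(EXP_PREFIX, conf_est, conf_sim, observations=observations,
                          keys_of_interest=['task_name', 'estimator', 'simulator', 'kl_divergence'],
                          n_mc_samples=10**5, n_x_cond=5, n_seeds=5)
    # On a fresh prefix the configs are generated and pickled under
    # <DATA_DIR>/<exp_prefix>/<EXP_CONFIG_FILE>; re-instantiating with the same prefix
    # reloads the configs and any previously logged results, so an interrupted
    # experiment can be resumed where it left off.
    runner.run_configurations(dump_models=True, multiprocessing=False)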
def load_dumped_estimator(dict_entry):
    assert len(dict_entry) == 1
    if type(dict_entry) == dict and len(dict_entry) == 1:
        dict_entry = list(dict_entry.values())[0]

    with tf.Session(graph=tf.Graph()) as sess:
        dict_entry.estimator = logger.load_pkl("model_dumps/" + dict_entry.task_name + ".pkl")
        print("loaded estimator for entry " + dict_entry.task_name)

    return dict_entry
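
# Illustrative sketch (not part of the original module): load_dumped_estimator above
# re-attaches a pickled estimator to a single result entry. Applying it across a loaded
# result collection could look like this; RESULTS_FILE and the entry layout (each value
# carries a .task_name matching model_dumps/<task_name>.pkl) are assumed from the
# constructor above.
def _example_load_all_dumped_estimators():
    results = dict(logger.load_pkl_log(RESULTS_FILE))
    return {task_name: load_dumped_estimator({task_name: entry})
            for task_name, entry in results.items()}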
def test_store_load_configrunner_pipeline(self):
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX)
    test_dir = os.path.join(logger.log_directory, logger.prefix)
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)

    keys_of_interest = ['task_name', 'estimator', 'simulator', 'n_observations', 'center_sampling_method',
                        'x_noise_std', 'y_noise_std', 'ndim_x', 'ndim_y', 'n_centers', 'n_mc_samples',
                        'n_x_cond', 'mean_est', 'cov_est', 'mean_sim', 'cov_sim', 'kl_divergence',
                        'hellinger_distance', 'js_divergence', 'x_cond', 'random_seed',
                        'mean_abs_diff', 'cov_abs_diff', 'VaR_sim', 'VaR_est', 'VaR_abs_diff',
                        'CVaR_sim', 'CVaR_est', 'CVaR_abs_diff', 'time_to_fit']

    conf_est, conf_sim, observations = question1()
    conf_runner = ConfigRunner(EXP_PREFIX, conf_est, conf_sim, observations=observations,
                               keys_of_interest=keys_of_interest,
                               n_mc_samples=1 * 10 ** 2, n_x_cond=5, n_seeds=5)

    conf_runner.configs = random.sample(conf_runner.configs, NUM_CONFIGS_TO_TEST)
    conf_runner.run_configurations(dump_models=True, multiprocessing=False)
    results_from_pkl_file = dict({logger.load_pkl(RESULTS_FILE)})

    """ check if model dumps have all been created """
    dump_dir = os.path.join(logger.log_directory, logger.prefix, 'model_dumps')
    model_dumps_list = os.listdir(dump_dir)  # get list of all model files
    model_dumps_list_no_suffix = [os.path.splitext(entry)[0] for entry in model_dumps_list]  # remove suffix

    for conf in conf_runner.configs:
        self.assertTrue(conf['task_name'] in model_dumps_list_no_suffix)

    """ check if model dumps can be used successfully """
    for model_dump_i in model_dumps_list:
        # tf.reset_default_graph()
        with tf.Session(graph=tf.Graph()):
            model = logger.load_pkl("model_dumps/" + model_dump_i)
            self.assertTrue(model)
            if model.ndim_x == 1 and model.ndim_y == 1:
                self.assertTrue(model.plot3d(show=False))
def test_log_data(setup):
    import numpy

    d1 = numpy.random.randn(20, 10)
    logger.log_data(d1, 'test_file.pkl')
    sleep(1.0)

    d2 = numpy.random.randn(20, 10)
    logger.log_data(d2, 'test_file.pkl', overwrite=True)
    sleep(1.0)

    data = logger.load_pkl('test_file.pkl')
    assert len(data) == 1, "data should contain only one array because we overwrote it."
    assert numpy.array_equal(data[0], d2), "first entry should be the same as d2"
def test_save_pkl_abs_path(setup):
    import numpy

    d1 = numpy.random.randn(20, 10)
    logger.save_pkl(d1, "/tmp/ml-logger-test/test_file_1.pkl")
    sleep(0.1)

    data = logger.load_pkl("/tmp/ml-logger-test/test_file_1.pkl")
    assert len(data) == 1, "data should contain a single array because save_pkl writes one entry."
    assert numpy.array_equal(data[0], d1), "first entry should be the same as d1"
def test_load_pkl(setup):
    import numpy

    d1 = numpy.random.randn(20, 10)
    logger.log_data(d1, 'test_file.pkl')
    sleep(1.0)

    d2 = numpy.random.randn(20, 10)
    logger.log_data(d2, 'test_file.pkl')
    sleep(1.0)

    data = logger.load_pkl('test_file.pkl')
    assert len(data) == 2, "data should contain two arrays"
    assert numpy.array_equal(data[0], d1), "first entry should be the same as d1"
    assert numpy.array_equal(data[1], d2), "second entry should be the same as d2"
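
# Illustrative sketch (not part of the test suite): test_log_data, test_save_pkl_abs_path
# and test_load_pkl above rely on the append-vs-overwrite semantics of logger.log_data and
# on logger.load_pkl returning the list of logged entries. The sleep() calls in the tests
# account for buffered logging and are omitted here for brevity; the path name is arbitrary.
def _example_log_data_semantics():
    import numpy
    logger.log_data(numpy.zeros(3), 'example.pkl')                  # file holds 1 entry
    logger.log_data(numpy.ones(3), 'example.pkl')                   # appended -> 2 entries
    assert len(logger.load_pkl('example.pkl')) == 2
    logger.log_data(numpy.ones(3), 'example.pkl', overwrite=True)   # replaced -> 1 entry
    assert len(logger.load_pkl('example.pkl')) == 1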
def test_read_lambda():
    """verify that the loaded object is correct"""
    from ml_logger import logger

    data, = logger.load_pkl("./test_data.pkl")
    print(*[f"{k}: {type(v)} - {str(v)}" for k, v in data.items()], sep="\n")
def test_load_module(setup, test_module):
    import numpy as np

    result, = logger.load_pkl(f"modules/{0:04d}_Test.pkl")
    assert (result['var_1'] == np.ones([100, 2])).all(), "should be the same as the test data"