def __init__(self, exp_prefix, est_params, sim_params, observations, keys_of_interest,
             n_mc_samples=10**7, n_x_cond=5, n_seeds=5, use_gpu=True, tail_measures=True):
    """Set up the experiment: expand the simulator configs with random seeds,
    configure the logger, and either load previously stored experiment configs
    and results or generate/initialize them from scratch."""
    assert est_params and exp_prefix and sim_params and keys_of_interest
    assert observations.all()

    # every simulator configuration will be run multiple times with different randomness seeds
    sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

    self.observations = observations
    self.n_mc_samples = n_mc_samples
    self.n_x_cond = n_x_cond
    self.keys_of_interest = keys_of_interest
    self.exp_prefix = exp_prefix
    self.use_gpu = use_gpu
    self.tail_measures = tail_measures

    logger.configure(log_directory=config.DATA_DIR, prefix=exp_prefix, color='green')

    ''' ---------- Either load or generate the configs ---------- '''
    config_pkl_path = os.path.join(logger.log_directory, logger.prefix, EXP_CONFIG_FILE)

    if os.path.isfile(config_pkl_path):
        logger.log("{:<70s} {:<30s}".format("Loading previous experiment configs from file: ", config_pkl_path))
        self.configs = logger.load_pkl(EXP_CONFIG_FILE)
    else:
        logger.log("{:<70s} {:<30s}".format("Generating and storing experiment configs under: ", config_pkl_path))
        self.configs = self._generate_configuration_variants(est_params, sim_params)
        logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)

    ''' ---------- Either load already existing results or start a new result collection ---------- '''
    results_pkl_path = os.path.join(logger.log_directory, logger.prefix, RESULTS_FILE)

    if os.path.isfile(results_pkl_path):
        logger.log_line("{:<70s} {:<30s}".format("Continuing with: ", results_pkl_path))
        self.gof_single_res_collection = dict(logger.load_pkl_log(RESULTS_FILE))
    else:  # start from scratch
        self.gof_single_res_collection = {}

    self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
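# --- Illustrative usage sketch (not part of the original source). The class
# name `ConfigRunner` and all parameter values below are assumptions chosen for
# illustration; only the constructor signature above is given. Based on how the
# tasks are consumed later (class names looked up in globals(), configs passed
# as kwargs), `est_params` / `sim_params` presumably map estimator / simulator
# class names to grids of constructor arguments, and `observations` holds the
# dataset sizes to evaluate.
import numpy as np

est_params = {'KernelMixtureNetwork': {'n_centers': [20, 50]}}   # hypothetical grid
sim_params = {'EconDensity': {'std': [1.0]}}                     # hypothetical grid

runner = ConfigRunner(                                # hypothetical class name
    exp_prefix='density_estimation_benchmark',        # hypothetical experiment prefix
    est_params=est_params,
    sim_params=sim_params,
    observations=np.array([100, 1600]),               # must be all non-zero (see assert above)
    keys_of_interest=['mean_abs_error'],              # hypothetical result key
    n_seeds=3,
    use_gpu=False,
)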
def test_log_data(setup):
    import numpy

    d1 = numpy.random.randn(20, 10)
    logger.log_pkl(d1, 'test_file.pkl')
    sleep(1.0)

    d2 = numpy.random.randn(20, 10)
    logger.dump_pkl(d2, 'test_file.pkl')
    sleep(1.0)

    data = logger.load_pkl_log('test_file.pkl')
    assert len(data) == 1, "data should contain only one array because dump_pkl overwrote the file"
    assert numpy.array_equal(data[0], d2), "the remaining entry should equal d2"
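# --- Illustrative companion sketch (not part of the original source). The test
# above implies that dump_pkl overwrites while log_pkl appends; assuming that
# reading holds and the same `setup` fixture applies, two log_pkl calls without
# an intervening dump_pkl should accumulate both entries.
def test_log_data_appends(setup):
    import numpy

    d1 = numpy.random.randn(20, 10)
    d2 = numpy.random.randn(20, 10)
    logger.log_pkl(d1, 'append_test_file.pkl')   # hypothetical file name
    logger.log_pkl(d2, 'append_test_file.pkl')
    sleep(1.0)  # give the logger time to flush, as in test_log_data

    data = logger.load_pkl_log('append_test_file.pkl')
    assert len(data) == 2, "log_pkl should append, so both arrays are present"
    assert numpy.array_equal(data[0], d1)
    assert numpy.array_equal(data[1], d2)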
def _run_single_task(self, i, task):
    start_time = time.time()
    try:
        task_hash = _hash_task_dict(task)  # generate SHA256 hash of task dict as identifier

        # skip task if it has already been completed
        if task_hash in self.gof_single_res_collection.keys():
            logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                i + 1, "has already been completed:", "Estimator:",
                task['estimator_name'], " Simulator: ", task["simulator_name"]))
            return None

        # run task when it has not been completed
        else:
            logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                i + 1, "running:", "Estimator:",
                task['estimator_name'], " Simulator: ", task["simulator_name"]))

            tf.reset_default_graph()

            ''' build simulator and estimator model given the specified configurations '''
            simulator = globals()[task['simulator_name']](**task['simulator_config'])

            t = time.time()
            estimator = globals()[task['estimator_name']](
                task['task_name'], simulator.ndim_x, simulator.ndim_y,
                **task['estimator_config'])
            time_to_initialize = time.time() - t

            # if desired, hide gpu devices
            if not self.use_gpu:
                os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())

                ''' train the model '''
                gof = GoodnessOfFit(estimator=estimator, probabilistic_model=simulator,
                                    X=task['X'], Y=task['Y'],
                                    n_observations=task['n_obs'],
                                    n_mc_samples=task['n_mc_samples'],
                                    x_cond=task['x_cond'],
                                    task_name=task['task_name'],
                                    tail_measures=self.tail_measures)

                t = time.time()
                gof.fit_estimator(print_fit_result=True)
                time_to_fit = time.time() - t

                if self.dump_models:
                    logger.dump_pkl(data=gof.estimator,
                                    path="model_dumps/{}.pkl".format(task['task_name']))
                    logger.dump_pkl(data=gof.probabilistic_model,
                                    path="model_dumps/{}.pkl".format(task['task_name'] + "_simulator"))

                ''' perform tests with the fitted model '''
                t = time.time()
                gof_results = gof.compute_results()
                time_to_evaluate = time.time() - t

                gof_results.task_name = task['task_name']
                gof_results.hash = task_hash

            logger.log_pkl(data=(task_hash, gof_results), path=RESULTS_FILE)
            logger.flush(file_name=RESULTS_FILE)
            del gof_results

            task_duration = time.time() - start_time
            logger.log(
                "Finished task {:<1} in {:<1.4f} {:<43} {:<10} {:<1} {:<1} {:<2} | {:<1} {:<1.2f} {:<1} {:<1.2f} {:<1} {:<1.2f}"
                .format(i + 1, task_duration, "sec:", "Estimator:",
                        task['estimator_name'], " Simulator: ", task["simulator_name"],
                        "t_init:", time_to_initialize, "t_fit:", time_to_fit,
                        "t_eval:", time_to_evaluate))

    except Exception as e:
        logger.log("error in task: ", str(i + 1))
        logger.log(str(e))
        traceback.print_exc()
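# --- Hypothetical sketch (not part of the original source) of what the
# `_hash_task_dict` helper used above might look like, based solely on the
# inline comment "generate SHA256 hash of task dict as identifier". The actual
# implementation may serialize the dict differently.
import hashlib

def _hash_task_dict_sketch(task):
    # Serialize with sorted keys so the same configuration always produces the
    # same hash, regardless of dict insertion order. Keys are unique strings,
    # so sorting never compares the (possibly non-comparable) values.
    serialized = repr(sorted(task.items())).encode('utf-8')
    return hashlib.sha256(serialized).hexdigest()

# e.g. _hash_task_dict_sketch({'estimator_name': 'MixtureDensityNetwork',
#                              'simulator_name': 'EconDensity'})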