    def __init__(self,
                 exp_prefix,
                 est_params,
                 sim_params,
                 observations,
                 keys_of_interest,
                 n_mc_samples=10**7,
                 n_x_cond=5,
                 n_seeds=5,
                 use_gpu=True,
                 tail_measures=True,
                 dump_models=False):

        assert est_params and exp_prefix and sim_params and keys_of_interest
        assert observations.all(), "observations must contain only non-zero sample sizes"

        # every simulator configuration is run multiple times with different random seeds
        sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

        self.observations = observations
        self.n_mc_samples = n_mc_samples
        self.n_x_cond = n_x_cond
        self.keys_of_interest = keys_of_interest
        self.exp_prefix = exp_prefix
        self.use_gpu = use_gpu
        self.tail_measures = tail_measures
        self.dump_models = dump_models  # checked by _run_single_task before pickling models

        logger.configure(log_directory=config.DATA_DIR,
                         prefix=exp_prefix,
                         color='green')

        ''' ---------- Either load or generate the configs ---------- '''
        config_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                       EXP_CONFIG_FILE)

        if os.path.isfile(config_pkl_path):
            logger.log("{:<70s} {:<30s}".format(
                "Loading experiment previous configs from file: ",
                config_pkl_path))
            self.configs = logger.load_pkl(EXP_CONFIG_FILE)
        else:
            logger.log("{:<70s} {:<30s}".format(
                "Generating and storing experiment configs under: ",
                config_pkl_path))
            self.configs = self._generate_configuration_variants(
                est_params, sim_params)
            logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)

        ''' ---------- Either load existing results or start a new result collection ---------- '''
        results_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                        RESULTS_FILE)
        if os.path.isfile(results_pkl_path):
            logger.log_line("{:<70s} {:<30s}".format("Continue with: ",
                                                     results_pkl_path))
            self.gof_single_res_collection = dict(
                logger.load_pkl_log(RESULTS_FILE))

        else:  # start from scratch
            self.gof_single_res_collection = {}

        self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
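
A hypothetical usage sketch of the constructor above. The class name
ConfigRunner and the estimator/simulator parameter grids are assumptions for
illustration, not taken from the excerpt:

import numpy as np

# est_params / sim_params map estimator and simulator names to parameter grids
# (hypothetical names); every combination becomes one experiment configuration.
est_params = {'KernelMixtureNetwork': {'n_centers': [20, 50]}}
sim_params = {'EconDensity': {'std': [1.0]}}

runner = ConfigRunner(exp_prefix='density_benchmark',          # hypothetical prefix
                      est_params=est_params,
                      sim_params=sim_params,
                      observations=np.array([100, 500, 1000]),  # non-zero, so the assert passes
                      keys_of_interest=['score'],               # hypothetical key
                      n_seeds=3,
                      use_gpu=False)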
Example #2
def test_log_data(setup):
    from time import sleep
    import numpy
    d1 = numpy.random.randn(20, 10)
    logger.log_pkl(d1, 'test_file.pkl')
    sleep(1.0)
    d2 = numpy.random.randn(20, 10)
    logger.dump_pkl(d2, 'test_file.pkl')
    sleep(1.0)

    data = logger.load_pkl_log('test_file.pkl')
    assert len(
        data
    ) == 1, "data should contain only one array because we overwrote it."
    assert numpy.array_equal(data[0], d2), "first should be the same as d2"
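
The test above relies on the distinction that log_pkl appends a chunk to the
pickle log while dump_pkl truncates the file first. A minimal sketch of that
behavior, assuming logger is ml_logger's global logger object and using only
calls that appear in the examples above:

from ml_logger import logger

logger.configure(log_directory='/tmp/logger-demo', prefix='pkl_semantics')

logger.log_pkl({'step': 1}, 'history.pkl')   # appends chunk 1
logger.log_pkl({'step': 2}, 'history.pkl')   # appends chunk 2
assert len(logger.load_pkl_log('history.pkl')) == 2

logger.dump_pkl({'step': 3}, 'history.pkl')  # overwrites: one chunk remains
assert len(logger.load_pkl_log('history.pkl')) == 1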
Example #3
    def _run_single_task(self, i, task):
        start_time = time.time()
        try:
            task_hash = _hash_task_dict(
                task)  # generate SHA256 hash of task dict as identifier

            # skip task if it has already been completed
            if task_hash in self.gof_single_res_collection:
                logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                    i + 1, "has already been completed:", "Estimator:",
                    task['estimator_name'], " Simulator: ",
                    task["simulator_name"]))
                return None

            # run task when it has not been completed
            else:
                logger.log("Task {:<1} {:<63} {:<10} {:<1} {:<1} {:<1}".format(
                    i + 1, "running:", "Estimator:", task['estimator_name'],
                    " Simulator: ", task["simulator_name"]))

                tf.reset_default_graph()
                ''' build simulator and estimator model given the specified configurations '''

                simulator = globals()[task['simulator_name']](
                    **task['simulator_config'])

                t = time.time()
                estimator = globals()[task['estimator_name']](
                    task['task_name'], simulator.ndim_x, simulator.ndim_y,
                    **task['estimator_config'])
                time_to_initialize = time.time() - t

                # hide GPU devices if desired (must be set before the session grabs a GPU)
                if not self.use_gpu:
                    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

                with tf.Session() as sess:
                    sess.run(tf.global_variables_initializer())
                    ''' train the model '''
                    gof = GoodnessOfFit(estimator=estimator,
                                        probabilistic_model=simulator,
                                        X=task['X'],
                                        Y=task['Y'],
                                        n_observations=task['n_obs'],
                                        n_mc_samples=task['n_mc_samples'],
                                        x_cond=task['x_cond'],
                                        task_name=task['task_name'],
                                        tail_measures=self.tail_measures)

                    t = time.time()
                    gof.fit_estimator(print_fit_result=True)
                    time_to_fit = time.time() - t

                    if self.dump_models:
                        logger.dump_pkl(data=gof.estimator,
                                        path="model_dumps/{}.pkl".format(
                                            task['task_name']))
                        logger.dump_pkl(data=gof.probabilistic_model,
                                        path="model_dumps/{}.pkl".format(
                                            task['task_name'] + "_simulator"))
                    ''' perform tests with the fitted model '''
                    t = time.time()
                    gof_results = gof.compute_results()
                    time_to_evaluate = time.time() - t

                    gof_results.task_name = task['task_name']
                    gof_results.hash = task_hash

                logger.log_pkl(data=(task_hash, gof_results),
                               path=RESULTS_FILE)
                logger.flush(file_name=RESULTS_FILE)
                del gof_results

                task_duration = time.time() - start_time
                logger.log(
                    "Finished task {} in {:.4f} sec: Estimator: {} Simulator: {} "
                    "| t_init: {:.2f} t_fit: {:.2f} t_eval: {:.2f}".format(
                        i + 1, task_duration, task['estimator_name'],
                        task["simulator_name"], time_to_initialize,
                        time_to_fit, time_to_evaluate))

        except Exception as e:
            logger.log("Error in task {}".format(i + 1))
            logger.log(str(e))
            traceback.print_exc()
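
_hash_task_dict is used above as the task identifier but is not shown. A
minimal sketch of such a function, assuming the task dict can be serialized
deterministically (values json cannot encode, e.g. arrays, are stringified):

import hashlib
import json

def _hash_task_dict(task):
    # sort the keys so the hash does not depend on dict insertion order
    serialized = json.dumps(task, sort_keys=True, default=str)
    return hashlib.sha256(serialized.encode('utf-8')).hexdigest()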