    def test_6(self):
        """ Test short estimation tasks.
        """
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {
                "agents": num_agents
            },
            "num_periods": np.random.randint(1, 4),
            "estimation": {
                "maxfun": np.random.randint(0, 5),
                "agents": num_agents
            },
        }

        # Simulate a dataset

        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)

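        # Write a fixed interpolation grid to disk so that both estimation runs below
        # work with identical interpolation points.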
        write_interpolation_grid(respy_obj)

        # Run estimation task.
        simulate_observed(respy_obj)
        base_x, base_val = respy_obj.fit()

        # We also check whether updating the class instance and then running a single
        # evaluation of the criterion function gives the same result.
        respy_obj.update_optim_paras(base_x)
        respy_obj.attr["maxfun"] = 0

        alt_x, alt_val = respy_obj.fit()

        for arg in [(alt_val, base_val), (alt_x, base_x)]:
            np.testing.assert_almost_equal(arg[0], arg[1])
    def test_4(self):
        """ Test the evaluation of the criterion function for random requests, not just
        at the true values.
        """
        # Constraints that ensure that two alternative initialization files can be used
        # for the same simulated data.
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {
                "agents": num_agents
            },
            "num_periods": np.random.randint(1, 4),
            "edu_spec": {
                "start": [7],
                "max": 15,
                "share": [1.0]
            },
            "estimation": {
                "maxfun": 0,
                "agents": num_agents
            },
        }

        # Simulate a dataset
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)

        # Evaluate at different points, ensuring that the simulated dataset still fits.
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj.fit()
    def test_2(self):
        """ This test ensures that the record files are identical.
        """
        # Generate random initialization file. The number of periods is higher than
        # usual as only FORTRAN implementations are used to solve the random request.
        # This ensures that some cases of interpolation are also explored.
        constr = {
            "program": {
                "version": "fortran"
            },
            "num_periods": np.random.randint(3, 10),
            "estimation": {
                "maxfun": 0
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        base_sol_log, base_est_info_log = None, None
        base_est_log = None
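        # The scalar run establishes the baseline record files; the parallel run must
        # then reproduce them exactly.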

        for is_parallel in [False, True]:

            options_spec["program"]["threads"] = 1
            options_spec["program"]["procs"] = 1

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(
                        2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)

            file_sim = respy_obj.get_attr("file_sim")

            simulate_observed(respy_obj)

            respy_obj.fit()

            # Check for identical records
            fname = file_sim + ".respy.sol"

            if base_sol_log is None:
                base_sol_log = open(fname, "r").read()

            assert open(fname, "r").read() == base_sol_log

            if base_est_info_log is None:
                base_est_info_log = open("est.respy.info", "r").read()
            assert open("est.respy.info", "r").read() == base_est_info_log

            if base_est_log is None:
                base_est_log = open("est.respy.log", "r").readlines()
            compare_est_log(base_est_log)

    def test_9(self):
        """ This test just locks in the evaluation of the criterion function for the
        original Keane & Wolpin data. We create additional initialization files that
        include numerous types and initial conditions.

        """
        # This ensures that the experience effect is taken care of properly.
        open(".restud.respy.scratch", "w").close()

        kw_spec, result = random.choice([
            ("kw_data_one", 10.45950941513551),
            ("kw_data_two", 45.04552402391903),
            ("kw_data_three", 74.28253652773714),
            ("kw_data_one_types", 9.098738585839529),
            ("kw_data_one_initial", 7.965979149372883),
        ])

        base_path = TEST_RESOURCES_DIR / kw_spec

        # Evaluate criterion function at true values.
        respy_obj = RespyCls(base_path.with_suffix(".csv"),
                             base_path.with_suffix(".json"))

        respy_obj.unlock()
        respy_obj.set_attr("maxfun", 0)
        respy_obj.lock()

        simulate_observed(respy_obj, is_missings=False)

        _, val = respy_obj.fit()
        np.testing.assert_allclose(val, result)
    def test_5(self):
        """ Test the scripts.
        """
        # Constraints that ensure that two alternative initialization files can be used
        # for the same simulated data.
        for _ in range(10):
            num_agents = np.random.randint(5, 100)
            constr = {
                "simulation": {
                    "agents": num_agents
                },
                "num_periods": np.random.randint(1, 4),
                "edu_spec": {
                    "start": [7],
                    "max": 15,
                    "share": [1.0]
                },
                "estimation": {
                    "maxfun": 0,
                    "agents": num_agents
                },
            }
            # Simulate a dataset
            params_spec, options_spec = generate_random_model(
                point_constr=constr)
            respy_obj = RespyCls(params_spec, options_spec)
            simulate_observed(respy_obj)

            # Create output to process a baseline.
            respy_obj.unlock()
            respy_obj.set_attr("maxfun", 0)
            respy_obj.lock()

            respy_obj.fit()

            # Potentially evaluate at different points.
            params_spec, options_spec = generate_random_model(
                point_constr=constr)
            respy_obj = RespyCls(params_spec, options_spec)

            single = np.random.choice([True, False])

            scripts_check("estimate", respy_obj)
            scripts_estimate(single, respy_obj)
    def test_2(self):
        """Ensure that the evaluation of the criterion is equal across versions."""
        max_draws = np.random.randint(10, 100)

        # It seems to be important that max_draws and max_agents are the same
        # number because otherwise some functions that read draws from a file
        # to ensure compatibility of fortran and python versions won't work.
        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        point_constr = {
            "interpolation": {"flag": False},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_obj = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras = dist_class_attributes(
            respy_obj, "num_agents_sim", "optim_paras"
        )

        type_shares = optim_paras["type_shares"]

        # Simulate a dataset
        simulate_observed(respy_obj)

        # Iterate over alternative implementations
        base_x, base_val = None, None

        num_periods = options_spec["num_periods"]

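        # Write the random draws and type assignments to disk so that the Python and
        # Fortran implementations work with identical random components.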
        write_draws(num_periods, max_draws)
        write_types(type_shares, num_agents_sim)

        for version in ["python", "fortran"]:

            respy_obj.unlock()

            respy_obj.set_attr("version", version)

            respy_obj.lock()

            x, val = respy_obj.fit()

            # Check for the returned parameters.
            if base_x is None:
                base_x = x
            np.testing.assert_allclose(base_x, x)

            # Check for the value of the criterion function.
            if base_val is None:
                base_val = val
            np.testing.assert_allclose(base_val, val)
def create_single(idx):
    """ This function creates a single test.
    """
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    # We impose a couple of constraints that make the requests manageable.
    np.random.seed(idx)

    version = np.random.choice(["python", "fortran"])

    # Only choose from constrained optimizers because we always have some bounds.
    if version == "python":
        optimizer = "SCIPY-LBFGSB"
    else:
        optimizer = "FORT-BOBYQA"

    constr = {
        "program": {
            "version": version
        },
        "preconditioning": {
            "type": np.random.choice(["identity", "magnitudes"])
        },
        "estimation": {
            "maxfun":
            int(np.random.choice(range(6), p=[0.5, 0.1, 0.1, 0.1, 0.1, 0.1])),
            "optimizer":
            optimizer,
        },
    }
    constr["flag_estimation"] = True

    param_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(param_spec, options_spec)
    simulate_observed(respy_obj)
    crit_val = respy_obj.fit()[1]

    # In rare instances, the value of the criterion function might be too large and thus
    # printed as a string. This occurred in the past, when the gradient preconditioning
    # had zero probability observations. We now generate random initialization files
    # with smaller gradient step sizes.
    if not isinstance(crit_val, float):
        raise AssertionError(" ... value of criterion function too large.")

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return respy_obj.attr, crit_val
def check_single(tests, idx):
    """ This function checks a single test from the dictionary.
    """
    # Distribute test information.
    attr, crit_val = tests[idx]

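    # Downgrade the stored request if the required parallelism or FORTRAN support is
    # not available on this machine.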
    if not IS_PARALLELISM_OMP or not IS_FORTRAN:
        attr["num_threads"] = 1

    if not IS_PARALLELISM_MPI or not IS_FORTRAN:
        attr["num_procs"] = 1

    if not IS_FORTRAN:
        attr["version"] = "python"

    # In the past we also had the problem that some of the testing machines reported
    # selective failures when the regression vault was created on another machine.
    if "zeus" in socket.gethostname() and idx in []:
        print(msg)
        return None
    if "acropolis" in socket.gethostname() and idx in []:
        print(msg)
        return None
    if "pontos" in socket.gethostname() and idx in []:
        print(msg)
        return None

    # We need to create a temporary directory, so the multiprocessing does not
    # interfere with any of the files that are printed and used during the small
    # estimation request.
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    params_spec = _params_spec_from_attributes(attr)
    options_spec = _options_spec_from_attributes(attr)
    respy_obj = RespyCls(params_spec, options_spec)

    simulate_observed(respy_obj)

    est_val = respy_obj.fit()[1]

    is_success = np.isclose(est_val, crit_val, rtol=TOL, atol=TOL)

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return is_success

    def test_3(self):
        """ Test whether a simulated dataset and the evaluation of the criterion
        function are the same for a tiny delta and a myopic agent.
        """
        constr = {"estimation": {"maxfun": 0}}
        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          myopic=True)
        respy_obj = RespyCls(params_spec, options_spec)

        optim_paras, num_agents_sim, edu_spec = dist_class_attributes(
            respy_obj, "optim_paras", "num_agents_sim", "edu_spec")

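        # Write the type assignments, initial schooling levels, and lagged activities
        # to disk so that both simulations below start from the same initial conditions.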
        write_types(optim_paras["type_shares"], num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_lagged_start(num_agents_sim)

        # Iterate over alternative discount rates.
        base_data, base_val = None, None

        for delta in [0.00, 0.000001]:

            respy_obj = RespyCls(params_spec, options_spec)

            respy_obj.unlock()

            respy_obj.attr["optim_paras"]["delta"] = np.array([delta])

            respy_obj.lock()

            simulate_observed(respy_obj)

            # This part checks the equality of the simulated datasets for the
            # different values of the discount rate.
            data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val,
                                       crit_val,
                                       rtol=1e-03,
                                       atol=1e-03)

def run(hours):

    start, timeout = datetime.now(), timedelta(hours=hours)

    count = 0
    while True:
        print("COUNT", count)
        count += 1
        # Generate random initialization file
        constr = {
            "program": {
                "version": "fortran"
            },
            "estimation": {
                "maxfun": np.random.randint(0, 50),
                "optimizer": "FORT-BOBYQA",
            },
        }
        params_spec, options_spec = generate_random_model(point_constr=constr)

        base = None
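        # The criterion value must be identical regardless of whether the request is
        # solved and estimated in parallel or not.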
        for is_parallel in [True, False]:

            if is_parallel is False:
                options_spec["program"]["threads"] = 1
                options_spec["program"]["procs"] = 1
            else:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(
                        2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)
            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)

        #  Timeout.
        if timeout < datetime.now() - start:
            break
    def test_1(self):
        """Ensure that it makes no difference whether the
        criterion function is evaluated in parallel or not.
        """
        # Generate random initialization file
        constr = {
            "program": {
                "version": "fortran"
            },
            "estimation": {
                "maxfun": np.random.randint(0, 50)
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        # If delta is not fixed, we need to ensure a bound-constrained optimizer.
        # However, this is not the standard flag_estimation case, as the number of
        # function evaluations is possibly much larger in order to detect differences
        # in the updates of the optimizer steps depending on the implementation.
        if params_spec.loc[("delta", "delta"), "fixed"] is False:
            options_spec["estimation"]["optimizer"] = "FORT-BOBYQA"

        base = None
        for is_parallel in [True, False]:
            options_spec["program"]["threads"] = 1
            options_spec["program"]["procs"] = 1

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(
                        2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)
            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)
def test_single_regression(regression_vault, index):
    """Run a single regression test."""
    attr, crit_val = regression_vault[index]

    if not IS_PARALLELISM_OMP or not IS_FORTRAN:
        attr["num_threads"] = 1

    if not IS_PARALLELISM_MPI or not IS_FORTRAN:
        attr["num_procs"] = 1

    if not IS_FORTRAN:
        attr["version"] = "python"
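        # The optimizer stored in the vault might be FORTRAN-only, so fall back to a
        # Python optimizer.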
        if attr["optimizer_used"] not in OPT_EST_PYTH:
            attr["optimizer_used"] = OPT_EST_PYTH[2]

    params_spec = _params_spec_from_attributes(attr)
    options_spec = _options_spec_from_attributes(attr)
    respy_obj = RespyCls(params_spec, options_spec)

    simulate_observed(respy_obj)

    est_val = respy_obj.fit()[1]

    assert np.isclose(est_val, crit_val, rtol=TOL, atol=TOL)

def run(request, is_compile, is_background, is_strict, num_procs):
    """ Run the regression tests.
    """
    if is_compile:
        compile_package(True)

    # We can set up a multiprocessing pool right away.
    mp_pool = mp.Pool(num_procs)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    # Process command line arguments
    is_creation = False
    is_investigation, is_check = False, False
    num_tests, idx = None, None

    if request[0] == "create":
        is_creation, num_tests = True, int(request[1])
    elif request[0] == "check":
        is_check, num_tests = True, int(request[1])
    elif request[0] == "investigate":
        is_investigation, idx = True, int(request[1])
    else:
        raise AssertionError("request in [create, check, investigate]")
    if num_tests is not None:
        assert num_tests > 0
    if idx is not None:
        assert idx >= 0

    if is_investigation:
        fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
        with open(fname, "rb") as p:
            tests = pickle.load(p)

        attr, crit_val = tests[idx]
        params_spec = _params_spec_from_attributes(attr)
        options_spec = _options_spec_from_attributes(attr)

        respy_obj = RespyCls(params_spec, options_spec)

        simulate_observed(respy_obj)

        result = respy_obj.fit()[1]
        np.testing.assert_almost_equal(result, crit_val, decimal=DECIMALS)

    if is_creation:
        # We maintain the separate execution in the case of a single processor for
        # debugging purposes. The error messages are generally much more informative.
        if num_procs == 1:
            tests = []
            for idx in range(num_tests):
                tests += [create_single(idx)]
        else:
            tests = mp_pool.map(create_single, range(num_tests))

        with open(TEST_RESOURCES_DIR / "regression_vault.pickle", "wb") as p:
            pickle.dump(tests, p)
        return

    if is_check:
        fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
        with open(fname, "rb") as p:
            tests = pickle.load(p)

        run_single = partial(check_single, tests)
        indices = list(range(num_tests))

        # We maintain the separate execution in the case of a single processor for
        # debugging purposes. The error messages are generally much more informative.
        if num_procs == 1:
            ret = []
            for index in indices:
                ret += [run_single(index)]
                # We need an early termination if a strict test run is requested.
                if is_strict and (False in ret):
                    break
        else:
            ret = []
            for chunk in get_chunks(indices, num_procs):
                ret += mp_pool.map(run_single, chunk)
                # We need an early termination if a strict test run is requested. So we
                # check whether there are any failures in the last batch.
                if is_strict and (False in ret):
                    break

        # This allows this test to be called from another script that runs other
        # tests as well.
        idx_failures = [i for i, x in enumerate(ret) if x not in [True, None]]
        is_failure = False in ret

        if len(idx_failures) > 0:
            is_failure = True

        if not is_background:
            send_notification(
                "regression", is_failed=is_failure, idx_failures=idx_failures
            )

        return not is_failure
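
# get_chunks() is used above but not shown in this excerpt. The following is a minimal
# sketch of such a helper, assuming it simply yields consecutive batches of at most
# ``size`` indices so that each batch can be mapped onto the worker pool. The name and
# body below are illustrative assumptions, not the original implementation.
def _get_chunks_sketch(candidates, size):
    """Yield consecutive chunks of at most ``size`` elements from ``candidates``."""
    for i in range(0, len(candidates), size):
        yield candidates[i:i + size]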

def run_robustness_test(seed, is_investigation):
    """Run a single robustness test."""
    passed = True
    error_message = None
    np.random.seed(seed)
    old_dir = os.getcwd()
    t = str(time())[-6:]
    if is_investigation is True:
        new_dir = join(old_dir, str(seed))
        if exists(new_dir):
            rmtree(new_dir)
        os.mkdir(new_dir)
    else:
        new_dir = join(old_dir, str(seed) + "_" + t)
        os.mkdir(new_dir)
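    # Copy the empirical dataset into the fresh working directory so that the
    # estimation request defined below can read it.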
    for file in ["career_data.respy.dat", "career_data.respy.pkl"]:
        copy(join(old_dir, file), join(new_dir, file))
    os.chdir(new_dir)

    # We need to impose some constraints so that the random initialization file meets
    # the structure of the empirical dataset. We need to be particularly careful with
    # the construction of the maximum level of schooling as we need to rule out that
    # anyone in the estimation sample has a value larger than the specified maximum
    # value.
    version = np.random.choice(["python", "fortran"])
    if version == "python":
        max_periods = 3
    else:
        max_periods = 10

    num_periods = np.random.randint(1, max_periods)
    agents = np.random.randint(500, 1372 + 1)
    edu_start = np.random.choice(range(7, 12))

    constr = {
        "num_periods": num_periods,
        "edu_spec": {
            "start": [int(edu_start)],
            "max": np.random.randint(edu_start + num_periods, 30),
        },
        "estimation": {
            "file": "career_data.respy.dat",
            "agents": agents,
            "maxfun": np.random.randint(1, 5),
        },
        "program": {
            "version": version
        },
    }

    if version == "fortran":
        constr["estimation"]["optimizer"] = "FORT-BOBYQA"
    if version == "python":
        constr["estimation"]["optimizer"] = "SCIPY-LBFGSB"

    params_spec, options_spec = generate_random_model(point_constr=constr)

    try:
        respy_obj = RespyCls(params_spec, options_spec)
        if is_investigation:
            write_out_model_spec(respy_obj.attr, str(seed))
        respy_obj.fit()
    except Exception:
        tb = traceback.format_exc()
        passed = False
        error_message = str(tb)

    os.chdir(old_dir)
    if is_investigation is False:
        rmtree(new_dir)
    return passed, error_message

    def test_2(self):
        """Compare results from an evaluation of the criterion function at the initial
        values."""
        args = generate_constraints_dict()
        params_spec, options_spec = generate_random_model(**args)
        params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

        max_draws = args["bound_constr"]["max_draws"]

        # At this point, the random initialization file only provides diagonal
        # covariances.
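        # Replace them with a dense covariance matrix, parameterized through the
        # lower-triangular elements of its Cholesky factor, and drop the bounds on the
        # shock parameters.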
        cov_sampled = np.random.uniform(0, 0.01, size=(4, 4)) + np.diag(
            np.random.uniform(1.0, 1.5, size=4)
        )
        chol = np.linalg.cholesky(cov_sampled)
        coeffs = chol[np.tril_indices(4)]
        params_spec.loc["shocks", "para"] = coeffs
        params_spec.loc["shocks", "upper"] = np.nan
        params_spec.loc["shocks", "lower"] = np.nan

        respy_obj = RespyCls(params_spec, options_spec)

        # This flag aligns the random components between the RESTUD program and RESPY
        # package. The existence of the file leads the RESTUD program to write out
        # the random components.
        (
            optim_paras,
            edu_spec,
            num_agents_est,
            num_periods,
            num_draws_emax,
            num_draws_prob,
            tau,
            num_agents_sim,
        ) = dist_class_attributes(
            respy_obj,
            "optim_paras",
            "edu_spec",
            "num_agents_est",
            "num_periods",
            "num_draws_emax",
            "num_draws_prob",
            "tau",
            "num_agents_sim",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
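        # Recover the covariance matrix of the shocks from its Cholesky factor.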
        cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

        # Simulate sample model using RESTUD code.
        transform_respy_to_restud_sim(
            optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
        )

        open(".restud.testing.scratch", "a").close()
        cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
        subprocess.check_call(cmd, shell=True)

        transform_respy_to_restud_est(
            optim_paras,
            edu_spec,
            num_agents_est,
            num_draws_prob,
            tau,
            num_periods,
            num_draws_emax,
            cov,
        )

        filenames = ["in.txt", TEST_RESOURCES_DIR / "in_bottom.txt"]
        with open("in1.txt", "w") as outfile:
            for fname in filenames:
                with open(fname) as infile:
                    outfile.write(infile.read())

        draws_standard = np.random.multivariate_normal(
            np.zeros(4), np.identity(4), (num_periods, max_draws)
        )

        with open(".draws.respy.test", "w") as file_:
            for period in range(num_periods):
                for i in range(max_draws):
                    fmt = " {0:15.10f} {1:15.10f} {2:15.10f} {3:15.10f}\n"
                    line = fmt.format(*draws_standard[period, i, :])
                    file_.write(line)

        # We always need the seed.txt
        shutil.copy(str(TEST_RESOURCES_DIR / "seed.txt"), "seed.txt")
        cmd = str(TEST_RESOURCES_BUILD / "kw_dpml4a")
        subprocess.check_call(cmd, shell=True)
        Path("seed.txt").unlink()

        with open("output1.txt", "r") as searchfile:
            # Search file for strings, trim lines and save as variables
            for line in searchfile:
                if "OLD LOGLF=" in line:
                    stat = float(shlex.split(line)[2])
                    break

        # Now we also evaluate the criterion function with the RESPY package.
        restud_sample_to_respy()
        respy_obj = respy.RespyCls(params_spec, options_spec)
        respy_obj.attr["file_est"] = "ftest.respy.dat"

        open(".restud.respy.scratch", "a").close()
        _, val = respy_obj.fit()
        Path(".restud.respy.scratch").unlink()

        # This ensures that the two values are within 1% of the RESPY value.
        np.testing.assert_allclose(
            abs(stat), abs(val * num_agents_est), rtol=0.01, atol=0.00
        )

    def test_10(self):
        """ This test ensures that the order of the initial schooling levels specified
        in the initialization files does not matter for the simulation of a dataset and
        subsequent evaluation of the criterion function.

        Warning
        -------
        This test fails if types have identical intercepts, as no unique ordering can
        be determined then.

        """
        point_constr = {
            "estimation": {
                "maxfun": 0
            },
            # We cannot allow for interpolation as the order of states within each
            # period changes and thus the prediction model is altered even if the same
            # state identifier is used.
            "interpolation": {
                "flag": False
            },
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr)

        respy_obj = RespyCls(params_spec, options_spec)

        edu_baseline_spec, num_types, num_paras, optim_paras = dist_class_attributes(
            respy_obj, "edu_spec", "num_types", "num_paras", "optim_paras")

        # We want to randomly shuffle the list of initial schooling levels but need to
        # keep the corresponding shares and lagged probabilities aligned with them.
        edu_shuffled_start = np.random.permutation(
            edu_baseline_spec["start"]).tolist()

        edu_shuffled_share, edu_shuffled_lagged = [], []
        for start in edu_shuffled_start:
            idx = edu_baseline_spec["start"].index(start)
            edu_shuffled_lagged += [edu_baseline_spec["lagged"][idx]]
            edu_shuffled_share += [edu_baseline_spec["share"][idx]]

        edu_shuffled_spec = copy.deepcopy(edu_baseline_spec)
        edu_shuffled_spec["lagged"] = edu_shuffled_lagged
        edu_shuffled_spec["start"] = edu_shuffled_start
        edu_shuffled_spec["share"] = edu_shuffled_share

        # We are only looking at a single evaluation as otherwise the reordering affects
        # the optimizer that is trying better parameter values one-by-one. The
        # reordering might also violate the bounds.
        for i in range(53, num_paras):
            optim_paras["paras_bounds"][i] = [None, None]
            optim_paras["paras_fixed"][i] = False

        # We need to ensure that the baseline type is still in the first position.
        types_order = [0] + np.random.permutation(range(1, num_types)).tolist()

        type_shares = []
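        # Regroup the flat vector of type shares into one pair of coefficients per
        # type so that the shares can be permuted together with the type shifts.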
        for i in range(num_types):
            lower, upper = i * 2, (i + 1) * 2
            type_shares += [optim_paras["type_shares"][lower:upper].tolist()]

        optim_paras_baseline = copy.deepcopy(optim_paras)
        optim_paras_shuffled = copy.deepcopy(optim_paras)

        list_ = [
            optim_paras["type_shifts"][i, :].tolist() for i in types_order
        ]
        optim_paras_shuffled["type_shifts"] = np.array(list_)

        list_ = [type_shares[i] for i in types_order]
        optim_paras_shuffled["type_shares"] = np.array(list_).flatten()

        base_data, base_val = None, None

        k = 0

        for optim_paras in [optim_paras_baseline, optim_paras_shuffled]:
            for edu_spec in [edu_baseline_spec, edu_shuffled_spec]:

                respy_obj.unlock()
                respy_obj.set_attr("edu_spec", edu_spec)
                respy_obj.lock()

                # There is some more work to do to update the coefficients as we
                # distinguish between the economic and optimization versions of the
                # parameters.
                x = get_optim_paras(optim_paras, num_paras, "all", True)
                shocks_cholesky, _ = extract_cholesky(x)
                shocks_coeffs = cholesky_to_coeffs(shocks_cholesky)
                x[43:53] = shocks_coeffs
                respy_obj.update_optim_paras(x)

                respy_obj.reset()

                simulate_observed(respy_obj)

                # This part checks the equality of the simulated datasets.
                data_frame = pd.read_csv("data.respy.dat",
                                         delim_whitespace=True)

                if base_data is None:
                    base_data = data_frame.copy()

                assert_frame_equal(base_data, data_frame)

                # This part checks the equality of a single function evaluation.
                _, val = respy_obj.fit()
                if base_val is None:
                    base_val = val
                np.testing.assert_almost_equal(base_val, val)

                respy_obj.reset()
                k += 1

    def test_3(self):
        """Ensure that the log looks exactly the same for different versions."""
        max_draws = np.random.randint(10, 100)

        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        point_constr = {
            "interpolation": {"flag": False},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_obj = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras, file_sim = dist_class_attributes(
            respy_obj, "num_agents_sim", "optim_paras", "file_sim"
        )

        # Iterate over alternative implementations
        base_sol_log, base_est_info, base_est_log = None, None, None
        base_sim_log = None

        type_shares = respy_obj.attr["optim_paras"]["type_shares"]
        num_periods = options_spec["num_periods"]

        edu_spec = options_spec["edu_spec"]

        write_draws(num_periods, max_draws)
        write_types(type_shares, num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_lagged_start(num_agents_sim)

        for version in ["fortran", "python"]:

            respy_obj.unlock()

            respy_obj.set_attr("version", version)

            respy_obj.lock()

            simulate_observed(respy_obj)

            # Check for identical logging
            fname = file_sim + ".respy.sol"
            if base_sol_log is None:
                base_sol_log = open(fname, "r").read()
            assert open(fname, "r").read() == base_sol_log

            # Check for identical logging
            fname = file_sim + ".respy.sim"
            if base_sim_log is None:
                base_sim_log = open(fname, "r").read()
            assert open(fname, "r").read() == base_sim_log

            respy_obj.fit()

            if base_est_info is None:
                base_est_info = open("est.respy.info", "r").read()
            assert open("est.respy.info", "r").read() == base_est_info

            if base_est_log is None:
                base_est_log = open("est.respy.log", "r").readlines()
            compare_est_log(base_est_log)
    def test_8(self):
        """ We ensure that the number of initial conditions does not matter for the
        evaluation of the criterion function if a weight of one is put on the first
        group.
        """
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {
                "agents": num_agents
            },
            "num_periods": np.random.randint(1, 4),
            "edu_spec": {
                "max": np.random.randint(15, 25, size=1).tolist()[0]
            },
            "estimation": {
                "maxfun": 0,
                "agents": num_agents
            },
            "interpolation": {
                "flag": False
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)

        base_val = None
        edu_start_base = np.random.randint(1, 5, size=1).tolist()[0]

        # We need to ensure that the initial lagged activity always has the same
        # distribution.
        edu_lagged_base = np.random.uniform(size=5).tolist()

        for num_edu_start in [1, np.random.choice([2, 3, 4]).tolist()]:

            # We always need to ensure that a weight of one is on the first level of
            # initial schooling.
            options_spec["edu_spec"]["share"] = [1.0] + [0.0] * (num_edu_start - 1)
            options_spec["edu_spec"]["lagged"] = edu_lagged_base[:num_edu_start]

            # We need to make sure that the baseline level of initial schooling is
            # always included. At the same time we cannot have any duplicates.
            edu_start = np.random.choice(range(1, 10),
                                         size=num_edu_start,
                                         replace=False).tolist()
            if edu_start_base in edu_start:
                edu_start.remove(edu_start_base)
                edu_start.insert(0, edu_start_base)
            else:
                edu_start[0] = edu_start_base

            options_spec["edu_spec"]["start"] = edu_start

            respy_obj = RespyCls(params_spec, options_spec)
            simulate_observed(respy_obj)
            _, val = respy_obj.fit()
            if base_val is None:
                base_val = val

            np.testing.assert_almost_equal(base_val, val)

    def test_1(self):
        """ Test the equality of an evaluation of the criterion function for a random
        request.
        """
        # Run evaluation for multiple random requests.
        is_deterministic = np.random.choice([True, False], p=[0.10, 0.9])
        is_interpolated = bool(np.random.choice([True, False], p=[0.10, 0.9]))
        is_myopic = np.random.choice([True, False], p=[0.10, 0.9])
        max_draws = np.random.randint(11, 100)
        num_agents = np.random.randint(10, max_draws)

        bound_constr = {"max_draws": max_draws}
        point_constr = {
            "interpolation": {"flag": is_interpolated},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0, "agents": num_agents},
            "simulation": {"agents": num_agents},
            "num_periods": np.random.randint(1, 5),
        }

        num_types = np.random.randint(2, 5)

        if is_interpolated:
            point_constr["num_periods"] = np.random.randint(3, 5)

        params_spec, options_spec = generate_random_model(
            bound_constr=bound_constr,
            point_constr=point_constr,
            deterministic=is_deterministic,
            myopic=is_myopic,
            num_types=num_types,
        )

        edu_spec = options_spec["edu_spec"]
        num_periods = point_constr["num_periods"]

        # The use of the interpolation routines is another special case. Constructing
        # a request that actually involves the use of the interpolation routine is a
        # little involved as the number of interpolation points needs to be lower than
        # the actual number of states. And to know the number of states in each period,
        # we need to construct the whole state space.
        if is_interpolated:
            state_space = StateSpace(
                num_periods, num_types, edu_spec["start"], edu_spec["max"]
            )

            max_states_period = state_space.states_per_period.max()

            options_spec["interpolation"]["points"] = np.random.randint(
                10, max_states_period
            )

        # Write out random components and interpolation grid to align the different
        # implementations.
        write_draws(num_periods, max_draws)
        respy_obj = RespyCls(params_spec, options_spec)
        write_interpolation_grid(respy_obj)

        type_shares = respy_obj.attr["optim_paras"]["type_shares"]

        write_types(type_shares, num_agents)
        write_edu_start(edu_spec, num_agents)
        write_lagged_start(num_agents)

        # Clean evaluations based on the interpolation grid.
        base_val, base_data = None, None

        for version in ["python", "fortran"]:
            respy_obj = RespyCls(params_spec, options_spec)

            # Modify the version of the program for the different requests.
            respy_obj.unlock()
            respy_obj.set_attr("version", version)
            respy_obj.lock()

            # Solve the model
            respy_obj = simulate_observed(respy_obj)

            # This part checks the equality of the simulated datasets for the
            # different versions of the code.
            data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val, crit_val, rtol=1e-05, atol=1e-06)

            # We know even more for the deterministic case.
            if is_deterministic:
                assert crit_val in [-1.0, 0.0]
def run_estimation(which):
    """ Run an estimation with the respective release.
    """
    os.chdir(which)

    import numpy as np

    from respy import RespyCls

    from respy.pre_processing.model_processing import write_init_file

    # We need to make sure that the function simulate_observed() is imported from the original
    # package. Otherwise dependencies might not work properly.
    import respy

    sys.path.insert(0, os.path.dirname(respy.__file__) + "/tests")
    from respy.tests.codes.auxiliary import simulate_observed

    init_dict = json.load(open("init_dict.respy.json", "r"))

    # There was a change in the setup for releases after 1.0.0. This is only required
    # when comparing to v1.0.0.
    if "1.0.0" in sys.executable:
        init_dict["SHOCKS"]["fixed"] = np.array(init_dict["SHOCKS"]["fixed"])

    write_init_file(init_dict)

    respy_obj = RespyCls("test.respy.ini")

    # This flag ensures a clean switch to the synthetic simulation for cases where the
    # simulate_observed() was changed in between releases.
    if os.path.exists("../.simulate_observed.cfg"):
        respy_obj.simulate()
    else:
        simulate_observed(respy_obj)

    # This flag ensures that the change in the truncation of the wage variable has no
    # effect. We simply copy the dataset from the old release to the new one.
    if ("2.0.0.dev20" in sys.executable) and ("/new" in os.getcwd()):
        fnames = glob.glob("data.respy.*")
        for fname in fnames:
            shutil.copy("../old/" + fname, ".")

    # Moving from 2.0.0.dev17 to 2.0.0.dev18 breaks the equality because the simulated datasets
    # differ. So, we just copy the one from old. However, this is only relevant if 2.0.0.dev18 is
    # the candidate.
    if ("2.0.0.dev18" in sys.executable) and ("/new" in os.getcwd()):
        os.chdir("../old")
        files = glob.glob("data.respy.*")
        for file in files:
            shutil.copy(file, "../new")
        os.chdir("../new")

    _, crit_val = respy_obj.fit()

    # There was a bug in version 1.0 which might lead to crit_val not actually taking
    # the lowest value that was visited by the optimizer. So, we reprocess the log file
    # again to be sure.
    if "1.0.0" in sys.executable:
        crit_val = 1e10
        with open("est.respy.log") as infile:
            for line in infile.readlines():
                list_ = shlex.split(line)
                # Skip empty lines
                if not list_:
                    continue
                # Process candidate value
                if list_[0] == "Criterion":
                    try:
                        value = float(list_[1])
                        if value < crit_val:
                            crit_val = value
                    except ValueError:
                        pass

    pkl.dump(crit_val, open("crit_val.respy.pkl", "wb"))

    os.chdir("../")