Example #1
    def test_4(self):
        """ Test the evaluation of the criterion function for random requests, not just
        at the true values.
        """
        # Constraints that ensure that two alternative initialization files can be used
        # for the same simulated data.
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {
                "agents": num_agents
            },
            "num_periods": np.random.randint(1, 4),
            "edu_spec": {
                "start": [7],
                "max": 15,
                "share": [1.0]
            },
            "estimation": {
                "maxfun": 0,
                "agents": num_agents
            },
        }

        # Simulate a dataset
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)

        # Evaluate at different points, ensuring that the simulated dataset still fits.
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj.fit()
Example #2
    def test_2(self):
        """ If there is no random variation in rewards then the number of draws to simulate the
        expected future value should have no effect.
        """
        params_spec, options_spec = generate_random_model(deterministic=True)

        # Initialize auxiliary objects
        base = None

        for _ in range(2):
            num_draws_emax = np.random.randint(1, 100)
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj.unlock()
            respy_obj.set_attr("num_draws_emax", num_draws_emax)
            respy_obj.lock()
            respy_obj = simulate_observed(respy_obj)
            periods_emax = respy_obj.get_attr("periods_emax")

            if base is None:
                base = periods_emax.copy()

            diff = np.max(
                abs(
                    np.ma.masked_invalid(base) -
                    np.ma.masked_invalid(periods_emax)))
            np.testing.assert_almost_equal(diff, 0.0)
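
The `np.ma.masked_invalid` comparison above presumably tolerates NaN/inf padding in periods with fewer states. A minimal, self-contained sketch of the same pattern (the arrays here are invented for illustration):

import numpy as np

# Ragged "period x state" arrays padded with NaN where a state does not exist.
base = np.array([[1.0, 2.0, np.nan], [3.0, np.nan, np.nan]])
other = np.array([[1.0, 2.0, np.nan], [3.0, np.nan, np.nan]])

# Masking invalid entries restricts the comparison to cells defined in both
# arrays; the padded cells drop out of the maximum absolute difference.
diff = np.max(np.abs(np.ma.masked_invalid(base) - np.ma.masked_invalid(other)))
np.testing.assert_almost_equal(diff, 0.0)
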
Example #3
    def test_12(self):
        """ Testing the functionality introduced to ensure that the simulation is
        independent of the order of initial conditions and types in the initialization
        file.

        """
        num_elements = np.random.randint(1, 11)

        input_array = np.random.normal(size=num_elements)

        # We first check the sorting implementation.
        py = sorted(input_array)
        f90 = fort_debug.wrapper_sorted(input_array, num_elements)
        assert_equal(py, f90)

        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)

        edu_spec, optim_paras, num_types = dist_class_attributes(
            respy_obj, "edu_spec", "optim_paras", "num_types")

        args = (edu_spec["start"], edu_spec["share"], edu_spec["max"])
        f90 = fort_debug.wrapper_sort_edu_spec(*args)
        py = sort_edu_spec(edu_spec)
        for i, label in enumerate(["start", "share", "max"]):
            assert_equal(py[label], f90[i])

        py = sort_type_info(optim_paras, num_types)
        f90 = fort_debug.wrapper_sort_type_info(optim_paras["type_shares"],
                                                num_types)
        for i, label in enumerate(["order", "shares"]):
            assert_equal(py[label], f90[i])
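
For intuition, a hypothetical stand-in for what `sort_edu_spec` is expected to do (this is not the respy implementation): order the initial schooling levels and carry the associated shares along, leaving the scalar maximum untouched.

def sort_edu_spec_sketch(edu_spec):
    """Sort start levels ascending and keep the shares aligned (illustrative only)."""
    order = sorted(range(len(edu_spec["start"])), key=lambda i: edu_spec["start"][i])
    return {
        "start": [edu_spec["start"][i] for i in order],
        "share": [edu_spec["share"][i] for i in order],
        "max": edu_spec["max"],
    }

print(sort_edu_spec_sketch({"start": [10, 7], "share": [0.4, 0.6], "max": 15}))
# {'start': [7, 10], 'share': [0.6, 0.4], 'max': 15}
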
Example #4
    def test_2(self):
        """ This test compares the results from a solution using the interpolation code
        for the special case where the number of interpolation points is exactly the
        number of states in the final period. In this case the interpolation code is run
        and then all predicted values replaced with their actual values.
        """
        # Set initial constraints
        # Set initial constraints
        constr = {"interpolation": {"flag": False}}

        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          deterministic=True)
        baseline = None

        # Solve with and without interpolation code
        for _ in range(2):
            # Process and solve
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax = dist_class_attributes(
                respy_obj, "states_number_period", "periods_emax")

            # Store and check results
            if baseline is None:
                baseline = periods_emax
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration
            options_spec["interpolation"]["points"] = max(states_number_period)
            options_spec["interpolation"]["flag"] = True
Example #5
    def test_6(self):
        """ Test short estimation tasks.
        """
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {
                "agents": num_agents
            },
            "num_periods": np.random.randint(1, 4),
            "estimation": {
                "maxfun": np.random.randint(0, 5),
                "agents": num_agents
            },
        }

        # Simulate a dataset
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)

        write_interpolation_grid(respy_obj)

        # Run estimation task.
        simulate_observed(respy_obj)
        base_x, base_val = respy_obj.fit()

        # We also check whether updating the class instance and a single evaluation of
        # the criterion function give the same result.
        respy_obj.update_optim_paras(base_x)
        respy_obj.attr["maxfun"] = 0

        alt_x, alt_val = respy_obj.fit()

        for arg in [(alt_val, base_val), (alt_x, base_x)]:
            np.testing.assert_almost_equal(arg[0], arg[1])
Example #6
    def test_5(self):
        """ Test the scripts.
        """
        # Constraints that ensure that two alternative initialization files can be used
        # for the same simulated data.
        for _ in range(10):
            num_agents = np.random.randint(5, 100)
            constr = {
                "simulation": {
                    "agents": num_agents
                },
                "num_periods": np.random.randint(1, 4),
                "edu_spec": {
                    "start": [7],
                    "max": 15,
                    "share": [1.0]
                },
                "estimation": {
                    "maxfun": 0,
                    "agents": num_agents
                },
            }
            # Simulate a dataset
            params_spec, options_spec = generate_random_model(
                point_constr=constr)
            respy_obj = RespyCls(params_spec, options_spec)
            simulate_observed(respy_obj)

            # Create output to process a baseline.
            respy_obj.unlock()
            respy_obj.set_attr("maxfun", 0)
            respy_obj.lock()

            respy_obj.fit()

            # Potentially evaluate at different points.
            params_spec, options_spec = generate_random_model(
                point_constr=constr)
            respy_obj = RespyCls(params_spec, options_spec)

            single = np.random.choice([True, False])

            scripts_check("estimate", respy_obj)
            scripts_estimate(single, respy_obj)
Example #7
    def test_2(self):
        """ This test ensures that the record files are identical.
        """
        # Generate a random initialization file. The number of periods is higher than
        # usual as only the FORTRAN implementation is used to solve the random request.
        # This ensures that some cases of interpolation are also explored.
        constr = {
            "program": {
                "version": "fortran"
            },
            "num_periods": np.random.randint(3, 10),
            "estimation": {
                "maxfun": 0
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        base_sol_log, base_est_info_log = None, None
        base_est_log = None

        for is_parallel in [False, True]:

            options_spec["program"]["threads"] = 1
            options_spec["program"]["procs"] = 1

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(
                        2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)

            file_sim = respy_obj.get_attr("file_sim")

            simulate_observed(respy_obj)

            respy_obj.fit()

            # Check for identical records
            fname = file_sim + ".respy.sol"

            if base_sol_log is None:
                base_sol_log = open(fname, "r").read()

            assert open(fname, "r").read() == base_sol_log

            if base_est_info_log is None:
                base_est_info_log = open("est.respy.info", "r").read()
            assert open("est.respy.info", "r").read() == base_est_info_log

            if base_est_log is None:
                base_est_log = open("est.respy.log", "r").readlines()
            compare_est_log(base_est_log)
Example #8
    def test_2(self):
        """Ensure that the evaluation of the criterion is equal across versions."""
        max_draws = np.random.randint(10, 100)

        # It seems to be important that max_draws and max_agents are the same number
        # because otherwise some functions that read draws from a file to ensure
        # compatibility of the Fortran and Python versions will not work.
        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        point_constr = {
            "interpolation": {"flag": False},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_obj = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras = dist_class_attributes(
            respy_obj, "num_agents_sim", "optim_paras"
        )

        type_shares = optim_paras["type_shares"]

        # Simulate a dataset
        simulate_observed(respy_obj)

        # Iterate over alternative implementations
        base_x, base_val = None, None

        num_periods = options_spec["num_periods"]

        write_draws(num_periods, max_draws)
        write_types(type_shares, num_agents_sim)

        for version in ["python", "fortran"]:

            respy_obj.unlock()

            respy_obj.set_attr("version", version)

            respy_obj.lock()

            x, val = respy_obj.fit()

            # Check for the returned parameters.
            if base_x is None:
                base_x = x
            np.testing.assert_allclose(base_x, x)

            # Check for the value of the criterion function.
            if base_val is None:
                base_val = val
            np.testing.assert_allclose(base_val, val)
Example #9
def create_single(idx):
    """ This function creates a single test.
    """
    dirname = get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)

    # The late import is required so a potentially just compiled FORTRAN implementation
    # is recognized. This is important for the creation of the regression vault as we
    # want to include FORTRAN use cases.
    from respy import RespyCls

    # We impose a couple of constraints that make the requests manageable.
    np.random.seed(idx)

    version = np.random.choice(["python", "fortran"])

    # Only choose from constrained optimizers because we always have some bounds.
    if version == "python":
        optimizer = "SCIPY-LBFGSB"
    else:
        optimizer = "FORT-BOBYQA"

    constr = {
        "program": {
            "version": version
        },
        "preconditioning": {
            "type": np.random.choice(["identity", "magnitudes"])
        },
        "estimation": {
            "maxfun":
            int(np.random.choice(range(6), p=[0.5, 0.1, 0.1, 0.1, 0.1, 0.1])),
            "optimizer":
            optimizer,
        },
    }
    constr["flag_estimation"] = True

    param_spec, options_spec = generate_random_model(point_constr=constr)
    respy_obj = RespyCls(param_spec, options_spec)
    simulate_observed(respy_obj)
    crit_val = respy_obj.fit()[1]

    # In rare instances, the value of the criterion function might be too large and thus
    # printed as a string. This occurred in the past, when the gradient preconditioning
    # had zero probability observations. We now generate random initialization files
    # with smaller gradient step sizes.
    if not isinstance(crit_val, float):
        raise AssertionError(" ... value of criterion function too large.")

    # Cleanup of temporary directories.
    os.chdir("../")
    shutil.rmtree(dirname)

    return respy_obj.attr, crit_val
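
The directory handling in `create_single` (create a throwaway directory, run inside it, clean up afterwards) can also be expressed as a context manager. A sketch under the assumption that nothing else depends on the directory name; respy's `get_random_dirname` is replaced by a `uuid`-based stand-in:

import contextlib
import os
import shutil
import uuid


@contextlib.contextmanager
def temporary_workdir():
    """Run a block of work inside a throwaway directory and clean up afterwards."""
    cwd = os.getcwd()
    dirname = uuid.uuid4().hex[:5]  # stand-in for get_random_dirname(5)
    os.mkdir(dirname)
    os.chdir(dirname)
    try:
        yield dirname
    finally:
        os.chdir(cwd)
        shutil.rmtree(os.path.join(cwd, dirname))
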
Example #10
    def test_7(self):
        """ This is a special test for shared functions related to the interpolation setup.
        """
        # Impose constraints
        point_constr = {"num_periods": np.random.randint(2, 5)}

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr)
        respy_obj = RespyCls(params_spec, options_spec)

        # Extract class attributes
        is_debug, num_periods = dist_class_attributes(respy_obj, "is_debug",
                                                      "num_periods")

        # Write out a grid for the interpolation
        max_states_period = write_interpolation_grid(respy_obj)

        # Draw random request for testing
        num_states = np.random.randint(1, max_states_period)
        candidates = list(range(num_states))

        period = np.random.randint(1, num_periods)
        num_points_interp = np.random.randint(1, num_states + 1)

        # Check function for random choice and make sure that there are no duplicates.
        args = (candidates, num_states, num_points_interp)
        f90 = fort_debug.wrapper_random_choice(*args)
        assert_equal(len(set(f90)), len(f90))
        assert_equal(len(f90), num_points_interp)

        # Check the standard cases of the function.
        args = (num_points_interp, num_states, period, is_debug, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)

        assert_equal(len(f90), num_states)
        assert_equal(np.all(np.isin(f90, [0, 1])), True)

        # Test the standardization across PYTHON, F2PY, and FORTRAN implementations.
        # This is possible as we write out an interpolation grid to disk which is used
        # for both functions.
        base_args = (num_points_interp, num_states, period, is_debug)
        args = base_args
        py = get_simulated_indicator(*args)
        args = base_args + (num_periods, )
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        assert_array_equal(f90, 1 * py)
        os.unlink(".interpolation.respy.test")

        # Special case where the number of interpolation points is the same as the
        # number of candidates. In that case the returned indicator should be all TRUE.
        args = (num_states, num_states, period, True, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        assert_equal(sum(f90), num_states)
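
The duplicate-free random choice checked against `fort_debug.wrapper_random_choice` has a direct NumPy analogue, sketched here with made-up sizes:

import numpy as np

# Draw `num_points_interp` candidate states without replacement and verify that
# the selection contains no duplicates, mirroring the assertions in the test.
candidates = list(range(20))
num_points_interp = 5
chosen = np.random.choice(candidates, size=num_points_interp, replace=False)
assert len(set(chosen)) == len(chosen) == num_points_interp
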
Example #11
    def test_3(self):
        """ Testing whether the a simulated dataset and the evaluation of the criterion function
        are the same for a tiny delta and a myopic agent.
        """
        constr = {"estimation": {"maxfun": 0}}
        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          myopic=True)
        respy_obj = RespyCls(params_spec, options_spec)

        optim_paras, num_agents_sim, edu_spec = dist_class_attributes(
            respy_obj, "optim_paras", "num_agents_sim", "edu_spec")

        write_types(optim_paras["type_shares"], num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_lagged_start(num_agents_sim)

        # Iterate over alternative discount rates.
        base_data, base_val = None, None

        for delta in [0.00, 0.000001]:

            respy_obj = RespyCls(params_spec, options_spec)

            respy_obj.unlock()

            respy_obj.attr["optim_paras"]["delta"] = np.array([delta])

            respy_obj.lock()

            simulate_observed(respy_obj)

            # This part checks the equality of the simulated dataset for the different
            # discount factors.
            data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val,
                                       crit_val,
                                       rtol=1e-03,
                                       atol=1e-03)
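
The economic logic behind comparing `delta = 0` with a tiny positive `delta` is that the continuation value is scaled by the discount factor, so a near-zero `delta` leaves the choice-specific values essentially unchanged. A toy calculation with invented numbers:

# total value = flow reward + delta * EMAX of the next state
reward, emax = 2.5, 40.0
for delta in [0.0, 0.000001]:
    print(reward + delta * emax)  # 2.5 and 2.50004
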
Example #12
    def test_2(self):
        """ Testing whether the back and forth transformation works.
        """
        for _ in range(100):
            params_spec, options_spec = generate_random_model()
            # Process request and write out again.
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj.write_out("alt_respy")

            new_params_spec = _read_params_spec(Path("alt_respy.csv"))
            new_options_spec = _read_options_spec(Path("alt_respy.json"))

            assert options_spec == new_options_spec

            for col in params_spec.columns:
                assert_series_equal(params_spec[col], new_params_spec[col])
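
The back-and-forth pattern being tested (write a specification out, read it back, require equality) is generic. A minimal sketch with a plain JSON options file standing in for the respy formats:

import json
import os
import tempfile

spec = {"num_periods": 3, "edu_spec": {"start": [7], "max": 15, "share": [1.0]}}

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "alt_respy.json")
    with open(path, "w") as f:
        json.dump(spec, f)
    with open(path) as f:
        assert json.load(f) == spec
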
Example #13
    def test_10(self):
        """ Function that calculates the number of observations by individual.
        """
        for _ in range(2):
            params_spec, options_spec = generate_random_model()
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            num_agents_est = respy_obj.get_attr("num_agents_est")

            data_array = process_dataset(respy_obj).to_numpy()

            py = np.bincount(data_array[:, 0].astype(int))
            f90 = fort_debug.wrapper_get_num_obs_agent(data_array,
                                                       num_agents_est)

            assert_almost_equal(py, f90)
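
The Python side of this comparison is just `np.bincount` on the identifier column, which yields the number of observations per individual:

import numpy as np

identifiers = np.array([0, 0, 0, 1, 1, 2])  # first column of the data array
print(np.bincount(identifiers))             # [3 2 1] observations per individual
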
Example #14
def run(hours):

    start, timeout = datetime.now(), timedelta(hours=hours)

    count = 0
    while True:
        print("COUNT", count)
        count += 1
        # Generate random initialization file
        constr = {
            "program": {
                "version": "fortran"
            },
            "estimation": {
                "maxfun": np.random.randint(0, 50),
                "optimizer": "FORT-BOBYQA",
            },
        }
        params_spec, options_spec = generate_random_model(point_constr=constr)

        base = None
        for is_parallel in [True, False]:

            if is_parallel is False:
                options_spec["program"]["threads"] = 1
                options_spec["program"]["procs"] = 1
            else:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(
                        2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)
            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)

        #  Timeout.
        if timeout < datetime.now() - start:
            break
Example #15
    def test_1(self):
        """Ensure that it makes no difference whether the
        criterion function is evaluated in parallel or not.
        """
        # Generate random initialization file
        constr = {
            "program": {
                "version": "fortran"
            },
            "estimation": {
                "maxfun": np.random.randint(0, 50)
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)

        # If delta is not fixed, we need to ensure a bound-constrained optimizer.
        # However, this is not the standard flag_estimation setting, as the number of
        # function evaluations is possibly much larger, to detect any differences in
        # the updates of the optimizer steps depending on the implementation.
        if not params_spec.loc[("delta", "delta"), "fixed"]:
            options_spec["estimation"]["optimizer"] = "FORT-BOBYQA"

        base = None
        for is_parallel in [True, False]:
            options_spec["program"]["threads"] = 1
            options_spec["program"]["procs"] = 1

            if is_parallel:
                if IS_PARALLELISM_OMP:
                    options_spec["program"]["threads"] = np.random.randint(
                        2, 5)
                if IS_PARALLELISM_MPI:
                    options_spec["program"]["procs"] = np.random.randint(2, 5)

            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)
            _, crit_val = respy_obj.fit()

            if base is None:
                base = crit_val
            np.testing.assert_equal(base, crit_val)
Example #16
    def test_1(self):
        """ This is the special case where the EMAX better be equal to the MAXE.
        """
        # Set initial constraints
        constr = {
            "interpolation": {
                "flag": False
            },
            "num_periods": np.random.randint(3, 6),
        }

        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          deterministic=True)

        baseline = None

        a = []

        # Solve with and without interpolation code
        for _ in range(2):
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax, state_space = dist_class_attributes(
                respy_obj, "states_number_period", "periods_emax",
                "state_space")

            a.append(state_space)

            # Store and check results
            if baseline is None:
                baseline = periods_emax.copy()
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration. This ensures that there is at least one
            # interpolation taking place.
            options_spec["interpolation"]["points"] = max(
                states_number_period) - 1
            options_spec["interpolation"]["flag"] = True
Example #17
    def test_2(self):
        """Compare results from an evaluation of the criterion function at the initial
        values."""
        args = generate_constraints_dict()
        params_spec, options_spec = generate_random_model(**args)
        params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

        max_draws = args["bound_constr"]["max_draws"]

        # At this point, the random initialization file only provides diagonal
        # covariances.
        cov_sampled = np.random.uniform(0, 0.01, size=(4, 4)) + np.diag(
            np.random.uniform(1.0, 1.5, size=4)
        )
        chol = np.linalg.cholesky(cov_sampled)
        coeffs = chol[np.tril_indices(4)]
        params_spec.loc["shocks", "para"] = coeffs
        params_spec.loc["shocks", "upper"] = np.nan
        params_spec.loc["shocks", "lower"] = np.nan

        respy_obj = RespyCls(params_spec, options_spec)

        # This flag aligns the random components between the RESTUD program and the
        # RESPY package. The existence of the file leads the RESTUD program to write
        # out the random components.
        (
            optim_paras,
            edu_spec,
            num_agents_est,
            num_periods,
            num_draws_emax,
            num_draws_prob,
            tau,
            num_agents_sim,
        ) = dist_class_attributes(
            respy_obj,
            "optim_paras",
            "edu_spec",
            "num_agents_est",
            "num_periods",
            "num_draws_emax",
            "num_draws_prob",
            "tau",
            "num_agents_sim",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

        # Simulate sample model using RESTUD code.
        transform_respy_to_restud_sim(
            optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
        )

        open(".restud.testing.scratch", "a").close()
        cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
        subprocess.check_call(cmd, shell=True)

        transform_respy_to_restud_est(
            optim_paras,
            edu_spec,
            num_agents_est,
            num_draws_prob,
            tau,
            num_periods,
            num_draws_emax,
            cov,
        )

        filenames = ["in.txt", TEST_RESOURCES_DIR / "in_bottom.txt"]
        with open("in1.txt", "w") as outfile:
            for fname in filenames:
                with open(fname) as infile:
                    outfile.write(infile.read())

        draws_standard = np.random.multivariate_normal(
            np.zeros(4), np.identity(4), (num_periods, max_draws)
        )

        with open(".draws.respy.test", "w") as file_:
            for period in range(num_periods):
                for i in range(max_draws):
                    fmt = " {0:15.10f} {1:15.10f} {2:15.10f} {3:15.10f}\n"
                    line = fmt.format(*draws_standard[period, i, :])
                    file_.write(line)

        # We always need the seed.txt
        shutil.copy(str(TEST_RESOURCES_DIR / "seed.txt"), "seed.txt")
        cmd = str(TEST_RESOURCES_BUILD / "kw_dpml4a")
        subprocess.check_call(cmd, shell=True)
        Path("seed.txt").unlink()

        with open("output1.txt", "r") as searchfile:
            # Search file for strings, trim lines and save as variables
            for line in searchfile:
                if "OLD LOGLF=" in line:
                    stat = float(shlex.split(line)[2])
                    break

        # Now we also evaluate the criterion function with the RESPY package.
        restud_sample_to_respy()
        respy_obj = respy.RespyCls(params_spec, options_spec)
        respy_obj.attr["file_est"] = "ftest.respy.dat"

        open(".restud.respy.scratch", "a").close()
        _, val = respy_obj.fit()
        Path(".restud.respy.scratch").unlink()

        # This ensures that the two values are within 1% of the RESPY value.
        np.testing.assert_allclose(
            abs(stat), abs(val * num_agents_est), rtol=0.01, atol=0.00
        )
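
The covariance handling above replaces the diagonal specification with a full matrix and stores its lower-triangular Cholesky entries as the free parameters. A standalone sketch of that construction, with the random noise symmetrized so the reconstruction check holds:

import numpy as np

noise = np.random.uniform(0, 0.01, size=(4, 4))
cov = (noise + noise.T) / 2 + np.diag(np.random.uniform(1.0, 1.5, size=4))

chol = np.linalg.cholesky(cov)
coeffs = chol[np.tril_indices(4)]      # 10 free parameters for a 4 x 4 matrix
np.testing.assert_allclose(chol @ chol.T, cov)
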
Example #18
    def test_1(self):
        """Compare simulation results from the RESTUD program and the RESPY package."""
        args = generate_constraints_dict()
        params_spec, options_spec = generate_random_model(**args)
        params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

        # Indicate RESTUD code the special case of zero disturbance.
        open(".restud.testing.scratch", "a").close()

        # We need to indicate to the RESFORT code to rescale the experience covariates.
        open(".restud.respy.scratch", "a").close()

        # Perform toolbox actions
        respy_obj = RespyCls(params_spec, options_spec)

        # This flag aligns the random components between the RESTUD program and the
        # RESPY package. The existence of the file leads the RESTUD program to write
        # out the random components.
        (
            optim_paras,
            edu_spec,
            num_agents_sim,
            num_periods,
            num_draws_emax,
        ) = dist_class_attributes(
            respy_obj,
            "optim_paras",
            "edu_spec",
            "num_agents_sim",
            "num_periods",
            "num_draws_emax",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

        # Simulate sample model using RESTUD code.
        transform_respy_to_restud_sim(
            optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
        )

        # Solve model using RESTUD code.
        cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
        subprocess.check_call(cmd, shell=True)

        # We need to ensure for RESPY that the lagged activity variable indicates that
        # the individuals were in school the period before entering the model.
        types = np.random.choice([3], size=num_agents_sim)
        np.savetxt(".initial_lagged.respy.test", types, fmt="%i")

        # Solve model using RESPY package.
        simulate_observed(respy_obj, is_missings=False)

        # Compare the simulated dataset generated by the programs.
        column_labels = []
        column_labels += ["Experience_A", "Experience_B"]
        column_labels += ["Years_Schooling", "Lagged_Choice"]

        py = pd.read_csv(
            "data.respy.dat",
            delim_whitespace=True,
            header=0,
            na_values=".",
            usecols=column_labels,
        ).astype(np.float)

        fort = pd.DataFrame(
            np.array(np.genfromtxt("ftest.txt", missing_values="."), ndmin=2)[:, -4:],
            columns=column_labels,
        ).astype(np.float)

        # The simulated dataset from FORTRAN includes an indicator for the lagged
        # activities.
        py["Lagged_Choice"] = py["Lagged_Choice"].map({1: 0.0, 2: 0.0, 3: 1.0, 4: 0.0})

        assert_frame_equal(py, fort)
Example #19
    def test_4(self):
        """ This test ensures that the scaling matrix is identical between the
        alternative versions.
        """
        max_draws = np.random.randint(11, 300)

        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}
        num_agents = np.random.randint(10, max_draws)

        point_constr = {
            "program": {"version": "python"},
            "estimation": {"maxfun": np.random.randint(1, 6), "agents": num_agents},
            "simulation": {"agents": num_agents},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_base = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras = dist_class_attributes(
            respy_base, "num_agents_sim", "optim_paras"
        )

        type_shares = optim_paras["type_shares"]
        num_periods = options_spec["num_periods"]

        write_draws(num_periods, max_draws)
        write_interpolation_grid(respy_base)
        write_types(type_shares, num_agents_sim)

        simulate_observed(respy_base)

        base_scaling_matrix = None
        for version in ["fortran", "python"]:
            respy_obj = copy.deepcopy(respy_base)

            # The actual optimizer does not matter for the scaling matrix. We also need
            # to make sure that PYTHON is only called with a single processor.
            if version == "python":
                optimizer_used = "SCIPY-LBFGSB"
                num_procs = 1
            else:
                num_procs = respy_obj.get_attr("num_procs")
                optimizer_used = "FORT-BOBYQA"

            # Create output to process a baseline.
            respy_obj.unlock()
            respy_obj.set_attr("optimizer_used", optimizer_used)
            respy_obj.set_attr("num_procs", num_procs)
            respy_obj.set_attr("version", version)
            respy_obj.set_attr("maxfun", 1)
            respy_obj.lock()

            respy_obj.fit()

            if base_scaling_matrix is None:
                base_scaling_matrix = np.genfromtxt("scaling.respy.out")

            scaling_matrix = np.genfromtxt("scaling.respy.out")
            assert_almost_equal(base_scaling_matrix, scaling_matrix)
Example #20
    def test_1(self):
        """ Testing the equality of an evaluation of the criterion function for a random
        request.
        """
        # Run evaluation for multiple random requests.
        is_deterministic = np.random.choice([True, False], p=[0.10, 0.9])
        is_interpolated = bool(np.random.choice([True, False], p=[0.10, 0.9]))
        is_myopic = np.random.choice([True, False], p=[0.10, 0.9])
        max_draws = np.random.randint(11, 100)
        num_agents = np.random.randint(10, max_draws)

        bound_constr = {"max_draws": max_draws}
        point_constr = {
            "interpolation": {"flag": is_interpolated},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0, "agents": num_agents},
            "simulation": {"agents": num_agents},
            "num_periods": np.random.randint(1, 5),
        }

        num_types = np.random.randint(2, 5)

        if is_interpolated:
            point_constr["num_periods"] = np.random.randint(3, 5)

        params_spec, options_spec = generate_random_model(
            bound_constr=bound_constr,
            point_constr=point_constr,
            deterministic=is_deterministic,
            myopic=is_myopic,
            num_types=num_types,
        )

        edu_spec = options_spec["edu_spec"]
        num_periods = point_constr["num_periods"]

        # The use of the interpolation routines is another special case. Constructing
        # a request that actually involves the use of the interpolation routine is a
        # little involved as the number of interpolation points needs to be lower than
        # the actual number of states. To know the number of states in each period, we
        # need to construct the whole state space.
        if is_interpolated:
            state_space = StateSpace(
                num_periods, num_types, edu_spec["start"], edu_spec["max"]
            )

            max_states_period = state_space.states_per_period.max()

            options_spec["interpolation"]["points"] = np.random.randint(
                10, max_states_period
            )

        # Write out random components and interpolation grid to align the three
        # implementations.
        write_draws(num_periods, max_draws)
        respy_obj = RespyCls(params_spec, options_spec)
        write_interpolation_grid(respy_obj)

        type_shares = respy_obj.attr["optim_paras"]["type_shares"]

        write_types(type_shares, num_agents)
        write_edu_start(edu_spec, num_agents)
        write_lagged_start(num_agents)

        # Clean evaluations based on the interpolation grid.
        base_val, base_data = None, None

        for version in ["python", "fortran"]:
            respy_obj = RespyCls(params_spec, options_spec)

            # Modify the version of the program for the different requests.
            respy_obj.unlock()
            respy_obj.set_attr("version", version)
            respy_obj.lock()

            # Solve the model
            respy_obj = simulate_observed(respy_obj)

            # This part checks the equality of the simulated dataset for the different
            # versions of the code.
            data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val, crit_val, rtol=1e-05, atol=1e-06)

            # We know even more for the deterministic case.
            if is_deterministic:
                assert crit_val in [-1.0, 0.0]
Example #21
    def test_3(self):
        """ Testing some of the relationships in the simulated dataset.
        """
        is_deterministic = np.random.choice([True, False])
        is_myopic = np.random.choice([True, False])

        max_draws = np.random.randint(5, 200)
        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        params_spec, options_spec = generate_random_model(
            bound_constr=bound_constr,
            deterministic=is_deterministic,
            myopic=is_myopic)

        respy_obj = RespyCls(params_spec, options_spec)
        _, df = respy_obj.simulate()

        optim_paras, num_types, edu_spec, num_periods = dist_class_attributes(
            respy_obj, "optim_paras", "num_types", "edu_spec", "num_periods")

        # We can back out the wage information from other information provided in the
        # simulated dataset.
        for choice in [1, 2]:
            cond = df["Choice"] == choice
            label_sys = "Systematic_Reward_{}".format(choice)
            label_sho = "Shock_Reward_{}".format(choice)
            label_gen = "General_Reward_{}".format(choice)
            label_com = "Common_Reward"
            df["Ex_Post_Reward"] = (df[label_sys] - df[label_gen] -
                                    df[label_com]) * df[label_sho]

            col_1 = df["Ex_Post_Reward"].loc[:, cond]
            col_2 = df["Wage"].loc[:, cond]
            np.testing.assert_array_almost_equal(col_1, col_2)

        # In the myopic case, the total reward should be equal to the ex post rewards.
        if is_myopic:
            # The shock only affects the skill-function and not the other components
            # determining the overall reward.
            for choice in [1, 2]:
                cond = df["Choice"] == choice

                label = "Ex_Post_Reward_{}".format(choice)
                label_gen = "General_Reward_{}".format(choice)
                label_com = "Common_Reward"
                label_wag = "Wage"

                df[label] = df[label_wag] + df[label_gen] + df[label_com]

                col_1 = df["Total_Reward_" + str(choice)].loc[:, cond]
                col_2 = df[label].loc[:, cond]

                np.testing.assert_array_almost_equal(col_1, col_2)

            for choice in [3, 4]:
                label = "Ex_Post_Reward_{}".format(choice)
                label_sys = "Systematic_Reward_{}".format(choice)
                label_sho = "Shock_Reward_{}".format(choice)

                # The shock enters additively for the non-wage alternatives.
                df[label] = df[label_sys] + df[label_sho]

                # The equality does not hold if a state is inadmissible.
                cond = df["Years_Schooling"] != edu_spec["max"]

                col_1 = df["Total_Reward_" + str(choice)].loc[:, cond]
                col_2 = df[label].loc[:, cond]

                np.testing.assert_array_almost_equal(col_1, col_2)

        # If the model is deterministic, all shocks should be equal to zero or, for the
        # wage shocks, equal to one after exponentiation.
        if is_deterministic:
            for i in range(1, 5):
                label = "Shock_Reward_{}".format(i)
                if i in [1, 2]:
                    cond = df[label] == 1
                else:
                    cond = df[label] == 0
                assert np.all(cond)
Example #22
    def test_4(self):
        """ Testing the core functions of the solution step for the equality of results
        between the PYTHON and FORTRAN implementations.
        """
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)

        # Ensure that backward induction routines use the same grid for the
        # interpolation.
        write_interpolation_grid(respy_obj)

        # Extract class attributes
        (
            num_periods,
            edu_spec,
            optim_paras,
            num_draws_emax,
            seed_emax,
            is_debug,
            is_interpolated,
            num_points_interp,
            optimizer_options,
            file_sim,
            num_types,
        ) = dist_class_attributes(
            respy_obj,
            "num_periods",
            "edu_spec",
            "optim_paras",
            "num_draws_emax",
            "seed_emax",
            "is_debug",
            "is_interpolated",
            "num_points_interp",
            "optimizer_options",
            "file_sim",
            "num_types",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        coeffs_common = optim_paras["coeffs_common"]
        coeffs_home = optim_paras["coeffs_home"]
        coeffs_edu = optim_paras["coeffs_edu"]
        coeffs_a = optim_paras["coeffs_a"]
        coeffs_b = optim_paras["coeffs_b"]
        delta = optim_paras["delta"]

        type_spec_shifts = optim_paras["type_shifts"]
        type_spec_shares = optim_paras["type_shares"]

        min_idx = edu_spec["max"] + 1

        # Check the state space creation.
        state_space = StateSpace(num_periods, num_types, edu_spec["start"],
                                 edu_spec["max"], optim_paras)

        states_all, mapping_state_idx, _, _ = state_space._get_fortran_counterparts()

        pyth = (
            states_all,
            state_space.states_per_period,
            mapping_state_idx,
            state_space.states_per_period.max(),
        )

        f2py = fort_debug.wrapper_create_state_space(num_periods, num_types,
                                                     edu_spec["start"],
                                                     edu_spec["max"], min_idx)
        for i in range(4):
            # Slice Fortran output to shape of Python output.
            if isinstance(f2py[i], np.ndarray):
                f2py_reduced = f2py[i][tuple(map(slice, pyth[i].shape))]
            else:
                f2py_reduced = f2py[i]

            assert_allclose(pyth[i], f2py_reduced)

        _, _, pyth, _ = state_space._get_fortran_counterparts()

        f2py = fort_debug.wrapper_calculate_rewards_systematic(
            num_periods,
            state_space.states_per_period,
            states_all,
            state_space.states_per_period.max(),
            coeffs_common,
            coeffs_a,
            coeffs_b,
            coeffs_edu,
            coeffs_home,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(pyth, f2py)

        # Carry some results from the systematic rewards calculation for future use and
        # create the required set of disturbances.
        periods_draws_emax = create_draws(num_periods, num_draws_emax,
                                          seed_emax, is_debug)

        # Save result for next test.
        periods_rewards_systematic = pyth.copy()

        # Fix for hardcoded myopic agents: use a tiny but nonzero discount factor.
        optim_paras["delta"] = 1e-17

        # Check backward induction procedure.
        state_space = pyth_backward_induction(
            periods_draws_emax,
            state_space,
            is_debug,
            is_interpolated,
            num_points_interp,
            optim_paras,
            file_sim,
            False,
        )
        _, _, _, pyth = state_space._get_fortran_counterparts()

        f2py = fort_debug.wrapper_backward_induction(
            num_periods,
            False,
            state_space.states_per_period.max(),
            periods_draws_emax,
            num_draws_emax,
            state_space.states_per_period,
            periods_rewards_systematic,
            mapping_state_idx,
            states_all,
            is_debug,
            is_interpolated,
            num_points_interp,
            edu_spec["start"],
            edu_spec["max"],
            shocks_cholesky,
            delta,
            coeffs_common,
            coeffs_a,
            coeffs_b,
            file_sim,
            False,
        )

        assert_allclose(pyth, f2py)
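
The tuple-of-slices idiom used above to trim the Fortran output, `f2py[i][tuple(map(slice, pyth[i].shape))]`, is worth isolating: it cuts a padded array down to the shape of its Python counterpart before comparing.

import numpy as np

padded = np.arange(20).reshape(4, 5)          # e.g. an array padded to fixed bounds
target_shape = (2, 3)                         # shape of the Python counterpart
trimmed = padded[tuple(map(slice, target_shape))]
assert trimmed.shape == target_shape
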
Example #23
    def test_4(self):
        """ Testing the return values for the total values in case of myopic
        individuals for one period.

        Note
        ----
        The original test was designed to use Fortran rewards and calculate the total
        values and rewards ex post in Python and see whether they match. As both
        versions diverged in their implementation, we will implement the test with the
        Python version and check the equality of Fortran and Python outputs at all
        stages.

        """

        constr = {"edu_spec": {"max": 99}}
        params_spec, options_spec = generate_random_model(myopic=True,
                                                          point_constr=constr)

        # The equality below does not hold if schooling is an inadmissible state.
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj, _ = respy_obj.simulate()

        (
            num_periods,
            num_types,
            optim_paras,
            edu_spec,
            mapping_state_idx,
            periods_emax,
            states_all,
            periods_rewards_systematic,
            states_number_period,
        ) = dist_class_attributes(
            respy_obj,
            "num_periods",
            "num_types",
            "optim_paras",
            "edu_spec",
            "mapping_state_idx",
            "periods_emax",
            "states_all",
            "periods_rewards_systematic",
            "states_number_period",
        )

        # We have to create the state space and calculate the rewards in the Python
        # version as we later need the wages which are not part of
        # ``periods_rewards_systematic``.
        state_space = StateSpace(num_periods, num_types, edu_spec["start"],
                                 edu_spec["max"], optim_paras)

        # Check that rewards match
        _, _, pyth, _ = state_space._get_fortran_counterparts()

        # Set NaNs to -99.
        mask = np.isnan(periods_rewards_systematic)
        periods_rewards_systematic[mask] = MISSING_FLOAT

        assert_almost_equal(pyth, periods_rewards_systematic)

        period = np.random.choice(num_periods)
        draws = np.random.normal(size=4)

        # Internalize periods_emax
        state_space._create_attributes_from_fortran_counterparts(periods_emax)

        # Unpack necessary attributes
        rewards_period = state_space.get_attribute_from_period(
            "rewards", period)
        emaxs_period = state_space.get_attribute_from_period("emaxs",
                                                             period)[:, :4]
        max_education_period = (state_space.get_attribute_from_period(
            "states", period)[:, 3] >= edu_spec["max"])

        total_values, rewards_ex_post = get_continuation_value_and_ex_post_rewards(
            rewards_period[:, -2:],
            rewards_period[:, :4],
            emaxs_period,
            draws.reshape(1, -1),
            optim_paras["delta"],
            max_education_period,
        )

        np.testing.assert_equal(total_values, rewards_ex_post)
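
The equality asserted at the end follows from the definition of the total value: it adds the discounted continuation value to the ex post reward, so for myopic agents (`delta = 0`) both quantities coincide. A simplified sketch (the respy function additionally handles wages and the schooling cap):

import numpy as np


def total_value_sketch(rewards_ex_post, emaxs_next, delta):
    """Total value = ex post reward + delta * continuation value (simplified)."""
    return rewards_ex_post + delta * emaxs_next


rewards = np.array([1.0, -0.5, 2.0, 0.3])
emaxs = np.array([10.0, 11.0, 9.0, 12.0])
np.testing.assert_equal(total_value_sketch(rewards, emaxs, delta=0.0), rewards)
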
Example #24
    def test_5(self):
        """ This methods ensures that the core functions yield the same results across
        implementations.
        """
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)

        # Ensure that backward induction routines use the same grid for the
        # interpolation.
        max_states_period = write_interpolation_grid(respy_obj)

        # Extract class attributes
        (
            num_periods,
            edu_spec,
            optim_paras,
            num_draws_emax,
            is_debug,
            is_interpolated,
            num_points_interp,
            is_myopic,
            num_agents_sim,
            num_draws_prob,
            tau,
            seed_sim,
            num_agents_est,
            optimizer_options,
            file_sim,
            num_types,
            num_paras,
        ) = dist_class_attributes(
            respy_obj,
            "num_periods",
            "edu_spec",
            "optim_paras",
            "num_draws_emax",
            "is_debug",
            "is_interpolated",
            "num_points_interp",
            "is_myopic",
            "num_agents_sim",
            "num_draws_prob",
            "tau",
            "seed_sim",
            "num_agents_est",
            "optimizer_options",
            "file_sim",
            "num_types",
            "num_paras",
        )

        min_idx = edu_spec["max"] + 1
        shocks_cholesky = optim_paras["shocks_cholesky"]
        coeffs_common = optim_paras["coeffs_common"]
        coeffs_home = optim_paras["coeffs_home"]
        coeffs_edu = optim_paras["coeffs_edu"]
        coeffs_a = optim_paras["coeffs_a"]
        coeffs_b = optim_paras["coeffs_b"]
        delta = optim_paras["delta"]

        type_spec_shares = optim_paras["type_shares"]
        type_spec_shifts = optim_paras["type_shifts"]

        # Write out random components and interpolation grid to align the three
        # implementations.
        max_draws = max(num_agents_sim, num_draws_emax, num_draws_prob)
        write_types(type_spec_shares, num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_draws(num_periods, max_draws)
        write_lagged_start(num_agents_sim)

        # It is critical that the model is simulated after all files have been written
        # to the disk because they are picked up in the subroutines.
        respy_obj = simulate_observed(respy_obj)

        periods_draws_emax = read_draws(num_periods, num_draws_emax)
        periods_draws_prob = read_draws(num_periods, num_draws_prob)
        periods_draws_sims = read_draws(num_periods, num_agents_sim)

        fort, _ = resfort_interface(respy_obj, "simulate")

        state_space = pyth_solve(
            is_interpolated,
            num_points_interp,
            num_periods,
            is_debug,
            periods_draws_emax,
            edu_spec,
            optim_paras,
            file_sim,
            num_types,
        )

        (
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
        ) = state_space._get_fortran_counterparts()

        py = (
            periods_rewards_systematic,
            state_space.states_per_period,
            mapping_state_idx,
            periods_emax,
            states_all,
        )

        f2py = fort_debug.wrapper_solve(
            is_interpolated,
            num_points_interp,
            num_draws_emax,
            num_periods,
            is_myopic,
            is_debug,
            periods_draws_emax,
            min_idx,
            edu_spec["start"],
            edu_spec["max"],
            coeffs_common,
            coeffs_a,
            coeffs_b,
            coeffs_edu,
            coeffs_home,
            shocks_cholesky,
            delta,
            file_sim,
            max_states_period,
            num_types,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(py[0], fort[0])
        assert_allclose(py[1], fort[1])
        assert_allclose(py[2], fort[2])
        assert_allclose(py[3], fort[3])
        assert_allclose(py[4], fort[4])

        assert_allclose(py[0], f2py[0])
        assert_allclose(py[1], f2py[1])
        assert_allclose(py[2], f2py[2])
        assert_allclose(py[3], f2py[3])
        assert_allclose(py[4], f2py[4])

        (
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
        ) = state_space._get_fortran_counterparts()

        simulated_data = pyth_simulate(
            state_space,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec,
            optim_paras,
            is_debug,
        )
        py = simulated_data.copy().fillna(MISSING_FLOAT).values

        data_array = process_dataset(respy_obj).to_numpy()

        # It is very important to cut the data array down to the size of the estimation
        # sample for the calculation of contributions.
        data_array = py[:num_agents_est * num_periods, :]

        f2py = fort_debug.wrapper_simulate(
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            num_periods,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec["start"],
            edu_spec["max"],
            edu_spec["share"],
            edu_spec["lagged"],
            optim_paras["coeffs_common"],
            optim_paras["coeffs_a"],
            optim_paras["coeffs_b"],
            shocks_cholesky,
            delta,
            num_types,
            type_spec_shares,
            type_spec_shifts,
            is_debug,
        )
        assert_allclose(py, f2py)

        # We have to cut the simulated data to `num_agents_est` as the Python
        # implementation calculates the likelihood contributions for all agents in the
        # data.
        simulated_data = simulated_data.loc[simulated_data.Identifier.lt(
            num_agents_est)]

        py = pyth_contributions(state_space, simulated_data,
                                periods_draws_prob, tau, optim_paras)

        num_obs_agent = np.bincount(simulated_data.Identifier.to_numpy())

        f2py = fort_debug.wrapper_contributions(
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            data_array,
            periods_draws_prob,
            tau,
            num_periods,
            num_draws_prob,
            num_agents_est,
            num_obs_agent,
            num_types,
            edu_spec["start"],
            edu_spec["max"],
            shocks_cholesky,
            delta,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(py, f2py)

        # Evaluation of criterion function
        x0 = get_optim_paras(optim_paras, num_paras, "all", is_debug)

        py = pyth_criterion(
            x0,
            is_interpolated,
            num_points_interp,
            is_debug,
            simulated_data,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            state_space,
        )

        f2py = fort_debug.wrapper_criterion(
            x0,
            is_interpolated,
            num_draws_emax,
            num_periods,
            num_points_interp,
            is_myopic,
            is_debug,
            data_array,
            num_draws_prob,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            states_all,
            state_space.states_per_period,
            mapping_state_idx,
            max_states_period,
            num_agents_est,
            num_obs_agent,
            num_types,
            edu_spec["start"],
            edu_spec["max"],
            edu_spec["share"],
            type_spec_shares,
            type_spec_shifts,
            num_paras,
        )

        assert_allclose(py, f2py)
Example #25
    def test_10(self):
        """ This test ensures that the order of the initial schooling level specified in
        the initialization files does not matter for the simulation of a dataset and
        subsequent evaluation of the criterion function.

        Warning
        -------
        This test fails if types have identical intercepts, as no unique ordering can
        be determined then.

        """
        point_constr = {
            "estimation": {
                "maxfun": 0
            },
            # We cannot allow for interpolation as the order of states within each
            # period changes and thus the prediction model is altered even if the same
            # state identifier is used.
            "interpolation": {
                "flag": False
            },
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr)

        respy_obj = RespyCls(params_spec, options_spec)

        edu_baseline_spec, num_types, num_paras, optim_paras = dist_class_attributes(
            respy_obj, "edu_spec", "num_types", "num_paras", "optim_paras")

        # We want to randomly shuffle the list of initial schooling levels but need to
        # keep the shares and lagged probabilities aligned with their original level.
        edu_shuffled_start = np.random.permutation(
            edu_baseline_spec["start"]).tolist()

        edu_shuffled_share, edu_shuffled_lagged = [], []
        for start in edu_shuffled_start:
            idx = edu_baseline_spec["start"].index(start)
            edu_shuffled_lagged += [edu_baseline_spec["lagged"][idx]]
            edu_shuffled_share += [edu_baseline_spec["share"][idx]]

        edu_shuffled_spec = copy.deepcopy(edu_baseline_spec)
        edu_shuffled_spec["lagged"] = edu_shuffled_lagged
        edu_shuffled_spec["start"] = edu_shuffled_start
        edu_shuffled_spec["share"] = edu_shuffled_share

        # We are only looking at a single evaluation as otherwise the reordering affects
        # the optimizer that is trying better parameter values one-by-one. The
        # reordering might also violate the bounds.
        for i in range(53, num_paras):
            optim_paras["paras_bounds"][i] = [None, None]
            optim_paras["paras_fixed"][i] = False

        # We need to ensure that the baseline type is still in the first position.
        types_order = [0] + np.random.permutation(range(1, num_types)).tolist()

        type_shares = []
        for i in range(num_types):
            lower, upper = i * 2, (i + 1) * 2
            type_shares += [optim_paras["type_shares"][lower:upper].tolist()]

        optim_paras_baseline = copy.deepcopy(optim_paras)
        optim_paras_shuffled = copy.deepcopy(optim_paras)

        list_ = [
            optim_paras["type_shifts"][i, :].tolist() for i in types_order
        ]
        optim_paras_shuffled["type_shifts"] = np.array(list_)

        list_ = [type_shares[i] for i in types_order]
        optim_paras_shuffled["type_shares"] = np.array(list_).flatten()

        base_data, base_val = None, None

        k = 0

        for optim_paras in [optim_paras_baseline, optim_paras_shuffled]:
            for edu_spec in [edu_baseline_spec, edu_shuffled_spec]:

                respy_obj.unlock()
                respy_obj.set_attr("edu_spec", edu_spec)
                respy_obj.lock()

                # There is some more work to do to update the coefficients, as we
                # distinguish between the economic and the optimization versions of
                # the parameters.
                x = get_optim_paras(optim_paras, num_paras, "all", True)
                shocks_cholesky, _ = extract_cholesky(x)
                shocks_coeffs = cholesky_to_coeffs(shocks_cholesky)
                x[43:53] = shocks_coeffs
                respy_obj.update_optim_paras(x)

                respy_obj.reset()

                simulate_observed(respy_obj)

                # This part checks the equality of the simulated datasets.
                data_frame = pd.read_csv("data.respy.dat",
                                         delim_whitespace=True)

                if base_data is None:
                    base_data = data_frame.copy()

                assert_frame_equal(base_data, data_frame)

                # This part checks the equality of a single function evaluation.
                _, val = respy_obj.fit()
                if base_val is None:
                    base_val = val
                np.testing.assert_almost_equal(base_val, val)

                respy_obj.reset()
                k += 1
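
The invariance that test_10 checks for the full model can be illustrated in isolation: the value of a weighted mixture does not change when the weights and the component values are permuted jointly. The sketch below uses made-up numbers and is not part of respy.

import numpy as np

# Illustrative sketch only: jointly permuting mixture weights and component
# values leaves the mixture value unchanged.
rng = np.random.default_rng(0)
weights = np.array([0.5, 0.3, 0.2])
components = rng.normal(size=3)

order = rng.permutation(3)
assert np.isclose(weights @ components, weights[order] @ components[order])
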
Ejemplo n.º 26
0
    def test_1(self):
        """Test if random model specifications can be simulated and processed."""
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)
        process_dataset(respy_obj)
Ejemplo n.º 27
0
    def test_8(self):
        """ We ensure that the number of initial conditions does not matter for the
        evaluation of the criterion function if a weight of one is put on the first
        group.
        """
        num_agents = np.random.randint(5, 100)
        constr = {
            "simulation": {
                "agents": num_agents
            },
            "num_periods": np.random.randint(1, 4),
            "edu_spec": {
                "max": np.random.randint(15, 25, size=1).tolist()[0]
            },
            "estimation": {
                "maxfun": 0,
                "agents": num_agents
            },
            "interpolation": {
                "flag": False
            },
        }

        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)

        base_val = None
        edu_start_base = np.random.randint(1, 5, size=1).tolist()[0]

        # We need to ensure that the initial lagged activity always has the same
        # distribution.
        edu_lagged_base = np.random.uniform(size=5).tolist()

        for num_edu_start in [1, np.random.choice([2, 3, 4]).tolist()]:

            # We always need to ensure that a weight of one is on the first level of
            # initial schooling.
            options_spec["edu_spec"]["share"] = [
                1.0
            ] + [0.0] * (num_edu_start - 1)
            options_spec["edu_spec"][
                "lagged"] = edu_lagged_base[:num_edu_start]

            # We need to make sure that the baseline level of initial schooling is
            # always included. At the same time we cannot have any duplicates.
            edu_start = np.random.choice(range(1, 10),
                                         size=num_edu_start,
                                         replace=False).tolist()
            if edu_start_base in edu_start:
                edu_start.remove(edu_start_base)
                edu_start.insert(0, edu_start_base)
            else:
                edu_start[0] = edu_start_base

            options_spec["edu_spec"]["start"] = edu_start

            respy_obj = RespyCls(params_spec, options_spec)
            simulate_observed(respy_obj)
            _, val = respy_obj.fit()
            if base_val is None:
                base_val = val

            np.testing.assert_almost_equal(base_val, val)
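
The logic behind test_8 can be seen in a tiny, self-contained sketch with made-up numbers (not respy code): when all probability mass is on the first initial-schooling level, adding further levels with zero probability cannot change anything that is sampled.

import numpy as np

# Illustrative sketch: with all probability mass on the first level, adding
# further levels with zero probability cannot change what is sampled.
rng = np.random.default_rng(0)
draw_one = rng.choice([10], p=[1.0], size=1000)
draw_many = rng.choice([10, 12, 15], p=[1.0, 0.0, 0.0], size=1000)
assert (draw_one == 10).all() and (draw_many == 10).all()
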
Ejemplo n.º 28
0
    def test_7(self):
        """ This test ensures that the constraints for the covariance matrix are
        properly handled.
        """

        params_spec, options_spec = generate_random_model(deterministic=True)

        # Manual specification of update patterns.
        updates = {}

        # Off-diagonal elements fixed, diagonal elements free.
        updates["valid_1"] = [
            False, True, False, True, True, False, True, True, True, False
        ]
        updates["valid_2"] = [False] * 10
        updates["valid_3"] = [True] * 10

        updates["invalid_1"] = [
            False, False, True, True, False, True, True, False, True, False
        ]
        updates["invalid_2"] = [
            False, False, False, True, False, False, True, False, True, False
        ]

        # We draw a random update pattern and write it into the parameter specification.
        label = np.random.choice(list(updates.keys()))
        params_spec.loc["shocks", "fixed"] = np.array(updates[label])

        if "invalid" in label:
            # This exception block makes sure that the UserError is in fact raised.
            try:
                RespyCls(params_spec, options_spec)
                raise AssertionError
            except UserError:
                pass
        else:
            RespyCls(params_spec, options_spec)
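
The update patterns above are easier to read once the ten flags are mapped onto the lower triangle of the 4x4 shock matrix. Assuming, as the valid_1 pattern suggests, that the flags follow the lower triangle in row-major order, the diagonal elements sit at positions 0, 2, 5, and 9; the valid patterns shown fix either nothing, everything, or only the off-diagonal positions. A minimal sketch of that mapping:

import numpy as np

# Minimal sketch (assumed row-major lower-triangle layout of the ten flags):
# recover which of the ten positions correspond to diagonal elements.
rows, cols = np.tril_indices(4)
diagonal_positions = [i for i, (r, c) in enumerate(zip(rows, cols)) if r == c]
print(diagonal_positions)  # [0, 2, 5, 9]
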
Ejemplo n.º 29
0
    def test_1(self):
        """ Compare the evaluation of the criterion function for the ambiguity
        optimization and the simulated expected future value between the FORTRAN and
        PYTHON implementations. These tests are set up as a separate test case due to
        the large setup cost of constructing the ingredients for the interface.
        """
        # Constrain the request to the PYTHON version of the program.
        constr = {"program": {"version": "python"}}
        # Generate a random model specification.
        params_spec, options_spec = generate_random_model(point_constr=constr)
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj = simulate_observed(respy_obj)

        # Extract class attributes
        (
            state_space,
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
            num_periods,
            num_draws_emax,
            edu_spec,
            optim_paras,
            num_types,
        ) = dist_class_attributes(
            respy_obj,
            "state_space",
            "states_all",
            "mapping_state_idx",
            "periods_rewards_systematic",
            "periods_emax",
            "num_periods",
            "num_draws_emax",
            "edu_spec",
            "optim_paras",
            "num_types",
        )

        # Sample draws
        draws_emax_standard = np.random.multivariate_normal(
            np.zeros(4), np.identity(4), num_draws_emax)
        draws_emax_risk = transform_disturbances(
            draws_emax_standard, np.zeros(4), optim_paras["shocks_cholesky"])

        # Sampling of random period and admissible state index
        period = np.random.choice(range(num_periods))
        k = np.random.choice(range(state_space.states_per_period[period]))

        # Select systematic rewards
        rewards_systematic = periods_rewards_systematic[period, k, :]

        # Evaluation of simulated expected future values. Limit to a single state, as
        # in the Fortran version.
        rewards_period = state_space.get_attribute_from_period(
            "rewards", period)[k]
        emaxs_period = state_space.get_attribute_from_period("emaxs",
                                                             period)[k, :4]
        max_education_period = (state_space.get_attribute_from_period(
            "states", period)[k, 3] >= edu_spec["max"])

        py = construct_emax_risk(
            rewards_period[-2:],
            rewards_period[:4],
            emaxs_period,
            draws_emax_risk,
            optim_paras["delta"],
            max_education_period,
        )

        f90 = fort_debug.wrapper_construct_emax_risk(
            num_periods,
            num_draws_emax,
            period,
            k,
            draws_emax_risk,
            rewards_systematic,
            periods_emax,
            states_all,
            mapping_state_idx,
            edu_spec["start"],
            edu_spec["max"],
            optim_paras["delta"],
            optim_paras["coeffs_common"],
            optim_paras["coeffs_a"],
            optim_paras["coeffs_b"],
            num_types,
        )

        assert_allclose(py, f90)
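
The quantity compared here is the simulated expected future value: loosely, for each shock draw one takes the maximum over alternatives of reward plus discounted continuation value, and then averages those maxima over draws. The sketch below is a simplified, self-contained illustration with made-up numbers, not the respy interface.

import numpy as np

# Simplified sketch: Monte Carlo approximation of the expected maximum of the
# alternative-specific total values (reward + shock + discounted continuation).
rng = np.random.default_rng(0)
num_draws = 500
rewards = rng.normal(size=4)        # systematic rewards per alternative
continuation = rng.normal(size=4)   # next-period continuation values
delta = 0.95                        # discount factor
draws = rng.normal(scale=0.2, size=(num_draws, 4))

total_values = rewards + draws + delta * continuation
emax = total_values.max(axis=1).mean()
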
Ejemplo n.º 30
0
    def test_6(self):
        """ Further tests for the interpolation routines.
        """
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj = simulate_observed(respy_obj)

        # Extract class attributes
        (
            periods_rewards_systematic,
            mapping_state_idx,
            seed_prob,
            periods_emax,
            num_periods,
            states_all,
            num_points_interp,
            edu_spec,
            num_draws_emax,
            is_myopic,
            is_debug,
            is_interpolated,
            optim_paras,
            optimizer_options,
            file_sim,
            num_types,
        ) = dist_class_attributes(
            respy_obj,
            "periods_rewards_systematic",
            "mapping_state_idx",
            "seed_prob",
            "periods_emax",
            "num_periods",
            "states_all",
            "num_points_interp",
            "edu_spec",
            "num_draws_emax",
            "is_myopic",
            "is_debug",
            "is_interpolated",
            "optim_paras",
            "optimizer_options",
            "file_sim",
            "num_types",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        shocks_cov = shocks_cholesky.dot(shocks_cholesky.T)
        coeffs_common = optim_paras["coeffs_common"]
        coeffs_a = optim_paras["coeffs_a"]
        coeffs_b = optim_paras["coeffs_b"]
        delta = optim_paras["delta"]

        # Add some additional objects required for the interfaces to the functions.
        period = np.random.choice(num_periods)

        periods_draws_emax = create_draws(num_periods, num_draws_emax,
                                          seed_prob, is_debug)

        draws_emax_standard = periods_draws_emax[period, :, :]

        draws_emax_risk = transform_disturbances(draws_emax_standard,
                                                 np.zeros(4), shocks_cholesky)

        # Initialize the Python version of the state space.
        state_space = StateSpace(num_periods, num_types, edu_spec["start"],
                                 edu_spec["max"], optim_paras)

        # Integrate periods_emax in state_space
        state_space.emaxs = np.column_stack((
            np.zeros((state_space.num_states, 4)),
            periods_emax[~np.isnan(periods_emax)
                         & (periods_emax != MISSING_FLOAT)],
        ))

        # Fill the columns emaxs_a through emaxs_home in the requested period.
        states_period = state_space.get_attribute_from_period("states", period)

        # Do not get the emaxs from the subsequent period if we are in the last one.
        if period != state_space.num_periods - 1:
            state_space.emaxs = get_emaxs_of_subsequent_period(
                states_period, state_space.indexer, state_space.emaxs,
                edu_spec["max"])

        num_states = state_space.states_per_period[period]

        shifts = np.random.randn(4)

        # Slight modification of the request which ensures that the interpolation code
        # is exercised.
        num_points_interp = min(num_points_interp, num_states)

        # Get the IS_SIMULATED indicator for the subset of points which are used for
        # the prediction model.
        is_simulated = get_simulated_indicator(num_points_interp, num_states,
                                               period, is_debug)

        # Unpack necessary attributes
        rewards_period = state_space.get_attribute_from_period(
            "rewards", period)
        emaxs_period = state_space.get_attribute_from_period("emaxs",
                                                             period)[:, :4]
        max_education = (state_space.get_attribute_from_period(
            "states", period)[:, 3] >= edu_spec["max"])

        # Construct the exogenous variables for all points of the state space.
        exogenous, max_emax = get_exogenous_variables(rewards_period,
                                                      emaxs_period, shifts,
                                                      optim_paras["delta"],
                                                      max_education)

        # Align output between the Python and Fortran versions.
        py = (exogenous, max_emax)

        f90 = fort_debug.wrapper_get_exogenous_variables(
            period,
            num_periods,
            num_states,
            periods_rewards_systematic,
            shifts,
            mapping_state_idx,
            periods_emax,
            states_all,
            edu_spec["start"],
            edu_spec["max"],
            delta,
            coeffs_common,
            coeffs_a,
            coeffs_b,
            num_types,
        )

        assert_almost_equal(py[0], f90[0])
        assert_almost_equal(py[1], f90[1])

        # Construct endogenous variable so that the prediction model can be fitted.
        endogenous = get_endogenous_variable(
            rewards_period,
            emaxs_period,
            max_emax,
            is_simulated,
            draws_emax_risk,
            optim_paras["delta"],
            max_education,
        )

        f90 = fort_debug.wrapper_get_endogenous_variable(
            period,
            num_periods,
            num_states,
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            is_simulated,
            num_draws_emax,
            max_emax,
            draws_emax_risk,
            edu_spec["start"],
            edu_spec["max"],
            shocks_cov,
            delta,
            coeffs_common,
            coeffs_a,
            coeffs_b,
        )
        assert_almost_equal(endogenous, replace_missing_values(f90))

        py = get_predictions(endogenous, exogenous, max_emax, is_simulated)

        f90 = fort_debug.wrapper_get_predictions(
            endogenous,
            exogenous,
            max_emax,
            is_simulated,
            num_points_interp,
            num_states,
            file_sim,
            False,
        )

        # The assertion fails if a column of the exogenous variables is all zeros, so
        # we only run it when every column contains at least one non-zero entry.
        if exogenous.any(axis=0).all():
            assert_array_almost_equal(py, f90)
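
The interpolation routine exercised above follows a simple logic: the endogenous variable is computed exactly only at the simulated subset of states, a prediction model is fitted on that subset, and the fitted model supplies values for the remaining states. A simplified sketch with hypothetical data, not respy's implementation:

import numpy as np

# Simplified sketch: fit a linear prediction model on the simulated subset and
# predict the endogenous variable for the remaining states, keeping the exact
# values where they were actually computed.
rng = np.random.default_rng(0)
num_states, num_covariates = 50, 9
exog = rng.normal(size=(num_states, num_covariates))
endo = exog @ rng.normal(size=num_covariates) + rng.normal(scale=0.1, size=num_states)
is_simulated = rng.random(num_states) < 0.5

beta, *_ = np.linalg.lstsq(exog[is_simulated], endo[is_simulated], rcond=None)
predictions = exog @ beta
predictions[is_simulated] = endo[is_simulated]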