Example No. 1
    def test_12(self):
        """ Testing the functionality introduced to ensure that the simulation is
        independent of the order of initial conditions and types in the initialization
        file.

        """
        num_elements = np.random.randint(1, 11)

        input_array = np.random.normal(size=num_elements)

        # We first check the sorting implementation.
        py = sorted(input_array)
        f90 = fort_debug.wrapper_sorted(input_array, num_elements)
        assert_equal(py, f90)

        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)

        edu_spec, optim_paras, num_types = dist_class_attributes(
            respy_obj, "edu_spec", "optim_paras", "num_types")

        args = (edu_spec["start"], edu_spec["share"], edu_spec["max"])
        f90 = fort_debug.wrapper_sort_edu_spec(*args)
        py = sort_edu_spec(edu_spec)
        for i, label in enumerate(["start", "share", "max"]):
            assert_equal(py[label], f90[i])

        py = sort_type_info(optim_paras, num_types)
        f90 = fort_debug.wrapper_sort_type_info(optim_paras["type_shares"],
                                                num_types)
        for i, label in enumerate(["order", "shares"]):
            assert_equal(py[label], f90[i])
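
The test checks that the Fortran sorting helpers agree with Python. Below is a minimal sketch of the idea behind sort_edu_spec, assuming only numpy; the helper name and dictionary layout follow the test above, everything else is illustrative.

import numpy as np

def sort_edu_spec_sketch(edu_spec):
    # Sort the initial schooling levels and keep the shares aligned with them,
    # so that the model does not depend on the order in the initialization file.
    order = np.argsort(edu_spec["start"])
    return {
        "start": [edu_spec["start"][i] for i in order],
        "share": [edu_spec["share"][i] for i in order],
        "max": edu_spec["max"],
    }

example = {"start": [12, 10, 15], "share": [0.2, 0.5, 0.3], "max": 20}
print(sort_edu_spec_sketch(example))
# {'start': [10, 12, 15], 'share': [0.5, 0.2, 0.3], 'max': 20}
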
Example No. 2
    def test_2(self):
        """ This test compares the results from a solution using the interpolation code
        for the special case where the number of interpolation points is exactly the
        number of states in the final period. In this case the interpolation code is run
        and then all predicted values are replaced with their actual values.
        """
        # Set initial constraints
        constr = {"interpolation": {"flag": False}}

        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          deterministic=True)
        baseline = None

        # Solve with and without interpolation code
        for _ in range(2):
            # Process and solve
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax = dist_class_attributes(
                respy_obj, "states_number_period", "periods_emax")

            # Store and check results
            if baseline is None:
                baseline = periods_emax
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration
            options_spec["interpolation"]["points"] = max(states_number_period)
            options_spec["interpolation"]["flag"] = True
Example No. 3
    def test_1(self):
        """  Compare results from the RESTUD program and the RESPY package.
        """
        # Impose some constraints on the initialization file which ensure that
        # the problem can be solved by the RESTUD code. The code is adjusted to
        # run with zero draws.
        constraints = dict()
        constraints['edu'] = (10, 20)
        constraints['is_deterministic'] = True

        # Generate random initialization file. The RESTUD code uses the same
        # random draws for the solution and simulation of the model. Thus,
        # the number of agents is required to be less than or equal to the number
        # of draws.
        init_dict = generate_random_dict(constraints)

        num_agents_sim = init_dict['SIMULATION']['agents']
        num_draws_emax = init_dict['SOLUTION']['draws']
        if num_draws_emax < num_agents_sim:
            init_dict['SOLUTION']['draws'] = num_agents_sim

        print_init_dict(init_dict)

        # Signal to the RESTUD code the special case of zero disturbances.
        open('.restud.testing.scratch', 'a').close()

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')

        # This flag aligns the random components between the RESTUD program and the
        # RESPY package. The existence of the file leads the RESTUD program to write
        # out the random components.
        model_paras, edu_start, edu_max, num_agents_sim, num_periods, \
            num_draws_emax, delta = \
                dist_class_attributes(respy_obj,
                    'model_paras', 'edu_start', 'edu_max', 'num_agents_sim',
                    'num_periods', 'num_draws_emax', 'delta')

        transform_respy_to_restud(model_paras, edu_start, edu_max,
                                  num_agents_sim, num_periods, num_draws_emax,
                                  delta)

        # Solve model using RESTUD code.
        cmd = TEST_RESOURCES_DIR + '/kw_dp3asim'
        subprocess.check_call(cmd, shell=True)

        # Solve model using RESPY package.
        simulate(respy_obj)

        # Compare the simulated datasets generated by the programs.
        py = pd.DataFrame(
            np.array(np.genfromtxt('data.respy.dat', missing_values='.'),
                     ndmin=2)[:, -4:])

        fort = pd.DataFrame(
            np.array(np.genfromtxt('ftest.txt', missing_values='.'),
                     ndmin=2)[:, -4:])

        assert_frame_equal(py, fort)
Example No. 4
    def test_7(self):
        """ This is a special test for auxiliary functions related to the
        interpolation setup.
        """
        # Impose constraints
        constr = dict()
        constr['periods'] = np.random.randint(2, 5)

        # Construct a random initialization file
        generate_init(constr)

        # Extract required information
        respy_obj = RespyCls('test.respy.ini')

        # Extract class attributes
        is_debug, num_periods = dist_class_attributes(respy_obj, 'is_debug',
                                                      'num_periods')

        # Write out a grid for the interpolation
        max_states_period = write_interpolation_grid('test.respy.ini')

        # Draw random request for testing
        num_states = np.random.randint(1, max_states_period)
        candidates = list(range(num_states))

        period = np.random.randint(1, num_periods)
        num_points_interp = np.random.randint(1, num_states + 1)

        # Check function for random choice and make sure that there are no
        # duplicates.
        f90 = fort_debug.wrapper_random_choice(candidates, num_states,
                                               num_points_interp)
        np.testing.assert_equal(len(set(f90)), len(f90))
        np.testing.assert_equal(len(f90), num_points_interp)

        # Check the standard cases of the function.
        args = (num_points_interp, num_states, period, is_debug, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)

        np.testing.assert_equal(len(f90), num_states)
        np.testing.assert_equal(np.all(f90) in [0, 1], True)

        # Test the standardization across PYTHON, F2PY, and FORTRAN
        # implementations. This is possible as we write out an interpolation
        # grid to disk which is used for both functions.
        base_args = (num_points_interp, num_states, period, is_debug)
        args = base_args
        py = get_simulated_indicator(*args)
        args = base_args + (num_periods, )
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        np.testing.assert_array_equal(f90, 1 * py)
        os.unlink('interpolation.txt')

        # Special case where the number of interpolation points is the same as the
        # number of candidates. In that case the returned indicator
        # should be all TRUE.
        args = (num_states, num_states, period, True, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        np.testing.assert_equal(sum(f90), num_states)
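
The wrappers above implement the same idea: draw a subset of states without replacement and turn it into an indicator of which states are solved exactly. A short numpy-only sketch of that pattern (all numbers are made up):

import numpy as np

num_states, num_points_interp = 10, 4

# Draw the interpolation points without replacement so there are no duplicates.
simulated = np.random.choice(num_states, size=num_points_interp, replace=False)

# Translate the draw into an indicator over all states of the period.
indicator = np.zeros(num_states, dtype=bool)
indicator[simulated] = True

assert indicator.sum() == num_points_interp
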
Example No. 5
    def test_7(self):
        """ This is a special test for auxiliary functions related to the
        interpolation setup.
        """
        # Impose constraints
        constr = dict()
        constr['periods'] = np.random.randint(2, 5)

        # Construct a random initialization file
        generate_init(constr)

        # Extract required information
        respy_obj = RespyCls('test.respy.ini')

        # Extract class attributes
        is_debug, num_periods = dist_class_attributes(respy_obj,
                'is_debug', 'num_periods')

        # Write out a grid for the interpolation
        max_states_period = write_interpolation_grid('test.respy.ini')

        # Draw random request for testing
        num_states = np.random.randint(1, max_states_period)
        candidates = list(range(num_states))

        period = np.random.randint(1, num_periods)
        num_points_interp = np.random.randint(1, num_states + 1)

        # Check function for random choice and make sure that there are no
        # duplicates.
        f90 = fort_debug.wrapper_random_choice(candidates, num_states, num_points_interp)
        np.testing.assert_equal(len(set(f90)), len(f90))
        np.testing.assert_equal(len(f90), num_points_interp)

        # Check the standard cases of the function.
        args = (num_points_interp, num_states, period, is_debug, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)

        np.testing.assert_equal(len(f90), num_states)
        np.testing.assert_equal(np.all(f90) in [0, 1], True)

        # Test the standardization across PYTHON, F2PY, and FORTRAN
        # implementations. This is possible as we write out an interpolation
        # grid to disk which is used for both functions.
        base_args = (num_points_interp, num_states, period, is_debug)
        args = base_args
        py = get_simulated_indicator(*args)
        args = base_args + (num_periods, )
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        np.testing.assert_array_equal(f90, 1*py)
        os.unlink('interpolation.txt')

        # Special case where the number of interpolation points is the same as the
        # number of candidates. In that case the returned indicator
        # should be all TRUE.
        args = (num_states, num_states, period, True, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        np.testing.assert_equal(sum(f90), num_states)
Example No. 6
def simulate_observed(respy_obj, is_missings=True):
    """ This function adds two important features of observed datasests: (1) missing
    observations and missing wage information.
    """
    def drop_agents_obs(agent):
        """ We now determine the exact period from which onward the history is truncated
        and cut the simulated dataset down to size.
        """
        start_truncation = np.random.choice(range(1,
                                                  agent["Period"].max() + 2))
        agent = agent[agent["Period"] < start_truncation]
        return agent

    seed_sim = dist_class_attributes(respy_obj, "seed_sim")

    respy_obj.simulate()

    # It is important to set the seed after the simulation call. Otherwise, the value of
    # the seed differs due to the different implementations of the PYTHON and FORTRAN
    # programs.
    np.random.seed(seed_sim)

    # We read in the baseline simulated dataset.
    data_frame = pd.read_csv(
        "data.respy.dat",
        delim_whitespace=True,
        header=0,
        na_values=".",
        dtype=DATA_FORMATS_SIM,
        names=DATA_LABELS_SIM,
    )

    if is_missings:
        # We truncate the histories of agents. This mimics the frequent empirical fact
        # that we lose track of more and more agents over time.
        data_subset = data_frame.groupby("Identifier").apply(drop_agents_obs)

        # We also want to drop some wage observations. Note that we might be dealing
        # with a dataset where nobody is working anyway.
        is_working = data_subset["Choice"].isin([1, 2])
        num_drop_wages = int(
            np.sum(is_working) * np.random.uniform(high=0.5, size=1))
        if num_drop_wages > 0:
            indices = data_subset["Wage"][is_working].index
            index_missing = np.random.choice(indices, num_drop_wages, False)
            data_subset.loc[index_missing, "Wage"] = None
    else:
        data_subset = data_frame

    # We can restrict the information to observed entities only.
    data_subset = data_subset[DATA_LABELS_EST]
    write_out(respy_obj, data_subset)

    return respy_obj
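
A self-contained sketch of the two features added by simulate_observed, run on a toy panel. The column names follow the function above; the data and the truncation rule are illustrative only.

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "Identifier": [0, 0, 0, 1, 1, 1],
    "Period": [0, 1, 2, 0, 1, 2],
    "Choice": [1, 1, 3, 2, 2, 2],
    "Wage": [1.0, 1.1, np.nan, 2.0, 2.1, 2.2],
})

def truncate(agent):
    # Keep each agent's history only up to a randomly drawn period (attrition).
    cutoff = np.random.choice(range(1, agent["Period"].max() + 2))
    return agent[agent["Period"] < cutoff]

subset = df.groupby("Identifier", group_keys=False).apply(truncate)

# Drop a random share of the observed wages (item non-response).
is_working = subset["Choice"].isin([1, 2])
num_drop = int(is_working.sum() * np.random.uniform(high=0.5))
if num_drop > 0:
    drop_idx = np.random.choice(subset.loc[is_working].index, num_drop, replace=False)
    subset.loc[drop_idx, "Wage"] = np.nan
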
Example No. 7
    def test_2(self):
        """Ensure that the evaluation of the criterion is equal across versions."""
        max_draws = np.random.randint(10, 100)

        # It seems to be important that max_draws and max_agents are the same
        # number because otherwise some functions that read draws from a file
        # to ensure compatibility of the Fortran and Python versions won't work.
        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        point_constr = {
            "interpolation": {"flag": False},
            "program": {"procs": 1, "threads": 1, "version": "python"},
            "estimation": {"maxfun": 0},
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr, bound_constr=bound_constr
        )
        respy_obj = RespyCls(params_spec, options_spec)

        num_agents_sim, optim_paras = dist_class_attributes(
            respy_obj, "num_agents_sim", "optim_paras"
        )

        type_shares = optim_paras["type_shares"]

        # Simulate a dataset
        simulate_observed(respy_obj)

        # Iterate over alternative implementations
        base_x, base_val = None, None

        num_periods = options_spec["num_periods"]

        write_draws(num_periods, max_draws)
        write_types(type_shares, num_agents_sim)

        for version in ["python", "fortran"]:

            respy_obj.unlock()

            respy_obj.set_attr("version", version)

            respy_obj.lock()

            x, val = respy_obj.fit()

            # Check for the returned parameters.
            if base_x is None:
                base_x = x
            np.testing.assert_allclose(base_x, x)

            # Check for the value of the criterion function.
            if base_val is None:
                base_val = val
            np.testing.assert_allclose(base_val, val)
Example No. 8
    def test_1(self):
        """  Compare results from the RESTUD program and the RESPY package.
        """
        # Impose some constraints on the initialization file which ensure that
        # the problem can be solved by the RESTUD code. The code is adjusted to
        # run with zero draws.
        constraints = dict()
        constraints['edu'] = (10, 20)
        constraints['is_deterministic'] = True

        # Generate random initialization file. The RESTUD code uses the same
        # random draws for the solution and simulation of the model. Thus,
        # the number of agents is required to be less than or equal to the number
        # of draws.
        init_dict = generate_random_dict(constraints)

        num_agents_sim = init_dict['SIMULATION']['agents']
        num_draws_emax = init_dict['SOLUTION']['draws']
        if num_draws_emax < num_agents_sim:
            init_dict['SOLUTION']['draws'] = num_agents_sim

        print_init_dict(init_dict)

        # Signal to the RESTUD code the special case of zero disturbances.
        open('.restud.testing.scratch', 'a').close()

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')

        # This flag aligns the random components between the RESTUD program and the
        # RESPY package. The existence of the file leads the RESTUD program to write
        # out the random components.
        model_paras, edu_start, edu_max, num_agents_sim, num_periods, \
            num_draws_emax, delta = \
                dist_class_attributes(respy_obj,
                    'model_paras', 'edu_start', 'edu_max', 'num_agents_sim',
                    'num_periods', 'num_draws_emax', 'delta')

        transform_respy_to_restud(model_paras, edu_start, edu_max,
            num_agents_sim, num_periods, num_draws_emax, delta)

        # Solve model using RESTUD code.
        cmd = TEST_RESOURCES_DIR + '/kw_dp3asim'
        subprocess.check_call(cmd, shell=True)

        # Solve model using RESPY package.
        simulate(respy_obj)

        # Compare the simulated datasets generated by the programs.
        py = pd.DataFrame(np.array(np.genfromtxt('data.respy.dat',
                missing_values='.'), ndmin=2)[:, -4:])

        fort = pd.DataFrame(np.array(np.genfromtxt('ftest.txt',
                missing_values='.'), ndmin=2)[:, -4:])

        assert_frame_equal(py, fort)
Example No. 9
    def test_7(self):
        """ This is a special test for shared functions related to the interpolation setup.
        """
        # Impose constraints
        point_constr = {"num_periods": np.random.randint(2, 5)}

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr)
        respy_obj = RespyCls(params_spec, options_spec)

        # Extract class attributes
        is_debug, num_periods = dist_class_attributes(respy_obj, "is_debug",
                                                      "num_periods")

        # Write out a grid for the interpolation
        max_states_period = write_interpolation_grid(respy_obj)

        # Draw random request for testing
        num_states = np.random.randint(1, max_states_period)
        candidates = list(range(num_states))

        period = np.random.randint(1, num_periods)
        num_points_interp = np.random.randint(1, num_states + 1)

        # Check function for random choice and make sure that there are no duplicates.
        args = (candidates, num_states, num_points_interp)
        f90 = fort_debug.wrapper_random_choice(*args)
        assert_equal(len(set(f90)), len(f90))
        assert_equal(len(f90), num_points_interp)

        # Check the standard cases of the function.
        args = (num_points_interp, num_states, period, is_debug, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)

        assert_equal(len(f90), num_states)
        assert_equal(np.all(f90) in [0, 1], True)

        # Test the standardization across PYTHON, F2PY, and FORTRAN implementations.
        # This is possible as we write out an interpolation grid to disk which is used
        # for both functions.
        base_args = (num_points_interp, num_states, period, is_debug)
        args = base_args
        py = get_simulated_indicator(*args)
        args = base_args + (num_periods, )
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        assert_array_equal(f90, 1 * py)
        os.unlink(".interpolation.respy.test")

        # Special case where the number of interpolation points is the same as the
        # number of candidates. In that case the returned indicator should be all TRUE.
        args = (num_states, num_states, period, True, num_periods)
        f90 = fort_debug.wrapper_get_simulated_indicator(*args)
        assert_equal(sum(f90), num_states)
Example No. 10
    def test_3(self):
        """ Testing whether the a simulated dataset and the evaluation of the criterion function
        are the same for a tiny delta and a myopic agent.
        """
        constr = {"estimation": {"maxfun": 0}}
        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          myopic=True)
        respy_obj = RespyCls(params_spec, options_spec)

        optim_paras, num_agents_sim, edu_spec = dist_class_attributes(
            respy_obj, "optim_paras", "num_agents_sim", "edu_spec")

        write_types(optim_paras["type_shares"], num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_lagged_start(num_agents_sim)

        # Iterate over alternative discount rates.
        base_data, base_val = None, None

        for delta in [0.00, 0.000001]:

            respy_obj = RespyCls(params_spec, options_spec)

            respy_obj.unlock()

            respy_obj.attr["optim_paras"]["delta"] = np.array([delta])

            respy_obj.lock()

            simulate_observed(respy_obj)

            # This part checks the equality of the simulated dataset across the two
            # discount factors.
            data_frame = pd.read_csv("data.respy.dat", delim_whitespace=True)

            if base_data is None:
                base_data = data_frame.copy()

            assert_frame_equal(base_data, data_frame)

            # This part checks the equality of an evaluation of the criterion function.
            _, crit_val = respy_obj.fit()

            if base_val is None:
                base_val = crit_val

            np.testing.assert_allclose(base_val,
                                       crit_val,
                                       rtol=1e-03,
                                       atol=1e-03)
Example No. 11
    def test_1(self):
        """ Compare the evaluation of the criterion function for the ambiguity
        optimization and the simulated expected future value between the FORTRAN
        and PYTHON implementations. These tests are set up as a separate test case
        due to the large setup cost to construct the ingredients for the interface.
        """
        # Generate constraint periods
        constraints = dict()
        constraints['version'] = 'PYTHON'

        # Generate random initialization file
        generate_init(constraints)

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')

        respy_obj = simulate(respy_obj)

        # Extract class attributes
        periods_payoffs_systematic, states_number_period, mapping_state_idx, \
        periods_emax, num_periods, states_all, num_draws_emax, edu_start, \
        edu_max, delta = \
            dist_class_attributes(respy_obj,
                'periods_payoffs_systematic', 'states_number_period',
                'mapping_state_idx', 'periods_emax', 'num_periods',
                'states_all', 'num_draws_emax', 'edu_start', 'edu_max',
                'delta')

        # Sample draws
        draws_standard = np.random.multivariate_normal(np.zeros(4),
                                                       np.identity(4),
                                                       (num_draws_emax, ))

        # Sampling of random period and admissible state index
        period = np.random.choice(range(num_periods))
        k = np.random.choice(range(states_number_period[period]))

        # Select systematic payoffs
        payoffs_systematic = periods_payoffs_systematic[period, k, :]

        # Evaluation of simulated expected future values
        args = (num_periods, num_draws_emax, period, k, draws_standard,
                payoffs_systematic, edu_max, edu_start, periods_emax,
                states_all, mapping_state_idx, delta)

        py = get_future_value(*args)
        f90 = fort_debug.wrapper_get_future_value(*args)

        np.testing.assert_allclose(py, f90, rtol=1e-05, atol=1e-06)
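
get_future_value simulates the expected value of the maximum over the four alternatives. A stripped-down sketch of that Monte Carlo step for a single state, with additive shocks and made-up payoffs and continuation values (the real model treats wage shocks differently):

import numpy as np

num_draws_emax, delta = 500, 0.95
payoffs_systematic = np.array([1.0, 1.2, 0.8, 0.5])  # one entry per alternative
continuation = np.array([10.0, 9.5, 11.0, 10.5])     # EMAX of the states reached next period

# Standard normal disturbances, one vector of shocks per draw.
draws_standard = np.random.multivariate_normal(np.zeros(4), np.identity(4), num_draws_emax)

# Total value of each alternative: current payoff plus shock plus discounted continuation value.
total_values = payoffs_systematic + draws_standard + delta * continuation

# The simulated expected future value is the average of the per-draw maxima.
emax = np.mean(np.max(total_values, axis=1))
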
Example No. 12
    def test_1(self):
        """ Compare the evaluation of the criterion function for the ambiguity
        optimization and the simulated expected future value between the FORTRAN
        and PYTHON implementations. These tests are set up as a separate test case
        due to the large setup cost to construct the ingredients for the interface.
        """
        # Generate constraint periods
        constraints = dict()
        constraints['version'] = 'PYTHON'

        # Generate random initialization file
        generate_init(constraints)

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')

        respy_obj = simulate(respy_obj)

        # Extract class attributes
        periods_payoffs_systematic, states_number_period, mapping_state_idx, \
        periods_emax, num_periods, states_all, num_draws_emax, edu_start, \
        edu_max, delta = \
            dist_class_attributes(respy_obj,
                'periods_payoffs_systematic', 'states_number_period',
                'mapping_state_idx', 'periods_emax', 'num_periods',
                'states_all', 'num_draws_emax', 'edu_start', 'edu_max',
                'delta')

        # Sample draws
        draws_standard = np.random.multivariate_normal(np.zeros(4),
                            np.identity(4), (num_draws_emax,))

        # Sampling of random period and admissible state index
        period = np.random.choice(range(num_periods))
        k = np.random.choice(range(states_number_period[period]))

        # Select systematic payoffs
        payoffs_systematic = periods_payoffs_systematic[period, k, :]

        # Evaluation of simulated expected future values
        args = (num_periods, num_draws_emax, period, k, draws_standard,
            payoffs_systematic, edu_max, edu_start, periods_emax, states_all,
            mapping_state_idx, delta)

        py = get_future_value(*args)
        f90 = fort_debug.wrapper_get_future_value(*args)

        np.testing.assert_allclose(py, f90, rtol=1e-05, atol=1e-06)
Example No. 13
    def check_estimation(self):
        """Check model attributes that are only relevant for estimation tasks."""
        # Check that class instance is locked.
        assert self.get_attr("is_locked")

        # Check that no other estimations are currently running in this directory.
        assert not os.path.exists(".estimation.respy.scratch")

        # Distribute class attributes
        (
            optimizer_options,
            optimizer_used,
            optim_paras,
            version,
            maxfun,
            num_paras,
            file_est,
        ) = dist_class_attributes(
            self,
            "optimizer_options",
            "optimizer_used",
            "optim_paras",
            "version",
            "maxfun",
            "num_paras",
            "file_est",
        )

        # Ensure that at least one parameter is free.
        if sum(optim_paras["paras_fixed"]) == num_paras:
            raise UserError("Estimation requires at least one free parameter")

        # Make sure the estimation dataset exists
        if not os.path.exists(file_est):
            raise UserError("Estimation dataset does not exist")

        if maxfun > 0:
            assert optimizer_used in optimizer_options.keys()

            # Make sure the requested optimizer is valid
            if version == "python":
                assert optimizer_used in OPT_EST_PYTH
            elif version == "fortran":
                assert optimizer_used in OPT_EST_FORT
            else:
                raise AssertionError

        return self
Example No. 14
def simulate(respy_obj):
    """ Simulate dataset of synthetic agent following the model specified in
    the initialization file.
    """
    # Cleanup
    for fname in ['sim.respy.log', 'sol.respy.log']:
        if os.path.exists(fname):
            os.unlink(fname)

    # Distribute class attributes
    is_debug, version, num_agents_sim, is_store = \
        dist_class_attributes(respy_obj, 'is_debug', 'version',
                'num_agents_sim', 'is_store')

    # Select appropriate interface
    if version in ['PYTHON']:
        solution, data_array = respy_interface(respy_obj, 'simulate')
    elif version in ['FORTRAN']:
        solution, data_array = resfort_interface(respy_obj, 'simulate')
    else:
        raise NotImplementedError

    # Attach solution to class instance
    respy_obj = add_solution(respy_obj, *solution)

    respy_obj.unlock()
    respy_obj.set_attr('is_solved', True)
    respy_obj.lock()

    # Store object to file
    if is_store:
        respy_obj.store('solution.respy.pkl')

    # Create pandas data frame with missing values.
    data_frame = pd.DataFrame(replace_missing_values(data_array))

    # Wrapping up by running some checks on the dataset and then writing out
    # the file and some basic information.
    if is_debug:
        check_dataset(data_frame, respy_obj, 'sim')

    write_out(respy_obj, data_frame)

    write_info(respy_obj, data_frame)

    # Finishing
    return respy_obj
Example No. 15
def process_dataset(respy_obj):
    """Process the dataset from disk."""
    num_agents_est, file_est, edu_spec, num_periods = dist_class_attributes(
        respy_obj, "num_agents_est", "file_est", "edu_spec", "num_periods")

    # Process dataset from files.
    data_frame = pd.read_csv(file_est,
                             delim_whitespace=True,
                             header=0,
                             na_values=".")
    data_frame.set_index(["Identifier", "Period"], drop=False, inplace=True)

    # We want to allow to estimate with only a subset of periods in the sample.
    cond = data_frame["Period"] < num_periods
    data_frame = data_frame[cond]

    # Only keep the information that is relevant for the estimation.
    # Once that is done, impose some type restrictions.
    data_frame = data_frame[DATA_LABELS_EST]
    data_frame = data_frame.astype(DATA_FORMATS_EST)

    # We want to restrict the sample to meet the specified initial conditions.
    cond = data_frame["Years_Schooling"].loc[:, 0].isin(edu_spec["start"])
    data_frame.set_index(["Identifier"], drop=False, inplace=True)
    data_frame = data_frame.loc[cond]

    # We now subset the dataframe to include only the number of agents that are
    # requested for the estimation. However, this requires adjusting
    # num_agents_est, as the dataset might actually be smaller once we restrict
    # the initial conditions.
    data_frame = data_frame.loc[data_frame.index.unique()[:num_agents_est]]
    data_frame.set_index(["Identifier", "Period"], drop=False, inplace=True)

    # We need to update the number of individuals for the estimation as the
    # whole dataset might actually be smaller.
    num_agents_est = data_frame["Identifier"].nunique()

    respy_obj.unlock()
    respy_obj.set_attr("num_agents_est", num_agents_est)
    respy_obj.lock()

    # Check the dataset against the initialization files.
    check_estimation_dataset(data_frame, respy_obj)

    # Finishing
    return data_frame
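
The restriction to admissible initial conditions relies on partial MultiIndex selection, which is easy to miss. A toy illustration of that step, with column names taken from the function above and made-up values:

import pandas as pd

df = pd.DataFrame({
    "Identifier": [0, 0, 1, 1, 2, 2],
    "Period": [0, 1, 0, 1, 0, 1],
    "Years_Schooling": [10, 10, 12, 13, 9, 9],
})
df = df.set_index(["Identifier", "Period"], drop=False)

# Selecting the second index level with .loc[:, 0] picks each agent's period-0 row,
# i.e. the initial years of schooling.
initial_schooling = df["Years_Schooling"].loc[:, 0]

# Keep only agents whose initial schooling is one of the admissible starting levels.
admissible = initial_schooling.isin([10, 12])
df = df[df["Identifier"].isin(admissible[admissible].index)]
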
Example No. 16
def write_interpolation_grid(respy_obj):
    """ Write out an interpolation grid that can be used across
    implementations.
    """

    # Distribute class attribute
    num_periods, num_points_interp, edu_spec, num_types = dist_class_attributes(
        respy_obj, "num_periods", "num_points_interp", "edu_spec", "num_types")

    # Determine maximum number of states
    state_space = StateSpace(num_periods, num_types, edu_spec["start"],
                             edu_spec["max"])

    states_number_period = state_space.states_per_period
    max_states_period = max(states_number_period)

    # Initialize container
    booleans = np.full((max_states_period, num_periods), True)

    # Iterate over all periods
    for period in range(num_periods):

        # Construct auxiliary objects
        num_states = states_number_period[period]
        any_interpolation = (num_states - num_points_interp) > 0

        # Check applicability
        if not any_interpolation:
            continue

        # Draw points for interpolation
        indicators = np.random.choice(range(num_states),
                                      size=(num_states - num_points_interp),
                                      replace=False)

        # Replace indicators
        for i in range(num_states):
            if i in indicators:
                booleans[i, period] = False

    # Write out to file
    np.savetxt(".interpolation.respy.test", booleans, fmt="%s")

    # Some information that is useful elsewhere.
    return max_states_period
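
The membership loop over all states can be collapsed into one vectorized assignment without changing behavior. A sketch of the equivalent draw for a single period, assuming only numpy:

import numpy as np

num_states, num_points_interp = 20, 8

booleans = np.full(num_states, True)

# Mark the states that will NOT be solved exactly; they receive interpolated values.
not_simulated = np.random.choice(num_states, size=num_states - num_points_interp, replace=False)
booleans[not_simulated] = False

assert booleans.sum() == num_points_interp
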
Example No. 17
def write_interpolation_grid(file_name):
    """ Write out an interpolation grid that can be used across
    implementations.
    """
    # Process relevant initialization file
    respy_obj = RespyCls(file_name)

    # Distribute class attribute
    num_periods, num_points_interp, edu_start, edu_max, min_idx = \
        dist_class_attributes(respy_obj,
            'num_periods', 'num_points_interp', 'edu_start', 'edu_max', 'min_idx')

    # Determine maximum number of states
    _, states_number_period, _, max_states_period = \
        pyth_create_state_space(num_periods, edu_start, edu_max, min_idx)

    # Initialize container
    booleans = np.tile(True, (max_states_period, num_periods))

    # Iterate over all periods
    for period in range(num_periods):

        # Construct auxiliary objects
        num_states = states_number_period[period]
        any_interpolation = (num_states - num_points_interp) > 0

        # Check applicability
        if not any_interpolation:
            continue

        # Draw points for interpolation
        indicators = np.random.choice(range(num_states),
            size=(num_states - num_points_interp), replace=False)

        # Replace indicators
        for i in range(num_states):
            if i in indicators:
                booleans[i, period] = False

    # Write out to file
    np.savetxt('interpolation.txt', booleans, fmt='%s')

    # Some information that is useful elsewhere.
    return max_states_period
Example No. 18
def scripts_check(request, respy_obj):
    """ Wrapper for the estimation.
    """

    # Distribute model parameters
    num_periods, edu_spec, num_types, optim_paras = dist_class_attributes(
        respy_obj, "num_periods", "edu_spec", "num_types", "optim_paras"
    )

    # We need to run additional checks if an estimation is requested.
    if request == "estimate":
        # Create the grid of the admissible states.
        state_space = StateSpace(
            num_periods, num_types, edu_spec["start"], edu_spec["max"], optim_paras
        )

        # We also check the structure of the dataset.
        data_array = process_dataset(respy_obj).to_numpy()
        num_rows = data_array.shape[0]

        for j in range(num_rows):
            period = int(data_array[j, 1])
            # Extract observable components of state space as well as agent decision.
            exp_a, exp_b, edu, choice_lagged = data_array[j, 4:].astype(int)

            # First of all, we need to ensure that all observed years of schooling are
            # larger than the initial condition of the model.
            try:
                np.testing.assert_equal(edu >= 0, True)
            except AssertionError:
                raise UserError(ERR_MSG)

            # Get state indicator to obtain the systematic component of the agent's
            # rewards. This might fail either because the state is simply infeasible at
            # any period or just not defined for the particular period requested.
            try:
                k = state_space.indexer[period, exp_a, exp_b, edu, choice_lagged - 1]
                np.testing.assert_equal(k >= 0, True)
            except (IndexError, AssertionError):
                raise UserError(ERR_MSG)

        # We also take a special look at the optimizer options.
        respy_obj.check_estimation()
Example No. 19
    def test_1(self):
        """ This is the special case where the EMAX better be equal to the MAXE.
        """
        # Set initial constraints
        constr = {
            "interpolation": {
                "flag": False
            },
            "num_periods": np.random.randint(3, 6),
        }

        params_spec, options_spec = generate_random_model(point_constr=constr,
                                                          deterministic=True)

        baseline = None

        a = []

        # Solve with and without interpolation code
        for _ in range(2):
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax, state_space = dist_class_attributes(
                respy_obj, "states_number_period", "periods_emax",
                "state_space")

            a.append(state_space)

            # Store and check results
            if baseline is None:
                baseline = periods_emax.copy()
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration. This ensures that there is at least one
            # interpolation taking place.
            options_spec["interpolation"]["points"] = max(
                states_number_period) - 1
            options_spec["interpolation"]["flag"] = True
Example No. 20
    def test_2(self):
        """ This test compares the results from a solution using the
        interpolation code for the special case where the number of interpolation
        points is exactly the number of states in the final period. In this case
        the interpolation code is run and then all predicted values are replaced
        with their actual values.
        """
        # Set initial constraints
        constraints = dict()
        constraints['flag_interpolation'] = False
        constraints['periods'] = np.random.randint(3, 6)

        # Initialize request
        init_dict = generate_random_dict(constraints)
        baseline = None

        # Solve with and without interpolation code
        for _ in range(2):

            # Write out request
            print_init_dict(init_dict)

            # Process and solve
            respy_obj = RespyCls('test.respy.ini')
            respy_obj = simulate(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax = \
                dist_class_attributes(respy_obj,
                    'states_number_period', 'periods_emax')

            # Store and check results
            if baseline is None:
                baseline = periods_emax
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration
            init_dict['INTERPOLATION']['points'] = max(states_number_period)
            init_dict['INTERPOLATION']['flag'] = True
Example No. 21
    def test_1(self):
        """ This is the special case where the EMAX better be equal to the MAXE.
        """
        # Set initial constraints
        constraints = dict()
        constraints['flag_interpolation'] = False
        constraints['periods'] = np.random.randint(3, 6)
        constraints['is_deterministic'] = True

        # Initialize request
        init_dict = generate_random_dict(constraints)
        baseline = None

        # Solve with and without interpolation code
        for _ in range(2):

            # Write out request
            print_init_dict(init_dict)

            # Process and solve
            respy_obj = RespyCls('test.respy.ini')
            respy_obj = simulate(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax = \
                dist_class_attributes(respy_obj,
                    'states_number_period', 'periods_emax')

            # Store and check results
            if baseline is None:
                baseline = periods_emax
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration. This ensures that there is at least
            # one interpolation taking place.
            init_dict['INTERPOLATION']['points'] = max(
                states_number_period) - 1
            init_dict['INTERPOLATION']['flag'] = True
Example No. 22
    def test_1(self):
        """ This is the special case where the EMAX better be equal to the MAXE.
        """
        # Set initial constraints
        constraints = dict()
        constraints['flag_interpolation'] = False
        constraints['periods'] = np.random.randint(3, 6)
        constraints['is_deterministic'] = True

        # Initialize request
        init_dict = generate_random_dict(constraints)
        baseline = None

        # Solve with and without interpolation code
        for _ in range(2):

            # Write out request
            print_init_dict(init_dict)

            # Process and solve
            respy_obj = RespyCls('test.respy.ini')
            respy_obj = simulate(respy_obj)

            # Extract class attributes
            states_number_period, periods_emax = \
                dist_class_attributes(respy_obj,
                    'states_number_period', 'periods_emax')

            # Store and check results
            if baseline is None:
                baseline = periods_emax
            else:
                np.testing.assert_array_almost_equal(baseline, periods_emax)

            # Updates for second iteration. This ensures that there is at least
            # one interpolation taking place.
            init_dict['INTERPOLATION']['points'] = max(states_number_period) - 1
            init_dict['INTERPOLATION']['flag'] = True
Example No. 23
def add_gradient_information(respy_obj):
    """ This function adds information about the gradient to the information
    files. It is not part of the estimation modules as it breaks the design
    and requires carrying additional attributes. This results in considerable
    overhead, which appears justified at this point.
    """

    model_paras, is_debug, paras_fixed, derivatives = \
        dist_class_attributes(respy_obj, 'model_paras', 'is_debug',
            'paras_fixed', 'derivatives')

    # Auxiliary objects
    coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = \
        dist_model_paras(model_paras, is_debug)

    # Construct starting values
    x_all_start = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
                                  shocks_cholesky, 'all', paras_fixed,
                                  is_debug)

    x_free_start = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
                                   shocks_cholesky, 'free', paras_fixed,
                                   is_debug)

    # Construct auxiliary information
    num_free = len(x_free_start)

    # The information about the gradient is simply added to the original
    # information later. Note that the original file is read before the
    # gradient evaluation. This is required as the information otherwise
    # accounts for the multiple function evaluation during the gradient
    # approximation scheme.
    original_lines = open('est.respy.info', 'r').readlines()
    fmt_ = '{0:<25}{1:>15}\n'
    original_lines[-5] = fmt_.format(*[' Number of Steps', 0])
    original_lines[-3] = fmt_.format(*[' Number of Evaluations', num_free])

    # Approximate gradient by forward finite differences.
    grad, ei = np.zeros((num_free, ), float), np.zeros((26, ), float)
    dfunc_eps = derivatives[1]

    # Making sure that the criterion is only evaluated at the relevant
    # starting values.
    respy_obj.unlock()
    respy_obj.set_attr('maxfun', 0)
    respy_obj.lock()

    _, f0 = estimate(respy_obj)

    for k, i in enumerate(np.where(np.logical_not(paras_fixed))[0].tolist()):
        x_baseline = x_all_start.copy()

        ei[i] = 1.0
        d = dfunc_eps * ei
        respy_obj.update_model_paras(x_baseline + d)

        _, f1 = estimate(respy_obj)

        grad[k] = (f1 - f0) / d[i]
        ei[i] = 0.0

    # Convert to a list so the free entries can be popped below.
    grad = grad.tolist()
    norm = np.amax(np.abs(grad))

    # Write out extended information
    with open('est.respy.info', 'a') as out_file:
        # Insert information about gradient
        out_file.write('\n\n\n\n Gradient\n\n')
        fmt_ = '{0:>15}    {1:>15}\n\n'
        out_file.write(fmt_.format(*['Identifier', 'Start']))
        fmt_ = '{0:>15}    {1:15.4f}\n'

        # Iterate over all candidate values, but only write the free
        # ones to file. This ensures that the identifiers line up.
        for j in range(26):
            is_fixed = paras_fixed[j]
            if not is_fixed:
                values = [j, grad.pop(0)]
                out_file.write(fmt_.format(*values))

        out_file.write('\n')

        # Add value of infinity norm
        values = ['Norm', norm]
        out_file.write(fmt_.format(*values))
        out_file.write('\n\n')
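
The loop above approximates the gradient by forward finite differences. A compact, self-contained version of the same scheme on a toy criterion function (the function and the step size are made up):

import numpy as np

def criterion(x):
    return np.sum(x ** 2)

def forward_gradient(func, x, eps=1e-6):
    # grad_i = (f(x + eps * e_i) - f(x)) / eps
    f0 = func(x)
    grad = np.zeros_like(x)
    for i in range(x.size):
        step = np.zeros_like(x)
        step[i] = eps
        grad[i] = (func(x + step) - f0) / eps
    return grad

x = np.array([1.0, -2.0, 0.5])
print(forward_gradient(criterion, x))  # close to 2 * x
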
Example No. 24
def respy_interface(respy_obj, request, data=None):
    """Provide the interface to the PYTHON functionality."""
    # Distribute class attributes
    (
        optim_paras,
        num_periods,
        edu_spec,
        is_debug,
        num_draws_prob,
        seed_prob,
        num_draws_emax,
        seed_emax,
        is_interpolated,
        num_points_interp,
        maxfun,
        optimizer_used,
        tau,
        optimizer_options,
        seed_sim,
        num_agents_sim,
        file_sim,
        precond_spec,
        num_types,
        num_paras,
        num_agents_est,
    ) = dist_class_attributes(
        respy_obj,
        "optim_paras",
        "num_periods",
        "edu_spec",
        "is_debug",
        "num_draws_prob",
        "seed_prob",
        "num_draws_emax",
        "seed_emax",
        "is_interpolated",
        "num_points_interp",
        "maxfun",
        "optimizer_used",
        "tau",
        "optimizer_options",
        "seed_sim",
        "num_agents_sim",
        "file_sim",
        "precond_spec",
        "num_types",
        "num_paras",
        "num_agents_est",
    )

    if request == "estimate":

        periods_draws_prob = create_draws(
            num_periods, num_draws_prob, seed_prob, is_debug
        )
        periods_draws_emax = create_draws(
            num_periods, num_draws_emax, seed_emax, is_debug
        )

        # Construct starting values
        x_optim_free_unscaled_start = get_optim_paras(
            optim_paras, num_paras, "free", is_debug
        )
        x_optim_all_unscaled_start = get_optim_paras(
            optim_paras, num_paras, "all", is_debug
        )

        # Construct the state space
        state_space = StateSpace(
            num_periods, num_types, edu_spec["start"], edu_spec["max"]
        )

        # Collect arguments that are required for the criterion function.
        # These must be in the correct order already.
        args = (
            is_interpolated,
            num_points_interp,
            is_debug,
            data,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            state_space,
        )

        # The special case where just one evaluation at the starting values is
        # requested is handled below. Note that the relevant value of the criterion
        # function is always the one indicated by the class attribute and not the
        # value returned by the optimization algorithm.
        num_free = optim_paras["paras_fixed"].count(False)

        # Take only bounds from unfixed parameters and insert default bounds.
        mask_paras_fixed = np.array(optim_paras["paras_fixed"])
        paras_bounds_free_unscaled = np.array(optim_paras["paras_bounds"])[
            ~mask_paras_fixed
        ]
        paras_bounds_free_unscaled[:, 0] = np.where(
            paras_bounds_free_unscaled[:, 0] == None,  # noqa: E711
            -HUGE_FLOAT,
            paras_bounds_free_unscaled[:, 0],
        )
        paras_bounds_free_unscaled[:, 1] = np.where(
            paras_bounds_free_unscaled[:, 1] == None,  # noqa: E711
            HUGE_FLOAT,
            paras_bounds_free_unscaled[:, 1],
        )

        record_estimation_scaling(
            x_optim_free_unscaled_start,
            None,
            None,
            None,
            optim_paras["paras_fixed"],
            True,
        )

        precond_matrix = get_precondition_matrix(
            precond_spec,
            optim_paras,
            x_optim_all_unscaled_start,
            args,
            maxfun,
            num_paras,
            num_types,
        )

        x_optim_free_scaled_start = apply_scaling(
            x_optim_free_unscaled_start, precond_matrix, "do"
        )

        paras_bounds_free_scaled = np.full((num_free, 2), np.nan)
        for i in range(2):
            paras_bounds_free_scaled[:, i] = apply_scaling(
                paras_bounds_free_unscaled[:, i], precond_matrix, "do"
            )

        record_estimation_scaling(
            x_optim_free_unscaled_start,
            x_optim_free_scaled_start,
            paras_bounds_free_scaled,
            precond_matrix,
            optim_paras["paras_fixed"],
            False,
        )

        opt_obj = OptimizationClass(
            x_optim_all_unscaled_start,
            optim_paras["paras_fixed"],
            precond_matrix,
            num_types,
        )
        opt_obj.maxfun = maxfun

        if maxfun == 0:

            record_estimation_scalability("Start")
            opt_obj.crit_func(x_optim_free_scaled_start, *args)
            record_estimation_scalability("Finish")

            success = True
            message = "Single evaluation of criterion function at starting values."

        elif optimizer_used == "SCIPY-BFGS":

            bfgs_maxiter = optimizer_options["SCIPY-BFGS"]["maxiter"]
            bfgs_gtol = optimizer_options["SCIPY-BFGS"]["gtol"]
            bfgs_eps = optimizer_options["SCIPY-BFGS"]["eps"]

            try:
                rslt = fmin_bfgs(
                    opt_obj.crit_func,
                    x_optim_free_scaled_start,
                    args=args,
                    gtol=bfgs_gtol,
                    epsilon=bfgs_eps,
                    maxiter=bfgs_maxiter,
                    full_output=True,
                    disp=False,
                )

                success = rslt[6] not in [1, 2]
                message = "Optimization terminated successfully."
                if rslt[6] == 1:
                    message = "Maximum number of iterations exceeded."
                elif rslt[6] == 2:
                    message = "Gradient and/or function calls not changing."

            except MaxfunError:
                success = False
                message = "Maximum number of iterations exceeded."

        elif optimizer_used == "SCIPY-LBFGSB":

            lbfgsb_maxiter = optimizer_options["SCIPY-LBFGSB"]["maxiter"]
            lbfgsb_maxls = optimizer_options["SCIPY-LBFGSB"]["maxls"]
            lbfgsb_factr = optimizer_options["SCIPY-LBFGSB"]["factr"]
            lbfgsb_pgtol = optimizer_options["SCIPY-LBFGSB"]["pgtol"]
            lbfgsb_eps = optimizer_options["SCIPY-LBFGSB"]["eps"]
            lbfgsb_m = optimizer_options["SCIPY-LBFGSB"]["m"]

            try:
                rslt = fmin_l_bfgs_b(
                    opt_obj.crit_func,
                    x_optim_free_scaled_start,
                    args=args,
                    approx_grad=True,
                    bounds=paras_bounds_free_scaled,
                    m=lbfgsb_m,
                    factr=lbfgsb_factr,
                    pgtol=lbfgsb_pgtol,
                    epsilon=lbfgsb_eps,
                    iprint=-1,
                    maxfun=maxfun,
                    maxiter=lbfgsb_maxiter,
                    maxls=lbfgsb_maxls,
                )

                success = rslt[2]["warnflag"] in [0]
                message = rslt[2]["task"]

            except MaxfunError:
                success = False
                message = "Maximum number of iterations exceeded."

        elif optimizer_used == "SCIPY-POWELL":

            powell_maxiter = optimizer_options["SCIPY-POWELL"]["maxiter"]
            powell_maxfun = optimizer_options["SCIPY-POWELL"]["maxfun"]
            powell_xtol = optimizer_options["SCIPY-POWELL"]["xtol"]
            powell_ftol = optimizer_options["SCIPY-POWELL"]["ftol"]

            try:
                rslt = fmin_powell(
                    opt_obj.crit_func,
                    x_optim_free_scaled_start,
                    args,
                    powell_xtol,
                    powell_ftol,
                    powell_maxiter,
                    powell_maxfun,
                    disp=0,
                )

                success = rslt[5] not in [1, 2]
                message = "Optimization terminated successfully."
                if rslt[5] == 1:
                    message = "Maximum number of function evaluations."
                elif rslt[5] == 2:
                    message = "Maximum number of iterations."

            except MaxfunError:
                success = False
                message = "Maximum number of iterations exceeded."

        else:
            raise NotImplementedError

        record_estimation_final(success, message)
        record_estimation_stop()

    elif request == "simulate":

        # Draw draws for the simulation.
        periods_draws_sims = create_draws(
            num_periods, num_agents_sim, seed_sim, is_debug
        )

        # Draw standard normal deviates for the solution and evaluation step.
        periods_draws_emax = create_draws(
            num_periods, num_draws_emax, seed_emax, is_debug
        )

        # Collect arguments for different implementations of the simulation.
        state_space = pyth_solve(
            is_interpolated,
            num_points_interp,
            num_periods,
            is_debug,
            periods_draws_emax,
            edu_spec,
            optim_paras,
            file_sim,
            num_types,
        )

        simulated_data = pyth_simulate(
            state_space,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec,
            optim_paras,
            is_debug,
        )

        args = (state_space, simulated_data)

    else:
        raise NotImplementedError("This request is not implemented.")

    return args
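
Before calling the optimizer, missing bounds stored as None have to be replaced by large finite numbers, and the legacy scipy interface used above reports its status in a dictionary. A minimal sketch of that call on a toy criterion; the bounds, starting values, and criterion are made up.

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

HUGE_FLOAT = 1.0e20

def crit_func(x):
    return np.sum((x - 1.0) ** 2)

# Replace missing bounds by a very large magnitude so the optimizer sees finite numbers.
raw_bounds = [(None, None), (0.0, None), (None, 2.0)]
bounds = [
    (lo if lo is not None else -HUGE_FLOAT, hi if hi is not None else HUGE_FLOAT)
    for lo, hi in raw_bounds
]

x0 = np.zeros(3)
x_opt, f_opt, info = fmin_l_bfgs_b(crit_func, x0, approx_grad=True, bounds=bounds, iprint=-1)
success = info["warnflag"] == 0
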
Example No. 25
    def test_4(self):
        """ Testing the core functions of the solution step for the equality
        of results between the PYTHON and FORTRAN implementations.
        """

        # Generate random initialization file
        generate_init()

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')

        # Ensure that backward induction routines use the same grid for the
        # interpolation.
        write_interpolation_grid('test.respy.ini')

        # Extract class attributes
        num_periods, edu_start, edu_max, min_idx, model_paras, num_draws_emax, \
            seed_emax, is_debug, delta, is_interpolated, num_points_interp, = \
                dist_class_attributes(respy_obj,
                    'num_periods', 'edu_start', 'edu_max', 'min_idx',
                    'model_paras', 'num_draws_emax', 'seed_emax', 'is_debug',
                    'delta', 'is_interpolated', 'num_points_interp')

        # Auxiliary objects
        coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = \
            dist_model_paras(model_paras, is_debug)

        # Check the state space creation.
        args = (num_periods, edu_start, edu_max, min_idx)
        pyth = pyth_create_state_space(*args)
        f2py = fort_debug.f2py_create_state_space(*args)
        for i in range(4):
            np.testing.assert_allclose(pyth[i], f2py[i])

        # Carry some results from the state space creation for future use.
        states_all, states_number_period = pyth[:2]
        mapping_state_idx, max_states_period = pyth[2:]

        # Cutting to size
        states_all = states_all[:, :max(states_number_period), :]

        # Check calculation of systematic components of payoffs.
        args = (num_periods, states_number_period, states_all, edu_start,
            coeffs_a, coeffs_b, coeffs_edu, coeffs_home, max_states_period)
        pyth = pyth_calculate_payoffs_systematic(*args)
        f2py = fort_debug.f2py_calculate_payoffs_systematic(*args)
        np.testing.assert_allclose(pyth, f2py)

        # Carry some results from the systematic payoff calculation for
        # future use and create the required set of disturbances.
        periods_draws_emax = create_draws(num_periods, num_draws_emax,
            seed_emax, is_debug)

        periods_payoffs_systematic = pyth

        # Check backward induction procedure.
        args = (num_periods, max_states_period, periods_draws_emax,
            num_draws_emax, states_number_period, periods_payoffs_systematic,
            edu_max, edu_start, mapping_state_idx, states_all, delta,
            is_debug, is_interpolated, num_points_interp, shocks_cholesky)

        pyth = pyth_backward_induction(*args)

        f2py = fort_debug.f2py_backward_induction(*args)
        np.testing.assert_allclose(pyth, f2py)
Ejemplo n.º 27
0
def scripts_compare(base_init, is_update):
    """Construct some model fit statistics by comparing the observed and simulated
    dataset."""
    # In case of updating, we create a new initialization file that contains the updated
    # parameter values.
    if is_update:
        init_file = "compare.respy.ini"
        shutil.copy(base_init, init_file)
        scripts_update(init_file)
    else:
        init_file = base_init

    # Read in relevant model specification.
    respy_obj = RespyCls(init_file)
    respy_obj.write_out("compare.respy.ini")

    # Distribute some information for further processing.
    num_periods, num_agents_est, num_agents_sim = dist_class_attributes(
        respy_obj, "num_periods", "num_agents_est", "num_agents_sim")

    # The comparison does not make sense if the files for the simulated and the
    # estimation dataset are the same, as the simulation would then overwrite the
    # estimation dataset.
    fname_est = respy_obj.attr["file_est"].split(".")[0]
    fname_sim = respy_obj.attr["file_sim"].split(".")[0]
    if fname_est == fname_sim:
        raise UserError(" Simulation would overwrite estimation dataset")
    data_obs = process_dataset(respy_obj)
    data_sim = respy_obj.simulate()[1]

    if num_periods > 1:
        tf = []
        tf += [construct_transition_matrix(data_obs)]
        tf += [construct_transition_matrix(data_sim)]

    # Distribute class attributes
    max_periods = len(data_obs["Period"].unique())

    # Prepare results
    rslt_initial = _prepare_initial(data_obs, data_sim, num_agents_est,
                                    num_agents_sim)
    rslt_choice, rmse_choice = _prepare_choices(data_obs, data_sim)
    rslt_a = _prepare_wages(data_obs, data_sim, "Occupation A")
    rslt_b = _prepare_wages(data_obs, data_sim, "Occupation B")

    with open("compare.respy.info", "w") as file_:

        file_.write("\n Comparing the Observed and Simulated Economy\n\n")

        file_.write("   Number of Periods:      " + str(max_periods) + "\n\n")

        file_.write("\n   Initial Schooling Shares \n\n")
        fmt_ = "{:>15}" * 3 + "\n"
        labels = ["Level", "Observed", "Simulated"]
        file_.write(fmt_.format(*labels) + "\n")
        for info in rslt_initial:
            info[1:] = [format_float(x) for x in info[1:]]
            file_.write(fmt_.format(*info))

        # Comparing the choice distributions
        file_.write("\n\n   Choices \n\n")
        fmt_ = "{:>15}" * 7 + "\n"
        labels = ["Data", "Period", "Count", "White", "Blue", "School", "Home"]
        file_.write(fmt_.format(*labels) + "\n")
        for period in range(max_periods):
            for name in ["Observed", "Simulated"]:
                line = [name, period + 1] + rslt_choice[name][period]
                fmt_ = "{:>15}" * 3 + "{:15.2f}" * 4 + "\n"
                file_.write(fmt_.format(*line))
            file_.write("\n")
        line = "   Overall RMSE {:14.5f}\n".format(rmse_choice)
        file_.write(line)

        # Comparing the transition matrices
        if num_periods > 1:
            file_.write("\n\n   Transition Matrix \n\n")
            fmt_ = "{:>15}" * 6 + "\n\n"
            labels = ["Work A", "Work B", "School", "Home"]
            file_.write(fmt_.format(*["", ""] + labels))
            for i in range(4):
                for j, source in enumerate(["Observed", "Simulated"]):
                    fmt_ = "{:>15}{:>15}" + "{:15.4f}" * 4 + "\n"
                    line = [source, labels[i]] + tf[j][i, :].tolist()
                    file_.write(fmt_.format(*line))
                file_.write("\n")

        # Comparing the wages distributions
        file_.write("\n   Outcomes \n\n")
        fmt_ = "{:>15}" * 8 + "\n"

        labels = []
        labels += ["Data", "Period", "Count", "Mean", "Std."]
        labels += ["25%", "50%", "75%"]

        file_.write(fmt_.format(*labels) + "\n")
        for rslt, name in [(rslt_a, "Occupation A"), (rslt_b, "Occupation B")]:
            file_.write("\n    " + name + " \n\n")
            for period in range(max_periods):
                for label in ["Observed", "Simulated"]:
                    counts = int(rslt[label][period][0])
                    line = [label, period + 1, counts]
                    # The occurrence of NAN requires special care.
                    stats = rslt[label][period][1:]
                    stats = [format_float(x) for x in stats]
                    file_.write(fmt_.format(*line + stats))
                file_.write("\n")
Ejemplo n.º 28
0
def respy_interface(respy_obj, request, data_array=None):
    """ This function provides the interface to the PYTHOn functionality.
    """
    # Distribute class attributes
    model_paras, num_periods, num_agents_est, edu_start, is_debug, edu_max, \
        delta, num_draws_prob, seed_prob, num_draws_emax, seed_emax, \
        min_idx, is_myopic, is_interpolated, num_points_interp, maxfun, \
        optimizer_used, tau, paras_fixed, optimizer_options, seed_sim, \
        num_agents_sim, derivatives = dist_class_attributes( respy_obj,
            'model_paras', 'num_periods', 'num_agents_est', 'edu_start',
            'is_debug', 'edu_max', 'delta', 'num_draws_prob', 'seed_prob',
            'num_draws_emax', 'seed_emax', 'min_idx', 'is_myopic',
            'is_interpolated', 'num_points_interp', 'maxfun', 'optimizer_used',
            'tau', 'paras_fixed', 'optimizer_options', 'seed_sim',
            'num_agents_sim', 'derivatives')

    # Auxiliary objects
    dfunc_eps = derivatives[1]

    coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = \
        dist_model_paras(model_paras, is_debug)

    if request == 'estimate':
        # Check that selected optimizer is in line with version of program.
        if maxfun > 0:
            assert optimizer_used in OPTIMIZERS_PYTH

        periods_draws_prob = create_draws(num_periods, num_draws_prob,
                                          seed_prob, is_debug)

        # Draw standard normal deviates for the solution and evaluation step.
        periods_draws_emax = create_draws(num_periods, num_draws_emax,
                                          seed_emax, is_debug)

        # Construct starting values
        x_free_start = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu,
                                       coeffs_home, shocks_cholesky, 'free',
                                       paras_fixed, is_debug)

        x_all_start = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu,
                                      coeffs_home, shocks_cholesky, 'all',
                                      paras_fixed, is_debug)

        # Collect arguments that are required for the criterion function. These
        # must be in the correct order already.
        args = (is_interpolated, num_draws_emax, num_periods,
                num_points_interp, is_myopic, edu_start, is_debug, edu_max,
                min_idx, delta, data_array, num_agents_est, num_draws_prob,
                tau, periods_draws_emax, periods_draws_prob)

        # Handle the special case where only an evaluation at the starting
        # values is requested. Note that the relevant value of the criterion
        # function is always the one indicated by the class attribute and not
        # the value returned by the optimization algorithm.
        opt_obj = OptimizationClass()

        opt_obj.maxfun = maxfun
        opt_obj.paras_fixed = paras_fixed
        opt_obj.x_all_start = x_all_start

        if maxfun == 0:
            opt_obj.crit_func(x_free_start, *args)
            success = True
            message = 'Single evaluation of criterion function at starting ' \
                      'values.'

        elif optimizer_used == 'SCIPY-BFGS':

            bfgs_maxiter = optimizer_options['SCIPY-BFGS']['maxiter']
            bfgs_gtol = optimizer_options['SCIPY-BFGS']['gtol']

            try:
                rslt = fmin_bfgs(opt_obj.crit_func,
                                 x_free_start,
                                 args=args,
                                 gtol=bfgs_gtol,
                                 epsilon=dfunc_eps,
                                 maxiter=bfgs_maxiter,
                                 full_output=True,
                                 disp=False)

                success = (rslt[6] not in [1, 2])
                message = 'Optimization terminated successfully.'
                if rslt[6] == 1:
                    message = 'Maximum number of iterations exceeded.'
                elif rslt[6] == 2:
                    message = 'Gradient and/or function calls not changing.'

            except MaxfunError:
                success = False
                message = 'Maximum number of iterations exceeded.'

        elif optimizer_used == 'SCIPY-POWELL':

            powell_maxiter = optimizer_options['SCIPY-POWELL']['maxiter']
            powell_maxfun = optimizer_options['SCIPY-POWELL']['maxfun']
            powell_xtol = optimizer_options['SCIPY-POWELL']['xtol']
            powell_ftol = optimizer_options['SCIPY-POWELL']['ftol']

            try:
                rslt = fmin_powell(opt_obj.crit_func,
                                   x_free_start,
                                   args,
                                   powell_xtol,
                                   powell_ftol,
                                   powell_maxiter,
                                   powell_maxfun,
                                   full_output=True,
                                   disp=0)

                success = (rslt[5] not in [1, 2])
                message = 'Optimization terminated successfully.'
                if rslt[5] == 1:
                    message = 'Maximum number of function evaluations.'
                elif rslt[5] == 2:
                    message = 'Maximum number of iterations.'

            except MaxfunError:
                success = False
                message = 'Maximum number of iterations exceeded.'

        record_estimation_final(opt_obj, success, message)
        record_estimation_stop()

    elif request == 'simulate':

        # Draw draws for the simulation.
        periods_draws_sims = create_draws(num_periods, num_agents_sim,
                                          seed_sim, is_debug)

        # Draw standard normal deviates for the solution and evaluation step.
        periods_draws_emax = create_draws(num_periods, num_draws_emax,
                                          seed_emax, is_debug)

        # Collect arguments to pass in different implementations of the
        # simulation.
        periods_payoffs_systematic, states_number_period, mapping_state_idx, \
            periods_emax, states_all = pyth_solve(coeffs_a, coeffs_b,
            coeffs_edu, coeffs_home, shocks_cholesky, is_interpolated,
            num_draws_emax, num_periods, num_points_interp, is_myopic,
            edu_start, is_debug, edu_max, min_idx, delta, periods_draws_emax)

        solution = (periods_payoffs_systematic, states_number_period,
                    mapping_state_idx, periods_emax, states_all)

        data_array = pyth_simulate(periods_payoffs_systematic,
                                   mapping_state_idx, periods_emax, states_all,
                                   shocks_cholesky, num_periods, edu_start,
                                   edu_max, delta, num_agents_sim,
                                   periods_draws_sims, seed_sim)

        args = (solution, data_array)
    else:
        raise AssertionError

    return args
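
# Illustrative sketch (not respy code) of the fmin_bfgs return convention used
# above: with full_output=True the result tuple is (xopt, fopt, gopt, Bopt,
# func_calls, grad_calls, warnflag), so the warnflag sits at index 6
# (1 = maximum iterations exceeded, 2 = gradient and/or function calls not
# changing). The toy criterion below is purely illustrative.
import numpy as np
from scipy.optimize import fmin_bfgs


def _toy_crit_func(x):
    """A simple quadratic bowl standing in for the real criterion function."""
    return np.sum((x - 2.0) ** 2)


rslt = fmin_bfgs(_toy_crit_func, np.zeros(2), gtol=1e-6, epsilon=1e-8,
                 maxiter=500, full_output=True, disp=False)

success = rslt[6] not in [1, 2]
message = 'Optimization terminated successfully.'
if rslt[6] == 1:
    message = 'Maximum number of iterations exceeded.'
elif rslt[6] == 2:
    message = 'Gradient and/or function calls not changing.'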
Ejemplo n.º 29
0
def resfort_interface(respy_obj, request, data_array=None):
    """ This function provides the interface to the FORTRAN functionality.
    """
    # Add mock specification for FORTRAN optimizers if not defined by user.
    # This is required so the initialization file for FORTRAN is complete.
    respy_obj = add_optimizers(respy_obj)

    # Distribute class attributes
    model_paras, num_periods, edu_start, is_debug, edu_max, delta, \
        num_draws_emax, seed_emax, is_interpolated, num_points_interp, \
        is_myopic, min_idx, tau, is_parallel, num_procs, \
        num_agents_sim, num_draws_prob, num_agents_est, seed_prob, seed_sim, \
        paras_fixed, optimizer_options, optimizer_used, maxfun, paras_fixed, \
        derivatives, scaling = dist_class_attributes(respy_obj,
                'model_paras', 'num_periods', 'edu_start', 'is_debug',
                'edu_max', 'delta', 'num_draws_emax', 'seed_emax',
                'is_interpolated', 'num_points_interp', 'is_myopic', 'min_idx',
                'tau', 'is_parallel', 'num_procs', 'num_agents_sim',
                'num_draws_prob', 'num_agents_est', 'seed_prob', 'seed_sim',
                'paras_fixed', 'optimizer_options', 'optimizer_used',
                'maxfun', 'paras_fixed', 'derivatives', 'scaling')

    dfunc_eps = derivatives[1]
    is_scaled, scale_minimum = scaling

    if request == 'estimate':
        # Check that selected optimizer is in line with version of program.
        if maxfun > 0:
            assert optimizer_used in OPTIMIZERS_FORT

        assert data_array is not None
        # If an evaluation is requested, then a specially formatted dataset is
        # written to a scratch file. This eases the reading of the dataset in
        # FORTRAN.
        write_dataset(data_array)

    # Distribute model parameters
    coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = \
        dist_model_paras(model_paras, is_debug)

    args = (coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky,
            is_interpolated, num_draws_emax, num_periods, num_points_interp,
            is_myopic, edu_start, is_debug, edu_max, min_idx, delta)

    args = args + (num_draws_prob, num_agents_est, num_agents_sim, seed_prob,
                   seed_emax, tau, num_procs, request, seed_sim,
                   optimizer_options, optimizer_used, maxfun, paras_fixed,
                   dfunc_eps, is_scaled, scale_minimum)

    write_resfort_initialization(*args)

    # Call executable
    if not is_parallel:
        cmd = [EXEC_DIR + '/resfort_scalar']
        subprocess.check_call(cmd)
    else:
        cmd = ['mpiexec', '-n', '1', EXEC_DIR + '/resfort_parallel_master']
        subprocess.check_call(cmd)

    # Return arguments depends on the request.
    if request == 'simulate':
        results = get_results(num_periods, min_idx, num_agents_sim, 'simulate')
        args = (results[:-1], results[-1])
    elif request == 'estimate':
        args = None
    else:
        raise AssertionError

    return args
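
# Minimal sketch of the subprocess pattern above: check_call blocks until the
# executable finishes and raises CalledProcessError on a nonzero exit status,
# so a failing FORTRAN run stops the Python program immediately. The executable
# path below is a placeholder, not a real respy binary.
import subprocess

try:
    subprocess.check_call(['./resfort_scalar_placeholder'])
except (OSError, subprocess.CalledProcessError) as exc:
    print('resfort run failed:', exc)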
Ejemplo n.º 30
0
    def test_5(self):
        """ This methods ensures that the core functions yield the same
        results across implementations.
        """

        # Generate random initialization file
        generate_init()

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')

        # Ensure that backward induction routines use the same grid for the
        # interpolation.
        max_states_period = write_interpolation_grid('test.respy.ini')

        # Extract class attributes
        num_periods, edu_start, edu_max, min_idx, model_paras, num_draws_emax, \
        is_debug, delta, is_interpolated, num_points_interp, is_myopic, num_agents_sim, \
        num_draws_prob, tau, paras_fixed, seed_sim = \
            dist_class_attributes(
            respy_obj, 'num_periods', 'edu_start', 'edu_max', 'min_idx',
            'model_paras', 'num_draws_emax', 'is_debug', 'delta',
            'is_interpolated', 'num_points_interp', 'is_myopic', 'num_agents_sim',
            'num_draws_prob', 'tau', 'paras_fixed', 'seed_sim')

        # Write out random components and interpolation grid to align the
        # three implementations.
        max_draws = max(num_agents_sim, num_draws_emax, num_draws_prob)
        write_draws(num_periods, max_draws)
        periods_draws_emax = read_draws(num_periods, num_draws_emax)
        periods_draws_prob = read_draws(num_periods, num_draws_prob)
        periods_draws_sims = read_draws(num_periods, num_agents_sim)

        # Extract coefficients
        coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = dist_model_paras(
            model_paras, True)

        # Check the full solution procedure
        base_args = (coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
                     shocks_cholesky, is_interpolated, num_draws_emax,
                     num_periods, num_points_interp, is_myopic, edu_start,
                     is_debug, edu_max, min_idx, delta)

        fort, _ = resfort_interface(respy_obj, 'simulate')
        pyth = pyth_solve(*base_args + (periods_draws_emax,))
        f2py = fort_debug.f2py_solve(*base_args + (periods_draws_emax, max_states_period))

        for alt in [f2py, fort]:
            for i in range(5):
                np.testing.assert_allclose(pyth[i], alt[i])

        # Distribute solution arguments for further use in simulation test.
        periods_payoffs_systematic, _, mapping_state_idx, periods_emax, states_all = pyth

        args = (periods_payoffs_systematic, mapping_state_idx, periods_emax,
                states_all, shocks_cholesky, num_periods, edu_start, edu_max,
                delta, num_agents_sim, periods_draws_sims, seed_sim)

        pyth = pyth_simulate(*args)

        f2py = fort_debug.f2py_simulate(*args)
        np.testing.assert_allclose(pyth, f2py)

        data_array = pyth

        base_args = (coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
                     shocks_cholesky, is_interpolated, num_draws_emax,
                     num_periods, num_points_interp, is_myopic, edu_start,
                     is_debug, edu_max, min_idx, delta, data_array,
                     num_agents_sim, num_draws_prob, tau)

        args = base_args + (periods_draws_emax, periods_draws_prob)
        pyth = pyth_evaluate(*args)

        args = base_args + (periods_draws_emax, periods_draws_prob)
        f2py = fort_debug.f2py_evaluate(*args)

        np.testing.assert_allclose(pyth, f2py)

        # Evaluation of criterion function
        x0 = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
            shocks_cholesky, 'all', paras_fixed, is_debug)

        args = (is_interpolated, num_draws_emax, num_periods,
                num_points_interp, is_myopic, edu_start, is_debug, edu_max,
                min_idx, delta, data_array, num_agents_sim, num_draws_prob,
                tau, periods_draws_emax, periods_draws_prob)

        pyth = pyth_criterion(x0, *args)
        f2py = fort_debug.f2py_criterion(x0, *args)
        np.testing.assert_allclose(pyth, f2py)
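
# Hypothetical stand-in for the write_draws/read_draws pair used above: persist
# one set of standard normal draws to disk so that every implementation reads
# identical random components instead of drawing its own. The file name and
# shapes are made up for illustration.
import numpy as np

num_periods, max_draws = 3, 5
draws = np.random.standard_normal((num_periods, max_draws, 4))
np.savetxt('.draws.sketch.txt', draws.reshape(num_periods * max_draws, 4))

loaded = np.loadtxt('.draws.sketch.txt').reshape(num_periods, max_draws, 4)
np.testing.assert_allclose(draws, loaded)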
Ejemplo n.º 31
0
    def test_1(self):
        """Compare simulation results from the RESTUD program and the RESPY package."""
        args = generate_constraints_dict()
        params_spec, options_spec = generate_random_model(**args)
        params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

        # Indicate to the RESTUD code the special case of zero disturbances.
        open(".restud.testing.scratch", "a").close()

        # We need to indicate to the RESFORT code to rescale the experience covariates.
        open(".restud.respy.scratch", "a").close()

        # Perform toolbox actions
        respy_obj = RespyCls(params_spec, options_spec)

        # This flag aligns the random components between the RESTUD program and RESPY
        # package. The existence of the file leads the RESTUD program to write out
        # the random components.
        (
            optim_paras,
            edu_spec,
            num_agents_sim,
            num_periods,
            num_draws_emax,
        ) = dist_class_attributes(
            respy_obj,
            "optim_paras",
            "edu_spec",
            "num_agents_sim",
            "num_periods",
            "num_draws_emax",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

        # Simulate sample model using RESTUD code.
        transform_respy_to_restud_sim(
            optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
        )

        # Solve model using RESTUD code.
        cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
        subprocess.check_call(cmd, shell=True)

        # We need to ensure for RESPY that the lagged activity variable indicates that
        # the individuals were in school the period before entering the model.
        types = np.random.choice([3], size=num_agents_sim)
        np.savetxt(".initial_lagged.respy.test", types, fmt="%i")

        # Solve model using RESPY package.
        simulate_observed(respy_obj, is_missings=False)

        # Compare the simulated dataset generated by the programs.
        column_labels = []
        column_labels += ["Experience_A", "Experience_B"]
        column_labels += ["Years_Schooling", "Lagged_Choice"]

        py = pd.read_csv(
            "data.respy.dat",
            delim_whitespace=True,
            header=0,
            na_values=".",
            usecols=column_labels,
        ).astype(np.float)

        fort = pd.DataFrame(
            np.array(np.genfromtxt("ftest.txt", missing_values="."), ndmin=2)[:, -4:],
            columns=column_labels,
        ).astype(np.float)

        # The simulated dataset from FORTRAN includes an indicator for the lagged
        # activities.
        py["Lagged_Choice"] = py["Lagged_Choice"].map({1: 0.0, 2: 0.0, 3: 1.0, 4: 0.0})

        assert_frame_equal(py, fort)
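
# Small illustrative sketch of the recoding applied above: the simulated data
# store the lagged activity as a choice code 1-4, while the comparison with the
# RESTUD output only needs an indicator for having been in school (code 3) the
# period before. The sample values are made up.
import pandas as pd

lagged = pd.Series([1, 2, 3, 4, 3])
in_school = lagged.map({1: 0.0, 2: 0.0, 3: 1.0, 4: 0.0})
print(in_school.tolist())  # [0.0, 0.0, 1.0, 0.0, 1.0]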
Ejemplo n.º 32
0
    def test_3(self):
        """ Testing some of the relationships in the simulated dataset.
        """
        is_deterministic = np.random.choice([True, False])
        is_myopic = np.random.choice([True, False])

        max_draws = np.random.randint(5, 200)
        bound_constr = {"max_draws": max_draws, "max_agents": max_draws}

        params_spec, options_spec = generate_random_model(
            bound_constr=bound_constr,
            deterministic=is_deterministic,
            myopic=is_myopic)

        respy_obj = RespyCls(params_spec, options_spec)
        _, df = respy_obj.simulate()

        optim_paras, num_types, edu_spec, num_periods = dist_class_attributes(
            respy_obj, "optim_paras", "num_types", "edu_spec", "num_periods")

        # We can back out the wage information from other information provided in the
        # simulated dataset.
        for choice in [1, 2]:
            cond = df["Choice"] == choice
            label_sys = "Systematic_Reward_{}".format(choice)
            label_sho = "Shock_Reward_{}".format(choice)
            label_gen = "General_Reward_{}".format(choice)
            label_com = "Common_Reward"
            df["Ex_Post_Reward"] = (df[label_sys] - df[label_gen] -
                                    df[label_com]) * df[label_sho]

            col_1 = df["Ex_Post_Reward"].loc[:, cond]
            col_2 = df["Wage"].loc[:, cond]
            np.testing.assert_array_almost_equal(col_1, col_2)

        # In the myopic case, the total reward should be equal to the ex post reward.
        if is_myopic:
            # The shock only affects the skill-function and not the other components
            # determining the overall reward.
            for choice in [1, 2]:
                cond = df["Choice"] == choice

                label = "Ex_Post_Reward_{}".format(choice)
                label_gen = "General_Reward_{}".format(choice)
                label_com = "Common_Reward"
                label_wag = "Wage"

                df[label] = df[label_wag] + df[label_gen] + df[label_com]

                col_1 = df["Total_Reward_" + str(choice)].loc[:, cond]
                col_2 = df[label].loc[:, cond]

                np.testing.assert_array_almost_equal(col_1, col_2)

            for choice in [3, 4]:
                label = "Ex_Post_Reward_{}".format(choice)
                label_sys = "Systematic_Reward_{}".format(choice)
                label_sho = "Shock_Reward_{}".format(choice)

                df[label] = df[label_sys] + df[label_sho]

                # The equality does not hold if a state is inadmissible.
                cond = df["Years_Schooling"] != edu_spec["max"]

                col_1 = df["Total_Reward_" + str(choice)].loc[:, cond]
                col_2 = df[label].loc[:, cond]

                np.testing.assert_array_almost_equal(col_1, col_2)

        # If the model is deterministic, all shocks should be equal to zero; for
        # wages, they equal one after exponentiation (see the short sketch after
        # this test).
        if is_deterministic:
            for i in range(1, 5):
                label = "Shock_Reward_{}".format(i)
                if i in [1, 2]:
                    cond = df[label] == 1
                else:
                    cond = df[label] == 0
                assert np.all(cond)
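
# Tiny numerical illustration of the deterministic case checked above: wage
# shocks enter multiplicatively, so a zero underlying disturbance becomes
# exp(0) = 1 after exponentiation, while the additive non-wage shocks stay at
# zero. The disturbance value is made up.
import numpy as np

disturbance = 0.0
wage_shock = np.exp(disturbance)   # multiplicative shock for choices 1 and 2
nonwage_shock = disturbance        # additive shock for choices 3 and 4
assert wage_shock == 1.0 and nonwage_shock == 0.0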
Ejemplo n.º 33
0
    def test_4(self):
        """ Testing the return values for the total values in case of myopic
        individuals for one period.

        Note
        ----
        The original test was designed to use Fortran rewards and calculate the total
        values and rewards ex post in Python and see whether they match. As both
        versions diverged in their implementation, we will implement the test with the
        Python version and check the equality of Fortran and Python outputs at all
        stages.

        """

        constr = {"edu_spec": {"max": 99}}
        params_spec, options_spec = generate_random_model(myopic=True,
                                                          point_constr=constr)

        # The equality below does not hold if schooling is an inadmissible state.
        respy_obj = RespyCls(params_spec, options_spec)
        respy_obj, _ = respy_obj.simulate()

        (
            num_periods,
            num_types,
            optim_paras,
            edu_spec,
            mapping_state_idx,
            periods_emax,
            states_all,
            periods_rewards_systematic,
            states_number_period,
        ) = dist_class_attributes(
            respy_obj,
            "num_periods",
            "num_types",
            "optim_paras",
            "edu_spec",
            "mapping_state_idx",
            "periods_emax",
            "states_all",
            "periods_rewards_systematic",
            "states_number_period",
        )

        # We have to create the state space and calculate the rewards in the Python
        # version as we later need the wages which are not part of
        # ``periods_rewards_systematic``.
        state_space = StateSpace(num_periods, num_types, edu_spec["start"],
                                 edu_spec["max"], optim_paras)

        # Check that rewards match
        _, _, pyth, _ = state_space._get_fortran_counterparts()

        # Set NaNs to -99.
        mask = np.isnan(periods_rewards_systematic)
        periods_rewards_systematic[mask] = MISSING_FLOAT

        assert_almost_equal(pyth, periods_rewards_systematic)

        period = np.random.choice(num_periods)
        draws = np.random.normal(size=4)

        # Internalize periods_emax
        state_space._create_attributes_from_fortran_counterparts(periods_emax)

        # Unpack necessary attributes
        rewards_period = state_space.get_attribute_from_period(
            "rewards", period)
        emaxs_period = state_space.get_attribute_from_period("emaxs",
                                                             period)[:, :4]
        max_education_period = (state_space.get_attribute_from_period(
            "states", period)[:, 3] >= edu_spec["max"])

        total_values, rewards_ex_post = get_continuation_value_and_ex_post_rewards(
            rewards_period[:, -2:],
            rewards_period[:, :4],
            emaxs_period,
            draws.reshape(1, -1),
            optim_paras["delta"],
            max_education_period,
        )

        np.testing.assert_equal(total_values, rewards_ex_post)
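
# Minimal sketch of the relationship tested above, using the standard (assumed)
# decomposition total value = ex post reward + delta * continuation value: for
# myopic agents delta is zero, so the two quantities coincide. The numbers are
# made up and this is not the respy implementation.
import numpy as np


def total_value_sketch(rewards_ex_post, emaxs, delta):
    return rewards_ex_post + delta * emaxs


rewards_ex_post = np.array([1.2, 0.4, 2.0, 0.7])
emaxs = np.array([3.0, 2.5, 4.0, 1.0])

np.testing.assert_equal(total_value_sketch(rewards_ex_post, emaxs, 0.0),
                        rewards_ex_post)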
Ejemplo n.º 34
0
    def test_10(self):
        """ This test ensures that the order of the initial schooling level specified in
        the initialization files does not matter for the simulation of a dataset and
        subsequent evaluation of the criterion function.

        Warning
        -------
        This test fails if types have identical intercepts, as no unique ordering
        can be determined in that case.

        """
        point_constr = {
            "estimation": {
                "maxfun": 0
            },
            # We cannot allow for interpolation as the order of states within each
            # period changes and thus the prediction model is altered even if the same
            # state identifier is used.
            "interpolation": {
                "flag": False
            },
        }

        params_spec, options_spec = generate_random_model(
            point_constr=point_constr)

        respy_obj = RespyCls(params_spec, options_spec)

        edu_baseline_spec, num_types, num_paras, optim_paras = dist_class_attributes(
            respy_obj, "edu_spec", "num_types", "num_paras", "optim_paras")

        # We want to randomly shuffle the list of initial schooling but need to maintain
        # the order of the shares.
        edu_shuffled_start = np.random.permutation(
            edu_baseline_spec["start"]).tolist()

        edu_shuffled_share, edu_shuffled_lagged = [], []
        for start in edu_shuffled_start:
            idx = edu_baseline_spec["start"].index(start)
            edu_shuffled_lagged += [edu_baseline_spec["lagged"][idx]]
            edu_shuffled_share += [edu_baseline_spec["share"][idx]]

        edu_shuffled_spec = copy.deepcopy(edu_baseline_spec)
        edu_shuffled_spec["lagged"] = edu_shuffled_lagged
        edu_shuffled_spec["start"] = edu_shuffled_start
        edu_shuffled_spec["share"] = edu_shuffled_share

        # We are only looking at a single evaluation as otherwise the reordering affects
        # the optimizer that is trying better parameter values one-by-one. The
        # reordering might also violate the bounds.
        for i in range(53, num_paras):
            optim_paras["paras_bounds"][i] = [None, None]
            optim_paras["paras_fixed"][i] = False

        # We need to ensure that the baseline type is still in the first position.
        types_order = [0] + np.random.permutation(range(1, num_types)).tolist()

        type_shares = []
        for i in range(num_types):
            lower, upper = i * 2, (i + 1) * 2
            type_shares += [optim_paras["type_shares"][lower:upper].tolist()]

        optim_paras_baseline = copy.deepcopy(optim_paras)
        optim_paras_shuffled = copy.deepcopy(optim_paras)

        list_ = [
            optim_paras["type_shifts"][i, :].tolist() for i in types_order
        ]
        optim_paras_shuffled["type_shifts"] = np.array(list_)

        list_ = [type_shares[i] for i in types_order]
        optim_paras_shuffled["type_shares"] = np.array(list_).flatten()

        base_data, base_val = None, None

        k = 0

        for optim_paras in [optim_paras_baseline, optim_paras_shuffled]:
            for edu_spec in [edu_baseline_spec, edu_shuffled_spec]:

                respy_obj.unlock()
                respy_obj.set_attr("edu_spec", edu_spec)
                respy_obj.lock()

                # There is some more work to do to update the coefficients as we
                # distinguish between the economic and optimization version of the
                # parameters.
                x = get_optim_paras(optim_paras, num_paras, "all", True)
                shocks_cholesky, _ = extract_cholesky(x)
                shocks_coeffs = cholesky_to_coeffs(shocks_cholesky)
                x[43:53] = shocks_coeffs
                respy_obj.update_optim_paras(x)

                respy_obj.reset()

                simulate_observed(respy_obj)

                # This part checks the equality of simulated dataset.
                data_frame = pd.read_csv("data.respy.dat",
                                         delim_whitespace=True)

                if base_data is None:
                    base_data = data_frame.copy()

                assert_frame_equal(base_data, data_frame)

                # This part checks the equality of a single function evaluation.
                _, val = respy_obj.fit()
                if base_val is None:
                    base_val = val
                np.testing.assert_almost_equal(base_val, val)

                respy_obj.reset()
                k += 1
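
# Illustrative sketch of the block-of-two reordering applied to the flat
# type_shares vector above: each type owns two consecutive coefficients, so a
# permutation of types permutes pairs rather than single entries and the
# baseline type stays in the first position. The numbers are made up.
import numpy as np

type_shares = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])  # three types, two each
types_order = [0, 2, 1]  # baseline type remains first

pairs = [type_shares[i * 2:(i + 1) * 2].tolist() for i in range(3)]
shuffled = np.array([pairs[i] for i in types_order]).flatten()
print(shuffled)  # [0.1 0.2 0.5 0.6 0.3 0.4]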
Ejemplo n.º 35
0
    def test_2(self):
        """Compare results from an evaluation of the criterion function at the initial
        values."""
        args = generate_constraints_dict()
        params_spec, options_spec = generate_random_model(**args)
        params_spec, options_spec = adjust_model_spec(params_spec, options_spec)

        max_draws = args["bound_constr"]["max_draws"]

        # At this point, the random initialization file only provides diagonal
        # covariances.
        cov_sampled = np.random.uniform(0, 0.01, size=(4, 4)) + np.diag(
            np.random.uniform(1.0, 1.5, size=4)
        )
        chol = np.linalg.cholesky(cov_sampled)
        coeffs = chol[np.tril_indices(4)]
        params_spec.loc["shocks", "para"] = coeffs
        params_spec.loc["shocks", "upper"] = np.nan
        params_spec.loc["shocks", "lower"] = np.nan

        respy_obj = RespyCls(params_spec, options_spec)

        # This flag aligns the random components between the RESTUD program and RESPY
        # package. The existence of the file leads the RESTUD program to write out
        # the random components.
        (
            optim_paras,
            edu_spec,
            num_agents_est,
            num_periods,
            num_draws_emax,
            num_draws_prob,
            tau,
            num_agents_sim,
        ) = dist_class_attributes(
            respy_obj,
            "optim_paras",
            "edu_spec",
            "num_agents_est",
            "num_periods",
            "num_draws_emax",
            "num_draws_prob",
            "tau",
            "num_agents_sim",
        )

        shocks_cholesky = optim_paras["shocks_cholesky"]
        cov = np.matmul(shocks_cholesky, shocks_cholesky.T)

        # Simulate sample model using RESTUD code.
        transform_respy_to_restud_sim(
            optim_paras, edu_spec, num_agents_sim, num_periods, num_draws_emax, cov
        )

        open(".restud.testing.scratch", "a").close()
        cmd = str(TEST_RESOURCES_BUILD / "kw_dp3asim")
        subprocess.check_call(cmd, shell=True)

        transform_respy_to_restud_est(
            optim_paras,
            edu_spec,
            num_agents_est,
            num_draws_prob,
            tau,
            num_periods,
            num_draws_emax,
            cov,
        )

        filenames = ["in.txt", TEST_RESOURCES_DIR / "in_bottom.txt"]
        with open("in1.txt", "w") as outfile:
            for fname in filenames:
                with open(fname) as infile:
                    outfile.write(infile.read())

        draws_standard = np.random.multivariate_normal(
            np.zeros(4), np.identity(4), (num_periods, max_draws)
        )

        with open(".draws.respy.test", "w") as file_:
            for period in range(num_periods):
                for i in range(max_draws):
                    fmt = " {0:15.10f} {1:15.10f} {2:15.10f} {3:15.10f}\n"
                    line = fmt.format(*draws_standard[period, i, :])
                    file_.write(line)

        # We always need the seed.txt
        shutil.copy(str(TEST_RESOURCES_DIR / "seed.txt"), "seed.txt")
        cmd = str(TEST_RESOURCES_BUILD / "kw_dpml4a")
        subprocess.check_call(cmd, shell=True)
        Path("seed.txt").unlink()

        with open("output1.txt", "r") as searchfile:
            # Search file for strings, trim lines and save as variables
            for line in searchfile:
                if "OLD LOGLF=" in line:
                    stat = float(shlex.split(line)[2])
                    break

        # Now we also evaluate the criterion function with the RESPY package.
        restud_sample_to_respy()
        respy_obj = respy.RespyCls(params_spec, options_spec)
        respy_obj.attr["file_est"] = "ftest.respy.dat"

        open(".restud.respy.scratch", "a").close()
        _, val = respy_obj.fit()
        Path(".restud.respy.scratch").unlink()

        # This ensures that the RESTUD value is within 1% of the RESPY value.
        np.testing.assert_allclose(
            abs(stat), abs(val * num_agents_est), rtol=0.01, atol=0.00
        )
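
# Minimal sketch of the log-file parsing used above: shlex.split tokenizes the
# line so the reported statistic is the third token. The example line is made
# up, not actual RESTUD output.
import shlex

line = " OLD LOGLF=    -1234.56789\n"
if "OLD LOGLF=" in line:
    stat = float(shlex.split(line)[2])
print(stat)  # -1234.56789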
Ejemplo n.º 36
0
    def test_6(self):
        """ Further tests for the interpolation routines.
        """
        # Generate random initialization file
        generate_init()

        # Perform toolbox actions
        respy_obj = RespyCls('test.respy.ini')
        respy_obj = simulate(respy_obj)

        # Extract class attributes
        periods_payoffs_systematic, states_number_period, mapping_state_idx, \
            seed_prob, periods_emax, num_periods, states_all, \
            num_points_interp, edu_start, num_draws_emax, is_debug, edu_max, \
            delta = dist_class_attributes(
                respy_obj, 'periods_payoffs_systematic', 'states_number_period',
                'mapping_state_idx', 'seed_prob', 'periods_emax',
                'num_periods', 'states_all', 'num_points_interp', 'edu_start',
                'num_draws_emax', 'is_debug', 'edu_max', 'delta')

        # Add some additional objects required for the interfaces to the
        # functions.
        period = np.random.choice(range(num_periods))

        periods_draws_emax = create_draws(num_periods, num_draws_emax, seed_prob,
            is_debug)

        draws_emax = periods_draws_emax[period, :, :]

        num_states = states_number_period[period]

        shifts = np.random.randn(4)

        # Slightly modify the request to ensure that the interpolation code is
        # actually run.
        num_points_interp = min(num_points_interp, num_states)

        # Get the IS_SIMULATED indicator for the subset of points which are
        # used for the prediction model.
        args = (num_points_interp, num_states, period, is_debug)
        is_simulated = get_simulated_indicator(*args)

        # Construct the exogenous variables for all points of the state
        # space.
        args = (period, num_periods, num_states, delta,
                periods_payoffs_systematic, shifts, edu_max, edu_start,
                mapping_state_idx, periods_emax, states_all)

        py = get_exogenous_variables(*args)
        f90 = fort_debug.wrapper_get_exogenous_variables(*args)

        np.testing.assert_equal(py, f90)

        # Distribute validated results for further functions.
        exogenous, maxe = py

        # Construct endogenous variable so that the prediction model can be
        # fitted.
        args = (period, num_periods, num_states, delta,
            periods_payoffs_systematic, edu_max, edu_start,
            mapping_state_idx, periods_emax, states_all, is_simulated,
            num_draws_emax, maxe, draws_emax)

        py = get_endogenous_variable(*args)
        f90 = fort_debug.wrapper_get_endogenous_variable(*args)

        np.testing.assert_equal(py, replace_missing_values(f90))

        # Distribute validated results for further functions.
        endogenous = py

        args = (endogenous, exogenous, maxe, is_simulated, num_points_interp,
            num_states, is_debug)

        py = get_predictions(*args)
        f90 = fort_debug.wrapper_get_predictions(*args[:-1])

        np.testing.assert_array_almost_equal(py, f90)
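
# Hedged sketch of the interpolation step exercised above (not the respy
# implementation of get_predictions): fit a linear prediction model on the
# randomly selected subset of states and use the fitted values for all
# remaining states, while the selected states keep their actual values. All
# shapes and data below are made up.
import numpy as np

num_states, num_points_interp = 50, 20
rng = np.random.default_rng(0)

exogenous = np.column_stack(
    [np.ones(num_states), rng.normal(size=(num_states, 3))])
endogenous = exogenous @ np.array([1.0, 0.5, -0.3, 2.0]) + rng.normal(
    scale=0.1, size=num_states)

is_simulated = np.zeros(num_states, dtype=bool)
is_simulated[rng.choice(num_states, size=num_points_interp, replace=False)] = True

coeffs = np.linalg.lstsq(
    exogenous[is_simulated], endogenous[is_simulated], rcond=None)[0]
predictions = exogenous @ coeffs
predictions[is_simulated] = endogenous[is_simulated]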
Ejemplo n.º 37
0
def add_gradient_information(respy_obj):
    """ This function adds information about the gradient to the information
    files. It is not part of the estimation _modules as it breaks the design
    and requires to carry additional attributes. This results in considerable
    overhead, which appears justified at this point.
    """

    model_paras, is_debug, paras_fixed, derivatives = \
        dist_class_attributes(respy_obj, 'model_paras', 'is_debug',
            'paras_fixed', 'derivatives')

    # Auxiliary objects
    coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = \
        dist_model_paras(model_paras, is_debug)

    # Construct starting values
    x_all_start = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
            shocks_cholesky, 'all', paras_fixed, is_debug)

    x_free_start = get_optim_paras(coeffs_a, coeffs_b, coeffs_edu, coeffs_home,
            shocks_cholesky, 'free', paras_fixed, is_debug)

    # Construct auxiliary information
    num_free = len(x_free_start)

    # The information about the gradient is simply added to the original
    # information later. Note that the original file is read before the
    # gradient evaluation. This is required as the information otherwise
    # accounts for the multiple function evaluation during the gradient
    # approximation scheme.
    original_lines = open('est.respy.info', 'r').readlines()
    fmt_ = '{0:<25}{1:>15}\n'
    original_lines[-5] = fmt_.format(*[' Number of Steps', 0])
    original_lines[-3] = fmt_.format(*[' Number of Evaluations', num_free])

    # Approximate gradient by forward finite differences.
    grad, ei = np.zeros((num_free,), float), np.zeros((26,), float)
    dfunc_eps = derivatives[1]

    # Making sure that the criterion is only evaluated at the relevant
    # starting values.
    respy_obj.unlock()
    respy_obj.set_attr('maxfun', 0)
    respy_obj.lock()

    _, f0 = estimate(respy_obj)

    for k, i in enumerate(np.where(np.logical_not(paras_fixed))[0].tolist()):
        x_baseline = x_all_start.copy()

        ei[i] = 1.0
        d = dfunc_eps * ei
        respy_obj.update_model_paras(x_baseline + d)

        _, f1 = estimate(respy_obj)

        grad[k] = (f1 - f0) / d[i]
        ei[i] = 0.0

    grad = grad.tolist()
    norm = np.amax(np.abs(grad))

    # Write out extended information
    with open('est.respy.info', 'a') as out_file:
        # Insert information about gradient
        out_file.write('\n\n\n\n Gradient\n\n')
        fmt_ = '{0:>15}    {1:>15}\n\n'
        out_file.write(fmt_.format(*['Identifier', 'Start']))
        fmt_ = '{0:>15}    {1:15.4f}\n'

        # Iterate over all candidate values, but only write the free
        # ones to file. This ensures that the identifiers line up.
        for j in range(26):
            is_fixed = paras_fixed[j]
            if not is_fixed:
                values = [j, grad.pop(0)]
                out_file.write(fmt_.format(*values))

        out_file.write('\n')

        # Add value of infinity norm
        values = ['Norm', norm]
        out_file.write(fmt_.format(*values))
        out_file.write('\n\n')
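
# Self-contained sketch of the forward finite-difference scheme used above,
# detached from respy: the k-th free coefficient is perturbed by eps and the
# slope (f(x + eps * e_i) - f(x)) / eps approximates the partial derivative.
# The criterion and step size below are made up.
import numpy as np


def _toy_criterion(x):
    return np.sum(x ** 2) + x[0] * x[1]


x_all_start = np.array([0.5, -1.0, 2.0])
paras_fixed = [False, True, False]
dfunc_eps = 1e-6

f0 = _toy_criterion(x_all_start)
grad = np.zeros(sum(not fixed for fixed in paras_fixed))

for k, i in enumerate(np.where(np.logical_not(paras_fixed))[0]):
    ei = np.zeros_like(x_all_start)
    ei[i] = 1.0
    f1 = _toy_criterion(x_all_start + dfunc_eps * ei)
    grad[k] = (f1 - f0) / dfunc_eps

print(grad)  # approximately [0.0, 4.0]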
Ejemplo n.º 38
0
def resfort_interface(respy_obj, request, data_array=None):
    """ This function provides the interface to the FORTRAN functionality.
    """
    # Add mock specification for FORTRAN optimizers if not defined by user.
    # This is required so the initialization file for FORTRAN is complete.
    respy_obj = add_optimizers(respy_obj)

    # Distribute class attributes
    model_paras, num_periods, edu_start, is_debug, edu_max, delta, \
        num_draws_emax, seed_emax, is_interpolated, num_points_interp, \
        is_myopic, min_idx, tau, is_parallel, num_procs, \
        num_agents_sim, num_draws_prob, num_agents_est, seed_prob, seed_sim, \
        paras_fixed, optimizer_options, optimizer_used, maxfun, paras_fixed, \
        derivatives, scaling = dist_class_attributes(respy_obj,
                'model_paras', 'num_periods', 'edu_start', 'is_debug',
                'edu_max', 'delta', 'num_draws_emax', 'seed_emax',
                'is_interpolated', 'num_points_interp', 'is_myopic', 'min_idx',
                'tau', 'is_parallel', 'num_procs', 'num_agents_sim',
                'num_draws_prob', 'num_agents_est', 'seed_prob', 'seed_sim',
                'paras_fixed', 'optimizer_options', 'optimizer_used',
                'maxfun', 'paras_fixed', 'derivatives', 'scaling')

    dfunc_eps = derivatives[1]
    is_scaled, scale_minimum = scaling

    if request == 'estimate':
        # Check that selected optimizer is in line with version of program.
        if maxfun > 0:
            assert optimizer_used in OPTIMIZERS_FORT

        assert data_array is not None
        # If an evaluation is requested, then a specially formatted dataset is
        # written to a scratch file. This eases the reading of the dataset in
        # FORTRAN.
        write_dataset(data_array)

    # Distribute model parameters
    coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky = \
        dist_model_paras(model_paras, is_debug)

    args = (coeffs_a, coeffs_b, coeffs_edu, coeffs_home, shocks_cholesky,
            is_interpolated, num_draws_emax, num_periods, num_points_interp,
            is_myopic, edu_start, is_debug, edu_max, min_idx, delta)

    args = args + (num_draws_prob, num_agents_est, num_agents_sim, seed_prob,
                   seed_emax, tau, num_procs, request, seed_sim,
                   optimizer_options, optimizer_used, maxfun, paras_fixed,
                   dfunc_eps, is_scaled, scale_minimum)

    write_resfort_initialization(*args)

    # Call executable
    if not is_parallel:
        cmd = [EXEC_DIR + '/resfort_scalar']
        subprocess.check_call(cmd)
    else:
        cmd = ['mpiexec', '-n', '1', EXEC_DIR + '/resfort_parallel_master']
        subprocess.check_call(cmd)

    # Return arguments depends on the request.
    if request == 'simulate':
        results = get_results(num_periods, min_idx, num_agents_sim, 'simulate')
        args = (results[:-1], results[-1])
    elif request == 'estimate':
        args = None
    else:
        raise AssertionError

    return args
Ejemplo n.º 39
0
def scripts_modify(identifiers, action, init_file, values=None, bounds=None):
    """ Modify optimization parameters by either changing their status or values.
    """
    # Select interface
    is_bounds = action == "bounds"
    is_fixed = action == "fix"

    # Baseline
    init_dict = read_init_file(init_file)
    respy_obj = RespyCls(init_file)

    optim_paras, num_paras, num_types = dist_class_attributes(
        respy_obj, "optim_paras", "num_paras", "num_types")

    # We now need to ensure a consistent perspective, i.e. all parameter values
    # are expressed as they appear in the initialization file.
    x = get_optim_paras(optim_paras, num_paras, "all", True)
    x[43:53] = cholesky_to_coeffs(optim_paras["shocks_cholesky"])

    if action == "value":
        for i, j in enumerate(identifiers):
            x[j] = values[i]

    for identifier in identifiers:
        if identifier in [0]:
            j = identifier
            init_dict["BASICS"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["BASICS"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["BASICS"]["bounds"][j] = bounds
        elif identifier in list(range(1, 3)):
            j = identifier - 1
            init_dict["COMMON"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["COMMON"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["COMMON"]["bounds"][j] = bounds
        elif identifier in list(range(3, 18)):
            j = identifier - 3
            init_dict["OCCUPATION A"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["OCCUPATION A"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["OCCUPATION A"]["bounds"][j] = bounds
        elif identifier in list(range(18, 33)):
            j = identifier - 18
            init_dict["OCCUPATION B"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["OCCUPATION B"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["OCCUPATION B"]["bounds"][j] = bounds
        elif identifier in list(range(33, 40)):
            j = identifier - 33
            init_dict["EDUCATION"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["EDUCATION"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["EDUCATION"]["bounds"][j] = bounds
        elif identifier in list(range(40, 43)):
            j = identifier - 40
            init_dict["HOME"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["HOME"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["HOME"]["bounds"][j] = bounds
        elif identifier in list(range(43, 53)):
            j = identifier - 43
            init_dict["SHOCKS"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["SHOCKS"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["SHOCKS"]["bounds"][j] = bounds
        elif identifier in list(range(53, 53 + (num_types - 1) * 2)):
            j = identifier - 53
            init_dict["TYPE SHARES"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["TYPE SHARES"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["TYPE SHARES"]["bounds"][j] = bounds
        elif identifier in list(range(53 + (num_types - 1) * 2, num_paras)):
            j = identifier - (53 + (num_types - 1) * 2)
            init_dict["TYPE SHIFTS"]["coeffs"][j] = x[identifier]
            if is_fixed:
                init_dict["TYPE SHIFTS"]["fixed"][j] = is_fixed
            elif is_bounds:
                init_dict["TYPE SHIFTS"]["bounds"][j] = bounds
        else:
            raise NotImplementedError

    # Check that the new candidate initialization file is valid. If so, go ahead and
    # replace the original file.
    write_init_file(init_dict, ".tmp.respy.ini")
    RespyCls(".tmp.respy.ini")
    shutil.move(".tmp.respy.ini", init_file)
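
# Illustrative helper (not part of respy) that condenses the long elif chain
# above: it maps a flat parameter identifier to its section name and its
# position within that section, using the same boundaries as the code.
def identify_section(identifier, num_types, num_paras):
    bounds = [
        ("BASICS", 0, 1), ("COMMON", 1, 3), ("OCCUPATION A", 3, 18),
        ("OCCUPATION B", 18, 33), ("EDUCATION", 33, 40), ("HOME", 40, 43),
        ("SHOCKS", 43, 53), ("TYPE SHARES", 53, 53 + (num_types - 1) * 2),
        ("TYPE SHIFTS", 53 + (num_types - 1) * 2, num_paras),
    ]
    for name, lower, upper in bounds:
        if lower <= identifier < upper:
            return name, identifier - lower
    raise NotImplementedError


print(identify_section(45, num_types=2, num_paras=59))  # ('SHOCKS', 2)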
Ejemplo n.º 40
0
def check_estimation_dataset(data_frame, respy_obj):
    """Run consistency checks on data_frame."""
    # Distribute class attributes
    num_periods, edu_spec, num_agents_est = dist_class_attributes(
        respy_obj, "num_periods", "edu_spec", "num_agents_est")

    # Check that no variable but 'Wage' contains missings.
    for label in DATA_LABELS_EST:
        if label == "Wage":
            continue
        assert ~data_frame[label].isnull().any()

    # Checks for PERIODS. It can happen that the last period is deleted for all
    # agents. Thus, this is not a strict equality for observed data.
    # It is for simulated data.
    dat = data_frame["Period"]
    np.testing.assert_equal(dat.max() <= num_periods - 1, True)

    # Checks for CHOICE
    dat = data_frame["Choice"].isin([1, 2, 3, 4])
    np.testing.assert_equal(dat.all(), True)

    # Checks for WAGE
    dat = data_frame["Wage"].fillna(99) > 0.00
    np.testing.assert_equal(dat.all(), True)

    # Checks for EXPERIENCE. We also know that both need to take value of zero
    # in the very first period.
    for label in ["Experience_A", "Experience_B"]:
        dat = data_frame[label] >= 0.00
        np.testing.assert_equal(dat.all(), True)

        dat = data_frame[label][:, 0] == 0
        np.testing.assert_equal(dat.all(), True)

    # We check individual state variables against the recorded choices
    data_frame.groupby(level="Identifier").apply(check_state_variables)

    # Checks for LAGGED ACTIVITY. To be sure, we also construct the correct
    # lagged activity here and compare it to the one provided in the dataset.
    dat = data_frame["Lagged_Choice"].isin([1, 2, 3, 4])
    np.testing.assert_equal(dat.all(), True)

    dat = data_frame["Lagged_Choice"][:, 0].isin([3, 4])
    np.testing.assert_equal(dat.all(), True)

    # We can reconstruct the lagged choice easily in general, but the very
    # first period is ambiguous.
    data_frame["TEMP"] = data_frame.groupby(
        level="Identifier")["Choice"].shift(+1)
    temp_initial = data_frame.loc[(slice(None), 0), "Lagged_Choice"].copy()
    data_frame.loc[(slice(None), 0), "TEMP"] = temp_initial
    data_frame["TEMP"] = data_frame["TEMP"].astype(int)
    np.testing.assert_equal(
        data_frame["TEMP"].equals(data_frame["Lagged_Choice"]), True)
    del data_frame["TEMP"]

    # Checks for YEARS SCHOOLING. We also know that the initial years of
    # schooling can only take values specified in the initialization file and
    # no individual in our estimation sample is allowed to have more than the
    # maximum number of years of education.
    dat = data_frame["Years_Schooling"] >= 0.00
    np.testing.assert_equal(dat.all(), True)

    dat = data_frame["Years_Schooling"][:, 0].isin(edu_spec["start"])
    np.testing.assert_equal(dat.all(), True)

    dat = data_frame["Years_Schooling"].max()
    np.testing.assert_equal(dat <= edu_spec["max"], True)

    # Check that there are no duplicated observations for any period by agent.
    def check_unique_periods(group):
        np.testing.assert_equal(group["Period"].duplicated().any(), False)

    data_frame.groupby(level="Identifier").apply(check_unique_periods)

    # Check that we observe the whole sequence of observations and that they
    # are in the right order.
    def check_series_observations(group):
        np.testing.assert_equal(group["Period"].tolist(),
                                list(range(group["Period"].max() + 1)))

    data_frame.groupby(level="Identifier").apply(check_series_observations)

    # We need to ensure that the number of individuals requested for the
    # estimation is available. We do not enforce a strict equality here as a
    # simulated dataset is checked for its estimation suitability in
    # general, i.e. before any constraints on initial conditions.
    np.testing.assert_equal(
        data_frame["Identifier"].nunique() >= num_agents_est, True)