Example 1
    def test_10(self):
        """ Function that calculates the number of observations by individual.
        """
        for _ in range(2):
            params_spec, options_spec = generate_random_model()
            respy_obj = RespyCls(params_spec, options_spec)
            respy_obj = simulate_observed(respy_obj)

            num_agents_est = respy_obj.get_attr("num_agents_est")

            data_array = process_dataset(respy_obj).to_numpy()

            py = np.bincount(data_array[:, 0].astype(int))
            f90 = fort_debug.wrapper_get_num_obs_agent(data_array,
                                                       num_agents_est)

            assert_almost_equal(py, f90)
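For readers unfamiliar with the counting trick in this test, here is a minimal, self-contained sketch of the Python side of the comparison. The toy array below is made up; it only assumes that the first column of the processed dataset holds integer agent identifiers.

import numpy as np

# Toy data array: the first column is the agent identifier.
data_array = np.array(
    [
        [0, 15.0],  # agent 0, first observation
        [0, 16.0],  # agent 0, second observation
        [1, 15.0],  # agent 1, single observation
        [2, 15.0],  # agent 2, three observations
        [2, 16.0],
        [2, 17.0],
    ]
)

# np.bincount returns the number of observations per individual.
num_obs_agent = np.bincount(data_array[:, 0].astype(int))
print(num_obs_agent)  # -> [2 1 3]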
Example 2
def scripts_check(request, respy_obj):
    """ Wrapper for the estimation.
    """

    # Distribute model parameters
    num_periods, edu_spec, num_types, optim_paras = dist_class_attributes(
        respy_obj, "num_periods", "edu_spec", "num_types", "optim_paras"
    )

    # We need to run additional checks if an estimation is requested.
    if request == "estimate":
        # Create the grid of the admissible states.
        state_space = StateSpace(
            num_periods, num_types, edu_spec["start"], edu_spec["max"], optim_paras
        )

        # We also check the structure of the dataset.
        data_array = process_dataset(respy_obj).to_numpy()
        num_rows = data_array.shape[0]

        for j in range(num_rows):
            period = int(data_array[j, 1])
            # Extract observable components of state space as well as agent decision.
            exp_a, exp_b, edu, choice_lagged = data_array[j, 4:].astype(int)

            # First of all, we need to ensure that all observed years of schooling are
            # non-negative.
            try:
                np.testing.assert_equal(edu >= 0, True)
            except AssertionError:
                raise UserError(ERR_MSG)

            # Get the state index to obtain the systematic component of the agents'
            # rewards. This might fail either because the state is infeasible in any
            # period or simply not defined for the particular period requested.
            try:
                k = state_space.indexer[period, exp_a, exp_b, edu, choice_lagged - 1]
                np.testing.assert_equal(k >= 0, True)
            except (IndexError, AssertionError):
                raise UserError(ERR_MSG)

        # We also take a special look at the optimizer options.
        respy_obj.check_estimation()
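The admissibility check above relies on respy's StateSpace indexer. As a rough illustration of the pattern, not respy's actual implementation, the sketch below uses a hypothetical NumPy indexer in which infeasible states carry the value -1, so that both out-of-range lookups and infeasible states are caught by the same try/except block.

import numpy as np

# Hypothetical indexer with dimensions period x schooling; -1 marks infeasible states.
indexer = np.full((2, 3), -1, dtype=int)
indexer[0, 0] = 0  # only one feasible state is defined here

def check_observation(period, edu):
    try:
        k = indexer[period, edu]
        np.testing.assert_equal(k >= 0, True)
    except (IndexError, AssertionError):
        raise ValueError("Observed state is not admissible in the model.")

check_observation(0, 0)    # passes
# check_observation(1, 2)  # would raise: state is on the grid but infeasible (k == -1)
# check_observation(0, 9)  # would raise: index out of bounds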
Example 3
    def fit(self):
        """Estimate the model."""
        # Cleanup
        for fname in ["est.respy.log", "est.respy.info"]:
            if os.path.exists(fname):
                os.unlink(fname)

        if self.get_attr("is_solved"):
            self.reset()

        self.check_estimation()

        # This locks the estimation directory against additional estimation requests.
        atexit.register(remove_scratch, ".estimation.respy.scratch")
        open(".estimation.respy.scratch", "w").close()

        # Read in the estimation dataset. Only the number of agents requested for the
        # estimation is read in (or all available agents, whichever is smaller). It is
        # also possible to read in only a subset of the initial conditions.
        data_frame = process_dataset(self)
        record_estimation_sample(data_frame)

        # Distribute class attributes
        version = self.get_attr("version")

        data_array = data_frame.to_numpy()

        # Select appropriate interface
        if version in ["python"]:
            respy_interface(self, "estimate", data_frame)
        elif version in ["fortran"]:
            resfort_interface(self, "estimate", data_array)
        else:
            raise NotImplementedError

        rslt = get_est_info()
        x, val = rslt["paras_step"], rslt["value_step"]

        for fname in [".estimation.respy.scratch", ".stop.respy.scratch"]:
            remove_scratch(fname)

        return x, val
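The scratch-file handling at the top of fit() is a small but useful pattern: create a marker file to signal that an estimation is in progress and register its removal with atexit so it disappears even if the process exits early. Below is a minimal stand-alone sketch; remove_scratch here is a stand-in written for illustration, mirroring the helper imported in the snippet above.

import atexit
import os

def remove_scratch(fname):
    # Delete the scratch file if it still exists.
    if os.path.exists(fname):
        os.unlink(fname)

# Register the cleanup first, then create the lock file.
atexit.register(remove_scratch, ".estimation.respy.scratch")
open(".estimation.respy.scratch", "w").close()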
Example 4
def scripts_compare(base_init, is_update):
    """Construct model fit statistics by comparing the observed and simulated
    datasets."""
    # In case of updating, we create a new initialization file that contains the updated
    # parameter values.
    if is_update:
        init_file = "compare.respy.ini"
        shutil.copy(base_init, init_file)
        scripts_update(init_file)
    else:
        init_file = base_init

    # Read in relevant model specification.
    respy_obj = RespyCls(init_file)
    respy_obj.write_out("compare.respy.ini")

    # Distribute some information for further processing.
    num_periods, num_agents_est, num_agents_sim = dist_class_attributes(
        respy_obj, "num_periods", "num_agents_est", "num_agents_sim")

    # The comparison does not make sense if the simulated dataset and the estimation
    # dataset refer to the same file, because the estimation dataset would then be
    # overwritten by the simulated dataset.
    fname_est = respy_obj.attr["file_est"].split(".")[0]
    fname_sim = respy_obj.attr["file_sim"].split(".")[0]
    if fname_est == fname_sim:
        raise UserError("Simulation would overwrite the estimation dataset.")

    data_obs = process_dataset(respy_obj)
    data_sim = respy_obj.simulate()[1]

    if num_periods > 1:
        tf = [
            construct_transition_matrix(data_obs),
            construct_transition_matrix(data_sim),
        ]

    # Determine the number of periods that are actually observed in the data.
    max_periods = len(data_obs["Period"].unique())

    # Prepare results
    rslt_initial = _prepare_initial(data_obs, data_sim, num_agents_est,
                                    num_agents_sim)
    rslt_choice, rmse_choice = _prepare_choices(data_obs, data_sim)
    rslt_a = _prepare_wages(data_obs, data_sim, "Occupation A")
    rslt_b = _prepare_wages(data_obs, data_sim, "Occupation B")

    with open("compare.respy.info", "w") as file_:

        file_.write("\n Comparing the Observed and Simulated Economy\n\n")

        file_.write("   Number of Periods:      " + str(max_periods) + "\n\n")

        file_.write("\n   Initial Schooling Shares \n\n")
        fmt_ = "{:>15}" * 3 + "\n"
        labels = ["Level", "Observed", "Simulated"]
        file_.write(fmt_.format(*labels) + "\n")
        for info in rslt_initial:
            info[1:] = [format_float(x) for x in info[1:]]
            file_.write(fmt_.format(*info))

        # Comparing the choice distributions
        file_.write("\n\n   Choices \n\n")
        fmt_ = "{:>15}" * 7 + "\n"
        labels = ["Data", "Period", "Count", "White", "Blue", "School", "Home"]
        file_.write(fmt_.format(*labels) + "\n")
        for period in range(max_periods):
            for name in ["Observed", "Simulated"]:
                line = [name, period + 1] + rslt_choice[name][period]
                fmt_ = "{:>15}" * 3 + "{:15.2f}" * 4 + "\n"
                file_.write(fmt_.format(*line))
            file_.write("\n")
        line = "   Overall RMSE {:14.5f}\n".format(rmse_choice)
        file_.write(line)

        # Comparing the transition matrices
        if num_periods > 1:
            file_.write("\n\n   Transition Matrix \n\n")
            fmt_ = "{:>15}" * 6 + "\n\n"
            labels = ["Work A", "Work B", "School", "Home"]
            file_.write(fmt_.format(*["", ""] + labels))
            for i in range(4):
                for j, source in enumerate(["Observed", "Simulated"]):
                    fmt_ = "{:>15}{:>15}" + "{:15.4f}" * 4 + "\n"
                    line = [source, labels[i]] + tf[j][i, :].tolist()
                    file_.write(fmt_.format(*line))
                file_.write("\n")

        # Comparing the wages distributions
        file_.write("\n   Outcomes \n\n")
        fmt_ = "{:>15}" * 8 + "\n"

        labels = []
        labels += ["Data", "Period", "Count", "Mean", "Std."]
        labels += ["25%", "50%", "75%"]

        file_.write(fmt_.format(*labels) + "\n")
        for rslt, name in [(rslt_a, "Occupation A"), (rslt_b, "Occupation B")]:
            file_.write("\n    " + name + " \n\n")
            for period in range(max_periods):
                for label in ["Observed", "Simulated"]:
                    counts = int(rslt[label][period][0])
                    line = [label, period + 1, counts]
                    # The occurrence of NaN values requires special care.
                    stats = rslt[label][period][1:]
                    stats = [format_float(x) for x in stats]
                    file_.write(fmt_.format(*line + stats))
                file_.write("\n")
Example 5
    def test_1(self):
        """Test if random model specifications can be simulated and processed."""
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)
        simulate_observed(respy_obj)
        process_dataset(respy_obj)
Example 6
    def test_5(self):
        """ This methods ensures that the core functions yield the same results across
        implementations.
        """
        params_spec, options_spec = generate_random_model()
        respy_obj = RespyCls(params_spec, options_spec)

        # Ensure that backward induction routines use the same grid for the
        # interpolation.
        max_states_period = write_interpolation_grid(respy_obj)

        # Extract class attributes
        (
            num_periods,
            edu_spec,
            optim_paras,
            num_draws_emax,
            is_debug,
            is_interpolated,
            num_points_interp,
            is_myopic,
            num_agents_sim,
            num_draws_prob,
            tau,
            seed_sim,
            num_agents_est,
            optimizer_options,
            file_sim,
            num_types,
            num_paras,
        ) = dist_class_attributes(
            respy_obj,
            "num_periods",
            "edu_spec",
            "optim_paras",
            "num_draws_emax",
            "is_debug",
            "is_interpolated",
            "num_points_interp",
            "is_myopic",
            "num_agents_sim",
            "num_draws_prob",
            "tau",
            "seed_sim",
            "num_agents_est",
            "optimizer_options",
            "file_sim",
            "num_types",
            "num_paras",
        )

        min_idx = edu_spec["max"] + 1
        shocks_cholesky = optim_paras["shocks_cholesky"]
        coeffs_common = optim_paras["coeffs_common"]
        coeffs_home = optim_paras["coeffs_home"]
        coeffs_edu = optim_paras["coeffs_edu"]
        coeffs_a = optim_paras["coeffs_a"]
        coeffs_b = optim_paras["coeffs_b"]
        delta = optim_paras["delta"]

        type_spec_shares = optim_paras["type_shares"]
        type_spec_shifts = optim_paras["type_shifts"]

        # Write out random components and interpolation grid to align the three
        # implementations.
        max_draws = max(num_agents_sim, num_draws_emax, num_draws_prob)
        write_types(type_spec_shares, num_agents_sim)
        write_edu_start(edu_spec, num_agents_sim)
        write_draws(num_periods, max_draws)
        write_lagged_start(num_agents_sim)

        # It is critical that the model is simulated only after all files have been
        # written to disk because they are picked up by the subroutines.
        respy_obj = simulate_observed(respy_obj)

        periods_draws_emax = read_draws(num_periods, num_draws_emax)
        periods_draws_prob = read_draws(num_periods, num_draws_prob)
        periods_draws_sims = read_draws(num_periods, num_agents_sim)

        fort, _ = resfort_interface(respy_obj, "simulate")

        state_space = pyth_solve(
            is_interpolated,
            num_points_interp,
            num_periods,
            is_debug,
            periods_draws_emax,
            edu_spec,
            optim_paras,
            file_sim,
            num_types,
        )

        (
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
        ) = state_space._get_fortran_counterparts()

        py = (
            periods_rewards_systematic,
            state_space.states_per_period,
            mapping_state_idx,
            periods_emax,
            states_all,
        )

        f2py = fort_debug.wrapper_solve(
            is_interpolated,
            num_points_interp,
            num_draws_emax,
            num_periods,
            is_myopic,
            is_debug,
            periods_draws_emax,
            min_idx,
            edu_spec["start"],
            edu_spec["max"],
            coeffs_common,
            coeffs_a,
            coeffs_b,
            coeffs_edu,
            coeffs_home,
            shocks_cholesky,
            delta,
            file_sim,
            max_states_period,
            num_types,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(py[0], fort[0])
        assert_allclose(py[1], fort[1])
        assert_allclose(py[2], fort[2])
        assert_allclose(py[3], fort[3])
        assert_allclose(py[4], fort[4])

        assert_allclose(py[0], f2py[0])
        assert_allclose(py[1], f2py[1])
        assert_allclose(py[2], f2py[2])
        assert_allclose(py[3], f2py[3])
        assert_allclose(py[4], f2py[4])

        (
            states_all,
            mapping_state_idx,
            periods_rewards_systematic,
            periods_emax,
        ) = state_space._get_fortran_counterparts()

        simulated_data = pyth_simulate(
            state_space,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec,
            optim_paras,
            is_debug,
        )
        py = simulated_data.copy().fillna(MISSING_FLOAT).values

        data_array = process_dataset(respy_obj).to_numpy()

        # It is very important to cut the data array down to the size of the estimation
        # sample for the calculation of the likelihood contributions.
        data_array = py[:num_agents_est * num_periods, :]

        f2py = fort_debug.wrapper_simulate(
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            num_periods,
            num_agents_sim,
            periods_draws_sims,
            seed_sim,
            file_sim,
            edu_spec["start"],
            edu_spec["max"],
            edu_spec["share"],
            edu_spec["lagged"],
            optim_paras["coeffs_common"],
            optim_paras["coeffs_a"],
            optim_paras["coeffs_b"],
            shocks_cholesky,
            delta,
            num_types,
            type_spec_shares,
            type_spec_shifts,
            is_debug,
        )
        assert_allclose(py, f2py)

        # We have to cut the simulated data to `num_agents_est` as the Python
        # implementation calculates the likelihood contributions for all agents in the
        # data.
        simulated_data = simulated_data.loc[simulated_data.Identifier.lt(
            num_agents_est)]

        py = pyth_contributions(state_space, simulated_data,
                                periods_draws_prob, tau, optim_paras)

        num_obs_agent = np.bincount(simulated_data.Identifier.to_numpy())

        f2py = fort_debug.wrapper_contributions(
            periods_rewards_systematic,
            mapping_state_idx,
            periods_emax,
            states_all,
            data_array,
            periods_draws_prob,
            tau,
            num_periods,
            num_draws_prob,
            num_agents_est,
            num_obs_agent,
            num_types,
            edu_spec["start"],
            edu_spec["max"],
            shocks_cholesky,
            delta,
            type_spec_shares,
            type_spec_shifts,
        )

        assert_allclose(py, f2py)

        # Evaluation of criterion function
        x0 = get_optim_paras(optim_paras, num_paras, "all", is_debug)

        py = pyth_criterion(
            x0,
            is_interpolated,
            num_points_interp,
            is_debug,
            simulated_data,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            state_space,
        )

        f2py = fort_debug.wrapper_criterion(
            x0,
            is_interpolated,
            num_draws_emax,
            num_periods,
            num_points_interp,
            is_myopic,
            is_debug,
            data_array,
            num_draws_prob,
            tau,
            periods_draws_emax,
            periods_draws_prob,
            states_all,
            state_space.states_per_period,
            mapping_state_idx,
            max_states_period,
            num_agents_est,
            num_obs_agent,
            num_types,
            edu_spec["start"],
            edu_spec["max"],
            edu_spec["share"],
            type_spec_shares,
            type_spec_shifts,
            num_paras,
        )

        assert_allclose(py, f2py)
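A recurring device in this test is aligning the Python and Fortran implementations by writing random draws to disk and reading the identical values back in both code paths (write_draws/read_draws above). The following toy sketch shows the idea with plain NumPy; the file name and array shapes are made up for illustration.

import numpy as np

num_periods, max_draws = 3, 5

# Write one block of standard-normal draws per period to a shared text file.
draws = np.random.standard_normal((num_periods * max_draws, 4))
np.savetxt("toy.respy.draws", draws)

# Every implementation reads back exactly the same draws instead of redrawing.
periods_draws = np.loadtxt("toy.respy.draws").reshape(num_periods, max_draws, 4)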