Beispiel #1
0
def simplify_covariance_and_sdcorr_constraints(pc, pp):
    """Enforce covariance and sdcorr constraints by bounds if possible.

    A constraint can be replaced by simple box bounds in two situations:

    - All off-diagonal elements are fixed to 0. Then it is sufficient to keep
      the diagonal elements (variances or standard deviations) non-negative.
    - It is an sdcorr constraint of dimension <= 2. Then the standard
      deviations have to be non-negative and the correlation has to lie in
      [-1, 1]. This does not carry over to covariance constraints because a
      covariance is not restricted to [-1, 1].

    Args:
        pc (list): Constraints, each a dict with at least the entries "type"
            and "index". "index" holds the integer positions (in ``pp``) of the
            lower triangular elements described by the constraint.
        pp (pd.DataFrame): Parameters with the columns "lower_bound",
            "upper_bound", "_is_fixed_to_value" and "_fixed_value".

    Returns:
        tuple: The constraints that still have to be enforced (all constraints
        of other types plus the covariance and sdcorr constraints that could
        not be simplified) and a copy of ``pp`` with adjusted bounds.

    """
    cov_constraints, others = _split_constraints(pc, "covariance")
    sdcorr_constraints, others = _split_constraints(others, "sdcorr")
    to_simplify = cov_constraints + sdcorr_constraints
    pp = pp.copy()
    lower = pp["lower_bound"].copy()
    upper = pp["upper_bound"].copy()

    not_simplifyable = []
    for constr in to_simplify:
        index = constr["index"]
        dim = number_of_triangular_elements_to_dimension(len(index))
        if constr["type"] == "covariance":
            # Positions of the diagonal inside the row-wise lower triangle.
            diag_positions = [0] + np.cumsum(range(2, dim + 1)).tolist()
            diag_indices = np.array(index)[diag_positions].tolist()
            # Compare against diag_indices (positions in pp), not against
            # diag_positions (positions inside the constraint). Otherwise the
            # split is wrong whenever the constraint does not start at 0.
            diag_lookup = set(diag_indices)
            off_indices = [i for i in index if i not in diag_lookup]
        if constr["type"] == "sdcorr":
            # sdcorr layout: first the standard deviations, then correlations.
            diag_indices = index[:dim]
            off_indices = index[dim:]

        off_params = pp.iloc[off_indices]
        uncorrelated = False
        if off_params["_is_fixed_to_value"].all():
            if (off_params["_fixed_value"] == 0).all():
                uncorrelated = True

        if uncorrelated:
            lower.iloc[diag_indices] = np.maximum(0, lower.iloc[diag_indices])
        elif dim <= 2 and constr["type"] == "sdcorr":
            # Only a correlation is guaranteed to lie in [-1, 1]; covariance
            # constraints with free off-diagonal elements stay unsimplified.
            lower.iloc[diag_indices] = np.maximum(0, lower.iloc[diag_indices])
            lower.iloc[off_indices] = -1
            upper.iloc[off_indices] = 1
        else:
            not_simplifyable.append(constr)

    pp["lower_bound"] = lower
    pp["upper_bound"] = upper

    return others + not_simplifyable, pp
Beispiel #2
0
def _create_internal_bounds(lower, upper, pc):
    """Create columns with bounds for the internal parameter vector.

    The columns have the length of the external params and will be reduced later.

    Args:
        lower (pd.Series): Processed and consolidated external lower bounds.
        upper (pd.Series): Processed and consolidated external upper bounds.
        pc (pd.DataFrame): Processed and consolidated constraints.

    Returns:
        int_lower (pd.Series): Lower bound of internal parameters.
        int_upper (pd.Series): Upper bound of internal parameters.

    """
    int_lower, int_upper = lower.copy(), upper.copy()

    for constr in pc:
        if constr["type"] in ["covariance", "sdcorr"]:
            # Note that the diagonal positions are the same for covariance and sdcorr
            # because the internal params contains the Cholesky factor of the implied
            # covariance matrix in both cases.
            dim = number_of_triangular_elements_to_dimension(
                len(constr["index"]))
            diag_positions = [0] + np.cumsum(range(2, dim + 1)).tolist()
            diag_indices = np.array(constr["index"])[diag_positions].tolist()
            bd = constr.get("bounds_distance", 0)
            bd = np.sqrt(bd) if constr["type"] == "covariance" else bd
            int_lower.iloc[diag_indices] = np.maximum(
                int_lower.iloc[diag_indices], bd)
        elif constr["type"] == "probability":
            int_lower.iloc[constr["index"]] = 0
        elif constr["type"] == "linear":
            int_lower.iloc[constr["index"]] = -np.inf
            int_upper.iloc[constr["index"]] = np.inf
            int_lower.update(constr["right_hand_side"]["lower_bound"])
            int_upper.update(constr["right_hand_side"]["upper_bound"])
        else:
            raise TypeError("Invalid constraint type {}".format(
                constr["type"]))

    return int_lower, int_upper
Beispiel #3
0
def _covariance_from_internal(params_subset, case):
    """Reparametrize parameters that describe a covariance matrix from internal.

    If case == 'all_free', undo the cholesky reparametrization. Otherwise, do nothing.

    Args:
        params_subset (DataFrame): relevant subset of internal_params.
        case (str): can take the values 'all_free', 'uncorrelated' or 'all_fixed'.

    Returns:
        res (Series): Series with lower triangular elements of a covariance matrix

    """
    res = params_subset.copy(deep=True)
    if case == "all_free":
        dim = number_of_triangular_elements_to_dimension(len(params_subset))
        helper = np.zeros((dim, dim))
        helper[np.tril_indices(dim)] = params_subset["value"].to_numpy()
        cov = helper.dot(helper.T)
        cov_coeffs = cov[np.tril_indices(dim)]
        res["value"] = cov_coeffs
    return res["value"]
Beispiel #4
0
def _covariance_from_internal(params_subset, case, type_):
    """Reparametrize parameters that describe a covariance matrix from internal.

    If case == 'free', undo the cholesky reparametrization. Otherwise, do nothing.

    Args:
        params_subset (DataFrame): relevant subset of internal_params.
        case (str): can take the values 'free', 'uncorrelated' or 'all_fixed'.

    Returns:
        res (Series): Series with lower triangular elements of a covariance matrix

    """
    res = params_subset.copy(deep=True)
    if case == "free":
        dim = number_of_triangular_elements_to_dimension(len(params_subset))
        helper = np.zeros((dim, dim))
        helper[np.tril_indices(dim)] = params_subset["value"].to_numpy()

        if params_subset["_fixed"].any():
            helper[0, 0] = np.sqrt(helper[0, 0])

        cov = helper.dot(helper.T)

        if type_ == "covariance":
            res["value"] = cov_matrix_to_params(cov)
        elif type_ == "sdcorr":
            res["value"] = cov_matrix_to_sdcorr_params(cov)
        else:
            raise ValueError("Invalid type_: {}".format(type_))
    elif case in ["all_fixed", "uncorrelated"]:
Beispiel #5
0
def test_number_of_triangular_elements_to_dimension():
    """Check the dimension recovered from several triangular element counts."""
    # Maps the number of lower triangular elements to the expected dimension.
    expected_dimension = {6: 3, 10: 4, 15: 5, 21: 6}
    for n_elements, dim in expected_dimension.items():
        assert number_of_triangular_elements_to_dimension(n_elements) == dim
Beispiel #6
0
def generate_random_model(
    point_constr=None,
    bound_constr=None,
    n_types=None,
    n_type_covariates=None,
    myopic=False,
):
    """Generate a random model specification.

    Parameters
    ----------
    point_constr : dict
        A full or partial options specification. Elements that are specified here are
        not drawn randomly.
    bound_constr : dict
        Upper bounds for some options to keep computation time reasonable. Can have the
        keys ["max_types", "max_periods", "max_edu_start", "max_agents", "max_draws"]
    n_types : int
        Number of unobserved types.
    n_type_covariates :
        Number of covariates to calculate type probabilities.
    myopic : bool
        Indicator for myopic agents meaning the discount factor is set to zero.

    """
    point_constr = {} if point_constr is None else copy.deepcopy(point_constr)
    bound_constr = {} if bound_constr is None else copy.deepcopy(bound_constr)

    for constr in point_constr, bound_constr:
        assert isinstance(constr, dict)

    bound_constr = _consolidate_bound_constraints(bound_constr)

    if n_types is None:
        n_types = np.random.randint(1, bound_constr["max_types"] + 1)
    if n_type_covariates is None:
        n_type_covariates = np.random.randint(2, 4)

    params = csv_template(
        n_types=n_types, n_type_covariates=n_type_covariates, initialize_coeffs=False
    )
    params["value"] = np.random.uniform(low=-0.05, high=0.05, size=len(params))

    params.loc["delta", "value"] = 1 - np.random.uniform() if myopic is False else 0

    n_shock_coeffs = len(params.loc["shocks_sdcorr"])
    dim = number_of_triangular_elements_to_dimension(n_shock_coeffs)
    helper = np.eye(dim) * 0.5
    helper[np.tril_indices(dim, k=-1)] = np.random.uniform(
        -0.05, 0.2, size=(n_shock_coeffs - dim)
    )
    cov = helper.dot(helper.T)
    params.loc["shocks_sdcorr", "value"] = cov_matrix_to_sdcorr_params(cov)

    params.loc["meas_error", "value"] = np.random.uniform(
        low=0.001, high=0.1, size=len(params.loc["meas_error"])
    )

    n_edu_start = np.random.randint(1, bound_constr["max_edu_start"] + 1)
    edu_starts = point_constr.get(
        "edu_start", np.random.choice(np.arange(1, 15), size=n_edu_start, replace=False)
    )
    edu_shares = point_constr.get("edu_share", _get_initial_shares(n_edu_start))
    edu_max = point_constr.get("edu_max", np.random.randint(max(edu_starts) + 1, 30))
    params = pd.concat(
        [params, initial_and_max_experience_template(edu_starts, edu_shares, edu_max)],
        axis=0,
        sort=False,
    )

    n_lagged_choices = point_constr.get("n_lagged_choices", np.random.choice(2))
    if n_lagged_choices:
        choices = ["a", "b", "edu", "home"]
        lc_probs_params = lagged_choices_probs_template(n_lagged_choices, choices)
        lc_params = pd.read_csv(
            ROOT_DIR / "pre_processing" / "lagged_choice_params.csv"
        )
        lc_params.set_index(["category", "name"], inplace=True)
        params = pd.concat([params, lc_probs_params, lc_params], axis=0, sort=False)
        lc_covariates = lagged_choices_covariates_template()
    else:
        lc_covariates = {}

    observables = point_constr.pop("observables", None)
    if observables is None:
        n_observables = np.random.randint(0, 3)
        # Do not sample observables with 1 level!
        observables = (
            np.random.randint(2, 4, size=n_observables) if n_observables else False
        )

    if observables is not False:
        to_concat = [
            params,
            observable_prob_template(observables),
            observable_coeffs_template(observables, params),
        ]
        params = pd.concat(to_concat, axis="rows", sort=False)

        indices = (
            params.index.get_level_values("category")
            .str.extract(r"observable_([a-z0-9_]+)", expand=False)
            .dropna()
            .unique()
        )
        observable_covs = {x: "{} == {}".format(*x.rsplit("_", 1)) for x in indices}
    else:
        observable_covs = {}

    options = {
        "simulation_agents": np.random.randint(3, bound_constr["max_agents"] + 1),
        "simulation_seed": np.random.randint(1, 1_000),
        "n_periods": np.random.randint(1, bound_constr["max_periods"]),
        "solution_draws": np.random.randint(1, bound_constr["max_draws"]),
        "solution_seed": np.random.randint(1, 10_000),
        "estimation_draws": np.random.randint(1, bound_constr["max_draws"]),
        "estimation_seed": np.random.randint(1, 10_000),
        "estimation_tau": np.random.uniform(100, 500),
        "interpolation_points": -1,
    }