def simplify_covariance_and_sdcorr_constraints(pc, pp):
    """Enforce covariance and sdcorr constraints by bounds if possible.

    This is possible if the dimension is <= 2 or all covariances are fixed to 0.

    Args:
        pc: Processed constraints, passed on to ``_split_constraints``. Each
            covariance or sdcorr constraint is accessed like a dict with the
            entries "type" and "index"; "index" holds the integer positions
            (as used with ``.iloc``) of the constrained parameters in ``pp``.
        pp (pd.DataFrame): Processed params with the columns "lower_bound",
            "upper_bound", "_is_fixed_to_value" and "_fixed_value".

    Returns:
        constraints (list): The constraints of other types, followed by the
            covariance and sdcorr constraints that could not be replaced by
            bounds.
        pp (pd.DataFrame): Copy of ``pp`` with updated "lower_bound" and
            "upper_bound" columns. The input DataFrame is not modified.

    """
    cov_constraints, others = _split_constraints(pc, "covariance")
    sdcorr_constraints, others = _split_constraints(others, "sdcorr")
    to_simplify = cov_constraints + sdcorr_constraints

    pp = pp.copy()
    lower = pp["lower_bound"].copy()
    upper = pp["upper_bound"].copy()

    not_simplifyable = []
    for constr in to_simplify:
        index = constr["index"]
        dim = number_of_triangular_elements_to_dimension(len(index))

        if constr["type"] == "covariance":
            # Positions of the diagonal elements inside the flattened lower
            # triangle: 0, 2, 5, 9, ...
            diag_positions = [0] + np.cumsum(range(2, dim + 1)).tolist()
            diag_indices = np.array(index)[diag_positions].tolist()
            # Select the off-diagonal entries by their position within the
            # constraint. Comparing the index values themselves against
            # ``diag_positions`` would mix up positions in ``pp`` with
            # positions inside the constraint.
            diag_position_set = set(diag_positions)
            off_indices = [
                ix for pos, ix in enumerate(index) if pos not in diag_position_set
            ]
        elif constr["type"] == "sdcorr":
            # sdcorr layout: first the ``dim`` standard deviations, then the
            # correlations.
            diag_indices = index[:dim]
            off_indices = index[dim:]

        # For dim == 1 there are no off-diagonal elements; ``.all()`` on the
        # empty selection is True, so that case counts as uncorrelated.
        off_params = pp.iloc[off_indices]
        uncorrelated = bool(
            off_params["_is_fixed_to_value"].all()
            and (off_params["_fixed_value"] == 0).all()
        )

        if uncorrelated:
            lower.iloc[diag_indices] = np.maximum(0, lower.iloc[diag_indices])
        elif dim <= 2:
            # NOTE(review): these [-1, 1] bounds are also applied to the
            # off-diagonal element of 2x2 "covariance" constraints, where
            # they are not correlation bounds - confirm this is intended.
            lower.iloc[diag_indices] = np.maximum(0, lower.iloc[diag_indices])
            lower.iloc[off_indices] = -1
            upper.iloc[off_indices] = 1
        else:
            not_simplifyable.append(constr)

    pp["lower_bound"] = lower
    pp["upper_bound"] = upper

    return others + not_simplifyable, pp
def _create_internal_bounds(lower, upper, pc):
    """Build the bounds of the internal parameter vector.

    The returned Series still have the length of the external params and will
    be reduced later.

    Args:
        lower (pd.Series): Processed and consolidated external lower bounds.
        upper (pd.Series): Processed and consolidated external upper bounds.
        pc: Iterable of processed and consolidated constraints. Each one is
            accessed like a dict with the entries "type" and "index"; linear
            constraints additionally need "right_hand_side" with the entries
            "lower_bound" and "upper_bound".

    Returns:
        int_lower (pd.Series): Lower bound of internal parameters.
        int_upper (pd.Series): Upper bound of internal parameters.

    Raises:
        TypeError: If a constraint has a type other than "covariance",
            "sdcorr", "probability" or "linear".

    """
    internal_lower = lower.copy()
    internal_upper = upper.copy()

    for constraint in pc:
        kind = constraint["type"]

        if kind == "probability":
            internal_lower.iloc[constraint["index"]] = 0

        elif kind == "linear":
            # Drop the external bounds of the involved params and take over
            # the bounds stored on the right hand side instead.
            positions = constraint["index"]
            internal_lower.iloc[positions] = -np.inf
            internal_upper.iloc[positions] = np.inf
            rhs = constraint["right_hand_side"]
            internal_lower.update(rhs["lower_bound"])
            internal_upper.update(rhs["upper_bound"])

        elif kind in ("covariance", "sdcorr"):
            # The diagonal positions are the same for covariance and sdcorr
            # because the internal params contain the Cholesky factor of the
            # implied covariance matrix in both cases.
            positions = constraint["index"]
            dim = number_of_triangular_elements_to_dimension(len(positions))
            diag_positions = [0] + np.cumsum(range(2, dim + 1)).tolist()
            diag_indices = np.array(positions)[diag_positions].tolist()

            min_distance = constraint.get("bounds_distance", 0)
            if kind == "covariance":
                # The square root of the distance is used for covariances.
                min_distance = np.sqrt(min_distance)

            internal_lower.iloc[diag_indices] = np.maximum(
                internal_lower.iloc[diag_indices], min_distance
            )

        else:
            raise TypeError("Invalid constraint type {}".format(constraint["type"]))

    return internal_lower, internal_upper
def _covariance_from_internal(params_subset, case):
    """Map the internal parameters of a covariance matrix to external ones.

    For ``case == 'all_free'`` the internal values are read as the lower
    triangle of a Cholesky factor and the Cholesky reparametrization is
    undone. In every other case the values are returned unchanged.

    Args:
        params_subset (DataFrame): relevant subset of internal_params.
        case (str): can take the values 'all_free', 'uncorrelated' or
            'all_fixed'.

    Returns:
        res (Series): Series with lower triangular elements of a covariance
            matrix

    """
    external = params_subset.copy(deep=True)
    if case != "all_free":
        return external["value"]

    dim = number_of_triangular_elements_to_dimension(len(params_subset))
    lower_triangle = np.tril_indices(dim)

    # Fill the lower triangle of the Cholesky factor.
    chol = np.zeros((dim, dim))
    chol[lower_triangle] = params_subset["value"].to_numpy()

    # Multiply the factor with its transpose and flatten the lower triangle
    # of the product.
    external["value"] = chol.dot(chol.T)[lower_triangle]
    return external["value"]
def _covariance_from_internal(params_subset, case, type_): """Reparametrize parameters that describe a covariance matrix from internal. If case == 'free', undo the cholesky reparametrization. Otherwise, do nothing. Args: params_subset (DataFrame): relevant subset of internal_params. case (str): can take the values 'free', 'uncorrelated' or 'all_fixed'. Returns: res (Series): Series with lower triangular elements of a covariance matrix """ res = params_subset.copy(deep=True) if case == "free": dim = number_of_triangular_elements_to_dimension(len(params_subset)) helper = np.zeros((dim, dim)) helper[np.tril_indices(dim)] = params_subset["value"].to_numpy() if params_subset["_fixed"].any(): helper[0, 0] = np.sqrt(helper[0, 0]) cov = helper.dot(helper.T) if type_ == "covariance": res["value"] = cov_matrix_to_params(cov) elif type_ == "sdcorr": res["value"] = cov_matrix_to_sdcorr_params(cov) else: raise ValueError("Invalid type_: {}".format(type_)) elif case in ["all_fixed", "uncorrelated"]:
def test_number_of_triangular_elements_to_dimension():
    """Check the dimension returned for several triangular element counts."""
    # Maps the number of lower triangular elements to the expected dimension.
    expected_dim_by_n_elements = {6: 3, 10: 4, 15: 5, 21: 6}
    for n_elements, expected_dim in expected_dim_by_n_elements.items():
        calculated = number_of_triangular_elements_to_dimension(n_elements)
        assert calculated == expected_dim
def generate_random_model( point_constr=None, bound_constr=None, n_types=None, n_type_covariates=None, myopic=False, ): """Generate a random model specification. Parameters ---------- point_constr : dict A full or partial options specification. Elements that are specified here are not drawn randomly. bound_constr : dict Upper bounds for some options to keep computation time reasonable. Can have the keys ["max_types", "max_periods", "max_edu_start", "max_agents", "max_draws"] n_types : int Number of unobserved types. n_type_covariates : Number of covariates to calculate type probabilities. myopic : bool Indicator for myopic agents meaning the discount factor is set to zero. """ point_constr = {} if point_constr is None else copy.deepcopy(point_constr) bound_constr = {} if bound_constr is None else copy.deepcopy(bound_constr) for constr in point_constr, bound_constr: assert isinstance(constr, dict) bound_constr = _consolidate_bound_constraints(bound_constr) if n_types is None: n_types = np.random.randint(1, bound_constr["max_types"] + 1) if n_type_covariates is None: n_type_covariates = np.random.randint(2, 4) params = csv_template( n_types=n_types, n_type_covariates=n_type_covariates, initialize_coeffs=False ) params["value"] = np.random.uniform(low=-0.05, high=0.05, size=len(params)) params.loc["delta", "value"] = 1 - np.random.uniform() if myopic is False else 0 n_shock_coeffs = len(params.loc["shocks_sdcorr"]) dim = number_of_triangular_elements_to_dimension(n_shock_coeffs) helper = np.eye(dim) * 0.5 helper[np.tril_indices(dim, k=-1)] = np.random.uniform( -0.05, 0.2, size=(n_shock_coeffs - dim) ) cov = helper.dot(helper.T) params.loc["shocks_sdcorr", "value"] = cov_matrix_to_sdcorr_params(cov) params.loc["meas_error", "value"] = np.random.uniform( low=0.001, high=0.1, size=len(params.loc["meas_error"]) ) n_edu_start = np.random.randint(1, bound_constr["max_edu_start"] + 1) edu_starts = point_constr.get( "edu_start", np.random.choice(np.arange(1, 15), 
size=n_edu_start, replace=False) ) edu_shares = point_constr.get("edu_share", _get_initial_shares(n_edu_start)) edu_max = point_constr.get("edu_max", np.random.randint(max(edu_starts) + 1, 30)) params = pd.concat( [params, initial_and_max_experience_template(edu_starts, edu_shares, edu_max)], axis=0, sort=False, ) n_lagged_choices = point_constr.get("n_lagged_choices", np.random.choice(2)) if n_lagged_choices: choices = ["a", "b", "edu", "home"] lc_probs_params = lagged_choices_probs_template(n_lagged_choices, choices) lc_params = pd.read_csv( ROOT_DIR / "pre_processing" / "lagged_choice_params.csv" ) lc_params.set_index(["category", "name"], inplace=True) params = pd.concat([params, lc_probs_params, lc_params], axis=0, sort=False) lc_covariates = lagged_choices_covariates_template() else: lc_covariates = {} observables = point_constr.pop("observables", None) if observables is None: n_observables = np.random.randint(0, 3) # Do not sample observables with 1 level! observables = ( np.random.randint(2, 4, size=n_observables) if n_observables else False ) if observables is not False: to_concat = [ params, observable_prob_template(observables), observable_coeffs_template(observables, params), ] params = pd.concat(to_concat, axis="rows", sort=False) indices = ( params.index.get_level_values("category") .str.extract(r"observable_([a-z0-9_]+)", expand=False) .dropna() .unique() ) observable_covs = {x: "{} == {}".format(*x.rsplit("_", 1)) for x in indices} else: observable_covs = {} options = { "simulation_agents": np.random.randint(3, bound_constr["max_agents"] + 1), "simulation_seed": np.random.randint(1, 1_000), "n_periods": np.random.randint(1, bound_constr["max_periods"]), "solution_draws": np.random.randint(1, bound_constr["max_draws"]), "solution_seed": np.random.randint(1, 10_000), "estimation_draws": np.random.randint(1, bound_constr["max_draws"]), "estimation_seed": np.random.randint(1, 10_000), "estimation_tau": np.random.uniform(100, 500), 
"interpolation_points": -1, }