def create_state_space_class(optim_paras, options):
    """Build the state space object of the model.

    The core states and their indexer are created first, the core covariates are
    computed and downcast, and the covariates of the dense grid are derived. Base
    draws for the solution are created last.

    Parameters
    ----------
    optim_paras : dict
        Model parameters. ``optim_paras["choices"]`` is read here to size the draws;
        the dictionary is also forwarded to the helper functions.
    options : dict
        Model options. The keys ``"covariates_core"``, ``"n_periods"``,
        ``"solution_draws"``, ``"solution_seed_startup"`` and
        ``"monte_carlo_sequence"`` are read here.

    Returns
    -------
    state_space : _MultiDimStateSpace or _SingleDimStateSpace
        The multi-dimensional variant if dense covariates exist, the
        single-dimensional variant otherwise.

    """
    core, indexer = _create_core_and_indexer(optim_paras, options)
    dense_grid = _create_dense_state_space_grid(optim_paras)

    # Compute covariates on the wide dtypes first and shrink the dtypes afterwards.
    # Downcasting before the calculations risks silent integer overflows.
    core = compute_covariates(core, options["covariates_core"]).apply(
        downcast_to_smallest_dtype
    )

    dense = _create_dense_state_space_covariates(dense_grid, optim_paras, options)

    draws_shape = (
        options["n_periods"],
        options["solution_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_sol = create_base_draws(
        draws_shape,
        next(options["solution_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    shared_args = (core, indexer, base_draws_sol, optim_paras, options)
    if dense:
        return _MultiDimStateSpace(*shared_args, dense)
    return _SingleDimStateSpace(*shared_args)
def _create_dense_period_choice(
    core, dense, core_key_to_core_indices, core_key_to_complex, optim_paras, options
):
    """Create dense period choice parts of the state space.

    We loop over all dense combinations and calculate choice restrictions for each
    particular dense state space. The information allows us to compile a dict that
    maps a combination of period, choice_set and dense_index into core_key!

    Note that we do not allow for choice restrictions that interact between core and
    dense covariates. In order to do so we would have to rewrite this function and
    return explicit state space position instead of core indices!

    As a side effect, the states belonging to every key of the returned dictionary
    are passed to ``dump_objects`` under the label ``"states"``.

    Parameters
    ----------
    core : pandas.DataFrame
        Core states. Rows are selected with ``core.loc[...]`` using the values of
        ``core_key_to_core_indices``.
    dense : dict or falsy
        Dense covariates, one mapping of column name to value per dense grid point.
        A falsy value means that the model has no dense dimensions.
    core_key_to_core_indices : dict
        Maps a core key to the row labels of ``core`` belonging to that key.
    core_key_to_complex : dict
        Maps a core key to its complex index. The first element of the complex
        index is used as the period.
    optim_paras : dict
        ``optim_paras["choices"]`` supplies the choice names.
    options : dict
        ``options["covariates_all"]`` is read here; the dictionary is also
        forwarded to the helper functions.

    Returns
    -------
    dense_period_choice : dict
        d: (period, choice_set, dense_index) -> core_key. Without dense dimensions
        the keys are the complex indices of ``core_key_to_complex`` instead.

    Raises
    ------
    ValueError
        If the states of one core key do not share exactly one choice set after the
        dense covariates have been added.

    """
    if not dense:
        for key, complex_ in core_key_to_complex.items():
            dump_objects(
                core.loc[core_key_to_core_indices[key]], "states", complex_, options
            )
        return {complex_: key for key, complex_ in core_key_to_complex.items()}

    choices = [f"_{choice}" for choice in optim_paras["choices"]]
    dense_period_choice = {}

    for dense_idx, dense_vec in enumerate(dense.values()):
        states = core.copy().assign(**dense_vec)
        states = compute_covariates(states, options["covariates_all"])
        states = create_is_inadmissible(states, optim_paras, options)

        for core_idx, indices in core_key_to_core_indices.items():
            # ``assign`` already returns a new frame, so there is no need to copy
            # the complete ``states`` frame for every core key.
            df = states.loc[indices].assign(**dense_vec)
            # Flip the flags in the ``_{choice}`` columns. Presumably they mark
            # inadmissible choices, so that True means "available" afterwards.
            df[choices] = ~df[choices]
            grouper = df.groupby(choices).groups
            if len(grouper) != 1:
                raise ValueError(
                    "Choice restrictions cannot interact between core and dense "
                    "information such that heterogeneous choice sets within a "
                    "period are created. Use penalties in the utility functions "
                    "for that."
                )

            # Exactly one choice set is left after the check above.
            choice_set = next(iter(grouper))
            dense_key = (core_key_to_complex[core_idx][0], choice_set, dense_idx)

            # Insert in place instead of rebuilding the dictionary in every
            # iteration, which would be quadratic in the number of keys.
            dense_period_choice[dense_key] = core_idx
            dump_objects(df, "states", dense_key, options)

    return dense_period_choice
def _sample_characteristic(states_df, options, level_dict, use_keys):
    """Draw one state space characteristic for every individual.

    The function samples the values of a single characteristic, say experience.
    Each key of ``level_dict`` is one possible level of the characteristic and each
    value holds the parameters belonging to that level. For every level, the
    parameters are multiplied with the covariates computed from ``states_df`` which
    yields one value ``z`` per individual and level. A softmax over the levels turns
    the ``z``-values into probabilities which are then used for sampling.

    NumPy's global random number generator is re-seeded with the next value of
    ``options["simulation_seed_iteration"]`` right before the draw.

    Parameters
    ----------
    states_df : pandas.DataFrame
        Contains the state of each individual.
    options : dict
        Options of the model. ``"covariates_all"`` and
        ``"simulation_seed_iteration"`` are read here.
    level_dict : dict
        The keys are the values distributed according to the probability mass
        function. The values are :class:`pandas.Series` with covariate names as
        index and parameter values as data.
    use_keys : bool
        If true, ``level_dict`` itself is handed to the sampler so that its keys
        become the sampled values. Otherwise only the number of levels is passed,
        i.e., numeric codes are sampled. For example, assign numbers to choices.

    Returns
    -------
    characteristic : numpy.ndarray
        Array with shape (n_individuals,) containing sampled values.

    """
    # Enrich the states with every covariate which can be computed from them.
    covariates = compute_covariates(
        states_df, options["covariates_all"], check_nans=True, raise_errors=False
    )

    # One vector of linear indices per level, in the iteration order of the levels.
    linear_indices = [
        pandas_dot(covariates, parameters) for parameters in level_dict.values()
    ]

    # The softmax over the levels converts the indices to probabilities.
    probabilities = softmax(np.column_stack(linear_indices), axis=1)

    np.random.seed(next(options["simulation_seed_iteration"]))

    if use_keys:
        choices = level_dict
    else:
        choices = len(level_dict)

    return _random_choice(choices, probabilities)
def _compute_x_beta_for_type_probabilities(df, optim_paras, options):
    """Compute the dot product of type covariates and type coefficients.

    For every type, a copy of ``df`` with the column ``type`` set to that type is
    extended with the covariates required by the type's coefficients. The covariates
    are multiplied with the coefficients which yields one scalar per row and type.

    Note that the scalars are written to ``df`` itself in columns labeled by the
    integer types, so the passed DataFrame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Data from which the covariates are computed.
    optim_paras : dict
        ``"n_types"`` and ``"type_prob"`` are read. ``optim_paras["type_prob"][i]``
        must provide an ``index`` with the covariate names of type ``i``.
    options : dict
        ``options["covariates_all"]`` holds the available covariate definitions.

    Returns
    -------
    pandas.DataFrame
        The columns ``0, ..., n_types - 1`` of ``df``.

    """
    type_ids = range(optim_paras["n_types"])

    for type_ in type_ids:
        typed = df.copy().assign(type=type_)

        coefficients = optim_paras["type_prob"][type_]
        labels = coefficients.index

        needed = identify_necessary_covariates(labels, options["covariates_all"])
        typed = compute_covariates(typed, needed)

        regressors = typed[labels].to_numpy(dtype=COVARIATES_DOT_PRODUCT_DTYPE)
        df[type_] = np.dot(regressors, coefficients)

    return df[type_ids]
def _create_dense_state_space_covariates(dense_grid, optim_paras, options):
    """Compute the covariates for every point of the dense grid.

    Parameters
    ----------
    dense_grid : list or falsy
        Points of the dense grid. Each point supplies one value for each dense
        state space column. A falsy value means there are no dense dimensions.
    optim_paras : dict
        Passed on to ``create_dense_state_space_columns``.
    options : dict
        ``options["covariates_dense"]`` holds the covariates to compute.

    Returns
    -------
    covariates : dict or False
        ``False`` if ``dense_grid`` is falsy. Otherwise the row-wise dictionary of
        the downcast covariates after its keys were passed through
        ``convert_dictionary_keys_to_dense_indices``.

    """
    if not dense_grid:
        return False

    columns = create_dense_state_space_columns(optim_paras)

    # Keep the dense columns as data while they also serve as the index.
    grid = pd.DataFrame(data=dense_grid, columns=columns)
    grid = grid.set_index(columns, drop=False)

    with_covariates = compute_covariates(grid, options["covariates_dense"])
    by_grid_point = with_covariates.apply(downcast_to_smallest_dtype).to_dict(
        orient="index"
    )

    return convert_dictionary_keys_to_dense_indices(by_grid_point)
def _compute_x_beta_for_type_probabilities(df, optim_paras, options):
    """Compute the vector dot product of type covariates and type coefficients.

    For each individual, as many dot products are computed as there are types. The
    resulting scalars are later passed to a softmax function which turns them into
    the probabilities of an individual to be of some type.

    Note that the scalars are written to ``df`` itself in columns labeled by the
    integer types, so the passed DataFrame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Data from which the covariates are computed.
    optim_paras : dict
        ``"n_types"`` and ``"type_prob"`` are read. ``optim_paras["type_prob"][i]``
        must provide an ``index`` with the covariate names of type ``i``.
    options : dict
        ``options["covariates_all"]`` holds the available covariate definitions.

    Returns
    -------
    pandas.DataFrame
        The columns ``0, ..., n_types - 1`` of ``df``.

    """
    type_ids = range(optim_paras["n_types"])

    for type_ in type_ids:
        typed = df.copy().assign(type=type_)

        coefficients = optim_paras["type_prob"][type_]
        needed = identify_necessary_covariates(
            coefficients.index, options["covariates_all"]
        )
        typed = compute_covariates(typed, needed)

        df[type_] = pandas_dot(typed, coefficients)

    return df[type_ids]
def create_state_space_class(optim_paras, options):
    """Build the state space object of the model.

    The cache directory is prepared first. Then the core states and the dense grid
    are created, their covariates are computed, and the mappings between core keys,
    complex indices and core row indices are assembled. Everything is bundled in a
    :class:`StateSpace`.

    Parameters
    ----------
    optim_paras : dict
        Model parameters, forwarded to the helper functions.
    options : dict
        Model options. ``options["covariates_core"]`` is read here; the dictionary
        is also forwarded to the helper functions.

    Returns
    -------
    state_space : StateSpace

    """
    prepare_cache_directory(options)

    core = _create_core_state_space(optim_paras, options)
    dense_grid = _create_dense_state_space_grid(optim_paras)

    # Compute covariates on the wide dtypes first and shrink the dtypes afterwards.
    # Downcasting before the calculations risks silent integer overflows.
    core = compute_covariates(core, options["covariates_core"]).apply(
        downcast_to_smallest_dtype
    )

    dense = _create_dense_state_space_covariates(dense_grid, optim_paras, options)

    core_period_choice = _create_core_period_choice(core, optim_paras, options)

    # Number the complex indices consecutively. The running number is the core key.
    core_key_to_complex = {}
    core_key_to_core_indices = {}
    for core_key, complex_ in enumerate(core_period_choice):
        core_key_to_complex[core_key] = complex_
        core_key_to_core_indices[core_key] = core_period_choice[complex_]

    indexer = _create_indexer(core, core_key_to_core_indices, optim_paras)

    dense_period_choice = _create_dense_period_choice(
        core,
        dense,
        core_key_to_core_indices,
        core_key_to_complex,
        optim_paras,
        options,
    )

    return StateSpace(
        core,
        indexer,
        dense,
        dense_period_choice,
        core_key_to_complex,
        core_key_to_core_indices,
        optim_paras,
        options,
    )
def states(self):
    """Return the core states combined with dense and mixed covariates.

    A copy of ``self.core`` receives the entries of ``self.dense_covariates`` as
    columns. Afterwards, the covariates in ``self.mixed_covariates`` are computed
    on the combined frame. ``self.core`` itself is not modified.

    """
    combined = self.core.copy()
    combined = combined.assign(**self.dense_covariates)
    return compute_covariates(combined, self.mixed_covariates)
def _process_estimation_data(df, state_space, optim_paras, options):
    """Process estimation data.

    The data is sorted by its index, restricted to the columns needed for the
    estimation, relabeled to internal names and converted from labels to codes.
    Each observation is then matched with the index of its state in the state space
    and with the indices of the child states. Wages are replaced by clipped log
    wages.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame which contains the data used for estimation. The DataFrame
        contains individual identifiers, periods, experiences, lagged choices,
        choices in current period, the wage and other observed data. After
        relabeling, the code relies on the names ``"identifier"`` and ``"period"``
        (presumably the two index levels - TODO confirm) and on a column
        ``"wage"``.
    state_space
        State space of the model. ``state_space.indexer`` is indexed by period and
        then by the values of the core state space columns.
        ``state_space.indices_of_child_states`` is indexed by the state indices.
    optim_paras : dict
        ``"n_types"`` and ``"choices"`` are read here; the dictionary is also
        forwarded to the helper functions.
    options : dict
        ``options["covariates_core"]`` is read if there are at least two types.

    Returns
    -------
    df : pandas.DataFrame
        The processed data with the additional columns ``"index"`` (index of the
        state of each observation), one column ``"child_index_{choice}"`` for every
        choice and ``"log_wage"``. The column ``"wage"`` is dropped.
    type_covariates : pandas.DataFrame or None
        ``None`` if the model has fewer than two types. Otherwise the downcast
        result of ``compute_covariates`` applied to the observations of period
        zero.

    """
    col_dtype = generate_column_dtype_dict_for_estimation(optim_paras)

    # The first two entries of the dtype dictionary are skipped (presumably the
    # identifier and the period which live in the index - TODO confirm).
    df = (
        df.sort_index()[list(col_dtype)[2:]]
        .rename(columns=rename_labels_to_internal)
        .rename_axis(index=rename_labels_to_internal)
    )
    df = convert_labeled_variables_to_codes(df, optim_paras)

    # Get indices of states in the state space corresponding to all observations.
    # The lookup is done period by period and the pieces are concatenated to an
    # array with one entry per observation.
    n_periods = int(df.index.get_level_values("period").max() + 1)
    indices = []
    core_columns = create_core_state_space_columns(optim_paras)

    for period in range(n_periods):
        # ``@period`` in the query string refers to the loop variable.
        period_df = df.query("period == @period")
        period_core = tuple(period_df[col].to_numpy() for col in core_columns)
        period_indices = state_space.indexer[period][period_core]
        indices.append(period_indices)

    indices = np.concatenate(indices)

    # The indices are now sorted in period-individual pairs whereas ``df`` is sorted
    # in individual-period pairs. ``__index__`` records the position of every
    # observation in the period-individual ordering, so that indexing with it
    # brings the indices back to the order of ``df``.
    indices_to_reorder = (
        df.sort_values(["period", "identifier"])
        .assign(__index__=np.arange(df.shape[0]))
        .sort_values(["identifier", "period"])["__index__"]
        .to_numpy()
    )
    df["index"] = indices[indices_to_reorder]

    # Add indices of child states to the DataFrame, one column per choice.
    children = pd.DataFrame(
        data=state_space.indices_of_child_states[df["index"].to_numpy()],
        index=df.index,
        columns=[f"child_index_{c}" for c in optim_paras["choices"]],
    )
    df = pd.concat([df, children], axis="columns")

    # The estimation works with log wages. Wages are clipped to
    # [1 / MAX_FLOAT, MAX_FLOAT] before taking the logarithm.
    df["log_wage"] = np.log(np.clip(df.wage.to_numpy(), 1 / MAX_FLOAT, MAX_FLOAT))
    df = df.drop(columns="wage")

    # For the type covariates, we only need the first observation of each individual.
    if optim_paras["n_types"] >= 2:
        initial_states = df.query("period == 0").copy()
        type_covariates = compute_covariates(
            initial_states, options["covariates_core"], raise_errors=False
        )
        type_covariates = type_covariates.apply(downcast_to_smallest_dtype)
    else:
        type_covariates = None

    return df, type_covariates
def _process_estimation_data(df, state_space, optim_paras, options):
    """Process estimation data.

    The data is sorted by its index, restricted to the columns needed for the
    estimation, relabeled to internal names and converted from labels to codes. If
    the model has at least two types, all observations are repeated once per type
    with the type stored in the column ``"type"``. Afterwards, each observation is
    mapped to its dense key and core index, and wages are replaced by clipped log
    wages.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame which contains the data used for estimation. The DataFrame
        contains individual identifiers, periods, experiences, lagged choices,
        choices in current period, the wage and other observed data.
    state_space
        State space of the model, forwarded to ``map_observations_to_states``.
    optim_paras : dict
        ``optim_paras["n_types"]`` is read here; the dictionary is also forwarded
        to the helper functions.
    options : dict
        ``options["covariates_core"]`` is read if there are at least two types.

    Returns
    -------
    df : pandas.DataFrame
        The processed data with the additional columns ``"dense_key"``,
        ``"core_index"`` and ``"log_wage"``. The column ``"wage"`` is dropped.
    type_covariates : pandas.DataFrame or None
        ``None`` if the model has fewer than two types. Otherwise the downcast
        result of ``compute_covariates`` applied to the observations of period
        zero.

    """
    n_types = optim_paras["n_types"]
    has_multiple_types = n_types >= 2

    # The first two entries of the dtype dictionary are not selected as columns.
    relevant_columns = list(generate_column_dtype_dict_for_estimation(optim_paras))[2:]

    df = df.sort_index()[relevant_columns]
    df = df.rename(columns=rename_labels_to_internal)
    df = df.rename_axis(index=rename_labels_to_internal)
    df = convert_labeled_variables_to_codes(df, optim_paras)

    # Every observation enters the estimation once per type.
    if has_multiple_types:
        df = pd.concat([df.copy().assign(type=type_) for type_ in range(n_types)])

    dense_keys, core_indices = map_observations_to_states(df, state_space, optim_paras)
    df["dense_key"] = dense_keys
    df["core_index"] = core_indices

    # The estimation works with log wages. Wages are clipped to
    # [1 / MAX_FLOAT, MAX_FLOAT] before taking the logarithm.
    clipped_wages = np.clip(df.wage.to_numpy(), 1 / MAX_FLOAT, MAX_FLOAT)
    df["log_wage"] = np.log(clipped_wages)
    df = df.drop(columns="wage")

    # The type covariates are computed from the observations in the first period.
    type_covariates = None
    if has_multiple_types:
        initial_states = df.query("period == 0").copy()
        type_covariates = compute_covariates(
            initial_states, options["covariates_core"], raise_errors=False
        ).apply(downcast_to_smallest_dtype)

    return df, type_covariates