import numpy as np
import pandas as pd

from choicemodels.tools import monte_carlo_choices


def test_monte_carlo_choices():
    """
    Test simulation of choices without capacity constraints. This test just verifies
    that the code runs, using a fairly large synthetic dataset.

    """
    data = build_data(1000, 100)
    monte_carlo_choices(data)
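# The tests in this file call a build_data() helper that is defined elsewhere in the
# test module and is not shown in this excerpt. The sketch below is an assumption
# based on how the helper is used: it returns a probability Series with a two-level
# MultiIndex of chooser ids ('oid') and alternative ids ('aid'), with probabilities
# summing to 1 within each chooser, which is the input format monte_carlo_choices()
# expects.

def build_data(num_obs, num_alts):
    # Hypothetical sketch: one row per (oid, aid) pair, with random probabilities
    # normalized so that each chooser's probabilities sum to 1
    d = {'oid': np.repeat(np.arange(num_obs), num_alts),
         'aid': np.tile(np.arange(num_alts), num_obs)}
    df = pd.DataFrame(d).set_index(['oid', 'aid'])
    df['prob'] = np.random.random(len(df))
    df['prob'] = df.prob / df.groupby(level='oid').prob.transform('sum')
    return df.prob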
def test_simulation_accuracy():
    """
    This test checks that the simulation tool is generating choices that match the
    provided probabilities.

    """
    data = build_data(5, 3)

    # Get values associated with an arbitrary row
    r = np.random.randint(0, 15, 1)
    row = pd.DataFrame(data).reset_index().iloc[r]
    oid = int(row.oid)
    aid = int(row.aid)
    prob = float(pd.DataFrame(data)
                 .query('oid==' + str(oid) + ' & aid==' + str(aid)).sum())

    # Simulate repeatedly and count how often this alternative is chosen. With
    # n = 1000 draws, the binomial standard error of count/n is at most ~0.016,
    # so a +/- 0.1 tolerance keeps the test stable while still catching
    # systematic errors in the simulated probabilities.
    n = 1000
    count = 0
    for i in range(n):
        choices = monte_carlo_choices(data)
        if choices.loc[oid] == aid:
            count += 1

    assert count / n > prob - 0.1
    assert count / n < prob + 0.1
def run(self, chooser_batch_size=None, interaction_terms=None):
    """
    Run the model step: simulate choices and use them to update an Orca column.

    The simulated choices are saved to the class object for diagnostics. If choices
    are unconstrained, the choice table and the probabilities of sampled alternatives
    are saved as well.

    Parameters
    ----------
    chooser_batch_size : int
        This parameter gets passed to
        choicemodels.tools.simulation.iterative_lottery_choices and is a temporary
        workaround for dealing with memory issues that arise from generating massive
        merged choice tables for simulations that involve large numbers of choosers,
        large numbers of alternatives, and large numbers of predictors. It allows the
        user to specify a batch size for simulating choices one chunk at a time.

    interaction_terms : pandas.Series, pandas.DataFrame, or list of either, optional
        Additional column(s) of interaction terms whose values depend on the
        combination of observation and alternative, to be merged onto the final data
        table. If passed as a Series or DataFrame, it should include a two-level
        MultiIndex. One level's name and values should match an index or column from
        the observations table, and the other should match an index or column from
        the alternatives table.

    Returns
    -------
    None

    """
    check_choicemodels_version()
    from choicemodels import MultinomialLogit
    from choicemodels.tools import (MergedChoiceTable, monte_carlo_choices,
                                    iterative_lottery_choices)

    # Clear simulation attributes from the class object
    self.mergedchoicetable = None
    self.probabilities = None
    self.choices = None

    if interaction_terms is not None:
        uniq_intx_idx_names = set([
            idx for intx in interaction_terms for idx in intx.index.names])
        obs_extra_cols = to_list(self.chooser_size) + list(uniq_intx_idx_names)
        alts_extra_cols = to_list(self.alt_capacity) + list(uniq_intx_idx_names)

    else:
        obs_extra_cols = to_list(self.chooser_size)
        alts_extra_cols = to_list(self.alt_capacity)

    # Get any necessary extra columns from the mct interaction operations spec
    if self.mct_intx_ops:
        intx_extra_obs_cols = self.mct_intx_ops.get('extra_obs_cols', [])
        intx_extra_obs_cols = to_list(intx_extra_obs_cols)
        obs_extra_cols += intx_extra_obs_cols
        intx_extra_alts_cols = self.mct_intx_ops.get('extra_alts_cols', [])
        intx_extra_alts_cols = to_list(intx_extra_alts_cols)
        alts_extra_cols += intx_extra_alts_cols

    observations = get_data(tables=self.out_choosers,
                            fallback_tables=self.choosers,
                            filters=self.out_chooser_filters,
                            model_expression=self.model_expression,
                            extra_columns=obs_extra_cols)

    if len(observations) == 0:
        print("No valid choosers")
        return

    alternatives = get_data(tables=self.out_alternatives,
                            fallback_tables=self.alternatives,
                            filters=self.out_alt_filters,
                            model_expression=self.model_expression,
                            extra_columns=alts_extra_cols)

    if len(alternatives) == 0:
        print("No valid alternatives")
        return

    # Remove filter columns before merging, in case column names overlap
    expr_cols = columns_in_formula(self.model_expression)

    obs_cols = set(observations.columns) & set(expr_cols + to_list(obs_extra_cols))
    observations = observations[list(obs_cols)]

    alt_cols = set(alternatives.columns) & set(expr_cols + to_list(alts_extra_cols))
    alternatives = alternatives[list(alt_cols)]

    # Callables for iterative choices
    def mct(obs, alts, intx_ops=None):
        this_mct = MergedChoiceTable(obs, alts,
                                     sample_size=self.alt_sample_size,
                                     interaction_terms=interaction_terms)
        if intx_ops:
            this_mct = self.perform_mct_intx_ops(this_mct)
            this_mct.sample_size = self.alt_sample_size
        return this_mct

    def probs(mct):
        return self.model.probabilities(mct)

    if self.constrained_choices is True:
        choices = iterative_lottery_choices(
            observations, alternatives, mct_callable=mct, probs_callable=probs,
            alt_capacity=self.alt_capacity, chooser_size=self.chooser_size,
            max_iter=self.max_iter, chooser_batch_size=chooser_batch_size,
            mct_intx_ops=self.mct_intx_ops)

    else:
        choicetable = mct(observations, alternatives, intx_ops=self.mct_intx_ops)
        probabilities = probs(choicetable)
        choices = monte_carlo_choices(probabilities)

        # Save data to class object if available
        self.mergedchoicetable = choicetable
        self.probabilities = probabilities

    # Save choices to class object for diagnostics
    self.choices = choices

    # Update Orca
    update_column(table=self.out_choosers,
                  fallback_table=self.choosers,
                  column=self.out_column,
                  fallback_column=self.choice_column,
                  data=choices)
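# Example usage (a minimal sketch, not part of the source above): the docstring for
# run() says interaction_terms must carry a two-level MultiIndex whose level names
# match identifiers in the observations and alternatives tables. The level names
# 'household_id' and 'building_id' below are hypothetical placeholders; substitute
# the identifiers used by your own chooser and alternative tables.

import pandas as pd

idx = pd.MultiIndex.from_product(
    [[1, 2, 3],    # example chooser ids (e.g. household_id)
     [10, 20]],    # example alternative ids (e.g. building_id)
    names=['household_id', 'building_id'])

# One interaction column whose value depends on the (chooser, alternative) pair
distance_to_work = pd.Series([0.5, 1.2, 0.8, 2.0, 1.1, 0.3],
                             index=idx, name='distance_to_work')

# With `step` an instance of the template class this method belongs to (setup not
# shown), the series can then be passed through to the merged choice table:
#
#     step.run(chooser_batch_size=50000, interaction_terms=[distance_to_work])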