def fit(self, mct=None):
    """
    Estimate the model, then store and print the results.

    Estimation is delegated to the ChoiceModels `MultinomialLogit` class.
    This method may be called repeatedly; it only updates attributes on
    this object (`name`, `summary_table`, `fitted_parameters`, `model`,
    `mergedchoicetable`).

    The merged choice table used for estimation is kept on the object as
    `mergedchoicetable` (a choicemodels.tools.MergedChoiceTable) so it can
    be inspected afterwards.

    Parameters
    ----------
    mct : choicemodels.tools.MergedChoiceTable, optional
        A pre-built choice table to estimate from, instead of the one this
        method would build from the configured chooser and alternative
        tables. When provided, `self.chooser_filters` is applied to its
        rows before estimation. When omitted, choosers (optionally sampled
        down to `self.chooser_sample_size`) and alternatives are loaded
        with `get_data()` and merged here.

    Returns
    -------
    None

    """
    check_choicemodels_version()
    from choicemodels import MultinomialLogit
    from choicemodels.tools import MergedChoiceTable

    if mct is None:
        # No table supplied: assemble one from the configured tables.
        choosers = get_data(
            tables=self.choosers,
            filters=self.chooser_filters,
            model_expression=self.model_expression,
            extra_columns=self.choice_column)

        n_choosers = self.chooser_sample_size
        if n_choosers is not None:
            choosers = choosers.sample(n_choosers)

        alts = get_data(
            tables=self.alternatives,
            filters=self.alt_filters,
            model_expression=self.model_expression)

        mct = MergedChoiceTable(
            observations=choosers,
            alternatives=alts,
            chosen_alternatives=self.choice_column,
            sample_size=self.alt_sample_size)
    else:
        # A table was supplied: flatten its index into columns so the
        # chooser filters can be applied, then restore the same index.
        long_df = mct.to_frame()
        index_levels = long_df.index.names
        filtered = apply_filter_query(
            long_df.reset_index(), self.chooser_filters)
        mct = MergedChoiceTable.from_df(filtered.set_index(index_levels))

    estimator = MultinomialLogit(
        data=mct, model_expression=self.model_expression)
    results = estimator.fit()

    self.name = self._generate_name()
    self.summary_table = str(results)
    print(self.summary_table)

    fit_params = results.get_raw_results()['fit_parameters']
    self.fitted_parameters = fit_params['Coefficient'].tolist()
    self.model = results

    # Keep the merged choice table around for diagnostics
    self.mergedchoicetable = mct
def perform_mct_intx_ops(self, mct, nan_handling='zero'):
    """
    Update a MergedChoiceTable according to the set of operations stored in
    `self.mct_intx_ops` (specified in the model .yaml config).

    Operations are performed sequentially as follows:
    1) Pandas merges with other tables loaded through `get_data()`
       ('successive_merges');
    2) Pandas group-by aggregations on the row identifier
       ('aggregations');
    3) renaming of existing columns ('rename_cols');
    4) creation of new columns via Pandas `eval()`
       ('sequential_eval_ops').

    The result of steps 1-3 is merged back onto the choice table using a
    temporary row identifier named 'mct_index', after which the table's
    original index is restored.

    Parameters
    ----------
    mct : choicemodels.tools.MergedChoiceTable
        The table to update. The frame it returns from `to_frame()` is not
        modified in place.

    nan_handling : str
        Either 'zero' or 'drop', where the former will replace all NaN's
        and None's with 0 integers and the latter will drop all rows with
        any NaN or Null values. Any other value leaves missing values
        untouched.

    Returns
    -------
    MergedChoiceTable

    Raises
    ------
    KeyError
        If, before a merge step, the working table has 'mct_index' neither
        as its index name nor among its columns.

    """
    # Imported locally, as in `fit()`, so this method does not rely on a
    # module-level import of the class.
    from choicemodels.tools import MergedChoiceTable

    intx_ops = self.mct_intx_ops
    mct_idx = 'mct_index'

    # Use the non-in-place reset_index: the frame handed back by
    # `to_frame()` may be the table's own data, which must not be altered.
    mct_df = mct.to_frame()
    og_mct_index = mct_df.index.names
    mct_df = mct_df.reset_index()
    mct_df.index.name = mct_idx

    # merges
    intx_df = mct_df.copy()
    for merge_args in intx_ops.get('successive_merges', []):

        left_idx = merge_args.get('left_index', False)

        # Always build a fresh list of left-hand columns. The default is a
        # pandas Index (where `+=` is element-wise addition, not append),
        # and a user-supplied list belongs to the stored config and must
        # not be mutated.
        mct_cols = merge_args.get('mct_cols')
        if mct_cols is None:
            left_cols = list(intx_df.columns)
        elif isinstance(mct_cols, str):
            left_cols = [mct_cols]
        else:
            left_cols = list(mct_cols)

        # make sure mct index is preserved during merge
        if intx_df.index.name == mct_idx:
            if not left_idx:
                # Merging on columns: carry the identifier as a column.
                intx_df = intx_df.reset_index()
                if mct_idx not in left_cols:
                    left_cols.append(mct_idx)
        elif mct_idx in intx_df.columns:
            if mct_idx not in left_cols:
                left_cols.append(mct_idx)
        else:
            raise KeyError(
                'Column {0} must be preserved in intx ops!'.format(
                    mct_idx))

        left = intx_df[left_cols]

        right = get_data(
            merge_args['right_table'],
            extra_columns=merge_args.get('right_cols', None))

        intx_df = pd.merge(
            left, right,
            how=merge_args.get('how', 'inner'),
            on=merge_args.get('on_cols', None),
            left_on=merge_args.get('left_on', None),
            right_on=merge_args.get('right_on', None),
            left_index=left_idx,
            right_index=merge_args.get('right_index', False),
            suffixes=merge_args.get('suffixes', ('_x', '_y')))

    # aggs
    aggs = intx_ops.get('aggregations', False)
    if aggs:
        intx_df = intx_df.groupby(mct_idx).agg(aggs)

    # rename cols
    if intx_ops.get('rename_cols', False):
        intx_df = intx_df.rename(columns=intx_ops['rename_cols'])

    # update mct
    mct_df = pd.merge(mct_df, intx_df, on=mct_idx)

    # create new cols from expressions; each op can see columns created
    # by the ops before it
    for eval_op in intx_ops.get('sequential_eval_ops', []):
        new_col = eval_op['name']
        expr = eval_op['expr']
        engine = eval_op.get('engine', 'numexpr')
        mct_df[new_col] = mct_df.eval(expr, engine=engine)

    # restore original mct index
    mct_df = mct_df.set_index(og_mct_index)

    # handle NaNs and Nones
    if mct_df.isna().values.any():
        if nan_handling == 'zero':
            print("Replacing MCT None's and NaN's with 0")
            mct_df = mct_df.fillna(0)
        elif nan_handling == 'drop':
            print("Dropping rows with None's/NaN's from MCT")
            mct_df = mct_df.dropna(axis=0)

    return MergedChoiceTable.from_df(mct_df)