from collections import OrderedDict

import numpy as np
import pylogit as pl


def pylogit_mxlogit_estimate(data,
                             rhs_columns,
                             random_varnames,
                             num_draws=100,
                             seed=None,
                             print_result=False):
    spec = OrderedDict()
    variable_names = OrderedDict()
    for var in rhs_columns:
        spec[var] = [[1, 2]]
        variable_names[var] = [var]

    mixed_model = pl.create_choice_model(data=data,
                                         alt_id_col="alt",
                                         obs_id_col="group",
                                         choice_col="choice",
                                         specification=spec,
                                         model_type="Mixed Logit",
                                         names=variable_names,
                                         mixing_id_col='user_id',
                                         mixing_vars=random_varnames)
    numCoef = sum([len(spec[s]) for s in spec]) + len(random_varnames)
    if seed is not None:
        mixed_model.fit_mle(np.zeros(numCoef), num_draws=num_draws, seed=seed)
    else:
        mixed_model.fit_mle(np.zeros(numCoef), num_draws=num_draws)
    if print_result:
        print(mixed_model.get_statsmodels_summary())
    return mixed_model
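# Usage sketch (not part of the original source): `df_long` below is a small,
# synthetic long-format dataset built only to show the columns the helper
# expects ("group", "alt", "choice", "user_id", plus one column per entry in
# rhs_columns).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
rows = []
for user in range(20):
    for situation in range(5):
        group = user * 5 + situation
        price = rng.uniform(1, 5, size=2)
        time = rng.uniform(5, 15, size=2)
        # Simulate logit-consistent choices via Gumbel-distributed noise.
        utility = -1.0 * price - 0.1 * time + rng.gumbel(size=2)
        chosen = int(utility.argmax())
        for a in (0, 1):
            rows.append({"group": group, "alt": a + 1,
                         "choice": int(a == chosen), "user_id": user,
                         "price": price[a], "time": time[a]})
df_long = pd.DataFrame(rows)

mixed_fit = pylogit_mxlogit_estimate(df_long,
                                     rhs_columns=["price", "time"],
                                     random_varnames=["price"],
                                     num_draws=100,
                                     seed=42,
                                     print_result=True)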
Example 2
def create_model(dataframe, n_feats, specs, spec_names):
    # Fit to a multinomial logit model (MNL)
    choice_model = pl.create_choice_model(data=dataframe,
                                          alt_id_col='alt_ids',
                                          obs_id_col='obs_ids',
                                          choice_col='choices',
                                          specification=specs,
                                          model_type="MNL",
                                          names=spec_names)

    # Specify the initial values and method for the optimization.
    choice_model.fit_mle(np.zeros(n_feats), print_res=False)
    fit_summary_print_output = choice_model.fit_summary
    summary_print_output = choice_model.summary

    summary = summary_print_output.to_dict(orient="index")

    fit_summary = fit_summary_print_output.to_dict()

    choice_model.get_statsmodels_summary()

    print(fit_summary_print_output)
    print(summary_print_output)

    return fit_summary, summary
Example 3
    def fit(self):
        """
        Fit the model using maximum likelihood estimation. Uses either the ChoiceModels
        or PyLogit estimation engine as appropriate.

        [TO DO: should we add pass-through parameters here, or take them all in the
        constructor?]

        Parameters - NOT YET IMPLEMENTED
        ----------
        GPU : bool, optional
            GPU acceleration.
        coefrange : tuple of floats, optional
            Limits to which coefficients are held, in format (min, max).
        initial_values : 1D array, optional
            Initial values for the coefficients.

        Returns
        -------
        MultinomialLogitResults() object.

        """
        if self._estimation_engine == 'PyLogit':

            m = pylogit.create_choice_model(
                data=self._data,
                obs_id_col=self._observation_id_col,
                alt_id_col=self._alternative_id_col,
                choice_col=self._choice_col,
                specification=self._model_expression,
                names=self._model_labels,
                model_type='MNL')

            m.fit_mle(init_vals=self._initial_coefs)
            results = MultinomialLogitResults(self._estimation_engine,
                                              results=m)

        elif self._estimation_engine == 'ChoiceModels':

            model_design = dmatrix(self._model_expression,
                                   data=self._data,
                                   return_type='dataframe')

            # generate 2D array from choice column, for mnl_estimate()
            chosen = np.reshape(self._data[[self._choice_col]].to_numpy(),
                                (self._numobs, self._numalts))

            log_lik, fit = mnl_estimate(model_design.to_numpy(), chosen,
                                        self._numalts)

            result_params = dict(log_likelihood=log_lik,
                                 fit_parameters=fit,
                                 x_names=model_design.design_info.column_names)

            results = MultinomialLogitResults(self._estimation_engine,
                                              results=result_params)

        return results
    def _create_model(self):
        """
        Create the pylogit model class.
        """
        self.pylogit_model = pl.create_choice_model(
            data=self.long_data,
            alt_id_col=_CHOICE_ID_COL,
            obs_id_col=_OBSERVATION_COL,
            choice_col=_CHOICE_COL,
            specification=self.specification,
            names=self.names,
            model_type='MNL')
    def estimation_asym(self, model_mnl):
        # read the data
        long_testing_data = pd.read_csv(self.output_file)

        # Set up the asym specification and names dictionaries
        asym_specification = OrderedDict()
        asym_names = OrderedDict()

        for col in basic_specification:
            if col != "intercept":
                asym_specification[col] = basic_specification[col]
                asym_names[col] = basic_names[col]

        asym_intercept_names = basic_names["intercept"]

        # the "index" of the alternative whose constant has been constrained
        asym_intercept_ref_pos = 4

        # "shape_TAS" is not presented
        asym_shape_names = ["shape_NSW", "shape_VIC", "shape_QLD", "shape_SA"]
        number_of_initial_values = len(asym_shape_names)

        # the "index" of the alternative whose shape parameter is constrained
        asym_ref = 4

        print("################################################## Asymmetry Model #########################################")

        model_asym = pl.create_choice_model(
            data=long_testing_data,
            alt_id_col=self.custom_alt_id,
            obs_id_col=self.obs_id_column,
            choice_col=self.choice_column,
            specification=asym_specification,
            model_type="Asym",
            names=asym_names,
            shape_names=asym_shape_names,
            intercept_names=asym_intercept_names,
            shape_ref_pos=asym_ref,
            intercept_ref_pos=asym_intercept_ref_pos
        )

        model_asym.fit_mle(
            None,
            init_shapes=np.zeros(number_of_initial_values),
            init_intercepts=model_mnl.params.values[:number_of_initial_values],
            init_coefs=model_mnl.params.values[number_of_initial_values:] / np.log(number_of_initial_values+1),
            method="bfgs"
        )

        model_asym.get_statsmodels_summary()

        return model_asym
Example 6
def getModel(alt,
             obs,
             choice,
             spec=specification,
             names=names,
             type_="MNL",
             data=raw_data):
    model = pl.create_choice_model(data=data,
                                   alt_id_col=alt,
                                   obs_id_col=obs,
                                   choice_col=choice,
                                   specification=spec,
                                   model_type=type_,
                                   names=names)
    return model
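# Usage sketch (not part of the original source): getModel reads the
# module-level specification, names and raw_data objects, so only the id
# columns need to be supplied. The column names below are hypothetical.
# mnl = getModel(alt="alt_id", obs="obs_id", choice="choice")
# mnl.fit_mle(np.zeros(sum(len(v) for v in specification.values())))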
Example 7
def create_model(dataframe: pd.DataFrame, feature_list: list):
    """Fitting multinomial logit model to the dataframe choices with specifications
    
    Args:
        dataframe (pd.DataFrame): Dataframe representation of the routes and the choices taken in our data
        feature_list (list[str]): list of (id#, featurename) tuples present in G.edges output
        
        
        specs (OrderedDict): Each category with an 'all_same' value to make sure the column ID does not change for the same attribute
        spec_names (OrderedDict): Customizable specs for each category for nesting cases
    
    Returns:
        (fit_summary, summary): fit_summary is a dictionary that shows the overall model's fit, 
                                summary is a dictionary that shows each input's fit in the model
    """

    # This just means the columns will be consistent across choices
    spec_names = OrderedDict()
    specs = OrderedDict()
    for feature in feature_list:
        spec_names[feature] = feature
        specs[feature] = 'all_same'

    assert len(specs) == len(spec_names)

    # Fit to a multinomial logit model (MNL)
    choice_model = pl.create_choice_model(data=dataframe,
                                          alt_id_col='alt_ids',
                                          obs_id_col='obs_ids',
                                          choice_col='choices',
                                          specification=specs,
                                          model_type="MNL",
                                          names=spec_names)

    choice_model.fit_mle(np.zeros(len(feature_list)), print_res=False)

    fit_summary_print_output = choice_model.fit_summary
    summary_print_output = choice_model.summary

    summary = summary_print_output.to_dict(orient="index")

    fit_summary = fit_summary_print_output.to_dict()

    choice_model.get_statsmodels_summary()

    return fit_summary, summary
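# Usage sketch (not part of the original source): assumes `routes_df` is a
# long-format DataFrame with "alt_ids", "obs_ids" and "choices" columns plus
# one column per feature; the feature names below are hypothetical.
# fit_summary, summary = create_model(routes_df, ["length", "grade"])
# print(fit_summary)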
Example 8
    def fit(self):
        """
        Fit the model using maximum likelihood estimation. Uses either the ChoiceModels
        or PyLogit estimation engine as appropriate.

        Returns
        -------
        MultinomialLogitResults() object.

        """
        if self._estimation_engine == 'PyLogit':

            m = pylogit.create_choice_model(
                data=self._df,
                obs_id_col=self._observation_id_col,
                alt_id_col=self._alternative_id_col,
                choice_col=self._choice_col,
                specification=self._model_expression,
                names=self._model_labels,
                model_type='MNL')

            m.fit_mle(init_vals=self._initial_coefs)
            results = MultinomialLogitResults(
                estimation_engine=self._estimation_engine,
                model_expression=self._model_expression,
                results=m)

        elif self._estimation_engine == 'ChoiceModels':

            dm = dmatrix(self._model_expression, data=self._df)

            chosen = np.reshape(self._df[[self._choice_col]].values,
                                (self._numobs, self._numalts))

            log_lik, fit = mnl_estimate(np.array(dm), chosen, self._numalts)

            result_params = dict(log_likelihood=log_lik,
                                 fit_parameters=fit,
                                 x_names=dm.design_info.column_names)

            results = MultinomialLogitResults(
                estimation_engine=self._estimation_engine,
                model_expression=self._model_expression,
                results=result_params)

        return results
def pylogit_logit_estimate(df, rhs_columns):
    spec = OrderedDict()
    variable_names = OrderedDict()
    for var in rhs_columns:
        spec[var] = [[1, 2]]
        variable_names[var] = [var]
    model = pl.create_choice_model(data=df,
                                   alt_id_col="alt",
                                   obs_id_col="group",
                                   choice_col="choice",
                                   specification=spec,
                                   model_type="MNL",
                                   names=variable_names)
    numCoef = sum([len(spec[s]) for s in spec])
    model.fit_mle(np.zeros(numCoef))
    print(model.get_statsmodels_summary())
    return model
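# Usage sketch (not part of the original source): assumes `df` is a
# long-format DataFrame with "group", "alt" and "choice" columns plus one
# column per entry in rhs_columns (names below are hypothetical).
# mnl_fit = pylogit_logit_estimate(df, rhs_columns=["price", "time"])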
def logit_spec(long_data_df, alt_attr_vars, generic_attrs=[], constant=True, 
               alts={0:'drive', 1:'cycle', 2:'walk', 3:'PT'}, ref_alt_ind=0):
    """
    generate specification & varnames for pylogit
    
    Arguments:
    ------------------------------
    long_data_df: pandas dataframe, long data, generated by long_form_data
    alt_attr_vars: list of alternative specific vars
    generic_attrs: list of case specific vars, generally demographic vars
    constant: whether or not to include ASCs
    alts: a dict or list defining the indices and names of the alternatives
    ref_alt_ind: index of reference alternative for ASC specification
    
    Returns:
    --------------------------------
    model: pylogit MNL model object
    numCoef: the number of coefficients to be estimated
    """
    specifications = OrderedDict()
    names = OrderedDict()
    nalt = len(alts)
    if isinstance(alts, list):
        alts = {i:i for i in alts}
    for var in alt_attr_vars:
        specifications[var] = [list(range(nalt))]
        names[var] = [var]
    for var in generic_attrs:
        specifications[var] = [i for i in range(nalt) if i != ref_alt_ind]
        names[var] = [var + ' for ' + alts[i] for i in alts if i != ref_alt_ind]
    if constant:
        specifications['intercept'] = [i for i in range(nalt) if i != ref_alt_ind]
        names['intercept'] = ['ASC for ' + alts[i] for i in alts if i != ref_alt_ind]
    model = pl.create_choice_model(data=long_data_df.copy(),
                                   alt_id_col="alt",
                                   obs_id_col="group",
                                   choice_col="choice",
                                   specification=specifications,
                                   model_type="MNL",
                                   names=names)
    numCoef = sum([len(specifications[s]) for s in specifications])
    return model, numCoef
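# Usage sketch (not part of the original source): logit_spec only builds the
# model, so estimation happens separately. Assumes `long_df` has "group",
# "alt" and "choice" columns, alternative indices 0-3 as in the default alts
# dict, and the (hypothetical) attribute columns listed here.
# model, numCoef = logit_spec(long_df,
#                             alt_attr_vars=["cost", "time"],
#                             generic_attrs=["income"],
#                             constant=True)
# model.fit_mle(np.zeros(numCoef))
# print(model.get_statsmodels_summary())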
Example 11
    def fit(self):
        """
        Fit the model using maximum likelihood estimation. Uses either the ChoiceModels
        or PyLogit estimation engine as appropriate.

        Returns
        -------
        MultinomialLogitResults() object.

        """
        if self._estimation_engine == 'PyLogit':

            m = pylogit.create_choice_model(data=self._df,
                                            obs_id_col=self._observation_id_col,
                                            alt_id_col=self._alternative_id_col,
                                            choice_col=self._choice_col,
                                            specification=self._model_expression,
                                            names=self._model_labels,
                                            model_type='MNL')

            m.fit_mle(init_vals=self._initial_coefs)
            results = MultinomialLogitResults(estimation_engine=self._estimation_engine,
                                              model_expression=self._model_expression,
                                              results=m)

        elif self._estimation_engine == 'ChoiceModels':

            dm = dmatrix(self._model_expression, data=self._df)

            chosen = np.reshape(self._df[[self._choice_col]].values,
                                (self._numobs, self._numalts))

            log_lik, fit = mnl_estimate(np.array(dm), chosen, self._numalts)

            result_params = dict(log_likelihood=log_lik,
                                 fit_parameters=fit,
                                 x_names=dm.design_info.column_names)

            results = MultinomialLogitResults(estimation_engine=self._estimation_engine,
                                              model_expression=self._model_expression,
                                              results=result_params)

        return results
Example 12
def pylogitModel(data):
    basic_specification = OrderedDict()
    basic_names = OrderedDict()
    for attr in ['Affordable', 'Ease', 'Power', 'Learning',
                 'Supplements', 'Support', 'Needs', 'IT']:
        basic_specification[attr] = [[1, 2, 3, 4, 5]]
        basic_names[attr] = [attr]
    basic_specification["intercept"] = [1, 2, 3, 4, 5]
    basic_names["intercept"] = ['Matlab', 'R', 'SAS', 'SPSS', 'Stata']

    print(basic_names)
    print(basic_specification)

    mnl_model_r = pl.create_choice_model(data=data,
                                         alt_id_col='Alternative',
                                         obs_id_col='OrgID',
                                         choice_col='Choice',
                                         specification=basic_specification,
                                         model_type="MNL",
                                         names=basic_names)

    # Initial values: 8 generic coefficients + 5 alternative-specific constants.
    mnl_model_r.fit_mle(np.zeros(13))

    # Look at the estimation results
    mnl_model_r.print_summaries()
    return mnl_model_r
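# Usage sketch (not part of the original source): assumes `survey_long` is a
# long-format DataFrame with "Alternative", "OrgID" and "Choice" columns and
# the eight attribute columns named in the specification above.
# fitted = pylogitModel(survey_long)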
Example 13
    def estimation_mnl(self):
        long_testing_data = pd.read_csv(self.output_file)

        print("################################################ MNL Model ######################################")

        model_mnl = pl.create_choice_model(
            data=long_testing_data,
            alt_id_col=self.custom_alt_id,
            obs_id_col=self.obs_id_column,
            choice_col=self.choice_column,
            specification=basic_specification,
            model_type="MNL",
            names=basic_names
        )

        model_mnl.fit_mle(np.zeros(total_num_parameters))
        results = model_mnl.get_statsmodels_summary()

        print("########################", results)

        # all_situation_ids = np.sort(long_testing_data["choice_situation"].unique())
        # prediction_ids = all_situation_ids[:2000]

        return model_mnl
alt_range = list(range(6))

basic_specification = OrderedDict()
basic_names = OrderedDict()

basic_specification["sp"] = "all_same"
basic_names["sp"] = "sp"

basic_specification["xp"] = "all_same"
basic_names["xp"] = "xp"

basic_specification["tim"] = "all_same"
basic_names["tim"] = "tim"

custom_alt_id = "r_box"
obs_id_column = "uri"

x_model = pl.create_choice_model(data=df,
                                 alt_id_col=custom_alt_id,
                                 obs_id_col=obs_id_column,
                                 choice_col="win",
                                 specification=basic_specification,
                                 model_type="MNL",
                                 names=basic_names)

x_model.fit_mle(np.zeros(3))

summary = x_model.get_statsmodels_summary()
print(summary)
Example 15
    def __init__(self, *args, **kwargs):
        self.wrapped_model = pylogit.create_choice_model(*args,
                                                         model_type="MNL",
                                                         **kwargs)
Example 16
    def test_constructor(self):
        """
        Construct the various choice models and make sure the constructed
        object has the necessary attributes.
        """
        # Record the model types of all the models to be created
        all_model_types = model_type_to_display_name.keys()

        # Record the attribute / value pairs that are common to all models.
        common_attr_value_dict = {"data": self.fake_df,
                                  "name_spec": self.fake_names,
                                  "design": self.fake_design,
                                  "ind_var_names": self.fake_names["x"],
                                  "alt_id_col": self.alt_id_col,
                                  "obs_id_col": self.obs_id_col,
                                  "choice_col": self.choice_col,
                                  "specification": self.fake_specification,
                                  "alt_IDs": self.fake_df["alt_id"].values,
                                  "choices": self.fake_df["choice"].values}

        # Create a shape name dictionary to relate the various models to the
        # names of their shape parameters.
        shape_name_dict = {"MNL": None,
                           "Asym": self.fake_shape_names[:2],
                           "Cloglog": None,
                           "Scobit": self.fake_shape_names,
                           "Uneven": self.fake_shape_names,
                           "Nested Logit": None,
                           "Mixed Logit": None}

        # Create a shape reference position dictionary to relate the various
        # models to their shape reference positions.
        shape_ref_dict = {}
        for key in shape_name_dict:
            shape_ref_dict[key] = (None if key != "Asym" else
                                   self.fake_shape_ref_pos)

        # Create an intercept_names and intercept_ref_position dictionary to
        # relate the various models to their respective kwargs.
        intercept_names_dict = {}
        intercept_ref_dict = {}
        for key in shape_name_dict:
            if key in ["MNL", "Nested Logit", "Mixed Logit"]:
                intercept_names_dict[key] = None
                intercept_ref_dict[key] = None
            else:
                intercept_names_dict[key] = self.fake_intercept_names
                intercept_ref_dict[key] = self.fake_intercept_ref_pos

        # Create a nest_names dictionary to relate the various models to their
        # nest_name attributes
        nest_name_dict = {}
        nest_spec_dict = {}
        for key in shape_name_dict:
            if key != "Nested Logit":
                nest_name_dict[key] = None
                nest_spec_dict[key] = None
            else:
                nest_name_dict[key] = list(self.fake_nest_spec.keys())
                nest_spec_dict[key] = self.fake_nest_spec

        # Create dictionaries for the mixing_id_col, mixing_vars, and
        # mixing_pos attributes
        mixing_id_col_dict = {}
        mixing_vars_dict = {}
        mixing_pos_dict = {}

        for key in shape_name_dict:
            if key != "Mixed Logit":
                mixing_id_col_dict[key] = None
                mixing_vars_dict[key] = None
                mixing_pos_dict[key] = None
            else:
                mixing_id_col_dict[key] = self.obs_id_col
                mixing_vars_dict[key] = self.fake_names["x"]
                mixing_pos_dict[key] = [0]

        # Record the attribute / value pairs that vary across models
        varying_attr_value_dict = {"model_type": model_type_to_display_name,
                                   "intercept_names": intercept_names_dict,
                                   "intercept_ref_position":
                                       intercept_ref_dict,
                                   "shape_names": shape_name_dict,
                                   "shape_ref_position": shape_ref_dict,
                                   "nest_names": nest_name_dict,
                                   "nest_spec": nest_spec_dict,
                                   "mixing_id_col": mixing_id_col_dict,
                                   "mixing_vars": mixing_vars_dict,
                                   "mixing_pos": mixing_pos_dict}

        # Set up the keyword arguments that are needed for each of the model
        # types
        variable_kwargs = {}
        for model_name in all_model_types:
            variable_kwargs[model_name] = {}
            variable_kwargs[model_name]["intercept_names"] =\
                intercept_names_dict[model_name]
            variable_kwargs[model_name]["intercept_ref_pos"] =\
                intercept_ref_dict[model_name]
            variable_kwargs[model_name]["shape_ref_pos"] =\
                shape_ref_dict[model_name]
            variable_kwargs[model_name]["shape_names"] =\
                shape_name_dict[model_name]
            variable_kwargs[model_name]["nest_spec"] =\
                nest_spec_dict[model_name]
            variable_kwargs[model_name]["mixing_id_col"] =\
                mixing_id_col_dict[model_name]
            variable_kwargs[model_name]["mixing_vars"] =\
                mixing_vars_dict[model_name]

        # Execute the test for each model type
        for model_name in all_model_types:
            # Update the model type in the list of constructor args
            self.constructor_args[-1] = model_name

            # Use this specific model's keyword arguments
            self.constructor_kwargs.update(variable_kwargs[model_name])

            # Construct the model object
            model_obj = pylogit.create_choice_model(*self.constructor_args,
                                                    **self.constructor_kwargs)

            # Make sure that the constructor has all of the required attributes
            for attr in common_attr_value_dict:
                value = common_attr_value_dict[attr]
                if isinstance(value, pd.DataFrame):
                    self.assertTrue(value.equals(model_obj.data))
                elif isinstance(value, np.ndarray):
                    npt.assert_allclose(value,
                                        model_obj.__getattribute__(attr))
                else:
                    self.assertEqual(value,
                                     model_obj.__getattribute__(attr))

            for attr in varying_attr_value_dict:
                value = varying_attr_value_dict[attr][model_name]

                self.assertEqual(value,
                                 model_obj.__getattribute__(attr))

        return None
Example 17
long_lpmc = gld.generate_data(train=True) # train=False for generating the test dataset
y = long_lpmc.copy()

# Standardize the attribute columns; the id columns in the first three
# positions (custom_id, mode_id, etc.) are left as-is.
y.iloc[:, 3:] = helpers.standardize(long_lpmc.iloc[:, 3:])


choice_column = "travel_mode"
obs_id_column = "custom_id"
custom_alt_id = "mode_id"
basic_specification = helpers.create_specification()

lpmc_mnltrain = pl.create_choice_model(data=y,
                                        alt_id_col=custom_alt_id,
                                        obs_id_col=obs_id_column,
                                        choice_col=choice_column,
                                        specification=basic_specification,
                                        model_type="MNL",
                                        names=None)

#%%
"""
Defines relevant parameter for the simulations.
Variables :
    num_simul= Number of desired simulation for the grid search of the 
               hyperparameter lambda_lasso, lambda_ridge or the two.
    num_points = For each regularisation of the grid search, defines the 
                 number of simulation realised by addind parameters.
    maxiter = Number of iterations realised by Scipy 'minimize()' to optimized
             the parameters.
    num_param_keep = vector of length num_points which indicates how many
Example 18
long_swiss_metro_train, long_swiss_metro_test = train_test_split(
    long_swiss_metro, train_size=SPLIT_EMBEDDINGS_DCM, shuffle=False)

# In[39]:

testsetsize = len(long_swiss_metro_test) / 3
trainsetsize = len(long_swiss_metro_train) / 3
print(testsetsize, trainsetsize)

# In[40]:

# Estimate the multinomial logit model (MNL)
swissmetro_mnl = pl.create_choice_model(data=long_swiss_metro_train,
                                        alt_id_col=custom_alt_id,
                                        obs_id_col=obs_id_column,
                                        choice_col=choice_column,
                                        specification=basic_specification,
                                        model_type="MNL",
                                        names=basic_names)

deg_freedom = sum([len(b) for b in basic_specification.values()])
# Specify the initial values and method for the optimization.
swissmetro_mnl.fit_mle(np.zeros(deg_freedom))

# Look at the estimation results
swissmetro_mnl.get_statsmodels_summary()

# In[41]:

long_probs = swissmetro_mnl.predict(long_swiss_metro_test)
    data.loc[data['mode_id'] == i, col_name] = confounder_vectors[int(i - 1)][2]
    data[col_name] = data[col_name].fillna(0)
    
data['confounder_all'] = data[['confounder_for_mode_1','confounder_for_mode_2','confounder_for_mode_3',
                              'confounder_for_mode_4', 'confounder_for_mode_5', 'confounder_for_mode_6',
                              'confounder_for_mode_7', 'confounder_for_mode_8']].sum(axis=1)
# -

# ## Estimate non-causal MNL

# +
# Estimate the basic MNL model
mnl_model = cm.create_choice_model(data=data,
                                   alt_id_col="mode_id",
                                   obs_id_col="observation_id",
                                   choice_col="choice",
                                   specification=mnl_specification,
                                   model_type="MNL",
                                   names=mnl_names)

num_vars = len(reduce(lambda x, y: x + y, mnl_names.values()))
# BFGS is used to drive the gradient essentially to zero in all dimensions.
mnl_model.fit_mle(np.zeros(num_vars),
                  method="BFGS")

# Look at the estimation results
mnl_model.get_statsmodels_summary()
# -

# ## Estimate Causal MNL
mode_shares.index = [ALT_ID_TO_MODE_NAME[x] for x in mode_shares.index.values]
mode_shares.name = "Mode Shares"
mode_shares
# -

# # Choice Model Estimation

# For purposes of this task, we use the MNL specification from Brathwaite and Walker (2016) and estimate the model resulting from such a specification. We assume that the estimated model parameters represent the "true" model parameters.

# +
# Estimate the basic MNL model
mnl_model = pl.create_choice_model(
    data=bike_data_long,
    alt_id_col=ALT_ID_COL,
    obs_id_col=OBS_ID_COL,
    choice_col="choice",
    specification=MNL_SPECIFICATION,
    model_type="MNL",
    names=MNL_NAMES,
)

num_vars = len(reduce(lambda x, y: x + y, MNL_NAMES.values()))

# BFGS is used to drive the gradient essentially to zero in all dimensions.
mnl_model.fit_mle(np.zeros(num_vars), method="BFGS")

# Look at the estimation results
mnl_model.get_statsmodels_summary()
# -
    for k in range(len(indices) - 1):

        # For each fold k, create the holdout set and the bagged set
        holdout = train.iloc[indices[k]:indices[k + 1], :].sort_values(
            by=["Year", "Country"])
        bagged = train[~train.index.isin(holdout.index)].sort_values(
            by=["Year", "Country"])

        # Create a mixed logit model with year fixed-effects and random
        # coefficients over countries.
        model = pylogit.create_choice_model(
            data=bagged,
            alt_id_col="Status",
            obs_id_col="Year",
            choice_col="default_RR",  # =1 for default, =0 for no default
            specification=basic_specification,
            model_type="Mixed Logit",  # mixed panel logit model
            names=basic_names,
            mixing_id_col="Country",  # coefficients are randomized over countries
            mixing_vars=index_var_names)

        # Estimate the mixed logit model on the K-1 bagged folds using the
        # Nelder-Mead algorithm (cross-validated to choose the optimal lambda).
        model.fit_mle(
            init_vals=np.zeros(46),
            # 1000 draws from independent normal distributions for each
            # parameter, as functions of their means and standard deviations
            num_draws=1000,
            # seed=2,
            method="Nelder-Mead",
            maxiter=10,  # number of Nelder-Mead iterations
Example 22
# basic_specification["regular_class"] = [1]
# basic_names["regular_class"] = ["First Class == False, (Swissmetro)"]
#
# basic_specification["single_luggage_piece"] = [3]
# basic_names["single_luggage_piece"] = ["Number of Luggage Pieces == 1, (Car)"]
#
# basic_specification["multiple_luggage_pieces"] = [3]
# basic_names["multiple_luggage_pieces"] = ["Number of Luggage Pieces > 1, (Car)"]

# print(basic_names)
# print(basic_specification)

destination_mnl = pl.create_choice_model(data=long_testing_data,
                                         alt_id_col=custom_alt_id,
                                         obs_id_col=obs_id_column,
                                         choice_col=choice_column,
                                         specification=basic_specification,
                                         model_type="MNL",
                                         names=basic_names)

destination_mnl.fit_mle(np.zeros(10))
print(destination_mnl.get_statsmodels_summary())

all_situation_ids = np.sort(long_testing_data["choice_situation"].unique())
prediction_ids = all_situation_ids[:2000]
prediction_df = long_testing_data.loc[
    long_testing_data["choice_situation"].isin(prediction_ids)].copy()
# print(prediction_df)
# This is the array of the predicted choice
prediction_array = destination_mnl.predict(prediction_df)
print(prediction_array)
Example 23
    def fit(self, df_comb, target, df_i=None, df_j=None, merge_columns=None):
        """
        This function computes the maximum-likelihood estimate of model parameters
        given pairwise-comparison data, using optimizers
        provided by the ``scipy.optimize`` module.

        Parameters
        ------------
        df_comb : DataFrame
                  DataFrame with multi-index where each index is an entity and object
                  of comparison made. This table should contain the target value and
                  any other predictive features that the user would like to use and
                  store on the observational level, for example the weather for a
                  particular match.

        target : str
                 name of target variable column.

        df_i : DataFrame, default: None
               DataFrame where the index is an entity and the values are
               features for predictions that the user would like to store on
               the entity level, for example the budget of a team. This
               information could be merged onto df_comb directly; accepting
               it separately is a convenience for users who store data the
               way a relational database would. If a column name is repeated
               in df_i and df_comb, an error will be raised.

        df_j : DataFrame, default: None
               Same as df_i, but can be used in case the second entity compared
               is always of a different nature and isn't stored in the same
               DataFrame. For example movies might be compared with songs.

        merge_columns : list of str, default: None
                        Any columns that exist in the df_comb, df_i and df_j
                        DataFrames that the user would also like to merge on,
                        such as year; the entities in the indices are
                        considered automatically.

        Returns
        ---------
        Self
        """

        check_indexing_of_entities(df_comb)
        self.x_comb_entnames = df_comb.index.names.copy()
        # Remember the results column so that it can be removed later
        self.target_col_name = target

        self.rplc_lkp, self.lkp = generate_entity_lookup(
            get_distinct_entities(df_comb))

        self.hyperparameters = {
            'alpha': self.alpha,
            'method': self.method,
            'initial_params': self.initial_params,
            'max_iter': self.max_iter,
            'tol': self.tol
        }

        # Training with choix
        if df_i is None and df_j is None and \
                (list(df_comb.columns) == [self.target_col_name]):
            training_data, n_ents = self.unpack_data_for_choix(
                df_comb, self.x_comb_entnames)
            # Fit Bradley Terry
            self._params = choix.opt_pairwise(n_ents, training_data["winner"],
                                              **self.hyperparameters)

            self.params_ = pd.DataFrame.from_dict(self.lkp,
                                                  orient='index',
                                                  columns=['entity'])

            self.params_['learned_strength'] = self._params.copy()

            self.is_fitted_ = True
            self.pylogit_fit = False

        # Training with pylogit
        else:
            if self.hyperparameters['method'] == "Newton-CG":
                warn(
                    "Note that the method specified for the pylogit descent "
                    "is Newton-CG; when we last checked, there was an open "
                    "issue about the Hessian used for this type of "
                    "optimization not being correct. If this issue has been "
                    "resolved in pylogit, please contact us to remove this "
                    "warning.")

            self.hyperparameters['ridge'] = self.alpha

            self.df_i = df_i.copy() if df_i is not None else None
            self.df_j = df_j.copy() if df_j is not None else None
            self.merge_columns = (merge_columns.copy()
                                  if merge_columns is not None else None)

            long_format = self.unpack_data_for_pylogit(df_comb,
                                                       self.x_comb_entnames)

            x_comb = self.join_up_dataframes(long_format, df_i, df_j,
                                             merge_columns)

            basic_specification = OrderedDict()
            basic_names = OrderedDict()
            columns = 0
            for i in x_comb.columns:
                if i not in ['observation', 'entity', 'CHOICE']:
                    basic_specification[i] = [list(self.lkp.keys())]
                    basic_names[i] = [i]
                    columns += 1

            basic_specification['intercept'] = list(self.lkp.keys())
            basic_names['intercept'] = [str(i) for i in self.rplc_lkp.keys()]

            self.bt_with_feats = pl.create_choice_model(
                data=x_comb,
                alt_id_col='entity',
                obs_id_col='observation',
                choice_col='CHOICE',
                specification=basic_specification,
                model_type="MNL",
                names=basic_names)

            self.x_comb = x_comb.copy()

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.bt_with_feats.fit_mle(np.zeros(columns + len(self.lkp)),
                                           **self.hyperparameters,
                                           print_res=False)
            self.is_fitted_ = True
            self._feat_params = self.bt_with_feats.params.reset_index()
            entity_rows = ~self._feat_params['index'].isin(basic_names.keys())
            self.params_ = self._feat_params[entity_rows]
            self.params_.columns = ['entity', 'learned_strength']
            self.pylogit_fit = True
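# Usage sketch (not part of the original source): a minimal df_comb for
# fit(), with a two-level index holding the pair of entities compared and a
# target column equal to 1 when the first entity won. All names below are
# hypothetical.
# import pandas as pd
# df_comb = pd.DataFrame(
#     {"won": [1, 0, 1]},
#     index=pd.MultiIndex.from_tuples(
#         [("team_a", "team_b"), ("team_b", "team_c"), ("team_a", "team_c")],
#         names=["entity_i", "entity_j"]))
# bt_model.fit(df_comb, target="won")  # bt_model: an instance of the class above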
Example 24
    "Headway, units:hrs, (Train)", "Headway, units:hrs, (Metro)"
]

##########
# Determine the columns for: alternative ids, the observation ids and the choice
##########
# The 'alternative_id' variable will identify the alternative associated with each row.
alternative_id = "alt_id"

# The 'obs_id' variable will identify the observation id associated with each row.
observation_id = "obs_id"

# Create a 'choice' variable which identifies the choice associated with each row.
choice = "CHOICE"

# Estimate the multinomial logit model (MNL)
model_01_mnl = pl.create_choice_model(data=data_01,
                                      alt_id_col=alternative_id,
                                      obs_id_col=observation_id,
                                      choice_col=choice,
                                      specification=basic_specification,
                                      model_type="MNL",
                                      names=basic_names)

# Specify the initial values and method for the optimization.
model_01_mnl.fit_mle(
    np.zeros(8))  # 8 is the total number of parameters to be estimated

# Look at the estimation results
model_01_mnl.get_statsmodels_summary()
Example 25
basic_names["built_since_jan2010"] = ['built_since_jan2010']

basic_specification["work_dist"] = [list(set(long_data['choice_id']))]
basic_names["work_dist"] = ['work_dist']

for poiField in poiFields:
    basic_specification[poiField] = [list(set(long_data['choice_id']))]
    basic_names[poiField] = [poiField]

# interaction of work distance and vehicle ownership: insignificant in the model
# basic_specification["work_dist_veh"] = [list(set(long_data['choice_id']))]
# basic_names["work_dist_veh"] = ['work_dist_veh']

home_loc_mnl = pl.create_choice_model(data=long_data,
                                      alt_id_col='choice_id',
                                      obs_id_col='custom_id',
                                      choice_col='choice',
                                      specification=basic_specification,
                                      model_type="MNL",
                                      names=basic_names)

print('Fitting Model')
numCoef = sum([len(basic_specification[s]) for s in basic_specification])
home_loc_mnl.fit_mle(np.zeros(numCoef))

# Look at the estimation results
print(home_loc_mnl.get_statsmodels_summary())

pickle.dump(home_loc_mnl, open(FITTED_HOME_LOC_MODEL_PATH, 'wb'))
json.dump(rent_normalisation, open(RENT_NORM_PATH, 'w'))
Example 26
                      index=False)

choiceModelPUMA_spec = OrderedDict()
choiceModelPUMA_names = OrderedDict()
choiceModelPUMAsRegressors = [
    'puma_pop_per_sqm', 'income_disparity', 'work_dist', 'media_norm_rent',
    'num_houses'
] + [x for x in list(long_data_PUMA.columns) if x.endswith('_den')]
for var in choiceModelPUMAsRegressors:
    choiceModelPUMA_spec[var] = [list(set(long_data_PUMA['choice_id']))]
    choiceModelPUMA_names[var] = [var]

home_loc_mnl_PUMAs = pl.create_choice_model(data=long_data_PUMA,
                                            alt_id_col='choice_id',
                                            obs_id_col='custom_id',
                                            choice_col='choice',
                                            specification=choiceModelPUMA_spec,
                                            model_type="MNL",
                                            names=choiceModelPUMA_names)
print('\n[info] Fitting Upper Level Model')
numCoef = sum([len(choiceModelPUMA_spec[s]) for s in choiceModelPUMA_spec])

# pylogit may hit a memory error when computing the Hessian for the standard
# errors in this model; if so, switch to the no-Hessian approach and only do
# point estimation.
try:
    home_loc_mnl_PUMAs.fit_mle(np.zeros(numCoef))
    print(home_loc_mnl_PUMAs.get_statsmodels_summary())
    home_loc_mnl['home_loc_mnl_PUMAs'] = {
        'just_point': False,
        'model': home_loc_mnl_PUMAs
    }
except MemoryError:
fig.savefig(
    str(pyprojroot.here("article/images/qq-plot-method-3.pdf")),
    dpi=500,
    bbox_inches="tight",
)
# -

# ## True Model

# +
# Estimate the basic MNL model
mnl_model = cm.create_choice_model(
    data=data,
    alt_id_col="mode_id",
    obs_id_col="observation_id",
    choice_col="sim_choice",
    specification=mnl_specification,
    model_type="MNL",
    names=mnl_names,
)

num_vars = len(reduce(lambda x, y: x + y, mnl_names.values()))
mnl_model.fit_mle(np.zeros(num_vars), method="BFGS")
mnl_model.get_statsmodels_summary()
# -

# ## Model 1

# +
# Create my specification and variable names for the basic MNL model
# NOTE: - Keys should be variables within the long format dataframe.
Example 28
spec = OrderedDict()
variable_names = OrderedDict()
Vars = ["TTME", "INVC", "INVT"]
spec["intercept"] = [1,2,3]
variable_names["intercept"] = ["ASC Air", "ASC Train", "ASC Bus"]
for var in Vars:
    spec[var] = [[1,2,3,4]]
    variable_names[var] = [var]
spec["HINC"] = [4]
variable_names["HINC"] = ["HINC for Car"]
spec["TTME"] = [[1], [2,3,4]]
variable_names["TTME"] = ["TTME for Air", "TTME for Train/Bus/Car"]
model = pl.create_choice_model(data = data,
                               alt_id_col="ALT",
                               obs_id_col="Group",
                               choice_col="MODE",
                               specification=spec,
                               model_type = "MNL",
                               names = variable_names
                               )
model.fit_mle(np.zeros(8))
model.print_summaries()


# # Retrieve model parameters
# print("\n\nThe following are attributes of the model object")
# print(dir(model))
# print("\nThe following are the coefficients")
# print(model.params.values)
# print("\nThe following are the p-values")
# print(model.pvalues.values)
Example 29
        df[col] = df[col].astype(float)
        spec[col] = [[1, 2, 3]]
        spec_names[col] = [col]

if profile:
    ini_ram = curr_ram()
    profiler = Profiler().start()

np.random.seed(0)
# Prints are temporarily disabled as pylogit has excessive verbosity
sys.stdout, sys.stderr = io.StringIO(), io.StringIO()  # Disable print
model = pl.create_choice_model(data=df,
                               alt_id_col=alt_id_col,
                               obs_id_col=obs_id_col,
                               choice_col=choice_col,
                               specification=spec,
                               mixing_vars=mixing_vars,
                               model_type="Mixed Logit",
                               names=spec_names,
                               mixing_id_col=mixing_id_col)
model.fit_mle(init_vals=np.zeros(len(varnames) + len(mixing_vars)),
              num_draws=n_draws,
              seed=123)
sys.stdout, sys.stderr = sys.__stdout__, sys.__stderr__  # Enable print

if profile:
    elapsed, max_ram, max_gpu = profiler.stop()
    print("{:6} {:7.2f} {:11.2f} {:7.3f} {:7.3f} {}".format(
        n_draws, elapsed, model.log_likelihood, max_ram - ini_ram, max_gpu,
        model.estimation_success))
    profiler.export('pylogit', dataset, n_draws, elapsed,
Example 30
# Set up the model specification.
# pylogit is configured through ordered dictionaries: every key is a variable
# to include in the model, and it must be a column name in the data; the
# value attached to each key is a list in which every element corresponds to
# one model coefficient. A matching names dictionary, using the same keys,
# likewise holds a list that names each coefficient as a string.
# Crucially, when a coefficient is generic (held constant across
# alternatives), the corresponding element must be an inner list naming the
# alternatives whose utilities include that coefficient.
spec = OrderedDict()
var_names = OrderedDict()
var_list = ['L', 'A', 'B']
for var in var_list:
    spec[var] = [[1, 2, 3, 4, 5]]
    var_names[var] = ['beta of ' + var]

# Build the model object.
# data is the dataset; alt_id_col names the column identifying each
# alternative by name or number; obs_id_col names the column identifying each
# observed choice situation; choice_col names the 0/1 column marking the
# chosen alternative. specification is the ordered dict configured above,
# model_type "MNL" selects an ordinary logit model, and names holds the
# variable names set up earlier.
model = pl.create_choice_model(data=data, alt_id_col='ALT', obs_id_col='GROUP', choice_col='MODE',
                               specification=spec, model_type='MNL', names=var_names)
# Estimation requires initial values for the parameters; zeros are fine.
model.fit_mle(np.zeros(3))
model.print_summaries()
# The results include the log-likelihood, rho-squared, the three parameter
# estimates, t-tests, p-values, robust statistics, and more.
# Use dir() to see everything the model object exposes.
print(dir(model))
# print(model.chi_square)
print(model.coefs)
# print(model.cov)
# print(model.params.values)
# print(model.pvalues)

# To predict, pass in a dataset organised in the same long format:
# print(model.predict(data.iloc[0:5]))