def test_ridge_warning_in_fit_mle(self):
    """
    Ensure that a UserWarning is raised when one passes the ridge
    keyword argument to the `fit_mle` method of a Nested Logit model
    object.
    """
    # Bundle the arguments used to construct the nested logit model
    constructor_args = [self.fake_df,
                        self.alt_id_col,
                        self.obs_id_col,
                        self.choice_col,
                        self.fake_specification,
                        self.fake_names]

    # Bundle the kwargs for constructing the nested_logit_model
    constructor_kwargs = {"nest_spec": self.fake_nest_spec}

    # Create the nested logit model object whose coefficients will be
    # estimated.
    base_nl = nl.NestedLogit(*constructor_args, **constructor_kwargs)

    # Create a variable for the fit_mle function's kwargs.
    # The print_res = False argument makes sure strings aren't printed
    # to the console unnecessarily.
    fit_kwargs = {"constrained_pos": [1],
                  "ridge": 0.5,
                  "print_res": False}

    # Test to make sure that the ridge warning message is raised when
    # using the ridge keyword argument
    with warnings.catch_warnings(record=True) as w:
        # Use this filter to always trigger the UserWarnings
        warnings.simplefilter('always', UserWarning)

        base_nl.fit_mle(self.fake_all_params, **fit_kwargs)
        self.assertGreaterEqual(len(w), 1)
        # BUG FIX: the original assertion was
        # `assertIsInstance(w[0].category, type(UserWarning))`, which
        # checks `isinstance(<warning class>, type)` and is therefore
        # true for ANY warning category. Check the category properly.
        self.assertTrue(issubclass(w[0].category, UserWarning))
        self.assertIn(nl._ridge_warning_msg, str(w[0].message))

    return None
def test_just_point_kwarg(self):
    """
    Ensure that calling `fit_mle` with `just_point = True` returns a
    dictionary with a 'x' key and a corresponding value that is an
    ndarray.
    """
    # Gather the positional arguments needed by the NestedLogit
    # constructor.
    model_args = [self.fake_df,
                  self.alt_id_col,
                  self.obs_id_col,
                  self.choice_col,
                  self.fake_specification]

    # Gather the keyword arguments needed by the NestedLogit
    # constructor.
    model_kwargs = {"names": self.fake_names,
                    "nest_spec": self.fake_nest_spec}

    # Instantiate the nested logit model to be estimated.
    model = nl.NestedLogit(*model_args, **model_kwargs)

    # Estimate the model, asking only for the point estimate.
    estimation_result = model.fit_mle(self.fake_all_params,
                                      just_point=True)

    # Verify that we received a dictionary containing an 'x' key whose
    # value is a numpy array.
    self.assertIsInstance(estimation_result, dict)
    self.assertIn("x", estimation_result)
    self.assertIsInstance(estimation_result["x"], np.ndarray)

    return None
def test_invalid_init_kwargs_error_in_fit_mle(self):
    """
    Ensures that a ValueError is raised when users try to use any other
    type of initial value input methods other than the `init_vals`
    argument of `fit_mle()`. This prevents people from expecting the
    use of outside intercept or shape parameters to work with the
    Nested Logit model.
    """
    # Collect the positional arguments for the NestedLogit constructor.
    model_args = [self.fake_df,
                  self.alt_id_col,
                  self.obs_id_col,
                  self.choice_col,
                  self.fake_specification]

    # Collect the keyword arguments for the NestedLogit constructor.
    model_kwargs = {"names": self.fake_names,
                    "nest_spec": self.fake_nest_spec}

    # Instantiate the nested logit model to be estimated.
    model = nl.NestedLogit(*model_args, **model_kwargs)

    # Passing None for init_vals mimics what users do when they try to
    # rely on the shape_param or outside-intercept kwargs instead.
    # The print_res = False entries keep strings from being printed to
    # the console unnecessarily.
    bad_kwarg_dicts = [{"init_shapes": np.array([1, 2]),
                        "print_res": False},
                       {"init_intercepts": np.array([1]),
                        "print_res": False},
                       {"init_coefs": np.array([1]),
                        "print_res": False}]

    # Each of the disallowed kwargs should trigger a ValueError.
    for bad_kwargs in bad_kwarg_dicts:
        self.assertRaises(ValueError,
                          model.fit_mle,
                          None,
                          **bad_kwargs)

    return None
def test_invalid_init_vals_length_in_estimate(self):
    """
    Ensure that when _estimate() is called, with an init_values argument
    that is of an incorrect length, a ValueError is raised.
    """
    # Bundle the arguments used to construct the nested logit model
    constructor_args = [self.fake_df,
                        self.alt_id_col,
                        self.obs_id_col,
                        self.choice_col,
                        self.fake_specification,
                        self.fake_names]

    # Bundle the kwargs for constructing the nested_logit_model
    constructor_kwargs = {"nest_spec": self.fake_nest_spec}

    # Create the nested logit model object whose coefficients will be
    # estimated.
    base_nl = nl.NestedLogit(*constructor_args, **constructor_kwargs)

    # Create an estimator object.
    zero_vector = np.zeros(self.fake_all_params.shape[0])
    estimator_args = [base_nl,
                      base_nl.get_mappings_for_fit(),
                      None,
                      zero_vector,
                      nl.split_param_vec]
    estimator_kwargs = {"constrained_pos": [1]}
    nested_estimator = nl.NestedEstimator(*estimator_args,
                                          **estimator_kwargs)

    # Alias the function being tested
    func = nested_estimator.check_length_of_initial_values

    # Test that the desired error is raised for initial-value arrays
    # that are one element too short and one element too long.
    # BUG FIX: use `assertRaisesRegex` — `assertRaisesRegexp` is a
    # deprecated alias that was removed in Python 3.12.
    for i in [-1, 1]:
        init_values = np.arange(self.fake_all_params.shape[0] + i)
        self.assertRaisesRegex(ValueError,
                               "values are of the wrong dimension",
                               func,
                               init_values)

    return None
def setUp(self):
    # Index coefficients used throughout the tests.
    self.fake_betas = np.array([0.3, -0.6, 0.2])

    # 'Natural' nest coefficients, i.e. the inverse of the scale
    # parameters for each nest. They should be less than or equal to 1.
    self.natural_nest_coefs = np.array([0.995, 0.5])

    # All model parameters: nest coefficients first, then betas.
    self.fake_all_params = np.concatenate((self.natural_nest_coefs,
                                           self.fake_betas))

    # The set up has two choice situations: the first with three
    # alternatives and the second with only two. The matrix below maps
    # each row of the design matrix to its nest.
    self.fake_rows_to_nests = csr_matrix(np.array([[1, 0],
                                                   [1, 0],
                                                   [0, 1],
                                                   [1, 0],
                                                   [0, 1]]))

    # Sparse matrix mapping design-matrix rows to observations.
    self.fake_rows_to_obs = csr_matrix(np.array([[1, 0],
                                                 [1, 0],
                                                 [1, 0],
                                                 [0, 1],
                                                 [0, 1]]))

    # Fake design matrix; its columns denote ASC_1, ASC_2, and X.
    self.fake_design = np.array([[1, 0, 1],
                                 [0, 1, 2],
                                 [0, 0, 3],
                                 [1, 0, 1.5],
                                 [0, 0, 3.5]])

    # Fake long-format data needed by the model constructor.
    self.fake_df = pd.DataFrame({"obs_id": [1, 1, 1, 2, 2],
                                 "alt_id": [1, 2, 3, 1, 3],
                                 "choice": [0, 1, 0, 0, 1],
                                 "x": range(5),
                                 "intercept": [1 for i in range(5)]})

    # Names of the id and choice columns in fake_df.
    self.alt_id_col = "alt_id"
    self.obs_id_col = "obs_id"
    self.choice_col = "choice"

    # The array of observed choices.
    self.choice_array = self.fake_df[self.choice_col].values

    # Sparse matrix mapping the chosen rows of the design matrix to
    # their observations.
    self.fake_chosen_rows_to_obs = csr_matrix(np.array([[0, 0],
                                                        [1, 0],
                                                        [0, 0],
                                                        [0, 0],
                                                        [0, 1]]))

    # Index specification and variable-name dictionary for the model.
    self.fake_specification = OrderedDict()
    self.fake_specification["intercept"] = [1, 2]
    self.fake_specification["x"] = [[1, 2, 3]]
    self.fake_names = OrderedDict()
    self.fake_names["intercept"] = ["ASC 1", "ASC 2"]
    self.fake_names["x"] = ["x (generic coefficient)"]

    # Nesting specification: alternatives 1 and 2 share a nest and
    # alternative 3 is in a degenerate nest by itself.
    self.fake_nest_spec = OrderedDict()
    self.fake_nest_spec["Nest 1"] = [1, 2]
    self.fake_nest_spec["Nest 2"] = [3]

    # The nested logit model object used by the tests.
    constructor_args = [self.fake_df,
                       self.alt_id_col,
                       self.obs_id_col,
                       self.choice_col,
                       self.fake_specification]
    constructor_kwargs = {"names": self.fake_names,
                          "nest_spec": self.fake_nest_spec}
    self.model_obj = nested_logit.NestedLogit(*constructor_args,
                                              **constructor_kwargs)

    # The ridge penalty used when testing regularized estimation.
    self.ridge = 0.5

    return None
def make_nested_model(self):
    """
    Construct and return a NestedLogit model object whose `coefs`,
    `nests`, and `params` attributes have been populated with fake
    estimation results for use in the tests.
    """
    # Create the betas to be used during the tests
    fake_betas = np.array([0.3, -0.6, 0.2])

    # Create the fake nest coefficients to be used during the tests.
    # Note that these are the 'natural' nest coefficients, i.e. the
    # inverse of the scale parameters for each nest.
    # BUG FIX (comment): these coefficients should be LESS than or
    # equal to 1 (as the values below are), not "bigger than or equal
    # to 1" as the original comment claimed.
    natural_nest_coefs = np.array([1 - 1e-16, 0.5])

    # NOTE: the original function also built several unused fixtures
    # (fake_all_params, fake_rows_to_nests, fake_rows_to_obs,
    # fake_design, choice_array, fake_chosen_rows_to_obs). They were
    # never referenced and have been removed as dead code.

    # Create fake versions of the needed arguments for the model
    # constructor. The set up has two choice situations: the first with
    # three alternatives and the second with only two.
    fake_df = pd.DataFrame({"obs_id": [1, 1, 1, 2, 2],
                            "alt_id": [1, 2, 3, 1, 3],
                            "choice": [0, 1, 0, 0, 1],
                            "x": range(5),
                            "intercept": [1 for i in range(5)]})

    # Record the various column names
    alt_id_col = "alt_id"
    obs_id_col = "obs_id"
    choice_col = "choice"

    # Create the index specification and name dictionary for the model
    fake_specification = OrderedDict()
    fake_specification["intercept"] = [1, 2]
    fake_specification["x"] = [[1, 2, 3]]
    fake_names = OrderedDict()
    fake_names["intercept"] = ["ASC 1", "ASC 2"]
    fake_names["x"] = ["x (generic coefficient)"]

    # Create the nesting specification
    fake_nest_spec = OrderedDict()
    fake_nest_spec["Nest 1"] = [1, 2]
    fake_nest_spec["Nest 2"] = [3]

    # Create a nested logit object
    args = [fake_df,
            alt_id_col,
            obs_id_col,
            choice_col,
            fake_specification]
    kwargs = {"names": fake_names, "nest_spec": fake_nest_spec}
    model_obj = nested_logit.NestedLogit(*args, **kwargs)

    # Attach the fake estimation results to the model object.
    model_obj.coefs = pd.Series(fake_betas, index=model_obj.ind_var_names)
    model_obj.intercepts = None
    model_obj.shapes = None

    def logit(x):
        # Inverse of the logistic function; maps (0, 1) onto the reals.
        return np.log(x / (1 - x))

    # Model code stores nest parameters on the logit scale.
    model_obj.nests =\
        pd.Series(logit(natural_nest_coefs), index=fake_nest_spec.keys())
    model_obj.params =\
        pd.concat([model_obj.nests, model_obj.coefs],
                  axis=0, ignore_index=False)

    return model_obj