def simulated_problem(request):
    """Build a problem from a named simulation and solve its first stage, optionally without a supply side.

    ``request.param`` is unpacked into a simulation name and a supply flag. The simulation and its product data are
    fetched from the fixture called ``'<name>_simulation'``. When the flag is falsy, the ``'supply_instruments'``
    field is dropped from the product data and only the first two product formulations are kept.

    Returns a tuple of the simulation, the product data actually used, the problem, and the one-step results.
    """
    simulation_name, with_supply = request.param
    simulation, product_data = request.getfixturevalue(f'{simulation_name}_simulation')

    # choose the formulations and data that match the requested configuration
    if with_supply:
        formulations = simulation.product_formulations
    else:
        formulations = simulation.product_formulations[:2]
        product_data = np.lib.recfunctions.drop_fields(product_data, 'supply_instruments')

    # configure the problem and solve it with a single GMM step at the simulation's parameter values
    problem = Problem(formulations, product_data, simulation.agent_formulation, simulation.agent_data)
    results = problem.solve(
        simulation.sigma,
        simulation.pi,
        steps=1,
        linear_costs=simulation.linear_costs,
    )
    return simulation, product_data, problem, results
def knittel_metaxoglou_2014():
    """Set up the automobile example problem from Knittel and Metaxoglou (2014) and load the initial parameter values
    and estimates that were produced by replication code.

    The replication code was changed to write a Matlab data file for the automobile dataset. That file holds the
    results of one round of Knitro optimization and of post-estimation calculations. Otherwise the replication code
    was left mostly intact, apart from these small modifications:

        - Tolerance parameters, Knitro optimization parameters, and starting values for sigma were all configured.
        - A bug in the computation of the BLP instruments was fixed. When building vectors of "other" and "rival"
          sums, the code did not specify a dimension over which to sum, which caused problems with one-dimensional
          vectors. A dimension of 1 was added to both sum commands.
        - Delta was initialized as the solution to the Logit model.
        - After estimation, the objective was called once more at the optimal parameters so that globals were
          re-loaded at the optimal parameter values.
        - Matrices were renamed and reshaped before being saved to the Matlab data file.

    Returns the dictionary produced by ``scipy.io.loadmat``, which is seeded with the configured problem under the
    key ``'problem'``.
    """
    # NOTE(review): np.recfromcsv is deprecated in recent NumPy releases -- confirm against the pinned NumPy version
    raw_products = np.recfromcsv(BLP_PRODUCTS_LOCATION)

    # convert the record array into a mutable mapping so that instruments can be attached
    product_data = {field: raw_products[field] for field in raw_products.dtype.names}
    instrument_formulation = Formulation('hpwt + air + mpg + space')
    product_data['demand_instruments'] = build_blp_instruments(instrument_formulation, product_data)

    # configure the problem with linear and nonlinear demand-side formulations
    linear_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg + space')
    nonlinear_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg')
    problem = Problem(
        product_formulations=(linear_formulation, nonlinear_formulation),
        product_data=product_data,
        agent_data=np.recfromcsv(BLP_AGENTS_LOCATION),
    )

    # load the replication output into a dictionary that already contains the problem
    mat_location = str(TEST_DATA_PATH / 'knittel_metaxoglou_2014.mat')
    return scipy.io.loadmat(mat_location, {'problem': problem})
def test_extra_demographics(simulated_problem: SimulatedProblemFixture) -> None:
    """Test that agents in a simulated problem are identical to agents in a problem created with agent data built
    according to the same integration specification but containing unnecessary rows of demographics.
    """
    simulation, simulation_results, problem, _, _ = simulated_problem

    # skip simulations without demographics
    if simulation.D == 0:
        return

    # reconstruct the problem with unnecessary rows of demographics (every agent data field is stacked on itself)
    assert simulation.agent_data is not None
    product_data = simulation_results.product_data
    agent_data = simulation.agent_data
    extra_agent_data = {k: np.r_[agent_data[k], agent_data[k]] for k in agent_data.dtype.names}
    new_problem = Problem(
        problem.product_formulations, product_data, problem.agent_formulation, extra_agent_data,
        simulation.integration
    )

    # test that the agents are essentially identical; object-typed fields are skipped because they cannot be compared
    # numerically (np.object_ is used because the np.object alias no longer exists in current NumPy releases)
    for key in problem.agents.dtype.names:
        if problem.agents[key].dtype != np.object_:
            np.testing.assert_allclose(problem.agents[key], new_problem.agents[key], atol=1e-14, rtol=0, err_msg=key)
def test_extra_nodes(simulated_problem: SimulatedProblemFixture) -> None:
    """Test that agents in a simulated problem are identical to agents in a problem created with agent data built
    according to the same integration specification but containing unnecessary columns of nodes.
    """
    simulation, simulation_results, problem, _, _ = simulated_problem

    # skip simulations without agents
    if simulation.K2 == 0:
        return

    # reconstruct the problem with unnecessary columns of nodes (the node matrix is concatenated with itself)
    assert simulation.agent_data is not None
    product_data = simulation_results.product_data
    extra_agent_data = {k: simulation.agent_data[k] for k in simulation.agent_data.dtype.names}
    extra_agent_data['nodes'] = np.c_[extra_agent_data['nodes'], extra_agent_data['nodes']]
    new_problem = Problem(problem.product_formulations, product_data, problem.agent_formulation, extra_agent_data)

    # test that the agents are essentially identical; object-typed fields are skipped because they cannot be compared
    # numerically (np.object_ is used because the np.object alias no longer exists in current NumPy releases)
    for key in problem.agents.dtype.names:
        if problem.agents[key].dtype != np.object_:
            np.testing.assert_allclose(problem.agents[key], new_problem.agents[key], atol=1e-14, rtol=0, err_msg=key)
def test_extra_demographics(simulated_problem):
    """Check that duplicating every row of agent data leaves the constructed agents unchanged.

    A second problem is configured with the same formulations and integration specification, but with agent data in
    which each field is stacked on top of itself. Its agents should match those of the original problem.
    """
    simulation, product_data, problem1, _ = simulated_problem

    # nothing to test when the simulation has no demographics
    if simulation.D == 0:
        return

    # stack each agent data field on itself to create unnecessary rows of demographics
    original_agent_data = simulation.agent_data
    doubled_agent_data = {}
    for field in original_agent_data.dtype.names:
        doubled_agent_data[field] = np.r_[original_agent_data[field], original_agent_data[field]]

    # configure the comparison problem
    problem2 = Problem(
        problem1.product_formulations,
        product_data,
        problem1.agent_formulation,
        doubled_agent_data,
        simulation.integration,
    )

    # compare every agent field whose data type is a sub-type of the configured floating point type
    agents_dtype = problem1.agents.dtype
    for key in agents_dtype.names:
        if not np.issubdtype(agents_dtype[key], options.dtype):
            continue
        np.testing.assert_allclose(problem1.agents[key], problem2.agents[key], atol=1e-14, rtol=0, err_msg=key)
def test_extra_nodes(simulated_problem):
    """Check that appending redundant columns of nodes to agent data leaves the constructed agents unchanged.

    A second problem is configured with the same formulations, but with agent data whose ``'nodes'`` field is
    concatenated column-wise with itself. Its agents should match those of the original problem.
    """
    simulation, product_data, problem1, _ = simulated_problem

    # copy the agent data fields into a mutable mapping and widen the nodes
    original_agent_data = simulation.agent_data
    widened_agent_data = {field: original_agent_data[field] for field in original_agent_data.dtype.names}
    nodes = widened_agent_data['nodes']
    widened_agent_data['nodes'] = np.c_[nodes, nodes]

    # configure the comparison problem
    problem2 = Problem(
        problem1.product_formulations,
        product_data,
        problem1.agent_formulation,
        widened_agent_data,
    )

    # compare every agent field whose data type is a sub-type of the configured floating point type
    agents_dtype = problem1.agents.dtype
    for key in agents_dtype.names:
        if not np.issubdtype(agents_dtype[key], options.dtype):
            continue
        np.testing.assert_allclose(problem1.agents[key], problem2.agents[key], atol=1e-14, rtol=0, err_msg=key)
def test_fixed_effects(
        simulated_problem: SimulatedProblemFixture, ED: int, ES: int,
        absorb_method: Optional[Union[str, Iteration]]) -> None:
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as does including indicator variables. Also test that optimal instruments results
    and marginal costs remain unchanged.

    Three problems are solved and compared: one in which all fixed effects are absorbed (with the given absorption
    method), one in which all fixed effects are included as indicators, and one in which the first fixed effect on
    each side is an indicator while the remaining ones are absorbed.
    """
    simulation, simulation_results, problem, solve_options, _ = simulated_problem

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # make product data mutable
    product_data = {k: simulation_results.product_data[k] for k in simulation_results.product_data.dtype.names}

    # remove constants and delete associated elements in the initial beta
    solve_options = solve_options.copy()
    product_formulations = list(problem.product_formulations)
    if ED > 0:
        assert product_formulations[0] is not None
        constant_indices = [i for i, e in enumerate(product_formulations[0]._expressions) if not e.free_symbols]
        solve_options['beta'] = np.delete(solve_options['beta'], constant_indices, axis=0)
        product_formulations[0] = Formulation(f'{product_formulations[0]._formula} - 1')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations[2] = Formulation(f'{product_formulations[2]._formula} - 1')

    # add fixed effect IDs to the data (a seeded local state keeps the IDs reproducible)
    demand_id_names: List[str] = []
    supply_id_names: List[str] = []
    state = np.random.RandomState(seed=0)
    for side, count, names in [('demand', ED, demand_id_names), ('supply', ES, supply_id_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            ids = state.choice(['a', 'b', 'c'], problem.N)
            product_data[name] = ids
            names.append(name)

    # split apart excluded demand-side instruments so they can be included in formulations
    for index, instrument in enumerate(product_data['demand_instruments'].T):
        product_data[f'demand_instrument{index}'] = instrument

    # build formulas for the IDs
    demand_id_formula = ' + '.join(demand_id_names)
    supply_id_formula = ' + '.join(supply_id_names)

    # solve the first stage of a problem in which the fixed effects are absorbed
    solve_options1 = solve_options.copy()
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations1[0] = Formulation(product_formulations[0]._formula, demand_id_formula, absorb_method)
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations1[2] = Formulation(product_formulations[2]._formula, supply_id_formula, absorb_method)
    problem1 = Problem(product_formulations1, product_data, problem.agent_formulation, simulation.agent_data)
    problem_results1 = problem1.solve(**solve_options1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables (the initial beta
    # is padded with NaN for the additional indicator coefficients)
    solve_options2 = solve_options.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations2[0] = Formulation(f'{product_formulations[0]._formula} + {demand_id_formula}')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations2[2] = Formulation(f'{product_formulations[2]._formula} + {supply_id_formula}')
    problem2 = Problem(product_formulations2, product_data, problem.agent_formulation, simulation.agent_data)
    solve_options2['beta'] = np.r_[
        solve_options2['beta'],
        np.full((problem2.K1 - solve_options2['beta'].size, 1), np.nan)
    ]
    problem_results2 = problem2.solve(**solve_options2)

    # solve the first stage of a problem in which some fixed effects are absorbed and some are included as indicators
    # (at least one of ED and ES is positive here because of the early return above, so this is always a new problem)
    solve_options3 = solve_options.copy()
    product_formulations3 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations3[0] = Formulation(
            f'{product_formulations[0]._formula} + {demand_id_names[0]}', ' + '.join(demand_id_names[1:]) or None
        )
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations3[2] = Formulation(
            f'{product_formulations[2]._formula} + {supply_id_names[0]}', ' + '.join(supply_id_names[1:]) or None
        )
    problem3 = Problem(product_formulations3, product_data, problem.agent_formulation, simulation.agent_data)
    solve_options3['beta'] = np.r_[
        solve_options3['beta'],
        np.full((problem3.K1 - solve_options3['beta'].size, 1), np.nan)
    ]
    problem_results3 = problem3.solve(**solve_options3)

    # compute optimal instruments (use only two draws for speed; accuracy is not a concern here)
    Z_results1 = problem_results1.compute_optimal_instruments(draws=2, seed=0)
    Z_results2 = problem_results2.compute_optimal_instruments(draws=2, seed=0)
    Z_results3 = problem_results3.compute_optimal_instruments(draws=2, seed=0)

    # compute marginal costs
    costs1 = problem_results1.compute_costs()
    costs2 = problem_results2.compute_costs()
    costs3 = problem_results3.compute_costs()

    # choose tolerances (be more flexible with iterative de-meaning)
    atol = 1e-8
    rtol = 1e-5
    if ED > 2 or ES > 2 or isinstance(absorb_method, Iteration):
        atol *= 10
        rtol *= 10

    # test that all problem results expected to be identical are essentially identical
    problem_results_keys = [
        'theta', 'sigma', 'pi', 'rho', 'beta', 'gamma', 'sigma_se', 'pi_se', 'rho_se', 'beta_se', 'gamma_se', 'delta',
        'tilde_costs', 'xi', 'omega', 'xi_by_theta_jacobian', 'omega_by_theta_jacobian', 'objective', 'gradient',
        'gradient_norm', 'sigma_gradient', 'pi_gradient', 'rho_gradient', 'beta_gradient', 'gamma_gradient'
    ]
    for key in problem_results_keys:
        result1 = getattr(problem_results1, key)
        result2 = getattr(problem_results2, key)
        result3 = getattr(problem_results3, key)

        # linear parameter results from problems with indicators have extra trailing elements, which are discarded
        if key in {'beta', 'gamma', 'beta_se', 'gamma_se', 'beta_gradient', 'gamma_gradient'}:
            result2 = result2[:result1.size]
            result3 = result3[:result1.size]
        np.testing.assert_allclose(result1, result2, atol=atol, rtol=rtol, err_msg=key)
        np.testing.assert_allclose(result1, result3, atol=atol, rtol=rtol, err_msg=key)

    # test that all optimal instrument results expected to be identical are essentially identical
    Z_results_keys = [
        'demand_instruments', 'supply_instruments', 'inverse_covariance_matrix', 'expected_xi_by_theta_jacobian',
        'expected_omega_by_theta_jacobian'
    ]
    for key in Z_results_keys:
        result1 = getattr(Z_results1, key)
        result2 = getattr(Z_results2, key)
        result3 = getattr(Z_results3, key)
        np.testing.assert_allclose(result1, result2, atol=atol, rtol=rtol, err_msg=key)
        np.testing.assert_allclose(result1, result3, atol=atol, rtol=rtol, err_msg=key)

    # test that marginal costs are essentially identical
    np.testing.assert_allclose(costs1, costs2, atol=atol, rtol=rtol)
    np.testing.assert_allclose(costs1, costs3, atol=atol, rtol=rtol)
def test_fixed_effects(simulated_problem, ED, ES):
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as including indicator variables. Also test that results that should be equal when
    there aren't any fixed effects are indeed equal.
    """
    simulation, product_data, problem, results = simulated_problem

    # test that results that should be equal when there aren't any fixed effects are indeed equal
    for key in ['delta', 'tilde_costs', 'xi', 'omega', 'xi_jacobian', 'omega_jacobian']:
        result = getattr(results, key)
        true_result = getattr(results, f'true_{key}')
        assert (result is not None) == (true_result is not None)
        if result is not None:
            np.testing.assert_allclose(result, true_result, atol=1e-14, rtol=0, err_msg=key)

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # add fixed effect IDs to the data
    np.random.seed(0)
    demand_names = []
    supply_names = []
    product_data = {k: product_data[k] for k in product_data.dtype.names}
    for side, count, names in [('demand', ED, demand_names), ('supply', ES, supply_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'

            # IDs are drawn uniformly with replacement; a list of weights used to be passed as the third positional
            # argument, but that position is `replace` (not `p`), so the weights were only ever interpreted as a
            # truthy flag and never applied (dropping the argument leaves the drawn IDs unchanged)
            ids = np.random.choice(['a', 'b', 'c'], product_data['market_ids'].size)
            product_data[name] = ids
            names.append(name)

    # remove constants (the first column of each instrument matrix is dropped along with the constant)
    product_formulations = list(problem.product_formulations)
    if ED > 0:
        product_formulations[0] = Formulation(f'{product_formulations[0]._formula} - 1')
        product_data['demand_instruments'] = product_data['demand_instruments'][:, 1:]
    if ES > 0:
        product_formulations[2] = Formulation(f'{product_formulations[2]._formula} - 1')
        product_data['supply_instruments'] = product_data['supply_instruments'][:, 1:]

    # build formulas for the IDs
    demand_formula = ' + '.join(demand_names)
    supply_formula = ' + '.join(supply_names)

    # solve the first stage of a problem in which the fixed effects are absorbed
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        product_formulations1[0] = Formulation(product_formulations[0]._formula, demand_formula)
    if ES > 0:
        product_formulations1[2] = Formulation(product_formulations[2]._formula, supply_formula)
    problem1 = Problem(product_formulations1, product_data, problem.agent_formulation, simulation.agent_data)
    results1 = problem1.solve(simulation.sigma, simulation.pi, steps=1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables (the indicators
    # are also appended to the instruments)
    product_data2 = product_data.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        demand_indicators = build_matrix(Formulation(demand_formula), product_data)
        product_data2['demand_instruments'] = np.c_[product_data['demand_instruments'], demand_indicators]
        product_formulations2[0] = Formulation(f'{product_formulations[0]._formula} + {demand_formula}')
    if ES > 0:
        supply_indicators = build_matrix(Formulation(supply_formula), product_data)
        product_data2['supply_instruments'] = np.c_[product_data['supply_instruments'], supply_indicators]
        product_formulations2[2] = Formulation(f'{product_formulations[2]._formula} + {supply_formula}')
    problem2 = Problem(product_formulations2, product_data2, problem.agent_formulation, simulation.agent_data)
    results2 = problem2.solve(simulation.sigma, simulation.pi, steps=1)

    # test that all arrays expected to be identical are identical
    keys = [
        'theta', 'sigma', 'pi', 'beta', 'gamma', 'sigma_se', 'pi_se', 'beta_se', 'gamma_se', 'true_delta',
        'true_tilde_costs', 'true_xi', 'true_omega', 'true_xi_jacobian', 'true_omega_jacobian', 'objective',
        'gradient', 'sigma_gradient', 'pi_gradient'
    ]
    for key in keys:
        result1 = getattr(results1, key)
        if result1 is not None:
            result2 = getattr(results2, key)

            # linear parameter results from the problem with indicators have extra trailing elements
            if 'beta' in key or 'gamma' in key:
                result2 = result2[:result1.size]
            np.testing.assert_allclose(result1, result2, atol=1e-8, rtol=1e-5, err_msg=key)