def large_nested_logit_simulation() -> SimulationFixture:
    """Build a nested logit simulation with ten markets and replace its endogenous variables.

    Demand is linear in a constant, prices, and the characteristics x, y, and z. Costs use a log specification in
    log(x), a, b, and c. Each product is randomly assigned to one of three nesting groups, all of which share one
    nesting parameter.
    """
    id_data = build_id_data(T=10, J=20, F=9)

    # both sets of IDs are drawn from freshly seeded states, so they are reproducible and independent of call order
    nesting_ids = np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size)
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    # there is no nonlinear formulation, hence the None in the middle
    linear_formulation = Formulation('1 + prices + x + y + z')
    cost_formulation = Formulation('0 + log(x) + a + b + c')

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'nesting_ids': nesting_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, -6, 1, 2, 3],
        gamma=[0.1, 0.2, 0.3, 0.5],
        rho=0.1,
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    return simulation, simulation.replace_endogenous(), {}, []
def small_logit_simulation() -> SimulationFixture:
    """Build a logit simulation with two markets and replace the exogenous variables x and a.

    Demand is linear in a constant, prices, and the characteristic x. Costs depend on the single characteristic a.
    Epsilon is scaled by 0.5.
    """
    id_data = build_id_data(T=2, J=18, F=3)
    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)

    # there is no nonlinear formulation, hence the None in the middle
    linear_formulation = Formulation('1 + prices + x')
    cost_formulation = Formulation('0 + a')

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, -5, 1],
        gamma=2,
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        epsilon_scale=0.5,
        seed=0,
    )
    return simulation, simulation.replace_exogenous('x', 'a'), {}, []
def test_matrices(
        formula_data: Data, formulas: Iterable[str], build_columns: Callable[[Mapping[str, Array]], Sequence[Array]],
        build_derivatives: Callable[[Mapping[str, Array]], Sequence[Array]]) -> None:
    """Check that each of a set of equivalent formulas builds the expected columns and derivatives.

    Derivatives are taken with respect to x. Each formula must also have a non-empty string representation.
    """

    # columns of ones and zeros with the same shape as x, used to broadcast scalars and to supplement the data
    unit_column = np.ones_like(formula_data['x'])
    null_column = np.zeros_like(formula_data['x'])

    for formula in formulas:
        # formatting the formulation must produce a non-empty string
        formulation = Formulation(formula)
        assert str(formulation)

        # build the matrix directly and then re-evaluate each column formulation on the underlying data
        matrix, column_formulations, underlying_data = formulation._build_matrix(formula_data)
        evaluated_columns = []
        for column_formulation in column_formulations:
            evaluated_columns.append(unit_column * column_formulation.evaluate(underlying_data))
        evaluated_matrix = np.column_stack(evaluated_columns)
        derivative_columns = []
        for column_formulation in column_formulations:
            derivative_columns.append(unit_column * column_formulation.evaluate_derivative('x', underlying_data))
        derivatives = np.column_stack(derivative_columns)

        # the expected values are built from the underlying data supplemented with '1' and '0' columns
        supplemented_data = {'1': unit_column, '0': null_column}
        supplemented_data.update(underlying_data)
        expected_matrix = np.column_stack(build_columns(supplemented_data))
        expected_derivatives = np.column_stack(build_derivatives(supplemented_data))

        # compare each actual result with its expected counterpart
        comparisons = [
            (matrix, expected_matrix),
            (matrix, evaluated_matrix),
            (derivatives, expected_derivatives),
        ]
        for actual, desired in comparisons:
            np.testing.assert_allclose(actual, desired, rtol=0, atol=1e-14, err_msg=formula)
def medium_blp_simulation() -> SimulationFixture:
    """Build and solve a BLP simulation with four markets.

    The linear, nonlinear, and cost formulations each include a constant. There are two linear characteristics
    (x and y), two cost characteristics (a and b), a demographic (f) interacted with second-degree prices, and an
    alternative ownership structure.
    """
    id_data = build_id_data(T=4, J=25, F=6)

    def kappa(f, g):
        """Own products get a weight of one, and pairs of firms with IDs above three get a weight of 0.1."""
        if f == g:
            return 1
        if f > 3 and g > 3:
            return 0.1
        return 0

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(1).choice(range(20), id_data.size),
        'ownership': build_ownership(id_data, kappa),
    }
    product_formulations = (
        Formulation('1 + x + y'),
        Formulation('1 + I(prices ** 2)'),
        Formulation('1 + a + b'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        beta=[1, 2, 1],
        sigma=[[0.5, 0], [0.0, 0]],
        gamma=[1, 1, 2],
        product_data=product_data,
        agent_formulation=Formulation('0 + f'),
        pi=[[0], [-3]],
        integration=Integration('product', 4),
        xi_variance=0.0001,
        omega_variance=0.0001,
        correlation=0.8,
        seed=1,
    )
    return simulation, simulation.solve()
def small_nested_blp_simulation() -> SimulationFixture:
    """Build a nested BLP simulation with eight markets and replace its endogenous variables.

    Demand is linear in prices, x, and z, and x also enters nonlinearly. Costs depend on a, b, and c. Products are
    randomly assigned to one of two nesting groups, each with its own nesting parameter.
    """
    id_data = build_id_data(T=8, J=18, F=3)

    # both sets of IDs are drawn from freshly seeded states, so they are reproducible and independent of call order
    nesting_ids = np.random.RandomState(0).choice(['f', 'g'], id_data.size)
    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)

    product_formulations = (
        Formulation('0 + prices + x + z'),
        Formulation('0 + x'),
        Formulation('0 + a + b + c'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'nesting_ids': nesting_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[-5, 1, 2],
        sigma=2,
        gamma=[2, 1, 1],
        rho=[0.1, 0.2],
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    return simulation, simulation.replace_endogenous(), {}, []
def large_simulation():
    """Solve a simulation with ten markets, linear/nonlinear prices, a linear constant, a cost/linear/nonlinear
    characteristic, another three cost characteristics, another two linear characteristics, demographics interacted
    with prices and the cost/linear/nonlinear characteristic, dense parameter matrices, an acquisition, a triple
    acquisition, and a log-linear cost specification.

    Returns the simulation along with its solved product data, to which a reproducible 'clustering_ids' field has
    been appended.
    """
    # explicitly import the submodule, which a bare `import numpy` is not guaranteed to expose as np.lib.recfunctions
    from numpy.lib import recfunctions

    simulation = Simulation(
        product_formulations=(
            Formulation('1 + prices + x + y + z'),
            Formulation('0 + prices + x'),
            Formulation('0 + log(x) + a + b + c')
        ),
        beta=[1, -6, 1, 2, 3],
        sigma=[
            [1, -0.1],
            [0, 2]
        ],
        gamma=[0.1, 0.2, 0.3, 0.5],
        product_data=build_id_data(T=10, J=20, F=9, mergers=[{f: 4 + int(f > 0) for f in range(4)}]),
        agent_formulation=Formulation('0 + f + g'),
        pi=[
            [1, 0],
            [0, 2]
        ],
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        linear_costs=False,
        seed=2
    )

    # draw clustering IDs from a seeded state (not the global one) so that the fixture is the same on every run
    clustering_ids = np.random.RandomState(2).choice(['a', 'b', 'c', 'd'], simulation.N)
    product_data = recfunctions.rec_append_fields(simulation.solve(), 'clustering_ids', clustering_ids)
    return simulation, product_data
def large_logit_simulation() -> SimulationFixture:
    """Build and solve a logit simulation with ten markets.

    Demand is linear in a constant, prices, and the characteristics x, y, and z. Costs use a log specification in
    log(x), a, b, and c.
    """
    id_data = build_id_data(T=10, J=20, F=9)
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    # there is no nonlinear formulation, hence the None in the middle and the lack of a sigma
    linear_formulation = Formulation('1 + prices + x + y + z')
    cost_formulation = Formulation('0 + log(x) + a + b + c')

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        beta=[1, -6, 1, 2, 3],
        sigma=None,
        gamma=[0.1, 0.2, 0.3, 0.5],
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.1,
        costs_type='log',
        seed=2,
    )
    return simulation, simulation.solve()
def small_logit_simulation() -> SimulationFixture:
    """Build and solve a logit simulation with two markets and an acquisition.

    Demand is linear in a constant, prices, and the characteristic x. Costs depend on the single characteristic a.
    """
    id_data = build_id_data(T=2, J=18, F=3, mergers=[{1: 0}])
    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)

    # there is no nonlinear formulation, hence the None in the middle and the lack of a sigma
    linear_formulation = Formulation('1 + prices + x')
    cost_formulation = Formulation('0 + a')

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        beta=[1, -5, 1],
        sigma=None,
        gamma=2,
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    return simulation, simulation.solve()
def knittel_metaxoglou_2014():
    """Set up the automobile example problem from Knittel and Metaxoglou (2014) and load the starting values and
    estimates that were produced by their replication code.

    The replication code was changed to write a Matlab data file for the automobile dataset, which holds the results
    of one round of Knitro optimization and post-estimation calculations. The code was left mostly as is, apart from
    the following small modifications:

        - Tolerance parameters, Knitro optimization parameters, and starting values for sigma were all configured.
        - A bug in how the code computed BLP instruments was fixed. When building vectors of "other" and "rival"
          sums, no dimension to sum over was specified, which caused problems with one-dimensional vectors. Both sum
          commands were given a dimension of 1.
        - Delta was initialized as the solution to the Logit model.
        - After estimation, the objective was evaluated once more at the optimal parameters so that globals were
          re-loaded at those parameter values.
        - Matrices were renamed and reshaped before being saved to the Matlab data file.

    The returned dictionary is the loaded data file with the configured problem stored under the 'problem' key.
    """

    # load product data into a mutable mapping from field names to columns
    product_records = np.recfromcsv(BLP_PRODUCTS_LOCATION)
    product_data = {}
    for name in product_records.dtype.names:
        product_data[name] = product_records[name]

    # supplement the product data with BLP instruments
    instrument_formulation = Formulation('hpwt + air + mpg + space')
    product_data['demand_instruments'] = build_blp_instruments(instrument_formulation, product_data)

    # configure the problem
    linear_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg + space')
    nonlinear_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg')
    agent_data = np.recfromcsv(BLP_AGENTS_LOCATION)
    problem = Problem(
        product_formulations=(linear_formulation, nonlinear_formulation),
        product_data=product_data,
        agent_data=agent_data,
    )

    # variables from the data file are inserted into a dictionary that already contains the problem
    data_location = str(TEST_DATA_PATH / 'knittel_metaxoglou_2014.mat')
    return scipy.io.loadmat(data_location, {'problem': problem})
def small_blp_simulation() -> SimulationFixture:
    """Build and solve a BLP simulation with three markets and an acquisition.

    Demand is linear in prices and x, and x also enters nonlinearly. Costs depend on the characteristics a and b.
    """
    id_data = build_id_data(T=3, J=18, F=3, mergers=[{1: 0}])
    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)

    linear_formulation = Formulation('0 + prices + x')
    nonlinear_formulation = Formulation('0 + x')
    cost_formulation = Formulation('0 + a + b')

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[-5, 1],
        sigma=2,
        gamma=[2, 1],
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    return simulation, simulation.solve()
def small_nested_logit_simulation() -> SimulationFixture:
    """Build a nested logit simulation with four markets and replace its endogenous variables.

    Demand is linear in prices and the characteristics x and y. Costs depend on the characteristics a and b. Products
    are randomly assigned to one of two nesting groups, each with its own nesting parameter.
    """
    id_data = build_id_data(T=4, J=18, F=3)

    # both sets of IDs are drawn from freshly seeded states, so they are reproducible and independent of call order
    nesting_ids = np.random.RandomState(0).choice(['f', 'g'], id_data.size)
    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)

    # there is no nonlinear formulation, hence the None in the middle
    linear_formulation = Formulation('0 + prices + x + y')
    cost_formulation = Formulation('0 + a + b')

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'nesting_ids': nesting_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[-5, 1, 1],
        gamma=[2, 1],
        rho=[0.1, 0.2],
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    return simulation, simulation.replace_endogenous(), {}, []
def small_blp_simulation() -> SimulationFixture:
    """Build a BLP simulation with three markets and replace its endogenous variables.

    Demand is linear in prices and x, and x also enters nonlinearly. Costs depend on the characteristics a and b.
    Instead of being simulated from variances, xi and omega are sums of scaled uniform draws that share a common
    component.
    """
    id_data = build_id_data(T=3, J=18, F=3)

    # three columns of scaled uniform draws: one shared by xi and omega, one specific to xi, one specific to omega
    uniform = 0.001 * np.random.RandomState(0).uniform(size=(id_data.size, 3))
    shared_draws, xi_draws, omega_draws = uniform.T

    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)
    product_formulations = (
        Formulation('0 + prices + x'),
        Formulation('0 + x'),
        Formulation('0 + a + b'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[-5, 1],
        sigma=2,
        gamma=[2, 1],
        integration=Integration('product', 3),
        xi=shared_draws + xi_draws,
        omega=shared_draws + omega_draws,
        seed=0,
    )
    return simulation, simulation.replace_endogenous(), {}, []
def small_simulation():
    """Solve a simulation with two markets, linear prices, a nonlinear characteristic, a cost characteristic, and an
    acquisition.

    Returns the simulation along with its solved product data, to which a reproducible 'clustering_ids' field has
    been appended.
    """
    # explicitly import the submodule, which a bare `import numpy` is not guaranteed to expose as np.lib.recfunctions
    from numpy.lib import recfunctions

    simulation = Simulation(
        product_formulations=(
            Formulation('0 + prices'),
            Formulation('0 + x'),
            Formulation('0 + a')
        ),
        beta=-5,
        sigma=1,
        gamma=2,
        product_data=build_id_data(T=2, J=18, F=3, mergers=[{1: 0}]),
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0
    )

    # draw clustering IDs from a seeded state (not the global one) so that the fixture is the same on every run
    clustering_ids = np.random.RandomState(0).choice(['a', 'b'], simulation.N)
    product_data = recfunctions.rec_append_fields(simulation.solve(), 'clustering_ids', clustering_ids)
    return simulation, product_data
def large_logit_simulation() -> SimulationFixture:
    """Build a logit simulation with ten markets and replace its endogenous variables without holding costs constant.

    Demand is linear in a constant, prices, and the characteristics x, y, and z. Costs use a log specification in
    log(x), a, b, and a term that depends on shares, which makes them quantity-dependent.
    """
    id_data = build_id_data(T=10, J=20, F=9)
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    # there is no nonlinear formulation, hence the None in the middle
    linear_formulation = Formulation('1 + prices + x + y + z')
    cost_formulation = Formulation('0 + log(x) + a + b + I(0.5 * shares)')

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, -6, 1, 2, 3],
        gamma=[0.1, 0.2, 0.3, -0.2],
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.1,
        costs_type='log',
        seed=2,
    )
    return simulation, simulation.replace_endogenous(constant_costs=False), {}, []
def test_ids(
        formula_data: Data, formulas: Iterable[str],
        build_columns: Callable[[Mapping[str, Array]], Sequence[Array]]) -> None:
    """Check that each of a set of equivalent absorb formulas builds the expected IDs.

    Each formulation must also have a non-empty string representation.
    """

    # work on a deep copy so that convenience columns are not added to the caller's data
    formula_data = copy.deepcopy(formula_data)

    def add_tuples(key, columns):
        """Store an object column of row-wise tuples under the key, unless the key is already present."""
        if key in formula_data:
            return
        tuples = np.empty_like(columns[0], np.object_)
        tuples[:] = list(zip(*columns))
        formula_data[key] = tuples

    # create convenience columns of tuples for every ordered pair and triple of the original variables
    original_items = tuple(formula_data.items())
    for (key1, values1), (key2, values2), (key3, values3) in itertools.product(original_items, repeat=3):
        pair_key = f'{key1}{key2}'
        triple_key = f'{key1}{key2}{key3}'
        add_tuples(pair_key, (values1, values2))
        add_tuples(triple_key, (values1, values2, values3))

    # build and compare columns for each formula, making sure that it can be formatted
    for absorb in formulas:
        formulation = Formulation('x', absorb)
        assert str(formulation)
        actual_ids = formulation._build_ids(formula_data)
        desired_ids = np.column_stack(build_columns(formula_data))
        np.testing.assert_array_equal(actual_ids, desired_ids, err_msg=absorb)
def medium_simulation():
    """Build and solve a simulation with four markets and a double acquisition.

    The nonlinear and cost formulations each include a constant. There are two linear characteristics (x and y), two
    cost characteristics (a and b), a demographic (f) interacted with prices, and a non-standard ownership structure.
    """
    # firms zero and one are both acquired by firm two
    id_data = build_id_data(T=4, J=25, F=6, mergers=[{0: 2, 1: 2}])

    def kappa(f, g):
        """Own products get a weight of one, and pairs of firms with IDs above three get a weight of 0.1."""
        if f == g:
            return 1
        if f > 3 and g > 3:
            return 0.1
        return 0

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'ownership': build_ownership(id_data, kappa),
    }
    linear_formulation = Formulation('0 + x + y')
    nonlinear_formulation = Formulation('1 + prices')
    cost_formulation = Formulation('1 + a + b')

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[2, 1],
        sigma=[[0.5, 0], [0, 0]],
        gamma=[1, 1, 2],
        product_data=product_data,
        agent_formulation=Formulation('0 + f'),
        pi=[[0], [-3]],
        integration=Integration('product', 4),
        xi_variance=0.0001,
        omega_variance=0.0001,
        correlation=0.8,
        seed=1,
    )
    return simulation, simulation.solve()
def test_invalid_formula(formula_data, formula):
    """Test that an invalid formula gives rise to an exception.

    The test passes if either constructing the formulation or building it with the data raises an exception.
    Otherwise, a RuntimeError that reports the unexpectedly successful formulation is raised.
    """
    try:
        formulation = Formulation(formula)
        formulation._build(formula_data)
    except Exception:
        # only ordinary errors count as a pass: a bare except would also swallow KeyboardInterrupt and SystemExit
        return
    raise RuntimeError(f"The formula '{formula}' was successfully formulated as '{formulation}'.")
def medium_blp_simulation() -> SimulationFixture:
    """Build a BLP simulation with ten markets, replace its endogenous variables, and simulate a micro moment.

    The linear, nonlinear, and cost formulations each include a constant. Demand is also linear in x and prices,
    second-degree prices enter nonlinearly and are interacted with a demographic (f), and costs depend on a and b.
    There is an alternative ownership structure and epsilon is scaled by 0.7. The single micro moment is computed in
    one market, and its value is replaced by the simulation results.
    """
    id_data = build_id_data(T=10, J=25, F=6)

    def kappa(f, g):
        """Own products get a weight of one, and pairs of firms with IDs above three get a weight of 0.1."""
        if f == g:
            return 1
        if f > 3 and g > 3:
            return 0.1
        return 0

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(1).choice(range(20), id_data.size),
        'ownership': build_ownership(id_data, kappa),
    }
    product_formulations = (
        Formulation('1 + x + prices'),
        Formulation('1 + I(prices**2)'),
        Formulation('1 + a + b'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data=product_data,
        beta=[1, 2, -3],
        sigma=[[0.5, 0], [0.0, 0]],
        pi=[[0.0], [-0.1]],
        gamma=[1, 1, 2],
        agent_formulation=Formulation('0 + f'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.8,
        epsilon_scale=0.7,
        seed=1,
    )
    simulation_results = simulation.replace_endogenous()

    # the micro dataset weights every agent-product pair equally within a single market
    inside_dataset = MicroDataset(
        name="inside",
        observations=simulation.N,
        compute_weights=lambda _, p, a: np.ones((a.size, p.size)),
        market_ids=[simulation.unique_market_ids[2]],
    )

    # the micro moment interacts the first column of X2 with the first demographic
    interaction_moment = MicroMoment(
        name="demographic interaction",
        dataset=inside_dataset,
        value=0,
        compute_values=lambda _, p, a: p.X2[:, [0]].T * a.demographics[:, [0]],
    )
    simulated_micro_moments = simulation_results.replace_micro_moment_values([interaction_moment])
    return simulation, simulation_results, {}, simulated_micro_moments
def large_blp_simulation() -> SimulationFixture:
    """Build a BLP simulation with 20 markets and varying numbers of products per market, and replace its endogenous
    variables.

    Demand is linear in a constant and the characteristics x, y, z, and q. Negated prices and x enter nonlinearly,
    with a lognormal distribution for the first and a normal one for the second. Demographics are a constant, f, and
    g. Costs use a log specification in log(x), log(a), and log(b). Parameter matrices are dense. Both demand- and
    supply-side instruments are overridden with differentiation instruments supplemented by characteristics from the
    other side, and there are two first choice covariance micro moments.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    # keep a random half of the products so that the number of products varies across markets
    shuffled_indices = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * id_data.size)
    id_data = id_data[shuffled_indices[:kept_count]]

    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)
    product_formulations = (
        Formulation('1 + x + y + z + q'),
        Formulation('0 + I(-prices) + x'),
        Formulation('0 + log(x) + log(a) + log(b)'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[[0.5, 0], [-0.1, 2]],
        pi=[[2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['lognormal', 'normal'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    # demand-side instruments: differentiation instruments from demand characteristics, plus cost characteristics
    demand_differentiation = build_differentiation_instruments(
        Formulation('0 + x + y + z + q'), simulation_results.product_data
    )
    demand_supplement = build_matrix(Formulation('0 + a + b'), simulation_results.product_data)

    # supply-side instruments: differentiation instruments from cost characteristics, plus demand characteristics
    supply_differentiation = build_differentiation_instruments(
        Formulation('0 + x + a + b'), simulation_results.product_data
    )
    supply_supplement = build_matrix(Formulation('0 + y + z + q'), simulation_results.product_data)

    simulated_data_override = {
        'demand_instruments': np.c_[demand_differentiation, demand_supplement],
        'supply_instruments': np.c_[supply_differentiation, supply_supplement],
    }

    # micro moments are computed in the first five and in the last three markets
    first_markets = simulation.unique_market_ids[:5]
    last_markets = simulation.unique_market_ids[-3:]
    simulated_micro_moments = [
        FirstChoiceCovarianceMoment(X2_index=1, demographics_index=1, value=0, market_ids=first_markets),
        FirstChoiceCovarianceMoment(X2_index=0, demographics_index=1, value=0, market_ids=last_markets),
    ]
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
def large_nested_blp_simulation() -> SimulationFixture:
    """Build a nested BLP simulation with 20 markets and varying numbers of products per market, and replace its
    endogenous variables.

    Demand is linear in a constant and the characteristics x, y, z, and q. Negated prices and x enter nonlinearly,
    with a lognormal distribution for the first and a normal one for the second. Demographics are a constant, f, and
    g. Costs use a log specification in log(x), log(a), and log(b). Products are randomly assigned to one of three
    nesting groups that share one nesting parameter, and there is a single demographic expectation micro moment.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    # keep a random half of the products so that the number of products varies across markets
    shuffled_indices = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * id_data.size)
    id_data = id_data[shuffled_indices[:kept_count]]

    # both sets of IDs are drawn from freshly seeded states, so they are reproducible and independent of call order
    nesting_ids = np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size)
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    product_formulations = (
        Formulation('1 + x + y + z + q'),
        Formulation('0 + I(-prices) + x'),
        Formulation('0 + log(x) + log(a) + log(b)'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'nesting_ids': nesting_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[[0.5, 0], [0.0, 2]],
        pi=[[2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        rho=0.1,
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['lognormal', 'normal'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    # the micro moment is computed in the fourth and fifth markets
    expectation_moment = DemographicExpectationMoment(
        product_id=None, demographics_index=1, value=0, market_ids=simulation.unique_market_ids[3:5]
    )
    return simulation, simulation_results, {}, [expectation_moment]
def medium_blp_simulation() -> SimulationFixture:
    """Build a BLP simulation with four markets and replace its endogenous variables.

    The linear, nonlinear, and cost formulations each include a constant. There are two linear characteristics
    (x and y), two cost characteristics (a and b), a demographic (f) interacted with second-degree prices, an
    alternative ownership structure, and an epsilon that is scaled by 0.7. The single demographic covariance micro
    moment is computed in one market.
    """
    id_data = build_id_data(T=4, J=25, F=6)

    def kappa(f, g):
        """Own products get a weight of one, and pairs of firms with IDs above three get a weight of 0.1."""
        if f == g:
            return 1
        if f > 3 and g > 3:
            return 0.1
        return 0

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(1).choice(range(20), id_data.size),
        'ownership': build_ownership(id_data, kappa),
    }
    product_formulations = (
        Formulation('1 + x + y'),
        Formulation('1 + I(prices**2)'),
        Formulation('1 + a + b'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data=product_data,
        beta=[1, 2, 1],
        sigma=[[0.5, 0], [0.0, 0]],
        pi=[[0], [-3]],
        gamma=[1, 1, 2],
        agent_formulation=Formulation('0 + f'),
        integration=Integration('product', 4),
        xi_variance=0.0001,
        omega_variance=0.0001,
        correlation=0.8,
        epsilon_scale=0.7,
        seed=1,
    )
    simulation_results = simulation.replace_endogenous()

    # the micro moment is computed only in the third market
    covariance_moment = DemographicCovarianceMoment(
        X2_index=0,
        demographics_index=0,
        value=0,
        observations=simulation.N,
        market_ids=[simulation.unique_market_ids[2]],
    )
    return simulation, simulation_results, {}, [covariance_moment]
def large_nested_blp_simulation() -> SimulationFixture:
    """Build and solve a nested BLP simulation with 20 markets and varying numbers of products per market.

    Demand is linear in a constant, prices, and the characteristics x, y, z, and q. Prices and x also enter
    nonlinearly and are interacted with the demographics f and g. Costs use a log specification in log(x), log(a),
    and log(b). Products are randomly assigned to one of three nesting groups that share one nesting parameter, and
    there are two covariance micro moments.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    # keep a random half of the products so that the number of products varies across markets
    shuffled_indices = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * id_data.size)
    id_data = id_data[shuffled_indices[:kept_count]]

    # both sets of IDs are drawn from freshly seeded states, so they are reproducible and independent of call order
    nesting_ids = np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size)
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    product_formulations = (
        Formulation('1 + prices + x + y + z + q'),
        Formulation('0 + prices + x'),
        Formulation('0 + log(x) + log(a) + log(b)'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'nesting_ids': nesting_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, 0], [0, 2]],
        pi=[[1, 0], [0, 2]],
        gamma=[0.1, 0.2, 0.3],
        rho=0.1,
        agent_formulation=Formulation('0 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.solve()

    # pair each of the two nonlinear characteristics with the demographic that has the same index
    simulated_micro_moments = [
        ProductsAgentsCovarianceMoment(X2_index=index, demographics_index=index, value=0) for index in range(2)
    ]
    return simulation, simulation_results, simulated_micro_moments
def small_simulation():
    """Build and solve a simulation with two markets and an acquisition.

    Demand is linear in prices, the characteristic x enters nonlinearly, and costs depend on the single
    characteristic a.
    """
    linear_formulation = Formulation('0 + prices')
    nonlinear_formulation = Formulation('0 + x')
    cost_formulation = Formulation('0 + a')
    id_data = build_id_data(T=2, J=18, F=3, mergers=[{1: 0}])

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=-5,
        sigma=1,
        gamma=2,
        product_data=id_data,
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    return simulation, simulation.solve()
def test_optimal_instruments(simulated_problem: SimulatedProblemFixture, compute_options: Options) -> None:
    """Check that, under optimal instruments, starting from half of the true parameter values still gives estimates
    within 10% of the true values.
    """
    simulation, product_data, problem, solve_options, problem_results = simulated_problem

    # convert the product data into a mutable mapping from field names to columns
    mutable_data = {}
    for field in product_data.dtype.names:
        mutable_data[field] = product_data[field]
    product_data = mutable_data

    # add each demand-side instrument as its own named column so that it can be referenced in a formula
    instrument_columns = {f'ZD{index}': column for index, column in enumerate(problem.products.ZD.T)}
    product_data.update(instrument_columns)
    ZD_names: List[str] = list(instrument_columns)

    # without a supply side, expected prices come from a reduced form regression of prices on all instruments
    if problem.K3 == 0:
        ZD_formula = ' + '.join(ZD_names)
        expected_prices = compute_fitted_values(product_data['prices'], Formulation(f'0 + {ZD_formula}'), product_data)
    else:
        expected_prices = None

    # compute optimal instruments with only a few draws (to speed up the test) and update the problem
    compute_options = compute_options.copy()
    compute_options.update(draws=5, seed=0, expected_prices=expected_prices)
    optimal_instrument_results = problem_results.compute_optimal_instruments(**compute_options)
    new_problem = optimal_instrument_results.to_problem()

    # start the new problem from half of the original starting values
    updated_solve_options = solve_options.copy()
    halved_options = {}
    for name in ['sigma', 'pi', 'rho', 'beta']:
        halved_options[name] = 0.5 * solve_options[name]
    updated_solve_options.update(halved_options)
    new_results = new_problem.solve(**updated_solve_options)

    # compare estimates with the true values, including gamma only if there is a supply side
    keys = ['beta', 'sigma', 'pi', 'rho'] + (['gamma'] if problem.K3 > 0 else [])
    for key in keys:
        true_values = getattr(simulation, key)
        estimates = getattr(new_results, key)
        np.testing.assert_allclose(true_values, estimates, atol=0, rtol=0.1, err_msg=key)
def large_nested_blp_simulation() -> SimulationFixture:
    """Build and solve a nested BLP simulation with ten markets, an acquisition, and a triple acquisition.

    Demand is linear in a constant, prices, and the characteristics x, y, z, and q. Prices and x also enter
    nonlinearly and are interacted with the demographics f and g. Costs use a log specification in log(x), a, b, c,
    and d. Products are randomly assigned to one of three nesting groups that share one nesting parameter.
    """
    # firm zero is acquired by firm four, and firms one through three are all acquired by firm five
    id_data = build_id_data(T=10, J=20, F=9, mergers=[{0: 4, 1: 5, 2: 5, 3: 5}])

    # both sets of IDs are drawn from freshly seeded states, so they are reproducible and independent of call order
    nesting_ids = np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size)
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    linear_formulation = Formulation('1 + prices + x + y + z + q')
    nonlinear_formulation = Formulation('0 + prices + x')
    cost_formulation = Formulation('0 + log(x) + a + b + c + d')

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, 0], [0, 2]],
        gamma=[0.1, 0.2, 0.3, 0.1, 0.3],
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'nesting_ids': nesting_ids,
            'clustering_ids': clustering_ids,
        },
        agent_formulation=Formulation('0 + f + g'),
        pi=[[1, 0], [0, 2]],
        integration=Integration('product', 4),
        rho=0.05,
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    return simulation, simulation.solve()
def test_invalid_formulas(formula_data, exception, formula, absorb):
    """Check that an invalid formula raises the expected exception.

    The exception may come from constructing the formulation, from building its matrix, or, when there is an absorb
    formula, from building its IDs. The traceback of the expected exception is printed. If nothing is raised, a
    RuntimeError reports the unexpectedly successful formulation.
    """
    try:
        formulation = Formulation(formula, absorb)
        formulation._build_matrix(formula_data)
        if absorb is not None:
            formulation._build_ids(formula_data)
    except exception:
        # the expected failure: print its traceback to help with debugging and let the test pass
        print(traceback.format_exc())
    else:
        raise RuntimeError(f"Successful formulation: {formulation}.")
def test_valid_formulas(formula_data, formulas, build_columns, build_derivatives):
    """Check that each of a set of equivalent formulas builds the expected columns and derivatives.

    Derivatives are taken with respect to x.
    """

    # columns of ones and zeros with the same shape as x, used to broadcast scalars and to supplement the data
    unit_column = np.ones_like(formula_data['x'])
    null_column = np.zeros_like(formula_data['x'])

    for formula in formulas:
        # build the matrix directly and then re-evaluate each column formulation on the underlying data
        matrix, column_formulations, underlying_data = Formulation(formula)._build(formula_data)
        evaluated_columns = []
        for column_formulation in column_formulations:
            evaluated_columns.append(unit_column * column_formulation.evaluate(underlying_data))
        evaluated_matrix = np.column_stack(evaluated_columns)
        derivative_columns = []
        for column_formulation in column_formulations:
            derivative_columns.append(unit_column * column_formulation.evaluate_derivative('x', underlying_data))
        derivatives = np.column_stack(derivative_columns)

        # the expected values are built from the underlying data supplemented with '1' and '0' columns
        supplemented_data = {'1': unit_column, '0': null_column}
        supplemented_data.update(underlying_data)
        expected_matrix = np.column_stack(build_columns(supplemented_data))
        expected_derivatives = np.column_stack(build_derivatives(supplemented_data))

        # compare each actual result with its expected counterpart
        comparisons = [
            (matrix, expected_matrix),
            (matrix, evaluated_matrix),
            (derivatives, expected_derivatives),
        ]
        for actual, desired in comparisons:
            np.testing.assert_allclose(actual, desired, rtol=0, atol=1e-14, err_msg=formula)
def large_blp_simulation() -> SimulationFixture:
    """Build and solve a BLP simulation with 20 markets and varying numbers of products per market.

    Demand is linear in a constant, prices, and the characteristics x, y, z, and q. Prices and x also enter
    nonlinearly and are interacted with the demographics f and g. Costs use a log specification in log(x), log(a),
    and log(b). Parameter matrices are dense. After solving, demand-side instruments are replaced with
    differentiation instruments supplemented by cost characteristics.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    # keep a random half of the products so that the number of products varies across markets
    shuffled_indices = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * id_data.size)
    id_data = id_data[shuffled_indices[:kept_count]]

    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)
    product_formulations = (
        Formulation('1 + prices + x + y + z + q'),
        Formulation('0 + prices + x'),
        Formulation('0 + log(x) + log(a) + log(b)'),
    )
    simulation = Simulation(
        product_formulations=product_formulations,
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, -0.1], [0, 2.0]],
        gamma=[0.1, 0.2, 0.3],
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        agent_formulation=Formulation('0 + f + g'),
        pi=[[1, 0], [0, 2]],
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.solve()

    # build differentiation instruments from demand characteristics and supplement them with cost characteristics
    local_instruments = build_differentiation_instruments(
        Formulation('0 + x + y + z + q'), simulation_results.product_data
    )
    cost_shifters = build_matrix(Formulation('0 + a + b'), simulation_results.product_data)
    differentiation_instruments = np.c_[local_instruments, cost_shifters]

    # replace the demand-side instruments in the solved product data, keeping their original data type
    instrument_dtype = simulation_results.product_data.demand_instruments.dtype
    simulation_results.product_data = update_matrices(
        simulation_results.product_data, {'demand_instruments': (differentiation_instruments, instrument_dtype)}
    )
    return simulation, simulation_results
def test_ids(formula_data, formulas, build_columns):
    """Test that equivalent formulas build IDs as expected.

    For every ordered pair and triple of variables in the data, a convenience column of row-wise tuples is added
    under the concatenation of the variables' names. These columns are added to a copy, so the data that was passed
    in is left unchanged. IDs built from each absorb formula are then compared with the expected columns.
    """

    # snapshot the original variables and work on a shallow copy so that the caller's mapping is not mutated
    original_items = list(formula_data.items())
    formula_data = formula_data.copy()

    # create convenience columns of tuples of categorical variables
    for (key1, values1), (key2, values2), (key3, values3) in itertools.product(original_items, repeat=3):
        key12 = f'{key1}{key2}'
        key123 = f'{key1}{key2}{key3}'
        if key12 not in formula_data:
            # np.object_ is used because the np.object alias has been removed from numpy
            values12 = np.empty_like(values1, np.object_)
            values12[:] = list(zip(values1, values2))
            formula_data[key12] = values12
        if key123 not in formula_data:
            values123 = np.empty_like(values1, np.object_)
            values123[:] = list(zip(values1, values2, values3))
            formula_data[key123] = values123

    # build and compare columns for each formula
    for absorb in formulas:
        ids = Formulation('', absorb)._build_ids(formula_data)
        expected_ids = np.column_stack(build_columns(formula_data))
        np.testing.assert_array_equal(ids, expected_ids, err_msg=absorb)
def test_fixed_effects(
        simulated_problem: SimulatedProblemFixture, ED: int, ES: int,
        absorb_method: Optional[Union[str, Iteration]]) -> None:
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as does including indicator variables. Also test that optimal instruments results
    and marginal costs remain unchanged.

    Three problems are solved and compared: one in which all fixed effects are absorbed (using ``absorb_method``), one
    in which they are all included as indicator variables, and one in which the first fixed effect on each side is an
    indicator and the remaining ones are absorbed.

    Parameters
    ----------
    simulated_problem
        Five-element fixture: the simulation, its results, the problem, solve options, and problem results (the last
        element is not used here).
    ED
        Number of demand-side fixed effects to add.
    ES
        Number of supply-side fixed effects to add. Forced to zero when the problem has no supply side (``K3 == 0``).
    absorb_method
        Passed through to the formulations of the fully absorbed problem.
    """
    simulation, simulation_results, problem, solve_options, _ = simulated_problem

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0

    # without any fixed effects there is nothing to compare (ED and ES are not modified below this point)
    if ED == ES == 0:
        return

    # make product data mutable
    product_data = {k: simulation_results.product_data[k] for k in simulation_results.product_data.dtype.names}

    # remove constants and delete associated elements in the initial beta
    solve_options = solve_options.copy()
    product_formulations = list(problem.product_formulations)
    if ED > 0:
        assert product_formulations[0] is not None
        constant_indices = [i for i, e in enumerate(product_formulations[0]._expressions) if not e.free_symbols]
        solve_options['beta'] = np.delete(solve_options['beta'], constant_indices, axis=0)
        product_formulations[0] = Formulation(f'{product_formulations[0]._formula} - 1')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations[2] = Formulation(f'{product_formulations[2]._formula} - 1')

    # add fixed effect IDs to the data (a single seeded state is shared, so demand IDs are drawn before supply IDs)
    demand_id_names: List[str] = []
    supply_id_names: List[str] = []
    state = np.random.RandomState(seed=0)
    for side, count, names in [('demand', ED, demand_id_names), ('supply', ES, supply_id_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            product_data[name] = state.choice(['a', 'b', 'c'], problem.N)
            names.append(name)

    # split apart excluded demand-side instruments so they can be included in formulations
    for index, instrument in enumerate(product_data['demand_instruments'].T):
        product_data[f'demand_instrument{index}'] = instrument

    # build formulas for the IDs
    demand_id_formula = ' + '.join(demand_id_names)
    supply_id_formula = ' + '.join(supply_id_names)

    def pad_initial_beta(indicator_problem):  # type: ignore
        """Copy the solve options, padding the initial beta with NaNs for any added indicator columns."""
        padded_options = solve_options.copy()
        initial_beta = padded_options['beta']
        padding = np.full((indicator_problem.K1 - initial_beta.size, 1), np.nan)
        padded_options['beta'] = np.r_[initial_beta, padding]
        return padded_options

    # solve the first stage of a problem in which the fixed effects are absorbed
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations1[0] = Formulation(product_formulations[0]._formula, demand_id_formula, absorb_method)
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations1[2] = Formulation(product_formulations[2]._formula, supply_id_formula, absorb_method)
    problem1 = Problem(product_formulations1, product_data, problem.agent_formulation, simulation.agent_data)
    problem_results1 = problem1.solve(**solve_options.copy())

    # solve the first stage of a problem in which fixed effects are included as indicator variables
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations2[0] = Formulation(f'{product_formulations[0]._formula} + {demand_id_formula}')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations2[2] = Formulation(f'{product_formulations[2]._formula} + {supply_id_formula}')
    problem2 = Problem(product_formulations2, product_data, problem.agent_formulation, simulation.agent_data)
    problem_results2 = problem2.solve(**pad_initial_beta(problem2))

    # solve the first stage of a problem in which some fixed effects are absorbed and some are included as indicators
    # (at least one of ED and ES is positive here, so this problem always differs from the base formulation)
    product_formulations3 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations3[0] = Formulation(
            f'{product_formulations[0]._formula} + {demand_id_names[0]}', ' + '.join(demand_id_names[1:]) or None
        )
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations3[2] = Formulation(
            f'{product_formulations[2]._formula} + {supply_id_names[0]}', ' + '.join(supply_id_names[1:]) or None
        )
    problem3 = Problem(product_formulations3, product_data, problem.agent_formulation, simulation.agent_data)
    problem_results3 = problem3.solve(**pad_initial_beta(problem3))

    # compute optimal instruments (use only two draws for speed; accuracy is not a concern here)
    Z_results1 = problem_results1.compute_optimal_instruments(draws=2, seed=0)
    Z_results2 = problem_results2.compute_optimal_instruments(draws=2, seed=0)
    Z_results3 = problem_results3.compute_optimal_instruments(draws=2, seed=0)

    # compute marginal costs
    costs1 = problem_results1.compute_costs()
    costs2 = problem_results2.compute_costs()
    costs3 = problem_results3.compute_costs()

    # choose tolerances (be more flexible with iterative de-meaning)
    atol = 1e-8
    rtol = 1e-5
    if ED > 2 or ES > 2 or isinstance(absorb_method, Iteration):
        atol *= 10
        rtol *= 10

    # test that all problem results expected to be identical are essentially identical
    problem_results_keys = [
        'theta', 'sigma', 'pi', 'rho', 'beta', 'gamma', 'sigma_se', 'pi_se', 'rho_se', 'beta_se', 'gamma_se',
        'delta', 'tilde_costs', 'xi', 'omega', 'xi_by_theta_jacobian', 'omega_by_theta_jacobian', 'objective',
        'gradient', 'gradient_norm', 'sigma_gradient', 'pi_gradient', 'rho_gradient', 'beta_gradient',
        'gamma_gradient'
    ]
    linear_parameter_keys = {'beta', 'gamma', 'beta_se', 'gamma_se', 'beta_gradient', 'gamma_gradient'}
    for key in problem_results_keys:
        result1 = getattr(problem_results1, key)
        result2 = getattr(problem_results2, key)
        result3 = getattr(problem_results3, key)
        if key in linear_parameter_keys:
            # problems with indicator variables carry extra trailing elements, which are dropped before comparing
            result2 = result2[:result1.size]
            result3 = result3[:result1.size]
        np.testing.assert_allclose(result1, result2, atol=atol, rtol=rtol, err_msg=key)
        np.testing.assert_allclose(result1, result3, atol=atol, rtol=rtol, err_msg=key)

    # test that all optimal instrument results expected to be identical are essentially identical
    Z_results_keys = [
        'demand_instruments', 'supply_instruments', 'inverse_covariance_matrix', 'expected_xi_by_theta_jacobian',
        'expected_omega_by_theta_jacobian'
    ]
    for key in Z_results_keys:
        result1 = getattr(Z_results1, key)
        result2 = getattr(Z_results2, key)
        result3 = getattr(Z_results3, key)
        np.testing.assert_allclose(result1, result2, atol=atol, rtol=rtol, err_msg=key)
        np.testing.assert_allclose(result1, result3, atol=atol, rtol=rtol, err_msg=key)

    # test that marginal costs are essentially identical
    np.testing.assert_allclose(costs1, costs2, atol=atol, rtol=rtol)
    np.testing.assert_allclose(costs1, costs3, atol=atol, rtol=rtol)