def large_blp_simulation() -> SimulationFixture:
    """Solve a simulation with 20 markets, varying numbers of products per market, a linear constant, log-normal
    coefficients on prices, a linear/nonlinear/cost characteristic, another three linear characteristics, another two
    cost characteristics, demographics interacted with prices and the linear/nonlinear/cost characteristic, dense
    parameter matrices, a log-linear cost specification, and local differentiation instruments.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    keep = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(keep)
    id_data = id_data[keep[:int(0.5 * id_data.size)]]

    simulation = Simulation(
        product_formulations=(
            Formulation('1 + x + y + z + q'),
            Formulation('0 + I(-prices) + x'),
            Formulation('0 + log(x) + log(a) + log(b)')
        ),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size)
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[
            [+0.5, 0],
            [-0.1, 2]
        ],
        pi=[
            [2, 1, 0],
            [0, 0, 2]
        ],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['lognormal', 'normal'],
        costs_type='log',
        seed=2
    )
    simulation_results = simulation.replace_endogenous()
    simulated_data_override = {
        'demand_instruments': np.c_[
            build_differentiation_instruments(Formulation('0 + x + y + z + q'), simulation_results.product_data),
            build_matrix(Formulation('0 + a + b'), simulation_results.product_data)
        ],
        'supply_instruments': np.c_[
            build_differentiation_instruments(Formulation('0 + x + a + b'), simulation_results.product_data),
            build_matrix(Formulation('0 + y + z + q'), simulation_results.product_data)
        ]
    }
    simulated_micro_moments = [
        FirstChoiceCovarianceMoment(
            X2_index=1, demographics_index=1, value=0, market_ids=simulation.unique_market_ids[:5]
        ),
        FirstChoiceCovarianceMoment(
            X2_index=0, demographics_index=1, value=0, market_ids=simulation.unique_market_ids[-3:]
        )
    ]
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
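
# A minimal sketch (not part of the original fixtures) of how the tuple returned above might be consumed: the
# simulated product data is copied into a dictionary, the overridden instrument columns are swapped in, and a Problem
# is built from the simulation's own formulations. The helper name `build_problem_from_simulation` is illustrative,
# not part of pyblp's API.
def build_problem_from_simulation(simulation, simulation_results, simulated_data_override):
    # copy the structured product data into a dictionary so that individual fields can be replaced
    product_data = {k: simulation_results.product_data[k] for k in simulation_results.product_data.dtype.names}

    # swap in the overridden demand- and supply-side instruments
    product_data.update(simulated_data_override)

    # re-use the simulation's formulations and agent data to construct the estimation problem
    return Problem(simulation.product_formulations, product_data, simulation.agent_formulation, simulation.agent_data)
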
def large_blp_simulation() -> SimulationFixture:
    """Solve a simulation with 20 markets, varying numbers of products per market, a linear constant, linear/nonlinear
    prices, a linear/nonlinear/cost characteristic, another three linear characteristics, another two cost
    characteristics, demographics interacted with prices and the linear/nonlinear/cost characteristic, dense parameter
    matrices, a log-linear cost specification, and local differentiation instruments on the demand side.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    keep = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(keep)
    id_data = id_data[keep[:int(0.5 * id_data.size)]]

    simulation = Simulation(
        product_formulations=(
            Formulation('1 + prices + x + y + z + q'),
            Formulation('0 + prices + x'),
            Formulation('0 + log(x) + log(a) + log(b)')
        ),
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[
            [1, -0.1],
            [0, +2.0]
        ],
        gamma=[0.1, 0.2, 0.3],
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size)
        },
        agent_formulation=Formulation('0 + f + g'),
        pi=[
            [1, 0],
            [0, 2]
        ],
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2
    )
    simulation_results = simulation.solve()
    differentiation_instruments = np.c_[
        build_differentiation_instruments(Formulation('0 + x + y + z + q'), simulation_results.product_data),
        build_matrix(Formulation('0 + a + b'), simulation_results.product_data)
    ]
    simulation_results.product_data = update_matrices(simulation_results.product_data, {
        'demand_instruments': (differentiation_instruments, simulation_results.product_data.demand_instruments.dtype)
    })
    return simulation, simulation_results
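
# The docstrings above refer to "local" differentiation instruments, which is the default version produced by
# pyblp.build_differentiation_instruments. For reference, a sketch of how the alternative "quadratic" Gandhi-Houde
# instruments could be built for the same demand-side characteristics; the `version` keyword and its accepted values
# ('local' and 'quadratic') are assumptions based on pyblp's documented interface.
def build_quadratic_demand_instruments(simulation_results):
    return np.c_[
        build_differentiation_instruments(
            Formulation('0 + x + y + z + q'), simulation_results.product_data, version='quadratic'
        ),
        build_matrix(Formulation('0 + a + b'), simulation_results.product_data)
    ]
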
def get_blp_logit():
    product_data_df = product_data.to_dict('series')
    logit_products = product_data_df.copy()
    inst_form = pyblp.Formulation('1 + hpwt + air + mpd + space')
    X = pyblp.build_matrix(inst_form, logit_products)

    # get the "original instruments"
    orig_inst = np.apply_along_axis(zscore, 0, original_inst(X, logit_products))

    # solve one logit
    # first argument: instruments
    # second argument: logit_products (which are a copy)
    problem_logit, results_logit = solve_logit_blp(np.c_[X, orig_inst], logit_products)
    save_pyblp_results(results_logit, problem_logit, filename_logit)
    return results_logit
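
# `solve_logit_blp`, `original_inst`, `zscore`, `save_pyblp_results`, and `filename_logit` are helpers defined
# elsewhere in this module. For context, a plausible sketch of `solve_logit_blp` under the assumption that it
# estimates a plain logit (no random coefficients) with pyblp, taking pre-built instruments and a copy of the product
# data that already contains the fields pyblp requires (market_ids, shares, prices, and the characteristics below).
# The exact signature and linear formula are assumptions, not pyblp's API.
def solve_logit_blp(instruments, logit_products):
    # attach the instruments to the product data
    logit_products['demand_instruments'] = instruments

    # a logit problem has only a linear formulation and can be solved without any nonlinear parameters
    logit_formulation = pyblp.Formulation('1 + prices + hpwt + air + mpd + space')
    problem = pyblp.Problem(logit_formulation, logit_products)
    results = problem.solve()
    return problem, results
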
def large_blp_simulation() -> SimulationFixture:
    """Solve a simulation with 20 markets, varying numbers of products per market, a linear constant, log-normal
    coefficients on prices, a linear/nonlinear/cost characteristic, another three linear characteristics, another two
    cost characteristics, demographics interacted with prices and the linear/nonlinear/cost characteristic, dense
    parameter matrices, a log-linear cost specification, and local differentiation instruments.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    keep = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(keep)
    id_data = id_data[keep[:int(0.5 * id_data.size)]]

    product_ids = id_data.market_ids.copy()
    for t in np.unique(id_data.market_ids):
        product_ids[id_data.market_ids == t] = np.arange((id_data.market_ids == t).sum())

    simulation = Simulation(
        product_formulations=(
            Formulation('1 + x + y + z + q'),
            Formulation('1 + I(-prices) + x'),
            Formulation('0 + log(x) + log(a) + log(b)')
        ),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'product_ids': product_ids,
            'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size)
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[
            [0, +0.0, 0],
            [0, +0.5, 0],
            [0, -0.2, 2]
        ],
        pi=[
            [0, 0, 0],
            [2, 1, 0],
            [0, 0, 2]
        ],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['normal', 'lognormal', 'normal'],
        costs_type='log',
        seed=2
    )
    simulation_results = simulation.replace_endogenous()
    simulated_data_override = {
        'demand_instruments': np.c_[
            build_differentiation_instruments(Formulation('0 + x + y + z + q'), simulation_results.product_data),
            build_matrix(Formulation('0 + a + b'), simulation_results.product_data)
        ],
        'supply_instruments': np.c_[
            build_differentiation_instruments(Formulation('0 + x + a + b'), simulation_results.product_data),
            build_matrix(Formulation('0 + y + z + q'), simulation_results.product_data)
        ]
    }
    simulated_micro_moments = [
        DemographicExpectationMoment(product_ids=[0], demographics_index=1, value=0, observations=simulation.N),
        DemographicExpectationMoment(
            product_ids=[None, 0], demographics_index=1, value=0, observations=simulation.N,
            market_ids=simulation.unique_market_ids[1:4], market_weights=[0.2, 0.4, 0.4],
        ),
        DemographicCovarianceMoment(
            X2_index=0, demographics_index=2, value=0, observations=simulation.N,
            market_ids=simulation.unique_market_ids[3:5]
        ),
        DiversionProbabilityMoment(
            product_id1=1, product_id2=0, value=0, observations=simulation.N,
            market_ids=simulation.unique_market_ids[6:10]
        ),
        DiversionProbabilityMoment(
            product_id1=None, product_id2=1, value=0, observations=simulation.N,
            market_ids=[simulation.unique_market_ids[8]]
        ),
        DiversionProbabilityMoment(
            product_id1=1, product_id2=None, value=0, observations=simulation.N,
            market_ids=[simulation.unique_market_ids[9]]
        ),
        DiversionCovarianceMoment(
            X2_index1=1, X2_index2=1, value=0, observations=simulation.N,
            market_ids=[simulation.unique_market_ids[12]]
        ),
    ]
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
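
# The second X2 characteristic above is I(-prices) with a 'lognormal' random coefficient, so under pyblp's lognormal
# parameterization each agent's effective price coefficient is minus the exponential of a normal index and hence
# strictly negative. A small illustrative check (the mean and standard deviation below are arbitrary and are not the
# values used in the simulation):
def illustrate_lognormal_price_coefficient(mu=1.0, sigma=0.5, draws=1000, seed=0):
    nu = np.random.RandomState(seed).standard_normal(draws)
    price_coefficients = -np.exp(mu + sigma * nu)
    assert (price_coefficients < 0).all()
    return price_coefficients
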
def large_blp_simulation() -> SimulationFixture:
    """Solve a simulation with 20 markets, varying numbers of products per market, a linear constant, log-normal
    coefficients on prices, a linear/nonlinear/cost characteristic, another three linear characteristics, another two
    cost characteristics, demographics interacted with prices and the linear/nonlinear/cost characteristic, dense
    parameter matrices, a log-linear cost specification, and local differentiation instruments.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    keep = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(keep)
    id_data = id_data[keep[:int(0.5 * id_data.size)]]

    product_ids = id_data.market_ids.copy()
    for t in np.unique(id_data.market_ids):
        product_ids[id_data.market_ids == t] = np.arange((id_data.market_ids == t).sum())

    simulation = Simulation(
        product_formulations=(
            Formulation('1 + x + y + z + q'),
            Formulation('1 + I(-prices) + x'),
            Formulation('0 + log(x) + log(a) + log(b)')
        ),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'product_ids': product_ids,
            'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size)
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[
            [0, +0.0, 0],
            [0, +0.5, 0],
            [0, -0.2, 2]
        ],
        pi=[
            [0, 0, 0],
            [2, 1, 0],
            [0, 0, 2]
        ],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        rc_types=['linear', 'log', 'linear'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()
    simulated_data_override = {
        'demand_instruments': np.c_[
            build_differentiation_instruments(Formulation('0 + x + y + z + q'), simulation_results.product_data),
            build_matrix(Formulation('0 + a + b'), simulation_results.product_data)
        ],
        'supply_instruments': np.c_[
            build_differentiation_instruments(Formulation('0 + x + a + b'), simulation_results.product_data),
            build_matrix(Formulation('0 + y + z + q'), simulation_results.product_data)
        ]
    }

    inside_diversion_micro_dataset = MicroDataset(
        name="diversion from 1",
        observations=simulation.N,
        compute_weights=lambda _, p, a: np.tile(p.product_ids == 1, (a.size, 1, 1 + p.size)),
        market_ids=simulation.unique_market_ids[6:10],
    )
    outside_diversion_micro_dataset = MicroDataset(
        name="diversion from outside",
        observations=simulation.N,
        compute_weights=lambda _, p, a: np.concatenate(
            [np.ones((a.size, 1, 1 + p.size)), np.zeros((a.size, p.size, 1 + p.size))], axis=1
        ),
        market_ids=[simulation.unique_market_ids[8]],
    )
    simulated_micro_moments = simulation_results.replace_micro_moment_values([
        MicroMoment(
            name="demographic 1 expectation for 0",
            dataset=MicroDataset(
                name="product 0",
                observations=simulation.N,
                compute_weights=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1)),
            ),
            value=0,
            compute_values=lambda _, p, a: np.tile(a.demographics[:, [1]], (1, p.size)),
        ),
        MicroMoment(
            name="demographic 1 expectation for 0 and outside",
            dataset=MicroDataset(
                name="product 0 and outside",
                observations=simulation.N,
                compute_weights=lambda _, p, a: np.c_[
                    np.ones((a.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, 1))
                ],
                market_ids=simulation.unique_market_ids[1:4],
            ),
            value=0,
            compute_values=lambda _, p, a: np.tile(a.demographics[:, [1]], (1, 1 + p.size)),
        ),
        MicroMoment(
            name="1 to 0 diversion ratio",
            dataset=inside_diversion_micro_dataset,
            value=0,
            compute_values=lambda _, p, a: np.concatenate(
                [np.zeros((a.size, p.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, p.size, 1))], axis=2
            ),
        ),
        MicroMoment(
            name="outside to 1 diversion ratio",
            dataset=outside_diversion_micro_dataset,
            value=0,
            compute_values=lambda _, p, a: np.concatenate(
                [np.zeros((a.size, 1 + p.size, 1)), np.tile(p.product_ids.flat == 1, (a.size, 1 + p.size, 1))], axis=2
            ),
        ),
        MicroMoment(
            name="1 to outside diversion ratio",
            dataset=inside_diversion_micro_dataset,
            value=0,
            compute_values=lambda _, p, a: np.concatenate(
                [np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2
            ),
        ),
        MicroMoment(
            name="diversion interaction",
            dataset=MicroDataset(
                name="inside first and second",
                observations=simulation.N,
                compute_weights=lambda _, p, a: np.ones((a.size, p.size, p.size)),
                market_ids=[simulation.unique_market_ids[12]],
            ),
            value=0,
            compute_values=lambda _, p, a: (
                np.tile(p.X2[:, [1]], (a.size, 1, p.size)) * np.tile(p.X2[:, [1]].T, (a.size, p.size, 1))
            ),
        ),
    ])
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
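
# The lambdas above lean on the shape conventions of micro dataset arrays: the first axis indexes agent draws, and
# when second choices and the outside option enter, the choice axes have length 1 + p.size with index 0 reserved for
# the outside good. A toy check of the "diversion from outside" weights with a hypothetical 3 agents and 2 products
# (the numbers are illustrative only, not values used in the fixture):
def illustrate_outside_diversion_weights(num_agents=3, num_products=2):
    weights = np.concatenate(
        [np.ones((num_agents, 1, 1 + num_products)), np.zeros((num_agents, num_products, 1 + num_products))], axis=1
    )
    assert weights.shape == (num_agents, 1 + num_products, 1 + num_products)
    return weights
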
def test_fixed_effects(simulated_problem, ED, ES):
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as including indicator variables. Also test that results that should be equal when
    there aren't any fixed effects are indeed equal.
    """
    simulation, product_data, problem, results = simulated_problem

    # test that results that should be equal when there aren't any fixed effects are indeed equal
    for key in ['delta', 'tilde_costs', 'xi', 'omega', 'xi_jacobian', 'omega_jacobian']:
        result = getattr(results, key)
        true_result = getattr(results, f'true_{key}')
        assert (result is not None) == (true_result is not None)
        if result is not None:
            np.testing.assert_allclose(result, true_result, atol=1e-14, rtol=0, err_msg=key)

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # add fixed effect IDs to the data (the probabilities must be passed as p=, otherwise they are silently ignored)
    np.random.seed(0)
    demand_names = []
    supply_names = []
    product_data = {k: product_data[k] for k in product_data.dtype.names}
    for side, count, names in [('demand', ED, demand_names), ('supply', ES, supply_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            ids = np.random.choice(['a', 'b', 'c'], product_data['market_ids'].size, p=[0.7, 0.2, 0.1])
            product_data[name] = ids
            names.append(name)

    # remove constants
    product_formulations = list(problem.product_formulations)
    if ED > 0:
        product_formulations[0] = Formulation(f'{product_formulations[0]._formula} - 1')
        product_data['demand_instruments'] = product_data['demand_instruments'][:, 1:]
    if ES > 0:
        product_formulations[2] = Formulation(f'{product_formulations[2]._formula} - 1')
        product_data['supply_instruments'] = product_data['supply_instruments'][:, 1:]

    # build formulas for the IDs
    demand_formula = ' + '.join(demand_names)
    supply_formula = ' + '.join(supply_names)

    # solve the first stage of a problem in which the fixed effects are absorbed
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        product_formulations1[0] = Formulation(product_formulations[0]._formula, demand_formula)
    if ES > 0:
        product_formulations1[2] = Formulation(product_formulations[2]._formula, supply_formula)
    problem1 = Problem(product_formulations1, product_data, problem.agent_formulation, simulation.agent_data)
    results1 = problem1.solve(simulation.sigma, simulation.pi, steps=1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables
    product_data2 = product_data.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        demand_indicators = build_matrix(Formulation(demand_formula), product_data)
        product_data2['demand_instruments'] = np.c_[product_data['demand_instruments'], demand_indicators]
        product_formulations2[0] = Formulation(f'{product_formulations[0]._formula} + {demand_formula}')
    if ES > 0:
        supply_indicators = build_matrix(Formulation(supply_formula), product_data)
        product_data2['supply_instruments'] = np.c_[product_data['supply_instruments'], supply_indicators]
        product_formulations2[2] = Formulation(f'{product_formulations[2]._formula} + {supply_formula}')
    problem2 = Problem(product_formulations2, product_data2, problem.agent_formulation, simulation.agent_data)
    results2 = problem2.solve(simulation.sigma, simulation.pi, steps=1)

    # test that all arrays expected to be identical are identical
    keys = [
        'theta', 'sigma', 'pi', 'beta', 'gamma', 'sigma_se', 'pi_se', 'beta_se', 'gamma_se', 'true_delta',
        'true_tilde_costs', 'true_xi', 'true_omega', 'true_xi_jacobian', 'true_omega_jacobian', 'objective',
        'gradient', 'sigma_gradient', 'pi_gradient'
    ]
    for key in keys:
        result1 = getattr(results1, key)
        if result1 is not None:
            result2 = getattr(results2, key)
            if 'beta' in key or 'gamma' in key:
                # the indicator specification appends fixed effect coefficients, so compare only the leading block
                result2 = result2[:result1.size]
            np.testing.assert_allclose(result1, result2, atol=1e-8, rtol=1e-5, err_msg=key)
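
# The comparison above hinges on pyblp's two equivalent ways of handling fixed effects: passing the ID formula as
# Formulation's second (absorb) argument versus adding the IDs as indicator variables to the linear formula. A minimal
# sketch of the two demand-side specifications for a single fixed effect named 'demand_ids0' (the variable names are
# illustrative, and the constant is dropped as in the test to avoid collinearity with the indicators):
def equivalent_demand_formulations():
    absorbed = Formulation('0 + prices + x', absorb='demand_ids0')
    indicators = Formulation('0 + prices + x + demand_ids0')
    return absorbed, indicators
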