def solve_logit_blp(inst, my_products):
    """Build and solve a plain logit demand problem with caller-supplied instruments.

    ``inst`` is written to ``my_products['demand_instruments']``, so the
    ``my_products`` mapping is modified in place; pass a copy if the original
    must stay untouched.  The problem is created with ``add_exogenous=False``
    and solved starting from an identity weighting matrix that has one
    row/column per column of ``inst``.

    Returns:
        The ``(problem, results)`` pair.
    """
    # A single (linear) demand formulation.
    demand_formulation = pyblp.Formulation(
        '1 + prices + hpwt + air + mpd + space'
    )

    # Only demand-side instruments are supplied.
    my_products['demand_instruments'] = inst

    problem = pyblp.Problem(
        demand_formulation,
        my_products,
        add_exogenous=False,
    )
    initial_weights = np.identity(inst.shape[1])
    results = problem.solve(W=initial_weights)
    return problem, results
def solve_nl_nevo(df, rho=0.375):
    """Evaluate a nested logit model on ``df`` at a fixed nesting parameter.

    Adds a ``demand_instruments20`` column to ``df`` in place (the number of
    rows in each market/nest group), builds a prices-only problem and solves
    it with the ``'return'`` optimization routine, passing ``rho`` as the
    starting value.  The mean of the diversion-ratio diagonals is printed.

    Returns:
        The ``(problem, results)`` pair.
    """
    # Group size per (market, nest), used as an additional instrument.
    shares_by_nest = df.groupby(['market_ids', 'nesting_ids'])['shares']
    df['demand_instruments20'] = shares_by_nest.transform(np.size)

    problem = pyblp.Problem(pyblp.Formulation('0 + prices'), df)
    results = problem.solve(
        rho=rho,
        optimization=pyblp.Optimization('return'),
    )

    diversion = results.compute_diversion_ratios()
    mean_diagonal = results.extract_diagonals(diversion).mean()
    print(mean_diagonal)
    return problem, results
def fakeBLP():
    """Estimate a random-coefficients model on pyblp's Nevo product data.

    Loads the bundled Nevo products file, solves the problem with Monte Carlo
    integration (50 draws, seed 0) and BFGS starting from an identity
    ``sigma``, prints the results and the diversion-ratio matrix, and draws
    the diversion ratios of market ``'C01Q1'`` as a matrix plot with a
    colour bar.  Nothing is returned and the figure is not shown explicitly.
    """
    product_data = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION)

    product_formulations = (
        pyblp.Formulation('0 + prices', absorb='C(product_ids)'),
        pyblp.Formulation('1 + prices + sugar + mushy'),
    )

    # Only the Monte Carlo problem is solved, so the product-rule problem and
    # the elasticity matrix that used to be built here (and never used) are
    # no longer computed.
    mc_integration = pyblp.Integration('monte_carlo', size=50, seed=0)
    mc_problem = pyblp.Problem(
        product_formulations,
        product_data,
        integration=mc_integration,
    )

    bfgs = pyblp.Optimization('bfgs')
    results1 = mc_problem.solve(sigma=np.eye(4), optimization=bfgs)
    diversions = results1.compute_diversion_ratios()

    print(results1)
    print(diversions)

    # Plot the rows belonging to a single market.
    single_market = product_data['market_ids'] == 'C01Q1'
    plt.colorbar(plt.matshow(diversions[single_market]))
def runBLP(parms, data):
    """Estimate a random-coefficients model on ``data`` and plot one market.

    Solves the problem with Monte Carlo integration (50 draws, seed 0) and
    L-BFGS-B starting from an identity ``sigma``, prints the results, then
    shows the elasticity and diversion-ratio matrices of the market whose
    ``market_ids`` equals 18.

    Args:
        parms: Not used by this function; kept so existing callers still work.
        data: Product data passed straight to ``pyblp.Problem``; it is also
            indexed with ``data['market_ids']``.
    """
    product_formulations = (
        pyblp.Formulation('0 + prices', absorb='C(product_ids)'),
        pyblp.Formulation('1 + prices + protein + fat'),
    )

    # Only the Monte Carlo problem is solved; the product-rule problem that
    # used to be constructed here was never used and has been dropped.
    mc_integration = pyblp.Integration('monte_carlo', size=50, seed=0)
    mc_problem = pyblp.Problem(
        product_formulations,
        data,
        integration=mc_integration,
    )

    # TODO: this is not the ideal optimizer and needs to be changed once the
    # code has been proven to work.
    opt = pyblp.Optimization('l-bfgs-b')
    results1 = mc_problem.solve(sigma=np.eye(4), optimization=opt)
    print(results1)

    elasticities = results1.compute_elasticities()
    diversions = results1.compute_diversion_ratios()

    # Plot the rows belonging to a single market.
    single_market = data['market_ids'] == 18
    plt.colorbar(plt.matshow(elasticities[single_market]))
    plt.colorbar(plt.matshow(diversions[single_market]))
    plt.show()
def run_simulation(n_firms, betas, gammas, kappa=0, maverick=False):
    """Simulate one market and summarise its equilibrium.

    ID data is built with ``T=1, J=n_firms, F=n_firms`` and an ownership
    matrix from ``construct_ownership(n_firms, kappa, maverick)``.  Prices
    and shares are obtained from the simulation, after which the model is
    evaluated at the known parameters (no estimation) so that diversion
    ratios, elasticities and profits can be computed.

    Returns:
        Tuple ``(inside_share, prices, og_diversion, own_elas, total_pi)``:
        sum of shares, mean price, mean diversion-ratio diagonal, mean
        elasticity diagonal and summed profits.
    """
    id_data = pyblp.build_id_data(T=1, J=n_firms, F=n_firms)

    # Copy the structured array's fields into a dict so a field can be added.
    mutable_id_data = dict((name, id_data[name]) for name in id_data.dtype.names)
    mutable_id_data['ownership'] = construct_ownership(n_firms, kappa, maverick)

    demand_formulation = pyblp.Formulation('1 + prices+x1')
    supply_formulation = pyblp.Formulation('1+x1')
    simulation = pyblp.Simulation(
        product_formulations=(demand_formulation, None, supply_formulation),
        beta=betas,
        sigma=None,
        gamma=gammas,
        xi_variance=1e-6,
        omega_variance=1e-6,
        product_data=mutable_id_data,
        seed=0,
    )

    # Solve the simulation for prices and quantities.
    simulated = simulation.replace_endogenous()

    # The parameters are known and only counterfactual quantities are wanted,
    # so the 'return' routine is used instead of estimating anything.
    results = simulated.to_problem().solve(
        beta=betas,
        gamma=gammas,
        sigma=None,
        optimization=pyblp.Optimization('return'),
    )

    # Pull out the summary statistics.
    simulated_products = simulated.product_data
    inside_share = np.sum(simulated_products['shares'])
    prices = np.mean(simulated_products['prices'])
    og_diversion = np.mean(np.diag(results.compute_diversion_ratios()))
    own_elas = np.diag(results.compute_elasticities()).mean()
    total_pi = results.compute_profits().sum()
    return (inside_share, prices, og_diversion, own_elas, total_pi)
def get_nevo_logit():
    """Solve the logit model on ``nevo_products``, save it and return the results.

    The call is bracketed by ``tick()`` / ``tock()``, and the results are
    handed to ``save_pyblp_results`` together with the problem and
    ``filename_logit``.
    """
    tick()

    # The original wrapped this in parentheses without a trailing comma, so
    # it was a single Formulation rather than a tuple; it is passed the same
    # way here.
    price_formulation = pyblp.Formulation('0 + prices', absorb='C(product_ids)')
    problem_logit = pyblp.Problem(
        product_formulations=price_formulation,
        product_data=nevo_products,
        agent_formulation=None,
        agent_data=None,
    )
    results_logit = problem_logit.solve()

    save_pyblp_results(results_logit, problem_logit, filename_logit)
    tock()
    return results_logit
def genInstruments(parms):
    """Read a product CSV and append BLP instrument columns to it.

    Args:
        parms: Mapping with ``'dir'`` and ``'file'`` entries that are joined
            into the path of the CSV file to read.

    Returns:
        The loaded frame with one extra column per instrument, named
        ``demand_instruments0``, ``demand_instruments1``, ...
    """
    fileLoc = os.path.join(parms['dir'], parms['file'])
    data = pd.read_csv(fileLoc)

    formulation = pyblp.Formulation('1 + protein + fat')
    instruments = pyblp.build_blp_instruments(formulation, data)

    # NOTE: every generated column is kept.  An earlier version sliced off
    # leading columns (``instruments[..., 5:]``) on the grounds that the
    # other-product-same-manufacturer sums are all zero for solo-product
    # firms, but that slice is disabled.
    instNames = [
        'demand_instruments' + str(x) for x in range(instruments.shape[1])
    ]

    instrument_frame = pd.DataFrame(
        instruments, index=data.index, columns=instNames
    )
    data = pd.concat([data, instrument_frame], axis=1)
    return data
def get_blp_logit():
    """Solve the BLP logit model on the module-level ``product_data``.

    The instrument matrix is the design matrix of
    ``'1 + hpwt + air + mpd + space'`` stacked column-wise with the
    column-wise ``zscore`` of ``original_inst(...)``.  The solved problem is
    saved through ``save_pyblp_results`` and the results are returned.
    """
    # Work on a (shallow) copy of the column dict because solve_logit_blp
    # is handed this mapping as its product data.
    logit_products = dict(product_data.to_dict('series'))

    inst_form = pyblp.Formulation('1 + hpwt + air + mpd + space')
    X = pyblp.build_matrix(inst_form, logit_products)

    # The "original instruments", standardised one column at a time.
    raw_instruments = original_inst(X, logit_products)
    orig_inst = np.apply_along_axis(zscore, 0, raw_instruments)

    # Solve one logit: first argument is the instruments, second is the
    # copied product data.
    all_instruments = np.c_[X, orig_inst]
    problem_logit, results_logit = solve_logit_blp(all_instruments, logit_products)

    save_pyblp_results(results_logit, problem_logit, filename_logit)
    return results_logit
# pd.DataFrame(demand_instruments).describe()
#
# demand_instruments = demand_instruments[:,5]
# demand_instruments = demand_instruments.reshape((len(demand_instruments),1))
# product_data['demand_instruments0'] = demand_instruments[:,0]
#
# supply_instruments = pyblp.build_blp_instruments(pyblp.Formulation('obs_cost'), product_data)
# supply_instruments = supply_instruments[:,3]
# supply_instruments.reshape((len(supply_instruments),1))
# product_data['supply_instruments0'] = supply_instruments

# In[3]:

# Quadratic differentiation instruments.
demand_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + quality + obs_cost'),
    product_data,
    version='quadratic',
)
# Keep columns 2 and 3 only; the slice is already two columns wide, so no
# reshape is needed.
demand_instruments = demand_instruments[:, 2:4]
product_data['demand_instruments0'] = demand_instruments[:, 0]
product_data['demand_instruments1'] = demand_instruments[:, 1]

supply_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + obs_cost'),
    product_data,
    version='quadratic',
)
# Keep column 1 as a 1-D array.  The reshape that used to follow discarded
# its result and therefore did nothing, so it has been removed.
supply_instruments = supply_instruments[:, 1]
product_data['supply_instruments0'] = supply_instruments

# Snapshot of the product data with the differentiation instruments attached.
product_data_diff = product_data.copy()

# In[4]:
# Market identifiers of the form 'C<two-character city>Q<quarter>'; the city
# is left-padded with zeros to width two.
ps3['market_ids'] = (
    'C'
    + (ps3['city'].astype(str)).apply(lambda x: '{0:0>2}'.format(x))
    + 'Q'
    + ps3['quarter'].astype(str))
# NOTE: `demog` is re-read from the pyblp agent data further down, so this
# column only ends up on the frame that `demog` refers to at this point.
demog['market_ids'] = (
    'C'
    + (demog['city'].astype(str)).apply(lambda x: '{0:0>2}'.format(x))
    + 'Q'
    + demog['quarter'].astype(str))

# Rename z1..z19 to demand_instruments0..demand_instruments18.
znames = {
    'z' + str(i): 'demand_instruments' + str(i - 1) for i in range(1, 20)
}
ps3.rename(columns=znames, inplace=True)

# define formulations
x1_form = blp.Formulation('0 + prices', absorb='C(brand)')
x2_form = blp.Formulation('1 + prices + sugar + mushy')
forms = (x1_form, x2_form)

# simulate 20 individuals
#
# There appear to be some irregularities in the 'v' and 'demog' data from
# Nevo -- they don't appear to represent what the document says they do;
# load the Nevo data directly to get sensible results.
demog = pd.read_csv(blp.data.NEVO_AGENTS_LOCATION)
# An earlier agent formulation using 'income_sq' was assigned and then
# overwritten before use; only this one, built on 'income_squared', is kept.
agent_form = blp.Formulation('0 + income + income_squared + age + child')
problem = blp.Problem(forms, ps3, agent_form, demog)
def solve_nl(df):
    """Solve a prices-only nested logit problem on ``df`` starting from rho = 0.7.

    A ``demand_instruments20`` column holding the number of rows in each
    market/nest group is added to ``df`` in place before the problem is
    built.  Returns the results of ``Problem.solve``.
    """
    # Group size per (market, nest), used as an additional instrument.
    shares_by_nest = df.groupby(['market_ids', 'nesting_ids'])['shares']
    df['demand_instruments20'] = shares_by_nest.transform(np.size)

    nested_problem = pyblp.Problem(pyblp.Formulation('0 + prices'), df)
    return nested_problem.solve(rho=0.7)
@author: micha Code draws strongly from the pyblp docs written by Jeff Gortmaker: https://pyblp.readthedocs.io/en/stable/_notebooks/tutorial/nevo.html I actually know Jeff (former coworker) and he gave me lots of advice when I was applying to grad schools. Small world! """ import pyblp import numpy as np import pandas as pd pyblp.options.digits = 2 pyblp.options.verbose = False product_data = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION) logit_formulation = pyblp.Formulation('prices', absorb='C(product_ids)') problem = pyblp.Problem(logit_formulation, product_data) logit_results = problem.solve() #logit_results def solve_nl(df): groups = df.groupby(['market_ids', 'nesting_ids']) df['demand_instruments20'] = groups['shares'].transform(np.size) nl_formulation = pyblp.Formulation('0 + prices') problem = pyblp.Problem(nl_formulation, df) return problem.solve(rho=0.7) df1 = product_data.copy()
# %% ################ ### BLP problem ################ # get the BLP Results Back # compare blp_base and results_blp blp_products = pd.read_parquet(raw_dir / 'blp_product_data_opt.parquet') blp_products['product_ids'] = range(0, 2217) # Set draws here blp_agents = draw_blp_agents(500) blp_agents['draw_ids'] = np.tile(range(0, 500), 20) problem_options = dict(product_formulations=( pyblp.Formulation('1 + hpwt + air + mpd + space'), pyblp.Formulation('1 + prices + hpwt + air + mpd + space'), pyblp.Formulation(f'1 + log(hpwt) + air + log(mpg) + log(space) + trend'), ), agent_formulation=pyblp.Formulation( '0 + I(1 / income)'), costs_type='log', agent_data=blp_agents) solve_options = dict( costs_bounds=(0.001, None), W_type='clustered', se_type='clustered', initial_update=True, iteration=pyblp.Iteration('squarem', {'atol': 1e-14}), optimization=pyblp.Optimization('bfgs', {'gtol': 1e-5}),
import csv

# Read the raw rows; the context manager closes the file once it has been
# read (the original left the handle open).
with open('../temp/zeta_1000.csv', newline='') as csv_file:
    data = list(csv.reader(csv_file))

column_name = ['product_ids', 'market_ids', 'quality', 'satellite', 'wired',
               'prices', 'obs_cost', 'unobs_demand', 'unobs_cost', 'shares',
               'marginal_cost', 'price_elasticity', 'D1', 'D2', 'D3', 'D4']
product_data = pd.DataFrame(data, columns=column_name)
# csv.reader yields strings, so convert every column to float.
product_data = product_data.astype('float')
# Firm ids are a copy of the product ids.
product_data['firm_ids'] = product_data['product_ids']

# ## 5 Estimate the Correctly Specified Model
# ### 5 (8) Report a table with the estimates of the demand parameters and standard error
# #### (a) When estimating demand alone

# DEMAND INSTRUMENTS
short_df = product_data[
    ['firm_ids', 'market_ids', 'quality', 'satellite', 'wired']
].head(8)
print(short_df)

n_ZD = 2
demand_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + quality + obs_cost'),
    product_data,
    version='quadratic',
)
print(demand_instruments[0:10, :])

# own characteristics will be collinear with X1 because each firm only has one
# product. hence we drop half of these "instruments"
assert n_ZD * 2 == len(demand_instruments[0])
for j in range(n_ZD):
    # The columns about to be dropped are expected to sum to zero.
    assert sum(demand_instruments[:, j]) == 0
demand_instruments = demand_instruments[:, n_ZD:(2 * n_ZD)]
for j in range(n_ZD):
    product_data['demand_instruments' + str(j)] = demand_instruments[:, j]

# SUPPLY INSTRUMENTS
n_ZS = 1
supply_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + obs_cost'),
    product_data,
    version='quadratic',
)
assert n_ZS * 2 == len(supply_instruments[0])