def solve_logit_blp(inst, my_products):
    """Build and solve a plain logit pyblp problem with caller-supplied instruments.

    Parameters
    ----------
    inst : 2-D array
        Demand instrument matrix. Its column count sizes the identity
        weighting matrix handed to the solver.
    my_products : mapping
        Product data given to ``pyblp.Problem``. It is modified in place:
        ``inst`` is stored under the ``'demand_instruments'`` key.

    Returns
    -------
    tuple
        ``(problem, results)``.
    """
    # A single (linear) formulation is all a plain logit needs.
    formulation = pyblp.Formulation('1 + prices + hpwt + air + mpd + space')

    # Only demand-side instruments are supplied; note this mutates the
    # caller's object.
    my_products['demand_instruments'] = inst

    # add_exogenous=False: the instrument set is exactly what the caller
    # passed in (see pyblp's Problem documentation for this flag).
    problem = pyblp.Problem(
        product_formulations=formulation,
        product_data=my_products,
        add_exogenous=False,
    )

    n_instruments = inst.shape[1]
    results = problem.solve(W=np.eye(n_instruments))
    return problem, results
def solve_nl_nevo(df, rho=0.375):
    """Evaluate a nested logit on ``df`` at a fixed nesting parameter.

    ``df`` is modified in place: a ``'demand_instruments20'`` column holding
    the size of each (market, nest) group is added before the problem is
    built.

    Parameters
    ----------
    df : pandas.DataFrame
        Product data with ``market_ids``, ``nesting_ids`` and ``shares``
        columns.
    rho : float
        Nesting parameter handed to ``Problem.solve``.

    Returns
    -------
    tuple
        ``(problem, results)``.
    """
    # Size of each (market, nest) group, broadcast back to the rows.
    nest_sizes = df.groupby(['market_ids', 'nesting_ids'])['shares'].transform(np.size)
    df['demand_instruments20'] = nest_sizes

    problem = pyblp.Problem(pyblp.Formulation('0 + prices'), df)

    # The 'return' optimizer hands back results at the starting values
    # instead of searching over rho.
    res = problem.solve(rho=rho, optimization=pyblp.Optimization('return'))

    # Mean of the diagonal of the diversion-ratio matrices (pyblp documents
    # the diagonal as diversion to the outside good), printed for inspection.
    diversion_ratios = res.compute_diversion_ratios()
    og = res.extract_diagonals(diversion_ratios).mean()
    print(og)
    return problem, res
Exemple #3
0
def fakeBLP():
    """Run a random-coefficients demo on pyblp's bundled Nevo product data.

    Builds two problems (Monte Carlo and product-rule integration), solves
    the Monte Carlo one with BFGS, prints the results and the diversion
    ratios, and draws the diversion matrix for market ``'C01Q1'``.
    Returns nothing.
    """
    product_data = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION)

    # X1 (linear, product fixed effects absorbed) and X2 (random coefficients).
    product_formulations = (
        pyblp.Formulation('0 + prices', absorb='C(product_ids)'),
        pyblp.Formulation('1 + prices + sugar + mushy'),
    )

    # Two alternative integration configurations over agent draws.
    mc_integration = pyblp.Integration('monte_carlo', size=50, seed=0)
    pr_integration = pyblp.Integration('product', size=5)

    mc_problem = pyblp.Problem(
        product_formulations, product_data, integration=mc_integration)
    # Constructed but not solved in this function.
    pr_problem = pyblp.Problem(
        product_formulations, product_data, integration=pr_integration)

    # Start sigma at the 4x4 identity (X2 has four terms).
    results1 = mc_problem.solve(
        sigma=np.eye(4), optimization=pyblp.Optimization('bfgs'))

    # Elasticities are computed but not used further here.
    elasticities = results1.compute_elasticities()
    diversions = results1.compute_diversion_ratios()
    print(results1)
    print(diversions)

    # Plot the diversion rows belonging to one market, with a colour scale.
    in_market = product_data['market_ids'] == 'C01Q1'
    image = plt.matshow(diversions[in_market])
    plt.colorbar(image)
Exemple #4
0
def runBLP(parms, data):
    """Solve a random-coefficients problem on ``data`` and plot one market.

    Parameters
    ----------
    parms
        Not used by this function.
    data : pandas.DataFrame
        Product data; must provide the variables named in the formulations
        below and a ``market_ids`` column.

    Prints the results, then shows elasticity and diversion-ratio matrices
    for the market whose id equals ``18``. Returns nothing.
    """
    # X1 (linear, product fixed effects absorbed) and X2 (random coefficients).
    product_formulations = (
        pyblp.Formulation('0 + prices', absorb='C(product_ids)'),
        pyblp.Formulation('1 + prices + protein + fat'),
    )

    mc_integration = pyblp.Integration('monte_carlo', size=50, seed=0)
    pr_integration = pyblp.Integration('product', size=5)

    mc_problem = pyblp.Problem(
        product_formulations, data, integration=mc_integration)
    # Constructed but not solved in this function.
    pr_problem = pyblp.Problem(
        product_formulations, data, integration=pr_integration)

    # This is not the ideal optimizer; it should be revisited once the code
    # has been proven to work.
    opt = pyblp.Optimization('l-bfgs-b')
    results1 = mc_problem.solve(sigma=np.eye(4), optimization=opt)
    print(results1)

    elasticities = results1.compute_elasticities()
    diversions = results1.compute_diversion_ratios()

    # One figure per matrix, restricted to the rows of a single market.
    in_market = data['market_ids'] == 18
    for matrix in (elasticities, diversions):
        plt.colorbar(plt.matshow(matrix[in_market]))
    plt.show()
def run_simulation(n_firms, betas, gammas, kappa=0, maverick=False):
    """Simulate a single market and summarise its equilibrium.

    Parameters
    ----------
    n_firms : int
        Used as both ``J`` and ``F`` in ``pyblp.build_id_data`` (with
        ``T=1``), and passed to ``construct_ownership``.
    betas, gammas
        Demand and supply parameters, given to both the simulation and the
        subsequent ``solve`` call.
    kappa, maverick
        Forwarded unchanged to ``construct_ownership``.

    Returns
    -------
    tuple
        ``(inside_share, prices, og_diversion, own_elas, total_pi)``: the sum
        of shares, mean price, mean diagonal of the diversion-ratio matrix,
        mean diagonal of the elasticity matrix, and summed profits.
    """
    id_data = pyblp.build_id_data(T=1, J=n_firms, F=n_firms)
    # Copy the structured array's fields into a dict so a field can be added.
    product_fields = {name: id_data[name] for name in id_data.dtype.names}
    product_fields['ownership'] = construct_ownership(n_firms, kappa, maverick)

    # Demand (X1) and supply (X3) formulations; no random coefficients (X2).
    demand_formulation = pyblp.Formulation('1 + prices+x1')
    supply_formulation = pyblp.Formulation('1+x1')
    simulation = pyblp.Simulation(
        product_formulations=(demand_formulation, None, supply_formulation),
        beta=betas,
        sigma=None,
        gamma=gammas,
        xi_variance=1e-6,
        omega_variance=1e-6,
        product_data=product_fields,
        seed=0)

    # Solve the simulation for prices and shares.
    prod_data = simulation.replace_endogenous()

    # Nothing is estimated: the parameters are known, and the 'return'
    # optimizer yields a results object at those values for counterfactuals.
    problem = prod_data.to_problem()
    res = problem.solve(
        beta=betas,
        gamma=gammas,
        sigma=None,
        optimization=pyblp.Optimization('return'))

    # Summary statistics of the simulated equilibrium.
    simulated = prod_data.product_data
    inside_share = np.sum(simulated['shares'])
    prices = np.mean(simulated['prices'])
    og_diversion = np.diag(res.compute_diversion_ratios()).mean()
    own_elas = np.diag(res.compute_elasticities()).mean()
    total_pi = res.compute_profits().sum()
    return (inside_share, prices, og_diversion, own_elas, total_pi)
def get_nevo_logit():
    """Solve a logit problem on ``nevo_products`` and save the results.

    Relies on module-level names defined outside this function:
    ``nevo_products``, ``filename_logit``, ``save_pyblp_results`` and the
    ``tick``/``tock`` pair (presumably timing helpers; confirm at their
    definitions).

    Returns
    -------
    The results object produced by ``Problem.solve``.
    """
    tick()

    # Prices only, with product fixed effects absorbed; no agent side.
    logit_formulation = pyblp.Formulation('0 + prices', absorb='C(product_ids)')
    problem_logit = pyblp.Problem(
        product_formulations=logit_formulation,
        product_data=nevo_products,
        agent_formulation=None,
        agent_data=None,
    )

    results_logit = problem_logit.solve()
    save_pyblp_results(results_logit, problem_logit, filename_logit)

    tock()
    return results_logit
Exemple #7
0
def genInstruments(parms):
    """Load a product CSV and append BLP instrument columns to it.

    Parameters
    ----------
    parms : mapping
        Must contain ``'dir'`` and ``'file'``; they are joined to locate the
        CSV file.

    Returns
    -------
    pandas.DataFrame
        The loaded data with one ``demand_instruments<k>`` column per
        instrument column returned by ``pyblp.build_blp_instruments``.
    """
    data = pd.read_csv(os.path.join(parms['dir'], parms['file']))

    instruments = pyblp.build_blp_instruments(
        pyblp.Formulation('1 + protein + fat'), data)

    # Author's note: the first set of generated instruments are
    # other-product-same-manufacturer sums, which would be columns of zeroes
    # when every firm sells a single product. The slice that would drop them
    # is disabled, so every column is kept.

    n_columns = instruments.shape[1]
    instNames = [f'demand_instruments{k}' for k in range(n_columns)]
    # Restates the first name; kept so behavior is unchanged (including the
    # IndexError raised if there are no instrument columns at all).
    instNames[0] = 'demand_instruments0'

    instrument_frame = pd.DataFrame(
        instruments, index=data.index, columns=instNames)
    return pd.concat([data, instrument_frame], axis=1)
def get_blp_logit():
    """Solve a logit problem on the module-level ``product_data`` and save it.

    Relies on names defined outside this function: ``product_data``,
    ``original_inst``, ``zscore``, ``solve_logit_blp``,
    ``save_pyblp_results`` and ``filename_logit``.

    Returns
    -------
    The results object returned by ``solve_logit_blp``.
    """
    # Dict of column Series; the (shallow) copy keeps keys added later from
    # appearing in the dict that to_dict returned.
    logit_products = product_data.to_dict('series').copy()

    # Exogenous characteristics matrix.
    X = pyblp.build_matrix(
        pyblp.Formulation('1 + hpwt + air + mpd + space'), logit_products)

    # The "original instruments", standardised column by column.
    raw_inst = original_inst(X, logit_products)
    orig_inst = np.apply_along_axis(zscore, 0, raw_inst)

    # Solve one logit: instruments are X alongside the standardised
    # originals; the product data is the copy made above.
    all_instruments = np.c_[X, orig_inst]
    problem_logit, results_logit = solve_logit_blp(all_instruments,
                                                   logit_products)

    save_pyblp_results(results_logit, problem_logit, filename_logit)
    return results_logit
Exemple #9
0
# pd.DataFrame(demand_instruments).describe()
#
# demand_instruments = demand_instruments[:,5]
# demand_instruments = demand_instruments.reshape((len(demand_instruments),1))
# product_data['demand_instruments0'] = demand_instruments[:,0]
#
# supply_instruments = pyblp.build_blp_instruments(pyblp.Formulation('obs_cost'), product_data)
# supply_instruments = supply_instruments[:,3]
# supply_instruments.reshape((len(supply_instruments),1))
# product_data['supply_instruments0'] = supply_instruments

# In[3]:

# Quadratic differentiation instruments, demand side.
demand_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + quality + obs_cost'),
    product_data,
    version='quadratic')
# Keep columns 2 and 3 only. The slice is already a 2-D array with two
# columns, so no reshape is needed before splitting it into named columns.
demand_instruments = demand_instruments[:, 2:4]
product_data['demand_instruments0'] = demand_instruments[:, 0]
product_data['demand_instruments1'] = demand_instruments[:, 1]

# Quadratic differentiation instruments, supply side. Built after the demand
# columns have been added to product_data, as in the original ordering.
supply_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + obs_cost'), product_data, version='quadratic')
# Keep column 1 as a 1-D vector. ndarray.reshape returns a new array rather
# than reshaping in place, so a bare `.reshape(...)` call whose result is
# discarded does nothing; the vector is assigned as-is.
supply_instruments = supply_instruments[:, 1]
product_data['supply_instruments0'] = supply_instruments

# Snapshot of the product data carrying the differentiation instruments.
product_data_diff = product_data.copy()

# In[4]:
Exemple #10
0
# Market identifiers of the form 'C<city, zero-padded to width 2>Q<quarter>'.
ps3['market_ids'] = (
    'C' + ps3['city'].astype(str).apply('{0:0>2}'.format) +
    'Q' + ps3['quarter'].astype(str))

# NOTE(review): `demog` is rebound to pyblp's bundled agent data further
# down, so this column only survives on other references to this frame.
demog['market_ids'] = (
    'C' + demog['city'].astype(str).apply('{0:0>2}'.format) +
    'Q' + demog['quarter'].astype(str))

# Rename z1..z19 to the demand_instruments0..demand_instruments18 names.
znames = {f'z{i}': f'demand_instruments{i - 1}' for i in range(1, 20)}

ps3.rename(columns=znames, inplace=True)

# define formulations
x1_form = blp.Formulation('0 + prices', absorb='C(brand)')
x2_form = blp.Formulation('1 + prices + sugar + mushy')
forms = (x1_form, x2_form)

# simulate 20 individuals
#
# There appear to be some irregularities in the 'v' and 'demog' data from
# Nevo -- they don't appear to represent what the document says they do;
# load the Nevo data directly to get sensible results.
demog = pd.read_csv(blp.data.NEVO_AGENTS_LOCATION)
# Only this agent formulation is used: it matches the column names of the
# bundled agent data loaded just above (`income_squared`).
agent_form = blp.Formulation('0 + income + income_squared + age + child')

problem = blp.Problem(forms, ps3, agent_form, demog)
def solve_nl(df):
    """Solve a nested logit on ``df`` starting from ``rho=0.7``.

    ``df`` is modified in place: a ``'demand_instruments20'`` column holding
    the size of each (market, nest) group is added before the problem is
    built.

    Parameters
    ----------
    df : pandas.DataFrame
        Product data with ``market_ids``, ``nesting_ids`` and ``shares``
        columns.

    Returns
    -------
    The results object produced by ``Problem.solve``.
    """
    # Size of each (market, nest) group, broadcast back to the rows.
    nest_sizes = df.groupby(['market_ids', 'nesting_ids'])['shares'].transform(np.size)
    df['demand_instruments20'] = nest_sizes

    nested_problem = pyblp.Problem(pyblp.Formulation('0 + prices'), df)
    return nested_problem.solve(rho=0.7)
@author: micha
Code draws strongly from the pyblp docs written by Jeff Gortmaker:
    https://pyblp.readthedocs.io/en/stable/_notebooks/tutorial/nevo.html
I actually know Jeff (former coworker) and he gave me lots of advice when I was applying to
grad schools. Small world!
"""
import pyblp
import numpy as np
import pandas as pd

# Global pyblp output settings: fewer displayed digits and no status output
# (see pyblp's options documentation for their exact effect).
pyblp.options.digits = 2
pyblp.options.verbose = False

# Product data bundled with pyblp for the Nevo example.
product_data = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION)

# Plain logit: prices with product fixed effects absorbed.
logit_formulation = pyblp.Formulation('prices', absorb='C(product_ids)')
problem = pyblp.Problem(logit_formulation, product_data)

# Solved with pyblp's default solver settings.
logit_results = problem.solve()
#logit_results


def solve_nl(df):
    """Solve a nested logit on ``df`` starting from ``rho=0.7``.

    ``df`` is modified in place: a ``'demand_instruments20'`` column holding
    the size of each (market, nest) group is added before the problem is
    built.

    Parameters
    ----------
    df : pandas.DataFrame
        Product data with ``market_ids``, ``nesting_ids`` and ``shares``
        columns.

    Returns
    -------
    The results object produced by ``Problem.solve``.
    """
    # Size of each (market, nest) group, broadcast back to the rows.
    nest_sizes = df.groupby(['market_ids', 'nesting_ids'])['shares'].transform(np.size)
    df['demand_instruments20'] = nest_sizes

    nested_problem = pyblp.Problem(pyblp.Formulation('0 + prices'), df)
    return nested_problem.solve(rho=0.7)


# Independent copy of the product data; presumably so that columns added
# later (solve_nl adds one to the frame it is given) do not touch
# product_data -- the use of df1 is outside this view.
df1 = product_data.copy()
Exemple #13
0
# %%
################
### BLP problem
################
# get the BLP Results Back
# compare blp_base and results_blp

blp_products = pd.read_parquet(raw_dir / 'blp_product_data_opt.parquet')
# Sequential product ids; assumes the frame has exactly 2217 rows.
blp_products['product_ids'] = range(0, 2217)

# Set draws here
blp_agents = draw_blp_agents(500)
# Labels 0..499 repeated 20 times (10000 values in total); presumably 500
# draws for each of 20 markets -- confirm against draw_blp_agents.
blp_agents['draw_ids'] = np.tile(range(0, 500), 20)

# Keyword arguments for pyblp.Problem: three product formulations, an agent
# formulation in inverse income, and a log cost specification.
problem_options = dict(
    product_formulations=(
        pyblp.Formulation('1 + hpwt + air + mpd + space'),
        pyblp.Formulation('1 + prices + hpwt + air + mpd + space'),
        # Plain string: the original f-prefix had no placeholders.
        pyblp.Formulation('1 + log(hpwt) + air + log(mpg) + log(space) + trend'),
    ),
    agent_formulation=pyblp.Formulation('0 + I(1 / income)'),
    costs_type='log',
    agent_data=blp_agents,
)

solve_options = dict(
    costs_bounds=(0.001, None),
    W_type='clustered',
    se_type='clustered',
    initial_update=True,
    iteration=pyblp.Iteration('squarem', {'atol': 1e-14}),
    optimization=pyblp.Optimization('bfgs', {'gtol': 1e-5}),
Exemple #14
0
import csv

# Read every row of the CSV as a list of strings. The context manager closes
# the file handle once reading is done, and newline='' is what the csv
# module documentation asks for when passing a file object to csv.reader.
with open('../temp/zeta_1000.csv', newline='') as csv_file:
    data = list(csv.reader(csv_file))

# Column labels applied to the rows, in file order. Every row (including the
# first) is treated as data and converted to float below.
column_name = [
    'product_ids', 'market_ids', 'quality', 'satellite', 'wired', 'prices',
    'obs_cost', 'unobs_demand', 'unobs_cost', 'shares', 'marginal_cost',
    'price_elasticity', 'D1', 'D2', 'D3', 'D4',
]
product_data = pd.DataFrame(data, columns=column_name)
product_data = product_data.astype('float')
# Firm ids are a copy of the product ids.
product_data['firm_ids'] = product_data['product_ids']

# ## 5 Estimate the Correctly Specified Model
# ### 5 (8) Report a table with the estimates of the demand parameters and standard error
# #### (a) When estimating demand alone

# DEMAND INSTRUMENTS
# Print a small preview of the identifying columns first.
short_df = product_data[
    ['firm_ids', 'market_ids', 'quality', 'satellite', 'wired']
].head(8)
print(short_df)
n_ZD = 2
demand_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + quality + obs_cost'),
    product_data,
    version='quadratic')
print(demand_instruments[0:10, :])

# Own characteristics will be collinear with X1 because each firm only has
# one product, hence half of these "instruments" are dropped.
# Sanity checks first: the matrix has 2 * n_ZD columns and each of the first
# n_ZD columns sums to zero.
assert n_ZD * 2 == len(demand_instruments[0])
for j in range(n_ZD):
    assert sum(demand_instruments[:, j]) == 0
# Keep the second half of the columns and store each under its own name.
demand_instruments = demand_instruments[:, n_ZD:(2 * n_ZD)]
for j in range(n_ZD):
    product_data['demand_instruments' + str(j)] = demand_instruments[:, j]

# SUPPLY INSTRUMENTS
n_ZS = 1
supply_instruments = pyblp.build_differentiation_instruments(
    pyblp.Formulation('0 + obs_cost'),
    product_data,
    version='quadratic')
# Sanity check: the matrix has 2 * n_ZS columns.
assert n_ZS * 2 == len(supply_instruments[0])