Exemple #1
0
def large_blp_simulation() -> SimulationFixture:
    """Build the large BLP simulation fixture, its instrument overrides, and its micro moments.

    The ID grid is created with ``T=20, J=20, F=9`` and a seeded random half of its rows is kept, which leaves varying
    numbers of products per market. Demand has a linear constant and linear characteristics ``x, y, z, q``; the
    nonlinear side contains ``I(-prices)`` (log-linear coefficients on prices) and ``x``; log costs depend on
    ``log(x)``, ``log(a)``, and ``log(b)``. Demographics come from the agent formulation ``1 + f + g``.

    Returns the simulation, its results after replacing endogenous variables, a dictionary of demand- and supply-side
    instrument overrides built from local differentiation instruments, and a list of two first choice covariance
    micro moments.
    """
    # draw the full ID grid, then retain a reproducible random half of its rows
    full_id_data = build_id_data(T=20, J=20, F=9)
    row_order = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(row_order)
    kept_rows = row_order[:int(0.5 * full_id_data.size)]
    id_data = full_id_data[kept_rows]

    # formulations for the linear, nonlinear, and cost sides
    linear_formulation = Formulation('1 + x + y + z + q')
    nonlinear_formulation = Formulation('0 + I(-prices) + x')
    cost_formulation = Formulation('0 + log(x) + log(a) + log(b)')

    # clustering IDs are drawn from 30 groups with their own fixed seed
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[[+0.5, 0], [-0.1, 2]],
        pi=[[2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['lognormal', 'normal'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    # each side's instruments: differentiation instruments plus the other side's excluded characteristics
    solved_products = simulation_results.product_data
    demand_instruments = np.c_[
        build_differentiation_instruments(Formulation('0 + x + y + z + q'), solved_products),
        build_matrix(Formulation('0 + a + b'), solved_products),
    ]
    supply_instruments = np.c_[
        build_differentiation_instruments(Formulation('0 + x + a + b'), solved_products),
        build_matrix(Formulation('0 + y + z + q'), solved_products),
    ]
    simulated_data_override = {
        'demand_instruments': demand_instruments,
        'supply_instruments': supply_instruments,
    }

    # two covariance moments: one over the first five markets, one over the last three
    first_markets = simulation.unique_market_ids[:5]
    last_markets = simulation.unique_market_ids[-3:]
    simulated_micro_moments = [
        FirstChoiceCovarianceMoment(X2_index=1, demographics_index=1, value=0, market_ids=first_markets),
        FirstChoiceCovarianceMoment(X2_index=0, demographics_index=1, value=0, market_ids=last_markets),
    ]
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
Exemple #2
0
def large_blp_simulation() -> SimulationFixture:
    """Build and solve the large BLP simulation fixture with demand-side differentiation instruments.

    The ID grid is created with ``T=20, J=20, F=9`` and a seeded random half of its rows is kept, which leaves varying
    numbers of products per market. Prices and ``x`` enter both linearly and nonlinearly, ``y, z, q`` enter linearly,
    and log costs depend on ``log(x)``, ``log(a)``, and ``log(b)``. Demographics ``f`` and ``g`` come from the agent
    formulation. After solving, the demand instruments in the simulated product data are replaced with local
    differentiation instruments stacked with the ``a`` and ``b`` columns.

    Returns the simulation and its (modified) results.
    """
    # draw the full ID grid, then retain a reproducible random half of its rows
    full_id_data = build_id_data(T=20, J=20, F=9)
    row_order = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(row_order)
    kept_rows = row_order[:int(0.5 * full_id_data.size)]
    id_data = full_id_data[kept_rows]

    # formulations for the linear, nonlinear, and cost sides
    linear_formulation = Formulation('1 + prices + x + y + z + q')
    nonlinear_formulation = Formulation('0 + prices + x')
    cost_formulation = Formulation('0 + log(x) + log(a) + log(b)')

    # clustering IDs are drawn from 30 groups with their own fixed seed
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, -0.1], [0, +2.0]],
        gamma=[0.1, 0.2, 0.3],
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'clustering_ids': clustering_ids,
        },
        agent_formulation=Formulation('0 + f + g'),
        pi=[[1, 0], [0, 2]],
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.solve()

    # swap in differentiation instruments (plus the a and b columns) on the demand side, keeping the field's dtype
    solved_products = simulation_results.product_data
    local_instruments = build_differentiation_instruments(Formulation('0 + x + y + z + q'), solved_products)
    cost_columns = build_matrix(Formulation('0 + a + b'), solved_products)
    differentiation_instruments = np.c_[local_instruments, cost_columns]
    replacements = {
        'demand_instruments': (differentiation_instruments, solved_products.demand_instruments.dtype),
    }
    simulation_results.product_data = update_matrices(solved_products, replacements)
    return simulation, simulation_results
def get_blp_logit():
    """Solve the logit problem on a copy of the module-level product data, save it, and return the results.

    The instruments passed to ``solve_logit_blp`` are the matrix built from ``1 + hpwt + air + mpd + space`` stacked
    column-wise with the column-standardized output of ``original_inst``. The problem and results are written out
    via ``save_pyblp_results`` under ``filename_logit``.
    """
    # work on a copy of the column mapping so the module-level data is not rebound
    logit_products = product_data.to_dict('series').copy()

    # exogenous characteristics used both as instruments and as input to the "original instruments"
    X = pyblp.build_matrix(pyblp.Formulation('1 + hpwt + air + mpd + space'), logit_products)

    # z-score each column (axis 0) of the "original instruments"
    raw_orig_inst = original_inst(X, logit_products)
    orig_inst = np.apply_along_axis(zscore, 0, raw_orig_inst)

    # solve one logit: instruments first, then the copied product data
    instruments = np.c_[X, orig_inst]
    problem_logit, results_logit = solve_logit_blp(instruments, logit_products)

    save_pyblp_results(results_logit, problem_logit, filename_logit)
    return results_logit
Exemple #4
0
def large_blp_simulation() -> SimulationFixture:
    """Build the large BLP simulation fixture, its instrument overrides, and its micro moments.

    The ID grid is created with ``T=20, J=20, F=9`` and a seeded random half of its rows is kept, which leaves varying
    numbers of products per market; products are then numbered from zero within each market. Demand has a linear
    constant and linear characteristics ``x, y, z, q``; the nonlinear side contains a constant, ``I(-prices)``
    (log-linear coefficients on prices), and ``x``; log costs depend on ``log(x)``, ``log(a)``, and ``log(b)``.
    Demographics come from the agent formulation ``1 + f + g``.

    Returns the simulation, its results after replacing endogenous variables, a dictionary of demand- and supply-side
    instrument overrides built from local differentiation instruments, and a list of seven micro moments.
    """
    # draw the full ID grid, then retain a reproducible random half of its rows
    full_id_data = build_id_data(T=20, J=20, F=9)
    row_order = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(row_order)
    kept_rows = row_order[:int(0.5 * full_id_data.size)]
    id_data = full_id_data[kept_rows]

    # number the remaining products 0, 1, ... within each market
    product_ids = id_data.market_ids.copy()
    for market in np.unique(id_data.market_ids):
        in_market = id_data.market_ids == market
        product_ids[in_market] = np.arange(in_market.sum())

    # formulations for the linear, nonlinear, and cost sides
    linear_formulation = Formulation('1 + x + y + z + q')
    nonlinear_formulation = Formulation('1 + I(-prices) + x')
    cost_formulation = Formulation('0 + log(x) + log(a) + log(b)')

    # clustering IDs are drawn from 30 groups with their own fixed seed
    clustering_ids = np.random.RandomState(2).choice(range(30), id_data.size)

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data={
            'market_ids': id_data.market_ids,
            'firm_ids': id_data.firm_ids,
            'product_ids': product_ids,
            'clustering_ids': clustering_ids,
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[[0, +0.0, 0], [0, +0.5, 0], [0, -0.2, 2]],
        pi=[[0, 0, 0], [2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['normal', 'lognormal', 'normal'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    # each side's instruments: differentiation instruments plus the other side's excluded characteristics
    solved_products = simulation_results.product_data
    demand_instruments = np.c_[
        build_differentiation_instruments(Formulation('0 + x + y + z + q'), solved_products),
        build_matrix(Formulation('0 + a + b'), solved_products),
    ]
    supply_instruments = np.c_[
        build_differentiation_instruments(Formulation('0 + x + a + b'), solved_products),
        build_matrix(Formulation('0 + y + z + q'), solved_products),
    ]
    simulated_data_override = {
        'demand_instruments': demand_instruments,
        'supply_instruments': supply_instruments,
    }

    # every moment starts at a value of zero and uses the full sample size as its observation count
    market_ids = simulation.unique_market_ids
    observations = simulation.N
    simulated_micro_moments = [
        DemographicExpectationMoment(
            product_ids=[0],
            demographics_index=1,
            value=0,
            observations=observations,
        ),
        DemographicExpectationMoment(
            product_ids=[None, 0],
            demographics_index=1,
            value=0,
            observations=observations,
            market_ids=market_ids[1:4],
            market_weights=[0.2, 0.4, 0.4],
        ),
        DemographicCovarianceMoment(
            X2_index=0,
            demographics_index=2,
            value=0,
            observations=observations,
            market_ids=market_ids[3:5],
        ),
        DiversionProbabilityMoment(
            product_id1=1,
            product_id2=0,
            value=0,
            observations=observations,
            market_ids=market_ids[6:10],
        ),
        DiversionProbabilityMoment(
            product_id1=None,
            product_id2=1,
            value=0,
            observations=observations,
            market_ids=[market_ids[8]],
        ),
        DiversionProbabilityMoment(
            product_id1=1,
            product_id2=None,
            value=0,
            observations=observations,
            market_ids=[market_ids[9]],
        ),
        DiversionCovarianceMoment(
            X2_index1=1,
            X2_index2=1,
            value=0,
            observations=observations,
            market_ids=[market_ids[12]],
        ),
    ]
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
Exemple #5
0
def large_blp_simulation() -> SimulationFixture:
    """Solve a simulation with 20 markets, varying numbers of products per market, a linear constant, log-linear
    coefficients on prices, a linear/nonlinear/cost characteristic, another three linear characteristics, another two
    cost characteristics, demographics interacted with prices and the linear/nonlinear/cost characteristic, dense
    parameter matrices, a log-linear cost specification, and local differentiation instruments.
    """
    id_data = build_id_data(T=20, J=20, F=9)

    keep = np.arange(id_data.size)
    np.random.RandomState(0).shuffle(keep)
    id_data = id_data[keep[:int(0.5 * id_data.size)]]

    product_ids = id_data.market_ids.copy()
    for t in np.unique(id_data.market_ids):
        product_ids[id_data.market_ids == t] = np.arange(
            (id_data.market_ids == t).sum())

    simulation = Simulation(
        product_formulations=(Formulation('1 + x + y + z + q'),
                              Formulation('1 + I(-prices) + x'),
                              Formulation('0 + log(x) + log(a) + log(b)')),
        product_data={
            'market_ids':
            id_data.market_ids,
            'firm_ids':
            id_data.firm_ids,
            'product_ids':
            product_ids,
            'clustering_ids':
            np.random.RandomState(2).choice(range(30), id_data.size)
        },
        beta=[1, 1, 2, 3, 1],
        sigma=[[0, +0.0, 0], [0, +0.5, 0], [0, -0.2, 2]],
        pi=[[0, 0, 0], [2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        rc_types=['linear', 'log', 'linear'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    simulated_data_override = {
        'demand_instruments':
        np.c_[build_differentiation_instruments(
            Formulation('0 + x + y + z + q'), simulation_results.product_data),
              build_matrix(Formulation('0 + a + b'), simulation_results.
                           product_data)],
        'supply_instruments':
        np.c_[build_differentiation_instruments(
            Formulation('0 + x + a + b'), simulation_results.product_data),
              build_matrix(Formulation('0 + y + z + q'), simulation_results.
                           product_data)]
    }

    inside_diversion_micro_dataset = MicroDataset(
        name="diversion from 1",
        observations=simulation.N,
        compute_weights=lambda _, p, a: np.tile(p.product_ids == 1,
                                                (a.size, 1, 1 + p.size)),
        market_ids=simulation.unique_market_ids[6:10],
    )
    outside_diversion_micro_dataset = MicroDataset(
        name="diversion from outside",
        observations=simulation.N,
        compute_weights=lambda _, p, a: np.concatenate([
            np.ones((a.size, 1, 1 + p.size)),
            np.zeros((a.size, p.size, 1 + p.size))
        ],
                                                       axis=1),
        market_ids=[simulation.unique_market_ids[8]],
    )
    simulated_micro_moments = simulation_results.replace_micro_moment_values([
        MicroMoment(
            name="demographic 1 expectation for 0",
            dataset=MicroDataset(
                name="product 0",
                observations=simulation.N,
                compute_weights=lambda _, p, a: np.tile(
                    p.product_ids.flat == 0, (a.size, 1)),
            ),
            value=0,
            compute_values=lambda _, p, a: np.tile(a.demographics[:, [1]],
                                                   (1, p.size)),
        ),
        MicroMoment(
            name="demographic 1 expectation for 0 and outside",
            dataset=MicroDataset(
                name="product 0 and outside",
                observations=simulation.N,
                compute_weights=lambda _, p, a: np.c_[
                    np.ones((a.size, 1)),
                    np.tile(p.product_ids.flat == 0, (a.size, 1))],
                market_ids=simulation.unique_market_ids[1:4],
            ),
            value=0,
            compute_values=lambda _, p, a: np.tile(a.demographics[:, [1]],
                                                   (1, 1 + p.size)),
        ),
        MicroMoment(
            name="1 to 0 diversion ratio",
            dataset=inside_diversion_micro_dataset,
            value=0,
            compute_values=lambda _, p, a: np.concatenate([
                np.zeros((a.size, p.size, 1)),
                np.tile(p.product_ids.flat == 0, (a.size, p.size, 1))
            ],
                                                          axis=2),
        ),
        MicroMoment(
            name="outside to 1 diversion ratio",
            dataset=outside_diversion_micro_dataset,
            value=0,
            compute_values=lambda _, p, a: np.concatenate([
                np.zeros((a.size, 1 + p.size, 1)),
                np.tile(p.product_ids.flat == 1, (a.size, 1 + p.size, 1))
            ],
                                                          axis=2),
        ),
        MicroMoment(
            name="1 to outside diversion ratio",
            dataset=inside_diversion_micro_dataset,
            value=0,
            compute_values=lambda _, p, a: np.concatenate([
                np.ones((a.size, p.size, 1)),
                np.zeros((a.size, p.size, p.size))
            ],
                                                          axis=2),
        ),
        MicroMoment(
            name="diversion interaction",
            dataset=MicroDataset(
                name="inside first and second",
                observations=simulation.N,
                compute_weights=lambda _, p, a: np.ones(
                    (a.size, p.size, p.size)),
                market_ids=[simulation.unique_market_ids[12]],
            ),
            value=0,
            compute_values=lambda _, p, a:
            (np.tile(p.X2[:, [1]],
                     (a.size, 1, p.size)) * np.tile(p.X2[:, [1]].T,
                                                    (a.size, p.size, 1))),
        ),
    ])

    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
Exemple #6
0
def test_fixed_effects(simulated_problem, ED, ES):
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as including indicator variables. Also test that results that should be equal when
    there aren't any fixed effects are indeed equal.

    ``simulated_problem`` is unpacked into ``(simulation, product_data, problem, results)``. ``ED`` and ``ES`` are the
    numbers of demand- and supply-side fixed effects to add; ``ES`` is forced to zero when the problem has no supply
    side (``problem.K3 == 0``), and the comparison is skipped entirely when both counts are zero.
    """
    simulation, product_data, problem, results = simulated_problem

    # test that results that should be equal when there aren't any fixed effects are indeed equal
    for key in ['delta', 'tilde_costs', 'xi', 'omega', 'xi_jacobian', 'omega_jacobian']:
        result = getattr(results, key)
        true_result = getattr(results, f'true_{key}')
        assert (result is not None) == (true_result is not None)
        if result is not None:
            np.testing.assert_allclose(result, true_result, atol=1e-14, rtol=0, err_msg=key)

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # add fixed effect IDs to the data; a local random state keeps the draws reproducible without reseeding the
    # global NumPy generator
    state = np.random.RandomState(0)
    demand_names = []
    supply_names = []
    product_data = {k: product_data[k] for k in product_data.dtype.names}
    for side, count, names in [('demand', ED, demand_names), ('supply', ES, supply_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            # the probabilities must be passed by keyword: the third positional parameter of choice() is `replace`,
            # so passing them positionally would silently draw uniformly
            ids = state.choice(['a', 'b', 'c'], product_data['market_ids'].size, p=[0.7, 0.2, 0.1])
            product_data[name] = ids
            names.append(name)

    # remove constants (and the corresponding first instrument column) from sides that get fixed effects
    product_formulations = list(problem.product_formulations)
    if ED > 0:
        product_formulations[0] = Formulation(f'{product_formulations[0]._formula} - 1')
        product_data['demand_instruments'] = product_data['demand_instruments'][:, 1:]
    if ES > 0:
        product_formulations[2] = Formulation(f'{product_formulations[2]._formula} - 1')
        product_data['supply_instruments'] = product_data['supply_instruments'][:, 1:]

    # build formulas for the IDs
    demand_formula = ' + '.join(demand_names)
    supply_formula = ' + '.join(supply_names)

    # solve the first stage of a problem in which the fixed effects are absorbed
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        product_formulations1[0] = Formulation(product_formulations[0]._formula, demand_formula)
    if ES > 0:
        product_formulations1[2] = Formulation(product_formulations[2]._formula, supply_formula)
    problem1 = Problem(product_formulations1, product_data, problem.agent_formulation, simulation.agent_data)
    results1 = problem1.solve(simulation.sigma, simulation.pi, steps=1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables
    product_data2 = product_data.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        demand_indicators = build_matrix(Formulation(demand_formula), product_data)
        product_data2['demand_instruments'] = np.c_[product_data['demand_instruments'], demand_indicators]
        product_formulations2[0] = Formulation(f'{product_formulations[0]._formula} + {demand_formula}')
    if ES > 0:
        supply_indicators = build_matrix(Formulation(supply_formula), product_data)
        product_data2['supply_instruments'] = np.c_[product_data['supply_instruments'], supply_indicators]
        product_formulations2[2] = Formulation(f'{product_formulations[2]._formula} + {supply_formula}')
    problem2 = Problem(product_formulations2, product_data2, problem.agent_formulation, simulation.agent_data)
    results2 = problem2.solve(simulation.sigma, simulation.pi, steps=1)

    # test that all arrays expected to be identical are identical
    keys = [
        'theta', 'sigma', 'pi', 'beta', 'gamma', 'sigma_se', 'pi_se',
        'beta_se', 'gamma_se', 'true_delta', 'true_tilde_costs', 'true_xi',
        'true_omega', 'true_xi_jacobian', 'true_omega_jacobian', 'objective',
        'gradient', 'sigma_gradient', 'pi_gradient'
    ]
    for key in keys:
        result1 = getattr(results1, key)
        if result1 is not None:
            result2 = getattr(results2, key)
            # the indicator-variable problem has extra trailing linear parameters, so compare only the shared ones
            if 'beta' in key or 'gamma' in key:
                result2 = result2[:result1.size]
            np.testing.assert_allclose(result1, result2, atol=1e-8, rtol=1e-5, err_msg=key)