Exemple #1
0
def large_nested_logit_simulation() -> SimulationFixture:
    """Build and solve a large nested Logit simulation.

    The configuration has ten markets, a linear constant, linear prices, one characteristic that enters both the linear
    formulation and (in logs) the cost formulation, two further linear characteristics, three further cost
    characteristics, three nesting groups sharing a single nesting parameter, and a log-linear cost specification.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and an empty list.
    """
    id_data = build_id_data(T=10, J=20, F=9)

    # there is no second (nonlinear) formulation in a Logit model, so that slot is left as None
    linear_formulation = Formulation('1 + prices + x + y + z')
    cost_formulation = Formulation('0 + log(x) + a + b + c')

    # nesting and clustering IDs are each drawn from their own generator, both seeded with 2
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'nesting_ids': np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size),
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data=product_data,
        beta=[1, -6, 1, 2, 3],
        gamma=[0.1, 0.2, 0.3, 0.5],
        rho=0.1,
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    results = simulation.replace_endogenous()
    return simulation, results, {}, []
Exemple #2
0
def small_logit_simulation() -> SimulationFixture:
    """Build and solve a small Logit simulation with a scaled epsilon.

    The configuration has two markets, a linear constant, linear prices, one linear characteristic, and one cost
    characteristic. The exogenous variables ``x`` and ``a`` are replaced after construction.

    Returns the simulation, the results of the replacement, an empty dictionary, and an empty list.
    """
    id_data = build_id_data(T=2, J=18, F=3)

    # the middle formulation slot is None: this model has no nonlinear characteristics
    linear_formulation = Formulation('1 + prices + x')
    cost_formulation = Formulation('0 + a')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(0).choice(range(10), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data=product_data,
        beta=[1, -5, 1],
        gamma=2,
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        epsilon_scale=0.5,
        seed=0,
    )
    results = simulation.replace_exogenous('x', 'a')
    return simulation, results, {}, []
Exemple #3
0
def test_matrices(
        formula_data: Data, formulas: Iterable[str], build_columns: Callable[[Mapping[str, Array]], Sequence[Array]],
        build_derivatives: Callable[[Mapping[str, Array]], Sequence[Array]]) -> None:
    """Check that each of several equivalent formulas builds the expected columns and derivatives.

    Derivatives are taken with respect to ``x``. For every formula, the matrix built by the formulation is compared
    both with the expected columns and with a matrix re-evaluated column by column, and the evaluated derivatives are
    compared with the expected derivatives.
    """

    # columns of ones and zeros with the same shape as x, used to broadcast scalars and to stand in for constants
    x_column = formula_data['x']
    ones = np.ones_like(x_column)
    zeros = np.zeros_like(x_column)

    # all comparisons use the same purely absolute tolerance
    tolerances = {'rtol': 0, 'atol': 1e-14}

    for formula in formulas:
        # the formulation must be constructible and have a non-empty string representation
        formulation = Formulation(formula)
        assert str(formulation)

        # build the matrix, then separately evaluate each column formulation and its derivative
        matrix, column_formulations, underlying_data = formulation._build_matrix(formula_data)
        evaluated_columns = [ones * column.evaluate(underlying_data) for column in column_formulations]
        derivative_columns = [ones * column.evaluate_derivative('x', underlying_data) for column in column_formulations]
        evaluated_matrix = np.column_stack(evaluated_columns)
        derivatives = np.column_stack(derivative_columns)

        # the expected values may refer to constant columns through the keys '1' and '0'
        supplemented_data = {'1': ones, '0': zeros, **underlying_data}
        expected_matrix = np.column_stack(build_columns(supplemented_data))
        expected_derivatives = np.column_stack(build_derivatives(supplemented_data))

        # on failure, report which formula was being checked
        np.testing.assert_allclose(matrix, expected_matrix, err_msg=formula, **tolerances)
        np.testing.assert_allclose(matrix, evaluated_matrix, err_msg=formula, **tolerances)
        np.testing.assert_allclose(derivatives, expected_derivatives, err_msg=formula, **tolerances)
Exemple #4
0
def medium_blp_simulation() -> SimulationFixture:
    """Build and solve a medium-sized BLP simulation.

    The configuration has four markets, constants in the linear, nonlinear, and cost formulations, two linear
    characteristics, two cost characteristics, a demographic interacted with squared prices, and an alternative
    ownership structure.

    Returns the simulation together with the output of ``simulation.solve()``.
    """
    id_data = build_id_data(T=4, J=25, F=6)

    def ownership_weight(f, g):
        """Weight a firm's own products fully and, among firm IDs above 3, weight other firms' products by 0.1."""
        if f == g:
            return 1
        return 0.1 if f > 3 and g > 3 else 0

    linear_formulation = Formulation('1 + x + y')
    nonlinear_formulation = Formulation('1 + I(prices ** 2)')
    cost_formulation = Formulation('1 + a + b')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(1).choice(range(20), id_data.size),
        'ownership': build_ownership(id_data, ownership_weight),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[1, 2, 1],
        sigma=[
            [0.5, 0],
            [0.0, 0],
        ],
        gamma=[1, 1, 2],
        product_data=product_data,
        agent_formulation=Formulation('0 + f'),
        pi=[[0], [-3]],
        integration=Integration('product', 4),
        xi_variance=0.0001,
        omega_variance=0.0001,
        correlation=0.8,
        seed=1,
    )
    solved = simulation.solve()
    return simulation, solved
Exemple #5
0
def small_nested_blp_simulation() -> SimulationFixture:
    """Build and solve a small nested BLP simulation.

    The configuration has eight markets, linear prices, one characteristic that is both linear and nonlinear, one
    further linear characteristic, three cost characteristics, and two nesting groups with different nesting
    parameters.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and an empty list.
    """
    id_data = build_id_data(T=8, J=18, F=3)

    linear_formulation = Formulation('0 + prices + x + z')
    nonlinear_formulation = Formulation('0 + x')
    cost_formulation = Formulation('0 + a + b + c')

    # nesting and clustering IDs are each drawn from their own generator, both seeded with 0
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'nesting_ids': np.random.RandomState(0).choice(['f', 'g'], id_data.size),
        'clustering_ids': np.random.RandomState(0).choice(range(10), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[-5, 1, 2],
        sigma=2,
        gamma=[2, 1, 1],
        rho=[0.1, 0.2],
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    results = simulation.replace_endogenous()
    return simulation, results, {}, []
Exemple #6
0
def large_simulation():
    """Solve a simulation with ten markets, linear/nonlinear prices, a linear constant, a cost/linear/nonlinear
    characteristic, another three cost characteristics, another two linear characteristics, demographics interacted with
    prices and the cost/linear/nonlinear characteristic, dense parameter matrices, an acquisition, a triple acquisition,
    and a log-linear cost specification.

    Returns the simulation and its solved product data with a ``clustering_ids`` field appended. Clustering IDs are
    drawn from a dedicated generator seeded with the same value as the simulation so that the fixture is reproducible.
    """
    # the recfunctions submodule is not guaranteed to be loaded by a plain ``import numpy``, so import it explicitly
    from numpy.lib import recfunctions

    simulation = Simulation(
        product_formulations=(Formulation('1 + prices + x + y + z'),
                              Formulation('0 + prices + x'),
                              Formulation('0 + log(x) + a + b + c')),
        beta=[1, -6, 1, 2, 3],
        sigma=[[1, -0.1], [0, 2]],
        gamma=[0.1, 0.2, 0.3, 0.5],
        product_data=build_id_data(
            T=10, J=20, F=9, mergers=[{f: 4 + int(f > 0)
                                       for f in range(4)}]),
        agent_formulation=Formulation('0 + f + g'),
        pi=[[1, 0], [0, 2]],
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        linear_costs=False,
        seed=2)

    # use a seeded generator instead of the global numpy state, which would make the IDs differ between runs
    clustering_ids = np.random.RandomState(2).choice(['a', 'b', 'c', 'd'], simulation.N)
    product_data = recfunctions.rec_append_fields(simulation.solve(), 'clustering_ids', clustering_ids)
    return simulation, product_data
Exemple #7
0
def large_logit_simulation() -> SimulationFixture:
    """Build and solve a large Logit simulation.

    The configuration has ten markets, a linear constant, linear prices, one characteristic that enters both the linear
    formulation and (in logs) the cost formulation, two further linear characteristics, three further cost
    characteristics, and a log-linear cost specification.

    Returns the simulation together with the output of ``simulation.solve()``.
    """
    id_data = build_id_data(T=10, J=20, F=9)

    # with no nonlinear characteristics, both the second formulation and sigma are None
    linear_formulation = Formulation('1 + prices + x + y + z')
    cost_formulation = Formulation('0 + log(x) + a + b + c')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        beta=[1, -6, 1, 2, 3],
        sigma=None,
        gamma=[0.1, 0.2, 0.3, 0.5],
        product_data=product_data,
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.1,
        costs_type='log',
        seed=2,
    )
    solved = simulation.solve()
    return simulation, solved
Exemple #8
0
def small_logit_simulation() -> SimulationFixture:
    """Build and solve a small Logit simulation with an acquisition.

    The configuration has two markets, a linear constant, linear prices, one linear characteristic, and one cost
    characteristic. The ID data are built with a single merger mapping, ``{1: 0}``.

    Returns the simulation together with the output of ``simulation.solve()``.
    """
    id_data = build_id_data(T=2, J=18, F=3, mergers=[{1: 0}])

    # with no nonlinear characteristics, both the second formulation and sigma are None
    linear_formulation = Formulation('1 + prices + x')
    cost_formulation = Formulation('0 + a')

    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': clustering_ids,
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        beta=[1, -5, 1],
        sigma=None,
        gamma=2,
        product_data=product_data,
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    solved = simulation.solve()
    return simulation, solved
Exemple #9
0
def knittel_metaxoglou_2014():
    """Set up the automobile example problem from Knittel and Metaxoglou (2014) and load the initial parameter values
    and estimates that were produced by replication code.

    The replication code was changed so that it writes a Matlab data file for the automobile dataset. That file holds
    the results of one round of Knitro optimization and of post-estimation calculations. Apart from the following small
    changes, the replication code was left mostly intact:

        - Tolerance parameters, Knitro optimization parameters, and starting values for sigma were all configured.
        - A bug in how the code computed the BLP instruments was fixed. When it created a vector of "other" and "rival"
          sums, no dimension to sum over was given, which caused problems with one-dimensional vectors. A dimension
          of 1 was added to both sum commands.
        - Delta was initialized as the solution to the Logit model.
        - After estimation, the objective was called once more at the optimal parameters so that globals were
          re-loaded at the optimal parameter values.
        - Matrices were renamed and reshaped before being saved to the Matlab data file.

    The return value is whatever ``scipy.io.loadmat`` returns when it is given the data file path and, as its second
    argument, a dictionary that already holds the configured problem under the key ``'problem'``.
    """
    # convert the record array of products into a plain dictionary of columns so that a new column can be added
    raw_products = np.recfromcsv(BLP_PRODUCTS_LOCATION)
    product_data = {name: raw_products[name] for name in raw_products.dtype.names}

    instrument_formulation = Formulation('hpwt + air + mpg + space')
    product_data['demand_instruments'] = build_blp_instruments(instrument_formulation, product_data)

    # the constant is written as I(1), with the default intercept switched off by the leading 0
    linear_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg + space')
    nonlinear_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg')
    problem = Problem(
        product_formulations=(linear_formulation, nonlinear_formulation),
        product_data=product_data,
        agent_data=np.recfromcsv(BLP_AGENTS_LOCATION),
    )

    mat_location = TEST_DATA_PATH / 'knittel_metaxoglou_2014.mat'
    return scipy.io.loadmat(str(mat_location), {'problem': problem})
Exemple #10
0
def small_blp_simulation() -> SimulationFixture:
    """Build and solve a small BLP simulation with an acquisition.

    The configuration has three markets, linear prices, one characteristic that is both linear and nonlinear, and two
    cost characteristics. The ID data are built with a single merger mapping, ``{1: 0}``.

    Returns the simulation together with the output of ``simulation.solve()``.
    """
    id_data = build_id_data(T=3, J=18, F=3, mergers=[{1: 0}])

    linear_formulation = Formulation('0 + prices + x')
    nonlinear_formulation = Formulation('0 + x')
    cost_formulation = Formulation('0 + a + b')

    clustering_ids = np.random.RandomState(0).choice(range(10), id_data.size)
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': clustering_ids,
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[-5, 1],
        sigma=2,
        gamma=[2, 1],
        product_data=product_data,
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    solved = simulation.solve()
    return simulation, solved
Exemple #11
0
def small_nested_logit_simulation() -> SimulationFixture:
    """Build and solve a small nested Logit simulation.

    The configuration has four markets, linear prices, two linear characteristics, two cost characteristics, and two
    nesting groups with different nesting parameters.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and an empty list.
    """
    id_data = build_id_data(T=4, J=18, F=3)

    # the middle formulation slot is None: this model has no nonlinear characteristics
    linear_formulation = Formulation('0 + prices + x + y')
    cost_formulation = Formulation('0 + a + b')

    # nesting and clustering IDs are each drawn from their own generator, both seeded with 0
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'nesting_ids': np.random.RandomState(0).choice(['f', 'g'], id_data.size),
        'clustering_ids': np.random.RandomState(0).choice(range(10), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data=product_data,
        beta=[-5, 1, 1],
        gamma=[2, 1],
        rho=[0.1, 0.2],
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    results = simulation.replace_endogenous()
    return simulation, results, {}, []
Exemple #12
0
def small_blp_simulation() -> SimulationFixture:
    """Build and solve a small BLP simulation with uniform unobserved product characteristics.

    The configuration has three markets, linear prices, one characteristic that is both linear and nonlinear, and two
    cost characteristics. Instead of variances and a correlation, xi and omega are passed in directly: each is the sum
    of a shared column of scaled uniform draws and a column of its own.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and an empty list.
    """
    id_data = build_id_data(T=3, J=18, F=3)

    # three columns of uniform draws scaled by 0.001: the first is shared, the others are specific to xi and omega
    scaled_draws = 0.001 * np.random.RandomState(0).uniform(size=(id_data.size, 3))
    shared_draws = scaled_draws[:, 0]

    linear_formulation = Formulation('0 + prices + x')
    nonlinear_formulation = Formulation('0 + x')
    cost_formulation = Formulation('0 + a + b')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(0).choice(range(10), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[-5, 1],
        sigma=2,
        gamma=[2, 1],
        integration=Integration('product', 3),
        xi=shared_draws + scaled_draws[:, 1],
        omega=shared_draws + scaled_draws[:, 2],
        seed=0,
    )
    results = simulation.replace_endogenous()
    return simulation, results, {}, []
Exemple #13
0
def small_simulation():
    """Solve a simulation with two markets, linear prices, a nonlinear characteristic, a cost characteristic, and an
    acquisition.

    Returns the simulation and its solved product data with a ``clustering_ids`` field appended. Clustering IDs are
    drawn from a dedicated generator seeded with the same value as the simulation so that the fixture is reproducible.
    """
    # the recfunctions submodule is not guaranteed to be loaded by a plain ``import numpy``, so import it explicitly
    from numpy.lib import recfunctions

    simulation = Simulation(
        product_formulations=(
            Formulation('0 + prices'),
            Formulation('0 + x'),
            Formulation('0 + a'),
        ),
        beta=-5,
        sigma=1,
        gamma=2,
        product_data=build_id_data(T=2, J=18, F=3, mergers=[{1: 0}]),
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )

    # use a seeded generator instead of the global numpy state, which would make the IDs differ between runs
    clustering_ids = np.random.RandomState(0).choice(['a', 'b'], simulation.N)
    product_data = recfunctions.rec_append_fields(simulation.solve(), 'clustering_ids', clustering_ids)
    return simulation, product_data
Exemple #14
0
def large_logit_simulation() -> SimulationFixture:
    """Build and solve a large Logit simulation with quantity-dependent costs.

    The configuration has ten markets, a linear constant, linear prices, one characteristic that enters both the linear
    formulation and (in logs) the cost formulation, two further linear characteristics, two further cost
    characteristics, and a log-linear cost specification that also includes a scaled shares term.

    Returns the simulation, the results of replacing its endogenous variables with ``constant_costs=False``, an empty
    dictionary, and an empty list.
    """
    id_data = build_id_data(T=10, J=20, F=9)

    # the middle formulation slot is None: this model has no nonlinear characteristics
    linear_formulation = Formulation('1 + prices + x + y + z')
    cost_formulation = Formulation('0 + log(x) + a + b + I(0.5 * shares)')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, None, cost_formulation),
        product_data=product_data,
        beta=[1, -6, 1, 2, 3],
        gamma=[0.1, 0.2, 0.3, -0.2],
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.1,
        costs_type='log',
        seed=2,
    )

    # costs depend on shares here, so they are not held constant when endogenous variables are replaced
    results = simulation.replace_endogenous(constant_costs=False)
    return simulation, results, {}, []
def test_ids(
        formula_data: Data, formulas: Iterable[str],
        build_columns: Callable[[Mapping[str, Array]],
                                Sequence[Array]]) -> None:
    """Check that each of several equivalent absorb formulas builds the expected IDs.

    A deep copy of the formula data is first supplemented with object columns of tuples for every pair and triple of
    existing columns, keyed by the concatenated column names. The expected IDs can then refer to interactions by those
    names.
    """

    def tuple_column(first: Array, *others: Array) -> Array:
        """Zip columns into one object column of tuples, using the first column as the template for the output."""
        combined = np.empty_like(first, np.object_)
        combined[:] = list(zip(first, *others))
        return combined

    # work on a copy so that the caller's data are not modified
    formula_data = copy.deepcopy(formula_data)

    # itertools.product consumes its inputs up front, so adding keys inside the loop does not affect the iteration
    for item1, item2, item3 in itertools.product(formula_data.items(), repeat=3):
        name1, column1 = item1
        name2, column2 = item2
        name3, column3 = item3

        pair_name = f'{name1}{name2}'
        if pair_name not in formula_data:
            formula_data[pair_name] = tuple_column(column1, column2)

        triple_name = f'{name1}{name2}{name3}'
        if triple_name not in formula_data:
            formula_data[triple_name] = tuple_column(column1, column2, column3)

    # each absorb formula must format to a non-empty string and build exactly the expected IDs
    for absorb in formulas:
        formulation = Formulation('x', absorb)
        assert str(formulation)
        built_ids = formulation._build_ids(formula_data)
        expected_ids = np.column_stack(build_columns(formula_data))
        np.testing.assert_array_equal(built_ids, expected_ids, err_msg=absorb)
Exemple #16
0
def medium_simulation():
    """Build and solve a medium-sized simulation.

    The configuration has four markets, constants in the nonlinear and cost formulations, two linear characteristics,
    two cost characteristics, a demographic interacted with prices, a double acquisition, and a non-standard ownership
    structure.

    Returns the simulation together with the output of ``simulation.solve()``.
    """
    # firm IDs 0 and 1 are both mapped to 2 by the single merger mapping
    id_data = build_id_data(T=4, J=25, F=6, mergers=[dict.fromkeys(range(2), 2)])

    def ownership_weight(f, g):
        """Weight a firm's own products fully and, among firm IDs above 3, weight other firms' products by 0.1."""
        if f == g:
            return 1
        return 0.1 if f > 3 and g > 3 else 0

    linear_formulation = Formulation('0 + x + y')
    nonlinear_formulation = Formulation('1 + prices')
    cost_formulation = Formulation('1 + a + b')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'ownership': build_ownership(id_data, ownership_weight),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=[2, 1],
        sigma=[[0.5, 0], [0, 0]],
        gamma=[1, 1, 2],
        product_data=product_data,
        agent_formulation=Formulation('0 + f'),
        pi=[[0], [-3]],
        integration=Integration('product', 4),
        xi_variance=0.0001,
        omega_variance=0.0001,
        correlation=0.8,
        seed=1,
    )
    solved = simulation.solve()
    return simulation, solved
Exemple #17
0
def test_invalid_formula(formula_data, formula):
    """Test that an invalid formula gives rise to an exception.

    The formula is expected to fail either when the formulation is constructed or when it is built against the data.
    If neither step raises, a ``RuntimeError`` that names the formula and the resulting formulation is raised.
    """
    try:
        formulation = Formulation(formula)
        formulation._build(formula_data)
    except Exception:
        # any ordinary exception counts as a rejection of the formula; unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate instead of being counted as a passing test
        return
    raise RuntimeError(
        f"The formula '{formula}' was successfully formulated as '{formulation}'."
    )
Exemple #18
0
def medium_blp_simulation() -> SimulationFixture:
    """Build and solve a medium-sized BLP simulation with a scaled epsilon and one simulated micro moment.

    The ID data are built with ``T=10``. The configuration has constants in the linear, nonlinear, and cost
    formulations, a linear characteristic, linear prices, two cost characteristics, a demographic interacted with
    squared prices, and an alternative ownership structure.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and the micro
    moments whose values were replaced using those results.
    """
    id_data = build_id_data(T=10, J=25, F=6)

    def ownership_weight(f, g):
        """Weight a firm's own products fully and, among firm IDs above 3, weight other firms' products by 0.1."""
        if f == g:
            return 1
        return 0.1 if f > 3 and g > 3 else 0

    linear_formulation = Formulation('1 + x + prices')
    nonlinear_formulation = Formulation('1 + I(prices**2)')
    cost_formulation = Formulation('1 + a + b')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(1).choice(range(20), id_data.size),
        'ownership': build_ownership(id_data, ownership_weight),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[1, 2, -3],
        sigma=[
            [0.5, 0],
            [0.0, 0],
        ],
        pi=[[0.0], [-0.1]],
        gamma=[1, 1, 2],
        agent_formulation=Formulation('0 + f'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.8,
        epsilon_scale=0.7,
        seed=1,
    )
    simulation_results = simulation.replace_endogenous()

    def unit_weights(_, p, a):
        """Give every agent-product pair a weight of one."""
        return np.ones((a.size, p.size))

    def interaction_values(_, p, a):
        """Multiply the first X2 column (as a row) by the first demographics column."""
        return p.X2[:, [0]].T * a.demographics[:, [0]]

    # the dataset is restricted to the third unique market
    inside_dataset = MicroDataset(
        name="inside",
        observations=simulation.N,
        compute_weights=unit_weights,
        market_ids=[simulation.unique_market_ids[2]],
    )
    interaction_moment = MicroMoment(
        name="demographic interaction",
        dataset=inside_dataset,
        value=0,
        compute_values=interaction_values,
    )

    # the placeholder value of zero is replaced using the simulation results
    simulated_micro_moments = simulation_results.replace_micro_moment_values([interaction_moment])

    return simulation, simulation_results, {}, simulated_micro_moments
Exemple #19
0
def large_blp_simulation() -> SimulationFixture:
    """Build and solve a large BLP simulation with differentiation instruments and two micro moments.

    The configuration starts from 20 markets and keeps a seeded random half of the rows, so the number of products
    varies by market. It has a linear constant, a lognormal coefficient on negated prices, one characteristic that is
    linear, nonlinear, and (in logs) a cost shifter, three further linear characteristics, two further cost
    characteristics, demographics in the agent formulation, dense parameter matrices, and a log-linear cost
    specification.

    Returns the simulation, the results of replacing its endogenous variables, a dictionary overriding the demand and
    supply instruments, and a list of two first choice covariance moments.
    """
    full_id_data = build_id_data(T=20, J=20, F=9)

    # shuffle the row indices with a fixed seed and keep the first half
    shuffled_indices = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * full_id_data.size)
    id_data = full_id_data[shuffled_indices[:kept_count]]

    linear_formulation = Formulation('1 + x + y + z + q')
    nonlinear_formulation = Formulation('0 + I(-prices) + x')
    cost_formulation = Formulation('0 + log(x) + log(a) + log(b)')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[1, 1, 2, 3, 1],
        sigma=[[0.5, 0], [-0.1, 2]],
        pi=[[2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['lognormal', 'normal'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    # each side uses differentiation instruments for some characteristics plus the plain columns of the others
    solved_data = simulation_results.product_data
    demand_differentiation = build_differentiation_instruments(Formulation('0 + x + y + z + q'), solved_data)
    demand_shifters = build_matrix(Formulation('0 + a + b'), solved_data)
    supply_differentiation = build_differentiation_instruments(Formulation('0 + x + a + b'), solved_data)
    supply_shifters = build_matrix(Formulation('0 + y + z + q'), solved_data)
    simulated_data_override = {
        'demand_instruments': np.c_[demand_differentiation, demand_shifters],
        'supply_instruments': np.c_[supply_differentiation, supply_shifters],
    }

    # the two moments cover the first five and the last three unique markets, respectively
    leading_markets = simulation.unique_market_ids[:5]
    trailing_markets = simulation.unique_market_ids[-3:]
    simulated_micro_moments = [
        FirstChoiceCovarianceMoment(X2_index=1, demographics_index=1, value=0, market_ids=leading_markets),
        FirstChoiceCovarianceMoment(X2_index=0, demographics_index=1, value=0, market_ids=trailing_markets),
    ]
    return simulation, simulation_results, simulated_data_override, simulated_micro_moments
Exemple #20
0
def large_nested_blp_simulation() -> SimulationFixture:
    """Build and solve a large nested BLP simulation with one micro moment.

    The configuration starts from 20 markets and keeps a seeded random half of the rows, so the number of products
    varies by market. It has a linear constant, a lognormal coefficient on negated prices, one characteristic that is
    linear, nonlinear, and (in logs) a cost shifter, three further linear characteristics, two further cost
    characteristics, demographics in the agent formulation, three nesting groups sharing a single nesting parameter,
    and a log-linear cost specification.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and a list with a
    single demographic expectation moment.
    """
    full_id_data = build_id_data(T=20, J=20, F=9)

    # shuffle the row indices with a fixed seed and keep the first half
    shuffled_indices = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * full_id_data.size)
    id_data = full_id_data[shuffled_indices[:kept_count]]

    linear_formulation = Formulation('1 + x + y + z + q')
    nonlinear_formulation = Formulation('0 + I(-prices) + x')
    cost_formulation = Formulation('0 + log(x) + log(a) + log(b)')

    # nesting and clustering IDs are each drawn from their own generator, both seeded with 2
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'nesting_ids': np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size),
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[1, 1, 2, 3, 1],
        sigma=[[0.5, 0], [0.0, 2]],
        pi=[[2, 1, 0], [0, 0, 2]],
        gamma=[0.1, 0.2, 0.3],
        rho=0.1,
        agent_formulation=Formulation('1 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        distributions=['lognormal', 'normal'],
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.replace_endogenous()

    # the moment is restricted to the fourth and fifth unique markets
    moment_markets = simulation.unique_market_ids[3:5]
    expectation_moment = DemographicExpectationMoment(
        product_id=None, demographics_index=1, value=0, market_ids=moment_markets
    )
    return simulation, simulation_results, {}, [expectation_moment]
Exemple #21
0
def medium_blp_simulation() -> SimulationFixture:
    """Build and solve a medium-sized BLP simulation with a scaled epsilon and one micro moment.

    The configuration has four markets, constants in the linear, nonlinear, and cost formulations, two linear
    characteristics, two cost characteristics, a demographic interacted with squared prices, and an alternative
    ownership structure.

    Returns the simulation, the results of replacing its endogenous variables, an empty dictionary, and a list with a
    single demographic covariance moment.
    """
    id_data = build_id_data(T=4, J=25, F=6)

    def ownership_weight(f, g):
        """Weight a firm's own products fully and, among firm IDs above 3, weight other firms' products by 0.1."""
        if f == g:
            return 1
        return 0.1 if f > 3 and g > 3 else 0

    linear_formulation = Formulation('1 + x + y')
    nonlinear_formulation = Formulation('1 + I(prices**2)')
    cost_formulation = Formulation('1 + a + b')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(1).choice(range(20), id_data.size),
        'ownership': build_ownership(id_data, ownership_weight),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[1, 2, 1],
        sigma=[
            [0.5, 0],
            [0.0, 0],
        ],
        pi=[[0], [-3]],
        gamma=[1, 1, 2],
        agent_formulation=Formulation('0 + f'),
        integration=Integration('product', 4),
        xi_variance=0.0001,
        omega_variance=0.0001,
        correlation=0.8,
        epsilon_scale=0.7,
        seed=1,
    )
    simulation_results = simulation.replace_endogenous()

    # the moment is restricted to the third unique market
    covariance_moment = DemographicCovarianceMoment(
        X2_index=0,
        demographics_index=0,
        value=0,
        observations=simulation.N,
        market_ids=[simulation.unique_market_ids[2]],
    )
    return simulation, simulation_results, {}, [covariance_moment]
Exemple #22
0
def large_nested_blp_simulation() -> SimulationFixture:
    """Build and solve a large nested BLP simulation with two micro moments.

    The configuration starts from 20 markets and keeps a seeded random half of the rows, so the number of products
    varies by market. It has a linear constant, prices that are both linear and nonlinear, one characteristic that is
    linear, nonlinear, and (in logs) a cost shifter, three further linear characteristics, two further cost
    characteristics, demographics in the agent formulation, three nesting groups sharing a single nesting parameter,
    and a log-linear cost specification.

    Returns the simulation, the output of ``simulation.solve()``, and a list of two products-agents covariance moments.
    """
    full_id_data = build_id_data(T=20, J=20, F=9)

    # shuffle the row indices with a fixed seed and keep the first half
    shuffled_indices = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(shuffled_indices)
    kept_count = int(0.5 * full_id_data.size)
    id_data = full_id_data[shuffled_indices[:kept_count]]

    linear_formulation = Formulation('1 + prices + x + y + z + q')
    nonlinear_formulation = Formulation('0 + prices + x')
    cost_formulation = Formulation('0 + log(x) + log(a) + log(b)')

    # nesting and clustering IDs are each drawn from their own generator, both seeded with 2
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'nesting_ids': np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size),
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        product_data=product_data,
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, 0], [0, 2]],
        pi=[[1, 0], [0, 2]],
        gamma=[0.1, 0.2, 0.3],
        rho=0.1,
        agent_formulation=Formulation('0 + f + g'),
        integration=Integration('product', 4),
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.solve()

    # one moment per matching pair of X2 and demographics indices
    simulated_micro_moments = [
        ProductsAgentsCovarianceMoment(X2_index=index, demographics_index=index, value=0) for index in (0, 1)
    ]
    return simulation, simulation_results, simulated_micro_moments
Exemple #23
0
def small_simulation():
    """Build and solve a small simulation with an acquisition.

    The configuration has two markets, linear prices, one nonlinear characteristic, and one cost characteristic. The
    ID data are built with a single merger mapping, ``{1: 0}``, and passed directly as the product data.

    Returns the simulation together with the output of ``simulation.solve()``.
    """
    linear_formulation = Formulation('0 + prices')
    nonlinear_formulation = Formulation('0 + x')
    cost_formulation = Formulation('0 + a')

    simulation = Simulation(
        product_formulations=(linear_formulation, nonlinear_formulation, cost_formulation),
        beta=-5,
        sigma=1,
        gamma=2,
        product_data=build_id_data(T=2, J=18, F=3, mergers=[{1: 0}]),
        integration=Integration('product', 3),
        xi_variance=0.001,
        omega_variance=0.001,
        correlation=0.7,
        seed=0,
    )
    solved = simulation.solve()
    return simulation, solved
Exemple #24
0
def test_optimal_instruments(simulated_problem: SimulatedProblemFixture,
                             compute_options: Options) -> None:
    """Test that, under optimal instruments, starting parameters that are half their true values still give rise to
    estimates within 10% of the simulation's parameters.

    The optimal instruments are computed from the fixture's problem results (with the given compute options, a small
    number of draws, and a fixed seed), converted to a new problem, and that problem is solved from halved starting
    values. Estimated ``beta``, ``sigma``, ``pi``, and ``rho`` (plus ``gamma`` when there is a supply side) are then
    compared with the corresponding attributes of the simulation.
    """
    simulation, product_data, problem, solve_options, problem_results = simulated_problem

    # copy each named field of the product data into a plain dictionary so that new columns can be added
    columns = {}
    for field in product_data.dtype.names:
        columns[field] = product_data[field]

    # give every column of the full demand-side instrument matrix its own name so formulas can refer to it
    instrument_columns = {f'ZD{position}': column for position, column in enumerate(problem.products.ZD.T)}
    columns.update(instrument_columns)
    ZD_names: List[str] = list(instrument_columns)

    # without a supply side, expected prices come from a reduced form regression of prices on all instruments
    expected_prices = None
    if problem.K3 == 0:
        prices = columns['prices']
        reduced_form = Formulation('0 + ' + ' + '.join(ZD_names))
        expected_prices = compute_fitted_values(prices, reduced_form, columns)

    # compute optimal instruments with only a few draws (for speed) and turn the results into a new problem
    instrument_options = compute_options.copy()
    instrument_options.update(draws=5, seed=0, expected_prices=expected_prices)
    optimal_instrument_results = problem_results.compute_optimal_instruments(**instrument_options)
    new_problem = optimal_instrument_results.to_problem()

    # start the new problem from half of each configured parameter value
    halved_solve_options = solve_options.copy()
    for name in ['sigma', 'pi', 'rho', 'beta']:
        halved_solve_options[name] = 0.5 * solve_options[name]
    new_results = new_problem.solve(**halved_solve_options)

    # estimates must be within 10% (relative) of the simulation's parameters; gamma only exists with a supply side
    checked = ['beta', 'sigma', 'pi', 'rho'] + (['gamma'] if problem.K3 > 0 else [])
    for name in checked:
        truth = getattr(simulation, name)
        estimate = getattr(new_results, name)
        np.testing.assert_allclose(truth, estimate, atol=0, rtol=0.1, err_msg=name)
Exemple #25
0
def large_nested_blp_simulation() -> SimulationFixture:
    """Build and solve a large nested BLP simulation.

    The configuration has ten markets, a linear constant, linear/nonlinear prices, a linear/nonlinear/cost
    characteristic, another three linear characteristics, another four cost characteristics, demographics interacted
    with prices and the linear/nonlinear/cost characteristic, three nesting groups with the same nesting parameter, an
    acquisition, a triple acquisition, and a log-linear cost specification.

    Returns a pair: the simulation itself, followed by whatever its ``solve`` method returns.
    """
    # firm 0 maps to 4 (the single acquisition) while firms 1, 2, and 3 all map to 5 (the triple acquisition)
    acquisitions = {0: 4, 1: 5, 2: 5, 3: 5}
    id_data = build_id_data(T=10, J=20, F=9, mergers=[acquisitions])

    # linear, nonlinear, and cost-side formulations
    linear = Formulation('1 + prices + x + y + z + q')
    nonlinear = Formulation('0 + prices + x')
    cost = Formulation('0 + log(x) + a + b + c + d')

    # nesting and clustering IDs are drawn from separate, identically seeded random states
    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'nesting_ids': np.random.RandomState(2).choice(['f', 'g', 'h'], id_data.size),
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }
    demographics = Formulation('0 + f + g')
    quadrature = Integration('product', 4)

    simulation = Simulation(
        product_formulations=(linear, nonlinear, cost),
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, 0], [0, 2]],
        gamma=[0.1, 0.2, 0.3, 0.1, 0.3],
        product_data=product_data,
        agent_formulation=demographics,
        pi=[[1, 0], [0, 2]],
        integration=quadrature,
        rho=0.05,
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    results = simulation.solve()
    return simulation, results
Exemple #26
0
def test_invalid_formulas(formula_data, exception, formula, absorb):
    """Test that constructing a formulation from an invalid formula, or building its matrix (and its IDs, when
    ``absorb`` is given) from the formula data, raises the expected exception.

    The traceback of the expected exception is printed. If nothing is raised, the test fails with a ``RuntimeError``
    that reports the formulation that was unexpectedly built.
    """
    try:
        formulation = Formulation(formula, absorb)
        formulation._build_matrix(formula_data)
        # IDs are only built when there is something to absorb
        if absorb is not None:
            formulation._build_ids(formula_data)
    except exception:
        # the expected failure: print its traceback and let the test pass
        print(traceback.format_exc())
    else:
        raise RuntimeError(f"Successful formulation: {formulation}.")
Exemple #27
0
def test_valid_formulas(formula_data, formulas, build_columns,
                        build_derivatives):
    """Test that each of a set of equivalent formulas builds the expected columns and the expected derivatives with
    respect to x.

    For every formula, the built matrix is compared both with the columns produced by ``build_columns`` and with the
    columns obtained by evaluating each column formulation directly. Evaluated derivatives are compared with the output
    of ``build_derivatives``. All comparisons use an absolute tolerance of 1e-14.
    """

    # convenience columns of ones and zeros shaped like x
    ones = np.ones_like(formula_data['x'])
    zeros = np.zeros_like(formula_data['x'])

    def stack(values):
        """Stack values as columns, multiplying by ones so that scalar values become full columns."""
        return np.column_stack([ones * value for value in values])

    for formula in formulas:
        # build the matrix along with the pieces needed to re-evaluate its columns and their derivatives
        matrix, column_formulations, underlying_data = Formulation(formula)._build(formula_data)
        evaluated_matrix = stack(column.evaluate(underlying_data) for column in column_formulations)
        derivatives = stack(column.evaluate_derivative('x', underlying_data) for column in column_formulations)

        # expected columns and derivatives are built from the underlying data plus the convenience columns
        supplemented_data = {'1': ones, '0': zeros}
        supplemented_data.update(underlying_data)
        expected_matrix = np.column_stack(build_columns(supplemented_data))
        expected_derivatives = np.column_stack(build_derivatives(supplemented_data))

        # compare columns first (against both references) and derivatives last
        comparisons = [
            (matrix, expected_matrix),
            (matrix, evaluated_matrix),
            (derivatives, expected_derivatives),
        ]
        for actual, desired in comparisons:
            np.testing.assert_allclose(actual, desired, rtol=0, atol=1e-14, err_msg=formula)
Exemple #28
0
def large_blp_simulation() -> SimulationFixture:
    """Build and solve a large BLP simulation, then replace its demand-side instruments.

    The configuration has 20 markets, varying numbers of products per market, a linear constant, linear/nonlinear
    prices, a linear/nonlinear/cost characteristic, another three linear characteristics, another two cost
    characteristics, demographics interacted with prices and the linear/nonlinear/cost characteristic, dense parameter
    matrices, a log-linear cost specification, and local differentiation instruments on the demand side.

    Returns a pair: the simulation itself, followed by the simulation results with updated demand-side instruments.
    """
    # keep a random half of the ID rows (seeded shuffle) so that the number of products varies across markets
    full_id_data = build_id_data(T=20, J=20, F=9)
    shuffled_rows = np.arange(full_id_data.size)
    np.random.RandomState(0).shuffle(shuffled_rows)
    kept_count = int(0.5 * full_id_data.size)
    id_data = full_id_data[shuffled_rows[:kept_count]]

    # linear, nonlinear, and cost-side formulations
    linear = Formulation('1 + prices + x + y + z + q')
    nonlinear = Formulation('0 + prices + x')
    cost = Formulation('0 + log(x) + log(a) + log(b)')

    product_data = {
        'market_ids': id_data.market_ids,
        'firm_ids': id_data.firm_ids,
        'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size),
    }
    demographics = Formulation('0 + f + g')
    quadrature = Integration('product', 4)

    simulation = Simulation(
        product_formulations=(linear, nonlinear, cost),
        beta=[1, -10, 1, 2, 3, 1],
        sigma=[[1, -0.1], [0, +2.0]],
        gamma=[0.1, 0.2, 0.3],
        product_data=product_data,
        agent_formulation=demographics,
        pi=[[1, 0], [0, 2]],
        integration=quadrature,
        xi_variance=0.00001,
        omega_variance=0.00001,
        correlation=0.9,
        costs_type='log',
        seed=2,
    )
    simulation_results = simulation.solve()

    # new demand-side instruments: differentiation instruments from x, y, z, and q, followed by columns for a and b
    solved_data = simulation_results.product_data
    differentiation = build_differentiation_instruments(Formulation('0 + x + y + z + q'), solved_data)
    cost_columns = build_matrix(Formulation('0 + a + b'), solved_data)
    demand_instruments = np.c_[differentiation, cost_columns]

    # store the new instruments under the existing field, reusing the data type of the old instruments
    replacement = {'demand_instruments': (demand_instruments, solved_data.demand_instruments.dtype)}
    simulation_results.product_data = update_matrices(solved_data, replacement)
    return simulation, simulation_results
Exemple #29
0
def test_ids(formula_data, formulas, build_columns):
    """Test that equivalent formulas build IDs as expected."""

    # create convenience columns of tuples of categorical variables
    old_formula_data = formula_data.copy()
    for (key1, values1), (key2, values2), (key3, values3) in itertools.product(
            old_formula_data.items(), repeat=3):
        key12 = f'{key1}{key2}'
        key123 = f'{key1}{key2}{key3}'
        if key12 not in formula_data:
            values12 = np.empty_like(values1, np.object)
            values12[:] = list(zip(values1, values2))
            formula_data[key12] = values12
        if key123 not in formula_data:
            values123 = np.empty_like(values1, np.object)
            values123[:] = list(zip(values1, values2, values3))
            formula_data[key123] = values123

    # build and compare columns for each formula
    for absorb in formulas:
        ids = Formulation('', absorb)._build_ids(formula_data)
        expected_ids = np.column_stack(build_columns(formula_data))
        np.testing.assert_array_equal(ids, expected_ids, err_msg=absorb)
Exemple #30
0
def test_fixed_effects(simulated_problem: SimulatedProblemFixture, ED: int,
                       ES: int,
                       absorb_method: Optional[Union[str, Iteration]]) -> None:
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as does including indicator variables. Also test that optimal instruments results
    and marginal costs remain unchanged.

    Three problems are built from the same data and starting values: one in which all fixed effects are absorbed
    (with ``absorb_method``), one in which all of them are included as indicator variables, and one in which the first
    ID on each side is included as an indicator while any remaining IDs are absorbed (without passing
    ``absorb_method``). Results of the second and third problems are compared with those of the first.

    ``ED`` and ``ES`` are the numbers of demand- and supply-side fixed effects. ``ES`` is treated as zero when the
    problem has no supply side, and the test returns immediately when both counts are zero.
    """
    simulation, simulation_results, problem, solve_options, problem_results = simulated_problem

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # make product data mutable
    product_data = {
        k: simulation_results.product_data[k]
        for k in simulation_results.product_data.dtype.names
    }

    # remove constants and delete associated elements in the initial beta
    solve_options = solve_options.copy()
    product_formulations = list(problem.product_formulations)
    if ED > 0:
        assert product_formulations[0] is not None
        constant_indices = [
            i for i, e in enumerate(product_formulations[0]._expressions)
            if not e.free_symbols
        ]
        solve_options['beta'] = np.delete(solve_options['beta'], constant_indices, axis=0)
        product_formulations[0] = Formulation(f'{product_formulations[0]._formula} - 1')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations[2] = Formulation(f'{product_formulations[2]._formula} - 1')

    # add fixed effect IDs to the data (all IDs are drawn from one seeded random state, demand side first)
    demand_id_names: List[str] = []
    supply_id_names: List[str] = []
    state = np.random.RandomState(seed=0)
    for side, count, names in [('demand', ED, demand_id_names), ('supply', ES, supply_id_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            product_data[name] = state.choice(['a', 'b', 'c'], problem.N)
            names.append(name)

    # split apart excluded demand-side instruments so that each is also available as its own named column
    for index, instrument in enumerate(product_data['demand_instruments'].T):
        product_data[f'demand_instrument{index}'] = instrument

    # build formulas for the IDs
    demand_id_formula = ' + '.join(demand_id_names)
    supply_id_formula = ' + '.join(supply_id_names)

    def build_problem(formulations: list) -> Problem:
        """Build a problem that differs from the original one only in its product formulations."""
        return Problem(formulations, product_data, problem.agent_formulation, simulation.agent_data)

    def pad_beta(options: dict, padded_problem: Problem) -> None:
        """Append NaN rows to the starting beta until it has as many rows as the problem's K1."""
        beta = options['beta']
        options['beta'] = np.r_[beta, np.full((padded_problem.K1 - beta.size, 1), np.nan)]

    # solve the first stage of a problem in which the fixed effects are absorbed
    solve_options1 = solve_options.copy()
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations1[0] = Formulation(product_formulations[0]._formula, demand_id_formula, absorb_method)
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations1[2] = Formulation(product_formulations[2]._formula, supply_id_formula, absorb_method)
    problem1 = build_problem(product_formulations1)
    problem_results1 = problem1.solve(**solve_options1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables
    solve_options2 = solve_options.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations2[0] = Formulation(f'{product_formulations[0]._formula} + {demand_id_formula}')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations2[2] = Formulation(f'{product_formulations[2]._formula} + {supply_id_formula}')
    problem2 = build_problem(product_formulations2)
    pad_beta(solve_options2, problem2)
    problem_results2 = problem2.solve(**solve_options2)

    # solve the first stage of a problem in which some fixed effects are absorbed and some are included as indicators
    # (at least one of ED and ES is positive here because of the early return above)
    solve_options3 = solve_options.copy()
    product_formulations3 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations3[0] = Formulation(
            f'{product_formulations[0]._formula} + {demand_id_names[0]}',
            ' + '.join(demand_id_names[1:]) or None)
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations3[2] = Formulation(
            f'{product_formulations[2]._formula} + {supply_id_names[0]}',
            ' + '.join(supply_id_names[1:]) or None)
    problem3 = build_problem(product_formulations3)
    pad_beta(solve_options3, problem3)
    problem_results3 = problem3.solve(**solve_options3)

    # compute optimal instruments (use only two draws for speed; accuracy is not a concern here)
    Z_results1 = problem_results1.compute_optimal_instruments(draws=2, seed=0)
    Z_results2 = problem_results2.compute_optimal_instruments(draws=2, seed=0)
    Z_results3 = problem_results3.compute_optimal_instruments(draws=2, seed=0)

    # compute marginal costs
    costs1 = problem_results1.compute_costs()
    costs2 = problem_results2.compute_costs()
    costs3 = problem_results3.compute_costs()

    # choose tolerances (be more flexible with iterative de-meaning)
    atol = 1e-8
    rtol = 1e-5
    if ED > 2 or ES > 2 or isinstance(absorb_method, Iteration):
        atol *= 10
        rtol *= 10

    def assert_close_to_first(first, others, err_msg: str = '') -> None:
        """Assert, in order, that each of the other results is close to the first one."""
        for other in others:
            np.testing.assert_allclose(first, other, atol=atol, rtol=rtol, err_msg=err_msg)

    # test that all problem results expected to be identical are essentially identical
    problem_results_keys = [
        'theta', 'sigma', 'pi', 'rho', 'beta', 'gamma', 'sigma_se', 'pi_se',
        'rho_se', 'beta_se', 'gamma_se', 'delta', 'tilde_costs', 'xi', 'omega',
        'xi_by_theta_jacobian', 'omega_by_theta_jacobian', 'objective',
        'gradient', 'gradient_norm', 'sigma_gradient', 'pi_gradient',
        'rho_gradient', 'beta_gradient', 'gamma_gradient'
    ]
    # with indicator variables these results have extra trailing elements, so only the leading ones are compared
    truncated_keys = {'beta', 'gamma', 'beta_se', 'gamma_se', 'beta_gradient', 'gamma_gradient'}
    for key in problem_results_keys:
        result1 = getattr(problem_results1, key)
        others = [getattr(problem_results2, key), getattr(problem_results3, key)]
        if key in truncated_keys:
            others = [other[:result1.size] for other in others]
        assert_close_to_first(result1, others, err_msg=key)

    # test that all optimal instrument results expected to be identical are essentially identical
    Z_results_keys = [
        'demand_instruments', 'supply_instruments',
        'inverse_covariance_matrix', 'expected_xi_by_theta_jacobian',
        'expected_omega_by_theta_jacobian'
    ]
    for key in Z_results_keys:
        result1 = getattr(Z_results1, key)
        others = [getattr(Z_results2, key), getattr(Z_results3, key)]
        assert_close_to_first(result1, others, err_msg=key)

    # test that marginal costs are essentially identical
    assert_close_to_first(costs1, [costs2, costs3])