예제 #1
0
파일: conftest.py 프로젝트: yuminliut/pyblp
def simulated_problem(request):
    """Build and solve a problem from a named simulation fixture, with or without the supply side.

    ``request.param`` is unpacked as ``(name, supply)``. The fixture called ``f'{name}_simulation'`` provides the
    simulation and its product data. When ``supply`` is falsy, the ``supply_instruments`` field is dropped from the
    product data and only the first two product formulations are kept.

    Returns the tuple ``(simulation, product_data, problem, results)``, where ``results`` comes from a one-step solve
    started at the simulation's ``sigma`` and ``pi``.
    """
    name, include_supply = request.param
    simulation, product_data = request.getfixturevalue(f'{name}_simulation')

    formulations = simulation.product_formulations
    if not include_supply:
        # without a supply side, neither the third formulation nor the supply-side instruments are needed
        formulations = formulations[:2]
        product_data = np.lib.recfunctions.drop_fields(product_data, 'supply_instruments')

    problem = Problem(formulations, product_data, simulation.agent_formulation, simulation.agent_data)
    results = problem.solve(
        simulation.sigma,
        simulation.pi,
        steps=1,
        linear_costs=simulation.linear_costs,
    )
    return simulation, product_data, problem, results
예제 #2
0
def knittel_metaxoglou_2014():
    """Set up the automobile example problem from Knittel and Metaxoglou (2014) and load the initial parameter
    values and estimates that were produced by replication code.

    The replication code was changed so that it writes a Matlab data file for the automobile dataset; that file holds
    the results of one round of Knitro optimization and of post-estimation calculations. Apart from the following
    small modifications, the replication code was left mostly as it was:

        - Tolerance parameters, Knitro optimization parameters, and starting values for sigma were all configured.
        - A bug in how the code computed the BLP instruments was fixed. When building vectors of "other" and "rival"
          sums, no dimension to sum over was given, which caused problems with one-dimensional vectors. A dimension
          of 1 was added to both sum commands.
        - Delta was initialized as the solution to the Logit model.
        - After estimation, the objective was evaluated once more at the optimal parameters so that globals were
          re-loaded at the optimal parameter values.
        - Matrices were renamed and reshaped before being saved to the Matlab data file.

    Returns the dictionary produced by ``scipy.io.loadmat``, which is seeded with the configured problem under the
    ``'problem'`` key.
    """
    # load product data into a mutable mapping of column names to columns
    raw_products = np.recfromcsv(BLP_PRODUCTS_LOCATION)
    product_data = {name: raw_products[name] for name in raw_products.dtype.names}

    # construct the excluded demand-side instruments
    instrument_formulation = Formulation('hpwt + air + mpg + space')
    product_data['demand_instruments'] = build_blp_instruments(instrument_formulation, product_data)

    # configure the problem
    first_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg + space')
    second_formulation = Formulation('0 + prices + I(1) + hpwt + air + mpg')
    agent_data = np.recfromcsv(BLP_AGENTS_LOCATION)
    problem = Problem(
        product_formulations=(first_formulation, second_formulation),
        product_data=product_data,
        agent_data=agent_data,
    )

    # the dictionary holding the problem is passed to loadmat as its second positional argument
    mat_path = TEST_DATA_PATH / 'knittel_metaxoglou_2014.mat'
    return scipy.io.loadmat(str(mat_path), {'problem': problem})
예제 #3
0
def test_extra_demographics(
        simulated_problem: SimulatedProblemFixture) -> None:
    """Test that agents in a simulated problem are identical to agents in a problem created with agent data built
    according to the same integration specification but containing unnecessary rows of demographics.

    The fixture is unpacked as a five-element tuple, of which only the simulation, the simulation results, and the
    problem are used. Nothing is checked when the simulation has no demographics (``simulation.D == 0``).
    """
    simulation, simulation_results, problem, _, _ = simulated_problem

    # skip simulations without demographics
    if simulation.D == 0:
        return

    # reconstruct the problem with unnecessary rows of demographics (each field is stacked on top of itself)
    assert simulation.agent_data is not None
    product_data = simulation_results.product_data
    agent_data = simulation.agent_data
    extra_agent_data = {
        k: np.r_[agent_data[k], agent_data[k]]
        for k in agent_data.dtype.names
    }
    new_problem = Problem(problem.product_formulations, product_data,
                          problem.agent_formulation, extra_agent_data,
                          simulation.integration)

    # test that the agents are essentially identical; object-typed fields are skipped because they cannot be
    # compared numerically (np.object_ is used because the np.object alias was removed in NumPy 1.24)
    for key in problem.agents.dtype.names:
        if problem.agents[key].dtype != np.object_:
            np.testing.assert_allclose(problem.agents[key],
                                       new_problem.agents[key],
                                       atol=1e-14,
                                       rtol=0,
                                       err_msg=key)
예제 #4
0
def test_extra_nodes(simulated_problem: SimulatedProblemFixture) -> None:
    """Test that agents in a simulated problem are identical to agents in a problem created with agent data built
    according to the same integration specification but containing unnecessary columns of nodes.

    The fixture is unpacked as a five-element tuple, of which only the simulation, the simulation results, and the
    problem are used. Nothing is checked when ``simulation.K2 == 0``.
    """
    simulation, simulation_results, problem, _, _ = simulated_problem

    # skip simulations without agents
    if simulation.K2 == 0:
        return

    # reconstruct the problem with unnecessary columns of nodes (the node columns are duplicated side by side)
    assert simulation.agent_data is not None
    product_data = simulation_results.product_data
    extra_agent_data = {
        k: simulation.agent_data[k]
        for k in simulation.agent_data.dtype.names
    }
    extra_agent_data['nodes'] = np.c_[extra_agent_data['nodes'],
                                      extra_agent_data['nodes']]
    new_problem = Problem(problem.product_formulations, product_data,
                          problem.agent_formulation, extra_agent_data)

    # test that the agents are essentially identical; object-typed fields are skipped because they cannot be
    # compared numerically (np.object_ is used because the np.object alias was removed in NumPy 1.24)
    for key in problem.agents.dtype.names:
        if problem.agents[key].dtype != np.object_:
            np.testing.assert_allclose(problem.agents[key],
                                       new_problem.agents[key],
                                       atol=1e-14,
                                       rtol=0,
                                       err_msg=key)
예제 #5
0
def test_extra_demographics(simulated_problem):
    """Check that duplicating every row of agent data leaves the constructed agents unchanged.

    A second problem is built from the same formulations, product data, and integration specification as the
    simulated one, but with each agent data field stacked on top of itself. Agent fields whose dtype is a subtype of
    ``options.dtype`` must then match between the two problems to within an absolute tolerance of 1e-14.
    """
    simulation, product_data, problem1, _ = simulated_problem

    # simulations without demographics have nothing to test
    if simulation.D == 0:
        return

    # double up the agent data so that it contains unnecessary rows of demographics
    doubled_agent_data = {
        name: np.r_[simulation.agent_data[name], simulation.agent_data[name]]
        for name in simulation.agent_data.dtype.names
    }
    problem2 = Problem(
        problem1.product_formulations,
        product_data,
        problem1.agent_formulation,
        doubled_agent_data,
        simulation.integration,
    )

    # compare only the fields stored with the configured floating point type
    for key in problem1.agents.dtype.names:
        if not np.issubdtype(problem1.agents.dtype[key], options.dtype):
            continue
        np.testing.assert_allclose(problem1.agents[key], problem2.agents[key], atol=1e-14, rtol=0, err_msg=key)
예제 #6
0
def test_extra_nodes(simulated_problem):
    """Check that appending redundant columns of nodes to the agent data leaves the constructed agents unchanged.

    A second problem is built from the same formulations and product data as the simulated one, but with the
    ``nodes`` field of the agent data placed side by side with a copy of itself. Agent fields whose dtype is a
    subtype of ``options.dtype`` must then match between the two problems to within an absolute tolerance of 1e-14.
    """
    simulation, product_data, problem1, _ = simulated_problem

    # copy the agent data into a mutable mapping and widen the nodes with duplicate columns
    widened_agent_data = {}
    for name in simulation.agent_data.dtype.names:
        widened_agent_data[name] = simulation.agent_data[name]
    nodes = widened_agent_data['nodes']
    widened_agent_data['nodes'] = np.c_[nodes, nodes]

    problem2 = Problem(
        problem1.product_formulations,
        product_data,
        problem1.agent_formulation,
        widened_agent_data,
    )

    # compare only the fields stored with the configured floating point type
    for key in problem1.agents.dtype.names:
        if not np.issubdtype(problem1.agents.dtype[key], options.dtype):
            continue
        np.testing.assert_allclose(problem1.agents[key], problem2.agents[key], atol=1e-14, rtol=0, err_msg=key)
예제 #7
0
def test_fixed_effects(simulated_problem: SimulatedProblemFixture, ED: int,
                       ES: int,
                       absorb_method: Optional[Union[str, Iteration]]) -> None:
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as does including indicator variables. Also test that optimal instruments results
    and marginal costs remain unchanged.

    Three problems are solved and compared: one in which all fixed effects are absorbed (using ``absorb_method``),
    one in which all fixed effects enter as indicator variables, and one in which the first ID column on each side is
    an indicator while the remaining ID columns are absorbed. ``ED`` and ``ES`` are the numbers of demand- and
    supply-side fixed effect ID columns to generate; ``ES`` is forced to zero when the problem has ``K3 == 0``.
    """
    simulation, simulation_results, problem, solve_options, problem_results = simulated_problem

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # make product data mutable
    product_data = {
        k: simulation_results.product_data[k]
        for k in simulation_results.product_data.dtype.names
    }

    # remove constants and delete associated elements in the initial beta
    solve_options = solve_options.copy()
    product_formulations = list(problem.product_formulations).copy()
    if ED > 0:
        assert product_formulations[0] is not None
        # expressions without free symbols are treated as the constant terms whose betas must be dropped
        constant_indices = [
            i for i, e in enumerate(product_formulations[0]._expressions)
            if not e.free_symbols
        ]
        solve_options['beta'] = np.delete(solve_options['beta'],
                                          constant_indices,
                                          axis=0)
        product_formulations[0] = Formulation(
            f'{product_formulations[0]._formula} - 1')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations[2] = Formulation(
            f'{product_formulations[2]._formula} - 1')

    # add fixed effect IDs to the data (a seeded local random state keeps the draws reproducible)
    demand_id_names: List[str] = []
    supply_id_names: List[str] = []
    state = np.random.RandomState(seed=0)
    for side, count, names in [('demand', ED, demand_id_names),
                               ('supply', ES, supply_id_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            ids = state.choice(['a', 'b', 'c'], problem.N)
            product_data[name] = ids
            names.append(name)

    # split apart excluded demand-side instruments so they can be included in formulations
    # NOTE(review): instrument_names is filled here but is not read again in this function; only the new
    # product_data columns are visible to later code
    instrument_names: List[str] = []
    for index, instrument in enumerate(product_data['demand_instruments'].T):
        name = f'demand_instrument{index}'
        product_data[name] = instrument
        instrument_names.append(name)

    # build formulas for the IDs
    demand_id_formula = ' + '.join(demand_id_names)
    supply_id_formula = ' + '.join(supply_id_names)

    # solve the first stage of a problem in which the fixed effects are absorbed
    solve_options1 = solve_options.copy()
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations1[0] = Formulation(
            product_formulations[0]._formula, demand_id_formula, absorb_method)
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations1[2] = Formulation(
            product_formulations[2]._formula, supply_id_formula, absorb_method)
    problem1 = Problem(product_formulations1, product_data,
                       problem.agent_formulation, simulation.agent_data)
    problem_results1 = problem1.solve(**solve_options1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables
    solve_options2 = solve_options.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        assert product_formulations[0] is not None
        product_formulations2[0] = Formulation(
            f'{product_formulations[0]._formula} + {demand_id_formula}')
    if ES > 0:
        assert product_formulations[2] is not None
        product_formulations2[2] = Formulation(
            f'{product_formulations[2]._formula} + {supply_id_formula}')
    problem2 = Problem(product_formulations2, product_data,
                       problem.agent_formulation, simulation.agent_data)
    # pad the initial beta with NaN rows so that it has problem2.K1 rows, one per linear parameter including the
    # new indicators (presumably NaN marks values that are not fixed in advance -- TODO confirm)
    solve_options2['beta'] = np.r_[solve_options2['beta'],
                                   np.full((problem2.K1 -
                                            solve_options2['beta'].size,
                                            1), np.nan)]
    problem_results2 = problem2.solve(**solve_options2)

    # solve the first stage of a problem in which some fixed effects are absorbed and some are included as indicators
    # NOTE(review): the first branch below is unreachable because the function already returned above whenever
    # ED == ES == 0, and neither value is modified afterwards
    if ED == ES == 0:
        problem_results3 = problem_results2
    else:
        solve_options3 = solve_options.copy()
        product_formulations3 = product_formulations.copy()
        if ED > 0:
            assert product_formulations[0] is not None
            # the first ID column becomes an indicator; the rest are absorbed (None if there are no others)
            product_formulations3[0] = Formulation(
                f'{product_formulations[0]._formula} + {demand_id_names[0]}',
                ' + '.join(demand_id_names[1:]) or None)
        if ES > 0:
            assert product_formulations[2] is not None
            product_formulations3[2] = Formulation(
                f'{product_formulations[2]._formula} + {supply_id_names[0]}',
                ' + '.join(supply_id_names[1:]) or None)
        problem3 = Problem(product_formulations3, product_data,
                           problem.agent_formulation, simulation.agent_data)
        # pad the initial beta with NaN rows in the same way as for the second problem
        solve_options3['beta'] = np.r_[solve_options3['beta'],
                                       np.full((problem3.K1 -
                                                solve_options3['beta'].size,
                                                1), np.nan)]
        problem_results3 = problem3.solve(**solve_options3)

    # compute optimal instruments (use only two draws for speed; accuracy is not a concern here)
    Z_results1 = problem_results1.compute_optimal_instruments(draws=2, seed=0)
    Z_results2 = problem_results2.compute_optimal_instruments(draws=2, seed=0)
    Z_results3 = problem_results3.compute_optimal_instruments(draws=2, seed=0)

    # compute marginal costs
    costs1 = problem_results1.compute_costs()
    costs2 = problem_results2.compute_costs()
    costs3 = problem_results3.compute_costs()

    # choose tolerances (be more flexible with iterative de-meaning)
    atol = 1e-8
    rtol = 1e-5
    if ED > 2 or ES > 2 or isinstance(absorb_method, Iteration):
        atol *= 10
        rtol *= 10

    # test that all problem results expected to be identical are essentially identical
    problem_results_keys = [
        'theta', 'sigma', 'pi', 'rho', 'beta', 'gamma', 'sigma_se', 'pi_se',
        'rho_se', 'beta_se', 'gamma_se', 'delta', 'tilde_costs', 'xi', 'omega',
        'xi_by_theta_jacobian', 'omega_by_theta_jacobian', 'objective',
        'gradient', 'gradient_norm', 'sigma_gradient', 'pi_gradient',
        'rho_gradient', 'beta_gradient', 'gamma_gradient'
    ]
    for key in problem_results_keys:
        result1 = getattr(problem_results1, key)
        result2 = getattr(problem_results2, key)
        result3 = getattr(problem_results3, key)
        # linear parameter results from problems with indicators have extra trailing elements for those
        # indicators, so only the leading elements shared with the fully absorbed problem are compared
        if key in {
                'beta', 'gamma', 'beta_se', 'gamma_se', 'beta_gradient',
                'gamma_gradient'
        }:
            result2 = result2[:result1.size]
            result3 = result3[:result1.size]
        np.testing.assert_allclose(result1,
                                   result2,
                                   atol=atol,
                                   rtol=rtol,
                                   err_msg=key)
        np.testing.assert_allclose(result1,
                                   result3,
                                   atol=atol,
                                   rtol=rtol,
                                   err_msg=key)

    # test that all optimal instrument results expected to be identical are essentially identical
    Z_results_keys = [
        'demand_instruments', 'supply_instruments',
        'inverse_covariance_matrix', 'expected_xi_by_theta_jacobian',
        'expected_omega_by_theta_jacobian'
    ]
    for key in Z_results_keys:
        result1 = getattr(Z_results1, key)
        result2 = getattr(Z_results2, key)
        result3 = getattr(Z_results3, key)
        np.testing.assert_allclose(result1,
                                   result2,
                                   atol=atol,
                                   rtol=rtol,
                                   err_msg=key)
        np.testing.assert_allclose(result1,
                                   result3,
                                   atol=atol,
                                   rtol=rtol,
                                   err_msg=key)

    # test that marginal costs are essentially identical
    np.testing.assert_allclose(costs1, costs2, atol=atol, rtol=rtol)
    np.testing.assert_allclose(costs1, costs3, atol=atol, rtol=rtol)
예제 #8
0
def test_fixed_effects(simulated_problem, ED, ES):
    """Test that absorbing different numbers of demand- and supply-side fixed effects gives rise to essentially
    identical first-stage results as including indicator variables. Also test that results that should be equal when
    there aren't any fixed effects are indeed equal.

    ``ED`` and ``ES`` are the numbers of demand- and supply-side fixed effect ID columns to generate; ``ES`` is
    forced to zero when the problem has ``K3 == 0``. The comparison between absorbed fixed effects and indicator
    variables is skipped when both counts are zero.
    """
    simulation, product_data, problem, results = simulated_problem

    # test that results that should be equal when there aren't any fixed effects are indeed equal
    for key in [
            'delta', 'tilde_costs', 'xi', 'omega', 'xi_jacobian',
            'omega_jacobian'
    ]:
        result = getattr(results, key)
        true_result = getattr(results, f'true_{key}')
        assert (result is not None) == (true_result is not None)
        if result is not None:
            np.testing.assert_allclose(result,
                                       true_result,
                                       atol=1e-14,
                                       rtol=0,
                                       err_msg=key)

    # there cannot be supply-side fixed effects if there isn't a supply side
    if problem.K3 == 0:
        ES = 0
    if ED == ES == 0:
        return

    # add fixed effect IDs to the data
    np.random.seed(0)
    demand_names = []
    supply_names = []
    product_data = {k: product_data[k] for k in product_data.dtype.names}
    for side, count, names in [('demand', ED, demand_names),
                               ('supply', ES, supply_names)]:
        for index in range(count):
            name = f'{side}_ids{index}'
            # the probabilities must be passed by keyword: the third positional parameter of np.random.choice is
            # `replace`, so a positional list of probabilities would be silently ignored
            ids = np.random.choice(['a', 'b', 'c'],
                                   product_data['market_ids'].size,
                                   p=[0.7, 0.2, 0.1])
            product_data[name] = ids
            names.append(name)

    # remove constants (the first instrument column on each side is dropped along with the constant; presumably
    # that column is the constant instrument -- TODO confirm)
    product_formulations = list(problem.product_formulations).copy()
    if ED > 0:
        product_formulations[0] = Formulation(
            f'{product_formulations[0]._formula} - 1')
        product_data['demand_instruments'] = product_data[
            'demand_instruments'][:, 1:]
    if ES > 0:
        product_formulations[2] = Formulation(
            f'{product_formulations[2]._formula} - 1')
        product_data['supply_instruments'] = product_data[
            'supply_instruments'][:, 1:]

    # build formulas for the IDs
    demand_formula = ' + '.join(demand_names)
    supply_formula = ' + '.join(supply_names)

    # solve the first stage of a problem in which the fixed effects are absorbed
    product_formulations1 = product_formulations.copy()
    if ED > 0:
        product_formulations1[0] = Formulation(
            product_formulations[0]._formula, demand_formula)
    if ES > 0:
        product_formulations1[2] = Formulation(
            product_formulations[2]._formula, supply_formula)
    problem1 = Problem(product_formulations1, product_data,
                       problem.agent_formulation, simulation.agent_data)
    results1 = problem1.solve(simulation.sigma, simulation.pi, steps=1)

    # solve the first stage of a problem in which fixed effects are included as indicator variables (the indicators
    # are also appended to the instruments; a shallow copy keeps the first problem's data untouched)
    product_data2 = product_data.copy()
    product_formulations2 = product_formulations.copy()
    if ED > 0:
        demand_indicators = build_matrix(Formulation(demand_formula),
                                         product_data)
        product_data2['demand_instruments'] = np.c_[
            product_data['demand_instruments'], demand_indicators]
        product_formulations2[0] = Formulation(
            f'{product_formulations[0]._formula} + {demand_formula}')
    if ES > 0:
        supply_indicators = build_matrix(Formulation(supply_formula),
                                         product_data)
        product_data2['supply_instruments'] = np.c_[
            product_data['supply_instruments'], supply_indicators]
        product_formulations2[2] = Formulation(
            f'{product_formulations[2]._formula} + {supply_formula}')
    problem2 = Problem(product_formulations2, product_data2,
                       problem.agent_formulation, simulation.agent_data)
    results2 = problem2.solve(simulation.sigma, simulation.pi, steps=1)

    # test that all arrays expected to be identical are identical
    keys = [
        'theta', 'sigma', 'pi', 'beta', 'gamma', 'sigma_se', 'pi_se',
        'beta_se', 'gamma_se', 'true_delta', 'true_tilde_costs', 'true_xi',
        'true_omega', 'true_xi_jacobian', 'true_omega_jacobian', 'objective',
        'gradient', 'sigma_gradient', 'pi_gradient'
    ]
    for key in keys:
        result1 = getattr(results1, key)
        if result1 is not None:
            result2 = getattr(results2, key)
            # results for linear parameters have extra trailing elements for the indicators in the second
            # problem, so only the leading elements shared with the first problem are compared
            if 'beta' in key or 'gamma' in key:
                result2 = result2[:result1.size]
            np.testing.assert_allclose(result1,
                                       result2,
                                       atol=1e-8,
                                       rtol=1e-5,
                                       err_msg=key)