예제 #1
0
def test_estimate_and_variance_improved_by_higher_target_cost(
        data_input, models_from_data):
    """
    Run the simulator at three increasing target costs and check that the
    estimate error and the variance shrink from run to run while the total
    number of samples grows.
    """
    np.random.seed(1)

    # Reference mean from a 20,000 sample monte carlo spring mass simulation.
    reference_mean = 12.3186216602

    target_costs = [5, 25, 500]
    num_runs = len(target_costs)

    sim = MLMCSimulator(models=models_from_data, data=data_input)

    estimates = np.zeros(num_runs)
    variances = np.zeros_like(estimates)
    sample_sizes = np.zeros((num_runs, 3))

    for run, cost in enumerate(target_costs):
        run_result = sim.simulate(epsilon=.5,
                                  initial_sample_sizes=100,
                                  target_cost=cost)
        estimates[run], sample_sizes[run], variances[run] = run_result

    # Estimates must move closer to the reference as the cost budget grows.
    errors = np.abs(estimates - reference_mean)
    assert errors[0] > errors[1]
    assert errors[1] > errors[2]

    # A larger budget must translate into more samples overall.
    totals = [np.sum(row) for row in sample_sizes]
    assert totals[0] < totals[1]
    assert totals[1] < totals[2]

    assert variances[0] > variances[1]
    assert variances[1] > variances[2]
예제 #2
0
def test_model_from_data(data_input, models_from_data):
    """
    Smoke test: simulate() must complete without raising when the models and
    the input were created from files.
    """
    simulator = MLMCSimulator(data=data_input, models=models_from_data)
    simulator.simulate(1., initial_sample_sizes=20)
예제 #3
0
def test_multi_cpu_sample_splitting(data_input, models_from_data, num_cpus):
    """
    Exercise _determine_num_cpu_samples() once per simulated rank and check
    that the per-rank counts add up to the requested total and differ from
    each other by at most one sample.
    """
    total_samples = 100

    sim = MLMCSimulator(models=models_from_data, data=data_input)

    per_cpu_counts = np.zeros(num_cpus)
    for rank in range(num_cpus):
        # Make the simulator act as process `rank` out of `num_cpus`.
        sim._num_cpus, sim._cpu_rank = num_cpus, rank
        per_cpu_counts[rank] = sim._determine_num_cpu_samples(total_samples)

    # Every sample must be assigned to some process.
    assert np.sum(per_cpu_counts) == total_samples

    # The split must be balanced to within a single sample.
    spread = np.max(per_cpu_counts) - np.min(per_cpu_counts)
    assert spread <= 1
예제 #4
0
def test_optimal_sample_sizes_expected_outputs(num_qoi, variances, epsilons,
                                               data_input, models_from_data):
    """
    Compare the sample sizes stored by _compute_optimal_sample_sizes() with
    precomputed expectations for the given variances and epsilons, using only
    the first two models.
    """
    two_level_sim = MLMCSimulator(models=models_from_data[:2],
                                  data=data_input)

    # Broadcast the input data to shape (rows, num_qoi).
    num_rows = data_input._data.shape[0]
    data_input._data = np.broadcast_to(data_input._data, (num_rows, num_qoi))

    two_level_sim._epsilons = epsilons
    level_costs = np.array([1., 4.])

    two_level_sim._compute_optimal_sample_sizes(level_costs,
                                                np.array(variances))

    expected_sample_size = [800, 200] if num_qoi == 1 else [80000, 20000]

    # Allow an absolute difference of one sample per level.
    assert np.allclose(two_level_sim._sample_sizes, expected_sample_size,
                       atol=1)
def test_estimate_and_variance_improved_by_lower_epsilon(data_input,
                                                         models_from_data):
    """
    Run simulate() with three decreasing epsilons and check that each run is
    closer to the reference mean and has a smaller variance than the last.
    """
    np.random.seed(1)

    # Reference mean from a 20,000 sample monte carlo spring mass simulation.
    reference_mean = 12.3186216602

    sim = MLMCSimulator(models=models_from_data, data=data_input)

    epsilons = [1., .5, .1]
    estimates = np.zeros(len(epsilons))
    variances = np.zeros_like(estimates)

    for run, eps in enumerate(epsilons):
        # The returned sample sizes are not needed by this test.
        estimates[run], _, variances[run] = \
            sim.simulate(epsilon=eps, initial_sample_sizes=50)

    errors = np.abs(estimates - reference_mean)
    assert errors[0] > errors[1]
    assert errors[1] > errors[2]

    assert variances[0] > variances[1]
    assert variances[1] > variances[2]
예제 #6
0
def test_model_with_random_input(beta_distribution_input, spring_models):
    """
    Smoke test: simulate() must complete without raising when the models and
    the input come from random distributions.
    """
    simulator = MLMCSimulator(data=beta_distribution_input,
                              models=spring_models)
    simulator.simulate(1., initial_sample_sizes=20)
예제 #7
0
def test_always_at_least_one_sample_taken(data_input, models_from_data):
    """
    Check that simulate() with epsilon=5 still reports a positive total
    number of samples.
    """
    simulator = MLMCSimulator(models=models_from_data, data=data_input)

    # Only the sample sizes matter here; estimates and variances are ignored.
    _, sample_sizes, _ = simulator.simulate(epsilon=5.,
                                            initial_sample_sizes=100)

    total_samples = np.sum(sample_sizes)
    assert total_samples > 0
예제 #8
0
def test_always_at_least_one_sample_taken(data_input, models_from_data):
    """
    Check that simulate() called with only_collect_sample_sizes=True and
    epsilon=5 still reports a positive total number of samples.
    """
    simulator = MLMCSimulator(models=models_from_data, data=data_input)

    run_options = dict(epsilon=5.,
                       initial_sample_sizes=100,
                       only_collect_sample_sizes=True)
    collected_sizes = simulator.simulate(**run_options)

    total_samples = np.sum(collected_sizes)
    assert total_samples > 0
예제 #9
0
def test_simulate_with_bad_sample_sizes_input(data_input, models_from_data,
                                              sample_sizes):
    """
    Pass bad values through the sample_sizes argument of simulate() and
    require a ValueError.
    """
    simulator = MLMCSimulator(data=data_input, models=models_from_data)

    run_options = dict(epsilon=1., sample_sizes=sample_sizes)

    with pytest.raises(ValueError):
        simulator.simulate(**run_options)
예제 #10
0
def test_simulate_with_bad_type_sample_sizes_input(data_input,
                                                   models_from_data,
                                                   sample_sizes):
    """
    Pass a value of the wrong type through the sample_sizes argument of
    simulate() and require a TypeError.
    """
    simulator = MLMCSimulator(data=data_input, models=models_from_data)

    run_options = dict(epsilon=1., sample_sizes=sample_sizes)

    with pytest.raises(TypeError):
        simulator.simulate(**run_options)
예제 #11
0
def test_output_caching(data_input, models_from_data, cache_size):
    """
    Compare _evaluate_sample() outputs collected with caching enabled against
    outputs collected with caching disabled, then check that the estimate and
    variances from a rerun via _run_simulation() (caching disabled) match
    those of the initial simulate() call.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    # Run simulation to generate cache.
    # NOTE(review): cache_size is passed as simulate()'s second positional
    # argument -- confirm which parameter it binds to.
    estimate1, sample_sizes, variances1 = sim.simulate(1., cache_size)

    # Collect output from _evaluate_sample with caching enabled.
    num_levels = len(models_from_data)
    max_samples = np.max(sim._sample_sizes)

    # One slab per level, sized for the largest level; levels with fewer
    # samples leave their trailing entries at zero in both arrays.
    outputs_with_caching = np.zeros((num_levels, max_samples, 1))
    outputs_without_caching = np.zeros_like(outputs_with_caching)

    # Presumably restarts sampling so that this pass draws the same samples
    # as the simulate() call above -- TODO confirm against InputFromData.
    data_input.reset_sampling()

    for level in range(num_levels):

        num_samples = sim._sample_sizes[level]

        # Nothing to evaluate on levels that were assigned no samples.
        if num_samples == 0:
            continue

        samples = sim._draw_samples(num_samples)

        for i, sample in enumerate(samples):

            outputs_with_caching[level, i] = \
                sim._evaluate_sample(sample, level)

    # Collect same data with caching disabled.
    sim._caching_enabled = False
    sim._data.reset_sampling()

    for level in range(num_levels):
        num_samples = sim._sample_sizes[level]

        if num_samples == 0:
            continue

        samples = sim._draw_samples(num_samples)
        for i, sample in enumerate(samples):

            outputs_without_caching[level, i] = \
                sim._evaluate_sample(sample, level)

    assert np.all(np.isclose(outputs_without_caching, outputs_with_caching))

    # Rerun the full simulation; caching is still disabled at this point.
    estimate2, sample_sizes, variances2 = sim._run_simulation()

    # Now compare estimator and output variances.
    # If caching is working properly, they should match.
    assert np.array_equal(estimate1, estimate2)
    assert np.array_equal(variances1, variances2)
예제 #12
0
def test_simulate_expected_output_types(data_input, models_from_data):
    """
    Check that each of the three values returned by simulate() is a numpy
    array.
    """
    simulator = MLMCSimulator(models=models_from_data, data=data_input)

    returned = simulator.simulate(epsilon=1., initial_sample_sizes=20)

    # simulate() must yield exactly three values.
    result, sample_count, variances = returned

    for value in (result, sample_count, variances):
        assert isinstance(value, np.ndarray)
예제 #13
0
def test_simulate_with_scalar_sample_sizes(data_input, models_from_data):
    """
    Pass a scalar as sample_sizes to simulate() and check that the returned
    sample counts are that scalar repeated three times.
    """
    simulator = MLMCSimulator(models=models_from_data, data=data_input)

    per_level = 5

    _, sample_count, _ = simulator.simulate(epsilon=1.,
                                            sample_sizes=per_level)

    expected_counts = np.array([per_level] * 3)
    assert np.array_equal(sample_count, expected_counts)
예제 #14
0
def test_hard_coded_springmass_random_input(beta_distribution_input,
                                            spring_models, comm):
    """
    Run _run_simulation() with fixed sample sizes and a fixed random seed and
    compare the estimate and variances with precomputed values.
    """
    np.random.seed(1)

    expected_mean = [12.274674424393805]
    expected_variance = [0.01078008]

    sim = MLMCSimulator(models=spring_models, data=beta_distribution_input)

    # Bypass simulate(): set the sample sizes directly on the simulator.
    all_sample_sizes = np.array([1113, 34, 0])
    sim._cpu_sample_sizes = \
        np.vectorize(sim._determine_num_cpu_samples)(all_sample_sizes)
    sim._determine_input_output_size()

    sim._caching_enabled = False
    sim._sample_sizes = all_sample_sizes

    # Reseed immediately before the run.
    np.random.seed(1)
    estimate, _, variances = sim._run_simulation()

    assert np.allclose(estimate, expected_mean)
    assert np.allclose(variances, expected_variance)
예제 #15
0
def test_final_variances_less_than_epsilon_goal(data_input, models_from_data,
                                                epsilon):
    """
    Check that the square root of the first returned variance is below the
    requested epsilon and that the variance itself is not close to zero.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    _, _, variances = sim.simulate(epsilon=epsilon, initial_sample_sizes=50)

    first_variance = variances[0]

    assert np.sqrt(first_variance) < epsilon
    assert not np.isclose(first_variance, 0.)
예제 #16
0
def test_outputs_for_small_sample_sizes(data_input, models_from_data,
                                        cpu_sample_sizes, comm):
    """
    Run _run_simulation() with small, directly assigned per-cpu sample sizes
    and compare its estimate and variance with values recomputed here from
    the three spring mass output files.
    """
    output1_filepath = os.path.join(data_path,
                                    "spring_mass_1D_outputs_1.0.txt")
    output2_filepath = os.path.join(data_path,
                                    "spring_mass_1D_outputs_0.1.txt")
    output3_filepath = os.path.join(data_path,
                                    "spring_mass_1D_outputs_0.01.txt")

    # Each rank keeps every comm.size-th row, starting at its own rank.
    outputs = list()
    outputs.append(np.genfromtxt(output1_filepath)[comm.rank::comm.size])
    outputs.append(np.genfromtxt(output2_filepath)[comm.rank::comm.size])
    outputs.append(np.genfromtxt(output3_filepath)[comm.rank::comm.size])

    # Total per-level sample counts across all ranks.
    all_sample_sizes = np.array(cpu_sample_sizes) * comm.size

    sim = MLMCSimulator(models=models_from_data, data=data_input)

    # Set the sample sizes directly instead of letting simulate() compute
    # them.
    sim._caching_enabled = False
    sim._cpu_sample_sizes = np.array(cpu_sample_sizes)
    sim._sample_sizes = np.copy(all_sample_sizes)
    sim._determine_input_output_size()
    sim_estimate, ss, sim_variance = sim._run_simulation()

    # Acquire samples in same sequence simulator would.
    samples = []
    sample_index = 0
    for i, s in enumerate(cpu_sample_sizes):

        output = outputs[i][sample_index:sample_index + s]

        # Level 0 has no lower level, so its difference is the output itself.
        if i > 0:
            lower_output = outputs[i - 1][sample_index:sample_index + s]
        else:
            lower_output = np.zeros_like(output)

        diff = output - lower_output
        # Combine the differences computed on every rank.
        all_diff = np.concatenate(comm.allgather(diff))

        samples.append(all_diff)
        # Later levels continue reading where this level stopped.
        sample_index += s

    # Compute mean and variances.
    sample_mean = 0.
    sample_variance = 0.
    for i, sample in enumerate(samples):

        # Skip levels without samples to avoid dividing by zero.
        if all_sample_sizes[i] > 0:
            sample_mean += np.sum(sample, axis=0) / all_sample_sizes[i]
            sample_variance += np.var(sample, axis=0) / all_sample_sizes[i]

    # Test sample computations vs simulator.
    # Note that atol=10e-15 is 1e-14.
    assert np.isclose(sim_estimate, sample_mean, atol=10e-15)
    assert np.isclose(sim_variance, sample_variance, atol=10e-15)
예제 #17
0
def test_simulate_with_set_sample_sizes(data_input, models_from_data):
    """
    Pass explicit per-level sample sizes to simulate() and check that they
    are returned unchanged and that the first estimate matches the
    precomputed spring-mass reference value.
    """
    simulator = MLMCSimulator(models=models_from_data, data=data_input)

    requested_sizes = [7007, 290, 1]
    reference_estimate = 12.31220864

    result, sample_count, _ = simulator.simulate(
        epsilon=1., sample_sizes=requested_sizes)

    assert np.array_equal(requested_sizes, sample_count)
    assert np.isclose(result[0], reference_estimate)
예제 #18
0
def test_for_verbose_exceptions(data_input, models_from_data):
    """
    Executes simulate() with verbose enabled to ensure that there are
    no exceptions while in verbose mode.

    The verbose output is sent to os.devnull for the duration of the run.
    sys.stdout is restored in a finally clause, so a failing simulate() call
    cannot leave sys.stdout pointing at a closed file for the rest of the
    test session.
    """
    original_stdout = sys.stdout

    with open(os.devnull, 'w') as devnull:
        # Redirect the verbose output to null.
        sys.stdout = devnull
        try:
            sim = MLMCSimulator(models=models_from_data, data=data_input)
            sim.simulate(1., initial_sample_sizes=20, verbose=True)
        finally:
            # Put stdout back in place before devnull is closed, whether or
            # not the simulation raised.
            sys.stdout = original_stdout
예제 #19
0
def test_calculate_estimate_for_springmass_random_input(
        beta_distribution_input, spring_models):
    """
    Check that the first estimate returned by simulate() for the beta
    distribution input lies within 0.25 of the reference mean.
    """
    np.random.seed(1)

    # Reference mean from a 20,000 sample monte carlo spring mass simulation.
    reference_mean = 12.3186216602

    sim = MLMCSimulator(models=spring_models, data=beta_distribution_input)

    estimate, _, _ = sim.simulate(0.1, 100)

    first_estimate = estimate[0]
    assert np.isclose(first_estimate, reference_mean, atol=.25)
예제 #20
0
def test_input_output_with_differing_column_count(filename_2d_5_column_data,
                                                  filename_2d_3_column_data):
    """
    Smoke test: simulate() must run when the model input file and the model
    output file have different numbers of columns.
    """
    input_file = filename_2d_5_column_data
    output_file = filename_2d_3_column_data

    first_model = ModelFromData(input_file, output_file, 1.)
    second_model = ModelFromData(input_file, output_file, 4.)

    data_input = InputFromData(input_file)

    sim = MLMCSimulator(models=[first_model, second_model], data=data_input)
    sim.simulate(100., 10)
예제 #21
0
def test_multiple_cpu_simulation(data_input, models_from_data, comm):
    """
    Compares outputs of simulator in single cpu vs MPI environments to ensure
    consistency.

    A baseline simulator is forced to behave as a single process
    (_num_cpus=1, _cpu_rank=0) on the full input file. A second simulator
    runs with the regular fixtures. Costs, initial variances, estimates,
    variances and sample sizes of the two must agree, and the results gathered
    from every process must agree with each other.
    """
    # Set up baseline simulation like single processor run.
    data_filename = os.path.join(data_path, "spring_mass_1D_inputs.txt")
    full_data_input = InputFromData(data_filename)
    # Overwrite the input's data with the complete file contents, reshaped
    # to two dimensions (one row per line of the file).
    full_data_input._data = np.genfromtxt(data_filename)
    full_data_input._data = \
        full_data_input._data.reshape(full_data_input._data.shape[0], -1)

    base_sim = MLMCSimulator(models=models_from_data, data=full_data_input)
    base_sim._num_cpus = 1
    base_sim._cpu_rank = 0

    base_estimate, base_sample_sizes, base_variances = \
        base_sim.simulate(.1, 200)

    full_data_input.reset_sampling()
    base_costs, base_initial_variances = base_sim._compute_costs_and_variances(
    )

    sim = MLMCSimulator(models=models_from_data, data=data_input)
    estimates, sample_sizes, variances = sim.simulate(.1, 200)

    data_input.reset_sampling()
    sim_costs, initial_variances = sim._compute_costs_and_variances()

    assert np.all(np.isclose(base_initial_variances, initial_variances))
    assert np.all(np.isclose(base_costs, sim_costs))

    all_estimates = comm.allgather(estimates)
    all_sample_sizes = comm.allgather(sample_sizes)
    all_variances = comm.allgather(variances)

    # Every process must hold the same results as the first gathered entry.
    # The previous form compared the local array with its own first element
    # and never looked at the gathered data.
    assert np.all(all_estimates[0] == estimates)
    assert np.all(all_variances[0] == variances)

    for estimate in all_estimates:
        assert np.all(np.isclose(estimate, base_estimate))

    for variance in all_variances:
        assert np.all(np.isclose(variance, base_variances))

    for sample_size in all_sample_sizes:
        assert np.array_equal(base_sample_sizes, sample_size)
예제 #22
0
def test_monte_carlo_estimate_value(data_input, models_from_data):
    """
    Run the simulator with a single model on the spring mass file data and
    check that the estimate lies within 0.25 of the reference mean.
    """
    np.random.seed(1)

    # Reference mean from a 20,000 sample monte carlo spring mass simulation.
    reference_mean = 12.3186216602

    # With only one model the simulator should run as a plain monte carlo
    # simulation.
    single_model = [models_from_data[0]]
    sim = MLMCSimulator(models=single_model, data=data_input)

    estimate, _, _ = sim.simulate(.05, 50)

    assert np.isclose(estimate, reference_mean, atol=.25)
예제 #23
0
def test_costs_and_initial_variances_models_from_data(data_input,
                                                      models_from_data):
    """
    Compare the costs and variances returned by
    _compute_costs_and_variances() with expected values for models loaded
    from files.
    """
    np.random.seed(1)
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    sim._initial_sample_sizes = np.array([100] * 3)
    costs, variances = sim._compute_costs_and_variances()

    expected_costs = np.array([1.0, 5.0, 20.0])
    expected_variances = np.array([[9.262628271266264],
                                   [0.07939834631411287],
                                   [5.437083709623372e-06]])

    assert np.allclose(expected_costs, costs)
    # Variances only need to agree to within a relative tolerance of 10%.
    assert np.allclose(expected_variances, variances, rtol=.1)
예제 #24
0
def test_costs_and_initial_variances_spring_models(beta_distribution_input,
                                                   spring_models):
    """
    Compare the costs and variances returned by
    _compute_costs_and_variances() with expected values for the spring
    models driven by a beta distribution input.
    """
    sim = MLMCSimulator(models=spring_models, data=beta_distribution_input)

    np.random.seed(1)

    sim._initial_sample_sizes = np.array([100] * 3)
    costs, variances = sim._compute_costs_and_variances()

    expected_costs = np.array([1., 11., 110.])
    expected_variances = np.array([[8.245224951411819],
                                   [0.0857219498864355],
                                   [7.916295509470576e-06]])

    assert np.allclose(expected_costs, costs)
    # Variances only need to agree to within a relative tolerance of 10%.
    assert np.allclose(expected_variances, variances, rtol=.1)
예제 #25
0
def test_gather_arrays(data_input, models_from_data, comm):
    """
    Check that _gather_arrays() concatenates one 2x2 block per process along
    axis 0 and along axis 1, then run _run_simulation() with per-process
    sample sizes that differ between even and odd ranks.
    """
    sim = MLMCSimulator(data=data_input, models=models_from_data)

    # Block contributed by rank r is a 2x2 array filled with r; rank 0
    # contributes zeros.
    rank_blocks = [np.zeros((2, 2))]
    rank_blocks += [np.ones((2, 2)) * rank for rank in range(1, comm.size)]

    # Axis 0 test, then axis 1 test.
    for axis in (0, 1):
        local_block = np.ones((2, 2)) * comm.rank
        expected = np.concatenate(rank_blocks, axis=axis)

        gathered = sim._gather_arrays(local_block, axis=axis)

        assert np.array_equal(expected, gathered)

    # Test for cross-sync failure issue that could occur if some processes
    # run samples for a particular level while others don't: only even ranks
    # get a sample on the second level.
    second_level_samples = 1 if comm.rank % 2 == 0 else 0
    sim._cpu_sample_sizes = np.array([2, second_level_samples, 0])

    sim._sample_sizes = sim._sample_sizes * comm.size

    # An exception will occur here if the problem is present.
    sim._run_simulation()
예제 #26
0
def test_hard_coded_test_3_level(data_input, models_from_data):
    """
    Compare the simulator's initial variances, estimate and sample sizes for
    three models with precomputed values.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    # Run the simulation first, then query costs and variances.
    sim_estimate, _, _ = sim.simulate(epsilon=1., initial_sample_sizes=200)
    _, sim_variances = sim._compute_costs_and_variances()

    # Results from hard coded testing with same parameters.
    expected_variances = np.array([[7.659619446414387],
                                   [0.07288894751770203],
                                   [7.363159154583542e-06]])
    expected_sample_sizes = np.array([9, 0, 0])
    expected_estimate = np.array([11.639166038233583])

    assert np.allclose(sim_variances, expected_variances)
    assert np.allclose(sim_estimate, expected_estimate)
    assert np.allclose(sim._sample_sizes, expected_sample_sizes)
예제 #27
0
def test_fail_if_model_outputs_do_not_match_shapes(filename_2d_5_column_data,
                                                   filename_2d_3_column_data):
    """
    Constructing a simulator from two models whose output files differ (the
    5 column file for one, the 3 column file for the other) must raise a
    ValueError.
    """
    input_file = filename_2d_5_column_data

    first_model = ModelFromData(input_file, filename_2d_5_column_data, 1.)
    second_model = ModelFromData(input_file, filename_2d_3_column_data, 4.)
    mismatched_models = [first_model, second_model]

    data_input = InputFromData(input_file)

    with pytest.raises(ValueError):
        MLMCSimulator(models=mismatched_models, data=data_input)
예제 #28
0
def test_exception_more_cpus_than_samples(data_input, models_from_data, comm):
    """
    Configure the simulator as the last of 50000 processes and require
    simulate() to raise a ValueError because there are not enough samples to
    distribute among the processors.
    """
    num_cpus = 50000

    # Avoid showing warnings from InputFromData due to insufficient samples.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        sim = MLMCSimulator(models=models_from_data, data=data_input)
        sim._comm = comm
        sim._num_cpus = num_cpus
        sim._cpu_rank = num_cpus - 1

        with pytest.raises(ValueError):
            sim.simulate(epsilon=.1, initial_sample_sizes=50)
예제 #29
0
def test_spoof_multi_cpu_simulation(data_input, models_from_data, comm,
                                    num_cpus):
    """
    Smoke test for the MPI related sections of MLMCSimulator: run simulate()
    with the comm fixture and a faked process count and require that nothing
    raises. Results produced this way are not accurate, so no outputs are
    compared against a baseline. For more rigorous MPI tests, run pytest with
    mpiexec.
    """
    simulator = MLMCSimulator(data=data_input, models=models_from_data)

    # Substitute the comm fixture to spoof MPI's allgather function.
    simulator._comm = comm
    simulator._num_cpus = num_cpus

    simulator.simulate(epsilon=.1, initial_sample_sizes=50)
예제 #30
0
def test_graceful_handling_of_insufficient_samples(data_input_2d, comm,
                                                   models_from_2d_data):
    """
    Smoke test: simulate() must not raise when more samples are requested
    than the input provides, both for the initial sample size and for the
    computed sample sizes.
    """
    # We only have five rows of data, so ignore cpus of rank > 4.
    # An intentional exception would be thrown by the simulator.
    if comm.rank > 4:
        return

    # First case: too large initial sample size.
    # Second case: too large computed sample sizes.
    scenarios = ((1., 10), (.01, 5))

    # Warnings will be triggered; avoid displaying them during testing.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        for epsilon, initial_sample_sizes in scenarios:
            sim = MLMCSimulator(models=models_from_2d_data,
                                data=data_input_2d)
            sim.simulate(epsilon=epsilon,
                         initial_sample_sizes=initial_sample_sizes)