def test_estimate_and_variance_improved_by_higher_target_cost(
        data_input, models_from_data):
    """
    Verifies that raising the target cost yields progressively more
    accurate estimates, larger total sample counts, and smaller variances.
    """
    np.random.seed(1)

    # Reference mean from a 20,000 sample Monte Carlo spring mass run.
    mc_reference_mean = 12.3186216602

    sim = MLMCSimulator(models=models_from_data, data=data_input)

    target_costs = [5, 25, 500]
    num_runs = len(target_costs)
    estimates = np.zeros(num_runs)
    variances = np.zeros_like(estimates)
    sample_sizes = np.zeros((num_runs, 3))

    for run, cost in enumerate(target_costs):
        estimates[run], sample_sizes[run], variances[run] = \
            sim.simulate(epsilon=.5,
                         initial_sample_sizes=100,
                         target_cost=cost)

    errors = np.abs(estimates - mc_reference_mean)

    # Accuracy improves with each increase in target cost.
    assert errors[0] > errors[1] and errors[1] > errors[2]

    # More budget means more samples taken overall.
    assert np.sum(sample_sizes[0]) < np.sum(sample_sizes[1])
    assert np.sum(sample_sizes[1]) < np.sum(sample_sizes[2])

    # Variance shrinks as the sample counts grow.
    assert variances[0] > variances[1] and variances[1] > variances[2]
def test_model_from_data(data_input, models_from_data):
    """
    Smoke test: simulate() with file-based models and inputs must complete
    without raising.
    """
    simulator = MLMCSimulator(models=models_from_data, data=data_input)
    simulator.simulate(1., initial_sample_sizes=20)
def test_multi_cpu_sample_splitting(data_input, models_from_data, num_cpus):
    """
    Checks _determine_num_cpu_samples(): every sample is assigned to some
    process, and per-process counts never differ by more than one.
    """
    total_samples = 100
    per_cpu_counts = np.zeros(num_cpus)

    sim = MLMCSimulator(models=models_from_data, data=data_input)
    sim._num_cpus = num_cpus
    for rank in range(num_cpus):
        sim._cpu_rank = rank
        per_cpu_counts[rank] = sim._determine_num_cpu_samples(total_samples)

    # All samples are utilized across the processes.
    assert np.sum(per_cpu_counts) == total_samples

    # No process carries more than one extra sample.
    assert np.max(per_cpu_counts) - np.min(per_cpu_counts) <= 1
def test_optimal_sample_sizes_expected_outputs(num_qoi, variances, epsilons,
                                               data_input, models_from_data):
    """
    Compares _compute_optimal_sample_sizes() output against hand-computed
    sample sizes for several parameter sets.
    """
    test_mlmc = MLMCSimulator(models=models_from_data[:2], data=data_input)

    # Expand the input data to the requested number of quantities of
    # interest without copying.
    data_input._data = np.broadcast_to(data_input._data,
                                       (data_input._data.shape[0], num_qoi))

    test_mlmc._epsilons = epsilons
    level_costs = np.array([1., 4.])
    test_mlmc._compute_optimal_sample_sizes(level_costs, np.array(variances))

    # Verify against precomputed values.
    computed_sizes = test_mlmc._sample_sizes
    expected_sizes = [800, 200] if num_qoi == 1 else [80000, 20000]

    assert np.all(np.isclose(computed_sizes, expected_sizes, atol=1))
def test_estimate_and_variance_improved_by_lower_epsilon(data_input,
                                                         models_from_data):
    """
    Verifies that tightening epsilon yields progressively more accurate
    estimates and smaller variances.
    """
    np.random.seed(1)

    # Reference mean from a 20,000 sample Monte Carlo spring mass run.
    mc_reference_mean = 12.3186216602

    sim = MLMCSimulator(models=models_from_data, data=data_input)

    epsilon_values = [1., .5, .1]
    estimates = np.zeros(len(epsilon_values))
    variances = np.zeros_like(estimates)

    for run, eps in enumerate(epsilon_values):
        estimates[run], _, variances[run] = \
            sim.simulate(epsilon=eps, initial_sample_sizes=50)

    errors = np.abs(estimates - mc_reference_mean)

    assert errors[0] > errors[1] and errors[1] > errors[2]
    assert variances[0] > variances[1] and variances[1] > variances[2]
def test_model_with_random_input(beta_distribution_input, spring_models):
    """
    Smoke test: simulate() with randomly distributed inputs must complete
    without raising.
    """
    simulator = MLMCSimulator(models=spring_models,
                              data=beta_distribution_input)
    simulator.simulate(1., initial_sample_sizes=20)
def test_always_at_least_one_sample_taken(data_input, models_from_data):
    """
    Even with a very loose epsilon, the simulator must draw at least one
    sample.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    _, sample_sizes, _ = sim.simulate(epsilon=5., initial_sample_sizes=100)

    assert np.sum(sample_sizes) > 0
def test_at_least_one_sample_taken_when_only_collecting_sizes(
        data_input, models_from_data):
    """
    Verifies at least one sample is scheduled when simulate() is asked to
    only collect sample sizes (only_collect_sample_sizes=True).

    NOTE(review): this test previously reused the name
    test_always_at_least_one_sample_taken, which redefined and shadowed the
    earlier test of the same name so pytest silently skipped it. Renamed so
    both tests are collected and run.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    # In this mode simulate() returns only the sample sizes array.
    sample_sizes = sim.simulate(epsilon=5.,
                                initial_sample_sizes=100,
                                only_collect_sample_sizes=True)

    assert np.sum(sample_sizes) > 0
def test_simulate_with_bad_sample_sizes_input(data_input, models_from_data,
                                              sample_sizes):
    """
    simulate() must raise ValueError when the sample_sizes argument holds
    invalid values.
    """
    mlmc = MLMCSimulator(models=models_from_data, data=data_input)

    with pytest.raises(ValueError):
        mlmc.simulate(epsilon=1., sample_sizes=sample_sizes)
def test_simulate_with_bad_type_sample_sizes_input(data_input,
                                                   models_from_data,
                                                   sample_sizes):
    """
    simulate() must raise TypeError when the sample_sizes argument has the
    wrong type.
    """
    mlmc = MLMCSimulator(models=models_from_data, data=data_input)

    with pytest.raises(TypeError):
        mlmc.simulate(epsilon=1., sample_sizes=sample_sizes)
def test_output_caching(data_input, models_from_data, cache_size):
    """
    Runs simulator's _evaluate_sample() with and without caching enabled
    to ensure consistency of outputs. Also tests the estimate and variances
    with and without caching.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    # Run simulation to generate cache.
    estimate1, sample_sizes, variances1 = sim.simulate(1., cache_size)

    # Collect output from _evaluate_sample with caching enabled.
    num_levels = len(models_from_data)
    max_samples = np.max(sim._sample_sizes)

    outputs_with_caching = np.zeros((num_levels, max_samples, 1))
    outputs_without_caching = np.zeros_like(outputs_with_caching)

    # Rewind the input source so both passes draw identical samples.
    data_input.reset_sampling()

    for level in range(num_levels):

        num_samples = sim._sample_sizes[level]
        # Skip levels the optimizer assigned zero samples to.
        if num_samples == 0:
            continue

        samples = sim._draw_samples(num_samples)

        for i, sample in enumerate(samples):
            outputs_with_caching[level, i] = \
                sim._evaluate_sample(sample, level)

    # Collect same data with caching disabled.
    sim._caching_enabled = False
    sim._data.reset_sampling()

    for level in range(num_levels):

        num_samples = sim._sample_sizes[level]
        if num_samples == 0:
            continue

        samples = sim._draw_samples(num_samples)

        for i, sample in enumerate(samples):
            outputs_without_caching[level, i] = \
                sim._evaluate_sample(sample, level)

    # Per-sample outputs must agree regardless of caching.
    assert np.all(np.isclose(outputs_without_caching, outputs_with_caching))

    # Re-run the simulation with caching disabled.
    estimate2, sample_sizes, variances2 = sim._run_simulation()

    # Now compare estimator and output variances.
    # If caching is working properly, they should match.
    assert np.array_equal(estimate1, estimate2)
    assert np.array_equal(variances1, variances2)
def test_simulate_expected_output_types(data_input, models_from_data):
    """
    simulate() must return numpy arrays for the estimate, sample counts,
    and variances.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    estimate, sample_count, variances = \
        sim.simulate(epsilon=1., initial_sample_sizes=20)

    for value in (estimate, sample_count, variances):
        assert isinstance(value, np.ndarray)
def test_simulate_with_scalar_sample_sizes(data_input, models_from_data):
    """
    A scalar sample_sizes argument should be expanded to every level.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    _, sample_count, _ = sim.simulate(epsilon=1., sample_sizes=5)

    assert np.array_equal(sample_count, np.array([5, 5, 5]))
def test_hard_coded_springmass_random_input(
        beta_distribution_input, spring_models, comm):
    """
    Tests simulator estimate and variances against precomputed values.
    """
    np.random.seed(1)
    # Precomputed reference values for this exact configuration.
    mlmc_hard_coded_mean = [12.274674424393805]
    mlmc_hard_coded_variance = [0.01078008]

    sim = MLMCSimulator(models=spring_models, data=beta_distribution_input)

    all_sample_sizes = np.array([1113, 34, 0])
    # Split the fixed per-level sample sizes across available processes.
    get_cpu_samples = np.vectorize(sim._determine_num_cpu_samples)
    sim._cpu_sample_sizes = get_cpu_samples(all_sample_sizes)
    sim._determine_input_output_size()

    sim._caching_enabled = False
    sim._sample_sizes = all_sample_sizes

    # Re-seed so the simulation draws match the precomputed reference run.
    np.random.seed(1)
    estimate, cpu_sample_sizes, variances = sim._run_simulation()

    assert np.all(np.isclose(estimate, mlmc_hard_coded_mean))
    assert np.all(np.isclose(variances, mlmc_hard_coded_variance))
def test_final_variances_less_than_epsilon_goal(data_input, models_from_data,
                                                epsilon):
    """
    The square root of the resulting variance must fall below the requested
    epsilon, without degenerating to zero.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    _, _, variances = \
        sim.simulate(epsilon=epsilon, initial_sample_sizes=50)

    assert np.sqrt(variances[0]) < epsilon
    assert not np.isclose(variances[0], 0.)
def test_outputs_for_small_sample_sizes(data_input, models_from_data,
                                        cpu_sample_sizes, comm):
    """
    Test various combinations of small sample sizes to ensure stability of
    simulator under these conditions as well as accuracy of estimate and
    variances.
    """
    output1_filepath = os.path.join(data_path,
                                    "spring_mass_1D_outputs_1.0.txt")
    output2_filepath = os.path.join(data_path,
                                    "spring_mass_1D_outputs_0.1.txt")
    output3_filepath = os.path.join(data_path,
                                    "spring_mass_1D_outputs_0.01.txt")

    outputs = list()
    # Stripe the reference outputs across ranks the same way the simulator
    # distributes its input data (every comm.size-th row per rank).
    outputs.append(np.genfromtxt(output1_filepath)[comm.rank::comm.size])
    outputs.append(np.genfromtxt(output2_filepath)[comm.rank::comm.size])
    outputs.append(np.genfromtxt(output3_filepath)[comm.rank::comm.size])

    # Global per-level sizes implied by the per-cpu sizes.
    all_sample_sizes = np.array(cpu_sample_sizes) * comm.size

    sim = MLMCSimulator(models=models_from_data, data=data_input)
    sim._caching_enabled = False
    sim._cpu_sample_sizes = np.array(cpu_sample_sizes)
    sim._sample_sizes = np.copy(all_sample_sizes)
    sim._determine_input_output_size()

    sim_estimate, ss, sim_variance = sim._run_simulation()

    # Acquire samples in same sequence simulator would.
    samples = []
    sample_index = 0
    for i, s in enumerate(cpu_sample_sizes):

        output = outputs[i][sample_index:sample_index + s]

        # Level 0 has no lower-fidelity model to difference against.
        if i > 0:
            lower_output = outputs[i - 1][sample_index:sample_index + s]
        else:
            lower_output = np.zeros_like(output)

        diff = output - lower_output
        # Combine per-rank differences into the global set.
        all_diff = np.concatenate(comm.allgather(diff))

        samples.append(all_diff)
        sample_index += s

    # Compute mean and variances.
    sample_mean = 0.
    sample_variance = 0.
    for i, sample in enumerate(samples):
        if all_sample_sizes[i] > 0:
            sample_mean += np.sum(sample, axis=0) / all_sample_sizes[i]
            sample_variance += np.var(sample, axis=0) / all_sample_sizes[i]

    # Test sample computations vs simulator.
    assert np.isclose(sim_estimate, sample_mean, atol=10e-15)
    assert np.isclose(sim_variance, sample_variance, atol=10e-15)
def test_simulate_with_set_sample_sizes(data_input, models_from_data):
    """
    Running with explicitly specified per-level sample sizes should honor
    those sizes and reproduce the precomputed spring-mass reference result.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)

    requested_sizes = [7007, 290, 1]
    result, sample_count, _ = \
        sim.simulate(epsilon=1., sample_sizes=requested_sizes)

    assert np.array_equal(requested_sizes, sample_count)
    assert np.isclose(result[0], 12.31220864)
def test_for_verbose_exceptions(data_input, models_from_data):
    """
    Executes simulate() with verbose enabled to ensure that there are no
    exceptions while in verbose mode.

    Fix: stdout is now restored in a finally block, so a failure inside
    simulate() can no longer leave sys.stdout pointing at the null device
    for the remainder of the test session.
    """
    # Redirect the verbose output to null.
    stdout = sys.stdout
    try:
        with open(os.devnull, 'w') as f:
            sys.stdout = f

            sim = MLMCSimulator(models=models_from_data, data=data_input)
            sim.simulate(1., initial_sample_sizes=20, verbose=True)
    finally:
        # Put stdout back in place even if the simulation raised.
        sys.stdout = stdout
def test_calculate_estimate_for_springmass_random_input(
        beta_distribution_input, spring_models):
    """
    The estimate for beta-distributed inputs should agree with a 20,000
    sample Monte Carlo reference mean.
    """
    np.random.seed(1)

    # Result from 20,000 sample monte carlo spring mass simulation.
    mc_reference_mean = 12.3186216602

    sim = MLMCSimulator(models=spring_models, data=beta_distribution_input)
    estimate, _, _ = sim.simulate(0.1, 100)

    assert np.isclose(estimate[0], mc_reference_mean, atol=.25)
def test_input_output_with_differing_column_count(filename_2d_5_column_data,
                                                  filename_2d_3_column_data):
    """
    Input and output data with different numbers of columns should be
    handled without errors.
    """
    models = [
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_3_column_data, 1.),
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_3_column_data, 4.)]

    sim = MLMCSimulator(models=models,
                        data=InputFromData(filename_2d_5_column_data))
    sim.simulate(100., 10)
def test_multiple_cpu_simulation(data_input, models_from_data, comm): """ Compares outputs of simulator in single cpu vs MPI environments to ensure consistency. """ # Set up baseline simulation like single processor run. data_filename = os.path.join(data_path, "spring_mass_1D_inputs.txt") full_data_input = InputFromData(data_filename) full_data_input._data = np.genfromtxt(data_filename) full_data_input._data = \ full_data_input._data.reshape(full_data_input._data.shape[0], -1) base_sim = MLMCSimulator(models=models_from_data, data=full_data_input) base_sim._num_cpus = 1 base_sim._cpu_rank = 0 base_estimate, base_sample_sizes, base_variances = \ base_sim.simulate(.1, 200) full_data_input.reset_sampling() base_costs, base_initial_variances = base_sim._compute_costs_and_variances( ) sim = MLMCSimulator(models=models_from_data, data=data_input) estimates, sample_sizes, variances = sim.simulate(.1, 200) data_input.reset_sampling() sim_costs, initial_variances = sim._compute_costs_and_variances() assert np.all(np.isclose(base_initial_variances, initial_variances)) assert np.all(np.isclose(base_costs, sim_costs)) all_estimates = comm.allgather(estimates) all_sample_sizes = comm.allgather(sample_sizes) all_variances = comm.allgather(variances) assert np.all(estimates[0] == estimates) assert np.all(variances[0] == variances) for estimate in all_estimates: assert np.all(np.isclose(estimate, base_estimate)) for variance in all_variances: assert np.all(np.isclose(variance, base_variances)) for i, sample_size in enumerate(all_sample_sizes): assert np.array_equal(base_sample_sizes, sample_size)
def test_monte_carlo_estimate_value(data_input, models_from_data):
    """
    Supplying a single model should put the simulator into Monte Carlo
    mode; its estimate should match the 20,000 sample reference mean.
    """
    np.random.seed(1)

    # Result from 20,000 sample monte carlo spring mass simulation.
    mc_reference_mean = 12.3186216602

    monte_carlo_sim = MLMCSimulator(models=[models_from_data[0]],
                                    data=data_input)
    estimate, _, _ = monte_carlo_sim.simulate(.05, 50)

    assert np.isclose(estimate, mc_reference_mean, atol=.25)
def test_costs_and_initial_variances_models_from_data(data_input,
                                                      models_from_data):
    """
    _compute_costs_and_variances() should reproduce precomputed costs and
    variances for the file-based models.
    """
    np.random.seed(1)

    sim = MLMCSimulator(models=models_from_data, data=data_input)
    sim._initial_sample_sizes = np.array([100, 100, 100])

    costs, variances = sim._compute_costs_and_variances()

    # Precomputed reference values for this configuration.
    expected_variances = np.array([[9.262628271266264],
                                   [0.07939834631411287],
                                   [5.437083709623372e-06]])
    expected_costs = np.array([1.0, 5.0, 20.0])

    assert np.all(np.isclose(expected_costs, costs))
    assert np.all(np.isclose(expected_variances, variances, rtol=.1))
def test_costs_and_initial_variances_spring_models(beta_distribution_input,
                                                   spring_models):
    """
    _compute_costs_and_variances() should reproduce precomputed costs and
    variances for the beta-distributed spring models.
    """
    sim = MLMCSimulator(models=spring_models, data=beta_distribution_input)

    # Seed after construction so the variance estimation draws are
    # repeatable, matching the reference run.
    np.random.seed(1)
    sim._initial_sample_sizes = np.array([100, 100, 100])

    costs, variances = sim._compute_costs_and_variances()

    # Precomputed reference values for this configuration.
    expected_variances = np.array([[8.245224951411819],
                                   [0.0857219498864355],
                                   [7.916295509470576e-06]])
    expected_costs = np.array([1., 11., 110.])

    assert np.all(np.isclose(expected_costs, costs))
    assert np.all(np.isclose(expected_variances, variances, rtol=.1))
def test_gather_arrays(data_input, models_from_data, comm):
    """
    Tests simulator's _gather_arrays() to ensure that it produces expected
    results for axis=0 and axis=1 parameters.
    """
    sim = MLMCSimulator(data=data_input, models=models_from_data)

    # Axis 0 test.
    # Each rank contributes a 2x2 block filled with its rank number.
    test = np.ones((2, 2)) * comm.rank

    # Build the expected concatenation of every rank's block by hand.
    expected_result = np.zeros((2, 2))
    for i in range(1, comm.size):
        new_block = np.ones((2, 2)) * i
        expected_result = np.concatenate((expected_result, new_block),
                                         axis=0)

    test_result = sim._gather_arrays(test, axis=0)
    assert np.array_equal(expected_result, test_result)

    # Axis 1 test.
    test2 = np.ones((2, 2)) * comm.rank

    expected_result2 = np.zeros((2, 2))
    for i in range(1, comm.size):
        new_block = np.ones((2, 2)) * i
        expected_result2 = np.concatenate((expected_result2, new_block),
                                          axis=1)

    test2_result = sim._gather_arrays(test2, axis=1)
    assert np.array_equal(expected_result2, test2_result)

    # Test for cross-sync failure issue that could occur if some processes
    # run samples for a particular level while others don't.
    if comm.rank % 2 == 0:
        sim._cpu_sample_sizes = np.array([2, 1, 0])
    else:
        sim._cpu_sample_sizes = np.array([2, 0, 0])

    sim._sample_sizes = sim._sample_sizes * comm.size

    # An exception will occur here if the problem is present.
    sim._run_simulation()
def test_hard_coded_test_3_level(data_input, models_from_data):
    """
    Checks simulator estimate, initial variances, and optimal sample sizes
    against precomputed three-model reference values.
    """
    # Reference values from a hard coded run with the same parameters.
    expected_variances = np.array([[7.659619446414387],
                                   [0.07288894751770203],
                                   [7.363159154583542e-06]])
    expected_sample_sizes = np.array([9, 0, 0])
    expected_estimate = np.array([11.639166038233583])

    # Get simulation results.
    sim = MLMCSimulator(models=models_from_data, data=data_input)
    sim_estimate, sim_sample_sizes, output_variances = \
        sim.simulate(epsilon=1., initial_sample_sizes=200)
    sim_costs, sim_variances = sim._compute_costs_and_variances()

    assert np.all(np.isclose(sim_variances, expected_variances))
    assert np.all(np.isclose(sim_estimate, expected_estimate))
    assert np.all(np.isclose(sim._sample_sizes, expected_sample_sizes))
def test_fail_if_model_outputs_do_not_match_shapes(filename_2d_5_column_data,
                                                   filename_2d_3_column_data):
    """
    Constructing the simulator with models whose inputs and outputs have
    differing numbers of samples must raise ValueError.
    """
    mismatched_models = [
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_5_column_data, 1.),
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_3_column_data, 4.)]

    data_input = InputFromData(filename_2d_5_column_data)

    with pytest.raises(ValueError):
        MLMCSimulator(models=mismatched_models, data=data_input)
def test_exception_more_cpus_than_samples(data_input, models_from_data, comm):
    """
    A ValueError is expected when there are not enough samples to
    distribute among the available processors.
    """
    # Suppress InputFromData warnings due to insufficient samples.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        sim = MLMCSimulator(models=models_from_data, data=data_input)
        sim._comm = comm
        # Spoof an absurdly large process count.
        sim._num_cpus = 50000
        sim._cpu_rank = 49999

        with pytest.raises(ValueError):
            sim.simulate(epsilon=.1, initial_sample_sizes=50)
def test_spoof_multi_cpu_simulation(data_input, models_from_data, comm,
                                    num_cpus):
    """
    Runs the MPI-related code paths with a spoofed comm (its allgather is
    modified) to ensure no exceptions are raised. Results produced this way
    are not accurate, so outputs are not compared to a baseline; run pytest
    under mpiexec for rigorous MPI testing.
    """
    sim = MLMCSimulator(models=models_from_data, data=data_input)
    sim._comm = comm
    sim._num_cpus = num_cpus

    sim.simulate(epsilon=.1, initial_sample_sizes=50)
def test_graceful_handling_of_insufficient_samples(data_input_2d, comm,
                                                   models_from_2d_data):
    """
    The simulator should not raise when fewer samples are available than
    requested.
    """
    # Warnings will be triggered; avoid displaying them during testing.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        # Only five rows of data exist, so ranks above 4 would trigger an
        # intentional exception from the simulator; skip them.
        if comm.rank > 4:
            return

        # Initial sample size larger than the available data.
        sim = MLMCSimulator(models=models_from_2d_data, data=data_input_2d)
        sim.simulate(epsilon=1., initial_sample_sizes=10)

        # Computed sample sizes larger than the available data.
        sim = MLMCSimulator(models=models_from_2d_data, data=data_input_2d)
        sim.simulate(epsilon=.01, initial_sample_sizes=5)