def test_draw_sample_warning_issued_for_insufficient_data(data_filename_2d):
    """
    Verify that requesting more samples than the data file can provide
    triggers a UserWarning rather than raising an exception.
    """
    sampler = InputFromData(data_filename_2d)

    with pytest.warns(UserWarning):
        sampler.draw_samples(1000)
def sample_entire_data_set(file_path):
    """
    Draw every row of the data file at file_path in a single call and
    return the resulting array.
    """
    # Known row counts are keyed by bare filename, not full path.
    row_count = data_file_lengths[os.path.basename(file_path)]
    return InputFromData(file_path).draw_samples(row_count)
def test_can_load_alternatively_delimited_files(delimiter, filename):
    """
    Verify that InputFromData can parse data files that use non-default
    delimiters.
    """
    sampler = InputFromData(os.path.join(data_path, filename),
                            delimiter=delimiter)
    drawn = sampler.draw_samples(5)

    # All test fixtures are built so five rows sum to exactly 125.
    assert np.sum(drawn) == 125.
def test_skip_rows(data_filename_2d, rows_to_skip):
    """
    Verify that the skip_header option drops exactly the requested
    number of leading rows from the loaded data.
    """
    baseline_rows = InputFromData(data_filename_2d)._data.shape[0]
    trimmed_rows = InputFromData(data_filename_2d,
                                 skip_header=rows_to_skip)._data.shape[0]

    assert baseline_rows - rows_to_skip == trimmed_rows
def test_sample_data_is_scrambled(data_filename):
    """
    Verify that drawn samples are a reordering of the file contents:
    the row order changes but the overall sum is preserved.
    """
    original = sample_entire_data_set(data_filename)
    num_rows = original.shape[0]

    # Fixed seed so the shuffle (and therefore the test) is repeatable.
    np.random.seed(1)
    shuffled = InputFromData(data_filename).draw_samples(num_rows)

    assert not np.array_equal(original, shuffled)
    assert np.isclose(np.sum(original), np.sum(shuffled))
def data_input_2d():
    """
    Build an InputFromData object backed by the two dimensional test
    data file, with shuffling disabled for reproducibility.
    """
    csv_path = os.path.join(data_path, "2D_test_data.csv")
    return InputFromData(csv_path, shuffle_data=False)
def spring_data_input():
    """
    Build an InputFromData object backed by the spring-mass input data
    file, with shuffling disabled for reproducibility.
    """
    txt_path = os.path.join(data_path, "spring_mass_1D_inputs.txt")
    return InputFromData(txt_path, shuffle_data=False)
def test_draw_samples_returns_expected_output(data_filename):
    """
    Verify that draw_samples() returns a two dimensional ndarray holding
    exactly the requested number of rows.
    """
    sampler = InputFromData(data_filename)

    for requested in range(1, 4):
        drawn = sampler.draw_samples(requested)
        # Rewind so each iteration draws from the full data set.
        sampler.reset_sampling()

        assert isinstance(drawn, np.ndarray)  # correct data type
        assert len(drawn.shape) == 2          # correct dimensionality
        assert drawn.shape[0] == requested    # correct sample count
def test_input_output_with_differing_column_count(filename_2d_5_column_data,
                                                  filename_2d_3_column_data):
    """
    Verify that the simulator handles models whose input and output data
    have differing numbers of columns.
    """
    models = [
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_3_column_data, 1.),
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_3_column_data, 4.),
    ]

    sim = MLMCSimulator(models=models,
                        data=InputFromData(filename_2d_5_column_data))
    sim.simulate(100., 10)
def test_fail_if_model_outputs_do_not_match_shapes(filename_2d_5_column_data,
                                                   filename_2d_3_column_data):
    """
    Verify that the simulator raises ValueError when models are given
    outputs with differing numbers of samples.
    """
    mismatched_models = [
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_5_column_data, 1.),
        ModelFromData(filename_2d_5_column_data,
                      filename_2d_3_column_data, 4.),
    ]
    inputs = InputFromData(filename_2d_5_column_data)

    with pytest.raises(ValueError):
        MLMCSimulator(models=mismatched_models, data=inputs)
def test_draw_samples_invalid_parameters_fails(data_filename):
    """
    Verify that draw_samples() rejects a non-integer count with a
    TypeError and a non-positive count with a ValueError.
    """
    sampler = InputFromData(data_filename)

    with pytest.raises(TypeError):
        sampler.draw_samples("five")

    with pytest.raises(ValueError):
        sampler.draw_samples(0)
def test_multiple_cpu_simulation(data_input, models_from_data, comm): """ Compares outputs of simulator in single cpu vs MPI environments to ensure consistency. """ # Set up baseline simulation like single processor run. data_filename = os.path.join(data_path, "spring_mass_1D_inputs.txt") full_data_input = InputFromData(data_filename) full_data_input._data = np.genfromtxt(data_filename) full_data_input._data = \ full_data_input._data.reshape(full_data_input._data.shape[0], -1) base_sim = MLMCSimulator(models=models_from_data, data=full_data_input) base_sim._num_cpus = 1 base_sim._cpu_rank = 0 base_estimate, base_sample_sizes, base_variances = \ base_sim.simulate(.1, 200) full_data_input.reset_sampling() base_costs, base_initial_variances = base_sim._compute_costs_and_variances( ) sim = MLMCSimulator(models=models_from_data, data=data_input) estimates, sample_sizes, variances = sim.simulate(.1, 200) data_input.reset_sampling() sim_costs, initial_variances = sim._compute_costs_and_variances() assert np.all(np.isclose(base_initial_variances, initial_variances)) assert np.all(np.isclose(base_costs, sim_costs)) all_estimates = comm.allgather(estimates) all_sample_sizes = comm.allgather(sample_sizes) all_variances = comm.allgather(variances) assert np.all(estimates[0] == estimates) assert np.all(variances[0] == variances) for estimate in all_estimates: assert np.all(np.isclose(estimate, base_estimate)) for variance in all_variances: assert np.all(np.isclose(variance, base_variances)) for i, sample_size in enumerate(all_sample_sizes): assert np.array_equal(base_sample_sizes, sample_size)
from MLMCPy.input import InputFromData
from MLMCPy.mlmc import MLMCSimulator
from MLMCPy.model import ModelFromData

# Define I/O files
inputfile = "data/spring_mass_1D_inputs.txt"
outputfile_level1 = "data/spring_mass_1D_outputs_1.0.txt"
outputfile_level2 = "data/spring_mass_1D_outputs_0.1.txt"
outputfile_level3 = "data/spring_mass_1D_outputs_0.01.txt"

# Initialize random input & model objects
data_input = InputFromData(inputfile)

model_level1 = ModelFromData(inputfile, outputfile_level1, cost=1.0)
model_level2 = ModelFromData(inputfile, outputfile_level2, cost=10.0)
model_level3 = ModelFromData(inputfile, outputfile_level3, cost=100.0)

models = [model_level1, model_level2, model_level3]

# NOTE(review): other call sites in this code base construct the
# simulator with keyword arguments (models=..., data=...); confirm this
# positional order matches MLMCSimulator's signature.
mlmc_simulator = MLMCSimulator(data_input, models)

# Run the multi-level Monte Carlo simulation to the requested precision.
[estimates, sample_sizes, variances] = \
    mlmc_simulator.simulate(epsilon=1e-1, initial_sample_sizes=100)

# Fixed: the original used Python 2 print statements, which are a
# SyntaxError under Python 3; use the print() function instead.
print('Estimate: %s' % estimates)
print('Sample sizes used: %s' % sample_sizes)
print('Variance: %s' % variances)
def test_fail_on_nan_data(bad_data_file):
    """
    Verify that loading a file containing invalid data raises a
    ValueError.
    """
    with pytest.raises(ValueError):
        InputFromData(bad_data_file)
def test_init_fails_on_invalid_input_file():
    """
    Verify that pointing InputFromData at a nonexistent file raises an
    IOError.
    """
    missing_file = "not_a_real_file.txt"

    with pytest.raises(IOError):
        InputFromData(missing_file)
import numpy as np
import os

from MLMCPy.mlmc import MLMCSimulator
from MLMCPy.input import InputFromData
from MLMCPy.model import ModelFromData

# Resolve the shared test-data directory relative to this script.
my_path = os.path.dirname(os.path.abspath(__file__))
data_path = my_path + "/../../tests/testing_data"

# Spring-mass input samples; shuffling disabled for reproducibility.
data_input = InputFromData(os.path.join(data_path,
                                        "spring_mass_1D_inputs.txt"),
                           shuffle_data=False)

input_filepath = os.path.join(data_path, "spring_mass_1D_inputs.txt")
output1_filepath = os.path.join(data_path, "spring_mass_1D_outputs_1.0.txt")
output2_filepath = os.path.join(data_path, "spring_mass_1D_outputs_0.1.txt")
output3_filepath = os.path.join(data_path, "spring_mass_1D_outputs_0.01.txt")

# One model per fidelity level; the third argument is the per-evaluation
# cost (1, 4, 16) — elsewhere in this code base it is passed as cost=.
model1 = ModelFromData(input_filepath, output1_filepath, 1.)
model2 = ModelFromData(input_filepath, output2_filepath, 4.)
model3 = ModelFromData(input_filepath, output3_filepath, 16.)

models_from_data = [model1, model2, model3]

# Fixed seed so any sampling below is repeatable.
np.random.seed(1)

initial_sample_size = 200
epsilon = 1.

# Get output data for each layer.
level_0_data = np.zeros(initial_sample_size)
level_1_data = np.zeros(initial_sample_size)
def test_init_does_not_fail_on_valid_input_file(data_filename):
    """
    Smoke test: constructing InputFromData from a valid data file must
    not raise any exception.
    """
    InputFromData(data_filename)