def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(self):
    # None of the requested patterns is expected to match a variable reported
    # for the file, so the read has to fail with a ValueError.
    requested_patterns = ['test?.hdf', '*.nc']
    available_variables = ['sample_file.hdf', 'aeronet.lev20']
    data_func = MagicMock()
    variables_func = MagicMock(return_value=available_variables)
    reader = DataReader(get_data_func=data_func, get_variables_func=variables_func)

    with self.assertRaises(ValueError):
        reader.read_data_list('filename1', requested_patterns)[0]
def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(self):
    # The wildcard patterns find nothing here, but the literal name does: the
    # read must succeed and fetch only 'aeronet.lev20'.
    available_variables = ['aeronet.lev20', 'var2.hdf']
    requested = ['aeronet.lev20', '*.nc', 'test?.hdf']
    data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    variables_func = MagicMock(return_value=available_variables)
    reader = DataReader(get_data_func=data_func, get_variables_func=variables_func)

    reader.read_data_list('filename1', requested)[0]

    assert_that(reader._get_data_func.call_count, is_(1))
    only_call_positional_args = reader._get_data_func.call_args_list[0][0]
    # Positional argument 1 of the read function is the variable name.
    assert_that(only_call_positional_args[1], is_('aeronet.lev20'))
def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(self):
    # With no requested pattern expected to match anything the file offers,
    # read_data_list must raise ValueError rather than return data.
    patterns = ['test?.hdf', '*.nc']
    variables_in_file = ['sample_file.hdf', 'aeronet.lev20']
    fake_get_data = MagicMock()
    fake_get_variables = MagicMock(return_value=variables_in_file)
    reader = DataReader(get_data_func=fake_get_data,
                        get_variables_func=fake_get_variables)

    with self.assertRaises(ValueError):
        reader.read_data_list('filename1', patterns)[0]
def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(self):
    # Each wildcard should expand to the matching variables of the file, and the
    # read function should be called once per match, in the order listed below.
    patterns = ['*.nc', 'test?.hdf']
    available_variables = ['aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc', 'test.hdf', 'test1.hdf']
    expected_reads = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
    data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    variables_func = MagicMock(return_value=available_variables)
    reader = DataReader(get_data_func=data_func, get_variables_func=variables_func)

    reader.read_data_list('filename1', patterns)[0]

    assert_that(reader._get_data_func.call_count, is_(len(expected_reads)))
    for position, expected_name in enumerate(expected_reads):
        # Positional argument 1 of each recorded call is the variable name.
        assert_that(reader._get_data_func.call_args_list[position][0][1], is_(expected_name))
def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(self):
    # Unmatched wildcards must not be an error when another requested variable
    # is present: exactly one read, for 'aeronet.lev20', is expected.
    variables_in_file = ['aeronet.lev20', 'var2.hdf']
    wanted = ['aeronet.lev20', '*.nc', 'test?.hdf']
    fake_get_data = MagicMock(return_value=make_regular_2d_ungridded_data())
    fake_get_variables = MagicMock(return_value=variables_in_file)
    reader = DataReader(get_data_func=fake_get_data,
                        get_variables_func=fake_get_variables)

    reader.read_data_list('filename1', wanted)[0]

    read_func = reader._get_data_func
    assert_that(read_func.call_count, is_(1))
    # call_args_list[0][0] holds the positional arguments of the first call.
    assert_that(read_func.call_args_list[0][0][1], is_('aeronet.lev20'))
def test_GIVEN_single_variable_WHEN_aggregate_THEN_DataWriter_called_correctly(self):
    # Whatever the aggregator returns must be written exactly once, to the
    # requested output file. The reader, writer and aggregator are all stubbed.
    output_file = 'output.hdf'
    input_data = GriddedDataList([make_from_cube(make_square_5x3_2d_cube())])
    # The stubbed aggregator returns the input shifted by one, so the written
    # data can be told apart from the data that was read.
    shifted_output = make_from_cube(make_square_5x3_2d_cube() + 1)

    reader = DataReader()
    reader.read_data_list = MagicMock(return_value=input_data)
    writer = DataWriter()
    writer.write_data = Mock()
    aggregator = Aggregator(None, None)
    aggregator.aggregate_gridded = MagicMock(return_value=shifted_output)

    aggregate = Aggregate(None, output_file, data_reader=reader, data_writer=writer)
    aggregate._create_aggregator = MagicMock(return_value=aggregator)
    aggregate.aggregate('var_name', 'filename', None, 'mean')

    assert_that(writer.write_data.call_count, is_(1))
    write_positional_args = writer.write_data.call_args[0]
    written_data = write_positional_args[0]
    written_filename = write_positional_args[1]
    expected_values = [[2, 3, 4],
                       [5, 6, 7],
                       [8, 9, 10],
                       [11, 12, 13],
                       [14, 15, 16]]
    assert_that(written_data.data.tolist(), is_(expected_values))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_single_variable_WHEN_subset_THEN_DataWriter_called_correctly(self):
    # The subsetter's output must be written exactly once, to the requested
    # output file.
    output_file = 'output.hdf'
    limits = {'x': SubsetLimits(-10, 10, False),
              'y': SubsetLimits(40, 60, False)}

    def _increment_data(data, constraint):
        # Shift every value by one so the written data is recognisably the
        # subsetter's output rather than the raw data that was read.
        data.data += 1
        return data

    subsetter = Subsetter()
    subsetter.subset = _increment_data
    reader = DataReader()
    reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    writer = DataWriter()
    writer.write_data = MagicMock()

    subset = Subset(limits, output_file, subsetter=subsetter, data_reader=reader, data_writer=writer)
    subset.subset('var_name', 'filename', product=None)

    assert_that(writer.write_data.call_count, is_(1))
    write_positional_args = writer.write_data.call_args[0]
    written_data = write_positional_args[0]
    written_filename = write_positional_args[1]
    assert_that(written_data.data_flattened.tolist(),
                is_([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_single_variable_WHEN_subset_THEN_Subsetter_called_correctly(self):
    # Subset must call the Subsetter once, passing the data it read and an
    # ungridded constraint whose 'lon'/'lat' limits carry the x/y bounds.
    lon_min, lon_max = -10, 10
    lat_min, lat_max = 40, 60
    limits = {'x': SubsetLimits(lon_min, lon_max, False),
              'y': SubsetLimits(lat_min, lat_max, False)}

    reader = DataReader()
    reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    writer = DataWriter()
    writer.write_data = Mock()
    subsetter = Subsetter()
    # Echo the first positional argument back so the data passes through unmodified.
    subsetter.subset = MagicMock(side_effect=lambda *args: args[0])

    subset = Subset(limits, 'output.hdf', subsetter=subsetter, data_reader=reader, data_writer=writer)
    subset.subset('var_name', 'filename', product=None)

    assert_that(subsetter.subset.call_count, is_(1))
    subset_positional_args = subsetter.subset.call_args[0]
    called_data = subset_positional_args[0]
    called_constraint = subset_positional_args[1]
    assert_that(called_data.data_flattened.tolist(),
                is_([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]))
    assert_that(called_constraint, instance_of(UngriddedSubsetConstraint))
    # Elements 1 and 2 of each stored limit are compared with the (min, max) pair.
    assert_that(called_constraint._limits['lat'][1:3], is_((lat_min, lat_max)))
    assert_that(called_constraint._limits['lon'][1:3], is_((lon_min, lon_max)))
def test_GIVEN_multiple_variables_WHEN_subset_THEN_DataWriter_called_correctly(self):
    # For a list of gridded variables, the subsetter's output must be written
    # exactly once, to the requested output file, with both variables present.
    variables = ['var_name1', 'var_name2']
    filename = 'filename'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'

    def _mock_subset(data, constraint):
        # Modify the data slightly so we can be sure it's passed in correctly
        for var in data:
            var.data += 1
        return data

    mock_subsetter = Subsetter()
    mock_subsetter.subset = _mock_subset
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(
        return_value=GriddedDataList([make_square_5x3_2d_cube(), make_square_5x3_2d_cube()]))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = MagicMock()

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variables, filename, product=None)

    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    assert_that(written_data[0].data.tolist(),
                is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
    # The expected value must be wrapped in a matcher: assert_that(a, b) with a
    # plain value b only checks that a is truthy and uses b as the failure reason.
    assert_that(written_data[1].data.tolist(), is_(written_data[0].data.tolist()))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_multiple_variables_WHEN_subset_THEN_Subsetter_called_correctly(self):
    # For a list of gridded variables, Subset must call the Subsetter once with
    # the data it read and a gridded constraint whose 'longitude'/'latitude'
    # limits carry the x/y bounds.
    lon_min, lon_max = 0, 5
    lat_min, lat_max = -5, 5
    limits = {'x': SubsetLimits(lon_min, lon_max, False),
              'y': SubsetLimits(lat_min, lat_max, False)}

    reader = DataReader()
    # The list holds the same cube object twice.
    reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
    writer = DataWriter()
    writer.write_data = Mock()
    subsetter = Subsetter()
    # Echo the first positional argument back so the data list passes through unmodified.
    subsetter.subset = MagicMock(side_effect=lambda *args: args[0])

    subset = Subset(limits, 'output.hdf', subsetter=subsetter, data_reader=reader, data_writer=writer)
    subset.subset(['var_name1', 'var_name2'], 'filename', product=None)

    assert_that(subsetter.subset.call_count, is_(1))
    subset_positional_args = subsetter.subset.call_args[0]
    called_data = subset_positional_args[0]
    called_constraint = subset_positional_args[1]
    assert_that(called_data[0].data.tolist(),
                is_([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]))
    assert_that(called_data[1].data.tolist(), is_(called_data[0].data.tolist()))
    assert_that(called_constraint, instance_of(GriddedSubsetConstraint))
    assert_that(called_constraint._limits['latitude'][1:3], is_((lat_min, lat_max)))
    assert_that(called_constraint._limits['longitude'][1:3], is_((lon_min, lon_max)))
def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(self):
    # Reading two gridded variables should call the read function once per
    # variable and return the results as a GriddedDataList.
    variables = ['var1', 'var2']
    filenames = 'filename1'
    product = None
    cube = make_square_5x3_2d_cube()
    # Re-tag the cube in place so the reader sees a GriddedData instance.
    cube.__class__ = GriddedData
    get_data_func = MagicMock(return_value=cube)
    reader = DataReader(get_data_func=get_data_func)

    data = reader.read_data_list(filenames, variables, product)

    # Check the data read function is called correctly
    assert_that(get_data_func.call_count, is_(2))
    recorded_calls = get_data_func.call_args_list
    first_call_args = recorded_calls[0][0]
    second_call_args = recorded_calls[1][0]
    # The single filename string is expected to arrive wrapped in a list.
    assert_that(first_call_args[0], is_([filenames]))
    assert_that(first_call_args[1], is_(variables[0]))
    assert_that(second_call_args[1], is_(variables[1]))
    assert_that(first_call_args[2], is_(product))

    # Check the data we got back is as expected
    assert_that(data, instance_of(GriddedDataList))
    assert_that(data[0].data.tolist(), is_(make_square_5x3_2d_cube().data.tolist()))
    assert_that(data[1].data.tolist(), is_(data[0].data.tolist()))
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    Reads the sample data (or only the sample coordinates when no sample variable is given), then collocates
    each data group onto the sample and writes the result to the output file.

    :param main_arguments: The command line arguments (minus the col command)
    """
    from cis.exceptions import ClassNotFoundError, CISError
    from cis.collocation.col import Collocate

    output_file = main_arguments.output
    data_reader = DataReader()

    if main_arguments.samplevariable is None:
        # No sample variable was named, so only the coordinates of the sample files are read.
        sample_data = data_reader.read_coordinates(main_arguments.samplefiles, main_arguments.sampleproduct)
        missing_data_for_missing_samples = True
    else:
        # Only the first entry of the returned list is used as the sample.
        sample_data = data_reader.read_data_list(main_arguments.samplefiles, main_arguments.samplevariable,
                                                 main_arguments.sampleproduct)[0]
        missing_data_for_missing_samples = False

    # NOTE(review): the handler below reports a file-reading error, yet the reads above sit outside the try.
    # It is also unclear from here whether __error_occurred returns; if it does, `col` is unbound further down.
    # Confirm the intent before changing.
    try:
        col = Collocate(sample_data, missing_data_for_missing_samples)
    except IOError as e:
        __error_occurred("There was an error reading one of the files: \n" + str(e))

    # Each sample-group entry is indexed as [0] = name, [1] = options, or is None when not specified.
    collocator_spec = main_arguments.samplegroup['collocator']
    if collocator_spec is not None:
        col_name = collocator_spec[0]
        col_options = collocator_spec[1]
    else:
        col_name = None
        col_options = {}

    kernel_spec = main_arguments.samplegroup['kernel']
    if kernel_spec is not None:
        kern_name = kernel_spec[0]
        kern_options = kernel_spec[1]
    else:
        kern_name = None
        kern_options = None

    for input_group in main_arguments.datagroups:
        variables = input_group['variables']
        filenames = input_group['filenames']
        product = input_group["product"]

        data = data_reader.read_data_list(filenames, variables, product)
        data_writer = DataWriter()
        try:
            output = col.collocate(data, col_name, col_options, kern_name, kern_options)
            data_writer.write_data(output, output_file)
        except ClassNotFoundError as e:
            __error_occurred(str(e) + "\nInvalid collocation option.")
        except (CISError, IOError) as e:
            __error_occurred(e)
def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(self):
    # The wildcard patterns should resolve to exactly the variables listed in
    # `matches`, each read once and in that order.
    wildcard_patterns = ['*.nc', 'test?.hdf']
    variables_in_file = [
        'aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc', 'test.hdf',
        'test1.hdf'
    ]
    matches = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
    fake_get_data = MagicMock(return_value=make_regular_2d_ungridded_data())
    fake_get_variables = MagicMock(return_value=variables_in_file)
    reader = DataReader(get_data_func=fake_get_data,
                        get_variables_func=fake_get_variables)

    reader.read_data_list('filename1', wildcard_patterns)[0]

    assert_that(reader._get_data_func.call_count, is_(len(matches)))
    for index, variable_name in enumerate(matches):
        recorded_call = reader._get_data_func.call_args_list[index]
        # recorded_call[0] is the tuple of positional arguments; item 1 is the variable.
        assert_that(recorded_call[0][1], is_(variable_name))
def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(self):
    # Reading one gridded and one ungridded variable in the same request must
    # be rejected with a TypeError.
    gridded = make_square_5x3_2d_cube()
    # Re-tag the cube in place so the reader sees a GriddedData instance.
    gridded.__class__ = GriddedData
    ungridded = make_regular_2d_ungridded_data()
    # Successive reads return the gridded object first, then the ungridded one.
    get_data_func = MagicMock(side_effect=[gridded, ungridded])
    reader = DataReader(get_data_func=get_data_func)

    with self.assertRaises(TypeError):
        reader.read_data_list('filename1', ['var1', 'var2'], None)[0]
def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(self):
    # A request whose variables come back as a mix of gridded and ungridded
    # data is expected to fail with TypeError.
    requested_variables = ['var1', 'var2']
    cube_as_gridded = make_square_5x3_2d_cube()
    cube_as_gridded.__class__ = GriddedData  # make the cube pass as GriddedData
    ungridded_points = make_regular_2d_ungridded_data()
    fake_get_data = MagicMock(side_effect=[cube_as_gridded, ungridded_points])
    reader = DataReader(get_data_func=fake_get_data)

    with self.assertRaises(TypeError):
        reader.read_data_list('filename1', requested_variables, None)[0]
def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_GriddedData_returned(self):
    # Reading a single ungridded variable should call the read function once
    # and hand back the UngriddedData it produced.
    # NOTE(review): the test name says "GriddedData" but the assertion below
    # checks for UngriddedData; the name looks like a copy-paste slip.
    variable = 'var1'
    filename = 'filename1'
    product = None
    get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    reader = DataReader(get_data_func=get_data_func)

    data = reader.read_data_list(filename, variable, product)[0]

    # Check the data read function is called correctly
    assert_that(get_data_func.call_count, is_(1))
    positional_args = get_data_func.call_args_list[0][0]
    # The single filename string is expected to arrive wrapped in a list.
    assert_that(positional_args[0], is_([filename]))
    assert_that(positional_args[1], is_(variable))
    assert_that(positional_args[2], is_(product))

    # Check the data we got back is as expected
    assert_that(data, instance_of(UngriddedData))
    expected_values = make_regular_2d_ungridded_data().data.tolist()
    assert_that(data.data.tolist(), is_(expected_values))
def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_GriddedData_returned(self):
    # One ungridded variable in, one UngriddedData object out, with a single
    # call to the read function.
    # NOTE(review): despite "GriddedData" in the name, the returned type that
    # is asserted is UngriddedData.
    requested_variable = 'var1'
    source_file = 'filename1'
    product = None
    fake_get_data = MagicMock(return_value=make_regular_2d_ungridded_data())
    reader = DataReader(get_data_func=fake_get_data)

    result = reader.read_data_list(source_file, requested_variable, product)[0]

    # Check the data read function is called correctly
    assert_that(fake_get_data.call_count, is_(1))
    read_args = fake_get_data.call_args_list[0][0]
    assert_that(read_args[0], is_([source_file]))
    assert_that(read_args[1], is_(requested_variable))
    assert_that(read_args[2], is_(product))

    # Check the data we got back is as expected
    assert_that(result, instance_of(UngriddedData))
    assert_that(result.data.tolist(),
                is_(make_regular_2d_ungridded_data().data.tolist()))
def test_GIVEN_multiple_variables_and_filenames_WHEN_subset_THEN_DataReader_called_correctly(self):
    # Subset must call the data reader exactly once, passing the filenames and
    # the variables as the first two positional arguments.
    variables = ['var_name1', 'var_name2']
    filenames = ['filename1', 'filename2']
    output_file = 'output.hdf'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}

    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variables, filenames, product=None)

    assert_that(mock_data_reader.read_data_list.call_count, is_(1))
    # The expected values must be wrapped in a matcher: assert_that(a, b) with a
    # plain value b only checks that a is truthy and uses b as the failure reason.
    assert_that(mock_data_reader.read_data_list.call_args[0][0], is_(filenames))
    assert_that(mock_data_reader.read_data_list.call_args[0][1], is_(variables))
def test_GIVEN_multiple_variables_and_filenames_WHEN_aggregate_THEN_Aggregate_called_correctly(self):
    # With gridded input, Aggregate must call the aggregator's gridded
    # aggregation exactly once, passing the kernel as first positional argument.
    variables = ['var_name1', 'var_name2']
    filenames = ['filename1', 'filename2']
    output_file = 'output.hdf'
    kernel = 'mean'
    grid = 'grid'
    input_data = GriddedDataList(2 * [make_from_cube(make_square_5x3_2d_cube())])
    output_data = input_data

    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=input_data)
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_aggregator = Aggregator(None, None)
    mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the data array unmodified

    aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
    aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
    aggregate.aggregate(variables, filenames, None, kernel)

    assert_that(mock_aggregator.aggregate_gridded.call_count, is_(1))
    # The expected value must be wrapped in a matcher: assert_that(a, b) with a
    # plain value b only checks that a is truthy and uses b as the failure reason.
    # NOTE(review): assumes Aggregate forwards the kernel name unchanged - confirm.
    assert_that(mock_aggregator.aggregate_gridded.call_args[0][0], is_(kernel))
def test_GIVEN_single_variable_WHEN_subset_THEN_DataReader_called_correctly(self):
    # Subset must call the data reader exactly once, passing the filename and
    # the variable as the first two positional arguments.
    variable = 'var_name'
    filename = 'filename'
    output_file = 'output.hdf'
    xmin, xmax = -10, 10
    ymin, ymax = 40, 60
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}

    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variable, filename, product=None)

    assert_that(mock_data_reader.read_data_list.call_count, is_(1))
    # The expected values must be wrapped in a matcher: assert_that(a, b) with a
    # plain value b only checks that a is truthy and uses b as the failure reason.
    assert_that(mock_data_reader.read_data_list.call_args[0][0], is_(filename))
    assert_that(mock_data_reader.read_data_list.call_args[0][1], is_(variable))
def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(self):
    # Two gridded variables from one file: expect two calls to the read
    # function and a GriddedDataList holding both results.
    requested_variables = ['var1', 'var2']
    source_file = 'filename1'
    product = None
    gridded = make_square_5x3_2d_cube()
    gridded.__class__ = GriddedData  # make the cube pass as GriddedData
    fake_get_data = MagicMock(return_value=gridded)
    reader = DataReader(get_data_func=fake_get_data)

    result = reader.read_data_list(source_file, requested_variables, product)

    # Check the data read function is called correctly
    assert_that(fake_get_data.call_count, is_(2))
    args_of_first_read = fake_get_data.call_args_list[0][0]
    args_of_second_read = fake_get_data.call_args_list[1][0]
    assert_that(args_of_first_read[0], is_([source_file]))
    assert_that(args_of_first_read[1], is_(requested_variables[0]))
    assert_that(args_of_second_read[1], is_(requested_variables[1]))
    assert_that(args_of_first_read[2], is_(product))

    # Check the data we got back is as expected
    assert_that(result, instance_of(GriddedDataList))
    reference_values = make_square_5x3_2d_cube().data.tolist()
    assert_that(result[0].data.tolist(), is_(reference_values))
    assert_that(result[1].data.tolist(), is_(result[0].data.tolist()))