Example 1
def aggregate_cmd(main_arguments):
    """
    Main routine for handling calls to the aggregation command.

    :param main_arguments: The command line arguments (minus the aggregate command)
    """
    import cis.exceptions as ex
    from cis.data_io.gridded_data import GriddedDataList

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Aggregation can only be performed on one data group")
    input_group = main_arguments.datagroups[0]

    data = DataReader().read_single_datagroup(input_group)

    if isinstance(data, GriddedDataList):
        logging.warning(
            "The aggregate command is deprecated for GriddedData and will not be supported in future "
            "versions of CIS. Please use 'collapse' instead.")
        if any(v is not None for v in main_arguments.grid.values()):
            raise ex.InvalidCommandLineOptionError(
                "Grid specifications are not supported for Gridded aggregation."
            )
        output = data.collapsed(list(main_arguments.grid.keys()),
                                how=input_group.get("kernel", ''))
    else:
        output = data.aggregate(how=input_group.get("kernel", ''),
                                **main_arguments.grid)

    output.save_data(main_arguments.output)
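For context, here is a minimal sketch of how aggregate_cmd above could be driven without the CIS command-line parser. The Namespace attributes (datagroups, grid, output) mirror the attributes the function reads; every variable name, filename and kernel value below is a hypothetical placeholder.

from argparse import Namespace

# Hypothetical arguments; in CIS these would come from the command-line parser.
args = Namespace(
    datagroups=[{'variables': ['var1'],       # placeholder variable name
                 'filenames': ['input.nc'],   # placeholder input file
                 'product': None,
                 'kernel': 'mean'}],
    grid={'x': None},                # for the gridded branch all values must be None;
                                     # ungridded aggregation expects real grid specs here
    output='aggregated_output')

aggregate_cmd(args)  # assumes aggregate_cmd from the example above is in scope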
Example 2
    def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(
            self):
        variables = ['var1', 'var2']
        filenames = 'filename1'
        product = None
        gridded_data = make_square_5x3_2d_cube()
        gridded_data.__class__ = GriddedData
        get_data_func = MagicMock(return_value=gridded_data)
        reader = DataReader(get_data_func=get_data_func)
        data = reader.read_data_list(filenames, variables, product)

        # Check the data read function is called correctly
        assert_that(get_data_func.call_count, is_(2))
        first_call_args = get_data_func.call_args_list[0][0]
        second_call_args = get_data_func.call_args_list[1][0]
        assert_that(first_call_args[0], is_([filenames]))
        assert_that(first_call_args[1], is_(variables[0]))
        assert_that(second_call_args[1], is_(variables[1]))
        assert_that(first_call_args[2], is_(product))

        # Check the data we got back is as expected
        assert_that(data, instance_of(GriddedDataList))
        assert_that(data[0].data.tolist(),
                    is_(make_square_5x3_2d_cube().data.tolist()))
        assert_that(data[1].data.tolist(), is_(data[0].data.tolist()))
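The pattern used throughout these DataReader tests is worth isolating: the reader takes its low-level file-access callables as constructor arguments, so the tests can replace them with MagicMock stubs and then inspect exactly how they were called. A stripped-down sketch, assuming the cis package is importable:

from unittest.mock import MagicMock
from cis.data_io.data_reader import DataReader

get_data_func = MagicMock()       # stands in for the callable that reads one variable from a set of files
get_variables_func = MagicMock()  # stands in for the callable that lists the variables in one file
reader = DataReader(get_data_func=get_data_func,
                    get_variables_func=get_variables_func)

# After e.g. reader.read_data_list('filename1', ['var1', 'var2']), the stubs record
# the calls that the assertions above inspect:
#   get_data_func.call_args_list[0][0]  ->  (['filename1'], 'var1', None)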
Example 3
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    :param main_arguments:    The command line arguments (minus the col command)
    """
    from cis.collocation.col_framework import get_kernel
    from cis.parse import check_boolean

    # Read the sample data
    missing_data_for_missing_sample = False
    if main_arguments.samplevariable is not None:
        sample_data = DataReader().read_data_list(main_arguments.samplefiles, main_arguments.samplevariable,
                                                  main_arguments.sampleproduct)[0]
        missing_data_for_missing_sample = True
    else:
        sample_data = DataReader().read_coordinates(main_arguments.samplefiles, main_arguments.sampleproduct)

    # Unpack the sample options
    col_name, col_options = main_arguments.samplegroup.get('collocator', ('', {}))
    kern_name, kern_options = main_arguments.samplegroup.get('kernel', ('', {}))

    missing_data_for_missing_sample = check_boolean(col_options.pop('missing_data_for_missing_sample',
                                                                    str(missing_data_for_missing_sample)), logging)

    kernel = get_kernel(kern_name)(**kern_options) if kern_name else None

    for input_group in main_arguments.datagroups:
        # Then collocate each datagroup
        data = DataReader().read_single_datagroup(input_group)
        output = data.collocated_onto(sample_data, how=col_name, kernel=kernel,
                                      missing_data_for_missing_sample=missing_data_for_missing_sample, **col_options)
        output.save_data(main_arguments.output)
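The attribute accesses above imply the shape of the parsed arguments col_cmd expects: sample file/variable/product options, a samplegroup holding (name, options) tuples for the collocator and kernel, and one or more datagroups. A hypothetical Namespace illustrating that shape (all names and values are placeholders):

from argparse import Namespace

args = Namespace(
    samplefiles='sample.nc',
    samplevariable='sample_var',   # or None to collocate onto the sample coordinates only
    sampleproduct=None,
    samplegroup={'collocator': ('lin', {}),   # (name, options) tuples, as unpacked above
                 'kernel': ('', {})},
    datagroups=[{'variables': ['var1'],
                 'filenames': ['data.nc'],
                 'product': None}],
    output='collocated_output')

col_cmd(args)  # assumes col_cmd from the example above is in scope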
Example 4
def stats_cmd(main_arguments):
    """
    Main routine for handling calls to the statistics command.

    :param main_arguments: The command line arguments (minus the stats command)
    """
    from cis.stats import StatsAnalyzer
    from cis.data_io.gridded_data import GriddedDataList
    data_reader = DataReader()
    data_list = data_reader.read_datagroups(main_arguments.datagroups)
    analyzer = StatsAnalyzer(*data_list)
    results = analyzer.analyze()
    header = "RESULTS OF STATISTICAL COMPARISON:"
    note = "Compared all points which have non-missing values in both variables"
    header_length = max(len(header), len(note))
    print(header_length * '=')
    print(header)
    print(header_length * '-')
    print(note)
    print(header_length * '=')
    for result in results:
        print(result.pprint())
    if main_arguments.output:
        cubes = GriddedDataList([result.as_cube() for result in results])
        variables = []
        filenames = []
        for datagroup in main_arguments.datagroups:
            variables.extend(datagroup['variables'])
            filenames.extend(datagroup['filenames'])
        history = "Statistical comparison performed using CIS version " + __version__ + \
                  "\n variables: " + str(variables) + \
                  "\n from files: " + str(set(filenames))
        cubes.add_history(history)
        cubes.save_data(main_arguments.output)
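stats_cmd reads every variable from every datagroup and hands the resulting list to StatsAnalyzer, so a typical invocation supplies two comparable variables. A hypothetical argument object (placeholder names throughout):

from argparse import Namespace

args = Namespace(
    datagroups=[{'variables': ['model_var'], 'filenames': ['model.nc'], 'product': None},
                {'variables': ['obs_var'], 'filenames': ['obs.nc'], 'product': None}],
    output=None)   # set to a filename to also save the results as a GriddedDataList

stats_cmd(args)  # assumes stats_cmd from the example above is in scope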
Example 5
 def test_GIVEN_multiple_datagroups_WHEN_read_datagroups_THEN_get_data_called_correctly(
         self):
     datagroup_1 = {
         'variables': ['var1', 'var2'],
         'filenames': ['filename1.nc'],
         'product': None
     }
     datagroup_2 = {
         'variables': ['var3', 'var4'],
         'filenames': ['filename2.nc'],
         'product': 'cis'
     }
     get_data_func = MagicMock(
         return_value=make_regular_2d_ungridded_data())
     get_var_func = MagicMock(side_effect=lambda f: {
         'filename1.nc': ['var1', 'var2'],
         'filename2.nc': ['var3', 'var4']
     }[f])
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup_1, datagroup_2])
     assert_that(get_data_func.call_count, is_(4))
     assert_that(get_data_func.call_args_list[0][0],
                 is_((['filename1.nc'], 'var1', None)))
     assert_that(get_data_func.call_args_list[1][0],
                 is_((['filename1.nc'], 'var2', None)))
     assert_that(get_data_func.call_args_list[2][0],
                 is_((['filename2.nc'], 'var3', 'cis')))
     assert_that(get_data_func.call_args_list[3][0],
                 is_((['filename2.nc'], 'var4', 'cis')))
Example 6
 def test_GIVEN_gridded_datagroups_WHEN_read_datagroups_THEN_data_returned_in_list(
         self):
     datagroup_1 = {
         'variables': ['var1', 'var2'],
         'filenames': ['filename1.nc'],
         'product': None
     }
     datagroup_2 = {
         'variables': ['var3'],
         'filenames': ['filename2.nc'],
         'product': 'cis'
     }
     var1 = make_from_cube(make_square_5x3_2d_cube())
     var2 = make_from_cube(make_square_5x3_2d_cube())
     var3 = make_from_cube(make_square_5x3_2d_cube())
     get_data_func = MagicMock(side_effect=[var1, var2, var3])
     get_var_func = MagicMock(side_effect=lambda f: {
         'filename1.nc': ['var1', 'var2'],
         'filename2.nc': ['var3']
     }[f])
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup_1, datagroup_2])
     assert_that(len(data), is_(3))
     assert_that(data[0], is_(var1))
     assert_that(data[1], is_(var2))
     assert_that(data[2], is_(var3))
Example 7
    def test_GIVEN_single_variable_WHEN_aggregate_THEN_DataWriter_called_correctly(self):
        variables = 'var_name'
        filenames = 'filename'
        output_file = 'output.hdf'
        kernel = 'mean'
        grid = None
        input_data = GriddedDataList([make_from_cube(make_square_5x3_2d_cube())])
        output_data = make_from_cube(make_square_5x3_2d_cube() + 1)

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=input_data)
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_aggregator = Aggregator(None, None)
        mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the modified data array

        aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
        aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
        aggregate.aggregate(variables, filenames, None, kernel)

        assert_that(mock_data_writer.write_data.call_count, is_(1))
        written_data = mock_data_writer.write_data.call_args[0][0]
        written_filename = mock_data_writer.write_data.call_args[0][1]
        assert_that(written_data.data.tolist(), is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
        assert_that(written_filename, is_(output_file))
Example 8
    def test_GIVEN_single_variable_WHEN_subset_THEN_DataWriter_called_correctly(self):
        variable = 'var_name'
        filename = 'filename'
        xmin, xmax = -10, 10
        ymin, ymax = 40, 60
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}
        output_file = 'output.hdf'

        def _mock_subset(data, constraint):
            data.data += 1  # Modify the data slightly so we can be sure it's passed in correctly
            return data

        mock_subsetter = Subsetter()
        mock_subsetter.subset = _mock_subset
        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = MagicMock()

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variable, filename, product=None)

        assert_that(mock_data_writer.write_data.call_count, is_(1))
        written_data = mock_data_writer.write_data.call_args[0][0]
        written_filename = mock_data_writer.write_data.call_args[0][1]
        assert_that(written_data.data_flattened.tolist(), is_([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]))
        assert_that(written_filename, is_(output_file))
Example 9
    def test_GIVEN_single_variable_WHEN_subset_THEN_Subsetter_called_correctly(self):
        variable = 'var_name'
        filename = 'filename'
        xmin, xmax = -10, 10
        ymin, ymax = 40, 60
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}
        output_file = 'output.hdf'

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])  # Return the data array unmodified

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variable, filename, product=None)
        assert_that(mock_subsetter.subset.call_count, is_(1))
        called_data = mock_subsetter.subset.call_args[0][0]
        called_constraint = mock_subsetter.subset.call_args[0][1]
        assert_that(called_data.data_flattened.tolist(),
                    is_([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]))
        assert_that(called_constraint, instance_of(UngriddedSubsetConstraint))
        assert_that(called_constraint._limits['lat'][1:3], is_((ymin, ymax)))
        assert_that(called_constraint._limits['lon'][1:3], is_((xmin, xmax)))
Example 10
    def test_GIVEN_multiple_variables_WHEN_subset_THEN_DataWriter_called_correctly(self):
        variables = ['var_name1', 'var_name2']
        filename = 'filename'
        xmin, xmax = 0, 5
        ymin, ymax = -5, 5
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}
        output_file = 'output.hdf'

        def _mock_subset(data, constraint):
            # Modify the data slightly so we can be sure it's passed in correctly
            for var in data:
                var.data += 1
            return data

        mock_subsetter = Subsetter()
        mock_subsetter.subset = _mock_subset
        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList([make_square_5x3_2d_cube(),
                                                                                  make_square_5x3_2d_cube()]))
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = MagicMock()

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variables, filename, product=None)

        assert_that(mock_data_writer.write_data.call_count, is_(1))
        written_data = mock_data_writer.write_data.call_args[0][0]
        written_filename = mock_data_writer.write_data.call_args[0][1]
        assert_that(written_data[0].data.tolist(), is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
        assert_that(written_data[0].data.tolist(), written_data[1].data.tolist())
        assert_that(written_filename, is_(output_file))
Example 11
    def test_GIVEN_multiple_variables_WHEN_subset_THEN_Subsetter_called_correctly(self):
        variables = ['var_name1', 'var_name2']
        filename = 'filename'
        xmin, xmax = 0, 5
        ymin, ymax = -5, 5
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}
        output_file = 'output.hdf'

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])  # Return the data list unmodified

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variables, filename, product=None)
        assert_that(mock_subsetter.subset.call_count, is_(1))
        called_data = mock_subsetter.subset.call_args[0][0]
        called_constraint = mock_subsetter.subset.call_args[0][1]
        assert_that(called_data[0].data.tolist(), is_([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]))
        assert_that(called_data[1].data.tolist(), is_(called_data[0].data.tolist()))
        assert_that(called_constraint, instance_of(GriddedSubsetConstraint))
        assert_that(called_constraint._limits['latitude'][1:3], is_((ymin, ymax)))
        assert_that(called_constraint._limits['longitude'][1:3], is_((xmin, xmax)))
Example 12
def aggregate_cmd(main_arguments):
    """
    Main routine for handling calls to the aggregation command.

    :param main_arguments: The command line arguments (minus the aggregate command)
    """
    import cis.exceptions as ex
    from cis.data_io.gridded_data import GriddedDataList

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Aggregation can only be performed on one data group")
    input_group = main_arguments.datagroups[0]

    data = DataReader().read_single_datagroup(input_group)

    if isinstance(data, GriddedDataList):
        logging.warning("The aggregate command is deprecated for GriddedData and will not be supported in future "
                        "versions of CIS. Please use 'collapse' instead.")
        if any(v is not None for v in main_arguments.grid.values()):
            raise ex.InvalidCommandLineOptionError("Grid specifications are not supported for Gridded aggregation.")
        output = data.collapsed(list(main_arguments.grid.keys()), how=input_group.get("kernel", ''))
    else:
        output = data.aggregate(how=input_group.get("kernel", ''), **main_arguments.grid)

    output.save_data(main_arguments.output)
Example 13
 def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(self):
     variables = ['test?.hdf', '*.nc']
     file_vars = ['sample_file.hdf', 'aeronet.lev20']
     filenames = 'filename1'
     get_data_func = MagicMock()
     get_var_func = MagicMock(return_value=file_vars)
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     with self.assertRaises(ValueError):
         reader.read_data_list(filenames, variables)[0]
Example 14
 def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(self):
     variables = ['aeronet.lev20', '*.nc', 'test?.hdf']
     file_vars = ['aeronet.lev20', 'var2.hdf']
     filenames = 'filename1'
     get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
     get_var_func = MagicMock(return_value=file_vars)
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     reader.read_data_list(filenames, variables)[0]
     assert_that(reader._get_data_func.call_count, is_(1))
     assert_that(reader._get_data_func.call_args_list[0][0][1], is_('aeronet.lev20'))
Example 15
 def test_GIVEN_aliases_missing_WHEN_read_datagroups_THEN_read_OK_aliases_default_to_var_names(self):
     datagroup = {'variables': ['var1'],
                  'filenames': ['filename1.nc'],
                  'product': None}
     var1 = make_from_cube(make_square_5x3_2d_cube())
     get_data_func = MagicMock(side_effect=[var1])
     get_var_func = MagicMock(side_effect=['var1'])
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup])
     assert_that(data[0].var_name, is_('dummy'))
Example 16
 def test_GIVEN_aliases_WHEN_read_datagroups_THEN_output_data_has_aliases(self):
     datagroup = {'variables': ['var1'],
                  'filenames': ['filename1.nc'],
                  'product': None,
                  'aliases': ['alias1']}
     get_data_func = MagicMock(return_value=make_from_cube(make_square_5x3_2d_cube()))
     get_var_func = MagicMock(return_value=['var1'])
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup])
     assert_that(data[0].alias, is_('alias1'))
Example 17
 def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(
         self):
     variables = ['test?.hdf', '*.nc']
     file_vars = ['sample_file.hdf', 'aeronet.lev20']
     filenames = 'filename1'
     get_data_func = MagicMock()
     get_var_func = MagicMock(return_value=file_vars)
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     with self.assertRaises(ValueError):
         reader.read_data_list(filenames, variables)[0]
Example 18
 def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(self):
     variables = ['var1', 'var2']
     filenames = 'filename1'
     product = None
     gridded_data = make_square_5x3_2d_cube()
     gridded_data.__class__ = GriddedData
     ungridded_data = make_regular_2d_ungridded_data()
     get_data_func = MagicMock(side_effect=[gridded_data, ungridded_data])
     reader = DataReader(get_data_func=get_data_func)
     with self.assertRaises(TypeError):
         data = reader.read_data_list(filenames, variables, product)[0]
Example 19
 def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(self):
     variables = ['*.nc', 'test?.hdf']
     file_vars = ['aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc', 'test.hdf', 'test1.hdf']
     should_match = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
     filenames = 'filename1'
     get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
     get_var_func = MagicMock(return_value=file_vars)
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     reader.read_data_list(filenames, variables)[0]
     assert_that(reader._get_data_func.call_count, is_(len(should_match)))
     for i in range(len(should_match)):
         assert_that(reader._get_data_func.call_args_list[i][0][1], is_(should_match[i]))
Example 20
 def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(
         self):
     variables = ['var1', 'var2']
     filenames = 'filename1'
     product = None
     gridded_data = make_square_5x3_2d_cube()
     gridded_data.__class__ = GriddedData
     ungridded_data = make_regular_2d_ungridded_data()
     get_data_func = MagicMock(side_effect=[gridded_data, ungridded_data])
     reader = DataReader(get_data_func=get_data_func)
     with self.assertRaises(TypeError):
         data = reader.read_data_list(filenames, variables, product)[0]
Example 21
 def test_GIVEN_not_enough_aliases_WHEN_read_datagroups_THEN_raises_ValueError(self):
     datagroup = {'variables': ['var1', 'var2'],
                  'filenames': ['filename1.nc'],
                  'product': None,
                  'aliases': ['alias1']}
     var1 = make_from_cube(make_square_5x3_2d_cube())
     var2 = make_from_cube(make_square_5x3_2d_cube())
     get_data_func = MagicMock(side_effect=[var1, var2])
     get_var_func = MagicMock(side_effect=['var1', 'var2'])
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     with self.assertRaises(ValueError):
         data = reader.read_datagroups([datagroup])
Example 22
def evaluate_cmd(main_arguments):
    """
    Main routine for handling calls to the evaluation command

    :param main_arguments: The command line arguments (minus the eval command)
    """
    from cis.evaluate import Calculator
    data_reader = DataReader()
    data_list = data_reader.read_datagroups(main_arguments.datagroups)
    calculator = Calculator()
    result = calculator.evaluate(data_list, main_arguments.expr, main_arguments.output_var,
                                 main_arguments.units, main_arguments.attributes)
    result.save_data(main_arguments.output)
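evaluate_cmd passes the parsed expression and metadata straight through to Calculator.evaluate, with the expression written in terms of the datagroup aliases. A hypothetical argument object (all values are placeholders):

from argparse import Namespace

args = Namespace(
    datagroups=[{'variables': ['var1'], 'filenames': ['data.nc'],
                 'product': None, 'aliases': ['a1']}],
    expr='a1 * 2',             # expression referring to the alias above
    output_var='var1_doubled', # hypothetical name for the computed variable
    units='1',
    attributes=None,
    output='evaluated_output')

evaluate_cmd(args)  # assumes evaluate_cmd from the example above is in scope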
Example 23
 def test_GIVEN_aliases_missing_WHEN_read_datagroups_THEN_read_OK_aliases_default_to_var_names(
         self):
     datagroup = {
         'variables': ['var1'],
         'filenames': ['filename1.nc'],
         'product': None
     }
     var1 = make_from_cube(make_square_5x3_2d_cube())
     get_data_func = MagicMock(side_effect=[var1])
     get_var_func = MagicMock(side_effect=['var1'])
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup])
     assert_that(data[0].var_name, is_('dummy'))
Example 24
 def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(
         self):
     variables = ['aeronet.lev20', '*.nc', 'test?.hdf']
     file_vars = ['aeronet.lev20', 'var2.hdf']
     filenames = 'filename1'
     get_data_func = MagicMock(
         return_value=make_regular_2d_ungridded_data())
     get_var_func = MagicMock(return_value=file_vars)
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     reader.read_data_list(filenames, variables)[0]
     assert_that(reader._get_data_func.call_count, is_(1))
     assert_that(reader._get_data_func.call_args_list[0][0][1],
                 is_('aeronet.lev20'))
Example 25
def evaluate_cmd(main_arguments):
    """
    Main routine for handling calls to the evaluation command

    :param main_arguments: The command line arguments (minus the eval command)
    """
    from cis.evaluate import Calculator
    data_reader = DataReader()
    data_list = data_reader.read_datagroups(main_arguments.datagroups)
    calculator = Calculator()
    result = calculator.evaluate(data_list, main_arguments.expr,
                                 main_arguments.output_var,
                                 main_arguments.units,
                                 main_arguments.attributes)
    result.save_data(main_arguments.output)
Example 26
 def test_GIVEN_gridded_and_ungridded_datagroups_WHEN_read_datagroups_THEN_raises_TypeError(self):
     datagroup_1 = {'variables': ['var1'],
                    'filenames': ['filename1.nc'],
                    'product': None}
     datagroup_2 = {'variables': ['var3'],
                    'filenames': ['filename2.nc'],
                    'product': 'cis'}
     var1 = make_from_cube(make_square_5x3_2d_cube())
     var2 = make_regular_2d_ungridded_data()
     get_data_func = MagicMock(side_effect=[var1, var2])
     get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1'],
                                                     'filename2.nc': ['var3']}[f])
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     with self.assertRaises(TypeError):
         reader.read_datagroups([datagroup_1, datagroup_2])
Example 27
 def test_GIVEN_aliases_WHEN_read_datagroups_THEN_output_data_has_aliases(
         self):
     datagroup = {
         'variables': ['var1'],
         'filenames': ['filename1.nc'],
         'product': None,
         'aliases': ['alias1']
     }
     get_data_func = MagicMock(
         return_value=make_from_cube(make_square_5x3_2d_cube()))
     get_var_func = MagicMock(return_value=['var1'])
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup])
     assert_that(data[0].alias, is_('alias1'))
Example 28
def plot_cmd(main_arguments):
    """
    Main routine for handling calls to the 'plot' command.
    Reads in the data files specified and passes the rest of the arguments to the plot function.

    :param main_arguments:    The command line arguments
    """
    from cis.plotting.formatted_plot import Plotter
    from cis.data_io.data_reader import DataReader

    data = DataReader().read_datagroups(main_arguments.datagroups)

    # We have to pop off the arguments which plot isn't expecting so that it treats everything else as an mpl kwarg
    main_arguments = vars(main_arguments)
    _ = main_arguments.pop('command')
    _ = main_arguments.pop("quiet")
    _ = main_arguments.pop("verbose")
    _ = main_arguments.pop("force_overwrite")
    _ = main_arguments.pop("output_var", None)

    layer_opts = [{
        k: v
        for k, v in d.items()
        if k not in ['variables', 'filenames', 'product']
    } for d in main_arguments.pop('datagroups')]
    Plotter(data, layer_opts=layer_opts, **main_arguments)
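Because plot_cmd converts the argument object into a plain dict and forwards everything it does not pop to the Plotter as keyword arguments, the Namespace must at least carry the attributes popped above. A hypothetical minimal example (placeholder values; any datagroup key other than variables, filenames and product ends up in layer_opts):

from argparse import Namespace

args = Namespace(
    command='plot', quiet=False, verbose=False, force_overwrite=False,  # popped before plotting
    datagroups=[{'variables': ['var1'], 'filenames': ['data.nc'], 'product': None,
                 'label': 'layer 1'}],  # hypothetical per-layer option, routed into layer_opts
    output=None)                        # anything left on the Namespace is forwarded to Plotter

plot_cmd(args)  # assumes plot_cmd from the example above is in scope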
Example 29
 def test_GIVEN_not_enough_aliases_WHEN_read_datagroups_THEN_raises_ValueError(
         self):
     datagroup = {
         'variables': ['var1', 'var2'],
         'filenames': ['filename1.nc'],
         'product': None,
         'aliases': ['alias1']
     }
     var1 = make_from_cube(make_square_5x3_2d_cube())
     var2 = make_from_cube(make_square_5x3_2d_cube())
     get_data_func = MagicMock(side_effect=[var1, var2])
     get_var_func = MagicMock(side_effect=['var1', 'var2'])
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     with self.assertRaises(ValueError):
         data = reader.read_datagroups([datagroup])
Example 30
 def test_GIVEN_multiple_datagroups_WHEN_read_datagroups_THEN_get_data_called_correctly(self):
     datagroup_1 = {'variables': ['var1', 'var2'],
                    'filenames': ['filename1.nc'],
                    'product': None}
     datagroup_2 = {'variables': ['var3', 'var4'],
                    'filenames': ['filename2.nc'],
                    'product': 'cis'}
     get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
     get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1', 'var2'],
                                                     'filename2.nc': ['var3', 'var4']}[f])
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup_1, datagroup_2])
     assert_that(get_data_func.call_count, is_(4))
     assert_that(get_data_func.call_args_list[0][0], is_((['filename1.nc'], 'var1', None)))
     assert_that(get_data_func.call_args_list[1][0], is_((['filename1.nc'], 'var2', None)))
     assert_that(get_data_func.call_args_list[2][0], is_((['filename2.nc'], 'var3', 'cis')))
     assert_that(get_data_func.call_args_list[3][0], is_((['filename2.nc'], 'var4', 'cis')))
Example 31
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    :param main_arguments:    The command line arguments (minus the col command)
    """
    from cis.exceptions import ClassNotFoundError, CISError
    from cis.collocation.col import Collocate

    output_file = main_arguments.output
    data_reader = DataReader()
    missing_data_for_missing_samples = False
    if main_arguments.samplevariable is not None:
        sample_data = data_reader.read_data_list(main_arguments.samplefiles, main_arguments.samplevariable,
                                                 main_arguments.sampleproduct)[0]
    else:
        sample_data = data_reader.read_coordinates(main_arguments.samplefiles, main_arguments.sampleproduct)
        missing_data_for_missing_samples = True

    try:
        col = Collocate(sample_data, missing_data_for_missing_samples)
    except IOError as e:
        __error_occurred("There was an error reading one of the files: \n" + str(e))

    col_name = main_arguments.samplegroup['collocator'][0] if main_arguments.samplegroup[
                                                                  'collocator'] is not None else None
    col_options = main_arguments.samplegroup['collocator'][1] if main_arguments.samplegroup[
                                                                     'collocator'] is not None else {}
    kern_name = main_arguments.samplegroup['kernel'][0] if main_arguments.samplegroup['kernel'] is not None else None
    kern_options = main_arguments.samplegroup['kernel'][1] if main_arguments.samplegroup['kernel'] is not None else None

    for input_group in main_arguments.datagroups:
        variables = input_group['variables']
        filenames = input_group['filenames']
        product = input_group["product"] if input_group["product"] is not None else None

        data = data_reader.read_data_list(filenames, variables, product)
        data_writer = DataWriter()
        try:
            output = col.collocate(data, col_name, col_options, kern_name, kern_options)
            data_writer.write_data(output, output_file)
        except ClassNotFoundError as e:
            __error_occurred(str(e) + "\nInvalid collocation option.")
        except (CISError, IOError) as e:
            __error_occurred(e)
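Note that this older variant of col_cmd treats the samplegroup entries as optional: 'collocator' and 'kernel' are either None or a (name, options) tuple, rather than being read with defaults as in the newer version shown earlier. A sketch of that expected shape (placeholder values):

samplegroup = {
    'collocator': ('lin', {}),   # or None, in which case col_name is None and col_options is {}
    'kernel': None,              # or ('kernel_name', {...}) to select and configure a kernel
}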
Example 32
    def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_UngriddedData_returned(self):
        variables = 'var1'
        filenames = 'filename1'
        product = None
        get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
        reader = DataReader(get_data_func=get_data_func)
        data = reader.read_data_list(filenames, variables, product)[0]

        # Check the data read function is called correctly
        assert_that(get_data_func.call_count, is_(1))
        call_args = get_data_func.call_args_list[0][0]
        assert_that(call_args[0], is_([filenames]))
        assert_that(call_args[1], is_(variables))
        assert_that(call_args[2], is_(product))

        # Check the data we got back is as expected
        assert_that(data, instance_of(UngriddedData))
        assert_that(data.data.tolist(), is_(make_regular_2d_ungridded_data().data.tolist()))
Example 33
 def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(
         self):
     variables = ['*.nc', 'test?.hdf']
     file_vars = [
         'aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc',
         'test.hdf', 'test1.hdf'
     ]
     should_match = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
     filenames = 'filename1'
     get_data_func = MagicMock(
         return_value=make_regular_2d_ungridded_data())
     get_var_func = MagicMock(return_value=file_vars)
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=get_var_func)
     reader.read_data_list(filenames, variables)[0]
     assert_that(reader._get_data_func.call_count, is_(len(should_match)))
     for i in range(len(should_match)):
         assert_that(reader._get_data_func.call_args_list[i][0][1],
                     is_(should_match[i]))
Example 34
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    :param main_arguments:    The command line arguments (minus the col command)
    """
    from cis.collocation.col_framework import get_kernel
    from cis.parse import check_boolean

    # Read the sample data
    missing_data_for_missing_sample = False
    if main_arguments.samplevariable is not None:
        sample_data = DataReader().read_data_list(
            main_arguments.samplefiles, main_arguments.samplevariable,
            main_arguments.sampleproduct)[0]
        missing_data_for_missing_sample = True
    else:
        sample_data = DataReader().read_coordinates(
            main_arguments.samplefiles, main_arguments.sampleproduct)

    # Unpack the sample options
    col_name, col_options = main_arguments.samplegroup.get(
        'collocator', ('', {}))
    kern_name, kern_options = main_arguments.samplegroup.get(
        'kernel', ('', {}))

    missing_data_for_missing_sample = check_boolean(
        col_options.pop('missing_data_for_missing_sample',
                        str(missing_data_for_missing_sample)), logging)

    kernel = get_kernel(kern_name)(**kern_options) if kern_name else None

    for input_group in main_arguments.datagroups:
        # Then collocate each datagroup
        data = DataReader().read_single_datagroup(input_group)
        output = data.collocated_onto(
            sample_data,
            how=col_name,
            kernel=kernel,
            missing_data_for_missing_sample=missing_data_for_missing_sample,
            **col_options)
        output.save_data(main_arguments.output)
Example 35
 def test_GIVEN_gridded_datagroups_WHEN_read_datagroups_THEN_data_returned_in_list(self):
     datagroup_1 = {'variables': ['var1', 'var2'],
                    'filenames': ['filename1.nc'],
                    'product': None}
     datagroup_2 = {'variables': ['var3'],
                    'filenames': ['filename2.nc'],
                    'product': 'cis'}
     var1 = make_from_cube(make_square_5x3_2d_cube())
     var2 = make_from_cube(make_square_5x3_2d_cube())
     var3 = make_from_cube(make_square_5x3_2d_cube())
     get_data_func = MagicMock(side_effect=[var1, var2, var3])
     get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1', 'var2'],
                                                     'filename2.nc': ['var3']}[f])
     reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
     data = reader.read_datagroups([datagroup_1, datagroup_2])
     assert_that(isinstance(data, GriddedDataList))
     assert_that(len(data), is_(3))
     assert_that(data[0], is_(var1))
     assert_that(data[1], is_(var2))
     assert_that(data[2], is_(var3))
Example 36
def subset_cmd(main_arguments):
    """
    Main routine for handling calls to the subset command.

    :param main_arguments:    The command line arguments (minus the subset command)
    """
    import cis.exceptions as ex

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")

    data = DataReader().read_single_datagroup(main_arguments.datagroups[0])

    subset = data.subset(**main_arguments.limits)

    if subset is None:
        # Constraints exclude all data.
        raise ex.NoDataInSubsetError("No output created - constraints exclude all data")

    subset.save_data(main_arguments.output)
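subset_cmd forwards main_arguments.limits directly as keyword arguments to data.subset, so the limits map coordinate names to ranges. A hypothetical invocation (names and bounds are placeholders; the exact limit format is whatever the CIS parser produces for your version):

from argparse import Namespace

args = Namespace(
    datagroups=[{'variables': ['var1'], 'filenames': ['data.nc'], 'product': None}],
    limits={'x': [-10, 10], 'y': [40, 60]},   # placeholder coordinate ranges
    output='subset_output')

subset_cmd(args)  # assumes subset_cmd from the example above is in scope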
Example 37
def subset_cmd(main_arguments):
    """
    Main routine for handling calls to the subset command.

    :param main_arguments:    The command line arguments (minus the subset command)
    """
    import cis.exceptions as ex

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")

    data = DataReader().read_single_datagroup(main_arguments.datagroups[0])

    subset = data.subset(**main_arguments.limits)

    if subset is None:
        # Constraints exclude all data.
        raise ex.NoDataInSubsetError(
            "No output created - constraints exclude all data")

    subset.save_data(main_arguments.output)
Example 38
    def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_UngriddedData_returned(
            self):
        variables = 'var1'
        filenames = 'filename1'
        product = None
        get_data_func = MagicMock(
            return_value=make_regular_2d_ungridded_data())
        reader = DataReader(get_data_func=get_data_func)
        data = reader.read_data_list(filenames, variables, product)[0]

        # Check the data read function is called correctly
        assert_that(get_data_func.call_count, is_(1))
        call_args = get_data_func.call_args_list[0][0]
        assert_that(call_args[0], is_([filenames]))
        assert_that(call_args[1], is_(variables))
        assert_that(call_args[2], is_(product))

        # Check the data we got back is as expected
        assert_that(data, instance_of(UngriddedData))
        assert_that(data.data.tolist(),
                    is_(make_regular_2d_ungridded_data().data.tolist()))
Example 39
def collapse_cmd(main_arguments):
    """
    Main routine for handling calls to the collapse command.

    :param main_arguments: The command line arguments (minus the collapse command)
    """
    from cis.data_io.ungridded_data import UngriddedDataList

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Collapse can only be performed on one data group")
    input_group = main_arguments.datagroups[0]

    data = DataReader().read_single_datagroup(input_group)

    if isinstance(data, UngriddedDataList):
        logging.error("The collapse command can only be performed on gridded data. "
                      "Please use 'aggregate' instead.")

    output = data.collapsed(main_arguments.dimensions, how=input_group.get("kernel", ''))

    output.save_data(main_arguments.output)
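collapse_cmd mirrors the gridded branch of aggregate_cmd but takes the dimensions to collapse explicitly. A hypothetical argument object (placeholder values):

from argparse import Namespace

args = Namespace(
    datagroups=[{'variables': ['var1'], 'filenames': ['model.nc'],
                 'product': None, 'kernel': 'mean'}],
    dimensions=['x', 'y'],   # placeholder coordinate names to collapse over
    output='collapsed_output')

collapse_cmd(args)  # assumes collapse_cmd from the example above is in scope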
Example 40
    def test_GIVEN_multiple_variables_and_filenames_WHEN_aggregate_THEN_Aggregate_called_correctly(self):
        variables = ['var_name1', 'var_name2']
        filenames = ['filename1', 'filename2']
        output_file = 'output.hdf'
        kernel = 'mean'
        grid = 'grid'
        input_data = GriddedDataList(2 * [make_from_cube(make_square_5x3_2d_cube())])
        output_data = input_data

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=input_data)
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_aggregator = Aggregator(None, None)
        mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the data array unmodified

        aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
        aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
        aggregate.aggregate(variables, filenames, None, kernel)

        assert_that(mock_aggregator.aggregate_gridded.call_count, is_(1))
        assert_that(mock_aggregator.aggregate_gridded.call_args[0][0], kernel)
Example 41
    def test_GIVEN_single_variable_WHEN_subset_THEN_DataReader_called_correctly(self):
        variable = 'var_name'
        filename = 'filename'
        output_file = 'output.hdf'
        xmin, xmax = -10, 10
        ymin, ymax = 40, 60
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variable, filename, product=None)
        assert_that(mock_data_reader.read_data_list.call_count, is_(1))
        assert_that(mock_data_reader.read_data_list.call_args[0][0], filename)
        assert_that(mock_data_reader.read_data_list.call_args[0][1], variable)
Example 42
    def test_GIVEN_multiple_variables_and_filenames_WHEN_subset_THEN_DataReader_called_correctly(self):
        variables = ['var_name1', 'var_name2']
        filenames = ['filename1', 'filename2']
        output_file = 'output.hdf'
        xmin, xmax = 0, 5
        ymin, ymax = -5, 5
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variables, filenames, product=None)
        assert_that(mock_data_reader.read_data_list.call_count, is_(1))
        assert_that(mock_data_reader.read_data_list.call_args[0][0], filenames)
        assert_that(mock_data_reader.read_data_list.call_args[0][1], variables)
Example 43
def collapse_cmd(main_arguments):
    """
    Main routine for handling calls to the collapse command.

    :param main_arguments: The command line arguments (minus the collapse command)
    """
    from cis.data_io.ungridded_data import UngriddedDataList

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Collapse can only be performed on one data group")
    input_group = main_arguments.datagroups[0]

    data = DataReader().read_single_datagroup(input_group)

    if isinstance(data, UngriddedDataList):
        logging.error(
            "The collapse command can only be performed on gridded data. "
            "Please use 'aggregate' instead.")

    output = data.collapsed(main_arguments.dimensions,
                            how=input_group.get("kernel", ''))

    output.save_data(main_arguments.output)
Example 44
    def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(self):
        variables = ['var1', 'var2']
        filenames = 'filename1'
        product = None
        gridded_data = make_square_5x3_2d_cube()
        gridded_data.__class__ = GriddedData
        get_data_func = MagicMock(return_value=gridded_data)
        reader = DataReader(get_data_func=get_data_func)
        data = reader.read_data_list(filenames, variables, product)

        # Check the data read function is called correctly
        assert_that(get_data_func.call_count, is_(2))
        first_call_args = get_data_func.call_args_list[0][0]
        second_call_args = get_data_func.call_args_list[1][0]
        assert_that(first_call_args[0], is_([filenames]))
        assert_that(first_call_args[1], is_(variables[0]))
        assert_that(second_call_args[1], is_(variables[1]))
        assert_that(first_call_args[2], is_(product))

        # Check the data we got back is as expected
        assert_that(data, instance_of(GriddedDataList))
        assert_that(data[0].data.tolist(), is_(make_square_5x3_2d_cube().data.tolist()))
        assert_that(data[1].data.tolist(), is_(data[0].data.tolist()))
Example 45
def read_data_list(filenames, variables, product=None, aliases=None):
    """
    Read multiple data objects from a list of files. Files can be either gridded or ungridded but not a mix of both.

    :param filenames:   The filenames of the files to read. This can be either a single filename as a string, a comma
     separated list, or a :class:`list` of string filenames. Filenames can include directories which will be expanded to
     include all files in that directory, or wildcards such as ``*`` or ``?``.
    :type filenames: string or list
    :param variables: One or more variables to read from the files
    :type variables: string or list
    :param str product: The name of the data reading plugin to use to read the data (e.g. ``Cloud_CCI``).
    :param aliases: List of aliases to put on each variable's data object as an alternative means of identifying them.
    :type aliases: string or list
    :return:  A list of the data read out (either a :class:`GriddedDataList` or :class:`UngriddedDataList` depending on
     the type of data contained in the files)
    """
    from cis.data_io.data_reader import DataReader, expand_filelist
    try:
        file_set = expand_filelist(filenames)
    except ValueError as e:
        raise IOError(e)
    if len(file_set) == 0:
        raise IOError("No files found which match: {}".format(filenames))
    return DataReader().read_data_list(file_set, variables, product, aliases)
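A minimal usage sketch for the read_data_list helper above, assuming it is exposed at the package level as in recent CIS releases; the filenames and variable names are hypothetical.

import cis

# Wildcards and directories are expanded by expand_filelist as described in the docstring.
data = cis.read_data_list('satellite_*.nc', ['AOD550', 'AOD870'],
                          aliases=['aod_550', 'aod_870'])

for d in data:
    # Each element is a GriddedData/UngriddedData object carrying the alias it was given.
    print(d.alias, d.data.shape)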