Esempio n. 1
0
 def test_GIVEN_gridded_datagroups_WHEN_read_datagroups_THEN_data_returned_in_list(self):
     # Stub for the variable lookup: builds a fresh file-name -> variable-names
     # mapping on every call and indexes it with the requested file.
     def variables_in(f):
         return {
             'filename1.nc': ['var1', 'var2'],
             'filename2.nc': ['var3'],
         }[f]

     groups = [
         {'variables': ['var1', 'var2'], 'filenames': ['filename1.nc'], 'product': None},
         {'variables': ['var3'], 'filenames': ['filename2.nc'], 'product': 'cis'},
     ]
     # One data object per variable; the data stub hands them out in call order.
     expected = [make_from_cube(make_square_5x3_2d_cube()) for _ in range(3)]

     reader = DataReader(get_data_func=MagicMock(side_effect=expected),
                         get_variables_func=MagicMock(side_effect=variables_in))
     data = reader.read_datagroups(groups)

     # All three objects must be returned, in the order they were produced.
     assert_that(len(data), is_(3))
     for index, item in enumerate(expected):
         assert_that(data[index], is_(item))
Esempio n. 2
0
def stats_cmd(main_arguments):
    """
    Handle the statistics command.

    Reads every datagroup, runs the statistical analysis over the data and prints each result beneath a banner.
    If an output was requested, the results are also saved, with a history entry listing the variables and
    files that were used.

    :param main_arguments: The command line arguments (minus the stats command)
    """
    from cis.stats import StatsAnalyzer
    from cis.data_io.gridded_data import GriddedDataList

    data_list = DataReader().read_datagroups(main_arguments.datagroups)
    results = StatsAnalyzer(*data_list).analyze()

    title = "RESULTS OF STATISTICAL COMPARISON:"
    caveat = "Compared all points which have non-missing values in both variables"
    # The rules are as wide as the longer of the two banner lines.
    width = max(len(title), len(caveat))
    for banner_line in (width * '=', title, width * '-', caveat, width * '='):
        print(banner_line)
    for result in results:
        print(result.pprint())

    if not main_arguments.output:
        return

    cubes = GriddedDataList([result.as_cube() for result in results])
    variables, filenames = [], []
    for datagroup in main_arguments.datagroups:
        variables += datagroup['variables']
        filenames += datagroup['filenames']

    # File names go through a set so that each one is reported only once.
    history = "".join(("Statistical comparison performed using CIS version ", __version__,
                       "\n variables: ", str(variables),
                       "\n from files: ", str(set(filenames))))
    cubes.add_history(history)
    cubes.save_data(main_arguments.output)
Esempio n. 3
0
 def test_GIVEN_multiple_datagroups_WHEN_read_datagroups_THEN_get_data_called_correctly(self):
     # Stub for the variable lookup: a fresh mapping is built on every call.
     def variables_in(f):
         return {
             'filename1.nc': ['var1', 'var2'],
             'filename2.nc': ['var3', 'var4'],
         }[f]

     groups = [
         {'variables': ['var1', 'var2'], 'filenames': ['filename1.nc'], 'product': None},
         {'variables': ['var3', 'var4'], 'filenames': ['filename2.nc'], 'product': 'cis'},
     ]
     get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
     reader = DataReader(get_data_func=get_data_func,
                         get_variables_func=MagicMock(side_effect=variables_in))
     reader.read_datagroups(groups)

     # Positional arguments expected for each read: (filenames, variable, product),
     # one call per variable with the product of the datagroup it belongs to.
     expected_calls = [
         (['filename1.nc'], 'var1', None),
         (['filename1.nc'], 'var2', None),
         (['filename2.nc'], 'var3', 'cis'),
         (['filename2.nc'], 'var4', 'cis'),
     ]
     assert_that(get_data_func.call_count, is_(4))
     for index, expected_args in enumerate(expected_calls):
         assert_that(get_data_func.call_args_list[index][0], is_(expected_args))
Esempio n. 4
0
def plot_cmd(main_arguments):
    """
    Handle the 'plot' command.

    Reads the data for every datagroup, then hands the data, the per-datagroup layer options and all remaining
    command line arguments to the Plotter.

    :param main_arguments:    The command line arguments
    """
    from cis.plotting.formatted_plot import Plotter
    from cis.data_io.data_reader import DataReader

    data = DataReader().read_datagroups(main_arguments.datagroups)

    # Everything left in this dict is forwarded to the plotter as a keyword argument, so the entries it is not
    # expecting have to be removed first. vars() returns the object's own attribute dict, which means the
    # removals below also strip those attributes from main_arguments.
    plot_kwargs = vars(main_arguments)
    for required_key in ('command', 'quiet', 'verbose', 'force_overwrite'):
        del plot_kwargs[required_key]
    # output_var is not always present, so its absence is tolerated.
    plot_kwargs.pop("output_var", None)

    # Whatever a datagroup carries besides the reader inputs becomes that layer's options.
    reader_keys = ['variables', 'filenames', 'product']
    layer_opts = []
    for datagroup in plot_kwargs.pop('datagroups'):
        layer_opts.append({key: value for key, value in datagroup.items() if key not in reader_keys})

    Plotter(data, layer_opts=layer_opts, **plot_kwargs)
Esempio n. 5
0
def aggregate_cmd(main_arguments):
    """
    Handle the aggregation command.

    Only one datagroup is accepted. Gridded data is collapsed over the grid's dimension names (with a
    deprecation warning, and only when no grid specification was supplied); any other data is aggregated
    onto the requested grid. The result is saved to the requested output.

    :param main_arguments: The command line arguments (minus the aggregate command)
    :raises cis.exceptions.InvalidCommandLineOptionError: if a grid specification is given for gridded data
    """
    import cis.exceptions as ex
    from cis.data_io.gridded_data import GriddedDataList

    datagroups = main_arguments.datagroups
    if len(datagroups) > 1:
        __error_occurred("Aggregation can only be performed on one data group")
    input_group = datagroups[0]

    data = DataReader().read_single_datagroup(input_group)

    if not isinstance(data, GriddedDataList):
        output = data.aggregate(how=input_group.get("kernel", ''), **main_arguments.grid)
    else:
        logging.warning(
            "The aggregate command is deprecated for GriddedData and will not be supported in future "
            "versions of CIS. Please use 'collapse' instead.")
        # Only the dimension names are used for gridded data; any actual specification is rejected.
        for specification in main_arguments.grid.values():
            if specification is not None:
                raise ex.InvalidCommandLineOptionError(
                    "Grid specifications are not supported for Gridded aggregation."
                )
        dimensions = list(main_arguments.grid.keys())
        output = data.collapsed(dimensions, how=input_group.get("kernel", ''))

    output.save_data(main_arguments.output)
Esempio n. 6
0
    def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(self):
        var_names = ['var1', 'var2']
        filename = 'filename1'
        product = None
        # Re-class the cube in place so that it is an instance of GriddedData.
        cube = make_square_5x3_2d_cube()
        cube.__class__ = GriddedData
        get_data_func = MagicMock(return_value=cube)
        result = DataReader(get_data_func=get_data_func).read_data_list(filename, var_names, product)

        # Check the data read function is called correctly: once per variable,
        # with the single filename wrapped in a list.
        assert_that(get_data_func.call_count, is_(2))
        positional_args = [call[0] for call in get_data_func.call_args_list]
        assert_that(positional_args[0][0], is_([filename]))
        assert_that(positional_args[0][1], is_(var_names[0]))
        assert_that(positional_args[1][1], is_(var_names[1]))
        assert_that(positional_args[0][2], is_(product))

        # Check the data we got back is as expected
        assert_that(result, instance_of(GriddedDataList))
        assert_that(result[0].data.tolist(),
                    is_(make_square_5x3_2d_cube().data.tolist()))
        assert_that(result[1].data.tolist(), is_(result[0].data.tolist()))
Esempio n. 7
0
def col_cmd(main_arguments):
    """
    Handle the collocate ('col') command.

    Reads the sample (a variable if one was named, otherwise just the coordinates), unpacks the collocator
    and kernel options from the sample group, then collocates every datagroup onto the sample and saves
    each result to the requested output.

    :param main_arguments:    The command line arguments (minus the col command)
    """
    from cis.collocation.col_framework import get_kernel
    from cis.parse import check_boolean

    # Read the sample data
    sample_files = main_arguments.samplefiles
    sample_product = main_arguments.sampleproduct
    has_sample_variable = main_arguments.samplevariable is not None
    if has_sample_variable:
        sample_data = DataReader().read_data_list(
            sample_files, main_arguments.samplevariable, sample_product)[0]
    else:
        sample_data = DataReader().read_coordinates(sample_files, sample_product)

    # Unpack the sample options
    sample_group = main_arguments.samplegroup
    col_name, col_options = sample_group.get('collocator', ('', {}))
    kern_name, kern_options = sample_group.get('kernel', ('', {}))

    # The default follows whether a sample variable was read. The option is popped so that it is not passed
    # a second time through **col_options below.
    requested_flag = col_options.pop('missing_data_for_missing_sample', str(has_sample_variable))
    missing_data_for_missing_sample = check_boolean(requested_flag, logging)

    if kern_name:
        kernel = get_kernel(kern_name)(**kern_options)
    else:
        kernel = None

    # Then collocate each datagroup
    for input_group in main_arguments.datagroups:
        data = DataReader().read_single_datagroup(input_group)
        collocated = data.collocated_onto(
            sample_data,
            how=col_name,
            kernel=kernel,
            missing_data_for_missing_sample=missing_data_for_missing_sample,
            **col_options)
        collocated.save_data(main_arguments.output)
Esempio n. 8
0
 def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(self):
     # Only wildcard patterns are requested, and the stubbed variable lookup
     # reports names that the test expects none of them to match.
     requested = ['test?.hdf', '*.nc']
     available = ['sample_file.hdf', 'aeronet.lev20']
     reader = DataReader(get_data_func=MagicMock(),
                         get_variables_func=MagicMock(return_value=available))
     with self.assertRaises(ValueError):
         reader.read_data_list('filename1', requested)[0]
Esempio n. 9
0
 def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(self):
     # Re-class the cube in place so that it is an instance of GriddedData.
     gridded = make_square_5x3_2d_cube()
     gridded.__class__ = GriddedData
     ungridded = make_regular_2d_ungridded_data()
     # The first read yields gridded data, the second ungridded data.
     reader = DataReader(get_data_func=MagicMock(side_effect=[gridded, ungridded]))
     with self.assertRaises(TypeError):
         reader.read_data_list('filename1', ['var1', 'var2'], None)[0]
Esempio n. 10
0
 def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(self):
     # One exact variable name plus two wildcard patterns; the test expects
     # only the exact name to result in a read.
     requested = ['aeronet.lev20', '*.nc', 'test?.hdf']
     available = ['aeronet.lev20', 'var2.hdf']
     reader = DataReader(
         get_data_func=MagicMock(return_value=make_regular_2d_ungridded_data()),
         get_variables_func=MagicMock(return_value=available))

     reader.read_data_list('filename1', requested)[0]

     data_stub = reader._get_data_func
     assert_that(data_stub.call_count, is_(1))
     # The variable name is the second positional argument of the read call.
     assert_that(data_stub.call_args_list[0][0][1], is_('aeronet.lev20'))
Esempio n. 11
0
 def test_GIVEN_aliases_missing_WHEN_read_datagroups_THEN_read_OK_aliases_default_to_var_names(self):
     # The datagroup deliberately carries no 'aliases' entry.
     group = dict(variables=['var1'], filenames=['filename1.nc'], product=None)
     cube_data = make_from_cube(make_square_5x3_2d_cube())
     reader = DataReader(get_data_func=MagicMock(side_effect=[cube_data]),
                         get_variables_func=MagicMock(side_effect=['var1']))

     data = reader.read_datagroups([group])

     # NOTE(review): 'dummy' is presumably the var_name given to the cube by
     # make_square_5x3_2d_cube - confirm against that helper.
     assert_that(data[0].var_name, is_('dummy'))
Esempio n. 12
0
def evaluate_cmd(main_arguments):
    """
    Handle the evaluation command.

    Reads every datagroup, evaluates the given expression over the data and saves the result to the
    requested output.

    :param main_arguments: The command line arguments (minus the eval command)
    """
    from cis.evaluate import Calculator

    data_list = DataReader().read_datagroups(main_arguments.datagroups)
    result = Calculator().evaluate(
        data_list,
        main_arguments.expr,
        main_arguments.output_var,
        main_arguments.units,
        main_arguments.attributes,
    )
    result.save_data(main_arguments.output)
Esempio n. 13
0
 def test_GIVEN_aliases_WHEN_read_datagroups_THEN_output_data_has_aliases(self):
     # A single variable with a single alias supplied alongside it.
     group = dict(variables=['var1'], filenames=['filename1.nc'],
                  product=None, aliases=['alias1'])
     cube_data = make_from_cube(make_square_5x3_2d_cube())
     reader = DataReader(get_data_func=MagicMock(return_value=cube_data),
                         get_variables_func=MagicMock(return_value=['var1']))

     data = reader.read_datagroups([group])

     assert_that(data[0].alias, is_('alias1'))
Esempio n. 14
0
 def test_GIVEN_not_enough_aliases_WHEN_read_datagroups_THEN_raises_ValueError(self):
     # Two variables are requested but only one alias is supplied.
     group = dict(variables=['var1', 'var2'], filenames=['filename1.nc'],
                  product=None, aliases=['alias1'])
     cubes = [make_from_cube(make_square_5x3_2d_cube()) for _ in range(2)]
     reader = DataReader(get_data_func=MagicMock(side_effect=cubes),
                         get_variables_func=MagicMock(side_effect=['var1', 'var2']))
     with self.assertRaises(ValueError):
         reader.read_datagroups([group])
Esempio n. 15
0
 def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(self):
     patterns = ['*.nc', 'test?.hdf']
     available = [
         'aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc',
         'test.hdf', 'test1.hdf'
     ]
     # The variables the test expects to be read, in the expected call order.
     should_match = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
     reader = DataReader(
         get_data_func=MagicMock(return_value=make_regular_2d_ungridded_data()),
         get_variables_func=MagicMock(return_value=available))

     reader.read_data_list('filename1', patterns)[0]

     data_stub = reader._get_data_func
     assert_that(data_stub.call_count, is_(len(should_match)))
     # The variable name is the second positional argument of each read call.
     for index, expected_name in enumerate(should_match):
         assert_that(data_stub.call_args_list[index][0][1], is_(expected_name))
Esempio n. 16
0
    def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_GriddedData_returned(self):
        # NOTE(review): the test name says "GriddedData_returned", but the
        # assertions below check for UngriddedData.
        variable = 'var1'
        filename = 'filename1'
        product = None
        get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
        result = DataReader(get_data_func=get_data_func).read_data_list(filename, variable, product)[0]

        # Check the data read function is called correctly: a single call,
        # with the filename wrapped in a list.
        assert_that(get_data_func.call_count, is_(1))
        read_args = get_data_func.call_args_list[0][0]
        for position, expected in enumerate(([filename], variable, product)):
            assert_that(read_args[position], is_(expected))

        # Check the data we got back is as expected
        assert_that(result, instance_of(UngriddedData))
        assert_that(result.data.tolist(),
                    is_(make_regular_2d_ungridded_data().data.tolist()))
Esempio n. 17
0
def subset_cmd(main_arguments):
    """
    Handle the subset command.

    Only one datagroup is accepted. Its data is read, subset with the requested limits and saved to the
    requested output.

    :param main_arguments:    The command line arguments (minus the subset command)
    :raises cis.exceptions.NoDataInSubsetError: if the subset operation returns None
    """
    import cis.exceptions as ex

    datagroups = main_arguments.datagroups
    if len(datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")

    data = DataReader().read_single_datagroup(datagroups[0])
    result = data.subset(**main_arguments.limits)

    # A None result means the constraints excluded all of the data, so there is nothing to save.
    if result is None:
        raise ex.NoDataInSubsetError(
            "No output created - constraints exclude all data")

    result.save_data(main_arguments.output)
Esempio n. 18
0
def collapse_cmd(main_arguments):
    """
    Main routine for handling calls to the collapse command.

    Only one datagroup is accepted. Its data is read, collapsed over the requested dimensions using the
    datagroup's kernel (an empty string when none is given) and saved to the requested output.

    Ungridded data is reported as an error and the command stops there: ``collapsed`` is never called on
    data the command has just declared unsupported.

    :param main_arguments: The command line arguments (minus the collapse command)
    """
    from cis.data_io.ungridded_data import UngriddedDataList

    if len(main_arguments.datagroups) > 1:
        __error_occurred("Collapse can only be performed on one data group")
    input_group = main_arguments.datagroups[0]

    data = DataReader().read_single_datagroup(input_group)

    if isinstance(data, UngriddedDataList):
        # Report through the same channel as the datagroup-count check above.
        # NOTE(review): __error_occurred presumably aborts the command - confirm against its definition.
        __error_occurred(
            "The collapse command can only be performed on gridded data. "
            "Please use 'aggregate' instead.")
        # Guard in case __error_occurred returns: never fall through to collapsing ungridded data.
        return

    output = data.collapsed(main_arguments.dimensions,
                            how=input_group.get("kernel", ''))

    output.save_data(main_arguments.output)
Esempio n. 19
0
def read_data_list(filenames, variables, product=None, aliases=None):
    """
    Read one or more variables from a set of files. The files may hold gridded or ungridded data, but not
    a mixture of the two.

    :param filenames:   The files to read: a single filename string, a comma separated list, or a :class:`list` of
     filename strings. Directories are expanded to all of the files they contain, and wildcards such as ``*`` or ``?``
     may be used.
    :type filenames: string or list
    :param variables: One or more variables to read from the files
    :type variables: string or list
    :param str product: The name of the data reading plugin to use to read the data (e.g. ``Cloud_CCI``).
    :param aliases: List of aliases to put on each variable's data object as an alternative means of identifying them.
    :type aliases: string or list
    :return:  A list of the data read out (either a :class:`GriddedDataList` or :class:`UngriddedDataList` depending on
     the type of data contained in the files)
    :raises IOError: if the filenames cannot be expanded, or if no files match them
    """
    from cis.data_io.data_reader import DataReader, expand_filelist

    # A ValueError from the expansion is surfaced to the caller as an IOError.
    try:
        matched_files = expand_filelist(filenames)
    except ValueError as expansion_error:
        raise IOError(expansion_error)

    if len(matched_files) == 0:
        message = "No files found which match: {}".format(filenames)
        raise IOError(message)

    reader = DataReader()
    return reader.read_data_list(matched_files, variables, product, aliases)