Example 1
    def test_collapse_second_dim(self):
        config = Config.from_nc(self.inputs[0])
        config.dims["b"].update({"flatten": True, "index_by": "b"})
        l = generate_aggregation_list(config, self.inputs)
        evaluate_aggregation_list(config, l, self.filename)
        with nc.Dataset(self.filename) as nc_out:  # type: nc.Dataset
            # This is the more practically useful kind of aggregation,
            # where, for example, dimension "a" might represent time
            # and dimension "b" something like satellite or event
            # (something of which, at any point in time, there may be
            # an arbitrary number).

            # Flattening dimension "b" should produce:

            # [[0 -- --]
            #  [1 -- --]
            #  [2 -- --]
            #  [3 3 --]
            #  [4 4 --]
            #  [5 5 --]
            #  [6 6 6]
            #  [7 7 7]
            #  [8 8 8]]
            c = nc_out.variables["c"][:]
            self.assertEqual(c.shape, (9, 3))
            self.assertEqual(np.sum(c), 90)
            self.assertEqual(np.ma.count_masked(c), 9)
            for i, a in enumerate(["a", "b", "c"]):
                self.assertEqual(nc_out.variables["b"][i], a)
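For reference, a minimal sketch (plain numpy; the "expected" name is hypothetical) that reconstructs the masked layout from the comment above and checks the same totals:

import numpy as np

# Row i holds the value i: once for rows 0-2, twice for rows 3-5,
# three times for rows 6-8; everything else stays masked.
expected = np.ma.masked_all((9, 3), dtype=int)
for i in range(9):
    expected[i, : i // 3 + 1] = i
assert expected.sum() == 90
assert np.ma.count_masked(expected) == 9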
Example 2
    def setUp(self):
        # tmp file to aggregate to
        _, self.nc_out_filename = tempfile.mkstemp()

        pwd = os.path.dirname(__file__)
        self.files = sorted(glob.glob(os.path.join(pwd, "data", "*.nc")))
        self.config = Config.from_nc(self.files[0])
Example 3
 def test_missing_dim(self):
     """ The variable t depends on a dimension c that has not been configured.
     Make sure a ValueError is raised because of this."""
     dims = DimensionConfig([{
         "name": "a",
         "size": 2
     }, {
         "name": "b",
         "size": None
     }])
     vars = VariableConfig([
         {
             "name": "t",
             "dimensions": ["c"],
             "datatype": "float32"
         },
         {
             "name": "x",
             "dimensions": ["b", "a"],
             "datatype": "float32"
         },
     ])
     attrs = GlobalAttributeConfig([])
     with self.assertRaises(ValueError):
         Config(dims, vars, attrs)
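A sketch of the corresponding fix, using the same classes as above: once the missing dimension "c" is declared (assumed unlimited here), every variable's dimensions are resolvable and the same call should validate:

dims = DimensionConfig([
    {"name": "a", "size": 2},
    {"name": "b", "size": None},
    {"name": "c", "size": None},  # assumption: "c" may be unlimited
])
Config(dims, vars, attrs)  # with "c" declared, this should no longer raise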
Example 4
    def setUpClass(cls):
        super(TestEvaluateAggregationList, cls).setUpClass()
        pwd = os.path.dirname(__file__)
        cls.start_time = datetime(2017, 3, 16, 15, 27)
        cls.end_time = datetime(2017, 3, 16, 15, 28)
        cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))

        cls.config = Config.from_nc(cls.files[0])
        cls.config.dims["report_number"].update({
            "index_by": "OB_time",
            "min": cls.
            start_time,  # for convenience, will convert according to index_by units if this is datetime
            "max": cls.end_time,
            "other_dim_indicies": {
                "samples_per_record": 0
            },
            "expected_cadence": {
                "report_number": 1,
                "number_samples_per_report": 10,
            },
        })
        _, cls.filename = tempfile.mkstemp()
        agg_list = generate_aggregation_list(cls.config, cls.files)
        logger.info(agg_list)
        evaluate_aggregation_list(cls.config, agg_list, cls.filename)
        cls.output = nc.Dataset(cls.filename, "r")
Example 5
 def test_extra_dim(self):
     """We have configured an extra dimension z that isn't used by any variables.
     Make sure a ValueError is raised. """
     dims = DimensionConfig([
         {
             "name": "a",
             "size": 2
         },
         {
             "name": "b",
             "size": None
         },
         {
             "name": "z",
             "size": None
         },
     ])
     vars = VariableConfig([
         {
             "name": "t",
             "dimensions": ["a"],
             "datatype": "float32"
         },
         {
             "name": "x",
             "dimensions": ["b", "a"],
             "datatype": "float32"
         },
     ])
     attrs = GlobalAttributeConfig([])
     with self.assertRaises(ValueError):
         Config(dims, vars, attrs)
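The fix here is the mirror image of Example 3: either drop "z" or make some variable depend on it. A sketch of the first option:

dims = DimensionConfig([
    {"name": "a", "size": 2},
    {"name": "b", "size": None},
])  # "z" removed: every declared dimension is now used by a variable
Config(dims, vars, attrs)  # should no longer raise ValueError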
Example 6
 def test_initialize_several_variables(self):
     """Ensure aggregation file is created correctly according to the variable config."""
     config = Config.from_dict(
         {
             "dimensions": [{"name": "x", "size": None}, {"name": "y", "size": 10}],
             "variables": [
                 {
                     "name": "foo",
                     "dimensions": ["x", "y"],
                     "datatype": "float32",
                     "attributes": {"units": "seconds"},
                 },
                 {
                     "name": "foo_x",
                     "dimensions": ["x"],
                     "datatype": "float64",
                     "attributes": {"units": "floops", "created_by": "the flooper"},
                 },
             ],
             "global attributes": [],
         }
     )
     initialize_aggregation_file(config, self.filename)
     with nc.Dataset(self.filename) as nc_check:
         self.assertEqual(len(nc_check.variables), 2)
         self.assertEqual(nc_check.variables["foo"].dimensions, ("x", "y"))
         self.assertEqual(nc_check.variables["foo"].datatype, np.dtype(np.float32))
         self.assertEqual(nc_check.variables["foo"].units, "seconds")
         self.assertEqual(nc_check.variables["foo_x"].dimensions, ("x",))
         self.assertEqual(nc_check.variables["foo_x"].datatype, np.dtype(np.float64))
         self.assertEqual(nc_check.variables["foo_x"].units, "floops")
         self.assertEqual(
             nc_check.variables["foo_x"].getncattr("created_by"), "the flooper"
         )
Example 7
 def test_basic_with_var_attrs(self):
     """ Make sure the configuration accepts a valid configuration. """
     dims = DimensionConfig([{
         "name": "a",
         "size": 2
     }, {
         "name": "b",
         "size": None
     }])
     vars = VariableConfig([
         {
             "name": "t",
             "dimensions": ["b"],
             "datatype": "float32",
             "attributes": {
                 "_FillValue": 0
             },
         },
         {
             "name": "x",
             "dimensions": ["b", "a"],
             "datatype": "float32"
         },
     ])
     attrs = GlobalAttributeConfig([])
     Config(dims, vars, attrs)
Example 8
 def setUp(self):
     _, self.file = tempfile.mkstemp()
     pwd = os.path.dirname(__file__)
     self.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
     self.config = Config.from_nc(self.files[0])
     self.config.dims["time"].update(
         {"index_by": "time", "expected_cadence": {"time": 10},}
     )
Example 9
    def setUp(self):
        # tmp file to aggregate to
        _, self.nc_out_filename = tempfile.mkstemp()

        pwd = os.path.dirname(__file__)
        self.files = sorted(glob.glob(os.path.join(pwd, "data", "*.nc")))
        with open(os.path.join(pwd, "new_dim_config.json")) as config_in:
            self.config = Config.from_dict(json.load(config_in))
Example 10
 def setUp(self):
     _, self.file = tempfile.mkstemp()
     pwd = os.path.dirname(__file__)
     self.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
     self.config = Config.from_nc(self.files[0])
     self.config.dims["report_number"].update(
         {
             "index_by": "OB_time",
             "other_dim_indicies": {"samples_per_record": 0},
             "expected_cadence": {
                 "report_number": 1,
                 "number_samples_per_report": 10,
             },
         }
     )
Example 11
 def test_initialize_basic(self):
     """Ensure aggregation file is created with proper dimensions according to the config."""
     config = Config.from_dict(
         {
             "dimensions": [{"name": "x", "size": None}, {"name": "y", "size": 10}],
             "variables": [
                 {"name": "x", "dimensions": ["x", "y"], "datatype": "int8"}
             ],
             "global attributes": [],
         }
     )
     initialize_aggregation_file(config, self.filename)
     with nc.Dataset(self.filename) as nc_check:
         self.assertEqual(len(nc_check.dimensions), 2)
         self.assertEqual(nc_check.dimensions["y"].size, 10)
         self.assertFalse(nc_check.dimensions["y"].isunlimited())
         self.assertTrue(nc_check.dimensions["x"].isunlimited())
Example 12
 def setUpClass(cls):
     super(TestEvaluateAggregationList, cls).setUpClass()
     pwd = os.path.dirname(__file__)
     cls.start_time = datetime(2017, 6, 8, 16, 45)
     cls.end_time = datetime(2017, 6, 8, 16, 50)
     cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
     cls.config = Config.from_nc(cls.files[0])
     cls.config.dims["report_number"].update(
         {
             "index_by": "L1a_SciData_TimeStamp",
             "min": cls.start_time,  # for convenience, will convert according to index_by units if this is datetime
             "max": cls.end_time,
             "expected_cadence": {"report_number": 1, "sensor_unit": 0},
         }
     )
     _, cls.filename = tempfile.mkstemp()
     agg_list = generate_aggregation_list(cls.config, cls.files)
     evaluate_aggregation_list(cls.config, agg_list, cls.filename)
     cls.output = nc.Dataset(cls.filename, "r")
Example 13
 def setUpClass(cls):
     super(TestEvaluateAggregationList, cls).setUpClass()
     pwd = os.path.dirname(__file__)
     cls.start_time = datetime(2017, 4, 14, 19, 23)
     cls.end_time = datetime(2017, 4, 14, 20, 30)
     cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
     cls.config = Config.from_nc(cls.files[0])
     cls.config.dims["time"].update(
         {
             "index_by": "time",
             "min": cls.start_time,  # for convenience, will convert according to index_by units if this is datetime
             "max": cls.end_time,
             "expected_cadence": {"time": 10},
         }
     )
     _, cls.filename = tempfile.mkstemp()
     agg_list = generate_aggregation_list(cls.config, cls.files)
     evaluate_aggregation_list(cls.config, agg_list, cls.filename)
     cls.output = nc.Dataset(cls.filename, "r")
Example 14
 def test_initialize_with_list_attribute(self):
     """Ensure aggregation file is created with proper dimensions according to the config."""
     config = Config.from_dict(
         {
             "dimensions": [{"name": "x", "size": None}, {"name": "y", "size": 10}],
             "variables": [
                 {
                     "name": "x",
                     "dimensions": ["x", "y"],
                     "datatype": "int8",
                     "attributes": {"valid_range": [0, 10]},
                 }
             ],
             "global attributes": [],
         }
     )
     initialize_aggregation_file(config, self.filename)
     with nc.Dataset(self.filename) as nc_check:
         self.assertEqual(len(nc_check.dimensions), 2)
         self.assertEqual(nc_check.variables["x"].valid_range[0], 0)
         self.assertEqual(nc_check.variables["x"].valid_range[1], 10)
Example 15
 def test_to_json(self):
     dims = DimensionConfig([{
         "name": "a",
         "size": 2
     }, {
         "name": "b",
         "size": None
     }])
     vars = VariableConfig([
         {
             "name": "t",
             "dimensions": ["b"],
             "datatype": "float32"
         },
         {
             "name": "x",
             "dimensions": ["b", "a"],
             "datatype": "float32"
         },
     ])
     attrs = GlobalAttributeConfig([])
      config_dict = Config(dims, vars, attrs).to_dict()
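The test stops at the dict; a minimal sketch of the full JSON round trip, assuming to_dict() returns only JSON-serializable primitives and from_dict() accepts its output:

import json

config_dict = Config(dims, vars, attrs).to_dict()
config_json = json.dumps(config_dict, indent=2)       # plain-primitive assumption
restored = Config.from_dict(json.loads(config_json))  # round-trip assumption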
Example 16
    def test_default_multi_dim(self):
        config = Config.from_nc(self.inputs[0])
        l = generate_aggregation_list(config, self.inputs)
        evaluate_aggregation_list(config, l, self.filename)
        with nc.Dataset(self.filename) as nc_out:  # type: nc.Dataset
            # this is the default aggregation produced by aggregating
            # along both unlimited dimensions. It isn't really practically
            # useful, but by our "basic" definition of aggregation along
            # unlimited dimensions it is correct. Need to make sure we get
            # what's expected.

            # [[0 -- -- -- -- --]
            #  [1 -- -- -- -- --]
            #  [2 -- -- -- -- --]
            #  [-- 3 3 -- -- --]
            #  [-- 4 4 -- -- --]
            #  [-- 5 5 -- -- --]
            #  [-- -- -- 6 6 6]
            #  [-- -- -- 7 7 7]
            #  [-- -- -- 8 8 8]]
            c = nc_out.variables["c"][:]
            self.assertEqual(c.shape, (9, 6))
            self.assertEqual(np.sum(c), 90)
            self.assertEqual(np.ma.count_masked(c), 36)
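As in Example 1, a sketch (plain numpy; names hypothetical) of the expected default layout, where each group of three rows fills its own block of columns:

import numpy as np

# Rows 0-2 fill column 0, rows 3-5 columns 1-2, rows 6-8 columns 3-5.
expected = np.ma.masked_all((9, 6), dtype=int)
blocks = [slice(0, 1), slice(1, 3), slice(3, 6)]
for i in range(9):
    expected[i, blocks[i // 3]] = i
assert expected.sum() == 90
assert np.ma.count_masked(expected) == 36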
Example 17
    def setUpClass(cls):
        super(TestEvaluateAggregationList, cls).setUpClass()
        pwd = os.path.dirname(__file__)

        cls.start_time = datetime(2018, 1, 17, 15, 5)
        cls.end_time = datetime(2018, 1, 17, 15, 56)
        cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
        cls.config = Config.from_nc(cls.files[0])
        cls.config.dims["report_number"].update(
            {
                "index_by": "ELF_StartStopTime",
                "min": cls.start_time,  # for convenience, will convert according to index_by units if this is datetime
                "max": cls.end_time,
                "expected_cadence": {
                    "report_number": 1.0 / (5.0 * 60.0),
                    "number_of_time_bounds": 1.0 / ((5.0 * 60.0) - 1),
                },
                "size": None,
            }
        )
        _, cls.filename = tempfile.mkstemp()
        agg_list = generate_aggregation_list(cls.config, cls.files)
        evaluate_aggregation_list(cls.config, agg_list, cls.filename)
        cls.output = nc.Dataset(cls.filename, "r")
Example 18
 def setUp(self):
     self.config = Config.from_nc(test_input_file)
Example 19
    def setUp(self):
        _, self.file = tempfile.mkstemp()

        pwd = os.path.dirname(__file__)
        self.files = glob.glob(os.path.join(pwd, "data", "*.nc"))[:2]
        self.config = Config.from_nc(self.files[0])
Example 20
 def setUp(self):
     self.config = Config.from_nc(another_input_file)