def test_load_json_stringio(self):
     """Load a JSON-serialised config from an in-memory text stream.

     The YAML fixture is parsed, re-encoded as JSON, written into a
     ``StringIO`` and handed to ``QcConfig``; the resulting ``config`` must
     equal ``self.expected_dict``.
     """
     with open(self.yamlfile, 'rt') as f:
         # An explicit Loader is required: PyYAML >= 6 raises TypeError when
         # yaml.load() is called without one.
         js = json.dumps(yaml.load(f.read(), Loader=yaml.Loader))
     # The context manager closes the buffer even if QcConfig raises.
     with io.StringIO() as st:
         st.write(js)
         qc = QcConfig(st)
     assert qc.config == self.expected_dict
Exemple #2
0
    def test_different_kwargs_run(self):
        """Run the gross range and location tests with per-test keyword inputs.

        ``inp`` carries 13 points while ``lat``/``lon`` carry 7, so the two
        flag arrays are checked against expectations of different lengths.
        """
        lons = [-101, -100, -99, 0, 99, 100, 101]
        lats = [-41, -40, -39, 0, 39, 40, 41]

        cfg = deepcopy(self.config)
        cfg['qartod']['location_test'] = {'bbox': [-100, -40, 100, 40]}

        results = QcConfig(cfg).run(inp=list(range(13)), lat=lats, lon=lons)

        npt.assert_array_equal(
            results['qartod']['gross_range_test'],
            np.array([3] + [1] * 11 + [3]),
        )
        npt.assert_array_equal(
            results['qartod']['location_test'],
            np.array([4] + [1] * 5 + [4]),
        )
    def test_run(self):
        """Run ``self.config`` on 13 points and check the gross range flags.

        Also checks that no 'aggregate' entry shows up in the qartod results.
        """
        results = QcConfig(self.config).run(inp=list(range(13)))

        npt.assert_array_equal(
            results['qartod']['gross_range_test'],
            np.array([3] + [1] * 11 + [3]),
        )
        assert 'aggregate' not in results['qartod']
Exemple #4
0
 def test_pressure_increasing_test(self):
     """Hand an argo ``pressure_increasing_test`` config (no options) to ``perf_test``."""
     config = {'argo': {'pressure_increasing_test': {}}}
     self.perf_test(QcConfig(config))
Exemple #5
0
    def test_qartod_compare(self):
        """Hand a ``qartod.qartod_compare`` call to ``perf_test``.

        The inputs to the comparison are the per-test results of one run of a
        qartod config holding gross range, spike and rate of change tests.
        """
        gross_range_options = {'suspect_span': [1, 11], 'fail_span': [0, 12]}
        spike_options = {'suspect_threshold': 3, 'fail_threshold': 6}
        rate_of_change_options = {'threshold': 2.5}
        qc = QcConfig({
            'qartod': {
                'gross_range_test': gross_range_options,
                'spike_test': spike_options,
                'rate_of_change_test': rate_of_change_options,
            }
        })
        results = qc.run(inp=self.inp, tinp=self.times, zinp=self.zinp)

        # Collect every result stored under 'qartod', in key order.
        all_tests = []
        for test_name in list(results['qartod']):
            all_tests.append(results['qartod'][test_name])

        def compare_all():
            qartod.qartod_compare(all_tests)

        self.perf_test(None, method_name='qartod_compare', run_fn=compare_all)
Exemple #6
0
    def test_with_empty_config(self):
        """Check that a test configured as ``None`` is absent from the results."""
        self.config['qartod']['flat_line_test'] = None

        results = QcConfig(self.config).run(inp=list(range(13)))
        reported_tests = results['qartod']

        assert 'gross_range_test' in reported_tests
        assert 'flat_line_test' not in reported_tests
 def test_load_yaml_stringio(self):
     """Load the YAML fixture through an in-memory text stream.

     The file's text is written into a ``StringIO`` that is passed to
     ``QcConfig``; the resulting ``config`` must equal ``self.expected_dict``.
     """
     buf = io.StringIO()
     with open(self.yamlfile, 'rt') as src:
         yaml_text = src.read()
         buf.write(yaml_text)
     loaded = QcConfig(buf)
     buf.close()
     assert loaded.config == self.expected_dict
Exemple #8
0
 def test_attenuated_signal_test(self):
     """Hand a qartod ``attenuated_signal_test`` config with a threshold pair to ``perf_test``."""
     test_options = {'threshold': (2.5, 5)}
     config = {'qartod': {'attenuated_signal_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #9
0
 def test_rate_of_change_test(self):
     """Hand a qartod ``rate_of_change_test`` config (threshold 2.5) to ``perf_test``."""
     test_options = {'threshold': 2.5}
     config = {'qartod': {'rate_of_change_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #10
0
    def test_using_config(self):
        """Run the argo ``pressure_increasing_test`` via ``QcConfig`` and check the flags."""
        values = np.array(
            [0.0, 2.0, 2.0, 1.99, 2.3, 2.4, 2.4, 2.5],
            dtype='float32',
        )
        expected_flags = np.array([1, 1, 3, 3, 1, 1, 3, 1])

        runner = QcConfig({'argo': {'pressure_increasing_test': {}}})
        results = runner.run(inp=values)

        npt.assert_array_equal(
            results['argo']['pressure_increasing_test'],
            expected_flags,
        )
Exemple #11
0
    def test_comparing_nc_and_qc_from_nc(self):
        """Check that ``NcQcConfig`` and ``QcConfig`` agree on the gross range flags.

        The ``NcQcConfig`` is built from ``self.fp`` and run on it; its
        ``data1`` config is then reused for a plain ``QcConfig`` run on 13
        points. Both flag arrays must match each other and ``self.expected``.
        """
        c = NcQcConfig(self.fp)
        ncresults = c.run(self.fp)

        qcr = QcConfig(c.config['data1'])
        result = qcr.run(inp=list(range(13)))

        nc_flags = ncresults['data1']['qartod']['gross_range_test']
        qc_flags = result['qartod']['gross_range_test']

        npt.assert_array_equal(nc_flags, qc_flags)
        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected array has to be checked in a call of its own.
        npt.assert_array_equal(qc_flags, self.expected)
Exemple #12
0
 def test_location_test(self):
     """Hand a qartod ``location_test`` config built from ``self.lon``/``self.lat`` to ``perf_test``."""
     test_options = {'lon': self.lon, 'lat': self.lat}
     config = {'qartod': {'location_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #13
0
 def test_gross_range(self):
     """Hand a qartod ``gross_range_test`` config with suspect and fail spans to ``perf_test``."""
     test_options = {'suspect_span': [1, 11], 'fail_span': [0, 12]}
     config = {'qartod': {'gross_range_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #14
0
 def test_spike_test(self):
     """Hand a qartod ``spike_test`` config with suspect and fail thresholds to ``perf_test``."""
     test_options = {'suspect_threshold': 3, 'fail_threshold': 6}
     config = {'qartod': {'spike_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #15
0
 def test_attenuated_signal_test(self):
     """Hand a qartod ``attenuated_signal_test`` config with suspect and fail thresholds to ``perf_test``."""
     test_options = {'suspect_threshold': 5, 'fail_threshold': 2.5}
     config = {'qartod': {'attenuated_signal_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #16
0
 def test_attenuated_signal_with_time_period_test(self):
     """Hand a qartod ``attenuated_signal_test`` config that also sets ``test_period`` to ``perf_test``."""
     test_options = {
         'suspect_threshold': 5,
         'fail_threshold': 2.5,
         'test_period': 86400,
     }
     config = {'qartod': {'attenuated_signal_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #17
0
 def test_flat_line_test(self):
     """Hand a qartod ``flat_line_test`` config with thresholds and a tolerance to ``perf_test``."""
     test_options = {
         'suspect_threshold': 43200,
         'fail_threshold': 86400,
         'tolerance': 1,
     }
     config = {'qartod': {'flat_line_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #18
0
 def test_location_test__with_range_max(self):
     """Hand a qartod ``location_test`` config that also sets ``range_max`` to ``perf_test``."""
     test_options = {
         'lon': self.lon,
         'lat': self.lat,
         'range_max': 1,
     }
     config = {'qartod': {'location_test': test_options}}
     self.perf_test(QcConfig(config))
    def test_comparing_nc_and_qc_config(self):
        """Compare results from ``QcConfig`` to those from ``NcQcConfig``.

        ``NcQcConfig`` is built from ``self.config`` and run on ``self.fp``;
        ``QcConfig`` is built from the ``data1`` entry of the same config and
        run on ``self.data``. The gross range flags of both runs must match
        each other and ``self.expected``.
        """
        nc_config = NcQcConfig(self.config)
        nc_results = nc_config.run(self.fp)

        qc_config = QcConfig(self.config['data1'])
        qc_results = qc_config.run(inp=self.data)

        nc_flags = nc_results['data1']['qartod']['gross_range_test']
        qc_flags = qc_results['qartod']['gross_range_test']

        npt.assert_array_equal(nc_flags, qc_flags)
        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected array has to be checked in a call of its own.
        npt.assert_array_equal(qc_flags, self.expected)
Exemple #20
0
 def test_speed_test(self):
     """Hand an argo ``speed_test`` config built from ``self.times``/``self.lon``/``self.lat`` to ``perf_test``."""
     test_options = {
         'tinp': self.times,
         'lon': self.lon,
         'lat': self.lat,
         'suspect_threshold': 1,
         'fail_threshold': 3,
     }
     config = {'argo': {'speed_test': test_options}}
     self.perf_test(QcConfig(config))
Exemple #21
0
 def test_climatology_test(self):
     """Hand a qartod ``climatology_test`` config holding one entry to ``perf_test``."""
     climatology_entry = {
         'vspan': (10, 20),
         'tspan': (0, 1),
         'period': 'quarter',
     }
     config = {
         'qartod': {
             'climatology_test': {'config': [climatology_entry]},
         }
     }
     self.perf_test(QcConfig(config))
Exemple #22
0
    def test_with_values_in_config(self):
        """Run with every input stored in the config, so ``run()`` takes no arguments."""
        cfg = deepcopy(self.config)
        qartod_cfg = cfg['qartod']
        qartod_cfg['location_test'] = {
            'bbox': [-100, -40, 100, 40],
            'lat': [-41, -40, -39, 0, 39, 40, 41],
            'lon': [-101, -100, -99, 0, 99, 100, 101],
        }
        qartod_cfg['gross_range_test']['inp'] = list(range(13))

        results = QcConfig(cfg).run()

        npt.assert_array_equal(
            results['qartod']['gross_range_test'],
            np.array([3] + [1] * 11 + [3]),
        )
        npt.assert_array_equal(
            results['qartod']['location_test'],
            np.array([4] + [1] * 5 + [4]),
        )
Exemple #23
0
 def test_climatology_config_test(self):
     """Run the YAML-configured climatology test on one sample that has no depth."""
     # One-element tuples, matching what unzipping a single sample would give.
     times = (np.datetime64('2011-01-02 00:00:00'),)
     values = (11,)
     depths = (None,)

     runner = QcConfig(self.yamlfile)
     results = runner.run(tinp=times, inp=values, zinp=depths)

     flags = results['qartod']['climatology_test']
     npt.assert_array_equal(flags, np.ma.array([1]))
def run_qartod(df, config, time="time", depth="depth"):
    # Run QARTOD tests
    # We are using the deprecated QcConfig method and hopefully will move
    #  to a new stream method soon.

    # TODO this is a deprecated method and we should move on the Stream method in the near future.
    for var in config.keys():
        qc = QcConfig(config[var])
        qc_result = qc.run(
            inp=df[var],
            tinp=df[time],
            zinp=df[depth],
        )
        for module, tests in qc_result.items():
            for test, flag in tests.items():
                flag_name = var + "_" + module + "_" + test
                if type(df) is xr.Dataset:
                    df[flag_name] = (df[var].dims, flag)
                else:
                    df.loc[flag_name] = flag
    return df
Exemple #25
0
 def test_climatology_test_depths(self):
     """Run the YAML-configured climatology test on five samples taken at one time.

     The samples differ in value and depth (one has no depth at all); the
     returned flags are compared against a fixed expected array.
     """
     when = np.datetime64('2012-01-02 00:00:00')
     # (time, value, depth)
     samples = [
         (when, 51, 2),
         (when, 71, 90),
         (when, 42, None),
         (when, 59, 11),
         (when, 79, 101),
     ]
     times, values, depths = zip(*samples)

     runner = QcConfig(self.yamlfile)
     results = runner.run(tinp=times, inp=values, zinp=depths)

     flags = results['qartod']['climatology_test']
     npt.assert_array_equal(flags, np.ma.array([1, 1, 1, 3, 9]))
    def test_run_with_agg(self):
        """Run gross range and spike tests together with an 'aggregate' entry.

        Checks the flags of both tests and of the aggregate against fixed
        expected arrays.
        """
        config = {
            'qartod': {
                'gross_range_test': {'fail_span': [0, 12]},
                'spike_test': {'suspect_threshold': 3, 'fail_threshold': 10},
                'aggregate': {},
            }
        }
        expected_flags = {
            'gross_range_test': np.array([4, 1, 1, 1, 1, 1]),
            'spike_test': np.array([2, 1, 1, 3, 3, 2]),
            'aggregate': np.array([4, 1, 1, 3, 3, 1]),
        }

        results = QcConfig(config).run(inp=[-1, 0, 1, 2, 10, 3])

        for test_name, expected in expected_flags.items():
            npt.assert_array_equal(results['qartod'][test_name], expected)
Exemple #27
0
 def test_load_path_object(self):
     """Check that a config given as a ``Path`` loads to ``self.expected_dict``."""
     config_path = Path(self.yamlfile)
     loaded = QcConfig(config_path)
     assert loaded.config == self.expected_dict
Exemple #28
0
 def test_load_file_path(self):
     """Check that a config given as ``self.yamlfile`` loads to ``self.expected_dict``."""
     loaded = QcConfig(self.yamlfile)
     assert loaded.config == self.expected_dict
Exemple #29
0
 def test_load_yaml_dict_object(self):
     """Check that an already-parsed YAML object loads to ``self.expected_dict``."""
     with open(self.yamlfile) as src:
         parsed = yaml.load(src.read(), Loader=yaml.Loader)
     loaded = QcConfig(parsed)
     assert loaded.config == self.expected_dict
Exemple #30
0
    def qc(self, dataset_ids=None, verbose=False, skip_units=False):
        """Light quality check on data.

        Runs the IOOS QARTOD gross range test, as a first order quality check,
        on every variable that matches a custom name in `var_def`. Only the
        variables that were checked are returned, together with their flags.

        Requires pint for unit handling. Requires user-input `criteria` and
        `var_def` to run.

        This is slow if your data is both chunks of time and space, so this
        should first narrow by both as much as possible.

        Parameters
        ----------
        dataset_ids: str, list, optional
            Run the check for these dataset_ids specifically. If none are
            provided, every key in `self.keys()` is checked.
        verbose: boolean, optional
            If True, report summary statistics on QC flag distribution in datasets.
        skip_units: boolean, optional
            If True, do not interpret or alter units and assume the data is in
            the units described in var_def already.

        Returns
        -------
        dict
            Maps each dataset_id to its checked data: the matched variables
            plus, for each of them, a flag variable named [variable]+'_qc'.

        Raises
        ------
        AssertionError
            If `criteria` or `var_def` is not set, if no custom variable name
            matches a dataset, or if the number of matched variables differs
            from the number of custom names.

        Notes
        -----
        Code has been saved for data in DataFrames, but is changing so
        that data will be in Datasets. This way, can use cf-xarray
        functionality for custom variable names and easier to have
        recognizable units for variables with netcdf than csv.
        """

        assertion = (
            "Need to have custom criteria and variable information defined to run QC."
        )
        assert self.criteria and self.var_def, assertion

        if dataset_ids is None:
            # Only check dataset_ids that have already been read in.
            data_ids = self.keys()
        elif isinstance(dataset_ids, list):
            data_ids = dataset_ids
        else:
            data_ids = [dataset_ids]

        data_out = {}
        for data_id in data_ids:
            # access the Dataset
            dd = self[data_id]
            # which custom variable names are in dataset
            # dd_varnames are the variable names in the Dataset dd
            # cf_varnames are the custom names we can use to refer to the
            # variables through cf-xarray
            if isinstance(dd, pd.DataFrame):
                varnames, cf_varnames = [], []
                for var in self.var_def:
                    try:
                        varname = dd.cf[var].name
                    except Exception:
                        # Best effort: this custom name cannot be resolved in
                        # this DataFrame, so skip it. Unlike a bare except,
                        # KeyboardInterrupt/SystemExit still propagate.
                        continue
                    varnames.append(varname)
                    cf_varnames.append(var)
            elif isinstance(dd, xr.Dataset):
                varnames = []
                for var in self.var_def:
                    # Look each custom name up once and keep only the hits.
                    matches = cf_xarray.accessor._get_custom_criteria(dd, var)
                    if len(matches) > 0:
                        varnames.append((matches, var))
            assert len(varnames) > 0, "no custom names matched in Dataset."
            if isinstance(dd, pd.DataFrame):
                dd_varnames = varnames.copy()
            elif isinstance(dd, xr.Dataset):
                dd_varnames, cf_varnames = zip(*varnames)
                # flatten the per-name lists of matches into one list
                dd_varnames = sum(dd_varnames, [])
            assert len(dd_varnames) == len(
                cf_varnames
            ), "looks like multiple variables might have been identified for a custom variable name"

            # subset to just the boem or requested variables for each df or ds
            if isinstance(dd, pd.DataFrame):
                dd2 = dd[list(varnames)]
            elif isinstance(dd, xr.Dataset):
                dd2 = dd.cf[cf_varnames]

            if not skip_units:

                # Preprocess to change salinity units away from 1e-3
                if isinstance(dd, pd.DataFrame):
                    # this replaces units in the 2nd column level of 1e-3 with psu
                    new_levs = [
                        "psu" if col == "1e-3" else col
                        for col in dd2.columns.levels[1]
                    ]
                    # set_levels() returns a new index; its ``inplace`` option
                    # was removed in pandas 2.0, so assign the result back.
                    dd2.columns = dd2.columns.set_levels(new_levs, level=1)
                elif isinstance(dd, xr.Dataset):
                    for Var in dd2.data_vars:
                        if ("units" in dd2[Var].attrs
                                and dd2[Var].attrs["units"] == "1e-3"):
                            dd2[Var].attrs["units"] = "psu"
                # run pint quantify on each data structure
                dd2 = dd2.pint.quantify()

                # go through each variable by name to make sure in correct units
                # have to do this in separate loop so that can dequantify afterward
                if isinstance(dd, pd.DataFrame):
                    print("NOT IMPLEMENTED FOR DATAFRAME YET")
                elif isinstance(dd, xr.Dataset):
                    # form of "temp": "degree_Celsius"
                    units_dict = {
                        dd_varname: self.var_def[cf_varname]["units"]
                        for (dd_varname,
                             cf_varname) in zip(dd_varnames, cf_varnames)
                    }
                    # convert to conventional units
                    dd2 = dd2.pint.to(units_dict)

                dd2 = dd2.pint.dequantify()

            # now loop for QARTOD on each variable
            for dd_varname, cf_varname in zip(dd_varnames, cf_varnames):
                var_info = self.var_def[cf_varname]
                # run QARTOD
                qc_config = {
                    "qartod": {
                        "gross_range_test": {
                            "fail_span": var_info["fail_span"],
                            "suspect_span": var_info["suspect_span"],
                        },
                    }
                }
                qc_results = QcConfig(qc_config).run(inp=dd2[dd_varname])
                flags = qc_results["qartod"]["gross_range_test"]

                # put flags into dataset
                new_qc_var = f"{dd_varname}_qc"
                if isinstance(dd, pd.DataFrame):
                    dd2[new_qc_var] = flags
                elif isinstance(dd, xr.Dataset):
                    dd2[new_qc_var] = (dd2[dd_varname].dims, flags)

            data_out[data_id] = dd2

        if verbose:
            for dataset_id, dd in data_out.items():
                print(dataset_id)
                # Take the flag variable names from the dataset being reported,
                # not from whichever dataset the loop above handled last.
                # NOTE(review): relies on `data_vars`, so this report presumably
                # only works for Dataset output - confirm for DataFrames.
                qckeys = [var for var in dd.data_vars if "_qc" in var]
                for qckey in qckeys:
                    print(qckey)
                    for flag, desc in odg.qcdefs.items():
                        print(
                            f"Flag == {flag} ({desc}): {int((dd[qckey] == int(flag)).sum())}"
                        )

        return data_out