def test_standard_error():
    """Check cars_normbias with and without use_standard_error

    TODO: This test still needs to be improved.
    """
    profile = DummyData()
    profile.attrs["datetime"] = datetime(2016, 6, 4)
    profile.attrs["LATITUDE"] = -30.0
    profile.attrs["LONGITUDE"] = 15
    profile.data["PRES"] = np.array([2.0, 5.0, 6.0, 21.0, 44.0, 79.0])
    profile.data["TEMP"] = np.array([16.0, 15.6, 15.9, 5.7, 15.2, 14.1])

    cfg = {"TEMP": {"cars_normbias": {"threshold": 6}}}
    pqc = ProfileQC(profile, cfg=cfg)
    assert "cars_normbias" in pqc.flags["TEMP"]
    assert pqc.flags["TEMP"]["cars_normbias"].shape == profile.data[
        "TEMP"].shape
    assert (pqc.flags["TEMP"]["cars_normbias"] == [1, 1, 1, 3, 1, 1]).all()

    cfg = {
        "TEMP": {
            "cars_normbias": {
                "threshold": 6,
                "use_standard_error": False
            }
        }
    }
    pqc_noSE = ProfileQC(profile, cfg=cfg)
    assert "cars_normbias" in pqc_noSE.flags["TEMP"]
    assert pqc_noSE.flags["TEMP"]["cars_normbias"].shape == profile.data[
        "TEMP"].shape
    assert (pqc_noSE.flags["TEMP"]["cars_normbias"] == [1, 1, 1, 3, 1, 1]).all()
def test_standard_error():
    """Check woa_normbias with and without use_standard_error

    TODO: This test still needs to be improved.
    """
    profile = DummyData()
    profile.attrs['datetime'] = datetime(2016, 6, 4)
    profile.attrs['LATITUDE'] = -30.0
    profile.attrs['LONGITUDE'] = 15
    profile.data['PRES'] = np.array(
        [2.0, 5.0, 6.0, 21.0, 44.0, 79.0, 1000, 5000])
    profile.data['TEMP'] = np.array(
        [16.0, 15.6, 15.9, 15.7, 15.2, 14.1, 8.6, 2.0])

    cfg = {"TEMP": {"woa_normbias": {"threshold": 10}}}
    pqc = ProfileQC(profile, cfg=cfg)
    assert 'woa_normbias' in pqc.flags['TEMP']
    assert pqc.flags['TEMP']['woa_normbias'].shape == profile.data['TEMP'].shape
    assert (pqc.flags['TEMP']['woa_normbias'] == [1, 1, 1, 1, 1, 1, 4, 0]).all()

    cfg = {"TEMP": {"woa_normbias": {
        "threshold": 10, "use_standard_error": False}}}
    pqc_noSE = ProfileQC(profile, cfg=cfg)
    assert 'woa_normbias' in pqc_noSE.flags['TEMP']
    assert pqc_noSE.flags['TEMP']['woa_normbias'].shape == profile.data['TEMP'].shape
    assert (pqc_noSE.flags['TEMP']['woa_normbias'] == [1, 1, 1, 1, 1, 1, 4, 0]).all()

    cfg = {"TEMP": {"woa_normbias": {
        "threshold": 10, "use_standard_error": True}}}
    pqc_SE = ProfileQC(profile, cfg=cfg)
    assert 'woa_normbias' in pqc_SE.flags['TEMP']
    assert pqc_SE.flags['TEMP']['woa_normbias'].shape == profile.data['TEMP'].shape
    assert (pqc_SE.flags['TEMP']['woa_normbias'] == [1, 1, 1, 1, 1, 1, 4, 0]).all()
def test_single_negative_depth():
    """Evaluate a profile with a single measurement

    WOD has some profiles with a single measurement. Something certainly
    went wrong on those profiles; despite that, CoTeDe should be able to
    do the best assessment possible. Some tests can't be applied, like
    spike, which requires neighbor measurements, but those should return
    flag 0.
    """
    profile = DummyData()
    profile.attrs = {
        "id": 609483,
        "LATITUDE": 6.977,
        "LONGITUDE": 79.873,
        "datetime": datetime(2009, 8, 14, 1, 18, 36),
        "date": date(2009, 8, 14),
        "schema": "pfl",
    }
    profile.data = {
        "id": ma.masked_array(data=[51190527], mask=[False], dtype="i"),
        "PRES": ma.masked_array(data=[-1.0], mask=[False], dtype="f"),
        "TEMP": ma.masked_array(data=[25.81], mask=[False], dtype="f"),
        "PSAL": ma.masked_array(data=[0.01], mask=[False], dtype="f"),
    }
    ProfileQC(profile, saveauxiliary=False)
    ProfileQC(profile, saveauxiliary=True)
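
# A hedged follow-up sketch to the expectation stated in the docstring above:
# for a single-level profile, tests that need neighbors (e.g. spike or
# gradient) should come back as flag 0 (not evaluated). It assumes the same
# single-measurement `profile` construction used in the test above; the
# function name is illustrative only.
def example_neighbour_tests_not_evaluated(profile):
    pqc = ProfileQC(profile, saveauxiliary=False)
    for test in ("spike", "gradient"):
        if test in pqc.flags["TEMP"]:
            # A single level has no neighbors, so these should not be evaluated.
            assert (pqc.flags["TEMP"][test] == 0).all()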
def test():
    profile = DummyData()
    pqc = ProfileQC(profile, saveauxiliary=False)
    pqc = ProfileQC(profile, saveauxiliary=True)

    keys = ['PRES', 'TEMP', 'PSAL', 'flag']
    for v in profile.keys():
        assert v in pqc.keys()
        assert np.allclose(profile[v], pqc[v])

    for a in profile.attrs:
        assert a in pqc.attrs
        assert profile.attrs[a] == pqc.attrs[a]

    assert hasattr(pqc, 'flags')
    assert type(pqc.flags) is dict
    vs = list(pqc.flags.keys())
    vs.remove('common')
    for v in vs:
        for f in pqc.flags[v]:
            assert pqc.flags[v][f].dtype == 'i1'

    assert hasattr(pqc, 'features')
    assert type(pqc.features) is dict
def get_qc(p, config, test):
    '''Wrapper for running and returning results of CoTeDe tests.

    Inputs are:
      p is a wodpy profile object.
      config is the suite of tests that test comes from e.g. gtspp.
      test is the specific test to get the results from.
    '''
    global cotede_results

    # Disable logging messages from CoTeDe unless they are more
    # severe than a warning.
    logging.disable(logging.WARNING)

    # Create a dummy results variable if this is the first call.
    try:
        cotede_results
    except NameError:
        cotede_results = [-1, '', None]

    var = 'TEMP'

    # Check if we need to perform the quality control.
    if (p.uid() != cotede_results[0] or
            config != cotede_results[1] or
            p.uid() is None):
        inputs = Wod4CoTeDe(p)

        try:
            try:
                # Assumes config is the QC test group, like 'cotede',
                # and loads only the desired 'test'.
                pqc = ProfileQC(inputs, cfg=load_cfg(config)[var][test])
            except:
                # In case of a full set, like the full GTSPP suite of tests
                # (in that case test='overall'), or a dictionary.
                pqc = ProfileQC(inputs, cfg=config)
        except:
            with open('cotede_qc/qc_cfg/' + config + '.json') as f:
                cfg = json.load(f)
            pqc = ProfileQC(inputs, cfg=cfg)

        cotede_results = [p.uid(), config, pqc]

    # Get the QC results, which use the IOC conventions.
    qc_returned = cotede_results[2].flags[var][test]
    # It looks like CoTeDe never returns a QC decision
    # of 2. If it ever does, we need to decide whether
    # this counts as a pass or reject.
    qc = np.ma.zeros(p.n_levels(), dtype=bool)
    if var == 'common':
        if qc_returned == 3 or qc_returned == 4:
            qc[:] = True
    else:
        qc[np.logical_or(qc_returned == 3, qc_returned == 4)] = True

    return qc
def test_ProfileQC():
    if nogsw:
        print("GSW package not available. Can't run density_inversion test.")
        return

    profile = DummyData()
    profile["TEMP"][4] = profile["TEMP"][3] + 5
    cfg = {
        "TEMP": {
            "density_inversion": {
                "threshold": -0.03,
                "flag_good": 1,
                "flag_bad": 4
            }
        }
    }
    pqc = ProfileQC(profile, cfg)

    assert type(pqc.features) is dict
    assert "densitystep" in pqc.features["TEMP"]
    assert type(pqc.flags) is dict
    assert "density_inversion" in pqc.flags["TEMP"]
    assert pqc.flags["TEMP"]["density_inversion"][4] == 4
def test_serialize_ProfileQC():
    """ Serialize ProfileQC
    """
    for datafile in INPUTFILES:
        data = cnv.fCNV(datafile)
        pqc = ProfileQC(data, saveauxiliary=False)
        pqc2 = pickle.loads(pickle.dumps(pqc))
        assert pqc.attributes == pqc2.attributes
def test_argo():
    datafile = download_testdata("20150127_prof.nc")
    profile = argo.profile_from_nc(datafile)[0]
    pqc = ProfileQC(profile, cfg='argo')

    assert hasattr(pqc, 'flags')
    for v in ['TEMP', 'PSAL']:
        assert v in pqc.keys()
        assert len(pqc[v]) == 1034
        assert v in pqc.flags
        for f in pqc.flags[v]:
            assert len(pqc.flags[v][f]) == 1034

    for a in ['datetime', 'LATITUDE', 'LONGITUDE']:
        assert a in pqc.attributes
    assert type(pqc.attributes['datetime']) == datetime
def test_common_flags():
    profile = DummyData()
    cfg = {"main": {"valid_datetime": None, "valid_geolocation": None},
           "TEMP": {}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert 'common' in pqc.flags
    assert 'valid_datetime' in pqc.flags['TEMP']
    assert pqc.flags['TEMP']['valid_datetime'].shape == profile['TEMP'].shape
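
# Hedged sketch (same assumptions as the test above): tests configured under
# the "main" group apply to the whole profile, and, as the assertions above
# rely on, their flags are also reported under each data variable. The helper
# name is illustrative only.
def example_common_flag_shapes(pqc):
    for v in pqc.flags:
        if v == 'common':
            continue
        if 'valid_datetime' in pqc.flags[v]:
            # The common flag is broadcast to the shape of each variable.
            assert pqc.flags[v]['valid_datetime'].shape == pqc[v].shape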
def qc(inputfilename, outputfilename, config):
    """Apply CoTeDe's quality control on a CNV file and print the flags
    """
    from cotede.qc import ProfileQC, combined_flag

    if outputfilename is None:
        outputfilename = inputfilename.replace('.cnv', '.nc')
    click.echo('Saving on %s' % outputfilename)

    data = fCNV(inputfilename)
    profile = ProfileQC(data, cfg=config, verbose=False)
    print(profile.flags)
def test_attribute():
    profile = DummyData()
    cfg = {"TEMP": {"woa_normbias": {"threshold": 3}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert 'woa_normbias' in pqc.flags['TEMP']
    assert pqc.flags['TEMP']['woa_normbias'].shape == profile.data['TEMP'].shape
    assert np.unique(pqc.features['TEMP']['woa_mean']).size > 1
    assert (pqc.flags['TEMP']['woa_normbias'] ==
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 9]).all()
def test_basic():
    """ """
    profile = DummyData()
    cfg = {"TEMP": {"woa_normbias": {"threshold": 3, "flag_bad": 3}},
           "PSAL": {"woa_normbias": {"threshold": 3, "flag_bad": 3}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert 'woa_normbias' in pqc.flags['TEMP']
    assert sorted(np.unique(pqc.flags['TEMP']['woa_normbias'])) == [1, 3, 9]
    assert sorted(np.unique(pqc.flags['PSAL']['woa_normbias'])) == [1, 9]
def test():
    """ """
    profile = DummyData()
    pqc = ProfileQC(profile, cfg='morello2014')

    assert 'morello2014' in pqc.flags['TEMP']
    assert 'morello2014' in pqc.flags['PSAL']
    assert profile['TEMP'].shape == pqc.flags['TEMP']['morello2014'].shape
    assert profile['PSAL'].shape == pqc.flags['PSAL']['morello2014'].shape
def test():
    """ """
    profile = DummyData()
    cfg = {"TEMP": {"cars_normbias": {"threshold": 10}},
           "PSAL": {"cars_normbias": {"threshold": 10}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert 'cars_normbias' in pqc.flags['TEMP']
    assert sorted(np.unique(pqc.flags['TEMP']['cars_normbias'])) == [1, 9]
    # assert sorted(np.unique(pqc.flags['TEMP2']['cars_normbias'])) == [1]
    assert sorted(np.unique(pqc.flags['PSAL']['cars_normbias'])) == [1, 9]
def test():
    """ """
    profile = DummyData()
    pqc = ProfileQC(profile, cfg='fuzzylogic')

    assert 'fuzzylogic' in pqc.flags['TEMP']
    assert 'fuzzylogic' in pqc.flags['PSAL']
    assert profile['TEMP'].shape == pqc.flags['TEMP']['fuzzylogic'].shape
    assert profile['PSAL'].shape == pqc.flags['PSAL']['fuzzylogic'].shape
def test_attribute():
    profile = DummyData()
    cfg = {"TEMP": {"woa_normbias": {"threshold": 3}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert "woa_normbias" in pqc.flags["TEMP"]
    assert pqc.flags["TEMP"]["woa_normbias"].shape == profile.data[
        "TEMP"].shape
    assert np.unique(pqc.features["TEMP"]["woa_mean"]).size > 1
    assert (pqc.flags["TEMP"]["woa_normbias"] == [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 9
    ]).all()
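
# A minimal sketch (assuming the same cfg and saved features as the test
# above): the features expose the WOA climatological mean that the flags are
# based on, so the raw measurement-minus-climatology difference can be
# inspected per level. The helper name is illustrative only.
def example_woa_residual(pqc):
    # Plain ndarrays so the subtraction ignores any mask bookkeeping.
    measured = np.asarray(pqc["TEMP"], dtype=float)
    climatology = np.asarray(pqc.features["TEMP"]["woa_mean"], dtype=float)
    return measured - climatology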
def test_morello_from_profileqc():
    """Run Morello2014 through ProfileQC

    Notes
    -----
    - There is room to improve this and verify more stuff
    """
    profile = DummyData()
    pqc = ProfileQC(profile, cfg="morello2014")
    for v in ("TEMP", "PSAL"):
        assert "morello2014" in pqc.flags[v]
def test_attribute():
    profile = DummyData()
    profile.attrs['datetime'] = datetime(2016, 6, 4)
    profile.attrs['LATITUDE'] = -30.0
    profile.attrs['LONGITUDE'] = 15
    profile.data['PRES'] = np.array([2.0, 5.0, 6.0, 21.0, 44.0, 79.0])
    profile.data['TEMP'] = np.array([16.0, 15.6, 15.9, 5.7, 15.2, 14.1])

    cfg = {"TEMP": {"cars_normbias": {"threshold": 6}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert 'cars_normbias' in pqc.flags['TEMP']
    assert pqc.flags['TEMP']['cars_normbias'].shape == profile.data['TEMP'].shape
    assert (pqc.flags['TEMP']['cars_normbias'] == [1, 1, 1, 3, 1, 1]).all()
def test():
    """ Only tests that it runs. Must improve this.
    """
    profile = DummyData()
    pqc = ProfileQC(profile, cfg='anomaly_detection')

    assert 'anomaly_detection' in pqc.flags['TEMP']
    assert 'anomaly_detection' in pqc.flags['PSAL']
    assert profile['TEMP'].shape == pqc.flags['TEMP']['anomaly_detection'].shape
    assert profile['PSAL'].shape == pqc.flags['PSAL']['anomaly_detection'].shape
def test_all_valid_no_9():
    """If all measurements are valid it can't return flag 9

    This tests a special condition when all values are valid and .mask
    returns a single False instead of an array of False with the same
    size. With all valid input values there should be no flag 9.
    """
    profile = DummyData()
    pqc = ProfileQC(profile)
    assert not pqc['TEMP'].mask.all()
    assert np.allclose(
        combined_flag(pqc.flags['TEMP']) == 9, profile['TEMP'].mask)
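
# A hedged sketch of how combined_flag is typically consumed (assuming
# combined_flag and ProfileQC are importable as in the test above, and the
# usual IOC convention: 1-2 good, 3-4 bad, 9 missing). The helper name and the
# decision to keep only flags 1-2 are illustrative choices, not CoTeDe API.
def example_mask_bad_values(pqc, varname="TEMP"):
    overall = combined_flag(pqc.flags[varname])
    good = np.isin(overall, (1, 2))
    # Replace everything not explicitly good with NaN.
    return np.where(good, np.asarray(pqc[varname], dtype=float), np.nan)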
def test_track():
    profile = DummyData()
    N = profile['TEMP'].size
    profile.data['LATITUDE'] = np.linspace(4, 25, N)
    profile.data['LONGITUDE'] = np.linspace(-30, -38, N)
    profile.data['PRES'] *= 0
    # Location in data, one per measurement, has precedence on attrs
    profile.attrs['LATITUDE'] = None
    profile.attrs['LONGITUDE'] = None

    cfg = {"TEMP": {"woa_normbias": {"threshold": 3}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert 'woa_normbias' in pqc.flags['TEMP']
    assert pqc.flags['TEMP']['woa_normbias'].shape == profile.data['TEMP'].shape
    assert np.unique(pqc.features['TEMP']['woa_mean']).size > 1
    assert (pqc.flags['TEMP']['woa_normbias'] ==
            [1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 9]).all()
def test():
    profile = DummyData()
    pqc = ProfileQC(profile)

    # assert type(pqc.keys()) == list
    assert type(pqc.attributes) == dict
    assert hasattr(pqc, 'input')
    assert hasattr(pqc, 'flags')
    assert hasattr(pqc, 'features')
    assert type(pqc.flags) == dict
    for k in pqc.flags.keys():
        assert type(pqc.flags[k]) == dict
        for kk in pqc.flags[k].keys():
            assert (type(pqc.flags[k][kk]) == np.ndarray) or \
                (type(pqc.flags[k][kk]) == int)
            if (type(pqc.flags[k][kk]) == np.ndarray):
                assert pqc.flags[k][kk].dtype == 'int8'
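
# A minimal sketch of walking the pqc.flags structure checked above: one dict
# per variable (plus 'common'), one int8 array (or scalar) per test. Same
# assumptions as the test above; the helper name is illustrative only.
def example_count_flagged(pqc):
    counts = {}
    for v in pqc.flags:
        for t in pqc.flags[v]:
            # atleast_1d also handles the scalar case noted in the test above.
            flags = np.atleast_1d(pqc.flags[v][t])
            counts[(v, t)] = int(np.sum((flags == 3) | (flags == 4)))
    return counts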
def test_serialize_ProfileQC():
    """ Serialize ProfileQC
    """
    profile = DummyData()
    pqc = ProfileQC(profile)
    pqc2 = pickle.loads(pickle.dumps(pqc))
    assert sorted(pqc.data.keys()) == sorted(pqc2.data.keys())
    for v in pqc.data:
        assert np.allclose(pqc[v], pqc2[v])
    assert sorted(pqc.attributes.keys()) == sorted(pqc2.attributes.keys())
    for v in pqc.attributes:
        assert pqc.attributes[v] == pqc2.attributes[v]
    assert sorted(pqc.flags.keys()) == sorted(pqc2.flags.keys())
    for v in pqc.flags:
        for f in pqc.flags[v]:
            assert np.allclose(pqc.flags[v][f], pqc2.flags[v][f])
def test_track():
    profile = DummyData()
    N = profile["TEMP"].size
    profile.data["LATITUDE"] = np.linspace(4, 25, N)
    profile.data["LONGITUDE"] = np.linspace(-30, -38, N)
    profile.data["PRES"] *= 0
    # Location in data, one per measurement, has precedence on attrs
    profile.attrs["LATITUDE"] = None
    profile.attrs["LONGITUDE"] = None

    cfg = {"TEMP": {"woa_normbias": {"threshold": 3}}}
    pqc = ProfileQC(profile, cfg=cfg)

    assert "woa_normbias" in pqc.flags["TEMP"]
    assert pqc.flags["TEMP"]["woa_normbias"].shape == profile.data[
        "TEMP"].shape
    assert np.unique(pqc.features["TEMP"]["woa_mean"]).size > 1
    assert np.allclose(
        pqc.flags["TEMP"]["woa_normbias"],
        [1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 9],
        equal_nan=True,
    )
def test_serialize_ProfileQC():
    """Serialize ProfileQC

    Guarantee that the returned object can be processed by pickle, thus
    it can be transported in queues.
    """
    profile = DummyData()
    pqc = ProfileQC(profile)
    pqc2 = pickle.loads(pickle.dumps(pqc))
    assert sorted(pqc.data.keys()) == sorted(pqc2.data.keys())
    for v in pqc.data:
        assert np.allclose(pqc[v], pqc2[v], equal_nan=True)
    assert sorted(pqc.attributes.keys()) == sorted(pqc2.attributes.keys())
    for v in pqc.attributes:
        assert pqc.attributes[v] == pqc2.attributes[v]
    assert sorted(pqc.flags.keys()) == sorted(pqc2.flags.keys())
    for v in pqc.flags:
        for f in pqc.flags[v]:
            assert np.allclose(pqc.flags[v][f], pqc2.flags[v][f])
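
# A hedged sketch of why the pickle round-trip above matters: the serialized
# bytes are what would travel through a queue or to a worker process. Same
# assumptions as the test above; the helper name is illustrative only.
def example_pickle_roundtrip(pqc):
    import pickle

    payload = pickle.dumps(pqc)      # bytes, safe to put on a queue
    return pickle.loads(payload)     # an equivalent ProfileQC object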
def test():
    """ """
    profile = DummyData()
    pqc = ProfileQC(profile)
    pqced = ProfileQCed(profile)

    assert pqc.data.keys() == pqced.data.keys()
    for v in pqc.data:
        assert np.allclose(pqc[v].data, pqced[v].data)
    assert not np.allclose(pqc['TEMP'].mask, pqced['TEMP'].mask)

    assert pqc.attributes.keys() == pqced.attributes.keys()
    for v in pqc.attributes:
        assert pqc.attributes[v] == pqced.attributes[v]

    assert pqc.flags.keys() == pqced.flags.keys()
    for v in pqc.flags:
        for f in pqc.flags[v]:
            assert np.allclose(pqc.flags[v][f], pqced.flags[v][f])
def test_basic():
    """ """
    profile = DummyData()
    cfg = {
        "TEMP": {"woa_normbias": {"threshold": 3, "flag_bad": 3}},
        "PSAL": {"woa_normbias": {"threshold": 3, "flag_bad": 3}},
    }
    pqc = ProfileQC(profile, cfg=cfg)

    assert "woa_normbias" in pqc.flags["TEMP"]
    assert sorted(np.unique(pqc.flags["TEMP"]["woa_normbias"])) == [1, 3, 9]
    assert sorted(np.unique(pqc.flags["PSAL"]["woa_normbias"])) == [1, 9]
def test():
    """ """
    profile = DummyData()
    pqc = ProfileQC(profile)
    pqced = ProfileQCed(profile)

    assert pqc.data.keys() == pqced.data.keys()
    for v in pqc.data:
        assert np.allclose(pqc[v].data, pqced[v].data, equal_nan=True)
    assert not np.allclose(pqc["TEMP"].mask, pqced["TEMP"].mask)

    assert pqc.attributes.keys() == pqced.attributes.keys()
    for v in pqc.attributes:
        assert pqc.attributes[v] == pqced.attributes[v]

    assert pqc.flags.keys() == pqced.flags.keys()
    for v in pqc.flags:
        for f in pqc.flags[v]:
            assert np.allclose(
                pqc.flags[v][f],
                pqced.flags[v][f]), "Didn't match {}, {}".format(v, f)
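
# A hedged note on the difference exercised above, as I read these tests:
# ProfileQC keeps the original mask and exposes the flags, while ProfileQCed
# additionally masks values that failed QC, which is why the TEMP masks differ.
# A minimal sketch under that assumption; the helper name is illustrative only.
def example_count_masked_after_qc():
    profile = DummyData()
    pqc = ProfileQC(profile)
    pqced = ProfileQCed(profile)
    # ProfileQCed is expected to mask at least as many TEMP values as ProfileQC.
    return int(np.sum(pqc["TEMP"].mask)), int(np.sum(pqced["TEMP"].mask))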
def test_densityinversion_from_profileqc():
    """Validate if ProfileQC can run DensityInversion

    It requires GSW to estimate density if the density itself is not
    provided.
    """
    cfg = {
        "TEMP": {"density_inversion": {"threshold": -0.03}},
        "PSAL": {"density_inversion": {"threshold": -0.03}},
    }
    profile = DummyData()
    pqc = ProfileQC(profile, cfg=cfg)
    for v in ("TEMP", "PSAL"):
        assert "density_inversion" in pqc.flags[v]
        if not GSW_AVAILABLE:
            assert (pqc.flags[v]["density_inversion"] == 0).all()
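
# A minimal sketch of the GSW_AVAILABLE guard used above: the flag is assumed
# to be set at import time by probing for the gsw (TEOS-10) package, which
# CoTeDe needs to estimate density when density is not provided.
try:
    import gsw  # noqa: F401
    GSW_AVAILABLE = True
except ImportError:
    GSW_AVAILABLE = False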
def get_qc(p, config, test):
    '''Wrapper for running and returning results of CoTeDe tests.

    Inputs are:
      p is a wodpy profile object.
      config is the suite of tests that test comes from e.g. gtspp.
      test is the specific test to get the results from.
    '''
    global cotede_results

    # Disable logging messages from CoTeDe unless they are more
    # severe than a warning.
    logging.disable(logging.WARNING)

    # Create a dummy results variable if this is the first call.
    try:
        cotede_results
    except NameError:
        cotede_results = [-1, '', {}, None]

    var = 'TEMP'

    # Check if we need to perform the quality control.
    if (p.uid() != cotede_results[0] or
            config != cotede_results[1] or
            test not in cotede_results[2] or
            p.uid() is None):
        inputs = Wod4CoTeDe(p)

        dt = inputs.attributes['datetime']
        if dt.year < 1900:
            inputs.attributes['datetime'] = dt.replace(year=1900)

        # If config is a dictionary, use it.
        if type(config) is not dict:
            try:
                # Load config from CoTeDe
                cfg = load_cfg(config)

                if test == config:
                    # AutoQC runs only on TEMP, so clean the rest.
                    for v in list(cfg):
                        if v not in ['main', var]:
                            del cfg[v]
                elif test != config:
                    # If it is a specific test, load it from TEMP,
                    try:
                        cfg = {var: {test: cfg[var][test]}}
                    # otherwise load it from main.
                    except:
                        # The dummy configuration ensures that the results
                        # from 'main' are copied into the results for var.
                        cfg = {'main': {test: cfg['main'][test]},
                               var: {'dummy': None}}
            except:
                with open('cotede_qc/qc_cfg/' + config + '.json') as f:
                    cfg = json.load(f)
        else:
            cfg = config

        pqc = ProfileQC(inputs, cfg=cfg)

        cotede_results = [p.uid(), config, pqc.flags[var].keys(), pqc]

    # Get the QC results, which use the IOC conventions.
    qc_returned = cotede_results[3].flags[var][test]
    # It looks like CoTeDe never returns a QC decision
    # of 2. If it ever does, we need to decide whether
    # this counts as a pass or reject.
    # Gui: Yes, some tests can return 2. My suggestion is to flag as good.
    qc = np.ma.zeros(p.n_levels(), dtype=bool)
    qc[np.logical_or(qc_returned == 3, qc_returned == 4)] = True

    return qc
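
# Hedged usage sketch for the wrapper above. `p` is a wodpy profile object, as
# documented in the docstring; the 'gtspp'/'global_range' config/test pair is
# illustrative and assumed to be available either in CoTeDe or in the local
# qc_cfg directory. The helper name is illustrative only.
def example_rejected_levels(p, config='gtspp', test='global_range'):
    mask = get_qc(p, config, test)
    # Indices of levels flagged 3 or 4 for TEMP by the requested test.
    return np.where(mask)[0]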