def test_ic_fl_max_ctc():
    """Exercise `check_fl_max_ctc_positive` with shifted "fl1_max_ctc" data.

    The feature is shifted by `min(data) + delta`, so its new minimum
    is `-delta`. Minima of -1 and 0.1 must produce an "alert" cue of
    category "feature data"; a minimum of 1 must produce no cue.
    """
    feat = "fl1_max_ctc"

    def cues_for_shift(delta):
        # Build a fresh dataset whose minimum `feat` value is `-delta`.
        data = example_data_dict(size=8472, keys=[feat])
        data[feat] -= min(data[feat]) + delta
        ds = new_dataset(data)
        with check.IntegrityChecker(ds) as ic:
            found = ic.check_fl_max_ctc_positive()
        return found

    # Negative values (minimum is -1)
    negative = cues_for_shift(1)
    assert negative[0].level == "alert"
    assert negative[0].category == "feature data"

    # Values starting at 0.1
    small = cues_for_shift(-0.1)
    assert small[0].level == "alert"
    assert small[0].category == "feature data"

    # Values starting at 1 (> 0.1): nothing to complain about
    assert not cues_for_shift(-1)
def test_ic_expand_section():
    """`expand_section=True` must yield more missing-metadata cues.

    Two datasets are built from the same data dictionary and checked
    with `check_metadata_missing`, once with and once without section
    expansion.
    """
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds_expanded = new_dataset(data)
    ds_collapsed = new_dataset(data)

    with check.IntegrityChecker(ds_expanded) as ic:
        expanded = ic.check_metadata_missing(expand_section=True)
    with check.IntegrityChecker(ds_collapsed) as ic:
        collapsed = ic.check_metadata_missing(expand_section=False)

    assert len(collapsed) < len(expanded)
def test_online_polygon_filters():
    """Shape-In 2.3 supports online polygon filters"""
    path = retrieve_data("fmt-hdf5_mask-contour_2018.zip")

    # Write an artificial online polygon filter into the file attributes.
    with h5py.File(path, "a") as h5:
        events = h5["events"]
        area_um = events["area_um"]
        deform = events["deform"]
        # The three polygon points are derived from the mean and the
        # standard deviation of both features.
        area_mean, area_std = np.mean(area_um), np.std(area_um)
        deform_mean, deform_std = np.mean(deform), np.std(deform)
        polygon = np.array([
            [area_mean + area_std, deform_mean],
            [area_mean + area_std, deform_mean + deform_std],
            [area_mean, deform_mean + deform_std],
        ])
        # soft filter is set to True
        h5.attrs["online_filter:area_um,deform soft limit"] = True
        h5.attrs["online_filter:area_um,deform polygon points"] = polygon

    # The file must open and be checkable without any error.
    with check.IntegrityChecker(path) as ic:
        non_info = [cue for cue in ic.check() if cue.level != "info"]

    # The two remaining cues are expected:
    # [imaging]: roi position x should be <class 'numbers.Integral'>
    # [imaging]: roi position y should be <class 'numbers.Integral'>
    assert len(non_info) == 2
def test_online_polygon_filters_wrong_shape(shape):
    """Shape-In 2.3 supports online polygon filters (test for shape)

    `shape` is a two-element shape for the artificial polygon point
    array; it is presumably supplied by a parametrization that is not
    part of this function body. The check must report exactly one
    non-info cue: a "metadata wrong" violation for the polygon points.
    """
    path = retrieve_data("fmt-hdf5_mask-contour_2018.zip")

    # Since 0.35.0 we really check the configuration key types.
    # Just make sure that they are properly set:
    with h5py.File(path, "a") as h5:
        for roi_key in ("imaging:roi position x", "imaging:roi position y"):
            h5.attrs[roi_key] = int(h5.attrs[roi_key])

    # Add an artificial online polygon filter with the requested shape.
    with h5py.File(path, "a") as h5:
        # soft filter is set to True
        h5.attrs["online_filter:area_um,deform soft limit"] = True
        n_values = shape[0] * shape[1]
        polygon = np.arange(n_values).reshape(*shape)
        h5.attrs["online_filter:area_um,deform polygon points"] = polygon

    # The file must still open; only the bad polygon is reported.
    with check.IntegrityChecker(path) as ic:
        non_info = [cue for cue in ic.check() if cue.level != "info"]

    assert len(non_info) == 1
    cue = non_info[0]
    assert cue.category == "metadata wrong"
    assert cue.level == "violation"
    assert cue.cfg_section == "online_filter"
    assert cue.cfg_key == "area_um,deform polygon points"
def test_online_polygon_filters_real_data():
    """Shape-In 2.3 supports online polygon filters"""
    path = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    # The file must open without error and yield only "info" cues.
    with check.IntegrityChecker(path) as ic:
        non_info = [cue for cue in ic.check() if cue.level != "info"]
    assert not non_info
def test_ic_fmt_hdf5_image3():
    """A missing 'CLASS' attribute on the image dataset raises an alert."""
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    # Remove the attribute from the image dataset.
    with h5py.File(h5path, "a") as h5:
        image_attrs = h5["events/image"].attrs
        del image_attrs["CLASS"]

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check_fmt_hdf5()

    first = cues[0]
    assert first.category == "format HDF5"
    assert first.msg == "HDF5: '/image': missing attribute 'CLASS'"
    assert first.level == "alert"
def test_ic_flow_rate_not_zero():
    """A flow rate of zero is reported as wrong metadata."""
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds = new_dataset(data)
    ds.config["setup"]["flow rate"] = 0

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_metadata_bad_greater_zero()

    first = cues[0]
    assert first.category == "metadata wrong"
    assert first.cfg_section == "setup"
    assert first.cfg_key == "flow rate"
def test_ic_metadata_choices_medium():
    """An arbitrary medium name must not trigger a choices cue."""
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds = new_dataset(data)
    ds.config["setup"]["medium"] = "honey"

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_metadata_choices()

    # changed in 0.29.1: medium can now be an arbitrary string
    # except for an empty string.
    assert not cues
def test_ic_fl_metadata_channel_names_2():
    """A channel name set on a dataset without fluorescence features
    is reported as invalid metadata.
    """
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds = new_dataset(data)
    ds.config["fluorescence"]["channel 1 name"] = "peter"

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_fl_metadata_channel_names()

    first = cues[0]
    assert first.category == "metadata invalid"
    assert first.cfg_section == "fluorescence"
    assert first.cfg_key == "channel 1 name"
def test_ic_fl_metadata_channel_names():
    """Fluorescence data without a channel name is reported as
    missing metadata.
    """
    data = example_data_dict(size=8472,
                             keys=["area_um", "deform", "fl1_max"])
    # Attach a raw trace for channel 1 in addition to "fl1_max".
    data["trace"] = {"fl1_raw": [range(10)] * 1000}
    ds = new_dataset(data)

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_fl_metadata_channel_names()

    first = cues[0]
    assert first.category == "metadata missing"
    assert first.cfg_section == "fluorescence"
    assert first.cfg_key == "channel 1 name"
def test_shapein_issue3_bad_medium_control(si_version):
    """Control case: `check_shapein_issue3_bad_medium` yields no cue.

    `si_version` is written to "setup:software version"; it is
    presumably supplied by a parametrization that is not part of this
    function body.
    """
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    with h5py.File(h5path, "a") as h5:
        attrs = h5.attrs
        attrs["setup:software version"] = si_version
        attrs["setup:medium"] = "CellCarrierB"

    ds = new_dataset(h5path)
    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_shapein_issue3_bad_medium()

    assert not cues
def test_ic_fmt_hdf5_image2():
    """A wrong fixed-length 'CLASS' attribute value raises an alert.

    The 'CLASS' attribute of the image dataset is overwritten with the
    byte string b"bad"; `check_fmt_hdf5` must then complain that the
    value should be b'IMAGE'.
    """
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    with h5py.File(h5path, "a") as h5:
        # `np.string_` was removed in NumPy 2.0. `np.bytes_` is the same
        # scalar type and is available in NumPy 1.x as well, so the
        # attribute is still written as a fixed-length byte string.
        h5["events/image"].attrs["CLASS"] = np.bytes_("bad")

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check_fmt_hdf5()

    first = cues[0]
    assert first.category == "format HDF5"
    assert first.msg == ("HDF5: '/image': attribute 'CLASS' should have "
                         "value 'b'IMAGE''")
    assert first.level == "alert"
def test_ic_fl_num_channels():
    """A channel count that disagrees with the named channels is
    reported as wrong metadata.
    """
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds = new_dataset(data)
    # Three channels are claimed, but only one is named.
    fl_cfg = ds.config["fluorescence"]
    fl_cfg["channel count"] = 3
    fl_cfg["channel 1 name"] = "hans"

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_fl_num_channels()

    first = cues[0]
    assert first.category == "metadata wrong"
    assert first.cfg_section == "fluorescence"
    assert first.cfg_key == "channel count"
def test_ic_metadata_bad():
    """Setting a wrongly-typed value warns and leaves no bad-metadata cue."""
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds = new_dataset(data)

    # Since version 0.35, metadata are checked in `Configuration` class
    expected_warning = dclab.rtdc_dataset.config.WrongConfigurationTypeWarning
    with pytest.warns(expected_warning, match="run index"):
        ds.config["experiment"]["run index"] = "1"

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_metadata_bad()

    assert not cues
def test_ic_invalid_dataset():
    """`check()` raises NotImplementedError for unsupported datasets.

    Two cases are covered: a hierarchy child dataset and a dataset
    with applied filters.
    """
    feature_keys = ["area_um", "deform"]

    # Case 1: hierarchy datasets
    parent = new_dataset(example_data_dict(size=8472, keys=feature_keys))
    child = new_dataset(parent)
    with check.IntegrityChecker(child) as ic, \
            pytest.raises(NotImplementedError):
        ic.check()

    # Case 2: raw datasets with applied filters
    filtered = new_dataset(example_data_dict(size=8472, keys=feature_keys))
    filter_cfg = filtered.config["filtering"]
    filter_cfg["area_um max"] = 100
    filter_cfg["area_um min"] = 1
    filtered.apply_filter()
    with check.IntegrityChecker(filtered) as ic, \
            pytest.raises(NotImplementedError):
        ic.check()
def test_check_metadata_hdf5_type_issue_139():
    """Check that chip region is lower-case"""
    h5path = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    # Store the chip region with a capital first letter.
    with h5py.File(h5path, "a") as h5:
        h5.attrs["setup:chip region"] = "Channel"

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check_metadata_hdf5_type()

    assert len(cues) == 1
    # The message must mention both spellings.
    message = cues[0].msg
    assert "channel" in message
    assert "Channel" in message
def test_ic_fmt_hdf5_image1():
    """A 'CLASS' attribute written as a Python `str` raises an alert.

    The checker must complain that the attribute should be a
    fixed-length ASCII string.
    """
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    with h5py.File(h5path, "a") as h5:
        image_attrs = h5["events/image"].attrs
        image_attrs["CLASS"] = "bad"

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check_fmt_hdf5()

    first = cues[0]
    assert first.category == "format HDF5"
    assert first.msg == ("HDF5: '/image': attribute 'CLASS' should be "
                         "fixed-length ASCII string")
    assert first.level == "alert"
def test_ic_fl_num_lasers():
    """A laser count that disagrees with the configured lasers is
    reported as wrong metadata.
    """
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    ds = new_dataset(data)
    # Three lasers are claimed, but only laser 1 is configured.
    fl_cfg = ds.config["fluorescence"]
    fl_cfg["laser count"] = 3
    fl_cfg["laser 1 lambda"] = 550
    fl_cfg["laser 1 power"] = 20

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_fl_num_lasers()

    first = cues[0]
    assert first.category == "metadata wrong"
    assert first.cfg_section == "fluorescence"
    assert first.cfg_key == "laser count"
def test_ic_fmt_hdf5_logs():
    """Over-long log lines raise exactly one alert, for the log "test".

    Two logs with a 600-character line each are stored ("test" and
    "M1_para.ini"); only the former is expected to be reported.
    """
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    long_line = "asdasd" * 100
    # NOTE(review): the writer is never closed explicitly here --
    # confirm that this is fine before the checker opens the file.
    writer = RTDCWriter(h5path)
    for log_name in ("test", "M1_para.ini"):
        writer.store_log(log_name, [long_line])

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check_fmt_hdf5()

    assert len(cues) == 1
    only = cues[0]
    assert only.category == "format HDF5"
    assert only.msg == 'Logs: test line 0 exceeds maximum line length 100'
    assert only.level == "alert"
def test_ml_class():
    """Test score data outside boundary"""
    # "ml_score_001" contains the values 10 and -10, while all values
    # of "ml_score_002" lie between 0 and 0.4.
    scores = {
        "ml_score_001": [.1, 10, -10, 0.01, .89],
        "ml_score_002": [.2, .1, .4, 0, .4],
    }
    ds = new_dataset(scores)

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_ml_class()

    # Only the first feature must be reported.
    assert len(cues) == 1
    assert "ml_score_001" in cues[0].msg
def test_shapein_issue3_bad_medium(si_version):
    """`check_shapein_issue3_bad_medium` reports the medium as wrong.

    `si_version` is written to "setup:software version"; it is
    presumably supplied by a parametrization that is not part of this
    function body.
    """
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    with h5py.File(h5path, "a") as h5:
        attrs = h5.attrs
        attrs["setup:software version"] = si_version
        attrs["setup:medium"] = "CellCarrierB"

    ds = new_dataset(h5path)
    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_shapein_issue3_bad_medium()

    assert len(cues) == 1
    only = cues[0]
    assert only.cfg_key == "medium"
    assert only.cfg_section == "setup"
    assert only.category == "metadata wrong"
def test_ic_sanity():
    """A feature with a deviating length fails the sanity check.

    The "deform" feature is replaced by an array of 100 values; the
    sanity check must then report one "feature size" violation.
    """
    h5path = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    with h5py.File(h5path, "a") as h5:
        events = h5["events"]
        # Replace the stored feature with a 100-element array.
        del events["deform"]
        events["deform"] = np.ones(100) * .1

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.sanity_check()

    assert len(cues) == 1
    only = cues[0]
    assert only.category == "feature size"
    assert "Sanity check failed:" in only.msg
    assert "deform" in only.msg
    assert only.level == "violation"
def test_icue():
    """Basic `ICue` behavior: comparison, level summary, and repr."""
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")
    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check()

    first, second = cues[0], cues[1]
    assert first != second

    summary = check.ICue.get_level_summary(cues)
    assert summary["info"] >= 2
    # The three expected alerts are:
    # [fluorescence]: sample rate should be <class 'numbers.Integral'>
    # [imaging]: roi position x should be <class 'numbers.Integral'>
    # [imaging]: roi position y should be <class 'numbers.Integral'>
    assert summary["alert"] == 3
    assert summary["violation"] == 0
    # The message must show up in the string representation.
    assert first.msg in repr(first)
def test_ic_feature_size_scalar():
    """A scalar feature with a wrong event count is reported.

    "bright_sd" gets 1000 values while the other features have 8472.
    """
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    data["bright_sd"] = np.linspace(10, 20, 1000)
    ds = new_dataset(data)

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_feature_size()

    # Only the first "feature size" cue is inspected; there must be one.
    size_cues = [cue for cue in cues if cue.category == "feature size"]
    assert size_cues
    assert size_cues[0].msg == ("Features: wrong event count: 'bright_sd' "
                                "(1000 of 8472)")
def test_ic_feature_size_trace():
    """A trace with a wrong event count is reported.

    The "fl1_raw" trace gets 1000 entries while the other features
    have 8472.
    """
    data = example_data_dict(size=8472, keys=["area_um", "deform"])
    data["trace"] = {"fl1_raw": [range(10)] * 1000}
    ds = new_dataset(data)

    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_feature_size()

    # Only the first "feature size" cue is inspected; there must be one.
    size_cues = [cue for cue in cues if cue.category == "feature size"]
    assert size_cues
    assert size_cues[0].msg == ("Features: wrong event count: 'trace/fl1_raw'"
                                " (1000 of 8472)")
def test_ic_fmt_hdf5_image_bg():
    """A missing 'CLASS' attribute on "image_bg" raises an alert."""
    h5path = retrieve_data("fmt-hdf5_fl_2018.zip")

    with h5py.File(h5path, "a") as h5:
        # Add a fake image_bg column derived from the image data ...
        fake_bg = h5["events"]["image"][:] // 2
        writer = RTDCWriter(h5)
        writer.store_feature("image_bg", fake_bg)
        # ... and strip the attribute the checker looks for.
        bg_attrs = h5["events/image_bg"].attrs
        del bg_attrs["CLASS"]

    with check.IntegrityChecker(h5path) as ic:
        cues = ic.check_fmt_hdf5()

    first = cues[0]
    assert first.category == "format HDF5"
    assert first.msg == "HDF5: '/image_bg': missing attribute 'CLASS'"
    assert first.level == "alert"
def test_ic_metadata_empty_string():
    """Empty metadata values are ignored with a warning in dclab>0.33.2"""
    path = retrieve_data("fmt-hdf5_fl_2018.zip")
    # Store an empty string as the medium.
    with h5py.File(path, "r+") as h5:
        h5.attrs["setup:medium"] = ""

    ds = new_dataset(path)
    with check.IntegrityChecker(ds) as ic:
        cues = ic.check_metadata_missing()

    # The medium must be reported as missing.
    first = cues[0]
    assert first.category == "metadata missing"
    assert first.level == "violation"
    assert first.cfg_section == "setup"
    assert first.cfg_key == "medium"