Esempio n. 1
0
def test_join_frame():
    """
    Check the "frame" feature of a joined file.

    Two copies of the same measurement are stamped with acquisition
    dates/times that lie 1 day, 1 hour, 1 minute and 15.05 seconds
    apart. After joining, the "frame" values must equal the original
    frames followed by the original frames shifted by that time gap
    multiplied with the imaging frame rate.
    """
    first = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    second = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # The joined file is written next to the first input file.
    joined = first.with_name("out.rtdc")

    # Stamp each input with its own acquisition date and time.
    stamps = (
        (first, "2019-11-04", "15:00:00"),
        (second, "2019-11-05", "16:01:15.050"),
    )
    for path, date, clock in stamps:
        with h5py.File(path, mode="a") as h5:
            h5.attrs["experiment:date"] = date
            h5.attrs["experiment:time"] = clock

    # Gap between the two stamps in seconds; the whole seconds are summed
    # as integers first, then the fractional part is added.
    whole_seconds = 24 * 60 * 60 + 60 * 60 + 1 * 60 + 15
    gap = whole_seconds + .05

    cli.join(path_out=joined, paths_in=[first, second])
    with new_dataset(joined) as dsj, new_dataset(first) as ds0:
        frame_rate = ds0.config["imaging"]["frame rate"]
        joined_frames = dsj["frame"]
        frames = ds0["frame"]
        expected = np.concatenate((frames, frames + gap * frame_rate))
        assert np.allclose(joined_frames, expected, rtol=0, atol=.0001)
Esempio n. 2
0
def test_join_rtdc():
    """
    Join an HDF5-based measurement with itself and inspect the result.

    The joined dataset must carry a "dclab-join" log, contain twice as
    many events as the input, repeat the "circ" values of the input in
    both halves, expose the same set of features, and record the input
    identifier in the "cfg-#1" log.
    """
    source = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # The joined file is written next to the input file.
    target = source.with_name("out.rtdc")

    cli.join(path_out=target, paths_in=[source, source])
    with new_dataset(target) as dsj, new_dataset(source) as ds0:
        assert "dclab-join" in dsj.logs

        # event counts
        n_joined = len(dsj)
        n_source = len(ds0)
        assert n_joined
        assert n_joined == 2 * n_source

        # both halves of the joined data replicate the input
        first_half = dsj["circ"][:n_source]
        assert np.all(first_half == ds0["circ"])
        second_half = dsj["circ"][n_source:]
        assert np.all(second_half == ds0["circ"])

        assert set(dsj.features) == set(ds0.features)
        cfg_log = dsj.logs["cfg-#1"]
        assert 'identifier = ZMDD-AcC-8ecba5-cd57e2' in cfg_log
Esempio n. 3
0
def test_join_rtdc_index_online_issue_158():
    """
    Regression test for issue 158.

    dclab previously did not access events/index_online correctly.
    Joining two copies of a file that has this feature must work, and
    the joined "index_online" must be strictly increasing.
    """
    src_a = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    src_b = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    joined = src_a.with_name("outa.rtdc")

    # This call is the one that used to fail.
    cli.join(path_out=joined, paths_in=[src_a, src_b])

    # Verify the joined file.
    with dclab.new_dataset(joined) as ds:
        assert "index_online" in ds.features_innate
        steps = np.diff(ds["index_online"])
        assert np.all(steps > 0)
Esempio n. 4
0
def test_join_tdms_logs():
    """
    Check the logs written when joining a tdms-based measurement.

    The joined file must contain the "dclab-join" and "cfg-#1" logs,
    and every log of the input must reappear unchanged under the same
    name prefixed with "src-#1_". Skipped when `nptdms` is missing.
    """
    pytest.importorskip("nptdms")
    source = retrieve_data("fmt-tdms_shapein-2.0.1-no-image_2017.zip")
    # The joined file is written next to the input file.
    target = source.with_name("out.rtdc")

    cli.join(path_out=target, paths_in=[source, source])

    with new_dataset(target) as dsj, new_dataset(source) as ds0:
        for required in ("dclab-join", "cfg-#1"):
            assert required in dsj.logs
        assert "software version = ShapeIn 2.0.1" in dsj.logs["cfg-#1"]
        # The input must have logs, otherwise the loop checks nothing.
        assert ds0.logs
        for name in ds0.logs:
            original = np.array(ds0.logs[name])
            copied = np.array(dsj.logs["src-#1_" + name])
            assert np.all(original == copied)
Esempio n. 5
0
def test_join_tdms():
    """
    Join a tdms-based measurement with itself and inspect the result.

    The joined dataset must hold twice as many events as the input,
    its configured "event count" must match its length, the first 100
    "circ" values of each half must match those of the input, and the
    feature sets must agree. Skipped when `nptdms` is missing.
    """
    pytest.importorskip("nptdms")
    source = retrieve_data("fmt-tdms_shapein-2.0.1-no-image_2017.zip")
    # The joined file is written next to the input file.
    target = source.with_name("out.rtdc")

    cli.join(path_out=target, paths_in=[source, source])

    with new_dataset(target) as dsj, new_dataset(source) as ds0:
        # event counts, measured and as stored in the configuration
        n_joined = len(dsj)
        n_source = len(ds0)
        assert n_joined
        assert n_joined == 2 * n_source
        assert n_source == ds0.config["experiment"]["event count"]
        assert n_joined == dsj.config["experiment"]["event count"]

        # compare the first 100 events of each half with the input
        head = dsj["circ"][:100]
        assert np.all(head == ds0["circ"][:100])
        second_head = dsj["circ"][n_source:n_source + 100]
        assert np.all(second_head == ds0["circ"][:100])

        assert set(dsj.features) == set(ds0.features)
Esempio n. 6
0
def test_join_rtdc_unequal_features_issue_157_2():
    """
    Variant of `test_join_rtdc_unequal_features_issue_157` that uses a
    feature ("ml_score_abc") which cannot be computed for the file
    that lacks it.

    For both date orderings of the inputs, the joined file must not
    contain the feature and the "dclab-join-feature-warnings" log must
    mention it.
    """
    with_score = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    without_score = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    out_a = with_score.with_name("outa.rtdc")
    out_b = with_score.with_name("outb.rtdc")

    def stamp_date(path, date):
        # join sorts its input files by this date
        with h5py.File(path, "a") as h5:
            h5.attrs["experiment:date"] = date

    def assert_score_dropped(path):
        with dclab.new_dataset(path) as ds:
            # the score cannot be computed for the file without it
            assert "ml_score_abc" not in ds.features_innate
            assert "dclab-join-feature-warnings" in ds.logs
            text = "\n".join(ds.logs["dclab-join-feature-warnings"])
            assert "ml_score_abc" in text

    # Add the extra feature to the first file only and make it the
    # earlier of the two measurements.
    with h5py.File(with_score, "a") as h5:
        events = h5["events"]
        events["ml_score_abc"] = np.log10(events["area_um"][:])
        h5.attrs["experiment:date"] = "2020-01-01"
    stamp_date(without_score, "2021-01-01")

    # sanity checks: only the first file has the feature
    for path, expected in ((with_score, True), (without_score, False)):
        with dclab.new_dataset(path) as ds:
            assert ("ml_score_abc" in ds.features_innate) == expected

    # First test: file with the score is the earlier one
    cli.join(path_out=out_a, paths_in=[with_score, without_score])
    assert_score_dropped(out_a)

    # Second test: same thing with reversed dates
    stamp_date(without_score, "2019-01-01")
    cli.join(path_out=out_b, paths_in=[with_score, without_score])
    assert_score_dropped(out_b)
Esempio n. 7
0
def test_join_rtdc_unequal_features_issue_157():
    """
    Regression test for issue 157: joining files with unequal features.

    If two files do not contain the same features, joining should only
    take the common features into account and should issue a warning
    (which is written to the logs of the joined file).

    Only one input gets an innate "volume" feature. Three scenarios are
    checked: that file dated first ("volume" is kept), that file dated
    second ("volume" is dropped and named in the warnings log), and
    the latter again with the order of `paths_in` flipped.
    """
    has_volume = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    lacks_volume = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    out_a = has_volume.with_name("outa.rtdc")
    out_b = has_volume.with_name("outb.rtdc")
    out_c = has_volume.with_name("outc.rtdc")

    def stamp_date(path, date):
        # join sorts its input files by this date
        with h5py.File(path, "a") as h5:
            h5.attrs["experiment:date"] = date

    def assert_volume_dropped(path):
        with dclab.new_dataset(path) as ds:
            assert "volume" not in ds.features_innate
            assert "dclab-join-feature-warnings" in ds.logs
            text = "\n".join(ds.logs["dclab-join-feature-warnings"])
            assert "volume" in text

    # Add the extra feature to the first file only and make it the
    # earlier of the two measurements.
    with h5py.File(has_volume, "a") as h5:
        events = h5["events"]
        events["volume"] = events["area_um"][:] ** 1.5
        h5.attrs["experiment:date"] = "2020-01-01"
    stamp_date(lacks_volume, "2021-01-01")

    # sanity checks: only the first file has the feature
    for path, expected in ((has_volume, True), (lacks_volume, False)):
        with dclab.new_dataset(path) as ds:
            assert ("volume" in ds.features_innate) == expected

    # First test: no warning is expected here, because volume can be
    # computed for the file that lacks it, so the feature is kept.
    cli.join(path_out=out_a, paths_in=[has_volume, lacks_volume])
    with dclab.new_dataset(out_a) as ds:
        assert "volume" in ds.features_innate

    # Second test: same thing with reversed dates
    stamp_date(lacks_volume, "2019-01-01")
    cli.join(path_out=out_b, paths_in=[has_volume, lacks_volume])
    assert_volume_dropped(out_b)

    # Third test: flip paths_in around to also test the sorting
    stamp_date(lacks_volume, "2019-01-01")
    cli.join(path_out=out_c, paths_in=[lacks_volume, has_volume])
    assert_volume_dropped(out_c)