Example #1
import h5py
import numpy as np

import dclab
from dclab import cli

from helper_methods import retrieve_data  # dclab test-suite helper


def test_compress_with_online_polygon_filters():
    """Shape-In 2.3 supports online polygon filters"""
    path = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # add an artificial online polygon filter
    with h5py.File(path, "a") as h5:
        # set soft filter to True
        h5.attrs["online_filter:area_um,deform soft limit"] = True
        # set filter values
        pf_name = "online_filter:area_um,deform polygon points"
        area_um = h5["events"]["area_um"]
        deform = h5["events"]["deform"]
        pf_points = np.array([
            [np.mean(area_um) + np.std(area_um),
             np.mean(deform)],
            [np.mean(area_um) + np.std(area_um),
             np.mean(deform) + np.std(deform)],
            [np.mean(area_um),
             np.mean(deform) + np.std(deform)],
        ])
        h5.attrs[pf_name] = pf_points

    path_out = path.with_name("compressed.rtdc")
    cli.compress(path_out=path_out, path_in=path)

    with dclab.new_dataset(path_out) as ds:
        assert len(ds) == 8
        assert ds.config["online_filter"]["area_um,deform soft limit"]
        assert "area_um,deform polygon points" in ds.config["online_filter"]
        assert np.allclose(
            ds.config["online_filter"]["area_um,deform polygon points"],
            pf_points)
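Note how the HDF5 attributes map to the dclab configuration: the attribute "online_filter:area_um,deform polygon points" written above shows up as the key "area_um,deform polygon points" in ds.config["online_filter"], i.e. everything after the "online_filter:" prefix carries over verbatim.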
Example #2
import pytest

from dclab import cli

from helper_methods import retrieve_data  # dclab test-suite helper


def test_check_suffix_disabled_compress():
    path_in_o = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    path_in = path_in_o.with_suffix("")
    path_in_o.rename(path_in)
    assert path_in.suffix == ""
    with pytest.raises(ValueError, match="Unsupported file type"):
        cli.compress(path_in=path_in,
                     path_out=path_in.with_name("compressed.rtdc"))
    # but this should work:
    cli.compress(path_in=path_in,
                 path_out=path_in.with_name("compressed2.rtdc"),
                 check_suffix=False)
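By default, cli.compress infers the input format from the file suffix and raises a ValueError ("Unsupported file type") for suffixes it does not recognize; passing check_suffix=False disables that check, so the renamed file is compressed anyway.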
Example #3
import hashlib
from unittest import mock

from dclab import cli

from helper_methods import retrieve_data  # dclab test-suite helper


def test_compress_already_compressed_no_force():
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out = path_in.with_name("compressed.rtdc")
    path_out2 = path_in.with_name("compressed2.rtdc")

    with mock.patch("sys.argv", ["", str(path_in), str(path_out)]):
        cli.compress()

    with mock.patch("sys.argv", ["", str(path_out), str(path_out2)]):
        cli.compress()

    h1 = hashlib.md5(path_out.read_bytes()).hexdigest()
    h2 = hashlib.md5(path_out2.read_bytes()).hexdigest()
    assert h1 == h2
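Calling cli.compress() with no arguments makes it parse sys.argv, which is what the mock.patch blocks emulate; the installed dclab-compress command uses the same two positional arguments (input, then output). The equal hashes show that compressing an already-compressed file without force produces a byte-identical copy.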
Example #4
import hashlib

from dclab import cli

from helper_methods import retrieve_data  # dclab test-suite helper


def test_compress_already_compressed_force():
    """An extension of the above test to make sure "force" works"""
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out1 = path_in.with_name("compressed_1.rtdc")
    path_out2 = path_in.with_name("compressed_not_a_copy_of_1.rtdc")
    # this is straightforward
    cli.compress(path_out=path_out1, path_in=path_in)
    # just for the sake of comparison
    cli.compress(path_out=path_out2, path_in=path_out1, force=True)

    # the two files should not be the same (force=True recompresses;
    # dates are written, etc.)
    h1 = hashlib.md5(path_out1.read_bytes()).hexdigest()
    h2 = hashlib.md5(path_out2.read_bytes()).hexdigest()
    assert h1 != h2
Example #5
import numpy as np

from dclab import cli, new_dataset

from helper_methods import retrieve_data  # dclab test-suite helper


def test_compress():
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out = path_in.with_name("compressed.rtdc")

    cli.compress(path_out=path_out, path_in=path_in)
    with new_dataset(path_out) as dsj, new_dataset(path_in) as ds0:
        assert "dclab-compress" in dsj.logs
        assert len(dsj)
        assert len(dsj) == len(ds0)
        for feat in ds0.features:
            if feat in ["contour", "image", "mask"]:
                for ii in range(len(dsj)):
                    assert np.all(dsj[feat][ii] == ds0[feat][ii]), feat
            else:
                assert np.all(dsj[feat] == ds0[feat]), feat
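The special-casing exists because contour, image and mask are non-scalar features, so they are compared event by event; every other feature is a plain array that np.all can compare in a single call.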
Example #6
import numpy as np

import dclab
from dclab import cli

from helper_methods import retrieve_data  # dclab test-suite helper


def test_compress_with_online_polygon_filters_real_data():
    """Shape-In 2.3 supports online polygon filters"""
    path = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")

    path_out = path.with_name("compressed.rtdc")
    cli.compress(path_out=path_out, path_in=path)

    with dclab.new_dataset(path_out) as ds:
        assert len(ds) == 1
        assert ds.config["online_filter"]["size_x,size_y soft limit"]
        assert "size_x,size_y polygon points" in ds.config["online_filter"]
        assert np.allclose(
            ds.config["online_filter"]["size_x,size_y polygon points"],
            [[0.1, 0.2],
             [0.1, 2.5],
             [3.3, 3.2],
             [5.2, 0.9]]
        )
Example #7
    # Excerpt: a method of an upload-job class; the surrounding module
    # presumably imports `compress` from dclab.cli and `IntegrityChecker`
    # from dclab.rtdc_dataset.check.
    def task_compress_resources(self):
        """Compress resources if they are not fully compressed

        Data are stored in the user's cache directory and
        deleted after upload is complete.
        """
        self.set_state("compress")
        for ii, path in enumerate(list(self.paths)):
            if path.suffix in [".rtdc", ".dc"]:  # do we have an .rtdc file?
                # check for compression
                with IntegrityChecker(path) as ic:
                    cdata = ic.check_compression()[0].data
                if cdata["uncompressed"]:  # (partially) not compressed?
                    res_dir = self.cache_dir / "{}".format(ii)
                    res_dir.mkdir(exist_ok=True, parents=True)
                    path_out = res_dir / path.name
                    compress(path_out=path_out, path_in=path)
                    self.paths[ii] = path_out
        self.set_state("parcel")
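The compression check used in this method can also be run on its own. Below is a minimal sketch that mirrors the check_compression() usage above; it assumes IntegrityChecker is importable from dclab.rtdc_dataset.check (the import is not shown in the excerpt), and the file path is hypothetical:

from dclab.rtdc_dataset.check import IntegrityChecker  # assumed import path

path = "measurement.rtdc"  # hypothetical input file
with IntegrityChecker(path) as ic:
    # the first cue of the compression check holds the counts
    cdata = ic.check_compression()[0].data
if cdata["uncompressed"]:
    print("file contains uncompressed data and should be compressed")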
Example #8
import hashlib
import time

from dclab import cli

from helper_methods import retrieve_data  # dclab test-suite helper


def test_compress_already_compressed():
    """By default, an already compressed dataset should not be compressed"""
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out1 = path_in.with_name("compressed_1.rtdc")
    path_out2 = path_in.with_name("compressed_2.rtdc")
    path_out3 = path_in.with_name("compressed_copy_of_1.rtdc")
    # this is straightforward
    cli.compress(path_out=path_out1, path_in=path_in)
    # just for the sake of comparison
    time.sleep(1)  # we need different time stamps in path_out2
    cli.compress(path_out=path_out2, path_in=path_in)
    # this is not trivial
    cli.compress(path_out=path_out3, path_in=path_out1)

    # the first two files should not be the same (dates are written, etc)
    h1 = hashlib.md5(path_out1.read_bytes()).hexdigest()
    h2 = hashlib.md5(path_out2.read_bytes()).hexdigest()
    h3 = hashlib.md5(path_out3.read_bytes()).hexdigest()
    assert h1 != h2
    assert h1 == h3
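Examples #3, #4 and #8 together pin down the default behavior: compressing an already-compressed file without force yields a byte-identical copy (h1 == h3 here, and the matching hashes in Example #3); compressing the same uncompressed input twice produces different files because the run date ends up in the output (hence the time.sleep(1)); and force=True, as in Example #4, recompresses even an already-compressed input, so the result is not a copy.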