import hashlib
import time
from unittest import mock

import h5py
import numpy as np
import pytest

import dclab
from dclab import cli, new_dataset
# used by task_compress_resources below
from dclab.cli import compress
from dclab.rtdc_dataset.check import IntegrityChecker

# `retrieve_data` is the test-data helper used throughout the dclab test
# suite (assumed to be importable from the local helper_methods module).
from helper_methods import retrieve_data


def test_compress_with_online_polygon_filters():
    """Shape-In 2.3 supports online polygon filters"""
    path = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # add an artificial online polygon filter
    with h5py.File(path, "a") as h5:
        # set soft filter to True
        h5.attrs["online_filter:area_um,deform soft limit"] = True
        # set filter values
        pf_name = "online_filter:area_um,deform polygon points"
        area_um = h5["events"]["area_um"]
        deform = h5["events"]["deform"]
        pf_points = np.array([
            [np.mean(area_um) + np.std(area_um),
             np.mean(deform)],
            [np.mean(area_um) + np.std(area_um),
             np.mean(deform) + np.std(deform)],
            [np.mean(area_um),
             np.mean(deform) + np.std(deform)],
        ])
        h5.attrs[pf_name] = pf_points

    path_out = path.with_name("compressed.rtdc")
    cli.compress(path_out=path_out, path_in=path)

    with dclab.new_dataset(path_out) as ds:
        assert len(ds) == 8
        assert ds.config["online_filter"]["area_um,deform soft limit"]
        assert "area_um,deform polygon points" in ds.config["online_filter"]
        assert np.allclose(
            ds.config["online_filter"]["area_um,deform polygon points"],
            pf_points)

def test_check_suffix_disabled_compress():
    path_in_o = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    path_in = path_in_o.with_suffix("")
    path_in_o.rename(path_in)
    assert path_in.suffix == ""

    with pytest.raises(ValueError, match="Unsupported file type"):
        cli.compress(path_in=path_in,
                     path_out=path_in.with_name("compressed.rtdc"))

    # but this should work:
    cli.compress(path_in=path_in,
                 path_out=path_in.with_name("compressed2.rtdc"),
                 check_suffix=False)

def test_compress_already_compressed_no_force():
    """Without force, an already compressed dataset is copied verbatim"""
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out = path_in.with_name("compressed.rtdc")
    path_out2 = path_in.with_name("compressed2.rtdc")

    with mock.patch("sys.argv", ["", str(path_in), str(path_out)]):
        cli.compress()
    with mock.patch("sys.argv", ["", str(path_out), str(path_out2)]):
        cli.compress()

    h1 = hashlib.md5(path_out.read_bytes()).hexdigest()
    h2 = hashlib.md5(path_out2.read_bytes()).hexdigest()
    assert h1 == h2

def test_compress_already_compressed_force():
    """An extension of the above test to make sure "force" works"""
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out1 = path_in.with_name("compressed_1.rtdc")
    path_out2 = path_in.with_name("compressed_not_a_copy_of_1.rtdc")
    # this is straight-forward
    cli.compress(path_out=path_out1, path_in=path_in)
    # force re-compression of the already-compressed file
    cli.compress(path_out=path_out2, path_in=path_out1, force=True)
    # the two files should not be identical (dates are written, etc.)
    h1 = hashlib.md5(path_out1.read_bytes()).hexdigest()
    h2 = hashlib.md5(path_out2.read_bytes()).hexdigest()
    assert h1 != h2

def test_compress():
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out = path_in.with_name("compressed.rtdc")
    cli.compress(path_out=path_out, path_in=path_in)

    with new_dataset(path_out) as dsj, new_dataset(path_in) as ds0:
        assert "dclab-compress" in dsj.logs
        assert len(dsj)
        assert len(dsj) == len(ds0)
        for feat in ds0.features:
            if feat in ["contour", "image", "mask"]:
                for ii in range(len(dsj)):
                    assert np.all(dsj[feat][ii] == ds0[feat][ii]), feat
            else:
                assert np.all(dsj[feat] == ds0[feat]), feat

def test_compress_with_online_polygon_filters_real_data():
    """Shape-In 2.3 supports online polygon filters"""
    path = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    path_out = path.with_name("compressed.rtdc")
    cli.compress(path_out=path_out, path_in=path)

    with dclab.new_dataset(path_out) as ds:
        assert len(ds) == 1
        assert ds.config["online_filter"]["size_x,size_y soft limit"]
        assert "size_x,size_y polygon points" in ds.config["online_filter"]
        assert np.allclose(
            ds.config["online_filter"]["size_x,size_y polygon points"],
            [[0.1, 0.2],
             [0.1, 2.5],
             [3.3, 3.2],
             [5.2, 0.9]]
        )

def task_compress_resources(self):
    """Compress resources if they are not fully compressed

    Data are stored in the user's cache directory and deleted
    after the upload is complete.
    """
    self.set_state("compress")
    for ii, path in enumerate(list(self.paths)):
        if path.suffix in [".rtdc", ".dc"]:  # do we have an .rtdc file?
            # check for compression
            with IntegrityChecker(path) as ic:
                cdata = ic.check_compression()[0].data
            if cdata["uncompressed"]:  # (partially) not compressed?
                res_dir = self.cache_dir / "{}".format(ii)
                res_dir.mkdir(exist_ok=True, parents=True)
                path_out = res_dir / path.name
                compress(path_out=path_out, path_in=path)
                self.paths[ii] = path_out
    self.set_state("parcel")

def test_compress_already_compressed():
    """By default, an already compressed dataset should not be compressed"""
    path_in = retrieve_data("fmt-hdf5_mask-contour_2018.zip")
    # same directory (will be cleaned up with path_in)
    path_out1 = path_in.with_name("compressed_1.rtdc")
    path_out2 = path_in.with_name("compressed_2.rtdc")
    path_out3 = path_in.with_name("compressed_copy_of_1.rtdc")
    # this is straight-forward
    cli.compress(path_out=path_out1, path_in=path_in)
    # just for the sake of comparison
    time.sleep(1)  # we need different time stamps in path_out2
    cli.compress(path_out=path_out2, path_in=path_in)
    # this is not trivial
    cli.compress(path_out=path_out3, path_in=path_out1)
    # The first two files should not be identical (dates are written, etc.),
    # but the third must be a verbatim copy of the first, because
    # already-compressed input is not re-compressed by default.
    h1 = hashlib.md5(path_out1.read_bytes()).hexdigest()
    h2 = hashlib.md5(path_out2.read_bytes()).hexdigest()
    h3 = hashlib.md5(path_out3.read_bytes()).hexdigest()
    assert h1 != h2
    assert h1 == h3