def compress(self, original_path: str, compressed_path: str, original_file_info=None):
    """Compress an image, splitting it into tiles when either spatial
    dimension exceeds the maximum supported by the underlying codec.

    If both width and height fit within ``self.max_dimension_size``, the
    image is compressed directly with :meth:`compress_one`. Otherwise the
    image is divided into ``self.split_width_count`` x
    ``self.split_height_count`` tiles (the last tile in each direction
    absorbs any division remainder), each tile is compressed independently,
    and the compressed tiles are packed into a single tarlite archive
    written to ``compressed_path``.

    :param original_path: path to the raw (BSQ) input image.
    :param compressed_path: path where the compressed output is written.
    :param original_file_info: image properties row; must provide at least
      "width", "height" and "samples".
    :return: compression results whose ``compression_time_seconds`` is the
      sum of the per-tile compression times.
    """
    # Tested limit: self.max_tested_spatial_size
    if original_file_info["width"] <= self.max_dimension_size \
            and original_file_info["height"] <= self.max_dimension_size:
        return self.compress_one(original_path=original_path,
                                 compressed_path=compressed_path,
                                 original_file_info=original_file_info)

    tl_writer = tarlite.TarliteWriter()
    img = isets.load_array_bsq(file_or_path=original_path,
                               image_properties_row=original_file_info)
    with tempfile.TemporaryDirectory(dir=options.base_tmp_dir) as tmp_dir:
        compound_size = 0
        total_compression_time = 0
        for y in range(self.split_height_count):
            for x in range(self.split_width_count):
                # Tile boundaries along each axis; the last tile extends to
                # the full dimension so no samples are lost to rounding.
                x_start, x_end = self._tile_bounds(
                    full_size=original_file_info["width"],
                    split_count=self.split_width_count,
                    index=x)
                y_start, y_end = self._tile_bounds(
                    full_size=original_file_info["height"],
                    split_count=self.split_height_count,
                    index=y)
                small_array = img[x_start:x_end, y_start:y_end, :]

                small_path = os.path.join(tmp_dir, f"{x}_{y}.raw")
                small_compressed_path = os.path.join(tmp_dir, f"{x}_{y}.mcalic")
                isets.dump_array_bsq(small_array, small_path)

                # Describe the tile with its own geometry; everything else
                # (dtype, signedness, endianness) is inherited unchanged.
                small_file_info = copy.copy(original_file_info)
                small_file_info["width"], small_file_info["height"], \
                    small_file_info["component_count"] = small_array.shape

                compression_results = self.compress_one(
                    original_path=small_path,
                    compressed_path=small_compressed_path,
                    original_file_info=small_file_info)
                total_compression_time += compression_results.compression_time_seconds

                tl_writer.add_file(small_compressed_path)
                os.remove(small_path)
                compound_size += small_array.size

        # Sanity check: the tiles must partition the image exactly.
        assert compound_size == original_file_info["samples"], \
            f"compound_size = {compound_size} != {original_file_info['samples']} = original samples"

        tl_writer.write(output_path=compressed_path)
        compression_results = self.compression_results_from_paths(
            original_path=original_path, compressed_path=compressed_path)
        compression_results.compression_time_seconds = total_compression_time
        return compression_results

def _tile_bounds(self, full_size: int, split_count: int, index: int):
    """Return the ``[start, end)`` sample interval of the ``index``-th of
    ``split_count`` even partitions of ``full_size``; the last partition
    absorbs the remainder of the integer division.
    """
    base = full_size // split_count
    start = index * base
    end = (index + 1) * base if index < split_count - 1 else full_size
    return start, end
def compress(self, original_path: str, compressed_path: str, original_file_info=None):
    """Compress a multi-band image by stacking groups of bands side by side
    into 2D images, compressing each stack with the parent codec (as PGM),
    and packing the compressed stacks into a tarlite archive.

    Signed inputs are first shifted into the unsigned range expected by the
    PGM format. The number of bands per stack is chosen so that the stacked
    dimension stays within ``self.max_dimension_size``.

    :param original_path: path to the raw (BSQ) input image.
    :param compressed_path: path where the tarlite archive is written.
    :param original_file_info: image properties row; must provide at least
      "bytes_per_sample", "width", "height", "component_count" and "signed".
    :return: compression results whose ``compression_time_seconds`` is the
      sum of the per-stack compression times.
    """
    assert original_file_info["bytes_per_sample"] in [1, 2], \
        f"Only 1 or 2 bytes per sample, unsigned values {original_file_info['bytes_per_sample']}"
    assert original_file_info["width"] <= self.max_dimension_size, \
        f"The input path has width {original_file_info['width']} exceeding the maximum {self.max_dimension_size}"
    assert original_file_info["height"] <= self.max_dimension_size, \
        f"The input path has height {original_file_info['height']} exceeding the maximum {self.max_dimension_size}"

    complete_array = isets.load_array_bsq(file_or_path=original_path,
                                          image_properties_row=original_file_info)
    if original_file_info["signed"]:
        # Shift signed samples into the non-negative range by adding half
        # the dynamic range (e.g. +32768 for 16 bpp).
        complete_array = complete_array.astype(np.int64)
        complete_array += 2 ** ((8 * original_file_info['bytes_per_sample']) - 1)
        assert (complete_array >= 0).all(), f"Error converting signed into unsigned"

    # Stack as many bands as fit within the maximum dimension; the height
    # assertion above guarantees this is at least 1.
    bands_per_image = min(original_file_info["component_count"],
                          self.max_dimension_size // original_file_info["height"])
    tw = tarlite.TarliteWriter()
    total_compression_time = 0
    with tempfile.TemporaryDirectory(dir=options.base_tmp_dir) as tmp_dir:
        stacked_size = 0
        for stack_index, start_band_index in enumerate(
                range(0, original_file_info["component_count"], bands_per_image)):
            end_band_index_not_included = min(
                original_file_info["component_count"],
                start_band_index + bands_per_image)
            stacked_array = np.hstack(tuple(
                complete_array[:, :, z].squeeze()
                for z in range(start_band_index, end_band_index_not_included)))
            stacked_size += stacked_array.shape[0] * stacked_array.shape[1]

            with tempfile.NamedTemporaryFile(dir=options.base_tmp_dir,
                                             suffix=".pgm") as tmp_stack_file:
                pgm.write_pgm(array=stacked_array,
                              bytes_per_sample=original_file_info["bytes_per_sample"],
                              output_path=tmp_stack_file.name)
                stack_compressed_path = os.path.join(tmp_dir, str(stack_index))
                compression_results = super().compress(
                    original_path=tmp_stack_file.name,
                    compressed_path=stack_compressed_path,
                    original_file_info=original_file_info)
                total_compression_time += compression_results.compression_time_seconds
                tw.add_file(input_path=stack_compressed_path)

        # Sanity check: the stacks must cover every sample exactly once.
        expected_size = (complete_array.shape[0] * complete_array.shape[1]
                         * complete_array.shape[2])
        assert stacked_size == expected_size, \
            f"Total stacked size {stacked_size} does not match the expectations. " \
            f"({stacked_size} vs {expected_size})"

        tw.write(output_path=compressed_path)
        compression_results = self.compression_results_from_paths(
            original_path=original_path, compressed_path=compressed_path)
        compression_results.compression_time_seconds = total_compression_time
        return compression_results
def test_read_write(self):
    """Round-trip check: pack this directory's *.py files into a tarlite
    archive, extract them, and verify every extracted file is byte-identical
    to its source.
    """
    with tempfile.NamedTemporaryFile() as tmp_tarlite_file:
        source_dir = os.path.abspath(os.path.dirname(__file__))
        input_paths = glob.glob(os.path.join(source_dir, "*.py"))

        writer = tarlite.TarliteWriter(initial_input_paths=input_paths)
        writer.write(output_path=tmp_tarlite_file.name)

        reader = tarlite.TarliteReader(tarlite_path=tmp_tarlite_file.name)
        with tempfile.TemporaryDirectory() as extraction_dir:
            reader.extract_all(output_dir_path=extraction_dir)
            for original_path in input_paths:
                extracted_path = os.path.join(
                    extraction_dir, os.path.basename(original_path))
                assert filecmp.cmp(original_path, extracted_path)
def compress_one(self, original_path: str, compressed_path: str, original_file_info=None):
    """Compress one image tile with M-CALIC.

    The raw BSQ input is rewritten as the little-endian, unsigned, 16 bpp
    BIL file the external M-CALIC binary expects. For signed inputs, samples
    are shifted to non-negative values and the shift magnitude is stored as
    side information; in that case ``compressed_path`` is a tarlite archive
    bundling the side-information file and the M-CALIC codestream.

    :param original_path: path to the raw (BSQ) input tile.
    :param compressed_path: path where the compressed output is written.
    :param original_file_info: image properties row; must provide at least
      "bytes_per_sample", "component_count", "height", "width", "signed"
      and "big_endian".
    :return: compression results from the parent codec, with original and
      compressed paths rewritten to refer to the caller's files.
    """
    assert original_file_info["bytes_per_sample"] == 2, \
        f"This implementation of M-CALIC ({self.compressor_path}) only supports 16bpp"
    assert original_file_info["component_count"] > 1, \
        f"This implementation of M-CALIC ({self.compressor_path}) only supports images with more than one component"
    with tempfile.NamedTemporaryFile(
            dir=options.base_tmp_dir,
            prefix=f"bil_le_{os.path.basename(original_path)}") as bil_le_file:
        # M-Calic implementation requires little endian, unsigned 16bpp BIL format
        original_dtype = isets.iproperties_row_to_numpy_dtype(
            image_properties_row=original_file_info)
        img = np.fromfile(original_path, dtype=original_dtype).reshape(
            original_file_info["component_count"], original_file_info["height"],
            original_file_info["width"])
        # offset stays None for unsigned inputs; for signed inputs it is the
        # (non-positive) value subtracted from every sample below.
        offset = None
        if original_file_info["signed"]:
            offset, original_max = int(img.min()), int(img.max())
            offset = min(offset, 0)
            # NOTE(review): range limit is 2**15 - 1, not 2**16 - 1 —
            # presumably the binary only handles 15-bit shifted data; confirm
            # against the M-CALIC executable's documentation.
            assert original_max - offset <= 2 ** 15 - 1, \
                f"Invalid dynamic range of signed image ({offset}, {original_max})"
            # Subtract the negative offset (i.e. shift up) in a wider type,
            # then store as the unsigned counterpart of the original dtype.
            img = (img.astype("i4") - offset).astype(
                original_dtype.replace("i", "u"))
        if original_file_info["big_endian"]:
            # Convert to little-endian unsigned (">u2"/"​>i2" -> "<u2").
            img = img.astype(original_dtype.replace(">", "<").replace("i", "u"))
        # swapaxes(0, 1) turns (band, row, col) into (row, band, col), i.e.
        # band-interleaved-by-line (BIL) ordering on disk.
        img.swapaxes(0, 1).tofile(bil_le_file.name)

        if original_file_info["signed"]:
            with tempfile.NamedTemporaryFile(dir=options.base_tmp_dir,
                                             prefix=f"bil_le_{os.path.basename(original_path)}",
                                             suffix=".mcalic") as tmp_compressed_file, \
                    tempfile.NamedTemporaryFile(dir=options.base_tmp_dir,
                                                prefix=f"side_info_{os.path.basename(original_path)}",
                                                suffix=".txt", mode="w") as si_file:
                # Side information: magnitude of the applied offset, needed
                # to restore the original signed values on decompression.
                si_file.write(f"{abs(offset):d}")
                si_file.flush()
                compression_results = super().compress(
                    original_path=bil_le_file.name,
                    compressed_path=tmp_compressed_file.name,
                    original_file_info=original_file_info)
                # Bundle side info + codestream into a single output file.
                tarlite.TarliteWriter(initial_input_paths=[
                    si_file.name, tmp_compressed_file.name]).write(compressed_path)
                # Report the caller's paths, not the temporary ones.
                compression_results.original_path = original_path
                compression_results.compressed_path = compressed_path
                return compression_results
        else:
            compression_results = super().compress(
                original_path=bil_le_file.name,
                compressed_path=compressed_path,
                original_file_info=original_file_info)
            compression_results.original_path = original_path
            return compression_results