def version_one_path_local(version_fun, input_path, output_path, overwrite,
                           original_info_df, check_generated_files, options):
    """Version input_path into output_path using version_fun.

    :return: a tuple ``(output_path, l)``, where output_path is the selected
      output path and l is a list with the obtained versioning times.
      The list l contains options.repetitions elements, or ``[-1]`` if
      versioning was skipped because output_path already existed.
      NOTE: If the subclass version method returns a value, that value is
      taken as the time measurement.

    :param version_fun: function with signature like FileVersionTable.version
    :param input_path: path of the file to be versioned
    :param output_path: path where the versioned file is to be stored
    :param overwrite: if True, the version is calculated even if output_path
      already exists
    :param original_info_df: DataFrame produced by a FilePropertiesTable
      instance that contains an entry for
      :meth:`atable.indices_to_internal_loc`.
    :param check_generated_files: flag indicating whether failing to produce
      output_path must raise an exception.
    :param options: additional runtime options
    """
    time_measurements = []

    output_path = get_canonical_path(output_path)
    if os.path.exists(output_path) and not overwrite:
        if options.verbose > 2:
            print(f"[S]kipping versioning of {input_path}->{output_path}")
        return output_path, [-1]

    if options.verbose > 1:
        print(f"[V]ersioning {input_path} -> {output_path} "
              f"(overwrite={overwrite}) <{version_fun}>")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    row = original_info_df.loc[atable.indices_to_internal_loc(input_path)]
    for repetition_index in range(options.repetitions):
        try:
            time_before = time.time()
            versioning_time = version_fun(
                input_path=input_path, output_path=output_path, row=row)
            if check_generated_files and \
                    (not os.path.exists(output_path)
                     or os.path.getsize(output_path) == 0):
                raise VersioningFailedException(
                    f"Function {version_fun} did not produce a versioned path "
                    f"{input_path}->{output_path}")
            # If version_fun did not return its own measurement,
            # fall back to wall-clock timing.
            versioning_time = versioning_time if versioning_time is not None \
                else time.time() - time_before
            if versioning_time < 0:
                if options.verbose:
                    print(f"[W]arning: versioning_time = {versioning_time} < 0 "
                          f"for {version_fun} on {input_path}")
                versioning_time = 0
            time_measurements.append(versioning_time)
            # Remove the output between repetitions so each run versions anew.
            if repetition_index < options.repetitions - 1:
                os.remove(output_path)
        except Exception as ex:
            # Do not leave partially generated files behind.
            try:
                os.remove(output_path)
            except FileNotFoundError:
                pass
            raise ex

    return output_path, time_measurements
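# Usage sketch (hypothetical, for illustration only): a version_fun compatible
# with version_one_path_local must accept input_path, output_path and row as
# keyword arguments. It may return its own time measurement, or None to let
# version_one_path_local use wall-clock timing instead.
def _example_copy_version(input_path, output_path, row):
    """Hypothetical version_fun that simply copies the input file unchanged.

    Returning None makes version_one_path_local fall back to measuring the
    elapsed time with time.time().
    """
    shutil.copyfile(input_path, output_path)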
def process_row(self, index, column_fun_tuples, row, overwrite, fill):
    # For now, file_path is used as the testing dataset path; a separate
    # training dataset path may need to be added in the future.
    file_path, model_name = index
    model = self.models_by_name[model_name]
    image_info_row = self.dataset_table_df.loc[
        indices_to_internal_loc(file_path)]
    row_wrapper = self.RowWrapper(file_path, model, row)
    result = super().process_row(index=index,
                                 column_fun_tuples=column_fun_tuples,
                                 row=row_wrapper, overwrite=overwrite,
                                 fill=fill)
    if isinstance(result, Exception):
        return result

    return row
def set_comparison_results(self, index, row):
    """Perform a compression-decompression cycle and store the comparison
    results.
    """
    file_path, codec_name = index
    row.image_info_row = self.dataset_table_df.loc[
        indices_to_internal_loc(file_path)]
    assert row.compression_results.compressed_path \
           == row.decompression_results.compressed_path
    assert row.image_info_row["bytes_per_sample"] \
           * row.image_info_row["samples"] \
           == os.path.getsize(row.compression_results.original_path)
    # Hash the compressed file so stored results can be integrity-checked.
    hasher = hashlib.sha256()
    with open(row.compression_results.compressed_path, "rb") as compressed_file:
        hasher.update(compressed_file.read())
    compressed_file_sha256 = hasher.hexdigest()

    row["lossless_reconstruction"] = filecmp.cmp(
        row.compression_results.original_path,
        row.decompression_results.reconstructed_path)
    assert row.compression_results.compression_time_seconds is not None
    row["compression_time_seconds"] = \
        row.compression_results.compression_time_seconds
    assert row.decompression_results.decompression_time_seconds is not None
    row["decompression_time_seconds"] = \
        row.decompression_results.decompression_time_seconds
    row["repetitions"] = options.repetitions
    row["compression_ratio"] = \
        os.path.getsize(row.compression_results.original_path) \
        / row["compressed_size_bytes"]
    row["compressed_file_sha256"] = compressed_file_sha256
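# Worked example (illustrative): compression_ratio is the original size
# divided by the compressed size, so larger values mean better compression.
def _example_compression_ratio(original_size_bytes, compressed_size_bytes):
    """Hypothetical helper mirroring the compression_ratio column above:
    e.g., 1_000_000 original bytes / 250_000 compressed bytes -> 4.0.
    """
    return original_size_bytes / compressed_size_bytes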
def process_row(self, index, column_fun_tuples, row, overwrite, fill):
    file_path, codec_name = index
    codec = self.codecs_by_name[codec_name]
    image_info_row = self.dataset_table_df.loc[
        indices_to_internal_loc(file_path)]
    row_wrapper = self.RowWrapper(
        file_path=file_path, codec=codec,
        image_info_row=image_info_row, row=row)
    result = super().process_row(index=index,
                                 column_fun_tuples=column_fun_tuples,
                                 row=row_wrapper, overwrite=overwrite,
                                 fill=fill)
    if isinstance(result, Exception):
        return result

    if self.compressed_copy_dir_path:
        output_compressed_path = os.path.join(
            self.compressed_copy_dir_path,
            codec.name,
            os.path.basename(os.path.dirname(file_path)),
            os.path.basename(file_path))
        os.makedirs(os.path.dirname(output_compressed_path), exist_ok=True)
        if options.verbose > 1:
            print(f"[C]opying {file_path} into {output_compressed_path}")
        shutil.copy(row_wrapper.compression_results.compressed_path,
                    output_compressed_path)

    if self.reconstructed_dir_path is not None:
        output_reconstructed_path = os.path.join(
            self.reconstructed_dir_path,
            codec.name,
            os.path.basename(os.path.dirname(file_path)),
            os.path.basename(file_path))
        os.makedirs(os.path.dirname(output_reconstructed_path), exist_ok=True)
        if options.verbose > 1:
            print(f"[C]opying "
                  f"{row_wrapper.decompression_results.reconstructed_path} "
                  f"into {output_reconstructed_path}")
        shutil.copy(row_wrapper.decompression_results.reconstructed_path,
                    output_reconstructed_path)

        if image_info_row["component_count"] == 3:
            # Render 3-component images as a single RGB PNG.
            rendered_path = f"{output_reconstructed_path}.png"
            if not os.path.exists(rendered_path) or options.force:
                array = isets.load_array_bsq(
                    file_or_path=row_wrapper.decompression_results.reconstructed_path,
                    image_properties_row=image_info_row).astype(int)
                if options.reconstructed_size is not None:
                    # Crop a centered square of the requested size.
                    width, height, _ = array.shape
                    array = array[
                        width // 2 - options.reconstructed_size // 2:
                        width // 2 + options.reconstructed_size // 2,
                        height // 2 - options.reconstructed_size // 2:
                        height // 2 + options.reconstructed_size // 2,
                        :]
                # Min-max normalize sample values into the 8-bit range.
                cmin = array.min()
                cmax = array.max()
                array = np.round(
                    255 * (array - cmin) / (cmax - cmin)).astype("uint8")
                if options.verbose > 1:
                    print(f"[R]endering {rendered_path}")
                numpngw.write_png(rendered_path, array.swapaxes(0, 1))
        else:
            # Render each component as a separate grayscale PNG.
            full_array = isets.load_array_bsq(
                file_or_path=row_wrapper.decompression_results.reconstructed_path,
                image_properties_row=image_info_row).astype(int)
            if options.reconstructed_size is not None:
                width, height, _ = full_array.shape
                full_array = full_array[
                    width // 2 - options.reconstructed_size // 2:
                    width // 2 + options.reconstructed_size // 2,
                    height // 2 - options.reconstructed_size // 2:
                    height // 2 + options.reconstructed_size // 2,
                    :]
            for i in range(image_info_row["component_count"]):
                rendered_path = f"{output_reconstructed_path}_component{i}.png"
                if not os.path.exists(rendered_path) or options.force:
                    array = full_array[:, :, i].squeeze().swapaxes(0, 1)
                    cmin = array.min()
                    cmax = array.max()
                    array = np.round(
                        255 * (array - cmin) / (cmax - cmin)).astype("uint8")
                    if options.verbose > 1:
                        print(f"[R]endering {rendered_path}")
                    numpngw.write_png(rendered_path, array)

    return row
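# Rendering sketch (illustrative): the PNG rendering above min-max normalizes
# sample values into the 8-bit range. A standalone version of that scaling:
def _example_normalize_to_uint8(component_array):
    """Hypothetical helper applying the 255 * (v - cmin) / (cmax - cmin)
    scaling used when rendering; assumes cmax > cmin (a constant image
    would divide by zero, as in the rendering code above).
    """
    cmin = component_array.min()
    cmax = component_array.max()
    return np.round(
        255 * (component_array - cmin) / (cmax - cmin)).astype("uint8")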
def get_dataset_info_row(self, file_path):
    """Get the dataset info table row for the file path given as argument."""
    return self.dataset_table_df.loc[
        atable.indices_to_internal_loc(file_path)]