Example #1
def version_one_path_local(version_fun, input_path, output_path, overwrite,
                           original_info_df, check_generated_files, options):
    """Version input_path into output_path using version_fun.
    
    :return: a tuple ``(output_path, l)``, where output_path is the selected otuput path and
      l is a list with the obtained versioning time. The list l shall contain options.repetitions elements.
      NOTE: If the subclass version method returns a value, that value is taken
      as the time measurement.
    
    :param version_fun: function with signature like FileVersionTable.version
    :param input_path: path of the file to be versioned
    :param output_path: path where the versioned file is to be stored
    :param overwrite: if True, the version is calculated even if output_path already exists
    :param original_info_df: DataFrame produced by a FilePropertiesTable instance that contains
      an entry for :meth:`atable.indices_to_internal_loc`.
    :param check_generated_files: flag indicating whether failing to produce output_path must raise an exception.
    :param options: additional runtime options
    """
    time_measurements = []

    output_path = get_canonical_path(output_path)
    if os.path.exists(output_path) and not overwrite:
        if options.verbose > 2:
            print(f"[S]kipping versioning of {input_path}->{output_path}")
        return output_path, [-1]

    if options.verbose > 1:
        print(f"[V]ersioning {input_path} -> {output_path} (overwrite={overwrite}) <{version_fun}>")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    row = original_info_df.loc[atable.indices_to_internal_loc(input_path)]
    for repetition_index in range(options.repetitions):
        try:
            time_before = time.time()
            versioning_time = version_fun(
                input_path=input_path, output_path=output_path, row=row)
            if check_generated_files and \
                    (not os.path.exists(output_path) or os.path.getsize(output_path) == 0):
                raise VersioningFailedException(
                    f"Function {version_fun} did not produce a versioned path {input_path}->{output_path}")
            versioning_time = versioning_time if versioning_time is not None \
                else time.time() - time_before
            if versioning_time < 0:
                if options.verbose:
                    print(f"[W]arning: versioning_time = {versioning_time} < 0 for "
                          f"{self.__class__.__name__} on {input_path}")
                versioning_time = 0
            time_measurements.append(versioning_time)
            if repetition_index < options.repetitions - 1:
                os.remove(output_path)
        except Exception as ex:
            try:
                os.remove(output_path)
            except FileNotFoundError:
                pass
            raise ex

    return output_path, time_measurements
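
For reference, a minimal sketch of a version_fun compatible with the signature documented above (a hypothetical helper, not part of the library): it copies the input unchanged and returns None, so version_one_path_local falls back to wall-clock timing.

import shutil

def copy_version_fun(input_path, output_path, row):
    """Hypothetical version_fun: produce output_path by copying input_path verbatim.

    Returning None makes version_one_path_local measure the elapsed wall-clock
    time itself instead of using a value reported by this function.
    """
    shutil.copyfile(input_path, output_path)
    return None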
Example #2
    def process_row(self, index, column_fun_tuples, row, overwrite, fill):
        # For now, file_path is used as the testing dataset path; a training dataset path may need to be added later.
        file_path, model_name = index
        model = self.models_by_name[model_name]
        image_info_row = self.dataset_table_df.loc[indices_to_internal_loc(
            file_path)]
        row_wrapper = self.RowWrapper(file_path, model, row)
        result = super().process_row(index=index,
                                     column_fun_tuples=column_fun_tuples,
                                     row=row_wrapper,
                                     overwrite=overwrite,
                                     fill=fill)

        if isinstance(result, Exception):
            return result

        print(result.__dict__)
        return row
    def set_comparison_results(self, index, row):
        """Perform a compression-decompression cycle and store the comparison results
        """
        file_path, codec_name = index
        row.image_info_row = self.dataset_table_df.loc[indices_to_internal_loc(file_path)]
        assert row.compression_results.compressed_path == row.decompression_results.compressed_path
        assert row.image_info_row["bytes_per_sample"] * row.image_info_row["samples"] \
               == os.path.getsize(row.compression_results.original_path)
        hasher = hashlib.sha256()
        with open(row.compression_results.compressed_path, "rb") as compressed_file:
            hasher.update(compressed_file.read())
        compressed_file_sha256 = hasher.hexdigest()

        row["lossless_reconstruction"] = filecmp.cmp(row.compression_results.original_path,
                                                     row.decompression_results.reconstructed_path)
        assert row.compression_results.compression_time_seconds is not None
        row["compression_time_seconds"] = row.compression_results.compression_time_seconds
        assert row.decompression_results.decompression_time_seconds is not None
        row["decompression_time_seconds"] = row.decompression_results.decompression_time_seconds
        row["repetitions"] = options.repetitions
        row["compression_ratio"] = os.path.getsize(row.compression_results.original_path) / row["compressed_size_bytes"]
        row["compressed_file_sha256"] = compressed_file_sha256
    def process_row(self, index, column_fun_tuples, row, overwrite, fill):
        file_path, codec_name = index
        codec = self.codecs_by_name[codec_name]
        image_info_row = self.dataset_table_df.loc[indices_to_internal_loc(file_path)]
        row_wrapper = self.RowWrapper(
            file_path=file_path, codec=codec,
            image_info_row=image_info_row,
            row=row)
        result = super().process_row(index=index, column_fun_tuples=column_fun_tuples,
                                     row=row_wrapper, overwrite=overwrite, fill=fill)

        if isinstance(result, Exception):
            return result

        if self.compressed_copy_dir_path:
            output_compressed_path = os.path.join(
                self.compressed_copy_dir_path,
                codec.name,
                os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
            os.makedirs(os.path.dirname(output_compressed_path), exist_ok=True)
            if options.verbose > 1:
                print(f"[C]opying {file_path} into {output_compressed_path}")
            shutil.copy(row_wrapper.compression_results.compressed_path, output_compressed_path)

        if self.reconstructed_dir_path is not None:
            output_reconstructed_path = os.path.join(
                self.reconstructed_dir_path,
                codec.name,
                os.path.basename(os.path.dirname(file_path)), os.path.basename(file_path))
            os.makedirs(os.path.dirname(output_reconstructed_path), exist_ok=True)
            if options.verbose > 1:
                print(f"[C]opying {row_wrapper.compression_results.compressed_path} into {output_reconstructed_path}")
            shutil.copy(row_wrapper.decompression_results.reconstructed_path,
                        output_reconstructed_path)

            if image_info_row["component_count"] == 3:
                rendered_path = f"{output_reconstructed_path}.png"
                if not os.path.exists(rendered_path) or options.force:
                    array = isets.load_array_bsq(file_or_path=row_wrapper.decompression_results.reconstructed_path,
                                                 image_properties_row=image_info_row).astype(int)
                    if options.reconstructed_size is not None:
                        width, height, _ = array.shape
                        array = array[
                                width // 2 - options.reconstructed_size // 2:width // 2 + options.reconstructed_size // 2,
                                height // 2 - options.reconstructed_size // 2:height // 2 + options.reconstructed_size // 2,
                                :]
                    cmin = array.min()
                    cmax = array.max()
                    array = np.round((255 * (array.astype(int) - cmin) / (cmax - cmin))).astype("uint8")
                    if options.verbose > 1:
                        print(f"[R]endering {rendered_path}")

                    numpngw.write_png(rendered_path, array.swapaxes(0, 1))

            else:
                full_array = isets.load_array_bsq(
                    file_or_path=row_wrapper.decompression_results.reconstructed_path,
                    image_properties_row=image_info_row).astype(int)
                if options.reconstructed_size is not None:
                    width, height, _ = full_array.shape
                    full_array = full_array[
                                 width // 2 - options.reconstructed_size // 2:width // 2 + options.reconstructed_size // 2,
                                 height // 2 - options.reconstructed_size // 2:height // 2 + options.reconstructed_size // 2,
                                 :]
                for i in range(image_info_row['component_count']):
                    rendered_path = f"{output_reconstructed_path}_component{i}.png"
                    if not os.path.exists(rendered_path) or options.force:
                        array = full_array[:, :, i].squeeze().swapaxes(0, 1)
                        cmin = array.min()
                        cmax = array.max()
                        array = np.round((255 * (array - cmin) / (cmax - cmin))).astype("uint8")
                        if options.verbose > 1:
                            print(f"[R]endering {rendered_path}")
                        numpngw.write_png(rendered_path, array)

        return row
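
The PNG rendering above maps samples to 8 bits with a min-max normalization. A standalone sketch of that transformation (hypothetical helper; it adds a guard against division by zero for constant images):

import numpy as np

def normalize_to_uint8(array):
    # Min-max normalize an integer array into the [0, 255] range for PNG rendering.
    cmin, cmax = array.min(), array.max()
    if cmax == cmin:
        # Constant image: avoid division by zero and render mid-gray.
        return np.full(array.shape, 128, dtype="uint8")
    return np.round(255 * (array.astype(int) - cmin) / (cmax - cmin)).astype("uint8")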
    def get_dataset_info_row(self, file_path):
        """Get the dataset info table row for the file path given as argument.
        """
        return self.dataset_table_df.loc[atable.indices_to_internal_loc(
            file_path)]