import numpy
import h5py

from silx.io import commonh5, spech5
from silx.io.convert import write_to_h5
from silx.io.dictdump import h5todict


def test_with_spech5(tmp_path):
    """Test write_to_h5 with SpecH5 input"""
    filepath = tmp_path / "file.spec"
    filepath.write_bytes(
        bytes("""#F /tmp/sf.dat
#S 1 cmd
#L a  b
1 2
""", encoding='ascii'))

    output_filepath = tmp_path / "output.h5"
    with spech5.SpecH5(str(filepath)) as spech5file:
        write_to_h5(spech5file, str(output_filepath))

    print(h5todict(str(output_filepath)))

    def assert_equal(item1, item2):
        if isinstance(item1, dict):
            assert tuple(item1.keys()) == tuple(item2.keys())
            for key in item1.keys():
                assert_equal(item1[key], item2[key])
        else:
            # assert the comparison result, otherwise the check is a no-op
            assert numpy.array_equal(item1, item2)

    assert_equal(
        h5todict(str(output_filepath)),
        {
            '1.1': {
                'instrument': {
                    'positioners': {},
                    'specfile': {
                        'file_header': ['#F /tmp/sf.dat'],
                        'scan_header': ['#S 1 cmd', '#L a  b'],
                    },
                },
                'measurement': {
                    'a': [1.],
                    'b': [2.],
                },
                'start_time': '',
                'title': 'cmd',
            },
        })
def test_with_commonh5(tmp_path):
    """Test write_to_h5 with commonh5 input"""
    fobj = commonh5.File("filename.txt", mode="w")
    group = fobj.create_group("group")
    dataset = group.create_dataset("dataset", data=numpy.array(50))
    group["soft_link"] = dataset  # Create softlink

    output_filepath = tmp_path / "output.h5"
    write_to_h5(fobj, str(output_filepath))

    assert h5todict(str(output_filepath)) == {
        'group': {
            'dataset': numpy.array(50),
            'soft_link': numpy.array(50),
        },
    }
    with h5py.File(output_filepath, mode="r") as h5file:
        soft_link = h5file.get("/group/soft_link", getlink=True)
        assert isinstance(soft_link, h5py.SoftLink)
        assert soft_link.path == "/group/dataset"
def test_with_hdf5(tmp_path):
    """Test write_to_h5 with HDF5 file input"""
    filepath = tmp_path / "base.h5"
    with h5py.File(filepath, mode="w") as h5file:
        h5file["group/dataset"] = 50
        h5file["group/soft_link"] = h5py.SoftLink("/group/dataset")
        h5file["group/external_link"] = h5py.ExternalLink(
            "base.h5", "/group/dataset")

    output_filepath = tmp_path / "output.h5"
    write_to_h5(str(filepath), str(output_filepath))

    assert h5todict(str(output_filepath)) == {
        'group': {
            'dataset': 50,
            'soft_link': 50,
        },
    }
    with h5py.File(output_filepath, mode="r") as h5file:
        soft_link = h5file.get("group/soft_link", getlink=True)
        assert isinstance(soft_link, h5py.SoftLink)
        assert soft_link.path == "/group/dataset"
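# Usage sketch (illustrative, not part of the original test module): the tests
# above exercise silx.io.convert.write_to_h5, which also accepts a plain file
# path as input.  The file names and the compression choice below are examples,
# not values taken from the tests.
def _example_convert_spec_to_h5(spec_path, h5_path):
    """Convert a SPEC file to HDF5, gzip-compressing large datasets."""
    write_to_h5(spec_path, h5_path,
                h5path="/",    # write at the root of the output file
                mode="w",      # overwrite the output file if it exists
                create_dataset_args={"compression": "gzip"},
                min_size=500)  # only compress/chunk datasets with >= 500 elements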
def main(argv):
    """
    Main function to launch the converter as an application

    :param argv: Command line arguments
    :returns: exit status
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'input_files',
        nargs="+",
        help='Input files (EDF, SPEC)')
    parser.add_argument(
        '-o', '--output-uri',
        nargs="?",
        help='Output file (HDF5). If omitted, it will be the '
             'concatenated input file names, with a ".h5" suffix added.'
             ' An URI can be provided to write the data into a specific '
             'group in the output file: /path/to/file::/path/to/group')
    parser.add_argument(
        '-m', '--mode',
        default="w-",
        help='Write mode: "r+" (read/write, file must exist), '
             '"w" (write, existing file is lost), '
             '"w-" (write, fail if file exists) or '
             '"a" (read/write if exists, create otherwise)')
    parser.add_argument(
        '--no-root-group',
        action="store_true",
        help='This option disables the default behavior of creating a '
             'root group (entry) for each file to be converted. When '
             'merging multiple input files, this can cause conflicts '
             'when datasets have the same name (see --overwrite-data).')
    parser.add_argument(
        '--overwrite-data',
        action="store_true",
        help='If the output path exists and an input dataset has the same'
             ' name as an existing output dataset, overwrite the output '
             'dataset (in modes "r+" or "a").')
    parser.add_argument(
        '--min-size',
        type=int,
        default=500,
        help='Minimum number of elements required to be in a dataset to '
             'apply compression or chunking (default 500).')
    parser.add_argument(
        '--chunks',
        nargs="?",
        const="auto",
        help='Chunk shape. Provide an argument that evaluates as a python '
             'tuple (e.g. "(1024, 768)"). If this option is provided without '
             'specifying an argument, the h5py library will guess a chunk for '
             'you. Note that if you specify an explicit chunking shape, it '
             'will be applied identically to all datasets with a large enough '
             'size (see --min-size).')
    parser.add_argument(
        '--compression',
        nargs="?",
        const="gzip",
        help='Compression filter. By default, the datasets in the output '
             'file are not compressed. If this option is specified without '
             'argument, the GZIP compression is used. Additional compression '
             'filters may be available, depending on your HDF5 installation.')

    def check_gzip_compression_opts(value):
        ivalue = int(value)
        if ivalue < 0 or ivalue > 9:
            raise argparse.ArgumentTypeError(
                "--compression-opts must be an int from 0 to 9")
        return ivalue

    parser.add_argument(
        '--compression-opts',
        type=check_gzip_compression_opts,
        help='Compression options. For "gzip", this may be an integer from '
             '0 to 9, with a default of 4. This is only supported for GZIP.')
    parser.add_argument(
        '--shuffle',
        action="store_true",
        help='Enables the byte shuffle filter, may improve the compression '
             'ratio for block oriented compressors like GZIP or LZF.')
    parser.add_argument(
        '--fletcher32',
        action="store_true",
        help='Adds a checksum to each chunk to detect data corruption.')
    parser.add_argument(
        '--debug',
        action="store_true",
        default=False,
        help='Set logging system in debug mode')

    options = parser.parse_args(argv[1:])

    # some shells (windows) don't interpret wildcard characters (*, ?, [])
    old_input_list = list(options.input_files)
    options.input_files = []
    for fname in old_input_list:
        globbed_files = glob(fname)
        if not globbed_files:
            # no files found, keep the name as it is, to raise an error later
            options.input_files += [fname]
        else:
            options.input_files += globbed_files
    old_input_list = None

    if options.debug:
        logging.root.setLevel(logging.DEBUG)

    # Import most of the things here to be sure to use the right logging level
    try:
        # it should be loaded before h5py
        import hdf5plugin  # noqa
    except ImportError:
        _logger.debug("Backtrace", exc_info=True)
        hdf5plugin = None

    try:
        import h5py
        from silx.io.convert import write_to_h5
    except ImportError:
        _logger.debug("Backtrace", exc_info=True)
        h5py = None
        write_to_h5 = None

    if h5py is None:
        message = "Module 'h5py' is not installed but is mandatory."\
            + " You can install it using \"pip install h5py\"."
        _logger.error(message)
        return -1

    if hdf5plugin is None:
        message = "Module 'hdf5plugin' is not installed. It supports additional hdf5"\
            + " compressions. You can install it using \"pip install hdf5plugin\"."
        _logger.debug(message)

    # Test that the output path is writeable
    if options.output_uri is None:
        input_basenames = [
            os.path.basename(name) for name in options.input_files]
        output_name = ''.join(input_basenames) + ".h5"
        _logger.info("No output file specified, using %s", output_name)
        hdf5_path = "/"
    else:
        if "::" in options.output_uri:
            output_name, hdf5_path = options.output_uri.split("::")
        else:
            output_name, hdf5_path = options.output_uri, "/"

    if os.path.isfile(output_name):
        if options.mode == "w-":
            _logger.error(
                "Output file %s exists and mode is 'w-'"
                " (write, file must not exist). Aborting.", output_name)
            return -1
        elif not os.access(output_name, os.W_OK):
            _logger.error("Output file %s exists and is not writeable.",
                          output_name)
            return -1
        elif options.mode == "w":
            _logger.info(
                "Output file %s exists and mode is 'w'. "
                "Overwriting existing file.", output_name)
        elif options.mode in ["a", "r+"]:
            _logger.info("Appending data to existing file %s.", output_name)
    else:
        if options.mode == "r+":
            _logger.error(
                "Output file %s does not exist and mode is 'r+'"
                " (append, file must exist). Aborting.", output_name)
            return -1
        else:
            _logger.info("Creating new output file %s.", output_name)

    # Test that all input files exist and are readable
    bad_input = False
    for fname in options.input_files:
        if not os.access(fname, os.R_OK):
            _logger.error("Cannot read input file %s.", fname)
            bad_input = True
    if bad_input:
        _logger.error("Aborting.")
        return -1

    # create_dataset special args
    create_dataset_args = {}
    if options.chunks is not None:
        if options.chunks.lower() in ["auto", "true"]:
            create_dataset_args["chunks"] = True
        else:
            try:
                chunks = ast.literal_eval(options.chunks)
            except (ValueError, SyntaxError):
                _logger.error("Invalid --chunks argument %s", options.chunks)
                return -1
            if not isinstance(chunks, (tuple, list)):
                _logger.error(
                    "--chunks argument str does not evaluate to a tuple")
                return -1
            else:
                nitems = numpy.prod(chunks)
                nbytes = nitems * 8
                if nbytes > 10**6:
                    _logger.warning("Requested chunk size might be larger than"
                                    " the default 1MB chunk cache, for float64"
                                    " data. This can dramatically affect I/O "
                                    "performances.")
                create_dataset_args["chunks"] = chunks

    if options.compression is not None:
        create_dataset_args["compression"] = options.compression
    if options.compression_opts is not None:
        create_dataset_args["compression_opts"] = options.compression_opts
    if options.shuffle:
        create_dataset_args["shuffle"] = True
    if options.fletcher32:
        create_dataset_args["fletcher32"] = True

    with h5py.File(output_name, mode=options.mode) as h5f:
        for input_name in options.input_files:
            hdf5_path_for_file = hdf5_path
            if not options.no_root_group:
                hdf5_path_for_file = hdf5_path.rstrip("/") + "/" + \
                    os.path.basename(input_name)
            write_to_h5(input_name, h5f,
                        h5path=hdf5_path_for_file,
                        overwrite_data=options.overwrite_data,
                        create_dataset_args=create_dataset_args,
                        min_size=options.min_size)

            # append the convert command to the creator attribute, for NeXus files
            creator = h5f[hdf5_path_for_file].attrs.get("creator", b"").decode()
            convert_command = " ".join(argv)
            if convert_command not in creator:
                h5f[hdf5_path_for_file].attrs["creator"] = \
                    numpy.string_(creator + "; convert command: %s" % " ".join(argv))

    return 0
def main(argv):
    """
    Main function to launch the converter as an application

    :param argv: Command line arguments
    :returns: exit status
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        'input_files',
        nargs="*",
        help='Input files (EDF, TIFF, SPEC...). When specifying multiple '
             'files, you cannot specify both fabio images and SPEC files. '
             'Multiple SPEC files will simply be concatenated, with one '
             'entry per scan. Multiple image files will be merged into '
             'a single entry with a stack of images.')
    # input_files and --file-pattern are mutually exclusive
    parser.add_argument(
        '--file-pattern',
        help='File name pattern for loading a series of indexed image files '
             '(toto_%%04d.edf). This argument is incompatible with argument '
             'input_files. If an output URI with a HDF5 path is provided, '
             'only the content of the NXdetector group will be copied there. '
             'If no HDF5 path, or just "/", is given, a complete NXdata '
             'structure will be created.')
    parser.add_argument(
        '-o', '--output-uri',
        default=time.strftime("%Y%m%d-%H%M%S") + '.h5',
        help='Output file name (HDF5). An URI can be provided to write'
             ' the data into a specific group in the output file: '
             '/path/to/file::/path/to/group. '
             'If not provided, the filename defaults to a timestamp:'
             ' YYYYmmdd-HHMMSS.h5')
    parser.add_argument(
        '-m', '--mode',
        default="w-",
        help='Write mode: "r+" (read/write, file must exist), '
             '"w" (write, existing file is lost), '
             '"w-" (write, fail if file exists) or '
             '"a" (read/write if exists, create otherwise)')
    parser.add_argument(
        '--begin',
        help='First file index, or first file indices to be considered. '
             'This argument only makes sense when used together with '
             '--file-pattern. Provide as many start indices as there '
             'are indices in the file pattern, separated by commas. '
             'Examples: "--file-pattern toto_%%d.edf --begin 100", '
             ' "--file-pattern toto_%%d_%%04d_%%02d.edf --begin 100,2000,5".')
    parser.add_argument(
        '--end',
        help='Last file index, or last file indices to be considered. '
             'The same rules as with argument --begin apply. '
             'Example: "--file-pattern toto_%%d_%%d.edf --end 199,1999"')
    parser.add_argument(
        '--add-root-group',
        action="store_true",
        help='This option causes each input file to be written to a '
             'specific root group with the same name as the file. When '
             'merging multiple input files, this can help prevent conflicts'
             ' when datasets have the same name (see --overwrite-data). '
             'This option is ignored when using --file-pattern.')
    parser.add_argument(
        '--overwrite-data',
        action="store_true",
        help='If the output path exists and an input dataset has the same'
             ' name as an existing output dataset, overwrite the output '
             'dataset (in modes "r+" or "a").')
    parser.add_argument(
        '--min-size',
        type=int,
        default=500,
        help='Minimum number of elements required to be in a dataset to '
             'apply compression or chunking (default 500).')
    parser.add_argument(
        '--chunks',
        nargs="?",
        const="auto",
        help='Chunk shape. Provide an argument that evaluates as a python '
             'tuple (e.g. "(1024, 768)"). If this option is provided without '
             'specifying an argument, the h5py library will guess a chunk for '
             'you. Note that if you specify an explicit chunking shape, it '
             'will be applied identically to all datasets with a large enough '
             'size (see --min-size).')
    parser.add_argument(
        '--compression',
        nargs="?",
        const="gzip",
        help='Compression filter. By default, the datasets in the output '
             'file are not compressed. If this option is specified without '
             'argument, the GZIP compression is used. Additional compression '
             'filters may be available, depending on your HDF5 installation.')

    def check_gzip_compression_opts(value):
        ivalue = int(value)
        if ivalue < 0 or ivalue > 9:
            raise argparse.ArgumentTypeError(
                "--compression-opts must be an int from 0 to 9")
        return ivalue

    parser.add_argument(
        '--compression-opts',
        type=check_gzip_compression_opts,
        help='Compression options. For "gzip", this may be an integer from '
             '0 to 9, with a default of 4. This is only supported for GZIP.')
    parser.add_argument(
        '--shuffle',
        action="store_true",
        help='Enables the byte shuffle filter. This may improve the compression '
             'ratio for block oriented compressors like GZIP or LZF.')
    parser.add_argument(
        '--fletcher32',
        action="store_true",
        help='Adds a checksum to each chunk to detect data corruption.')
    parser.add_argument(
        '--debug',
        action="store_true",
        default=False,
        help='Set logging system in debug mode')

    options = parser.parse_args(argv[1:])

    if options.debug:
        logging.root.setLevel(logging.DEBUG)

    # Import after parsing --debug
    try:
        # it should be loaded before h5py
        import hdf5plugin  # noqa
    except ImportError:
        _logger.debug("Backtrace", exc_info=True)
        hdf5plugin = None

    try:
        import h5py
        from silx.io.convert import write_to_h5
    except ImportError:
        _logger.debug("Backtrace", exc_info=True)
        h5py = None
        write_to_h5 = None

    if h5py is None:
        message = "Module 'h5py' is not installed but is mandatory."\
            + " You can install it using \"pip install h5py\"."
        _logger.error(message)
        return -1

    if hdf5plugin is None:
        message = "Module 'hdf5plugin' is not installed. It supports additional hdf5"\
            + " compressions. You can install it using \"pip install hdf5plugin\"."
        _logger.debug(message)

    # Process input arguments (mutually exclusive arguments)
    if bool(options.input_files) == bool(options.file_pattern is not None):
        if not options.input_files:
            message = "You must specify either input files (at least one), "
            message += "or a file pattern."
        else:
            message = "You cannot specify input files and a file pattern"
            message += " at the same time."
        _logger.error(message)
        return -1
    elif options.input_files:
        # some shells (windows) don't interpret wildcard characters (*, ?, [])
        old_input_list = list(options.input_files)
        options.input_files = []
        for fname in old_input_list:
            globbed_files = glob(fname)
            if not globbed_files:
                # no files found, keep the name as it is, to raise an error later
                options.input_files += [fname]
            else:
                # glob does not sort files, but the bash shell does
                options.input_files += sorted(globbed_files)
    else:
        # File series
        dirname = os.path.dirname(options.file_pattern)
        file_pattern_re = c_format_string_to_re(options.file_pattern) + "$"
        files_in_dir = glob(os.path.join(dirname, "*"))
        _logger.debug(
            """
            Processing file_pattern
            dirname: %s
            file_pattern_re: %s
            files_in_dir: %s
            """, dirname, file_pattern_re, files_in_dir)

        options.input_files = sorted(list(filter(
            lambda name: re.match(file_pattern_re, name),
            files_in_dir)))
        _logger.debug("options.input_files: %s", options.input_files)

        if options.begin is not None:
            options.input_files = drop_indices_before_begin(
                options.input_files, file_pattern_re, options.begin)
            _logger.debug("options.input_files after applying --begin: %s",
                          options.input_files)

        if options.end is not None:
            options.input_files = drop_indices_after_end(
                options.input_files, file_pattern_re, options.end)
            _logger.debug("options.input_files after applying --end: %s",
                          options.input_files)

        if are_files_missing_in_series(options.input_files, file_pattern_re):
            _logger.error("File missing in the file series. Aborting.")
            return -1

        if not options.input_files:
            _logger.error("No file matching --file-pattern found.")
            return -1

    # Test that the output path is writeable
    if "::" in options.output_uri:
        output_name, hdf5_path = options.output_uri.split("::")
    else:
        output_name, hdf5_path = options.output_uri, "/"

    if os.path.isfile(output_name):
        if options.mode == "w-":
            _logger.error("Output file %s exists and mode is 'w-' (default)."
                          " Aborting. To append data to an existing file, "
                          "use 'a' or 'r+'.", output_name)
            return -1
        elif not os.access(output_name, os.W_OK):
            _logger.error("Output file %s exists and is not writeable.",
                          output_name)
            return -1
        elif options.mode == "w":
            _logger.info("Output file %s exists and mode is 'w'. "
                         "Overwriting existing file.", output_name)
        elif options.mode in ["a", "r+"]:
            _logger.info("Appending data to existing file %s.", output_name)
    else:
        if options.mode == "r+":
            _logger.error("Output file %s does not exist and mode is 'r+'"
                          " (append, file must exist). Aborting.", output_name)
            return -1
        else:
            _logger.info("Creating new output file %s.", output_name)

    # Test that all input files exist and are readable
    bad_input = False
    for fname in options.input_files:
        if not os.access(fname, os.R_OK):
            _logger.error("Cannot read input file %s.", fname)
            bad_input = True
    if bad_input:
        _logger.error("Aborting.")
        return -1

    # create_dataset special args
    create_dataset_args = {}
    if options.chunks is not None:
        if options.chunks.lower() in ["auto", "true"]:
            create_dataset_args["chunks"] = True
        else:
            try:
                chunks = ast.literal_eval(options.chunks)
            except (ValueError, SyntaxError):
                _logger.error("Invalid --chunks argument %s", options.chunks)
                return -1
            if not isinstance(chunks, (tuple, list)):
                _logger.error(
                    "--chunks argument str does not evaluate to a tuple")
                return -1
            else:
                nitems = numpy.prod(chunks)
                nbytes = nitems * 8
                if nbytes > 10**6:
                    _logger.warning("Requested chunk size might be larger than"
                                    " the default 1MB chunk cache, for float64"
                                    " data. This can dramatically affect I/O "
                                    "performances.")
                create_dataset_args["chunks"] = chunks

    if options.compression is not None:
        create_dataset_args["compression"] = options.compression
    if options.compression_opts is not None:
        create_dataset_args["compression_opts"] = options.compression_opts
    if options.shuffle:
        create_dataset_args["shuffle"] = True
    if options.fletcher32:
        create_dataset_args["fletcher32"] = True

    if (len(options.input_files) > 1 and
            not contains_specfile(options.input_files) and
            not options.add_root_group) or options.file_pattern is not None:
        # File series -> stack of images
        if fabioh5 is None:
            # return a helpful error message if fabio is missing
            try:
                import fabio
            except ImportError:
                _logger.error("The fabio library is required to convert"
                              " edf files. Please install it with "
                              "`pip install fabio` and try again.")
            else:
                # unexpected problem in silx.io.fabioh5
                raise
            return -1

        input_group = fabioh5.File(file_series=options.input_files)
        if hdf5_path != "/":
            # we want to append only data and headers to an existing file
            input_group = input_group["/scan_0/instrument/detector_0"]

        with h5py.File(output_name, mode=options.mode) as h5f:
            write_to_h5(input_group, h5f,
                        h5path=hdf5_path,
                        overwrite_data=options.overwrite_data,
                        create_dataset_args=create_dataset_args,
                        min_size=options.min_size)

    elif len(options.input_files) == 1 or \
            are_all_specfile(options.input_files) or \
            options.add_root_group:
        # single file, or spec files
        h5paths_and_groups = []
        for input_name in options.input_files:
            hdf5_path_for_file = hdf5_path
            if options.add_root_group:
                hdf5_path_for_file = hdf5_path.rstrip("/") + "/" + \
                    os.path.basename(input_name)
            try:
                h5paths_and_groups.append(
                    (hdf5_path_for_file, silx.io.open(input_name)))
            except IOError:
                _logger.error("Cannot read file %s. If this is a file format "
                              "supported by the fabio library, you can try to"
                              " install fabio (`pip install fabio`)."
                              " Aborting conversion.", input_name)
                return -1

        with h5py.File(output_name, mode=options.mode) as h5f:
            for hdf5_path_for_file, input_group in h5paths_and_groups:
                write_to_h5(input_group, h5f,
                            h5path=hdf5_path_for_file,
                            overwrite_data=options.overwrite_data,
                            create_dataset_args=create_dataset_args,
                            min_size=options.min_size)

    else:
        # multiple files, SPEC and fabio images mixed
        _logger.error("Multiple files with incompatible formats specified. "
                      "You can provide multiple SPEC files or multiple image "
                      "files, but not both.")
        return -1

    with h5py.File(output_name, mode="r+") as h5f:
        # append "silx convert" to the creator attribute, for NeXus files
        previous_creator = h5f.attrs.get("creator", u"")
        creator = "silx convert (v%s)" % silx.version
        # only if it is not already there
        if creator not in previous_creator:
            if not previous_creator:
                new_creator = creator
            else:
                new_creator = previous_creator + "; " + creator
            h5f.attrs["creator"] = numpy.array(
                new_creator, dtype=h5py.special_dtype(vlen=six.text_type))

    return 0
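# Usage sketch (illustrative, not part of the module): this main() is intended
# to be wired to a console entry point such as "silx convert"; the file names
# below are placeholders, not examples from the source.
#
#     silx convert scan_1.spec scan_2.spec -o merged.h5::/entries
#     silx convert --file-pattern img_%04d.edf -o stack.h5 --compression gzip --shuffle
#
# A minimal wrapper, assuming the module were run directly, could be:
#
#     if __name__ == "__main__":
#         import sys
#         sys.exit(main(sys.argv))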
                        'else create new file.')
mode_group.add_argument('-a', '--append',
                        action="store_true",
                        help='Append data to existing file if it exists, ' +
                             'else create new file.')
parser.add_argument('--overwrite-data',
                    action="store_true",
                    help='In append mode, overwrite existing groups and ' +
                         'datasets in the output file, if they exist with ' +
                         'the same name as input data. By default, existing' +
                         ' data is not touched, corresponding input data is' +
                         ' ignored.')
args = parser.parse_args()

if args.overwrite_data and not args.append:
    print("Option --overwrite-data ignored " +
          "(only relevant combined with option -a)")

if args.overwrite:
    mode = "w"
elif args.append:
    mode = "a"
else:
    # by default, use "write" mode and fail if file already exists
    mode = "w-"

write_to_h5(args.input_path, args.h5_path,
            h5path=args.target_path,
            mode=mode,
            overwrite_data=args.overwrite_data)