def convert_files(filenames, extension=None, path=None, processes=None, verbose=False):
    """Transforms list of files to their sink format in parallel.

    Arguments
    ---------
    filenames : list of str
        The filenames to convert.
    extension : str
        The new file format extension.
    path : str or None
        Optional path specification.
    processes : int, 'serial' or None
        The number of processes to use for parallel conversion; if None,
        use all available CPUs.
    verbose : bool
        If True, print progress information.

    Returns
    -------
    filenames : list of str
        The new file names.
    """
    if not isinstance(filenames, (tuple, list)):
        filenames = [filenames]
    if len(filenames) == 0:
        return []
    n_files = len(filenames)

    if path is not None:
        # Re-root each file into the requested directory.
        filenames = [fu.join(path, fu.split(f)[1]) for f in filenames]
    # Replace the last filename extension with the new one.
    sinks = ['.'.join(f.split('.')[:-1] + [extension]) for f in filenames]

    if verbose:
        timer = tmr.Timer()
        print('Converting %d files to %s!' % (n_files, extension))

    if not isinstance(processes, int) and processes != 'serial':
        processes = mp.cpu_count()

    _convert = functools.partial(_convert_files, n_files=n_files,
                                 extension=extension, verbose=verbose)

    if processes == 'serial':
        # Plain loop: conversion is done purely for its side effects,
        # so a throwaway list comprehension was replaced by a for loop.
        for i, (source, sink) in enumerate(zip(filenames, sinks)):
            _convert(source, sink, i)
    else:
        with concurrent.futures.ProcessPoolExecutor(processes) as executor:
            executor.map(_convert, filenames, sinks, range(n_files))

    if verbose:
        timer.print_elapsed_time('Converting %d files to %s' % (n_files, extension))

    return sinks
def initialize_lookup_table(function=match_index, filename=filename):
    """Initialize the lookup table, generating and caching it if absent."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(module_dir, filename)

    # A compressed copy may be all that exists on disk; expand it first.
    fu.uncompress(filename)

    if not os.path.exists(filename):
        # Not cached yet: compute the table and save it for next time.
        lut = generate_lookup_table(function=function)
        np.save(filename, lut)
        return lut

    return np.load(filename)
def initializeStitcher(path=None):
    """Initialize settings for the TeraStitcher.

    Arguments:
        path (str or None): path to TeraStitcher root directory, if None
            :const:`ClearMap.Settings.TeraStitcherPath` is used.

    See also:
        :const:`TeraStitcherBinary`, :const:`Initialized`
    """
    global TeraStitcherBinary, Initialized

    if path is None:
        path = settings.TeraStitcherPath

    # search for the terastitcher binary
    terastitcherbin = os.path.join(path, 'bin/terastitcher')
    if os.path.exists(terastitcherbin):
        TeraStitcherBinary = terastitcherbin
    else:
        raise RuntimeError("Cannot find TeraSticher binary %s, set path in Settings.py accordingly!" % terastitcherbin)

    # add the global optimization script folder
    if not fileutils.isFile(os.path.join(path, 'LQP_HE.py')):
        print('Warning: the global optimization file %s for TeraStitcher cannot be found in %s' % ('LQP_HE.py', path))
    os.environ["__LQP_PATH__"] = path

    Initialized = True

    # Fixed: was a Python 2 print statement (SyntaxError under Python 3);
    # also corrected the typos in the message itself.
    print("TeraStitcher successfully initialized from path: %s" % path)

    return path
def initialize_lookup_table(function=index_to_smoothing, filename=smooth_by_configuration_filename, verbose=True, processes=None):
    """Initialize the lookup table"""
    filename = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    # Only a zipped copy may be on disk; expand it before checking.
    fu.uncompress(filename)

    if not os.path.exists(filename):
        # Table missing: compute it once and cache it to disk.
        if verbose:
            print('Smoothing: Look-up table does not exists! Pre-calculating it!')
        lut = generate_lookup_table(function=function, verbose=verbose, processes=processes)
        np.save(filename, lut)
        return lut

    if verbose:
        print('Smoothing: Loading look-up table from %s!' % filename)
    return np.load(filename)
def is_memmap(source):
    """Check whether `source` is, or points to, a numpy memory-mapped array.

    Arguments
    ---------
    source : object
        A np.memmap, a Source, a filename, or any other object.

    Returns
    -------
    is_memmap : bool
        True if `source` is a memmap/Source instance, or a filename of an
        existing file that can be opened as a numpy memmap; False otherwise.
    """
    if isinstance(source, (np.memmap, Source)):
        return True
    elif isinstance(source, str):
        if fu.is_file(source):
            try:
                memmap = np.memmap(source)  #analysis:ignore
            except Exception:
                # Narrowed from a bare except so SystemExit /
                # KeyboardInterrupt are not swallowed.
                return False
            return True
        else:
            return False
    # Fixed: previously fell through and returned None for other types.
    return False
def _file_list(expression=None, file_list=None, sort=True, verbose=False): """Returns the list of files that match the tag expression. Arguments --------- expression :str The regular expression the file names should match. sort : bool If True, sort files naturally. verbose : bool If True, print warning if no files exists. Returns ------- file_list : list of str The list of files that matched the expression. """ if isinstance(file_list, list): return file_list if isinstance(expression, te.Expression): fl = glob.glob(expression.glob()) elif fu.is_directory(expression): expression = fu.join(expression, '*') fl = glob.glob(expression) else: e = te.Expression(expression) fl = glob.glob(e.glob()) if verbose and len(fl) == 0: warnings.warn('No files found matching %s !' % expression) return [] if sort: fl = natsort.natsorted(fl) return fl
def _expression_and_file_list(expression=None, file_list=None): if isinstance(expression, te.Expression) or expression is None: pass elif fu.is_directory(expression): if file_list is None: file_list = glob.glob(fu.join(expression, '*')) expression = te.detect(file_list) elif isinstance(expression, (str, list)): expression = te.Expression(expression) else: raise ValueError('Expression %r is not valid!' % expression) if file_list is None: if expression is None: raise ValueError( 'Either expression or file_list need to be specified!') file_list = glob.glob(expression.glob()) elif isinstance(file_list, list): if expression is None: expression = te.detect(file_list) else: raise ValueError('The file_list %r is not value!' % file_list) return expression, file_list
def _expression_or_file_list(expression=None, file_list=None): if isinstance(expression, te.Expression) or expression is None: pass elif fu.is_directory(expression): file_list = _file_list(expression=expression, sort=True) expression = None elif isinstance(expression, str): expression = te.Expression(expression) else: raise ValueError('The expression %r is not valid!' % expression) if file_list is not None and not isinstance(file_list, list): raise ValueError('The file_list %r is not a list or None!' % file_list) if expression is None and file_list is None: raise ValueError('Expresson and file_list cannot both be None!') return expression, file_list
def filename_to_module(filename):
    """Returns the IO module associated with a filename.

    Arguments
    ---------
    filename : str
        The file name.

    Returns
    -------
    module : module
        The module that handles the IO of the file.

    Raises
    ------
    ValueError
        If no module is registered for the file's extension.
    """
    extension = fu.file_extension(filename)
    module = file_extension_to_module.get(extension, None)
    if module is None:
        raise ValueError("Cannot determine module for file %s with extension %s!" % (filename, extension))
    return module
def exists(self):
    """True if this source has a location that is an existing file on disk."""
    location = self.location
    if location is None:
        return False
    return fu.is_file(location)
############################################################################### #TODO: move to settings ? atlas_path = os.path.join(settings.resources_path, 'Atlas') """Default path to atlas infomration. """ default_annotation_file = os.path.join(atlas_path, 'ABA_25um_annotation.tif') """Default volumetric annotated image file. Note ---- This file is by default the Allen brain annotated mouse atlas with 25um isotropic resolution. """ fu.uncompress(default_annotation_file) default_reference_file = os.path.join(atlas_path, 'ABA_25um_reference.tif') """Default volumetric annotated image file. Note ---- This file is by default the Allen brain annotated mouse atlas with 25um isotropic resolution. """ fu.uncompress(default_reference_file) default_distance_to_surface_file = os.path.join( atlas_path, 'ABA_25um_distance_to_surface.tif') """Default volumetric annotated image file.
def write(filename, data, slicing=None, blocks=None, processes=None, verbose=False):
    """Write a large array to disk in parallel.

    Arguments
    ---------
    filename : str
        Filename of the array to write.
    data : array
        Array to save to disk.
    slicing : slice specification or None
        If given, write only into this region of an already existing file.
    blocks : int or None
        Number of blocks to split array into for parallel processing.
    processes : None or int
        Number of processes, if None use number of cpus.
    verbose : bool
        Print info about the file to be written.

    Returns
    -------
    filename : str
        The filename of the numpy array on disk.
    """
    if processes is None:
        processes = mp.cpu_count()
    if blocks is None:
        blocks = processes * default_blocks_per_process

    #data
    data = io.as_source(data)

    #prepare sink
    is_file = fu.is_file(filename)
    if slicing is not None and not is_file:
        # Fixed wording (was: "Cannot write to a slice to ...").
        raise ValueError('Cannot write a slice to a non-existing file %s!' % filename)
    if slicing is None:
        # Create the file on disk via memmap so the parallel writer has a
        # correctly sized target.
        fortran_order = 'F' == data.order
        memmap = np.lib.format.open_memmap(filename, mode='w+',
                                           shape=data.shape, dtype=data.dtype,
                                           fortran_order=fortran_order)
        memmap.flush()
        del memmap

    sink = mmp.Source(location=filename)
    if slicing is not None:
        sink = slc.Slice(source=sink, slicing=slicing)
    shape, dtype, order, offset = sink.shape, sink.dtype, sink.order, sink.offset

    if data.order != order:
        raise RuntimeError('Order of arrays do not match %r!=%r' % (data.order, order))
    if order not in ['C', 'F']:
        # Fixed message: this is the writer, and 'contiguous' was misspelled.
        raise NotImplementedError('Cannot write in parallel to non-contiguous sink!')
        #TODO: implement parallel writer with strides !

    if verbose:
        timer = tmr.Timer()
        print('Writing data to sink of shape = %r, dtype = %r, order = %r, offset = %r'
              % (shape, dtype, order, offset))

    d = data.reshape(-1, order='A')
    if d.dtype == bool:
        # The raw block writer has no bool support; reinterpret as bytes.
        d = d.view('uint8')

    code.write(data=d, filename=filename, offset=offset, blocks=blocks, processes=processes)

    if verbose:
        timer.print_elapsed_time(head='Writing data to %s' % filename)

    return filename
def is_file_list(expression, exists=False, tag_names=None, n_tags=-1, verbose=False):
    """Checks if the expression is a valid file list.

    Arguments
    ---------
    expression : str
        The regular expression to check.
    exists : bool
        If True, check if at least one file exists.
    tag_names : list of str or None
        List of tag names expected to be present in the expression.
    n_tags : int or None
        Number of tags to expect; a negative value means "at least that many
        tags" (the default -1 requires at least one tag).
    verbose : bool
        If True, print the reason why the expression does not represent the
        desired file list.

    Returns
    -------
    is_expression : bool
        Returns True if the expression fulfills the desired criteria and,
        if requested, at least one file matching the expression exists.
    """
    # A Source instance is always accepted as a file list.
    if isinstance(expression, Source):
        return True

    if not isinstance(expression, (str, te.Expression)):
        if verbose:
            warnings.warn('The expression %r is not a string or valid Source!' % expression)
        return False

    if fu.is_directory(expression):
        # A directory is a valid file list; when `exists` is requested it
        # additionally has to be non-empty.
        if exists:
            if len(os.listdir(expression)) == 0:
                if verbose:
                    warnings.warn('No files exists in the directory %s!' % expression)
                return False
            else:
                return True
        else:
            return True

    if tag_names is not None or n_tags is not None:
        t = te.Expression(expression) if not isinstance(expression, te.Expression) else expression
        if n_tags is not None:
            # Negative n_tags: require at least |n_tags| tags;
            # non-negative n_tags: require exactly n_tags tags.
            if n_tags < 0 and -n_tags > t.ntags():
                if verbose:
                    warnings.warn('Expression has not required number %d of tags, but %d!' % (n_tags, t.ntags()))
                return False
            elif n_tags >= 0 and n_tags != t.ntags():
                if verbose:
                    warnings.warn('Expression has not required number %d of tags, but %d!' % (n_tags, t.ntags()))
                return False
        if tag_names is not None:
            # Tag names must match exactly, including their order.
            if tag_names != t.tag_names():
                if verbose:
                    warnings.warn('Expression has not required tags %r, but %r!' % (tag_names, t.tag_names()))
                return False

    if exists:
        # At least one file matching the expression must be present on disk.
        f = _first_file(expression)
        if f is None:
            if verbose:
                warnings.warn('Expression does not point to any files!')
            return False

    return True
def __setitem__(self, slicing, data, processes=None):
    """Write `data` into the files of this source selected by `slicing`.

    The trailing `expression.ntags()` axes of the slicing select files from
    the file list; the leading axes slice within each individual file.
    """
    e = self.expression
    shape = self.shape
    ndim = self.ndim
    ndim_list = e.ntags()

    slicing = slc.unpack_slicing(slicing, ndim)
    # Split the slicing into the within-file part and the file-list part.
    slicing_file = slicing[:-ndim_list]
    slicing_list = slicing[-ndim_list:]
    shape_list = shape[-ndim_list:]

    # start indices: tag values of the first file anchor the index offsets
    indices_start = self.expression.indices(self.file_list[0])
    #TODO: steps in file list

    # generate the list of file indices to write
    # Note: indices increase according to the axes order but their own order is in tag order
    indices = []
    for sl, s, i in zip(slicing_list, shape_list, indices_start):
        if isinstance(sl, slice):
            slice_indices = sl.indices(s)
            # Shift the slice bounds by the tag's start index in the file names.
            slice_indices = (slice_indices[0] + i, slice_indices[1] + i, slice_indices[2])
            indices.append(range(*slice_indices))
        elif isinstance(sl, (list, np.ndarray)):
            indices.append(np.array(sl) + i)
        elif isinstance(sl, numbers.Integral):
            indices.append([sl + i])
        else:
            raise IndexError('Invalid slice specification %r!' % sl)
    # Reverse so the product varies the first axis fastest, then restore order.
    indices.reverse()
    indices = itertools.product(*indices)
    indices = [i[::-1] for i in indices]

    # Reorder index tuples from axes order to tag order when they differ.
    axes_to_tags = self.axes_to_tag_order()
    if len(axes_to_tags) > 1 and axes_to_tags != list(range(len(axes_to_tags))):
        indices = [tuple(i[j] for j in axes_to_tags) for i in indices]

    fl = [e.string_from_index(i) for i in indices]

    # create directory if it does not exist
    # Note: move this to func if files need to be distributed across several directories
    fu.create_directory(fl[0], split=True)

    if processes is None:
        processes = mp.cpu_count()

    @ptb.parallel_traceback
    def func(filename, index, data=data, slicing=slicing_file):
        # Ellipsis keeps all leading (within-file) axes of the data.
        index = (Ellipsis, ) + index
        io.write(sink=filename, data=data[index], slicing=slicing, processes='serial')

    if processes == 'serial':
        for f, i in zip(fl, indices):
            func(f, i)
    else:
        with concurrent.futures.ThreadPoolExecutor(processes) as executor:
            executor.map(func, fl, indices)
def _memmap(location=None, shape=None, dtype=None, order=None, mode=None, array=None): """Create a memory map. Arguments --------- location : str The filename of the memory mapped array. shape : tuple or None The shape of the memory map to create. dtype : dtype The data type of the memory map. order : 'C', 'F', or None The contiguous order of the memmap. mode : 'r', 'w', 'w+', None The mode to open the memory map. array : array, Source or None Optional source with data to fill the memory map with. Returns ------- memmap : np.memmap The memory map. Note ---- By default memmaps are initialized as fortran contiguous if order is None. """ #print location, shape, dtype, order, mode, array if isinstance(location, np.memmap): array = location location = None if array is None: if not isinstance(location, str): raise ValueError('Cannot create memmap without a location!') if mode != 'w+' and fu.is_file(location): array = np.lib.format.open_memmap(location) if array is None: if shape is None: raise ValueError( 'Cannot create memmap without shape at location %r!' % location) mode = 'w+' if mode is None else mode fortran = order in ['F', None] #default is 'F' for memmaps memmap = np.lib.format.open_memmap(location, mode=mode, shape=shape, dtype=dtype, fortran_order=fortran) elif isinstance(array, np.memmap): location = location if location is not None else array.filename location = fu.abspath(location) shape = shape if shape is not None else array.shape dtype = dtype if dtype is not None else array.dtype order = order if order is not None else npy.order(array) #if shape != array.shape: # raise ValueError('Shape %r and array shape %r mismatch!' 
% (shape, array.shape)); if shape != array.shape or dtype != array.dtype or order != npy.order( array) or location != fu.abspath(array.filename): fortran = order in ['F', None] #default is 'F' for memmaps memmap = np.lib.format.open_memmap(location, mode='w+', shape=shape, dtype=dtype, fortran_order=fortran) if shape == array.shape: memmap[:] = array else: memmap = array if mode is None: mode = 'r+' if mode != memmap.mode: memmap = np.lib.format.open_memmap(location, mode=mode) elif isinstance(array, np.ndarray): if not isinstance(location, str): raise ValueError('Cannot create memmap without a location!') shape = shape if shape is not None else array.shape dtype = dtype if dtype is not None else array.dtype order = order if order is not None else npy.order(array) if shape != array.shape: raise ValueError('Shape %r and array shape %r mismatch!' % (shape, array.shape)) fortran = order in ['F', None] #default is 'F' for memmaps memmap = np.lib.format.open_memmap(location, mode='w+', shape=shape, dtype=dtype, fortran_order=fortran) memmap[:] = array if mode is None: mode = 'r+' if mode != memmap.mode: memmap = np.lib.format.open_memmap(location, mode=mode) else: raise ValueError('Array is not a valid!') return memmap