def _to_input_list(v): # TODO - move this fcn definition?? if isinstance(v, list): return v else: # value is a string glob name, serverfilter = unifiedIO.split_cluster_url(v) return clusterIO.cglob(name, serverfilter)
def __init__(self, storage_directory, pyramid_tile_size=256, mdh=None,
             n_tiles_x=0, n_tiles_y=0, depth=0, x0=0, y0=0, pixel_size=1,
             backend=PZFTileIO):
    """
    Set up the pyramid state, its metadata, and the three tile IO backends.

    Parameters
    ----------
    storage_directory : str or tempfile.TemporaryDirectory
        Where the pyramid lives. A `TemporaryDirectory` is kept referenced on
        the instance and its `.name` is used as the path. A cluster URI (as
        judged by `unifiedIO.is_cluster_uri`) is reduced to the first element
        returned by `unifiedIO.split_cluster_url`.
    pyramid_tile_size : int
        Stored as `tile_size` and as the 'Pyramid.TileSize' metadata entry.
    mdh : optional
        Passed to `NestedClassMDHandler` to build the pyramid metadata.
    n_tiles_x, n_tiles_y, depth :
        Stored unchanged on the instance.
    x0, y0, pixel_size :
        Stored on the instance and mirrored into the metadata as
        'Pyramid.x0', 'Pyramid.y0' and 'Pyramid.PixelSize'.
    backend : callable or None
        Called as `backend(base_dir=..., suff=...)` once each for the 'img',
        'acc' and 'occ' suffixes. When None, `infer_tileio_backend` is asked
        to choose one based on the base directory.
    """
    location = storage_directory

    if isinstance(location, tempfile.TemporaryDirectory):
        # Transitory pyramids: hold on to the TemporaryDirectory object so
        # that the directory is cleaned up when the pyramid is deleted.
        self._temp_directory = location
        location = location.name

    if unifiedIO.is_cluster_uri(location):
        # cluster URIs are only valid in combination with the cluster backend
        assert backend == ClusterPZFTileIO
        location, _ = unifiedIO.split_cluster_url(location)

    self.base_dir = location
    self.tile_size = pyramid_tile_size
    self.pyramid_valid = False

    self._mdh = NestedClassMDHandler(mdh)
    self._mdh['Pyramid.TileSize'] = self.tile_size

    self.n_tiles_x, self.n_tiles_y = n_tiles_x, n_tiles_y
    self.depth = depth

    self.x0, self.y0 = x0, y0
    self.pixel_size = pixel_size

    # TODO - should we be re-assigning these on load, not just when we
    # create a new pyramid?
    for md_key, md_value in (('Pyramid.x0', x0),
                             ('Pyramid.y0', y0),
                             ('Pyramid.PixelSize', pixel_size)):
        self._mdh[md_key] = md_value

    # Only create the directory locally - never for the cluster backend.
    if not (os.path.exists(self.base_dir) or backend == ClusterPZFTileIO):
        os.makedirs(self.base_dir)

    if backend is None:
        backend = infer_tileio_backend(self.base_dir)

    # one tile store per suffix, created in the order img, acc, occ
    for attribute, suffix in (('_imgs', 'img'), ('_acc', 'acc'), ('_occ', 'occ')):
        setattr(self, attribute, backend(base_dir=self.base_dir, suff=suffix))
def loadInput(self, filename, key='input'):
    """
    Load input data from a file and inject it into the namespace.

    The loader is picked from the file extension:

    - '.h5r' / '.hdf': the file is opened through `h5rFile.openH5R` and handed
      to `self._inject_tables_from_hdf5`.
    - '.csv', '.xls', '.xlsx': not supported; an error is logged and
      NotImplementedError is raised.
    - anything else: wrapped in an `ImageStack` stored at `self.namespace[key]`.

    Parameters
    ----------
    filename : str
        Local path or cluster URI of the data to load.
    key : str
        Namespace key to store the loaded data under.

    Raises
    ------
    NotImplementedError
        For '.csv', '.xls' and '.xlsx' files.
    tables.exceptions.HDF5ExtError
        If opening an HDF file fails and `filename` is not a cluster URI.
    """
    from PYME.IO import unifiedIO
    import os

    ext = os.path.splitext(filename)[1]

    if ext == '.csv':
        logger.error('loading .csv not supported yet')
        raise NotImplementedError

    if ext in ('.xls', '.xlsx'):
        logger.error('loading .xls not supported yet')
        raise NotImplementedError

    if ext not in ('.h5r', '.hdf'):
        # everything that is not tabular is treated as image data
        self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
        return

    import tables
    from PYME.IO import h5rFile

    try:
        with unifiedIO.local_or_temp_filename(filename) as local_name, \
                h5rFile.openH5R(local_name, mode='r')._h5file as h5f:
            self._inject_tables_from_hdf5(key, h5f, local_name, ext)
    except tables.exceptions.HDF5ExtError:
        # access issue likely due to multiple processes
        if not unifiedIO.is_cluster_uri(filename):
            # not a cluster file, doesn't make sense to retry with cluster.
            # Propagate exception to user.
            raise

        # Try again, this time forcing access through the dataserver.
        # NOTE: it is unclear why this should work when
        # local_or_temp_filename() doesn't, as the file is still opened /
        # copied independently, albeit in the same process as is doing the
        # writing. It relies on one of: a quirk of the GIL, a quirk in HDF5
        # locking, or copying the file to a stream being much faster than
        # opening it with pytables (which would match what was observed with
        # old style spooling analysis, where copying a file before opening it
        # in VisGUI was more reliable than opening it directly). An inherent
        # race condition remains, so we risk replacing a predictable failure
        # with a less frequent one.
        # TODO - consider whether h5r_part might be a better choice.
        # FIXME: (DB) I'm not comfortable with having this kind of special
        # case retry logic here, and would much prefer an alternative
        # workaround, a refactor into something like h5rFile.open_robust(),
        # or just letting this fail. Left in for the meantime to get chained
        # recipes working, but we should revisit.
        from PYME.IO import clusterIO

        cluster_path, server_filter = unifiedIO.split_cluster_url(filename)
        raw_bytes = clusterIO.get_file(cluster_path,
                                       serverfilter=server_filter,
                                       local_short_circuit=False)

        # open the downloaded bytes as an in-memory HDF5 image (no backing file)
        with tables.open_file('in-memory.h5', driver='H5FD_CORE',
                              driver_core_image=raw_bytes,
                              driver_core_backing_store=0) as h5f:
            self._inject_tables_from_hdf5(key, h5f, filename, ext)
def local_or_named_temp_filename(url):
    """
    riff of PYME.IO.unifiedIO.local_or_temp_filename, but one which returns a
    filename with a matching file stub to the original rather than a random
    temporary filename

    NOTE(review): this is a generator which yields exactly once; it is
    presumably wrapped with `contextlib.contextmanager` where it is defined
    (the decorator is not visible here) - confirm.

    Parameters
    ----------
    url : str
        local path or pyme-cluster url

    Yields
    ------
    str
        path to a (temporary) file so `url` can be loaded using modules which
        expect a local filename. When a temporary copy has to be made it is
        placed in a temporary directory which is removed once the generator
        is resumed / closed, so the path is only valid while the caller is
        holding the yielded value.

    Raises
    ------
    IOError
        If `url` is neither an existing local path nor a cluster URI.
    """
    from PYME.IO.FileUtils import nameUtils
    from PYME.IO import unifiedIO
    import tempfile

    filename = nameUtils.getFullExistingFilename(url)

    if os.path.exists(filename):
        # already available locally, nothing to copy
        yield filename
    elif unifiedIO.is_cluster_uri(url):
        from PYME.IO import clusterIO

        # NOTE(review): splits `filename` rather than `url`; presumably
        # getFullExistingFilename returns cluster URIs unchanged - confirm.
        name, clusterfilter = unifiedIO.split_cluster_url(filename)

        localpath = clusterIO.get_local_path(name, clusterfilter)
        if localpath:
            # the file lives on this node, use it directly
            yield localpath
        else:
            with tempfile.TemporaryDirectory() as temp_dir:
                # keep the original file stub so that loaders which inspect
                # the name / extension behave as they would for the original
                temp_path = os.path.join(temp_dir, os.path.split(name)[-1])
                with open(temp_path, 'wb') as f:
                    f.write(clusterIO.get_file(name, clusterfilter))

                # The file is closed (and hence fully written) before the
                # path is handed out, so the consumer never opens a file we
                # still hold a write handle on.
                yield temp_path
    else:
        raise IOError('Path "%s" could not be found' % url)