def load_shiftmap(uri):
    """
    Helper function to handle I/O of two versions of shiftmaps. Note that HDF is preferred.

    :param uri: str
        path or url to shiftmap-containing file (hdf, or [less ideal] json)
    :return: dict shiftmap
    """
    from PYME.IO import unifiedIO, tabular
    from PYME.IO.MetaDataHandler import HDFMDHandler
    import tables
    import json

    try:  # try loading shift map as hdf file
        with unifiedIO.local_or_temp_filename(uri) as f:
            t = tables.open_file(f)
            shift_map_source = tabular.HDFSource(t, 'shift_map')  # todo - is there a cleaner way to do this?
            shift_map_source.mdh = HDFMDHandler(t)

            # build dict of dicts so we can easily rebuild shiftfield objects in multiview.calc_shifts_for_points
            shift_map = {'shiftModel': shift_map_source.mdh['Multiview.shift_map.model']}
            legend = shift_map_source.mdh['Multiview.shift_map.legend']
            for l in legend.keys():
                keys = shift_map_source.keys()
                shift_map[l] = dict(zip(keys, [shift_map_source[k][legend[l]] for k in keys]))

            t.close()
    except tables.HDF5ExtError:  # file is probably saved as json (legacy)
        s = unifiedIO.read(uri)
        shift_map = json.loads(s)

    return shift_map
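# A minimal usage sketch for load_shiftmap (the path below is hypothetical):
# both the current HDF format and the legacy JSON format come back as the same
# dict, keyed by 'shiftModel' plus one sub-dict per legend entry, ready to be
# passed on to multiview.calc_shifts_for_points.
def _example_load_shiftmap():
    shift_map = load_shiftmap('/path/to/shift_map.hdf')
    assert 'shiftModel' in shift_map
    return shift_map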
def OnROIsFromFile(self, event):
    import wx
    from PYME.IO import tabular

    filename = wx.FileSelector("Load ROI Positions:", wildcard="*.hdf", flags=wx.FD_OPEN)
    if filename != '':
        rois = tabular.HDFSource(filename, tablename='roi_locations')
        rois = [(x, y) for x, y in zip(rois['x_um'], rois['y_um'])]
        self._add_ROIs(rois)
def set_roi_locations(self, locations_file, tablename='roi_locations'):
    from PYME.IO import tabular
    print(locations_file)

    if locations_file.endswith('.hdf'):
        locs = tabular.HDFSource(locations_file, tablename=tablename)
        self.roi_locations = [Location(x, y) for x, y in zip(locs['x_um'], locs['y_um'])]
        locs.close()
        del locs
    # elif locations_file.endswith('.csv'):
    #     self.roi_locations = tabular.textfileSource(locations_file)

    raise cherrypy.HTTPRedirect('/roi_list')
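# A minimal sketch (hypothetical path) of producing an ROI-locations file that
# both handlers above can read: a 'roi_locations' table with 'x_um'/'y_um'
# columns, written via tabular.ColumnSource.to_hdf (the same round-trip pattern
# exercised by test_h5r below).
def _example_write_roi_locations(filename='/path/to/rois.hdf'):
    import numpy as np
    from PYME.IO import tabular

    rois = tabular.ColumnSource(x_um=np.array([10.0, 20.5]),
                                y_um=np.array([5.0, 7.25]))
    rois.to_hdf(filename, tablename='roi_locations')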
def test_h5r():
    import os
    import shutil
    import tempfile
    import numpy as np
    from PYME.IO import tabular

    data = tabular.ColumnSource(x=1e3 * np.random.randn(1000),
                                y=1e3 * np.random.randn(1000),
                                z=1e3 * np.random.randn(1000))

    tempdir = tempfile.mkdtemp()
    filename = os.path.join(tempdir, 'test_hdf.hdf')
    try:
        data.to_hdf(filename, tablename='Data')

        inp = tabular.HDFSource(filename, tablename='Data')
        assert np.allclose(data['x'], inp['x'])
    finally:
        shutil.rmtree(tempdir)
def execute(self, namespace):
    from PYME.Analysis.points import multiview
    from PYME.IO import unifiedIO, tabular  # tabular is needed for HDFSource / MappingFilter below
    from PYME.IO.MetaDataHandler import HDFMDHandler
    import tables
    import json

    inp = namespace[self.input_name]
    if 'mdh' not in dir(inp):
        raise RuntimeError('ShiftCorrect needs metadata')

    if self.shift_map_path == '':  # grab shiftmap from the metadata
        loc = inp.mdh['Shiftmap']
    else:
        loc = self.shift_map_path

    try:  # try loading shift map as hdf file
        with unifiedIO.local_or_temp_filename(loc) as f:
            t = tables.open_file(f)
            shift_map_source = tabular.HDFSource(t, 'shift_map')  # todo - is there a cleaner way to do this?
            shift_map_source.mdh = HDFMDHandler(t)

            # build dict of dicts so we can easily rebuild shiftfield objects in multiview.calc_shifts_for_points
            shift_map = {'shiftModel': shift_map_source.mdh['Multiview.shift_map.model']}
            legend = shift_map_source.mdh['Multiview.shift_map.legend']
            for l in legend.keys():
                keys = shift_map_source.keys()
                shift_map[l] = dict(zip(keys, [shift_map_source[k][legend[l]] for k in keys]))

            t.close()
    except tables.HDF5ExtError:  # file is probably saved as json (legacy)
        s = unifiedIO.read(loc)  # use loc so metadata-supplied paths also work for the json fallback
        shift_map = json.loads(s)

    mapped = tabular.MappingFilter(inp)

    multiview.apply_shifts_to_points(mapped, shift_map)
    # propagate metadata
    mapped.mdh = inp.mdh
    mapped.mdh['Multiview.shift_map.location'] = loc

    namespace[self.output_name] = mapped
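# An illustrative sketch of the dict that execute() builds before handing it to
# multiview.apply_shifts_to_points. All channel and coefficient names here are
# hypothetical; the real ones come from the metadata legend (which maps each
# entry name to a row index) and from the 'shift_map' table columns.
def _example_shift_map():
    return {'shiftModel': 'some.shiftfield.Model',          # from Multiview.shift_map.model
            'Chan0->Chan1': {'a0': 0.0, 'a1': 1.2e-3},      # one row of fit coefficients
            'Chan0->Chan2': {'a0': 0.1, 'a1': -0.8e-3}}     # per legend entry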
def _ds_from_file(self, filename, **kwargs):
    """
    Loads a dataset from a file.

    Parameters
    ----------
    filename : str
    kwargs : any additional arguments (see OpenFile)

    Returns
    -------
    ds : the dataset
    """
    if os.path.splitext(filename)[1] == '.h5r':
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        try:
            ds = tabular.H5RSource(h5f)

            if 'DriftResults' in h5f.root:
                driftDS = tabular.H5RDSource(h5f)
                self.driftInputMapping = tabular.MappingFilter(driftDS)
                #self.dataSources['Fiducials'] = self.driftInputMapping
                self.addDataSource('Fiducials', self.driftInputMapping)

                if len(ds['x']) == 0:
                    self.selectDataSource('Fiducials')
        except:  # fallback to catch series that only have drift data
            logger.exception('No fitResults table found')
            ds = tabular.H5RDSource(h5f)

            self.driftInputMapping = tabular.MappingFilter(ds)
            #self.dataSources['Fiducials'] = self.driftInputMapping
            self.addDataSource('Fiducials', self.driftInputMapping)
            #self.selectDataSource('Fiducials')

        # catch really old files which don't have any metadata
        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        if ('Events' in h5f.root) and ('StartTime' in self.mdh.keys()):
            self.events = h5f.root.Events[:]

    elif filename.endswith('.hdf'):
        # recipe output - handles generically formatted .hdf
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        for t in h5f.list_nodes('/'):
            if isinstance(t, tables.table.Table):
                tab = tabular.HDFSource(h5f, t.name)
                self.addDataSource(t.name, tab)

                if 'EventName' in t.description._v_names:  # FIXME - we shouldn't have a special case here
                    self.events = t[:]  # this does not handle multiple events tables per hdf file

        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        for dsname, ds_ in self.dataSources.items():
            # loop through tables until we get one which defines x; if no table
            # defines x, take the last table to be added
            # TODO make this logic better
            ds = ds_.resultsSource
            if 'x' in ds.keys():
                break

    elif os.path.splitext(filename)[1] == '.mat':  # matlab file
        if 'VarName' in kwargs.keys():
            # old style matlab import
            ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName'])
        else:
            ds = tabular.MatfileColumnSource(filename)

    elif os.path.splitext(filename)[1] == '.csv':
        # special case for csv files - tell np.loadtxt to use a comma rather than whitespace as a delimiter
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',', skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',')

    else:  # assume it's a tab (or other whitespace) delimited text file
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'])

    return ds
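# A minimal sketch of the generic '.hdf' branch above, outside the pipeline
# (filename hypothetical): each Table node becomes a tabular.HDFSource that can
# be indexed by column name. Columns are pulled before the file is closed.
def _example_open_hdf(filename='/path/to/recipe_output.hdf'):
    import tables
    from PYME.IO import tabular

    h5f = tables.open_file(filename)
    try:
        sources = {t.name: tabular.HDFSource(h5f, t.name)
                   for t in h5f.list_nodes('/')
                   if isinstance(t, tables.table.Table)}
        # keep the 'x' column of any localisation-like table
        return {name: src['x'] for name, src in sources.items() if 'x' in src.keys()}
    finally:
        h5f.close()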
def loadInput(self, filename, key='input'):
    """Load input data from a file and inject into namespace

    Currently only handles images (anything you can open in dh5view).
    TODO - extend to other types.
    """
    # modify this to allow for different file types - currently only supports images
    from PYME.IO import unifiedIO
    import os

    extension = os.path.splitext(filename)[1]
    if extension in ['.h5r', '.h5', '.hdf']:
        import tables
        from PYME.IO import MetaDataHandler
        from PYME.IO import tabular

        with unifiedIO.local_or_temp_filename(filename) as fn:
            with tables.open_file(fn, mode='r') as h5f:  # make sure our hdf file gets closed
                key_prefix = '' if key == 'input' else key + '_'

                try:
                    mdh = MetaDataHandler.NestedClassMDHandler(MetaDataHandler.HDFMDHandler(h5f))
                except tables.FileModeError:  # occurs if no metadata is found, since we opened the table in read-mode
                    logger.warning('No metadata found, proceeding with empty metadata')
                    mdh = MetaDataHandler.NestedClassMDHandler()

                for t in h5f.list_nodes('/'):
                    # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
                    # PZF formatted image data, Image data which is not in an EArray, etc ...)
                    # Note that EArray is only used for streaming data!
                    # They should ideally be replaced with more comprehensive tests (potentially based on array or
                    # dataset dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF
                    # should probably also be improved / clarified - can we use hdf attributes to hint at the data
                    # intent? How do we support > 3D data?

                    if isinstance(t, tables.VLArray):
                        from PYME.IO.ragged import RaggedVLArray
                        rag = RaggedVLArray(h5f, t.name, copy=True)  # force an in-memory copy so we can close the hdf file properly
                        rag.mdh = mdh

                        self.namespace[key_prefix + t.name] = rag

                    elif isinstance(t, tables.table.Table):
                        # pipe our table into h5r or hdf source depending on the extension
                        tab = tabular.H5RSource(h5f, t.name) if extension == '.h5r' else tabular.HDFSource(h5f, t.name)
                        tab.mdh = mdh

                        self.namespace[key_prefix + t.name] = tab

                    elif isinstance(t, tables.EArray):
                        # load using ImageStack._loadh5, which finds metadata
                        im = ImageStack(filename=filename, haveGUI=False)
                        # assume image is the main table in the file and give it the named key
                        self.namespace[key] = im

    elif extension == '.csv':
        logger.error('loading .csv not supported yet')
        raise NotImplementedError
    elif extension in ['.xls', '.xlsx']:
        logger.error('loading .xls not supported yet')
        raise NotImplementedError
    else:
        self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
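# A minimal usage sketch (filename hypothetical; 'recipe' assumed to be the
# object exposing loadInput and its namespace dict). With a non-default key,
# each table node is prefixed: an h5r 'FitResults' table would land at
# 'fits_FitResults'.
def _example_load_input(recipe):
    recipe.loadInput('/path/to/results.h5r', key='fits')
    return recipe.namespace['fits_FitResults']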
def _inject_tables_from_hdf5(self, key, h5f, filename, extension):
    """
    Search through hdf5 file nodes and add them to the recipe namespace

    Parameters
    ----------
    key : str
        base key name for loaded file components; if key is not the default
        'input', each file node will be loaded into the recipe namespace as
        `key`_`node_name`.
    h5f : file
        open hdf5 file
    filename : str
        full filename
    extension : str
        file extension, used here mainly to toggle which PYME.IO.tabular source
        is used for table nodes.
    """
    import tables
    from PYME.IO import MetaDataHandler, tabular

    key_prefix = '' if key == 'input' else key + '_'

    # Handle a 'MetaData' group as a special case
    # TODO - find/implement a more portable way of handling metadata in HDF (e.g. as .json in a blob) so that
    # non-python exporters have a chance of adding metadata
    if 'MetaData' in h5f.root:
        mdh = MetaDataHandler.NestedClassMDHandler(MetaDataHandler.HDFMDHandler(h5f))
    else:
        logger.warning('No metadata found, proceeding with empty metadata')
        mdh = MetaDataHandler.NestedClassMDHandler()

    events = None
    # handle an 'Events' table as a special case (so that it can be attached to subsequently loaded tables)
    # FIXME - this relies on a special/reserved table name and format and could raise name collision issues
    # when importing 3rd party / generic HDF
    # FIXME - do we really want to attach events (which will not get propagated through recipe modules)?
    if 'Events' in h5f.root:
        if 'EventName' in h5f.root.Events.description._v_names:
            # check that the event table is formatted as we expect
            if 'StartTime' in mdh.keys():
                events = h5f.root.Events[:]
            else:
                logger.warning('Acquisition events found in .hdf, but no "StartTime" in metadata')
        else:
            logger.warning('Table called "Events" found in .hdf does not match the signature for acquisition events, ignoring')

    for t in h5f.list_nodes('/'):
        # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
        # PZF formatted image data, Image data which is not in an EArray, etc ...)
        # Note that EArray is only used for streaming data!
        # They should ideally be replaced with more comprehensive tests (potentially based on array or dataset
        # dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF should probably
        # also be improved / clarified - can we use hdf attributes to hint at the data intent? How do we support
        # > 3D data?

        if getattr(t, 'name', None) == 'Events':
            # NB: This assumes we've handled this in the special case earlier, and blocks anything in a 3rd party
            # HDF events table from being seen.
            # TODO - do we really want to have so much special case stuff in our generic hdf handling? Are we sure
            # that events shouldn't be injected into the namespace (given that events do not propagate through
            # recipe modules)?
            continue

        elif isinstance(t, tables.VLArray):
            from PYME.IO.ragged import RaggedVLArray
            rag = RaggedVLArray(h5f, t.name, copy=True)  # force an in-memory copy so we can close the hdf file properly
            rag.mdh = mdh
            if events is not None:
                rag.events = events

            self.namespace[key_prefix + t.name] = rag

        elif isinstance(t, tables.table.Table):
            # pipe our table into h5r or hdf source depending on the extension
            tab = tabular.H5RSource(h5f, t.name) if extension == '.h5r' else tabular.HDFSource(h5f, t.name)
            tab.mdh = mdh
            if events is not None:
                tab.events = events

            self.namespace[key_prefix + t.name] = tab

        elif isinstance(t, tables.EArray):
            # load using ImageStack._loadh5
            # FIXME - ._loadh5 will load events lazily, which isn't great if we got here after
            # sending the file over clusterIO inside of a context manager -> force it through since we already found it
            im = ImageStack(filename=filename, mdh=mdh, events=events, haveGUI=False)
            # assume image is the main table in the file and give it the named key
            self.namespace[key] = im
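# A minimal sketch of how the method above is typically driven (path
# hypothetical; 'recipe' assumed to expose _inject_tables_from_hdf5 and a
# namespace dict): the file is opened read-only and closed once the in-memory
# sources have been built, which is why VLArray nodes are copied eagerly.
def _example_inject(recipe, filename='/path/to/results.h5r'):
    import os
    import tables

    with tables.open_file(filename, mode='r') as h5f:
        recipe._inject_tables_from_hdf5('input', h5f, filename,
                                        os.path.splitext(filename)[1])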