def _loadh5(self, filename):
    """Load PYME's semi-custom HDF5 image data format. Offloads all the hard
    work to the HDFDataSource class."""
    import tables
    from PYME.IO.DataSources import HDFDataSource, BGSDataSource
    from PYME.IO import tabular

    self.dataSource = HDFDataSource.DataSource(filename, None)
    # chain on a background subtraction data source, so we can easily do
    # background subtraction in the GUI the same way as in the analysis
    self.data = BGSDataSource.DataSource(self.dataSource)  # this will get replaced with a wrapped version

    if 'MetaData' in self.dataSource.h5File.root:  # should be true the whole time
        self.mdh = MetaData.TIRFDefault
        self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(self.dataSource.h5File))
    else:
        self.mdh = MetaData.TIRFDefault

        import wx
        wx.MessageBox("Carrying on with defaults - no guarantees it'll work well",
                      'ERROR: No metadata found in file ...', wx.OK)
        print("ERROR: No metadata found in file ... Carrying on with defaults - no guarantees it'll work well")

    # attempt to estimate any missing parameters from the data itself
    try:
        MetaData.fillInBlanks(self.mdh, self.dataSource)
    except Exception:
        logger.exception('Error attempting to populate missing metadata')

    # calculate the name to use when we do batch analysis on this
    #from PYME.IO.FileUtils.nameUtils import getRelFilename
    self.seriesName = getRelFilename(filename)

    # try and find a previously performed analysis
    fns = filename.split(os.path.sep)
    cand = os.path.sep.join(fns[:-2] + ['analysis', ] + fns[-2:]) + 'r'
    print(cand)

    if False:  #os.path.exists(cand):
        h5Results = tables.open_file(cand)

        if 'FitResults' in dir(h5Results.root):
            self.fitResults = h5Results.root.FitResults[:]
            self.resultsSource = tabular.H5RSource(h5Results)

            self.resultsMdh = MetaData.TIRFDefault
            self.resultsMdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5Results))

    self.events = self.dataSource.getEvents()

    self.mode = 'LM'
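# Hedged usage sketch (not part of the original source): _loadh5 is normally
# reached indirectly by constructing an ImageStack from an HDF5 series, in the
# same way the recipe code further below does; 'series.h5' is a placeholder
# path, not a file referenced by the original code.
#
#   im = ImageStack(filename='series.h5', haveGUI=False)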
def convertFile(inFile, outFile):
    """Dump all columns of an .h5r results file to a tab-delimited text file."""
    ds = tabular.H5RSource(inFile)
    nRecords = len(ds[ds.keys()[0]])

    of = open(outFile, 'w')
    of.write('#' + '\t'.join(['%s' % k for k in ds._keys]) + '\n')

    for row in zip(*[ds[k] for k in ds._keys]):
        of.write('\t'.join(['%e' % c for c in row]) + '\n')

    of.close()
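# Hedged usage sketch (not part of the original source): convertFile relies on
# a module-level `tabular` import; the file names below are placeholders.
if __name__ == '__main__':
    from PYME.IO import tabular
    convertFile('fitResults.h5r', 'fitResults.txt')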
def _ds_from_file(self, filename, **kwargs):
    """
    Load a data set from a file.

    Parameters
    ----------
    filename : str
    kwargs : any additional arguments (see OpenFile)

    Returns
    -------
    ds : tabular.TabularBase
        the datasource, complete with metadata handler and events if found.
    """
    mdh = MetaDataHandler.NestedClassMDHandler()
    events = None

    if os.path.splitext(filename)[1] == '.h5r':
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        try:
            ds = tabular.H5RSource(h5f)

            if 'DriftResults' in h5f.root:
                driftDS = tabular.H5RDSource(h5f)
                self.driftInputMapping = tabular.MappingFilter(driftDS)
                #self.dataSources['Fiducials'] = self.driftInputMapping
                self.addDataSource('Fiducials', self.driftInputMapping)

                if len(ds['x']) == 0:
                    self.selectDataSource('Fiducials')
        except Exception:  # fallback to catch series that only have drift data
            logger.exception('No fitResults table found')
            ds = tabular.H5RDSource(h5f)

            self.driftInputMapping = tabular.MappingFilter(ds)
            #self.dataSources['Fiducials'] = self.driftInputMapping
            self.addDataSource('Fiducials', self.driftInputMapping)
            #self.selectDataSource('Fiducials')

        # really old files might not have metadata, so test for it before assuming
        if 'MetaData' in h5f.root:
            mdh = MetaDataHandler.HDFMDHandler(h5f)

        if ('Events' in h5f.root) and ('StartTime' in mdh.keys()):
            events = h5f.root.Events[:]

    elif filename.endswith('.hdf'):
        #recipe output - handles generically formatted .h5
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        #defer our IO to the recipe IO method - TODO - do this for other file types as well
        self.recipe._inject_tables_from_hdf5('', h5f, filename, '.hdf')

        for dsname, ds_ in self.dataSources.items():
            # loop through tables until we get one which defines x. If no table
            # defines x, take the last table to be added.
            # TODO make this logic better.
            ds = ds_
            if 'x' in ds.keys():
                # TODO - get rid of some of the grossness here
                mdh = getattr(ds, 'mdh', mdh)
                events = getattr(ds, 'events', events)
                break

    elif os.path.splitext(filename)[1] == '.mat':  # matlab file
        if 'VarName' in kwargs.keys():
            #old style matlab import
            ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName'])
        else:
            if kwargs.get('Multichannel', False):
                ds = tabular.MatfileMultiColumnSource(filename)
            else:
                ds = tabular.MatfileColumnSource(filename)

            # check for column name mapping
            field_names = kwargs.get('FieldNames', None)
            if field_names:
                if kwargs.get('Multichannel', False):
                    field_names.append('probe')  # don't forget to copy this field over

                ds = tabular.MappingFilter(ds, **{new_field: old_field for new_field, old_field
                                                  in zip(field_names, ds.keys())})

    elif os.path.splitext(filename)[1] == '.csv':
        # special case for csv files - tell np.loadtxt to use a comma rather than whitespace as a delimiter
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',', skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',')

    else:  # assume it's a tab (or other whitespace) delimited text file
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'])

    # make sure mdh is writable (file-based might not be)
    ds.mdh = MetaDataHandler.NestedClassMDHandler(mdToCopy=mdh)

    if events is not None:
        # only set the .events attribute if we actually have events.
        # ensure that events are sorted in increasing time order
        ds.events = events[np.argsort(events['Time'])]

    return ds
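# Illustration only (standalone example, not from the original source): the
# event-sorting idiom above uses numpy structured-array field indexing to
# order acquisition events by their 'Time' column.
#
#   import numpy as np
#   events = np.array([(2.0, b'ProtocolFocus'), (1.0, b'OtherEvent')],
#                     dtype=[('Time', 'f8'), ('EventName', 'S32')])
#   events = events[np.argsort(events['Time'])]  # now in increasing time order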
def _ds_from_file(self, filename, **kwargs):
    """
    Load a data set from a file.

    Parameters
    ----------
    filename : str
    kwargs : any additional arguments (see OpenFile)

    Returns
    -------
    ds : the dataset
    """
    if os.path.splitext(filename)[1] == '.h5r':
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        try:
            ds = tabular.H5RSource(h5f)

            if 'DriftResults' in h5f.root:
                driftDS = tabular.H5RDSource(h5f)
                self.driftInputMapping = tabular.MappingFilter(driftDS)
                #self.dataSources['Fiducials'] = self.driftInputMapping
                self.addDataSource('Fiducials', self.driftInputMapping)

                if len(ds['x']) == 0:
                    self.selectDataSource('Fiducials')
        except Exception:  # fallback to catch series that only have drift data
            logger.exception('No fitResults table found')
            ds = tabular.H5RDSource(h5f)

            self.driftInputMapping = tabular.MappingFilter(ds)
            #self.dataSources['Fiducials'] = self.driftInputMapping
            self.addDataSource('Fiducials', self.driftInputMapping)
            #self.selectDataSource('Fiducials')

        #catch really old files which don't have any metadata
        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        if ('Events' in h5f.root) and ('StartTime' in self.mdh.keys()):
            self.events = h5f.root.Events[:]

    elif filename.endswith('.hdf'):
        #recipe output - handles generically formatted .h5
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        for t in h5f.list_nodes('/'):
            if isinstance(t, tables.table.Table):
                tab = tabular.HDFSource(h5f, t.name)
                self.addDataSource(t.name, tab)

                if 'EventName' in t.description._v_names:
                    #FIXME - we shouldn't have a special case here
                    self.events = t[:]  # this does not handle multiple events tables per hdf file

        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        for dsname, ds_ in self.dataSources.items():
            # loop through tables until we get one which defines x. If no table
            # defines x, take the last table to be added.
            # TODO make this logic better.
            ds = ds_.resultsSource
            if 'x' in ds.keys():
                break

    elif os.path.splitext(filename)[1] == '.mat':  # matlab file
        if 'VarName' in kwargs.keys():
            #old style matlab import
            ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName'])
        else:
            ds = tabular.MatfileColumnSource(filename)

    elif os.path.splitext(filename)[1] == '.csv':
        # special case for csv files - tell np.loadtxt to use a comma rather than whitespace as a delimiter
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',', skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',')

    else:  # assume it's a tab (or other whitespace) delimited text file
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'])

    return ds
def loadInput(self, filename, key='input'):
    """Load input data from a file and inject it into the namespace.

    Currently only handles images (anything you can open in dh5view).
    TODO - extend to other types.
    """
    #modify this to allow for different file types - currently only supports images
    from PYME.IO import unifiedIO
    import os

    extension = os.path.splitext(filename)[1]
    if extension in ['.h5r', '.h5', '.hdf']:
        import tables
        from PYME.IO import MetaDataHandler
        from PYME.IO import tabular

        with unifiedIO.local_or_temp_filename(filename) as fn:
            with tables.open_file(fn, mode='r') as h5f:
                #make sure our hdf file gets closed

                key_prefix = '' if key == 'input' else key + '_'

                try:
                    mdh = MetaDataHandler.NestedClassMDHandler(MetaDataHandler.HDFMDHandler(h5f))
                except tables.FileModeError:
                    # Occurs if no metadata is found, since we opened the table in read-mode
                    logger.warning('No metadata found, proceeding with empty metadata')
                    mdh = MetaDataHandler.NestedClassMDHandler()

                for t in h5f.list_nodes('/'):
                    # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
                    # PZF formatted image data, Image data which is not in an EArray, etc ...)
                    # Note that EArray is only used for streaming data!
                    # They should ideally be replaced with more comprehensive tests (potentially based on array or dataset
                    # dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF should probably
                    # also be improved / clarified - can we use hdf attributes to hint at the data intent? How do we support
                    # > 3D data?

                    if isinstance(t, tables.VLArray):
                        from PYME.IO.ragged import RaggedVLArray

                        rag = RaggedVLArray(h5f, t.name, copy=True)  # force an in-memory copy so we can close the hdf file properly
                        rag.mdh = mdh

                        self.namespace[key_prefix + t.name] = rag

                    elif isinstance(t, tables.table.Table):
                        # pipe our table into h5r or hdf source depending on the extension
                        tab = tabular.H5RSource(h5f, t.name) if extension == '.h5r' else tabular.HDFSource(h5f, t.name)
                        tab.mdh = mdh

                        self.namespace[key_prefix + t.name] = tab

                    elif isinstance(t, tables.EArray):
                        # load using ImageStack._loadh5, which finds metadata
                        im = ImageStack(filename=filename, haveGUI=False)
                        # assume image is the main table in the file and give it the named key
                        self.namespace[key] = im

    elif extension == '.csv':
        logger.error('loading .csv not supported yet')
        raise NotImplementedError
    elif extension in ['.xls', '.xlsx']:
        logger.error('loading .xls not supported yet')
        raise NotImplementedError
    else:
        self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
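# Hedged usage sketch (not from the original source): based on the key_prefix
# logic above, for an .hdf file containing tables named 'FitResults' and
# 'Fiducials' (placeholder names) the default key leaves node names unchanged
# in the namespace, while a non-default key prefixes them, e.g.
# namespace['raw_FitResults'] and namespace['raw_Fiducials'].
#
#   recipe.loadInput('fits.hdf', key='raw')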
def _inject_tables_from_hdf5(self, key, h5f, filename, extension):
    """
    Search through hdf5 file nodes and add them to the recipe namespace.

    Parameters
    ----------
    key : str
        base key name for loaded file components; if key is not the default
        'input', each file node will be loaded into the recipe namespace as
        `key`_`node_name`.
    h5f : file
        open hdf5 file
    filename : str
        full filename
    extension : str
        file extension, used here mainly to toggle which PYME.IO.tabular
        source is used for table nodes.
    """
    import tables
    from PYME.IO import MetaDataHandler, tabular

    key_prefix = '' if key == 'input' else key + '_'

    # Handle a 'MetaData' group as a special case
    # TODO - find/implement a more portable way of handling metadata in HDF (e.g. as .json in a blob) so that
    # non-python exporters have a chance of adding metadata
    if 'MetaData' in h5f.root:
        mdh = MetaDataHandler.NestedClassMDHandler(MetaDataHandler.HDFMDHandler(h5f))
    else:
        logger.warning('No metadata found, proceeding with empty metadata')
        mdh = MetaDataHandler.NestedClassMDHandler()

    events = None
    # handle an 'Events' table as a special case (so that it can be attached to subsequently loaded tables)
    # FIXME - this relies on a special/reserved table name and format and could raise name collision issues
    # when importing 3rd party / generic HDF
    # FIXME - do we really want to attach events (which will not get propagated through recipe modules)
    if 'Events' in h5f.root:
        if 'EventName' in h5f.root.Events.description._v_names:
            # check that the event table is formatted as we expect
            if 'StartTime' in mdh.keys():
                events = h5f.root.Events[:]
            else:
                logger.warning('Acquisition events found in .hdf, but no "StartTime" in metadata')
        else:
            logger.warning('Table called "Events" found in .hdf does not match the signature for acquisition events, ignoring')

    for t in h5f.list_nodes('/'):
        # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
        # PZF formatted image data, Image data which is not in an EArray, etc ...)
        # Note that EArray is only used for streaming data!
        # They should ideally be replaced with more comprehensive tests (potentially based on array or dataset
        # dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF should probably
        # also be improved / clarified - can we use hdf attributes to hint at the data intent? How do we support
        # > 3D data?

        if getattr(t, 'name', None) == 'Events':
            # NB: This assumes we've handled this in the special case earlier, and blocks anything in a 3rd party
            # HDF events table from being seen.
            # TODO - do we really want to have so much special case stuff in our generic hdf handling? Are we sure
            # that events shouldn't be injected into the namespace (given that events do not propagate through recipe modules)?
            continue

        elif isinstance(t, tables.VLArray):
            from PYME.IO.ragged import RaggedVLArray

            rag = RaggedVLArray(h5f, t.name, copy=True)  # force an in-memory copy so we can close the hdf file properly
            rag.mdh = mdh
            if events is not None:
                rag.events = events

            self.namespace[key_prefix + t.name] = rag

        elif isinstance(t, tables.table.Table):
            # pipe our table into h5r or hdf source depending on the extension
            tab = tabular.H5RSource(h5f, t.name) if extension == '.h5r' else tabular.HDFSource(h5f, t.name)
            tab.mdh = mdh
            if events is not None:
                tab.events = events

            self.namespace[key_prefix + t.name] = tab

        elif isinstance(t, tables.EArray):
            # load using ImageStack._loadh5
            # FIXME - ._loadh5 will load events lazily, which isn't great if we got here after
            # sending the file over clusterIO inside of a context manager -> force it through since we already found it
            im = ImageStack(filename=filename, mdh=mdh, events=events, haveGUI=False)
            # assume image is the main table in the file and give it the named key
            self.namespace[key] = im
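# Illustration only (example call, not from the original source): with the
# key_prefix convention above,
#   self._inject_tables_from_hdf5('input', h5f, 'fits.hdf', '.hdf')
# leaves node names unchanged in the namespace, while any other key, e.g.
#   self._inject_tables_from_hdf5('drift', h5f, 'fits.hdf', '.hdf')
# stores each table under 'drift_<node_name>' instead ('fits.hdf' is a
# placeholder filename).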
def generateThumbnail(inputFile, thumbSize):
    f1 = tabular.H5RSource(inputFile)

    threeD = False
    stack = False
    split = False

    #print f1.keys()

    if 'fitResults_Ag' in f1.keys():
        #if we used the splitter set up a mapping so we can filter on total amplitude and ratio
        f1_ = tabular.MappingFilter(f1, A='fitResults_Ag + fitResults_Ar',
                                    gFrac='fitResults_Ag/(fitResults_Ag + fitResults_Ar)')
        #f2 = inpFilt.resultsFilter(f1_, error_x=[0,30], A=[5, 1e5], sig=[100/2.35, 350/2.35])
        split = True
    else:
        f1_ = f1

    if 'fitResults_sigma' in f1.keys():
        f2 = tabular.ResultsFilter(f1_, error_x=[0, 30], A=[5, 1e5], sig=[100 / 2.35, 350 / 2.35])
    else:
        f2 = tabular.ResultsFilter(f1_, error_x=[0, 30], A=[5, 1e5])

    if 'fitResults_z0' in f1_.keys():
        threeD = True

    if 'Events' in dir(f1.h5f.root):
        events = f1.h5f.root.Events[:]

        evKeyNames = set()
        for e in events:
            evKeyNames.add(e['EventName'])

        if b'ProtocolFocus' in evKeyNames:
            stack = True

    xmax = f2['x'].max()
    ymax = f2['y'].max()

    if xmax > ymax:
        step = xmax / thumbSize
    else:
        step = ymax / thumbSize

    im, edx, edy = histogram2d(f2['x'], f2['y'], [arange(0, xmax, step), arange(0, ymax, step)])

    f1.close()

    im = minimum(2 * (255 * im) / im.max(), 255).T

    im = concatenate((im[:, :, newaxis], im[:, :, newaxis], im[:, :, newaxis]), 2)

    if stack:
        im[-10:, -10:, 0] = 180

    if threeD:
        im[-10:, -10:, 1] = 180

    if split:
        im[-10:-5, :10, 1] = 210
        im[-5:, :10, 0] = 210

    return im.astype('uint8')
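# Hedged usage sketch (not part of the original source): assumes the module's
# own numpy/tabular imports are present and that Pillow is installed;
# 'fitResults.h5r' is a placeholder path.
if __name__ == '__main__':
    from PIL import Image
    thumb = generateThumbnail('fitResults.h5r', 200)  # ~200 px uint8 RGB thumbnail
    Image.fromarray(thumb).save('fitResults_thumb.png')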
def OpenFile(self, filename):
    self.dataSources = []
    if 'zm' in dir(self):
        del self.zm
    self.filter = None
    self.mapping = None
    self.colourFilter = None
    self.filename = filename

    self.selectedDataSource = inpFilt.H5RSource(filename)
    self.dataSources.append(self.selectedDataSource)

    self.mdh = MetaDataHandler.HDFMDHandler(self.selectedDataSource.h5f)

    if 'Camera.ROIWidth' in self.mdh.getEntryNames():
        x0 = 0
        y0 = 0

        x1 = self.mdh.getEntry('Camera.ROIWidth') * 1e3 * self.mdh.getEntry('voxelsize.x')
        y1 = self.mdh.getEntry('Camera.ROIHeight') * 1e3 * self.mdh.getEntry('voxelsize.y')

        if 'Splitter' in self.mdh.getEntry('Analysis.FitModule'):
            y1 = y1 / 2

        self.imageBounds = ImageBounds(x0, y0, x1, y1)
    else:
        self.imageBounds = ImageBounds.estimateFromSource(self.selectedDataSource)

    if 'fitResults_Ag' in self.selectedDataSource.keys():
        #if we used the splitter set up a mapping so we can filter on total amplitude and ratio
        #if not 'fitError_Ag' in self.selectedDataSource.keys():
        if 'fitError_Ag' in self.selectedDataSource.keys():
            self.selectedDataSource = inpFilt.MappingFilter(
                self.selectedDataSource,
                A='fitResults_Ag + fitResults_Ar',
                gFrac='fitResults_Ag/(fitResults_Ag + fitResults_Ar)',
                error_gFrac='sqrt((fitError_Ag/fitResults_Ag)**2 + (fitError_Ag**2 + fitError_Ar**2)/(fitResults_Ag + fitResults_Ar)**2)*fitResults_Ag/(fitResults_Ag + fitResults_Ar)')

            sg = self.selectedDataSource['fitError_Ag']
            sr = self.selectedDataSource['fitError_Ar']
            g = self.selectedDataSource['fitResults_Ag']
            r = self.selectedDataSource['fitResults_Ar']
            I = self.selectedDataSource['A']

            self.selectedDataSource.colNorm = np.sqrt(2 * np.pi) * sg * sr / (2 * np.sqrt(sg**2 + sr**2) * I) * (
                scipy.special.erf((sg**2 * r + sr**2 * (I - g)) / (np.sqrt(2) * sg * sr * np.sqrt(sg**2 + sr**2)))
                - scipy.special.erf((sg**2 * (r - I) - sr**2 * g) / (np.sqrt(2) * sg * sr * np.sqrt(sg**2 + sr**2))))

            self.selectedDataSource.setMapping('ColourNorm', '1.0*colNorm')
        else:
            self.selectedDataSource = inpFilt.MappingFilter(
                self.selectedDataSource,
                A='fitResults_Ag + fitResults_Ar',
                gFrac='fitResults_Ag/(fitResults_Ag + fitResults_Ar)',
                error_gFrac='0*x + 0.01')

            self.selectedDataSource.setMapping('fitError_Ag', '1*sqrt(fitResults_Ag/1)')
            self.selectedDataSource.setMapping('fitError_Ar', '1*sqrt(fitResults_Ar/1)')

            sg = self.selectedDataSource['fitError_Ag']
            sr = self.selectedDataSource['fitError_Ar']
            g = self.selectedDataSource['fitResults_Ag']
            r = self.selectedDataSource['fitResults_Ar']
            I = self.selectedDataSource['A']

            self.selectedDataSource.colNorm = np.sqrt(2 * np.pi) * sg * sr / (2 * np.sqrt(sg**2 + sr**2) * I) * (
                scipy.special.erf((sg**2 * r + sr**2 * (I - g)) / (np.sqrt(2) * sg * sr * np.sqrt(sg**2 + sr**2)))
                - scipy.special.erf((sg**2 * (r - I) - sr**2 * g) / (np.sqrt(2) * sg * sr * np.sqrt(sg**2 + sr**2))))

            self.selectedDataSource.setMapping('ColourNorm', '1.0*colNorm')

        self.dataSources.append(self.selectedDataSource)

    elif 'fitResults_sigxl' in self.selectedDataSource.keys():
        self.selectedDataSource = inpFilt.MappingFilter(self.selectedDataSource)
        self.dataSources.append(self.selectedDataSource)

        self.selectedDataSource.setMapping('sig', 'fitResults_sigxl + fitResults_sigyu')
        self.selectedDataSource.setMapping('sig_d', 'fitResults_sigxl - fitResults_sigyu')

        self.selectedDataSource.dsigd_dz = -30.
        self.selectedDataSource.setMapping('fitResults_z0', 'dsigd_dz*sig_d')
    else:
        self.selectedDataSource = inpFilt.MappingFilter(self.selectedDataSource)
        self.dataSources.append(self.selectedDataSource)

    if 'Events' in self.selectedDataSource.resultsSource.h5f.root:
        self.events = self.selectedDataSource.resultsSource.h5f.root.Events[:]

        evKeyNames = set()
        for e in self.events:
            evKeyNames.add(e['EventName'])

        if b'ProtocolFocus' in evKeyNames:
            self.zm = piecewiseMapping.GeneratePMFromEventList(
                self.events, self.mdh, self.mdh.getEntry('StartTime'),
                self.mdh.getEntry('Protocol.PiezoStartPos'))
            self.z_focus = 1.e3 * self.zm(self.selectedDataSource['t'])
            #self.elv.SetCharts([('Focus [um]', self.zm, 'ProtocolFocus'),])

            self.selectedDataSource.z_focus = self.z_focus
            self.selectedDataSource.setMapping('focus', 'z_focus')

        if 'ScannerXPos' in evKeyNames:
            x0 = 0
            if 'Positioning.Stage_X' in self.mdh.getEntryNames():
                x0 = self.mdh.getEntry('Positioning.Stage_X')
            self.xm = piecewiseMapping.GeneratePMFromEventList(
                self.elv.eventSource, self.mdh, self.mdh.getEntry('StartTime'), x0, 'ScannerXPos', 0)

            self.selectedDataSource.scan_x = 1.e3 * self.xm(self.selectedDataSource['t'] - .01)
            self.selectedDataSource.setMapping('ScannerX', 'scan_x')
            self.selectedDataSource.setMapping('x', 'x + scan_x')

        if 'ScannerYPos' in evKeyNames:
            y0 = 0
            if 'Positioning.Stage_Y' in self.mdh.getEntryNames():
                y0 = self.mdh.getEntry('Positioning.Stage_Y')
            self.ym = piecewiseMapping.GeneratePMFromEventList(
                self.elv.eventSource, self.mdh, self.mdh.getEntry('StartTime'), y0, 'ScannerYPos', 0)

            self.selectedDataSource.scan_y = 1.e3 * self.ym(self.selectedDataSource['t'] - .01)
            self.selectedDataSource.setMapping('ScannerY', 'scan_y')
            self.selectedDataSource.setMapping('y', 'y + scan_y')

        if 'ScannerXPos' in evKeyNames or 'ScannerYPos' in evKeyNames:
            self.imageBounds = ImageBounds.estimateFromSource(self.selectedDataSource)

    if not 'foreShort' in dir(self.selectedDataSource):
        self.selectedDataSource.foreShort = 1.

    if not 'focus' in self.selectedDataSource.mappings.keys():
        self.selectedDataSource.focus = np.zeros(self.selectedDataSource['x'].shape)

    #if we've done a 3d fit
    if 'fitResults_z0' in self.selectedDataSource.keys():
        self.selectedDataSource.setMapping('z', 'fitResults_z0 + foreShort*focus')
    else:
        self.selectedDataSource.setMapping('z', 'foreShort*focus')

    #print self.selectedDataSource.keys()

    # take a copy of the keys so we can pop entries while iterating (required on Python 3)
    for k in list(self.filterKeys.keys()):
        if not k in self.selectedDataSource.keys():
            self.filterKeys.pop(k)

    #print self.filterKeys

    self.RegenFilter()

    if 'Sample.Labelling' in self.mdh.getEntryNames():
        self.SpecFromMetadata()