def _ds_from_file(self, filename, **kwargs):
    """
    Load a data set from a file, choosing the reader from the file extension.

    Side effects on ``self``: HDF file handles opened here are appended to
    ``self.filesToClose``; for ``.h5r`` files containing drift data a
    'Fiducials' data source is added (and ``self.driftInputMapping`` set);
    for ``.hdf`` files the tables are injected through
    ``self.recipe._inject_tables_from_hdf5``.

    Parameters
    ----------
    filename : str
        Path of the file to load. ``.h5r``, ``.hdf``, ``.mat`` and ``.csv``
        are handled specifically; anything else is assumed to be a tab (or
        other whitespace) delimited text file.
    kwargs : any additional arguments (see OpenFile)
        Keys read by this method:

        ``FieldNames``
            Column names. Required for text / csv files and for old-style
            ``.mat`` import. Optional for column-style ``.mat`` files, where
            the names are paired (in order) with the existing columns to
            rename them.
        ``VarName``
            If present, a ``.mat`` file is read with the old-style importer.
        ``Multichannel``
            If true, a column-style ``.mat`` file is read with the
            multi-column reader and a 'probe' field is added to any
            ``FieldNames`` mapping.
        ``SkipRows``
            Passed on as ``skiprows`` when reading text / csv files.

    Returns
    -------
    ds : tabular.TabularBase
        the datasource, complete with metadatahandler (``ds.mdh``, always a
        writable copy) and events (``ds.events``, sorted by 'Time') if found.

    Raises
    ------
    KeyError
        If ``FieldNames`` is missing for a file type which requires it.
    RuntimeError
        If an ``.hdf`` file was loaded but no data source is available to
        return.
    """
    mdh = MetaDataHandler.NestedClassMDHandler()
    events = None
    ext = os.path.splitext(filename)[1]

    if ext == '.h5r':
        import tables
        h5f = tables.open_file(filename)
        # registered so that the handle can be closed by whoever owns filesToClose
        self.filesToClose.append(h5f)

        try:
            ds = tabular.H5RSource(h5f)

            if 'DriftResults' in h5f.root:
                driftDS = tabular.H5RDSource(h5f)
                self.driftInputMapping = tabular.MappingFilter(driftDS)
                self.addDataSource('Fiducials', self.driftInputMapping)

                # explicit length test - ds['x'] may be an array, for which
                # plain truthiness is not a safe emptiness check
                if len(ds['x']) == 0:
                    self.selectDataSource('Fiducials')
        except Exception:
            # fallback to catch series that only have drift data.
            # `Exception` rather than a bare except so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            logger.exception('No fitResults table found')
            ds = tabular.H5RDSource(h5f)

            self.driftInputMapping = tabular.MappingFilter(ds)
            self.addDataSource('Fiducials', self.driftInputMapping)

        # really old files might not have metadata, so test for it before assuming
        if 'MetaData' in h5f.root:
            mdh = MetaDataHandler.HDFMDHandler(h5f)

        if ('Events' in h5f.root) and ('StartTime' in mdh.keys()):
            events = h5f.root.Events[:]

    elif filename.endswith('.hdf'):
        # recipe output - handles generically formatted .h5
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        # defer our IO to the recipe IO method
        # TODO - do this for other file types as well
        self.recipe._inject_tables_from_hdf5('', h5f, filename, '.hdf')

        # loop through tables until we get one which defines x. If no table
        # defines x, take the last table to be added.
        # TODO make this logic better.
        ds = None
        for _, candidate in self.dataSources.items():
            ds = candidate
            if 'x' in ds.keys():
                # TODO - get rid of some of the grossness here
                mdh = getattr(ds, 'mdh', mdh)
                events = getattr(ds, 'events', events)
                break

        if ds is None:
            # previously surfaced as an UnboundLocalError further down
            raise RuntimeError('No data sources found after loading %r' % (filename,))

    elif ext == '.mat':
        # matlab file
        if 'VarName' in kwargs:
            # old style matlab import
            ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName'])
        else:
            multichannel = kwargs.get('Multichannel', False)
            if multichannel:
                ds = tabular.MatfileMultiColumnSource(filename)
            else:
                ds = tabular.MatfileColumnSource(filename)

            # check for column name mapping
            field_names = kwargs.get('FieldNames', None)
            if field_names:
                # work on a copy so the caller's list is left untouched
                field_names = list(field_names)
                if multichannel:
                    # don't forget to copy this field over
                    field_names.append('probe')
                # new name -> existing column, paired in order
                ds = tabular.MappingFilter(ds, **dict(zip(field_names, ds.keys())))

    else:
        # text file: comma delimited for .csv, otherwise assume it's a tab
        # (or other whitespace) delimited file and leave the delimiter unset
        reader_args = {}
        if ext == '.csv':
            # special case for csv files - tell np.loadtxt to use a comma
            # rather than whitespace as a delimiter
            reader_args['delimiter'] = ','
        if 'SkipRows' in kwargs:
            reader_args['skiprows'] = kwargs['SkipRows']

        ds = tabular.TextfileSource(filename, kwargs['FieldNames'], **reader_args)

    # make sure mdh is writable (file-based might not be)
    ds.mdh = MetaDataHandler.NestedClassMDHandler(mdToCopy=mdh)

    if events is not None:
        # only set the .events attribute if we actually have events.
        # ensure that events are sorted in increasing time order
        ds.events = events[np.argsort(events['Time'])]

    return ds
def _ds_from_file(self, filename, **kwargs):
    """
    Load a data set from a file, choosing the reader from the file extension.

    Side effects on ``self``: HDF file handles opened here are appended to
    ``self.filesToClose``; metadata found in ``.h5r`` / ``.hdf`` files is
    copied into ``self.mdh``; events found are stored in ``self.events``;
    for ``.h5r`` files containing drift data a 'Fiducials' data source is
    added (and ``self.driftInputMapping`` set); every table in an ``.hdf``
    file is added as a data source under the table's name.

    Parameters
    ----------
    filename : str
        Path of the file to load. ``.h5r``, ``.hdf``, ``.mat`` and ``.csv``
        are handled specifically; anything else is assumed to be a tab (or
        other whitespace) delimited text file.
    kwargs : any additional arguments (see OpenFile)
        Keys read by this method:

        ``FieldNames``
            Column names. Required for text / csv files and for old-style
            ``.mat`` import.
        ``VarName``
            If present, a ``.mat`` file is read with the old-style importer.
        ``SkipRows``
            Passed on as ``skiprows`` when reading text / csv files.

    Returns
    -------
    ds : the dataset

    Raises
    ------
    KeyError
        If ``FieldNames`` is missing for a file type which requires it.
    RuntimeError
        If an ``.hdf`` file was loaded but no data source is available to
        return.
    """
    ext = os.path.splitext(filename)[1]

    if ext == '.h5r':
        import tables
        h5f = tables.open_file(filename)
        # registered so that the handle can be closed by whoever owns filesToClose
        self.filesToClose.append(h5f)

        try:
            ds = tabular.H5RSource(h5f)

            if 'DriftResults' in h5f.root:
                driftDS = tabular.H5RDSource(h5f)
                self.driftInputMapping = tabular.MappingFilter(driftDS)
                self.addDataSource('Fiducials', self.driftInputMapping)

                # explicit length test - ds['x'] may be an array, for which
                # plain truthiness is not a safe emptiness check
                if len(ds['x']) == 0:
                    self.selectDataSource('Fiducials')
        except Exception:
            # fallback to catch series that only have drift data.
            # `Exception` rather than a bare except so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            logger.exception('No fitResults table found')
            ds = tabular.H5RDSource(h5f)

            self.driftInputMapping = tabular.MappingFilter(ds)
            self.addDataSource('Fiducials', self.driftInputMapping)

        # catch really old files which don't have any metadata
        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        if ('Events' in h5f.root) and ('StartTime' in self.mdh.keys()):
            self.events = h5f.root.Events[:]

    elif filename.endswith('.hdf'):
        # recipe output - handles generically formatted .h5
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        for node in h5f.list_nodes('/'):
            if not isinstance(node, tables.table.Table):
                continue

            self.addDataSource(node.name, tabular.HDFSource(h5f, node.name))

            # FIXME - we shouldn't have a special case here
            if 'EventName' in node.description._v_names:
                # this does not handle multiple events tables per hdf file
                self.events = node[:]

        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        # loop through tables until we get one which defines x. If no table
        # defines x, take the last table to be added.
        # TODO make this logic better.
        ds = None
        for _, wrapped in self.dataSources.items():
            ds = wrapped.resultsSource
            if 'x' in ds.keys():
                break

        if ds is None:
            # previously surfaced as an UnboundLocalError at `return ds`
            raise RuntimeError('No data sources found after loading %r' % (filename,))

    elif ext == '.mat':
        # matlab file
        if 'VarName' in kwargs:
            # old style matlab import
            ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName'])
        else:
            ds = tabular.MatfileColumnSource(filename)

    else:
        # text file: comma delimited for .csv, otherwise assume it's a tab
        # (or other whitespace) delimited file and leave the delimiter unset
        reader_args = {}
        if ext == '.csv':
            # special case for csv files - tell np.loadtxt to use a comma
            # rather than whitespace as a delimiter
            reader_args['delimiter'] = ','
        if 'SkipRows' in kwargs:
            reader_args['skiprows'] = kwargs['SkipRows']

        ds = tabular.TextfileSource(filename, kwargs['FieldNames'], **reader_args)

    return ds