Example #1
0
    def _ds_from_file(self, filename, **kwargs):
        """
        Load a data set from a file, choosing the loader from the file extension.

        ``.h5r`` and ``.hdf`` files are opened with PyTables and the open file
        handle is appended to ``self.filesToClose``. ``.mat`` files are read
        with one of the ``tabular.Matfile*`` sources. Anything else is treated
        as delimited text (comma delimited for ``.csv``, whitespace delimited
        otherwise).

        Parameters
        ----------
        filename : str
        kwargs : any additional arguments (see OpenFile). Those read here are:

            FieldNames
                column names. Required for text files and for old style
                ``.mat`` imports; optional for column based ``.mat`` files,
                where it is used to rename the columns. The object passed in
                is not modified.
            VarName
                if present, selects the old style matlab import.
            Multichannel
                if true, use ``tabular.MatfileMultiColumnSource`` for ``.mat``
                files.
            SkipRows
                if present, forwarded to ``tabular.TextfileSource`` as
                ``skiprows``.

        Returns
        -------
        ds : tabular.TabularBase
            the datasource, complete with metadatahandler and events if found.

        Raises
        ------
        KeyError
            if ``FieldNames`` is needed by the chosen loader but was not given.
        RuntimeError
            if a ``.hdf`` file was loaded but no data source is registered
            afterwards.

        """
        mdh = MetaDataHandler.NestedClassMDHandler()
        events = None
        ext = os.path.splitext(filename)[1]

        if ext == '.h5r':
            import tables
            h5f = tables.open_file(filename)
            self.filesToClose.append(h5f)

            try:
                ds = tabular.H5RSource(h5f)

                if 'DriftResults' in h5f.root:
                    driftDS = tabular.H5RDSource(h5f)
                    self.driftInputMapping = tabular.MappingFilter(driftDS)
                    self.addDataSource('Fiducials', self.driftInputMapping)

                    if len(ds['x']) == 0:
                        self.selectDataSource('Fiducials')

            except Exception:
                # fallback to catch series that only have drift data.
                # `Exception` rather than a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                logger.exception('No fitResults table found')
                ds = tabular.H5RDSource(h5f)

                self.driftInputMapping = tabular.MappingFilter(ds)
                self.addDataSource('Fiducials', self.driftInputMapping)

            # really old files might not have metadata, so test for it before assuming
            if 'MetaData' in h5f.root:
                mdh = MetaDataHandler.HDFMDHandler(h5f)

            if ('Events' in h5f.root) and ('StartTime' in mdh.keys()):
                events = h5f.root.Events[:]

        elif filename.endswith('.hdf'):
            # recipe output - handles generically formatted .h5
            import tables
            h5f = tables.open_file(filename)
            self.filesToClose.append(h5f)

            # defer our IO to the recipe IO method - TODO - do this for other file types as well
            self.recipe._inject_tables_from_hdf5('', h5f, filename, '.hdf')

            # loop through tables until we get one which defines x. If no table
            # defines x, take the last table to be added.
            # TODO make this logic better.
            ds = None
            for _, candidate in self.dataSources.items():
                ds = candidate
                if 'x' in ds.keys():
                    # TODO - get rid of some of the grossness here
                    mdh = getattr(ds, 'mdh', mdh)
                    events = getattr(ds, 'events', events)
                    break

            if ds is None:
                # previously surfaced as an UnboundLocalError further down
                raise RuntimeError(
                    'No data sources found after loading %s' % filename)

        elif ext == '.mat':  # matlab file
            if 'VarName' in kwargs:
                # old style matlab import
                ds = tabular.MatfileSource(filename, kwargs['FieldNames'],
                                           kwargs['VarName'])
            else:
                multichannel = kwargs.get('Multichannel', False)
                if multichannel:
                    ds = tabular.MatfileMultiColumnSource(filename)
                else:
                    ds = tabular.MatfileColumnSource(filename)

                # check for column name mapping
                field_names = kwargs.get('FieldNames', None)
                if field_names:
                    # work on a copy so the caller's list is left untouched
                    field_names = list(field_names)
                    if multichannel:
                        # don't forget to copy this field over
                        field_names.append('probe')
                    ds = tabular.MappingFilter(
                        ds, **dict(zip(field_names, ds.keys())))

        else:
            # delimited text. Whitespace delimited unless the file is a .csv, in
            # which case tell np.loadtxt to use a comma as the delimiter.
            text_kwargs = {}
            if ext == '.csv':
                text_kwargs['delimiter'] = ','
            if 'SkipRows' in kwargs:
                text_kwargs['skiprows'] = kwargs['SkipRows']

            ds = tabular.TextfileSource(filename, kwargs['FieldNames'],
                                        **text_kwargs)

        # make sure mdh is writable (file-based might not be)
        ds.mdh = MetaDataHandler.NestedClassMDHandler(mdToCopy=mdh)
        if events is not None:
            # only set the .events attribute if we actually have events.
            # ensure that events are sorted in increasing time order
            ds.events = events[np.argsort(events['Time'])]

        return ds
Example #2
0
    def _ds_from_file(self, filename, **kwargs):
        """
        Load a data set from a file, choosing the loader from the file extension.

        ``.h5r`` and ``.hdf`` files are opened with PyTables and the open file
        handle is appended to ``self.filesToClose``. For these two formats any
        metadata found in the file is copied into ``self.mdh`` and any events
        found are stored in ``self.events``. ``.mat`` files are read with one of
        the ``tabular.Matfile*`` sources. Anything else is treated as delimited
        text (comma delimited for ``.csv``, whitespace delimited otherwise).

        Parameters
        ----------
        filename : str
        kwargs : any additional arguments (see OpenFile). Those read here are:

            FieldNames
                column names. Required for text files and for old style
                ``.mat`` imports.
            VarName
                if present, selects the old style matlab import.
            SkipRows
                if present, forwarded to ``tabular.TextfileSource`` as
                ``skiprows``.

        Returns
        -------

        ds : the dataset

        Raises
        ------
        KeyError
            if ``FieldNames`` is needed by the chosen loader but was not given.
        RuntimeError
            if a ``.hdf`` file was loaded but no data source is registered
            afterwards.

        """
        ext = os.path.splitext(filename)[1]

        if ext == '.h5r':
            import tables
            h5f = tables.open_file(filename)
            self.filesToClose.append(h5f)

            try:
                ds = tabular.H5RSource(h5f)

                if 'DriftResults' in h5f.root:
                    driftDS = tabular.H5RDSource(h5f)
                    self.driftInputMapping = tabular.MappingFilter(driftDS)
                    self.addDataSource('Fiducials', self.driftInputMapping)

                    if len(ds['x']) == 0:
                        self.selectDataSource('Fiducials')

            except Exception:
                # fallback to catch series that only have drift data.
                # `Exception` rather than a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                logger.exception('No fitResults table found')
                ds = tabular.H5RDSource(h5f)

                self.driftInputMapping = tabular.MappingFilter(ds)
                self.addDataSource('Fiducials', self.driftInputMapping)

            # catch really old files which don't have any metadata
            if 'MetaData' in h5f.root:
                self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

            if ('Events' in h5f.root) and ('StartTime' in self.mdh.keys()):
                self.events = h5f.root.Events[:]

        elif filename.endswith('.hdf'):
            # recipe output - handles generically formatted .h5
            import tables
            h5f = tables.open_file(filename)
            self.filesToClose.append(h5f)

            for node in h5f.list_nodes('/'):
                if isinstance(node, tables.table.Table):
                    self.addDataSource(node.name,
                                       tabular.HDFSource(h5f, node.name))

                    # FIXME - we shouldn't have a special case here
                    if 'EventName' in node.description._v_names:
                        # this does not handle multiple events tables per hdf file
                        self.events = node[:]

            if 'MetaData' in h5f.root:
                self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

            # loop through tables until we get one which defines x. If no table
            # defines x, take the last table to be added.
            # TODO make this logic better.
            ds = None
            for _, candidate in self.dataSources.items():
                ds = candidate.resultsSource
                if 'x' in ds.keys():
                    break

            if ds is None:
                # previously surfaced as an UnboundLocalError at the return
                raise RuntimeError(
                    'No data sources found after loading %s' % filename)

        elif ext == '.mat':  # matlab file
            if 'VarName' in kwargs:
                # old style matlab import
                ds = tabular.MatfileSource(filename, kwargs['FieldNames'],
                                           kwargs['VarName'])
            else:
                ds = tabular.MatfileColumnSource(filename)

        else:
            # delimited text. Whitespace delimited unless the file is a .csv, in
            # which case tell np.loadtxt to use a comma as the delimiter.
            text_kwargs = {}
            if ext == '.csv':
                text_kwargs['delimiter'] = ','
            if 'SkipRows' in kwargs:
                text_kwargs['skiprows'] = kwargs['SkipRows']

            ds = tabular.TextfileSource(filename, kwargs['FieldNames'],
                                        **text_kwargs)

        return ds