def load_shiftmap(uri):
    """
    Helper function to handle I/O of two versions of shiftmaps. Note that HDF is preferred.

    :param uri: str
        path or url to shiftmap-containing file (hdf, or [less ideal] json)
    :return: dict shiftmap
    """
    from PYME.IO import unifiedIO, tabular
    from PYME.IO.MetaDataHandler import HDFMDHandler
    import tables
    import json

    try:  # try loading shift map as hdf file
        with unifiedIO.local_or_temp_filename(uri) as f:
            t = tables.open_file(f)
            shift_map_source = tabular.HDFSource(t, 'shift_map')  # todo - is there a cleaner way to do this?
            shift_map_source.mdh = HDFMDHandler(t)

            # build dict of dicts so we can easily rebuild shiftfield objects in multiview.calc_shifts_for_points
            shift_map = {'shiftModel': shift_map_source.mdh['Multiview.shift_map.model']}
            legend = shift_map_source.mdh['Multiview.shift_map.legend']
            for l in legend.keys():
                keys = shift_map_source.keys()
                shift_map[l] = dict(zip(keys, [shift_map_source[k][legend[l]] for k in keys]))

            t.close()
    except tables.HDF5ExtError:  # file is probably saved as json (legacy)
        s = unifiedIO.read(uri)
        shift_map = json.loads(s)

    return shift_map
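# A minimal usage sketch for load_shiftmap (the path below is hypothetical):
# both the current HDF format and the legacy JSON format come back as the same
# dict, keyed by 'shiftModel' plus one sub-dict per legend entry, ready to be
# passed on to multiview.calc_shifts_for_points.
def _example_load_shiftmap():
    shift_map = load_shiftmap('/path/to/shift_map.hdf')
    assert 'shiftModel' in shift_map
    return shift_map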
def OnROIsFromFile(self, event):
    import wx
    from PYME.IO import tabular

    filename = wx.FileSelector("Load ROI Positions:", wildcard="*.hdf", flags=wx.FD_OPEN)
    if filename != '':
        rois = tabular.HDFSource(filename, tablename='roi_locations')
        rois = [(x, y) for x, y in zip(rois['x_um'], rois['y_um'])]
        self._add_ROIs(rois)
def set_roi_locations(self, locations_file, tablename='roi_locations'):
    from PYME.IO import tabular
    print(locations_file)

    if locations_file.endswith('.hdf'):
        locs = tabular.HDFSource(locations_file, tablename=tablename)
        self.roi_locations = [Location(x, y) for x, y in zip(locs['x_um'], locs['y_um'])]
        locs.close()
        del locs
    # elif locations_file.endswith('.csv'):
    #     self.roi_locations = tabular.textfileSource(locations_file)

    raise cherrypy.HTTPRedirect('/roi_list')
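# A minimal sketch (hypothetical path) of producing an ROI-locations file that
# both handlers above can read: a 'roi_locations' table with 'x_um'/'y_um'
# columns, written via tabular.ColumnSource.to_hdf (the same round-trip pattern
# exercised by test_h5r below).
def _example_write_roi_locations(filename='/path/to/rois.hdf'):
    import numpy as np
    from PYME.IO import tabular

    rois = tabular.ColumnSource(x_um=np.array([10.0, 20.5]),
                                y_um=np.array([5.0, 7.25]))
    rois.to_hdf(filename, tablename='roi_locations')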
def test_h5r():
    import os
    import shutil
    import tempfile
    import numpy as np
    from PYME.IO import tabular

    data = tabular.ColumnSource(x=1e3 * np.random.randn(1000),
                                y=1e3 * np.random.randn(1000),
                                z=1e3 * np.random.randn(1000))

    tempdir = tempfile.mkdtemp()
    filename = os.path.join(tempdir, 'test_hdf.hdf')
    try:
        data.to_hdf(filename, tablename='Data')

        inp = tabular.HDFSource(filename, tablename='Data')
        assert np.allclose(data['x'], inp['x'])
    finally:
        shutil.rmtree(tempdir)
def execute(self, namespace):
    from PYME.Analysis.points import multiview
    from PYME.IO import unifiedIO, tabular  # tabular is needed for HDFSource / MappingFilter below
    from PYME.IO.MetaDataHandler import HDFMDHandler
    import tables
    import json

    inp = namespace[self.input_name]
    if 'mdh' not in dir(inp):
        raise RuntimeError('ShiftCorrect needs metadata')

    if self.shift_map_path == '':  # grab shiftmap from the metadata
        loc = inp.mdh['Shiftmap']
    else:
        loc = self.shift_map_path

    try:  # try loading shift map as hdf file
        with unifiedIO.local_or_temp_filename(loc) as f:
            t = tables.open_file(f)
            shift_map_source = tabular.HDFSource(t, 'shift_map')  # todo - is there a cleaner way to do this?
            shift_map_source.mdh = HDFMDHandler(t)

            # build dict of dicts so we can easily rebuild shiftfield objects in multiview.calc_shifts_for_points
            shift_map = {'shiftModel': shift_map_source.mdh['Multiview.shift_map.model']}
            legend = shift_map_source.mdh['Multiview.shift_map.legend']
            for l in legend.keys():
                keys = shift_map_source.keys()
                shift_map[l] = dict(zip(keys, [shift_map_source[k][legend[l]] for k in keys]))

            t.close()
    except tables.HDF5ExtError:  # file is probably saved as json (legacy)
        s = unifiedIO.read(loc)  # use loc so metadata-supplied paths also work for the json fallback
        shift_map = json.loads(s)

    mapped = tabular.MappingFilter(inp)

    multiview.apply_shifts_to_points(mapped, shift_map)
    # propagate metadata
    mapped.mdh = inp.mdh
    mapped.mdh['Multiview.shift_map.location'] = loc

    namespace[self.output_name] = mapped
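# An illustrative sketch of the dict that execute() builds before handing it to
# multiview.apply_shifts_to_points. All channel and coefficient names here are
# hypothetical; the real ones come from the metadata legend (which maps each
# entry name to a row index) and from the 'shift_map' table columns.
def _example_shift_map():
    return {'shiftModel': 'some.shiftfield.Model',          # from Multiview.shift_map.model
            'Chan0->Chan1': {'a0': 0.0, 'a1': 1.2e-3},      # one row of fit coefficients
            'Chan0->Chan2': {'a0': 0.1, 'a1': -0.8e-3}}     # per legend entry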
def _ds_from_file(self, filename, **kwargs):
    """
    Loads a dataset from a file.

    Parameters
    ----------
    filename : str
    kwargs : any additional arguments (see OpenFile)

    Returns
    -------
    ds : the dataset
    """
    if os.path.splitext(filename)[1] == '.h5r':
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        try:
            ds = tabular.H5RSource(h5f)

            if 'DriftResults' in h5f.root:
                driftDS = tabular.H5RDSource(h5f)
                self.driftInputMapping = tabular.MappingFilter(driftDS)
                #self.dataSources['Fiducials'] = self.driftInputMapping
                self.addDataSource('Fiducials', self.driftInputMapping)

                if len(ds['x']) == 0:
                    self.selectDataSource('Fiducials')
        except:  # fallback to catch series that only have drift data
            logger.exception('No fitResults table found')
            ds = tabular.H5RDSource(h5f)

            self.driftInputMapping = tabular.MappingFilter(ds)
            #self.dataSources['Fiducials'] = self.driftInputMapping
            self.addDataSource('Fiducials', self.driftInputMapping)
            #self.selectDataSource('Fiducials')

        # catch really old files which don't have any metadata
        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        if ('Events' in h5f.root) and ('StartTime' in self.mdh.keys()):
            self.events = h5f.root.Events[:]

    elif filename.endswith('.hdf'):
        # recipe output - handles generically formatted .hdf
        import tables
        h5f = tables.open_file(filename)
        self.filesToClose.append(h5f)

        for t in h5f.list_nodes('/'):
            if isinstance(t, tables.table.Table):
                tab = tabular.HDFSource(h5f, t.name)
                self.addDataSource(t.name, tab)

                if 'EventName' in t.description._v_names:  # FIXME - we shouldn't have a special case here
                    self.events = t[:]  # this does not handle multiple events tables per hdf file

        if 'MetaData' in h5f.root:
            self.mdh.copyEntriesFrom(MetaDataHandler.HDFMDHandler(h5f))

        for dsname, ds_ in self.dataSources.items():
            # loop through tables until we get one which defines x; if no table
            # defines x, take the last table to be added
            # TODO make this logic better
            ds = ds_.resultsSource
            if 'x' in ds.keys():
                break

    elif os.path.splitext(filename)[1] == '.mat':  # matlab file
        if 'VarName' in kwargs.keys():
            # old style matlab import
            ds = tabular.MatfileSource(filename, kwargs['FieldNames'], kwargs['VarName'])
        else:
            ds = tabular.MatfileColumnSource(filename)

    elif os.path.splitext(filename)[1] == '.csv':
        # special case for csv files - tell np.loadtxt to use a comma rather than whitespace as a delimiter
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',', skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], delimiter=',')

    else:  # assume it's a tab (or other whitespace) delimited text file
        if 'SkipRows' in kwargs.keys():
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'], skiprows=kwargs['SkipRows'])
        else:
            ds = tabular.TextfileSource(filename, kwargs['FieldNames'])

    return ds
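# A minimal sketch of the generic '.hdf' branch above, outside the pipeline
# (filename hypothetical): each Table node becomes a tabular.HDFSource that can
# be indexed by column name. Columns are pulled before the file is closed.
def _example_open_hdf(filename='/path/to/recipe_output.hdf'):
    import tables
    from PYME.IO import tabular

    h5f = tables.open_file(filename)
    try:
        sources = {t.name: tabular.HDFSource(h5f, t.name)
                   for t in h5f.list_nodes('/')
                   if isinstance(t, tables.table.Table)}
        # keep the 'x' column of any localisation-like table
        return {name: src['x'] for name, src in sources.items() if 'x' in src.keys()}
    finally:
        h5f.close()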
def loadInput(self, filename, key='input'):
    """Load input data from a file and inject into namespace

    Currently only handles images (anything you can open in dh5view).
    TODO - extend to other types.
    """
    # modify this to allow for different file types - currently only supports images
    from PYME.IO import unifiedIO
    import os

    extension = os.path.splitext(filename)[1]
    if extension in ['.h5r', '.h5', '.hdf']:
        import tables
        from PYME.IO import MetaDataHandler
        from PYME.IO import tabular

        with unifiedIO.local_or_temp_filename(filename) as fn:
            with tables.open_file(fn, mode='r') as h5f:  # make sure our hdf file gets closed
                key_prefix = '' if key == 'input' else key + '_'

                try:
                    mdh = MetaDataHandler.NestedClassMDHandler(MetaDataHandler.HDFMDHandler(h5f))
                except tables.FileModeError:  # occurs if no metadata is found, since we opened the table in read-mode
                    logger.warning('No metadata found, proceeding with empty metadata')
                    mdh = MetaDataHandler.NestedClassMDHandler()

                for t in h5f.list_nodes('/'):
                    # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
                    # PZF formatted image data, Image data which is not in an EArray, etc ...)
                    # Note that EArray is only used for streaming data!
                    # They should ideally be replaced with more comprehensive tests (potentially based on array or
                    # dataset dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF
                    # should probably also be improved / clarified - can we use hdf attributes to hint at the data
                    # intent? How do we support > 3D data?

                    if isinstance(t, tables.VLArray):
                        from PYME.IO.ragged import RaggedVLArray
                        rag = RaggedVLArray(h5f, t.name, copy=True)  # force an in-memory copy so we can close the hdf file properly
                        rag.mdh = mdh

                        self.namespace[key_prefix + t.name] = rag

                    elif isinstance(t, tables.table.Table):
                        # pipe our table into h5r or hdf source depending on the extension
                        tab = tabular.H5RSource(h5f, t.name) if extension == '.h5r' else tabular.HDFSource(h5f, t.name)
                        tab.mdh = mdh

                        self.namespace[key_prefix + t.name] = tab

                    elif isinstance(t, tables.EArray):
                        # load using ImageStack._loadh5, which finds metadata
                        im = ImageStack(filename=filename, haveGUI=False)
                        # assume image is the main table in the file and give it the named key
                        self.namespace[key] = im

    elif extension == '.csv':
        logger.error('loading .csv not supported yet')
        raise NotImplementedError
    elif extension in ['.xls', '.xlsx']:
        logger.error('loading .xls not supported yet')
        raise NotImplementedError
    else:
        self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
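# A minimal usage sketch (filename hypothetical; 'recipe' assumed to be the
# object exposing loadInput and its namespace dict). With a non-default key,
# each table node is prefixed: an h5r 'FitResults' table would land at
# 'fits_FitResults'.
def _example_load_input(recipe):
    recipe.loadInput('/path/to/results.h5r', key='fits')
    return recipe.namespace['fits_FitResults']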
def _inject_tables_from_hdf5(self, key, h5f, filename, extension):
    """
    Search through hdf5 file nodes and add them to the recipe namespace

    Parameters
    ----------
    key : str
        base key name for loaded file components; if key is not the default
        'input', each file node will be loaded into the recipe namespace as
        `key`_`node_name`.
    h5f : file
        open hdf5 file
    filename : str
        full filename
    extension : str
        file extension, used here mainly to toggle which PYME.IO.tabular source
        is used for table nodes.
    """
    import tables
    from PYME.IO import MetaDataHandler, tabular

    key_prefix = '' if key == 'input' else key + '_'

    # Handle a 'MetaData' group as a special case
    # TODO - find/implement a more portable way of handling metadata in HDF (e.g. as .json in a blob) so that
    # non-python exporters have a chance of adding metadata
    if 'MetaData' in h5f.root:
        mdh = MetaDataHandler.NestedClassMDHandler(MetaDataHandler.HDFMDHandler(h5f))
    else:
        logger.warning('No metadata found, proceeding with empty metadata')
        mdh = MetaDataHandler.NestedClassMDHandler()

    events = None
    # handle an 'Events' table as a special case (so that it can be attached to subsequently loaded tables)
    # FIXME - this relies on a special/reserved table name and format and could raise name collision issues
    # when importing 3rd party / generic HDF
    # FIXME - do we really want to attach events (which will not get propagated through recipe modules)?
    if 'Events' in h5f.root:
        if 'EventName' in h5f.root.Events.description._v_names:
            # check that the event table is formatted as we expect
            if 'StartTime' in mdh.keys():
                events = h5f.root.Events[:]
            else:
                logger.warning('Acquisition events found in .hdf, but no "StartTime" in metadata')
        else:
            logger.warning('Table called "Events" found in .hdf does not match the signature for acquisition events, ignoring')

    for t in h5f.list_nodes('/'):
        # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
        # PZF formatted image data, Image data which is not in an EArray, etc ...)
        # Note that EArray is only used for streaming data!
        # They should ideally be replaced with more comprehensive tests (potentially based on array or dataset
        # dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF should probably
        # also be improved / clarified - can we use hdf attributes to hint at the data intent? How do we support
        # > 3D data?

        if getattr(t, 'name', None) == 'Events':
            # NB: This assumes we've handled this in the special case earlier, and blocks anything in a 3rd party
            # HDF events table from being seen.
            # TODO - do we really want to have so much special case stuff in our generic hdf handling? Are we sure
            # that events shouldn't be injected into the namespace (given that events do not propagate through
            # recipe modules)?
            continue

        elif isinstance(t, tables.VLArray):
            from PYME.IO.ragged import RaggedVLArray
            rag = RaggedVLArray(h5f, t.name, copy=True)  # force an in-memory copy so we can close the hdf file properly
            rag.mdh = mdh
            if events is not None:
                rag.events = events

            self.namespace[key_prefix + t.name] = rag

        elif isinstance(t, tables.table.Table):
            # pipe our table into h5r or hdf source depending on the extension
            tab = tabular.H5RSource(h5f, t.name) if extension == '.h5r' else tabular.HDFSource(h5f, t.name)
            tab.mdh = mdh
            if events is not None:
                tab.events = events

            self.namespace[key_prefix + t.name] = tab

        elif isinstance(t, tables.EArray):
            # load using ImageStack._loadh5
            # FIXME - ._loadh5 will load events lazily, which isn't great if we got here after
            # sending the file over clusterIO inside of a context manager -> force it through since we already found it
            im = ImageStack(filename=filename, mdh=mdh, events=events, haveGUI=False)
            # assume image is the main table in the file and give it the named key
            self.namespace[key] = im
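# A minimal sketch of how the method above is typically driven (path
# hypothetical; 'recipe' assumed to expose _inject_tables_from_hdf5 and a
# namespace dict): the file is opened read-only and closed once the in-memory
# sources have been built, which is why VLArray nodes are copied eagerly.
def _example_inject(recipe, filename='/path/to/results.h5r'):
    import os
    import tables

    with tables.open_file(filename, mode='r') as h5f:
        recipe._inject_tables_from_hdf5('input', h5f, filename,
                                        os.path.splitext(filename)[1])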