Example #1
    def analysis_refresh_bruteforce(self):
        """Refresh points display from cluster-based analysis. This works in the crudest possible way by simply repeatedly
        re-reading the results file. This should also work for old style analysis, with the same caveats (below) about IO.
        
        WARNING: This has potential performance implications as there will be significant IO overhead to doing it this way!!!
                 Files should be closed when spooling is complete.
        
        TODO: - Allow incremental update (as is done in refresh_analysis above). Will probably need modification of PYMEDataServer.
              - Stop updates when spooling is complete (HOW? Check file size?)
              - What is the effect of the caches in clusterIO? Do we actually get the updated file?
              - Make sure this only gets called at a reasonable rate.
              - Add logic to call this (and make sure that self.results_filename is defined)
        
        """
        from PYME.IO import unifiedIO

        try:
            with unifiedIO.local_or_temp_filename(
                    self.results_filename
            ) as fn:  #download a copy of the file if needed, so that we can pass pytables a local filename
                self.dsviewer.pipeline.OpenFile(fn)
                self.numEvents = len(
                    self.dsviewer.pipeline.selectedDataSource['x'])

                #populate the fitResults member TODO - is this actually needed?
                self.fitResults = self.dsviewer.pipeline.selectedDataSource.resultsSource.fitResults  #FIXME - this is really fragile!

        except IOError:
            pass

        self.progPan.draw()
        self.progPan.Refresh()
        self.dsviewer.Refresh()
        self.dsviewer.update()
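
One way to address the "make sure this only gets called at a reasonable rate" TODO above is a simple time-based throttle around the brute-force refresh. The sketch below is illustrative only; the wrapper name, the _last_refresh attribute, and the 5-second default interval are assumptions, not part of the original class.

    def analysis_refresh_throttled(self, min_interval=5.0):
        """Call analysis_refresh_bruteforce() at most once every min_interval seconds."""
        import time
        now = time.monotonic()
        last = getattr(self, '_last_refresh', None)  # hypothetical attribute, used only for throttling
        if last is None or (now - last) >= min_interval:
            self._last_refresh = now
            self.analysis_refresh_bruteforce()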
Example #2
def test_mulithread_result_filing():
    import numpy as np
    from PYME.IO import clusterResults, unifiedIO
    import tables
    import posixpath
    import threading
    import time
    
    n_filings = 500
    n_per = np.random.randint(0, 100, n_filings)
    data = [np.ones(n_per[ind], dtype=[('a', '<f4'), ('b', '<f4')]) for ind in range(n_filings)]
    dest = 'pyme-cluster://TES1/__aggregate_h5r/_testing/test_result_filing.h5r'

    threads = []
    for ind in range(n_filings):
        t = threading.Thread(target=clusterResults.fileResults, 
                             args=(posixpath.join(dest, 'foo'), data[ind]))
        t.start()
        threads.append(t)
    
    for t in threads:
        t.join()

    time.sleep(5)

    with unifiedIO.local_or_temp_filename('pyme-cluster://TES1/_testing/test_result_filing.h5r') as f,\
        tables.open_file(f) as t:
        n_received = len(t.root.foo)
    
    assert n_received == np.sum(n_per)
Example #3
    def _loadClassifier(self):
        from PYME.Analysis import svmSegment
        from PYME.IO import unifiedIO
        if not (('_cf' in dir(self)) and
                (self._classifier == self.classifier)):
            self._classifier = self.classifier
            with unifiedIO.local_or_temp_filename(self.classifier) as fn:
                self._cf = svmSegment.svmClassifier(filename=fn)
Example #4
def test_mulithread_result_filing():
    # FIXME - this test is expected to fail as files should be created before multi-threaded aggregate operations
    # with enough of a delay between creation and access to ensure that the file is present in directory caches.
    import numpy as np
    from PYME.IO import clusterResults, unifiedIO
    import tables
    import posixpath
    import threading
    import time
    
    n_filings = 500
    n_per = np.random.randint(0, 100, n_filings)
    data = [np.ones(n_per[ind], dtype=[('a', '<f4'), ('b', '<f4')]) for ind in range(n_filings)]
    dest = 'pyme-cluster://TES1/__aggregate_h5r/_testing/test_result_filing.h5r'

    threads = []
    for ind in range(n_filings):
        t = threading.Thread(target=clusterResults.fileResults, 
                             args=(posixpath.join(dest, 'foo'), data[ind]))
        t.start()
        threads.append(t)
    
    for t in threads:
        t.join()

    time.sleep(5)

    with unifiedIO.local_or_temp_filename('pyme-cluster://TES1/_testing/test_result_filing.h5r') as f,\
        tables.open_file(f) as t:
        n_received = len(t.root.foo)
    
    assert n_received == np.sum(n_per)
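
A minimal sketch of the workaround implied by the FIXME above: file a small initial chunk to the aggregate target and wait before spawning the worker threads, so the file already exists in directory caches when the concurrent filings start. The dummy filing and the 5-second delay are illustrative assumptions (and the final assert would then need to account for the extra row).

    # inside the test, before the thread loop:
    clusterResults.fileResults(posixpath.join(dest, 'foo'),
                               np.ones(1, dtype=[('a', '<f4'), ('b', '<f4')]))
    time.sleep(5)  # give the cluster's directory caches time to see the new file
    # ... then start the threads and file data[ind] as above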
Example #5
def load_shiftmap(uri):
    """
    Helper function to handle I/O for the two versions of shift maps. Note that HDF is preferred.

    :param uri: str
        path or URL to the shiftmap-containing file (hdf, or [less ideal] json)
    :return: dict
        shiftmap
    """
    from PYME.IO import unifiedIO, tabular
    from PYME.IO.MetaDataHandler import HDFMDHandler
    import tables
    import json

    try:  # try loading shift map as hdf file
        with unifiedIO.local_or_temp_filename(uri) as f:
            t = tables.open_file(f)
            shift_map_source = tabular.HDFSource(t, 'shift_map')  # todo - is there a cleaner way to do this?
            shift_map_source.mdh = HDFMDHandler(t)

        # build dict of dicts so we can easily rebuild shiftfield objects in multiview.calc_shifts_for_points
        shift_map = {'shiftModel': shift_map_source.mdh['Multiview.shift_map.model']}
        legend = shift_map_source.mdh['Multiview.shift_map.legend']
        for l in legend.keys():
            keys = shift_map_source.keys()
            shift_map[l] = dict(zip(keys, [shift_map_source[k][legend[l]] for k in keys]))

        t.close()
    except tables.HDF5ExtError:  # file is probably saved as json (legacy)
        s = unifiedIO.read(uri)
        shift_map = json.loads(s)

    return shift_map
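
A hypothetical usage sketch for load_shiftmap; the URI below is an assumption, and in practice it would usually come from metadata (see the ShiftCorrect example below, which reads it from mdh['Shiftmap']).

shift_map = load_shiftmap('pyme-cluster:///shiftmaps/multiview_shifts.h5')
print(shift_map['shiftModel'])  # model class used to rebuild the shiftfield objects
print([k for k in shift_map.keys() if k != 'shiftModel'])  # one entry per channel in the legend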
Example #6
    def loadInput(self, filename, key='input'):
        """
        Load input data from a file and inject into namespace
        """
        from PYME.IO import unifiedIO
        import os

        extension = os.path.splitext(filename)[1]
        if extension in ['.h5r', '.hdf']:
            import tables
            from PYME.IO import h5rFile
            try:
                with unifiedIO.local_or_temp_filename(
                        filename) as fn, h5rFile.openH5R(
                            fn, mode='r')._h5file as h5f:
                    self._inject_tables_from_hdf5(key, h5f, fn, extension)
            except tables.exceptions.HDF5ExtError:  # access issue likely due to multiple processes
                if unifiedIO.is_cluster_uri(filename):
                    # try again, this time forcing access through the dataserver
                    # NOTE: it is unclear why this should work when local_or_temp_filename() doesn't
                    # as this still opens / copies the file independently, albeit in the same process as is doing the writing.
                    # The fact that this works is relying on one of: a quirk of the GIL, a quirk in HDF5 locking, or the fact
                    # that copying the file to a stream is much faster than opening it with pytables. The copy vs pytables open
                    # scenario would match what has been observed with old style spooling analysis where copying a file
                    # prior to opening in VisGUI would work more reliably than opening directly. This retains, however,
                    # an inherent race condition so we risk replacing a predictable failure with a less frequent one.
                    # TODO - consider whether h5r_part might be a better choice.
                    # FIXME: (DB) I'm not comfortable with having this kind of special case retry logic here, and would
                    # much prefer if we could find an alternative workaround (refactor into something like h5rFile.open_robust(),
                    # or just let this fail). Leaving it for the meantime to get chained recipes working, but we should revisit.
                    from PYME.IO import clusterIO
                    relative_filename, server_filter = unifiedIO.split_cluster_url(
                        filename)
                    file_as_bytes = clusterIO.get_file(
                        relative_filename,
                        serverfilter=server_filter,
                        local_short_circuit=False)
                    with tables.open_file('in-memory.h5',
                                          driver='H5FD_CORE',
                                          driver_core_image=file_as_bytes,
                                          driver_core_backing_store=0) as h5f:
                        self._inject_tables_from_hdf5(key, h5f, filename,
                                                      extension)
                else:
                    #not a cluster file, doesn't make sense to retry with cluster. Propagate exception to user.
                    raise

        elif extension == '.csv':
            logger.error('loading .csv not supported yet')
            raise NotImplementedError
        elif extension in ['.xls', '.xlsx']:
            logger.error('loading .xls not supported yet')
            raise NotImplementedError
        else:
            self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
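
A hypothetical usage sketch; the recipe object and filename are assumptions. The key argument controls which namespace entry (or entry prefix) the loaded data is injected under.

# assuming `recipe` is an instance of the class this method belongs to
recipe.loadInput('pyme-cluster:///analysis/run_0.h5r', key='localisations')
print(list(recipe.namespace.keys()))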
Example #7
def list_h5(filename):
    import tables
    from PYME.IO import MetaDataHandler
    from PYME.IO import tabular
    from PYME.IO import unifiedIO
    import json

    with unifiedIO.local_or_temp_filename(filename) as fn:
        with tables.open_file(fn, mode='r') as h5f:
            #make sure our hdf file gets closed

            try:
                mdh = MetaDataHandler.NestedClassMDHandler(
                    MetaDataHandler.HDFMDHandler(h5f))
                print('Metadata:\n____________')
                print(repr(mdh))
            except tables.FileModeError:  # Occurs if no metadata is found, since we opened the table in read-mode
                logger.warning(
                    'No metadata found, proceeding with empty metadata')
                mdh = MetaDataHandler.NestedClassMDHandler()

            print('\n\n')

            for t in h5f.list_nodes('/'):
                # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
                # PZF formatted image data, Image data which is not in an EArray, etc ...)
                # Note that EArray is only used for streaming data!
                # They should ideally be replaced with more comprehensive tests (potentially based on array or dataset
                # dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF should probably
                # also be improved / clarified - can we use hdf attributes to hint at the data intent? How do we support
                # > 3D data?

                if not isinstance(t, tables.Group):
                    print(t.name)
                    print('______________')

                    if isinstance(t, tables.VLArray):
                        data = h5f.get_node(h5f.root, t.name)
                        print('Ragged (VLArray) with %d rows' % len(data))
                        print('Row 0: %s' % data[0])

                    elif isinstance(t, tables.table.Table):
                        #  pipe our table into h5r or hdf source depending on the extension
                        data = h5f.get_node(h5f.root, t.name)

                        print('Table with %d rows\n dtype = %s' %
                              (len(data), data[0].dtype))

                    elif isinstance(t, tables.EArray):
                        data = h5f.get_node(h5f.root, t.name)

                        print('Image, shape = %s' % data.shape)

                    print('\n\n')
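
A hypothetical usage sketch; the filenames are assumptions. list_h5 only prints the metadata and a per-node summary, so there is nothing to capture.

list_h5('analysis_results.h5r')                     # local file
list_h5('pyme-cluster:///2020_01_01/series_0.hdf')  # or a cluster URI, fetched via unifiedIO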
Example #8
    def _loadNPY(self, filename):
        """Load numpy .npy data.
        
       
        """
        from PYME.IO import unifiedIO
        mdfn = self._findAndParseMetadata(filename)

        with unifiedIO.local_or_temp_filename(filename) as fn:
            self.data = numpy.load(fn)

        #from PYME.ParallelTasks.relativeFiles import getRelFilename
        self.seriesName = getRelFilename(filename)

        self.mode = 'default'
Example #9
    def execute(self, namespace):
        from PYME.Analysis.points import multiview
        from PYME.IO import unifiedIO, tabular
        from PYME.IO.MetaDataHandler import HDFMDHandler
        import tables
        import json

        inp = namespace[self.input_name]

        if 'mdh' not in dir(inp):
            raise RuntimeError('ShiftCorrect needs metadata')

        if self.shift_map_path == '':  # grab shift map from the metadata
            loc = inp.mdh['Shiftmap']
        else:
            loc = self.shift_map_path

        try:  # try loading shift map as hdf file
            with unifiedIO.local_or_temp_filename(loc) as f:
                t = tables.open_file(f)
                shift_map_source = tabular.HDFSource(
                    t,
                    'shift_map')  # todo - is there a cleaner way to do this?
                shift_map_source.mdh = HDFMDHandler(t)

            # build dict of dicts so we can easily rebuild shiftfield objects in multiview.calc_shifts_for_points
            shift_map = {
                'shiftModel': shift_map_source.mdh['Multiview.shift_map.model']
            }
            legend = shift_map_source.mdh['Multiview.shift_map.legend']
            for l in legend.keys():
                keys = shift_map_source.keys()
                shift_map[l] = dict(
                    zip(keys, [shift_map_source[k][legend[l]] for k in keys]))

            t.close()
        except tables.HDF5ExtError:  # file is probably saved as json (legacy)
            s = unifiedIO.read(loc)
            shift_map = json.loads(s)

        mapped = tabular.MappingFilter(inp)

        multiview.apply_shifts_to_points(mapped, shift_map)
        # propagate metadata
        mapped.mdh = inp.mdh
        mapped.mdh['Multiview.shift_map.location'] = loc

        namespace[self.output_name] = mapped
Example #10
    def _loadPSF(self, filename):
        """Load PYME .psf data.
        
        .psf files consist of a tuple containing the data and the voxelsize.
        """
        from PYME.IO import unifiedIO
        with unifiedIO.local_or_temp_filename(filename) as fn:
            self.data, vox = numpy.load(fn)
        self.mdh = MetaDataHandler.NestedClassMDHandler(MetaData.ConfocDefault)

        self.mdh.setEntry('voxelsize.x', vox.x)
        self.mdh.setEntry('voxelsize.y', vox.y)
        self.mdh.setEntry('voxelsize.z', vox.z)

        #from PYME.ParallelTasks.relativeFiles import getRelFilename
        self.seriesName = getRelFilename(filename)

        self.mode = 'psf'
Example #11
    def OpenFile(self, filename='', ds=None, clobber_recipe=True, **kwargs):
        """Open a file - accepts optional keyword arguments for use with files
        saved as .txt and .mat. These are:
            
            FieldNames: a list of names for the fields in the text file or
                        matlab variable.
            VarName:    the name of the variable in the .mat file which 
                        contains the data.
            SkipRows:   Number of header rows to skip for txt file data
            
            PixelSize:  Pixel size (nm), needed if coordinates are not already in nm
            
        """

        #close any files we had open previously
        while len(self.filesToClose) > 0:
            self.filesToClose.pop().close()

        # clear our state
        # nb - equivalent to clearing recipe namespace
        self.dataSources.clear()

        if clobber_recipe:
            # clear any processing modules from the pipeline
            # call with clobber_recipe = False in a 'Open a new file with the processing pipeline I've set up' use case
            # TODO: Add a "File-->Open [preserving recipe]" menu option or similar
            self.recipe.modules = []

        if 'zm' in dir(self):
            del self.zm
        self.filter = None
        self.mapping = None
        self.colourFilter = None
        self.events = None
        self.mdh = MetaDataHandler.NestedClassMDHandler()

        self.filename = filename

        if ds is None:
            from PYME.IO import unifiedIO  # TODO - what is the launch time penalty here for importing clusterUI and finding a nameserver?

            # load from file(/cluster, downloading a copy of the file if needed)
            with unifiedIO.local_or_temp_filename(filename) as fn:
                # TODO - check that loading isn't lazy (i.e. we need to make a copy of data in memory whilst in the
                # context manager in order to be safe with unifiedIO and cluster data). From a quick look, it would seem
                # that _ds_from_file() copies the data, but potentially keeps the file open which could be problematic.
                # This won't affect local file loading even if loading is lazy (i.e. shouldn't cause a regression)
                ds = self._ds_from_file(fn, **kwargs)
                self.events = getattr(ds, 'events', None)
                self.mdh.copyEntriesFrom(ds.mdh)

        # skip the MappingFilter wrapping, etc. in self.addDataSource and add this datasource as-is
        self.dataSources['FitResults'] = ds

        # Fit module specific filter settings
        # TODO - put all the defaults here and use a local variable rather than in __init__ (self.filterKeys is largely an artifact of pre-recipe based pipeline)
        if 'Analysis.FitModule' in self.mdh.getEntryNames():
            fitModule = self.mdh['Analysis.FitModule']
            if 'Interp' in fitModule:
                self.filterKeys['A'] = (5, 100000)
            if fitModule == 'SplitterShiftEstFR':
                self.filterKeys['fitError_dx'] = (0, 10)
                self.filterKeys['fitError_dy'] = (0, 10)

        if clobber_recipe:
            from PYME.recipes.localisations import ProcessColour, Pipelineify
            from PYME.recipes.tablefilters import FilterTable

            add_pipeline_variables = Pipelineify(
                self.recipe,
                inputFitResults='FitResults',
                pixelSizeNM=kwargs.get('PixelSize', 1.),
                outputLocalizations='Localizations')
            self.recipe.add_module(add_pipeline_variables)

            #self._get_dye_ratios_from_metadata()

            colour_mapper = ProcessColour(self.recipe,
                                          input='Localizations',
                                          output='colour_mapped')
            self.recipe.add_module(colour_mapper)
            self.recipe.add_module(
                FilterTable(self.recipe,
                            inputName='colour_mapped',
                            outputName='filtered_localizations',
                            filters={
                                k: list(v)
                                for k, v in self.filterKeys.items()
                                if k in ds.keys()
                            }))
        else:
            logger.warn(
                'Opening file without clobbering recipe, filter and ratiometric colour settings might not be handled properly'
            )
            # FIXME - should we update filter keys and/or make the filter more robust
            # FIXME - do we need to do anything about colour settings?

        self.recipe.execute()
        self.filterKeys = {}
        if 'filtered_localizations' in self.dataSources.keys():
            self.selectDataSource(
                'filtered_localizations')  #NB - this rebuilds the pipeline
        else:
            # TODO - replace / remove this fallback with something better. This is currently required
            # when we use/abuse the pipeline in dh5view, but that should ideally be replaced with
            # something cleaner. This (and case above) should probably also be conditional on `clobber_recipe`
            # as if opening with an existing recipe we would likely want to keep selectedDataSource constant as well.
            self.selectDataSource('FitResults')

        # FIXME - we do this already in pipelinify, maybe we can avoid doubling up?
        self.ev_mappings, self.eventCharts = _processEvents(
            ds, self.events, self.mdh)  # extract information from any events
        # Retrieve or estimate image bounds
        if False:  # 'imgBounds' in kwargs.keys():
            # TODO - why is this disabled? Current usage would appear to be when opening from LMAnalysis
            # during real-time localization, to force image bounds to match raw data, but also potentially useful
            # for other scenarios where metadata is not fully present.
            self.imageBounds = kwargs['imgBounds']
        elif ('scanx' not in self.selectedDataSource.keys()
              or 'scany' not in self.selectedDataSource.keys()
              ) and 'Camera.ROIWidth' in self.mdh.getEntryNames():
            self.imageBounds = ImageBounds.extractFromMetadata(self.mdh)
        else:
            self.imageBounds = ImageBounds.estimateFromSource(
                self.selectedDataSource)
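
A hypothetical usage sketch for the .txt/.mat keyword arguments described in the docstring above; the pipeline instance, the filename, and the column names are assumptions.

# assuming `pipeline` is an instance of the class this method belongs to
pipeline.OpenFile('localisations.txt',
                  FieldNames=['x', 'y', 't', 'A'],  # column names for the text file
                  SkipRows=1,                       # number of header rows to skip
                  PixelSize=117.)                   # pixel size in nm, if coordinates are in pixels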
Example #12
    def loadInput(self, filename, key='input'):
        """Load input data from a file and inject into namespace

        Currently handles images (anything you can open in dh5view) and HDF5 tables
        (.h5r / .h5 / .hdf). TODO - extend to other types (e.g. csv, xls).
        """
        #modify this to allow for different file types - currently only supports images
        from PYME.IO import unifiedIO
        import os
        extension = os.path.splitext(filename)[1]
        if extension in ['.h5r', '.h5', '.hdf']:
            import tables
            from PYME.IO import MetaDataHandler
            from PYME.IO import tabular

            with unifiedIO.local_or_temp_filename(filename) as fn:
                with tables.open_file(fn, mode='r') as h5f:
                    #make sure our hdf file gets closed

                    key_prefix = '' if key == 'input' else key + '_'

                    try:
                        mdh = MetaDataHandler.NestedClassMDHandler(
                            MetaDataHandler.HDFMDHandler(h5f))
                    except tables.FileModeError:  # Occurs if no metadata is found, since we opened the table in read-mode
                        logger.warning(
                            'No metadata found, proceeding with empty metadata'
                        )
                        mdh = MetaDataHandler.NestedClassMDHandler()

                    for t in h5f.list_nodes('/'):
                        # FIXME - The following isinstance tests are not very safe (and badly broken in some cases e.g.
                        # PZF formatted image data, Image data which is not in an EArray, etc ...)
                        # Note that EArray is only used for streaming data!
                        # They should ideally be replaced with more comprehensive tests (potentially based on array or dataset
                        # dimensionality and/or data type) - i.e. duck typing. Our strategy for images in HDF should probably
                        # also be improved / clarified - can we use hdf attributes to hint at the data intent? How do we support
                        # > 3D data?

                        if isinstance(t, tables.VLArray):
                            from PYME.IO.ragged import RaggedVLArray

                            rag = RaggedVLArray(
                                h5f, t.name, copy=True
                            )  #force an in-memory copy so we can close the hdf file properly
                            rag.mdh = mdh

                            self.namespace[key_prefix + t.name] = rag

                        elif isinstance(t, tables.table.Table):
                            #  pipe our table into h5r or hdf source depending on the extension
                            tab = tabular.H5RSource(
                                h5f, t.name
                            ) if extension == '.h5r' else tabular.HDFSource(
                                h5f, t.name)
                            tab.mdh = mdh

                            self.namespace[key_prefix + t.name] = tab

                        elif isinstance(t, tables.EArray):
                            # load using ImageStack._loadh5, which finds metadata
                            im = ImageStack(filename=filename, haveGUI=False)
                            # assume image is the main table in the file and give it the named key
                            self.namespace[key] = im

        elif extension == '.csv':
            logger.error('loading .csv not supported yet')
            raise NotImplementedError
        elif extension in ['.xls', '.xlsx']:
            logger.error('loading .xls not supported yet')
            raise NotImplementedError
        else:
            self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
Example #13
    def _load_model(self):
        from keras.models import load_model
        from PYME.IO import unifiedIO
        if not getattr(self, '_model_name', None) == self.model:
            self._model_name = self.model
            with unifiedIO.local_or_temp_filename(self._model_name) as fn:
                self._model = load_model(fn)