def getSlice(self, ind):
    frameName = '%s/frame%05d.pzf' % (self.sequenceName, ind)
    sl = PZFFormat.loads(clusterIO.get_file(frameName, self.clusterfilter))[0]
    #print sl.shape, sl.dtype
    return sl.squeeze()
def sendData(self, wfile, bpoint=0, epoint=0):
    """Send the file to the client. Literally."""
    data = clusterIO.get_file(self.fsname)
    f_size = len(data)
    #st = clusterIO.stat(self.fsname)
    #with open(self.fsname, 'rb') as f:
    f = BytesIO(data)  #TODO - does this make sense?

    writ = 0
    # for send Range xxx-xxx
    if bpoint > 0 and bpoint < f_size:
        f.seek(bpoint)

    if epoint > bpoint:
        if epoint <= f_size:
            rsize = epoint - bpoint + 1
        else:
            rsize = f_size - bpoint
    else:
        rsize = f_size

    while writ < rsize:
        if (rsize - writ) < 65536:
            buf = f.read(rsize - writ)  # read only the bytes remaining in the requested range
        else:
            buf = f.read(65536)
        if not buf:
            break
        writ += len(buf)
        wfile.write(buf)
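# Illustration only (the numbers below are hypothetical, not from the source): the range arithmetic
# above treats epoint as an inclusive end point, hence the +1 when computing the number of bytes to send.
f_size = 100                       # pretend file length
bpoint, epoint = 10, 19            # a 'Range: bytes=10-19' style request
if epoint > bpoint:
    rsize = epoint - bpoint + 1 if epoint <= f_size else f_size - bpoint
else:
    rsize = f_size
assert rsize == 10                 # bytes 10..19 inclusive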
def loadInput(self, filename, key='input'):
    """ Load input data from a file and inject into namespace """
    from PYME.IO import unifiedIO
    import os

    extension = os.path.splitext(filename)[1]
    if extension in ['.h5r', '.hdf']:
        import tables
        from PYME.IO import h5rFile
        try:
            with unifiedIO.local_or_temp_filename(filename) as fn, h5rFile.openH5R(fn, mode='r')._h5file as h5f:
                self._inject_tables_from_hdf5(key, h5f, fn, extension)
        except tables.exceptions.HDF5ExtError:  # access issue, likely due to multiple processes
            if unifiedIO.is_cluster_uri(filename):
                # try again, this time forcing access through the dataserver
                # NOTE: it is unclear why this should work when local_or_temp_filename() doesn't,
                # as this still opens / copies the file independently, albeit in the same process as is
                # doing the writing. The fact that this works relies on either a quirk of the GIL, a quirk
                # in HDF5 locking, or the fact that copying the file to a stream is much faster than opening
                # it with pytables. The copy vs pytables-open scenario would match what has been observed with
                # old-style spooling analysis, where copying a file prior to opening it in VisGUI would work
                # more reliably than opening it directly. This retains, however, an inherent race condition,
                # so we risk replacing a predictable failure with a less frequent one.
                # TODO - consider whether h5r_part might be a better choice.
                # FIXME: (DB) I'm not comfortable with having this kind of special-case retry logic here, and
                # would much prefer if we could find an alternative workaround, refactor this into something
                # like h5rFile.open_robust(), or just let it fail. Leaving it for the meantime to get chained
                # recipes working, but we should revisit.
                from PYME.IO import clusterIO
                relative_filename, server_filter = unifiedIO.split_cluster_url(filename)
                file_as_bytes = clusterIO.get_file(relative_filename, serverfilter=server_filter,
                                                   local_short_circuit=False)
                with tables.open_file('in-memory.h5', driver='H5FD_CORE', driver_core_image=file_as_bytes,
                                      driver_core_backing_store=0) as h5f:
                    self._inject_tables_from_hdf5(key, h5f, filename, extension)
            else:
                # not a cluster file; it doesn't make sense to retry, so propagate the exception to the user
                raise

    elif extension == '.csv':
        logger.error('loading .csv not supported yet')
        raise NotImplementedError
    elif extension in ['.xls', '.xlsx']:
        logger.error('loading .xls not supported yet')
        raise NotImplementedError
    else:
        self.namespace[key] = ImageStack(filename=filename, haveGUI=False)
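# Hypothetical usage sketch - `recipe` stands in for whatever recipe-like object defines loadInput
# above, and the paths are illustrative. loadInput dispatches on the file extension: .h5r/.hdf go
# through pytables (with the cluster retry above), .csv/.xls currently raise NotImplementedError,
# and anything else is loaded into the namespace as an ImageStack.
#
#   recipe.loadInput('pyme-cluster://TEST/analysis/series_0.h5r', key='localisations')
#   recipe.loadInput('/local/data/stack.tif', key='raw_image')   # falls back to ImageStack
#   print(recipe.namespace['localisations'])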
def getEvents(self):
    from PYME.IO import events
    import json
    try:
        ev = json.loads(clusterIO.get_file(self.eventFileName, self.clusterfilter, timeout=10))
        return events.EventLogger.list_to_array(ev)
    except (IOError, ValueError):
        # our series might not have any events
        return []
def test_single_put():
    testdata = b'foo bar\n'  # put_file expects bytes (as in test_put below)
    t = time.time()
    clusterIO.put_file('_testing/test.txt', testdata, 'TEST')
    print('putting a small file took %3.5f s' % (time.time() - t))

    t = time.time()
    clusterIO.put_file('_testing/test1.txt', testdata, 'TEST')
    print('putting a second small file took %3.5f s' % (time.time() - t))

    t = time.time()
    retrieved = clusterIO.get_file('_testing/test.txt', 'TEST')
    print('retrieving a small file took %3.5f s' % (time.time() - t))
def local_or_named_temp_filename(url):
    """
    Riff on PYME.IO.clusterIO.local_or_temp_filename, but one which returns a filename with a
    matching file stub to the original rather than a random temporary filename.

    Parameters
    ----------
    url : str
        local path or pyme-cluster url

    Yields
    ------
    str
        path to a (temporary) file so `url` can be loaded using modules which expect a local filename
    """
    from PYME.IO.FileUtils import nameUtils
    from PYME.IO import unifiedIO
    import tempfile

    filename = nameUtils.getFullExistingFilename(url)
    if os.path.exists(filename):
        yield filename
    elif unifiedIO.is_cluster_uri(url):
        from PYME.IO import clusterIO
        name, clusterfilter = unifiedIO.split_cluster_url(filename)

        localpath = clusterIO.get_local_path(name, clusterfilter)
        if localpath:
            yield localpath
        else:
            ext = os.path.splitext(name)[-1]
            with tempfile.TemporaryDirectory() as temp_dir:
                with open(os.path.join(temp_dir, os.path.split(name)[-1]), 'wb') as f:
                    s = clusterIO.get_file(name, clusterfilter)
                    f.write(s)
                    f.flush()
                    yield f.name
    else:
        raise IOError('Path "%s" could not be found' % url)
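# Usage sketch, assuming local_or_named_temp_filename is wrapped with contextlib.contextmanager
# (the generator above yields exactly once, as that decorator expects); the URL and loader below are
# illustrative, not from the source:
#
#   from contextlib import contextmanager
#
#   with contextmanager(local_or_named_temp_filename)('pyme-cluster://TEST/analysis/stack.tif') as fn:
#       data = some_local_only_loader(fn)   # hypothetical loader that needs a real local path
#   # any temporary copy is removed when the TemporaryDirectory context exits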
def getEvents(self):
    import pandas as pd  # defer pandas import for as long as possible
    try:
        #return json.loads(clusterIO.getFile(eventFileName, self.clusterfilter))
        ev = pd.read_json(clusterIO.get_file(self.eventFileName, self.clusterfilter))

        if len(ev) == 0:
            return []

        ev.columns = ['EventName', 'EventDescr', 'Time']

        evts = np.empty(len(ev), dtype=[('EventName', 'S32'), ('Time', 'f8'), ('EventDescr', 'S256')])
        evts['EventName'] = ev['EventName']
        evts['EventDescr'] = ev['EventDescr']
        evts['Time'] = ev['Time']

        return evts
    except (IOError, ValueError):
        # our series might not have any events
        return []
def __init__(self, url, queue=None):
    self.seriesName = url
    #print url
    self.clusterfilter = url.split('://')[1].split('/')[0]
    #print self.clusterfilter
    self.sequenceName = url.split('://%s/' % self.clusterfilter)[1]
    #print self.sequenceName
    self.lastShapeTime = 0

    mdfn = '/'.join([self.sequenceName, 'metadata.json'])
    #print mdfn

    self.mdh = MetaDataHandler.NestedClassMDHandler()
    self.mdh.update(json.loads(clusterIO.get_file(mdfn, self.clusterfilter)))

    self.fshape = None  #(self.mdh['Camera.ROIWidth'],self.mdh['Camera.ROIHeight'])

    self._getNumFrames()
def file(request, filename):
    type = request.GET.get('type', 'raw')
    #print 'file'
    if type == 'raw':
        return HttpResponse(clusterIO.get_file(filename, use_file_cache=False), content_type='')
    elif type in ['tiff', 'h5']:
        from PYME.IO import image
        import tempfile

        img = image.ImageStack(filename='pyme-cluster://%s/%s' % (clusterIO.local_serverfilter,
                                                                   filename.rstrip('/')), haveGUI=False)

        if type == 'tiff':
            ext = '.tif'
        else:
            ext = '.' + type

        fn = os.path.splitext(os.path.split(filename.rstrip('/'))[-1])[0] + ext

        # note: we are being a bit tricky here to ensure our temporary file gets deleted when we are done
        # 1) We create the temporary file using the tempfile module. This gets automagically deleted when
        #    we close the file
        # 2) We pass the filename of the temporary file to img.Save. This means that a second file object /
        #    file handle gets created, the contents get written, and the file gets closed
        #with tempfile.NamedTemporaryFile(mode='w+b', suffix=ext) as outf:
        # don't use a context manager as this closes our file prematurely - rely on a cascading close through
        # HTTPResponse and FileWrapper instead
        outf = tempfile.NamedTemporaryFile(mode='w+b', suffix=ext)
        img.Save(outf.name)

        # seek to update the temporary file (so that it knows the new length)
        outf.seek(0)

        wrapper = FileWrapper(outf)
        response = StreamingHttpResponse(wrapper, content_type='image/%s' % ext.lstrip('.'))
        response['Content-Disposition'] = 'attachment; filename=%s' % fn
        response['Content-Length'] = os.path.getsize(outf.name)
        return response
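# Hypothetical client-side sketch - the route mapping onto this view is an assumption; only the
# 'type' query parameter comes from the view above. 'raw' streams the stored bytes unchanged,
# while 'tiff'/'h5' convert the series through ImageStack before download:
#
#   import requests
#   r = requests.get('http://clusternode:8080/files/path/to/series', params={'type': 'tiff'})
#   with open('series.tif', 'wb') as f:
#       f.write(r.content)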
def __init__(self, url, queue=None):
    self.seriesName = url
    #print url
    self.clusterfilter = url.split('://')[1].split('/')[0]
    #print self.clusterfilter
    self.sequenceName = url.split('://%s/' % self.clusterfilter)[1]
    #print self.sequenceName
    self.lastShapeTime = 0

    mdfn = '/'.join([self.sequenceName, 'metadata.json'])
    #print mdfn

    self.mdh = MetaDataHandler.NestedClassMDHandler()
    self.mdh.update(json.loads(clusterIO.get_file(mdfn, self.clusterfilter)))

    self.fshape = None  #(self.mdh['Camera.ROIWidth'],self.mdh['Camera.ROIHeight'])

    self._getNumFrames()

    # if the series is complete when we start, we don't need to update the number of slices
    self._complete = clusterIO.exists(self.eventFileName, self.clusterfilter)
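# Usage sketch - the class and module names are assumptions (this __init__ reads like a cluster-backed
# DataSource, e.g. something along the lines of PYME.IO.DataSources.ClusterPZFDataSource.DataSource).
# The URL is split into a serverfilter and a sequence name, metadata.json is fetched up front, and
# completeness is inferred from the presence of the events file:
#
#   ds = DataSource('PYME-CLUSTER://TEST/2024_03_01/series_000')   # illustrative URL
#   print(ds.mdh['Camera.ROIWidth'], ds._complete)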
def test_put():
    testdata = b'foo bar\n'
    clusterIO.put_file('_testing/test.txt', testdata, 'TES1')
    retrieved = clusterIO.get_file('_testing/test.txt', 'TES1')

    assert testdata == retrieved
def _load(self, filename):
    from PYME.IO import clusterIO, PZFFormat
    s = clusterIO.get_file(filename)
    return PZFFormat.loads(s)[0].squeeze()