def CollapseStream(axis=0, name="collapse-image"):
    ''' This stream collapses 2D images to 1D curves by averaging along an
    axis.

    **Stream Inputs**

        image : 2d np.ndarray
            the 2D image

        mask : 2d np.ndarray
            optional mask
    '''
    def collapse(image, mask=None, axis=0):
        if mask is None:
            # with no mask, the normalization is the number of pixels
            # along the collapsed dimension
            norm = image.shape[axis]
        else:
            norm = np.sum(mask, axis=axis)
        cll = np.sum(image, axis=axis)
        res = cll / norm
        return dict(line=res, axis=axis)

    sin = sc.Stream(stream_name=name)
    sout = scs.map(collapse, sin, axis=axis)
    return sin, sout
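# A minimal usage sketch for CollapseStream (not part of the original source;
# it assumes the StreamDoc emission pattern used by the doctests elsewhere in
# this module):
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = CollapseStream(axis=0)
# >>> L = sout.sink_to_list()
# >>> img = np.ones((4, 6))
# >>> # averaging 4 rows of ones along axis 0 gives a length-6 line of ones
# >>> sin.emit(StreamDoc(kwargs=dict(image=img, mask=np.ones_like(img))))
# >>> # the collapsed 1d profile is then in L[0]['kwargs']['line']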
def PCAStream(Nimgs=100, n_components=16):
    ''' This runs principal component analysis on the last Nimgs images.

    **Stream Inputs**

        image : 2d np.ndarray
            the nth image

    **Stream Outputs**

        components : 2d np.ndarray
            the components

    Returns
    -------
    sin : Stream instance
        the input stream

    sout : Stream instance
        the output stream
    '''
    sin = sc.Stream(stream_name="PCA Stream")
    sout = sin.sliding_window(Nimgs)
    sout = scs.select(sout, ('image', 'data'))
    sout = scs.squash(sout)
    sout = scs.map(PCA_fit, sout, n_components=n_components)
    return sin, sout
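# A hedged sketch of driving PCAStream (assumes PCA_fit returns the
# 'components' described in the docstring above; the emission pattern mirrors
# the other streams in this module):
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = PCAStream(Nimgs=10, n_components=2)
# >>> L = sout.sink_to_list()
# >>> for _ in range(10):
# ...     sin.emit(StreamDoc(kwargs=dict(image=np.random.random((8, 8)))))
# >>> # once the sliding window has filled, L[-1] holds the PCA components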
def QPHIMapStream(bins=(800, 360)):
    ''' Transform the scattering image into a q, phi map.

    Parameters
    ----------
    bins : 2 tuple, optional
        the number of bins to divide into

    **Stream Inputs**

        image : 2d np.ndarray
            the image

        mask : 2d np.ndarray, optional
            the mask

        origin : 2 tuple
            the beam center in the image

        qmap : 2d np.ndarray
            the qmap of the image

    **Stream Outputs**

        sqphi : 2d np.ndarray
            the sqphi map

        qs : 1d np.ndarray
            the q values

        phis : 1d np.ndarray
            the phi values

    Returns
    -------
    sin : Stream instance
        the input stream (see Stream Inputs)

    sout : Stream instance
        the output stream (see Stream Outputs)

    Examples
    --------
    >>> bins = (3, 4)
    >>> sin, sout = QPHIMapStream(bins=bins)
    >>> L = sout.sink_to_list()
    >>> mask = None
    >>> img = np.random.random((10, 10))
    >>> origin = (3, 3)
    >>> sdoc = StreamDoc(kwargs=dict(image=img,
    ...                              origin=origin,
    ...                              mask=mask))
    >>> sin.emit(sdoc)
    '''
    sin = sc.Stream(stream_name="QPHI map Stream")
    sout = scs.map(qphiavg, sin, bins=bins)
    sout = scs.add_attributes(sout, stream_name="qphiavg")
    return sin, sout
def ThumbStream(blur=None, crop=None, resize=None):
    ''' Thumbnail stream.

    **Stream Inputs**

        image : 2d np.ndarray
            the image

    **Stream Outputs**

        thumb : 2d np.ndarray
            the processed thumbnail image

    Parameters
    ----------
    blur : float, optional
        the sigma of the Gaussian kernel to convolve the image with
        for smoothing
        default is None, no smoothing

    crop : 4 tuple of int, optional
        the boundaries to crop by
        default is None, no cropping

    resize : int, optional
        the factor to resize by
        for example, resize=2 performs 2x2 binning of the image

    Returns
    -------
    sin : Stream instance
        the stream input

    sout : Stream instance
        the stream output
    '''
    # TODO add flags to actually process into thumbs
    sin = sc.Stream(stream_name="Thumbnail Stream")
    sout = scs.add_attributes(sin, stream_name="thumb")
    sout = scs.map(_blur, sout, sigma=blur, remote=True)
    sout = scs.map(_crop, sout, crop=crop, remote=True)
    sout = scs.map(_resize, sout, resize=resize, remote=True)
    # change the key from image to thumb
    sout = scs.select(sout, ('image', 'thumb'))
    return sin, sout
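# A minimal usage sketch for ThumbStream (assumes _blur/_crop/_resize treat
# None as a no-op, which is what the defaults above imply; not part of the
# original source):
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = ThumbStream(blur=1.0, crop=None, resize=2)
# >>> L = sout.sink_to_list()
# >>> sin.emit(StreamDoc(kwargs=dict(image=np.random.random((100, 100)))))
# >>> # the smoothed, 2x2-binned result sits under the 'thumb' key:
# >>> # L[0]['kwargs']['thumb']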
def _read_kafka_accumulated(self, max_mesgs, strategy):
    data = {}
    mesg_count = 0
    stream = streamz.Stream()
    # the accumulator's state starts as `data` and the strategy mutates it
    # in place, which is how the accumulated result escapes the stream
    stream.accumulate(strategy, start=data)
    for message in self._consumer:
        mesg = self._parse_kafka_message(message)
        mesg_count += 1
        stream.emit(mesg)
        if self._is_requested_messages_read(message, max_mesgs, mesg_count):
            break
    return data
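# A hedged sketch of a `strategy` accumulator compatible with the call above.
# streamz passes (state, new_value) and keeps the return value as the next
# state; because `start=data` and the dict is mutated in place, the caller's
# `data` fills up as messages arrive. The 'key'/'value' message fields are
# assumptions, purely for illustration:
#
# >>> def keep_latest(state, mesg):
# ...     state[mesg['key']] = mesg['value']
# ...     return state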
def __init__(self, model, fields, parameters, dt, t=0, tmax=None,
             id=None, hook=null_hook, scheme=schemes.RODASPR,
             time_stepping=True, **kwargs):
    def intersection_kwargs(kwargs, function):
        """Inspect the function signature to identify the relevant keys
        in a dictionary of named parameters.
        """
        func_signature = inspect.signature(function)
        func_parameters = func_signature.parameters
        kwargs = {key: value
                  for key, value in kwargs.items()
                  if key in func_parameters}
        return kwargs

    kwargs["time_stepping"] = time_stepping
    self.id = str(uuid1())[:6] if not id else id
    self.model = model
    self.parameters = parameters
    self.fields = model.fields_template(**fields)
    self.t = t
    self.user_dt = self.dt = dt
    self.tmax = tmax
    self.i = 0
    self._stream = streamz.Stream()
    self._pprocesses = []

    self._scheme = scheme(model,
                          **intersection_kwargs(kwargs,
                                                scheme.__init__))
    # only wrap schemes that do not already do their own time stepping
    if (time_stepping and
            not isinstance(self._scheme, (schemes.RODASPR,
                                          schemes.ROS3PRL,
                                          schemes.ROS3PRw))):
        self._scheme = schemes.time_stepping(
            self._scheme,
            **intersection_kwargs(kwargs, schemes.time_stepping))
    self.status = 'created'

    self._total_running = 0
    self._last_running = 0
    self._created_timestamp = pendulum.now()
    self._started_timestamp = None
    self._last_timestamp = None
    self._actual_timestamp = pendulum.now()

    self._hook = hook
    self._container = None
    self._iterator = self.compute()
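# A hedged construction sketch, assuming the enclosing class is named
# Simulation; `MyModel`, the field names, and the parameter dict are
# hypothetical stand-ins. The only hard requirements visible above are a
# `fields_template(**fields)` factory on the model and a scheme class from
# the `schemes` module:
#
# >>> model = MyModel()  # hypothetical model exposing fields_template
# >>> simul = Simulation(model, fields=dict(x=x, U=U),
# ...                    parameters=dict(k=1), dt=.1, tmax=10)
# >>> # the constructor stores self.compute() as an iterator, so the
# >>> # simulation advances lazily as it is iterated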
def accumulate_to_df(logfile, accumulate_func):
    """
    Run an accumulator against a logfile, and return output in a dataframe.
    """
    stream = streamz.Stream()
    with open(logfile) as infile, io.StringIO() as outfile:
        # parse each JSON log line, accumulate, and write each emitted
        # record back out as one JSON line
        stream.map(json.loads).accumulate(
            accumulate_func, returns_state=True,
            start={}).sink(lambda e: outfile.write(json.dumps(e) + '\n'))
        for line in infile:
            stream.emit(line)
        outfile.seek(0)
        dataframe = pd.read_json(outfile, lines=True)
    dataframe.set_index('timestamp', inplace=True)
    return dataframe
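# A minimal example accumulator for accumulate_to_df. With
# returns_state=True, the function must return (state, emitted_value); here
# each emitted record carries a running event count. The 'timestamp' field is
# assumed present in each log record, and is required downstream because the
# dataframe is indexed on it; the logfile name is hypothetical:
#
# >>> def count_events(state, event):
# ...     n = state.get('n', 0) + 1
# ...     return {'n': n}, {'timestamp': event['timestamp'], 'n': n}
# >>> # df = accumulate_to_df('events.log', count_events)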
def PeakFindingStream(name='peakfind'):
    ''' Find peaks in 1d line data.

    **Stream Inputs**

        sqx : 1d np.ndarray
            the x values of the curve

        sqy : 1d np.ndarray
            the y values of the curve

    **Stream Outputs**

        model : lmfit.Model instance
            The model for the fit

        y_origin :

        inds_peak : list
            the peak indices

        xdata :

        ratio :

        ydata :

        wdata :

        bkgd :

        variance :

        variance_mean :

        peaksx :

        peaksy :

    Parameters
    ----------
    name : str, optional
        name of stream
    '''
    sin = sc.Stream(stream_name="Peak Finder")
    # pkfind stream
    sout = scs.map(call_peak, scs.select(sin, 'sqy', 'sqx'))
    sout = scs.add_attributes(sout, stream_name=name)
    return sin, sout
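# A hedged usage sketch (assumes call_peak consumes the selected 'sqy'/'sqx'
# kwargs; the emission pattern mirrors the doctests elsewhere in this module):
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = PeakFindingStream(name='peakfind')
# >>> L = sout.sink_to_list()
# >>> sqx = np.linspace(0, 1, 100)
# >>> sqy = np.exp(-(sqx - .5)**2 / .01)  # one Gaussian peak at q = 0.5
# >>> sin.emit(StreamDoc(kwargs=dict(sqx=sqx, sqy=sqy)))
# >>> # peak positions are then available as L[0]['kwargs']['peaksx'] etc.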
def ImageTaggingStream():
    ''' Creates an image tagging stream.

    This stream will take in an image and output a tag describing what
    the image is.

    **Stream Inputs**

        image : 2d np.ndarray
            the image to be tagged

    **Stream Outputs**

        tag_name : str
            the name of the tag for the image
    '''
    sin = sc.Stream(stream_name="Image Tagger")
    sout = scs.map(infer, scs.select(sin, 'image'))
    sout = scs.add_attributes(sout, stream_name="image-tag")
    return sin, sout
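# A minimal usage sketch (assumes `infer` accepts the image selected from the
# StreamDoc and returns the tag; mirrors the doctest style of this module):
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = ImageTaggingStream()
# >>> L = sout.sink_to_list()
# >>> sin.emit(StreamDoc(kwargs=dict(image=np.random.random((10, 10)))))
# >>> # the tag is then available as L[0]['kwargs']['tag_name']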
def AngularCorrelatorStream(bins=(800, 360)):
    ''' Stream to run angular correlations.

    Parameters
    ----------
    bins : tuple, optional
        the number of bins in q and phi

    **Stream Inputs**

        image : 2d np.ndarray
            the image to run the angular correlations on

        mask : 2d np.ndarray
            the mask

        origin : 2 tuple
            the beam center of the image

        method : string, optional
            the method to use for the angular correlations
            defaults to 'bgest'

        q_map :
            the q_map to be used

    Returns
    -------
    sin : Stream instance
        the stream input

    sout : Stream instance
        the stream output
    '''
    # TODO : Allow optional kwargs in streams
    sin = sc.Stream(stream_name="Angular Correlation")
    sout = scs.select(sin, ('image', 'image'), ('mask', 'mask'),
                      ('origin', 'origin'), ('q_map', 'r_map'))
    sout = scs.map(angular_corr, sout, bins=bins)
    sout = scs.add_attributes(sout, stream_name="angular-corr")
    return sin, sout
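# A hedged usage sketch; note that the select() above renames the incoming
# 'q_map' kwarg to 'r_map' before angular_corr sees it. The inputs below are
# dummies for illustration only:
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = AngularCorrelatorStream(bins=(100, 90))
# >>> L = sout.sink_to_list()
# >>> img = np.random.random((50, 50))
# >>> qmap = np.hypot(*np.meshgrid(np.arange(50) - 25, np.arange(50) - 25))
# >>> sin.emit(StreamDoc(kwargs=dict(image=img, mask=np.ones_like(img),
# ...                                origin=(25, 25), q_map=qmap)))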
def ImageStitchingStream(return_intermediate=False):
    ''' Image stitching.

    **Stream Inputs**

        image : 2d np.ndarray
            the image for the stitching

        mask : 2d np.ndarray
            the mask

        origin : 2 tuple
            the beam center

        stitchback : bool
            whether or not to stitch back to the previous image

    **Stream Outputs**

        image : 2d np.ndarray
            the stitched image

        mask : 2d np.ndarray
            the mask from the stitch

        origin : 2 tuple
            the beam center

        stitchback : bool
            whether or not to stitch back to the previous image

    Returns
    -------
    sin : Stream instance
        the input stream

    sout : Stream instance
        the output stream

    Parameters
    ----------
    return_intermediate : bool, optional
        decide whether to return intermediate results or not
        defaults to False

    Notes
    -----
    Any normalization of images (for ex: by exposure time) should be done
    before inputting to this stream.

    Examples
    --------
    >>> sin, sout = ImageStitchingStream()
    >>> L = sout.sink_to_list()
    >>> mask = np.ones((10, 10), dtype=np.int64)
    >>> img1 = np.ones_like(mask, dtype=float)
    >>> # 3 rows are higher
    >>> img1[2:4] = 2
    >>> # some arb value
    >>> origin1 = [2, 3]
    >>> # roll along zero axis
    >>> img2 = np.roll(img1, 2, axis=0)
    >>> # rolled by two
    >>> origin2 = [2+2, 3]
    >>> # first image, stitchback can be anything
    >>> sdoc1 = StreamDoc(kwargs=dict(mask=mask, image=img1,
    ...                               origin=origin1,
    ...                               stitchback=False))
    >>> sin.emit(sdoc1)
    >>> # emit a second image and it will be stitched
    >>> sdoc2 = StreamDoc(kwargs=dict(mask=mask, image=img2,
    ...                               origin=origin2,
    ...                               stitchback=True))
    >>> sin.emit(sdoc2)
    >>> # A new image with False stitchback will have the output
    >>> # stream output a result
    >>> img3 = np.random.random((10, 10))
    >>> origin3 = (0, 0)
    >>> sdoc3 = StreamDoc(kwargs=dict(mask=mask, image=img3,
    ...                               origin=origin3,
    ...                               stitchback=False))
    >>> sin.emit(sdoc3)
    >>> len(L) == 1
    True
    >>> # the stitched image is here:
    >>> img = L[0]['kwargs']['image']
    '''
    # TODO : add state. When False returned, need a reason why
    def validate(x):
        if not hasattr(x, 'kwargs'):
            raise ValueError("No kwargs")
        kwargs = x['kwargs']
        expected = ['mask', 'origin', 'stitchback', 'image']
        for key in expected:
            if key not in kwargs:
                message = "{} not in kwargs".format(key)
                raise ValueError(message)
        if not isinstance(kwargs['mask'], np.ndarray):
            message = "mask is not array"
            raise ValueError(message)
        if not isinstance(kwargs['image'], np.ndarray):
            message = "image is not array"
            raise ValueError(message)
        if len(kwargs['origin']) != 2:
            message = "origin not length 2"
            raise ValueError(message)
        return x

    # TODO : remove the add_attributes part and just keep stream_name
    sin = sc.Stream(stream_name="Image Stitching Stream")
    # sout = sc.map(sin, validate)
    # TODO : remove compute requirement
    # TODO : incomplete
    sout = scs.add_attributes(sin, stream_name="stitch")
    sout = scs.select(sout, ('image', None), ('mask', None),
                      ('origin', None), ('stitchback', None))

    # put all args into a tuple
    def pack(*args):
        return args

    sout = scs.map(pack, sout)
    sout = scs.accumulate(_xystitch_accumulate, sout)
    sout = scs.map(scs.star(_xystitch_result), sout)

    def stitchbackcomplete(xtuple):
        ''' Only emit images whose stitch is complete and which involved
            more than one image.

            NOTE : *Only* the bool True activates a stitch; the integer 1
            does not count. This is handled by checking 'is True' and
            'is not True'.
        '''
        # the previous stitchback must have been True for the stitch to
        # have involved more than one image
        prev = xtuple[0]['kwargs']['stitchback']
        # the next must be False (or just not True) to be complete
        nxt = xtuple[1]['kwargs']['stitchback']
        return nxt is not True and prev is True

    # only get results where the stitch has stopped
    # NOTE : need to compute before filtering here
    # TODO : figure out how to filter
    if not return_intermediate:
        # keep the previous two results
        sout = sout.sliding_window(2)
        sout = sout.filter(stitchbackcomplete)
        sout = sout.map(lambda x: x[0])

    return sin, sout
def LineCutStream(axis=0, name=None):
    ''' Obtain line cuts from a 2D image.

    Just simple slicing. It's a stream mainly to make this more standard.

    Parameters
    ----------
    axis : int, optional
        the axis to obtain linecuts from.
        Default is 0 (so we index rows, A[i])
        If 1, we index columns (A[:, i])

    name : str, optional
        the name of the stream

    **Stream Inputs**

        image : 2d np.ndarray
            the image to obtain line cuts from

        y : 1d np.ndarray
            The y (row) values per pixel

        x : 1d np.ndarray
            The x (column) values per pixel

        vals : list
            the values to obtain the linecuts from

    **Stream Outputs**

        linecuts : list
            a list of line cuts

        linecuts_domain : 1d np.ndarray
            the domain of the line cuts

        linecuts_vals : 1d np.ndarray
            the corresponding value for each line cut

    Returns
    -------
    sin : Stream instance
        the input stream (see Stream Inputs)

    sout : Stream instance
        the output stream (see Stream Outputs)
    '''
    def linecuts(image, y, x, vals, axis=0):
        ''' Can potentially return an empty list of linecuts.'''
        linecuts = list()
        linecuts_vals = list()
        if axis == 1:
            # swap x and y, and transpose the image so that slicing rows
            # below slices the original columns
            x, y = y, x
            image = image.T

        linecuts_domain = x
        for val in vals:
            ind = np.argmin(np.abs(y - val))
            linecuts.append(image[ind])
            linecuts_vals.append(y[ind])
        return dict(linecuts=linecuts,
                    linecuts_domain=linecuts_domain,
                    linecuts_vals=linecuts_vals)

    # the string for the axis
    axisstr = ['y', 'x'][axis]
    if name is None:
        stream_name = 'linecuts-axis{}'.format(axisstr)
    else:
        stream_name = name + "-axis{}".format(axisstr)

    sin = sc.Stream(stream_name=stream_name)
    sout = scs.map(linecuts, sin, axis=axis)
    sout = scs.add_attributes(sout, stream_name=stream_name)
    return sin, sout
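# A minimal usage sketch: cut the rows nearest y = 0 and y = 2 out of a
# 10x10 image (the emission pattern mirrors the other streams in this
# module; not part of the original source):
#
# >>> import numpy as np
# >>> from SciStreams.core.StreamDoc import StreamDoc
# >>> sin, sout = LineCutStream(axis=0)
# >>> L = sout.sink_to_list()
# >>> y = np.arange(10)
# >>> x = np.arange(10)
# >>> img = y[:, None] * np.ones((10, 10))  # each row equals its y value
# >>> sin.emit(StreamDoc(kwargs=dict(image=img, y=y, x=x, vals=[0, 2])))
# >>> # L[0]['kwargs']['linecuts'] -> [row of zeros, row of twos]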
def CircularAverageStream():
    ''' Circular average stream.

    **Stream Inputs**

        image : 2d np.ndarray
            the image to run circular average on

        calibration : object
            the calibration object, with members:

            q_map : 2d np.ndarray
                the magnitude of the wave vectors

            r_map : 2d np.ndarray
                the pixel positions from the center

        mask : 2d np.ndarray, optional
            the mask

        bins : int or tuple, optional
            if an int, the number of bins to divide into
            if a list, the bins to use

    **Stream Outputs**

        sqx : 1D np.ndarray
            the q values

        sqxerr : 1D np.ndarray
            the error in the q values

        sqy : 1D np.ndarray
            the intensities

        sqyerr : 1D np.ndarray
            the error in the intensities (approximate)

    Notes
    -----
    Assumes square pixels.

    Assumes variance comes from shot noise only (by taking the average
    along the ring/Npixels).

    If bins is None, it does its best to estimate pixel sizes and make
    the bins a pixel in size. Note, for Ewald curvature this is not
    straightforward. You need both an r_map in pixels from the center
    and the q_map for the actual q values.

    Returns
    -------
    sin : Stream instance
        the source stream (see Stream Inputs)

    sout : Stream instance
        the output stream (see Stream Outputs)

    Examples
    --------
    >>> from streamz import Stream
    >>> from SciStreams import StreamDoc
    >>> import numpy as np
    >>> s = Stream()
    >>> from SciStreams.streams.XS_Streams import CircularAverageStream
    >>> sin, sout = CircularAverageStream()
    >>> s.connect(sin)
    >>> mask = None
    >>> bins = 3
    >>> img = np.random.random((10, 10))
    >>> x = np.linspace(-5, 5, 10)
    >>> X, Y = np.meshgrid(x, x)
    >>> r_map = np.sqrt(X**2 + Y**2)
    >>> q_map = r_map*.12
    >>> class Calib:
    ...     def __init__(self, qmap, rmap):
    ...         self.q_map = qmap
    ...         self.r_map = rmap
    >>> calibration = Calib(q_map, r_map)
    >>> sdoc = StreamDoc(kwargs=dict(image=img,
    ...                              calibration=calibration,
    ...                              mask=mask,
    ...                              bins=bins))
    >>> # emit data as usual
    >>> sin.emit(sdoc)
    '''
    # TODO : extend file to multiple writers?
    def validate(x):
        kwargs = x['kwargs']
        if 'image' not in kwargs or 'calibration' not in kwargs:
            message = "expected two kwargs: "
            message += "(image, calibration), "
            message += "got {} instead".format(list(kwargs.keys()))
            raise ValueError(message)
        # the other kwargs are optional, so don't validate them
        return x

    sin = sc.Stream(stream_name="Circular Average")
    sout = scs.add_attributes(sin, stream_name="circavg")
    # No validation for now
    # validation should not be necessary; it should just throw an error
    # sout = sout.map(validate)
    sout = scs.map(circavg_from_calibration, sout)
    return sin, sout
def CalibrationStream():
    ''' This stream takes data with kwargs and creates a calibration object.

    **Stream Inputs**

        md : dict
            No requirements

        data : dict
            requires keys which contain calibration information
            (this is usually obtained by moving metadata to data
            in a first step)

    **Stream Outputs**

        md : dict
            keeps regular md

        data : dict
            calibration : object
                a calibration object

    Notes
    -----
    This will distribute the computation of the qmaps and cache them
    by saving references to the futures (which distributed will bookkeep).

    Use the ``AttributeNormalizingStream`` first so that the data is as
    the CalibrationStream expects.

    Examples
    --------
    >>> from streamz import Stream
    >>> from SciStreams import StreamDoc
    >>> import numpy as np
    >>> s = Stream()
    >>> from SciStreams.streams.XS_Streams import CalibrationStream
    >>> sin, sout = CalibrationStream()
    >>> s.connect(sin)
    >>> # some example data
    >>> data = dict(wavelength=dict(value=1), pixel_size_x=dict(value=1),
    ...             pixel_size_y=dict(value=1),
    ...             sample_det_distance=dict(value=1),
    ...             beamx0=dict(value=0), beamy0=dict(value=0),
    ...             shape=(100, 100))
    >>> sdoc = StreamDoc(kwargs=data)
    >>> s.emit(sdoc)

    Returns
    -------
    sin : Stream instance
        the source stream (see Stream Inputs)

    sout : Stream instance
        the output stream (see Stream Outputs)
    '''
    sin = sc.Stream(stream_name="Calibration")
    # force computation to come back here
    sout = scs.map(make_calibration, sin, remote=True)

    # this piece should be computed using Dask
    def _generate_qxyz_maps(calibration):
        calibration.generate_maps()
        return dict(calibration=calibration)

    # TODO : change to use scatter/gather
    # (need to set up an event loop for this etc)
    sout = scs.map(_generate_qxyz_maps, sout)

    # sink the futures to a global list (deque); the scheduler will keep
    # cached results alive as long as a reference to a future is kept
    sout.map(lambda x: x['kwargs']).sink(futures_cache.append)
    sout.map(lambda x: x['args']).sink(futures_cache.append)
    return sin, sout
def AttributeNormalizingStream(external_keymap=None):
    ''' Get and re-map the attributes of the stream.

    This step is typically performed before sending data to the
    CalibrationStream input stream.

    **Stream Inputs**

        md : dict
            The metadata for the stream.

        data : dict
            No requirements for the data

    **Stream Outputs**

        md : dict
            The same metadata is passed through

        data : dict
            The normalized metadata is put in the data, which includes:

            beamx0 : dict {'value' : val, 'unit' : unit}
                the x0 position of the beam

            beamy0 : dict {'value' : val, 'unit' : unit}
                the y0 position of the beam

            wavelength : dict {'value' : val, 'unit' : unit}
                the wavelength of the beam

            pixel_size_x : dict {'value' : val, 'unit' : unit}
                the x size of a pixel

            pixel_size_y : dict {'value' : val, 'unit' : unit}
                the y size of a pixel

            detector_key : str
                the detector image key

            detector_name : str
                the detector name

    Parameters
    ----------
    external_keymap : dict, optional
        The keymap to perform the re-mapping from.
        If not specified, internal keymaps are used.

    Examples
    --------
    >>> # A typical workflow is as follows:
    >>> # instantiate the main stream input
    >>> from streamz import Stream
    >>> from SciStreams import StreamDoc
    >>> import numpy as np
    >>> s = Stream()
    >>> # create the filtering stream
    >>> from SciStreams.streams.XS_Streams import \
    ...     AttributeNormalizingStream
    >>> sin, sout = AttributeNormalizingStream()
    >>> s.connect(sin)
    >>> # create dummy detector attributes, from a pilatus300
    >>> attr = dict(
    ...     calibration_wavelength_A=1.0,
    ...     detector_SAXS_x0_pix=5.0,
    ...     detector_SAXS_y0_pix=5.0,
    ...     detector_SAXS_distance_m=5.0,
    ...     detector_key='pilatus300_image',
    ...     )
    >>> sdoc = StreamDoc(attributes=attr)
    >>> # save the result in a list L that you can review later
    >>> L = sout.sink_to_list()
    >>> # emit the data
    >>> s.emit(sdoc)
    '''
    sin = sc.Stream()
    # set remote=False: these are quick calculations we don't care to
    # cache, and they are always unique to each datum, so caching doesn't
    # make sense
    sout = scs.get_attributes(sin)
    sout = scs.map(normalize_calib_dict, sout,
                   external_keymap=external_keymap, remote=False)
    sout = scs.map(add_detector_info, sout, remote=False)
    return sin, sout
def PrimaryFilteringStream():
    ''' Filter the stream for just primary results.

    **Stream Inputs**

        md : dict
            No requirements

        data : dict
            must have a 2D np.ndarray with one of the accepted detector
            keys

    **Stream Outputs**

    Two streams are output, sout and serr:

        sout : the stream with valid data
            Outputs from zero to any number of streams
            (depends on how many detectors were found)

            md :
                detector_key : the detector key (string)

            data :
                data with only one image, under the detector key
                (if there was more than one, it selects one of them;
                note this has unspecified behaviour)

        serr : the stream with bad data
            This can be sinked to an error stream.

    Examples
    --------
    >>> # A typical workflow is as follows:
    >>> # instantiate the main stream input
    >>> from streamz import Stream
    >>> s = Stream()
    >>> # create the filtering stream
    >>> from SciStreams.streams.XS_Streams import PrimaryFilteringStream
    >>> sin, sout = PrimaryFilteringStream()
    >>> s.connect(sin)
    >>> import numpy as np
    >>> # create a dummy detector image, from a pilatus300
    >>> img = np.random.random((619, 487))
    >>> from SciStreams.core.StreamDoc import StreamDoc
    >>> sdoc = StreamDoc(kwargs=dict(pilatus300_image=img))
    >>> # save the result in a list L that you can review later
    >>> L = sout.sink_to_list()
    >>> # emit the data
    >>> s.emit(sdoc)

    Returns
    -------
    sin : Stream instance
        the source stream (see Stream Inputs)

    sout : Stream instance
        the output stream (see Stream Outputs)

    serr : Stream instance
        the error stream (see Stream Outputs)
    '''
    sin = sc.Stream(stream_name="Primary Filter")
    # the primary filter: data does not pass through if its attributes
    # do not match
    sout = sin.filter(filter_attributes)
    # get the attributes of the rejected documents (to output to some log)
    serr = sin.filter(lambda x: not filter_attributes(x))
    serr = scs.get_attributes(serr)
    serr = scs.add_attributes(serr, error="primary_filter")
    sout = sc.map(sout, pick_allowed_detectors)
    # turn the list into individual streams
    # (an empty list emits nothing, so this acts like a filter)
    sout = sout.concat()
    return sin, sout, serr