class UrlRetrieveApp(BarrierAppDROP):
    """
    An App that retrieves the content of a URL.

    Keywords:
    URL:   string, URL to retrieve.
    """
    component_meta = dlg_component(
        "UrlRetrieveApp",
        "URL Retrieve App",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    url = dlg_string_param("url", "")

    def run(self):
        try:
            u = urllib.request.urlopen(self.url)
        except urllib.error.URLError as e:
            # e.reason may be a plain string, which cannot itself be raised
            # in Python 3, so wrap it in an exception instead
            raise Exception(str(e.reason)) from e
        content = u.read()

        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        for o in outs:
            o.len = len(content)
            o.write(content)  # send content to all outputs
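# ---------------------------------------------------------------------------
# Illustrative sketch (not used by the components in this module): the bare
# retrieve pattern behind UrlRetrieveApp, without any DALiuGE wiring. The
# helper name and URL are assumptions for demonstration only.
def _example_retrieve(url="https://www.example.com/"):
    import urllib.error
    import urllib.request

    try:
        with urllib.request.urlopen(url) as u:
            return u.read()  # the app fans these bytes out to every output
    except urllib.error.URLError as e:
        # e.reason may be a plain string in Python 3, so wrap it rather
        # than raising it directly
        raise Exception(str(e.reason)) from e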
class LP_filter_fft_fftw(LP_filter_fft_np): component_meta = dlg_component( 'LP_filter_fftw', 'Filters a signal with a provided window using FFTW', [dlg_batch_input('binary/*', [])], [dlg_batch_output('binary/*', [])], [dlg_streaming_input('binary/*')]) def initialize(self, **kwargs): super(LP_filter_fft_fftw, self).initialize(**kwargs) def filter(self): pyfftw.interfaces.cache.disable() signal = self.series[0] window = self.series[1] nfft = determine_size(len(signal) + len(window) - 1) sig_zero_pad = pyfftw.empty_aligned(len(signal), dtype=self.precision['float']) win_zero_pad = pyfftw.empty_aligned(len(window), dtype=self.precision['float']) sig_zero_pad[0:len(signal)] = signal win_zero_pad[0:len(window)] = window sig_fft = pyfftw.interfaces.numpy_fft.fft(sig_zero_pad, n=nfft) win_fft = pyfftw.interfaces.numpy_fft.fft(win_zero_pad, n=nfft) out_fft = np.multiply(sig_fft, win_fft) out = pyfftw.interfaces.numpy_fft.ifft(out_fft, n=nfft) return out.astype(self.precision['complex'])
class CopyApp(BarrierAppDROP): """ A BarrierAppDrop that copies its inputs into its outputs. All inputs are copied into all outputs in the order they were declared in the graph. """ component_meta = dlg_component( "CopyApp", "Copy App.", [dlg_batch_input("binary/*", [])], [dlg_batch_output("binary/*", [])], [dlg_streaming_input("binary/*")], ) _bufsize = dlg_int_param("bufsize", 65536) def run(self): self.copyAll() def copyAll(self): for inputDrop in self.inputs: self.copyRecursive(inputDrop) def copyRecursive(self, inputDrop): if isinstance(inputDrop, ContainerDROP): for child in inputDrop.children: self.copyRecursive(child) else: for outputDrop in self.outputs: droputils.copyDropContents(inputDrop, outputDrop, bufsize=self._bufsize)
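# ---------------------------------------------------------------------------
# Illustrative sketch of the chunked-copy idea behind CopyApp's `bufsize`
# parameter: data moves in fixed-size chunks so an arbitrarily large input
# never has to be held in memory at once. The helper names are assumptions
# for demonstration only.
def _example_chunked_copy():
    import io

    def copy_stream(src, dst, bufsize=65536):
        while True:
            chunk = src.read(bufsize)
            if not chunk:
                break
            dst.write(chunk)

    src = io.BytesIO(b"x" * 200000)
    dst = io.BytesIO()
    copy_stream(src, dst)
    assert dst.getvalue() == src.getvalue()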
class LP_filter_fft_cuda(LP_filter_fft_np): component_meta = dlg_component( 'LP_filter_fft_cuda', 'Filters a signal with a provided window using cuda', [dlg_batch_input('binary/*', [])], [dlg_batch_output('binary/*', [])], [dlg_streaming_input('binary/*')]) def initialize(self, **kwargs): super(LP_filter_fft_cuda, self).initialize(**kwargs) def filter(self): import pycuda.gpuarray as gpuarray import skcuda.fft as cu_fft import skcuda.linalg as linalg import pycuda.driver as cuda from pycuda.tools import make_default_context cuda.init() context = make_default_context() device = context.get_device() signal = self.series[0] window = self.series[1] linalg.init() nfft = determine_size(len(signal) + len(window) - 1) # Move data to GPU sig_zero_pad = np.zeros(nfft, dtype=self.precision['float']) win_zero_pad = np.zeros(nfft, dtype=self.precision['float']) sig_gpu = gpuarray.zeros(sig_zero_pad.shape, dtype=self.precision['float']) win_gpu = gpuarray.zeros(win_zero_pad.shape, dtype=self.precision['float']) sig_zero_pad[0:len(signal)] = signal win_zero_pad[0:len(window)] = window sig_gpu.set(sig_zero_pad) win_gpu.set(win_zero_pad) # Plan forwards sig_fft_gpu = gpuarray.zeros(nfft, dtype=self.precision['complex']) win_fft_gpu = gpuarray.zeros(nfft, dtype=self.precision['complex']) sig_plan_forward = cu_fft.Plan(sig_fft_gpu.shape, self.precision['float'], self.precision['complex']) win_plan_forward = cu_fft.Plan(win_fft_gpu.shape, self.precision['float'], self.precision['complex']) cu_fft.fft(sig_gpu, sig_fft_gpu, sig_plan_forward) cu_fft.fft(win_gpu, win_fft_gpu, win_plan_forward) # Convolve out_fft = linalg.multiply(sig_fft_gpu, win_fft_gpu, overwrite=True) linalg.scale(2.0, out_fft) # Plan inverse out_gpu = gpuarray.zeros_like(out_fft) plan_inverse = cu_fft.Plan(out_fft.shape, self.precision['complex'], self.precision['complex']) cu_fft.ifft(out_fft, out_gpu, plan_inverse, True) out_np = np.zeros(len(out_gpu), self.precision['complex']) out_gpu.get(out_np) context.pop() return out_np
class DDFacetApp(BashShellApp):
    DDF_CMD = 'DDF.py'

    component_meta = dlg_component(
        'DDFacetApp',
        'Faceting for direction-dependent spectral deconvolution',
        [dlg_batch_input('binary/*', [])],
        [dlg_batch_output('binary/*', [])],
        [dlg_streaming_input('binary/*')])

    data_ms = dlg_string_param('Data-MS', None)
    data_colname = dlg_string_param('Data-ColName', "CORRECTED_DATA")
    data_chunkhours = dlg_float_param('Data-ChunkHours', 0.0)

    def initialize(self, **kwargs):
        self.command = 'dummy'
        super(DDFacetApp, self).initialize(**kwargs)

    def run(self):
        self.command = '{0} ' \
                       '--Data-MS={1} ' \
                       '--Data-ColName={2} ' \
                       '--Data-ChunkHours={3}'.format(self.DDF_CMD,
                                                      self.data_ms,
                                                      self.data_colname,
                                                      self.data_chunkhours)
        self._run_bash(self._inputs, self._outputs)
class LP_WindowGenerator(BarrierAppDROP):
    component_meta = dlg_component('LPWindowGen',
                                   'Low-pass filter example window generator',
                                   [None],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    # default values
    length = dlg_int_param('length', 256)
    cutoff = dlg_int_param('cutoff', 600)
    srate = dlg_int_param('sample_rate', 5000)
    series = None

    def initialize(self, **kwargs):
        super(LP_WindowGenerator, self).initialize(**kwargs)

    def sinc(self, x_val: np.float64):
        """
        Computes the sinc value for the input float

        :param x_val: Input value
        :return: sin(pi*x)/(pi*x), or 1.0 at x == 0
        """
        if np.isclose(x_val, 0.0):
            return 1.0
        return np.sin(np.pi * x_val) / (np.pi * x_val)

    def gen_win(self):
        alpha = 2 * self.cutoff / self.srate
        win = np.zeros(self.length, dtype=np.float64)
        for i in range(int(self.length)):
            ham = 0.54 - 0.46 * np.cos(
                2 * np.pi * i / int(self.length))  # Hamming coefficient
            hsupp = (i - int(self.length) / 2)
            win[i] = ham * alpha * self.sinc(alpha * hsupp)
        return win

    def run(self):
        outs = self.outputs
        if len(outs) < 1:
            raise Exception('At least one output required for %r' % self)
        self.series = self.gen_win()
        data = self.series.tobytes()  # tostring() is deprecated
        for o in outs:
            o.len = len(data)
            o.write(data)

    """
    def generate_reproduce_data(self):
        return dict(data_hash=common_hash(self.series))
    """

    def generate_recompute_data(self):
        output = dict()
        output['length'] = self.length
        output['cutoff'] = self.cutoff
        output['sample_rate'] = self.srate
        output['status'] = self.status
        output['system'] = system_summary()
        return output
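# ---------------------------------------------------------------------------
# Illustrative check of the window generated above. The Hamming-weighted sinc
# is a low-pass prototype, so its magnitude response should be much larger
# below the cutoff than near the Nyquist frequency. np.sinc computes
# sin(pi*x)/(pi*x), matching LP_WindowGenerator.sinc; the helper name is an
# assumption for demonstration only.
def _example_window_response(length=256, cutoff=600, srate=5000):
    import numpy as np

    alpha = 2 * cutoff / srate
    i = np.arange(length)
    ham = 0.54 - 0.46 * np.cos(2 * np.pi * i / length)  # Hamming coefficients
    win = ham * alpha * np.sinc(alpha * (i - length / 2))

    # magnitude response on a dense grid; bin k maps to k * srate / 4096 Hz
    response = np.abs(np.fft.rfft(win, n=4096))
    passband = response[:int(4096 * cutoff / srate)]
    stopband = response[-100:]
    return passband.mean(), stopband.mean()  # passband mean >> stopband mean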
class NullBarrierApp(BarrierAppDROP):
    """A BarrierAppDrop that doesn't perform any work"""

    component_meta = dlg_component(
        "NullBarrierApp",
        "Null Barrier.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    def run(self):
        pass
class CallLeap(BarrierAppDROP):
    """A BarrierAppDrop that reads a config file, generates a command line
    for the LeapAccelerateCLI application, and then executes the
    application"""
    component_meta = dlg_component('Call Leap',
                                   'Call Leap.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    # TODO: this measurementSetFilename is not being read by dlg_string_param
    #       hard-coding it for the moment
    measurementSetFilename = "/Users/james/working/leap-accelerate/testdata/1197638568-split.ms"
    DEBUG = True

    def initialize(self, **kwargs):
        super(CallLeap, self).initialize(**kwargs)

    def run(self):
        # check number of inputs and outputs
        if len(self.outputs) != 1:
            raise Exception("One output is expected by this application")
        if len(self.inputs) != 1:
            raise Exception("One input is expected by this application")

        # check that measurement set DIRECTORY exists
        if not os.path.isdir(self.measurementSetFilename):
            raise Exception("Could not find measurement set directory: " +
                            self.measurementSetFilename)

        # read config from input
        config = self._readConfig(self.inputs[0])

        # build command line
        commandLine = [
            'LeapAccelerateCLI', '-f', self.measurementSetFilename, '-s',
            str(config['numStations']), '-d',
            str(config['directions']), '-a',
            str(config['autoCorrelation'])
        ]

        if self.DEBUG:
            time.sleep(random.uniform(5, 10))
            self.outputs[0].write(json.dumps(commandLine))
        else:
            # call leap
            result = subprocess.run(commandLine,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            self.outputs[0].write(result.stdout)

    def _readConfig(self, inDrop):
        with DROPFile(inDrop) as f:
            config = json.load(f)
        return config
class LP_filter_pointwise_np(LP_filter_fft_np):
    component_meta = dlg_component(
        'LP_filter_pointwise_np',
        'Filters a signal with a provided window using pointwise numpy convolution',
        [dlg_batch_input('binary/*', [])],
        [dlg_batch_output('binary/*', [])],
        [dlg_streaming_input('binary/*')])

    def initialize(self, **kwargs):
        super(LP_filter_pointwise_np, self).initialize(**kwargs)

    def filter(self):
        return np.convolve(self.series[0], self.series[1],
                           mode='full').astype(self.precision['complex'])
class ListAppendThrashingApp(BarrierAppDROP):
    """
    A BarrierAppDrop that appends random numbers to a list N times. It does
    not require any inputs and writes the generated array to all of its
    outputs.

    Keywords:
    size:     int, number of array elements
    """
    component_meta = dlg_component(
        "ListAppendThrashingApp",
        "List Append Thrashing",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    def initialize(self, **kwargs):
        self.size = self._getArg(kwargs, "size", 100)
        self.marray = []
        super(ListAppendThrashingApp, self).initialize(**kwargs)

    def run(self):
        # At least one output should have been added
        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        self.marray = self.generateArray()
        d = pickle.dumps(self.marray)  # pickle once, write to every output
        for o in outs:
            o.len = len(d)
            o.write(d)

    def generateArray(self):
        # This operation is deliberately wasteful to simulate an N^2 operation.
        marray = []
        for _ in range(int(self.size)):
            marray = []
            for i in range(int(self.size)):
                marray.append(random.random())
        return marray

    def _getArray(self):
        return self.marray
class SleepApp(BarrierAppDROP): """A BarrierAppDrop that sleeps the specified amount of time (0 by default)""" component_meta = dlg_component( "SleepApp", "Sleep App.", [dlg_batch_input("binary/*", [])], [dlg_batch_output("binary/*", [])], [dlg_streaming_input("binary/*")], ) sleepTime = dlg_float_param("sleep time", 0) def initialize(self, **kwargs): super(SleepApp, self).initialize(**kwargs) def run(self): time.sleep(self.sleepTime)
class GenericNpyScatterApp(BarrierAppDROP):
    """
    An APP that splits an object that has a len attribute into <num_of_copies>
    parts and returns a numpy array of arrays.
    """
    component_meta = dlg_component(
        "GenericNpyScatterApp",
        "Scatter an array like object into <num_of_copies> parts",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # automatically populated by scatter node
    num_of_copies: int = dlg_int_param("num_of_copies", 1)
    scatter_axes: List[int] = dlg_list_param("scatter_axes", "[0]")

    def run(self):
        if len(self.inputs) * self.num_of_copies != len(self.outputs):
            raise DaliugeException(
                f"expected {len(self.inputs) * self.num_of_copies} outputs, "
                f"got {len(self.outputs)}")
        if len(self.inputs) != len(self.scatter_axes):
            raise DaliugeException(
                f"expected {len(self.inputs)} axes, "
                f"got {len(self.scatter_axes)}, {self.scatter_axes}")

        # split each input <num_of_copies> times across its scatter axis
        for in_index in range(len(self.inputs)):
            nObj = droputils.load_numpy(self.inputs[in_index])
            result = np.array_split(nObj, self.num_of_copies,
                                    axis=self.scatter_axes[in_index])
            for split_index in range(self.num_of_copies):
                out_index = in_index * self.num_of_copies + split_index
                droputils.save_numpy(self.outputs[out_index],
                                     result[split_index])
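# ---------------------------------------------------------------------------
# Illustrative sketch of the index arithmetic used by GenericNpyScatterApp:
# the splits of input i land at output i * num_of_copies + split_index, and
# np.array_split tolerates sizes that don't divide evenly. The helper name is
# an assumption for demonstration only.
def _example_npy_scatter():
    import numpy as np

    num_of_copies = 2
    arrays = [np.arange(12).reshape(3, 4), np.arange(8).reshape(4, 2)]
    scatter_axes = [0, 1]

    outputs = [None] * (len(arrays) * num_of_copies)
    for in_index, arr in enumerate(arrays):
        parts = np.array_split(arr, num_of_copies,
                               axis=scatter_axes[in_index])
        for split_index, part in enumerate(parts):
            outputs[in_index * num_of_copies + split_index] = part

    return [o.shape for o in outputs]  # [(2, 4), (1, 4), (4, 1), (4, 1)]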
class HelloWorldApp(BarrierAppDROP): """ An App that writes 'Hello World!' or 'Hello <greet>!' to all of its outputs. Keywords: greet: string, [World], whom to greet. """ component_meta = dlg_component( "HelloWorldApp", "Hello World App.", [dlg_batch_input("binary/*", [])], [dlg_batch_output("binary/*", [])], [dlg_streaming_input("binary/*")], ) greet = dlg_string_param("greet", "World") def run(self): ins = self.inputs # if no inputs use the parameter else use the input if len(ins) == 0: self.greeting = "Hello %s" % self.greet elif len(ins) != 1: raise Exception("Only one input expected for %r" % self) else: # the input is expected to be a vector. We'll use the first element try: phrase = str( pickle.loads(droputils.allDropContents(ins[0]))[0]) except _pickle.UnpicklingError: phrase = str(droputils.allDropContents(ins[0]), encoding="utf-8") self.greeting = f"Hello {phrase}" outs = self.outputs if len(outs) < 1: raise Exception( "At least one output should have been added to %r" % self) for o in outs: o.len = len(self.greeting.encode()) o.write(self.greeting.encode()) # greet across all outputs
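# ---------------------------------------------------------------------------
# Illustrative sketch of the input fallback used by HelloWorldApp: a pickled
# sequence yields its first element, while raw bytes are treated as UTF-8
# text. The helper names are assumptions for demonstration only.
def _example_greeting_payload():
    import pickle

    def first_or_text(raw: bytes) -> str:
        try:
            return str(pickle.loads(raw)[0])
        except pickle.UnpicklingError:
            return raw.decode("utf-8")

    assert first_or_text(pickle.dumps(["World"])) == "World"
    assert first_or_text(b"World") == "World"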
class GenericScatterApp(BarrierAppDROP):
    """
    An APP that splits an object that has a len attribute into <numSplit>
    parts and returns a numpy array of arrays, where the first axis is of
    length <numSplit>.
    """
    component_meta = dlg_component(
        "GenericScatterApp",
        "Scatter an array like object into numSplit parts",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # automatically populated by scatter node
    num_of_copies: int = dlg_int_param("num_of_copies", 1)

    def initialize(self, **kwargs):
        super(GenericScatterApp, self).initialize(**kwargs)

    def run(self):
        numSplit = self.num_of_copies
        cont = droputils.allDropContents(self.inputs[0])
        # if the data is not pickled it is assumed to be a binary string
        try:
            inpArray = pickle.loads(cont)
        except Exception:
            inpArray = cont.decode()
        # np.array accepts any object that can be used as an array
        nObj = np.array(inpArray)
        result = np.array_split(nObj, numSplit)
        for i in range(numSplit):
            o = self.outputs[i]
            d = pickle.dumps(result[i])
            o.len = len(d)
            o.write(d)  # send one split to each output
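# ---------------------------------------------------------------------------
# Illustrative sketch of the decode-then-split behaviour of
# GenericScatterApp: pickled payloads are unpickled, and np.array_split then
# distributes any remainder over the first splits. The helper name is an
# assumption for demonstration only.
def _example_generic_scatter():
    import pickle

    import numpy as np

    cont = pickle.dumps(list(range(10)))
    try:
        inpArray = pickle.loads(cont)
    except Exception:
        inpArray = cont.decode()

    result = np.array_split(np.array(inpArray), 4)
    return [p.tolist() for p in result]  # [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]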
class LeapGather(BarrierAppDROP):
    """A BarrierAppDrop that gathers the output of multiple instances of the
    LeapAccelerateCLI application and writes the combined JSON list to its
    single output"""
    component_meta = dlg_component('Leap Gather',
                                   'Leap Gather.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    def initialize(self, **kwargs):
        super(LeapGather, self).initialize(**kwargs)

    def run(self):
        # check number of outputs
        if len(self.outputs) != 1:
            raise Exception("One output is expected by this application")

        # read from all inputs
        inputs = []
        for i in range(len(self.inputs)):
            with DROPFile(self.inputs[i]) as f:
                file_data = f.read()
                inputs.append(json.loads(file_data))

        # write to output
        self.outputs[0].write(json.dumps(inputs))
class ScpApp(BarrierAppDROP): """ A BarrierAppDROP that copies the content of its single input onto its single output via SSH's scp protocol. Because of the nature of the scp protocol, the input and output DROPs of this application must both be filesystem-based; i.e., they must be an instance of FileDROP or of DirectoryContainer. Depending on the physical location of each DROP (this application, and its input and outputs) this application will copy data FROM another host or TO other host. This application's node must thus coincide with one of the two I/O DROPs. """ component_meta = dlg_component( "ScpApp", "A BarrierAppDROP that copies the content of its single " "input onto its single output via SSHs scp protocol.", [ dlg_batch_input( "binary/*", [ NgasDROP, InMemoryDROP, SharedMemoryDROP, NullDROP, RDBMSDrop, ContainerDROP, ], ) ], [ dlg_batch_output( "binary/*", [ NgasDROP, InMemoryDROP, SharedMemoryDROP, NullDROP, RDBMSDrop, ContainerDROP, ], ) ], [dlg_streaming_input("binary/*")], ) remoteUser = dlg_string_param("remoteUser", None) pkeyPath = dlg_string_param("pkeyPath", None) timeout = dlg_float_param("timeout", None) def initialize(self, **kwargs): BarrierAppDROP.initialize(self, **kwargs) def run(self): # Check inputs/outputs are of a valid type for i in self.inputs + self.outputs: # The current only way to check if we are handling a FileDROP # or a DirectoryContainer is by checking if they have a `path` # attribute. Calling `isinstance(i, (FileDROP, DirectoryContainer))` # doesn't work because the input/output might be a proxy object # that fails the test if not hasattr(i, "path"): raise Exception("%r is not supported by the ScpApp" % (i)) # Only one input and one output are supported if len(self.inputs) != 1: raise Exception( "Only one input is supported by the ScpApp, %d given" % (len(self.inputs))) if len(self.outputs) != 1: raise Exception( "Only one output is supported by the ScpApp, %d given" % (len(self.outputs))) inp = self.inputs[0] out = self.outputs[0] # Input and output must be of the same type # See comment above regarding identification of DROP types, and why we # can't simply do: # if inp.__class__ != out.__class__: if hasattr(inp, "children") != hasattr(out, "children"): raise Exception("Input and output must be of the same type") # This app's location must be equal to at least one of the I/O if self.node != inp.node and self.node != out.node: raise Exception( "%r is deployed in a node different from its input AND its output" % (self, )) # See comment above regarding identification of File/Directory DROPs and # why we can't simply do: # recursive = isinstance(inp, DirectoryContainer) recursive = hasattr(inp, "children") if self.node == inp.node: copyTo( out.node, inp.path, remotePath=out.path, recursive=recursive, username=self.remoteUser, pkeyPath=self.pkeyPath, timeout=self.timeout, ) else: copyFrom( inp.node, inp.path, localPath=out.path, recursive=recursive, username=self.remoteUser, pkeyPath=self.pkeyPath, timeout=self.timeout, )
class AverageArraysApp(BarrierAppDROP):
    """
    A BarrierAppDrop that averages arrays received on input. It requires
    multiple inputs and writes the generated average vector to all of its
    outputs. The input arrays are assumed to have the same number of
    elements and the output array will also have that same number of
    elements.

    Keywords:
    method:   string <['mean']|'median'>, use mean or median as method.
    """

    component_meta = dlg_component(
        "AverageArraysApp",
        "Average Array App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # default values
    methods = ["mean", "median"]
    method = dlg_string_param("method", methods[0])

    def __init__(self, oid, uid, **kwargs):
        super().__init__(oid, uid, **kwargs)
        self.marray = []

    def initialize(self, **kwargs):
        super().initialize(**kwargs)

    def run(self):
        # At least one output should have been added
        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        self.getInputArrays()
        self._avg = self.averageArray()
        for o in outs:
            d = pickle.dumps(self._avg)
            o.len = len(d)
            o.write(d)  # average across inputs

    def getInputArrays(self):
        """
        Create the input array from all inputs received. Shape is
        (<#inputs>, <#elements>), where #elements is the length of the
        vector received from one input.
        """
        ins = self.inputs
        if len(ins) < 1:
            raise Exception(
                "At least one input should have been added to %r" % self)
        marray = []
        for inp in ins:
            sarray = droputils.allDropContents(inp)
            if len(sarray) == 0:
                print("Input does not contain data!")
            else:
                sarray = pickle.loads(sarray)
                if isinstance(sarray, (list, tuple, np.ndarray)):
                    marray.extend(list(sarray))
                else:
                    marray.append(sarray)
        self.marray = marray

    def averageArray(self):
        method_to_call = getattr(np, self.method)
        return method_to_call(self.marray, axis=0)
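# ---------------------------------------------------------------------------
# Illustrative sketch of the reduction performed by AverageArraysApp:
# averaging over axis 0 combines the inputs element by element. The helper
# name is an assumption for demonstration only.
def _example_average_arrays(method="mean"):
    import numpy as np

    marray = [
        [1.0, 2.0, 3.0],  # vector from input drop 1
        [3.0, 4.0, 5.0],  # vector from input drop 2
    ]
    return getattr(np, method)(marray, axis=0)  # mean -> [2., 3., 4.]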
class ProduceConfig(BarrierAppDROP):
    """A BarrierAppDrop that produces multiple config files suitable for the
    CallLeap BarrierAppDrop"""
    component_meta = dlg_component('ProduceConfig',
                                   'Produce Config.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    # read component parameters
    numStations = dlg_int_param('number of stations', 1)
    implementation = dlg_string_param('implementation', 'cpu')
    autoCorrelation = dlg_bool_param('auto correlation', False)
    maxDirections = dlg_int_param('max directions', 1)

    def initialize(self, **kwargs):
        super(ProduceConfig, self).initialize(**kwargs)

    def run(self):
        # check number of inputs and outputs
        if len(self.inputs) != 1:
            raise Exception("One input is expected by this application")

        # read directions from input 0
        directions = self._readDirections(self.inputs[0])

        # determine number of directions per instance
        numDirectionsPerInstance = float(len(directions)) / float(
            len(self.outputs))
        numDirectionsPerInstance = min(numDirectionsPerInstance,
                                       self.maxDirections)

        startDirectionIndex = 0
        endDirectionIndex = 0

        # split directions
        for i in range(len(self.outputs)):
            endDirectionIndex = int(
                math.floor((i + 1) * numDirectionsPerInstance))

            # split directions
            partDirections = directions[startDirectionIndex:endDirectionIndex]

            # build config
            configJSON = self._createConfig(self.numStations, partDirections,
                                            self.implementation,
                                            self.autoCorrelation)

            # stringify config
            config = json.dumps(configJSON)

            # write config to output
            if type(config) is str:
                config = config.encode()
            self.outputs[i].write(config)

            # continue from here in the next iteration
            startDirectionIndex = endDirectionIndex

    def _readDirections(self, inDrop):
        directions = []

        # NOTE: it appears csv.reader() can't use the DROPFile(inDrop)
        #       directly, since DROPFile is not an iterator. Instead, we read
        #       the whole inDrop to a string and pass that to csv.reader()
        with DROPFile(inDrop) as f:
            file_data = f.read()
            if type(file_data) is bytes:
                file_data = file_data.decode('utf-8')
            csvreader = csv.reader(file_data.split('\n'))
            for row in csvreader:
                # skip rows with incorrect number of values
                if len(row) != 2:
                    continue
                x = float(row[0])
                y = float(row[1])
                directions.append([x, y])

        return directions

    def _createConfig(self, numStations, directions, implementation,
                      autoCorrelation):
        return {
            'stations': numStations,
            'directions': directions,
            'computeImplementation': implementation,
            'readAutoCorrelations': autoCorrelation
        }
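# ---------------------------------------------------------------------------
# Illustrative sketch of the direction partitioning used by ProduceConfig:
# taking floor((i + 1) * perInstance) as the running end index spreads any
# remainder evenly over the outputs. The helper name is an assumption for
# demonstration only.
def _example_split_directions():
    import math

    directions = [[float(i), float(i)] for i in range(10)]
    num_outputs = 4
    per_instance = len(directions) / num_outputs  # 2.5

    sizes = []
    start = 0
    for i in range(num_outputs):
        end = int(math.floor((i + 1) * per_instance))
        sizes.append(len(directions[start:end]))
        start = end
    return sizes  # [2, 3, 2, 3]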
class PyFuncApp(BarrierAppDROP):
    """
    An application that wraps a simple python function.

    The inputs of the application are treated as the arguments of the
    function. Conversely, the output of the function is treated as the
    output of the application. If the application has more than one output,
    the result of calling the function is treated as an iterable, with each
    individual object being written to its corresponding output.

    Users indicate the function to be wrapped via the ``func_name``
    parameter. In this case func_name needs to specify a function in the
    standard form ``module.function`` and the module needs to be accessible
    on the PYTHONPATH of the DALiuGE engine. Note that the engine extends
    the standard PYTHONPATH with DLG_ROOT/code. That directory is always
    available, even if the engine is running in a docker container.

    Otherwise, users can also *send* over the python code using the
    ``func_code`` parameter. The code needs to be base64-encoded and
    produced with the marshal module of the same Python version used to run
    DALiuGE.

    Both inputs and outputs are (de-)serialized using the pickle protocol if
    the value of the respective boolean component parameter is set to True.
    This is also applied to func_defaults and func_arg_mapping.

    In addition to the input mapping the implementation also allows defaults
    to be set both in the function itself and in a logical graph. If set in
    the logical graph using the func_defaults parameter, the defaults need
    to be specified as a dictionary of the form

        ``{"kwargs":{"kw1_name":kw1_value, "kw2_name":kw2_value}, "args":[arg1, arg2]}``

    Positional-only arguments will be used in order of appearance.
    """

    component_meta = dlg_component(
        "PyFuncApp",
        "Py Func App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    func_name = dlg_string_param("func_name", None)

    # func_code = dlg_bytes_param("func_code", None)  # bytes or base64 string

    input_parser: DropParser = dlg_enum_param(
        DropParser, "input_parser", DropParser.PICKLE)  # type: ignore
    output_parser: DropParser = dlg_enum_param(
        DropParser, "output_parser", DropParser.PICKLE)  # type: ignore
    func_arg_mapping = dlg_dict_param("func_arg_mapping", {})
    func_defaults = dlg_dict_param("func_defaults", {})
    f: Callable
    fdefaults: dict

    def _init_func_defaults(self):
        """
        Initialize self.func_defaults dictionary from values provided.
        Multiple options exist and some are here for compatibility.
        """
        logger.debug(
            f"Starting evaluation of func_defaults: {self.func_defaults}")
        if (isinstance(self.func_defaults, dict)
                and len(self.func_defaults) > 0
                and list(self.func_defaults.keys()) == ["kwargs", "args"]):
            # we bring everything back to just kwargs, because positional
            # args are messy
            # NOTE: This means that positional ONLY arguments won't work,
            #       but those are not used too often.
            for arg in self.func_defaults["args"]:
                self.func_defaults["kwargs"][arg] = arg
            self.func_defaults = self.func_defaults["kwargs"]
        elif (isinstance(self.func_defaults, dict)
              and "kwargs" in self.func_defaults
              and isinstance(self.func_defaults["kwargs"], dict)):
            self.func_defaults = self.func_defaults["kwargs"]
        # we came all this way, now assume that any resulting dict is correct
        if not isinstance(self.func_defaults, dict):
            logger.error(
                "Wrong format or type for function defaults for %s: %r, %r",
                self.f.__name__, self.func_defaults,
                type(self.func_defaults))
            raise ValueError("func_defaults must be a dictionary")
        if self.input_parser is DropParser.PICKLE:
            # only values are pickled, get them unpickled
            for name, value in self.func_defaults.items():
                self.func_defaults[name] = deserialize_data(value)

        # set the function defaults from introspection
        if self.arguments:
            self.fn_npos = len(self.arguments.args) - self.fn_ndef
            self.fn_defaults = {
                name: None
                for name in self.arguments.args[:self.fn_npos]
            }
            logger.debug(f"initialized fn_defaults with {self.fn_defaults}")
            # deal with args and kwargs
            kwargs = (dict(
                zip(self.arguments.args[self.fn_npos:],
                    self.arguments.defaults))
                      if self.arguments.defaults else {})
            self.fn_defaults.update(kwargs)
            logger.debug(f"fn_defaults updated with {kwargs}")
            # deal with kwonlyargs
            if self.arguments.kwonlydefaults:
                kwonlyargs = dict(
                    zip(self.arguments.kwonlyargs,
                        self.arguments.kwonlydefaults))
                self.fn_defaults.update(kwonlyargs)
                logger.debug(f"fn_defaults updated with {kwonlyargs}")

            # positional arg names
            self.fn_posargs = self.arguments.args[:self.fn_npos]

    def initialize(self, **kwargs):
        """
        The initialization of a function component mainly deals with mapping
        inputs and provided applicationArgs to the function arguments. All
        of this should be driven by matching names, but currently that is
        not being done.
""" BarrierAppDROP.initialize(self, **kwargs) self._applicationArgs = self._getArg(kwargs, "applicationArgs", {}) self.func_code = self._getArg(kwargs, "func_code", None) # check for function definition arguments in applicationArgs self.func_def_keywords = [ "func_code", "func_name", "func_arg_mapping", "input_parser", "output_parser", "func_defaults", "pickle", ] # backwards compatibility if "pickle" in self._applicationArgs: if self._applicationArgs["pickle"]["value"] == True: self.input_parser = DropParser.PICKLE self.output_parser = DropParser.PICKLE else: self.input_parser = DropParser.EVAL self.output_parser = DropParser.EVAL self._applicationArgs.pop("pickle") for kw in self.func_def_keywords: if kw in self._applicationArgs: # these are the preferred ones now if isinstance( self._applicationArgs[kw]["value"], bool or self._applicationArgs[kw]["value"] or self._applicationArgs[kw]["precious"]): # only transfer if there is a value or precious is True self._applicationArgs.pop(kw) self.num_args = len( self._applicationArgs) # number of additional arguments provided if not self.func_name and not self.func_code: raise InvalidDropException( self, "No function specified (either via name or code)") # Lookup function or import bytecode as a function if not self.func_code: self.f = import_using_name(self, self.func_name) else: if not isinstance(self.func_code, bytes): self.func_code = base64.b64decode( self.func_code.encode("utf8")) self.f = import_using_code(self.func_code) # make sure defaults are dicts if isinstance(self.func_defaults, str): self.func_defaults = ast.literal_eval(self.func_defaults) if isinstance(self.func_arg_mapping, str): self.func_arg_mapping = ast.literal_eval(self.func_arg_mapping) self.arguments = inspect.getfullargspec(self.f) logger.debug(f"Function inspection revealed {self.arguments}") # we don't want to mess with the 'self' argument if self.arguments.args.count('self'): self.arguments.args.remove('self') self.fn_nargs = len(self.arguments.args) self.fn_ndef = len( self.arguments.defaults) if self.arguments.defaults else 0 self._init_func_defaults() logger.info(f"Args summary for '{self.func_name}':") logger.info(f"Args: {self.arguments.args}") logger.info(f"Args defaults: {self.arguments.defaults}") logger.info(f"Args positional: {self.arguments.args[:self.fn_npos]}") logger.info(f"Args keyword: {self.arguments.args[self.fn_npos:]}") logger.info(f"Args supplied: {self.func_defaults}") logger.info(f"VarArgs allowed: {self.arguments.varargs}") logger.info(f"VarKwds allowed: {self.arguments.varkw}") # Mapping between argument name and input drop uids logger.debug(f"Input mapping: {self.func_arg_mapping}") self._recompute_data = {} def run(self): """ Function positional and keyword argument treatment: Function arguments can be provided in four different ways: 1) Through an input port 2) By specifying ApplicationArgs (one for each argument) 3) By specifying a func_defaults dictionary in the ComponentParameters 4) Through defaults at the time of function definition The priority follows the list above with input ports overruling the others. Function arguments in Python can be passed as positional, kw-value, positional only, kw-value only, and catch-all args and kwargs, which don't provide any hint about the names of accepted parameters. All of them are now supported. 
        If positional arguments or kw-value arguments are provided by the
        user, but are not explicitly defined in the function signature, AND
        args and/or kwargs are allowed, then these arguments are passed to
        the function. For args this is somewhat risky, since the order is
        relevant and in this code derived from the order defined in the
        graph (same order as defined in the component description).

        Input ports will NOT be used by order (anymore), but by the IdText
        (name field in EAGLE) of the port. Since each input port requires an
        associated data drop, this provides a unique mapping. This also
        allows values to be passed to any function argument through a port.

        Function argument values as well as the function code can be
        provided in serialised (pickle) form by setting the 'pickle' flag.
        Note that this flag is valid for all arguments and the code (if
        specified) in a global way.
        """

        # Inputs are un-pickled and treated as the arguments of the function
        # Their order must be preserved, so we use an OrderedDict
        if self.input_parser is DropParser.PICKLE:
            # all_contents = lambda x: pickle.loads(droputils.allDropContents(x))
            all_contents = droputils.load_pickle
        elif self.input_parser is DropParser.EVAL:

            def optionalEval(x):
                # Null and Empty Drops will return an empty byte string
                # which should propagate back to None
                content: str = droputils.allDropContents(x).decode('utf-8')
                return ast.literal_eval(content) if len(content) > 0 else None

            all_contents = optionalEval
        elif self.input_parser is DropParser.NPY:
            all_contents = droputils.load_npy
        elif self.input_parser is DropParser.PATH:
            all_contents = lambda x: x.path
        elif self.input_parser is DropParser.DATAURL:
            all_contents = lambda x: x.dataurl
        else:
            raise ValueError(self.input_parser.__repr__())

        inputs = collections.OrderedDict()
        for uid, drop in self._inputs.items():
            inputs[uid] = all_contents(drop)

        outputs = collections.OrderedDict()
        for uid, drop in self._outputs.items():
            outputs[uid] = all_contents(
                drop) if self.output_parser is DropParser.PATH else None

        # Keyword arguments are made up of the default values plus the inputs
        # that match one of the keyword argument names
        # if defaults dict has not been specified at all we'll go ahead anyway
        n_args = len(self.func_defaults)
        argnames = self.arguments.args

        # use explicit mapping of inputs to arguments first
        # TODO: Required by dlg_delayed?? Else, we should really not do this.
        kwargs = {
            name: inputs.pop(uid)
            for name, uid in self.func_arg_mapping.items()
            if name in self.func_defaults or name not in argnames
        }
        logger.debug(f"updating funcargs with {kwargs}")
        funcargs = kwargs

        # Fill arguments with rest of inputs
        logger.debug(f"available inputs: {inputs}")

        # if we have named ports use the inputs with
        # the correct UIDs
        logger.debug(f"Parameters found: {self.parameters}")
        posargs = self.arguments.args[:self.fn_npos]
        kwargs = {}
        pargs = []
        # Initialize pargs dictionary and update with provided argument values
        pargsDict = collections.OrderedDict(
            zip(posargs, [None] * len(posargs)))
        if "applicationArgs" in self.parameters:
            appArgs = self.parameters[
                "applicationArgs"]  # we'll pop the identified ones
            _dum = [
                appArgs.pop(k) for k in self.func_def_keywords if k in appArgs
            ]
            logger.debug("Identified keyword arguments removed: %s",
                         [i['text'] for i in _dum])
            pargsDict.update({
                k: self.parameters[k]
                for k in pargsDict if k in self.parameters
            })
            # if defined in both we use AppArgs values
            pargsDict.update(
                {k: appArgs[k]['value'] for k in pargsDict if k in appArgs})
            logger.debug("Initial posargs dictionary: %s", pargsDict)
        else:
            appArgs = {}

        if ('inputs' in self.parameters
                and droputils.check_ports_dict(self.parameters['inputs'])):
            check_len = min(len(inputs),
                            self.fn_nargs + len(self.arguments.kwonlyargs))
            inputs_dict = collections.OrderedDict()
            for inport in self.parameters['inputs']:
                key = list(inport.keys())[0]
                inputs_dict[key] = {'name': inport[key], 'path': inputs[key]}
            kwargs.update(
                droputils.identify_named_ports(inputs_dict,
                                               posargs,
                                               pargsDict,
                                               appArgs,
                                               check_len=check_len,
                                               mode="inputs"))
        else:
            for i in range(min(len(inputs), self.fn_nargs)):
                kwargs.update(
                    {self.arguments.args[i]: list(inputs.values())[i]})
        logger.debug(f"Updating funcargs with input ports {kwargs}")
        funcargs.update(kwargs)

        if ('outputs' in self.parameters
                and droputils.check_ports_dict(self.parameters['outputs'])):
            check_len = min(len(outputs),
                            self.fn_nargs + len(self.arguments.kwonlyargs))
            outputs_dict = collections.OrderedDict()
            for outport in self.parameters['outputs']:
                key = list(outport.keys())[0]
                outputs_dict[key] = {
                    'name': outport[key],
                    'path': outputs[key]
                }
            kwargs.update(
                droputils.identify_named_ports(outputs_dict,
                                               posargs,
                                               pargsDict,
                                               appArgs,
                                               check_len=check_len,
                                               mode="outputs"))

        # Try to get values for still missing positional arguments from
        # Application Args
        if "applicationArgs" in self.parameters:
            for pa in posargs:
                if pa != 'self' and pa not in funcargs:
                    if pa in appArgs:
                        arg = appArgs.pop(pa)
                        value = arg['value']
                        ptype = arg['type']
                        if ptype in ["Complex", "Json"]:
                            try:
                                value = ast.literal_eval(value)
                            except Exception as e:
                                # just go on if this did not work
                                logger.warning("Eval raised an error: %s", e)
                        elif ptype in ["Python"]:
                            try:
                                import numpy
                                value = eval(value, {"numpy": numpy}, {})
                            except Exception:
                                pass
                        pargsDict.update({pa: value})
                    elif pa != 'self' and pa not in pargsDict:
                        logger.warning(
                            f"Required positional argument '{pa}' not found!")
            _dum = [appArgs.pop(k) for k in pargsDict if k in appArgs]
            logger.debug("Identified positional arguments removed: %s",
                         [i['text'] for i in _dum])
            logger.debug(f"updating posargs with {list(pargsDict.keys())}")
            pargs.extend(list(pargsDict.values()))

        # Try to get values for still missing keyword arguments from
        # Application Args
        kwargs = {}
        kws = self.arguments.args[self.fn_npos:]
        for ka in kws:
            if ka not in funcargs:
                if ka in appArgs:
                    arg = appArgs.pop(ka)
                    value = arg['value']
                    ptype = arg['type']
                    if ptype in ["Complex", "Json"]:
                        try:
                            value = ast.literal_eval(value)
                        except Exception:
                            pass
                    kwargs.update({ka: value})
                else:
                    logger.warning(f"Keyword argument '{ka}' not found!")
        logger.debug(f"updating funcargs with {kwargs}")
        funcargs.update(kwargs)

        # deal with kwonlyargs
        kwargs = {}
        kws = self.arguments.kwonlyargs
        for ka in kws:
            if ka not in funcargs:
                if ka in appArgs:
                    arg = appArgs.pop(ka)
                    value = arg['value']
                    ptype = arg['type']
                    if ptype in ["Complex", "Json"]:
                        try:
                            value = ast.literal_eval(value)
                        except Exception:
                            pass
                    kwargs.update({ka: value})
                else:
                    logger.warning(
                        f"Keyword only argument '{ka}' not found!")
        logger.debug(f"updating funcargs with kwonlyargs: {kwargs}")
        funcargs.update(kwargs)

        # any remaining application arguments will be used for vargs and vkwargs
        vparg = []
        vkarg = {}
        logger.debug(f"Remaining AppArguments {appArgs}")
        for arg in appArgs:
            if appArgs[arg]['type'] in ['Json', 'Complex']:
                value = ast.literal_eval(appArgs[arg]['value'])
            else:
                value = appArgs[arg]['value']
            if appArgs[arg]['positional']:
                vparg.append(value)
            else:
                vkarg.update({arg: value})

        if self.arguments.varargs:
            pargs.extend(vparg)
        if self.arguments.varkw:
            funcargs.update(vkarg)

        # Fill rest with default arguments if there are any more
        kwargs = {}
        for kw in self.func_defaults.keys():
            value = self.func_defaults[kw]
            if kw not in funcargs:
                kwargs.update({kw: value})
        logger.debug(f"updating funcargs with {kwargs}")
        funcargs.update(kwargs)
        self._recompute_data["args"] = funcargs.copy()
        logger.debug(f"Running {self.func_name} with *{pargs} **{funcargs}")

        # we capture and log whatever is produced on STDOUT
        capture = StringIO()
        with redirect_stdout(capture):
            result = self.f(*pargs, **funcargs)
        logger.info(
            f"Captured output from function app '{self.func_name}': {capture.getvalue()}"
        )
        logger.debug(f"Finished execution of {self.func_name}.")

        # Depending on how many outputs we have we treat our result
        # as an iterable or as a single object. Each result is pickled
        # and written to its corresponding output
        self.write_results(result)

    def write_results(self, result):
        outputs = self.outputs
        if len(outputs) > 0:
            if len(outputs) == 1:
                result = [result]
            for r, o in zip(result, outputs):
                if self.output_parser is DropParser.PICKLE:
                    logger.debug(f"Writing pickled result {type(r)} to {o}")
                    o.write(pickle.dumps(r))
                elif self.output_parser is DropParser.EVAL:
                    o.write(repr(r).encode('utf-8'))
                elif self.output_parser is DropParser.NPY:
                    droputils.save_npy(o, r)
                else:
                    raise ValueError(self.output_parser.__repr__())

    def generate_recompute_data(self):
        for name, val in self._recompute_data.items():
            try:
                json.dumps(val)
            except TypeError as e:
                logger.debug(e)
                self._recompute_data[name] = repr(val)
        return self._recompute_data
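# ---------------------------------------------------------------------------
# Illustrative sketch of the introspection PyFuncApp relies on:
# inspect.getfullargspec drives the split into positional arguments without
# defaults, keyword arguments, and keyword-only arguments. The wrapped
# function and helper name are assumptions for demonstration only.
def _example_argspec_mapping():
    import inspect

    def wrapped(a, b, c=3, *, d=4):
        return a + b + c + d

    spec = inspect.getfullargspec(wrapped)
    fn_ndef = len(spec.defaults) if spec.defaults else 0
    fn_npos = len(spec.args) - fn_ndef      # positional args without defaults
    posargs = spec.args[:fn_npos]           # ['a', 'b']
    defaults = dict(zip(spec.args[fn_npos:], spec.defaults or ()))
    defaults.update(spec.kwonlydefaults or {})  # {'c': 3, 'd': 4}
    return posargs, defaults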
class LP_AddNoise(BarrierAppDROP):
    component_meta = dlg_component(
        'LPAddNoise',
        'Adds noise to a signal generated for the low-pass filter example',
        [dlg_batch_input('binary/*', [])],
        [dlg_batch_output('binary/*', [])],
        [dlg_streaming_input('binary/*')])

    # default values
    mean = dlg_float_param('avg_noise', 0.0)
    std = dlg_float_param('std_deviation', 1.0)
    freq = dlg_int_param('frequency', 1200)
    srate = dlg_int_param('sample_rate', 5000)
    seed = dlg_int_param('random_seed', 42)
    alpha = dlg_float_param('noise_multiplier', 0.1)
    signal = np.empty([1])

    def initialize(self, **kwargs):
        super(LP_AddNoise, self).initialize(**kwargs)

    def add_noise(self):
        np.random.seed(self.seed)
        samples = self.alpha * np.random.normal(
            self.mean, self.std, size=len(self.signal))
        for i in range(len(self.signal)):
            samples[i] += np.sin(2 * np.pi * i * self.freq / self.srate)
        np.add(self.signal, samples, out=self.signal)
        return self.signal

    def getInputArrays(self):
        ins = self.inputs
        if len(ins) != 1:
            raise Exception('Precisely one input required for %r' % self)

        # the upstream generators write raw float64 bytes; copy so the
        # buffer is writable for the in-place np.add above
        self.signal = np.frombuffer(droputils.allDropContents(ins[0]),
                                    dtype=np.float64).copy()

    def run(self):
        outs = self.outputs
        if len(outs) < 1:
            raise Exception('At least one output required for %r' % self)
        self.getInputArrays()
        sig = self.add_noise()
        data = sig.tobytes()
        for o in outs:
            o.len = len(data)
            o.write(data)

    """
    def generate_reproduce_data(self):
        return {'data_hash': common_hash(self.signal)}
    """

    def generate_recompute_data(self):
        return {
            'mean': self.mean,
            'std': self.std,
            'sample_rate': self.srate,
            'seed': self.seed,
            'alpha': self.alpha,
            'system': system_summary(),
            'status': self.status
        }
class LP_SignalGenerator(BarrierAppDROP):
    component_meta = dlg_component('LPSignalGen',
                                   'Low-pass filter example signal generator',
                                   [None],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    # default values
    length = dlg_int_param('length', 256)
    srate = dlg_int_param('sample rate', 5000)
    freqs = dlg_list_param('frequencies', [440, 800, 1000, 2000])
    noise = dlg_list_param('noise', [])
    series = None

    def initialize(self, **kwargs):
        super(LP_SignalGenerator, self).initialize(**kwargs)

    def add_noise(self, series: np.array, mean, std, freq, sample_rate,
                  seed, alpha=0.1):
        """
        Adds noise to the provided signal by producing random values of a
        given frequency

        :param series: The input (and output) numpy array signal series
        :param mean: The average value
        :param std: The standard deviation of the value
        :param freq: The frequency of the noisy signal
        :param sample_rate: The sample rate of the input series
        :param seed: The random seed
        :param alpha: The multiplier
        :return: The input series with noisy values added
        """
        np.random.seed(seed)
        samples = alpha * np.random.normal(mean, std, size=len(series))
        for i in range(len(series)):
            samples[i] += np.sin(2 * np.pi * i * freq / sample_rate)
        np.add(series, samples, out=series)
        return series

    def gen_sig(self):
        series = np.zeros(self.length, dtype=np.float64)
        for freq in self.freqs:
            for i in range(self.length):
                series[i] += np.sin(2 * np.pi * i * freq / self.srate)
        return series

    def run(self):
        outs = self.outputs
        if len(outs) < 1:
            raise Exception('At least one output required for %r' % self)
        self.series = self.gen_sig()
        if len(self.noise) > 0:
            # the noise list is interpreted as [1/alpha, freq, mean, seed, std]
            self.noise[0] = 1 / self.noise[0]
            self.series = self.add_noise(self.series, self.noise[2],
                                         self.noise[4], self.noise[1],
                                         self.srate, self.noise[3],
                                         self.noise[0])

        data = self.series.tobytes()  # tostring() is deprecated
        for o in outs:
            o.len = len(data)
            o.write(data)

    """
    def generate_reproduce_data(self):
        # This will do for now
        return {'data_hash': common_hash(self.series)}
    """

    def generate_recompute_data(self):
        # This will do for now
        return {
            'length': self.length,
            'sample_rate': self.srate,
            'frequencies': self.freqs,
            'status': self.status,
            'system': system_summary()
        }
class LP_filter_fft_np(BarrierAppDROP):
    component_meta = dlg_component(
        'LP_filter_np',
        'Filters a signal with a provided window using numpy',
        [dlg_batch_input('binary/*', [])],
        [dlg_batch_output('binary/*', [])],
        [dlg_streaming_input('binary/*')])

    PRECISIONS = {
        'double': {
            'float': np.float64,
            'complex': np.complex128
        },
        'single': {
            'float': np.float32,
            'complex': np.complex64
        }
    }
    precision = {}
    # default values
    double_prec = dlg_bool_param('doublePrec', True)
    series = []
    output = np.zeros([1])

    def initialize(self, **kwargs):
        super(LP_filter_fft_np, self).initialize(**kwargs)
        if self.double_prec:
            self.precision = self.PRECISIONS['double']
        else:
            self.precision = self.PRECISIONS['single']

    def getInputArrays(self):
        ins = self.inputs
        if len(ins) != 2:
            raise Exception('Precisely two inputs required for %r' % self)

        # the upstream generators write raw float64 bytes
        array = [
            np.frombuffer(droputils.allDropContents(inp), dtype=np.float64)
            for inp in ins
        ]
        self.series = array

    def filter(self):
        signal = self.series[0]
        window = self.series[1]
        nfft = determine_size(len(signal) + len(window) - 1)
        sig_zero_pad = np.zeros(nfft, dtype=self.precision['float'])
        win_zero_pad = np.zeros(nfft, dtype=self.precision['float'])
        sig_zero_pad[0:len(signal)] = signal
        win_zero_pad[0:len(window)] = window
        sig_fft = np.fft.fft(sig_zero_pad)
        win_fft = np.fft.fft(win_zero_pad)
        out_fft = np.multiply(sig_fft, win_fft)
        out = np.fft.ifft(out_fft)
        return out.astype(self.precision['complex'])

    def run(self):
        outs = self.outputs
        if len(outs) < 1:
            raise Exception('At least one output required for %r' % self)
        self.getInputArrays()
        self.output = self.filter()
        data = self.output.tobytes()  # tostring() is deprecated
        for o in outs:
            o.len = len(data)
            o.write(data)

    def generate_recompute_data(self):
        return {
            'precision_float': str(self.precision['float']),
            'precision_complex': str(self.precision['complex']),
            'system': system_summary(),
            'status': self.status
        }
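# ---------------------------------------------------------------------------
# Illustrative, numpy-only sanity check of the FFT convolution performed in
# filter() above: provided nfft >= len(signal) + len(window) - 1 (which
# determine_size is assumed to guarantee, e.g. by rounding up to the next
# power of two), circular convolution via FFT matches direct linear
# convolution. The helper name is an assumption for demonstration only.
def _example_fft_convolution():
    import numpy as np

    sig = np.random.default_rng(42).normal(size=256)
    win = np.ones(32) / 32.0

    nfft = int(2 ** np.ceil(np.log2(len(sig) + len(win) - 1)))
    out = np.fft.ifft(np.fft.fft(sig, n=nfft) * np.fft.fft(win, n=nfft))
    direct = np.convolve(sig, win, mode='full')
    return np.allclose(out[:len(direct)].real, direct)  # True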
class RandomArrayApp(BarrierAppDROP):
    """
    A BarrierAppDrop that generates an array of random numbers. It does not
    require any inputs and writes the generated array to all of its outputs.

    Keywords:
    integer:  bool [True], generate integer array
    low:      float, lower boundary (will be converted to int for integer arrays)
    high:     float, upper boundary (will be converted to int for integer arrays)
    size:     int, number of array elements
    """
    component_meta = dlg_component(
        "RandomArrayApp",
        "Random Array App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # default values
    integer = dlg_bool_param("integer", True)
    low = dlg_float_param("low", 0)
    high = dlg_float_param("high", 100)
    size = dlg_int_param("size", 100)
    marray = []

    def initialize(self, keep_array=False, **kwargs):
        super(RandomArrayApp, self).initialize(**kwargs)
        self._keep_array = keep_array

    def run(self):
        # At least one output should have been added
        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        marray = self.generateRandomArray()
        if self._keep_array:
            self.marray = marray
        for o in outs:
            d = pickle.dumps(marray)
            o.len = len(d)
            o.write(d)

    def generateRandomArray(self):
        if self.integer:
            # generate an array of self.size integers with numbers between
            # self.low and self.high
            marray = np.random.randint(int(self.low),
                                       int(self.high),
                                       size=(self.size))
        else:
            # generate an array of self.size floats with numbers between
            # self.low and self.high
            marray = self.low + np.random.random(size=self.size) * (
                self.high - self.low)
        return marray

    def _getArray(self):
        return self.marray
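# ---------------------------------------------------------------------------
# Illustrative sketch of the two sampling branches above: both draw values
# from the half-open interval [low, high). The helper name is an assumption
# for demonstration only.
def _example_random_bounds(low=0.0, high=100.0, size=5):
    import numpy as np

    ints = np.random.randint(int(low), int(high), size=size)
    floats = low + np.random.random(size=size) * (high - low)
    assert ints.min() >= low and ints.max() < high
    assert floats.min() >= low and floats.max() < high
    return ints, floats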
class MSPlasmaReader(BarrierAppDROP):
    """
    A BarrierAppDROP that reads a CASA measurement set from a plasma store
    and writes it out to file.

    Example:
        a = FileDROP('a', 'a', filepath=in_file)
        b = MSPlasmaWriter('b', 'b')
        c = PlasmaDROP('c', 'c')
        d = MSPlasmaReader('d', 'd')
        e = FileDROP('e', 'e', filepath=out_file)
    """
    component_meta = dlg_component('MSPlasmaReader',
                                   'Measurement Set Plasma Reader.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    ms_output_path = dlg_string_param('ms_output_path', None)

    def __init__(self, oid, uid, **kwargs):
        super().__init__(oid, uid, **kwargs)
        self.reproduce_data = {}
        self.recompute_data = {}

    def initialize(self, **kwargs):
        super(MSPlasmaReader, self).initialize(**kwargs)

    def _write_table(self, ms, path, delete=True):
        if delete is True:
            try:
                os.rmdir(path)
            except OSError:
                pass

        abs_path = os.path.dirname(os.path.abspath(path))
        filename = os.path.basename(path)
        value = ms.pop('/')
        with tables.table(abs_path + '/' + filename, value[0],
                          nrow=len(value[1])) as t:
            with t.row() as r:
                for idx, val in enumerate(value[1]):
                    r.put(idx, val)

        for key, value in ms.items():
            name = abs_path + '/' + filename + '/' + key
            with tables.table(name, value[0], nrow=len(value[1])) as t:
                with t.row() as r:
                    for idx, val in enumerate(value[1]):
                        if val.get('LOG', None) == []:
                            val['LOG'] = ''
                        if val.get('SCHEDULE', None) == []:
                            val['SCHEDULE'] = ''
                        r.put(idx, val)

    def _deserialize_table(self, in_stream, path):
        load_bytes = io.BytesIO(in_stream)
        ms = np.load(load_bytes, allow_pickle=True).flat[0]
        self._write_table(ms, path)
        self.reproduce_data['data_hash'] = common_hash(ms)

    def run(self, **kwargs):
        if len(self.inputs) != 1:
            raise Exception("This application reads only one DROP")
        if len(self.outputs) != 1:
            raise Exception("This application writes only one DROP")
        inp = self.inputs[0]
        out = self.outputs[0].path
        self.recompute_data['in'] = str(inp)
        self.recompute_data['out'] = str(out)
        desc = inp.open()
        input_stream = inp.read(desc)
        self._deserialize_table(input_stream, out)

    def generate_recompute_data(self):
        self.recompute_data['status'] = self.status
        return self.recompute_data

    def generate_reproduce_data(self):
        return self.reproduce_data
class GenericNpyGatherApp(BarrierAppDROP):
    """
    A BarrierAppDrop that reduces then gathers one or more inputs using
    cumulative operations.

    function: string
        <'sum'|'prod'|'min'|'max'|'add'|'multiply'|'maximum'|'minimum'>.
    """
    component_meta = dlg_component(
        "GenericNpyGatherApp",
        "Generic Npy Gather App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # reduce and combine operation pair names
    # reduce operation reduces the dimensionality of a ndarray
    # gather operation combines ndarrays and retains dimensionality
    functions = {
        # reduce and gather (output dimension is reduced)
        "sum": "add",  # sum reduction of inputs along an axis first then gathers across drops
        "prod": "multiply",  # prod reduction of inputs along an axis first then gathers across drops
        "max": "maximum",  # max reduction of input along an axis first then gathers across drops
        "min": "minimum",  # min reduction of input along an axis first then gathers across drops
        # gather only
        "add": None,  # elementwise addition of inputs, ndarrays must be of same shape
        "multiply": None,  # elementwise multiplication of inputs, ndarrays must be of same shape
        "maximum": None,  # elementwise maximums of inputs, ndarrays must be of same shape
        "minimum": None,  # elementwise minimums of inputs, ndarrays must be of same shape
    }
    function: str = dlg_string_param("function", "sum")  # type: ignore
    reduce_axes: list = dlg_list_param("reduce_axes", "None")  # type: ignore

    def run(self):
        if len(self.inputs) < 1:
            raise Exception(
                f"At least one input should have been added to {self}")
        if len(self.outputs) < 1:
            raise Exception(
                f"At least one output should have been added to {self}")
        if self.function not in self.functions:
            raise Exception(
                f"Function {self.function} not supported by {self}")

        result = (self.reduce_gather_inputs()
                  if self.functions[self.function] is not None else
                  self.gather_inputs())

        for o in self.outputs:
            droputils.save_numpy(o, result)

    def reduce_gather_inputs(self):
        """reduces then gathers each input drop interpreted as an npy drop"""
        result: Optional[Number] = None
        reduce = getattr(np, f"{self.function}")
        gather = getattr(np, f"{self.functions[self.function]}")
        for input in self.inputs:
            data = droputils.load_numpy(input)
            # skip gather for the first input
            result = (reduce(data, axis=self.reduce_axes)
                      if result is None else
                      gather(result, reduce(data, axis=self.reduce_axes)))
        return result

    def gather_inputs(self):
        """gathers each input drop interpreted as an npy drop"""
        result: Optional[Number] = None
        gather = getattr(np, f"{self.function}")
        for input in self.inputs:
            data = droputils.load_numpy(input)
            # assign instead of gather for the first input
            result = data if result is None else gather(result, data)
        return result
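# ---------------------------------------------------------------------------
# Illustrative sketch of the reduce-then-gather pairing above: "sum" pairs
# np.sum (reduce within one drop, along an axis) with np.add (gather across
# drops); gather-only functions skip the reduction step. The helper name is
# an assumption for demonstration only.
def _example_reduce_gather():
    import numpy as np

    inputs = [np.arange(6).reshape(2, 3), np.ones((2, 3), dtype=int)]
    reduce_, gather = np.sum, np.add

    result = None
    for data in inputs:
        reduced = reduce_(data, axis=0)  # [3, 5, 7] then [2, 2, 2]
        result = reduced if result is None else gather(result, reduced)
    return result  # [5, 7, 9]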
class MSPlasmaWriter(BarrierAppDROP):
    """
    A BarrierAppDROP that reads a CASA measurement set and writes it out to
    a plasma store.

    Example:
        a = FileDROP('a', 'a', filepath=in_file)
        b = MSPlasmaWriter('b', 'b')
        c = PlasmaDROP('c', 'c')
        d = MSPlasmaReader('d', 'd')
        e = FileDROP('e', 'e', filepath=out_file)
    """
    component_meta = dlg_component('MSPlasmaWriter',
                                   'Measurement Set Plasma Writer.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    ms_input_path = dlg_string_param('ms_input_path', None)

    def __init__(self, oid, uid, **kwargs):
        super().__init__(oid, uid, **kwargs)
        self.recompute_data = {}
        self.reproduce_data = {}

    def initialize(self, **kwargs):
        super(MSPlasmaWriter, self).initialize(**kwargs)

    def _read_table(self, table_path, ms, table_name=None):
        if not table_name:
            table_name = os.path.basename(table_path)

        ms[table_name] = []
        with tables.table(table_path) as t:
            ms[table_name].append(t.getdesc())
            ms[table_name].append([])
            for row in t:
                ms[table_name][1].append(row)

    def _serialize_table(self, path):
        ms = {}
        self._read_table(path, ms, table_name='/')
        with tables.table(path) as t:
            sub = t.getsubtables()
            for i in sub:
                self._read_table(i, ms)

        out_stream = io.BytesIO()
        np.save(out_stream, ms, allow_pickle=True)
        return out_stream.getvalue()

    def run(self, **kwargs):
        if len(self.inputs) != 1:
            raise Exception("This application reads only one DROP")
        if len(self.outputs) != 1:
            raise Exception("This application writes only one DROP")
        inp = self.inputs[0].path
        out = self.outputs[0]
        self.recompute_data['in'] = str(inp)
        self.recompute_data['out'] = str(out)
        out_bytes = self._serialize_table(inp)
        out.write(out_bytes)
        self.reproduce_data['data_hash'] = common_hash(out_bytes)

    def generate_recompute_data(self):
        self.recompute_data['status'] = self.status
        return self.recompute_data

    def generate_reproduce_data(self):
        return self.reproduce_data