Code example #1
class DDFacetApp(BashShellApp):

    DDF_CMD = 'DDF.py'

    component_meta = dlg_component(
        'DDFacetApp',
        'Faceting for direction-dependent spectral deconvolution',
        [dlg_batch_input('binary/*', [])], [dlg_batch_output('binary/*', [])],
        [dlg_streaming_input('binary/*')])

    data_ms = dlg_string_param('Data-MS', None)
    data_colname = dlg_string_param('Data-ColName', "CORRECTED_DATA")
    data_chunkhours = dlg_float_param('Data-ChunkHours', 0.0)

    def initialize(self, **kwargs):
        # BashShellApp expects a command at initialization time; the real
        # command line is assembled later in run()
        self.command = 'dummy'

        super(DDFacetApp, self).initialize(**kwargs)

    def run(self):
        self.command = '{0} ' \
                       '--Data-MS={1} ' \
                       '--Data-ColName={2} ' \
                       '--Data-ChunkHours={3}'.format(self.DDF_CMD,
                                                      self.data_ms,
                                                      self.data_colname,
                                                      self.data_chunkhours)

        self._run_bash(self._inputs, self._outputs)
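
A hypothetical construction sketch (not part of the ICRAR/daliuge sources): parameters declared with dlg_string_param/dlg_float_param are picked up from the keyword arguments passed to the drop, so the command line above would be assembled from values like these. The measurement-set path is made up.

app = DDFacetApp('ddf-1', 'ddf-1', **{
    'Data-MS': '/data/obs.ms',          # assumed path
    'Data-ColName': 'CORRECTED_DATA',
    'Data-ChunkHours': 2.0,
})
app.execute()  # sketch only: builds the DDF.py command line and runs it via bash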
Code example #2
File: simple.py Project: ICRAR/daliuge
class UrlRetrieveApp(BarrierAppDROP):
    """
    An App that retrieves the content of a URL

    Keywords:
    URL:   string, URL to retrieve.
    """

    component_meta = dlg_component(
        "UrlRetrieveApp",
        "URL Retrieve App",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    url = dlg_string_param("url", "")

    def run(self):
        try:
            u = urllib.request.urlopen(self.url)
        except urllib.error.URLError as e:
            # e.reason may be a string or an OSError rather than an Exception
            # subclass, so re-raise a proper exception instead
            raise Exception("Error retrieving %s: %s" % (self.url, e.reason)) from e

        content = u.read()

        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        for o in outs:
            o.len = len(content)
            o.write(content)  # send content to all outputs
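
A minimal, hypothetical wiring sketch. addOutput and execute follow the DALiuGE drop API used throughout these examples, but the import path of InMemoryDROP varies between DALiuGE versions and the URL is made up.

from dlg.drop import InMemoryDROP  # location differs in newer releases

app = UrlRetrieveApp('url-1', 'url-1', url='https://www.example.com/')
out = InMemoryDROP('mem-1', 'mem-1')
app.addOutput(out)
app.execute()  # no inputs, so the barrier fires immediately
# out now holds the page content, with out.len set by run()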
Code example #3
class ParameterSetDROP(DataDROP):
    """
    A generic configuration file template wrapper
    This drop opens an (optional) file containing some initial configuration information, then
    appends any additional specified parameters to it, finally serving it as a data object.
    """

    config_data = b""

    mode = dlg_string_param("mode", None)

    @abstractmethod
    def serialize_parameters(self, parameters: dict, mode):
        """
        Returns a string representing a serialization of the parameters.
        """
        if mode == "YANDA":
            # TODO: Add more complex value checking
            return "\n".join(f"{x}={y}" for x, y in parameters.items())
        # TODO: Add more formats (.ini for example); for now fall back to the
        # same key=value serialization
        return "\n".join(f"{x}={y}" for x, y in parameters.items())

    @abstractmethod
    def filter_parameters(self, parameters: dict, mode):
        """
        Returns a dictionary of parameters, with daliuge-internal or other parameters filtered out
        """
        if mode == "YANDA":
            forbidden_params = list(DEFAULT_INTERNAL_PARAMETERS)
            if parameters["config_data"] == "":
                forbidden_params.append("configData")
            return {
                key: val
                for key, val in parameters.items()
                if key not in forbidden_params
            }
        return parameters

    def initialize(self, **kwargs):
        """
        TODO: Open input file
        """
        self.config_data = self.serialize_parameters(
            self.filter_parameters(self.parameters, self.mode),
            self.mode).encode("utf-8")

    def getIO(self):
        return MemoryIO(io.BytesIO(self.config_data))

    @property
    def dataURL(self) -> str:
        hostname = os.uname()[1]
        return f"config://{hostname}/{os.getpid()}/{id(self.config_data)}"
Code example #4
File: simple.py Project: ICRAR/daliuge
class HelloWorldApp(BarrierAppDROP):
    """
    An App that writes 'Hello World' or 'Hello <greet>' to all of
    its outputs.

    Keywords:
    greet:   string, [World], whom to greet.
    """

    component_meta = dlg_component(
        "HelloWorldApp",
        "Hello World App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    greet = dlg_string_param("greet", "World")

    def run(self):
        ins = self.inputs
        # if no inputs use the parameter else use the input
        if len(ins) == 0:
            self.greeting = "Hello %s" % self.greet
        elif len(ins) != 1:
            raise Exception("Only one input expected for %r" % self)
        else:  # the input is expected to be a vector. We'll use the first element
            try:
                phrase = str(
                    pickle.loads(droputils.allDropContents(ins[0]))[0])
            except _pickle.UnpicklingError:
                phrase = str(droputils.allDropContents(ins[0]),
                             encoding="utf-8")
            self.greeting = f"Hello {phrase}"

        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        for o in outs:
            o.len = len(self.greeting.encode())
            o.write(self.greeting.encode())  # greet across all outputs
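
A hypothetical usage sketch mirroring the UrlRetrieveApp example: with no inputs connected, the greet parameter is used directly.

from dlg.drop import InMemoryDROP  # import path varies by DALiuGE version

app = HelloWorldApp('hello-1', 'hello-1', greet='DALiuGE')
out = InMemoryDROP('mem-1', 'mem-1')
app.addOutput(out)
app.execute()  # writes b'Hello DALiuGE' to the output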
Code example #5
File: scp.py Project: ICRAR/daliuge
class ScpApp(BarrierAppDROP):
    """
    A BarrierAppDROP that copies the content of its single input onto its
    single output via SSH's scp protocol.

    Because of the nature of the scp protocol, the input and output DROPs
    of this application must both be filesystem-based; i.e., they must be an
    instance of FileDROP or of DirectoryContainer.

    Depending on the physical location of each DROP (this application, and
    its input and output) this application will copy data FROM another host or
    TO another host. This application's node must therefore coincide with the
    node of one of the two I/O DROPs.
    """

    component_meta = dlg_component(
        "ScpApp",
        "A BarrierAppDROP that copies the content of its single "
        "input onto its single output via SSHs scp protocol.",
        [
            dlg_batch_input(
                "binary/*",
                [
                    NgasDROP,
                    InMemoryDROP,
                    SharedMemoryDROP,
                    NullDROP,
                    RDBMSDrop,
                    ContainerDROP,
                ],
            )
        ],
        [
            dlg_batch_output(
                "binary/*",
                [
                    NgasDROP,
                    InMemoryDROP,
                    SharedMemoryDROP,
                    NullDROP,
                    RDBMSDrop,
                    ContainerDROP,
                ],
            )
        ],
        [dlg_streaming_input("binary/*")],
    )

    remoteUser = dlg_string_param("remoteUser", None)
    pkeyPath = dlg_string_param("pkeyPath", None)
    timeout = dlg_float_param("timeout", None)

    def initialize(self, **kwargs):
        BarrierAppDROP.initialize(self, **kwargs)

    def run(self):
        # Check inputs/outputs are of a valid type
        for i in self.inputs + self.outputs:
            # The current only way to check if we are handling a FileDROP
            # or a DirectoryContainer is by checking if they have a `path`
            # attribute. Calling `isinstance(i, (FileDROP, DirectoryContainer))`
            # doesn't work because the input/output might be a proxy object
            # that fails the test
            if not hasattr(i, "path"):
                raise Exception("%r is not supported by the ScpApp" % (i))

        # Only one input and one output are supported
        if len(self.inputs) != 1:
            raise Exception(
                "Only one input is supported by the ScpApp, %d given" %
                (len(self.inputs)))
        if len(self.outputs) != 1:
            raise Exception(
                "Only one output is supported by the ScpApp, %d given" %
                (len(self.outputs)))

        inp = self.inputs[0]
        out = self.outputs[0]

        # Input and output must be of the same type
        # See comment above regarding identification of DROP types, and why we
        # can't simply do:
        # if inp.__class__ != out.__class__:
        if hasattr(inp, "children") != hasattr(out, "children"):
            raise Exception("Input and output must be of the same type")

        # This app's location must be equal to at least one of the I/O
        if self.node != inp.node and self.node != out.node:
            raise Exception(
                "%r is deployed in a node different from its input AND its output"
                % (self, ))

        # See comment above regarding identification of File/Directory DROPs and
        # why we can't simply do:
        # recursive = isinstance(inp, DirectoryContainer)
        recursive = hasattr(inp, "children")
        if self.node == inp.node:
            copyTo(
                out.node,
                inp.path,
                remotePath=out.path,
                recursive=recursive,
                username=self.remoteUser,
                pkeyPath=self.pkeyPath,
                timeout=self.timeout,
            )
        else:
            copyFrom(
                inp.node,
                inp.path,
                localPath=out.path,
                recursive=recursive,
                username=self.remoteUser,
                pkeyPath=self.pkeyPath,
                timeout=self.timeout,
            )
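
A hypothetical wiring sketch showing the constraint described in the docstring: the app shares a node with its input, so copyTo() would be used. Hostnames and paths are made up; in practice node assignment comes from the physical graph rather than constructor arguments.

from dlg.drop import FileDROP

inp = FileDROP('f-in', 'f-in', filepath='/data/in.dat', node='host-a')
out = FileDROP('f-out', 'f-out', filepath='/data/out.dat', node='host-b')
app = ScpApp('scp-1', 'scp-1', remoteUser='dlg', node='host-a')
app.addInput(inp)
app.addOutput(out)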
Code example #6
File: pyfunc.py Project: ICRAR/daliuge
class PyFuncApp(BarrierAppDROP):
    """
    An application that wraps a simple python function.

    The inputs of the application are treated as the arguments of the function.
    Conversely, the output of the function is treated as the output of the
    application. If the application has more than one output, the result of
    calling the function is treated as an iterable, with each individual object
    being written to its corresponding output.

    Users indicate the function to be wrapped via the ``func_name`` parameter.
    In this case func_name needs to specify a function in the standard form

    ``module.function``

    and the module needs to be accessible on the PYTHONPATH of the DALiuGE
    engine. Note that the engine expands the standard PYTHONPATH with
    DLG_ROOT/code. That directory is always available, even if the engine is
    running in a docker container.

    Otherwise, users can also *send* over the python code using the ``func_code``
    parameter. The code needs to be base64-encoded and produced with the marshal
    module of the same Python version used to run DALiuGE.

    Both inputs and outputs are (de-)serialized using the pickle protocol if the value
    of the respective boolean component parameter is set to True. This is also
    applied to func_defaults and func_arg_mappings.

    In addition to the input mapping, the implementation also allows setting defaults
    both in the function itself and in a logical graph. If set in the logical graph
    using the func_defaults parameter, the defaults need to be specified as a
    dictionary of the form

    ``{"kwargs":{"kw1_name":kw1_value, "kw2_name":kw2_value}, "args":[arg1, arg2]}``

    The positional-only args will be used in order of appearance.
    """

    component_meta = dlg_component(
        "PyFuncApp",
        "Py Func App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    func_name = dlg_string_param("func_name", None)
    # func_code = dlg_bytes_param("func_code", None) # bytes or base64 string
    input_parser: DropParser = dlg_enum_param(
        DropParser, "input_parser", DropParser.PICKLE)  # type: ignore
    output_parser: DropParser = dlg_enum_param(
        DropParser, "output_parser", DropParser.PICKLE)  # type: ignore
    func_arg_mapping = dlg_dict_param("func_arg_mapping", {})
    func_defaults = dlg_dict_param("func_defaults", {})
    f: Callable
    fdefaults: dict

    def _init_func_defaults(self):
        """
        Initialize self.func_defaults dictionary from values provided.
        Multiple options exist and some are here for compatibility.
        """
        logger.debug(
            f"Starting evaluation of func_defaults: {self.func_defaults}")
        if (isinstance(self.func_defaults, dict)
                and len(self.func_defaults) > 0
                and list(self.func_defaults.keys()) == ["kwargs", "args"]):
            # we bring everything back to just kwargs, because positional args are messy
            # NOTE: This means that positional ONLY arguments won't work, but those are not used
            # too often.
            for arg in self.func_defaults["args"]:
                self.func_defaults["kwargs"][arg] = arg
                self.func_defaults = self.func_defaults["kwargs"]
        elif (isinstance(self.func_defaults, dict)
              and "kwargs" in self.func_defaults
              and isinstance(self.func_defaults["kwargs"], dict)):
            self.func_defaults = self.func_defaults["kwargs"]
        # we came all this way, now assume that any resulting dict is correct
        if not isinstance(self.func_defaults, dict):
            logger.error(
                "Wrong format or type for function defaults for %s: %r, %r",
                self.f.__name__, self.func_defaults, type(self.func_defaults))
            raise ValueError
        if self.input_parser is DropParser.PICKLE:
            # only values are pickled, get them unpickled
            for name, value in self.func_defaults.items():
                self.func_defaults[name] = deserialize_data(value)

        # set the function defaults from introspection
        if self.arguments:
            self.fn_npos = len(self.arguments.args) - self.fn_ndef
            self.fn_defaults = {
                name: None
                for name in self.arguments.args[:self.fn_npos]
            }
            logger.debug(f"initialized fn_defaults with {self.fn_defaults}")
            # deal with args and kwargs
            kwargs = (
                dict(zip(self.arguments.args[self.fn_npos:],
                         self.arguments.defaults))
                if self.arguments.defaults else {})
            self.fn_defaults.update(kwargs)
            logger.debug(f"fn_defaults updated with {kwargs}")
            # deal with kwonlyargs
            if self.arguments.kwonlydefaults:
                kwonlyargs = dict(
                    zip(self.arguments.kwonlyargs,
                        self.arguments.kwonlydefaults))
                self.fn_defaults.update(kwonlyargs)
                logger.debug(f"fn_defaults updated with {kwonlyargs}")

            # positional argument names
            self.fn_posargs = self.arguments.args[:self.fn_npos]

    def initialize(self, **kwargs):
        """
        The initialization of a function component is mainly dealing with mapping
        inputs and provided applicationArgs to the function arguments. All of this
        should be driven by matching names, but currently that is not being done.
        """
        BarrierAppDROP.initialize(self, **kwargs)

        self._applicationArgs = self._getArg(kwargs, "applicationArgs", {})

        self.func_code = self._getArg(kwargs, "func_code", None)

        # check for function definition arguments in applicationArgs
        self.func_def_keywords = [
            "func_code",
            "func_name",
            "func_arg_mapping",
            "input_parser",
            "output_parser",
            "func_defaults",
            "pickle",
        ]

        # backwards compatibility
        if "pickle" in self._applicationArgs:
            if self._applicationArgs["pickle"]["value"] == True:
                self.input_parser = DropParser.PICKLE
                self.output_parser = DropParser.PICKLE
            else:
                self.input_parser = DropParser.EVAL
                self.output_parser = DropParser.EVAL
            self._applicationArgs.pop("pickle")

        for kw in self.func_def_keywords:
            if kw in self._applicationArgs:  # these are the preferred ones now
                if (isinstance(self._applicationArgs[kw]["value"], bool)
                        or self._applicationArgs[kw]["value"]
                        or self._applicationArgs[kw]["precious"]):
                    # only transfer if there is a value or precious is True
                    self._applicationArgs.pop(kw)

        # number of additional arguments provided
        self.num_args = len(self._applicationArgs)

        if not self.func_name and not self.func_code:
            raise InvalidDropException(
                self, "No function specified (either via name or code)")

        # Lookup function or import bytecode as a function
        if not self.func_code:
            self.f = import_using_name(self, self.func_name)
        else:
            if not isinstance(self.func_code, bytes):
                self.func_code = base64.b64decode(
                    self.func_code.encode("utf8"))
            self.f = import_using_code(self.func_code)
        # make sure defaults are dicts
        if isinstance(self.func_defaults, str):
            self.func_defaults = ast.literal_eval(self.func_defaults)
        if isinstance(self.func_arg_mapping, str):
            self.func_arg_mapping = ast.literal_eval(self.func_arg_mapping)

        self.arguments = inspect.getfullargspec(self.f)
        logger.debug(f"Function inspection revealed {self.arguments}")
        # we don't want to mess with the 'self' argument
        if 'self' in self.arguments.args:
            self.arguments.args.remove('self')
        self.fn_nargs = len(self.arguments.args)
        self.fn_ndef = len(
            self.arguments.defaults) if self.arguments.defaults else 0
        self._init_func_defaults()
        logger.info(f"Args summary for '{self.func_name}':")
        logger.info(f"Args: {self.arguments.args}")
        logger.info(f"Args defaults:  {self.arguments.defaults}")
        logger.info(f"Args positional: {self.arguments.args[:self.fn_npos]}")
        logger.info(f"Args keyword: {self.arguments.args[self.fn_npos:]}")
        logger.info(f"Args supplied:  {self.func_defaults}")
        logger.info(f"VarArgs allowed:  {self.arguments.varargs}")
        logger.info(f"VarKwds allowed:  {self.arguments.varkw}")

        # Mapping between argument name and input drop uids
        logger.debug(f"Input mapping: {self.func_arg_mapping}")
        self._recompute_data = {}

    def run(self):
        """
        Function positional and keyword argument treatment:

        Function arguments can be provided in four different ways:
        1) Through an input port
        2) By specifying ApplicationArgs (one for each argument)
        3) By specifying a func_defaults dictionary in the ComponentParameters
        4) Through defaults at the time of function definition

        The priority follows the list above with input ports overruling the others.
        Function arguments in Python can be passed as positional, kw-value, positional
        only, kw-value only, and catch-all args and kwargs, which don't provide any
        hint about the names of accepted parameters. All of them are now supported. If
        positional arguments or kw-value arguments are provided by the user, but are
        not explicitly defined in the function signature AND args and/or kwargs are
        allowed, then these arguments are passed to the function. For args this is
        somewhat risky, since the order is relevant and in this code derived from the
        order defined in the graph (same order as defined in the component description).

        Input ports will NOT be used by order (anymore), but by the IdText (name field
        in EAGLE) of the port. Since each input port requires an associated data drop,
        this provides a unique mapping. This also allows to pass values to any function
        argument through a port.

        Function argument values as well as the function code can be provided in
        serialised (pickle) form by setting the 'pickle' flag. Note that this flag
        is valid for all arguments and the code (if specified) in a global way.
        """

        # Inputs are un-pickled and treated as the arguments of the function
        # Their order must be preserved, so we use an OrderedDict
        if self.input_parser is DropParser.PICKLE:
            #all_contents = lambda x: pickle.loads(droputils.allDropContents(x))
            all_contents = droputils.load_pickle
        elif self.input_parser is DropParser.EVAL:

            def optionalEval(x):
                # Null and Empty Drops will return an empty byte string
                # which should propagate back to None
                content: str = droputils.allDropContents(x).decode('utf-8')
                return ast.literal_eval(content) if len(content) > 0 else None

            all_contents = optionalEval
        elif self.input_parser is DropParser.NPY:
            all_contents = droputils.load_npy
        elif self.input_parser is DropParser.PATH:
            all_contents = lambda x: x.path
        elif self.input_parser is DropParser.DATAURL:
            all_contents = lambda x: x.dataurl
        else:
            raise ValueError(repr(self.input_parser))

        inputs = collections.OrderedDict()
        for uid, drop in self._inputs.items():
            inputs[uid] = all_contents(drop)

        outputs = collections.OrderedDict()
        for uid, drop in self._outputs.items():
            # only the path of an output is meaningful before the function runs
            outputs[uid] = drop.path if self.output_parser is DropParser.PATH else None

        # Keyword arguments are made up of the default values plus the inputs
        # that match one of the keyword argument names
        # if defaults dict has not been specified at all we'll go ahead anyway
        n_args = len(self.func_defaults)
        argnames = self.arguments.args

        # use explicit mapping of inputs to arguments first
        # TODO: Required by dlg_delayed?? Else, we should really not do this.
        kwargs = {
            name: inputs.pop(uid)
            for name, uid in self.func_arg_mapping.items()
            if name in self.func_defaults or name not in argnames
        }
        logger.debug(f"updating funcargs with {kwargs}")
        funcargs = kwargs

        # Fill arguments with rest of inputs
        logger.debug(f"available inputs: {inputs}")

        # if we have named ports use the inputs with
        # the correct UIDs
        logger.debug(f"Parameters found: {self.parameters}")
        posargs = self.arguments.args[:self.fn_npos]
        kwargs = {}
        pargs = []
        # Initialize pargs dictionary and update with provided argument values
        pargsDict = collections.OrderedDict(zip(posargs,
                                                [None] * len(posargs)))
        if "applicationArgs" in self.parameters:
            # we'll pop the identified ones
            appArgs = self.parameters["applicationArgs"]
            _dum = [
                appArgs.pop(k) for k in self.func_def_keywords if k in appArgs
            ]
            logger.debug("Identified keyword arguments removed: %s",
                         [i['text'] for i in _dum])
            pargsDict.update({
                k: self.parameters[k]
                for k in pargsDict if k in self.parameters
            })
            # if defined in both we use AppArgs values
            pargsDict.update(
                {k: appArgs[k]['value']
                 for k in pargsDict if k in appArgs})
            logger.debug("Initial posargs dictionary: %s", pargsDict)
        else:
            appArgs = {}

        if ('inputs' in self.parameters
                and droputils.check_ports_dict(self.parameters['inputs'])):
            check_len = min(len(inputs),
                            self.fn_nargs + len(self.arguments.kwonlyargs))
            inputs_dict = collections.OrderedDict()
            for inport in self.parameters['inputs']:
                key = list(inport.keys())[0]
                inputs_dict[key] = {'name': inport[key], 'path': inputs[key]}
            kwargs.update(
                droputils.identify_named_ports(inputs_dict,
                                               posargs,
                                               pargsDict,
                                               appArgs,
                                               check_len=check_len,
                                               mode="inputs"))
        else:
            for i in range(min(len(inputs), self.fn_nargs)):
                kwargs.update(
                    {self.arguments.args[i]: list(inputs.values())[i]})

        logger.debug(f"Updating funcargs with input ports {kwargs}")
        funcargs.update(kwargs)

        if ('outputs' in self.parameters
                and droputils.check_ports_dict(self.parameters['outputs'])):
            check_len = min(len(outputs),
                            self.fn_nargs + len(self.arguments.kwonlyargs))
            outputs_dict = collections.OrderedDict()
            for outport in self.parameters['outputs']:
                key = list(outport.keys())[0]
                outputs_dict[key] = {
                    'name': outport[key],
                    'path': outputs[key]
                }

            kwargs.update(
                droputils.identify_named_ports(outputs_dict,
                                               posargs,
                                               pargsDict,
                                               appArgs,
                                               check_len=check_len,
                                               mode="outputs"))

        # Try to get values for still missing positional arguments from Application Args
        if "applicationArgs" in self.parameters:
            for pa in posargs:
                if pa != 'self' and pa not in funcargs:
                    if pa in appArgs:
                        arg = appArgs.pop(pa)
                        value = arg['value']
                        ptype = arg['type']
                        if ptype in ["Complex", "Json"]:
                            try:
                                value = ast.literal_eval(value)
                            except Exception as e:
                                # just go on if this did not work
                                logger.warning("Eval raised an error: %s", e)
                        elif ptype in ["Python"]:
                            try:
                                import numpy
                                value = eval(value, {"numpy": numpy}, {})
                            except Exception:
                                # keep the raw string value if evaluation fails
                                pass
                        pargsDict.update({pa: value})
                    elif pa != 'self' and pa not in pargsDict:
                        logger.warning(
                            f"Required positional argument '{pa}' not found!")
            _dum = [appArgs.pop(k) for k in pargsDict if k in appArgs]
            logger.debug("Identified positional arguments removed: %s",
                         [i['text'] for i in _dum])
            logger.debug(f"updating posargs with {list(pargsDict.keys())}")
            pargs.extend(list(pargsDict.values()))

            # Try to get values for still missing kwargs arguments from Application kws
            kwargs = {}
            kws = self.arguments.args[self.fn_npos:]
            for ka in kws:
                if ka not in funcargs:
                    if ka in appArgs:
                        arg = appArgs.pop(ka)
                        value = arg['value']
                        ptype = arg['type']
                        if ptype in ["Complex", "Json"]:
                            try:
                                value = ast.literal_eval(value)
                            except Exception:
                                # keep the raw string value
                                pass
                        kwargs.update({ka: value})
                    else:
                        logger.warning(f"Keyword argument '{ka}' not found!")
            logger.debug(f"updating funcargs with {kwargs}")
            funcargs.update(kwargs)

            # deal with kwonlyargs
            kwargs = {}
            kws = self.arguments.kwonlyargs
            for ka in kws:
                if ka not in funcargs:
                    if ka in appArgs:
                        arg = appArgs.pop(ka)
                        value = arg['value']
                        ptype = arg['type']
                        if ptype in ["Complex", "Json"]:
                            try:
                                value = ast.literal_eval(value)
                            except Exception:
                                # keep the raw string value
                                pass
                        kwargs.update({ka: value})
                    else:
                        logger.warning(
                            f"Keyword only argument '{ka}' not found!")
            logger.debug(f"updating funcargs with kwonlyargs: {kwargs}")
            funcargs.update(kwargs)

            # any remaining application arguments will be used for vargs and vkwargs
            vparg = []
            vkarg = {}
            logger.debug(f"Remaining AppArguments {appArgs}")
            for arg in appArgs:
                if appArgs[arg]['type'] in ['Json', 'Complex']:
                    value = ast.literal_eval(appArgs[arg]['value'])
                else:
                    value = appArgs[arg]['value']
                if appArgs[arg]['positional']:
                    vparg.append(value)
                else:
                    vkarg.update({arg: value})

            if self.arguments.varargs:
                pargs.extend(vparg)
            if self.arguments.varkw:
                funcargs.update(vkarg)

        # Fill rest with default arguments if there are any more
        kwargs = {}
        for kw in self.func_defaults.keys():
            value = self.func_defaults[kw]
            if kw not in funcargs:
                kwargs.update({kw: value})
        logger.debug(f"updating funcargs with {kwargs}")
        funcargs.update(kwargs)
        self._recompute_data["args"] = funcargs.copy()
        logger.debug(f"Running {self.func_name} with *{pargs} **{funcargs}")

        # we capture and log whatever is produced on STDOUT
        capture = StringIO()
        with redirect_stdout(capture):
            result = self.f(*pargs, **funcargs)
        logger.info(
            f"Captured output from function app '{self.func_name}': {capture.getvalue()}"
        )
        logger.debug(f"Finished execution of {self.func_name}.")

        # Depending on how many outputs we have we treat our result
        # as an iterable or as a single object. Each result is pickled
        # and written to its corresponding output
        self.write_results(result)

    def write_results(self, result):
        outputs = self.outputs
        if len(outputs) > 0:
            if len(outputs) == 1:
                result = [result]
            for r, o in zip(result, outputs):
                if self.output_parser is DropParser.PICKLE:
                    logger.debug(f"Writing pickeled result {type(r)} to {o}")
                    o.write(pickle.dumps(r))
                elif self.output_parser is DropParser.EVAL:
                    o.write(repr(r).encode('utf-8'))
                elif self.output_parser is DropParser.NPY:
                    droputils.save_npy(o, r)
                else:
                    raise ValueError(repr(self.output_parser))

    def generate_recompute_data(self):
        for name, val in self._recompute_data.items():
            try:
                json.dumps(val)
            except TypeError as e:
                logger.debug(e)
                self._recompute_data[name] = repr(val)
        return self._recompute_data
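
A hypothetical construction sketch: wrapping a function by name. The module path and defaults are invented; func_name must be importable on the engine's PYTHONPATH, as the class docstring explains, and func_defaults uses the documented {"kwargs": ..., "args": ...} form.

app = PyFuncApp(
    'func-1', 'func-1',
    func_name='mymodule.scale',  # assumed module.function
    func_defaults={'kwargs': {'factor': 2.0}, 'args': []},
)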
Code example #7
File: simple.py Project: ICRAR/daliuge
class GenericNpyGatherApp(BarrierAppDROP):
    """
    A BarrierAppDROP that reduces and then gathers one or more inputs using
    cumulative operations.
    function:  string <'sum'|'prod'|'min'|'max'|'add'|'multiply'|'maximum'|'minimum'>.

    """

    component_meta = dlg_component(
        "GenericNpyGatherApp",
        "Generic Npy Gather App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # reduce and combine operation pair names
    # reduce operation reduces the dimensionality of a ndarray
    # gather operation combines ndarrays and retains dimensionality
    functions = {
        # reduce and gather (output dimension is reduced)
        "sum": "add",        # sum-reduce each input along an axis, then gather across drops
        "prod": "multiply",  # prod-reduce each input along an axis, then gather across drops
        "max": "maximum",    # max-reduce each input along an axis, then gather across drops
        "min": "minimum",    # min-reduce each input along an axis, then gather across drops
        # gather only
        "add": None,         # elementwise addition of inputs; ndarrays must be of same shape
        "multiply": None,    # elementwise multiplication of inputs; ndarrays must be of same shape
        "maximum": None,     # elementwise maximum of inputs; ndarrays must be of same shape
        "minimum": None,     # elementwise minimum of inputs; ndarrays must be of same shape
    }
    function: str = dlg_string_param("function", "sum")  # type: ignore
    reduce_axes: list = dlg_list_param("reduce_axes", "None")  # type: ignore

    def run(self):
        if len(self.inputs) < 1:
            raise Exception(
                f"At least one input should have been added to {self}")
        if len(self.outputs) < 1:
            raise Exception(
                f"At least one output should have been added to {self}")
        if self.function not in self.functions:
            raise Exception(
                f"Function {self.function} not supported by {self}")

        result = (self.reduce_gather_inputs() if self.functions[self.function]
                  is not None else self.gather_inputs())

        for o in self.outputs:
            droputils.save_numpy(o, result)

    def reduce_gather_inputs(self):
        """reduces then gathers each input drop interpreted as an npy drop"""
        result: Optional[Number] = None
        reduce = getattr(np, f"{self.function}")
        gather = getattr(np, f"{self.functions[self.function]}")
        for input in self.inputs:
            data = droputils.load_numpy(input)
            # skip gather for the first input
            result = (reduce(data, axis=self.reduce_axes) if result is None
                      else gather(result, reduce(data, axis=self.reduce_axes)))
        return result

    def gather_inputs(self):
        """gathers each input drop interpreted as an npy drop"""
        result: Optional[Number] = None
        gather = getattr(np, f"{self.function}")
        for input in self.inputs:
            data = droputils.load_numpy(input)
            # assign instead of gather for the first input
            result = data if result is None else gather(result, data)
        return result
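
A pure-numpy illustration of the "sum"/"add" pair above, with no DALiuGE drops involved: each input array is reduced with np.sum along an axis, and the partial results are combined with np.add.

import numpy as np

inputs = [np.ones((2, 3)), np.arange(6).reshape(2, 3)]
result = None
for data in inputs:
    partial = np.sum(data, axis=0)  # reduce step
    result = partial if result is None else np.add(result, partial)  # gather step
print(result)  # [5. 7. 9.]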
Code example #8
File: simple.py Project: ICRAR/daliuge
class AverageArraysApp(BarrierAppDROP):
    """
    A BarrierAppDROP that averages arrays received on input. It requires
    multiple inputs and writes the generated average vector to all of its
    outputs.
    The input arrays are assumed to have the same number of elements and
    the output array will also have that same number of elements.

    Keywords:

    method:  string <['mean']|'median'>, use mean or median as method.
    """

    from numpy import mean, median

    component_meta = dlg_component(
        "AverageArraysApp",
        "Average Array App.",
        [dlg_batch_input("binary/*", [])],
        [dlg_batch_output("binary/*", [])],
        [dlg_streaming_input("binary/*")],
    )

    # default values
    methods = ["mean", "median"]
    method = dlg_string_param("method", methods[0])

    def __init__(self, oid, uid, **kwargs):
        super().__init__(oid, uid, **kwargs)
        self.marray = []

    def initialize(self, **kwargs):
        super().initialize(**kwargs)

    def run(self):
        # At least one output should have been added
        outs = self.outputs
        if len(outs) < 1:
            raise Exception(
                "At least one output should have been added to %r" % self)
        self.getInputArrays()
        self._avg = self.averageArray()
        for o in outs:
            d = pickle.dumps(self._avg)
            o.len = len(d)
            o.write(d)  # average across inputs

    def getInputArrays(self):
        """
        Create the input array from all inputs received. Shape is
        (<#inputs>, <#elements>), where #elements is the length of the
        vector received from one input.
        """
        ins = self.inputs
        if len(ins) < 1:
            raise Exception("At least one input should have been added to %r" %
                            self)
        marray = []
        for inp in ins:
            sarray = droputils.allDropContents(inp)
            if len(sarray) == 0:
                print(f"Input does not contain data!")
            else:
                sarray = pickle.loads(sarray)
                if isinstance(sarray, (list, tuple, np.ndarray)):
                    marray.extend(list(sarray))
                else:
                    marray.append(sarray)
        self.marray = marray

    def averageArray(self):
        method_to_call = getattr(np, self.method)
        return method_to_call(self.marray, axis=0)
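
A pure-numpy illustration of the averaging step: getInputArrays() collects one vector per input into marray, and the configured method is applied along axis 0.

import numpy as np

marray = [[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]  # one vector per input
print(np.mean(marray, axis=0))    # [2. 3. 4.]
print(np.median(marray, axis=0))  # [2. 3. 4.]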
Code example #9
class ProduceConfig(BarrierAppDROP):
    """A BarrierAppDrop that produces multiple config files suitable for the CallLeap BarrierAppDrop"""
    component_meta = dlg_component('ProduceConfig', 'Produce Config.',
                                    [dlg_batch_input('binary/*', [])],
                                    [dlg_batch_output('binary/*', [])],
                                    [dlg_streaming_input('binary/*')])

    # read component parameters
    numStations = dlg_int_param('number of stations', 1)
    implementation = dlg_string_param('implementation', 'cpu')
    autoCorrelation = dlg_bool_param('auto correlation', False)
    maxDirections = dlg_int_param('max directions', 1)

    def initialize(self, **kwargs):
        super(ProduceConfig, self).initialize(**kwargs)

    def run(self):
        # check number of inputs and outputs
        if len(self.inputs) != 1:
            raise Exception("One input is expected by this application")

        # read directions from input 0
        directions = self._readDirections(self.inputs[0])

        # determine number of directions per instance
        numDirectionsPerInstance = float(len(directions)) / float(len(self.outputs))
        numDirectionsPerInstance = min(numDirectionsPerInstance, self.maxDirections)

        startDirectionIndex = 0
        endDirectionIndex = 0

        # split directions
        for i in range(len(self.outputs)):
            endDirectionIndex = int(math.floor((i+1)*numDirectionsPerInstance))

            # split directions
            partDirections = directions[startDirectionIndex:endDirectionIndex]

            # build config
            configJSON = self._createConfig(self.numStations, partDirections, self.implementation, self.autoCorrelation)

            # stringify config
            config = json.dumps(configJSON)

            # write config to output
            if isinstance(config, str):
                config = config.encode()
            self.outputs[i].write(config)

            # continue from here in the next iteration
            startDirectionIndex = endDirectionIndex

    def _readDirections(self, inDrop):
        directions = []

        # NOTE: it appears csv.reader() can't use the DROPFile(inDrop) directly,
        #       since DROPFile is not an iterator. Instead, we read the whole
        #       inDrop into a string and pass that to csv.reader()
        with DROPFile(inDrop) as f:
            file_data = f.read()
            if isinstance(file_data, bytes):
                file_data = file_data.decode('utf-8')
            csvreader = csv.reader(file_data.split('\n'))
            for row in csvreader:
                # skip rows with incorrect number of values
                if len(row) != 2:
                    continue

                x = float(row[0])
                y = float(row[1])
                directions.append([x,y])

        return directions

    def _createConfig(self, numStations, directions, implementation, autoCorrelation):
        return {
            'stations': numStations,
            'directions': directions,
            'computeImplementation': implementation,
            'readAutoCorrelations': autoCorrelation
        }
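
A standalone illustration of the splitting arithmetic in run(), with made-up numbers: 5 directions spread over 2 outputs yields the slices [0:2] and [2:5], because the slice boundaries are floor((i + 1) * 2.5).

import math

directions = list(range(5))
numDirectionsPerInstance = len(directions) / 2  # 2.5 directions per output
start = 0
for i in range(2):
    end = int(math.floor((i + 1) * numDirectionsPerInstance))
    print(directions[start:end])
    start = end
# [0, 1]
# [2, 3, 4]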
Code example #10
class MSPlasmaReader(BarrierAppDROP):
    """
    A BarrierAppDROP that reads a CASA measurement set from a plasma store and writes it out to a file.

    Example:
        a = FileDROP('a', 'a', filepath=in_file)
        b = MSPlasmaWriter('b', 'b')
        c = PlasmaDROP('c', 'c')
        d = MSPlasmaReader('d', 'd')
        e = FileDROP('e', 'e', filepath=out_file)
    """
    component_meta = dlg_component('MSPlasmaReader',
                                   'Measurement Set Plasma Reader.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    ms_output_path = dlg_string_param('ms_output_path', None)

    def __init__(self, oid, uid, **kwargs):
        super().__init__(oid, uid, **kwargs)
        self.reproduce_data = {}
        self.recompute_data = {}

    def initialize(self, **kwargs):
        super(MSPlasmaReader, self).initialize(**kwargs)

    def _write_table(self, ms, path, delete=True):
        if delete:
            try:
                os.rmdir(path)
            except OSError:
                pass

        abs_path = os.path.dirname(os.path.abspath(path))
        filename = os.path.basename(path)

        value = ms.pop('/')
        with tables.table(abs_path + '/' + filename,
                          value[0],
                          nrow=len(value[1])) as t:
            with t.row() as r:
                for idx, val in enumerate(value[1]):
                    r.put(idx, val)

        for key, value in ms.items():
            name = abs_path + '/' + filename + '/' + key
            with tables.table(name, value[0], nrow=len(value[1])) as t:
                with t.row() as r:
                    for idx, val in enumerate(value[1]):
                        if val.get('LOG', None) == []:
                            val['LOG'] = ''
                        if val.get('SCHEDULE', None) == []:
                            val['SCHEDULE'] = ''
                        r.put(idx, val)

    def _deserialize_table(self, in_stream, path):
        load_bytes = io.BytesIO(in_stream)
        ms = np.load(load_bytes, allow_pickle=True).flat[0]
        self._write_table(ms, path)
        self.reproduce_data['data_hash'] = common_hash(ms)

    def run(self, **kwargs):
        if len(self.inputs) != 1:
            raise Exception("This application reads from only one DROP")
        if len(self.outputs) != 1:
            raise Exception("This application writes to only one DROP")

        inp = self.inputs[0]
        out = self.outputs[0].path
        self.recompute_data['in'] = str(inp)
        self.recompute_data['out'] = str(out)

        desc = inp.open()
        input_stream = inp.read(desc)
        self._deserialize_table(input_stream, out)

    def generate_recompute_data(self):
        self.recompute_data['status'] = self.status
        return self.recompute_data

    def generate_reproduce_data(self):
        return self.reproduce_data
Code example #11
class MSPlasmaWriter(BarrierAppDROP):
    """
    A BarrierAppDROP that reads a CASA measurement set and writes it out to a plasma store.

    Example:
        a = FileDROP('a', 'a', filepath=in_file)
        b = MSPlasmaWriter('b', 'b')
        c = PlasmaDROP('c', 'c')
        d = MSPlasmaReader('d', 'd')
        e = FileDROP('e', 'e', filepath=out_file)
    """
    component_meta = dlg_component('MSPlasmaWriter',
                                   'Measurement Set Plasma Writer.',
                                   [dlg_batch_input('binary/*', [])],
                                   [dlg_batch_output('binary/*', [])],
                                   [dlg_streaming_input('binary/*')])

    ms_input_path = dlg_string_param('ms_input_path', None)

    def __init__(self, oid, uid, **kwargs):
        super().__init__(oid, uid, **kwargs)
        self.recompute_data = {}
        self.reproduce_data = {}

    def initialize(self, **kwargs):
        super(MSPlasmaWriter, self).initialize(**kwargs)

    def _read_table(self, table_path, ms, table_name=None):
        if not table_name:
            table_name = os.path.basename(table_path)

        ms[table_name] = []
        with tables.table(table_path) as t:
            ms[table_name].append(t.getdesc())
            ms[table_name].append([])
            for row in t:
                ms[table_name][1].append(row)

    def _serialize_table(self, path):
        ms = {}
        self._read_table(path, ms, table_name='/')

        with tables.table(path) as t:
            sub = t.getsubtables()
            for i in sub:
                self._read_table(i, ms)

        out_stream = io.BytesIO()
        np.save(out_stream, ms, allow_pickle=True)
        return out_stream.getvalue()

    def run(self, **kwargs):
        if len(self.inputs) != 1:
            raise Exception("This application reads from only one DROP")
        if len(self.outputs) != 1:
            raise Exception("This application writes to only one DROP")

        inp = self.inputs[0].path
        out = self.outputs[0]
        self.recompute_data['in'] = str(inp)
        self.recompute_data['out'] = str(out)
        out_bytes = self._serialize_table(inp)
        out.write(out_bytes)
        self.reproduce_data['data_hash'] = common_hash(out_bytes)

    def generate_recompute_data(self):
        self.recompute_data['status'] = self.status
        return self.recompute_data

    def generate_reproduce_data(self):
        return self.reproduce_data
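
A hedged completion of the a..e example shown in both docstrings above. The addConsumer/addOutput calls follow the usual DALiuGE drop API; in a deployed system this wiring is generated from the logical graph rather than written by hand.

a.addConsumer(b)   # b (MSPlasmaWriter) reads the MS file backing a
b.addOutput(c)     # the serialized tables land in the plasma store
c.addConsumer(d)   # d (MSPlasmaReader) consumes the plasma object
d.addOutput(e)     # and writes the measurement set back out to a file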