Beispiel #1
0
    def _add_output_deps(self, executor, args, kwargs, app_fut, func):
        """Rebuild ``app_fut._outputs`` with one DataFuture per requested output.

        Every entry of ``kwargs['outputs']`` gets a DataFuture appended to
        ``app_fut._outputs``. For ``File`` entries (unless
        ``check_staging_inhibited(kwargs)`` is truthy) a stage-out is submitted
        through the data manager, and the entry in the ``outputs`` list is
        replaced in place by a clean copy of the file.

        Args:
             - executor : forwarded to the data manager stage-out calls
             - args : not used by this method
             - kwargs (dict) : app keyword arguments; ``outputs`` is read from it
             - app_fut : app future whose ``_outputs`` list is rebuilt
             - func : app callable, possibly replaced by a stage-out hook

        Returns:
             ``func``, or the last truthy replacement returned by
             ``replace_task_stage_out``.
        """
        logger.debug("Adding output dependencies")
        outputs = kwargs.get('outputs', [])
        app_fut._outputs = []
        for position, out_file in enumerate(outputs):
            if not isinstance(out_file, File) or self.check_staging_inhibited(kwargs):
                # Nothing to stage: the output is ready when the app is.
                logger.debug("Not performing staging for: {}".format(repr(out_file)))
                app_fut._outputs.append(DataFuture(app_fut, out_file, tid=app_fut.tid))
                continue

            # The staging code gets a clean copy which it is allowed to mutate,
            # while the original kept inside the DataFuture is left untouched.
            staged_copy = out_file.cleancopy()
            outputs[position] = staged_copy

            logger.debug("Submitting stage out for output file {}".format(repr(out_file)))
            stageout_fut = self.data_manager.stage_out(staged_copy, executor, app_fut)

            # The DataFuture follows the stage-out future when there is one,
            # otherwise the app future itself.
            if stageout_fut:
                logger.debug("Adding a dependency on stageout future for {}".format(repr(out_file)))
                completes_with = stageout_fut
            else:
                logger.debug("No stageout dependency for {}".format(repr(out_file)))
                completes_with = app_fut
            app_fut._outputs.append(DataFuture(completes_with, out_file, tid=app_fut.tid))

            # Hook for post-task stageout. Note that nothing depends on the
            # output of this hook - which is maybe a bug in the not-very-tested
            # stageout system?
            replacement = self.data_manager.replace_task_stage_out(staged_copy, func, executor)
            if replacement:
                func = replacement
        return func
Beispiel #2
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a python app.

        ``self.func`` is submitted to ``self.executor`` together with the given
        positional and keyword arguments. A DataFuture is created for every
        entry of the ``outputs`` kwarg.

        Args:
             - Arbitrary
        Kwargs:
             - Arbitrary

        Returns:
             If a non-empty outputs=[...] was a kwarg then:
                   App_fut, [Data_Futures...]
             else:
                   App_fut

        """
        # isinstance() also matches DataFlowKernel subclasses, which an exact
        # type() comparison would misreport as a plain executor. The submit
        # call is identical in both cases; only the log message differs.
        if isinstance(self.executor, DataFlowKernel):
            logger.debug("Submitting to DataFlowKernel : %s", self.executor)
        else:
            logger.debug("Submitting to Executor: %s", self.executor)
        app_fut = self.executor.submit(self.func, *args, **kwargs)

        out_futs = [DataFuture(app_fut, o, parent=app_fut) for o in kwargs.get('outputs', [])]
        if out_futs:
            return app_fut, out_futs
        return app_fut
Beispiel #3
0
    def stage_in(self, file, site_name=None):
        """Transport the file from the site of origin to the site.

        This function returns a DataFuture. If the file already has a data
        future registered for the target site, that one is returned and no new
        transfer is submitted.

        Args:
            - self
            - file (File) - file to stage in
            - site_name (str) - a name of a site the file is going to be staged in to.
                                If the site argument is not specified for a file
                                with 'globus' scheme, the file will be staged in to
                                the first site with the "globus" key in a config.
                                Ignored for files with the 'file' scheme.

        Returns:
            The DataFuture tracking the transfer of ``file``.

        Raises:
            ValueError: if ``file.scheme`` is neither 'file' nor 'globus'.
        """
        scheme = file.scheme
        if scheme == 'file':
            site_name = None
        elif scheme == 'globus':
            globus_ep = self._get_globus_site(site_name)
            # Use the name of the endpoint that was actually resolved.
            site_name = globus_ep['site_name']
        else:
            raise ValueError("Unsupported file scheme for stage in: {!r}".format(scheme))

        # Look up by site_name rather than globus_ep: globus_ep is only bound
        # for the 'globus' scheme, so using it here broke every 'file' stage-in.
        df = file.get_data_future(site_name)
        if df:
            return df

        if scheme == 'file':
            f = self.submit(self._file_transfer_in, file)
        else:
            f = self.submit(self._globus_transfer_in, file, globus_ep)

        from parsl.app.futures import DataFuture

        df = DataFuture(f, file)
        file.set_data_future(df, site_name)
        return df
Beispiel #4
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a python app.

        ``self.func`` is submitted to ``self.executor`` along with the app's
        sites, function hash and cache setting.

        Args:
             - Arbitrary
        Kwargs:
             - Arbitrary

        Returns:
             App_fut. One DataFuture per entry of the ``outputs`` kwarg is
             stored in ``App_fut._outputs`` (an empty list without outputs).

        """
        app_fut = self.executor.submit(
            self.func,
            *args,
            parsl_sites=self.sites,
            fn_hash=self.fn_hash,
            cache=self.cache,
            **kwargs
        )

        data_futures = []
        for output in kwargs.get('outputs', []):
            data_futures.append(
                DataFuture(app_fut, output, parent=app_fut, tid=app_fut.tid)
            )
        app_fut._outputs = data_futures

        return app_fut
Beispiel #5
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a Bash app.

        Keyword arguments from the app definition (``self.kwargs``) are combined
        with the ones passed at call time, call-time values taking precedence.
        The combination is built in a fresh dict: ``self.kwargs`` is left
        untouched, so arguments of one invocation do not carry over to the next.

        Args:
             - Arbitrary

        Kwargs:
             - Arbitrary

        Returns:
             App_fut. One DataFuture per entry of the call-time ``outputs``
             kwarg is stored in ``App_fut._outputs``.

        """
        # Merge into a copy instead of mutating the shared definition kwargs.
        invocation_kwargs = dict(self.kwargs)
        invocation_kwargs.update(kwargs)

        app_fut = self.executor.submit(remote_side_bash_executor, self.func, *args,
                                       parsl_sites=self.sites,
                                       **invocation_kwargs)

        logger.debug("App[%s] assigned Task_id:[%s]", self.func.__name__, app_fut.tid)
        out_futs = [DataFuture(app_fut, o, parent=app_fut, tid=app_fut.tid)
                    for o in kwargs.get('outputs', [])]
        app_fut._outputs = out_futs

        return app_fut
Beispiel #6
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a Bash app.

        Keyword arguments from the app definition (``self.kwargs``) are combined
        with the ones passed at call time, call-time values taking precedence.
        The combination is built in a fresh dict: ``self.kwargs`` is left
        untouched, so arguments of one invocation do not carry over to the next.

        Args:
             - Arbitrary

        Kwargs:
             - Arbitrary

        Returns:
             App_fut. One DataFuture per entry of the call-time ``outputs``
             kwarg is stored in ``App_fut._outputs``.

        """
        # Merge into a copy instead of mutating the shared definition kwargs.
        invocation_kwargs = dict(self.kwargs)
        invocation_kwargs.update(kwargs)

        # Use the kernel bound to this app, or the currently loaded one.
        if self.data_flow_kernel is None:
            dfk = DataFlowKernelLoader.dfk()
        else:
            dfk = self.data_flow_kernel

        app_fut = dfk.submit(wrap_error(remote_side_bash_executor), self.func, *args,
                             executors=self.executors,
                             fn_hash=self.func_hash,
                             cache=self.cache,
                             **invocation_kwargs)

        out_futs = [DataFuture(app_fut, o, parent=app_fut, tid=app_fut.tid)
                    for o in kwargs.get('outputs', [])]
        app_fut._outputs = out_futs

        return app_fut
Beispiel #7
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a python app.

        ``self.func`` is submitted to the DataFlowKernel bound to this app or,
        when none is bound, to the one returned by ``DataFlowKernelLoader.dfk()``
        at call time.

        Args:
             - Arbitrary
        Kwargs:
             - Arbitrary

        Returns:
             App_fut. One DataFuture per entry of the ``outputs`` kwarg is
             stored in ``App_fut._outputs``.

        """
        # Resolve the kernel per call and keep it local: storing it on self
        # would pin the app to whichever kernel was loaded at its first call.
        if self.data_flow_kernel is None:
            dfk = DataFlowKernelLoader.dfk()
        else:
            dfk = self.data_flow_kernel

        app_fut = dfk.submit(self.func,
                             *args,
                             executors=self.executors,
                             fn_hash=self.func_hash,
                             cache=self.cache,
                             **kwargs)

        out_futs = [
            DataFuture(app_fut, o, parent=app_fut, tid=app_fut.tid)
            for o in kwargs.get('outputs', [])
        ]
        app_fut._outputs = out_futs

        return app_fut
Beispiel #8
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a python app.

        When the app definition carries a ``walltime`` kwarg, the function is
        wrapped with ``timeout`` for this invocation before being submitted.

        Args:
             - Arbitrary
        Kwargs:
             - Arbitrary

        Returns:
             App_fut. One DataFuture per entry of the ``outputs`` kwarg is
             stored in ``App_fut._outputs``.

        """

        if self.data_flow_kernel is None:
            dfk = DataFlowKernelLoader.dfk()
        else:
            dfk = self.data_flow_kernel

        # Wrap a local reference only. Assigning the wrapper back to self.func
        # would stack one more timeout layer on every call of the app.
        func = self.func
        walltime = self.kwargs.get('walltime')
        if walltime is not None:
            func = timeout(func, walltime)

        app_fut = dfk.submit(func, *args,
                             executors=self.executors,
                             fn_hash=self.func_hash,
                             cache=self.cache,
                             **kwargs)

        out_futs = [DataFuture(app_fut, o, tid=app_fut.tid)
                    for o in kwargs.get('outputs', [])]
        app_fut._outputs = out_futs

        return app_fut
Beispiel #9
0
    def __call__(self, *args, **kwargs):
        """Handle the call to a Bash app.

        Keyword arguments from the app definition (``self.kwargs``) are combined
        with the ones passed at call time, call-time values taking precedence.
        The combination is built in a fresh dict: ``self.kwargs`` is left
        untouched, so arguments of one invocation do not carry over to the next.

        Args:
             - Arbitrary

        Kwargs:
             - Arbitrary

        Returns:
             App_fut. One DataFuture per entry of the call-time ``outputs``
             kwarg is stored in ``App_fut._outputs``.

        """
        # Merge into a copy instead of mutating the shared definition kwargs.
        invocation_kwargs = dict(self.kwargs)
        invocation_kwargs.update(kwargs)

        app_fut = self.executor.submit(remote_side_bash_executor,
                                       self.func,
                                       *args,
                                       parsl_sites=self.sites,
                                       fn_hash=self.fn_hash,
                                       cache=self.cache,
                                       **invocation_kwargs)

        out_futs = [
            DataFuture(app_fut, o, parent=app_fut, tid=app_fut.tid)
            for o in kwargs.get('outputs', [])
        ]
        app_fut._outputs = out_futs

        return app_fut
Beispiel #10
0
    def _add_output_deps(self, executor, args, kwargs, app_fut, func):
        """Rebuild ``app_fut._outputs`` with one DataFuture per requested output.

        Every entry of ``kwargs['outputs']`` gets a DataFuture appended to
        ``app_fut._outputs``. For ``File`` entries (unless
        ``check_staging_inhibited(kwargs)`` is truthy) a stage-out is submitted
        through the data manager first.

        Args:
             - executor : forwarded to the data manager stage-out calls
             - args : not used by this method
             - kwargs (dict) : app keyword arguments; ``outputs`` is read from it
             - app_fut : app future whose ``_outputs`` list is rebuilt
             - func : app callable, possibly replaced by a stage-out hook

        Returns:
             ``func``, or the last truthy replacement returned by
             ``replace_task_stage_out``.
        """
        logger.debug("Adding output dependencies")
        outputs = kwargs.get('outputs', [])
        app_fut._outputs = []
        for out_file in outputs:
            if not isinstance(out_file, File) or self.check_staging_inhibited(kwargs):
                # Nothing to stage: the output is ready when the app is.
                logger.debug("Not performing staging for: {}".format(out_file))
                app_fut._outputs.append(DataFuture(app_fut, out_file, tid=app_fut.tid))
                continue

            logger.debug("Submitting stage out for output file {}".format(out_file))
            stageout_fut = self.data_manager.stage_out(out_file, executor, app_fut)

            # The DataFuture follows the stage-out future when there is one,
            # otherwise the app future itself.
            if stageout_fut:
                logger.debug("Adding a dependency on stageout future for {}".format(out_file))
                completes_with = stageout_fut
            else:
                logger.debug("No stageout dependency for {}".format(out_file))
                completes_with = app_fut
            app_fut._outputs.append(DataFuture(completes_with, out_file, tid=app_fut.tid))

            # Hook for post-task stageout. Note that nothing depends on the
            # output of this hook - which is maybe a bug in the not-very-tested
            # stageout system?
            replacement = self.data_manager.replace_task_stage_out(out_file, func, executor)
            if replacement:
                func = replacement
        return func
Beispiel #11
0
    def optionally_stage_in(self, input, func, executor):
        """Stage in ``input`` when it is a DataFuture or a File.

        Any other kind of input is handed back unchanged together with ``func``.

        Args:
             - input : candidate app input
             - func : app callable, passed through ``replace_task``
             - executor : forwarded to ``stage_in`` and ``replace_task``

        Returns:
             A ``(input, func)`` tuple: the result of ``stage_in`` and the
             result of ``replace_task``, or the original pair when no staging
             applies.
        """
        if isinstance(input, DataFuture):
            staged_file = input.file_obj.cleancopy()
            # Stand-in DataFuture that completes together with the original one
            # but carries the freshly copied file.
            stage_source = DataFuture(input, staged_file, tid=input.tid)
        elif isinstance(input, File):
            staged_file = stage_source = input.cleancopy()
        else:
            return input, func

        staged_input = self.stage_in(staged_file, stage_source, executor)
        return staged_input, self.replace_task(staged_file, func, executor)
Beispiel #12
0
    def __call__(self, *args, **kwargs):
        """Handle the call to an app that runs a command line.

        The command line is produced by ``self._trace_cmdline`` and submitted to
        ``self.executor`` for execution by ``bash_executor``.

        Args:
             - Arbitrary

        Kwargs:
             - Arbitrary

        Returns:
             If a non-empty outputs=[...] was a kwarg then:
                   App_fut, [Data_Futures...]
             else:
                   App_fut

        """

        cmd_line = self._trace_cmdline(*args, **kwargs)
        # NOTE(review): call-time kwargs are merged into self.kwargs and so
        # persist into later calls. Kept as is because code outside this
        # method may read them - confirm before changing.
        self.kwargs.update(kwargs)
        self.executable = cmd_line

        # isinstance() also matches DataFlowKernel subclasses, which an exact
        # type() comparison would misreport as a plain executor. The submit
        # call is identical in both cases; only the log message differs.
        if isinstance(self.executor, DataFlowKernel):
            logger.debug("Submitting to DataFlowKernel : %s", self.executor)
        else:
            logger.debug("Submitting to Executor: %s", self.executor)
        app_fut = self.executor.submit(bash_executor, cmd_line, *args, **self.kwargs)

        out_futs = [DataFuture(app_fut, o, parent=app_fut) for o in kwargs.get('outputs', [])]
        if out_futs:
            return app_fut, out_futs
        return app_fut
Beispiel #13
0
        ''' Submit a staging request.
        '''
        return self.executor.submit(*args, **kwargs)

    def scale_in(self, blocks, *args, **kwargs):
        """Do nothing; all arguments are accepted and ignored."""
        return None

    def scale_out(self, *args, **kwargs):
        """Do nothing; all arguments are accepted and ignored."""
        return None

    def shutdown(self, block=False):
        """Shut down the wrapped executor.

        Args:
             - block (bool) : forwarded to the executor's ``shutdown`` as ``wait``

        Returns:
             Whatever ``self.executor.shutdown`` returns.
        """
        outcome = self.executor.shutdown(wait=block)
        return outcome

    def scaling_enabled(self):
        """Return the value stored in ``self._scaling_enabled``."""
        enabled = self._scaling_enabled
        return enabled


if __name__ == "__main__":
    # Ad-hoc smoke run: submit a File's stage_in through a DataManager and
    # wrap the resulting future in a DataFuture.
    from parsl.data_provider.files import File
    from parsl.app.futures import DataFuture

    manager = DataManager(config={'a': 1})
    sample = File("/tmp/a.txt")
    print(type(sample), sample)

    staged = manager.submit(sample.stage_in, "foo")
    data_future = DataFuture(staged, sample, parent=None, tid=None)
    print(data_future)