Beispiel #1
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        The worker's tracking-URL and status-message callbacks are attached to
        the task for as long as its code may execute and are always detached
        again before this method returns or raises.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        self.task.set_tracking_url = self.tracking_url_callback
        self.task.set_status_message = self.status_message_callback
        try:
            task_gen = self.task.run()

            if not isinstance(task_gen, types.GeneratorType):
                return None

            # Calling a generator function only creates the generator; its body
            # executes inside the loop below, so the callbacks must stay
            # attached until the loop is left.
            next_send = None
            while True:
                try:
                    if next_send is None:
                        requires = six.next(task_gen)
                    else:
                        requires = task_gen.send(next_send)
                except StopIteration:
                    return None

                new_req = flatten(requires)
                if all(t.complete() for t in new_req):
                    # Nothing is missing: resume the generator with the result
                    # of getpaths() for what it yielded.
                    next_send = getpaths(requires)
                else:
                    return [(t.task_module, t.task_family, t.to_str_params())
                            for t in new_req]
        finally:
            # Detach on every exit path, including exceptions raised by the task.
            self.task.set_tracking_url = None
            self.task.set_status_message = None
Beispiel #2
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        The status reporter's ``update_tracking_url``, ``update_status`` and
        ``update_progress_percentage`` methods are exposed on the task as
        ``set_tracking_url``, ``set_status_message`` and
        ``set_progress_percentage`` for as long as task code may execute, and
        are always reset to ``None`` before this method returns or raises.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        self.task.set_tracking_url = self.status_reporter.update_tracking_url
        self.task.set_status_message = self.status_reporter.update_status
        self.task.set_progress_percentage = self.status_reporter.update_progress_percentage
        try:
            task_gen = self.task.run()

            if not isinstance(task_gen, types.GeneratorType):
                return None

            # A generator's body only executes while it is being iterated, so
            # the callbacks have to remain available throughout this loop.
            next_send = None
            while True:
                try:
                    if next_send is None:
                        requires = six.next(task_gen)
                    else:
                        requires = task_gen.send(next_send)
                except StopIteration:
                    return None

                new_req = flatten(requires)
                if all(t.complete() for t in new_req):
                    # Resume the generator with getpaths() of what it yielded.
                    next_send = getpaths(requires)
                else:
                    return [(t.task_module, t.task_family, t.to_str_params())
                            for t in new_req]
        finally:
            # Reset on every exit path, including exceptions raised by the task.
            self.task.set_tracking_url = None
            self.task.set_status_message = None
            self.task.set_progress_percentage = None
Beispiel #3
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        Every ``reporter_attr -> task_attr`` pair in
        ``self.forward_reporter_callbacks`` is forwarded from the status
        reporter to the task for as long as task code may execute. All forwarded
        attributes are reset to ``None`` before this method returns or raises.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        try:
            # set task callbacks before running
            for reporter_attr, task_attr in six.iteritems(self.forward_reporter_callbacks):
                setattr(self.task, task_attr, getattr(self.status_reporter, reporter_attr))

            task_gen = self.task.run()

            if not isinstance(task_gen, types.GeneratorType):
                return None

            # A generator's body only executes while it is being iterated, so
            # the callbacks have to remain available throughout this loop.
            next_send = None
            while True:
                try:
                    if next_send is None:
                        requires = six.next(task_gen)
                    else:
                        requires = task_gen.send(next_send)
                except StopIteration:
                    return None

                new_req = flatten(requires)
                if all(t.complete() for t in new_req):
                    # Resume the generator with getpaths() of what it yielded.
                    next_send = getpaths(requires)
                else:
                    return [(t.task_module, t.task_family, t.to_str_params())
                            for t in new_req]
        finally:
            # reset task callbacks on every exit path, including exceptions
            for _, task_attr in six.iteritems(self.forward_reporter_callbacks):
                setattr(self.task, task_attr, None)
Beispiel #4
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        ``run()`` is first called with a ``tracking_url_callback`` keyword
        argument. If that fails with a ``TypeError`` mentioning an unexpected
        keyword argument, ``run()`` is called again without arguments; any
        other ``TypeError`` is re-raised.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        run_again = False
        try:
            task_gen = self.task.run(
                tracking_url_callback=self.tracking_url_callback)
        except TypeError as ex:
            # str(ex) works for any TypeError, whereas indexing ex.args fails
            # on an exception created without arguments.
            # NOTE(review): a TypeError with the same wording raised from
            # inside the task body is indistinguishable here and also
            # triggers the retry.
            if 'unexpected keyword argument' not in str(ex):
                raise
            run_again = True
        if run_again:
            # Retried outside the handler so that a failure of this call is
            # not reported as happening while handling the TypeError above.
            task_gen = self.task.run()
        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                if next_send is None:
                    requires = six.next(task_gen)
                else:
                    requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if all(t.complete() for t in new_req):
                # Resume the generator with getpaths() of what it yielded.
                next_send = getpaths(requires)
            else:
                # Only describe the batch when it actually has to be reported.
                return [(t.task_module, t.task_family, t.to_str_params())
                        for t in new_req]
Beispiel #5
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        ``run()`` is first called with a ``tracking_url_callback`` keyword
        argument. If that fails with a ``TypeError`` mentioning an unexpected
        keyword argument, ``run()`` is called again without arguments; any
        other ``TypeError`` is re-raised.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        run_again = False
        try:
            task_gen = self.task.run(tracking_url_callback=self.tracking_url_callback)
        except TypeError as ex:
            # str(ex) works for any TypeError, whereas indexing ex.args fails
            # on an exception created without arguments.
            # NOTE(review): a TypeError with the same wording raised from
            # inside the task body also triggers the retry.
            if 'unexpected keyword argument' not in str(ex):
                raise
            run_again = True
        if run_again:
            # Retried outside the handler to keep tracebacks unchained.
            task_gen = self.task.run()
        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                if next_send is None:
                    requires = six.next(task_gen)
                else:
                    requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if all(t.complete() for t in new_req):
                # Resume the generator with getpaths() of what it yielded.
                next_send = getpaths(requires)
            else:
                # Only describe the batch when it actually has to be reported.
                return [(t.task_module, t.task_family, t.to_str_params())
                        for t in new_req]
Beispiel #6
0
    def _run_get_new_deps(self):
        """
        Call the task's ``run()``, emit a PROCESSING_TIME event and collect
        dynamic dependencies that are not complete yet.

        The event carries the wall-clock duration of the ``run()`` call itself
        and is triggered whether or not that call raised.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        started_at = time.time()
        try:
            gen = self.task.run()
        finally:
            elapsed = time.time() - started_at
            self.task.trigger_event(Event.PROCESSING_TIME, self.task, elapsed)

        if not isinstance(gen, types.GeneratorType):
            return None

        reply = None
        while True:
            try:
                # The first step (and any step without a reply) is a plain next().
                batch = six.next(gen) if reply is None else gen.send(reply)
            except StopIteration:
                return None

            pending = flatten(batch)
            described = [
                (dep.task_module, dep.task_family, dep.to_str_params())
                for dep in pending
            ]
            if not all(dep.complete() for dep in pending):
                return described
            reply = getpaths(batch)
Beispiel #7
0
    def _run_get_new_deps(self):
        """
        Invoke ``run()`` on the task, report how long that call took through a
        PROCESSING_TIME event, then step through any generator it returned.

        :return: ``None`` if ``run()`` gave back something other than a
                 generator, or once the generator has finished; otherwise the
                 ``(task_module, task_family, to_str_params())`` tuples of the
                 first yielded batch in which some task is not complete.
        """
        run_started = time.time()
        try:
            result = self.task.run()
        finally:
            # Triggered on success and on failure of run() alike.
            self.task.trigger_event(
                Event.PROCESSING_TIME, self.task, time.time() - run_started)

        if not isinstance(result, types.GeneratorType):
            return None

        to_send = None
        while True:
            try:
                if to_send is not None:
                    yielded = result.send(to_send)
                else:
                    yielded = six.next(result)
            except StopIteration:
                return None

            yielded_tasks = flatten(yielded)
            dep_specs = []
            for dep in yielded_tasks:
                dep_specs.append(
                    (dep.task_module, dep.task_family, dep.to_str_params()))

            if any(not dep.complete() for dep in yielded_tasks):
                return dep_specs

            # Every yielded task is complete: feed getpaths() back in.
            to_send = getpaths(yielded)
Beispiel #8
0
    def output(self):
        """
        Return ``getpaths()`` of the tasks yielded by ``run()``.

        ``run()`` is only consumed when every task from ``requires()`` reports
        complete. If consuming it raises an ordinary exception, the error is
        logged and the result is computed from an empty task list instead.
        ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed.
        """
        tasks = []
        if all(req.complete() for req in flatten(self.requires())):
            try:
                tasks = list(self.run())
            except Exception:
                # Deliberate best effort: log and carry on with no tasks.
                # Catching Exception rather than using a bare except lets
                # interpreter-exit signals propagate.
                logger.exception('%s failed at run() step; the exception will not be raised because Luigi is still building the graph.', repr(self))

        # FIXME: conserve task structure: the generator actually create an
        # implicit array level even if a single task is yielded.
        # For now, we just handle the special singleton case.
        if len(tasks) == 1:
            tasks = tasks[0]

        return getpaths(tasks)
Beispiel #9
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        The tracking-URL and status-message callbacks are attached to the task
        for as long as its code may execute and are always detached again
        before this method returns or raises. For backwards compatibility
        ``run()`` is first called with a ``tracking_url_callback`` keyword
        argument (which warns about its deprecation when used); if the task's
        ``run()`` rejects that keyword it is called again without arguments.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        self.task.set_tracking_url = self.tracking_url_callback
        self.task.set_status_message = self.status_message_callback

        def deprecated_tracking_url_callback(*args, **kwargs):
            warnings.warn(
                "tracking_url_callback in run() args is deprecated, use "
                "set_tracking_url instead.", DeprecationWarning)
            self.tracking_url_callback(*args, **kwargs)

        try:
            run_again = False
            try:
                task_gen = self.task.run(
                    tracking_url_callback=deprecated_tracking_url_callback)
            except TypeError as ex:
                if 'unexpected keyword argument' not in str(ex):
                    raise
                run_again = True
            if run_again:
                # Retried outside the handler to keep tracebacks unchained.
                task_gen = self.task.run()

            if not isinstance(task_gen, types.GeneratorType):
                return None

            # A generator's body only executes while it is being iterated, so
            # the callbacks have to remain attached throughout this loop.
            next_send = None
            while True:
                try:
                    if next_send is None:
                        requires = six.next(task_gen)
                    else:
                        requires = task_gen.send(next_send)
                except StopIteration:
                    return None

                new_req = flatten(requires)
                if all(t.complete() for t in new_req):
                    # Resume the generator with getpaths() of what it yielded.
                    next_send = getpaths(requires)
                else:
                    return [(t.task_module, t.task_family, t.to_str_params())
                            for t in new_req]
        finally:
            # Detach on every exit path, including exceptions raised by the task.
            self.task.set_tracking_url = None
            self.task.set_status_message = None
Beispiel #10
0
    def _run_get_new_deps(self):
        """
        Run the task and report dynamic dependencies that are not complete yet.

        The tracking-URL and status-message callbacks are attached to the task
        for as long as its code may execute and are always detached again
        before this method returns or raises. ``run()`` is first tried with a
        deprecated ``tracking_url_callback`` keyword argument and retried
        without arguments when the task's ``run()`` rejects that keyword.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        self.task.set_tracking_url = self.tracking_url_callback
        self.task.set_status_message = self.status_message_callback

        def deprecated_tracking_url_callback(*args, **kwargs):
            warnings.warn("tracking_url_callback in run() args is deprecated, use "
                          "set_tracking_url instead.", DeprecationWarning)
            self.tracking_url_callback(*args, **kwargs)

        try:
            run_again = False
            try:
                task_gen = self.task.run(tracking_url_callback=deprecated_tracking_url_callback)
            except TypeError as ex:
                if 'unexpected keyword argument' not in str(ex):
                    raise
                run_again = True
            if run_again:
                # Retried outside the handler to keep tracebacks unchained.
                task_gen = self.task.run()

            if not isinstance(task_gen, types.GeneratorType):
                return None

            # The generator body runs during iteration, not during run(), so
            # the callbacks stay attached until iteration is over.
            next_send = None
            while True:
                try:
                    if next_send is None:
                        requires = six.next(task_gen)
                    else:
                        requires = task_gen.send(next_send)
                except StopIteration:
                    return None

                new_req = flatten(requires)
                if all(t.complete() for t in new_req):
                    # Resume the generator with getpaths() of what it yielded.
                    next_send = getpaths(requires)
                else:
                    return [(t.task_module, t.task_family, t.to_str_params())
                            for t in new_req]
        finally:
            # Detach on every exit path, including exceptions raised by the task.
            self.task.set_tracking_url = None
            self.task.set_status_message = None
def convert_tasks_to_manifest_if_necessary(input_tasks):  # pylint: disable=invalid-name
    """
    Replace the input tasks with a manifest task when they yield too many targets.

    The integer "threshold" option is read from the ``CONFIG_SECTION``
    configuration section (default -1). When it is positive and the number of
    flattened targets is greater than or equal to it, a single-element list
    holding a URLManifestTask built from the target paths is returned.
    Otherwise the flattened input tasks are returned unchanged.
    """
    flat_tasks = task.flatten(input_tasks)
    flat_targets = task.flatten(task.getpaths(flat_tasks))
    limit = configuration.get_config().getint(CONFIG_SECTION, 'threshold', -1)

    # Guard clause: manifests are disabled (non-positive limit) or not needed.
    if limit <= 0 or len(flat_targets) < limit:
        log.debug(
            'Directly processing files since %d inputs are less than the threshold %d', len(flat_targets), limit
        )
        return flat_tasks

    log.debug(
        'Using manifest since %d inputs are greater than or equal to the threshold %d', len(flat_targets), limit
    )
    manifest_urls = [target.path for target in flat_targets]
    return [URLManifestTask(urls=manifest_urls)]
def convert_tasks_to_manifest_if_necessary(input_tasks):  # pylint: disable=invalid-name
    """
    Hand back a manifest task instead of the inputs when there are many targets.

    The input tasks are flattened and their targets counted. If the integer
    "threshold" option of the ``CONFIG_SECTION`` configuration section
    (default -1) is positive and the count reaches or exceeds it, the result
    is ``[URLManifestTask(urls=...)]`` built from the target paths; in every
    other case the flattened input tasks are returned.
    """
    all_input_tasks = task.flatten(input_tasks)
    targets = task.flatten(task.getpaths(all_input_tasks))
    config = configuration.get_config()
    threshold = config.getint(CONFIG_SECTION, 'threshold', -1)
    num_targets = len(targets)

    use_manifest = threshold > 0 and num_targets >= threshold
    if use_manifest:
        message = 'Using manifest since %d inputs are greater than or equal to the threshold %d'
        result = [URLManifestTask(urls=[target.path for target in targets])]
    else:
        message = 'Directly processing files since %d inputs are less than the threshold %d'
        result = all_input_tasks

    log.debug(message, num_targets, threshold)
    return result
Beispiel #13
0
    def _run_get_new_deps(self):
        """
        Run the task and step through the generator it may return.

        :return: ``None`` when ``run()`` does not return a generator or when
                 the generator is exhausted; otherwise a list of
                 ``(task_module, task_family, to_str_params())`` tuples for the
                 first yielded batch containing a task that is not complete.
        """
        gen = self.task.run()
        if not isinstance(gen, types.GeneratorType):
            return None

        reply = None
        while True:
            try:
                batch = six.next(gen) if reply is None else gen.send(reply)
            except StopIteration:
                return None

            pending = flatten(batch)
            if not all(dep.complete() for dep in pending):
                # Something is still missing: describe the whole batch.
                return [
                    (dep.task_module, dep.task_family, dep.to_str_params())
                    for dep in pending
                ]

            # Everything is complete: resume with getpaths() of the batch.
            reply = getpaths(batch)
Beispiel #14
0
 def output(self):
     """Return ``getpaths()`` applied to whatever ``requires()`` returns."""
     required = self.requires()
     return getpaths(required)
Beispiel #15
0
    def run(self):
        """
        Execute the wrapped task and post the outcome to ``self.result_queue``.

        A ``(task_id, status, error_message, missing, new_deps)`` tuple is
        always put on the queue when this method is left:

        * ``DONE`` after ``run()`` finished; ``error_message`` then holds the
          JSON-encoded return value of ``on_success()``.
        * ``SUSPENDED`` when a generator task yielded tasks that are not all
          complete; ``new_deps`` then describes the yielded tasks.
        * ``FAILED`` when a dependency is unfulfilled or any exception other
          than ``KeyboardInterrupt`` was raised; a failure e-mail is sent.

        While a generator task keeps yielding already-complete tasks, an
        intermediate ``RUNNING`` tuple is queued for each such batch.
        ``KeyboardInterrupt`` is re-raised.
        """
        logger.info('[pid %s] Worker %s running   %s', os.getpid(),
                    self.worker_id, self.task.task_id)

        if self.random_seed:
            # Need to have different random seeds if running in separate processes.
            # random.seed() rejects tuples on current Python versions, so the
            # pid and the clock are folded into a single float instead.
            random.seed(os.getpid() * time.time())

        status = FAILED
        error_message = ''
        missing = []
        new_deps = []
        try:
            # Verify that all the tasks are fulfilled!
            missing = [
                dep.task_id for dep in self.task.deps() if not dep.complete()
            ]
            if missing:
                deps = 'dependency' if len(missing) == 1 else 'dependencies'
                raise RuntimeError('Unfulfilled %s at run time: %s' %
                                   (deps, ', '.join(missing)))
            self.task.trigger_event(Event.START, self.task)
            t0 = time.time()
            status = None
            try:
                task_gen = self.task.run()
                if isinstance(task_gen, types.GeneratorType):  # new deps
                    next_send = None
                    while True:
                        try:
                            if next_send is None:
                                requires = six.next(task_gen)
                            else:
                                requires = task_gen.send(next_send)
                        except StopIteration:
                            break

                        new_req = flatten(requires)
                        status = (RUNNING if all(
                            t.complete() for t in new_req) else SUSPENDED)
                        new_deps = [(t.task_module, t.task_family,
                                     t.to_str_params()) for t in new_req]
                        if status == RUNNING:
                            # Report the satisfied batch, then resume the
                            # generator with getpaths() of what it yielded.
                            self.result_queue.put((self.task.task_id, status,
                                                   '', missing, new_deps))
                            next_send = getpaths(requires)
                            new_deps = []
                        else:
                            logger.info(
                                '[pid %s] Worker %s new requirements      %s',
                                os.getpid(), self.worker_id, self.task.task_id)
                            # The outer finally still queues the SUSPENDED
                            # result together with new_deps.
                            return
            finally:
                # No processing time is reported for a suspended task.
                if status != SUSPENDED:
                    self.task.trigger_event(Event.PROCESSING_TIME, self.task,
                                            time.time() - t0)
            error_message = json.dumps(self.task.on_success())
            logger.info('[pid %s] Worker %s done      %s', os.getpid(),
                        self.worker_id, self.task.task_id)
            self.task.trigger_event(Event.SUCCESS, self.task)
            status = DONE

        except KeyboardInterrupt:
            raise
        except BaseException as ex:
            status = FAILED
            logger.exception("[pid %s] Worker %s failed    %s", os.getpid(),
                             self.worker_id, self.task)
            error_message = notifications.wrap_traceback(
                self.task.on_failure(ex))
            self.task.trigger_event(Event.FAILURE, self.task, ex)
            subject = "Luigi: %s FAILED" % self.task
            notifications.send_error_email(subject, error_message)
        finally:
            self.result_queue.put(
                (self.task.task_id, status, error_message, missing, new_deps))