Example #1
    def run(self, args):
        store = UnitStore(args.config)
        stats = {w.label: [0, 0, 0] for w in args.config.workflows}

        missing = []
        for wflow in args.config.workflows:
            logger.info('validating output files for {0}'.format(wflow.label))

            delete, missed = self.process_workflow(store, stats, wflow)
            missing += missed

            if not args.dry_run and len(delete) > 0:
                fs.remove(*delete)

        logger.info('finished validating')

        # sum(..., []) flattens the per-workflow counter lists; an
        # all-zero total means there was nothing to clean up
        if sum(sum(stats.values(), [])) == 0:
            logger.info('no files found to clean up')
        else:
            self.print_stats(stats)

        if len(missing) > 0:
            if not args.dry_run:
                store.update_missing(missing)

            verb = 'would have' if args.dry_run else 'have'
            template = 'the following {0} been marked as failed because their output could not be found: {1}'
            logger.warning(template.format(verb, ', '.join(map(str, missing))))
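
These run() methods are shown without their module-level imports. A plausible header, assuming a Lobster-style project layout (the import paths below are an assumption inferred from the names used above, not something the snippets confirm):

    import logging

    # assumed locations: fs provides fs.remove(), UnitStore is the
    # bookkeeping store constructed from the configuration
    from lobster import fs
    from lobster.core.unit import UnitStore

    logger = logging.getLogger('lobster.cleanup')
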
Example #2
    def run(self, args):
        store = UnitStore(args.config)
        stats = {w.label: [0, 0, 0] for w in args.config.workflows}

        missing = []
        for wflow in args.config.workflows:
            logger.info('validating output files for {0}'.format(wflow.label))

            delete, merged, missed = self.process_workflow(store, stats, wflow)
            missing += missed

            if not args.dry_run:
                fs.remove(*delete)
                # optionally also remove files whose contents have been merged
                if args.delete_merged:
                    fs.remove(*merged)

        logger.info('finished validating')

        # sum(..., []) flattens the per-workflow counter lists; an
        # all-zero total means there was nothing to clean up
        if sum(sum(stats.values(), [])) == 0:
            logger.info('no files found to clean up')
        else:
            self.print_stats(stats)

        if len(missing) > 0:
            if not args.dry_run:
                store.update_missing(missing)

            verb = 'would have' if args.dry_run else 'have'
            template = 'the following {0} been marked as failed because their output could not be found: {1}'
            logger.warning(template.format(verb, ', '.join(map(str, missing))))
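
Example #2 extends Example #1 with a second list, merged, whose files are only removed when args.delete_merged is set. Below is a hypothetical sketch of how those flags could be registered with argparse; the option names are inferred from args.dry_run and args.delete_merged, and the setup() hook is an assumption about the surrounding CLI framework:

    import argparse

    def setup(argparser):
        # option names mirror the attributes read in run(); both are assumptions
        argparser.add_argument('--dry-run', action='store_true',
                               help='only report what would be deleted')
        argparser.add_argument('--delete-merged', action='store_true',
                               help='also delete files whose contents have been merged')

    parser = argparse.ArgumentParser()
    setup(parser)
    args = parser.parse_args(['--dry-run'])
    assert args.dry_run and not args.delete_merged
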
Example #3
    def release(self, tasks):
        fail_cleanup = []
        merge_cleanup = []
        input_cleanup = []
        update = defaultdict(list)
        propagate = defaultdict(dict)
        input_files = defaultdict(set)
        summary = ReleaseSummary()
        transfers = defaultdict(lambda: defaultdict(Counter))

        with self.measure('dash'):
            self.config.advanced.dashboard.update_task_status(
                (task.tag, dash.DONE) for task in tasks)

        for task in tasks:
            with self.measure('updates'):
                handler = self.__taskhandlers[task.tag]
                failed, task_update, file_update, unit_update = handler.process(
                    task, summary, transfers)

                wflow = getattr(self.config.workflows, handler.dataset)

            with self.measure('elk'):
                if self.config.elk:
                    self.config.elk.index_task(task)
                    self.config.elk.index_task_update(task_update)

            with self.measure('handler'):
                if failed:
                    faildir = util.move(wflow.workdir, handler.id, 'failed')
                    summary.dir(str(handler.id), faildir)
                    fail_cleanup.extend([lf for rf, lf in handler.outputs])
                else:
                    util.move(wflow.workdir, handler.id, 'successful')

                    merge = isinstance(handler, MergeTaskHandler)

                    if (wflow.merge_size <= 0
                            or merge) and len(handler.outputs) > 0:
                        outfn = handler.outputs[0][1]
                        outinfo = handler.output_info
                        for dep in wflow.dependents:
                            propagate[dep.label][outfn] = outinfo

                    if merge:
                        merge_cleanup.extend(handler.input_files)

                    if wflow.cleanup_input:
                        input_files[handler.dataset].update(
                            f for (_, _, f) in file_update)

            update[(handler.dataset, handler.unit_source)].append(
                (task_update, file_update, unit_update))

            del self.__taskhandlers[task.tag]

        with self.measure('dash'):
            self.config.advanced.dashboard.update_task_status(
                (task.tag, dash.RETRIEVED) for task in tasks)

        if len(update) > 0:
            with self.measure('sqlite'):
                logger.info(summary)
                self.__store.update_units(update)

        with self.measure('cleanup'):
            if len(input_files) > 0:
                input_cleanup.extend(self.__store.finished_files(input_files))

            for cleanup in [fail_cleanup, merge_cleanup + input_cleanup]:
                if len(cleanup) > 0:
                    try:
                        fs.remove(*cleanup)
                    except (IOError, OSError):
                        pass
                    except ValueError as e:
                        # name the files that could not be removed; 'task'
                        # here would be a stale leftover of the loop above
                        logger.error("error removing {0}:\n{1}".format(
                            ', '.join(map(str, cleanup)), e))

        with self.measure('propagate'):
            for label, infos in propagate.items():
                unique_args = getattr(self.config.workflows,
                                      label).unique_arguments
                self.__store.register_files(infos, label, unique_args)

        if len(transfers) > 0:
            with self.measure('transfers'):
                self.__store.update_transfers(transfers)

        if self.config.elk:
            with self.measure('elk'):
                try:
                    self.config.elk.index_summary(
                        self.__store.workflow_status())
                except Exception as e:
                    logger.error('ELK failed to index summary:\n{}'.format(e))
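
Every phase of release() is wrapped in self.measure(...), used as a context manager keyed by a phase name ('dash', 'updates', 'sqlite', ...). A minimal sketch of such a helper, assuming it accumulates wall-clock time per phase; the class and attribute names are illustrative, not the project's actual API:

    import time
    from collections import defaultdict
    from contextlib import contextmanager

    class TimedSource(object):
        def __init__(self):
            # accumulated seconds per phase name (assumed bookkeeping)
            self.times = defaultdict(float)

        @contextmanager
        def measure(self, category):
            start = time.time()
            try:
                yield
            finally:
                self.times[category] += time.time() - start

    # usage mirroring release() above
    src = TimedSource()
    with src.measure('sqlite'):
        time.sleep(0.01)
    print(dict(src.times))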