Example #1
    def run(self, args):
        config = args.config
        logger = logging.getLogger('lobster.status')
        store = unit.UnitStore(config)
        data = list(store.workflow_status())
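        # The first row from workflow_status() holds the column titles; split
        # multi-word titles into words so they can be stacked over several
        # header rows.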
        headers = [x.split() for x in data.pop(0)]
        header_rows = max([len(x) for x in headers])
        for i in range(0, header_rows):
            data.insert(i, [x[i] if len(x) > i else '' for x in headers])

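        # Size each column to its widest cell, then build format strings:
        # centered for the header rows, a left-aligned label column plus
        # right-aligned value columns for the body.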
        widths = \
            [max(map(len, (xs[0] for xs in data)))] + \
            [max(map(len, (str(xs[i]) for xs in data)))
             for i in range(1, len(data[0]))]
        data.insert(header_rows, ['=' * w for w in widths])
        headfmt = ' '.join('{{:^{0}}}'.format(w) for w in widths)
        mainfmt = '{{:{0}}} '.format(widths[0]) + ' '.join(
            '{{:>{0}}}'.format(w) for w in widths[1:])
        report = '\n'.join(
            [headfmt.format(*data[i]) for i in range(0, header_rows)] +
            [mainfmt.format(*map(str, row)) for row in data[header_rows:]])

        logger.info("workflow summary:\n" + report)

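        # Point the user at the directories of tasks with failed units and
        # list any skipped input files, per workflow.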
        wdir = config.workdir
        for wflow in config.workflows:
            tasks = store.failed_units(wflow.label)
            files = store.skipped_files(wflow.label)

            if len(tasks) > 0:
                msg = "tasks with failed units for {0}:".format(wflow.label)
                for task in tasks:
                    tdir = os.path.normpath(
                        os.path.join(wdir, wflow.label, 'failed',
                                     util.id2dir(task)))
                    msg += "\n" + tdir
                logger.info(msg)

            if len(files) > 0:
                msg = "files skipped for {0}:\n".format(
                    wflow.label) + "\n".join(files)
                logger.info(msg)
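
For illustration, here is a minimal, self-contained sketch of the same column-width and format-string technique the status report above uses; the workflow names and counts are made up:

rows = [
    ['Workflow', 'Units', 'Done'],
    ['ttbar', 1200, 800],
    ['wjets', 300, 300],
]
# Each column is as wide as its widest cell, headers included.
widths = [max(len(str(row[i])) for row in rows) for i in range(len(rows[0]))]
# Centered headers; left-aligned labels and right-aligned numbers in the body.
headfmt = ' '.join('{{:^{0}}}'.format(w) for w in widths)
mainfmt = '{{:{0}}} '.format(widths[0]) + ' '.join(
    '{{:>{0}}}'.format(w) for w in widths[1:])
print(headfmt.format(*rows[0]))
print(' '.join('=' * w for w in widths))
print('\n'.join(mainfmt.format(*map(str, r)) for r in rows[1:]))
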
Example #2
    def __init__(self, config):
        util.Timing.__init__(self, 'dash', 'handler', 'updates', 'elk',
                             'transfers', 'cleanup', 'propagate', 'sqlite')

        self.config = config
        self.basedirs = [config.base_directory, config.startup_directory]
        self.workdir = config.workdir
        self._storage = config.storage
        self.statusfile = os.path.join(self.workdir, 'status.json')
        self.siteconf = os.path.join(self.workdir, 'siteconf')

        self.parrot_path = os.path.dirname(util.which('parrot_run'))
        self.parrot_bin = os.path.join(self.workdir, 'bin')
        self.parrot_lib = os.path.join(self.workdir, 'lib')

        self.__algo = Algo(config)
        self.__host = socket.getfqdn()
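        # Take the CE and SE names and the Frontier proxy from the CMS
        # site-local config; if it cannot be loaded, fall back to the local
        # hostname and $HTTP_PROXY.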
        try:
            siteconf = loadSiteLocalConfig()
            self.__ce = siteconf.siteName
            self.__se = siteconf.localStageOutPNN()
            self.__frontier_proxy = siteconf.frontierProxies[0]
        except (SiteConfigError, IndexError):
            logger.error("can't load siteconfig, defaulting to hostname")
            self.__ce = socket.getfqdn()
            self.__se = socket.getfqdn()
            try:
                self.__frontier_proxy = os.environ['HTTP_PROXY']
            except KeyError:
                logger.error(
                    "can't determine proxy for Frontier via $HTTP_PROXY")
                sys.exit(1)

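        # Read the CVMFS proxy from /etc/cvmfs/default.local; if no
        # CVMFS_HTTP_PROXY line is found, fall back to $HTTP_PROXY.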
        try:
            with open('/etc/cvmfs/default.local') as f:
                lines = f.readlines()
        except IOError:
            lines = []
        for line in lines:
            m = re.match(r'\s*CVMFS_HTTP_PROXY\s*=\s*[\'"]?(.*)[\'"]?', line)
            if m:
                self.__cvmfs_proxy = m.group(1).strip("\"'")
                break
        else:
            try:
                self.__cvmfs_proxy = os.environ['HTTP_PROXY']
            except KeyError:
                logger.error("can't determine proxy for CVMFS via $HTTP_PROXY")
                sys.exit(1)

        logger.debug("using {} as proxy for CVMFS".format(self.__cvmfs_proxy))
        logger.debug("using {} as proxy for Frontier".format(
            self.__frontier_proxy))
        logger.debug("using {} as osg_version".format(
            self.config.advanced.osg_version))
        util.sendemail("Your Lobster project has started!", self.config)

        self.__taskhandlers = {}
        self.__store = unit.UnitStore(self.config)

        self.__setup_inputs()
        self.copy_siteconf()

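        # A missing 'id' checkpoint means a fresh project: mint a task id and
        # snapshot the configuration.  On a restart, reuse the stored id and
        # record the restart time.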
        create = not util.checkpoint(self.workdir, 'id')
        if create:
            self.taskid = 'lobster_{0}_{1}'.format(
                self.config.label,
                sha1(str(datetime.datetime.utcnow())).hexdigest()[-16:])
            util.register_checkpoint(self.workdir, 'id', self.taskid)
            shutil.copy(self.config.base_configuration,
                        os.path.join(self.workdir, 'config.py'))
        else:
            self.taskid = util.checkpoint(self.workdir, 'id')
            util.register_checkpoint(self.workdir, 'RESTARTED',
                                     str(datetime.datetime.utcnow()))

        if not util.checkpoint(self.workdir, 'executable'):
            # We can actually have more than one exe name (one per task label)
            # Set 'cmsRun' if any of the tasks are of that type,
            # or use cmd command if all tasks execute the same cmd,
            # or use 'noncmsRun' if task cmds are different
            # Using this for dashboard exe name reporting
            cmsconfigs = [wflow.pset for wflow in self.config.workflows]
            cmds = [wflow.command for wflow in self.config.workflows]
            if any(cmsconfigs):
                exename = 'cmsRun'
            elif all(x == cmds[0] and x is not None for x in cmds):
                exename = cmds[0]
            else:
                exename = 'noncmsRun'

            util.register_checkpoint(self.workdir, 'executable', exename)

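        # On the first run, set up each workflow, query its dataset and
        # register it in the unit store; on a restart, move tasks that were
        # still marked as running back to 'failed'.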
        for wflow in self.config.workflows:
            if create and not util.checkpoint(self.workdir, wflow.label):
                wflow.setup(self.workdir, self.basedirs)
                logger.info("querying backend for {0}".format(wflow.label))
                with fs.alternative():
                    dataset_info = wflow.dataset.get_info()

                logger.info("registering {0} in database".format(wflow.label))
                self.__store.register_dataset(wflow, dataset_info,
                                              wflow.category.runtime)
                util.register_checkpoint(self.workdir, wflow.label,
                                         'REGISTERED')
            elif os.path.exists(os.path.join(wflow.workdir, 'running')):
                for id in self.get_taskids(wflow.label):
                    util.move(wflow.workdir, id, 'failed')

        for wflow in self.config.workflows:
            if wflow.parent:
                getattr(self.config.workflows,
                        wflow.parent.label).register(wflow)
                if create:
                    total_units = wflow.dataset.total_units * len(
                        wflow.unique_arguments)
                    self.__store.register_dependency(wflow.label,
                                                     wflow.parent.label,
                                                     total_units)

        if not util.checkpoint(self.workdir, 'sandbox cmssw version'):
            util.register_checkpoint(self.workdir, 'sandbox', 'CREATED')
            versions = set([w.version for w in self.config.workflows])
            if len(versions) == 1:
                util.register_checkpoint(self.workdir, 'sandbox cmssw version',
                                         list(versions)[0])

        if self.config.elk:
            if create:
                categories = {
                    wflow.category.name: []
                    for wflow in self.config.workflows
                }
                for category in categories:
                    for workflow in self.config.workflows:
                        if workflow.category.name == category:
                            categories[category].append(workflow.label)
                self.config.elk.create(categories)
            else:
                self.config.elk.resume()

        self.config.advanced.dashboard.setup(self.config)
        if create:
            self.config.save()
            self.config.advanced.dashboard.register_run()
        else:
            self.config.advanced.dashboard.update_task_status(
                (id_, dash.ABORTED) for id_ in self.__store.reset_units())

        for p in (self.parrot_bin, self.parrot_lib):
            if not os.path.exists(p):
                os.makedirs(p)

        for exe in ('parrot_run', 'chirp', 'chirp_put', 'chirp_get'):
            shutil.copy(util.which(exe), self.parrot_bin)
            subprocess.check_call(
                ["strip", os.path.join(self.parrot_bin, exe)])

        p_helper = os.path.join(os.path.dirname(self.parrot_path), 'lib',
                                'lib64', 'libparrot_helper.so')
        shutil.copy(p_helper, self.parrot_lib)
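
As an aside, the CVMFS proxy lookup above uses Python's for/else: the else branch runs only when the loop finishes without hitting break. A minimal standalone sketch of that pattern follows; the function name find_proxy and the sample proxy URL are made up for illustration.

import os
import re


def find_proxy(lines):
    # Scan for a CVMFS_HTTP_PROXY assignment; the else branch runs only if
    # the loop never breaks, i.e. no matching line was found.
    for line in lines:
        m = re.match(r'\s*CVMFS_HTTP_PROXY\s*=\s*[\'"]?(.*)[\'"]?', line)
        if m:
            proxy = m.group(1).strip("\"'")
            break
    else:
        proxy = os.environ.get('HTTP_PROXY')
    return proxy


print(find_proxy(['CVMFS_HTTP_PROXY="http://squid.example.org:3128"']))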