Example #1
    def savelogs(self, failed_jobs, samples=5):
        logdir = os.path.join(self.__plotdir, 'logs')
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        pool = multiprocessing.Pool(processes=10)
        work = []
        codes = {}

        for exit_code, jobs in zip(*split_by_column(failed_jobs[['id', 'exit_code']], 'exit_code')):
            codes[exit_code] = [len(jobs), {}]

            logger.info("Copying sample logs for exit code {0}".format(exit_code))
            for id, e in list(jobs[-samples:]):
                codes[exit_code][1][id] = []

                try:
                    source = glob.glob(os.path.join(self.__workdir, '*', 'failed', util.id2dir(id)))[0]
                except IndexError:
                    continue

                target = os.path.join(logdir, str(id))
                if os.path.exists(target):
                    shutil.rmtree(target)
                os.makedirs(target)

                for l in ['cmssw.log.gz', 'job.log.gz']:
                    s = os.path.join(source, l)
                    t = os.path.join(target, l[:-3])
                    if os.path.exists(s):
                        codes[exit_code][1][id].append(l[:-3])
                        work.append((exit_code, id, l[:-3], pool.apply_async(unpack, [s, t])))

        # Collect unpack results; drop log files that failed to decompress.
        for (code, id, file, res) in work:
            if not res.get():
                codes[code][1][id].remove(file)
        pool.close()
        pool.join()

        # Pad with empty placeholder entries so every exit code ends up
        # reporting `samples` log sets.
        for code in codes:
            for id in range(samples - len(codes[code][1])):
                codes[code][1][-id] = []

        return codes
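The work items hand a module-level `unpack` helper to `pool.apply_async` and later check its return value to prune files that could not be decompressed. The helper itself is not part of this example; a minimal sketch, assuming it simply gunzips the source log into the target path and returns a truthy value on success:

import gzip
import shutil


def unpack(source, target):
    # Hypothetical helper: decompress `source` (a .gz log) into `target`
    # and report success so the caller can drop missing entries.
    try:
        with gzip.open(source, 'rb') as fin, open(target, 'wb') as fout:
            shutil.copyfileobj(fin, fout)
        return True
    except (IOError, OSError):
        return False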
Example #2
    def run(self, args):
        config = args.config
        logger = logging.getLogger('lobster.status')
        store = unit.UnitStore(config)
        data = list(store.workflow_status())
        headers = [x.split() for x in data.pop(0)]
        header_rows = max([len(x) for x in headers])
        for i in range(0, header_rows):
            data.insert(i, [x[i] if len(x) > i else '' for x in headers])

        # Column widths: the widest entry per column across headers and data.
        widths = \
            [max(map(len, (xs[0] for xs in data)))] + \
            [max(map(len, (str(xs[i]) for xs in data)))
             for i in range(1, len(data[0]))]
        # Separator row between the header block and the data rows.
        data.insert(header_rows, ['=' * w for w in widths])
        # Headers are centered; data rows are left-aligned in the first
        # column and right-aligned everywhere else.
        headfmt = ' '.join('{{:^{0}}}'.format(w) for w in widths)
        mainfmt = '{{:{0}}} '.format(widths[0]) + ' '.join(
            '{{:>{0}}}'.format(w) for w in widths[1:])
        report = '\n'.join(
            [headfmt.format(*data[i]) for i in range(0, header_rows)] +
            [mainfmt.format(*map(str, row)) for row in data[header_rows:]])

        logger.info("workflow summary:\n" + report)

        wdir = config.workdir
        for wflow in config.workflows:
            tasks = store.failed_units(wflow.label)
            files = store.skipped_files(wflow.label)

            if len(tasks) > 0:
                msg = "tasks with failed units for {0}:".format(wflow.label)
                for task in tasks:
                    tdir = os.path.normpath(
                        os.path.join(wdir, wflow.label, 'failed',
                                     util.id2dir(task)))
                    msg += "\n" + tdir
                logger.info(msg)

            if len(files) > 0:
                msg = "files skipped for {0}:\n".format(
                    wflow.label) + "\n".join(files)
                logger.info(msg)
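The summary table relies on nesting `str.format`: the outer call bakes each column width into a new per-column format string, which is then applied to the actual cell values. A small standalone illustration of the trick (the width and sample values are made up):

width = 8
headfmt = '{{:^{0}}}'.format(width)   # -> '{:^8}'
mainfmt = '{{:>{0}}}'.format(width)   # -> '{:>8}'
print(headfmt.format('events'))       # ' events '
print(mainfmt.format(12345))          # '   12345'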
Example #3
 def get_report(self, label, task):
     return os.path.join(self.workdir, label, 'successful',
                         util.id2dir(task), 'report.json')
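All of these examples lean on `util.id2dir` to turn a numeric task id into a nested directory path. Its implementation is not shown here; a minimal sketch of the idea, assuming the helper buckets ids into numbered subdirectories so that no single directory accumulates too many entries (the real bucket size and layout in lobster's util module may differ):

import os


def id2dir(taskid, per_dir=10000):
    # Hypothetical re-implementation: split the id into a two-level path,
    # e.g. 12345 -> '1/2345' with the default bucket size.
    return os.path.join(str(taskid // per_dir), str(taskid % per_dir))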
Example #4
 def get_jobdir(self, jobid, label='', status='running'):
     # See id2dir for job id formatting in filesystem paths
     return os.path.normpath(os.path.join(self.workdir, label, status, util.id2dir(jobid)))
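Assuming the hypothetical `id2dir` sketch above, a call like the following would resolve to a per-task directory under the work area; the object name `monitor` and the resulting layout are illustrative only:

# e.g. with workdir='/data/lobster', label='ttbar', jobid=12345:
#   -> /data/lobster/ttbar/running/1/2345
path = monitor.get_jobdir(12345, label='ttbar', status='running')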
Example #5
    def insert_block(self, dbs, primary_dataset, dataset, user, config,
                     basedir, datasetdir, stageoutdir, chunk):
        block = self.prepare_block(dataset, user)

        files = []
        tasks = []

        configs = []

        logger.info('preparing DBS entry for {} task block: {}'.format(
            len(chunk), block['block_name']))

        for task, _ in chunk:
            taskdir = os.path.join(basedir, util.id2dir(task))
            try:
                files.append(
                    self.prepare_file(dataset, block, user, taskdir,
                                      datasetdir, stageoutdir))
                cfg = config.copy()
                cfg['lfn'] = files[-1]['logical_file_name']
                configs.append(cfg)
                tasks.append(task)
            except ValueError as e:
                logger.warn(
                    'could not find expected output for task {}: {}'.format(
                        task, e))

        block.update({
            'file_count': len(files),
            'block_size': sum([int(f['file_size']) for f in files])
        })

        dump = {
            'dataset_conf_list': [config],
            'file_conf_list': configs,
            'files': files,
            'processing_era': {
                'processing_version': 1,
                'description': 'CRAB3_processing_era'
            },
            'primds': primary_dataset,
            'dataset': dataset,
            'acquisition_era': {
                'acquisition_era_name': user,
                'start_date': 0
            },
            'block': block,
            'file_parent_list': []
        }

        # For debugging
        # from pprint import pprint
        # pprint(config)
        try:
            dbs['local'].insertBulkBlock(dump)
        except HTTPError as e:
            # Re-raise authorization (401) and precondition-failed (412)
            # errors; log and swallow anything else.
            if e.code in (401, 412):
                raise e
            logger.exception(e)

        return tasks, block
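insert_block expects the caller to pass it a bounded chunk of (task, ...) pairs, one chunk per DBS block. A minimal sketch of such a driver loop; `publisher`, `all_tasks`, and the chunk size of 200 are placeholders for illustration:

def chunks(seq, size):
    # Yield consecutive slices of at most `size` items.
    for start in range(0, len(seq), size):
        yield seq[start:start + size]


# all_tasks: list of (task id, <extra>) pairs, as iterated by insert_block.
for chunk in chunks(all_tasks, 200):
    tasks, block = publisher.insert_block(
        dbs, primary_dataset, dataset, user, config,
        basedir, datasetdir, stageoutdir, chunk)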
Example #6
def publish(args):
    with open(args.configfile) as f:
        config = yaml.load(f)

    config = apply_matching(config)

    if len(args.datasets) == 0:
        args.datasets = [task['label'] for task in config.get('tasks', [])]

    workdir = config['workdir']
    user = config.get('publish user', os.environ['USER'])
    publish_instance = config.get('dbs instance', 'phys03')
    published = {'dataset': '', 'dbs instance': publish_instance}

    print "Saving log to {0}".format(os.path.join(workdir, 'publish.log'))
    if not args.foreground:
        ttyfile = open(os.path.join(workdir, 'publish.err'), 'a')
        print "Saving stderr and stdout to {0}".format(os.path.join(workdir, 'publish.err'))

    with daemon.DaemonContext(
            detach_process=not args.foreground,
            stdout=sys.stdout if args.foreground else ttyfile,
            stderr=sys.stderr if args.foreground else ttyfile,
            working_directory=workdir,
            pidfile=util.get_lock(workdir)):
        logging.basicConfig(
                datefmt="%Y-%m-%d %H:%M:%S",
                format="%(asctime)s [%(levelname)s] - %(filename)s %(lineno)d: %(message)s",
                level=config.get('advanced', {}).get('log level', 2) * 10,
                filename=os.path.join(workdir, 'publish.log'))

        if args.foreground:
            console = logging.StreamHandler()
            console.setLevel(config.get('advanced', {}).get('log level', 2) * 10)
            console.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] - %(filename)s %(lineno)d: %(message)s"))
            logging.getLogger('').addHandler(console)

        db = JobitStore(config)
        das_interface = MetaInterface()

        dbs = {}
        for path, key in [[('global', 'DBSReader'), 'global'],
                          [(publish_instance, 'DBSWriter'), 'local'],
                          [(publish_instance, 'DBSReader'), 'reader'],
                          [(publish_instance, 'DBSMigrate'), 'migrator']]:
            dbs[key] = DbsApi('https://cmsweb.cern.ch/dbs/prod/{0}/'.format(os.path.join(*path)))

        for label in args.datasets:
            (dset,
             stageout_path,
             release,
             gtag,
             publish_label,
             cfg,
             pset_hash,
             ds_id,
             publish_hash) = [str(x) for x in db.dataset_info(label)]

            dset = dset.strip('/').split('/')[0]
            if not pset_hash or pset_hash == 'None':
                logging.info('the parameter set hash has not been calculated')
                logging.info('calculating parameter set hash now (may take a few minutes)')
                cfg_path = os.path.join(workdir, label, os.path.basename(cfg))
                tmp_path = cfg_path.replace('.py', '_tmp.py')
                with open(cfg_path, 'r') as infile:
                    with open(tmp_path, 'w') as outfile:
                        fix = "import sys \nif not hasattr(sys, 'argv'): sys.argv = ['{0}']\n"
                        outfile.write(fix.format(tmp_path))
                        outfile.write(infile.read())
                try:
                    pset_hash = createPSetHash(tmp_path)[-32:]
                    db.update_pset_hash(pset_hash, label)
                except Exception:
                    logging.warning('error calculating the cmssw parameter set hash')
                os.remove(tmp_path)

            block = BlockDump(user, dset, dbs['global'], publish_hash, publish_label, release, pset_hash, gtag)

            if len(dbs['local'].listAcquisitionEras(acquisition_era_name=user)) == 0:
                try:
                    dbs['local'].insertAcquisitionEra({'acquisition_era_name': user})
                except Exception, ex:
                    logging.warn(ex)
            try:
                dbs['local'].insertPrimaryDataset(block.data['primds'])
                dbs['local'].insertDataset(block.data['dataset'])
            except Exception, ex:
                logging.warn(ex)
                raise

            jobs = db.finished_jobs(label)

            first_job = 0
            inserted = False
            logging.info('found %d successful %s jobs to publish' % (len(jobs), label))
            missing = []
            while first_job < len(jobs):
                block.reset()
                chunk = jobs[first_job:first_job+args.block_size]
                logging.info('preparing DBS entry for %i job block: %s' % (len(chunk), block['block']['block_name']))

                for job, merged_job in chunk:
                    status = 'merged' if merged_job else 'successful'
                    id = merged_job if merged_job else job
                    tag = 'merged_{0}'.format(merged_job) if merged_job else str(job)

                    f = gzip.open(os.path.join(workdir, label, status, util.id2dir(id), 'report.xml.gz'), 'r')
                    report = readJobReport(f)[0]
                    PFN = os.path.join(stageout_path, report.files[0]['PFN'].replace('.root', '_%s.root' % tag))
                    LFN = block.get_LFN(PFN)
                    matched_PFN = block.get_matched_PFN(PFN, LFN)
                    if not matched_PFN:
                        logging.warn('could not find expected output for job(s) {0}'.format(job))
                        missing.append(job)
                    else:
                        logging.info('adding %s to block' % LFN)
                        block.add_file_config(LFN)
                        block.add_file(LFN, report.files[0], job, merged_job)
                        block.add_dataset_config()
                        if args.migrate_parents:
                            block.add_file_parents(LFN, report)

                if args.migrate_parents:
                    parents_to_migrate = list(set([p['parent_logical_file_name'] for p in block['file_parent_list']]))
                    migrate_parents(parents_to_migrate, dbs)

                if len(block.data['files']) > 0:
                    try:
                        inserted = True
                        dbs['local'].insertBulkBlock(block.data)
                        db.update_published(block.get_publish_update())
                        logging.info('block inserted: %s' % block['block']['block_name'])
                    except HTTPError, e:
                        logging.critical(e)

                first_job += args.block_size
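This example is written for Python 2 (print statements, `except Exception, ex:`). If porting it to Python 3, the corresponding constructs would look like this (shown only for the two patterns above, not as a full port):

print("Saving log to {0}".format(os.path.join(workdir, 'publish.log')))

try:
    dbs['local'].insertAcquisitionEra({'acquisition_era_name': user})
except Exception as ex:      # Python 2 spelling: "except Exception, ex:"
    logging.warning(ex)      # logging.warn is a deprecated alias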