Example 1
# Imports assume the old standalone pyAMI client that provides these helpers
import os

from pyAMI.client import AMIClient
from pyAMI.auth import AMI_CONFIG, create_auth_config
from pyAMI.query import get_datasets, print_table


def main(argv):
    client = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    client.read_config(AMI_CONFIG)
    dataset_name_pattern = argv[0]
    res = get_datasets(client, dataset_name_pattern,
                       fields='events,nfiles', flatten=True)
    print_table(res)
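A minimal invocation sketch, assuming the imports above; the script name and dataset pattern are illustrative only.

import sys

# Hypothetical entry point: python list_datasets.py 'mc12_8TeV%NTUP_SUSY%'
if __name__ == '__main__':
    main(sys.argv[1:])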
Example 2
class McStatsLookup(object):
    """
    Tool to look up stats in mc datasets that exist in both atlfast and
    fullsim.
    """
    def __init__(self, p_tag, origin='mc12_8TeV', backup_ptag=None):
        self.p_tag = p_tag
        self.backup_ptag = backup_ptag
        self.origin = origin
        self.ntup_filter = 'NTUP_SUSY'
        self._setup_ami_client()
        self.outstream = sys.stdout
        self.bugstream = sys.stderr
        # matches atlfast simulation tags like '_a123' in the dataset name
        self.atlfinder = re.compile('(_a([0-9])+)+')

    def _setup_ami_client(self):
        self.client = AMIClient()
        if not os.path.exists(AMI_CONFIG):
            create_auth_config()
        self.client.read_config(AMI_CONFIG)


    def get_atlfast_fullsim(self, ds_id, stream=None):
        args = {'dataset_number':str(ds_id)}

        match_sets = query.get_datasets(self.client, '%', **args)

        if not match_sets:
            raise DatasetMatchError('found nothing with {}'.format(
                    args.items()), match_sets)

        match_sets = _filter_by_ldn(
            match_sets, stream, self.ntup_filter, self.p_tag)

        atlfast_ds = None
        fullsim_ds = None
        for m in match_sets:
            ldn = _ldn(m)
            if self.atlfinder.search(ldn):
                if atlfast_ds:
                    raise DatasetMatchError('at least two atlfast',
                                            [atlfast_ds,m])
                atlfast_ds = m
            else:
                fullsim_ds = _largest_fullsim_filter(fullsim_ds, m)[0]

        atlfast_counts = _get_expected_counts(self.client, atlfast_ds)
        fullsim_counts = _get_expected_counts(self.client, fullsim_ds)
        atl = (_ldn(atlfast_ds), atlfast_counts) if atlfast_ds else None
        ful = (_ldn(fullsim_ds), fullsim_counts) if fullsim_ds else None
        return atl, ful
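A hedged usage sketch for the lookup class above, assuming it runs in the same module (so sys and DatasetMatchError are in scope); the p-tag and dataset id are invented for illustration.

lookup = McStatsLookup('p1328')
try:
    atlfast, fullsim = lookup.get_atlfast_fullsim(164303)
except DatasetMatchError as err:
    sys.stderr.write('lookup failed: {}\n'.format(err))
else:
    # Each entry is either None or a (logical dataset name, counts) pair
    for entry in (atlfast, fullsim):
        if entry:
            ldn, counts = entry
            lookup.outstream.write('{}: {} events\n'.format(ldn, counts))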
Example 3
class AmiAugmenter(object):
    """
    Class to wrap ami augmentation.
    """
    def __init__(self, p_tag, origin='mc12_8TeV', backup_ptag=None):
        self.p_tag = p_tag
        self.backup_ptag = backup_ptag
        self.origin = origin
        self.ntup_filter = 'NTUP_SUSY'
        self._setup_ami_client()
        self.outstream = sys.stdout
        self.bugstream = sys.stderr

    def _setup_ami_client(self):
        self.client = AMIClient()
        if not os.path.exists(AMI_CONFIG):
            create_auth_config()
        self.client.read_config(AMI_CONFIG)

    def get_dataset_range(self, ds_range, physics_type=None):
        ds_dict = {}
        for num in ds_range:
            self.outstream.write('looking up {}, category {}\n'.format(
                    num, physics_type))
            for ds in self.ds_from_id(num):
                if physics_type:
                    ds.physics_type = physics_type

                if ds.key in ds_dict:
                    old = ds_dict[ds.key]
                    largest_ds = _filter_for_largest_tags(ds, old)
                    if len(largest_ds) == 1:
                        ds_dict[ds.key] = largest_ds[0]
                    else:
                        raise DatasetOverwriteError(
                            "tried to overwrite {},\n{} with \n{}".format(
                                ds.key, ds.full_name,
                                ds_dict[ds.key].full_name))
                else:
                    ds_dict[ds.key] = ds
        return ds_dict

    def ds_from_id(self, ds_id, stream=None):
        if stream and stream.startswith('physics'):
            args = {'run':str(ds_id)}
        else:
            args = {'dataset_number':str(ds_id)}

        qstr = '%'.join([self.origin, str(ds_id), stream or ''])
        match_sets = query.get_datasets(self.client, qstr, **args)

        if not match_sets:
            raise DatasetMatchError('found nothing with {}'.format(qstr),
                                    match_sets)

        match_sets = _filter_by_ldn(
            match_sets, stream, self.ntup_filter, self.p_tag)

        for match in match_sets:
            ldn = _ldn(match)
            yield self.ds_from_ldn(ldn)

    def ds_from_ldn(self, ldn):
        info = query.get_dataset_info(self.client, ldn)
        ds = meta.Dataset(ldn)
        self._write_ami_info(ds, info)
        if not ds.is_data:
            self._write_mc_ami_info(ds, info)
        return ds

    def get_datasets_year(self, year=12, stream=None):
        datasets = {}
        periods = query.get_periods(self.client, year=year)
        hep_periods = []
        for period in periods:
            if period.project.endswith('TeV'):
                hep_periods.append(period.name)
        runs = query.get_runs(self.client, hep_periods, year=year)
        n_runs = len(runs)
        for run_n, run in enumerate(runs, 1):
            self.outstream.write(
                'looking for {} of {} {}...'.format(run_n, n_runs, run))
            try:
                ds = next(iter(self.ds_from_id(run, stream)))
            except DatasetMatchError as err:
                if err.matches:
                    self.outstream.write('none in stream\n')
                    self.bugstream.write(str(err) + '\n')
                    continue
                else:
                    self.outstream.write('nothing\n')
                    self.bugstream.write(str(err) + '\n')
                    continue
            self.outstream.write('found: {}\n'.format(ds.full_name))
            datasets[ds.key] = ds
        return datasets

    def _bug_report_line(self, line, ds, info):
        diagnostic = 'for {} {}. In info: {}\n'.format(
            ds.key, ds.name, ', '.join(info.extra.keys()))

        responsible = ''
        argv=["SearchQuery"]
        argv.append(
            "-sql=select physicistResponsible from dataset where "
            "logicalDatasetName='{}'".format(info.info['logicalDatasetName']))
        argv.append('project=mc12_001')
        argv.append('processingStep=production')
        result = self.client.execute(argv)
        for row in result.rows():
            if 'physicistResponsible' in row:
                assert not responsible, 'two responsible physicists found'
                responsible = row['physicistResponsible']

        tmp_str = '{} (email: {res}) {}' if responsible else '{} {}'
        return tmp_str.format(line, diagnostic, res=responsible)

    def _campaign(self, ldn):
        # 'ListDatasetProvenance' would be an alternative command here
        argv = ["ListCampaignForDataset"]
        argv.append("logicalDatasetName={}".format(ldn))
        try:
            result = self.client.execute(argv)
            for row in result.rows():
                self.outstream.write('found campaign: {}\n'.format(row))
        except Exception as err:
            self.bugstream.write(
                'campaign lookup failed for {}: {}\n'.format(ldn, err))

    def _write_mc_ami_info(self, ds, info, overwrite=False):
        self._campaign(ds.full_name)
        if not ds.filteff or overwrite:
            filteff_list = ['GenFiltEff_mean', 'approx_GenFiltEff']
            for name in filteff_list:
                if name in info.extra:
                    ds.filteff = float(info.extra[name])
                    break

        if not ds.filteff:
            self.bugstream.write(
                self._bug_report_line("can't find filteff", ds, info))

        new_xsec = 0.0
        xsec_list = ['crossSection_mean', 'approx_crossSection']
        for name in xsec_list:
            if name in info.extra:
                # AMI stores cross-sections in nb; convert to fb
                new_xsec = float(info.extra[name]) * 1e6
                break

        if not new_xsec:
            self.bugstream.write(
                self._bug_report_line("can't find xsection", ds, info))
            return

        if not ds.total_xsec_fb or overwrite:
            ds.total_xsec_fb = new_xsec
        else:
            diff = ds.total_xsec_fb - new_xsec
            rel_dif = abs(diff / ds.total_xsec_fb)
            if rel_dif > 0.1:
                warn('for sample {id} {name}: '
                     'ami gives xsec of {ami} fb, '
                     'susytools gives {st} (diff {diff:.1%})'.format(
                        id=ds.id, name=ds.name,
                        ami=new_xsec, st=ds.total_xsec_fb,
                        diff=rel_dif))

    def _write_ami_info(self, ds, info):
        ds.n_expected_entries = int(info.info['totalEvents'])
        ds.meta_sources.add('ami')
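A hedged usage sketch for the augmenter, assuming the same module context; the p-tag, dataset id range, and physics type are invented for illustration.

augmenter = AmiAugmenter('p1328')
ds_dict = augmenter.get_dataset_range(
    range(164300, 164310), physics_type='signal')
for key, ds in ds_dict.items():
    # Fields filled in by _write_ami_info / _write_mc_ami_info above
    augmenter.outstream.write('{}: {} fb, {} events\n'.format(
        key, ds.total_xsec_fb, ds.n_expected_entries))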
Example 4
# Any datasets which don't have the provenance stored properly in AMI
# should be hardcoded here (it happens)
DS_NOPROV = {}

# Cross-sections are cached so that we don't need to keep asking AMI
# for them over and over
XSEC_CACHE_FILE = os.path.join(HERE, 'xsec', 'cache.pickle')
XSEC_CACHE_MODIFIED = False
XSEC_CACHE = {}
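
# A sketch (not from the source) of how the pickle cache above might be
# loaded and persisted; load_xsec_cache and save_xsec_cache are hypothetical
# helper names introduced here for illustration.
import pickle

def load_xsec_cache():
    global XSEC_CACHE
    if os.path.isfile(XSEC_CACHE_FILE):
        with open(XSEC_CACHE_FILE, 'rb') as cache_file:
            XSEC_CACHE = pickle.load(cache_file)

def save_xsec_cache():
    # Only rewrite the pickle if something was added during this run
    if XSEC_CACHE_MODIFIED:
        with open(XSEC_CACHE_FILE, 'wb') as cache_file:
            pickle.dump(XSEC_CACHE, cache_file)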

if USE_PYAMI:
    amiclient = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    amiclient.read_config(AMI_CONFIG)


class NoMatchingDatasetsFound(Exception):
    pass


class Database(dict):

    @classmethod
    def match_to_ds(cls, match):
        """
        Construct the original NTUP dataset name from a skim match object
        """
        if match.group('year') == '11':
            ntup = 'merge.NTUP_TAUMEDIUM'
Example 5
def get_tag_diffs(mon, ref, g, mon_task, ref_task):
    client = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    client.read_config(AMI_CONFIG)

    mon_rel = ''
    ref_rel = ''
    mon_taginfo = get_configtags(client, mon.rtag)
    ref_taginfo = get_configtags(client, ref.rtag)
    configtags = [
        'SWReleaseCache', 'lvl1ps', 'hltps', 'smk', 'enlvl1prescales'
    ]
    configcomp = {}
    for n, info in enumerate(mon_taginfo):
        ref_info = ref_taginfo[n]
        for xinfo in info.keys():
            if xinfo in configtags:
                if xinfo == 'SWReleaseCache':
                    mon_rel = info[xinfo]
                configcomp[xinfo] = [info[xinfo], ref_info[xinfo]]

    for info in ref_taginfo:
        for xinfo in info.keys():
            if xinfo == 'SWReleaseCache':
                ref_rel = info[xinfo]
    mon_release = mon_rel.replace('_', ',')
    ref_release = ref_rel.replace('_', ',')
    import PyCmt.Cmt as Cmt
    diffs = Cmt.get_tag_diff(ref=ref_release, chk=mon_release, verbose=False)

    g.write('<table>\n')
    g.write('<tr><td width="250"></td><td width="250"><b>Reprocessing</b></td>'
            '<td width="250"><b>Reference</b></td></tr>')
    ami_link = ('<td><a href="https://ami.in2p3.fr/AMI/servlet/'
                'net.hep.atlas.Database.Bookkeeping.AMI.Servlet.Command?'
                'Converter=/AMIXmlToAMIProdHtml.xsl&Command='
                'FormListConfigurationTag+-configTag=%s">%s</a></td>')
    sav_link = '<td><a href="https://savannah.cern.ch/task/?%s"> Task #%s </a></td>'
    g.write('<tr><td>AMI Tag </td>')
    for tag in (mon.rtag, ref.rtag):
        g.write(ami_link % (tag, tag))
    g.write('</tr>')
    g.write('<tr><td> Savannah Task </td>')
    for task in (mon_task, ref_task):
        if task is None:
            g.write(
                '<td><a href="https://savannah.cern.ch/task/index.php?'
                'go_report=Apply&group=atlas-trig&func=browse'
                '&category_id=107&status_id=0"> Search Tasks </a></td>')
        else:
            g.write(sav_link % (task, task))
    g.write('</tr>\n')
    g.write('<tr><td> Run </td>')
    for run in (mon.run, ref.run):
        g.write('<td> %s </td>' % str(run))
    g.write('</tr><tr></tr>\n')
    g.write('<tr><td><b>Tag Configuration </b></td></tr>\n')
    for field in configtags:
        g.write('<tr><td>%s</td><td>%s</td><td>%s</td>' %
                (field, configcomp[field][0], configcomp[field][1]))
        g.write('</tr>\n')
    g.write('<tr></tr>')
    g.write('</table>')

    g.write('<h3> Release Tag Differences </h3>')
    g.write('<p> Found [%i] differences </p>\n' % len(diffs))

    if diffs:
        g.write('<table>\n')
        g.write(
            '<tr><td width="150"><b>Reprocessing</b></td>'
            '<td width="250"><b>mon-project</b></td>')
        g.write(
            '<td width="150"><b>Reference</b></td>'
            '<td width="250"><b>ref-project</b></td>')
        g.write('<td width="500"><b>package name</b></td></tr>')
        for diff in diffs:
            g.write('<tr>')
            g.write('<td> %s </td>' % diff['chk'])
            g.write('<td> %s </td>' % diff['chk_proj'])
            g.write('<td> %s </td>' % diff['ref'])
            g.write('<td> %s </td>' % diff['ref_proj'])
            g.write('<td> %s </td>' % diff['full_name'])
            g.write('</tr>\n')
        g.write('</table>')

    return 0
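A sketch of how get_tag_diffs might be driven, assuming the same module context; TagInfo is a hypothetical stand-in for whatever object carries the rtag and run attributes, and all tag, run, and task values are illustrative.

from collections import namedtuple

# Hypothetical holder for the attributes get_tag_diffs actually reads
TagInfo = namedtuple('TagInfo', 'rtag run')

mon = TagInfo(rtag='r4065', run=215456)
ref = TagInfo(rtag='r4034', run=215456)

with open('tag_diffs.html', 'w') as g:
    get_tag_diffs(mon, ref, g, mon_task=12345, ref_task=None)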