def main(argv):
    """Print a table of the AMI datasets matching ``argv[0]``.

    An AMI client is created and configured from ``AMI_CONFIG``; when that
    path does not exist yet, ``create_auth_config()`` is called first.
    The datasets are requested with the ``events`` and ``nfiles`` fields,
    flattened, and handed to ``print_table``.

    argv -- sequence whose first element is the dataset name pattern.
    """
    ami = AMIClient()
    have_config = os.path.exists(AMI_CONFIG)
    if not have_config:
        create_auth_config()
    ami.read_config(AMI_CONFIG)

    pattern = argv[0]
    print_table(
        get_datasets(ami, pattern, fields='events,nfiles', flatten=True))
class McStatsLookup(object):
    """
    Tool to look up stats in mc datasets that exist in both atlfast and
    fullsim.
    """

    def __init__(self, p_tag, origin='mc12_8TeV', backup_ptag=None):
        """Store the lookup settings and set up the AMI client.

        p_tag       -- passed to ``_filter_by_ldn`` when selecting datasets.
        origin      -- stored on the instance; not read by this class.
        backup_ptag -- stored on the instance; not read by this class.
        """
        self.p_tag = p_tag
        self.backup_ptag = backup_ptag
        self.origin = origin
        self.ntup_filter = 'NTUP_SUSY'
        self._setup_ami_client()
        self.outstream = sys.stdout
        self.bugstream = sys.stderr
        # A dataset whose name contains one or more "_a<digits>" groups is
        # treated as atlfast by get_atlfast_fullsim.
        self.atlfinder = re.compile(r'(_a([0-9])+)+')

    def _setup_ami_client(self):
        """Create ``self.client`` and load ``AMI_CONFIG`` into it.

        ``create_auth_config()`` is called first when the config path
        does not exist.
        """
        self.client = AMIClient()
        if not os.path.exists(AMI_CONFIG):
            create_auth_config()
        self.client.read_config(AMI_CONFIG)

    def get_atlfast_fullsim(self, ds_id, stream=None):
        """Find the atlfast and fullsim datasets for one dataset number.

        Returns a pair ``(atlfast, fullsim)``. Each entry is either
        ``(logical dataset name, expected counts)`` or None when no
        dataset of that kind was found.

        Raises DatasetMatchError when AMI returns nothing for the dataset
        number, or when more than one atlfast dataset matches.
        """
        args = {'dataset_number': str(ds_id)}
        match_sets = query.get_datasets(self.client, '%', **args)
        if not match_sets:
            raise DatasetMatchError('found nothing with {}'.format(
                    args.items()), match_sets)
        match_sets = _filter_by_ldn(
            match_sets, stream, self.ntup_filter, self.p_tag)

        atlfast_ds = None
        fullsim_ds = None
        for m in match_sets:
            if self.atlfinder.search(_ldn(m)):
                if atlfast_ds:
                    raise DatasetMatchError('at least two atlfast',
                                            [atlfast_ds, m])
                atlfast_ds = m
            else:
                # Several fullsim candidates may match; the helper picks
                # which one to keep.
                fullsim_ds = _largest_fullsim_filter(fullsim_ds, m)[0]

        atl = self._name_and_counts(atlfast_ds)
        ful = self._name_and_counts(fullsim_ds)
        return atl, ful

    def _name_and_counts(self, ds):
        """Return ``(ldn, expected counts)`` for ds, or None if ds is unset.

        The counts are only requested for datasets that were actually
        found, so no lookup is issued with None.
        """
        if not ds:
            return None
        counts = _get_expected_counts(self.client, ds)
        return _ldn(ds), counts
class AmiAugmenter(object):
    """
    Class to wrap ami augmentation.
    """

    def __init__(self, p_tag, origin='mc12_8TeV', backup_ptag=None):
        """Store the lookup settings and set up the AMI client.

        p_tag       -- passed to ``_filter_by_ldn`` when selecting datasets.
        origin      -- leading component of the AMI query pattern built in
                       ``ds_from_id``.
        backup_ptag -- stored on the instance; not read by this class.
        """
        self.p_tag = p_tag
        self.backup_ptag = backup_ptag
        self.origin = origin
        self.ntup_filter = 'NTUP_SUSY'
        self._setup_ami_client()
        self.outstream = sys.stdout
        self.bugstream = sys.stderr

    def _setup_ami_client(self):
        """Create ``self.client`` and load ``AMI_CONFIG`` into it.

        ``create_auth_config()`` is called first when the config path
        does not exist.
        """
        self.client = AMIClient()
        if not os.path.exists(AMI_CONFIG):
            create_auth_config()
        self.client.read_config(AMI_CONFIG)

    def get_dataset_range(self, ds_range, physics_type=None):
        """Look up every dataset id in ds_range and return them keyed by
        ``ds.key``.

        When physics_type is given it is assigned to each dataset. If two
        datasets share a key, ``_filter_for_largest_tags`` decides which
        one is kept; DatasetOverwriteError is raised when it does not
        narrow the pair down to a single dataset.
        """
        ds_dict = {}
        for num in ds_range:
            self.outstream.write('looking up {}, category {}\n'.format(
                    num, physics_type))
            for ds in self.ds_from_id(num):
                if physics_type:
                    ds.physics_type = physics_type
                if ds.key not in ds_dict:
                    ds_dict[ds.key] = ds
                    continue
                old = ds_dict[ds.key]
                largest_ds = _filter_for_largest_tags(ds, old)
                if len(largest_ds) != 1:
                    raise DatasetOverwriteError(
                        "tried to overwrite {},\n{} with \n{}".format(
                            ds.key, ds.full_name, old.full_name))
                ds_dict[ds.key] = largest_ds[0]
        return ds_dict

    def ds_from_id(self, ds_id, stream=None):
        """Yield a dataset object for every AMI match of ds_id.

        Streams starting with 'physics' are queried by run, everything
        else by dataset number. This is a generator, so the AMI query and
        any DatasetMatchError only happen once iteration starts.
        """
        if stream and stream.startswith('physics'):
            args = {'run': str(ds_id)}
        else:
            args = {'dataset_number': str(ds_id)}

        qstr = '%'.join([self.origin, str(ds_id), stream or ''])
        match_sets = query.get_datasets(self.client, qstr, **args)
        if not match_sets:
            raise DatasetMatchError('found nothing with {}'.format(qstr),
                                    match_sets)

        match_sets = _filter_by_ldn(
            match_sets, stream, self.ntup_filter, self.p_tag)

        for match in match_sets:
            yield self.ds_from_ldn(_ldn(match))

    def ds_from_ldn(self, ldn):
        """Build a ``meta.Dataset`` for ldn and fill it from AMI.

        The MC-specific fields are only filled when ``ds.is_data`` is
        false.
        """
        info = query.get_dataset_info(self.client, ldn)
        ds = meta.Dataset(ldn)
        self._write_ami_info(ds, info)
        if not ds.is_data:
            self._write_mc_ami_info(ds, info)
        return ds

    def get_datasets_year(self, year=12, stream=None):
        """Return one dataset per run of the given year, keyed by
        ``ds.key``.

        Only periods whose project name ends in 'TeV' are considered.
        Runs for which the lookup raises DatasetMatchError are reported
        on outstream/bugstream and skipped.
        """
        datasets = {}
        periods = query.get_periods(self.client, year=year)
        hep_periods = [
            period.name for period in periods
            if period.project.endswith('TeV')]
        runs = query.get_runs(self.client, hep_periods, year=year)
        n_runs = len(runs)
        for run_n, run in enumerate(runs):
            self.outstream.write(
                'looking for {} of {} {}...'.format(run_n, n_runs, run))
            try:
                # NOTE(review): a lookup that yields nothing raises
                # StopIteration here, which is not handled -- confirm
                # that _filter_by_ldn never returns an empty collection.
                ds = next(iter(self.ds_from_id(run, stream)))
            except DatasetMatchError as err:
                # err.matches tells whether AMI knew the run at all or
                # only lacked a dataset in the requested stream.
                self.outstream.write(
                    'none in stream\n' if err.matches else 'nothing\n')
                self.bugstream.write(str(err) + '\n')
                continue
            self.outstream.write('found: {}\n'.format(ds.full_name))
            datasets[ds.key] = ds
        return datasets

    def _bug_report_line(self, line, ds, info):
        """Format a one-line problem report for ds.

        AMI is asked for the physicist responsible for the dataset; when
        one is found their address is included in the returned line.
        """
        diagnostic = 'for {} {}. In info: {}\n'.format(
            ds.key, ds.name, ', '.join(info.extra.keys()))

        responsible = ''
        argv = ["SearchQuery"]
        argv.append(
            "-sql=select physicistResponsible from dataset where "
            "logicalDatasetName='{}'".format(info.info['logicalDatasetName']))
        argv.append('project=mc12_001')
        argv.append('processingStep=production')
        result = self.client.execute(argv)
        for row in result.rows():
            if 'physicistResponsible' in row:
                assert not responsible, 'two responsible physicists found'
                responsible = row['physicistResponsible']

        tmp_str = '{} (email: {res}) {}' if responsible else '{} {}'
        return tmp_str.format(line, diagnostic, res=responsible)

    def _campaign(self, ldn):
        """Print the campaign rows AMI reports for ldn (best effort).

        Any ordinary error from the AMI call is reported as 'fail' and
        otherwise ignored. KeyboardInterrupt and SystemExit are allowed
        to propagate.
        """
        # 'ListDatasetProvenance' was the command tried here before.
        argv = ["ListCampaignForDataset",
                "logicalDatasetName={}".format(ldn)]
        try:
            result = self.client.execute(argv)
            for row in result.rows():
                # Single-argument call form: valid on Python 2 and 3.
                print('found campaign: {}!'.format(row))
        except Exception:
            print('fail')

    def _write_mc_ami_info(self, ds, info, overwrite=False):
        """Fill the filter efficiency and cross section of ds from AMI.

        Existing values on ds are kept unless overwrite is true. Missing
        AMI values are reported on bugstream. When ds already carries a
        cross section that differs from the AMI one by more than 10%, a
        warning is issued instead of replacing it.
        """
        self._campaign(ds.full_name)

        if not ds.filteff or overwrite:
            for name in ('GenFiltEff_mean', 'approx_GenFiltEff'):
                if name in info.extra:
                    ds.filteff = float(info.extra[name])
                    break

        if not ds.filteff:
            self.bugstream.write(
                self._bug_report_line("can't find filteff", ds, info))

        new_xsec = 0.0
        for name in ('crossSection_mean', 'approx_crossSection'):
            if name in info.extra:
                # ami stores in nb
                new_xsec = float(info.extra[name])*1e6
                break

        if not new_xsec:
            self.bugstream.write(
                self._bug_report_line("can't find xsection", ds, info))
            return

        if not ds.total_xsec_fb or overwrite:
            ds.total_xsec_fb = new_xsec
            return

        diff = ds.total_xsec_fb - new_xsec
        rel_dif = abs(diff / ds.total_xsec_fb)
        if rel_dif > 0.1:
            warn('for sample {id} {name}: '
                 'ami gives xsec of {ami} fb, '
                 'susytools gives {st} (diff {diff:.1%})'.format(
                    id=ds.id, name=ds.name,
                    ami=new_xsec, st=ds.total_xsec_fb,
                    diff=rel_dif))

    def _write_ami_info(self, ds, info):
        """Copy the AMI event total onto ds and mark 'ami' as a source."""
        ds.n_expected_entries = int(info.info['totalEvents'])
        ds.meta_sources.add('ami')
# Any datasets which don't have the provenance stored properly in AMI # should be hardcoded here (it happens) DS_NOPROV = {} # Cross-sections are cached so that we don't need to keep asking AMI # for them over and over XSEC_CACHE_FILE = os.path.join(HERE, 'xsec', 'cache.pickle') XSEC_CACHE_MODIFIED = False XSEC_CACHE = {} if USE_PYAMI: amiclient = AMIClient() if not os.path.exists(AMI_CONFIG): create_auth_config() amiclient.read_config(AMI_CONFIG) class NoMatchingDatasetsFound(Exception): pass class Database(dict): @classmethod def match_to_ds(cls, match): """ Construct the original NTUP dataset name from a skim match object """ if match.group('year') == '11': ntup = 'merge.NTUP_TAUMEDIUM'
# Any datasets which don't have the provenance stored properly in AMI # should be hardcoded here (it happens) DS_NOPROV = {} # Cross-sections are cached so that we don't need to keep asking AMI # for them over and over XSEC_CACHE_FILE = os.path.join(HERE, 'xsec', 'cache.pickle') XSEC_CACHE_MODIFIED = False XSEC_CACHE = {} if USE_PYAMI: amiclient = AMIClient() if not os.path.exists(AMI_CONFIG): create_auth_config() amiclient.read_config(AMI_CONFIG) class NoMatchingDatasetsFound(Exception): pass class Database(dict): @classmethod def match_to_ds(cls, match): """ Construct the original NTUP dataset name from a skim match object """ if match.group('year') == '11': ntup = 'merge.NTUP_TAUMEDIUM' else:
def get_tag_diffs(mon, ref, g, mon_task, ref_task):
    """Write an HTML comparison of a reprocessing and a reference tag.

    mon, ref           -- objects providing ``rtag`` (the AMI tag) and
                          ``run`` for the monitored and reference sides.
    g                  -- output object; only its ``write`` method is used.
    mon_task, ref_task -- Savannah task ids, or None to emit a link to
                          the task search page instead.

    The report contains the AMI tag / Savannah task / run header, the
    values of a fixed list of configuration tags for both sides, and the
    package differences between the two software releases as returned by
    ``PyCmt.Cmt.get_tag_diff``.

    Configuration tags missing on either side are shown as empty cells.
    When a side has no 'SWReleaseCache' entry the release comparison is
    skipped and reported as zero differences.

    Always returns 0.
    """
    client = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    client.read_config(AMI_CONFIG)

    mon_taginfo = get_configtags(client, mon.rtag)
    ref_taginfo = get_configtags(client, ref.rtag)

    configtags = [
        'SWReleaseCache', 'lvl1ps', 'hltps', 'smk', 'enlvl1prescales'
    ]

    # field name -> [monitored value, reference value]
    configcomp = {}
    mon_rel = ''
    ref_rel = ''
    for n, info in enumerate(mon_taginfo):
        # The reference side may return fewer entries than the monitored
        # one; treat a missing entry as having no fields.
        try:
            ref_info = ref_taginfo[n]
        except IndexError:
            ref_info = {}
        for xinfo in info.keys():
            if xinfo not in configtags:
                continue
            if xinfo == 'SWReleaseCache':
                mon_rel = info[xinfo]
            ref_value = ref_info[xinfo] if xinfo in ref_info.keys() else ''
            configcomp[xinfo] = [info[xinfo], ref_value]
    for info in ref_taginfo:
        if 'SWReleaseCache' in info.keys():
            ref_rel = info['SWReleaseCache']

    mon_release = mon_rel.replace('_', ',')
    ref_release = ref_rel.replace('_', ',')

    if mon_release and ref_release:
        import PyCmt.Cmt as Cmt
        diffs = Cmt.get_tag_diff(ref=ref_release, chk=mon_release,
                                 verbose=False)
    else:
        # Without both releases there is nothing to compare.
        diffs = []

    ami_cell = '<td><a href ="https://ami.in2p3.fr/AMI/servlet/net.hep.atlas.Database.Bookkeeping.AMI.Servlet.Command?Converter=/AMIXmlToAMIProdHtml.xsl&Command=FormListConfigurationTag+-configTag=%s">%s</a></td>'
    sav_cell = '<td><a href="https://savannah.cern.ch/task/?%s"> Task #%s </a></td>'
    sav_search_cell = '<td><a href="https://savannah.cern.ch/task/index.php?go_report=Apply&group=atlas-trig&func=browse&category_id=107&status_id=0"> Search Tasks </a></td>'

    g.write('<table>\n')
    # The last header cell is now closed with </td>, which was missing.
    g.write('<tr><td width="250"></td><td width="250"><b>Reprocessing</b></td>'
            '<td width="250"><b>Reference</b></td></tr>')

    g.write('<tr><td>AMI Tag </td>')
    for tag in (mon.rtag, ref.rtag):
        g.write(ami_cell % (tag, tag))
    g.write('</tr>')

    g.write('<tr><td> Savannah Task </td>')
    for task in (mon_task, ref_task):
        if task is None:
            g.write(sav_search_cell)
        else:
            g.write(sav_cell % (task, task))
    g.write('</tr>\n')

    g.write('<tr><td> Run </td>')
    for run in (mon.run, ref.run):
        g.write('<td> %s </td>' % str(run))
    g.write('</tr><tr></tr>\n')

    g.write('<tr><td><b>Tag Configuration </b></td></tr>\n')
    for field in configtags:
        mon_value, ref_value = configcomp.get(field, ('', ''))
        g.write('<tr><td>%s</td><td>%s</td><td>%s</td>' %
                (field, mon_value, ref_value))
        g.write('</tr>\n')
    g.write('<tr></tr>')
    g.write('</table>')

    n_diffs = len(diffs)
    g.write('<h3> Release Tag Differences </h3>')
    g.write('<p> Found [%i] differences </p>\n' % n_diffs)

    if n_diffs:
        g.write('<table>\n')
        g.write(
            '<tr><td width = "150"><b>Reprocessing</b></td><td width="250"><b>mon-project</b></td>'
        )
        g.write(
            '<td width = "150"><b>Reference</b></td><td width="250"><b>ref-project</b></td>'
        )
        g.write('<td width = "500"><b>package name</b></td></tr>')
        for diff in diffs:
            g.write('<tr>')
            g.write('<td> %s </td>' % diff['chk'])
            g.write('<td> %s </td>' % diff['chk_proj'])
            g.write('<td> %s </td>' % diff['ref'])
            g.write('<td> %s </td>' % diff['ref_proj'])
            g.write('<td> %s </td>' % diff['full_name'])
            g.write('</tr>\n')
        g.write('</table>')

    return 0