def get_atlfast_fullsim(self, ds_id, stream=None):
    """Look up the atlfast and fullsim datasets for one dataset id.

    Returns a pair ``(atl, ful)`` where each element is
    ``(ldn, expected_counts)`` or ``None`` if no matching dataset of
    that kind was found.

    Raises:
        DatasetMatchError: if no dataset matches ``ds_id`` at all, or
            if more than one atlfast dataset matches.
    """
    args = {'dataset_number': str(ds_id)}
    match_sets = query.get_datasets(self.client, '%', **args)
    if not match_sets:
        raise DatasetMatchError('found nothing with {}'.format(
            args.items()), match_sets)
    match_sets = _filter_by_ldn(
        match_sets, stream, self.ntup_filter, self.p_tag)
    atlfast_ds = None
    fullsim_ds = None
    for m in match_sets:
        ldn = _ldn(m)
        # was: self.atlfinder.search(_ldn(m)) — recomputed _ldn(m) and
        # left the local 'ldn' unused; reuse the value computed above
        if self.atlfinder.search(ldn):
            if atlfast_ds:
                raise DatasetMatchError('at least two atlfast',
                                        [atlfast_ds, m])
            atlfast_ds = m
        else:
            # keep whichever fullsim candidate has the larger filter
            fullsim_ds = _largest_fullsim_filter(fullsim_ds, m)[0]
    atlfast_counts = _get_expected_counts(self.client, atlfast_ds)
    fullsim_counts = _get_expected_counts(self.client, fullsim_ds)
    atl = (_ldn(atlfast_ds), atlfast_counts) if atlfast_ds else None
    ful = (_ldn(fullsim_ds), fullsim_counts) if fullsim_ds else None
    return atl, ful
def main(argv):
    """Print an events/nfiles table for datasets matching ``argv[0]``."""
    # make sure the AMI auth config exists before the client reads it
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    client = AMIClient()
    client.read_config(AMI_CONFIG)
    pattern = argv[0]
    rows = get_datasets(client, pattern,
                        fields='events,nfiles', flatten=True)
    print_table(rows)
def ds_from_id(self, ds_id, stream=None):
    """Yield datasets matching ``ds_id`` (optionally limited to ``stream``).

    Raises:
        DatasetMatchError: if the query returns no datasets.
    """
    # physics streams are queried by run number, everything else by
    # dataset number
    if stream and stream.startswith('physics'):
        key = 'run'
    else:
        key = 'dataset_number'
    args = {key: str(ds_id)}
    qstr = '%'.join([self.origin, str(ds_id), stream or ''])
    matches = query.get_datasets(self.client, qstr, **args)
    if not matches:
        raise DatasetMatchError('found nothing with {}'.format(qstr),
                                matches)
    matches = _filter_by_ldn(
        matches, stream, self.ntup_filter, self.p_tag)
    for match in matches:
        yield self.ds_from_ldn(_ldn(match))
def validate_single(args, child=True):
    """Validate one dataset's local event count against AMI bookkeeping.

    ``args`` is a ``(name, info)`` pair; ``info`` supplies ``dirs``,
    ``file_pattern`` and ``ds``.  Counts events in the local ROOT files
    and compares them with the NTUP and AOD counts reported by AMI.

    Returns ``complete`` (bool), or ``(captured_output, complete)`` when
    ``child`` is True (output is captured so a worker process can hand
    it back to the parent).
    """
    if child:
        # redirect stdout/stderr into a buffer the parent can collect
        from cStringIO import StringIO
        sys.stdout = out = StringIO()
        sys.stderr = out
    name = args[0]
    info = args[1]
    complete = True
    try:
        # count events actually present in the local ROOT files
        root_files = []
        for directory in info.dirs:  # renamed from 'dir' (shadowed builtin)
            root_files += glob.glob(
                os.path.join(directory, info.file_pattern))
        events = 0
        for fname in root_files:
            try:
                with root_open(fname) as rfile:
                    try:
                        # skimmed dataset
                        events += int(rfile.cutflow_event[0])
                    except DoesNotExist:
                        # unskimmed dataset
                        tree = rfile.tau
                        events += tree.GetEntries()
            except IOError:
                # unreadable file: warn and skip it
                # (fixed "Currupt" typo; removed redundant 'pass')
                log.warning("Corrupt file: %s" % fname)
        # determine events in original ntuples
        # use first dir
        ds_name = info.ds
        log.info('NTUP: ' + ds_name)
        ds_info = get_dataset_info(amiclient, ds_name)
        ntuple_events = int(ds_info.info['totalEvents'])
        try:
            # determine events in AODs
            prov = get_provenance(amiclient, ds_name, type='AOD')
            AOD_ds = prov.values()[0][0].replace('recon', 'merge')
            log.info('AOD: ' + AOD_ds)
            AOD_events = int(
                get_datasets(amiclient, AOD_ds, fields='events',
                             flatten=True)[0][0])
        except IndexError:
            # no AOD provenance found: fall back to the ntuple count
            log.info('AOD: UNKNOWN')
            AOD_events = ntuple_events
        log.info(name)
        log.info("\tevts\tNTUP\tAOD")
        log.info("\t%i\t%i\t%i" % (events, ntuple_events, AOD_events))
        if events != ntuple_events:
            log.warning("NTUP MISMATCH")
        if events != AOD_events:
            log.warning("AOD MISMATCH")
        if events != ntuple_events and (events != AOD_events or AOD_events == 0):
            log.warning("MISSING EVENTS")
            complete = False
        if child:
            return out.getvalue(), complete
        return complete
    except Exception:  # was 'except Exception, e' with 'e' unused
        import traceback
        log.warning("dataset %s exception" % name)
        traceback.print_exception(*sys.exc_info())
        if child:
            return out.getvalue(), False
        return False
def validate_single(args, child=True):
    """Validate one dataset's local event count against AMI bookkeeping.

    NOTE(review): this is a near-verbatim duplicate of an earlier
    ``validate_single`` definition in this file; being defined later,
    this copy is the one that takes effect at import time — the two
    should probably be merged.

    ``args`` is a ``(name, info)`` pair; ``info`` supplies ``dirs``,
    ``file_pattern`` and ``ds``.  Returns ``complete`` (bool), or
    ``(captured_output, complete)`` when ``child`` is True (output is
    captured so a worker process can hand it back to the parent).
    """
    if child:
        # redirect stdout/stderr into a buffer the parent can collect
        from cStringIO import StringIO
        sys.stdout = out = StringIO()
        sys.stderr = out
    name = args[0]
    info = args[1]
    complete = True
    try:
        # count events actually present in the local ROOT files
        dirs = info.dirs
        root_files = []
        for dir in dirs:  # NOTE(review): 'dir' shadows the builtin
            root_files += glob.glob(os.path.join(dir, info.file_pattern))
        events = 0
        for fname in root_files:
            try:
                with root_open(fname) as rfile:
                    try:
                        # skimmed dataset
                        events += int(rfile.cutflow_event[0])
                    except DoesNotExist:
                        # unskimmed dataset
                        tree = rfile.tau
                        events += tree.GetEntries()
            except IOError:
                # unreadable file: warn and skip it
                # NOTE(review): "Currupt" is a typo ("Corrupt") in the
                # log message
                log.warning("Currupt file: %s" % fname)
                pass
        # determine events in original ntuples
        # use first dir
        ds_name = info.ds
        log.info('NTUP: ' + ds_name)
        ds_info = get_dataset_info(amiclient, ds_name)
        ntuple_events = int(ds_info.info['totalEvents'])
        try:
            # determine events in AODs
            prov = get_provenance(amiclient, ds_name, type='AOD')
            AOD_ds = prov.values()[0][0].replace('recon', 'merge')
            log.info('AOD: ' + AOD_ds)
            AOD_events = int(
                get_datasets(amiclient, AOD_ds, fields='events',
                             flatten=True)[0][0])
        except IndexError:
            # no AOD provenance found: fall back to the ntuple count
            log.info('AOD: UNKNOWN')
            AOD_events = ntuple_events
        log.info(name)
        log.info("\tevts\tNTUP\tAOD")
        log.info("\t%i\t%i\t%i" % (events, ntuple_events, AOD_events))
        if events != ntuple_events:
            log.warning("NTUP MISMATCH")
        if events != AOD_events:
            log.warning("AOD MISMATCH")
        if events != ntuple_events and (events != AOD_events or AOD_events == 0):
            log.warning("MISSING EVENTS")
            complete = False
        if child:
            return out.getvalue(), complete
        return complete
    except Exception, e:
        import traceback
        log.warning("dataset %s exception" % name)
        traceback.print_exception(*sys.exc_info())
        if child:
            return out.getvalue(), False
        return False