def ds_from_ldn(self, ldn):
    """Build a meta.Dataset for the logical dataset name *ldn*.

    Queries AMI through self.client, writes the generic AMI info onto
    the dataset, and — for Monte Carlo samples — the MC-specific info
    as well.  Returns the populated Dataset.
    """
    ami_info = query.get_dataset_info(self.client, ldn)
    dataset = meta.Dataset(ldn)
    self._write_ami_info(dataset, ami_info)
    if not dataset.is_data:
        # Only Monte Carlo datasets carry the extra MC metadata.
        self._write_mc_ami_info(dataset, ami_info)
    return dataset
def _get_expected_counts(client, ds):
    """Return the AMI 'totalEvents' count for *ds*, or None when ds is None."""
    if ds is None:
        return None
    # Resolve the logical dataset name, then ask AMI for its event count.
    info = query.get_dataset_info(client, _ldn(ds))
    return int(info.info['totalEvents'])
def gdi(ds) : di = None try: di = get_dataset_info(client, ds.strip('/')) except Exception, msg: # DG 2013-11-25 # I don't know why it's returning a valid result while throwing an exception # Ignore it for now. # print Exception print 'cannot find ',ds
def validate_single(args, child=True): if child: from cStringIO import StringIO sys.stdout = out = StringIO() sys.stderr = out name = args[0] info = args[1] complete = True try: dirs = info.dirs root_files = [] for dir in dirs: root_files += glob.glob(os.path.join(dir, info.file_pattern)) events = 0 for fname in root_files: try: with root_open(fname) as rfile: try: # skimmed dataset events += int(rfile.cutflow_event[0]) except DoesNotExist: # unskimmed dataset tree = rfile.tau events += tree.GetEntries() except IOError: log.warning("Currupt file: %s" % fname) pass # determine events in original ntuples # use first dir ds_name = info.ds log.info('NTUP: ' + ds_name) ds_info = get_dataset_info(amiclient, ds_name) ntuple_events = int(ds_info.info['totalEvents']) try: # determine events in AODs prov = get_provenance(amiclient, ds_name, type='AOD') AOD_ds = prov.values()[0][0].replace('recon', 'merge') log.info('AOD: ' + AOD_ds) AOD_events = int(get_datasets(amiclient, AOD_ds, fields='events', flatten=True)[0][0]) except IndexError: log.info('AOD: UNKNOWN') AOD_events = ntuple_events log.info(name) log.info("\tevts\tNTUP\tAOD") log.info("\t%i\t%i\t%i" % (events, ntuple_events, AOD_events)) if events != ntuple_events: log.warning("NTUP MISMATCH") if events != AOD_events: log.warning("AOD MISMATCH") if events != ntuple_events and (events != AOD_events or AOD_events == 0): log.warning("MISSING EVENTS") complete = False if child: return out.getvalue(), complete return complete except Exception, e: import traceback log.warning("dataset %s exception" % name) traceback.print_exception(*sys.exc_info()) if child: return out.getvalue(), False return False
def validate_single(args, child=True): if child: from cStringIO import StringIO sys.stdout = out = StringIO() sys.stderr = out name = args[0] info = args[1] complete = True try: dirs = info.dirs root_files = [] for dir in dirs: root_files += glob.glob(os.path.join(dir, info.file_pattern)) events = 0 for fname in root_files: try: with root_open(fname) as rfile: try: # skimmed dataset events += int(rfile.cutflow_event[0]) except DoesNotExist: # unskimmed dataset tree = rfile.tau events += tree.GetEntries() except IOError: log.warning("Currupt file: %s" % fname) pass # determine events in original ntuples # use first dir ds_name = info.ds log.info('NTUP: ' + ds_name) ds_info = get_dataset_info(amiclient, ds_name) ntuple_events = int(ds_info.info['totalEvents']) try: # determine events in AODs prov = get_provenance(amiclient, ds_name, type='AOD') AOD_ds = prov.values()[0][0].replace('recon', 'merge') log.info('AOD: ' + AOD_ds) AOD_events = int( get_datasets(amiclient, AOD_ds, fields='events', flatten=True)[0][0]) except IndexError: log.info('AOD: UNKNOWN') AOD_events = ntuple_events log.info(name) log.info("\tevts\tNTUP\tAOD") log.info("\t%i\t%i\t%i" % (events, ntuple_events, AOD_events)) if events != ntuple_events: log.warning("NTUP MISMATCH") if events != AOD_events: log.warning("AOD MISMATCH") if events != ntuple_events and (events != AOD_events or AOD_events == 0): log.warning("MISSING EVENTS") complete = False if child: return out.getvalue(), complete return complete except Exception, e: import traceback log.warning("dataset %s exception" % name) traceback.print_exception(*sys.exc_info()) if child: return out.getvalue(), False return False
# except: # print "Cannot open config file", filelistconfig # sys.exit(1) # treeline = "TREENAME=%s\n" % treename # outfile.write(treeline) for ds in dslist: isData = True if not datare.search(ds): isData = False dsinfo = 0 try: dsinfo = get_dataset_info(client, ds+"/") except: try: dsinfo = get_dataset_info(client, ds) except: print "Dataset", origds, "not found in AMI, skipping." if dsinfo == 0: continue dsnumber = 0 if isData: dsnumber = dsinfo.info["runNumber"] else: dsnumber = dsinfo.info["datasetNumber"]