def convert(self, data, sort_field="panda_queue", should_be_sorted_by="panda_queue", *args, **kwargs):
    """Convert the AGIS data to the desired format of being ordered by Panda queues

    :param data: data to be converted in the desired format"""
    json_data = {}
    if isinstance(data, dict):
        for key, d in data.items():
            if sort_field in d:
                json_data[d[sort_field]] = d
    elif isinstance(data, list):
        for d in data:
            if sort_field in d:
                json_data[d[sort_field]] = d
    else:
        logger.error("Data is not type dict or list but: {}".format(type(data)))
    return json_data
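# Hedged usage sketch (not part of the original module): assuming this convert()
# is a method of the CRIC/AGIS-style scraper class used elsewhere in this repo,
# keying a list of queue dictionaries by their "panda_queue" field would look
# roughly like this. The instance and sample data below are made up.
#
#   cric = CRIC()
#   queues = [
#       {"panda_queue": "SITE_A_QUEUE", "corecount": 8},
#       {"panda_queue": "SITE_B_QUEUE", "corecount": 1},
#   ]
#   by_queue = cric.convert(data=queues, sort_field="panda_queue")
#   # by_queue == {"SITE_A_QUEUE": {...}, "SITE_B_QUEUE": {...}}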
def getRegionFromExpression(expr):
    # this is where you implement custom mappings to region names
    if args.analysis == '1Lbb':
        if expr == 'SRLM' or expr == 'SRMM' or expr == 'SRHM':
            return 0
        if expr == 'SRLMincl' or expr == 'SRMMincl' or expr == 'SRHMincl':
            return expr
        if 'SRLM' in expr:
            return 'SRLM'
        if 'SRMM' in expr:
            return 'SRMM'
        if 'SRHM' in expr:
            return 'SRHM'
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        if args.background == 'zjets':
            # return without e.g. '_bin0' at the end
            return re.sub(r'_bin\d*', '', expr)
        else:
            return expr
    for region in args.regions:
        if expr == region:
            return expr
    logger.error('Region not found: {}'.format(expr))
    return 0
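# Illustrative mapping examples (hypothetical region names, following the rules above):
#   1Lbb:      'SRLM_cuts'  -> 'SRLM'      (contains 'SRLM')
#              'SRLMincl'   -> 'SRLMincl'
#              'SRLM'       -> 0           (the caller skips regions that map to 0)
#   strong1L with --background zjets:
#              'TR2J_bin0'  -> 'TR2J'      (the '_bin<N>' suffix is stripped)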
def convert(self, data, append_mode=False, sort_field="panda_queue", should_be_ordered_by="panda_queue", *args, **kwargs):
    """Convert the REBUS data to the desired format of being ordered by Panda queues

    :param data: data to be converted in the desired format"""
    json_data = RebusDict()
    if isinstance(data, dict):
        for key, d in data.items():
            if key == "NULL":
                # CRIC has this huge NULL entry?!
                continue
            if isinstance(d.get(sort_field, []), list):
                for site in d.get(sort_field, []):
                    logger.debug("Adding {}".format(site))
                    logger.debug(d)
                    json_data.update(object={site: d}, append_mode=append_mode)
            elif isinstance(d.get(sort_field, {}), collections.Hashable):
                logger.debug("Adding {}".format(d.get(sort_field, {})))
                json_data.update(object={d[sort_field]: d}, append_mode=append_mode)
    else:
        logger.error("Data is not type dict or list but: {}".format(type(data)))
    return json_data
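# Hedged usage sketch (illustrative only): when sort_field points to a
# list-valued field, e.g. the "rcsites" of a federation, the same entry is
# attached to every site in that list. RebusDict.update() with
# append_mode=True is assumed to merge entries rather than overwrite them.
#
#   rebus = REBUS()
#   federations = {"FED-XY": {"rcsites": ["SITE_A", "SITE_B"], "pledges": {}}}
#   by_site = rebus.convert(data=federations, sort_field="rcsites")
#   # by_site == {"SITE_A": {...}, "SITE_B": {...}}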
def get_xsec(xsec_file, my_dsid=None):
    with open(xsec_file) as f:
        f.readline()  # skip the header line
        for l in f:
            # DSID is the first field
            if int(my_dsid) == int(l.rstrip().split()[0]):
                xsec = float(l.rstrip().split()[2])
                filter_eff = float(l.rstrip().split()[3])
                kfactor = float(l.rstrip().split()[4])
                logger.info("Got xsec: {}".format(xsec))
                return xsec * filter_eff * kfactor
        logger.error("Didn't find an xsec ... sorry!")
        return None
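# Hedged sketch of the whitespace-separated layout this parser assumes
# (one header line, then DSID in column 0, cross-section in column 2,
# filter efficiency in column 3 and k-factor in column 4), roughly matching
# the PMGxsecDB text files referenced elsewhere in this repo. The DSID and
# numbers below are placeholders, not real values.
#
#   <DSID>   <name>           <xsec>   <filter_eff>  <kfactor>
#   123456   my_sample_name   1.2345   0.5           1.1
#
#   get_xsec("PMGxsecDB_mc16.txt", my_dsid=123456)  # -> 1.2345 * 0.5 * 1.1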
def getxsec(filename):
    if "oneStep" in filename:
        mass_string = filename.replace('GG_oneStep_', '').replace('.root', '').split('_')[0]
    elif "Wh_hbb" in filename:
        mass_string = filename.replace('C1N2_Wh_hbb_', '').replace('.root', '').split('_')[0]
    else:
        logger.error("Cannot figure out signal model.")
        raise ValueError("Sorry, need to exit here.")
    mass_string = mass_string.replace("p0", "").replace("p5", ".5")
    if mass_string not in dict:
        # mass point not in the xsec dictionary: interpolate between the two
        # neighbouring 5 GeV mass points
        lowerMass = int(float(mass_string)) // 5 * 5
        upperMass = lowerMass + 5
        xsec = (dict[str(lowerMass)] + dict[str(upperMass)]) / 2.0
        return xsec
    else:
        return dict[mass_string]
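# Worked example of the interpolation above (illustrative file name and masses):
# a file like 'C1N2_Wh_hbb_352p5_200p0.root' gives mass_string '352.5'. If that
# key is missing from the xsec dictionary, lowerMass = 350 and upperMass = 355,
# and the returned value is the average of dict['350'] and dict['355'].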
    return json.load(f)


panda_queues = getJSON("data/scraped_cric_pandaqueue.json")
panda_resources = getJSON("data/map_PQ_names.json")
site_resources = getJSON("data/scraped_cric_sites.json")
ddm_resources = getJSON("data/scraped_cric_ddm.json")
pledges_resources = getJSON("data/scraped_rebus_pledges.json")
federations_resources = getJSON("data/scraped_rebus_federations.json")
benchmarks_resources = getJSON("data/scraped_elasticsearch_benchmark.json")

# get the actual job numbers from panda
err, siteResourceStats = Client.get_job_statistics_per_site_label_resource(10)
if err:
    logger.error("Panda error: " + str(err))
    msg = "Panda server returned an error.\n\nError:\n" + str(err)
    subj = "[QMonit error] PandaServer"
    notifications.send_email(
        message=msg,
        subject=subj,
        **{"password": config.get("credentials_adcmon", "password")}
    )

# InfluxDB client instance for uploading data later on
db_name = "monit_jobs" if not args.testDB else "test_monit_jobs"
try:
    client = InfluxDBClient(
        # host, port, user, password, database, ssl=True, verify_ssl=False
        "dbod-eschanet.cern.ch", 8080, username, password, db_name, True, False
def run():
    # Each time the scrapers are run, we update the PQ map
    pqs = pq_map.PQ_names_map(file="data/map_PQ_names.json")
    if not pqs.update(
        ifile="data/scraped_cric_pandaqueue.json",
        ofile="data/map_PQ_names.json",
        key="panda_resource",
    ):
        logger.warning("PQ map is not available")

    if args.interval == "10m":
        # Now run all the scrapers that should run in 10min intervals
        # First the PQ CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/pandaqueue/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="panda_resource")
        if cric.save(file="data/scraped_cric_pandaqueue.json", data=json_data):
            logger.info("Scraped PQ CRIC")
        else:
            logger.error("Problem scraping PQ CRIC")

    elif args.interval == "1h":
        # Run all the scrapers that only need to be run once per hour
        # (because they don't change too often)

        # Next the ATLAS sites CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/site/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="name")
        if cric.save(file="data/scraped_cric_sites.json", data=json_data):
            logger.info("Scraped sites CRIC")
        else:
            logger.error("Problem scraping sites CRIC")

        # Now the DDM info from CRIC
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/ddmendpoint/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="site")
        if cric.save(file="data/scraped_cric_ddm.json", data=json_data):
            logger.info("Scraped DDM CRIC")
        else:
            logger.error("Problem scraping DDM CRIC")

        # Next up is REBUS, start with the actual federation map
        rebus = REBUS()
        raw_data = rebus.download(
            url="https://wlcg-cric.cern.ch/api/core/federation/query/?json"
        )
        json_data = rebus.convert(data=raw_data, sort_field="rcsites")
        if rebus.save(file="data/scraped_rebus_federations.json", data=json_data):
            logger.info("Scraped federations CRIC")
        else:
            logger.error("Problem scraping federations CRIC")

        # then the pledges
        # can actually use same JSON raw data as before
        json_data = rebus.convert(
            data=raw_data, sort_field="accounting_name", append_mode=True
        )
        if rebus.save(file="data/scraped_rebus_pledges.json", data=json_data):
            logger.info("Scraped pledges CRIC")
        else:
            logger.error("Problem scraping pledges CRIC")

        # we also get datadisk information from monit Grafana
        url = config.get("credentials_monit_grafana", "url")
        token = config.get("credentials_monit_grafana", "token")

        # query a 24h window ending 12 hours ago (timestamps in ms)
        now = int(round(time.time() * 1000))
        date_to = now - 12 * 60 * 60 * 1000
        date_from = date_to - 24 * 60 * 60 * 1000
        period = """"gte":{0},"lte":{1}""".format(date_from, date_to)

        data = (
            """{"search_type":"query_then_fetch","ignore_unavailable":true,"index":["monit_prod_rucioacc_enr_site*"]}\n{"size":0,"query":{"bool":{"filter":[{"range":{"metadata.timestamp":{"""
            + period
            + ""","format":"epoch_millis"}}},{"query_string":{"analyze_wildcard":true,"query":"data.account:* AND data.campaign:* AND data.country:* AND data.cloud:* AND data.datatype:* AND data.datatype_grouped:* AND data.prod_step:* AND data.provenance:* AND data.rse:* AND data.scope:* AND data.experiment_site:* AND data.stream_name:* AND data.tier:* AND data.token:(\\\"ATLASDATADISK\\\" OR \\\"ATLASSCRATCHDISK\\\") AND data.tombstone:(\\\"primary\\\" OR \\\"secondary\\\") AND NOT(data.tombstone:UNKNOWN) AND data.rse:/.*().*/ AND NOT data.rse:/.*(none).*/"}}]}},"aggs":{"4":{"terms":{"field":"data.rse","size":500,"order":{"_term":"desc"},"min_doc_count":1},"aggs":{"1":{"sum":{"field":"data.files"}},"3":{"sum":{"field":"data.bytes"}}}}}}\n"""
        )
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": "Bearer %s" % token,
        }

        grafana = Grafana(url=url, request=data, headers=headers)
        raw_data = grafana.download()
        pprint.pprint(raw_data)
        json_data = grafana.convert(data=raw_data.json())
        if grafana.save(file="data/scraped_grafana_datadisk.json", data=json_data):
            logger.info("Scraped datadisks from monit grafana")
        else:
            logger.error("Problem scraping datadisks from monit grafana")

        # TODO: not running ES scraper for now since the benchmark jobs are no longer being run
        # # get credentials
        # password = config.get("credentials_elasticsearch", "password")
        # username = config.get("credentials_elasticsearch", "username")
        # host = config.get("credentials_elasticsearch", "host")
        # arg = ([{'host': host, 'port': 9200}])
        # elasticsearch = ElasticSearch(arg, **{'http_auth': (username, password)})
        # kwargs = {
        #     'index': "benchmarks-*",
        #     'body': {
        #         "size": 10000, "query": {"match_all": {}},
        #         "collapse": {"field": "metadata.PanDAQueue", "inner_hits": {"name": "most_recent", "size": 50, "sort": [{"timestamp": "desc"}]}}
        #     },
        #     'filter_path': [""]
        # }
        # raw_data = elasticsearch.download(**kwargs)
        # json_data = elasticsearch.convert(data=raw_data)
        #
        # if elasticsearch.save(file='data/scraped_elasticsearch_benchmark.json', data=json_data):
        #     logger.info('Scraped benchmark results from ES')
        # else:
        #     logger.error('Problem scraping benchmark results from ES')

    else:
        # Nothing to do otherwise
        print("Dropping out")
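# Hedged sketch (not the actual Grafana helper class): the datadisk download
# above is assumed to boil down to a plain POST of the multi-line Elasticsearch
# msearch payload with the bearer token, roughly like this.
#
#   import requests
#   response = requests.post(url, data=data, headers=headers)
#   buckets = response.json()["responses"][0]["aggregations"]["4"]["buckets"]
#   # each bucket then carries the per-RSE sums of data.files and data.bytes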
if __name__ == "__main__":
    try:
        run()
    except Exception as e:
        logger.error("Got error while running scrapers. " + str(e))
        msg = "QMonit failed to run a scraper job.\n\nError:\n" + str(e)
        subj = "[QMonit error] InfluxDB"
        notifications.send_email(
            message=msg,
            subject=subj,
            **{"password": config.get("credentials_adcmon", "password")}
        )
def main():
    for f in args.files:
        logger.info('Got file: {}'.format(os.path.basename(f.name)))

        if not os.path.basename(f.name).endswith(".tex"):
            logger.error('This is not a tex file. Do not try to fool me again! Skipping...')
            continue

        # check if we can get a background matched
        if args.background.lower() in os.path.basename(f.name).lower():
            logger.info('Found process: {}'.format(args.background))
        else:
            logger.error('No process found! Dropping out.')
            sys.exit()

        # now check if we can get the systematic variation name matched
        sys_matches = [
            s for s in args.systematics
            if s.lower() in os.path.basename(f.name).lower()
        ]
        if len(sys_matches) > 1:
            logger.warning('Found more than one systematic variation matching filename: {}'.format(sys_matches))
            logger.warning('Will only take the first one.')
        elif len(sys_matches) == 1:
            logger.info('Found systematic variation: {}'.format(sys_matches[0]))
        elif len(sys_matches) == 0:
            logger.error('No systematic variation found! Dropping out.')
            sys.exit()
        systematic = sys_matches[0]

        # let's check if we are using an up or a down variation (or symmetric...)
        is_up = False
        is_down = False
        if "up" in os.path.basename(f.name).lower():
            is_up = True
            logger.info('This should be an UP variation.')
        elif "down" in os.path.basename(f.name).lower():
            is_down = True
            logger.info('This should be a DOWN variation.')
        else:
            logger.warning('Probably neither up nor down, but a symmetrised table. Sure?')

        # now comes the ugly parsing part
        # can we do this at least not too ugly?
        lines = []
        # First, get the relevant part from the tex file. If the user has made it easy
        # and tagged the respective parts with %tex2hf, we can simply use what's between them.
        keywords = False
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as s:
            if s.find(b'tex2hf') != -1:
                logger.info('Found keywords in file, so now we can just use what is between them')
                keywords = True

        if keywords:
            copy = False
            for line in f:
                if "tex2hf" in line.strip():
                    copy = not copy
                    continue
                elif copy:
                    lines.append(line.strip())
        else:
            # otherwise just drop out, I don't want to think about this any further ...
            logger.error(
                'You need to provide keywords. I am too lazy to think about something else. '
                'Put "tex2hf" before the first and after the last line (as a comment of course, '
                'you do not want this to show up in the table, do you?).')
            sys.exit()

        for line in lines:
            # get rid of any symbols we don't need
            line = line.strip().replace("$", "").replace("\\", "")

            # latex columns, get the region first. Need to strip all whitespace
            region = "".join(line.split("&")[0].split())
            region = getRegionFromExpression(region)
            if region == 0:
                continue

            # then the uncertainty, usually in the last column
            # print("{} : {}".format(region, line.split("&")[-1]))
            uncertainty = round(
                float(line.split("&")[-1].replace("pm", "").replace("%", "").strip()) / 100, 4)
            # print(uncertainty)

            if is_up:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning('Uncertainty larger than 100%. Truncating to -1.0.')
                values[systematic][region]["up"].append(uncertainty)
            elif is_down:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning('Uncertainty larger than 100%. Truncating to -1.0.')
                values[systematic][region]["down"].append(uncertainty)
            else:
                up_unc = abs(uncertainty)
                down_unc = -up_unc
                if abs(uncertainty) > 1.0:
                    logger.warning('Uncertainty larger than 100%. Truncating down variation to -1.0.')
                    down_unc = -1
                values[systematic][region]["up"].append(up_unc)
                values[systematic][region]["down"].append(down_unc)
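# Hedged input/output example (illustrative region and numbers, not from a real
# table): between the %tex2hf markers a line is expected to look roughly like
#
#   TRLM & 1203 & 1187 & $\pm 5.2\%$ \\
#
# The first column is the region and the last column the relative uncertainty,
# so for an UP-variation table this line appends 0.052 to
# values[systematic]['TRLM']['up'].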
if not (args.analysis or (args.background and args.regions)):
    logger.error('No analysis or processes/regions given! Dropping out.')
    sys.exit()
elif not args.analysis and (args.background and args.regions):
    logger.info("Did not provide analysis, but provided background and regions, so let's guess.")

if args.analysis:
    logger.info('Considering analysis: %s' % args.analysis)
    if args.analysis == '1Lbb':
        args.regions = [
            'SRLMincl', 'SRMMincl', 'SRHMincl', 'SRLM', 'SRMM', 'SRHM', 'WR',
            'STCR', 'TRLM', 'TRMM', 'TRHM', 'VRtt1on', 'VRtt2on', 'VRtt3on',
            'VRtt1off', 'VRtt2off', 'VRtt3off'
        ]
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        regions = [
    '/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt',
    default=None)
parser.add_argument('--xsec', '-x', help='Actual xsection (in case no xsec file) in pb', default=None)
parser.add_argument('--fb', help='Normalise to 1/fb instead of 1/pb', action='store_true')
parser.add_argument('--applyGenWeight', help='Apply generator weight to the normalisation branch', action='store_true')
args = parser.parse_args()

if not os.path.isfile(args.inputfile):
    logger.error("Provided ROOT file does not exist or is not a file!")
    raise ValueError("Sorry, need to exit here.")

if args.xsec_file:
    if not os.path.isfile(args.xsec_file):
        logger.error("Provided xsec file does not exist or is not a file!")
        raise ValueError("Sorry, need to exit here.")
    else:
        if not args.dsid:
            logger.warning("Provided an xsec file, but no DSID, will try to guess ...")
        else:
            logger.info("Provided xsec file and DSID, thanks mate!")


def get_xsec(xsec_file, my_dsid=None):
parser.add_argument('--branch-name', '-n', help='the branch name to be smeared', default='mbb')
parser.add_argument('--tag', '-t', help='tag after name', default='smeared')
parser.add_argument('--loc', '-l', help='loc of the skewnorm', type=float, default=1.0)
parser.add_argument('--scale', '-a', help='scale of the skewnorm', type=float, default=0.5)
parser.add_argument('--skew', '-k', help='skew of the skewnorm', type=float, default=0)
parser.add_argument('--mu', '-m', help='mu of the gaussian', type=float, default=1.0)
parser.add_argument('--sigma', '-s', help='sigma of the gaussian', type=float, default=0.5)
parser.add_argument('--method', '-e', help='norm or skewnorm', default='norm')
parser.add_argument('--seed', help='seed for random numbers', type=int, default=1234)
args = parser.parse_args()

if args.method not in ['norm', 'skewnorm']:
    logger.error("Provided smearing method not implemented!")
    raise ValueError("Sorry, need to exit here.")

# if not os.path.isdir(args.inputdir):
#     logger.error("Provided path does not exist or is not a directory!")
#     raise ValueError("Sorry, need to exit here.")

np.random.seed(args.seed)

for indx, f in enumerate(args.inputfiles):
    if not f.endswith("update.root"):
        continue
    logger.info("Updating " + f)

    treename = f.replace("_update.root", "_NoSys")
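# Hedged sketch (not part of the original script): assuming the '--method'
# options map onto numpy's Gaussian and scipy's skew-normal distributions with
# the parameters defined above, per-event smearing factors could be drawn like
# this. The helper name _draw_smearing_factors is made up for illustration.
import numpy as np
from scipy.stats import skewnorm


def _draw_smearing_factors(n_events, method="norm", mu=1.0, sigma=0.5,
                           loc=1.0, scale=0.5, skew=0.0):
    """Return one multiplicative smearing factor per event (illustrative only)."""
    if method == "norm":
        return np.random.normal(mu, sigma, size=n_events)
    # skew-normal: skew=0 reduces to a plain Gaussian with the given loc/scale
    return skewnorm.rvs(skew, loc=loc, scale=scale, size=n_events)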
    '-s',
    '--signal',
    action="store_true",
    help="Use signal tree naming convention (default is background trees)")
args = parser.parse_args()

cmd = "rootls {}".format(args.file)
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
trees = output.split()

if args.signal:
    naming = 5
else:
    naming = 1

raw_names = [
    "_".join(fullname.split("_")[:naming]) + "_" for fullname in trees
]
unique_names = list(set(raw_names))
split_trees = [[t for t in trees if b in t] for b in unique_names]

length = len(split_trees[0])
for (l, bkg) in zip(split_trees, unique_names):
    logger.info("For process {}: {} trees".format(bkg, len(l)))
    if len(l) != length:
        logger.error("Not the right length!")
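# Illustrative naming-convention examples (the tree names are hypothetical):
# with the background convention (naming = 1), 'ttbar_NoSys' and
# 'ttbar_JET_GroupedNP_1__1up' both collapse to the prefix 'ttbar_';
# with the signal convention (naming = 5), 'GG_oneStep_1200_800_400_NoSys'
# collapses to 'GG_oneStep_1200_800_400_'.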
    if processname not in processes:
        processes.append(processname)

    mc16a = 0
    mc16d = 0
    mc16e = 0
    mc16a = tree.GetEntries(
        "(RandomRunNumber >= 276262 && RandomRunNumber <= 320000)")
    mc16d = tree.GetEntries(
        "(RandomRunNumber >= 324320 && RandomRunNumber <= 337833)")
    mc16e = tree.GetEntries("(RandomRunNumber >= 348885)")

    if not mc16a > 0:
        logger.error("No MC16A in {}".format(tree))
    if not mc16d > 0:
        logger.error("No MC16D in {}".format(tree))
    if not mc16e > 0:
        logger.error("No MC16E in {}".format(tree))

logger.info("Checking for the same number of trees per process now.")
for process in processes:
    i = 0
    for treename in trees_list:
        if process in treename:
            i += 1
    logger.info("{} has {} trees".format(process, i))

tf.Close()
import logging

from commonHelpers.logger import logger

logger = logger.getChild("mephisto")

parser = argparse.ArgumentParser(
    description='This script helps in creating a proper normalization for trees that have been '
    'processed through e.g. SimpleAnalysis and lack the "traditional" genWeight branch.',
    epilog="You beautiful person, you.")
parser.add_argument('inputdir', help='The directory containing all the ROOT trees')
parser.add_argument('xsecsfile', help='Text file containing the xsecs')
args = parser.parse_args()

if not os.path.isdir(args.inputdir):
    logger.error("Provided path does not exist or is not a directory!")
    raise ValueError("Sorry, need to exit here.")
if not os.path.isfile(args.xsecsfile):
    logger.error("Provided file does not exist or is not a file!")
    raise ValueError("Sorry, need to exit here.")

with open(args.xsecsfile, 'r') as document:
    dict = {}
    for line in document:
        line = line.split()
        if not line:  # empty line?
            continue
        dict[line[0]] = float(line[1])
        print('{} : {}'.format(int(line[0]), float(line[1])))
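# Hedged sketch of the expected xsecs text file: whitespace-separated lines with
# the mass point in the first column and the cross-section in the second
# (the values below are placeholders, not real cross-sections):
#
#   300   0.386
#   305   0.354
#   310   0.325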
    print(test)
    sys.exit(0)

suite = unittest.TestSuite()

# based on code snippet from http://stackoverflow.com/questions/1732438/how-do-i-run-all-python-unit-tests-in-a-directory#15630454
for postfix in tests:
    t = "test.test_" + postfix
    if "." in postfix:
        # i don't have a better solution yet, so hack for now
        importTest = ".".join(t.split(".")[:-2])
    else:
        importTest = t
    try:
        logger.info("Trying to import {}".format(importTest))
        mod = __import__(importTest, globals(), locals(), ['suite'])
    except ImportError:
        logger.error("Test {} not found - try {}".format(t, testmodules))
        raise
    try:
        # If the module defines a suite() function, call it to get the suite.
        suitefn = getattr(mod, 'suite')
        suite.addTest(suitefn())
    except (ImportError, AttributeError):
        # else, just load all the test cases from the module.
        logger.info("Loading test {}".format(t))
        suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t))

result = unittest.TextTestRunner(verbosity=verbosity).run(suite)
sys.exit(not result.wasSuccessful())