def get_xsec(xsec_file, my_dsid=None):
    with open(xsec_file) as f:
        f.readline()  # skip the header line
        for l in f:
            # DSID is the first field
            fields = l.rstrip().split()
            if int(my_dsid) == int(fields[0]):
                xsec = float(fields[2])
                filter_eff = float(fields[3])
                kfactor = float(fields[4])
                logger.info("Got xsec: {}".format(xsec))
                return xsec * filter_eff * kfactor
        else:
            logger.error("Didn't find a xsec ... sorry!")
            return None
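# Usage sketch (illustration only; the file name, DSID and numbers below are made up).
# The function assumes a whitespace-separated text file with one header line, where
# column 0 is the DSID, column 2 the cross-section, column 3 the filter efficiency
# and column 4 the k-factor (column 1 is not used here), e.g.
#
#   123456   SomeSampleName   0.0123   0.45   1.08
#
#   effective_xsec = get_xsec("data/xsec.txt", my_dsid=123456)  # -> 0.0123 * 0.45 * 1.08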
def __init__(self, selections=None, processes=None, weights=None,
             lumifactor=None, output_path=None, output_name=None):
    logger.info("Initializing yieldsTable")

    self.selections = selections
    self.processes = [_fixLength(l, 4) for l in processes]
    self.weights = weights
    self.lumifactor = lumifactor

    self.bkg_processes = [
        processname
        for processname, type, trees, processweight in self.processes
        if type == "background"
    ]
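# Illustration only: the constructor unpacks each process entry into
# (name, type, trees, weight) via _fixLength(l, 4), so a hypothetical configuration
# could look like this, with made-up file, tree and cut names:
#
# processes = [
#     ("ttbar", "background", [("ttbar.root", "ttbar_NoSys")], None),
#     ("wjets", "background", [("wjets.root", "wjets_NoSys")], None),
# ]
# selections = {"SRLM": "nJet30>=2 && met>240"}  # selection name -> cut string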
client = InfluxDBClient(
    "dbod-eschanet.cern.ch", 8080, username, password, "monit_jobs", True, False
)

points_list = []

reader = mysql.connector.connect(
    user="******",
    password=password,
    host="dbod-sql-graf.cern.ch",
    port=5501,
    database="monit_jobs",
)
read_cursor = reader.cursor()

logger.info("Getting existing data.")
read_cursor.execute(
    "select panda_queue, resource, prod_source, avg1h_running_jobs, avg6h_running_jobs, avg12h_running_jobs, avg24h_running_jobs, avg7d_running_jobs, avg30d_running_jobs from jobs"
)

# Explicitly set the timestamp in the InfluxDB point. This avoids having multiple
# entries per 10-minute interval (can happen sometimes with acron).
epoch = datetime.utcfromtimestamp(0)


def unix_time_nanos(dt):
    return (dt - epoch).total_seconds() * 1e9


current_time = datetime.utcnow().replace(microsecond=0, second=0, minute=0)
unix = int(unix_time_nanos(current_time))
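# For example, 2024-01-01 12:34:56 UTC is truncated to 12:00:00 and converted to
# 1704110400000000000 ns since the epoch; re-running within the same hour then
# produces the same timestamp and overwrites the existing point instead of adding
# a duplicate.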
def run():
    config = ConfigParser.ConfigParser()
    config.read("config.cfg")
    password = config.get("credentials", "password")
    username = config.get("credentials", "username")
    database = config.get("credentials", "database")

    logger.info("Constructing InfluxDB queries.")

    if args.average == "1h":
        retention = "10m"
        delta = "2h"
        time_units = 6
    elif args.average == "1d":
        retention = "1h"
        delta = "2d"
        time_units = 24
    else:
        return 0

    client = InfluxDBClient(
        "dbod-eschanet.cern.ch", 8080, username, password, "monit_jobs", True, False
    )

    rs_distinct_sets = client.query(
        """select * from "{}"."jobs" where "prod_source" != '' group by panda_queue, prod_source, resource, job_status limit 1""".format(
            retention
        )
    )
    rs_result = client.query(
        """select * from "{}"."jobs" where time > now() - {} and "prod_source" != '' group by panda_queue, prod_source, resource, job_status """.format(
            retention, delta
        )
    )

    raw_dict = rs_result.raw
    series = raw_dict["series"]

    logger.info("Got data from InfluxDB.")
    logger.info("Averaging now.")

    # uploader = InfluxDBClient('dbod-eschanet.cern.ch', 8080, username, password, "test", True, False)

    points_list = []

    for rs in rs_distinct_sets.keys():
        rs = rs[1]  # rs is a tuple
        logger.debug(rs)

        filtered_points = [
            p
            for p in series
            if p["tags"]["panda_queue"] == rs["panda_queue"]
            and p["tags"]["resource"] == rs["resource"]
            and p["tags"]["prod_source"] == rs["prod_source"]
            and p["tags"]["job_status"] == rs["job_status"]
        ]

        if len(filtered_points) == 0:
            logger.debug("Got no points for this set of keys.")
            continue

        filtered_points = filtered_points[0]

        values = filtered_points["values"]
        tags = filtered_points["tags"]
        columns = filtered_points["columns"]

        # reverse in place, want to have latest points first
        values.reverse()

        # get the last (most recent) point, because this is the one to overwrite
        latest_value = values[0]

        # get averaged values
        if tags["job_status"] in ["failed", "finished", "cancelled", "closed"]:
            averaged_jobs = get_sum(time_units, values, columns.index("jobs"))
        else:
            averaged_jobs = get_average(time_units, values, columns.index("jobs"))

        averaged_cpu = get_average(time_units, values, columns.index("resource_factor"))
        averaged_corepower = get_average(time_units, values, columns.index("corepower"))
        averaged_HS06_benchmark = get_average(
            time_units, values, columns.index("HS06_benchmark")
        )
        averaged_HS06_pledge = get_average(
            time_units, values, columns.index("federation_HS06_pledge")
        )

        # construct rest of the data dict
        data = dict(zip(columns, latest_value))

        time = data["time"].replace("T", " ").replace("Z", "")

        if args.average == "1h":
            hash = time.split(".")[-1].ljust(9, "0")
        else:
            # got no hashes in 1h aggregate data yet
            m = hashlib.md5()
            m.update(
                str(tags["panda_queue"])
                + str(tags["prod_source"])
                + str(tags["resource"])
                + str(tags["job_status"])
            )
            hash = str(int(m.hexdigest(), 16))[0:9]

        time = unix + int(hash)

        data.update(tags)
        data.pop("time", None)
        data.pop("jobs", None)
        data.pop("resource_factor", None)
        data.pop("corepower", None)
        data.pop("HS06_benchmark", None)
        data.pop("federation_HS06_pledge", None)

        json_body = {
            "measurement": "jobs",
            "tags": data,
            "time": time,
            "fields": {
                "jobs": averaged_jobs,
                "resource_factor": averaged_cpu,
                "corepower": averaged_corepower,
                "HS06_benchmark": averaged_HS06_benchmark,
                "federation_HS06_pledge": averaged_HS06_pledge,
            },
        }

        # sometimes I f**k up and then I want to kill the last measurement...
        if args.kill_last:
            for key, value in json_body["fields"].iteritems():
                json_body["fields"][key] = 0.0

        logger.debug(json_body)
        points_list.append(json_body)

    client.write_points(
        points=points_list, time_precision="n", retention_policy=args.average
    )
logging.getLogger("yieldsTable").setLevel(logging.DEBUG)

if not args.configfile:
    raise Exception("Need to specify a config file")

if args.configfile:
    try:
        exec(open(args.configfile).read())
    except:
        print("can't read configfile {}".format(args.configfile))
        traceback.print_exc()

yieldsTable = yieldsTable(**config)
table = yieldsTable.createYieldstable()

logger.info("Got table")

########################################################
#
# okay, ugly TeX stuff starts here
#
########################################################

header = r'''\documentclass{standalone}
\usepackage{longtable}
\usepackage{booktabs}
\newcommand\MyHead[2]{%
  \multicolumn{1}{l}{\parbox{#1}{\centering #2}}
}
\begin{document}
'''
def texWrite():

    header = r'''
import ROOT
from ROOT import gSystem
gSystem.Load("libSusyFitter.so")

from systematic import Systematic
from configManager import configMgr
'''

    #if args.analysis == 'strong1L':
    #    header += r'''
    #Regions = [ 'BVEM', 'BTEM' ]
    #MeffBins = [ '_bin1', '_bin2', '_bin3', '_bin4']
    #'''

    header += r'''
{}Systematics={{}}
'''.format(args.background)

    if args.analysis == '1Lbb':

        main = r''''''

        for sys, d in values.items():
            main += '''
'''
            for region, uncertainties in d.items():
                up_uncertainties = uncertainties["up"]
                down_uncertainties = uncertainties["down"]

                ups = ""
                for up_unc in up_uncertainties:
                    if up_unc > 0:
                        up_unc = "+{}".format(abs(up_unc))
                    else:
                        up_unc = "-{}".format(abs(up_unc))
                    ups += "(1.{}),".format(up_unc)

                downs = ""
                for down_unc in down_uncertainties:
                    if down_unc > 0:
                        down_unc = "+{}".format(abs(down_unc))
                    else:
                        down_unc = "-{}".format(abs(down_unc))
                    downs += "(1.{}),".format(down_unc)

                ups = ups[:-1]
                downs = downs[:-1]

                main += '''{bkg}Systematics['{bkg}{syst}_{region}'] = Systematic("{bkg}{syst}", configMgr.weights, [{ups}], [{downs}], "user","userHistoSys")
'''.format(bkg=args.background, syst=sys, region=region, ups=ups, downs=downs)

        footer = r'''
def TheorUnc(generatorSyst):
    for key in {bkg}Systematics:
        name=key.split('_')[-1]
        if "SRLMincl" in name:
            generatorSyst.append((("{bkg}","SRLMinclEM"), {bkg}Systematics[key]))
        elif "SRMMincl" in name:
            generatorSyst.append((("{bkg}","SRMMinclEM"), {bkg}Systematics[key]))
        elif "SRHMincl" in name:
            generatorSyst.append((("{bkg}","SRHMinclEM"), {bkg}Systematics[key]))
        elif "SRLM" in name:
            generatorSyst.append((("{bkg}","SRLMEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRLMEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRLMMu"), {bkg}Systematics[key]))
        elif "SRMM" in name:
            generatorSyst.append((("{bkg}","SRMMEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRMMEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRMMMu"), {bkg}Systematics[key]))
        elif "SRHM" in name:
            generatorSyst.append((("{bkg}","SRHMEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRHMEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRHMMu"), {bkg}Systematics[key]))
        elif "TRLM" in name:
            generatorSyst.append((("{bkg}","TRLMEM"), {bkg}Systematics[key]))
        elif "TRMM" in name:
            generatorSyst.append((("{bkg}","TRMMEM"), {bkg}Systematics[key]))
        elif "TRHM" in name:
            generatorSyst.append((("{bkg}","TRHMEM"), {bkg}Systematics[key]))
        elif "WR" in name:
            generatorSyst.append((("{bkg}","WREM"), {bkg}Systematics[key]))
        elif "STCR" in name:
            generatorSyst.append((("{bkg}","STCREM"), {bkg}Systematics[key]))
        elif "VRtt1on" in name:
            generatorSyst.append((("{bkg}","VRtt1onEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1onEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1onMu"), {bkg}Systematics[key]))
        elif "VRtt2on" in name:
            generatorSyst.append((("{bkg}","VRtt2onEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2onEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2onMu"), {bkg}Systematics[key]))
        elif "VRtt3on" in name:
            generatorSyst.append((("{bkg}","VRtt3onEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3onEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3onMu"), {bkg}Systematics[key]))
        elif "VRtt1off" in name:
            generatorSyst.append((("{bkg}","VRtt1offEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1offEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1offMu"), {bkg}Systematics[key]))
        elif "VRtt2off" in name:
            generatorSyst.append((("{bkg}","VRtt2offEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2offEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2offMu"), {bkg}Systematics[key]))
        elif "VRtt3off" in name:
            generatorSyst.append((("{bkg}","VRtt3offEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3offEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3offMu"), {bkg}Systematics[key]))
'''.format(bkg=args.background)

    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):

        main = r''''''

        for sys, d in values.items():
            main += '''
'''
            for region, uncertainties in d.items():
                up_uncertainties = uncertainties["up"]
                down_uncertainties = uncertainties["down"]

                ups = ""
                for up_unc in up_uncertainties:
                    if up_unc > 0:
                        up_unc = "+{}".format(abs(up_unc))
                    else:
                        up_unc = "-{}".format(abs(up_unc))
                    ups += "(1.{}),".format(up_unc)

                downs = ""
                for down_unc in down_uncertainties:
                    if down_unc > 0:
                        down_unc = "+{}".format(abs(down_unc))
                    else:
                        down_unc = "-{}".format(abs(down_unc))
                    downs += "(1.{}),".format(down_unc)

                ups = ups[:-1]
                downs = downs[:-1]

                main += '''{bkg}Systematics['{bkg}{syst}_{region}'] = Systematic("{bkg}{syst}", configMgr.weights, [{ups}], [{downs}], "user","userHistoSys")
'''.format(bkg=args.background, syst=sys, region=region, ups=ups, downs=downs)

        if args.background == 'zjets':
            footer = r'''
def TheorUnc(generatorSyst):
    for key in {bkg}Systematics:
        name=key.split('_')
        generatorSyst.append((("{bkg}",name[1]), {bkg}Systematics[key]))

    return generatorSyst
'''.format(bkg=args.background)
        else:
            footer = r'''
def TheorUnc(generatorSyst):
    for key in {bkg}Systematics:
        # regex would be better suited, but not sure we have that available, so lets work around it
        region = key.split('_')[2]
        bin = key.split('_')[3]
        tower = region[3:5]
        # Here is to hoping this doesnt break. Fingers crossed!
        generatorSyst.append((("{bkg}_"+tower+"_"+bin,region), {bkg}Systematics[key]))
'''.format(bkg=args.background)

    content = header + main + footer

    if not os.path.exists("hf_configs/"):
        os.makedirs("hf_configs/")

    with open(
            "hf_configs/" + "theoryUncertainties_" + args.analysis + "_" + args.background + ".py",
            'w') as f:
        f.write(content)

    logger.info("Wrote to file %s" % f.name)
def run():
    # Each time the scrapers are run, we update the PQ map
    pqs = pq_map.PQ_names_map(file="data/map_PQ_names.json")
    if not pqs.update(
        ifile="data/scraped_cric_pandaqueue.json",
        ofile="data/map_PQ_names.json",
        key="panda_resource",
    ):
        logger.warning("PQ map is not available")

    if argparse.interval == "10m":
        # Now run all the scrapers that should run in 10min intervals
        # First the PQ CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/pandaqueue/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="panda_resource")
        if cric.save(file="data/scraped_cric_pandaqueue.json", data=json_data):
            logger.info("Scraped PQ CRIC")
        else:
            logger.error("Problem scraping PQ CRIC")

    elif argparse.interval == "1h":
        # Run all the scrapers that only need to be run once per hour
        # (because they don't change too often)

        # Next the ATLAS sites CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/site/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="name")
        if cric.save(file="data/scraped_cric_sites.json", data=json_data):
            logger.info("Scraped sites CRIC")
        else:
            logger.error("Problem scraping sites CRIC")

        # Now the DDM info from CRIC
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/ddmendpoint/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="site")
        if cric.save(file="data/scraped_cric_ddm.json", data=json_data):
            logger.info("Scraped DDM CRIC")
        else:
            logger.error("Problem scraping DDM CRIC")

        # Next up is REBUS, start with the actual federation map
        rebus = REBUS()
        raw_data = rebus.download(
            url="https://wlcg-cric.cern.ch/api/core/federation/query/?json"
        )
        json_data = rebus.convert(data=raw_data, sort_field="rcsites")
        if rebus.save(file="data/scraped_rebus_federations.json", data=json_data):
            logger.info("Scraped federations CRIC")
        else:
            logger.error("Problem scraping federations CRIC")

        # then the pledges
        # can actually use same JSON raw data as before
        json_data = rebus.convert(
            data=raw_data, sort_field="accounting_name", append_mode=True
        )
        if rebus.save(file="data/scraped_rebus_pledges.json", data=json_data):
            logger.info("Scraped pledges CRIC")
        else:
            logger.error("Problem scraping pledges CRIC")

        # we also get datadisk information from monit Grafana
        url = config.get("credentials_monit_grafana", "url")
        token = config.get("credentials_monit_grafana", "token")

        now = int(round(time.time() * 1000))
        date_to = now - 12 * 60 * 60 * 1000
        date_from = date_to - 24 * 60 * 60 * 1000
        period = """"gte":{0},"lte":{1}""".format(date_from, date_to)

        data = (
            """{"search_type":"query_then_fetch","ignore_unavailable":true,"index":["monit_prod_rucioacc_enr_site*"]}\n{"size":0,"query":{"bool":{"filter":[{"range":{"metadata.timestamp":{"""
            + period
            + ""","format":"epoch_millis"}}},{"query_string":{"analyze_wildcard":true,"query":"data.account:* AND data.campaign:* AND data.country:* AND data.cloud:* AND data.datatype:* AND data.datatype_grouped:* AND data.prod_step:* AND data.provenance:* AND data.rse:* AND data.scope:* AND data.experiment_site:* AND data.stream_name:* AND data.tier:* AND data.token:(\\\"ATLASDATADISK\\\" OR \\\"ATLASSCRATCHDISK\\\") AND data.tombstone:(\\\"primary\\\" OR \\\"secondary\\\") AND NOT(data.tombstone:UNKNOWN) AND data.rse:/.*().*/ AND NOT data.rse:/.*(none).*/"}}]}},"aggs":{"4":{"terms":{"field":"data.rse","size":500,"order":{"_term":"desc"},"min_doc_count":1},"aggs":{"1":{"sum":{"field":"data.files"}},"3":{"sum":{"field":"data.bytes"}}}}}}\n"""
        )
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": "Bearer %s" % token,
        }

        grafana = Grafana(url=url, request=data, headers=headers)
        raw_data = grafana.download()
        pprint.pprint(raw_data)
        json_data = grafana.convert(data=raw_data.json())
        if grafana.save(file="data/scraped_grafana_datadisk.json", data=json_data):
            logger.info("Scraped datadisks from monit grafana")
        else:
            logger.error("Problem scraping datadisks from monit grafana")

        # TODO: not running ES scraper for now since the benchmark jobs are no longer being run
        #
        # # get credentials
        # password = config.get("credentials_elasticsearch", "password")
        # username = config.get("credentials_elasticsearch", "username")
        # host = config.get("credentials_elasticsearch", "host")
        # arg = ([{'host': host, 'port': 9200}])
        # elasticsearch = ElasticSearch(arg, **{'http_auth': (username, password)})
        #
        # kwargs = {
        #     'index': "benchmarks-*",
        #     'body': {
        #         "size": 10000, "query": {"match_all": {},},
        #         "collapse": {"field": "metadata.PanDAQueue", "inner_hits": {"name": "most_recent", "size": 50, "sort": [{"timestamp": "desc"}]}}
        #     },
        #     'filter_path': [""]
        # }
        # raw_data = elasticsearch.download(**kwargs)
        # json_data = elasticsearch.convert(data=raw_data)
        #
        # if elasticsearch.save(file='data/scraped_elasticsearch_benchmark.json', data=json_data):
        #     logger.info('Scraped benchmark results from ES')
        # else:
        #     logger.error('Problem scraping benchmark results from ES')

    else:
        # Nothing to do otherwise
        print("Dropping out")
    else:
        return expr

    for region in args.regions:
        if expr == region:
            return expr

    logger.error('Region not found: {}'.format(expr))
    return 0


if not (args.analysis or (args.background and args.regions)):
    logger.error('No analysis nor processes/regions given! Dropping out.')
    sys.exit()
elif not args.analysis and (args.background and args.regions):
    logger.info(
        'Did not provide analysis, but provided background and regions, so lets guess.'
    )

if args.analysis:
    logger.info('Considering analysis: %s' % args.analysis)
    if args.analysis == '1Lbb':
        args.regions = [
            'SRLMincl', 'SRMMincl', 'SRHMincl', 'SRLM', 'SRMM', 'SRHM', 'WR',
            'STCR', 'TRLM', 'TRMM', 'TRHM', 'VRtt1on', 'VRtt2on', 'VRtt3on',
            'VRtt1off', 'VRtt2off', 'VRtt3off'
        ]
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        args.regions = [
            'SR2J', 'SR4Jhighx', 'SR4Jlowx', 'SR6J', 'TR2J', 'WR2J',
            'TR4Jhighx', 'WR4Jhighx', 'TR4Jlowx', 'WR4Jlowx', 'TR6J', 'WR6J',
            'VR2Jmet', 'VR2Jmt', 'VR4Jhighxapl', 'VR4Jhighxmt',
            'VR4Jlowxhybrid', 'VR4Jlowxapl', 'VR6Japl', 'VR6Jmt'
        ]
def main():
    for f in args.files:
        logger.info('Got file: {}'.format(os.path.basename(f.name)))

        if not os.path.basename(f.name)[-4:] == ".tex":
            logger.error(
                'This is not a tex file. Do not try to fool me again! Skipping...'
            )
            continue

        # check if we can get a background matched
        if args.background.lower() in os.path.basename(f.name).lower():
            logger.info('Found process: {}'.format(args.background))
        else:
            logger.error('No process found! Dropping out.')
            sys.exit()

        # now check if we can get the systematic variation name matched
        sys_matches = [
            s for s in args.systematics
            if s.lower() in os.path.basename(f.name).lower()
        ]

        if len(sys_matches) > 1:
            logger.warning(
                'Found more than one systematic variation matching filename: {}'.format(sys_matches))
            logger.warning('Will only take first one.')
        elif len(sys_matches) == 1:
            logger.info('Found systematic variation: {}'.format(sys_matches[0]))
        elif len(sys_matches) == 0:
            logger.error('No systematic variation found! Dropping out.')
            sys.exit()

        systematic = sys_matches[0]

        # let's check if we are using an up or a down variation (or symmetric...)
        is_up = False
        is_down = False
        if "up" in os.path.basename(f.name).lower():
            is_up = True
            logger.info('This should be an UP variation.')
        elif "down" in os.path.basename(f.name).lower():
            is_down = True
            logger.info('This should be a DOWN variation.')
        else:
            logger.warning(
                'Probably neither up nor down, but a symmetrised table. Sure?')

        # now comes the ugly parsing part
        # can we do this at least not too ugly?
        lines = []

        # First, get the relevant part from the tex file. If the user has made it easy and
        # tagged the respective parts with %tex2hf, we can simply use what's between the tags.
        keywords = False
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as s:
            if s.find(b'tex2hf') != -1:
                logger.info(
                    'Found keywords in file, so now we can just use what is between them'
                )
                keywords = True

        if keywords == True:
            copy = False
            for line in f:
                if "tex2hf" in line.strip():
                    copy = not copy
                    continue
                elif copy:
                    lines.append(line.strip())
        else:
            # otherwise just drop out, I don't want to think about this any further ...
            logger.error(
                'You need to provide keywords. I am too lazy to think about something else. Put "tex2hf" before the first and after the last line (as a comment of course, you do not want this to show up in the table, do you?).'
            )
            sys.exit()

        for line in lines:
            # get rid of any symbols we don't need
            line = line.strip().replace("$", "").replace("\\", "")

            # latex columns, get the region first. Need to strip all whitespace
            region = "".join(line.split("&")[0].split())
            region = getRegionFromExpression(region)
            if region == 0:
                continue

            # then the uncertainty, usually in the last column
            # print("{} : {}".format(region, line.split("&")[-1]))
            uncertainty = round(
                float(
                    line.split("&")[-1].replace("pm", "").replace("%", "").strip()) / 100, 4)
            # print(uncertainty)

            if is_up:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning('Uncertainty larger than 100%. Truncating to 1.-1.')
                values[systematic][region]["up"].append(uncertainty)
            elif is_down:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning('Uncertainty larger than 100%. Truncating to 1.-1.')
                values[systematic][region]["down"].append(uncertainty)
            else:
                up_unc = abs(uncertainty)
                down_unc = -up_unc
                if abs(uncertainty) > 1.0:
                    logger.warning('Uncertainty larger than 100%. Truncating to 1.-1.')
                    down_unc = -1
                values[systematic][region]["up"].append(up_unc)
                values[systematic][region]["down"].append(down_unc)
def createYieldstable(self):
    """Create the yields table. The table is returned as JSON."""
    logger.info("Creating yieldstable")

    open_trees = {}  # index "filename_treename"
    open_files = {}

    yields_dict = self._createOrderedDict()

    for process, type, processtrees, processweights in self.processes:
        logger.info("Projecting {}".format(process))

        raw = 0
        weighted = 0
        error = 0

        for selection, cuts in self.selections.iteritems():
            for filename, treename in processtrees:
                index = "{}_{}".format(filename, treename)
                if index in open_trees:
                    tree = open_trees[index]
                else:
                    if filename in open_files:
                        rootfile = open_files[filename]
                    else:
                        rootfile = ROOT.TFile(filename)
                        open_files[filename] = rootfile
                    tree = rootfile.Get(treename)
                    open_trees[index] = tree

                logger.debug("Projecting {} in file {} with selection {}".format(treename, filename, selection))

                h = ROOT.TH1F("h", "", 1, 0.5, 1.5)
                h.Sumw2()

                combined_weights = "({})*({})".format(self.weights, processweights) if processweights else self.weights
                tree.Project("h", "1", "({})*({})*({})".format(self.lumifactor, combined_weights, cuts))

                # The following is not 100% safe because GetEntries acts weird if one adds
                # processweights. So if you use process-specific weights instead of cuts, pay attention!
                combined_cuts = "({})*({})".format(cuts, processweights) if processweights else cuts

                yields_dict[process][selection]["raw"] += tree.GetEntries(combined_cuts)
                yields_dict[process][selection]["weighted"] += h.Integral()
                yields_dict[process][selection]["error"] += h.GetBinError(1)**2

                del h

            yields_dict[process][selection]["error"] = math.sqrt(yields_dict[process][selection]["error"])

    for selection in self.selections:
        logger.debug("Summing up SM -- selection {}".format(selection))
        bkg_total_unweighted = 0
        bkg_total_weighted = 0
        bkg_total_error = 0

        for process in self.bkg_processes:
            logger.debug("Summing up SM -- process {}".format(process))
            bkg_total_unweighted += yields_dict[process][selection]["raw"]
            bkg_total_weighted += yields_dict[process][selection]["weighted"]
            bkg_total_error += yields_dict[process][selection]["error"]**2

        bkg_total_error = math.sqrt(bkg_total_error)

        yields_dict["Total SM"][selection] = {
            "raw": bkg_total_unweighted,
            "weighted": bkg_total_weighted,
            "error": bkg_total_error,
        }

    return yields_dict
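# The returned yields_dict is nested as yields_dict[process][selection] with the
# keys "raw", "weighted" and "error", plus a "Total SM" entry that sums the
# background processes, e.g. (numbers made up for illustration):
#
# {
#     "ttbar":    {"SRLM": {"raw": 120, "weighted": 45.2, "error": 4.1}},
#     "wjets":    {"SRLM": {"raw":  80, "weighted": 30.5, "error": 3.0}},
#     "Total SM": {"SRLM": {"raw": 200, "weighted": 75.7, "error": 5.1}},
# }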
def run():
    config = ConfigParser.ConfigParser()
    config.read("config.cfg")
    password = config.get("credentials", "password")
    username = config.get("credentials", "username")
    database = config.get("credentials", "database")

    logger.info("Constructing InfluxDB queries.")
    logger.info("Getting distinct key sets")

    client = InfluxDBClient(
        "dbod-eschanet.cern.ch", 8080, username, password, "monit_jobs", True, False
    )

    rs_distinct_sets = client.query(
        """select panda_queue, prod_source, resource, job_status, jobs from "1h"."jobs" where time > now() - 30d and "prod_source" != '' group by panda_queue, prod_source, resource, job_status limit 1"""
    )

    logger.info("Getting 10m data")
    rs_result_24h = client.query(
        """select * from "10m"."jobs" where time > now() - 24h and "prod_source" != '' group by panda_queue, prod_source, resource, job_status """
    )
    logger.info("Got 10m data")

    raw_dict_24h = rs_result_24h.raw
    series_24h = raw_dict_24h["series"]

    logger.info("Getting 1d data")
    rs_result_30d = client.query(
        """select * from "1d"."jobs" where time > now() - 30d and "prod_source" != '' group by panda_queue, prod_source, resource, job_status """
    )
    logger.info("Got 1d data")

    raw_dict_30d = rs_result_30d.raw
    series_30d = raw_dict_30d["series"]

    logger.info("Got data from InfluxDB.")
    logger.info("Constructing MySQL connector.")

    cnx = mysql.connector.connect(
        user="******",
        password=password,
        host="dbod-sql-graf.cern.ch",
        port=5501,
        database="monit_jobs",
    )
    cursor = cnx.cursor()
    selector = cnx.cursor()

    # in mysql there may still be unique pq-resource combinations that don't exist anymore
    pqs_mysql = get_pq_from_mysql(selector)

    logger.info("Building data.")
    data, missing_pqs = get_derived_quantities(rs_distinct_sets, series_24h, series_30d, pqs_mysql)

    for point in get_list_to_upload(data):
        if args.debug:
            print(point)
        if not args.skipSubmit:
            cursor.execute(point)

    for pq, prod_source, resource in missing_pqs:
        cursor.execute(
            'DELETE FROM jobs WHERE panda_queue = "{panda_queue}" AND resource = "{resource}" AND prod_source = "{prod_source}"'
            .format(panda_queue=pq, resource=resource, prod_source=prod_source))

    if not args.skipSubmit:
        cnx.commit()

    cursor.close()
    cnx.close()
if not args.method in ['norm', 'skewnorm']:
    logger.error("Provided smearing method not implemented!")
    raise ValueError("Sorry, need to exit here.")

# if not os.path.isdir(args.inputdir):
#     logger.error("Provided path does not exist or is not a directory!")
#     raise ValueError("Sorry, need to exit here.")

np.random.seed(args.seed)

for indx, f in enumerate(args.inputfiles):

    if not f.endswith("update.root"):
        continue

    logger.info("Updating " + f)

    treename = f.replace("_update.root", "_NoSys")

    file = uproot.open(os.path.join(f))
    tree = file[treename]
    df = tree.pandas.df(tree.keys())

    if args.method == 'skewnorm':
        distribution = skewnorm(float(args.skew), loc=float(args.loc), scale=float(args.scale))
        df[args.branch_name] = df[args.branch_name] * distribution.rvs(
            size=df[args.branch_name].shape)
    else:
        df[args.branch_name] = df[args.branch_name] * np.random.normal(
            float(args.mu), float(args.sigma), df[args.branch_name].shape)
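# Quick standalone check of the skew-normal smearing factors (sketch only; assumes
# scipy/numpy are available and uses made-up parameters):
#
#   from scipy.stats import skewnorm
#   import numpy as np
#   np.random.seed(42)
#   factors = skewnorm(4.0, loc=1.0, scale=0.05).rvs(size=5)
#   print(factors)  # multiplicative factors, one per event, applied to the branch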
args = parser.parse_args()

if not os.path.isfile(args.inputfile):
    logger.error("Provided ROOT file does not exist or is not a file!")
    raise ValueError("Sorry, need to exit here.")

if args.xsec_file:
    if not os.path.isfile(args.xsec_file):
        logger.error("Provided xsec file does not exist or is not a file!")
        raise ValueError("Sorry, need to exit here.")
    else:
        if not args.dsid:
            logger.warning(
                "Provided a xsec file, but not a DSID, will try to guess ...")
        else:
            logger.info("Provided xsec file and DSID, thanks mate!")


def get_xsec(xsec_file, my_dsid=None):
    with open(xsec_file) as f:
        f.readline()  # skip the header line
        for l in f:
            # DSID is the first field
            fields = l.rstrip().split()
            if int(my_dsid) == int(fields[0]):
                xsec = float(fields[2])
                filter_eff = float(fields[3])
                kfactor = float(fields[4])
                logger.info("Got xsec: {}".format(xsec))
                return xsec * filter_eff * kfactor
        else:
            logger.error("Didn't find a xsec ... sorry!")
            return None
        logger.error("Cannot figure out signal model.")
        raise ValueError("Sorry, need to exit here.")

    mass_string = mass_string.replace("p0", "").replace("p5", ".5")

    if not dict.has_key(mass_string):
        lowerMass = int(float(mass_string)) // 5 * 5
        upperMass = lowerMass + 5
        xsec = (dict[str(lowerMass)] + dict[str(upperMass)]) / 2.0
        return xsec
    else:
        return dict[mass_string]


xsec = getxsec(f)
logger.info("Found xsec: " + str(xsec))
logger.info("Updating " + f)

tf = ROOT.TFile(args.inputdir + "/" + f)
# tree = tf.Get('OneLepton2016__ntuple')
tree = tf.Get('EwkOneLeptonTwoBjets2018_simplifiedfit__ntuple')
nentries = tree.GetEntries()

sumofweights = 0
for event in tree:
    sumofweights += event.eventWeight

nBJet30_MV2c10 = array('i', [0])

if int(sumofweights) != int(nentries):
    logger.warning("SumW is not equal to nEntries. Did not expect that!")

tree.SetBranchStatus("genWeight", 0)
# tree.SetBranchAddress("nBJet20_MV2c10", nBJet30_MV2c10)
def run():
    config = ConfigParser.ConfigParser()
    config.read("config.cfg")
    password = config.get("credentials", "password")
    username = config.get("credentials", "username")
    database = config.get("credentials", "database")

    logger.info("Constructing MySQL connector.")

    reader = mysql.connector.connect(
        user="******",
        password=password,
        host="dbod-sql-graf.cern.ch",
        port=5501,
        database="monit_jobs",
    )
    read_cursor = reader.cursor()

    writer = mysql.connector.connect(
        user="******",
        password=password,
        host="dbod-sql-graf.cern.ch",
        port=5501,
        database="monit_jobs",
    )
    write_cursor = writer.cursor()

    logger.info("Getting existing data.")
    read_cursor.execute("select panda_queue, prod_source, resource from jobs")

    def getJSON(file):
        with open(file) as f:
            return json.load(f)

    panda_queues = getJSON("data/scraped_cric_pandaqueue.json")
    panda_resources = getJSON("data/map_PQ_names.json")
    datadisk_info = getJSON("data/scraped_grafana_datadisk.json")
    federations_resources = getJSON("data/scraped_rebus_federations.json")

    for (panda_queue, prod_source, resource) in read_cursor:
        try:
            # do the mapping to actual panda queue nicknames
            nickname = panda_resources[panda_queue]
            atlas_site = panda_queues[nickname]["atlas_site"]
        except:
            logger.warning(
                "Does not exist: queue: %s Prod_source: %s Resource: %s"
                % (panda_queue, prod_source, resource)
            )
            continue

        logger.debug(
            "Queue: %s Prod_source: %s Resource: %s"
            % (panda_queue, prod_source, resource)
        )

        atlas_site = panda_queues[nickname]["atlas_site"]
        type = panda_queues[nickname]["type"]
        cloud = panda_queues[nickname]["cloud"]
        country = panda_queues[nickname]["country"]
        federation = panda_queues.get(nickname, {}).get("rc", "None")
        site_state = panda_queues[nickname]["status"]
        tier = panda_queues[nickname]["tier"]
        resource_type = panda_queues[nickname].get("resource_type", "None")

        if "MCORE" in resource:
            if panda_queues[nickname]["corecount"]:
                resource_factor = float(panda_queues[nickname]["corecount"])
            else:
                resource_factor = 8.0
        else:
            resource_factor = 1.0

        ddm_names = (
            panda_queues.get(nickname, {}).get("astorages", {}).get("read_lan", [])
        )
        # ddm_names = panda_queues[nickname]["ddm"].split(",")
        datadisk_names = [d for d in ddm_names if "DATADISK" in d]
        if len(datadisk_names) > 1:
            logger.warning(
                "Got more than one datadisk for: %s, %s" % (atlas_site, datadisk_names)
            )

        try:
            datadisk_name = datadisk_names[0]
            datadisk_size = datadisk_info[datadisk_name]["bytes"] / (1e9)
            datadisk_files = datadisk_info[datadisk_name]["files"]
        except:
            logger.warning(
                "Datadisk not found for: %s, %s" % (atlas_site, datadisk_names)
            )
            datadisk_name = "NONE"
            datadisk_size = 0.0
            datadisk_files = 0

        add_point = '''INSERT INTO jobs (panda_queue, prod_source, resource) VALUES ("{panda_queue}","{prod_source}", "{resource}") ON DUPLICATE KEY UPDATE atlas_site="{atlas_site}", type="{type}", country="{country}", cloud="{cloud}",federation="{federation}", site_state="{site_state}", tier="{tier}",resource_factor="{resource_factor}",resource_type="{resource_type}", datadisk_name="{datadisk_name}", datadisk_occupied_gb="{datadisk_size}", datadisk_files="{datadisk_files}"'''.format(
            atlas_site=atlas_site,
            panda_queue=panda_queue,
            type=type,
            prod_source=prod_source,
            cloud=cloud,
            country=country,
            federation=federation,
            site_state=site_state,
            tier=tier,
            resource_factor=resource_factor,
            resource=resource,
            resource_type=resource_type,
            datadisk_name=datadisk_name,
            datadisk_size=datadisk_size,
            datadisk_files=datadisk_files,
        )

        if panda_queue == "ANALY_SiGNET":
            print(add_point)
            print(atlas_site)

        write_cursor.execute(add_point)

    writer.commit()

    read_cursor.close()
    write_cursor.close()
    reader.close()
    writer.close()
    '-s',
    '--signal',
    action="store_true",
    help="Use signal tree naming convention (default is background trees)")

args = parser.parse_args()

cmd = "rootls {}".format(args.file)
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
trees = output.split()

if args.signal:
    naming = 5
else:
    naming = 1

raw_names = [
    "_".join(fullname.split("_")[:naming]) + "_" for fullname in trees
]
unique_names = list(set(raw_names))

split_trees = [[t for t in trees if b in t] for b in unique_names]

length = len(split_trees[0])
for (l, bkg) in zip(split_trees, unique_names):
    logger.info("For process {}: {} trees".format(bkg, len(l)))
    if not len(l) == length:
        logger.error("Not the right length!")
parser.add_argument("-file", help="ROOT file")
parser.add_argument('--signal', help='Use signal', action='store_true')
args = parser.parse_args()

tf = ROOT.TFile.Open(args.file, "READ") if isinstance(
    args.file, basestring) else args.file

trees_list = []
checklist = []
processes = []

for key in tf.GetListOfKeys():
    trees_list.append(key.GetName())

logger.info("{} trees in {}".format(len(trees_list), args.file))

for treename in trees_list:
    # logger.info("Opening {}".format(treename))
    tree = tf.Get(treename)

    if not args.signal:
        processname = treename.split("_")[0]
    else:
        processname = "_".join(treename.split("_", 4)[:4])

    if not processname in processes:
        processes.append(processname)

    mc16a = 0
    mc16d = 0
    for test in testmodules:
        print(test)
    sys.exit(0)

suite = unittest.TestSuite()

# based on code snippet from http://stackoverflow.com/questions/1732438/how-do-i-run-all-python-unit-tests-in-a-directory#15630454
for postfix in tests:
    t = "test.test_" + postfix

    if "." in postfix:
        # i don't have a better solution yet, so hack for now
        importTest = ".".join(t.split(".")[:-2])
    else:
        importTest = t

    try:
        logger.info("Trying to import {}".format(importTest))
        mod = __import__(importTest, globals(), locals(), ['suite'])
    except ImportError:
        logger.error("Test {} not found - try {}".format(t, testmodules))
        raise

    try:
        # If the module defines a suite() function, call it to get the suite.
        suitefn = getattr(mod, 'suite')
        suite.addTest(suitefn())
    except (ImportError, AttributeError):
        # else, just load all the test cases from the module.
        logger.info("Loading test {}".format(t))
        suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t))

result = unittest.TextTestRunner(verbosity=verbosity).run(suite)
sys.exit(not result.wasSuccessful())