import urllib
from collections import OrderedDict

from decortication import dataset
# latex_escape is assumed to be provided elsewhere in this package.


def make_dataset_table_mc(dss_infos, name, caption, grouping=True):
    # Arguments ("grouping" is only used by the commented-out per-process rule below):
    if not isinstance(dss_infos, list):
        dss_infos = [dss_infos]

    # Set up:
    table = "\\begin{table}[htbp]\n\t\\caption{" + caption + "}\n"
    table += "\t\\centering\\setlength\\doublerulesep{0.1in}\n\t\\resizebox{0.9\\textwidth}{!}{\n"
    tabular = "\t\t\\begin{tabular}{|l|r|r|}\\hline\n\t\t\t\\textbf{Dataset name} & \\textbf{Entries} & \\textbf{Cross section [pb]} \\\\\\hline\n"
    ngroup = 0

    # Groups:
    for igroup, dss_info in enumerate(dss_infos):
        # Collect the MiniAODs for each process in this group:
        miniaods = OrderedDict()
        for info in dss_info:
            process = info["process"]
            if process not in miniaods:
                miniaods[process] = dataset.fetch_entries("miniaod", query=info)
            else:
                miniaods[process] += dataset.fetch_entries("miniaod", query=info)
        for key, maods in miniaods.items():
            ngroup += 1
            for miniaod in maods:
                # Link the dataset name to its DAS page:
                href_bit = "\\href{https://cmsweb.cern.ch/das/request?view=list\\&limit=50\\&instance=prod\\%2F" + miniaod.instance + "\\&input=" + urllib.quote(miniaod.name, safe="").replace("%", "\\%") + "}"
                # Split the dataset name over two rows (primary dataset, then the rest):
                tabular += "\t\t\t\\begin{tabular}{@{}l@{}}" + href_bit + "{\\texttt{/" + latex_escape(miniaod.name.split("/")[1]) + "}} \\\\ " + href_bit + "{\\footnotesize~~\\texttt{/" + "/".join(latex_escape(miniaod.name).split("/")[2:]) + "}}\\end{tabular} & " + str(miniaod.n) + " & " + str(miniaod.get_sample().sigma) + " \\\\\\hline\n"
            # if grouping and ngroup != len(miniaods) and name != "signal":
            #     tabular += "\\hline\n"
        # Separate groups with a double rule:
        if len(dss_infos) > 1 and igroup < len(dss_infos) - 1:
            tabular += "\\hline\n"
    tabular += "\t\t\\end{tabular}\n"
    table += tabular
    table += "\t}\n"
    table += "\t\\label{table:datasets_" + name + "}\n"
    table += "\\end{table}"

    # Write out both the bare tabular and the full table:
    with open("tabular_datasets_{}.tex".format(name), "w") as out:
        out.write(tabular)
    with open("table_datasets_{}.tex".format(name), "w") as out:
        out.write(table)
    return table
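# Example usage (a sketch; the query dictionaries, name, and caption below are
# illustrative, following the query fields used elsewhere in this package):
#
#     make_dataset_table_mc(
#         [
#             [{"process": "qcdp", "generation": ["moriond17", "moriond17ext"]}],
#             [{"process": "qcdmg", "generation": ["moriond17", "moriond17ext"]}],
#         ],
#         name="background",
#         caption="Background MC datasets.",
#     )
#
# This writes "tabular_datasets_background.tex" and "table_datasets_background.tex".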
def make_json_table(dss_info, name, caption):
    miniaods = OrderedDict()
    for info in dss_info:
        miniaods[info["process"]] = dataset.fetch_entries("miniaod", query=info)

    table = "\\begin{table}[htbp]\n\t\\caption{" + caption + "}\n"
    table += "\t\\centering\\setlength\\doublerulesep{0.1in}\n\t\\resizebox{0.9\\textwidth}{!}{\n"
    tabular = "\t\t\\begin{tabular}{|l|l|}\\hline\n\t\t\t\\textbf{Run period} & \\textbf{Luminosity mask filename}\\\\\\hline\n"
    for key, value in miniaods.items():
        maod = value[0]
        mask_url = maod.mask
        mask_file = mask_url.split("/")[-1]
        nice_name = "2015" if key == "jetht15" else "2016"
        # Link the mask filename to the mask itself:
        href_bit = "\\href{" + mask_url + "}"
        tabular += "\t\t\t" + nice_name + " & " + href_bit + "{\\texttt{" + latex_escape(mask_file) + "}}\\\\\\hline\n"
    # tabular += "\\textbf{Total integrated luminosity} & \\\\\\hline\n"
    tabular += "\t\t\\end{tabular}\n"
    table += tabular
    table += "\t}\n"
    table += "\t\\label{table:json}\n"
    table += "\\end{table}"

    with open("tabular_json_{}.tex".format(name), "w") as out:
        out.write(tabular)
    with open("table_json_{}.tex".format(name), "w") as out:
        out.write(table)
    return table
def main():
    # Setup:
    ## ROOT:
    gROOT.SetBatch()

    ## Arguments:
    a = variables.arguments()
    args = a.args
    tuples = a.input
    if not tuples:
        tuples = dataset.fetch_entries("tuple", a.query)
        # This combines "extension" datasets, among other things:
        tuples = dataset.sort_datasets(tuples, collapse=True)
    else:
        tuples = {args.process: tuples}
    if not tuples:
        print "[!!] ERROR: The arguments provided don't define any input."
        sys.exit()

    ## Combine jetht15 and jetht16:
    for name in ["jetht15", "jetht16"]:
        if name in tuples:
            if "jetht" not in tuples:
                tuples["jetht"] = []
            tuples["jetht"].extend(tuples[name])
            tuples.pop(name, None)

    ## Print an introduction:
    print "The analyzer will run over the following tuples:"
    if isinstance(tuples, dict):
        for key, tups in tuples.items():
            print "\t* {}:".format(key)
            for tup in tups:
                print "\t\t* {}".format(tup)
    else:
        print "\t{}".format(tuples)

    ## Analyzer object:
    out_dir = None
    out_file = None
    if args.output:
        out_file = args.output.split("/")[-1]
        out_dir = "/".join(args.output.split("/")[:-1])        # Currently unused.
    ana = analyzer.analyzer(tuples, save=True, v=args.verbose, out_file=out_file, use_condor=args.condor)
    vs_out = get_variables()
    ana.define_branches(vs_out)

    # Event loop:
    for key, loop in ana.loops.iteritems():
        loop.treatment = treat_event
        loop.progress = not ana.condor        # Progress bars clutter condor logs.
        loop.run(n=args.n, rand=False, arguments={"alg": args.algorithm})

    # Output:
    ana.write()
    print ana.out_path
def main():
    # Setup:
    ## ROOT:
    gROOT.SetBatch()

    ## Arguments:
    a = variables.arguments()
    args = a.args
    tuples = dataset.fetch_entries("tuple", a.query)
    tuples = dataset.sort_datasets(tuples)

    ## Print an introduction:
    print "The analyzer will run over the following tuples:"
    for key, tups in tuples.items():
        print "\t* {}:".format(key)
        for tup in tups:
            print "\t\t* {}".format(tup)

    ## Analyzer object:
    ana = analyzer.analyzer(tuples, save=True, v=args.verbose, count=False)        # TODO: add "out_dir=" and "out_file=".
    vs_out = get_variables()
    ana.define_branches(vs_out)

    # Event loop:
    for key, loop in ana.loops.iteritems():
        loop.treatment = treat_event
        loop.progress = False
        loop.run(n=args.n, rand=False, arguments={"alg": args.algorithm})

    # Output:
    ana.write()
    print ana.out_path
def main():
    a = variables.arguments()
    miniaods = dataset.fetch_entries("miniaod", a.query)
    suffix = a.suffix
    if not suffix:
        suffix = "cutpt400"
    indate = a.args.dir
    if not indate:
        indate = "161118_062506"

    for miniaod in miniaods:
        indir = "/uscms/home/tote/8_0_20/Analyzers/FatjetAnalyzer/test/condor_jobs/tuplizer/{}/{}_{}_{}".format(indate, miniaod.subprocess, miniaod.generation, suffix)
        print "\nStatusing {}".format(miniaod.Name)

        # Collect the job and log files:
        print "[..] Scanning the condor directory."
        jdls = [f for f in os.listdir(indir) if ".jdl" in f]
        njobs = len(jdls)
        log_dict = list_logs(indir)
        logs_log = log_dict["log"]
        logs_stdout = log_dict["stdout"]
        logs_stderr = log_dict["stderr"]
        print "[OK] Total jobs: {}".format(njobs)

        # Check that each job produced all three log files:
        good = True
        if len(logs_log) != njobs:
            print "[!!] There are only {} .log logs.".format(len(logs_log))
            good = False
        if len(logs_stdout) != njobs:
            print "[!!] There are only {} .stdout logs.".format(len(logs_stdout))
            good = False
        if len(logs_stderr) != njobs:
            print "[!!] There are only {} .stderr logs.".format(len(logs_stderr))
            good = False
        if good:
            print "[OK] All logs accounted for."
        else:
            sys.exit()

        # Check the logs for problems:
        print "[..] Checking jobs."
        jobs_bad, jobs_error, jobs_unsubmitted = check_stderr_logs(indir, logs_stderr)
        if jobs_bad:
            print "[!!] There were problems with {} jobs:".format(len(jobs_bad))
            print jobs_bad
            good = False
        if jobs_error:
            print "[!!] There were errors in {} of these jobs:".format(len(jobs_error))
            print jobs_error
            good = False
        if jobs_unsubmitted:
            print "[!!] There are {} unsubmitted jobs:".format(len(jobs_unsubmitted))
            print jobs_unsubmitted
            good = False
        if good:
            print "[OK] All jobs completed successfully."
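# list_logs and check_stderr_logs are defined elsewhere in this package.
# A minimal sketch of list_logs, assuming it simply buckets the files in the
# job directory's "logs" subdirectory (created by the submission script) by
# extension -- the real helper may differ, e.g., by returning full paths:
def list_logs(indir):
    log_dict = {"log": [], "stdout": [], "stderr": []}
    log_path = os.path.join(indir, "logs")
    for f in os.listdir(log_path):
        ext = f.split(".")[-1]
        if ext in log_dict:
            log_dict[ext].append(f)
    return log_dict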
def make_dataset_table_data(dss_info, name, caption):
    miniaods = OrderedDict()
    for info in dss_info:
        miniaods[info["process"]] = dataset.fetch_entries("miniaod", query=info)

    table = "\\begin{table}[htbp]\n\t\\caption{" + caption + "}\n"
    table += "\t\\centering\\setlength\\doublerulesep{0.1in}\n"
    tabular = "\t\t\\begin{tabular}{|l|r|}\\hline\n\t\t\t\\textbf{Dataset name} & \\textbf{Int. lum. [fb\\textsuperscript{-1}]} \\\\\\hline\n"
    ngroup = 0
    for key, maods in miniaods.items():
        ngroup += 1
        total = 0
        for miniaod in maods:
            sample = miniaod.get_sample()
            lum = sample.luminosity / 1000        # Presumably /pb to /fb, given the column header.
            total += lum
            # Link the dataset name to its DAS page:
            href_bit = "\\href{https://cmsweb.cern.ch/das/request?view=list\\&limit=50\\&instance=prod\\%2Fglobal\\&input=" + latex_escape(urllib.quote(miniaod.name, safe="")) + "}"
            tabular += "\t\t\t" + href_bit + "{\\texttt{" + latex_escape(miniaod.name) + "}} & " + str(lum) + " \\\\\\hline\n"
        tabular += "\\multicolumn{1}{|r|}{\\textbf{Total}} & \\textbf{" + "{:.3f}".format(total) + "}\\\\\\hline\n"
        # Separate run periods with a double rule:
        if ngroup != len(miniaods):
            tabular += "\\hline\n"
    tabular += "\t\t\\end{tabular}\n"
    table += tabular
    table += "\t\\label{table:datasets_data}\n\\end{table}"

    with open("tabular_datasets_{}.tex".format(name), "w") as out:
        out.write(tabular)
    with open("table_datasets_{}.tex".format(name), "w") as out:
        out.write(table)
    return table
def main():
    # Arguments:
    a = variables.arguments()
    if not a.kinds:
        a.kinds = variables.all_kinds

    # Fetch datasets:
    results = {}
    for kind in a.kinds:
        entries = dataset.fetch_entries(kind, a.query)
        for entry in entries:
            if entry.subprocess not in results:
                results[entry.subprocess] = {}
            if entry.kind not in results[entry.subprocess]:
                results[entry.subprocess][entry.kind] = []
            results[entry.subprocess][entry.kind].append(entry)

    # Print the entries, grouped by subprocess and kind:
    for subprocess, kinds in results.items():
        for kind, entries in kinds.items():
            for entry in entries:
                entry.Print()
                print "=========================================================="
    return True
def main():
    # Arguments:
    a = variables.arguments()
    args = a.args
    if not a.kinds:
        a.kinds = variables.all_kinds
    j = not args.json

    # Step 0: check if anything needs to be added to the DB:
    print "Step 0: Checking for any new entries that need to be added to the DB ..."
    datasets = dataset.parse_db_yaml(completed=True)
    print "\t[..] Checking samples.yaml against the DB."
    n_added = 0
    for kind, dss in datasets.items():
        for ds in dss:
            check_result = dataset.check_yaml_against_db(ds)
            # Add entries that aren't in the DB:
            if not check_result:
                n_added += 1
                print "\t[..] Adding {} to the DB.".format(ds.Name)
                ds.write()
    ## Print a summary:
    print "Step 0 summary:"
    if n_added:
        print "\t{} entries added.".format(n_added)
    else:
        print "\tNothing needed to be added."

    # Step 1: check if anything needs to be updated in the DB:
    print "Step 1: Checking if any specified entries in the DB need to be updated ..."
    dss = []
    for kind in a.kinds:
        dss += dataset.fetch_entries(kind, a.query)
    print "\t[..] Checking samples.yaml against the DB (excluding tuples)."
    n_updated = 0
    for ds in dss:
        if ds.kind != "tuple":
            print "\t{} ({})".format(ds.Name, ds.kind)
            check_result = dataset.check_db_against_yaml(ds)
            if not check_result:
                continue
            # Update entries in the DB that need updating (e.g., if you recently edited "samples.yaml"):
            keys_update = [key for key, value in check_result.items() if value["change"] and key != "time"]
            if keys_update:
                n_updated += 1
                info = {key: check_result[key]["new"] for key in keys_update}
                print "\tUpdating the following values for {} ...".format(ds.Name)
                print "\t{}".format(info)
                ds.update(info)
    ## Print a summary:
    print "Step 1 summary:"
    if n_updated:
        print "\t{} entries updated.".format(n_updated)
    else:
        print "\tNothing needed to be updated."

    # Step 2: search (but don't scan) for new tuples:
    print "Step 2: Searching for new tuples ..."
    tuples_new = discover_tuples()
    n_added = 0
    for tup in tuples_new:
        print "Adding {} to the DB ...".format(tup.Name)
        tup.write()
        n_added += 1
    ## Print a summary:
    print "Step 2 summary:"
    if n_added:
        print "\t{} tuples added.".format(n_added)
    else:
        print "\tNo tuples needed to be added."

    # Step 3: fetch and scan entries:
    print "Step 3: Scanning entries ..."
    entries_dict = {}
    for kind in a.kinds:
        entries_dict[kind] = dataset.fetch_entries(kind, a.query)
    n_entries = sum([len(l) for l in entries_dict.values()])
    if n_entries:
        print "\tThere are {} entries to scan.".format(n_entries)
    else:
        print "\tThere were no entries to scan. Your query was the following:\n{}\nkinds = {}".format(a.query, a.kinds)
    for kind, entries in entries_dict.items():
        for entry in entries:
            print "\tFixing {} ...".format(entry.Name)
            try:
                entry.fix()
            except Exception as ex:
                print "[!!] Fixing failed:"
                print ex
            print "\tScanning {} ...".format(entry.Name)
            try:
                entry.scan(j=j)
            except Exception as ex:
                print "[!!] Scanning failed:"
                print ex
    return True
from decortication.dataset import fetch_entries

# Compute the effective per-event weight (sigma * lum / n) for each QCD subprocess:
processes = ["qcdp", "qcdmg"]
lum = 38.180 * 1000        # Integrated luminosity: 38.180/fb converted to /pb.

for process in processes:
    miniaods = fetch_entries("miniaod", {"process": process, "generation": ["moriond17", "moriond17ext"]})
    # Sum the entry counts over extensions of the same subprocess:
    results = {}
    for miniaod in miniaods:
        if miniaod.subprocess not in results:
            results[miniaod.subprocess] = [miniaod.n, miniaod.get_sample().sigma]
        else:
            results[miniaod.subprocess][0] += miniaod.n
    # Append the per-event weight to each [n, sigma] pair and print:
    for subprocess, values in results.items():
        values.append(values[1] * lum / values[0])
        print subprocess, values
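# For example (illustrative numbers only): a subprocess with sigma = 32100 pb
# and n = 1.0e7 generated events gets a per-event weight of
#     32100 * 38180 / 1.0e7 ~= 122.6,
# i.e., each simulated event stands in for ~122.6 expected data events.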
import os

from decortication import variables, dataset, production

# VARIABLES:
cut_pt_filter = 400        # The eta 2.5 cut is the default.
# /VARIABLES

if __name__ == "__main__":
    # Prepare:
    a = variables.arguments()
    path = "crab_configs"
    if not os.path.exists(path):
        os.makedirs(path)

    # Write configs:
    miniaods = dataset.fetch_entries("miniaod", a.query)
    for miniaod in miniaods:
        print "Making a configuration file for {} ...".format(miniaod.Name)
        config = production.get_crab_config(
            kind="tuple",
            miniaod=miniaod,
            cmssw_config="tuplizer_cfg.py",
            cut_pt_filter=cut_pt_filter,
        )
        with open(path + "/{}.py".format(miniaod.Name), "w") as out:
            out.write(config)
# cut_pt_filter and n_per are assumed to be module-level settings, as in the
# VARIABLES block of the CRAB config script.
def main():
    # Arguments:
    a = variables.arguments()
    miniaods = dataset.fetch_entries("miniaod", a.query)
    tstring = utilities.time_string()[:-4]
    suffix = "cutpt{}".format(cut_pt_filter)
    cmssw_version = cmssw.get_version(parsed=False)

    for miniaod in miniaods:
        print "Making condor setup for {} ...".format(miniaod.Name)
        sample = miniaod.get_sample()

        # Create groups of input files, each with at least n_per events:
        groups = []
        group = []
        n_group = 0
        for i, n in enumerate(miniaod.ns):
            n_group += n
            group.append(miniaod.files[i])
            if (n_group >= n_per) or (i == len(miniaod.ns) - 1):
                groups.append(group)
                group = []
                n_group = 0
        print "\tCreating {} jobs ...".format(len(groups))

        # Prepare directories:
        path = "condor_jobs/tuplizer/{}/{}_{}_{}".format(tstring, miniaod.subprocess, miniaod.generation, suffix)
        log_path = path + "/logs"
        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.exists(log_path):
            os.makedirs(log_path)
        # Output path on EOS:
        eos_path = "/store/user/tote/{}/tuple_{}_{}_{}/{}".format(sample.name, miniaod.subprocess, miniaod.generation, suffix, tstring)

        # Create job scripts:
        for i, group in enumerate(groups):
            job_script = "#!/bin/bash\n"
            job_script += "\n"
            job_script += "# Untar CMSSW area:\n"
            job_script += "tar -xzf {}.tar.gz\n".format(cmssw_version)
            job_script += "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format(cmssw_version)
            job_script += "\n"
            job_script += "# Set up CMSSW:\n"
            job_script += "source /cvmfs/cms.cern.ch/cmsset_default.sh\n"
            job_script += "eval `scramv1 runtime -sh` # cmsenv\n"
            job_script += "\n"
            job_script += "# Run CMSSW:\n"
            list_str = ",".join(['"{}"'.format(g) for g in group])
            out_file = "tuple_{}_{}_{}_{}.root".format(miniaod.subprocess, miniaod.generation, suffix, i + 1)
            job_script += 'cmsRun tuplizer_cfg.py subprocess="{}" generation="{}" cutPtFilter={} outDir="." outFile="{}" inFile={}'.format(miniaod.subprocess, miniaod.generation, cut_pt_filter, out_file, list_str)
            if sample.data:
                job_script += " data=True"
            if sample.mask:
                job_script += ' mask="{}"'.format(sample.mask)
            job_script += " &&\n"
            job_script += "xrdcp -f {} root://cmseos.fnal.gov/{} &&\n".format(out_file, eos_path)
            job_script += "rm {}\n".format(out_file)
            with open("{}/job_{}.sh".format(path, i + 1), "w") as out:
                out.write(job_script)

        # Create condor configs:
        for i, group in enumerate(groups):
            job_config = "universe = vanilla\n"
            job_config += "Executable = job_{}.sh\n".format(i + 1)
            job_config += "Should_Transfer_Files = YES\n"
            job_config += "WhenToTransferOutput = ON_EXIT\n"
            job_config += "Transfer_Input_Files = {}.tar.gz\n".format(cmssw_version)
            job_config += 'Transfer_Output_Files = ""\n'
            job_config += "Output = logs/job_{}.stdout\n".format(i + 1)
            job_config += "Error = logs/job_{}.stderr\n".format(i + 1)
            job_config += "Log = logs/job_{}.log\n".format(i + 1)
            job_config += "notify_user = ${LOGNAME}@FNAL.GOV\n"
            job_config += "x509userproxy = $ENV(X509_USER_PROXY)\n"
            job_config += "Queue 1\n"
            with open("{}/job_{}.jdl".format(path, i + 1), "w") as out:
                out.write(job_config)

        # Create run script:
        run_script = "# Update cache info:\n"
        run_script += "bash $HOME/condor/cache.sh\n"
        run_script += "\n"
        run_script += "# Grid proxy existence & expiration check:\n"
        run_script += "PCHECK=`voms-proxy-info -timeleft`\n"
        run_script += 'if [[ ($? -ne 0) || ("$PCHECK" -eq 0) ]]; then\n'
        run_script += "\tvoms-proxy-init -voms cms --valid 168:00\n"
        run_script += "fi\n"
        run_script += "\n"
        run_script += "# Copy python packages to the CMSSW area:\n"
        run_script += "cp -r $HOME/decortication/decortication $CMSSW_BASE/python\n"
        run_script += "cp -r $HOME/decortication/resources $CMSSW_BASE/python\n"
        run_script += "cp -r $HOME/truculence/truculence $CMSSW_BASE/python\n"
        run_script += "\n"
        run_script += "# Make tarball:\n"
        run_script += "echo 'Making a tarball of the CMSSW area ...'\n"
        run_script += "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n"
        run_script += "\n"
        run_script += "# Prepare EOS:\n"
        run_script += "eos root://cmseos.fnal.gov mkdir -p {}\n".format(eos_path)
        run_script += "\n"
        run_script += "# Submit condor jobs:\n"
        for i, group in enumerate(groups):
            run_script += "condor_submit job_{}.jdl\n".format(i + 1)
        run_script += "\n"
        run_script += "# Remove tarball:\n"
        run_script += "#rm ${CMSSW_VERSION}.tar.gz\n"        # If I remove this, the jobs might complain.
        run_script += "\n"
        run_script += "# Remove python packages:\n"
        run_script += "#rm -rf $CMSSW_BASE/python/decortication\n"
        run_script += "#rm -rf $CMSSW_BASE/python/resources\n"
        run_script += "#rm -rf $CMSSW_BASE/python/truculence\n"
        with open("{}/run.sh".format(path), "w") as out:
            out.write(run_script)
        print "\tThe jobs are in {}".format(path)
    return True
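# After this script runs, the jobs can be submitted (assuming a valid grid
# proxy and the condor tools on the FNAL LPC) with something like:
#
#     cd condor_jobs/tuplizer/<tstring>/<subprocess>_<generation>_<suffix>
#     bash run.sh
#
# run.sh tars up the CMSSW area, prepares the EOS output directory, and calls
# condor_submit on each job_*.jdl written above.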
def main():
    # Arguments:
    a = variables.arguments()
    args = a.args
    if not a.kinds:
        a.kinds = variables.all_kinds
    j = not args.json

    # Step 1: check if anything needs to be added to or updated in the DB:
    print "Step 1: Checking if anything needs to be added to or updated in the DB ..."
    datasets = dataset.parse_db_yaml(completed=False)
    print "\tChecking samples.yaml against the DB ..."
    n_added = 0
    n_updated = 0
    for kind, dss in datasets.items():
        for ds in dss:
            check_result = ds.check()
            # Add entries that aren't in the DB:
            if not check_result:
                n_added += 1
                print "\tAdding {} to the DB ...".format(ds.Name)
                ds.write()
            # Update entries in the DB that need updating (e.g., if you recently edited "samples.yaml"):
            else:
                # "check_result[1]" says which fields should be updated in the DB:
                keys_update = [key for key, value in check_result[1].items() if value and key != "time"]
                if keys_update:
                    n_updated += 1
                    info = {key: getattr(ds, key) for key in keys_update}
                    print "\tUpdating the following values for {} ...".format(ds.Name)
                    print "\t{}".format(info)
                    ds.update(info)
    ## Print a summary:
    print "Step 1 summary:"
    if n_added:
        print "\t{} entries added.".format(n_added)
    else:
        print "\tNothing needed to be added."
    if n_updated:
        print "\t{} entries updated.".format(n_updated)
    else:
        print "\tNothing needed to be updated."

    # Step 2: search (but don't scan) for new tuples:
    print "Step 2: Searching for new tuples ..."
    tuples_new = discover_tuples()
    n_added = 0
    for tup in tuples_new:
        print "Adding {} to the DB ...".format(tup.Name)
        tup.write()
        n_added += 1
    ## Print a summary:
    print "Step 2 summary:"
    if n_added:
        print "\t{} tuples added.".format(n_added)
    else:
        print "\tNo tuples needed to be added."

    # Step 3: fetch and scan entries:
    print "Step 3: Scanning entries ..."
    entries_dict = {}
    for kind in a.kinds:
        entries_dict[kind] = dataset.fetch_entries(kind, a.query)
    n_entries = sum([len(l) for l in entries_dict.values()])
    if n_entries:
        print "\tThere are {} entries to scan.".format(n_entries)
    else:
        print "\tThere were no entries to scan. Your query was the following:\n{}\nkinds = {}".format(a.query, a.kinds)
    for kind, entries in entries_dict.items():
        for entry in entries:
            print "\tFixing {} ...".format(entry.Name)
            entry.fix()
            print "\tScanning {} ...".format(entry.Name)
            entry.scan(j=j)
    return True