def parallelize(function, arguments_list, n_processes=1, description=None):
    """Apply *function* to every item of *arguments_list* and return the results.

    With n_processes <= 1 everything runs sequentially in this process;
    otherwise a multiprocessing.Pool is used and a progress bar is advanced
    by polling the asynchronous map result. Results are returned in the
    order of *arguments_list*.
    """
    progress_description = description if description else "calling " + str(function)

    # Empty input: return early. This also prevents the parallel branch from
    # calling next() on an empty progress iterator, which would raise
    # StopIteration before any work was done.
    if not arguments_list:
        return []

    if n_processes <= 1:
        # sequential fallback with a progress bar
        results = []
        for arguments in pi.ProgressIterator(arguments_list, description=progress_description):
            results.append(function(arguments))
        return results

    pool = multiprocessing.Pool(processes=max(1, min(n_processes, len(arguments_list))))
    try:
        async_results = pool.map_async(function, arguments_list, chunksize=1)

        n_tasks = len(arguments_list)
        tasks_left = n_tasks - 1
        progress_iterator = pi.ProgressIterator(range(n_tasks), description=progress_description)
        progress_iterator.next()  # show the progress bar immediately (Python 2 iterator protocol)

        # Poll the pool once per second and advance the progress bar by the
        # number of newly finished tasks.
        # NOTE(review): _number_left is a private attribute of the
        # multiprocessing MapResult; Python 2 offers no public alternative.
        while True:
            ready = async_results.ready()
            remaining = async_results._number_left
            if ready or (remaining < tasks_left):
                for _ in range(tasks_left - remaining):
                    progress_iterator.next()
                tasks_left = remaining
                if ready:
                    break
            time.sleep(1.0)

        # A finite (huge) timeout keeps the call interruptible via Ctrl+C,
        # which a bare get() in Python 2 is not.
        return async_results.get(9999999)
    finally:
        # necessary to actually terminate the processes; without these two
        # lines they happen to live until the whole program terminates --
        # the try/finally also reaps them if get() raises
        pool.close()
        pool.join()
def main():
    """Command-line entry point: merge Artus ROOT outputs per nick name."""
    parser = argparse.ArgumentParser(
            description="Merge Artus outputs per nick name.",
            parents=[logger.loggingParser])
    parser.add_argument("project_dir",
            help="Artus Project directory containing the files \"output/*/*.root\" to merge")
    parser.add_argument("-n", "--n-processes", type=int, default=1,
            help="Number of (parallel) processes. [Default: %(default)s]")
    parser.add_argument("--output-dir",
            help="Directory to store merged files. Default: Same as project_dir.")
    args = parser.parse_args()
    logger.initLogger(args)

    # Derive nick names from the per-nick output directories, skipping
    # packed (*.tar.gz) artifacts.
    nick_names = []
    for output_dir in glob.glob(os.path.join(args.project_dir, "output/*")):
        nick = output_dir[output_dir.rfind("/") + 1:]
        if not ".tar.gz" in nick:
            nick_names.append(nick)

    # Collect the ROOT files belonging to each nick.
    outputs_per_nick = {}
    for nick in nick_names:
        outputs_per_nick[nick] = glob.glob(os.path.join(args.project_dir, "output", nick, "*.root"))

    # drop potentially existing SvfitCaches from the filelist
    for nick, files in outputs_per_nick.iteritems():
        outputs_per_nick[nick] = [rootfile for rootfile in files if ("SvfitCache" not in rootfile)]

    # keep only nicks that still have at least one file to merge
    outputs_per_nick = dict(
            (nick, files)
            for nick, files in outputs_per_nick.iteritems()
            if len(files) > 0)

    # Build one hadd.py command per nick and run them in parallel.
    commands = []
    for nick_name, output_files in pi.ProgressIterator(
            outputs_per_nick.iteritems(),
            length=len(outputs_per_nick),
            description="Merging Artus outputs"):
        merged_dir = os.path.join(
                args.project_dir if (args.output_dir == None) else args.output_dir,
                "merged", nick_name)
        if not os.path.exists(merged_dir):
            os.makedirs(merged_dir)
        commands.append("hadd.py -a \" -f\" -t %s \"%s\"" % (
                os.path.join(merged_dir, nick_name + ".root"),
                " ".join(output_files)))

    tools.parallelize(_call_command, commands, n_processes=args.n_processes)
def merge_local(args):
    """Merge Artus outputs on the local machine, one hadd call per nick name."""
    outputs_per_nick = folders_to_merge(args)

    if (args.project_subdir != None):  # keep only single path
        outputs_per_nick = {args.project_subdir: outputs_per_nick[args.project_subdir]}

    # drop potentially existing SvfitCaches from the filelist
    for nick, files in outputs_per_nick.iteritems():
        outputs_per_nick[nick] = [rootfile for rootfile in files if ("SvfitCache" not in rootfile)]

    # discard nicks without any remaining files
    outputs_per_nick = dict(
            (nick, files)
            for nick, files in outputs_per_nick.iteritems()
            if len(files) > 0)

    # Build one argument dictionary per nick for the parallel hadd2 calls.
    hadd_arguments = []
    for nick_name, output_files in pi.ProgressIterator(
            outputs_per_nick.iteritems(),
            length=len(outputs_per_nick),
            description="Merging Artus outputs"):
        merged_dir = os.path.join(
                args.project_dir[0] if (args.output_dir == None) else args.output_dir,
                "merged", nick_name)
        if not os.path.exists(merged_dir):
            os.makedirs(merged_dir)
        target_filename = os.path.join(merged_dir, nick_name + ".root")
        if (args.project_subdir != None):
            target_filename = "merged.root"
        hadd_arguments.append({
                "target_file": target_filename,
                "source_files": output_files,
                "hadd_args": " -f ",
                "max_files": 500,
        })

    tools.parallelize(hadd2, hadd_arguments, n_processes=args.n_processes,
            description="Merging Artus outputs")
def get_events_list_from_tree(tree, branch_names, selection):
    """Return a list of tuples holding the *branch_names* values of all
    tree entries that pass *selection*."""
    # accept a single branch name as well as a list of names
    if isinstance(branch_names, basestring):
        branch_names = [branch_names]

    # Let ROOT evaluate the selection once and collect the passing entry numbers.
    entrylist_name = tree.GetName() + "entrylist"
    tree.Draw(">>" + entrylist_name, selection)
    root_entrylist = ROOT.gDirectory.Get(entrylist_name)
    selected_entries = [root_entrylist.GetEntry(index) for index in range(root_entrylist.GetN())]

    # Read the requested branch values for every selected entry.
    events = []
    for entry_number in pi.ProgressIterator(
            selected_entries, description="Reading event numbers from tree"):
        tree.GetEntry(entry_number)
        events.append(tuple(getattr(tree, branch_name) for branch_name in branch_names))
    return events
def main():
    """Add a friend tree with constant-valued branches to a tree in each given file."""
    parser = argparse.ArgumentParser(
            description="Add friend to tree in file with constant values.",
            parents=[logger.loggingParser])
    parser.add_argument("files", nargs="+",
            help="Files containing the tree. The files will be updated.")
    parser.add_argument("-t", "--tree", required=True,
            help="Path to the tree object in the file.")
    parser.add_argument("--values", nargs="+", required=True,
            help="Values to add to the friend tree. Each value gets a separate branch.")
    parser.add_argument("-b", "--branches", nargs="+", default=[None],
            help="Branch names.")
    args = parser.parse_args()
    logger.initLogger(args)

    # pad the branch-name list to the number of values and invent default
    # names ("annotation<i>") for the missing ones
    if len(args.branches) < len(args.values):
        args.branches = (args.branches + (len(args.values) * [None]))[:len(args.values)]
    args.branches = [
            "annotation%d" % i if b is None else b
            for i, b in enumerate(args.branches)
    ]

    # ROOT leaf-type codes per Python type
    types = {
        bool: "O",
        int: "I",
        float: "D",
        str: "C",
    }

    # Parse each value as int first, then float. Only ValueError is caught:
    # the original bare "except:" clauses also swallowed SystemExit and
    # KeyboardInterrupt, which is never wanted here.
    value_types = []
    for index, value in enumerate(args.values):
        try:
            value = int(value)
            value_types.append(int)
        except ValueError:
            try:
                value = float(value)
                value_types.append(float)
            except ValueError:
                # TODO: check if type is bool
                # TODO: find way to branch strings
                raise Exception("cannot convert value %r to a supported branch type (int or float)" % value)
        args.values[index] = value

    log.info("New branches:")
    for index, (branch, value_type, value) in enumerate(
            zip(args.branches, value_types, args.values)):
        log.info("\t%s/%s = %s" % (branch, types[value_type], str(value)))

    for file_name in progressiterator.ProgressIterator(
            args.files, description="Processing files"):
        with tfilecontextmanager.TFileContextManager(file_name, "UPDATE") as root_file:
            tree = root_file.Get(args.tree)

            # cd into the directory containing the tree so that new objects
            # end up next to it
            dir_name = os.path.dirname(args.tree)
            if not dir_name == "":
                root_file.Get(dir_name)

            # names of all objects already present in the file
            elements = zip(*roottools.RootTools.walk_root_directory(root_file))[-1]

            # find a friend-tree name that does not exist in the file yet
            # NOTE(review): rstrip("_0") strips ALL trailing "_" and "0"
            # characters, not the literal suffix "_0" -- confirm intended
            friend_tree_name = None
            n_trials = 0
            while friend_tree_name is None:
                tmp_friend_tree_name = (tree.GetName() + "_friend_" + str(n_trials)).rstrip("_0")
                if not tmp_friend_tree_name in elements:
                    friend_tree_name = tmp_friend_tree_name
                n_trials += 1

            friend_tree = ROOT.TTree(friend_tree_name, tree.GetTitle() + " (friend)")

            # one single-element numpy buffer per branch; the list keeps the
            # buffers alive until the tree has been filled
            values = []
            for branch, value_type, value in zip(args.branches, value_types, args.values):
                values.append(numpy.zeros(1, dtype=value_type))
                values[-1][0] = value
                friend_tree.Branch(branch, values[-1], "%s/%s" % (branch, types[value_type]))

            # the friend needs exactly as many (identical) entries as the tree
            for _ in xrange(tree.GetEntries()):
                friend_tree.Fill()

            friend_tree.AddFriend(tree, tree.GetName())
            tree.AddFriend(friend_tree, friend_tree.GetName())
            root_file.Write()
def run(self, plotData):
    """Read all configured ROOT inputs (trees or stored objects) into plotData.

    Iterates in lockstep over the per-input lists in plotData.plotdict and
    stores the resulting histogram for each input under its nick in
    plotData.plotdict["root_objects"]; tree chains are optionally kept in
    plotData.plotdict["root_trees"].
    """
    root_tools = roottools.RootTools()
    # consume the setting; it must not remain in the plotdict afterwards
    self.hide_progressbar = plotData.plotdict["hide_progressbar"]
    del (plotData.plotdict["hide_progressbar"])
    # One iteration per configured input: the 14 plotdict lists are zipped
    # in lockstep; x/y/z expressions and weights go through the expression
    # alias replacement first.
    for index, (
            root_files, folders,
            x_expression, y_expression, z_expression, weight,
            x_bins, y_bins, z_bins,
            nick,
            friend_files, friend_folders, friend_alias,
            option
    ) in enumerate(pi.ProgressIterator(zip(
            plotData.plotdict["files"],
            plotData.plotdict["folders"],
            [self.expressions.replace_expressions(expression) for expression in plotData.plotdict["x_expressions"]],
            [self.expressions.replace_expressions(expression) for expression in plotData.plotdict["y_expressions"]],
            [self.expressions.replace_expressions(expression) for expression in plotData.plotdict["z_expressions"]],
            [self.expressions.replace_expressions(expression) for expression in plotData.plotdict["weights"]],
            plotData.plotdict["x_bins"],
            plotData.plotdict["y_bins"],
            plotData.plotdict["z_bins"],
            plotData.plotdict["nicks"],
            plotData.plotdict["friend_files"],
            plotData.plotdict["friend_folders"],
            plotData.plotdict["friend_aliases"],
            plotData.plotdict["tree_draw_options"]),
            description="Reading ROOT inputs",
            visible=not self.hide_progressbar)):

        # check whether to read from TTree or from TDirectory
        root_folder_type = roottools.RootTools.check_type(
                root_files, folders,
                print_quantities=plotData.plotdict["quantities"])
        root_tree_chain = None
        root_histogram = None

        if root_folder_type == "TTree":
            # NOTE(review): variable_expression is built but not used in
            # this block -- presumably superseded by histogram_from_tree
            variable_expression = "%s%s%s" % (
                    z_expression + ":" if z_expression else "",
                    y_expression + ":" if y_expression else "",
                    x_expression)
            # default to 25 bins per axis when no binning is configured
            root_tree_chain, root_histogram = root_tools.histogram_from_tree(
                    root_files, folders,
                    x_expression, y_expression, z_expression,
                    x_bins=["25"] if x_bins is None else x_bins,
                    y_bins=["25"] if y_bins is None else y_bins,
                    z_bins=["25"] if z_bins is None else z_bins,
                    weight_selection=weight, option=option, name=None,
                    friend_files=friend_files,
                    friend_folders=friend_folders,
                    friend_alias=friend_alias)
        elif root_folder_type == "TDirectory":
            # read a pre-made object directly from the file
            if x_expression is None:
                log.error('No x_expression provided.')
                sys.exit(1)
            root_objects = [os.path.join(folder, x_expression) for folder in folders]
            root_histogram = roottools.RootTools.histogram_from_file(
                    root_files, root_objects,
                    x_bins=x_bins, y_bins=y_bins, z_bins=z_bins,
                    name=None)
            # enable per-bin sum-of-weights errors where supported
            if hasattr(root_histogram, "Sumw2"):
                root_histogram.Sumw2()
        else:
            log.critical("Error getting ROOT object from file. Exiting.")
            sys.exit(1)

        log.debug("Input object %d (nick %s):" % (index, nick))
        if log.isEnabledFor(logging.DEBUG):
            root_histogram.Print()

        # save tree (chain) in plotData merging chains with same nick names
        if (not root_tree_chain is None) and plotData.plotdict["keep_trees"]:
            if nick in plotData.plotdict.setdefault("root_trees", {}):
                plotData.plotdict["root_trees"][nick].Add(root_tree_chain)
            else:
                plotData.plotdict["root_trees"][nick] = root_tree_chain

        # save histogram in plotData
        # merging histograms with same nick names is done in upper class
        plotData.plotdict.setdefault("root_objects", {}).setdefault(nick, []).append(root_histogram)

    # run upper class function at last
    super(InputRoot, self).run(plotData)
def main():
    """Sort the entries of a ROOT tree by the given branches and write the
    sorted tree to a new output file."""
    parser = argparse.ArgumentParser(description="Sort trees.",
            parents=[logger.loggingParser])
    parser.add_argument("inputs", nargs="+",
            help="Input files containing the tree to sort.")
    parser.add_argument("-t", "--tree", required=True,
            help="Path to the tree object in the file.")
    parser.add_argument("-b", "--branches", nargs="+", default=["run", "lumi", "event"],
            help="Branch names to be considered for the sorting.")
    parser.add_argument("-o", "--output", default="output.root",
            help="Output ROOT file.")
    args = parser.parse_args()
    logger.initLogger(args)

    # TTree::Draw only provides four value buffers (GetV1..GetV4)
    # https://root.cern.ch/root/roottalk/roottalk01/3646.html
    args.branches = args.branches[:4]

    log.info("Opening input from")
    input_tree = ROOT.TChain()
    for input_file in args.inputs:
        tree_path = os.path.join(input_file, args.tree)
        log.info("\t" + tree_path)
        input_tree.Add(tree_path)
    input_tree.SetCacheSize(128 * 1024 * 1024)

    n_entries = input_tree.GetEntries()
    branch_values = [[] for _ in xrange(len(args.branches))]

    # read the sort keys in chunks; larger buffers make problems
    n_entries_per_iteration = 10000000
    n_iterations = int(math.ceil(n_entries / float(n_entries_per_iteration)))
    for iteration in progressiterator.ProgressIterator(
            range(n_iterations),
            description="Retrieving branch values for sorting"):
        cut = "(Entry$>=({i}*{n}))*(Entry$<(({i}+1)*{n}))".format(
                i=iteration, n=n_entries_per_iteration)
        input_tree.Draw(":".join(args.branches), cut, "goff")
        raw_buffers = [input_tree.GetV1(), input_tree.GetV2(),
                input_tree.GetV3(), input_tree.GetV4()]
        for branch_index, raw_buffer in enumerate(raw_buffers[:len(args.branches)]):
            branch_values[branch_index].extend(list(numpy.ndarray(
                    input_tree.GetSelectedRows(),
                    dtype=numpy.double,
                    buffer=raw_buffer)))

    log.info("Sorting of the tree entry indices...")
    # attach the entry index as column 0, sort by the branch values
    rows = zip(*([range(n_entries)] + branch_values))
    rows.sort(key=lambda row: row[1:])
    sorted_entries = list(zip(*rows)[0])

    log.info("Creating output " + args.output + "...")
    with tfilecontextmanager.TFileContextManager(args.output, "RECREATE") as output_file:
        output_tree = input_tree.CloneTree(0)
        for entry in progressiterator.ProgressIterator(
                sorted_entries, description="Copying tree entries"):
            input_tree.GetEntry(entry)
            output_tree.Fill()
        output_file.Write()
    log.info("Save sorted tree in " + os.path.join(args.output, args.tree) + ".")
def main():
    """Check the validity of Artus outputs in a project directory and
    reset/resubmit failed grid-control jobs."""
    parser = argparse.ArgumentParser(
            description="Check the validity of Artus outputs in a project directory.",
            parents=[logger.loggingParser])
    parser.add_argument("project_directory", help="Artus project directory")
    parser.add_argument("--dry-run", help="Only print problematic job numbers",
            default=False, action="store_true")
    parser.add_argument("--no-resubmission", help="Only print and invalidate problematic jobs",
            default=False, action="store_true")
    args = parser.parse_args()
    logger.initLogger(args)

    # GC settings
    artus_gc_config = os.path.join(args.project_directory, "grid-control_config.conf")

    # total number of jobs as recorded by grid-control
    # (strip() already removes trailing whitespace; the former extra
    # rstrip() was redundant)
    n_artus_jobs = 0
    with gzip.open(os.path.join(args.project_directory, "workdir/params.map.gz"), "rb") as gc_params_map:
        n_artus_jobs = int(gc_params_map.read().strip())

    # locate artus outputs, keyed by job number
    artus_outputs = {}
    artus_root_outputs = glob.glob(
            os.path.join(args.project_directory, "output/*/*_job_*_output.root"))
    for artus_root_output in progressiterator.ProgressIterator(
            artus_root_outputs, description="Locating Artus ROOT output"):
        job_number = int(re.search(
                ".*_job_(?P<job_number>\d+)_output.root",
                artus_root_output).groupdict().get("job_number"))
        artus_outputs.setdefault(job_number, {})["root"] = artus_root_output

    artus_log_outputs = glob.glob(
            os.path.join(args.project_directory, "output/*/*_job_*_log.log"))
    for artus_log_output in progressiterator.ProgressIterator(
            artus_log_outputs, description="Locating Artus log output"):
        job_number = int(re.search(
                ".*_job_(?P<job_number>\d+)_log.log",
                artus_log_output).groupdict().get("job_number"))
        artus_outputs.setdefault(job_number, {})["log"] = artus_log_output

    failed_job_numbers = []

    # check existance of all files
    for job_number in progressiterator.ProgressIterator(
            xrange(n_artus_jobs), description="Check existance of all Artus outputs"):
        outputs = artus_outputs.get(job_number)
        if ((outputs is None) or
                (outputs.get("root") is None) or
                (outputs.get("log") is None)):
            failed_job_numbers.append(str(job_number))

    # check validity of ROOT files
    for job_number, outputs in progressiterator.ProgressIterator(
            artus_outputs.items(), description="Check validity of Artus ROOT outputs"):
        # BUGFIX: jobs that produced only a log file have no "root" entry;
        # they are already flagged by the existence check above, and
        # indexing outputs["root"] here would raise KeyError.
        if "root" not in outputs:
            continue
        with tfilecontextmanager.TFileContextManager(outputs["root"], "READ") as root_file:
            # https://root.cern.ch/root/roottalk/roottalk02/4340.html
            if root_file.IsZombie() or root_file.TestBit(ROOT.TFile.kRecovered):
                failed_job_numbers.append(str(job_number))
            else:
                # a healthy output contains more than just the top directory
                elements = roottools.RootTools.walk_root_directory(root_file)
                if len(elements) <= 1:
                    failed_job_numbers.append(str(job_number))

    if len(failed_job_numbers) == 0:
        log.info("No problematic Artus outputs found.")
    else:
        # invalidate the failed jobs in grid-control and optionally resubmit
        gc_reset_command = "go.py --reset id:" + (",".join(failed_job_numbers)) + " " + artus_gc_config
        log.info(gc_reset_command)
        if not args.dry_run:
            logger.subprocessCall(shlex.split(gc_reset_command))
            if not args.no_resubmission:
                gc_run_command = "go.py " + artus_gc_config
                log.info(gc_run_command)
                logger.subprocessCall(shlex.split(gc_run_command))
def run(self, plotData):
    """Build ROOT objects (function, graph or histogram) from literal values
    given in plotData.plotdict and store them under their nicks in
    plotData.plotdict["root_objects"].

    Per input, the mode is chosen from the presence of x_bins and the
    number of x/y values: no x_bins and no y values -> TF1/2/3 function;
    no x_bins but y values -> graph; x_bins with several x values ->
    histogram.
    """
    # iterate the 13 per-input lists of the plotdict in lockstep
    for index, (
            nick,
            x_bins, x_values, x_errors, x_errors_up,
            y_bins, y_values, y_errors, y_errors_up,
            z_bins, z_values, z_errors,
            weights,
    ) in enumerate(pi.ProgressIterator(zip(*[plotData.plotdict[key] for key in [
            "nicks",
            "x_bins", "x_expressions", "x_errors", "x_errors_up",
            "y_bins", "y_expressions", "y_errors", "y_errors_up",
            "z_bins", "z_expressions", "z_errors",
            "weights",
    ]]), description="Reading inputs")):

        # prepare unique name
        name_hash = hashlib.md5("_".join([str(item) for item in [
                nick,
                x_bins, x_values, x_errors, x_errors_up,
                y_bins, y_values, y_errors, y_errors_up,
                z_bins, z_values, z_errors,
                weights]])).hexdigest()

        # determine mode
        create_function = False
        create_graph = False
        create_histogram = False
        if x_bins is None:
            if len(y_values) == 0:
                create_function = True
            else:
                create_graph = True
        else:
            if len(x_values) == 1:
                create_function = True
            else:
                create_histogram = True

        if create_function:
            # prepare binning
            default_binning = ["100,-1e3,1e3"]
            if x_bins is None:
                x_bins = copy.deepcopy(default_binning)
            if y_bins is None:
                y_bins = copy.deepcopy(default_binning)
            if z_bins is None:
                z_bins = copy.deepcopy(default_binning)
            # each binning string is "<n>,<low>,<high>"
            x_bins = [float(x) for x in x_bins[0].split(",")]
            y_bins = [float(y) for y in y_bins[0].split(",")]
            z_bins = [float(z) for z in z_bins[0].split(",")]

            # the x_values carry the formula text for the function
            expression = " ".join([str(x_value) for x_value in x_values])
            formula = ROOT.TFormula("formula_"+name_hash, expression)

            # choose TF1/TF2/TF3 by the dimensionality of the formula and
            # collect the corresponding axis limits
            function_class = None
            function_class_name = ""
            lim_args = []
            if formula.GetNdim() >= 1:
                function_class = ROOT.TF1
                function_class_name = "ROOT.TF1"
                lim_args.extend(x_bins[1:])
            if formula.GetNdim() >= 2:
                function_class = ROOT.TF2
                function_class_name = "ROOT.TF2"
                lim_args.extend(y_bins[1:])
            if formula.GetNdim() >= 3:
                function_class = ROOT.TF3
                function_class_name = "ROOT.TF3"
                lim_args.extend(z_bins[1:])

            log.debug(function_class_name+"(function_"+name_hash+", "+expression+", "+(", ".join([str(lim) for lim in lim_args]))+")")
            root_function = function_class("function_"+name_hash, expression, *lim_args)
            # number of sampling points per axis (first binning entry)
            if formula.GetNdim() >= 1:
                root_function.SetNpx(int(x_bins[0]))
            if formula.GetNdim() >= 2:
                root_function.SetNpy(int(y_bins[0]))
            if formula.GetNdim() >= 3:
                root_function.SetNpz(int(z_bins[0]))
            plotData.plotdict.setdefault("root_objects", {})[nick] = root_function

        elif create_graph:
            if len(z_values) == 0:
                # 2D graph: symmetric or asymmetric errors
                if len(x_errors_up) == 0 and len(y_errors_up) == 0:
                    log.debug("ROOT.TGraphErrors("+
                            str(len(x_values))+", "+
                            str(array.array("d", x_values))+", "+str(array.array("d", y_values))+", "+
                            str(array.array("d", x_errors))+", "+str(array.array("d", y_errors))+")"
                    )
                    plotData.plotdict.setdefault("root_objects", {})[nick] = ROOT.TGraphErrors(
                            len(x_values),
                            array.array("d", x_values), array.array("d", y_values),
                            array.array("d", x_errors), array.array("d", y_errors)
                    )
                else:
                    log.debug("ROOT.TGraphAsymmErrors("+
                            str(len(x_values))+", "+
                            str(array.array("d", x_values))+", "+str(array.array("d", y_values))+", "+
                            str(array.array("d", x_errors))+", "+str(array.array("d", x_errors_up))+", "+
                            str(array.array("d", y_errors))+", "+str(array.array("d", y_errors_up))+")"
                    )
                    plotData.plotdict.setdefault("root_objects", {})[nick] = ROOT.TGraphAsymmErrors(
                            len(x_values),
                            array.array("d", x_values), array.array("d", y_values),
                            array.array("d", x_errors), array.array("d", x_errors_up),
                            array.array("d", y_errors), array.array("d", y_errors_up)
                    )
            else:
                # 3D graph with symmetric errors
                log.debug("ROOT.TGraph2DErrors("+
                        str(len(x_values))+", "+
                        str(array.array("d", x_values))+", "+str(array.array("d", y_values))+", "+str(array.array("d", z_values))+", "+
                        str(array.array("d", x_errors))+", "+str(array.array("d", y_errors))+", "+str(array.array("d", z_errors))+")"
                )
                plotData.plotdict.setdefault("root_objects", {})[nick] = ROOT.TGraph2DErrors(
                        len(x_values),
                        array.array("d", x_values), array.array("d", y_values), array.array("d", z_values),
                        array.array("d", x_errors), array.array("d", y_errors), array.array("d", z_errors)
                )
            plotData.plotdict["root_objects"][nick].SetName("graph_"+name_hash)
            plotData.plotdict["root_objects"][nick].SetTitle("")

        elif create_histogram:
            root_histogram = roottools.RootTools.create_root_histogram(
                    x_bins=x_bins, y_bins=y_bins, z_bins=z_bins,
                    profile_histogram=False,
                    name="histogram_"+name_hash
            )
            if root_histogram.GetDimension() == 1:
                if len(y_values) == 0:
                    # fill x values with weights
                    log.debug("ROOT.TH1.FillN("+str(len(x_values))+", "+str(array.array("d", x_values))+", "+str(array.array("d", weights))+")")
                    root_histogram.FillN(len(x_values), array.array("d", x_values), array.array("d", weights))
                else:
                    # set bin contents (and errors) directly per x value
                    set_bin_errors = any([bin_error != 0.0 for bin_error in y_errors])
                    log.debug("ROOT.TH1.SetBinContent/SetBinError(<"+str(x_values)+", "+str(y_values)+", "+str(y_errors)+">)")
                    for x_value, y_value, bin_error in zip(x_values, y_values, y_errors):
                        global_bin = root_histogram.FindBin(x_value)
                        root_histogram.SetBinContent(global_bin, y_value)
                        if set_bin_errors:
                            root_histogram.SetBinError(global_bin, bin_error)
            elif root_histogram.GetDimension() == 2:
                if len(z_values) == 0:
                    # fill (x, y) pairs with weights
                    log.debug("ROOT.TH1.FillN("+str(len(x_values))+", "+str(array.array("d", x_values))+", "+str(array.array("d", y_values))+", "+str(array.array("d", weights))+")")
                    root_histogram.FillN(len(x_values), array.array("d", x_values), array.array("d", y_values), array.array("d", weights))
                else:
                    # set bin contents (and errors) directly per (x, y) pair
                    set_bin_errors = any([bin_error != 0.0 for bin_error in z_errors])
                    log.debug("ROOT.TH1.SetBinContent/SetBinError(<"+str(x_values)+", "+str(y_values)+", "+str(z_values)+", "+str(z_errors)+">)")
                    for x_value, y_value, z_value, bin_error in zip(x_values, y_values, z_values, z_errors):
                        global_bin = root_histogram.FindBin(x_value, y_value)
                        root_histogram.SetBinContent(global_bin, z_value)
                        if set_bin_errors:
                            root_histogram.SetBinError(global_bin, bin_error)
            elif root_histogram.GetDimension() == 3:
                # fill (x, y, z) triples with weights
                log.debug("ROOT.TH1.FillN("+str(len(x_values))+", "+str(array.array("d", x_values))+", "+str(array.array("d", y_values))+", "+str(array.array("d", z_values))+", "+str(array.array("d", weights))+")")
                root_histogram.FillN(len(x_values), array.array("d", x_values), array.array("d", y_values), array.array("d", z_values), array.array("d", weights))
            plotData.plotdict.setdefault("root_objects", {})[nick] = root_histogram

    # run upper class function at last
    super(InputInteractive, self).run(plotData)
os.path.join(output_dir, "125")) cv_cf_scan_plot_configs[0].setdefault("nicks", []).append("2d_hist_8TeV") cv_cf_scan_plot_configs[0].setdefault("2d_histogram_nicks", []).append( cv_cf_scan_plot_configs[0]["nicks"][-1]) cv_cf_scan_plot_configs[0].setdefault("contour_graph_nicks", []).append("contour_8TeV") cv_cf_scan_plot_configs[0].setdefault("labels", []).append(" 8 TeV, 20/fb") cv_cf_scan_plot_configs[0]["colors"] = [ "#bf2229", "#b2e4d1", "#7fd2b8", "#4cc1a5", "#00a88f" ] cv_cf_scan_plot_configs[0][ "filename"] = "cv_cf_scan_1sigma_over_lumi_8_13_TeV" cv_cf_scan_plot_configs[1][ "filename"] = "cv_cf_scan_1sigma_over_lumi_13_TeV" for lumi in progressiterator.ProgressIterator( args.lumis, description="Process projections"): output_dir = os.path.join(args.output_dir, "13TeV", "lumi_{l}".format(l=lumi)) if not args.plots_only: clear_output_dir(output_dir, args.print_only) # datacards command = "SMLegacyDatacards {channels} --output {output_dir} --asimov --asimov-mass 125 --energy 13 --lumi {l} --masses 125".format( channels=channels, output_dir=output_dir, l=lumi) log.info(command) if not args.print_only: logger.subprocessCall(shlex.split(command)) # limits do_limits(output_dir, args.print_only)