def data_object_names(filename):
    """Return the keys of the objects in a YODA file, minus some types.

    The file is read with ``yoda.readYODA``; entries whose ``type`` is
    ``'Counter'`` or ``'Scatter1D'`` are left out of the returned list.
    """
    skipped_types = ('Counter', 'Scatter1D')
    names = []
    for name, data_object in yoda.readYODA(filename).items():
        if data_object.type not in skipped_types:
            names.append(name)
    return names
def main():
    """Print the fiducial selection efficiency stored in a YODA cutflow.

    The file named by ``args.input`` is read and its
    ``/DMHiggsFiducial/Cutflow`` entry is looked up. The efficiency is the
    area of the last cutflow bin divided by the area of the first one.
    """
    args = parser.parse_args()
    histos = yoda.readYODA(args.input)
    cutflow = histos['/DMHiggsFiducial/Cutflow']

    # Look the two bin areas up once and reuse them for both messages.
    passed = cutflow.bins[-1].area
    total = cutflow.bins[0].area

    # print(...) with a single argument gives the same output on Python 2
    # and Python 3; the print statement only parses on Python 2.
    print('{} out of {} events passed the event selection'.format(
        passed, total))
    efficiency = passed / total
    print('fiducial efficiency is {}'.format(efficiency))
def getPtHistos(filelist, key):
    """Collect the ``<key>Z_pt`` objects from every YODA file in ``filelist``.

    Args:
        filelist: iterable of YODA file names.
        key: prefix; an object is kept when ``key + 'Z_pt'`` occurs in its
            path.

    Returns:
        A dict mapping each file name to a dict ``{path: object}`` holding
        the selected objects of that file. Every file gets an entry, even
        when nothing in it matches.
    """
    wanted = key + 'Z_pt'
    histos = {}
    for yFile in filelist:
        selected = histos.setdefault(yFile, {})
        # items() exists on Python 2 and 3, unlike iteritems().
        for path, obj in yoda.readYODA(yFile).items():
            if wanted in path:
                # setdefault() keeps an entry that is already present, which
                # is what the former has_key() guard did.
                selected.setdefault(path, obj)
    return histos
def fiducialeff(requestId):
    """Return the fiducial efficiency stored for the request ``requestId``.

    Reads ``results/<requestId>/Rivet.yoda``, divides the area of the last
    ``/DMHiggsFiducial/Cutflow`` bin by the area of the first one, emits
    ``efficiency_done_<requestId>`` on ``/monitor`` and returns the ratio.
    """
    yoda_path = '{}/Rivet.yoda'.format('results/{}'.format(requestId))
    cutflow = yoda.readYODA(yoda_path)['/DMHiggsFiducial/Cutflow']
    ratio = cutflow.bins[-1].area / cutflow.bins[0].area
    # Signal completion only after the ratio has been computed.
    io.Of('/monitor').Emit('efficiency_done_{}'.format(requestId))
    return ratio
def run(self, plotData):
    """Read each YODA file, convert its objects to ROOT and register them.

    For every file listed in ``plotData.plotdict['yoda_files']`` all objects
    are converted with ``yoda.to_root``. Each converted object is renamed to
    its own name with the slashes removed, that name is appended to
    ``plotdict['nicks']`` and the object is stored under it in
    ``plotdict['root_objects']``.
    """
    for yoda_path in plotData.plotdict['yoda_files']:
        # Convert the whole file first, then rename and register the results.
        converted = list(map(yoda.to_root, yoda.readYODA(yoda_path).values()))
        for root_obj in converted:
            nick = root_obj.GetName().replace("/", "")
            root_obj.SetName(nick)
            plotData.plotdict.setdefault("nicks", []).append(nick)
            plotData.plotdict.setdefault("root_objects", {})[nick] = root_obj
def compare(self, yoda_files):
    """
    Compare histograms from different `yoda_files`.

    Each file name is resolved against the configured ``rivet_output`` path,
    read, passed through ``convert_histos`` and put on ``self._ws`` as a
    ``'compare_histos'`` message. A file that raises ``IOError`` is reported
    on stdout and skipped.
    """
    for yodafile in yoda_files:
        try:
            path = os.path.join(config.get('paths', 'rivet_output'), yodafile)
            histos = convert_histos(yoda.readYODA(path))
            self._ws.put(['compare_histos', histos])
        except IOError:
            # print(...) with one argument gives the same output on Python 2
            # and 3; the print statement only parses on Python 2.
            print("Simulation.compare: error reading yoda file {}".format(yodafile))
def bins_from_yoda(path, histogram_name, centered=True):
    """Build bins from the object ``histogram_name`` of the YODA file ``path``.

    A ``yoda.Scatter2D`` contributes its ``points`` together with the
    Scatter2D accessor functions; any other object contributes its ``bins``
    together with the Histo1D accessors. The result is whatever
    ``bins_with_object`` returns for them.
    """
    yoda_histo = yoda.readYODA(path)[histogram_name]
    if isinstance(yoda_histo, yoda.Scatter2D):
        entries = yoda_histo.points
        accessors = (bin_height_from_yoda_Scatter2D,
                     bin_edges_from_yoda_Scatter2D,
                     left_bin_edge_from_yoda_Scatter2D,
                     right_bin_edge_from_yoda_Scatter2D)
    else:
        entries = yoda_histo.bins
        accessors = (bin_height_from_yoda_Histo1D,
                     bin_edges_from_yoda_Histo1D,
                     left_bin_edge_from_yoda_Histo1D,
                     right_bin_edge_from_yoda_Histo1D)
    return bins_with_object(entries, *accessors, centered=centered)
def resolve_data_object(filename_or_data_object, name, divide_by=None, rebin_count=1):
    """Return a data object, optionally rebinned and divided by another one.

    Args:
        filename_or_data_object: either the name of a YODA file, from which
            the entry ``name`` is read, or a data object, which is cloned so
            that the caller's object is not modified.
        name: key of the object inside the YODA file; also used to resolve
            ``divide_by``.
        divide_by: optional file name or data object used as denominator.
            It is resolved without rebinning.
        rebin_count: passed to ``rebin`` unless it equals 1.

    Returns:
        The resolved object. Two ``Histo1D`` operands are combined with
        ``divideBy``. If either operand is a ``Scatter2D`` the result is a
        scatter whose y values and y errors are divided point by point by
        the denominator's y value; a zero denominator gives y = 1 with zero
        errors. For any other type combination the object is returned
        undivided.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(filename_or_data_object, string_types):
        data_object = yoda.readYODA(filename_or_data_object)[name]
    else:
        data_object = filename_or_data_object.clone()

    if rebin_count != 1:
        data_object.rebin(rebin_count)

    if divide_by is None:
        return data_object

    divide_by = resolve_data_object(divide_by, name)
    if data_object.type == "Histo1D" and divide_by.type == "Histo1D":
        return data_object.divideBy(divide_by)

    if data_object.type == "Scatter2D" or divide_by.type == "Scatter2D":
        # Convert both operands to scatters so that ``points`` is available
        # on each of them.
        data_object = yoda.mkScatter(data_object)
        denominator = yoda.mkScatter(divide_by)
        for point, denominator_point in zip(data_object.points, denominator.points):
            if denominator_point.y == 0.0:
                new_y = 1.0
                new_y_errs = [0.0, 0.0]
            else:
                # Only the numerator's errors are scaled; the denominator's
                # own errors are not propagated.
                new_y = point.y / denominator_point.y
                new_y_errs = [y_err / denominator_point.y for y_err in point.yErrs]
            point.y = new_y
            point.yErrs = new_y_errs
    return data_object
def run(self):
    """Run the Rivet analysis on the events read from ``self.fifofile``.

    Progress messages, signals and converted histograms are put on
    ``self._ws``; control messages (``'pause'``, ``'resume'``, ``'stop'``)
    are read from ``self._q``. Every ``self.histointerval`` events the
    current histograms are written to a temporary yoda file, read back and
    sent. After the event loop the final histograms are written to
    ``final-<timestamp>.yoda``, sent, and the file name is put on
    ``self._y``.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; in particular, whether the finalisation section belongs inside
    ``if run.init(...)`` should be confirmed against the original file.
    """
    with StdRedirect():
        # Change to the `output` directory, where the yoda
        # histogram files will be stored.
        os.chdir(config.get('paths', 'rivet_output'))

        rivet.util.check_python_version()
        rivet.util.set_process_name('rivet')

        # Add an analysis lib path for extra analyses
        # (path specified in `config.ini`)
        rivet.addAnalysisLibPath(config.get('paths', 'analysis_lib'))

        ah = rivet.AnalysisHandler()
        ah.setIgnoreBeams(False)
        ah.addAnalysis(self.analysis)
        run = rivet.Run(ah)

        # Retrieve reference histograms if they exist
        ref_histos = None
        ref_histos_sent = False
        try:
            ref_file = os.path.join(config.get('paths', 'refdata'), "{}.yoda".format(self.analysis))
            ref_histos = convert_histos(yoda.readYODA(ref_file))
        except IOError:
            print "No refdata for {}".format(self.analysis)

        # Initialize
        if run.init(self.fifofile):
            evtnum = 0

            # Event loop
            while True:
                # Pause/resume loop: poll the control queue without blocking;
                # after 'pause', block until 'resume' or 'stop' arrives.
                try:
                    msg = self._q.get(False)
                    if msg == 'pause':
                        self._ws.put(['signal', RIV_STP])
                        while True:
                            msg = self._q.get(True)
                            if msg == 'resume':
                                self._ws.put(['signal', RIV_RUN])
                                break
                            elif msg == 'stop':
                                # NOTE(review): this only leaves the wait
                                # loop; event processing continues below.
                                break
                except Queue.Empty:
                    pass

                # Read and process current event
                if not run.readEvent() or not run.processEvent():
                    break
                evtnum += 1
                self._ws.put(['rivet', "Event no. {} processed\n".format(evtnum)])

                # Intermediate histograms
                if evtnum % self.histointerval == 0:
                    now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S%f")
                    yodafile = "{}.yoda".format(now)
                    # Write histograms to yoda file
                    ah.writeData(yodafile)
                    # Read the file with yoda and normalize the histograms
                    histos = convert_histos(yoda.readYODA(yodafile), True)
                    # Delete the file (no need to keep intermediate histogram files)
                    os.unlink(yodafile)
                    self._ws.put(['histos', histos])
                    # The reference histograms are sent once, together with
                    # the first batch of intermediate histograms.
                    if not ref_histos_sent and ref_histos:
                        self._ws.put(['histos', ref_histos])
                        ref_histos_sent = True

            self._ws.put(['rivet', "Finished event loop\n"])

            # Finalization
            run.finalize()
            ah.finalize()
            now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S%f")
            yodafile = "final-{}.yoda".format(now)
            # Write final histograms to yoda file (and keep it)
            ah.writeData(yodafile)
            # Read the file with yoda
            histos = convert_histos(yoda.readYODA(yodafile))
            self._ws.put(['histos', histos])
            if ref_histos:
                self._ws.put(['histos', ref_histos])
            # Send the yoda file name to the client
            # (used to later compare different runs of an analysis)
            self._ws.put(['yoda', "final-{}".format(now)])
            self._y.put(yodafile)
import ROOT
import yoda, optparse, operator, itertools, sys, copy
from math import sqrt

parser = optparse.OptionParser(usage=__doc__)
parser.add_option('-o', '--output', default='-', dest='OUTPUT_FILE')
opts, args = parser.parse_args()
# First positional argument is the action, the remaining ones are input files.
action=args[0]
filenames=args[1:]

## Put the incoming objects into a dict from each path to a list of histos
scatters_in = {}
for filename in filenames:
    # items() exists on Python 2 and 3, unlike iteritems().
    for p, ao in yoda.readYODA(filename).items():
        if isinstance(ao, yoda.Scatter2D):
            scatter = ao
        elif isinstance(ao, yoda.Histo1D):
            # Turn the histogram into a scatter: one point per bin, with half
            # the bin width as x error and the height error as y error.
            scatter = yoda.Scatter2D(ao.path, ao.title)
            for bin in ao.bins:
                scatter.addPoint(yoda.Point2D(bin.xMid, bin.height, 0.5*bin.xWidth, bin.heightErr))
        else:
            # Same text as the former ``print "cannot treat ", ao`` statement
            # (which emitted two spaces), but valid on Python 2 and 3.
            print("cannot treat  " + str(ao))
            continue
        scatters_in.setdefault(ao.path, []).append(scatter)

# Seed each output with a copy of the first scatter collected for its path.
scatters_out = {}
for p, scatters in scatters_in.items():
    scatters_out[scatters[0].path] = scatters[0].clone()
def run(self):
    """
    Run the simulation if the analysis and update interval were set.
    Create the `FIFOFile` (in `/tmp`), run PYTHIA and then Rivet.

    If ``PythiaDB`` already holds a result for the parameters read by
    ``read_cmnd_file`` plus the analysis name, the stored yoda file is read
    and sent to the client instead of running anything. All results,
    errors and signals are reported through ``self._ws``.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; the extent of the ``with FIFOFile`` block and the position of
    the log forwarding should be confirmed against the original file.
    """
    if self.analysis == None or self.histointerval == None:
        self._ws.put(['error', "Missing analysis or histointerval property - nothing done"])
    else:
        p = self.read_cmnd_file()
        p['_analysis'] = self.analysis
        db = PythiaDB()

        # If this analysis has already been run successfully with the supplied
        # parameters, just retrieve and display the stored results.
        if db.exists(p):
            yodafile = db.get_yoda(p)
            # Reference data is optional: a missing file is only reported.
            ref_histos = None
            try:
                ref_file = os.path.join(config.get('paths', 'refdata'), "{}.yoda".format(self.analysis))
                ref_histos = convert_histos(yoda.readYODA(ref_file))
            except IOError:
                print "No refdata for {}".format(self.analysis)
            try:
                histos = convert_histos(yoda.readYODA(os.path.join(config.get('paths', 'rivet_output'), yodafile)))
                self._ws.put(['histos', histos])
                if ref_histos:
                    self._ws.put(['histos', ref_histos])
                # The client receives the file name without its `.yoda` suffix.
                self._ws.put(['yoda', yodafile.partition('.yoda')[0]])
                self._ws.put(['signal', SIM_END])
            except IOError:
                self._ws.put(['error', "Unable to retrieve saved histograms"])
                self._ws.put(['signal', SIM_ERR])
        else:
            with FIFOFile(self.fifo) as fifofile:
                # Generate events with PYTHIA
                self._generate(fifofile)
                # Small sleep time to allow PYTHIA subprocess to start
                time.sleep(0.5)
                # Analyse events with Rivet
                self._analyse(fifofile)
                self.rivet.join()
                # If Rivet does not terminate correctly (because of an exception
                # in the C code that cannot be caught in Python), kill PYTHIA,
                # otherwise a lot of "setting badbit" errors would appear in the
                # console, and PYTHIA wouldn't stop.
                if self.rivet.exitcode != 0:
                    self._ws.put(['rivet', "Rivet process terminated... Killing PYTHIA\n"])
                    self.pythia.terminate()
                    self._ws.put(['error', "Rivet error - see console for details"])
                    self._ws.put(['signal', SIM_ERR])
                    self.error = True
                self.pythia.join()

            # Back to the initial directory (of `main.py`)
            os.chdir(sys.path[0])

            if not self.error:
                if self.stopped:
                    self._ws.put(['signal', SIM_STP])
                else:
                    # Store successful analyses
                    yodafile = self._y.get(True)
                    p['_yoda'] = yodafile
                    db.add(p)
                    self._ws.put(['signal', SIM_END])

            # Forward the captured Rivet output line by line.
            with open(os.path.join(sys.path[0], 'rivet_out.log'), 'r') as f:
                for line in f.readlines():
                    self._ws.put(['rivet_out', line])
            with open(os.path.join(sys.path[0], 'rivet_err.log'), 'r') as f:
                for line in f.readlines():
                    self._ws.put(['rivet_err', line])
opts, args = op.parse_args()
# The positional arguments are the YODA files to process.
YODAFILES = args

from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

# Start as None on every rank; the rank-0 branch below assigns `central` and `VVV`.
binids, VVV, aix, aix_flat, central = None, None, None, None, None

if rank == 0:
    # TODO if len(args)==1 and os.path.isdir(args[0]) --- hierarchical reading with pnames finding etc
    # Let's assume they are all consistent TODO add robustness
    DATA0 = yoda.readYODA(args[0])
    L = sorted(list(DATA0.keys()))
    names = [x for x in L]  # if not "/RAW" in x]
    # Keys ending in "]" are treated as variations, all others as central.
    central = [x for x in names if not x.endswith("]")]
    variations = [x for x in names if x.endswith("]")]
    # TODO In principle one probably should check that all variations are always the
    # same, we assume this is the case here
    # For each central key, collect the variation keys of the form "<central>[...".
    var = []
    for c in central:
        var.append([x for x in variations if x.startswith(c + "[")])

    ## That's the weight and weight variation order we store the data in
    VVV = ["CentralWeight"]
    import re
#!/usr/bin/env python
import yoda,sys,re

if len(sys.argv) != 3:
    sys.exit("Usage: yoda-rebin.py input.yoda output.yoda")

all_aos = yoda.readYODA(sys.argv[1])

# we want to rebin anything except multiplicity
# The pattern is compiled once here rather than on every loop iteration.
pattern = re.compile(".*GA_00_00.*|.*Mult.*")

# values() exists on Python 2 and 3, unlike itervalues().
for ao in all_aos.values():
    if not pattern.match(ao.path):
        ao.rebin(5)

yoda.writeYODA(all_aos,sys.argv[2])
#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
import math
import sys
import os
import shutil
import itertools
import yoda

con = 0
analysisobjects = yoda.readYODA("LHCpp_r04.yoda")

# Iterating a dict yields its keys (path strings), which have no ``bins``;
# go through the values when readYODA hands back a mapping, and keep plain
# iteration for a list-like result.
for ao in (analysisobjects.values() if isinstance(analysisobjects, dict) else analysisobjects):
    for bin in ao.bins:
        # print(...) with one argument is valid on Python 2 and 3.
        print(bin.sumW)
def readHistogram(distribution, binRange, leftBinEdges, binWidths, scalingReversed, overallScaling):
    """Read the bin heights (and, if still unknown, the binning) of a distribution.

    Args:
        distribution: mapping describing the input; ``distribution["file"]``
            is the file name and ``distribution["histogramName"]`` optionally
            selects the YODA histogram. It is also passed to ``isDat``,
            ``isYODA`` and ``readConfiguration``.
        binRange: two-element list ``[first bin, number of bins]``; a count
            of 0 means "all bins" and is replaced in place by the actual
            number.
        leftBinEdges: left bin edges; filled in only when passed in empty.
        binWidths: bin widths, filled in together with ``leftBinEdges``.
        scalingReversed: for YODA input, divide the heights by a scale
            factor parsed from the file's ``ScaledBy=`` line.
        overallScaling: factor the YODA bin heights are multiplied with.

    Returns:
        The tuple ``(binRange, leftBinEdges, binWidths, binHeights)``.

    Raises:
        Exception: if no suitable YODA histogram is found or the file is
            neither a .dat nor a YODA distribution.

    NOTE(review): the nesting was reconstructed from a flattened source;
    confirm that the ``overallScaling`` multiplication is meant to sit
    outside ``if scalingReversed`` and that the "not enough bins" check
    belongs to the ``except KeyError`` branch.
    """
    fileName = distribution["file"]
    print("Read " + fileName)
    if isDat(distribution):
        data = numpy.loadtxt(fileName)
        column = readConfiguration(distribution, key='column', default=2)
        if binRange[1] == 0:
            # Assume we want all bins
            binRange[1] = data.shape[0]
        binHeights = data[binRange[0]:binRange[0]+binRange[1], column]
        if len(leftBinEdges) == 0 and column > 1 and data.shape[1] > 2:
            # Assume that the first two columns give us the bin edges
            # (the caller's lists are extended in place here)
            for edges in data[binRange[0]:binRange[0]+binRange[1], :2]:
                leftBinEdges.append(edges[0])
                binWidths.append(edges[1] - edges[0])
    elif isYODA(distribution):
        yodaHistos = yoda.readYODA(fileName)
        try:
            histogramName = distribution["histogramName"]
        except KeyError:
            # The histogram index is not explicitly given
            histogramName = ""
            if binRange[1] > 0:
                # We already have a valid bin range
                # Try to find a matching histogram
                for yodaHistoName in yodaHistos:
                    yodaHisto = yodaHistos[yodaHistoName]
                    if (isinstance(yodaHisto, yoda.core.Histo1D)):
                        lastBin = binRange[0] + binRange[1]
                        if (len(yodaHisto.bins) >= lastBin):
                            histogramName = yodaHistoName
                            # The loop keeps going, so the last histogram
                            # with enough bins is the one that is used.
                            continue
            if len(histogramName) == 0:
                raise Exception("There is no YODA histogram in " + fileName
                                + " which has enough bins "
                                + "to accommodate the bin range")
        yodaHistoBins = yodaHistos[histogramName].bins
        if binRange[1] == 0:
            binRange[1] = len(yodaHistoBins)
        yodaValues = [bin.height for bin in yodaHistoBins]
        binHeights = yodaValues[binRange[0]:binRange[0]+binRange[1]]
        if scalingReversed:
            # Manually read out scale_factor (there seems to be no interface in yoda)
            print(histogramName)
            scale_factor = None
            with open(fileName) as f:
                is_correct_histogram = False
                for line in f:
                    # Take the first `ScaledBy=` line that follows the line
                    # ending in the hard-coded histogram path below.
                    if is_correct_histogram and line[:9] == 'ScaledBy=':
                        scale_factor = float(line[9:])
                        break
                    if line[-29:-1] == 'D0_2008_S7554427/' + 'd01-x01-y01':
                        is_correct_histogram = True
            assert(scale_factor is not None)
            print(yodaHistos[histogramName].numEntries())
            # NOTE(review): 100000000.0 is a hard-coded constant; presumably
            # a reference number of events -- confirm.
            scale_factor = scale_factor * yodaHistos[histogramName].numEntries() / 100000000.0
            print('Division by ', scale_factor)
            binHeights = [h / scale_factor for h in binHeights]
        print('Multiplication by ', overallScaling)
        binHeights = [h * overallScaling for h in binHeights]
        if len(leftBinEdges) == 0:
            # Unlike the .dat branch, these are new lists; the caller only
            # sees them through the return value.
            leftBinEdges = [bin.xEdges[0] for bin in yodaHistoBins]
            binWidths = [bin.xEdges[1] - bin.xEdges[0] for bin in yodaHistoBins]
    else:
        raise Exception("Unknown file extension detected while parsing"
                        " distribution file names.")
    scaledBy = readConfiguration(distribution, key='scaledBy', default=None)
    # Normalisation (module-level `args.normalize`) takes precedence over an
    # explicit `scaledBy` entry.
    if args.normalize:
        normalizationFactor = sum(binHeights)
        binHeights = [h / normalizationFactor for h in binHeights]
    elif scaledBy is not None:
        binHeights = [h / scaledBy for h in binHeights]
    return binRange, leftBinEdges, binWidths, binHeights
def data_object_names(filename):
    """List the keys found in a YODA file, leaving out two object types.

    Keys whose object has ``type`` equal to ``'Counter'`` or ``'Scatter1D'``
    are dropped; all other keys are returned in the order the file yields
    them.
    """
    data_objects = yoda.readYODA(filename)
    excluded_types = ('Counter', 'Scatter1D')

    def is_kept(key):
        return data_objects[key].type not in excluded_types

    return list(filter(is_kept, data_objects.keys()))
#!/usr/bin/env python
import yoda, sys, re

if len(sys.argv) != 3:
    sys.exit("Usage: yoda-rebin.py input.yoda output.yoda")

all_aos = yoda.readYODA(sys.argv[1])

# we want to rebin anything except multiplicity
# The pattern is compiled once here rather than on every loop iteration.
pattern = re.compile(".*GA_00_00.*|.*Mult.*")

# values() exists on Python 2 and 3, unlike itervalues().
for ao in all_aos.values():
    if not pattern.match(ao.path):
        ao.rebin(5)

yoda.writeYODA(all_aos, sys.argv[2])
def resolve_data_object(
        filename_or_data_object,
        name,
        divide_by=None,
        multiply_by=None,
        subtract_by=None,
        assume_correlated=False,
        use_correlated_division=None,  # this is only for backwards-compatibility
        rebin_count=1,
        rebin_begin=0):
    """Resolve a data object and optionally rebin, subtract, divide or multiply it.

    Args:
        filename_or_data_object: name of a YODA file, from which the entry
            ``name`` is read, or a data object, which is cloned.
        name: key of the object in the YODA file; also used to resolve the
            operands.
        divide_by: float, file name or data object to divide by.
        multiply_by: float, file name or data object to multiply by; only
            used when ``divide_by`` is None.
        subtract_by: file name or data object to subtract (applied before
            the division/multiplication).
        assume_correlated: if true, the y errors are transformed with the
            operand's y value only; otherwise the errors of both operands
            are added in quadrature.
        use_correlated_division: deprecated alias of ``assume_correlated``.
        rebin_count: number of bins/points merged into one; 1 disables
            rebinning.
        rebin_begin: index of the first bin/point that takes part in the
            rebinning.

    Returns:
        The resulting data object; a scatter whenever a subtraction,
        division or multiplication was requested.

    Raises:
        Exception: if a scatter is to be rebinned but its points are not
            adjacent.
    """
    if use_correlated_division is not None:
        assume_correlated = use_correlated_division
        print(
            "Heppyplotlib deprecation warning: Use assume_correlated instead of use_correlated_division"
        )
    if isinstance(filename_or_data_object, str):
        data_object = yoda.readYODA(filename_or_data_object)[name]
    else:
        data_object = filename_or_data_object.clone()
    if not rebin_count == 1:
        if data_object.type == "Histo1D":
            data_object.rebin(rebin_count, begin=rebin_begin)
        else:
            print(
                "WARNING: Will assume statistical errors for rebinning a scatter plot"
            )
            x_coords = [point.x for point in data_object.points]
            y_coords = get_scatter2d_y_coords(data_object)
            x_errs = []
            x_errs.append([point.xErrs[0] for point in data_object.points])
            x_errs.append([point.xErrs[1] for point in data_object.points])
            if not are_points_with_errors_adjacent(x_coords, x_errs):
                raise Exception(
                    "Points must be adjacent for interpreting the scatter plots as a histogram"
                )
            # Points in front of rebin_begin are carried over unchanged.
            new_points = data_object.points[0:rebin_begin]
            i = 0
            # NOTE(review): because of the "- 1", a group that would consist
            # only of the very last point is not emitted -- confirm intended.
            while rebin_begin + i * rebin_count < len(data_object.points) - 1:
                first_index = rebin_begin + i * rebin_count
                last_index = min(first_index + rebin_count,
                                 len(data_object.points))
                points = data_object.points[first_index:last_index]
                # The merged point spans from the left edge of the first to
                # the right edge of the last point of the group.
                left_edge = points[0].x - points[0].xErrs[0]
                right_edge = points[-1].x + points[-1].xErrs[1]
                length = right_edge - left_edge
                new_x = left_edge + length / 2.0
                new_xerrs = length / 2.0
                new_y = 0.0
                new_yerrs = np.array([0.0, 0.0])
                for point in points:
                    # Width-weighted sum of the y values; the width-weighted
                    # errors are summed in quadrature.
                    left_edge = point.x - point.xErrs[0]
                    right_edge = point.x + point.xErrs[1]
                    new_y += (right_edge - left_edge) * point.y
                    new_yerrs += ((right_edge - left_edge) * np.array(point.yErrs))**2
                new_y /= length
                new_yerrs = np.sqrt(new_yerrs) / length
                new_points.append(
                    yoda.Point2D(x=new_x,
                                 y=new_y,
                                 xerrs=new_xerrs,
                                 yerrs=new_yerrs))
                i = i + 1
            # Build a fresh scatter with the same path and title.
            data_object = yoda.Scatter2D(path=data_object.path,
                                         title=data_object.title)
            for point in new_points:
                data_object.addPoint(point)
    if subtract_by is not None:
        data_object = yoda.mkScatter(data_object)
        operand = resolve_data_object(subtract_by, name).mkScatter()
        for point, operand_point in zip(data_object.points, operand.points):
            new_y = point.y - operand_point.y
            if assume_correlated:
                # NOTE(review): this subtracts the operand's y value (not its
                # error) from each error -- confirm this is intended.
                new_y_errs = [y_err - operand_point.y for y_err in point.yErrs]
            if not assume_correlated:
                # assume that we subtract an independent data set, use error propagation
                new_y_errs = []
                for y_err, operand_y_err in zip(point.yErrs,
                                                operand_point.yErrs):
                    err2 = 0.0
                    if point.y != 0.0:
                        err2 += (y_err)**2
                    err2 += (operand_y_err)**2
                    new_y_errs.append(np.sqrt(err2))
            point.y = new_y
            point.yErrs = new_y_errs
    if divide_by is not None or multiply_by is not None:
        data_object = yoda.mkScatter(data_object)
        if isinstance(divide_by, float) or isinstance(multiply_by, float):
            # Scalar operand: scale values and errors alike. Only `float`
            # takes this path; any other operand is resolved as a data
            # object below.
            for point in data_object.points:
                if divide_by is not None:
                    new_y = point.y / divide_by
                    new_y_errs = [y_err / divide_by for y_err in point.yErrs]
                else:
                    new_y = point.y * multiply_by
                    new_y_errs = [y_err * multiply_by for y_err in point.yErrs]
                point.y = new_y
                point.yErrs = new_y_errs
        else:
            if divide_by is not None:
                operand = resolve_data_object(divide_by, name).mkScatter()
            else:
                operand = resolve_data_object(multiply_by, name).mkScatter()
            for point, operand_point in zip(data_object.points, operand.points):
                if operand_point.y == 0.0:
                    # Empty operand: a ratio is set to 1, a product to 0,
                    # both without errors.
                    if divide_by is not None:
                        new_y = 1.0
                    else:
                        new_y = 0.0
                    new_y_errs = [0.0, 0.0]
                else:
                    if divide_by is not None:
                        new_y = point.y / operand_point.y
                        if assume_correlated:
                            new_y_errs = [
                                y_err / operand_point.y for y_err in point.yErrs
                            ]
                    else:
                        new_y = point.y * operand_point.y
                        if assume_correlated:
                            new_y_errs = [
                                y_err * operand_point.y for y_err in point.yErrs
                            ]
                    if not assume_correlated:
                        # assume that we divide/multiply through an independent data set, use error propagation
                        rel_y_errs = []
                        for y_err, operand_y_err in zip(
                                point.yErrs, operand_point.yErrs):
                            err2 = 0.0
                            # A zero y value contributes no relative error
                            # (and would otherwise divide by zero).
                            if point.y != 0.0:
                                err2 += (y_err / point.y)**2
                            err2 += (operand_y_err / operand_point.y)**2
                            rel_y_errs.append(np.sqrt(err2))
                        new_y_errs = [
                            rel_y_err * new_y for rel_y_err in rel_y_errs
                        ]
                point.y = new_y
                point.yErrs = new_y_errs
    return data_object
def fetch(filename):
    """Read the YODA file ``filename`` and return the result of ``yoda.readYODA``.

    When no file name is given (empty string or ``None``) a message is
    printed and an empty list is returned instead.
    """
    # ``not filename`` also covers None, for which len() would raise.
    if not filename:
        # print(...) with one argument is valid on Python 2 and 3.
        print("You did not provide a filename!")
        return []
    return yoda.readYODA(filename)
""" import yoda, os, sys, optparse from yoda.script_helpers import parse_x2y_args import yoda, os, sys, optparse from yoda.script_helpers import parse_x2y_args, filter_aos #parse arguments parser = optparse.OptionParser(usage=__doc__) parser.add_option("-m", "--match", dest="MATCH", metavar="PATT", default=None, help="Only write out histograms whose path matches this regex") parser.add_option("-M", "--unmatch", dest="UNMATCH", metavar="PATT", default=None, help="Exclude histograms whose path matches this regex") opts, args = parser.parse_args() in_out = parse_x2y_args(args, ".yoda",".dat") #check arguments if not in_out: sys.stderr.write("You must specify the FLAT and YODA file names\n") sys.exit(1) cmssw_base=os.environ["CMSSW_BASE"] for i, o in in_out: analysisobjects = yoda.readYODA(i) filter_aos(analysisobjects, opts.MATCH, opts.UNMATCH) yoda.writeAIDA(analysisobjects, o) os.system('python %s/src/UserCode/RivetAnalysis/scripts/aida2root %s'%(cmssw_base,o)) os.system('mv %s %s'%(o,o.replace('.dat','.root')))
elif ao.type() == "Counter": temp = ["{}#{}".format(base, 0)] else: suffixes = ["T", "O", "U"] if ao.type() == "Counter": suffixes.append(0) else: suffixes.extend([i for i in range(len(ao))]) temp = ["{}#{}".format(base, s) for s in suffixes] binids.extend(temp) print(binids[0]) return binids D = yoda.readYODA(sys.argv[1]) # print(list(D.keys())) L = sorted(list(D.keys())) import re # p = re.compile("^\/RAW") names = [x for x in L] # if not "/RAW" in x] central = [x for x in names if not x.endswith("]")] variations = [x for x in names if x.endswith("]")] # In principle one probably should check that all variations are always the # same, we assume this is the case vere here var = [] for c in central: var.append([x for x in variations if x.startswith(c + "[")])