def combine(files, rivet_path, error_calc, rebin_count=None, rebin_counts=None):
    """Combine the data objects at files[0]/rivet_path, files[1]/rivet_path, ...
    using an error_calc function from the heppyplotlib.error_calc module and
    return a YODA data object. files[0] is supposed to be the CV data set.
    """
    if rebin_count is not None and rebin_counts is not None:
        raise Exception("Only use one of the options 'rebin_count' and 'rebin_counts'.")
    elif rebin_count is not None:
        rebin_counts = [rebin_count] * len(files)
    elif rebin_counts is None:
        rebin_counts = [1] * len(files)
    import yoda
    from . import yodaplot
    y_coord_list = []
    for file_name, file_rebin_count in zip(files, rebin_counts):
        data_object = yodaplot.resolve_data_object(file_name, rivet_path,
                                                   rebin_count=file_rebin_count)
        y_coord_list.append(yodaplot.get_y_coords(data_object))
    errs = error_calc(y_coord_list)
    # make sure we are dealing with a scatter object to have the correct notion of errors
    scatter = yoda.mkScatter(yodaplot.resolve_data_object(files[0], rivet_path,
                                                          rebin_count=rebin_counts[0]))
    for point, point_errs in zip(scatter.points, zip(*errs)):
        point.yErrs = point_errs
    return scatter
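# A minimal sketch of an error_calc-compatible function (hypothetical name; the
# real implementations live in heppyplotlib.error_calc). Given one list of
# y-coordinates per file, it must return a sequence whose transpose,
# zip(*errs), yields one (down, up) error pair per bin, as consumed by
# combine() above.
def envelope_errors(y_coord_list):
    import numpy as np
    variations = np.asarray(y_coord_list)
    central = variations[0]  # files[0] is the CV data set
    down = central - variations.min(axis=0)
    up = variations.max(axis=0) - central
    return [down.tolist(), up.tolist()]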
def __init__(self, anaObj, xSec, nEv):
    # Construct with an input yoda aos and a scatter1D for the cross section and nEv
    self.signal = anaObj
    self.xsec = xSec
    self.nev = nEv
    # Initialize the members we always want to access
    self._background = False
    self.ref = False
    self.stack = yoda.Scatter2D
    self.lumi = 1
    self.isScaled = False
    self.scaleFactorData = 1
    self.scaleFactorSig = 1
    self.conturPoints = []
    self.mcLumi = 0.0
    self.scaleMC = 1.0
    # Call the internal functions on initialization to fill the above members
    # with what we want; these should all be private.
    self.__getData()
    self.__getAux()
    self.__getMC()
    self.__getisScaled()
    if self.__has1D():
        self.signal = yoda.mkScatter(self.signal)
    # build stack for plotting
    # self.__buildStack()
    if self.ref:
        self.__doScale()
        self.__fillPoints()
def init_ref():
    """Function to load all reference data and theory *.yoda data"""
    import glob
    refFiles = []
    global scaledYet
    print "Gathering all reference Data (and Theory, if available)"
    rivet_data_dirs = rivet.getAnalysisRefPaths()
    for directory in rivet_data_dirs:
        refFiles.append(glob.glob(os.path.join(directory, '*.yoda')))
    for fileList in refFiles:
        for f in fileList:
            aos = yoda.read(f)
            for path, ao in aos.iteritems():
                if ao.type != "Scatter2D":
                    ao = yoda.mkScatter(ao)
                if ao.type == "Scatter1D":
                    ao = util.mkScatter2D(ao)
                if path.startswith('/REF/'):
                    refObj[path] = ao
                    scaledYet[path] = False
                if path.startswith('/THY/'):
                    refObj[path] = ao
                    scaledYet[path] = False
    global REFLOAD
    REFLOAD = True
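# Hypothetical usage, assuming the module-level globals referenced above
# (refObj, scaledYet, REFLOAD) are defined alongside this function:
#
#   init_ref()
#   ref = refObj.get('/REF/SOME_ANALYSIS/d01-x01-y01')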
def resolve_data_object(filename_or_data_object, name, divide_by=None, rebin_count=1):
    """Take a passed data object or load a data object from a YODA file,
    and return it after dividing by divide_by."""
    if isinstance(filename_or_data_object, basestring):
        data_object = yoda.readYODA(filename_or_data_object)[name]
    else:
        data_object = filename_or_data_object.clone()
    if not rebin_count == 1:
        data_object.rebin(rebin_count)
    if divide_by is not None:
        divide_by = resolve_data_object(divide_by, name)
        if data_object.type == "Histo1D" and divide_by.type == "Histo1D":
            data_object = data_object.divideBy(divide_by)
        elif data_object.type == "Scatter2D" or divide_by.type == "Scatter2D":
            # make sure that divide_by is also a Scatter2D before using its points property
            data_object = yoda.mkScatter(data_object)
            for point, denominator_point in zip(data_object.points,
                                                yoda.mkScatter(divide_by).points):
                if denominator_point.y == 0.0:
                    new_y = 1.0
                    new_y_errs = [0.0, 0.0]
                else:
                    new_y = point.y / denominator_point.y
                    new_y_errs = [y_err / denominator_point.y for y_err in point.yErrs]
                # if new_y == 1.0 and point.yErrs == denominator_point.yErrs:
                #     # assume this is the same data set, so use the same relative error
                #     if denominator_point.y == 0.0:
                #         new_y_errs = [0.0, 0.0]
                #     else:
                #         new_y_errs = [y_err / denominator_point.y
                #                       for y_err in denominator_point.yErrs]
                # else:
                #     # assume that we divide through an independent data set, use error propagation
                #     rel_y_errs = [(y_err / point.y + den_y_err / denominator_point.y)
                #                   for y_err, den_y_err in zip(point.yErrs, denominator_point.yErrs)]
                #     new_y_errs = [rel_y_err * new_y for rel_y_err in rel_y_errs]
                point.y = new_y
                point.yErrs = new_y_errs
    return data_object
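# Hypothetical usage of the resolver above: build the ratio of an MC
# prediction to a reference file's histogram at the same path (file names
# are made up):
#
#   ratio = resolve_data_object("mc.yoda", "/SOME_ANALYSIS/d01-x01-y01",
#                               divide_by="ref.yoda", rebin_count=2)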
def constructRmiss(hist):
    '''This recreates the constructRmiss function from the routine.'''
    rtn = yoda.mkScatter(hist)
    path = hist.annotation('Path').replace('_d', 'd')
    numer = refhistos[path.replace('y02', 'y03')]
    denom = refhistos[path.replace('y02', 'y04')]
    rmiss = refhistos[path]
    for i in range(rtn.numPoints):
        newy = (numer.points[i].y + rtn.points[i].y) / denom.points[i].y if denom.points[i].y else 0.0
        # ratio error (Rmiss = SM_num/SM_denom + BSM/SM_denom ~ Rmiss_SM + BSM/SM_denom)
        rel_hist_err = rtn.points[i].yErrs[0] / denom.points[i].y if denom.points[i].y else 0.0
        newey = sqrt(rmiss.points[i].yErrs[0] ** 2 + rel_hist_err ** 2)
        rtn.points[i].y = newy
        rtn.points[i].yErrs = (newey, newey)
    return rtn
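# The refhistos lookup used above (and in constructDiff below) is assumed to
# be a module-level dict mapping YODA paths to reference scatters, filled
# along these lines (file name is made up):
#
#   refhistos = {path: ao for path, ao in yoda.read("ref_data.yoda").items()}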
def combine(files, rivet_path, error_calc, rebin_count=None, rebin_counts=None,
            rebin_begin=0, ignore_missing_files=False):
    """Combine the data objects at files[0]/rivet_path, files[1]/rivet_path, ...
    using an error_calc function from the heppyplotlib.error_calc module and
    return a YODA data object. files[0] is supposed to be the CV data set.
    """
    if rebin_count is not None and rebin_counts is not None:
        raise Exception("Only use one of the options 'rebin_count' and 'rebin_counts'.")
    elif rebin_count is not None:
        rebin_counts = [rebin_count] * len(files)
    elif rebin_counts is None:
        rebin_counts = [1] * len(files)
    import yoda
    from . import yodaplot
    y_coord_list = []
    for file_name, file_rebin_count in zip(files, rebin_counts):
        try:
            data_object = yodaplot.resolve_data_object(file_name, rivet_path,
                                                       rebin_count=file_rebin_count,
                                                       rebin_begin=rebin_begin)
            y_coord_list.append(yodaplot.get_y_coords(data_object))
        except IOError:
            if not ignore_missing_files:
                raise
            else:
                print("Ignoring missing file", file_name)
    errs = error_calc(y_coord_list)
    # make sure we are dealing with a scatter object to have the correct notion of errors
    scatter = yoda.mkScatter(yodaplot.resolve_data_object(files[0], rivet_path,
                                                          rebin_count=rebin_counts[0],
                                                          rebin_begin=rebin_begin))
    for point, point_errs in zip(scatter.points, zip(*errs)):
        point.yErrs = point_errs
    return scatter
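# Hypothetical call of the extended combine() above: combine a central file
# with variation files, skipping missing variation files instead of aborting.
# File names and the Rivet path are made up; envelope_errors is the sketch
# from further above.
#
#   scatter = combine(["cv.yoda", "var1.yoda", "var2.yoda"],
#                     "/SOME_ANALYSIS/d01-x01-y01",
#                     error_calc=envelope_errors, rebin_count=2,
#                     ignore_missing_files=True)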
def constructDiff(hist):
    '''This function produces a (data - MC)/sigma version of the Dalitz (2D) plot.'''
    path_data = hist.annotation('Path')                        # data central value
    path_stat = hist.annotation('Path').replace('d03', 'd04')  # data statistical uncertainty
    path_unco = hist.annotation('Path').replace('d03', 'd05')  # data uncorrelated uncertainty
    data = refhistos[path_data]
    stat = refhistos[path_stat]
    unco = refhistos[path_unco]
    data_integral = sum([p.z for p in data.points])
    hist.normalize(data_integral)
    rtn = hist.clone()
    rtn.reset()
    mc = yoda.mkScatter(hist)
    for i in range(rtn.numBins):
        # combined uncertainty: data statistical (+) data uncorrelated (+) MC statistical
        sigma = sqrt(stat.points[i].z ** 2 + unco.points[i].z ** 2 + (mc.points[i].zErrs[0]) ** 2)
        # per-bin pull (data - MC)/sigma; zero where the combined uncertainty vanishes
        newz = (data.points[i].z - mc.points[i].z) / sigma if sigma else 0.0
        # newz = (0.01 * mc.points[i].z - data.points[i].z) / sigma if sigma else 0.0
        rtn.fillBin(i, newz)
    return rtn
def resolve_data_object(filename_or_data_object, name,
                        divide_by=None, multiply_by=None, subtract_by=None,
                        assume_correlated=False,
                        use_correlated_division=None,  # only for backwards-compatibility
                        rebin_count=1, rebin_begin=0):
    """Take a passed data object or load a data object from a YODA file,
    and return it after dividing (or multiplying) by divide_by (multiply_by)."""
    if use_correlated_division is not None:
        assume_correlated = use_correlated_division
        print("Heppyplotlib deprecation warning: Use assume_correlated instead of use_correlated_division")
    if isinstance(filename_or_data_object, str):
        data_object = yoda.readYODA(filename_or_data_object)[name]
    else:
        data_object = filename_or_data_object.clone()
    if not rebin_count == 1:
        if data_object.type == "Histo1D":
            data_object.rebin(rebin_count, begin=rebin_begin)
        else:
            print("WARNING: Will assume statistical errors for rebinning a scatter plot")
            x_coords = [point.x for point in data_object.points]
            y_coords = get_scatter2d_y_coords(data_object)
            x_errs = []
            x_errs.append([point.xErrs[0] for point in data_object.points])
            x_errs.append([point.xErrs[1] for point in data_object.points])
            if not are_points_with_errors_adjacent(x_coords, x_errs):
                raise Exception("Points must be adjacent for interpreting the scatter plots as a histogram")
            new_points = data_object.points[0:rebin_begin]
            i = 0
            while rebin_begin + i * rebin_count < len(data_object.points) - 1:
                first_index = rebin_begin + i * rebin_count
                last_index = min(first_index + rebin_count, len(data_object.points))
                points = data_object.points[first_index:last_index]
                left_edge = points[0].x - points[0].xErrs[0]
                right_edge = points[-1].x + points[-1].xErrs[1]
                length = right_edge - left_edge
                new_x = left_edge + length / 2.0
                new_xerrs = length / 2.0
                new_y = 0.0
                new_yerrs = np.array([0.0, 0.0])
                for point in points:
                    left_edge = point.x - point.xErrs[0]
                    right_edge = point.x + point.xErrs[1]
                    new_y += (right_edge - left_edge) * point.y
                    new_yerrs += ((right_edge - left_edge) * np.array(point.yErrs)) ** 2
                new_y /= length
                new_yerrs = np.sqrt(new_yerrs) / length
                new_points.append(yoda.Point2D(x=new_x, y=new_y, xerrs=new_xerrs, yerrs=new_yerrs))
                i = i + 1
            data_object = yoda.Scatter2D(path=data_object.path, title=data_object.title)
            for point in new_points:
                data_object.addPoint(point)
    if subtract_by is not None:
        data_object = yoda.mkScatter(data_object)
        operand = resolve_data_object(subtract_by, name).mkScatter()
        for point, operand_point in zip(data_object.points, operand.points):
            new_y = point.y - operand_point.y
            if assume_correlated:
                # for fully correlated inputs the errors subtract directly
                new_y_errs = [y_err - operand_y_err
                              for y_err, operand_y_err in zip(point.yErrs, operand_point.yErrs)]
            else:
                # assume that we subtract an independent data set, use error propagation
                new_y_errs = []
                for y_err, operand_y_err in zip(point.yErrs, operand_point.yErrs):
                    err2 = 0.0
                    if point.y != 0.0:
                        err2 += (y_err) ** 2
                    err2 += (operand_y_err) ** 2
                    new_y_errs.append(np.sqrt(err2))
            point.y = new_y
            point.yErrs = new_y_errs
    if divide_by is not None or multiply_by is not None:
        data_object = yoda.mkScatter(data_object)
        if isinstance(divide_by, float) or isinstance(multiply_by, float):
            for point in data_object.points:
                if divide_by is not None:
                    new_y = point.y / divide_by
                    new_y_errs = [y_err / divide_by for y_err in point.yErrs]
                else:
                    new_y = point.y * multiply_by
                    new_y_errs = [y_err * multiply_by for y_err in point.yErrs]
                point.y = new_y
                point.yErrs = new_y_errs
        else:
            if divide_by is not None:
                operand = resolve_data_object(divide_by, name).mkScatter()
            else:
                operand = resolve_data_object(multiply_by, name).mkScatter()
            for point, operand_point in zip(data_object.points, operand.points):
                if operand_point.y == 0.0:
                    if divide_by is not None:
                        new_y = 1.0
                    else:
                        new_y = 0.0
                    new_y_errs = [0.0, 0.0]
                else:
                    if divide_by is not None:
                        new_y = point.y / operand_point.y
                        if assume_correlated:
                            new_y_errs = [y_err / operand_point.y for y_err in point.yErrs]
                    else:
                        new_y = point.y * operand_point.y
                        if assume_correlated:
                            new_y_errs = [y_err * operand_point.y for y_err in point.yErrs]
                    if not assume_correlated:
                        # assume that we divide/multiply through an independent data set,
                        # use error propagation
                        rel_y_errs = []
                        for y_err, operand_y_err in zip(point.yErrs, operand_point.yErrs):
                            err2 = 0.0
                            if point.y != 0.0:
                                err2 += (y_err / point.y) ** 2
                            err2 += (operand_y_err / operand_point.y) ** 2
                            rel_y_errs.append(np.sqrt(err2))
                        new_y_errs = [rel_y_err * new_y for rel_y_err in rel_y_errs]
                point.y = new_y
                point.yErrs = new_y_errs
    return data_object
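# Worked sketch of the width-weighted scatter rebinning above: merging two
# adjacent bins of widths w1, w2 with heights y1, y2 and errors e1, e2 gives
#
#   y = (w1*y1 + w2*y2) / (w1 + w2)
#   e = sqrt((w1*e1)**2 + (w2*e2)**2) / (w1 + w2)
#
# i.e. the merged bin preserves the integral, and the (assumed statistical)
# errors are added in quadrature with the same width weights.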
def __init__(self, anaObj, xSec, nEv, TestMethod, GridMode):
    # Construct with an input yoda aos and a scatter1D for the cross section and nEv
    self.signal = anaObj
    self.xsec = xSec
    self.nev = nEv
    # Overall effective integrated luminosity may be recalculated plot by plot because units change.
    self._mcLumi = float(nEv.numEntries()) / xSec.point(0).x
    # Initialize the public members we always want to access
    self._has1Dhisto = False
    self._background = False
    self._ref = False
    self._stack = yoda.Scatter2D
    self._refplot = yoda.Scatter2D
    self._sigplot = yoda.Scatter2D
    self._bgplot = yoda.Scatter2D
    self._lumi = 1
    self._isScaled = False
    self._scaleFactorData = 1
    self._scaleFactorSig = 1
    self._conturPoints = []
    self._scaleMC = 1.0
    self._maxcl = -1
    self._maxbin = -1
    self._testMethod = TestMethod
    # Call the internal functions on initialization to fill the above members
    # with what we want; these should all be private.
    self.__getData()
    self.__getAux()
    self.__getMC()
    self.__getisScaled()
    # Determine the type of object we have, and build a 2D scatter from it if it is not one already.
    # Also recalculate the MC lumi and scale factor, if appropriate.
    if self.signal.type in ('Histo1D', 'Profile1D', 'Counter'):
        self._has1Dhisto = True
        if self._isScaled:
            # If the plot is area-normalised (i.e. scaled), work out the factor from the number of
            # events and the generator cross section (this is just the integrated cross section
            # associated with the plot).
            try:
                self._scaleFactorSig = float(self.xsec.points[0].x) \
                    * float(self.signal.numEntries()) / float(self.nev.numEntries())
            except Exception:
                print "missing info for scalefactor calc"
        # The effective MC lumi has to be calculated plot by plot because units change and some
        # plots are symmetrised (in which case there will be a factor of two between this and the
        # mclumi from number of generated events / xsec).
        if self.signal.sumW() != 0.0:
            self._mcLumi = float(self.signal.numEntries()) \
                / (float(self.signal.sumW()) * self._scaleFactorSig)
        self.signal = yoda.mkScatter(self.signal)
    # Make sure it is actually a Scatter2D - mkScatter makes a Scatter1D from a Counter.
    if self.signal.type == 'Scatter1D':
        self.signal = util.mkScatter2D(self.signal)
    if not GridMode:
        # Public member function to build the plots needed for direct histogram visualisation.
        # Avoid calling YODA.clone() unless we have to. Must be called before scaling.
        self.doPlot()
    if self._ref:
        # don't scale histograms that came in as 2D scatters
        if self._has1Dhisto:
            self.__doScale()
        self.__fillPoints()
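# Hypothetical construction of this class (the class name "Histo" and the
# analysis path are made up; /_XSEC and /_EVTCOUNT are the cross-section
# scatter and event-count counter Rivet writes into its YODA output):
#
#   aos = yoda.read("runpoint.yoda")
#   h = Histo(aos["/SOME_ANALYSIS/d01-x01-y01"], aos["/_XSEC"], aos["/_EVTCOUNT"],
#             TestMethod="CS", GridMode=False)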