Example #1
def combine(files, rivet_path, error_calc, rebin_count=None, rebin_counts=None):
    """Combine files[1]/rivet_path, files[2]/rivet_path, ...
    using an error_calc function from the heppyplotlib.error_calc
    module and return a YODA data object.

    files[0] is supposed to be the CV data set.
    """
    if rebin_count is not None and rebin_counts is not None:
        raise ValueError("Only use one of the options 'rebin_count' and 'rebin_counts'.")
    elif rebin_count is not None:
        rebin_counts = [rebin_count] * len(files)
    elif rebin_counts is None:
        rebin_counts = [1] * len(files)
    import yoda
    from . import yodaplot
    y_coord_list = []
    for file_name, rebin_count in zip(files, rebin_counts):
        data_object = yodaplot.resolve_data_object(file_name, rivet_path, rebin_count=rebin_count)
        y_coord_list.append(yodaplot.get_y_coords(data_object))
    errs = error_calc(y_coord_list)
    # make sure we are dealing with a scatter object to have the correct notion of errors
    scatter = yoda.mkScatter(yodaplot.resolve_data_object(files[0],
        rivet_path, rebin_count=rebin_counts[0]))
    for point, point_errs in zip(scatter.points, zip(*errs)):
        point.yErrs = point_errs
    return scatter
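A usage sketch (the file names, the Rivet path, and the envelope helper below are illustrative; in practice error_calc would come from the heppyplotlib.error_calc module, as the docstring says):

import numpy as np

def envelope(y_coord_list):
    """Illustrative error_calc: min/max spread of the variation runs around the CV."""
    central = np.asarray(y_coord_list[0])
    variations = np.asarray(y_coord_list)
    down = central - variations.min(axis=0)   # per-bin downward error
    up = variations.max(axis=0) - central     # per-bin upward error
    return [down, up]  # zip(*errs) in combine then yields one (down, up) pair per point

scatter = combine(["central.yoda", "scale_up.yoda", "scale_down.yoda"],
                  "/MC_XS/XS", envelope)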
Example #2
    def __init__(self, anaObj, xSec, nEv):
        # Construct from an input YODA analysis object, a Scatter1D for the cross section, and the event count
        self.signal = anaObj
        self.xsec = xSec
        self.nev = nEv

        # Initialize the members we always want to access
        self._background = False
        self.ref = False
        self.stack = yoda.Scatter2D  # placeholder: stores the Scatter2D class itself, not an instance
        self.lumi = 1
        self.isScaled = False
        self.scaleFactorData = 1
        self.scaleFactorSig = 1
        self.conturPoints = []
        self.mcLumi = 0.0
        self.scaleMC = 1.0

        # Call the internal functions on initialization
        # to fill the above members with what we want; these should all be private
        self.__getData()
        self.__getAux()
        self.__getMC()
        self.__getisScaled()
        if self.__has1D():
            self.signal = yoda.mkScatter(self.signal)
        # build stack for plotting
        # self.__buildStack()
        if self.ref:
            self.__doScale()
            self.__fillPoints()
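A hedged construction sketch: 'Observable' stands in for whatever class defines this __init__, and the file name is illustrative; the /_XSEC (Scatter1D) and /_EVTCOUNT (Counter) paths are the bookkeeping objects Rivet writes into its .yoda output:

import yoda

aos = yoda.read("runs/point_0001.yoda")
xsec = aos["/_XSEC"]      # generator cross section
nev = aos["/_EVTCOUNT"]   # number of generated events
plots = [Observable(ao, xsec, nev)
         for path, ao in aos.items() if not path.startswith("/_")]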
Example #3
def init_ref():
    """Function to load all reference data and theory *.yoda data"""
    refFiles = []
    global scaledYet
    print "Gathering all reference Data (and Theory, if available)"
    rivet_data_dirs = rivet.getAnalysisRefPaths()
    import glob
    for data_dir in rivet_data_dirs:
        refFiles.append(glob.glob(os.path.join(data_dir, '*.yoda')))
    for fileList in refFiles:
        for f in fileList:
            aos = yoda.read(f)
            for path, ao in aos.items():
                if ao.type != "Scatter2D":
                    ao = yoda.mkScatter(ao)
                if ao.type == "Scatter1D":
                    ao = util.mkScatter2D(ao)
                if path.startswith('/REF/'):
                    refObj[path] = ao
                    scaledYet[path] = False
                if path.startswith('/THY/'):
                    refObj[path] = ao
                    scaledYet[path] = False

    global REFLOAD
    REFLOAD = True
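init_ref leans on module-level state; a minimal sketch of the imports and globals it assumes (names taken from the function body; util is assumed to be a sibling module providing mkScatter2D):

import os

import rivet
import yoda
import util  # assumed sibling module providing mkScatter2D

refObj = {}      # path -> reference/theory data object
scaledYet = {}   # path -> bool, tracks which reference plots have been scaled
REFLOAD = False  # set to True once init_ref() has run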
Example #4
def resolve_data_object(filename_or_data_object, name, divide_by=None, rebin_count=1):
    """Take passed data object or loads a data object from a YODA file,
    and return it after dividing by divide_by."""
    if isinstance(filename_or_data_object, basestring):
        data_object = yoda.readYODA(filename_or_data_object)[name]
    else:
        data_object = filename_or_data_object.clone()
    if not rebin_count == 1:
        data_object.rebin(rebin_count)
    if divide_by is not None:
        divide_by = resolve_data_object(divide_by, name)
        if data_object.type == "Histo1D" and divide_by.type == "Histo1D":
            data_object = data_object.divideBy(divide_by)
        elif data_object.type == "Scatter2D" or divide_by.type == "Scatter2D":
            # make sure that divide_by is also a Scatter2D before using its points property
            data_object = yoda.mkScatter(data_object)
            for point, denominator_point in zip(data_object.points, yoda.mkScatter(divide_by).points):
                if denominator_point.y == 0.0:
                    new_y = 1.0
                    new_y_errs = [0.0, 0.0]
                else:
                    new_y = point.y / denominator_point.y
                    new_y_errs = [y_err / denominator_point.y for y_err in point.yErrs]
                # if new_y == 1.0 and point.yErrs == denominator_point.yErrs:
                #     # assume this is the same data set, so use the same relative error
                #     if denominator_point.y == 0.0:
                #         new_y_errs = [0.0, 0.0]
                #     else:
                #         new_y_errs = [y_err / denominator_point.y for y_err in denominator_point.yErrs]
                # else:
                #     # assume that we divide through an independent data set, use error propagation
                #     rel_y_errs = [(y_err / point.y + den_y_err / denominator_point.y)
                #                   for y_err, den_y_err in zip(point.yErrs, denominator_point.yErrs)]
                #     new_y_errs = [rel_y_err * new_y for rel_y_err in rel_y_errs]
                point.y = new_y
                point.yErrs = new_y_errs
    return data_object
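A usage sketch for the division feature (file names and the Rivet path are illustrative): divide_by may itself be a file name, in which case the same path is resolved from that file before dividing:

# ratio of a rebinned prediction to a baseline curve from another file
ratio = resolve_data_object("prediction.yoda", "/MC_ZJETS/jet_pT_1",
                            divide_by="baseline.yoda", rebin_count=2)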
Example #5
def constructRmiss(hist):
    '''This recreates the constructRmiss function from the routine.'''
    rtn = yoda.mkScatter(hist)
    path = hist.annotation('Path').replace('_d', 'd')
    numer = refhistos[path.replace('y02', 'y03')]
    denom = refhistos[path.replace('y02', 'y04')]
    rmiss = refhistos[path]
    for i in range(rtn.numPoints):
        newy = (numer.points[i].y + rtn.points[i].y) / denom.points[i].y if denom.points[i].y else 0.0
        # ratio error: Rmiss = (SM_num + BSM)/SM_denom ~ Rmiss_SM + BSM/SM_denom
        rel_hist_err = rtn.points[i].yErrs[0] / denom.points[i].y if denom.points[i].y else 0.0
        newey = sqrt(rmiss.points[i].yErrs[0] ** 2 + rel_hist_err ** 2)
        rtn.points[i].y = newy
        rtn.points[i].yErrs = (newey, newey)
    return rtn
Example #6
def constructRmiss(hist):
    '''This recreates the constructRmiss function from the routine.'''
    rtn = yoda.mkScatter(hist)
    path = hist.annotation('Path').replace('_d', 'd')
    numer = refhistos[path.replace('y02', 'y03')]
    denom = refhistos[path.replace('y02', 'y04')]
    rmiss = refhistos[path]
    for i in range(rtn.numPoints):
        newy = (numer.points[i].y + rtn.points[i].y) / denom.points[i].y if denom.points[i].y else 0.0
        # ratio error: Rmiss = (SM_num + BSM)/SM_denom ~ Rmiss_SM + BSM/SM_denom
        rel_hist_err = rtn.points[i].yErrs[0] / denom.points[i].y if denom.points[i].y else 0.0
        newey = sqrt(rmiss.points[i].yErrs[0] ** 2 + rel_hist_err ** 2)
        rtn.points[i].y = newy
        rtn.points[i].yErrs = (newey, newey)
    return rtn
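Both constructRmiss variants (and the constructDiff examples below) assume a module-level refhistos dictionary keyed by histogram path, plus sqrt; a minimal sketch of that context, with the file name illustrative:

from math import sqrt

import yoda

refhistos = {}
for path, ao in yoda.read("reference_data.yoda").items():
    # keep Scatter2Ds as they are, convert anything else
    refhistos[path] = ao if ao.type == "Scatter2D" else yoda.mkScatter(ao)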
Example #7
def combine(files,
            rivet_path,
            error_calc,
            rebin_count=None,
            rebin_counts=None,
            rebin_begin=0,
            ignore_missing_files=False):
    """Combine files[1]/rivet_path, files[2]/rivet_path, ...
    using an error_calc function from the heppyplotlib.error_calc
    module and return a YODA data object.

    files[0] is supposed to be the CV data set.
    """
    if rebin_count is not None and rebin_counts is not None:
        raise ValueError(
            "Only use one of the options 'rebin_count' and 'rebin_counts'.")
    elif rebin_count is not None:
        rebin_counts = [rebin_count] * len(files)
    elif rebin_counts is None:
        rebin_counts = [1] * len(files)
    import yoda
    from . import yodaplot
    y_coord_list = []
    for file_name, rebin_count in zip(files, rebin_counts):
        try:
            data_object = yodaplot.resolve_data_object(file_name,
                                                       rivet_path,
                                                       rebin_count=rebin_count,
                                                       rebin_begin=rebin_begin)
            y_coord_list.append(yodaplot.get_y_coords(data_object))
        except IOError:
            if not ignore_missing_files:
                raise
            else:
                print("Ignore missing file", file_name)
    errs = error_calc(y_coord_list)
    # make sure we are dealing with a scatter object to have the correct notion of errors
    scatter = yoda.mkScatter(
        yodaplot.resolve_data_object(files[0],
                                     rivet_path,
                                     rebin_count=rebin_counts[0],
                                     rebin_begin=rebin_begin))
    for point, point_errs in zip(scatter.points, zip(*errs)):
        point.yErrs = point_errs
    return scatter
Example #8
def constructDiff(hist):
    '''This function produces a (data - MC)/sigma version of the Dalitz (2D) plot.'''
    path_data = hist.annotation('Path') # data central value
    path_stat = hist.annotation('Path').replace('d03', 'd04') # data statistical uncertainty
    path_unco = hist.annotation('Path').replace('d03', 'd05') # data uncorrelated uncertainty
    data = refhistos[path_data]
    stat = refhistos[path_stat]
    unco = refhistos[path_unco]
    data_integral = sum(p.z for p in data.points)
    hist.normalize(data_integral)
    rtn = hist.clone()
    rtn.reset()
    mc = yoda.mkScatter(hist)
    for i in range(rtn.numBins):
        sigma = sqrt(stat.points[i].z ** 2 + unco.points[i].z ** 2 + (mc.points[i].zErrs[0]) ** 2)
        newz = (data.points[i].z - mc.points[i].z) / sigma if sigma else 0.0
        #newz = (0.01 * mc.points[i].z - data.points[i].z) / sigma if sigma else 0.0
        rtn.fillBin(i, newz)
    return rtn
Example #9
def constructDiff(hist):
    '''This function produces a (data - MC)/sigma version of the Dalitz (2D) plot.'''
    path_data = hist.annotation('Path')  # data central value
    path_stat = hist.annotation('Path').replace(
        'd03', 'd04')  # data statistical uncertainty
    path_unco = hist.annotation('Path').replace(
        'd03', 'd05')  # data uncorrelated uncertainty
    data = refhistos[path_data]
    stat = refhistos[path_stat]
    unco = refhistos[path_unco]
    data_integral = sum([p.z for p in data.points])
    hist.normalize(data_integral)
    rtn = hist.clone()
    rtn.reset()
    mc = yoda.mkScatter(hist)
    for i in range(rtn.numBins):
        sigma = sqrt(stat.points[i].z**2 + unco.points[i].z**2 +
                     (mc.points[i].zErrs[0])**2)
        newz = (data.points[i].z - mc.points[i].z) / sigma if sigma else 0.0
        #newz = (0.01 * mc.points[i].z - data.points[i].z) / sigma if sigma else 0.0
        rtn.fillBin(i, newz)
    return rtn
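A usage sketch (assuming the refhistos context sketched above, with the matching d03/d04/d05 reference entries loaded; file and path names are illustrative):

import yoda

mc_aos = yoda.read("mc_dalitz.yoda")
# expects a 2D histogram whose Path annotation matches the d03 reference data
pull = constructDiff(mc_aos["/SOME_ANALYSIS/d03-x01-y01"])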
Example #10
import numpy as np
import yoda

# get_scatter2d_y_coords and are_points_with_errors_adjacent are helpers
# defined elsewhere in the same module
def resolve_data_object(
        filename_or_data_object,
        name,
        divide_by=None,
        multiply_by=None,
        subtract_by=None,
        assume_correlated=False,
        use_correlated_division=None,  # this is only for backwards-compatibility
        rebin_count=1,
        rebin_begin=0):
    """Take passed data object or loads a data object from a YODA file,
    and return it after dividing (or multiplying) by divide_by (multiply_by)."""
    if use_correlated_division is not None:
        assume_correlated = use_correlated_division
        print(
            "Heppyplotlib deprecation warning: Use assume_correlated instead of use_correlated_division"
        )
    if isinstance(filename_or_data_object, str):
        data_object = yoda.readYODA(filename_or_data_object)[name]
    else:
        data_object = filename_or_data_object.clone()
    if rebin_count != 1:
        if data_object.type == "Histo1D":
            data_object.rebin(rebin_count, begin=rebin_begin)
        else:
            print(
                "WARNING: Will assume statistical errors for rebinning a scatter plot"
            )
            x_coords = [point.x for point in data_object.points]
            y_coords = get_scatter2d_y_coords(data_object)
            x_errs = []
            x_errs.append([point.xErrs[0] for point in data_object.points])
            x_errs.append([point.xErrs[1] for point in data_object.points])
            if not are_points_with_errors_adjacent(x_coords, x_errs):
                raise Exception(
                    "Points must be adjacent for interpreting the scatter plots as a histogram"
                )
            new_points = data_object.points[0:rebin_begin]
            i = 0
            while rebin_begin + i * rebin_count < len(data_object.points) - 1:
                first_index = rebin_begin + i * rebin_count
                last_index = min(first_index + rebin_count,
                                 len(data_object.points))
                points = data_object.points[first_index:last_index]
                left_edge = points[0].x - points[0].xErrs[0]
                right_edge = points[-1].x + points[-1].xErrs[1]
                length = right_edge - left_edge
                new_x = left_edge + length / 2.0
                new_xerrs = length / 2.0
                new_y = 0.0
                new_yerrs = np.array([0.0, 0.0])
                for point in points:
                    left_edge = point.x - point.xErrs[0]
                    right_edge = point.x + point.xErrs[1]
                    new_y += (right_edge - left_edge) * point.y
                    new_yerrs += ((right_edge - left_edge) *
                                  np.array(point.yErrs))**2
                new_y /= length
                new_yerrs = np.sqrt(new_yerrs) / length
                new_points.append(
                    yoda.Point2D(x=new_x,
                                 y=new_y,
                                 xerrs=new_xerrs,
                                 yerrs=new_yerrs))
                i = i + 1
            data_object = yoda.Scatter2D(path=data_object.path,
                                         title=data_object.title)
            for point in new_points:
                data_object.addPoint(point)
    if subtract_by is not None:
        data_object = yoda.mkScatter(data_object)
        operand = resolve_data_object(subtract_by, name).mkScatter()
        for point, operand_point in zip(data_object.points, operand.points):
            new_y = point.y - operand_point.y
            if assume_correlated:
                # fully correlated: take the difference of the corresponding errors
                new_y_errs = [y_err - operand_y_err for y_err, operand_y_err
                              in zip(point.yErrs, operand_point.yErrs)]
            else:
                # assume that we subtract an independent data set, use error propagation
                new_y_errs = []
                for y_err, operand_y_err in zip(point.yErrs,
                                                operand_point.yErrs):
                    err2 = 0.0
                    if point.y != 0.0:
                        err2 += (y_err)**2
                    err2 += (operand_y_err)**2
                    new_y_errs.append(np.sqrt(err2))
            point.y = new_y
            point.yErrs = new_y_errs
    if divide_by is not None or multiply_by is not None:
        data_object = yoda.mkScatter(data_object)
        if isinstance(divide_by, float) or isinstance(multiply_by, float):
            for point in data_object.points:
                if divide_by is not None:
                    new_y = point.y / divide_by
                    new_y_errs = [y_err / divide_by for y_err in point.yErrs]
                else:
                    new_y = point.y * multiply_by
                    new_y_errs = [y_err * multiply_by for y_err in point.yErrs]
                point.y = new_y
                point.yErrs = new_y_errs
        else:
            if divide_by is not None:
                operand = resolve_data_object(divide_by, name).mkScatter()
            else:
                operand = resolve_data_object(multiply_by, name).mkScatter()
            for point, operand_point in zip(data_object.points,
                                            operand.points):
                if operand_point.y == 0.0:
                    if divide_by is not None:
                        new_y = 1.0
                    else:
                        new_y = 0.0
                    new_y_errs = [0.0, 0.0]
                else:
                    if divide_by is not None:
                        new_y = point.y / operand_point.y
                        if assume_correlated:
                            new_y_errs = [
                                y_err / operand_point.y
                                for y_err in point.yErrs
                            ]
                    else:
                        new_y = point.y * operand_point.y
                        if assume_correlated:
                            new_y_errs = [
                                y_err * operand_point.y
                                for y_err in point.yErrs
                            ]
                    if not assume_correlated:
                        # assume that we divide/multiply through an independent data set, use error propagation
                        rel_y_errs = []
                        for y_err, operand_y_err in zip(
                                point.yErrs, operand_point.yErrs):
                            err2 = 0.0
                            if point.y != 0.0:
                                err2 += (y_err / point.y)**2
                            err2 += (operand_y_err / operand_point.y)**2
                            rel_y_errs.append(np.sqrt(err2))
                        new_y_errs = [
                            rel_y_err * new_y for rel_y_err in rel_y_errs
                        ]
                point.y = new_y
                point.yErrs = new_y_errs
    return data_object
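A usage sketch exercising the scatter-rebinning and uncorrelated-division paths (file names and the analysis path are illustrative):

# merge points pairwise starting from the third one, then divide by the data
# curve from another file, propagating errors as uncorrelated
ratio = resolve_data_object("mc.yoda", "/ANALYSIS/d01-x01-y01",
                            divide_by="data.yoda",
                            rebin_count=2, rebin_begin=2,
                            assume_correlated=False)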
Example #11
    def __init__(self, anaObj, xSec, nEv, TestMethod, GridMode):

        # Construct from an input YODA analysis object, a Scatter1D for the cross section, and a Counter for the event count
        self.signal = anaObj
        self.xsec = xSec
        self.nev = nEv
        # Overall effective integrated luminosity may be recalculated plot by plot because units change.
        self._mcLumi = float(nEv.numEntries()) / xSec.point(0).x

        # Initialize the public members we always want to access
        self._has1Dhisto = False
        self._background = False
        self._ref = False
        # placeholders: these store the Scatter2D class itself, not instances
        self._stack = yoda.Scatter2D
        self._refplot = yoda.Scatter2D
        self._sigplot = yoda.Scatter2D
        self._bgplot = yoda.Scatter2D
        self._lumi = 1
        self._isScaled = False
        self._scaleFactorData = 1
        self._scaleFactorSig = 1
        self._conturPoints = []
        self._scaleMC = 1.0
        self._maxcl = -1
        self._maxbin = -1
        self._testMethod = TestMethod

        # Call the internal functions on initialization
        # to fill the above members with what we want; these should all be private
        self.__getData()
        self.__getAux()
        self.__getMC()
        self.__getisScaled()

        # Determine the type of object we have, and build a 2D scatter from it if it is not one already
        # Also recalculate MCLumi, and scalefactor, if appropriate
        if self.signal.type in ('Histo1D', 'Profile1D', 'Counter'):

            self._has1Dhisto = True

            if self._isScaled:
                # if the plot is area-normalised (i.e. scaled), work out the factor from the number of events and the generator cross section
                # (this is just the integrated cross section associated with the plot)
                try:
                    self._scaleFactorSig = (float(
                        self.xsec.points[0].x)) * float(
                            self.signal.numEntries()) / float(
                                self.nev.numEntries())
                except Exception:
                    print("missing info for scalefactor calc")

            # effective MC lumi has to be calculated plot-by-plot because units change and some plots are
            # symmetrised (in which case there is a factor of two between this and the MC lumi from
            # number of generated events / xsec)
            if self.signal.sumW() != 0.0:
                self._mcLumi = float(self.signal.numEntries()) / (
                    float(self.signal.sumW()) * self._scaleFactorSig)

            self.signal = yoda.mkScatter(self.signal)
            # Make sure it is actually a Scatter2D - mkScatter makes Scatter1D from counter.
            if self.signal.type == 'Scatter1D':
                self.signal = util.mkScatter2D(self.signal)

        if not GridMode:
            # Public member function to build plots needed for direct histogram visualisation
            # avoid calling YODA.clone() unless we have to
            # Must be called before scaling.
            self.doPlot()

        if self._ref:
            # don't scale histograms that came in as 2D scatters
            if self._has1Dhisto:
                self.__doScale()
            self.__fillPoints()