def testCountWithFilter(self):
    """Check Select-wrapped Counts on every split of ``self.simple``.

    For each split point 0..10 the left and right slices are filled into
    separate ``Select(x > 0.0, Count())`` aggregators.  The ``cut.entries``
    of each side, and of their sum, must equal the number of positive values
    in the corresponding data.  The left aggregator is then passed through
    the scaling, JSON, pickle and name checks of the test case.
    """
    # NOTE(review): statement nesting was reconstructed from a
    # whitespace-collapsed source; all checks are assumed to run once per
    # split -- confirm against the original layout.
    for split in xrange(11):
        left = self.simple[:split]
        right = self.simple[split:]

        leftCounting = Select(named("something", lambda x: x > 0.0), Count())
        rightCounting = Select(named("something", lambda x: x > 0.0), Count())

        for datum in left:
            leftCounting.fill(datum)
        for datum in right:
            rightCounting.fill(datum)

        positivesLeft = len([x for x in left if x > 0.0])
        positivesRight = len([x for x in right if x > 0.0])
        self.assertEqual(leftCounting.cut.entries, positivesLeft)
        self.assertEqual(rightCounting.cut.entries, positivesRight)

        finalResult = leftCounting + rightCounting
        positivesAll = len([x for x in self.simple if x > 0.0])
        self.assertEqual(finalResult.cut.entries, positivesAll)

        self.checkScaling(leftCounting)
        self.checkScaling(leftCounting.toImmutable())
        for check in (self.checkJson, self.checkPickle, self.checkName):
            check(leftCounting)
def histogram(self):
    """Return a plain histogram by converting all sub-aggregator values into Counts.

    A new CentrallyBin is built from this object's bin centers, quantity
    and a copy of its nanflow; its ``entries`` is copied from this object
    and every bin is replaced by ``Count.ed`` of the corresponding
    sub-aggregator's ``entries``.

    Returns:
        the result of ``specialize()`` on the new CentrallyBin.
    """
    out = CentrallyBin([c for c, v in self.bins], self.quantity, Count(), self.nanflow.copy())
    out.entries = self.entries
    # ``self.bins`` holds (center, sub-aggregator) pairs.  The bins of the
    # copy are therefore rebuilt as pairs in the same order; indexing
    # ``out.bins`` with the float center, as the previous loop did, raises
    # TypeError on a list and would have dropped the center anyway.
    out.bins = [(float(c), Count.ed(v.entries)) for c, v in self.bins]
    return out.specialize()
def TwoDimensionallySparselyHistogram(xbinWidth, xquantity, ybinWidth, yquantity, selection=unweighted, xorigin=0.0, yorigin=0.0):
    """Convenience function for creating a sparsely binned, two-dimensional histogram.

    An inner ``SparselyBin.ing`` over the y quantity (with Count
    sub-aggregators) is nested inside an outer ``SparselyBin.ing`` over the
    x quantity, and the result is wrapped in ``Select.ing(selection, ...)``.

    Parameters:
        xbinWidth: bin width forwarded to the outer SparselyBin.
        xquantity: quantity forwarded to the outer SparselyBin.
        ybinWidth: bin width forwarded to the inner SparselyBin.
        yquantity: quantity forwarded to the inner SparselyBin.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
        xorigin (float): origin forwarded to the outer SparselyBin.
        yorigin (float): origin forwarded to the inner SparselyBin.
    """
    ySlices = SparselyBin.ing(ybinWidth, yquantity, Count.ing(), Count.ing(), yorigin)
    xyGrid = SparselyBin.ing(xbinWidth, xquantity, ySlices, Count.ing(), xorigin)
    return Select.ing(selection, xyGrid)
def histogram(self):
    """Return a plain histogram by converting all sub-aggregator values into Counts.

    A new SparselyBin is created with this object's bin width, quantity,
    origin and a copy of its nanflow.  Its ``entries`` is set to this
    object's ``entries`` as a float, its ``contentType`` to ``"Count"``,
    and each bin index is mapped to ``Count.ed`` of the corresponding
    sub-aggregator's ``entries``.

    Returns:
        the result of ``specialize()`` on the new SparselyBin.
    """
    plain = SparselyBin(self.binWidth, self.quantity, Count(), self.nanflow.copy(), self.origin)
    plain.contentType = "Count"
    plain.entries = float(self.entries)
    for index in self.bins:
        plain.bins[index] = Count.ed(self.bins[index].entries)
    return plain.specialize()
def SparselyHistogram(binWidth, quantity=identity, origin=0.0):
    """Create a sparsely binned histogram of Counts.

    Thin wrapper around ``SparselyBin.ing`` that supplies two fresh
    ``Count.ing()`` sub-aggregators.

    Parameters:
        binWidth (float): the width of a bin.
        quantity (function returning float or string): function that computes the quantity of interest from the
            data; passes on all values by default. If a string is given, quantity is set to identity(string),
            in which case that column is picked up from a pandas df.
        origin (float): the left edge of the bin whose index is zero.
    """
    perBin = Count.ing()
    fourthArg = Count.ing()  # presumably the nanflow sub-aggregator -- confirm against SparselyBin.ing
    return SparselyBin.ing(binWidth, quantity, perBin, fourthArg, origin)
def testCategorizeTrans(self):
    """Run ``self.compare`` on Categorize aggregators with a transformed Count.

    For each of the "empty", "noholes" and "withholes" datasets, one
    Categorize reads the floored column from ``self.data`` as ``"<U5"``
    strings and another takes the already converted array directly; both
    use ``Count(lambda x: 0.5*x)``.  The test returns early when Numpy is
    unavailable.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source; the bare ``return`` is assumed to be the whole body of the
    # ``numpy is None`` guard.
    with Numpy() as numpy:
        if numpy is None:
            return
        sys.stderr.write("\n")

        fromTable = Categorize(lambda x: numpy.array(numpy.floor(x["empty"]), dtype="<U5"), Count(lambda x: 0.5*x))
        fromArray = Categorize(lambda x: x, Count(lambda x: 0.5*x))
        self.compare("CategorizeTrans no data", fromTable, self.data, fromArray,
                     numpy.array(numpy.floor(self.empty), dtype="<U5"))

        fromTable = Categorize(lambda x: numpy.array(numpy.floor(x["noholes"]), dtype="<U5"), Count(lambda x: 0.5*x))
        fromArray = Categorize(lambda x: x, Count(lambda x: 0.5*x))
        self.compare("CategorizeTrans noholes", fromTable, self.data, fromArray,
                     numpy.array(numpy.floor(self.noholes), dtype="<U5"))

        fromTable = Categorize(lambda x: numpy.array(numpy.floor(x["withholes"]), dtype="<U5"), Count(lambda x: 0.5*x))
        fromArray = Categorize(lambda x: x, Count(lambda x: 0.5*x))
        self.compare("CategorizeTrans holes", fromTable, self.data, fromArray,
                     numpy.array(numpy.floor(self.withholes), dtype="<U5"))
def testSparselyBinTrans(self):
    """Run ``self.compare`` on SparselyBin aggregators with a transformed Count.

    For each of the "empty", "noholes" and "withholes" datasets, one
    SparselyBin (bin width 0.1) reads the column from ``self.data`` and
    another takes the corresponding array directly; both use
    ``Count(lambda x: 0.5*x)``.  The test returns early when Numpy is
    unavailable.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source; the bare ``return`` is assumed to be the whole body of the
    # ``numpy is None`` guard.
    with Numpy() as numpy:
        if numpy is None:
            return
        sys.stderr.write("\n")

        fromTable = SparselyBin(0.1, lambda x: x["empty"], Count(lambda x: 0.5*x))
        fromArray = SparselyBin(0.1, lambda x: x, Count(lambda x: 0.5*x))
        self.compare("SparselyBinTrans no data", fromTable, self.data, fromArray, self.empty)

        fromTable = SparselyBin(0.1, lambda x: x["noholes"], Count(lambda x: 0.5*x))
        fromArray = SparselyBin(0.1, lambda x: x, Count(lambda x: 0.5*x))
        self.compare("SparselyBinTrans noholes", fromTable, self.data, fromArray, self.noholes)

        fromTable = SparselyBin(0.1, lambda x: x["withholes"], Count(lambda x: 0.5*x))
        fromArray = SparselyBin(0.1, lambda x: x, Count(lambda x: 0.5*x))
        self.compare("SparselyBinTrans holes", fromTable, self.data, fromArray, self.withholes)
def Histogram(num, low, high, quantity=identity):
    """Create a conventional histogram that is capable of being filled and added.

    Thin wrapper around ``Bin.ing`` that supplies four fresh ``Count.ing()``
    sub-aggregators after the binning arguments.

    Parameters:
        num (int): the number of bins; must be at least one.
        low (float): the minimum-value edge of the first bin.
        high (float): the maximum-value edge of the last bin; must be strictly greater than `low`.
        quantity (function returning float or string): function that computes the quantity of interest from the
            data; passes on all values by default. If a string is given, quantity is set to identity(string),
            in which case that column is picked up from a pandas df.
    """
    # One independent Count per sub-aggregator slot of Bin.ing.
    counts = [Count.ing() for _ in range(4)]
    return Bin.ing(num, low, high, quantity, *counts)
def testCentrallyBinTrans(self):
    """Run ``self.compare`` on CentrallyBin aggregators with a transformed Count.

    For each of the "empty", "noholes" and "withholes" datasets, one
    CentrallyBin over a fixed list of nine centers reads the column from
    ``self.data`` and another takes the corresponding array directly; both
    use ``Count(lambda x: 0.5*x)``.  The test returns early when Numpy is
    unavailable.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source; the bare ``return`` is assumed to be the whole body of the
    # ``numpy is None`` guard.
    with Numpy() as numpy:
        if numpy is None:
            return
        sys.stderr.write("\n")

        centers = [-3.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 3.0]

        fromTable = CentrallyBin(centers, lambda x: x["empty"], Count(lambda x: 0.5*x))
        fromArray = CentrallyBin(centers, lambda x: x, Count(lambda x: 0.5*x))
        self.compare("CentrallyBinTrans no data", fromTable, self.data, fromArray, self.empty)

        fromTable = CentrallyBin(centers, lambda x: x["noholes"], Count(lambda x: 0.5*x))
        fromArray = CentrallyBin(centers, lambda x: x, Count(lambda x: 0.5*x))
        self.compare("CentrallyBinTrans noholes", fromTable, self.data, fromArray, self.noholes)

        fromTable = CentrallyBin(centers, lambda x: x["withholes"], Count(lambda x: 0.5*x))
        fromArray = CentrallyBin(centers, lambda x: x, Count(lambda x: 0.5*x))
        self.compare("CentrallyBinTrans holes", fromTable, self.data, fromArray, self.withholes)
def testBinTrans(self):
    """Run ``self.compare`` on Bin aggregators with a transformed Count.

    For 10 and for 100 bins over [-3.0, 3.0], and for each of the "empty",
    "noholes" and "withholes" datasets, one Bin reads the column from
    ``self.data`` and another takes the corresponding array directly; both
    use ``Count(lambda x: 0.5*x)``.  The test returns early when Numpy is
    unavailable.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source; the three comparisons are assumed to run inside the loop.
    with Numpy() as numpy:
        if numpy is None:
            return
        sys.stderr.write("\n")

        for numBins in (10, 100):
            fromTable = Bin(numBins, -3.0, 3.0, lambda x: x["empty"], Count(lambda x: 0.5*x))
            fromArray = Bin(numBins, -3.0, 3.0, lambda x: x, Count(lambda x: 0.5*x))
            self.compare("BinTrans ({0} bins) no data".format(numBins),
                         fromTable, self.data, fromArray, self.empty)

            fromTable = Bin(numBins, -3.0, 3.0, lambda x: x["noholes"], Count(lambda x: 0.5*x))
            fromArray = Bin(numBins, -3.0, 3.0, lambda x: x, Count(lambda x: 0.5*x))
            self.compare("BinTrans ({0} bins) noholes".format(numBins),
                         fromTable, self.data, fromArray, self.noholes)

            fromTable = Bin(numBins, -3.0, 3.0, lambda x: x["withholes"], Count(lambda x: 0.5*x))
            fromArray = Bin(numBins, -3.0, 3.0, lambda x: x, Count(lambda x: 0.5*x))
            self.compare("BinTrans ({0} bins) holes".format(numBins),
                         fromTable, self.data, fromArray, self.withholes)
def __init__(self, centers, quantity=identity, value=Count(), nanflow=Count()):
    """Create a CentrallyBin that is capable of being filled and added.

    Parameters:
        centers (list of float): the centers of all bins
        quantity (function returning float): computes the quantity of interest from the data. A string is
            first wrapped with ``identity``.
        value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators to put in each bin;
            when None, ``bins`` is left as None.
        nanflow (:doc:`Container <histogrammar.defs.Container>`): a sub-aggregator to use for data whose quantity
            is NaN; a copy is stored.

    Other parameters:
        entries (float): the number of entries, initially 0.0.
        bins (list of float, :doc:`Container <histogrammar.defs.Container>` pairs): the bin centers and
            sub-aggregators in each bin.

    Raises:
        TypeError: if ``centers``, ``value`` or ``nanflow`` fail the type checks below.
        ValueError: if fewer than two centers are given.
    """
    # NOTE(review): this check only fires when ``centers`` is neither a
    # list/tuple nor an iterable of (number, Container) pairs, so any list
    # or tuple passes regardless of its contents -- confirm this is intended.
    if not (isinstance(centers, (list, tuple)) or all(
            isinstance(pair, (list, tuple)) and len(pair) == 2
            and isinstance(pair[0], numbers.Real) and isinstance(pair[1], Container)
            for pair in centers)):
        raise TypeError("centers ({0}) must be a list of number, Container pairs".format(centers))
    if not (value is None or isinstance(value, Container)):
        raise TypeError("value ({0}) must be None or a Container".format(value))
    if not isinstance(nanflow, Container):
        raise TypeError("nanflow ({0}) must be a Container".format(nanflow))
    numCenters = len(centers)
    if numCenters < 2:
        raise ValueError("number of centers ({0}) must be at least two".format(numCenters))

    self.entries = 0.0
    # One zeroed sub-aggregator per center, in ascending order of center.
    self.bins = None if value is None else [(float(c), value.zero()) for c in sorted(centers)]
    if isinstance(quantity, str):
        quantity = identity(quantity)
    self.quantity = serializable(quantity)
    self.value = value
    self.nanflow = nanflow.copy()
    super(CentrallyBin, self).__init__()
    self.specialize()
def HistogramCut(num, low, high, quantity=identity, selection=unweighted):
    """Create a conventional histogram that is capable of being filled and added, with a selection cut.

    Builds ``Bin.ing`` with four fresh ``Count.ing()`` sub-aggregators and
    wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        num (int): the number of bins; must be at least one.
        low (float): the minimum-value edge of the first bin.
        high (float): the maximum-value edge of the last bin; must be strictly greater than `low`.
        quantity (function returning float or string): function that computes the quantity of interest from the
            data; passes on all values by default. If a string is given, quantity is set to identity(string),
            in which case that column is picked up from a pandas df.
        selection (function returning boolean): function that computes if a data point is accepted or not;
            defaults to ``unweighted``.
    """
    binned = Bin.ing(num, low, high, quantity, Count.ing(), Count.ing(), Count.ing(), Count.ing())
    return Select.ing(selection, binned)
def __init__(self, binWidth, quantity=identity, value=Count(), nanflow=Count(), origin=0.0):
    """Create a SparselyBin that is capable of being filled and added.

    Parameters:
        binWidth (float): the width of a bin; must be strictly greater than zero.
        quantity (function returning float): computes the quantity of interest from the data. A string is
            first wrapped with ``identity``.
        value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators to put in each bin;
            may be None, in which case ``contentType`` is set to ``"Count"``.
        nanflow (:doc:`Container <histogrammar.defs.Container>`): a sub-aggregator to use for data whose quantity
            is NaN; a copy is stored.
        origin (float): the left edge of the bin whose index is 0.

    Other parameters:
        entries (float): the number of entries, initially 0.0.
        bins (dict from int to :doc:`Container <histogrammar.defs.Container>`): the map, probably a hashmap, to
            fill with values when their `entries` become non-zero.

    Raises:
        TypeError: if ``binWidth`` or ``origin`` is not a real number, if ``value`` is neither None nor a
            Container, or if ``nanflow`` is not a Container.
        ValueError: if ``binWidth`` is not greater than zero.
    """
    if not isinstance(binWidth, numbers.Real):
        raise TypeError("binWidth ({0}) must be a number".format(binWidth))
    if value is not None and not isinstance(value, Container):
        # None is accepted by the check above, so the message says so.
        raise TypeError("value ({0}) must be None or a Container".format(value))
    if not isinstance(nanflow, Container):
        raise TypeError("nanflow ({0}) must be a Container".format(nanflow))
    if not isinstance(origin, numbers.Real):
        raise TypeError("origin ({0}) must be a number".format(origin))
    if binWidth <= 0.0:
        raise ValueError("binWidth ({0}) must be greater than zero".format(binWidth))

    self.binWidth = float(binWidth)
    self.entries = 0.0
    self.quantity = serializable(identity(quantity) if isinstance(quantity, str) else quantity)
    self.value = value
    # Without a template sub-aggregator the content type falls back to "Count".
    self.contentType = value.name if value is not None else "Count"
    self.bins = {}
    self.nanflow = nanflow.copy()
    self.origin = float(origin)
    super(SparselyBin, self).__init__()
    self.specialize()
def __init__(self, quantity=identity, value=Count()):
    """Create a Categorize that is capable of being filled and added.

    Parameters:
        quantity (function): computes the quantity of interest from the data. A string is first wrapped
            with ``identity``.
        value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators to put in each bin;
            may be None, in which case ``contentType`` is set to ``"Count"``.

    Other Parameters:
        entries (float): the number of entries, initially 0.0.
        bins (dict from str to :doc:`Container <histogrammar.defs.Container>`): the map, probably a hashmap, to
            fill with values when their `entries` become non-zero.

    Raises:
        TypeError: if ``value`` is neither None nor a Container.
    """
    if not (value is None or isinstance(value, Container)):
        raise TypeError("value ({0}) must be None or a Container".format(value))

    self.entries = 0.0
    if isinstance(quantity, str):
        quantity = identity(quantity)
    self.quantity = serializable(quantity)
    self.value = value
    self.bins = {}
    # Without a template sub-aggregator the content type falls back to "Count".
    self.contentType = "Count" if value is None else value.name
    super(Categorize, self).__init__()
    self.specialize()
def testBranch(self):
    """Fill a Branch of (Histogram, Count, Deviate) and check each member.

    Every element of ``self.simple`` is filled into the Branch.  The first
    member's bin values and under/over/nan flows, the second member's
    entries, and the third member's entries, mean and variance are then
    compared to fixed expected numbers, followed by the scaling, JSON,
    pickle and name checks of the test case.
    """
    histogramPart = Histogram(5, -3.0, 7.0, lambda x: x)
    countPart = Count()
    deviatePart = Deviate(lambda x: x + 100.0)
    branching = Branch(histogramPart, countPart, deviatePart)

    for datum in self.simple:
        branching.fill(datum)

    self.assertEqual(branching.i0.numericalValues, [3.0, 2.0, 2.0, 1.0, 0.0])
    self.assertEqual(branching.i0.numericalUnderflow, 1.0)
    self.assertEqual(branching.i0.numericalOverflow, 1.0)
    self.assertEqual(branching.i0.numericalNanflow, 0.0)

    self.assertEqual(branching.i1.entries, 10.0)

    self.assertAlmostEqual(branching.i2.entries, 10.0)
    self.assertAlmostEqual(branching.i2.mean, 100.33)
    self.assertAlmostEqual(branching.i2.variance, 10.8381)

    self.checkScaling(branching)
    self.checkScaling(branching.toImmutable())
    for check in (self.checkJson, self.checkPickle, self.checkName):
        check(branching)
def __init__(self, thresholds, quantity=identity, value=Count(), nanflow=Count()):
    """Create a Stack that is capable of being filled and added.

    Parameters:
        thresholds (list of floats): specifies ``N`` cut thresholds, so the Stack will fill ``N + 1``
            aggregators, each overlapping the last.
        quantity (function returning float): computes the quantity of interest from the data. A string is
            first wrapped with ``identity``.
        value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators for each bin; when
            None, ``thresholds`` is stored as ``bins`` unchanged (as a tuple).
        nanflow (:doc:`Container <histogrammar.defs.Container>`): a sub-aggregator to use for data whose quantity
            is NaN; a copy is stored.

    Other parameters:
        entries (float): the number of entries, initially 0.0.
        bins (list of float, :doc:`Container <histogrammar.defs.Container>` pairs): the ``N + 1`` thresholds and
            sub-aggregators. (The first threshold is minus infinity; the rest are the ones specified by
            ``thresholds``).

    Raises:
        TypeError: if ``thresholds``, ``value`` or ``nanflow`` fail the type checks below.
    """
    # NOTE(review): this check only fires when ``thresholds`` is neither a
    # list/tuple nor an iterable of (number, Container) pairs, so any list
    # or tuple passes regardless of its contents -- confirm this is intended.
    if not (isinstance(thresholds, (list, tuple)) or all(
            isinstance(pair, (list, tuple)) and len(pair) == 2
            and isinstance(pair[0], numbers.Real) and isinstance(pair[1], Container)
            for pair in thresholds)):
        raise TypeError("thresholds ({0}) must be a list of number, Container pairs".format(thresholds))
    if not (value is None or isinstance(value, Container)):
        raise TypeError("value ({0}) must be None or a Container".format(value))
    if not isinstance(nanflow, Container):
        raise TypeError("nanflow ({0}) must be a Container".format(nanflow))

    self.entries = 0.0
    if isinstance(quantity, str):
        quantity = identity(quantity)
    self.quantity = serializable(quantity)

    if value is None:
        self.bins = tuple(thresholds)
    else:
        # Prepend minus infinity so the first sub-aggregator accepts everything.
        cuts = (float("-inf"), ) + tuple(thresholds)
        self.bins = tuple((float(cut), value.zero()) for cut in cuts)

    self.nanflow = nanflow.copy()
    super(Stack, self).__init__()
    self.specialize()
def CategorizeHistogram(quantity=identity):
    """Create a Categorize histogram for categorical features such as strings and booleans.

    Thin wrapper around ``Categorize.ing`` with a fresh ``Count.ing()`` as
    the sub-aggregator.

    Parameters:
        quantity (function returning float or string): function that computes the quantity of interest from the
            data; passes on all values by default. If a string is given, quantity is set to identity(string),
            in which case that column is picked up from a pandas df.
    """
    perCategory = Count.ing()
    return Categorize.ing(quantity, perCategory)
def SparselyProfile(binWidth, binnedQuantity, averagedQuantity, selection=unweighted, origin=0.0):
    """Convenience function for creating sparsely binned binwise averages.

    Builds ``SparselyBin.ing`` over ``binnedQuantity`` with an
    ``Average.ing(averagedQuantity)`` sub-aggregator and a ``Count.ing()``,
    then wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        binWidth: bin width forwarded to ``SparselyBin.ing``.
        binnedQuantity: quantity forwarded to ``SparselyBin.ing``.
        averagedQuantity: quantity forwarded to ``Average.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
        origin (float): origin forwarded to ``SparselyBin.ing``.
    """
    averages = Average.ing(averagedQuantity)
    profile = SparselyBin.ing(binWidth, binnedQuantity, averages, Count.ing(), origin)
    return Select.ing(selection, profile)
def SparselyProfileErr(binWidth, binnedQuantity, averagedQuantity, selection=unweighted, origin=0.0):
    """Convenience function for creating a physicist's sparsely binned "profile plot," which is a Profile with variances.

    Builds ``SparselyBin.ing`` over ``binnedQuantity`` with a
    ``Deviate.ing(averagedQuantity)`` sub-aggregator and a ``Count.ing()``,
    then wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        binWidth: bin width forwarded to ``SparselyBin.ing``.
        binnedQuantity: quantity forwarded to ``SparselyBin.ing``.
        averagedQuantity: quantity forwarded to ``Deviate.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
        origin (float): origin forwarded to ``SparselyBin.ing``.
    """
    deviations = Deviate.ing(averagedQuantity)
    profile = SparselyBin.ing(binWidth, binnedQuantity, deviations, Count.ing(), origin)
    return Select.ing(selection, profile)
def testFraction(self):
    """Fill a Fraction selecting ``x > 0.0`` and check both sides.

    After filling every element of ``self.simple``, the numerator must hold
    4.0 entries and the denominator 10.0; the aggregator is then passed
    through the scaling, JSON, pickle and name checks of the test case.
    """
    fracking = Fraction(named("something", lambda x: x > 0.0), Count())
    for datum in self.simple:
        fracking.fill(datum)

    self.assertEqual(fracking.numerator.entries, 4.0)
    self.assertEqual(fracking.denominator.entries, 10.0)

    self.checkScaling(fracking)
    self.checkScaling(fracking.toImmutable())
    for check in (self.checkJson, self.checkPickle, self.checkName):
        check(fracking)
def testIrregularlyBin(self):
    """Fill an IrregularlyBin with five thresholds and check its bins.

    After filling every element of ``self.simple``, the (threshold,
    entries) pairs of ``bins`` must match the fixed expectation, which
    begins with a minus-infinity threshold.  The aggregator is then passed
    through the scaling, JSON, pickle and name checks of the test case.
    """
    partitioning = IrregularlyBin([0.0, 2.0, 4.0, 6.0, 8.0], named("something", lambda x: x), Count())
    for datum in self.simple:
        partitioning.fill(datum)

    expected = [(float("-inf"), 4.0), (0.0, 3.0), (2.0, 2.0), (4.0, 0.0), (6.0, 1.0), (8.0, 0.0)]
    observed = [(edge, sub.entries) for edge, sub in partitioning.bins]
    self.assertEqual(observed, expected)

    self.checkScaling(partitioning)
    self.checkScaling(partitioning.toImmutable())
    for check in (self.checkJson, self.checkPickle, self.checkName):
        check(partitioning)
def build(*ys):
    """Create a Stack out of pre-existing containers, which might have been aggregated on different streams.

    Bin ``i`` of the result holds the left-to-right sum of ``ys[i:]``,
    paired with a NaN threshold.  The total ``entries`` is the sum of the
    ``entries`` of all inputs and the nanflow is ``Count.ed(0.0)``.

    Parameters:
        ys (list of :doc:`Container <histogrammar.defs.Container>`): this function will attempt to add them, so
            they must also have the same binning/bounds/etc.

    Raises:
        TypeError: if any element of ``ys`` is not a Container.
    """
    from functools import reduce

    for y in ys:
        if not isinstance(y, Container):
            raise TypeError("ys must all be Containers")

    def add(a, b):
        return a + b

    entries = sum(y.entries for y in ys)
    bins = [(float("nan"), reduce(add, ys[start:])) for start in xrange(len(ys))]
    return Stack.ed(entries, bins, Count.ed(0.0))
def testStack(self):
    """Fill a Stack with five thresholds and check its bins.

    After filling every element of ``self.simple``, the (threshold,
    entries) pairs of ``bins`` must match the fixed expectation, which
    begins with a minus-infinity threshold.  The aggregator is then passed
    through the scaling, JSON, pickle and name checks of the test case.
    """
    stacking = Stack([0.0, 2.0, 4.0, 6.0, 8.0], named("something", lambda x: x), Count())
    for datum in self.simple:
        stacking.fill(datum)

    expected = [(float("-inf"), 10.0), (0.0, 6.0), (2.0, 3.0), (4.0, 1.0), (6.0, 1.0), (8.0, 0.0)]
    observed = [(cut, sub.entries) for cut, sub in stacking.bins]
    self.assertEqual(observed, expected)

    self.checkScaling(stacking)
    self.checkScaling(stacking.toImmutable())
    for check in (self.checkJson, self.checkPickle, self.checkName):
        check(stacking)
def testCount(self):
    """Check plain Counts on every split of ``self.simple``.

    For each split point 0..10 the left and right slices are filled into
    separate Counts.  Each side's ``entries`` must equal the slice length
    and the sum's ``entries`` must equal ``len(self.simple)``.  The left
    Count is then passed through the scaling, JSON, pickle and name checks
    of the test case.
    """
    # NOTE(review): statement nesting was reconstructed from a
    # whitespace-collapsed source; all checks are assumed to run once per
    # split -- confirm against the original layout.
    for split in xrange(11):
        left = self.simple[:split]
        right = self.simple[split:]

        leftCounting = Count()
        rightCounting = Count()

        for datum in left:
            leftCounting.fill(datum)
        for datum in right:
            rightCounting.fill(datum)

        self.assertEqual(leftCounting.entries, len(left))
        self.assertEqual(rightCounting.entries, len(right))

        finalResult = leftCounting + rightCounting
        self.assertEqual(finalResult.entries, len(self.simple))

        self.checkScaling(leftCounting)
        self.checkScaling(leftCounting.toImmutable())
        for check in (self.checkJson, self.checkPickle, self.checkName):
            check(leftCounting)
def __init__(self, quantity=identity, value=Count()):
    """Create a Fraction that is capable of being filled and added.

    Parameters:
        quantity (function returning bool or float): computes the quantity of interest from the data and
            interprets it as a selection (multiplicative factor on weight). A string is first wrapped with
            ``identity``.
        value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators for the numerator and
            denominator; when None, neither attribute is set here.

    Other parameters:
        entries (float): the number of entries, initially 0.0.
        numerator (:doc:`Container <histogrammar.defs.Container>`): the sub-aggregator of entries that pass
            the selection.
        denominator (:doc:`Container <histogrammar.defs.Container>`): the sub-aggregator of all entries.

    Raises:
        TypeError: if ``value`` is neither None nor a Container.
    """
    if not (value is None or isinstance(value, Container)):
        raise TypeError("value ({0}) must be None or a Container".format(value))

    self.entries = 0.0
    if isinstance(quantity, str):
        quantity = identity(quantity)
    self.quantity = serializable(quantity)

    if value is not None:
        # Two independent zeroed copies of the template sub-aggregator.
        self.numerator = value.zero()
        self.denominator = value.zero()

    super(Fraction, self).__init__()
    self.specialize()
def ing(quantity, value=Count()):
    """Synonym for ``__init__``: construct a Categorize from the given arguments.

    Parameters:
        quantity: forwarded as the first constructor argument.
        value: forwarded as the second constructor argument; defaults to a Count.
    """
    categorizing = Categorize(quantity, value)
    return categorizing
def Histogram(num, low, high, quantity, selection=unweighted):
    """Convenience function for creating a conventional histogram.

    Builds ``Bin.ing`` with four fresh ``Count.ing()`` sub-aggregators and
    wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        num: number of bins, forwarded to ``Bin.ing``.
        low: lower edge, forwarded to ``Bin.ing``.
        high: upper edge, forwarded to ``Bin.ing``.
        quantity: quantity forwarded to ``Bin.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
    """
    binned = Bin.ing(num, low, high, quantity, Count.ing(), Count.ing(), Count.ing(), Count.ing())
    return Select.ing(selection, binned)
def ing(centers, quantity, value=Count(), nanflow=Count()):
    """Synonym for ``__init__``: construct a CentrallyBin from the given arguments.

    Parameters:
        centers: forwarded as the first constructor argument.
        quantity: forwarded as the second constructor argument.
        value: forwarded as the third constructor argument; defaults to a Count.
        nanflow: forwarded as the fourth constructor argument; defaults to a Count.
    """
    binning = CentrallyBin(centers, quantity, value, nanflow)
    return binning
def SparselyHistogram(binWidth, quantity, selection=unweighted, origin=0.0):
    """Convenience function for creating a sparsely binned histogram.

    Builds ``SparselyBin.ing`` with two fresh ``Count.ing()``
    sub-aggregators and wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        binWidth: bin width forwarded to ``SparselyBin.ing``.
        quantity: quantity forwarded to ``SparselyBin.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
        origin (float): origin forwarded to ``SparselyBin.ing``.
    """
    sparse = SparselyBin.ing(binWidth, quantity, Count.ing(), Count.ing(), origin)
    return Select.ing(selection, sparse)
def ing(thresholds, quantity, value=Count(), nanflow=Count()):
    """Synonym for ``__init__``: construct a Stack from the given arguments.

    Parameters:
        thresholds: forwarded as the first constructor argument.
        quantity: forwarded as the second constructor argument.
        value: forwarded as the third constructor argument; defaults to a Count.
        nanflow: forwarded as the fourth constructor argument; defaults to a Count.
    """
    stacking = Stack(thresholds, quantity, value, nanflow)
    return stacking
def ing(quantity, value=Count()):
    """Synonym for ``__init__``: construct a Fraction from the given arguments.

    Parameters:
        quantity: forwarded as the first constructor argument.
        value: forwarded as the second constructor argument; defaults to a Count.
    """
    fraction = Fraction(quantity, value)
    return fraction
def CategorizeHistogram(quantity, selection=unweighted):
    """Convenience function for creating a categorize histogram.

    Builds ``Categorize.ing`` with a fresh ``Count.ing()`` sub-aggregator
    and wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        quantity: quantity forwarded to ``Categorize.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
    """
    categorized = Categorize.ing(quantity, Count.ing())
    return Select.ing(selection, categorized)
def SparselyProfile(binWidth, binnedQuantity, averagedQuantity, selection=unweighted, origin=0.0):
    """Convenience function for creating sparsely binned binwise averages.

    Builds ``SparselyBin.ing`` over ``binnedQuantity`` with an
    ``Average.ing(averagedQuantity)`` sub-aggregator and a ``Count.ing()``,
    then wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        binWidth: bin width forwarded to ``SparselyBin.ing``.
        binnedQuantity: quantity forwarded to ``SparselyBin.ing``.
        averagedQuantity: quantity forwarded to ``Average.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
        origin (float): origin forwarded to ``SparselyBin.ing``.
    """
    binwiseAverage = Average.ing(averagedQuantity)
    sparse = SparselyBin.ing(binWidth, binnedQuantity, binwiseAverage, Count.ing(), origin)
    return Select.ing(selection, sparse)
def SparselyProfileErr(binWidth, binnedQuantity, averagedQuantity, selection=unweighted, origin=0.0):
    """Convenience function for creating a physicist's sparsely binned "profile plot," which is a Profile with variances.

    Builds ``SparselyBin.ing`` over ``binnedQuantity`` with a
    ``Deviate.ing(averagedQuantity)`` sub-aggregator and a ``Count.ing()``,
    then wraps it in ``Select.ing(selection, ...)``.

    Parameters:
        binWidth: bin width forwarded to ``SparselyBin.ing``.
        binnedQuantity: quantity forwarded to ``SparselyBin.ing``.
        averagedQuantity: quantity forwarded to ``Deviate.ing``.
        selection: first argument of ``Select.ing``; defaults to ``unweighted``.
        origin (float): origin forwarded to ``SparselyBin.ing``.
    """
    binwiseDeviate = Deviate.ing(averagedQuantity)
    sparse = SparselyBin.ing(binWidth, binnedQuantity, binwiseDeviate, Count.ing(), origin)
    return Select.ing(selection, sparse)
def SparselyProfileErr(binWidth, binnedQuantity, averagedQuantity, origin=0.0):
    """Convenience function for creating a sparsely binned profile plot.

    This is a Profile with variances: ``SparselyBin.ing`` over
    ``binnedQuantity`` with a ``Deviate.ing(averagedQuantity)``
    sub-aggregator and a ``Count.ing()``.

    Parameters:
        binWidth: bin width forwarded to ``SparselyBin.ing``.
        binnedQuantity: quantity forwarded to ``SparselyBin.ing``.
        averagedQuantity: quantity forwarded to ``Deviate.ing``.
        origin (float): origin forwarded to ``SparselyBin.ing``.
    """
    binwiseDeviate = Deviate.ing(averagedQuantity)
    return SparselyBin.ing(binWidth, binnedQuantity, binwiseDeviate, Count.ing(), origin)