Ejemplo n.º 1
0
        Compatible function call with Bin and SparselyBin

        :param int max_length: maximum length of a label. Default is full length.
        :returns: array of labels
        :rtype: numpy.array
        """
        return self.bin_labels(max_length)

    def _center_from_key(self, bin_key):
        """Return the center for ``bin_key``; here the key itself is returned unchanged."""
        center = bin_key
        return center

    @property
    def mpv(self):
        """Bin label of the most probable value, i.e. of the bin with the largest entry count.

        When several bins share the maximum, the one with the lowest index is returned.
        """
        entries = self.bin_entries()
        labels = self.bin_labels()

        # max() keeps the first maximal item it meets, hence the lowest index on ties
        best_idx, _ = max(enumerate(entries), key=lambda pair: pair[1])
        return labels[best_idx]


# extra properties: number of dimensions and datatypes of sub-hists
Categorize.n_dim = n_dim
Categorize.datatype = datatype

# register extra methods such as plotting
Factory.register(Categorize)
Ejemplo n.º 2
0
 def hg(self, h):
     """Run aggregator ``h`` through the JVM-side ``AggregatorConverter`` on ``self._jdf``.

     The JVM result is serialized to a JSON string, parsed, and rebuilt on the
     Python side with ``Factory.fromJson``.
     """
     jvm_converter = self._sc._jvm.org.dianahep.histogrammar.sparksql.pyspark.AggregatorConverter()
     aggregator = h._sparksql(self._sc._jvm, jvm_converter)
     filled = jvm_converter.histogrammar(self._jdf, aggregator)
     json_text = filled.toJsonString()
     return Factory.fromJson(json.loads(json_text))
Ejemplo n.º 3
0
    def __repr__(self):
        """Short description showing the ``name`` of the first value and ``self.size``."""
        first_name = self.values[0].name
        template = "<Label values={0} size={1}>"
        return template.format(first_name, self.size)

    def __eq__(self, other):
        """Equal when ``other`` is a Label whose entries match under ``numeq`` and whose pairs are equal."""
        if not isinstance(other, Label):
            return False
        return numeq(self.entries, other.entries) and self.pairs == other.pairs

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of ``entries`` together with the (key, value) pairs in sorted order."""
        entries = self.entries
        ordered_pairs = tuple(sorted(self.pairs.items()))
        return hash((entries, ordered_pairs))


Factory.register(Label)

# UntypedLabel


class UntypedLabel(Factory, Container, Collection):
    """Accumulate any number of aggregators of any type and label them with strings.

    Every sub-aggregator is filled with every input datum.

    This primitive simulates a directory of aggregators. For sub-directories, nest collections within the UntypedLabel.

    Note that sub-aggregators within an UntypedLabel may have *different types*. In strongly typed languages, this
    flexibility poses a problem: nested objects must be type-cast before they can be used. To collect objects of
    the *same type* with string-based look-up keys, use :doc:`Label <histogrammar.primitives.collection.Label>`.
Ejemplo n.º 4
0
            out.quantity.name = nameFromParent if name is None else name
            return out.specialize()

        else:
            raise JsonFormatException(json, "CentrallyBin")

    def __repr__(self):
        """Short description: ``name`` of the first bin's aggregator, number of bins, and nanflow ``name``."""
        first_bin_name = self.bins[0][1].name
        n_bins = len(self.bins)
        template = "<CentrallyBin bins={0} size={1} nanflow={2}>"
        return template.format(first_bin_name, n_bins, self.nanflow.name)

    def __eq__(self, other):
        """Equal when ``other`` is a CentrallyBin with the same quantity, entries (via ``numeq``), bins and nanflow."""
        if not isinstance(other, CentrallyBin):
            return False
        return (self.quantity == other.quantity
                and numeq(self.entries, other.entries)
                and self.bins == other.bins
                and self.nanflow == other.nanflow)

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of quantity, entries, the bins (converted to a tuple) and nanflow."""
        key = (self.quantity, self.entries, tuple(self.bins), self.nanflow)
        return hash(key)


# extra properties: number of dimensions and datatypes of sub-hists
CentrallyBin.n_dim = n_dim
CentrallyBin.datatype = datatype

# register extra methods
Factory.register(CentrallyBin)
Ejemplo n.º 5
0
            denominator = factory.fromJsonFragment(json["denominator"], subName)

            out = Fraction.ed(entries, numerator, denominator)
            out.quantity.name = nameFromParent if name is None else name
            return out.specialize()

        else:
            raise JsonFormatException(json, "Fraction")

    def __repr__(self):
        """Short description showing the ``name`` of the numerator."""
        numerator_name = self.numerator.name
        return "<Fraction values={0}>".format(numerator_name)

    def __eq__(self, other):
        """Equal when ``other`` is a Fraction with matching entries (via ``numeq``), quantity, numerator and denominator."""
        if not isinstance(other, Fraction):
            return False
        return (numeq(self.entries, other.entries)
                and self.quantity == other.quantity
                and self.numerator == other.numerator
                and self.denominator == other.denominator)

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of the (entries, quantity, numerator, denominator) tuple."""
        key = (self.entries, self.quantity, self.numerator, self.denominator)
        return hash(key)


# extra properties: number of dimensions and datatypes of sub-hists
Fraction.n_dim = n_dim
Fraction.datatype = datatype

# register extra methods
Factory.register(Fraction)
Ejemplo n.º 6
0
            self.nanflow.name)

    def __eq__(self, other):
        """Compare two Stack aggregators.

        Equal when ``other`` is a Stack with matching entries (via ``numeq``),
        the same quantity, the same number of bins whose thresholds match under
        ``numeq`` and whose sub-aggregators are equal, and an equal nanflow.
        """
        if not isinstance(other, Stack):
            return False
        # zip() stops at the shorter sequence, so without the explicit length
        # check a Stack would compare equal to one that merely shares a prefix
        # of its bins.
        return numeq(self.entries, other.entries) and self.quantity == other.quantity and \
            len(self.bins) == len(other.bins) and \
            all(numeq(c1, c2) and v1 == v2 for (c1, v1), (c2, v2) in zip(self.bins, other.bins)) and \
            self.nanflow == other.nanflow

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of entries, quantity, the bins and nanflow.

        The bins are converted to a tuple first so that hashing also works when
        ``self.bins`` is held as a list; for a tuple the resulting hash is
        unchanged.
        """
        return hash((self.entries, self.quantity, tuple(self.bins), self.nanflow))

    def bin_entries(self):
        """
        Collect the ``entries`` attribute of every element of ``self.values``

        :returns: numpy array holding one entries value per element, in order
        :rtype: numpy.array
        """
        import numpy as np
        counts = []
        for sub in self.values:
            counts.append(sub.entries)
        return np.array(counts)


# extra properties: number of dimensions and datatypes of sub-hists
Stack.n_dim = n_dim
Stack.datatype = datatype

# register extra methods
Factory.register(Stack)
Ejemplo n.º 7
0
        return isinstance(
            other, Minimize) and self.quantity == other.quantity and numeq(
                self.entries, other.entries) and numeq(self.min, other.min)

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of the (quantity, entries, min) tuple."""
        key = (self.quantity, self.entries, self.min)
        return hash(key)


# extra properties: number of dimensions and datatypes of sub-hists
Minimize.n_dim = n_dim
Minimize.datatype = datatype

Factory.register(Minimize)


class Maximize(Factory, Container):
    """Find the maximum value of a given quantity. If no data are observed, the result is NaN."""
    @staticmethod
    def ed(entries, max):
        """Create a Maximize that is only capable of being added.

        Parameters:
            entries (float): the number of entries.
            max (float): the highest value of the quantity observed or NaN if no data were observed.
        """
        if not isinstance(entries, numbers.Real) and entries not in ("nan",
                                                                     "inf",
                                                                     "-inf"):
Ejemplo n.º 8
0
                        if v1i != v2i:
                            return False
                    else:
                        return False
            else:
                return False

            if v1 == "nan" and v2 == "nan" and w1 is None and w2 is None:
                pass
            elif isinstance(w1, numbers.Real) and isinstance(w2, numbers.Real):
                if not numeq(w1, w2):
                    return False
            else:
                return False

        return isinstance(other, Bag) and self.quantity == other.quantity and numeq(self.entries, other.entries)

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of quantity, entries, the contents of ``self.values`` and range.

        The (key, value) items are hashed as a frozenset so the result does not
        depend on the insertion order of the ``values`` dict: two objects
        holding the same items always hash alike.
        """
        return hash((self.quantity, self.entries, frozenset(self.values.items()), self.range))


# extra properties: number of dimensions and datatypes of sub-hists
Bag.n_dim = n_dim
Bag.datatype = datatype

# register extra methods such as plotting
Factory.register(Bag)
Ejemplo n.º 9
0
            else:
                raise JsonFormatException(json["sum"], "Sum.sum")

            out = Sum.ed(entries, sum)
            out.quantity.name = nameFromParent if name is None else name
            return out.specialize()

        else:
            raise JsonFormatException(json, "Sum")

    def __repr__(self):
        """Short description showing the current ``sum``."""
        template = "<Sum sum={0}>"
        return template.format(self.sum)

    def __eq__(self, other):
        """Equal when ``other`` is a Sum with the same quantity and with entries and sum matching under ``numeq``."""
        if not isinstance(other, Sum):
            return False
        return (self.quantity == other.quantity
                and numeq(self.entries, other.entries)
                and numeq(self.sum, other.sum))

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of the (quantity, entries, sum) tuple."""
        key = (self.quantity, self.entries, self.sum)
        return hash(key)


# extra properties: number of dimensions and datatypes of sub-hists
Sum.n_dim = n_dim
Sum.datatype = datatype

# register extra methods
Factory.register(Sum)
Ejemplo n.º 10
0
 def checkJson(self, x):
     """Assert that ``x.toJson()`` is unchanged by a ``Factory.fromJson`` / ``toJson`` round trip."""
     expected = x.toJson()
     round_tripped = Factory.fromJson(x.toJson()).toJson()
     self.assertEqual(expected, round_tripped)
Ejemplo n.º 11
0
                raise JsonFormatException(json["mean"], "Average.mean")

            out = Average.ed(entries, mean)
            out.quantity.name = nameFromParent if name is None else name
            return out.specialize()

        else:
            raise JsonFormatException(json, "Average")

    def __repr__(self):
        """Short description showing the current ``mean``."""
        template = "<Average mean={0}>"
        return template.format(self.mean)

    def __eq__(self, other):
        """Equal when ``other`` is an Average with the same quantity and with entries and mean matching under ``numeq``."""
        if not isinstance(other, Average):
            return False
        return (self.quantity == other.quantity
                and numeq(self.entries, other.entries)
                and numeq(self.mean, other.mean))

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of the (quantity, entries, mean) tuple."""
        key = (self.quantity, self.entries, self.mean)
        return hash(key)


# extra properties: number of dimensions and datatypes of sub-hists
Average.n_dim = n_dim
Average.datatype = datatype

# register extra methods such as plotting
Factory.register(Average)
Ejemplo n.º 12
0
    def toJsonFragment(self, suppressName):
        # The JSON fragment is just the entries value passed through floatToJson;
        # suppressName is accepted but not used here.
        entries = self.entries
        return floatToJson(entries)

    @staticmethod
    @inheritdoc(Factory)
    def fromJsonFragment(json, nameFromParent):
        # Accepted input: a real number, or one of the strings "nan", "inf", "-inf".
        # Anything else is rejected with JsonFormatException. nameFromParent is unused here.
        is_special_string = json in ("nan", "inf", "-inf")
        if not (is_special_string or isinstance(json, numbers.Real)):
            raise JsonFormatException(json, "Count")
        return Count.ed(float(json))

    def __repr__(self):
        """Short description showing the current ``entries``."""
        template = "<Count {0}>"
        return template.format(self.entries)

    def __eq__(self, other):
        """Equal when ``other`` is a Count with entries matching under ``numeq`` and an equal transform."""
        if not isinstance(other, Count):
            return False
        return numeq(self.entries, other.entries) and self.transform == other.transform

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of the (entries, transform) tuple."""
        key = (self.entries, self.transform)
        return hash(key)


# extra properties: number of dimensions and datatypes of sub-hists
Count.n_dim = n_dim
Count.datatype = datatype

# register extra methods
Factory.register(Count)
Ejemplo n.º 13
0
            else:
                raise JsonFormatException(json, "IrregularlyBin.bins")

        else:
            raise JsonFormatException(json, "IrregularlyBin")

    def __repr__(self):
        """Short description: ``name`` of the first bin's aggregator, the thresholds, and nanflow ``name``."""
        first_bin_name = self.bins[0][1].name
        thresholds_text = ", ".join(str(t) for t in self.thresholds)
        template = "<IrregularlyBin values={0} thresholds=({1}) nanflow={2}>"
        return template.format(first_bin_name, thresholds_text, self.nanflow.name)

    def __eq__(self, other):
        """Compare two IrregularlyBin aggregators.

        Equal when ``other`` is an IrregularlyBin with matching entries (via
        ``numeq``), the same quantity, the same number of bins whose thresholds
        match under ``numeq`` and whose sub-aggregators are equal, and an equal
        nanflow.
        """
        if not isinstance(other, IrregularlyBin):
            return False
        # zip() stops at the shorter sequence, so without the explicit length
        # check two objects sharing only a prefix of their bins would compare equal.
        return numeq(self.entries, other.entries) and \
            self.quantity == other.quantity and \
            len(self.bins) == len(other.bins) and \
            all(numeq(c1, c2) and v1 == v2 for (c1, v1), (c2, v2) in zip(self.bins, other.bins)) and \
            self.nanflow == other.nanflow

    def __ne__(self, other):
        """Inequality is defined as the negation of ``==``."""
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """Hash of entries, quantity, the bins and nanflow.

        The bins are converted to a tuple first so that hashing also works when
        ``self.bins`` is held as a list; for a tuple the resulting hash is
        unchanged.
        """
        return hash((self.entries, self.quantity, tuple(self.bins), self.nanflow))


# extra properties: number of dimensions and datatypes of sub-hists
IrregularlyBin.n_dim = n_dim
IrregularlyBin.datatype = datatype

# register extra methods
Factory.register(IrregularlyBin)
Ejemplo n.º 14
0
        """
        import numpy as np
        bin_edges = self.bin_edges(low, high)
        centers = [(bin_edges[i] + bin_edges[i + 1]) / 2.
                   for i in range(len(bin_edges) - 1)]
        return np.array(centers)

    @property
    def mpv(self):
        """Bin center of the most probable value, i.e. of the bin with the largest entry count.

        When several bins share the maximum, the one with the lowest index is returned.
        """
        entries = self.bin_entries()
        centers = self.bin_centers()

        # max() keeps the first maximal item it meets, hence the lowest index on ties
        best_idx, _ = max(enumerate(entries), key=lambda pair: pair[1])
        return centers[best_idx]

    def _center_from_key(self, bin_key):
        """Return the bin center for ``bin_key``: ``(bin_key + 0.5) * binWidth + origin``."""
        half_step_key = bin_key + 0.5
        return half_step_key * self.binWidth + self.origin


# extra properties: number of dimensions and datatypes of sub-hists
SparselyBin.n_dim = n_dim
SparselyBin.datatype = datatype

# register extra methods such as plotting
Factory.register(SparselyBin)
Ejemplo n.º 15
0
    def compare(self, name, hnp, npdata, hpy, pydata):
        """Fill one aggregator through ``fill.numpy`` and one item-by-item, then check they agree.

        Along the way this also checks that ``fill.numpy`` leaves its input
        unmodified, that adding aggregators behaves as expected (``a + b``,
        ``b + a`` and adding ``zero()``), and that both fill paths give equal
        results after a JSON round trip. A timing line is written to stderr.

        Parameters:
            name: label used in the timing line written to stderr.
            hnp: aggregator that is filled with ``hnp.fill.numpy(npdata)``.
            npdata: input for the Numpy fill; must support ``copy()``, iteration
                over keys and indexing by key (presumably a dict-like or
                structured collection of arrays -- TODO confirm against callers).
            hpy: aggregator that is filled one datum at a time with ``hpy.fill(d)``.
            pydata: iterable with a ``dtype`` attribute, providing the data for the
                per-item fill.

        Raises:
            AssertionError: if any of the checks fails.
        """
        import numpy

        # snapshot of the input, used below to detect modification by fill.numpy
        npdata2 = npdata.copy()

        # empty copies taken before anything is filled:
        # *2 will be filled once, *3 will be filled twice
        hnp2 = hnp.copy()
        hnp3 = hnp.copy()
        hpy2 = hpy.copy()
        hpy3 = hpy.copy()

        # time the Numpy-based fill
        startTime = time.time()
        hnp.fill.numpy(npdata)
        numpyTime = time.time() - startTime

        # NOTE(review): numpy.unicode_ is an alias that newer NumPy releases (2.0+)
        # no longer provide -- confirm the supported NumPy range.
        if pydata.dtype != numpy.unicode_:
            for key in npdata:
                # positions where the data differ from the snapshot, ignoring
                # positions where either side is NaN (NaN != NaN would otherwise count)
                diff = (npdata[key] != npdata2[key]) & numpy.bitwise_not(
                    numpy.isnan(npdata[key])) & numpy.bitwise_not(numpy.isnan(npdata2[key]))
                if numpy.any(diff):
                    raise AssertionError("npdata has been modified:\n{0}\n{1}\n{2}\n{3} vs {4}".format(npdata[key], npdata2[key], numpy.nonzero(
                        diff), npdata[key][numpy.nonzero(diff)[0][0]], npdata2[key][numpy.nonzero(diff)[0][0]]))

        # hnp2 holds one fill, hnp3 holds two fills of the same data
        hnp2.fill.numpy(npdata)
        hnp3.fill.numpy(npdata)
        hnp3.fill.numpy(npdata)
        # one fill + one fill must equal two fills, in either order
        assert (hnp + hnp2) == hnp3
        assert (hnp2 + hnp) == hnp3
        # adding zero() must leave the aggregator unchanged
        assert (hnp + hnp.zero()) == hnp2
        assert (hnp.zero() + hnp) == hnp2

        # time the per-item fill; each datum is converted to str or float first
        startTime = time.time()
        for d in pydata:
            if isinstance(d, numpy.unicode_):
                d = str(d)
            else:
                d = float(d)
            hpy.fill(d)
        pyTime = time.time() - startTime

        # hpy3 appears twice in the list, so it receives two fills; hpy2 receives one
        for h in [hpy2, hpy3, hpy3]:
            for d in pydata:
                if isinstance(d, numpy.unicode_):
                    d = str(d)
                else:
                    d = float(d)
                h.fill(d)

        # same addition checks as for the Numpy-filled aggregators
        assert (hpy + hpy) == hpy3
        assert (hpy + hpy2) == hpy3
        assert (hpy2 + hpy) == hpy3
        assert (hpy + hpy.zero()) == hpy2
        assert (hpy.zero() + hpy) == hpy2

        # JSON text, only used in the failure message below
        hnpj = json.dumps(hnp.toJson())
        hpyj = json.dumps(hpy.toJson())

        # both fill paths must agree after being rebuilt from JSON
        if Factory.fromJson(hnp.toJson()) != Factory.fromJson(hpy.toJson()):
            raise AssertionError("\n numpy: {0}\npython: {1}".format(hnpj, hpyj))
        else:
            # report both timings in milliseconds and their ratio
            sys.stderr.write("{0:45s} | numpy: {1:.3f}ms python: {2:.3f}ms = {3:g}X speedup\n".format(
                name, numpyTime*1000, pyTime*1000, self.twosigfigs(pyTime/numpyTime)))

        # sums and double fills must agree between the two fill paths as well
        assert Factory.fromJson((hnp + hnp2).toJson()) == Factory.fromJson((hpy + hpy2).toJson())
        assert Factory.fromJson(hnp3.toJson()) == Factory.fromJson(hpy3.toJson())