def _parallel_batcher(function, args_kwargs):
    """Runs a batch of calls to function, pre-computing a shared set of load
    hints so that data loading can be cached across calls.

    Args:
        function: The callable to invoke once per args/kwargs pair; it must
            accept a load_hints keyword argument
        args_kwargs: An iterable of (args, kwargs) pairs to call function
            with
    """
    # Build the union of all properties needed by any call in the batch
    # NOTE: We don't need to handle patch properties because those are
    # handled internally by the process and we're only dealing with one
    # process in batch mode
    hints = set()
    for call_args, call_kwargs in args_kwargs:
        # Pull out the region and expressions for this call
        _, region, expressions, _ = _parallel_extractor(*call_args,
                                                        **call_kwargs)

        # Fold in the properties needed by the region's selection/weight
        selection, weight = region.selection_weight()
        hints.update(properties(selection))
        hints.update(properties(weight))

        # Fold in the properties needed by each expression
        for expression in expressions:
            hints.update(properties(expression))

    # Now make every call, passing along the combined load hints
    for call_args, call_kwargs in args_kwargs:
        call_kwargs['load_hints'] = hints
        function(*call_args, **call_kwargs)

    # Force garbage collection
    gc.collect()

    # Clear the load caches of the caching loader
    _caching_loader.caches.clear()
def _count(process, region):
    """Computes the weighted event count of a process in a region.

    Args:
        process: The process whose events should be counted
        region: The region whose weighting/selection should be applied

    Returns:
        The weighted event count in the region.
    """
    # Grab the region's selection and weight expressions
    selection, weight = region.selection_weight()

    # Determine which data properties are needed to evaluate them
    needed = set().union(properties(selection), properties(weight))

    # Load data
    data = process.load(needed)

    # Filter by the selection, if one is specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Sum the weights if a weight is specified, otherwise just count rows
    if weight != '':
        return data.eval(normalized(weight)).sum()
    return len(data)
def properties(self):
    """Returns a Python set of properties of the data required to evaluate
    the patch.

    Returns:
        A Python set containing strings of the required patch properties.
    """
    # NOTE: Although this method is itself named `properties`, the call
    # below resolves to the module-level properties() function, not this
    # method — class attributes are not part of a method body's lexical
    # scope.  Presumably _selection is an expression string; the required
    # properties are exactly those of that expression.
    return properties(self._selection)
def _histogram(process, region, expressions, binnings, load_hints = None):
    """Generates a ROOT histogram of a distribution of a process in a
    region.

    Args:
        process: The process whose events should be histogrammed
        region: The region whose weighting/selection should be applied
        expressions: A tuple of expression strings
        binnings: A tuple of Binning instances
        load_hints: If provided, this argument will hint to _histogram that
            it should load additional properties when loading data and that
            it should use the _caching_loader.  This facilitates cached
            loading of data across multiple calls to _histogram with the
            same process.  This is particularly useful for parallelized
            histogramming, where the jobs are grouped by process.

    Returns:
        A ROOT histogram, of the TH1F, TH2F, or TH3F variety.
    """
    # Compute weighted selection
    selection, weight = region.selection_weight()

    # Expand binnings to edge lists
    edges = tuple((b.edges() for b in binnings))

    # Load data
    if load_hints is not None:
        # If load_hints have been provided, just use those with the
        # _caching_loader
        data = _caching_loader(process, load_hints)
    else:
        # Otherwise manually create the set of necessary properties
        # NOTE: All we need are region and expression properties - patch
        # properties are handled internally by the process
        required_properties = set()

        # Add those properties necessary to evaluate region selection/weight
        required_properties.update(properties(selection))
        required_properties.update(properties(weight))

        # Add in those properties necessary to evaluate expressions
        required_properties.update(*(properties(e) for e in expressions))

        # Load data
        data = process.load(required_properties)

    # Apply selection if specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Evaluate each variable expression, converting the resultant Pandas
    # Series to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    samples = tuple((data.eval(normalized(e)).values.astype(numpy.float64)
                     for e in expressions))

    # Evaluate weights, converting the resultant Pandas Series to a NumPy
    # array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    if weight != '':
        weights = data.eval(normalized(weight)).values.astype(numpy.float64)
    else:
        weights = nullptr

    # Create a unique name and title for the histogram
    name = title = uuid4().hex

    # Create a histogram based on dimensionality
    # NOTE: When specifying explicit bin edges, you aren't passing a length
    # argument, you are passing an nbins argument, which is length - 1,
    # hence the code below.  If you pass length for n bins, then you'll get
    # garbage for the last bin's upper edge and things go nuts in ROOT.
    dimensionality = len(expressions)
    count = len(data)
    if dimensionality == 1:
        # Create a one-dimensional histogram
        result = TH1F(name, title, len(edges[0]) - 1, edges[0])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], weights)
    elif dimensionality == 2:
        # Create a two-dimensional histogram
        result = TH2F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], samples[1], weights)
    elif dimensionality == 3:
        # Create a three-dimensional histogram
        result = TH3F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1],
                      len(edges[2]) - 1, edges[2])

        # HACK: TH3 doesn't have a FillN method, so we have to do things the
        # slow way.
        # TODO: We may want to put a warning about this slowness
        # BUGFIX: This must be an identity check - when a weight expression
        # was given, weights is a NumPy array and == would broadcast
        # elementwise, producing an array whose truth value is ambiguous
        if weights is nullptr:
            weights = numpy.ones(count, dtype = numpy.float64)
        for x, y, z, w in zip(samples[0], samples[1], samples[2], weights):
            result.Fill(x, y, z, w)
    else:
        raise ValueError('ROOT can only histogram 1 - 3 dimensions')

    # All done
    return result
def _histogram(process, region, expressions, binnings, load_hints=None):
    """Generates a ROOT histogram of a distribution of a process in a
    region.

    Args:
        process: The process whose events should be histogrammed
        region: The region whose weighting/selection should be applied
        expressions: A tuple of expression strings
        binnings: A tuple of Binning instances
        load_hints: If provided, this argument will hint to _histogram that
            it should load additional properties when loading data and that
            it should use the _caching_loader.  This facilitates cached
            loading of data across multiple calls to _histogram with the
            same process.  This is particularly useful for parallelized
            histogramming, where the jobs are grouped by process.

    Returns:
        A ROOT histogram, of the TH1F, TH2F, or TH3F variety.
    """
    # Compute weighted selection
    selection, weight = region.selection_weight()

    # Expand binnings to edge lists
    edges = tuple((b.edges() for b in binnings))

    # Load data
    if load_hints is not None:
        # If load_hints have been provided, just use those with the
        # _caching_loader
        data = _caching_loader(process, load_hints)
    else:
        # Otherwise manually create the set of necessary properties
        # NOTE: All we need are region and expression properties - patch
        # properties are handled internally by the process
        required_properties = set()

        # Add those properties necessary to evaluate region selection/weight
        required_properties.update(properties(selection))
        required_properties.update(properties(weight))

        # Add in those properties necessary to evaluate expressions
        required_properties.update(*(properties(e) for e in expressions))

        # Load data
        data = process.load(required_properties)

    # Apply selection if specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Evaluate each variable expression, converting the resultant Pandas
    # Series to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    samples = tuple((data.eval(normalized(e)).values.astype(numpy.float64)
                     for e in expressions))

    # Evaluate weights, converting the resultant Pandas Series to a NumPy
    # array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    if weight != '':
        weights = data.eval(normalized(weight)).values.astype(numpy.float64)
    else:
        weights = nullptr

    # Create a unique name and title for the histogram
    name = title = uuid4().hex

    # Create a histogram based on dimensionality
    # NOTE: When specifying explicit bin edges, you aren't passing a length
    # argument, you are passing an nbins argument, which is length - 1,
    # hence the code below.  If you pass length for n bins, then you'll get
    # garbage for the last bin's upper edge and things go nuts in ROOT.
    dimensionality = len(expressions)
    count = len(data)
    if dimensionality == 1:
        # Create a one-dimensional histogram
        result = TH1F(name, title, len(edges[0]) - 1, edges[0])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], weights)
    elif dimensionality == 2:
        # Create a two-dimensional histogram
        result = TH2F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], samples[1], weights)
    elif dimensionality == 3:
        # Create a three-dimensional histogram
        result = TH3F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1],
                      len(edges[2]) - 1, edges[2])

        # HACK: TH3 doesn't have a FillN method, so we have to do things the
        # slow way.
        # TODO: We may want to put a warning about this slowness
        # BUGFIX: This must be an identity check - when a weight expression
        # was given, weights is a NumPy array and == would broadcast
        # elementwise, producing an array whose truth value is ambiguous
        if weights is nullptr:
            weights = numpy.ones(count, dtype=numpy.float64)
        for x, y, z, w in zip(samples[0], samples[1], samples[2], weights):
            result.Fill(x, y, z, w)
    else:
        raise ValueError('ROOT can only histogram 1 - 3 dimensions')

    # All done
    return result
def test_properties(self):
    """Checks that properties() extracts the distinct property names from an
    expression, de-duplicating repeated names."""
    # Check a simple expression with duplicates
    # NOTE: A set literal is the idiomatic (and faster) spelling of
    # set([...])
    self.assertEqual(properties("electron_pt > (x * x)"),
                     {"electron_pt", "x"})
def test_properties(self):
    """Checks that properties() extracts the distinct property names from an
    expression, de-duplicating repeated names."""
    # Check a simple expression with duplicates
    # NOTE: A set literal is the idiomatic (and faster) spelling of
    # set([...])
    self.assertEqual(properties('electron_pt > (x * x)'),
                     {'electron_pt', 'x'})