Ejemplo n.º 1
0
def test_simfactory_factory_split(sim_factory):
    """Check the per-label generation PDFs built from a simultaneous factory.

    The randomizer is expected to hold two generation PDFs for 'label1' and one
    for 'label2', each with the expected number of events listed below.

    """
    def expected_events(pdf):
        """Return the expected events of `pdf` over the factory observables."""
        return pdf.expectedEvents(
            list_to_rooargset(sim_factory.get_observables()))

    # (label, expected events of each generation PDF), in the expected key order
    expectations = (('label1', (999, 320)),
                    ('label2', (231,)))
    # With yields
    randomizer = ToyRandomizer(sim_factory)
    assert list(randomizer._gen_pdfs.keys()) == ['label1', 'label2']
    for label, yields in expectations:
        pdfs = randomizer._gen_pdfs[label]
        assert len(pdfs) == len(yields)
        for pdf, n_expected in zip(pdfs, yields):
            assert expected_events(pdf) == n_expected
Ejemplo n.º 2
0
 def get_constraints(self):
     """Collect the constraints of this object and of all of its children.

     The own constraints (`self._constraints`) are converted with
     `list_to_rooargset` and then combined, child by child, with whatever
     each child reports through its own `get_constraints`.

     Return:
         The combined constraints: the converted own constraints if no child
         contributes any, a `ROOT.RooArgSet` built from both otherwise.

     """
     constraints = list_to_rooargset(self._constraints)
     for child in self.get_children().values():
         # Query each child exactly once: the call recurses through the whole
         # subtree, so asking twice doubles the work at every level.
         child_constraints = child.get_constraints()
         if child_constraints:
             constraints = ROOT.RooArgSet(constraints, child_constraints)
     return constraints
Ejemplo n.º 3
0
def test_prodfactory_split(prod_factory):
    """Check that a product factory yields a single, unlabelled generation PDF."""
    randomizer = ToyRandomizer(prod_factory)
    # No categories: everything is stored under the `None` key
    labels = list(randomizer._gen_pdfs.keys())
    assert labels == [None]
    pdfs = randomizer._gen_pdfs[None]
    assert len(pdfs) == 1
    obs_set = list_to_rooargset(prod_factory.get_observables())
    assert pdfs[0].expectedEvents(obs_set) == 999
Ejemplo n.º 4
0
def test_sumfactory_factory_split(sum_factory, sum_factory_frac):
    """Check the generation PDFs built from sum factories.

    Two configurations are exercised: a sum defined through yields, and a sum
    defined through fractions that is given an overall yield of 100.

    """
    def expected_events(pdf):
        """Return the expected events of `pdf` over the `sum_factory` observables."""
        # NOTE(review): the fraction case below also normalises over the
        # observables of `sum_factory` (not `sum_factory_frac`), exactly as the
        # checks were originally written -- confirm this is intended.
        return pdf.expectedEvents(
            list_to_rooargset(sum_factory.get_observables()))

    # With yields: one generation PDF per component
    syst_fac = ToyRandomizer(sum_factory)
    assert list(syst_fac._gen_pdfs.keys()) == [None]
    yield_pdfs = syst_fac._gen_pdfs[None]
    assert len(yield_pdfs) == 2
    for component in (0, 1):
        assert expected_events(yield_pdfs[component]) == 999
    # With fraction: a single generation PDF carrying the requested yield
    syst_fac = ToyRandomizer(sum_factory_frac, {'yield': 100})
    assert list(syst_fac._gen_pdfs.keys()) == [None]
    frac_pdfs = syst_fac._gen_pdfs[None]
    assert len(frac_pdfs) == 1
    assert expected_events(frac_pdfs[0]) == 100
Ejemplo n.º 5
0
    def get_dataset(self, randomize=True):
        """Get dataset generated from the input model.

        If an acceptance was given on initialization, accept-reject is applied on the dataset,
        and an extra variable representing the inverse of the per-event weight (`fit_weight`)
        is added as weight. In that case the number of events of each PDF is drawn from a
        Poisson distribution around its expected yield; a PDF for which zero events are
        drawn contributes nothing to the output.

        Arguments:
            randomize (bool, optional): Randomize the parameters? Defaults to `True`.

        Return:
            `ROOT.RooDataSet`.

        """
        import ROOT
        import pandas

        # TODO: Add weights?
        if randomize:
            logger.debug("Applying randomization")
            self.randomize()
        obs = list_to_rooargset(self._model.get_observables())
        datasets_to_merge = []
        cats = list_to_rooarglist(self._model.get_category_vars())
        for label, pdf_list in self._gen_pdfs.items():
            if cats:
                # The label is a comma-separated list with one entry per category variable
                for lab_num, lab in enumerate(label.split(',')):
                    cats[lab_num].setLabel(lab)
            for pdf in pdf_list:
                logger.debug("Generating PDF -> %s", pdf.GetName())
                if self._gen_acceptance:
                    # TODO: Fixed yields
                    yield_to_generate = poisson.rvs(pdf.expectedEvents(obs))
                    pandas_dataset = None
                    while yield_to_generate > 0:
                        # Oversample by a factor 2 to compensate for the rejected events
                        events = self._gen_acceptance.apply_accept_reject(
                            pandas_from_dataset(
                                pdf.generate(obs, yield_to_generate * 2)))
                        # Sample if the dataset is too large
                        if events.shape[0] > yield_to_generate:
                            events = events.sample(yield_to_generate)
                        # Merge with existing. The check must be `is None`: the truth
                        # value of a DataFrame is ambiguous and raises ValueError.
                        if pandas_dataset is None:
                            pandas_dataset = events
                        else:
                            pandas_dataset = pandas.concat([pandas_dataset, events],
                                                           ignore_index=True)
                        yield_to_generate -= len(events)
                    if pandas_dataset is None:
                        # Poisson fluctuation down to zero events: there is nothing to
                        # weight or convert for this PDF.
                        logger.warning("No events to generate for PDF -> %s", pdf.GetName())
                        continue
                    logger.debug("Adding fitting weights")
                    pandas_dataset['fit_weight'] = self._fit_acceptance.get_fit_weights(pandas_dataset)
                    dataset = dataset_from_pandas(pandas_dataset, "GenData", "GenData", weight_var='fit_weight')
                else:
                    dataset = pdf.generate(obs, ROOT.RooFit.Extended(True))
                if cats:
                    dataset.addColumns(cats)
                datasets_to_merge.append(dataset)
        return merge_root(datasets_to_merge, 'GenData', 'GenData')
Ejemplo n.º 6
0
def test_factory_split(factory, factory_with_yield):
    """Check the generation PDF built from a simple factory.

    The yield is given either manually at construction time or through the
    configuration of the factory itself.

    """
    def check_single_pdf(randomizer, n_expected):
        """Assert `randomizer` holds one unlabelled PDF expecting `n_expected` events."""
        assert list(randomizer._gen_pdfs.keys()) == [None]
        assert len(randomizer._gen_pdfs[None]) == 1
        obs_set = list_to_rooargset(factory.get_observables())
        assert randomizer._gen_pdfs[None][0].expectedEvents(obs_set) == n_expected

    # No yield available.
    # NOTE(review): a ValueError is tolerated here but not required, so this
    # step cannot fail -- confirm whether the error should be mandatory.
    try:
        ToyRandomizer(factory)
    except ValueError:
        pass
    # Manual yield
    syst_fac = ToyRandomizer(factory, {'yield': 100})
    check_single_pdf(syst_fac, 100)
    # Yield from config
    syst_fac = ToyRandomizer(factory_with_yield)
    check_single_pdf(syst_fac, 1000)
Ejemplo n.º 7
0
    def __init__(self, factories, children_yields, parameters=None):
        """Initialize.

        In this case, the children are a map of PDF name -> Factory.

        Two configurations are accepted, depending on how many entries
        `children_yields` has with respect to `factories`:

        - Same number of entries: each entry is used as the yield of the corresponding
          child and the total 'Yield' is built as their sum.
        - One entry less: the entries are used as fractions (each must be <= 1) and the
          last child, which must not appear in `children_yields`, gets the remainder.

        Arguments:
            factories (dict): Map of PDF name -> Factory.
            children_yields (OrderedDict): Map of PDF name -> (yield or fraction variable,
                constraint) pairs.
            parameters (dict, optional): Passed on to the parent initializer. If it has a
                'yield' key, its (yield, constraint) value is removed from the dictionary
                and, in the fraction configuration, set as the yield of this factory.

        Raise:
            InvalidRequestError: When the observables of the factories are incompatible.
            ValueError: If `children_yields` is not an `OrderedDict`, if the last child is
                listed in `children_yields` in the fraction configuration, or if a fraction
                is larger than 1.
            KeyError: On configuration error: a 'yield' given together with one yield per
                child, or a number of yields/fractions not matching the number of factories.

        """
        # Check observable compatibility
        # (all the factories must list the same observable names, in the same order)
        if len({
                tuple([obs.GetName() for obs in factory.get_observables()])
                for factory in factories.values()
        }) != 1:
            raise InvalidRequestError("Incompatible observables")
        # Check children yields type
        if not isinstance(children_yields, OrderedDict):
            raise ValueError("children_yields must be an ordered dictionary")
        super(SumPhysicsFactory, self).__init__({}, parameters)
        # Set children
        self._children = factories
        # Set observables
        # (the observables of the first child are shared with all the other children)
        observables = {
            obs.getStringAttribute('originalName'): obs
            for obs in list(self._children.values())[0].get_observables()
        }
        for obs_name, obs in observables.items():
            for child in list(self._children.values())[1:]:
                child.set_observable(obs_name, obs=obs)
        # Set yields
        yield_ = None
        # NOTE(review): this removes 'yield' from the caller's `parameters` dictionary,
        # after that same dictionary was already handed to the parent initializer.
        if parameters and 'yield' in parameters:
            yield_, constraint = parameters.pop('yield')
        yield_values = [
            child_yield for child_yield, _ in children_yields.values()
        ]
        if len(factories) == len(children_yields):  # Extended
            if yield_ is not None:
                raise KeyError("Specified yield on a sum of RooExtendPdf")
            self['Yield'] = ROOT.RooAddition("Yield", "Yield",
                                             list_to_rooarglist(yield_values))
            # NOTE(review): unlike in the remainder branch below, empty constraints are
            # not filtered out here -- confirm they cannot be None.
            self._constraints.update(
                {constraint
                 for _, constraint in children_yields.values()})
            for child_name, child in self._children.items():
                child.set_yield_var(children_yields[child_name])
        elif (len(factories) - len(children_yields)) == 1:
            # Check order is correct
            if list(self._children.keys())[-1] in children_yields.keys():
                logger.error(
                    "The last child should not be in `children_keys` to ensure consistency."
                )
                raise ValueError("Wrong PDF ordering")
            # Store the fractions and propagate
            for yield_val in yield_values:
                if yield_val.getVal() > 1:
                    raise ValueError(
                        "Specified a fraction larger than 1 -> {}".format(
                            yield_val.GetName()))
                # Not very good heuristics
                # (variables not flagged as shared are renamed from 'Yield' to 'Fraction')
                if yield_val.getStringAttribute('shared') != 'true':
                    yield_val.SetName(yield_val.GetName().replace(
                        'Yield', 'Fraction'))
                    yield_val.SetTitle(yield_val.GetTitle().replace(
                        'Yield', 'Fraction'))
            self['Fractions'] = yield_values
            for child_name, child in self._children.items():
                if child_name in children_yields:
                    child_yield, child_constraint = children_yields[child_name]
                    child['Fraction'] = child_yield
                    # NOTE(review): the constraint is added without checking that it is
                    # set -- confirm it cannot be None.
                    child._constraints.add(child_constraint)
                else:
                    # Need to rename because RooFracRemainder needs a RooArgSet and there will be clashes
                    # between vars named 'Fraction'. It's stupid, since the name is not used after.
                    for yield_num, yield_val in enumerate(yield_values):
                        yield_val.SetName('{}_{}'.format(
                            yield_val.GetName(), yield_num))
                    child['Fraction'] = ROOT.RooFracRemainder(
                        "Fraction", "Fraction",
                        list_to_rooargset(yield_values))
                    child._constraints.update({
                        constraint
                        for _, constraint in children_yields.values()
                        if constraint
                    })
                    # Put names back where they belong
                    # (drops the '_<index>' suffix appended above)
                    for yield_num, yield_val in enumerate(yield_values):
                        yield_val.SetName('_'.join(
                            yield_val.GetName().split('_')[:-1]))
            # Final rename
            # (`constraint` is only bound when a 'yield' was found in `parameters`)
            if yield_ is not None:
                self.set_yield_var((yield_, constraint))
        else:
            raise KeyError("Badly specified yields/fractions")
Ejemplo n.º 8
0
def dataset_from_pandas(frame, name, title, var_list=None, weight_var=None, categories=None, ranges=None):
    """Build RooDataset from a Pandas DataFrame.

    Arguments:
        frame (pandas.DataFrame): DataFrame to convert.
        name (str): RooDataSet name.
        title (str): RooDataSet title.
        var_list (list[str], optional): List of variables to add to the dataset.
            If not given, all variables are converted. The list is not modified.
        weight_var (str, optional): Assign the given variable name as weight.
            Defaults to None.
        categories (list[`ROOT.RooCategory`], optional): Categories to separate the data in.
            Their name must correspond to a column in the `frame`.
        ranges (dict, optional): Variables to set a range for. Defaults to `None`, in which case
            all variables are unbounded.

    Return:
        ROOT.RooDataSet: Frame converted to dataset.

    Raise:
        KeyError: If the weight_var or the category is not present in `frame`.

    """
    import numbers

    def fill_dataset(name, title, var_set, input_data):
        """Fill a dataset from a pandas DataFrame.

        Arguments:
            name (str): Name of the dataset.
            title (str): Title of the dataset.
            var_set (ROOT.RooArgSet): Variables in the dataset.
            input_data (pandas.DataFrame): Input data.

        Return:
            ROOT.RooDataSet: Output data set.

        """
        dataset = ROOT.RooDataSet(name, title, var_set)
        for _, row in input_data.iterrows():
            for var_name in var_names:
                value = row[var_name]
                # `numbers.Real` also accepts numpy integer/float scalars, which are not
                # instances of the builtin `int`/`float` on Python 3 and would otherwise
                # be silently left at the default value. Non-numeric values are skipped.
                if isinstance(value, numbers.Real):
                    var_set.setRealValue(var_name, float(value))
            for cat_name in cat_names:
                var_set.setCatLabel(cat_name, row[cat_name])
            dataset.add(var_set)
        return dataset

    # Work on a copy: names are removed below and the caller's list must stay untouched
    var_names = list(var_list) if var_list else list(frame.columns)
    if weight_var and weight_var not in frame.columns:
        raise KeyError("Cannot find weight variable -> {}".format(weight_var))
    cat_names = []
    roovar_list = []
    if categories:
        for category in categories:
            cat_var = category.GetName()
            if cat_var not in frame.columns:
                raise KeyError("Cannot find category variable -> {}".format(cat_var))
            roovar_list.append(category)
            # Categories are filled through their label, not as real variables
            if cat_var in var_names:
                var_names.remove(cat_var)
            cat_names.append(cat_var)
        super_category = 'x'.join(cat.GetName() for cat in categories)
        if super_category in var_names:
            logger.warning("You asked for variable %s but this is the name of a SuperCategory. Ignoring it.",
                           super_category)
            var_names.remove(super_category)
    roovar_list.extend([ROOT.RooRealVar(var_name, var_name, 0.0) for var_name in var_names])
    dataset_set = list_to_rooargset(roovar_list)
    if ranges:
        for var_name, (min_, max_) in ranges.items():
            dataset_set[var_name].setMin(min_)
            dataset_set[var_name].setMax(max_)
    dataset = fill_dataset(name, title, dataset_set, frame)
    if weight_var:
        # Re-import the filled dataset declaring the weight variable
        dataset = ROOT.RooDataSet(name, title, dataset_set,
                                  ROOT.RooFit.Import(dataset),
                                  ROOT.RooFit.WeightVar(weight_var))
    return dataset
Ejemplo n.º 9
0
def generate(physics_factory, n_events):
    """Perform generation of toys.

    Note:
        If the factory is simultaneous, events are generated in steps.
        For that reason, the configuration for 'gen/nevents' must be a dictionary
        of {label -> nevents} keys.

    Arguments:
        physics_factory (`analysis.physics.PhysicsFactory`): Physics factory object to get
            observables, parameters and PDFs from.
        n_events (dict, int): Number of events to generate.

    Return:
        `pandas.DataFrame`: Generated events. For a simultaneous factory, a 'category'
            column holds the label each event was generated for; if `n_events` is an
            empty dictionary, `None` is returned.

    Raise:
        ValueError: If the number of events to generate is not properly specified.
        KeyError: If an unknown simultaneous category label is requested.

    """
    import pandas

    def generate_events(gen_pdf, obs_set, n_events):
        """Generate events according to the given PDF.

        Result is converted to a pandas data frame.

        Arguments:
            gen_pdf (`ROOT.RooAbsPdf`): PDF to use for generation.
            obs_set (`ROOT.RooArgSet`): Observables to generate.
            n_events (int): Number of events to generate.

        Return:
            `pandas.DataFrame`: Generated events.

        """
        data = gen_pdf.generate(obs_set, n_events)
        dataframe = pandas_from_dataset(data)
        # The ROOT dataset is no longer needed once converted
        destruct_object(data)
        return dataframe

    observables = list_to_rooargset(physics_factory.get_observables())
    if not physics_factory.is_simultaneous():
        if not isinstance(n_events, int):
            raise ValueError("Number of events to generate is not an integer")
        return generate_events(physics_factory.get_pdf("GenPdf", "GenPdf"),
                               observables, n_events)
    if not isinstance(n_events, dict):
        raise ValueError(
            "Generation of a simultaneous requires a dictionary for the number of events."
        )
    children = physics_factory.get_children()
    label_frames = []
    for label, n_events_label in n_events.items():
        label_factory = children.get(label)
        # Compare against None: a valid factory that evaluates as false must not be
        # reported as an unknown label.
        if label_factory is None:
            raise KeyError("Unknown label -> {}".format(label))
        pdf_name = "GenPdf_{}".format(label)
        label_frames.append(
            generate_events(label_factory.get_pdf(pdf_name, pdf_name),
                            observables,
                            n_events_label).assign(category=label))
    if not label_frames:
        return None
    # Single concatenation: `DataFrame.append` no longer exists in pandas >= 2.0 and
    # copied the accumulated frame on every call.
    return pandas.concat(label_frames)