def set_yield_var(self, yield_):
    """Set (or override) the global yield of a fraction-based sum of PDFs.

    Three situations are handled:

        + No 'Fractions' object is configured: a warning is logged and nothing
          else is done.
        + 'Fractions' is configured but 'Yield' is not: the yield parameter is
          created through `_create_parameter` and every child receives, as its
          own yield, the product of that yield and its 'Fraction'.
        + 'Yield' already exists: it is updated in place if it is a
          `ROOT.RooRealVar`; otherwise a warning is logged.

    Arguments:
        yield_: Yield specification. When the yield is created it is handed to
            `_create_parameter`; when an existing yield is overridden, only
            `ROOT.RooRealVar`, `float` and `int` values have an effect.

    Raise:
        InvalidRequestError: If a child already has a 'Yield' when the yield
            is being created.

    """
    configured = self._objects
    if 'Fractions' not in configured:
        logger.warning(
            "Trying to set the yield of an Extended RooAddPdf. Ignoring.")
        return
    if 'Yield' in configured:
        # A yield is already in place: it can only be overridden if it is a plain variable
        if not isinstance(self['Yield'], ROOT.RooRealVar):
            logger.warning("Trying to set a yield that cannot be overridden")
        elif isinstance(yield_, ROOT.RooRealVar):
            self['Yield'].setVal(yield_.getVal())
            self['Yield'].SetName(yield_.GetName())
            self['Yield'].SetTitle(yield_.GetTitle())
        elif isinstance(yield_, (float, int)):
            self['Yield'].setVal(yield_)
        return
    # First time the yield is set: create it and propagate it to the children
    yield_ = self._create_parameter('Yield', yield_)
    for child in self._children.values():
        if 'Yield' in child:
            raise InvalidRequestError(
                "Inconsistent state: trying to set the yield of an already configured Factory."
            )
        fraction = child['Fraction']
        # Again, not very good heuristics: name and title are derived by text substitution
        product_name = fraction.GetName().replace('Fraction', 'Yield')
        product_title = fraction.GetTitle().replace('Fraction', 'Yield')
        child.set_yield_var(
            ROOT.RooProduct(product_name, product_title,
                            list_to_rooarglist([yield_, fraction])))
def get_dataset(self, randomize=True):
    """Get dataset generated from the input model.

    If an acceptance was given on initialization, accept-reject is applied on the dataset,
    and an extra variable representing the inverse of the per-event weight (`fit_weight`)
    is added as weight.

    Arguments:
        randomize (bool, optional): Randomize the parameters? Defaults to `True`.

    Return:
        `ROOT.RooDataSet`.

    """
    import ROOT
    import pandas as pd

    # TODO: Add weights?
    if randomize:
        logger.debug("Applying randomization")
        self.randomize()
    obs = list_to_rooargset(self._model.get_observables())
    datasets_to_merge = []
    cats = list_to_rooarglist(self._model.get_category_vars())
    for label, pdf_list in self._gen_pdfs.items():
        if cats:
            # The label is a comma-separated list with one entry per category variable
            for lab_num, lab in enumerate(label.split(',')):
                cats[lab_num].setLabel(lab)
        for pdf in pdf_list:
            logger.debug("Generating PDF -> %s", pdf.GetName())
            if self._gen_acceptance:
                # TODO: Fixed yields
                yield_to_generate = poisson.rvs(pdf.expectedEvents(obs))
                pandas_dataset = None
                while yield_to_generate:
                    # Generate twice what is still missing to compensate for rejected events
                    events = self._gen_acceptance.apply_accept_reject(
                        pandas_from_dataset(
                            pdf.generate(obs, yield_to_generate * 2)))
                    # Sample if the dataset is too large
                    if events.shape[0] > yield_to_generate:
                        events = events.sample(yield_to_generate)
                    # Merge with existing. An explicit `None` check is needed because
                    # the truth value of a DataFrame is ambiguous and raises.
                    if pandas_dataset is None:
                        pandas_dataset = events
                    else:
                        pandas_dataset = pd.concat([pandas_dataset, events],
                                                   ignore_index=True)
                    yield_to_generate -= len(events)
                if pandas_dataset is None:
                    # The Poisson fluctuation gave zero events: there is nothing to
                    # weight or merge for this PDF.
                    logger.debug("No events to generate for PDF -> %s", pdf.GetName())
                    continue
                logger.debug("Adding fitting weights")
                pandas_dataset['fit_weight'] = self._fit_acceptance.get_fit_weights(pandas_dataset)
                dataset = dataset_from_pandas(pandas_dataset,
                                              "GenData", "GenData",
                                              weight_var='fit_weight')
            else:
                dataset = pdf.generate(obs, ROOT.RooFit.Extended(True))
            if cats:
                dataset.addColumns(cats)
            datasets_to_merge.append(dataset)
    return merge_root(datasets_to_merge, 'GenData', 'GenData')
def get_unbound_pdf(self, name, title):
    """Build the non-extended sum of the children PDFs.

    When no 'Fractions' entry is available, a warning is logged and the result
    of `get_extended_pdf` is returned instead.

    Arguments:
        name (str): Name of the PDF.
        title (str): Title of the PDF.

    Return:
        `ROOT.RooAddPdf` built from the children PDFs and the stored fractions,
        or the output of `get_extended_pdf` in the fallback case.

    """
    if 'Fractions' in self:
        child_pdfs = ROOT.RooArgList()
        for label, factory in self._children.items():
            # Each child PDF is named after the sum PDF plus the child label
            pdf_name = self._add_superscript(name, label)
            child_pdfs.add(factory.get_pdf(pdf_name, pdf_name))
        fractions = list_to_rooarglist(self['Fractions'])
        return ROOT.RooAddPdf(name, title, child_pdfs, fractions)
    logger.warning(
        "Requested non-extended PDF on a RooAddPdf made of ExtendedPdf. "
        "Returning an extended PDF")
    return self.get_extended_pdf(name, title)
def __init__(self, factories, children_yields, parameters=None):
    """Initialize.

    In this case, the children are a map of PDF name -> Factory.

    Two configurations are accepted, depending on how many entries
    `children_yields` has compared to `factories`:

        + Same number of entries: every child gets its own yield and the
          total 'Yield' is the sum of them (extended case).
        + One entry less: the entries are treated as fractions, stored under
          'Fractions', and the child without an entry (which must be the last
          one) gets the remainder as its 'Fraction'.

    Arguments:
        factories (dict): Map of PDF name -> Factory.
        children_yields (OrderedDict): Map of child name to a pair whose first
            element is the yield (or fraction) and whose second element is its
            constraint.
        parameters (dict, optional): Passed on to the base class initializer.
            If it contains a 'yield' key, that entry is popped and unpacked as
            a (yield, constraint) pair.

    Raise:
        InvalidRequestError: When the observables of the factories are incompatible.
        ValueError: If `children_yields` is not an `OrderedDict`, if the last
            child appears in `children_yields` in the fractions case, or if a
            fraction is larger than 1.
        KeyError: On configuration error (a yield given in the extended case,
            or a number of yields/fractions that matches neither case).

    """
    # Check observable compatibility: all factories must list the same observable names
    if len({
            tuple([obs.GetName() for obs in factory.get_observables()])
            for factory in factories.values()
    }) != 1:
        raise InvalidRequestError("Incompatible observables")
    # Check children yields type
    if not isinstance(children_yields, OrderedDict):
        raise ValueError("children_yields must be an ordered dictionary")
    super(SumPhysicsFactory, self).__init__({}, parameters)
    # Set children
    self._children = factories
    # Set observables: the observables of the first child are pushed to all the others
    observables = {
        obs.getStringAttribute('originalName'): obs
        for obs in list(self._children.values())[0].get_observables()
    }
    for obs_name, obs in observables.items():
        for child in list(self._children.values())[1:]:
            child.set_observable(obs_name, obs=obs)
    # Set yields
    # Note that 'yield' is popped from `parameters` after the base initializer
    # has already received the full dictionary.
    yield_ = None
    if parameters and 'yield' in parameters:
        yield_, constraint = parameters.pop('yield')
    yield_values = [
        child_yield for child_yield, _ in children_yields.values()
    ]
    if len(factories) == len(children_yields):  # Extended
        if yield_ is not None:
            raise KeyError("Specified yield on a sum of RooExtendPdf")
        self['Yield'] = ROOT.RooAddition("Yield", "Yield",
                                         list_to_rooarglist(yield_values))
        # NOTE(review): unlike the fractions branch below, constraints are not
        # filtered here, so falsy constraints also end up in the set -- confirm
        # this is intended.
        self._constraints.update(
            {constraint
             for _, constraint in children_yields.values()})
        for child_name, child in self._children.items():
            child.set_yield_var(children_yields[child_name])
    elif (len(factories) - len(children_yields)) == 1:
        # Check order is correct: the child that takes the remainder must be the last one
        if list(self._children.keys())[-1] in children_yields.keys():
            logger.error(
                "The last child should not be in `children_keys` to ensure consistency."
            )
            raise ValueError("Wrong PDF ordering")
        # Store the fractions and propagate
        for yield_val in yield_values:
            if yield_val.getVal() > 1:
                raise ValueError(
                    "Specified a fraction larger than 1 -> {}".format(
                        yield_val.GetName()))
            # Not very good heuristics: variables not flagged as shared are
            # renamed in place from 'Yield' to 'Fraction'
            if yield_val.getStringAttribute('shared') != 'true':
                yield_val.SetName(yield_val.GetName().replace(
                    'Yield', 'Fraction'))
                yield_val.SetTitle(yield_val.GetTitle().replace(
                    'Yield', 'Fraction'))
        self['Fractions'] = yield_values
        for child_name, child in self._children.items():
            if child_name in children_yields:
                child_yield, child_constraint = children_yields[child_name]
                child['Fraction'] = child_yield
                # NOTE(review): the constraint is added without checking whether
                # it is set -- confirm falsy constraints are acceptable here.
                child._constraints.add(child_constraint)
            else:
                # Temporarily give each fraction a unique name (index suffix):
                # RooFracRemainder needs a RooArgSet and there would be clashes
                # between vars named 'Fraction'. The name is not used afterwards.
                for yield_num, yield_val in enumerate(yield_values):
                    yield_val.SetName('{}_{}'.format(
                        yield_val.GetName(), yield_num))
                child['Fraction'] = ROOT.RooFracRemainder(
                    "Fraction", "Fraction",
                    list_to_rooargset(yield_values))
                child._constraints.update({
                    constraint
                    for _, constraint in children_yields.values()
                    if constraint
                })
                # Put names back where they belong by stripping the index suffix
                for yield_num, yield_val in enumerate(yield_values):
                    yield_val.SetName('_'.join(
                        yield_val.GetName().split('_')[:-1]))
        # Final rename: propagate the global yield, if one was given.
        # `constraint` is bound whenever `yield_` is not None, since both come
        # from the same unpacking above.
        if yield_ is not None:
            self.set_yield_var((yield_, constraint))
    else:
        raise KeyError("Badly specified yields/fractions")
def get_root_from_root_file(file_name, tree_name, kwargs):
    """Load a ROOT tree into a `ROOT.RooDataSet`.

    Needed keys in `kwargs` are:
        + `name`: Name of the `RooDataSet`.

    Optional keys are:
        + `title`: Title of the `RooDataSet`. Defaults to `name`.
        + `variables`: List of variables to load. Defaults to all the leaves of the tree.
        + `selection`: Selection to apply.
        + `ranges`: Range to apply to some variables, given either as a `(min, max)`
            pair or as a whitespace-separated string.

    The weight configuration is extracted from `kwargs` by `_analyze_weight_config`.
    Acceptance weights (`acceptance` key) are not supported.

    Arguments:
        file_name (str): File to load.
        tree_name (str): Tree to load.
        kwargs (dict): Extra configuration.

    Return:
        ROOT.RooDataSet: ROOT file converted to RooDataSet.

    Raise:
        KeyError: If there are errors in `kwargs` or the tree cannot be found.
        ValueError: If the requested variables cannot be found in the input file.
        OSError: If the ROOT file cannot be found or opened.
        NotImplementedError: If acceptance weights are requested.

    """

    def get_list_of_leaves(tree):
        """Get the names of the leaves of a tree.

        Arguments:
            tree (`ROOT.TTree`): Tree to extract the leaves from.

        Return:
            set: Names of the leaves of the tree.

        """
        object_list = tree.GetListOfLeaves()
        it = object_list.MakeIterator()
        output = set()
        for _ in range(object_list.GetSize()):
            obj = it.Next()
            if obj:
                output.add(obj.GetName())
        return output

    logger.debug("Loading ROOT file in RooDataSet format -> %s:%s",
                 file_name, tree_name)
    if not os.path.exists(file_name):
        raise OSError("Cannot find input file -> {}".format(file_name))
    try:
        name = kwargs['name']
        title = kwargs.get('title', name)
    except KeyError as error:
        raise KeyError("Missing configuration key -> {}".format(error))
    tfile = ROOT.TFile.Open(file_name)
    if not tfile or tfile.IsZombie():
        raise OSError("Cannot open input file -> {}".format(file_name))
    try:
        tree = tfile.Get(tree_name)
        if not tree:
            raise KeyError(
                "Cannot find tree in input file -> {}".format(tree_name))
        leaves = get_list_of_leaves(tree)
        variables = set(kwargs.get('variables', leaves))
        # Acceptance
        if 'acceptance' in kwargs:
            raise NotImplementedError(
                "Acceptance weights are not implemented for ROOT files")
        # Check weights
        try:
            weight_var, weights_to_normalize, weights_not_to_normalize = _analyze_weight_config(
                kwargs)
        except KeyError:
            raise KeyError("Badly specified weights")
        if variables and weight_var:
            # The variables entering the weights need to be loaded too
            variables |= set(weights_to_normalize) | set(weights_not_to_normalize)
        # Crosscheck leaves
        missing_leaves = variables - leaves
        if missing_leaves:
            raise ValueError(
                "Cannot find leaves in input -> {}".format(missing_leaves))
    except Exception:
        # Do not leave the file open when the configuration turns out to be invalid
        tfile.Close()
        raise
    selection = kwargs.get('selection')
    leave_set = ROOT.RooArgSet()
    leave_list = []
    if selection:
        selection_expr = formulate.from_root(selection)
        for var in selection_expr.variables.union(variables):
            leave_list.append(ROOT.RooRealVar(var, var, 0.0))
            leave_set.add(leave_list[-1])
        # The intermediate dataset gets a random name of its own, so that the
        # requested `name` is still available for the final dataset.
        temp_name = ''.join(
            random.SystemRandom().choice(string.ascii_letters + string.digits)
            for _ in range(10))
        temp_ds = ROOT.RooDataSet(temp_name, temp_name, leave_set,
                                  ROOT.RooFit.Import(tree),
                                  ROOT.RooFit.Cut(selection))
        destruct_object(tree)
        tree = temp_ds
    var_set = ROOT.RooArgSet()
    var_list = {}
    for var in variables:
        var_list[var] = ROOT.RooRealVar(var, var, 0.0)
        var_set.add(var_list[var])
    if kwargs.get('ranges'):
        for var_name, range_val in kwargs['ranges'].items():
            if var_name not in var_list:
                raise KeyError(
                    "Range specified for a variable not included in the dataset -> {}"
                    .format(var_name))
            try:
                if isinstance(range_val, str):
                    min_, max_ = range_val.split()
                else:
                    min_, max_ = range_val
            except ValueError:
                raise KeyError(
                    "Malformed range specification for {} -> {}".format(
                        var_name, range_val))
            var_set[var_name].setMin(float(min_))
            var_set[var_name].setMax(float(max_))
    dataset = ROOT.RooDataSet(name, title, var_set,
                              ROOT.RooFit.Import(tree))
    if weight_var:
        not_normalized_name = "{}_not_normalized".format(weight_var)
        normalized_name = "{}_normalized".format(weight_var)
        # Weights to normalize
        to_normalize_w = ROOT.RooFormulaVar(
            not_normalized_name, not_normalized_name,
            "*".join(weights_to_normalize),
            list_to_rooarglist(var_list[weight]
                               for weight in weights_to_normalize))
        var_set.add(to_normalize_w)
        dataset.addColumn(to_normalize_w)
        # Loop over the entry indices: `sumEntries` is a number, not a sequence
        sum_weights = sum(
            dataset.get(entry)[not_normalized_name].getVal()
            for entry in range(dataset.numEntries()))
        normalized_w = ROOT.RooFormulaVar(
            normalized_name, normalized_name,
            "{}_not_normalized/{}".format(weight_var, sum_weights),
            ROOT.RooArgList(to_normalize_w))
        var_set.add(normalized_w)
        dataset.addColumn(normalized_w)
        # Non-normalized weights
        weights = ROOT.RooFormulaVar(
            weight_var, weight_var,
            "*".join(weights_not_to_normalize + [normalized_name]),
            list_to_rooarglist(
                [var_list[weight] for weight in weights_not_to_normalize] +
                [normalized_w]))
        var_set.add(weights)
        dataset.addColumn(weights)
        dataset_w = ROOT.RooDataSet(name, title, var_set,
                                    ROOT.RooFit.Import(dataset),
                                    ROOT.RooFit.WeightVar(weight_var))
        destruct_object(dataset)
        dataset = dataset_w
    # ROOT Cleanup
    destruct_object(tree)
    tfile.Close()
    destruct_object(tfile)
    if selection:
        for leave in leave_list:
            destruct_object(leave)
    for var in variables:
        destruct_object(var_list[var])
    # Let's return
    dataset.SetName(name)
    dataset.SetTitle(title)
    return dataset