def dataframe_config():
    '''
    Build the definition/config of the data frame (DF), including a
    category for the HEPPY component name.

    :return a list of DF configs
    '''
    # Categorical binning: plain boundary-based binning for HT and N_jet
    ht_binning = Binning(boundaries=[200, 400, 600, 900, 1200])
    njet_binning = Binning(boundaries=[1, 2, 3, 4, 5, 6])

    # Echo hands back the value it receives. The explicit equivalent is
    #   Echo(nextFunc = lambda x: x+1, valid = lambda x: True)
    nbjet_binning = Echo()

    # HEPPY component names are strings, so there is no rule to produce
    # the next bin
    component_binning = Echo(nextFunc=None)

    # Weight branches that are multiplied together to give the final
    # event weight
    event_weight = WeightCalculatorProduct(['genWeight'])

    config = {
        # which tree branches to read in
        'keyAttrNames': ('componentName', 'ht40', 'nJet40', 'nBJet40'),
        # which columns in the DF they should be mapped to
        'keyOutColumnNames': ('component', 'htbin', 'njetbin', 'nbjetbin'),
        # the binning for the categories, in the same order as above
        'binnings': (
            component_binning,
            ht_binning,
            njet_binning,
            nbjet_binning,
        ),
        'weight': event_weight,
    }

    # a list of DF configs (currently a single entry)
    return [config]
def dataframe_config():
    '''
    Build the definition/config of the data frame (DF).

    :return a list of DF configs
    '''
    # Categorical binning: plain boundary-based binning for HT and N_jet
    ht_binning = Binning(boundaries=[200, 400, 600, 900, 1200])
    njet_binning = Binning(boundaries=[1, 2, 3, 4, 5, 6])

    # Echo hands back the value it receives. The explicit equivalent is
    #   Echo(nextFunc = lambda x: x+1, valid = lambda x: True)
    nbjet_binning = Echo()

    config = {
        # which tree branches to read in
        'keyAttrNames': ('ht40', 'nJet40', 'nBJet40'),
        # which columns in the DF they should be mapped to
        'keyOutColumnNames': ('htbin', 'njetbin', 'nbjetbin'),
        # the binning for the categories, in the same order as above
        'binnings': (ht_binning, njet_binning, nbjet_binning),
    }

    # a list of DF configs (currently a single entry)
    return [config]
def test_next_None(self):
    """With ``nextFunc=None``, ``next`` returns None for every input."""
    echo = Echo(nextFunc=None)
    for value in (-5, 0, 1, 3, 10):
        self.assertIsNone(echo.next(value))
def test_next_lambda(self):
    """``next`` applies a user-supplied lambda as ``nextFunc``.

    The lambda adds 0.1, so the results are computed floats. They are
    compared with ``assertAlmostEqual`` rather than exact equality so
    the test does not depend on the last bit of floating-point rounding.
    """
    binning = Echo(nextFunc=lambda x: x + 0.1)
    cases = (
        (-4.9, -5),
        (0.1, 0),
        (1.1, 1),
        (3.1, 3),
        (10.1, 10),
    )
    for expected, value in cases:
        self.assertAlmostEqual(expected, binning.next(value))
def test_next_plus2(self):
    """``next`` delegates to a named function (``plus2``) given as ``nextFunc``.

    Each expected value is the input plus two.
    """
    echo = Echo(nextFunc=plus2)
    expected_by_input = (
        (-5, -3),
        (0, 2),
        (1, 3),
        (3, 5),
        (10, 12),
    )
    for value, expected in expected_by_input:
        self.assertEqual(expected, echo.next(value))
def test_next_default(self):
    """Without an explicit ``nextFunc``, ``next(x)`` gives ``x + 1``."""
    echo = Echo()
    expected_by_input = (
        (-5, -4),
        (0, 1),
        (1, 2),
        (3, 4),
        (10, 11),
    )
    for value, expected in expected_by_input:
        self.assertEqual(expected, echo.next(value))
def _create_one_dimension(stage_name, _in, _out, _bins=None, _index=None):
    """Validate one binning-dimension description and build its binning object.

    :param stage_name: stage name, used only as the prefix of error messages
    :param _in: name of the input variable; must be a string
    :param _out: name of the output column; must be a string
    :param _bins: ``None`` to use ``Echo(nextFunc=None)``, or a dictionary
        describing the bin edges in one of two forms::

            {nbins: 6, low: 1, high: 5, overflow: True}
            {edges: [0, 200., 900], overflow: True}

        Only ``nbins``/``low``/``high`` and ``edges`` are read here; any
        other key (such as ``overflow``) is ignored by this function.
    :param _index: optional index; a string or an integer
    :return: the tuple ``(str(_in), str(_out), bin_obj, _index)``
    :raises BadBinnedDataframeConfig: if ``_in`` or ``_out`` is not a
        string, if ``_index`` is neither a string nor an integer, if
        ``_bins`` is neither ``None`` nor a dictionary, or if the
        dictionary does not provide enough information to infer the edges
    """
    if not isinstance(_in, six.string_types):
        msg = "{}: binning dictionary contains non-string value for 'in'"
        raise BadBinnedDataframeConfig(msg.format(stage_name))
    if not isinstance(_out, six.string_types):
        msg = "{}: binning dictionary contains non-string value for 'out'"
        raise BadBinnedDataframeConfig(msg.format(stage_name))

    # The error message promises that integers are acceptable, so accept
    # them. Previously every non-zero integer was rejected while 0 slipped
    # through only because it is falsy. Falsy values (None, 0, '') are
    # still passed through unchecked, exactly as before. bool is excluded
    # explicitly because it is a subclass of int.
    index_types = six.string_types + six.integer_types
    if _index and (isinstance(_index, bool)
                   or not isinstance(_index, index_types)):
        msg = "{}: binning dictionary contains non-string and non-integer value for 'index'"
        raise BadBinnedDataframeConfig(msg.format(stage_name))

    if _bins is None:
        # No binning requested: pass values through, with no "next bin" rule
        bin_obj = Echo(nextFunc=None)
    elif isinstance(_bins, dict):
        if "nbins" in _bins and "low" in _bins and "high" in _bins:
            # nbins equal-width bins need nbins + 1 edges
            edges = np.linspace(_bins["low"], _bins["high"], _bins["nbins"] + 1)
        elif "edges" in _bins:
            edges = _bins["edges"]
        else:
            msg = "{}: No way to infer binning edges for in={}"
            raise BadBinnedDataframeConfig(msg.format(stage_name, _in))
        bin_obj = Binning(boundaries=edges)
    else:
        msg = "{}: bins is neither None nor a dictionary for in={}"
        raise BadBinnedDataframeConfig(msg.format(stage_name, _in))

    return (str(_in), str(_out), bin_obj, _index)
def prepare_dataframe_configs(weights=None):
    '''
    Creates the definition/config of the data frame (DF).

    :param weights: names of weight branches. Currently unused: the code
        that builds weighted / systematic-variation configs is disabled
        (see the commented block below). Defaults to ``None`` rather than
        a shared mutable list.
    :return a dict mapping a config name to its DF config; at present it
        holds the single entry ``"data"``
    '''
    # Set up categorical binning
    jetpt_bin = Binning(boundaries=range(0, 1000, 20))
    njetbin = Echo()

    # the base DF config shared by every entry
    base = dict(
        keyAttrNames=('Jet_pt', 'nJet'),
        keyOutColumnNames=('jetpt', 'njet'),
        binnings=(jetpt_bin, njetbin),
    )
    df_configs = {"data": base}

    # Disabled: per-weight configurations for systematics.
    # NOTE: if this is re-enabled, treat ``weights=None`` as an empty list.
    #
    # else:
    #     df_configs = {}
    #     # List of weight branches that are multiplied together for the
    #     # final event weight
    #     #
    #     # TODO: Storing the product of these weights as weight_nominal for
    #     # each event would be more efficient, but this needs communication
    #     # or a common interface between a nominal weight scribbler and the
    #     # DF configuration
    #
    #     # Build a dictionary for all the combinations of weights we need
    #     # to check for systematics
    #     weight_combinations = {"nominal": weights, "unweighted": []}
    #     for weight in weights:
    #         for variation in ["up", "down"]:
    #             variation_name = "{}_{}".format(weight, variation)
    #             weight_combinations[variation_name] = weights + [variation_name]
    #     df_configs = {}
    #     for name, weight_list in weight_combinations.items():
    #         config = copy.copy(base)
    #         if weight_list:
    #             config["weight"] = WeightCalculatorProduct(weight_list)
    #         df_configs[name] = config

    return df_configs
def test_two(self):
    """
    Deep-copying a nested reader composite gives, for each copy, new
    objects of the same types at every position of the tree.

    1:composite
        |- 3:composite
        |  |- 4:counter
        |  |- 5:counter
        |
        |- 7:counter
        |- 8:counter
    """

    def make_leaf(branch_name):
        # One counting reader on a single branch, plus its collector
        leaf_reader = Reader(
            KeyValueComposer((branch_name, ), (Echo(), )),
            Count(),
        )
        leaf_collector = Collector(MockResultsCombinationMethod())
        return leaf_reader, leaf_collector

    reader4, collector4 = make_leaf('var4')
    reader5, collector5 = make_leaf('var5')
    reader7, collector7 = make_leaf('var7')
    reader8, collector8 = make_leaf('var8')

    # inner composite (3) holding leaves 4 and 5
    reader3 = ReaderComposite()
    for child in (reader4, reader5):
        reader3.add(child)

    collector3 = CollectorComposite()
    for child in (collector4, collector5):
        collector3.add(child)

    # outer composite (1) holding composite 3 and leaves 7 and 8
    reader1 = ReaderComposite()
    for child in (reader3, reader7, reader8):
        reader1.add(child)

    collector1 = CollectorComposite()
    for child in (collector3, collector7, collector8):
        collector1.add(child)

    originals = (reader1, reader3, reader4, reader5, reader7, reader8)
    expected_types = (
        ReaderComposite, ReaderComposite, Reader, Reader, Reader, Reader,
    )

    # one independent copy of the whole reader tree per dataset
    dataset_copies = (copy.deepcopy(reader1), copy.deepcopy(reader1))

    for root_copy in dataset_copies:
        inner_copy = root_copy.readers[0]
        copied = (
            root_copy,
            inner_copy,
            inner_copy.readers[0],
            inner_copy.readers[1],
            root_copy.readers[1],
            root_copy.readers[2],
        )

        # the copy keeps the structure and the types ...
        for node, node_type in zip(copied, expected_types):
            self.assertIsInstance(node, node_type)

        # ... but shares no reader object with the original tree
        for original, node in zip(originals, copied):
            self.assertIsNot(original, node)
def test_call(self):
    """Calling a default ``Echo`` returns its argument unchanged."""
    echo = Echo()
    for value in (1, 2, 0, 5):
        self.assertEqual(value, echo(value))
def test_valid(self):
    """Values accepted by ``valid`` are echoed; a rejected value gives None."""
    at_least_ten = Echo(valid=lambda value: value >= 10)

    # accepted by the predicate: echoed back
    for accepted in (13, 10):
        self.assertEqual(accepted, at_least_ten(accepted))

    # rejected by the predicate: None
    self.assertIsNone(at_least_ten(7))
# Table configurations: one entry per output table.
tblcfg = [
    # met_pt, written out as column 'met'
    {
        'outFileName': 'tbl_met.txt',
        'branchNames': ('met_pt', ),
        'outColumnNames': ('met', ),
        'binnings': (RoundLog(0.1, 0), ),
        'countsClass': Counts,
    },
    # jet_pt with indices=(0, ); no explicit output column names
    {
        'outFileName': 'tbl_jetpt.txt',
        'branchNames': ('jet_pt', ),
        'binnings': (RoundLog(0.1, 0), ),
        'indices': (0, ),
        'countsClass': Counts,
    },
    # two-dimensional table of jet and b-jet multiplicities
    {
        'outFileName': 'tbl_njets_nbjets.txt',
        'branchNames': ('nJet40', 'nBJet40'),
        'outColumnNames': ('njets', 'nbjets'),
        'binnings': (Echo(), Echo()),
        'countsClass': Counts,
    },
]

# Register a tree reader producing the tables above; no event selection
alphaTwirl.addTreeReader(
    analyzerName='treeProducerSusyAlphaT',
    fileName='tree.root',
    treeName='tree',
    tableConfigs=tblcfg,
    eventSelection=None,
)

##__________________________________________________________________||
import cProfile
import pstats
import StringIO

pr = cProfile.Profile()