def test_init_retvalue(self):
    """Check which ``retvalue`` settings ``Binning`` is expected to accept."""
    edges = (10, 20, 30, 40, 50)

    # Constructions that must succeed: the default and both known options.
    Binning(boundaries=edges)
    for accepted in ('number', 'lowedge'):
        Binning(boundaries=edges, retvalue=accepted)

    # Keyword combinations that must be rejected with ValueError: unknown
    # retvalue names, and 'lowedge' combined with explicit bin labels.
    rejected = (
        dict(retvalue='center'),
        dict(retvalue='yyy'),
        dict(retvalue='lowedge', bins=(1, 2, 3, 4)),
        dict(retvalue='lowedge', underflow_bin=-1),
        dict(retvalue='lowedge', overflow_bin=-1),
    )
    for extra in rejected:
        self.assertRaises(ValueError, Binning, boundaries=edges, **extra)
def dataframe_config():
    '''
    Build the definition/config of the data frame (DF).

    :return: a list containing a single DF config dict
    '''
    # HT and N_jet are binned with fixed boundaries.
    ht_binning = Binning(boundaries=[200, 400, 600, 900, 1200])
    njet_binning = Binning(boundaries=[1, 2, 3, 4, 5, 6])
    # Echo simply returns the value it gets. The original author gives the
    # explicit form as:
    #   Echo(nextFunc=lambda x: x + 1, valid=lambda x: True)
    nbjet_binning = Echo()
    # HEPPY component names are strings, so there is no rule to produce the
    # next bin.
    component_binning = Echo(nextFunc=None)

    # One row per category:
    #   (tree branch to read, DF column to write, binning to apply)
    categories = (
        ('componentName', 'component', component_binning),
        ('ht40', 'htbin', ht_binning),
        ('nJet40', 'njetbin', njet_binning),
        ('nBJet40', 'nbjetbin', nbjet_binning),
    )
    branches, columns, binnings = zip(*categories)

    config = dict(
        keyAttrNames=branches,
        keyOutColumnNames=columns,
        binnings=binnings,
        # weight branches that are multiplied together to give the final
        # event weight
        weight=WeightCalculatorProduct(['genWeight']),
    )
    return [config]
def dataframe_config():
    '''
    Build the definition/config of the data frame (DF).

    :return: a list containing a single DF config dict
    '''
    # HT and N_jet are binned with fixed boundaries.
    ht_binning = Binning(boundaries=[200, 400, 600, 900, 1200])
    njet_binning = Binning(boundaries=[1, 2, 3, 4, 5, 6])
    # Echo simply returns the value it gets. The original author gives the
    # explicit form as:
    #   Echo(nextFunc=lambda x: x + 1, valid=lambda x: True)
    nbjet_binning = Echo()

    # One row per category:
    #   (tree branch to read, DF column to write, binning to apply)
    categories = (
        ('ht40', 'htbin', ht_binning),
        ('nJet40', 'njetbin', njet_binning),
        ('nBJet40', 'nbjetbin', nbjet_binning),
    )
    branches, columns, binnings = zip(*categories)

    config = dict(
        keyAttrNames=branches,
        keyOutColumnNames=columns,
        binnings=binnings,
    )
    return [config]
def test_valid(self):
    """A value failing the ``valid`` predicate is expected to give ``None``."""
    def at_least_ten(value):
        return value >= 10

    binning = Binning(
        boundaries=(30, 40, 50),
        retvalue='number',
        valid=at_least_ten,
    )

    # Valid inputs map to their bin numbers.
    for value, bin_number in ((33, 1), (45, 2)):
        self.assertEqual(bin_number, binning(value))

    # 9 is below the validity threshold of 10.
    self.assertIsNone(binning(9))
def _create_one_dimension(stage_name, _in, _out, _bins=None, _index=None):
    """Validate one binning dimension and build its binning object.

    :param stage_name: name used as a prefix in error messages
    :param _in: input name; must be a string
    :param _out: output name; must be a string
    :param _bins: ``None`` (an ``Echo`` with no next-bin rule is used) or a
        dict holding either ``nbins``/``low``/``high`` or ``edges``, e.g.
        ``{nbins: 6, low: 1, high: 5}`` or ``{edges: [0, 200., 900]}``.
        Other keys (such as ``overflow`` in the example configs) are not
        read by this function.
    :param _index: optional index value, returned unchanged
    :return: the tuple ``(str(_in), str(_out), binning object, _index)``
    :raises BadBinnedDataframeConfig: if any argument fails validation
    """
    text_types = six.string_types

    if not isinstance(_in, text_types):
        template = "{}: binning dictionary contains non-string value for 'in'"
        raise BadBinnedDataframeConfig(template.format(stage_name))
    if not isinstance(_out, text_types):
        template = "{}: binning dictionary contains non-string value for 'out'"
        raise BadBinnedDataframeConfig(template.format(stage_name))
    # NOTE(review): the message mentions integers, but only strings pass this
    # check (falsy values such as None, '' or 0 skip it) -- confirm intent.
    if _index and not isinstance(_index, text_types):
        template = "{}: binning dictionary contains non-string and non-integer value for 'index'"
        raise BadBinnedDataframeConfig(template.format(stage_name))

    # No binning requested: pass values through, with no next-bin rule.
    if _bins is None:
        binning = Echo(nextFunc=None)
        return (str(_in), str(_out), binning, _index)

    if not isinstance(_bins, dict):
        template = "{}: bins is neither None nor a dictionary for in={}"
        raise BadBinnedDataframeConfig(template.format(stage_name, _in))

    if all(key in _bins for key in ("nbins", "low", "high")):
        # nbins equal-width bins need nbins + 1 edges.
        low, high, nbins = _bins["low"], _bins["high"], _bins["nbins"]
        edges = np.linspace(low, high, nbins + 1)
    elif "edges" in _bins:
        edges = _bins["edges"]
    else:
        template = "{}: No way to infer binning edges for in={}"
        raise BadBinnedDataframeConfig(template.format(stage_name, _in))

    binning = Binning(boundaries=edges)
    return (str(_in), str(_out), binning, _index)
def test_lowedge():
    """``Binning`` is expected to return low edges, explicitly and by default."""
    lows = (10.0, 20.0, 30.0, 40.0)
    ups = (20.0, 30.0, 40.0, 50.0)

    # (input value, expected low edge); 5 underflows and 55 overflows.
    cases = (
        (15, 10),
        (21, 20),
        (20, 20),
        (5, float("-inf")),
        (55, 50),
    )

    # First with retvalue given explicitly, then relying on the default,
    # which is expected to be 'lowedge' as well.
    for options in ({'retvalue': 'lowedge'}, {}):
        binning = Binning(lows=lows, ups=ups, **options)
        for value, low_edge in cases:
            assert low_edge == binning(value)
def test_valid():
    """A value failing the ``valid`` predicate is expected to give ``None``."""
    def at_least_ten(value):
        return value >= 10

    binning = Binning(
        boundaries=(30, 40, 50),
        retvalue='number',
        valid=at_least_ten,
    )

    # Valid inputs map to their bin numbers.
    for value, bin_number in ((33, 1), (45, 2)):
        assert bin_number == binning(value)

    # 9 is below the validity threshold of 10.
    assert binning(9) is None
def test_lowedge(self):
    """``Binning`` is expected to return low edges, explicitly and by default."""
    lows = (10.0, 20.0, 30.0, 40.0)
    ups = (20.0, 30.0, 40.0, 50.0)

    # (input value, expected low edge); 5 underflows and 55 overflows.
    cases = (
        (15, 10),
        (21, 20),
        (20, 20),
        (5, float("-inf")),
        (55, 50),
    )

    # First with retvalue given explicitly, then relying on the default,
    # which is expected to be 'lowedge' as well.
    for options in ({'retvalue': 'lowedge'}, {}):
        binning = Binning(lows=lows, ups=ups, **options)
        for value, low_edge in cases:
            self.assertEqual(low_edge, binning(value))
def test_next_number():
    """``next`` on bin numbers is expected to step by one and stop at overflow."""
    binning = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='number')

    # (current bin, expected next bin). Bin 5 is queried twice on purpose:
    # the overflow bin is expected to return itself every time.
    steps = ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 5), (5, 5))
    for current, following in steps:
        assert following == binning.next(current)

    # Arguments that are not existing bin numbers must be rejected.
    for not_a_bin in (2.5, 6):
        with pytest.raises(ValueError):
            binning.next(not_a_bin)
def test_init_with_lows_ups():
    """Building from ``lows``/``ups`` is expected to derive bins and boundaries."""
    binning = Binning(
        lows=(10.0, 20.0, 30.0, 40.0),
        ups=(20.0, 30.0, 40.0, 50.0),
        retvalue='number',
    )

    expected_bins = (1, 2, 3, 4)
    expected_boundaries = (10, 20, 30, 40, 50)
    assert expected_bins == binning.bins
    assert expected_boundaries == binning.boundaries
def test_init_with_lows_ups(self):
    """Building from ``lows``/``ups`` is expected to derive bins and boundaries."""
    binning = Binning(
        lows=(10.0, 20.0, 30.0, 40.0),
        ups=(20.0, 30.0, 40.0, 50.0),
        retvalue='number',
    )

    expected_bins = (1, 2, 3, 4)
    expected_boundaries = (10, 20, 30, 40, 50)
    self.assertEqual(expected_bins, binning.bins)
    self.assertEqual(expected_boundaries, binning.boundaries)
def test_init_with_boundaries():
    """Building from ``boundaries`` is expected to derive bins, lows and ups."""
    edges = (10, 20, 30, 40, 50)
    binning = Binning(boundaries=edges, retvalue='number')

    # Expected attribute values, checked in this order.
    expectations = (
        ((1, 2, 3, 4), 'bins'),
        ((10.0, 20.0, 30.0, 40.0), 'lows'),
        ((20.0, 30.0, 40.0, 50.0), 'ups'),
    )
    for expected, attribute in expectations:
        assert expected == getattr(binning, attribute)
def test_call():
    """Calling a ``Binning`` is expected to return the bin number of a value."""
    binning = Binning(
        bins=(1, 2, 3, 4),
        lows=(10.0, 20.0, 30.0, 40.0),
        ups=(20.0, 30.0, 40.0, 50.0),
        retvalue='number',
    )

    # (input value, expected bin number, what the case covers)
    cases = (
        (15, 1, 'inside the first bin'),
        (21, 2, 'inside the second bin'),
        (20, 2, 'on the low edge'),
        (5, 0, 'underflow'),
        (55, 5, 'overflow'),
    )
    for value, bin_number, _covers in cases:
        assert bin_number == binning(value)
def test_call(self):
    """Calling a ``Binning`` is expected to return the bin number of a value."""
    binning = Binning(
        bins=(1, 2, 3, 4),
        lows=(10.0, 20.0, 30.0, 40.0),
        ups=(20.0, 30.0, 40.0, 50.0),
        retvalue='number',
    )

    # (input value, expected bin number, what the case covers)
    cases = (
        (15, 1, 'inside the first bin'),
        (21, 2, 'inside the second bin'),
        (20, 2, 'on the low edge'),
        (5, 0, 'underflow'),
        (55, 5, 'overflow'),
    )
    for value, bin_number, _covers in cases:
        self.assertEqual(bin_number, binning(value))
def test_next_lowedge():
    """``next`` on low edges is expected to give the following boundary."""
    int_edges = (10, 20, 30, 40, 50)
    binning = Binning(boundaries=int_edges, retvalue='lowedge')

    # Each boundary is followed by the next one; the last returns itself.
    int_steps = ((10, 20), (20, 30), (30, 40), (40, 50), (50, 50))
    for edge, following in int_steps:
        assert following == binning.next(edge)

    # The underflow bin (-inf) is followed by the first boundary.
    assert 10 == binning.next(float('-inf'))

    # Same chain with small float boundaries.
    float_edges = (0.001, 0.002, 0.003, 0.004, 0.005)
    binning = Binning(boundaries=float_edges, retvalue='lowedge')
    float_steps = (
        (0.001, 0.002),
        (0.002, 0.003),
        (0.003, 0.004),
        (0.004, 0.005),
        (0.005, 0.005),
    )
    for edge, following in float_steps:
        assert following == binning.next(edge)
def test_init_retvalue():
    """Check which ``retvalue`` settings ``Binning`` is expected to accept."""
    edges = (10, 20, 30, 40, 50)

    # Constructions that must succeed: the default and both known options.
    Binning(boundaries=edges)
    for accepted in ('number', 'lowedge'):
        Binning(boundaries=edges, retvalue=accepted)

    # Keyword combinations that must be rejected with ValueError: unknown
    # retvalue names, and 'lowedge' combined with explicit bin labels.
    rejected = (
        dict(retvalue='center'),
        dict(retvalue='yyy'),
        dict(retvalue='lowedge', bins=(1, 2, 3, 4)),
        dict(retvalue='lowedge', underflow_bin=-1),
        dict(retvalue='lowedge', overflow_bin=-1),
    )
    for extra in rejected:
        with pytest.raises(ValueError):
            Binning(boundaries=edges, **extra)
def prepare_dataframe_configs(weights=None):
    '''
    Creates the definition/config of the data frame (DF).

    :param weights: names of weight branches. Currently unused, because the
        code building per-systematic weighted configs is disabled (see the
        note at the end of the function). ``None`` (the default) means no
        weights; a mutable ``[]`` default is avoided on purpose.
    :return: a dict mapping a config name to its DF config; at present it
        holds the single entry ``"data"``
    '''
    # Set up categorical binning
    jetpt_bin = Binning(boundaries=range(0, 1000, 20))
    njetbin = Echo()

    # The DF config shared by all entries
    base = dict(
        keyAttrNames=('Jet_pt', 'nJet'),
        keyOutColumnNames=('jetpt', 'njet'),
        binnings=(jetpt_bin, njetbin),
    )
    df_configs = {"data": base}

    # Disabled: one config per weight combination, for systematics.
    #
    # TODO: Storing the product of these weights as weight_nominal for each
    # event would be more efficient, but this needs communication or a
    # common interface between a nominal weight scribbler and the DF
    # configuration.
    #
    # The disabled code built this dictionary of weight lists:
    #   "nominal"    -> weights
    #   "unweighted" -> []
    #   "<w>_up" / "<w>_down" -> weights + ["<w>_up" / "<w>_down"]
    #                            for every w in weights
    # Each entry got a shallow copy of ``base``; when its weight list was
    # non-empty, a "weight" key set to WeightCalculatorProduct(weight_list)
    # was added. If this is re-enabled, treat ``weights=None`` as an empty
    # list first.

    return df_configs
def test_next_number(self):
    """``next`` on bin numbers is expected to step by one and stop at overflow."""
    binning = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='number')

    # (current bin, expected next bin). Bin 5 is queried twice on purpose:
    # the overflow bin is expected to return itself every time.
    steps = ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 5), (5, 5))
    for current, following in steps:
        self.assertEqual(following, binning.next(current))

    # Arguments that are not existing bin numbers must be rejected.
    for not_a_bin in (2.5, 6):
        self.assertRaises(ValueError, binning.next, not_a_bin)
def test_init_exceptions():
    """Invalid argument combinations are expected to raise ``ValueError``."""
    invalid_argument_sets = (
        # nothing given, or only one of lows/ups
        {},
        {'lows': 1},
        {'ups': 1},
        # boundaries given together with lows and/or ups
        {'boundaries': 1, 'lows': 1},
        {'boundaries': 1, 'ups': 1},
        {'boundaries': 1, 'lows': 1, 'ups': 1},
        # lows and ups that do not line up: the third bin ends at 40.0 but
        # the fourth starts at 45.0
        {
            'lows': (10.0, 20.0, 30.0, 45.0),
            'ups': (20.0, 30.0, 40.0, 50.0),
        },
    )
    for arguments in invalid_argument_sets:
        with pytest.raises(ValueError):
            Binning(**arguments)
def test_repr():
    """Smoke test: ``repr`` of a ``Binning`` must not raise."""
    binning = Binning(boundaries=(10, 20, 30, 40, 50))
    # Only the absence of an exception is checked; the text is not inspected.
    repr(binning)
def test_next_lowedge():
    """``next`` on low edges is expected to give the following boundary."""
    def following_edges(edges):
        # Pair each boundary with its successor; the last one pairs with
        # itself.
        return zip(edges, edges[1:] + edges[-1:])

    int_edges = (10, 20, 30, 40, 50)
    binning = Binning(boundaries=int_edges, retvalue='lowedge')
    for edge, following in following_edges(int_edges):
        assert following == binning.next(edge)

    # The underflow bin (-inf) is followed by the first boundary.
    assert 10 == binning.next(float('-inf'))

    # Same chain with small float boundaries.
    float_edges = (0.001, 0.002, 0.003, 0.004, 0.005)
    binning = Binning(boundaries=float_edges, retvalue='lowedge')
    for edge, following in following_edges(float_edges):
        assert following == binning.next(edge)
def test_next_lowedge(self):
    """``next`` on low edges is expected to give the following boundary."""
    def check_chain(edges):
        # Build a low-edge binning and check that every boundary is followed
        # by its successor, the last one being followed by itself.
        binning = Binning(boundaries=edges, retvalue='lowedge')
        for edge, following in zip(edges, edges[1:] + edges[-1:]):
            self.assertEqual(following, binning.next(edge))
        return binning

    binning = check_chain((10, 20, 30, 40, 50))

    # The underflow bin (-inf) is followed by the first boundary.
    self.assertEqual(10, binning.next(float('-inf')))

    # Same chain with small float boundaries.
    check_chain((0.001, 0.002, 0.003, 0.004, 0.005))
def test_onBoundary():
    """Values exactly on a boundary: the n-th boundary is expected to give n."""
    edges = (0.000001, 0.00001, 0.0001)
    binning = Binning(boundaries=edges, retvalue='number')

    # Bin numbers start at 1, so enumerate from 1 to pair each boundary with
    # the number it is expected to return.
    for bin_number, edge in enumerate(edges, 1):
        assert bin_number == binning(edge)
def test_init_exceptions_nobin():
    """A single boundary defines no bin and is expected to raise ``ValueError``."""
    single_edge = (10, )
    with pytest.raises(ValueError):
        Binning(boundaries=single_edge)
def test_onBoundary(self):
    """Values exactly on a boundary: the n-th boundary is expected to give n."""
    edges = (0.000001, 0.00001, 0.0001)
    binning = Binning(boundaries=edges, retvalue='number')

    # Bin numbers start at 1, so enumerate from 1 to pair each boundary with
    # the number it is expected to return.
    for bin_number, edge in enumerate(edges, 1):
        self.assertEqual(bin_number, binning(edge))