#########################################################################################
# --- now set up the chains and links based on configuration flags

# Create the chain named 'Data'; the links configured below are attached to
# it through ch.add().
ch = Chain('Data')

# --- 0. read input data
#     The reader link is named 'dflooper', uses the 'csv' reader and is given
#     the key 'accounts'; the links further down refer to that same key.
read_data = analysis.ReadToDf(name='dflooper', key='accounts', reader='csv')
# input_files is defined outside this section
read_data.path = input_files
ch.add(read_data)

# --- 1. add the record factorizer
#     Here the columns listed in fact.columns are factorized
#     e.g. x = ['apple', 'tree', 'pear', 'apple', 'pear'] becomes the column:
#     x = [0, 1, 2, 0, 2]
#     By default, the mapping is stored in a dict under key: 'map_'+store_key+'_to_original'
#     In this macro the keys of both mapping dicts are set explicitly instead
#     (see sk_map_to_original / sk_map_to_factorized below).
fact = analysis.RecordFactorizer(name='rf1')
fact.columns = ['isActive', 'eyeColor', 'favoriteFruit', 'gender']
# same key as the one passed to the ReadToDf link above
fact.read_key = 'accounts'
# NOTE(review): presumably the factorized values replace the original columns
# in the input dataframe rather than going to a copy -- confirm
fact.inplace = True
# explicit store keys for the two mapping dicts
fact.sk_map_to_original = 'to_original'
fact.sk_map_to_factorized = 'to_factorized'
# verbose logging for this link only
fact.logger.log_level = LogLevel.DEBUG
ch.add(fact)

# --- 2. Fill a roodatahist with the contents of the dataframe
df2rdh = root_analysis.RooDataHistFiller()
# read from the key of the reader link and derive the output key from it
# by prefixing 'rdh_'
df2rdh.read_key = read_data.key
df2rdh.store_key = 'rdh_' + read_data.key
# additional store keys (presumably for the variable set and the category
# set that go with the histogram -- TODO confirm)
df2rdh.store_key_vars = 'rdh_vars'
df2rdh.store_key_cats = 'rdh_cats'
# same string as fact.sk_map_to_factorized above, i.e. the mapping dict
# stored by the record factorizer
df2rdh.map_to_factorized = 'to_factorized'
# NOTE(review): df2rdh is not added to the chain anywhere in the visible
# section; the snippet appears to be cut off here -- confirm against the
# full macro.
# Example #2
# 0
# NOTE(review): `fixer` is created before the start of this section; only the
# tail of its configuration is visible here. usecols, cols_dtype and FNAN are
# likewise defined outside this section.
fixer.convert_inconsistent_nans = True
fixer.logger.log_level = LogLevel.DEBUG
fixer.copy_columns_from_df = False
fixer.original_columns = usecols
fixer.var_dtype = cols_dtype
# register FNAN as the nan_dtype_map entry for np.float64 (presumably the
# value used to represent missing float64 entries -- TODO confirm)
fixer.nan_dtype_map[np.float64] = FNAN

# remove spaces from the cut-column (spaces not handled by correlation-analyser)
fixer.cleanup_string_columns = ['cut']
ch.add(fixer)

# Start a new chain named 'Fact'; `ch` is rebound, so the ch.add() calls from
# here on attach links to this chain.
ch = Chain('Fact')

# --- 1. add the record factorizer to convert categorical observables into integers
#     By default, the mapping is stored in a dict under key: 'map_'+store_key+'_to_original'
#     Here the keys of both mapping dicts are set explicitly instead.
fact = analysis.RecordFactorizer(name='category_factorizer')
# cols_str is defined outside this section
fact.columns = cols_str
# read the object that the fixer link stores
fact.read_key = fixer.store_key
fact.inplace = True
# store key of the dict with the mappings back to the original observables
# (going by the attribute name -- TODO confirm)
fact.sk_map_to_original = 'to_original'
# store key of the dict with the mappings that were applied to factorize the
# observables (going by the attribute name -- TODO confirm)
fact.sk_map_to_factorized = 'to_factorized'
ch.add(fact)

# --- 2. turn the dataframe into a roofit dataset (= roodataset)
#     NOTE(review): the configuration of this link may continue beyond the
#     visible section; no ch.add(df2rds) is shown here.
df2rds = ConvertDataFrame2RooDataSet()
# same column list that was passed to fixer.original_columns
df2rds.columns = usecols
# read the object stored by the fixer link and derive the output key from it
# by prefixing 'rds_'
df2rds.read_key = fixer.store_key
df2rds.store_key = 'rds_' + fixer.store_key
# additional store key (presumably for the set of roofit variables that goes
# with the dataset -- TODO confirm)
df2rds.store_key_vars = 'rds_varset'