#########################################################################################
# --- now set up the chains and links based on configuration flags

ch = Chain('Data')

# --- 0. read input data
#     The reader is configured with key 'accounts'; the links below refer to the
#     same key through read_data.key.
read_data = analysis.ReadToDf(name='dflooper', key='accounts', reader='csv')
read_data.path = input_files
ch.add(read_data)

# --- 1. add the record factorizer
#     The columns listed in fact.columns are factorized, i.e. every distinct value
#     of a column is replaced by an integer code,
#     e.g. x = ['apple', 'tree', 'pear', 'apple', 'pear'] becomes the column:
#     x = [0, 1, 2, 0, 2]
#     By default, the mapping is stored in a dict under key: 'map_'+store_key+'_to_original'
#     Explicit keys are assigned through the sk_map_* attributes below.
fact = analysis.RecordFactorizer(name='rf1')
fact.columns = ['isActive', 'eyeColor', 'favoriteFruit', 'gender']
# use the reader's key rather than repeating the literal 'accounts', consistent
# with df2rdh.read_key further down
fact.read_key = read_data.key
fact.inplace = True
fact.sk_map_to_original = 'to_original'
fact.sk_map_to_factorized = 'to_factorized'
fact.logger.log_level = LogLevel.DEBUG
ch.add(fact)

# --- 2. Fill a roodatahist with the contents of the dataframe
df2rdh = root_analysis.RooDataHistFiller()
df2rdh.read_key = read_data.key
df2rdh.store_key = 'rdh_' + read_data.key
df2rdh.store_key_vars = 'rdh_vars'
df2rdh.store_key_cats = 'rdh_cats'
# same key as fact.sk_map_to_factorized above
df2rdh.map_to_factorized = 'to_factorized'
# --- remaining configuration of the `fixer` link; the link itself and the chain
#     it is added to are created earlier in the file
fixer.convert_inconsistent_nans = True
fixer.logger.log_level = LogLevel.DEBUG
fixer.copy_columns_from_df = False
fixer.original_columns = usecols
fixer.var_dtype = cols_dtype
# value registered for np.float64 in the fixer's nan map
# NOTE(review): FNAN is defined elsewhere in this file - confirm its value
fixer.nan_dtype_map[np.float64] = FNAN
# remove spaces from the cut-column (spaces not handled by correlation-analyser)
fixer.cleanup_string_columns = ['cut']
ch.add(fixer)

# from here on `ch` refers to a new chain; the fixer stays in the previous one
ch = Chain('Fact')

# --- 1. add the record factorizer to convert categorical observables into integers
#     By default, the mapping is stored in a dict under key: 'map_'+store_key+'_to_original'
fact = analysis.RecordFactorizer(name='category_factorizer')
fact.columns = cols_str
# operate on the output of the fixer link
fact.read_key = fixer.store_key
fact.inplace = True
# key for the dict with the mappings back to the original observables
# (presumably a datastore key, going by the attribute name - confirm)
fact.sk_map_to_original = 'to_original'
# key for the dict with the mappings that have been applied to all observables
# (presumably a datastore key, going by the attribute name - confirm)
fact.sk_map_to_factorized = 'to_factorized'
ch.add(fact)

# --- 2. turn the dataframe into a roofit dataset (= roodataset)
df2rds = ConvertDataFrame2RooDataSet()
df2rds.columns = usecols
df2rds.read_key = fixer.store_key
df2rds.store_key = 'rds_' + fixer.store_key
df2rds.store_key_vars = 'rds_varset'