# NOTE(review): fragment of an earlier do_entire_analysis revision; the enclosing
# `def` is not visible in this chunk, so every name here is bound elsewhere.
# Original indentation was lost in this paste — the grouping below is the
# conventional reading; confirm against the full revision before relying on it.
myan = MultiAnalyzer(data_param[case], case, typean, doanaperperiod)
mysis = Systematics(data_param[case], case, typean)
normalizecross = data_param[case]["analysis"][typean]["normalizecross"]
#creating folder if not present
# counter goes negative when a check* helper reports an already-existing
# directory; presumably the caller aborts on counter < 0 — TODO confirm.
counter = 0
if doconversionmc is True:
    counter = counter + checkdirlist(dirpklmc)
if doconversiondata is True:
    counter = counter + checkdirlist(dirpkldata)
if doskimmingmc is True:
    # NOTE(review): return value discarded here, unlike the data branch below —
    # likely should be accumulated into counter; verify intent.
    checkdirlist(dirpklskmc)
    counter = counter + checkdir(dirpklevtcounter_allmc)
if doskimmingdata is True:
    counter = counter + checkdirlist(dirpklskdata)
    counter = counter + checkdir(dirpklevtcounter_alldata)
if domergingmc is True:
    counter = counter + checkdirlist(dirpklmlmc)
if domergingdata is True:
    counter = counter + checkdirlist(dirpklmldata)
if domergingperiodsmc is True:
    counter = counter + checkdir(dirpklmltotmc)
# NOTE(review): fragment is truncated here — the suite of this `if` is missing.
if domergingperiodsdata is True:
def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite: dict, # pylint: disable=too-many-locals, too-many-statements, too-many-branches
                       data_model: dict, run_param: dict, clean: bool):
    """Run the configured steps of the full analysis chain.

    The chain covers: download, ROOT->pickle conversion, skimming, merging,
    ML training/application, mass histograms, efficiencies, the analysis
    steps proper and the ML/pT-shape systematics. Which steps run is driven
    entirely by boolean/int flags in *data_config*.

    Args:
        data_config: steering configuration selecting the steps to execute.
        data_param: analysis-parameter database; its first key is the case.
        data_param_overwrite: overrides applied to data_param via update_config.
        data_model: ML model parameters, keyed by ML type.
        run_param: run-list database handed to the processers.
        clean: if True, delete per-period result directories at the end
            (skipped when the analysis itself is per-period).
    """
    # Disable any graphical stuff. No TCanvases opened and shown by default
    gROOT.SetBatch(True)

    logger = get_logger()
    logger.info("Do analysis chain")

    # If we are here we are interested in the very first key in the parameters database
    case = list(data_param.keys())[0]

    # Update database accordingly if needed
    update_config(data_param, data_config, data_param_overwrite)

    # --- steering flags from the run configuration ---
    dodownloadalice = data_config["download"]["alice"]["activate"]
    doconversionmc = data_config["conversion"]["mc"]["activate"]
    doconversiondata = data_config["conversion"]["data"]["activate"]
    domergingmc = data_config["merging"]["mc"]["activate"]
    domergingdata = data_config["merging"]["data"]["activate"]
    doskimmingmc = data_config["skimming"]["mc"]["activate"]
    doskimmingdata = data_config["skimming"]["data"]["activate"]
    domergingperiodsmc = data_config["mergingperiods"]["mc"]["activate"]
    domergingperiodsdata = data_config["mergingperiods"]["data"]["activate"]
    doml = data_config["ml_study"]["activate"]
    docorrelation = data_config["ml_study"]['docorrelation']
    dotraining = data_config["ml_study"]['dotraining']
    dotesting = data_config["ml_study"]['dotesting']
    doapplytodatamc = data_config["ml_study"]['doapplytodatamc']
    docrossvalidation = data_config["ml_study"]['docrossvalidation']
    dolearningcurve = data_config["ml_study"]['dolearningcurve']
    doroc = data_config["ml_study"]['doroc']
    doroctraintest = data_config["ml_study"]['doroctraintest']
    doboundary = data_config["ml_study"]['doboundary']
    doimportance = data_config["ml_study"]['doimportance']
    doimportanceshap = data_config["ml_study"]['doimportanceshap']
    dogridsearch = data_config["ml_study"]['dogridsearch']
    dobayesianopt = data_config["ml_study"]['dobayesianopt']
    doefficiencyml = data_config["ml_study"]['doefficiency']
    dosignifopt = data_config["ml_study"]['dosignifopt']
    doscancuts = data_config["ml_study"]["doscancuts"]
    doplotdistr = data_config["ml_study"]["doplotdistr"]
    doapplydata = data_config["mlapplication"]["data"]["doapply"]
    doapplymc = data_config["mlapplication"]["mc"]["doapply"]
    domergeapplydata = data_config["mlapplication"]["data"]["domergeapply"]
    domergeapplymc = data_config["mlapplication"]["mc"]["domergeapply"]
    docontinueapplydata = data_config["mlapplication"]["data"]["docontinueafterstop"]
    docontinueapplymc = data_config["mlapplication"]["mc"]["docontinueafterstop"]
    dohistomassmc = data_config["analysis"]["mc"]["histomass"]
    dohistomassdata = data_config["analysis"]["data"]["histomass"]
    doefficiency = data_config["analysis"]["mc"]["efficiency"]
    doresponse = data_config["analysis"]["mc"]["response"]
    dofeeddown = data_config["analysis"]["mc"]["feeddown"]
    dounfolding = data_config["analysis"]["mc"]["dounfolding"]
    dojetsystematics = data_config["analysis"]["data"]["dojetsystematics"]
    dofit = data_config["analysis"]["dofit"]
    doeff = data_config["analysis"]["doeff"]
    docross = data_config["analysis"]["docross"]
    doplotsval = data_config["analysis"]["doplotsval"]
    doplots = data_config["analysis"]["doplots"]
    dosyst = data_config["analysis"]["dosyst"]
    dosystprob = data_config["systematics"]["cutvar"]["activate"]
    do_syst_prob_mass = data_config["systematics"]["cutvar"]["probvariationmass"]
    do_syst_prob_eff = data_config["systematics"]["cutvar"]["probvariationeff"]
    do_syst_prob_fit = data_config["systematics"]["cutvar"]["probvariationfit"]
    do_syst_prob_cross = data_config["systematics"]["cutvar"]["probvariationcross"]
    dosystptshape = data_config["systematics"]["mcptshape"]["activate"]
    doanaperperiod = data_config["analysis"]["doperperiod"]
    typean = data_config["analysis"]["type"]
    dojetstudies = data_config["analysis"]["dojetstudies"]

    # --- directories and ML settings from the parameter database ---
    dirpklmc = data_param[case]["multi"]["mc"]["pkl"]
    dirpklevtcounter_allmc = data_param[case]["multi"]["mc"]["pkl_evtcounter_all"]
    dirpklskmc = data_param[case]["multi"]["mc"]["pkl_skimmed"]
    dirpklmlmc = data_param[case]["multi"]["mc"]["pkl_skimmed_merge_for_ml"]
    dirpklmltotmc = data_param[case]["multi"]["mc"]["pkl_skimmed_merge_for_ml_all"]
    dirpkldata = data_param[case]["multi"]["data"]["pkl"]
    dirpklevtcounter_alldata = data_param[case]["multi"]["data"]["pkl_evtcounter_all"]
    dirpklskdata = data_param[case]["multi"]["data"]["pkl_skimmed"]
    dirpklmldata = data_param[case]["multi"]["data"]["pkl_skimmed_merge_for_ml"]
    dirpklmltotdata = data_param[case]["multi"]["data"]["pkl_skimmed_merge_for_ml_all"]
    dirpklskdecmc = data_param[case]["mlapplication"]["mc"]["pkl_skimmed_dec"]
    dirpklskdec_mergedmc = data_param[case]["mlapplication"]["mc"]["pkl_skimmed_decmerged"]
    dirpklskdecdata = data_param[case]["mlapplication"]["data"]["pkl_skimmed_dec"]
    dirpklskdec_mergeddata = data_param[case]["mlapplication"]["data"]["pkl_skimmed_decmerged"]
    dirresultsdata = data_param[case]["analysis"][typean]["data"]["results"]
    dirresultsmc = data_param[case]["analysis"][typean]["mc"]["results"]
    dirresultsdatatot = data_param[case]["analysis"][typean]["data"]["resultsallp"]
    dirresultsmctot = data_param[case]["analysis"][typean]["mc"]["resultsallp"]
    binminarray = data_param[case]["ml"]["binmin"]
    binmaxarray = data_param[case]["ml"]["binmax"]
    raahp = data_param[case]["ml"]["opt"]["raahp"]
    mltype = data_param[case]["ml"]["mltype"]
    training_vars = data_param[case]["variables"]["var_training"]
    mlout = data_param[case]["ml"]["mlout"]
    mlplot = data_param[case]["ml"]["mlplot"]
    proc_type = data_param[case]["analysis"][typean]["proc_type"]

    #creating folder if not present
    # The check* helpers report pre-existing directories; counter < 0 means at
    # least one target of an activated step already exists, and we abort below.
    counter = 0
    if doconversionmc is True:
        counter = counter + checkdirlist(dirpklmc)
    if doconversiondata is True:
        counter = counter + checkdirlist(dirpkldata)
    if doskimmingmc is True:
        # FIX: the return value used to be discarded here, so a pre-existing
        # skimmed-MC directory never blocked the run, unlike the data branch.
        counter = counter + checkdirlist(dirpklskmc)
        counter = counter + checkdir(dirpklevtcounter_allmc)
    if doskimmingdata is True:
        counter = counter + checkdirlist(dirpklskdata)
        counter = counter + checkdir(dirpklevtcounter_alldata)
    if domergingmc is True:
        counter = counter + checkdirlist(dirpklmlmc)
    if domergingdata is True:
        counter = counter + checkdirlist(dirpklmldata)
    if domergingperiodsmc is True:
        counter = counter + checkdir(dirpklmltotmc)
    if domergingperiodsdata is True:
        counter = counter + checkdir(dirpklmltotdata)
    if doml is True:
        counter = counter + checkdir(mlout)
        counter = counter + checkdir(mlplot)
    if docontinueapplymc is False:
        if doapplymc is True:
            counter = counter + checkdirlist(dirpklskdecmc)
        if domergeapplymc is True:
            counter = counter + checkdirlist(dirpklskdec_mergedmc)
    if docontinueapplydata is False:
        if doapplydata is True:
            counter = counter + checkdirlist(dirpklskdecdata)
        if domergeapplydata is True:
            counter = counter + checkdirlist(dirpklskdec_mergeddata)
    if dohistomassmc is True:
        counter = counter + checkdirlist(dirresultsmc)
        counter = counter + checkdir(dirresultsmctot)
    if dohistomassdata is True:
        counter = counter + checkdirlist(dirresultsdata)
        counter = counter + checkdir(dirresultsdatatot)
    if counter < 0:
        sys.exit()

    # check and create directories
    if doconversionmc is True:
        checkmakedirlist(dirpklmc)
    if doconversiondata is True:
        checkmakedirlist(dirpkldata)
    if doskimmingmc is True:
        checkmakedirlist(dirpklskmc)
        checkmakedir(dirpklevtcounter_allmc)
    if doskimmingdata is True:
        checkmakedirlist(dirpklskdata)
        checkmakedir(dirpklevtcounter_alldata)
    if domergingmc is True:
        checkmakedirlist(dirpklmlmc)
    if domergingdata is True:
        checkmakedirlist(dirpklmldata)
    if domergingperiodsmc is True:
        checkmakedir(dirpklmltotmc)
    if domergingperiodsdata is True:
        checkmakedir(dirpklmltotdata)
    if doml is True:
        checkmakedir(mlout)
        checkmakedir(mlplot)
    if docontinueapplymc is False:
        if doapplymc is True:
            checkmakedirlist(dirpklskdecmc)
        if domergeapplymc is True:
            checkmakedirlist(dirpklskdec_mergedmc)
    if docontinueapplydata is False:
        if doapplydata is True:
            checkmakedirlist(dirpklskdecdata)
        if domergeapplydata is True:
            checkmakedirlist(dirpklskdec_mergeddata)
    if dohistomassmc is True:
        checkmakedirlist(dirresultsmc)
        checkmakedir(dirresultsmctot)
    if dohistomassdata is True:
        checkmakedirlist(dirresultsdata)
        checkmakedir(dirresultsdatatot)

    # Select the processer/analyzer specialisation for this analysis type.
    proc_class = Processer
    ana_class = Analyzer
    syst_class = Systematics
    if proc_type == "Dhadrons":
        print("Using new feature for Dhadrons")
        proc_class = ProcesserDhadrons
        ana_class = AnalyzerDhadrons
    if proc_type == "Dhadrons_mult":
        print("Using new feature for Dhadrons_mult")
        proc_class = ProcesserDhadrons_mult
        ana_class = AnalyzerDhadrons_mult
    if proc_type == "Dhadrons_jet":
        print("Using new feature for Dhadrons_jet")
        proc_class = ProcesserDhadrons_jet
        ana_class = AnalyzerJet

    mymultiprocessmc = MultiProcesser(case, proc_class, data_param[case], typean, run_param, "mc")
    mymultiprocessdata = MultiProcesser(case, proc_class, data_param[case], typean, run_param,
                                        "data")
    ana_mgr = AnalyzerManager(ana_class, data_param[case], case, typean, doanaperperiod)
    # Has to be done always period-by-period
    syst_mgr = AnalyzerManager(syst_class, data_param[case], case, typean, True, run_param)

    #perform the analysis flow
    if dodownloadalice == 1:
        subprocess.call("../cplusutilities/Download.sh")

    if doconversionmc == 1:
        mymultiprocessmc.multi_unpack_allperiods()

    if doconversiondata == 1:
        mymultiprocessdata.multi_unpack_allperiods()

    if doskimmingmc == 1:
        mymultiprocessmc.multi_skim_allperiods()

    if doskimmingdata == 1:
        mymultiprocessdata.multi_skim_allperiods()

    if domergingmc == 1:
        mymultiprocessmc.multi_mergeml_allperiods()

    if domergingdata == 1:
        mymultiprocessdata.multi_mergeml_allperiods()

    if domergingperiodsmc == 1:
        mymultiprocessmc.multi_mergeml_allinone()

    if domergingperiodsdata == 1:
        mymultiprocessdata.multi_mergeml_allinone()

    # ML study: one Optimiser per pT bin, running only the activated sub-steps.
    if doml is True:
        index = 0
        for binmin, binmax in zip(binminarray, binmaxarray):
            myopt = Optimiser(data_param[case], case, typean,
                              data_model[mltype], binmin, binmax,
                              raahp[index], training_vars[index])
            if docorrelation is True:
                myopt.do_corr()
            if dotraining is True:
                myopt.do_train()
            if dotesting is True:
                myopt.do_test()
            if doapplytodatamc is True:
                myopt.do_apply()
            if docrossvalidation is True:
                myopt.do_crossval()
            if dolearningcurve is True:
                myopt.do_learningcurve()
            if doroc is True:
                myopt.do_roc()
            if doroctraintest is True:
                myopt.do_roc_train_test()
            if doplotdistr is True:
                myopt.do_plot_model_pred()
            if doimportance is True:
                myopt.do_importance()
            if doimportanceshap is True:
                myopt.do_importance_shap()
            if dogridsearch is True:
                myopt.do_grid()
            if dobayesianopt is True:
                myopt.do_bayesian_opt()
            if doboundary is True:
                myopt.do_boundary()
            if doefficiencyml is True:
                myopt.do_efficiency()
            if dosignifopt is True:
                myopt.do_significance()
            if doscancuts is True:
                myopt.do_scancuts()
            index = index + 1

    if doapplydata is True:
        mymultiprocessdata.multi_apply_allperiods()
    if doapplymc is True:
        mymultiprocessmc.multi_apply_allperiods()
    if domergeapplydata is True:
        mymultiprocessdata.multi_mergeapply_allperiods()
    if domergeapplymc is True:
        mymultiprocessmc.multi_mergeapply_allperiods()
    if dohistomassmc is True:
        mymultiprocessmc.multi_histomass()
    if dohistomassdata is True:
        # After-burner in case of a mult analysis to obtain "correctionsweight.root"
        # for merged-period data
        # pylint: disable=fixme
        # FIXME Can only be run here because result directories are constructed when histomass
        # is run. If this step was independent, histomass would always complain that the
        # result directory already exists.
        mymultiprocessdata.multi_histomass()
    if doefficiency is True:
        mymultiprocessmc.multi_efficiency()
    if doresponse is True:
        mymultiprocessmc.multi_response()

    # Collect all desired analysis steps
    analyze_steps = []
    if dofit is True:
        analyze_steps.append("fit")
    if dosyst is True:
        analyze_steps.append("yield_syst")
    if doeff is True:
        analyze_steps.append("efficiency")
    if dojetstudies is True:
        # Jet studies need fit and efficiency as prerequisites; add them if
        # they were not requested explicitly.
        if dofit is False:
            analyze_steps.append("fit")
        if doeff is False:
            analyze_steps.append("efficiency")
        analyze_steps.append("sideband_sub")
    if dofeeddown is True:
        analyze_steps.append("feeddown")
    if dounfolding is True:
        analyze_steps.append("unfolding")
        analyze_steps.append("unfolding_closure")
    if dojetsystematics is True:
        analyze_steps.append("jetsystematics")
    if docross is True:
        analyze_steps.append("makenormyields")
    if doplots is True:
        analyze_steps.append("plotternormyields")
    if doplotsval is True:
        analyze_steps.append("plottervalidation")

    # Now do the analysis
    ana_mgr.analyze(*analyze_steps)

    ml_syst_steps = []
    if dosystprob is True:
        if do_syst_prob_mass:
            ml_syst_steps.append("ml_cutvar_mass")
        if do_syst_prob_eff:
            ml_syst_steps.append("ml_cutvar_eff")
        if do_syst_prob_fit:
            ml_syst_steps.append("ml_cutvar_fit")
        if do_syst_prob_cross:
            ml_syst_steps.append("ml_cutvar_cross")
    if dosystptshape is True:
        ml_syst_steps.append("mcptshape")
    syst_mgr.analyze(*ml_syst_steps)

    # Delete per-period results.
    if clean:
        print("Cleaning")
        if doanaperperiod:
            print("Per-period analysis enabled. Skipping.")
        else:
            if not delete_dirlist(dirresultsmc + dirresultsdata):
                print("Error: Failed to complete cleaning.")

    print("Done")
# NOTE(review): fragment of an older do_entire_analysis revision; the enclosing
# `def` is not visible here, so all names are bound elsewhere.
mymultiprocessmc = MultiProcesser(data_param[case], run_param, "mc")
mymultiprocessdata = MultiProcesser(data_param[case], run_param, "data")
#creating folder if not present
# Each check aborts the whole program when the output directory already exists.
# NOTE(review): `exit()` is the interactive-site builtin; sys.exit() would be
# the conventional choice for scripts.
if doconversionmc is True:
    if checkdirlist(dirpklmc) is True:
        exit()
if doconversiondata is True:
    if checkdirlist(dirpkldata) is True:
        exit()
if doskimmingmc is True:
    # NOTE(review): `is True` binds only to the second call here
    # (a or (b is True)) — equivalent for bool returns, but fragile.
    if checkdirlist(dirpklskmc) or checkdir(dirpklevtcounter_allmc) is True:
        exit()
if doskimmingdata is True:
    if checkdirlist(dirpklskdata) or checkdir(dirpklevtcounter_alldata) is True:
        exit()
if domergingmc is True:
    if checkdirlist(dirpklmlmc) is True:
        exit()
if domergingdata is True:
    if checkdirlist(dirpklmldata) is True:
        exit()
# NOTE(review): fragment of a hipe4ml-era do_entire_analysis revision; the
# enclosing `def` is not visible. Original indentation was lost in this paste —
# the suite of the `if domlprefilterstep_indb` branch below is a best reading;
# confirm against the full revision.
mlout = mlout + "/prefilter"
mlplot = mlplot + "/prefilter"
if domlprefilterstep_indb is False:
    # Redirect ML outputs into an "/analysis" subfolder and keep the database
    # entries in sync with the local variables.
    data_param[case]["ml"]["mlout"] = mlout + "/analysis"
    data_param[case]["ml"]["mlplot"] = mlplot + "/analysis"
    mlout = mlout + "/analysis"
    mlplot = mlplot + "/analysis"
opti_hyperpar_hipe4ml = data_param[case]["hipe4ml"]["hyper_par_opt"][
    "do_hyp_opt"]
hipe4ml_hyper_pars = data_param[case]["hipe4ml"]["hipe4ml_hyper_pars"]
#creating folder if not present
counter = 0
if doml is True and dotraining is True:
    counter = counter + checkdir(mlout)
    counter = counter + checkdir(mlplot)
if counter < 0:
    sys.exit()
# check and create directories
if doml is True and dotraining is True:
    checkmakedir(mlout)
    checkmakedir(mlplot)
# if doml is True and domloption == 1:
#     index = 0
#     for binmin, binmax in zip(binminarray, binmaxarray):
#         myopt = Optimiser(data_param[case], case, typean,
def do_entire_analysis(analysis_config_file): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
    """Run the analysis chain steered by YAML configuration files.

    Args:
        analysis_config_file: path to the YAML database with all relevant
            analysis parameters; the remaining configurations are read from
            fixed paths relative to the working directory.
    """
    # Load configuration file specifying main options to execute: conversion, skimming, analysis, etc.
    with open("default_complete.yaml", 'r', encoding='utf-8') as run_config:
        data_config = yaml.load(run_config, Loader=yaml.FullLoader)
    # Load configuration file containing all relevant analysis parameters
    with open(analysis_config_file, 'r', encoding='utf-8') as param_config:
        data_param = yaml.load(param_config, Loader=yaml.FullLoader)
    with open("data/config_model_parameters.yml", 'r', encoding='utf-8') as mod_config:
        data_model = yaml.load(mod_config, Loader=yaml.FullLoader)
    with open("data/database_run_list.yml", 'r', encoding='utf-8') as runlist_config:
        run_param = yaml.load(runlist_config, Loader=yaml.FullLoader)
    with open("data/database_ml_gridsearch.yml", 'r', encoding='utf-8') as grid_config:
        grid_param = yaml.load(grid_config, Loader=yaml.FullLoader)

    # Load parameters from data_config -- Required parameters
    usemc = data_config["use_mc"]
    usedata = data_config["use_data"]
    case = data_config["case"]
    doconversionmc = data_config["conversion"]["mc"]["activate"]
    doconversiondata = data_config["conversion"]["data"]["activate"]
    domergingmc = data_config["merging"]["mc"]["activate"]
    domergingdata = data_config["merging"]["data"]["activate"]
    doskimmingmc = data_config["skimming"]["mc"]["activate"]
    doskimmingdata = data_config["skimming"]["data"]["activate"]
    domergingperiodsmc = data_config["mergingperiods"]["mc"]["activate"]
    domergingperiodsdata = data_config["mergingperiods"]["data"]["activate"]
    doapplydata = data_config["analysis"]["data"]["doapply"]
    doapplymc = data_config["analysis"]["mc"]["doapply"]
    domergeapplydata = data_config["analysis"]["data"]["domergeapply"]
    domergeapplymc = data_config["analysis"]["mc"]["domergeapply"]
    dojetdata = data_config["jetanalysis"]["data"]["activate"]
    dojetmc = data_config["jetanalysis"]["mc"]["activate"]

    # Load parameters from data_config -- Optional parameters
    # (only defined for non-jet cases; None means "step not available")
    doml = docorrelation = dotraining = dotesting = doapplytodatamc = docrossvalidation = \
        dolearningcurve = doroc = doboundary = doimportance = dogridsearch = dosignifopt = \
        dohistomassmc = dohistomassdata = doefficiency = None
    if 'Jet' not in case:
        doml = data_config["ml_study"]["activate"]
        docorrelation = data_config["ml_study"]['docorrelation']
        dotraining = data_config["ml_study"]['dotraining']
        dotesting = data_config["ml_study"]['dotesting']
        doapplytodatamc = data_config["ml_study"]['applytodatamc']
        docrossvalidation = data_config["ml_study"]['docrossvalidation']
        dolearningcurve = data_config["ml_study"]['dolearningcurve']
        doroc = data_config["ml_study"]['doroc']
        doboundary = data_config["ml_study"]['doboundary']
        doimportance = data_config["ml_study"]['doimportance']
        dogridsearch = data_config["ml_study"]['dogridsearch']
        dosignifopt = data_config["ml_study"]['dosignifopt']
        #doefficiency = run_config['doefficiency']
        dohistomassmc = data_config["analysis"]["mc"]["histomass"]
        dohistomassdata = data_config["analysis"]["data"]["histomass"]
        doefficiency = data_config["analysis"]["mc"]["efficiency"]

    # Load parameters from data_param -- Required parameters
    dirpklmc = data_param[case]["multi"]["mc"]["pkl"]
    dirpklevtcounter_allmc = data_param[case]["multi"]["mc"][
        "pkl_evtcounter_all"]
    dirpklskmc = data_param[case]["multi"]["mc"]["pkl_skimmed"]
    dirpkldata = data_param[case]["multi"]["data"]["pkl"]
    dirpklevtcounter_alldata = data_param[case]["multi"]["data"][
        "pkl_evtcounter_all"]
    dirpklskdata = data_param[case]["multi"]["data"]["pkl_skimmed"]
    dirresultsdata = data_param[case]["analysis"]["data"]["results"]
    dirresultsmc = data_param[case]["analysis"]["mc"]["results"]

    # Load parameters from data_param -- Optional parameters
    dirpklmlmc = dirpklmltotmc = dirpklmldata = dirpklmltotdata = dirpklskdecmc = \
        dirpklskdec_mergedmc = dirpklskdecdata = dirpklskdec_mergeddata = \
        binminarray = binmaxarray = raahp = mltype = mlout = mlplot = None
    if 'Jet' not in case:
        dirpklmlmc = data_param[case]["multi"]["mc"][
            "pkl_skimmed_merge_for_ml"]
        dirpklmltotmc = data_param[case]["multi"]["mc"][
            "pkl_skimmed_merge_for_ml_all"]
        dirpklmldata = data_param[case]["multi"]["data"][
            "pkl_skimmed_merge_for_ml"]
        dirpklmltotdata = data_param[case]["multi"]["data"][
            "pkl_skimmed_merge_for_ml_all"]
        dirpklskdecmc = data_param[case]["analysis"]["mc"]["pkl_skimmed_dec"]
        dirpklskdec_mergedmc = data_param[case]["analysis"]["mc"][
            "pkl_skimmed_decmerged"]
        dirpklskdecdata = data_param[case]["analysis"]["data"][
            "pkl_skimmed_dec"]
        dirpklskdec_mergeddata = data_param[case]["analysis"]["data"][
            "pkl_skimmed_decmerged"]
        binminarray = data_param[case]["ml"]["binmin"]
        binmaxarray = data_param[case]["ml"]["binmax"]
        raahp = data_param[case]["ml"]["opt"]["raahp"]
        mltype = data_param[case]["ml"]["mltype"]
        mlout = data_param[case]["ml"]["mlout"]
        mlplot = data_param[case]["ml"]["mlplot"]

    # Create instance of multiprocessor class
    # FIX: both handles start as None so a step activated without the matching
    # use_mc/use_data flag fails on an explicit None instead of a NameError
    # (previously only mymultiprocessmc was pre-initialised).
    mymultiprocessmc = None
    mymultiprocessdata = None
    if usemc:
        mymultiprocessmc = MultiProcesser(case, data_param[case], run_param, "mc")
    if usedata:
        mymultiprocessdata = MultiProcesser(case, data_param[case], run_param, "data")

    #creating folder if not present
    if doconversionmc is True:
        if checkdirlist(dirpklmc) is True:
            sys.exit()
    if doconversiondata is True:
        if checkdirlist(dirpkldata) is True:
            sys.exit()
    if doskimmingmc is True:
        # FIX: made both checks explicit; previously `is True` bound only to
        # the second call (a or (b is True)).
        if checkdirlist(dirpklskmc) is True or checkdir(dirpklevtcounter_allmc) is True:
            sys.exit()
    if doskimmingdata is True:
        if checkdirlist(dirpklskdata) is True or checkdir(dirpklevtcounter_alldata) is True:
            sys.exit()
    if domergingmc is True:
        if checkdirlist(dirpklmlmc) is True:
            sys.exit()
    if domergingdata is True:
        if checkdirlist(dirpklmldata) is True:
            sys.exit()
    if domergingperiodsmc is True:
        if checkdir(dirpklmltotmc) is True:
            sys.exit()
    if domergingperiodsdata is True:
        if checkdir(dirpklmltotdata) is True:
            sys.exit()
    if doml is True:
        if checkdir(mlout) is True or checkdir(mlplot) is True:
            print("check mlout and mlplot")
    if doapplymc is True:
        if checkdirlist(dirpklskdecmc) is True:
            sys.exit()
    if doapplydata is True:
        if checkdirlist(dirpklskdecdata) is True:
            sys.exit()
    if domergeapplymc is True:
        if checkdirlist(dirpklskdec_mergedmc) is True:
            sys.exit()
    if domergeapplydata is True:
        if checkdirlist(dirpklskdec_mergeddata) is True:
            sys.exit()
    if dohistomassmc is True:
        if checkdirlist(dirresultsmc) is True:
            sys.exit()
    if dohistomassdata is True:
        if checkdirlist(dirresultsdata) is True:
            print("folder exists")

    # Perform the analysis flow
    # Convert ROOT to pickle files if required
    if doconversionmc:
        mymultiprocessmc.multi_unpack_allperiods()
    if doconversiondata:
        mymultiprocessdata.multi_unpack_allperiods()

    # Skim the data if required
    if doskimmingmc:
        mymultiprocessmc.multi_skim_allperiods()
    if doskimmingdata:
        mymultiprocessdata.multi_skim_allperiods()

    # Do jet finding & analysis if desired
    if dojetdata:
        mymultiprocessdata.multi_jet()
    if dojetmc:
        mymultiprocessmc.multi_jet()

    # Merge data files for ML if required
    if domergingmc:
        mymultiprocessmc.multi_mergeml_allperiods()
    if domergingdata:
        mymultiprocessdata.multi_mergeml_allperiods()
    if domergingperiodsmc:
        mymultiprocessmc.multi_mergeml_allinone()
    if domergingperiodsdata:
        mymultiprocessdata.multi_mergeml_allinone()

    # Do machine learning if required
    if doml:
        index = 0
        for binmin, binmax in zip(binminarray, binmaxarray):
            myopt = Optimiser(data_param[case], case, data_model[mltype],
                              grid_param, binmin, binmax, raahp[index])
            if docorrelation:
                myopt.do_corr()
            if dotraining:
                myopt.do_train()
            if dotesting:
                myopt.do_test()
            if doapplytodatamc:
                myopt.do_apply()
            if docrossvalidation:
                myopt.do_crossval()
            if dolearningcurve:
                myopt.do_learningcurve()
            if doroc:
                myopt.do_roc()
            if doimportance:
                myopt.do_importance()
            if dogridsearch:
                myopt.do_grid()
            if doboundary:
                myopt.do_boundary()
            if dosignifopt:
                myopt.do_significance()
            index = index + 1

    # NOTE(review): the steps below construct MultiProcesser with the older
    # (data_param, run_param, type) arity, while the instances above use
    # (case, data_param, run_param, type) — likely stale; verify the current
    # constructor signature before relying on these branches.
    if doapplydata:
        mymultiprocessapplydata = MultiProcesser(data_param[case], run_param, "data")
        mymultiprocessapplydata.multi_apply_allperiods()
    if doapplymc:
        mymultiprocessapplymc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocessapplymc.multi_apply_allperiods()
    if domergeapplydata:
        mymultiprocessmergeapplydata = MultiProcesser(data_param[case], run_param, "data")
        mymultiprocessmergeapplydata.multi_mergeapply_allperiods()
    if domergeapplymc:
        mymultiprocessmergeapplymc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocessmergeapplymc.multi_mergeapply_allperiods()
    if dohistomassmc:
        mymultiprocessapplymc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocessapplymc.multi_histomass()
    if dohistomassdata:
        mymultiprocessapplydata = MultiProcesser(data_param[case], run_param, "data")
        mymultiprocessapplydata.multi_histomass()
    if doefficiency:
        mymultiprocesseffmc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocesseffmc.multi_efficiency()
# NOTE(review): fragment of a "max ncand" do_entire_analysis revision; the
# enclosing `def` is not visible and the original indentation was lost — the
# nesting under `if checkiffileexist is False` is a best reading; confirm.
dirpklmltotdatamax = dirpklmltotdata + "_max"
# -1 disables the candidate-capped merge outputs below.
v_max_ncand_merge = data_param[case]["multi"].get("max_ncand_merge", -1)
#creating folder if not present
counter = 0
if checkiffileexist is False:
    if doconversionmc is True:
        counter = counter + checkdirlist(dirpklmc)
    if doconversiondata is True:
        counter = counter + checkdirlist(dirpkldata)
    if doskimmingmc is True:
        # NOTE(review): return value discarded here, unlike the data branch —
        # likely should be accumulated into counter; verify intent.
        checkdirlist(dirpklskmc)
        counter = counter + checkdir(dirpklevtcounter_allmc)
    if doskimmingdata is True:
        counter = counter + checkdirlist(dirpklskdata)
        counter = counter + checkdir(dirpklevtcounter_alldata)
    if domergingmc is True:
        counter = counter + checkdirlist(dirpklmlmc)
        if v_max_ncand_merge > 0:
            counter = counter + checkdirlist(dirpklmlmcmax)
    if domergingdata is True:
        counter = counter + checkdirlist(dirpklmldata)
        if v_max_ncand_merge > 0:
            counter = counter + checkdirlist(dirpklmldatamax)
# NOTE(review): fragment of a prob-scan do_entire_analysis revision; the
# enclosing `def` is not visible, and the final `else` suite is truncated.
dofitbkgparamsbkgscan = data_config["analysis"]["fitbkgparamsbkgscan"]
doexpectedsignf = data_config["analysis"]["expectedsignf"]
dobkgshapestudy = data_config["analysis"]["bkgshapestudy"]
dirresultsdata = data_param[case]["analysis"][typean]["data"]["results"]
dirresultsmc = data_param[case]["analysis"][typean]["mc"]["results"]
dirresultsdatatot = data_param[case]["analysis"][typean]["data"][
    "resultsallp"]
dirresultsmctot = data_param[case]["analysis"][typean]["mc"]["resultsallp"]
#creating folder if not present
counter = 0
if doanalysismc is True:
    counter = counter + checkdirlist(dirresultsmc)
    counter = counter + checkdir(dirresultsmctot)
if doanalysisdata is True:
    counter = counter + checkdirlist(dirresultsdata)
    counter = counter + checkdir(dirresultsdatatot)
if counter < 0:
    # Existing result directories only abort when a new prob scan is requested;
    # otherwise the run continues with a warning.
    if doprobscan is True:
        sys.exit()
    else:
        logger.warning(
            "Directories already exists (see above), but no new prob scan")
else:
    # check and create directories
    # NOTE(review): fragment ends here — the rest of this else-suite is missing.
    if doanalysismc is True:
        checkmakedirlist(dirresultsmc)
def do_entire_analysis(): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
    """Run the analysis chain steered by the fixed-path YAML configurations.

    Reads the steering flags from default_complete.yaml and the analysis,
    model, run-list and grid-search databases from the data/ folder, then
    executes the activated steps (conversion, skimming, merging, ML,
    application, mass histograms, efficiency).
    """
    # FIX: yaml.load without an explicit Loader is deprecated since PyYAML 5.1
    # and raises in PyYAML 6; use FullLoader as the other revisions do.
    with open("default_complete.yaml", 'r') as run_config:
        data_config = yaml.load(run_config, Loader=yaml.FullLoader)
    with open("data/database_ml_parameters.yml", 'r') as param_config:
        data_param = yaml.load(param_config, Loader=yaml.FullLoader)
    with open("data/config_model_parameters.yml", 'r') as mod_config:
        data_model = yaml.load(mod_config, Loader=yaml.FullLoader)
    with open("data/database_run_list.yml", 'r') as runlist_config:
        run_param = yaml.load(runlist_config, Loader=yaml.FullLoader)
    with open("data/database_ml_gridsearch.yml", 'r') as grid_config:
        grid_param = yaml.load(grid_config, Loader=yaml.FullLoader)

    # Steering flags
    case = data_config["case"]
    doconversionmc = data_config["conversion"]["mc"]["activate"]
    doconversiondata = data_config["conversion"]["data"]["activate"]
    domergingmc = data_config["merging"]["mc"]["activate"]
    domergingdata = data_config["merging"]["data"]["activate"]
    doskimmingmc = data_config["skimming"]["mc"]["activate"]
    doskimmingdata = data_config["skimming"]["data"]["activate"]
    domergingperiodsmc = data_config["mergingperiods"]["mc"]["activate"]
    domergingperiodsdata = data_config["mergingperiods"]["data"]["activate"]
    doml = data_config["ml_study"]["activate"]
    docorrelation = data_config["ml_study"]['docorrelation']
    dotraining = data_config["ml_study"]['dotraining']
    dotesting = data_config["ml_study"]['dotesting']
    doapplytodatamc = data_config["ml_study"]['applytodatamc']
    docrossvalidation = data_config["ml_study"]['docrossvalidation']
    dolearningcurve = data_config["ml_study"]['dolearningcurve']
    doroc = data_config["ml_study"]['doroc']
    doboundary = data_config["ml_study"]['doboundary']
    doimportance = data_config["ml_study"]['doimportance']
    dogridsearch = data_config["ml_study"]['dogridsearch']
    dosignifopt = data_config["ml_study"]['dosignifopt']
    #doefficiency = run_config['doefficiency']
    doapplydata = data_config["analysis"]["data"]["doapply"]
    doapplymc = data_config["analysis"]["mc"]["doapply"]
    domergeapplydata = data_config["analysis"]["data"]["domergeapply"]
    domergeapplymc = data_config["analysis"]["mc"]["domergeapply"]
    dohistomassmc = data_config["analysis"]["mc"]["histomass"]
    dohistomassdata = data_config["analysis"]["data"]["histomass"]
    doefficiency = data_config["analysis"]["mc"]["efficiency"]

    # Directories and ML settings from the parameter database
    dirpklmc = data_param[case]["multi"]["mc"]["pkl"]
    dirpklevtcounter_allmc = data_param[case]["multi"]["mc"]["pkl_evtcounter_all"]
    dirpklskmc = data_param[case]["multi"]["mc"]["pkl_skimmed"]
    dirpklmlmc = data_param[case]["multi"]["mc"]["pkl_skimmed_merge_for_ml"]
    dirpklmltotmc = data_param[case]["multi"]["mc"]["pkl_skimmed_merge_for_ml_all"]
    dirpkldata = data_param[case]["multi"]["data"]["pkl"]
    dirpklevtcounter_alldata = data_param[case]["multi"]["data"]["pkl_evtcounter_all"]
    dirpklskdata = data_param[case]["multi"]["data"]["pkl_skimmed"]
    dirpklmldata = data_param[case]["multi"]["data"]["pkl_skimmed_merge_for_ml"]
    dirpklmltotdata = data_param[case]["multi"]["data"]["pkl_skimmed_merge_for_ml_all"]
    dirpklskdecmc = data_param[case]["analysis"]["mc"]["pkl_skimmed_dec"]
    dirpklskdec_mergedmc = data_param[case]["analysis"]["mc"]["pkl_skimmed_decmerged"]
    dirpklskdecdata = data_param[case]["analysis"]["data"]["pkl_skimmed_dec"]
    dirpklskdec_mergeddata = data_param[case]["analysis"]["data"]["pkl_skimmed_decmerged"]
    dirresultsdata = data_param[case]["analysis"]["data"]["results"]
    dirresultsmc = data_param[case]["analysis"]["mc"]["results"]
    binminarray = data_param[case]["ml"]["binmin"]
    binmaxarray = data_param[case]["ml"]["binmax"]
    raahp = data_param[case]["ml"]["opt"]["raahp"]
    mltype = data_param[case]["ml"]["mltype"]
    mlout = data_param[case]["ml"]["mlout"]
    mlplot = data_param[case]["ml"]["mlplot"]

    mymultiprocessmc = MultiProcesser(data_param[case], run_param, "mc")
    mymultiprocessdata = MultiProcesser(data_param[case], run_param, "data")

    #creating folder if not present
    # Abort when an output directory of an activated step already exists.
    if doconversionmc is True:
        if checkdirlist(dirpklmc) is True:
            sys.exit()
    if doconversiondata is True:
        if checkdirlist(dirpkldata) is True:
            sys.exit()
    if doskimmingmc is True:
        # FIX: made both checks explicit; previously `is True` bound only to
        # the second call (a or (b is True)).
        if checkdirlist(dirpklskmc) is True or checkdir(dirpklevtcounter_allmc) is True:
            sys.exit()
    if doskimmingdata is True:
        if checkdirlist(dirpklskdata) is True or checkdir(dirpklevtcounter_alldata) is True:
            sys.exit()
    if domergingmc is True:
        if checkdirlist(dirpklmlmc) is True:
            sys.exit()
    if domergingdata is True:
        if checkdirlist(dirpklmldata) is True:
            sys.exit()
    if domergingperiodsmc is True:
        if checkdir(dirpklmltotmc) is True:
            sys.exit()
    if domergingperiodsdata is True:
        if checkdir(dirpklmltotdata) is True:
            sys.exit()
    if doml is True:
        if checkdir(mlout) is True or checkdir(mlplot) is True:
            print("check mlout and mlplot")
    if doapplymc is True:
        if checkdirlist(dirpklskdecmc) is True:
            sys.exit()
    if doapplydata is True:
        if checkdirlist(dirpklskdecdata) is True:
            sys.exit()
    if domergeapplymc is True:
        if checkdirlist(dirpklskdec_mergedmc) is True:
            sys.exit()
    if domergeapplydata is True:
        if checkdirlist(dirpklskdec_mergeddata) is True:
            sys.exit()
    if dohistomassmc is True:
        if checkdirlist(dirresultsmc) is True:
            sys.exit()
    if dohistomassdata is True:
        if checkdirlist(dirresultsdata) is True:
            print("folder exists")

    #perform the analysis flow
    if doconversionmc == 1:
        mymultiprocessmc.multi_unpack_allperiods()
    if doconversiondata == 1:
        mymultiprocessdata.multi_unpack_allperiods()
    if doskimmingmc == 1:
        mymultiprocessmc.multi_skim_allperiods()
    if doskimmingdata == 1:
        mymultiprocessdata.multi_skim_allperiods()
    if domergingmc == 1:
        mymultiprocessmc.multi_mergeml_allperiods()
    if domergingdata == 1:
        mymultiprocessdata.multi_mergeml_allperiods()
    if domergingperiodsmc == 1:
        mymultiprocessmc.multi_mergeml_allinone()
    if domergingperiodsdata == 1:
        mymultiprocessdata.multi_mergeml_allinone()

    # ML study: one Optimiser per pT bin, running only the activated sub-steps.
    if doml is True:
        index = 0
        for binmin, binmax in zip(binminarray, binmaxarray):
            myopt = Optimiser(data_param[case], case, data_model[mltype],
                              grid_param, binmin, binmax, raahp[index])
            if docorrelation is True:
                myopt.do_corr()
            if dotraining is True:
                myopt.do_train()
            if dotesting is True:
                myopt.do_test()
            if doapplytodatamc is True:
                myopt.do_apply()
            if docrossvalidation is True:
                myopt.do_crossval()
            if dolearningcurve is True:
                myopt.do_learningcurve()
            if doroc is True:
                myopt.do_roc()
            if doimportance is True:
                myopt.do_importance()
            if dogridsearch is True:
                myopt.do_grid()
            if doboundary is True:
                myopt.do_boundary()
            if dosignifopt is True:
                myopt.do_significance()
            index = index + 1

    if doapplydata is True:
        mymultiprocessapplydata = MultiProcesser(data_param[case], run_param, "data")
        mymultiprocessapplydata.multi_apply_allperiods()
    if doapplymc is True:
        mymultiprocessapplymc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocessapplymc.multi_apply_allperiods()
    if domergeapplydata is True:
        mymultiprocessmergeapplydata = MultiProcesser(data_param[case], run_param, "data")
        mymultiprocessmergeapplydata.multi_mergeapply_allperiods()
    if domergeapplymc is True:
        mymultiprocessmergeapplymc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocessmergeapplymc.multi_mergeapply_allperiods()
    if dohistomassmc is True:
        mymultiprocessapplymc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocessapplymc.multi_histomass()
    if dohistomassdata is True:
        mymultiprocessapplydata = MultiProcesser(data_param[case], run_param, "data")
        mymultiprocessapplydata.multi_histomass()
    if doefficiency is True:
        mymultiprocesseffmc = MultiProcesser(data_param[case], run_param, "mc")
        mymultiprocesseffmc.multi_efficiency()