Ejemplo n.º 1
0
def test():
    """Exercise the signal/background sample preparation for one fixed case.

    Reads the settings for ``case`` from the ML parameter database, loads the
    signal and background dataframes, filters them with ``filterdataframe``
    on ``var_skimming``/``varmin``/``varmax``, applies the signal and
    background selections, shuffles both samples with a fixed seed and builds
    the training and test samples. Only the sample sizes are logged; nothing
    is returned.
    """
    data = get_database_ml_parameters()
    nevt_sig = 10000
    nevt_bkg = 10000
    mltype = "BinaryClassification"
    mlsubtype = "HFmeson"
    case = "Lc"
    var_skimming = ["pt_cand_ML"]
    varmin = [2]
    varmax = [4]
    test_frac = 0.2
    rnd_splt = 12
    rnd_shuffle = 12

    logger = get_logger()
    summary_string = f"#sg events: {nevt_sig}\n#bkg events: {nevt_bkg}\nmltype: {mltype}\n" \
                     f"mlsubtype: {mlsubtype}\ncase: {case}"
    logger.debug(summary_string)

    var_all = data[case]["var_all"]
    var_signal = data[case]["var_signal"]
    sel_signal = data[case]["sel_signal"]
    sel_bkg = data[case]["sel_bkg"]

    # Currently unused database entries, kept for reference:
    #     var_training = data[case]["var_training"]
    #     var_target = data[case]["var_target"]
    #     var_corr_x, var_corr_y = data[case]["var_correlation"]

    loadsampleoption = 1

    if loadsampleoption == 1:
        filesig, filebkg = data[case]["sig_bkg_files"]
        trename = data[case]["tree_name"]
        df_sig = getdataframe(filesig, trename, var_all)
        df_bkg = getdataframe(filebkg, trename, var_all)
        df_sig = filterdataframe(df_sig, var_skimming, varmin, varmax)
        df_bkg = filterdataframe(df_bkg, var_skimming, varmin, varmax)
        # The seed belongs to shuffle(), not to query(): DataFrame.query()
        # does not accept a random_state keyword, and an unseeded shuffle
        # would make the train/test content change from run to run.
        df_sig = df_sig.query(sel_signal)
        df_bkg = df_bkg.query(sel_bkg)
        df_sig = shuffle(df_sig, random_state=rnd_shuffle)
        df_bkg = shuffle(df_bkg, random_state=rnd_shuffle)
        df_ml_train, df_ml_test = \
            prep_mlsamples(df_sig, df_bkg, var_signal, nevt_sig, nevt_bkg, test_frac, rnd_splt)
        df_sig_train, df_bkg_train = split_df_sigbkg(df_ml_train, var_signal)
        df_sig_test, df_bkg_test = split_df_sigbkg(df_ml_test, var_signal)
        logger.info("events for ml train %d and test %d", len(df_ml_train), len(df_ml_test))
        logger.info("events for signal train %d and test %d", len(df_sig_train), len(df_sig_test))
        logger.info("events for bkg train %d and test %d", len(df_bkg_train), len(df_bkg_test))
Ejemplo n.º 2
0
def _decode_form_selection(selection_str):
    """Convert a selection string received from the form into a query string.

    Commas are turned into spaces and the HTML entities ``&lt;`` / ``&gt;``
    are turned into ``<`` / ``>``. Whole-substring replacement is required
    here: comparing one character at a time against a multi-character entity
    can never match, which would leave the entities undecoded.
    """
    decoded = selection_str.replace(',', ' ')
    return decoded.replace('&lt;', '<').replace('&gt;', '>')


def post_form(req):  # pylint: disable=too-many-locals, too-many-statements, too-many-branches
    """Run the ML steps requested in the submitted form and render the results.

    Args:
        req: request object; every field is read from it through ``get_form``.

    Returns:
        The rendered ``display.html`` template. Each image argument is ``None``
        unless the corresponding step was enabled in the form.
    """

    # Collect configuration in a dictionary for further processing
    run_config = {}

    mltype = "BinaryClassification"
    run_config["mltype"] = mltype
    case = get_form(req, "case")
    run_config["case"] = case
    filesig = get_form(req, "filesig")
    filebkg = get_form(req, "filebkg")
    trename = get_form(req, "tree_name")
    var_all = get_form(req, "var_all").split(',')
    var_signal = get_form(req, "var_signal")
    sel_signal = get_form(req, "sel_signal")
    sel_bkg = _decode_form_selection(get_form(req, "sel_bkg"))

    var_training = get_form(req, "var_training").split(',')
    var_corr_x = get_form(req, "var_correlation_x").split(',')
    var_corr_y = get_form(req, "var_correlation_y").split(',')
    var_binning = get_form(req, "var_binning")
    var_binning_min = float(get_form(req, 'var_binning_min', var_type=float))
    var_binning_max = float(get_form(req, 'var_binning_max', var_type=float))
    run_config["binmin"] = var_binning_min
    run_config["binmax"] = var_binning_max

    # The form carries a missing preselection as the literal text 'None'.
    presel_reco_str = get_form(req, "presel_reco")
    if presel_reco_str == 'None':
        presel_reco = None
    else:
        presel_reco = _decode_form_selection(presel_reco_str)

    activate_scikit = get_form(req, 'activate_scikit', var_type=bool)
    activate_xgboost = get_form(req, 'activate_xgboost', var_type=bool)
    activate_keras = get_form(req, 'activate_keras', var_type=bool)

    docorrelation = get_form(req, 'docorrelation', var_type=bool)
    run_config["docorrelation"] = docorrelation
    dotraining = get_form(req, 'dotraining', var_type=bool)
    run_config["dotraining"] = dotraining
    doROC = get_form(req, 'doROC', var_type=bool)
    run_config["doROC"] = doROC
    dolearningcurve = get_form(req, 'dolearningcurve', var_type=bool)
    run_config["dolearningcurve"] = dolearningcurve
    docrossvalidation = get_form(req, 'docrossvalidation', var_type=bool)
    run_config["docrossvalidation"] = docrossvalidation
    doimportance = get_form(req, 'doimportance', var_type=bool)
    run_config["doimportance"] = doimportance
    dogridsearch = get_form(req, 'dogridsearch', var_type=bool)
    run_config["dogridsearch"] = dogridsearch

    rnd_shuffle = int(get_form(req, 'rnd_shuffle', var_type=int))
    run_config["rnd_shuffle"] = rnd_shuffle
    nevt_sig = int(get_form(req, 'nevt_sig', var_type=int))
    run_config["nevt_sig"] = nevt_sig
    nevt_bkg = int(get_form(req, 'nevt_bkg', var_type=int))
    run_config["nevt_bkg"] = nevt_bkg
    test_frac = float(get_form(req, 'test_frac', var_type=float))
    run_config["test_frac"] = test_frac
    rnd_splt = int(get_form(req, 'rnd_splt', var_type=int))
    run_config["rnd_splt"] = rnd_splt
    nkfolds = int(get_form(req, 'nkfolds', var_type=int))
    run_config["nkfolds"] = nkfolds
    ncores = int(get_form(req, 'ncores', var_type=int))
    run_config["ncores"] = ncores

    data = get_database_ml_parameters()

    # Construct Configuration object from run_config
    conf = Configuration(run_config_input=run_config)
    conf.configure()

    model_config = conf.get_model_config()

    string_selection = createstringselection(var_binning, var_binning_min,
                                             var_binning_max)
    suffix = f"nevt_sig{nevt_sig}_nevt_bkg{nevt_bkg}_" \
             f"{mltype}{case}_{string_selection}"

    dataframe = f"dataframes_{suffix}"
    plotdir = f"plots_{suffix}"
    output = f"output_{suffix}"
    checkdir(dataframe)
    checkdir(plotdir)
    checkdir(output)

    classifiers = []
    classifiers_scikit = []
    classifiers_xgboost = []
    classifiers_keras = []

    names = []
    names_scikit = []
    names_xgboost = []
    names_keras = []

    trainedmodels = []

    df_sig = getdataframe(filesig, trename, var_all)
    df_bkg = getdataframe(filebkg, trename, var_all)
    if presel_reco is not None:
        df_sig = df_sig.query(presel_reco)
        df_bkg = df_bkg.query(presel_reco)
    df_sig = filterdataframe_singlevar(df_sig, var_binning, var_binning_min,
                                       var_binning_max)
    df_bkg = filterdataframe_singlevar(df_bkg, var_binning, var_binning_min,
                                       var_binning_max)

    # Output images; each stays None unless the matching step below runs
    imageIO_vardist: BytesIO = None
    imageIO_scatterplot: BytesIO = None
    imageIO_corr_sig: BytesIO = None
    imageIO_corr_bkg: BytesIO = None
    imageIO_precision_recall: BytesIO = None
    imageIO_ROC: BytesIO = None
    imageIO_plot_learning_curves: BytesIO = None
    img_scoresRME: BytesIO = None
    img_import: BytesIO = None
    img_gridsearch: BytesIO = None

    # NOTE(review): this call passes data[case] between sel_signal and
    # sel_bkg; confirm that this matches the create_mlsamples signature.
    # pylint: disable=unused-variable
    _, _, df_sig_train, df_bkg_train, _, _, x_train, y_train, x_test, y_test = \
        create_mlsamples(df_sig, df_bkg, sel_signal, data[case], sel_bkg, rnd_shuffle,
                         var_signal, var_training, nevt_sig, nevt_bkg, test_frac, rnd_splt)
    if docorrelation:
        imageIO_vardist, imageIO_scatterplot, imageIO_corr_sig, imageIO_corr_bkg = \
            do_correlation(df_sig_train, df_bkg_train, var_all, var_corr_x, var_corr_y, plotdir)

    # Using the activate_* flags is for now a work-around
    if activate_scikit:
        classifiers_scikit, names_scikit = getclf_scikit(model_config)
        classifiers = classifiers + classifiers_scikit
        names = names + names_scikit

    if activate_xgboost:
        classifiers_xgboost, names_xgboost = getclf_xgboost(model_config)
        classifiers = classifiers + classifiers_xgboost
        names = names + names_xgboost

    if activate_keras:
        classifiers_keras, names_keras = getclf_keras(model_config,
                                                      len(x_train.columns))
        classifiers = classifiers + classifiers_keras
        names = names + names_keras

    if dotraining:
        trainedmodels = fit(names, classifiers, x_train, y_train)
        savemodels(names, trainedmodels, output, suffix)

    if doROC:
        imageIO_precision_recall, imageIO_ROC = \
            precision_recall(names, classifiers, suffix, x_train, y_train, nkfolds, plotdir)

    if docrossvalidation:
        df_scores = []
        if mltype == "Regression":
            df_scores = cross_validation_mse_continuous(
                names, classifiers, x_train, y_train, nkfolds, ncores)
        if mltype == "BinaryClassification":
            df_scores = cross_validation_mse(names, classifiers, x_train,
                                             y_train, nkfolds, ncores)
        img_scoresRME = plot_cross_validation_mse(names, df_scores, suffix,
                                                  plotdir)

    if doimportance:
        img_import = importanceplotall(
            var_training, names_scikit + names_xgboost,
            classifiers_scikit + classifiers_xgboost, suffix, plotdir)

    if dolearningcurve:
        npoints = 10
        imageIO_plot_learning_curves = plot_learning_curves(
            names, classifiers, suffix, plotdir, x_train, y_train, npoints)

    if dogridsearch:
        datasearch = get_database_ml_gridsearch()
        analysisdb = datasearch[mltype]
        names_cv, clf_cv, par_grid_cv, refit_cv, var_param, \
            par_grid_cv_keys = read_grid_dict(analysisdb)
        _, _, dfscore = do_gridsearch(names_cv, clf_cv, par_grid_cv, refit_cv,
                                      x_train, y_train, nkfolds, ncores)
        img_gridsearch = perform_plot_gridsearch(names_cv, dfscore,
                                                 par_grid_cv, par_grid_cv_keys,
                                                 var_param, plotdir, suffix,
                                                 0.1)

    return JENV.get_template("display.html").render(
        imageIO_vardist=imageIO_vardist,
        imageIO_scatterplot=imageIO_scatterplot,
        imageIO_corr_sig=imageIO_corr_sig,
        imageIO_corr_bkg=imageIO_corr_bkg,
        imageIO_precision_recall=imageIO_precision_recall,
        imageIO_ROC=imageIO_ROC,
        imageIO_plot_learning_curves=imageIO_plot_learning_curves,
        img_scoresRME=img_scoresRME,
        img_import=img_import,
        img_gridsearch=img_gridsearch)
Ejemplo n.º 3
0
def post_continue(req):  # pylint: disable=unused-argument
    """Render the configuration page pre-filled from the ML parameter database.

    The case chosen in the form selects the database entry. List-valued
    settings are joined with commas, and selection strings are encoded
    ('<' -> '&lt;', '>' -> '&gt;', ' ' -> ',') before being handed to the
    ``test.html`` template.
    """
    subtype = get_form(req, "slct1")
    case = get_form(req, "slct2")
    data = get_database_ml_parameters()
    case_params = data[case]

    # Characters that are rewritten when a selection is placed in the form.
    substitutions = {'<': '&lt;', '>': '&gt;', ' ': ','}

    def encode_selection(selection):
        """Return ``selection`` with every mapped character substituted."""
        return ''.join(substitutions.get(char, char) for char in selection)

    filesig, filebkg = case_params["sig_bkg_files"]
    filesig = os.path.join(DATA_PREFIX, filesig)
    filebkg = os.path.join(DATA_PREFIX, filebkg)
    trename = case_params["tree_name"]
    var_all_str = ','.join(case_params["var_all"])
    var_signal = case_params["var_signal"]
    sel_signal = case_params["sel_signal"]
    sel_bkg_str = encode_selection(case_params["sel_bkg"])
    var_training_str = ','.join(case_params["var_training"])
    corr_x, corr_y = case_params["var_correlation"]
    var_corr_x_str = ','.join(corr_x)
    var_corr_y_str = ','.join(corr_y)
    var_binning = case_params["var_binning"]
    presel_reco = case_params["presel_reco"]
    # A missing preselection is handed to the template as None.
    presel_reco_str = None if presel_reco is None else encode_selection(presel_reco)

    context = {
        "subtype": subtype,
        "case": case,
        "filesig": filesig,
        "filebkg": filebkg,
        "trename": trename,
        "var_all_str": var_all_str,
        "var_signal": var_signal,
        "sel_signal": sel_signal,
        "sel_bkg_str": sel_bkg_str,
        "var_training_str": var_training_str,
        "var_corr_x_str": var_corr_x_str,
        "var_corr_y_str": var_corr_y_str,
        "var_binning": var_binning,
        # Fixed limits offered for the binning variable.
        "var_binning_min": 2,
        "var_binning_max": 3,
        "presel_reco_str": presel_reco_str,
    }
    return JENV.get_template("test.html").render(**context)
Ejemplo n.º 4
0
    plt.savefig(f'{plot_dir}/FONLL_curve_{suffix}.png')


#pylint: disable=too-many-statements, too-many-locals
def study_signif(case, names, bin_lim, file_mc_gen, file_data_evt_ml,
                 file_data_evt_tot, df_mc_reco, df_ml_test, df_data_dec,
                 suffix, plot_dir):
    """
    Study the efficiency and the expected signal significance as a function of
    the threshold value on a ML model output.

    Args:
        case: key of the entry to read from the ML parameter database.
        bin_lim: sequence whose elements 0 and 1 are passed as the limits to
            ``filterdataframe_singlevar`` for the binning variable.
        file_mc_gen: path of a pickled dataframe, read with ``pd.read_pickle``
            and filtered with the database's ``presel_gen`` selection.
        file_data_evt_ml: path of a pickled dataframe holding the portion of
            data events used for ML.
        names, file_data_evt_tot, df_mc_reco, df_ml_test, df_data_dec, suffix,
        plot_dir: not used in the part of the function shown here.

    NOTE(review): ``pd.read_pickle`` must only be given trusted files, since
    unpickling can execute arbitrary code.
    """
    logger = get_logger()
    # Run ROOT without graphics and raise its message threshold to kWarning.
    gROOT.SetBatch(True)
    gROOT.ProcessLine("gErrorIgnoreLevel = kWarning;")

    # Settings of this case, including the significance-optimisation options.
    gen_dict = get_database_ml_parameters()[case]
    mass = gen_dict["mass"]
    mass_fit_lim = gen_dict['mass_fit_lim']
    bin_width = gen_dict['bin_width']
    var_bin = gen_dict['variables']['var_binning']
    sopt_dict = gen_dict['signif_opt']
    bkg_fract = sopt_dict['bkg_data_fraction']
    save_fit = sopt_dict['save_fit']

    # Pickled dataframe restricted by the generator-level preselection and
    # by the requested range of the binning variable.
    df_mc_gen = pd.read_pickle(file_mc_gen)
    df_mc_gen = df_mc_gen.query(gen_dict['presel_gen'])
    df_mc_gen = filterdataframe_singlevar(df_mc_gen, var_bin, bin_lim[0],
                                          bin_lim[1])
    df_evt_ml = pd.read_pickle(
        file_data_evt_ml)  # portion of data events used for ML
    # Number of those events that pass the event selection.
    n_events_ml = len(df_evt_ml.query(sopt_dict['sel_event']))
Ejemplo n.º 5
0
def doclassification_regression(conf):  # pylint: disable=too-many-locals, too-many-statements, too-many-branches
    """Run the classification/regression steps enabled in the configuration.

    Every step (correlation plots, standardisation, PCA, training, testing,
    application to data/MC, cross validation, learning curves, ROC, decision
    boundaries, importance, regression plots, grid search, significance
    optimisation) runs only when its flag in the run configuration equals 1.

    Args:
        conf: configuration object providing ``get_run_config()`` and
            ``get_model_config()``.

    Raises:
        ValueError: if ``loadsampleoption`` is not 1, because no samples
            would be loaded for the later steps.
    """

    logger = get_logger()
    logger.info("Start classification_regression run")

    run_config = conf.get_run_config()
    model_config = conf.get_model_config()

    mltype = run_config['mltype']
    mlsubtype = run_config['mlsubtype']
    case = run_config['case']
    loadsampleoption = run_config['loadsampleoption']
    binmin = run_config['binmin']
    binmax = run_config['binmax']
    rnd_shuffle = run_config['rnd_shuffle']
    nevt_sig = run_config['nevt_sig']
    nevt_bkg = run_config['nevt_bkg']
    test_frac = run_config['test_frac']
    rnd_splt = run_config['rnd_splt']
    docorrelation = run_config['docorrelation']
    dostandard = run_config['dostandard']
    dopca = run_config['dopca']
    dotraining = run_config['dotraining']
    dotesting = run_config['dotesting']
    applytodatamc = run_config['applytodatamc']
    docrossvalidation = run_config['docrossvalidation']
    dolearningcurve = run_config['dolearningcurve']
    doROC = run_config['doROC']
    doboundary = run_config['doboundary']
    doimportance = run_config['doimportance']
    dopltregressionxy = run_config['dopltregressionxy']
    dogridsearch = run_config['dogridsearch']
    dosignifopt = run_config['dosignifopt']
    nkfolds = run_config['nkfolds']
    ncores = run_config['ncores']

    data = get_database_ml_parameters()
    filesig, filebkg = data[case]["sig_bkg_files"]
    filedata, filemc = data[case]["data_mc_files"]
    trename = data[case]["tree_name"]
    var_all = data[case]["var_all"]
    var_signal = data[case]["var_signal"]
    sel_signal = data[case]["sel_signal"]
    sel_bkg = data[case]["sel_bkg"]
    var_training = data[case]["var_training"]
    var_target = data[case]["var_target"]
    var_corr_x, var_corr_y = data[case]["var_correlation"]
    var_boundaries = data[case]["var_boundaries"]
    var_binning = data[case]['var_binning']
    presel_reco = data[case]["presel_reco"]

    summary_string = f"#sig events: {nevt_sig}\n#bkg events: {nevt_bkg}\nmltype: {mltype}\n" \
                     f"mlsubtype: {mlsubtype}\ncase: {case}"
    logger.debug(summary_string)

    string_selection = createstringselection(var_binning, binmin, binmax)
    suffix = f"nevt_sig{nevt_sig}_nevt_bkg{nevt_bkg}_" \
             f"{mltype}{case}_{string_selection}"
    dataframe = f"dataframes_{suffix}"
    plotdir = f"plots_{suffix}"
    output = f"output_{suffix}"
    checkdir(dataframe)
    checkdir(plotdir)
    checkdir(output)

    filesig = os.path.join(DATA_PREFIX, filesig)
    filebkg = os.path.join(DATA_PREFIX, filebkg)
    filedata = os.path.join(DATA_PREFIX, filedata)
    filemc = os.path.join(DATA_PREFIX, filemc)

    trainedmodels = []

    # Only option 1 loads the samples. The classifier setup below always
    # needs x_train, so any other value could only end in a NameError.
    if loadsampleoption != 1:
        raise ValueError(
            f"Unsupported loadsampleoption {loadsampleoption!r}: only 1 is implemented")

    df_sig = getdataframe(filesig, trename, var_all)
    df_bkg = getdataframe(filebkg, trename, var_all)
    if presel_reco is not None:
        df_sig = df_sig.query(presel_reco)
        df_bkg = df_bkg.query(presel_reco)
    df_sig = filterdataframe_singlevar(df_sig, var_binning, binmin, binmax)
    df_bkg = filterdataframe_singlevar(df_bkg, var_binning, binmin, binmax)
    _, df_ml_test, df_sig_train, df_bkg_train, _, _, \
        x_train, y_train, x_test, y_test = \
        create_mlsamples(df_sig, df_bkg, sel_signal, sel_bkg, rnd_shuffle,
                         var_signal, var_training,
                         nevt_sig, nevt_bkg, test_frac, rnd_splt)

    if docorrelation == 1:
        do_correlation(df_sig_train, df_bkg_train, var_all, var_corr_x,
                       var_corr_y, plotdir)

    if dostandard == 1:
        x_train = getdataframe_standardised(x_train)

    if dopca == 1:
        n_pca = 9
        x_train, pca = get_pcadataframe_pca(x_train, n_pca)
        plotvariance_pca(pca, plotdir)

    classifiers_scikit, names_scikit = getclf_scikit(model_config)

    classifiers_xgboost, names_xgboost = getclf_xgboost(model_config)

    classifiers_keras, names_keras = getclf_keras(model_config,
                                                  len(x_train.columns))

    classifiers = classifiers_scikit + classifiers_xgboost + classifiers_keras
    names = names_scikit + names_xgboost + names_keras

    if dotraining == 1:
        trainedmodels = fit(names, classifiers, x_train, y_train)
        savemodels(names, trainedmodels, output, suffix)

    if dotesting == 1:
        # The model predictions are added to the test dataframe
        df_ml_test = test(mltype, names, trainedmodels, df_ml_test,
                          var_training, var_signal)
        df_ml_test_to_df = f"{output}/testsample_{suffix}_mldecision.pkl"
        df_ml_test_to_root = f"{output}/testsample_{suffix}_mldecision.root"
        df_ml_test.to_pickle(df_ml_test_to_df)
        write_tree(df_ml_test_to_root, trename, df_ml_test)

    if applytodatamc == 1:
        df_data = getdataframe(filedata, trename, var_all)
        df_mc = getdataframe(filemc, trename, var_all)
        if presel_reco is not None:
            df_mc = df_mc.query(presel_reco)
            df_data = df_data.query(presel_reco)
        df_data = filterdataframe_singlevar(df_data, var_binning, binmin,
                                            binmax)
        df_mc = filterdataframe_singlevar(df_mc, var_binning, binmin, binmax)
        # The model predictions are added to the dataframes of data and MC
        df_data = apply(mltype, names, trainedmodels, df_data, var_training)
        df_mc = apply(mltype, names, trainedmodels, df_mc, var_training)
        df_data_to_root = f"{output}/data_{suffix}_mldecision.root"
        df_mc_to_root = f"{output}/mc_{suffix}_mldecision.root"
        write_tree(df_data_to_root, trename, df_data)
        write_tree(df_mc_to_root, trename, df_mc)

    if docrossvalidation == 1:
        df_scores = []
        if mltype == "Regression":
            df_scores = cross_validation_mse_continuous(
                names, classifiers, x_train, y_train, nkfolds, ncores)
        if mltype == "BinaryClassification":
            df_scores = cross_validation_mse(names, classifiers, x_train,
                                             y_train, nkfolds, ncores)
        plot_cross_validation_mse(names, df_scores, suffix, plotdir)

    if dolearningcurve == 1:
        npoints = 10
        plot_learning_curves(names, classifiers, suffix, plotdir, x_train,
                             y_train, npoints)

    if doROC == 1:
        precision_recall(names, classifiers, suffix, x_train, y_train, nkfolds,
                         plotdir)

    if doboundary == 1:
        # Same argument as the main scikit setup above; this call previously
        # passed mltype instead of the model configuration.
        classifiers_scikit_2var, names_2var = getclf_scikit(model_config)
        classifiers_keras_2var, names_keras_2var = getclf_keras(
            model_config, 2)
        classifiers_2var = classifiers_scikit_2var + classifiers_keras_2var
        names_2var = names_2var + names_keras_2var
        x_test_boundary = x_test[var_boundaries]
        trainedmodels_2var = fit(names_2var, classifiers_2var, x_test_boundary,
                                 y_test)
        decisionboundaries(names_2var, trainedmodels_2var, suffix + "2var",
                           x_test_boundary, y_test, plotdir)

    if doimportance == 1:
        importanceplotall(var_training, names_scikit + names_xgboost,
                          classifiers_scikit + classifiers_xgboost, suffix,
                          plotdir)

    if dopltregressionxy == 1:
        plotdistributiontarget(names, df_ml_test, var_target, suffix, plotdir)
        plotscattertarget(names, df_ml_test, var_target, suffix, plotdir)

    if dogridsearch == 1:
        datasearch = get_database_ml_gridsearch()
        analysisdb = datasearch[mltype]
        names_cv, clf_cv, par_grid_cv, refit_cv, var_param, \
            par_grid_cv_keys = read_grid_dict(analysisdb)
        _, _, dfscore = do_gridsearch(names_cv, clf_cv, par_grid_cv, refit_cv,
                                      x_train, y_train, nkfolds, ncores)
        perform_plot_gridsearch(names_cv, dfscore, par_grid_cv,
                                par_grid_cv_keys, var_param, plotdir, suffix,
                                0.1)

    if dosignifopt == 1:
        logger.info("Doing significance optimization")
        # df_data, df_mc and the test predictions only exist when training,
        # testing and application to data/MC all ran above.
        if dotraining and dotesting and applytodatamc:
            if (mlsubtype == "HFmeson") and case in ("Dsnew", "Lcnew", "Dzero",
                                                     "Dplus", "Dstar"):
                df_data_opt = df_data.query(sel_bkg)
                df_data_opt = shuffle(df_data_opt, random_state=rnd_shuffle)
                study_signif(case, names, [binmin, binmax], filemc, filedata,
                             df_mc, df_ml_test, df_data_opt, suffix, plotdir)
            else:
                logger.error(
                    "Optimisation is not implemented for this classification problem."
                )
        else:
            logger.error(
                "Training, testing and applytodata flags must be set to 1")