def histVariables2D(self, vX, vY, plot_name, sample, cat):
        """Plot the weighted 2D distribution of two variables for one sample.

        Args:
            vX: name of the variable drawn on the x axis.
            vY: name of the variable drawn on the y axis.
            plot_name: output name handed to ``setup.saveCanvas``.
            sample: key into ``self.samples``.
            cat: key into the sample's ``cut_data``; also used to build the
                category label.

        If ``binning.getBinrange`` yields nothing for a variable, the range
        is taken from the data of the sample and a matching config line is
        appended to ``new_variable_configs.txt``.
        """

        def rangeFromData(variable):
            # fall back to the observed [min, max] and log a config suggestion
            column = self.samples[sample].cut_data[cat][variable].values
            upper = max(column)
            lower = min(column)
            suggestion = "variables[\""+variable+"\"]\t\t\t= Variable(bin_range = [{},{}])\n".format(lower, upper)
            with open("new_variable_configs.txt", "a") as out:
                out.write(suggestion)
            return [lower, upper]

        # binning and axis ranges as configured
        nBinsX = binning.getNbins(vX)
        nBinsY = binning.getNbins(vY)
        rangeX = binning.getBinrange(vX)
        rangeY = binning.getBinrange(vY)

        # replace missing ranges by the ones observed in data
        if not rangeX:
            rangeX = rangeFromData(vX)
        if not rangeY:
            rangeY = rangeFromData(vY)

        # collect the columns entering the histogram
        data = self.samples[sample].cut_data[cat]
        weights = data["weight"].values
        valuesX = data[vX].values
        valuesY = data[vY].values

        hist = setup.setupHistogram2D(
            valuesX     = valuesX,
            valuesY     = valuesY,
            weights     = weights,
            binsX       = nBinsX,
            binsY       = nBinsY,
            rangeX      = rangeX,
            rangeY      = rangeY,
            titleX      = vX,
            titleY      = vY)

        canvas = setup.drawHistOnCanvas2D(
            hist        = hist,
            canvasName  = vX+"_vs_"+vY,
            catLabel    = JTcut.getJTlabel(cat),
            sampleName  = sample)

        # add the lumi label, then write the plot
        setup.printLumi(canvas, lumi = self.options["lumiScale"], twoDim = True)
        setup.saveCanvas(canvas, plot_name)
Exemple #2
0
    def __init__(self,
                 save_path,
                 input_samples,
                 event_category,
                 train_variables,
                 batch_size=5000,
                 train_epochs=500,
                 early_stopping=10,
                 optimizer=None,
                 loss_function="categorical_crossentropy",
                 test_percentage=0.2,
                 eval_metrics=None,
                 additional_cut=None,
                 use_pca=False):
        """Store the training setup, load the data and prepare output folders.

        Args:
            save_path: output directory for results (created if missing).
            input_samples: list of samples to load into the dataframe.
            event_category: name of the event category (usually nJet/nTag).
            train_variables: list of input variables.
            batch_size: batch size for training.
            train_epochs: number of training epochs.
            early_stopping: number of early stopping epochs.
            optimizer: optimizer for training; if falsy, the one given by
                the architecture of the category is used.
            loss_function: loss function for training.
            test_percentage: percentage of events saved for testing.
            eval_metrics: additional metrics for evaluating the training.
            additional_cut: additional cuts applied after variable norm.
            use_pca: option for principal component analysis.
        """

        def ensure_dir(path):
            # create the directory unless it is already there
            if not os.path.exists(path):
                os.makedirs(path)

        # samples and output location
        self.input_samples = input_samples
        self.save_path = save_path
        ensure_dir(self.save_path)

        # category name plus the cut string and label derived from it
        self.JTstring = event_category
        self.event_category = JTcut.getJTstring(event_category)
        self.categoryLabel = JTcut.getJTlabel(event_category)

        # training configuration
        self.train_variables = train_variables
        self.batch_size = batch_size
        self.train_epochs = train_epochs
        self.early_stopping = early_stopping
        self.test_percentage = test_percentage
        self.loss_function = loss_function
        self.eval_metrics = eval_metrics
        self.additional_cut = additional_cut
        self.PCA = use_pca

        # load the data set; the output classes are defined by it
        self.data = self._load_datasets()
        self.event_classes = self.data.output_classes

        # store the variable norms next to the checkpoints
        self.cp_path = self.save_path + "/checkpoints/"
        ensure_dir(self.cp_path)
        out_file = self.cp_path + "/variable_norm.csv"
        self.data.norm_csv.to_csv(out_file)
        print("saved variabe norms at " + str(out_file))

        # directory for plots
        self.plot_path = self.save_path + "/plots/"
        ensure_dir(self.plot_path)

        # architecture of this category and the layer names
        self.architecture = arch.getArchitecture(self.JTstring)
        self.inputName = "inputLayer"
        self.outputName = "outputLayer"

        # an explicitly given optimizer wins over the architecture default
        self.optimizer = optimizer if optimizer else self.architecture["optimizer"]
Exemple #3
0
	def __init__(self, in_path, save_path,
				event_classes,
				event_category,
				train_variables,
				batch_size = 5000,
				train_epochs = 10,
				early_stopping = 10,
				optimizer = None,
				loss_function = "categorical_crossentropy",
				test_percentage = 0.2,
				eval_metrics = None,
				additional_cut = None,
				phi_padding = 0):
		"""Store the training setup, load the data and prepare output folders.

		Args:
			in_path: path to the input files.
			save_path: output directory for results (created if missing).
			event_classes: list of classes.
			event_category: name of the event category (usually nJet/nTag).
			train_variables: list of input features.
			batch_size: batch size for training.
			train_epochs: maximum number of training epochs.
			early_stopping: number of early stopping epochs.
			optimizer: optimizer for training; "adam" is used if falsy.
			loss_function: loss function for training.
			test_percentage: percentage of events saved for testing.
			eval_metrics: additional metrics for evaluating the training.
			additional_cut: additional cut applied after variable norm.
			phi_padding: stored as ``self.phi_padding``; not used further
				in this method.
		"""
		# input and output locations
		self.in_path = in_path
		self.save_path = save_path
		if not os.path.exists(self.save_path):
			os.makedirs( self.save_path )

		# classes and event category (plus derived cut string and label)
		self.event_classes = event_classes
		self.JTstring       = event_category
		self.event_category = JTcut.getJTstring(event_category)
		self.categoryLabel  = JTcut.getJTlabel(event_category)

		# training configuration
		self.train_variables = train_variables
		self.batch_size = batch_size
		self.train_epochs = train_epochs
		self.early_stopping = early_stopping
		self.test_percentage = test_percentage
		self.loss_function = loss_function
		self.eval_metrics = eval_metrics
		self.additional_cut = additional_cut

		# raw optimizer argument; the default is filled in at the very end
		self.optimizer = optimizer

		self.phi_padding = phi_padding

		# load dataset
		self.data = self._load_datasets()
		# NOTE(review): bare attribute access without a call; it only has an
		# effect if get_train_data_cnn is a property -- confirm the intent
		self.data.get_train_data_cnn

		# checkpoint directory
		out_path = self.save_path+"/checkpoints"
		if not os.path.exists(out_path):
			os.makedirs(out_path)
		out_file = out_path+"/variable_norm.csv"
		# NOTE(review): nothing is written to out_file in this method; the
		# message below is printed regardless
		print("saved variable norms at "+str(out_file))

		# directory for plots
		self.plot_path = self.save_path+"/plots/"
		if not os.path.exists(self.plot_path):
			os.makedirs(self.plot_path)

		# layer names
		self.inputName = "inputLayer"
		self.outputName = "outputLayer"

		# optimizer for training, with "adam" as fallback
		self.optimizer = optimizer if optimizer else "adam"
    def perform1Danalysis(self, metric="KS"):
        """Compare all samples variable by variable in every category.

        For each category in ``self.categories`` the samples are cut to that
        category, the distances from ``self.calculateAllDistances`` are
        plotted as a matrix per variable and collected in a CSV file.
        Variables whose largest distance value is below 0.05 are written to
        a "good variables" text file.

        Args:
            metric: forwarded to ``self.calculateAllDistances`` and used in
                the name of the CSV file.
        """
        for cat in self.categories:
            print("starting with category {}".format(cat))

            # output locations of this category
            cat_dir = self.output_dir + "/" + cat + "/"
            if not os.path.exists(cat_dir):
                os.makedirs(cat_dir)
            category_prefix = self.output_dir + "/" + cat
            output_csv = category_prefix + "_1Ddistances_" + metric + ".csv"
            good_variables_file = category_prefix + "_good_vars_1D.txt"

            # variables of this category, or all variables if none are defined
            if cat in self.variable_set.variables:
                base_variables = self.variable_set.variables[cat]
            else:
                base_variables = self.variable_set.all_variables
            variables = base_variables + self.add_vars

            # filter events according to JT category
            for sample_name in self.sampleNames:
                self.samples[sample_name].cutData(cat, variables)

            # one distance matrix plot per variable
            variable_info = {}
            good_variables = []
            for variable in variables:
                print("analyzing variable: {}".format(variable))

                # plot output name without square brackets
                plot_name = cat_dir + "/{}.pdf".format(variable)
                plot_name = plot_name.replace("[", "_").replace("]", "")

                distances = self.calculateAllDistances(
                    variable=variable, cat=cat, metric=metric)
                variable_info[variable] = distances

                # keep the variable if even its largest value is below 0.05
                if max(distances.values()) < 0.05:
                    good_variables.append(variable)

                histogram = setup.setup2DHistogram(
                    matrix=self.generateMatrix(distances),
                    ncls=len(self.sampleNames),
                    xtitle=setup.generateLatexLabel(variable),
                    ytitle="",
                    binlabel=self.sampleNames)

                canvas = setup.draw2DHistOnCanvas(histogram,
                                                  "KSpvalues" + cat + variable,
                                                  JTcut.getJTlabel(cat))
                setup.saveCanvas(canvas, plot_name)

            # write the collected distances and the list of good variables
            pandas.DataFrame(variable_info).to_csv(output_csv)
            with open(good_variables_file, "w") as f:
                f.write("variables[\"{}\"] = [\n".format(cat))
                f.writelines("    \"{}\",\n".format(v) for v in good_variables)
                f.write("    ]\n\n")
            print("saving distances in csv file {}".format(output_csv))
Exemple #5
0
    def __init__(self,
                 save_path,
                 input_samples,
                 category_name,
                 train_variables,
                 category_cutString=None,
                 category_label=None,
                 norm_variables=True,
                 train_epochs=500,
                 test_percentage=0.2,
                 eval_metrics=None,
                 shuffle_seed=None,
                 balanceSamples=False,
                 evenSel=None,
                 addSampleSuffix=""):
        """Store the training setup, load the data and prepare output folders.

        Args:
            save_path: output directory for results (created if missing).
            input_samples: list of samples to load into the dataframe.
            category_name: name of the event category (usually nJet/nTag).
            train_variables: list of input variables.
            category_cutString: event selection string; deduced from
                ``category_name`` via JTcut if None.
            category_label: category label; deduced from ``category_name``
                via JTcut if None.
            norm_variables: whether variables are normalized; if set, the
                norms are written to the checkpoint directory.
            train_epochs: number of train epochs.
            test_percentage: percentage of events saved for testing.
            eval_metrics: additional metrics for evaluating the training.
            shuffle_seed: forwarded to ``self._load_datasets``.
            balanceSamples: forwarded to ``self._load_datasets``.
            evenSel: True or False picks the ``Evt_Odd`` selection strings;
                any other value leaves the default selection.
            addSampleSuffix: suffix of the additional (ttbb) sample.
        """
        # samples and the suffix of the additional (ttbb) sample
        self.input_samples = input_samples
        self.addSampleSuffix = addSampleSuffix

        # output directory for results
        self.save_path = save_path
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)

        # event category; cut string and label default to the JTcut ones
        self.category_name = category_name
        if category_cutString is not None:
            self.category_cutString = category_cutString
        else:
            self.category_cutString = JTcut.getJTstring(category_name)
        if category_label is not None:
            self.category_label = category_label
        else:
            self.category_label = JTcut.getJTlabel(category_name)

        # even/odd event selection; compared by equality so that values
        # equal to True/False (such as 1/0) select a branch as well
        if evenSel == True:
            self.evenSel, self.oddSel = "(Evt_Odd==0)", "(Evt_Odd==1)"
        elif evenSel == False:
            self.evenSel, self.oddSel = "(Evt_Odd==1)", "(Evt_Odd==0)"
        else:
            self.evenSel, self.oddSel = "", "1."

        # training configuration
        self.train_variables = train_variables
        self.test_percentage = test_percentage
        self.train_epochs = train_epochs
        self.eval_metrics = eval_metrics
        self.norm_variables = norm_variables

        # load the data set; the output classes are defined by it
        self.data = self._load_datasets(shuffle_seed, balanceSamples)
        self.event_classes = self.data.output_classes

        # checkpoint directory, holding the variable norms if they are used
        self.cp_path = self.save_path + "/checkpoints/"
        if not os.path.exists(self.cp_path):
            os.makedirs(self.cp_path)
        if self.norm_variables:
            out_file = self.cp_path + "/variable_norm.csv"
            self.data.norm_csv.to_csv(out_file)
            print("saved variabe norms at " + str(out_file))

        # directory for plots
        self.plot_path = self.save_path + "/plots/"
        if not os.path.exists(self.plot_path):
            os.makedirs(self.plot_path)

        # layer names for in and output (needed for c++ implementation)
        self.inputName = "inputLayer"
        self.outputName = "outputLayer"
Exemple #6
0
    def histVariable(self, variable, plot_name, cat):
        """Draw background and signal histograms of one variable and save them.

        Backgrounds are the samples listed in ``self.ordered_stack`` (filled
        histograms); signals are all samples with ``isSignal`` set (line
        histograms scaled by a factor shown in the legend).

        Args:
            variable: name of the variable to plot.
            plot_name: output name handed to ``setup.saveCanvas``.
            cat: key into each sample's ``cut_data``; also used to build
                the category label.

        If ``binning.getBinrange`` yields nothing, the range is derived from
        the data of all samples and a matching config line is appended to
        ``new_variable_configs.txt``.
        """
        # get number of bins and bin range from config file
        bins = binning.getNbins(variable)
        bin_range = binning.getBinrange(variable)

        # no configured range: derive it from the data of all samples
        if not bin_range:
            # start from +/- infinity so that the true extrema are found even
            # when every value lies outside of a fixed window such as
            # [-999, 999]
            maxValue = float("-inf")
            minValue = float("inf")
            for key in self.samples:
                values = self.samples[key].cut_data[cat][variable].values
                maxValue = max(maxValue, max(values))
                minValue = min(minValue, min(values))
            if minValue > maxValue:
                # nothing usable was found (e.g. no samples): keep the
                # placeholder range this method has always produced then
                minValue, maxValue = 999, -999
            config_string = "variables[\"" + variable + "\"]\t\t\t= Variable(bin_range = [{},{}])\n".format(
                minValue, maxValue)
            with open("new_variable_configs.txt", "a") as f:
                f.write(config_string)
            bin_range = [minValue, maxValue]

        bkgHists = []
        bkgLabels = []
        weightIntegral = 0

        # loop over backgrounds and fill hists
        for sampleName in self.ordered_stack:
            sample = self.samples[sampleName]

            # get weights
            weights = sample.cut_data[cat]["weight"].values
            weightIntegral += sum(weights)

            # setup histogram
            hist = setup.setupHistogram(
                values=sample.cut_data[cat][variable].values,
                weights=weights,
                nbins=bins,
                bin_range=bin_range,
                color=sample.plotColor,
                xtitle=cat + "_" + sample.sampleName + "_" + variable,
                ytitle=setup.GetyTitle(),
                filled=True)

            bkgHists.append(hist)
            bkgLabels.append(sample.sampleName)

        sigHists = []
        sigLabels = []
        sigScales = []

        # if no background was added, the weight integral is equal to 0
        # NOTE: this changes the shared options dict, so the setting stays
        # in place for all later calls as well
        if weightIntegral == 0:
            self.options["scaleSignal"] = 1

        # loop over signals and fill hists
        for key in self.samples:
            sample = self.samples[key]
            if not sample.isSignal:
                continue

            # get weights
            weights = sample.cut_data[cat]["weight"].values

            # determine scale factor: -1 scales the signal to the background
            # integral (the small offset avoids a division by zero)
            if self.options["scaleSignal"] == -1:
                scaleFactor = weightIntegral / (sum(weights) + 1e-9)
            else:
                scaleFactor = float(self.options["scaleSignal"])

            # setup histogram
            hist = setup.setupHistogram(
                values=sample.cut_data[cat][variable].values,
                weights=weights,
                nbins=bins,
                bin_range=bin_range,
                color=sample.plotColor,
                xtitle=cat + "_" + sample.sampleName + "_" + variable,
                ytitle=setup.GetyTitle(),
                filled=False)

            hist.Scale(scaleFactor)

            sigHists.append(hist)
            sigLabels.append(sample.sampleName)
            sigScales.append(scaleFactor)

        # init canvas
        canvas = setup.drawHistsOnCanvas(sigHists,
                                         bkgHists,
                                         self.options,
                                         canvasName=cat + "_" + variable)

        # setup legend
        legend = setup.getLegend()
        # add signal entries, labelled with their scale factor
        for hist, label, scale in zip(sigHists, sigLabels, sigScales):
            legend.AddEntry(hist, label + " x {:4.0f}".format(scale), "L")
        # add background entries
        for hist, label in zip(bkgHists, bkgLabels):
            legend.AddEntry(hist, label, "F")

        # draw legend
        legend.Draw("same")

        # add lumi and category to plot
        setup.printLumi(canvas,
                        lumi=self.options["lumiScale"],
                        ratio=self.options["ratio"])
        setup.printCategoryLabel(canvas,
                                 JTcut.getJTlabel(cat),
                                 ratio=self.options["ratio"])

        # save canvas
        setup.saveCanvas(canvas, plot_name)
Exemple #7
0
    def histVariable(self, variable, plot_name, cat):
        """Draw background and signal histograms of one variable and save them.

        Backgrounds are the samples listed in ``self.ordered_stack``; signals
        are all samples with ``isSignal`` set. Binning, range, display name
        and log option are read from ``self.variableconfig`` if the variable
        is listed there. Otherwise 50 bins over the observed data range are
        used and a matching line is appended to ``new_variable_configs.csv``.

        Args:
            variable: name of the variable to plot.
            plot_name: output name handed to ``setup.saveCanvas``.
            cat: key into each sample's ``cut_data``; also used to build
                the category label.

        Returns:
            dict with the entries "nbins", "range" and "bkgYield", plus
            "KSScore" if a KS score was calculated.
        """
        histInfo = {}

        if variable in self.variableconfig.index:
            # get variable info from config file
            bins = int(self.variableconfig.loc[variable, 'numberofbins'])
            minValue = float(self.variableconfig.loc[variable, 'minvalue'])
            maxValue = float(self.variableconfig.loc[variable, 'maxvalue'])
            displayname = self.variableconfig.loc[variable, 'displayname']
            logoption = self.variableconfig.loc[variable, 'logoption']
        else:
            # unknown variable: use defaults and the observed data range
            bins = 50
            maxValue = max(
                max(self.samples[sample].cut_data[cat][variable].values)
                for sample in self.samples)
            minValue = min(
                min(self.samples[sample].cut_data[cat][variable].values)
                for sample in self.samples)
            displayname = variable
            logoption = "-"

            # log a config line for the new variable
            config_string = "{},{},{},{},{},{}\n".format(
                variable, minValue, maxValue, bins, logoption, displayname)
            with open("new_variable_configs.csv", "a") as f:
                f.write(config_string)

        bin_range = [minValue, maxValue]
        # the log option is switched on by "x" or "X" only
        logoption = logoption in ("x", "X")

        histInfo["nbins"] = bins
        histInfo["range"] = bin_range

        bkgHists = []
        bkgLabels = []
        weightIntegral = 0

        # loop over backgrounds and fill hists
        for sampleName in self.ordered_stack:
            sample = self.samples[sampleName]

            # get weights and values (NaN entries are not filtered out here)
            weights = sample.cut_data[cat]["weight"].values
            values = sample.cut_data[cat][variable].values

            weightIntegral += sum(weights)
            # setup histogram
            hist = setup.setupHistogram(
                values=values,
                weights=weights,
                nbins=bins,
                bin_range=bin_range,
                color=sample.plotColor,
                xtitle=cat + "_" + sample.sampleName + "_" + variable,
                ytitle=setup.GetyTitle(self.options["privateWork"]),
                filled=sample.filled)
            bkgHists.append(hist)
            bkgLabels.append(sample.sampleName)

        sigHists = []
        sigLabels = []
        sigScales = []

        # if no background was added, the weight integral is equal to 0
        # NOTE: this changes the shared options dict, so the setting stays
        # in place for all later calls as well
        if weightIntegral == 0:
            self.options["scaleSignal"] = 0
        histInfo["bkgYield"] = weightIntegral

        # scale stack to one if lumiScale is set to zero
        if self.options["lumiScale"] == 0:
            # a vanishing integral cannot be normalized; leave the
            # histograms untouched instead of dividing by zero
            if weightIntegral != 0:
                for hist in bkgHists:
                    hist.Scale(1. / weightIntegral)
            weightIntegral = 1.

        # loop over signals and fill hists
        for key in self.samples:
            sample = self.samples[key]
            if not sample.isSignal:
                continue

            # get weights
            weights = sample.cut_data[cat]["weight"].values
            # determine scale factor: -1 scales to the background integral,
            # 0 scales to unit area (the small offset avoids a division by
            # zero), anything else is used as the factor itself
            if self.options["scaleSignal"] == -1:
                scaleFactor = weightIntegral / (sum(weights) + 1e-9)
            elif self.options["scaleSignal"] == 0:
                scaleFactor = (1. / (sum(weights) + 1e-9))
            else:
                scaleFactor = float(self.options["scaleSignal"])

            # setup histogram
            # NOTE(review): GetyTitle is called without argument here but
            # with options["privateWork"] for the backgrounds -- confirm
            hist = setup.setupHistogram(
                values=sample.cut_data[cat][variable].values,
                weights=weights,
                nbins=bins,
                bin_range=bin_range,
                color=sample.plotColor,
                xtitle=cat + "_" + sample.sampleName + "_" + variable,
                ytitle=setup.GetyTitle(),
                filled=sample.filled)

            hist.Scale(scaleFactor)

            sigHists.append(hist)
            sigLabels.append(sample.sampleName)
            sigScales.append(scaleFactor)

        # init canvas
        canvas = setup.drawHistsOnCanvas(sigHists,
                                         bkgHists,
                                         self.options,
                                         canvasName=variable,
                                         displayname=displayname,
                                         logoption=logoption)

        # setup legend
        legend = setup.getLegend()
        # add signal entries
        for sigHist, sigLabel, sigScale in zip(sigHists, sigLabels, sigScales):
            labelstring = sigLabel
            if not self.options["lumiScale"] == 0.:
                labelstring = sigLabel + " x {:4.0f}".format(sigScale)

            # add KS score to label if activated; it is calculated against
            # the first background histogram, so it needs at least one
            if self.options["KSscore"] and bkgHists:
                KSscore = setup.calculateKSscore(bkgHists[0], sigHist)
                labelstring = "#splitline{" + labelstring + "}{KSscore = %.3f}" % (
                    KSscore)
                histInfo["KSScore"] = KSscore

            legend.AddEntry(sigHist, labelstring, "L")

        # add background entries
        for bkgHist, bkgLabel in zip(bkgHists, bkgLabels):
            legend.AddEntry(bkgHist, bkgLabel, "F")

        # draw legend
        legend.Draw("same")

        # add lumi and category to plot
        setup.printLumi(canvas,
                        lumi=self.options["lumiScale"],
                        ratio=self.options["ratio"])
        setup.printCategoryLabel(canvas,
                                 JTcut.getJTlabel(cat),
                                 ratio=self.options["ratio"])
        if self.options["privateWork"]:
            setup.printPrivateWork(canvas, ratio=self.options["ratio"])

        # save canvas
        setup.saveCanvas(canvas, plot_name)

        return histInfo
    def histVariable2D(self, name, vX, vY, plot_name, cat):
        """Plot two variables against each other, using all samples combined.

        Optionally (``options["plot1D"]``) both variables are also drawn as
        1D distributions on a common axis and saved next to the 2D plot.

        Args:
            name: used as canvas name and as sample name on the 2D canvas.
            vX: name of the variable drawn on the x axis.
            vY: name of the variable drawn on the y axis.
            plot_name: output name handed to ``setup.saveCanvas``; the 1D
                plot replaces ".pdf" by "_1D.pdf".
            cat: key into each sample's ``cut_data``; also used to build
                the category label.

        If ``binning.getBinrange`` yields nothing for a variable, the range
        is derived from the data of all samples and a matching config line
        is appended to ``new_variable_configs.txt``.
        """

        def rangeFromData(variable):
            # fall back to the observed [min, max] and log a config suggestion
            maxValue = max([max(self.samples[sample].cut_data[cat][variable].values) for sample in self.samples])
            minValue = min([min(self.samples[sample].cut_data[cat][variable].values) for sample in self.samples])
            config_string = "variables[\""+variable+"\"]\t\t\t= Variable(bin_range = [{},{}])\n".format(minValue, maxValue)
            with open("new_variable_configs.txt", "a") as f:
                f.write(config_string)
            return [minValue, maxValue]

        def collect(column):
            # join one column of all samples in a single copy; the empty
            # float array keeps the result defined if there are no samples
            parts = [np.array([])]
            parts.extend(
                np.ravel(self.samples[sample].cut_data[cat][column].values)
                for sample in self.samples)
            return np.concatenate(parts)

        # get number of bins and binrange from config file
        binsX = binning.getNbins(vX)
        binsY = binning.getNbins(vY)
        rangeX = binning.getBinrange(vX)
        rangeY = binning.getBinrange(vY)

        # check if bin_range was found
        if not rangeX:
            rangeX = rangeFromData(vX)
        if not rangeY:
            rangeY = rangeFromData(vY)

        # values and weights of all samples
        weights = collect("weight")
        valuesX = collect(vX)
        valuesY = collect(vY)

        hist = setup.setupHistogram2D(
            valuesX     = valuesX,
            valuesY     = valuesY,
            weights     = weights,
            binsX       = binsX,
            binsY       = binsY,
            rangeX      = rangeX,
            rangeY      = rangeY,
            titleX      = vX,
            titleY      = vY)

        canvas = setup.drawHistOnCanvas2D(
            hist        = hist,
            canvasName  = name,
            catLabel    = JTcut.getJTlabel(cat),
            sampleName  = name)

        # add lumi
        setup.printLumi(canvas, lumi = self.options["lumiScale"], twoDim = True)
        if self.options["privateWork"]:
            setup.printPrivateWork(canvas, ratio = self.options["ratio"], twoDim = True)

        if self.options["getCorr"]:
            correlation = hist.GetCorrelationFactor()
            setup.printCorrelation(canvas, correlation)

        # save canvas
        setup.saveCanvas(canvas, plot_name)

        # plot distributions in 1D if activated
        if self.options["plot1D"]:
            # common binning: average number of bins, envelope of both ranges
            bins = int((binsX + binsY)/2.)
            bin_range = [min(rangeX[0],rangeY[0]), max(rangeX[1],rangeY[1])]

            hX = setup.setupHistogram(
                    values      = valuesX,
                    weights     = weights,
                    nbins       = bins,
                    bin_range   = bin_range,
                    color       = ROOT.kBlack,
                    xtitle      = vX+"1D",
                    ytitle      = setup.GetyTitle(self.options["lumiScale"]),
                    filled      = False)

            hY = setup.setupHistogram(
                    values      = valuesY,
                    weights     = weights,
                    nbins       = bins,
                    bin_range   = bin_range,
                    color       = ROOT.kRed,
                    xtitle      = vY+"1D",
                    ytitle      = setup.GetyTitle(self.options["lumiScale"]),
                    filled      = False)

            # normalize to unit area if lumiScale is set to zero; an empty
            # histogram is left as it is instead of dividing by zero
            if self.options["lumiScale"] == 0.:
                for hist1D in (hX, hY):
                    integral = hist1D.Integral()
                    if integral != 0:
                        hist1D.Scale(1./integral)

            # init canvas
            canvas = setup.drawHistsOnCanvas(
                hX, hY, self.options,
                canvasName = name)

            # setup legend
            legend = setup.getLegend()
            legend.AddEntry( hX, self.options["xName"], "L")

            labelY = self.options["yName"]
            # add KS score to label if activated
            if self.options["KSscore"]:
                KSscore = setup.calculateKSscore(hX, hY)
                labelY="#splitline{"+labelY+"}{KSscore = %.3f}"%(KSscore)
            legend.AddEntry( hY, labelY, "L")

            # draw legend
            legend.Draw("same")

            # add lumi and category to plot
            setup.printLumi(canvas, lumi = self.options["lumiScale"], ratio = self.options["ratio"])
            setup.printCategoryLabel(canvas, JTcut.getJTlabel(cat), ratio = self.options["ratio"])
            if self.options["privateWork"]:
                setup.printPrivateWork(canvas, ratio = self.options["ratio"])

            # save canvas
            setup.saveCanvas(canvas, plot_name.replace(".pdf","_1D.pdf"))
    def histVariable1D(self, sample, name, vX, vY, plot_name, cat):
        """Overlay the 1D distributions of two variables for one sample.

        Both variables share one binning: the average of their bin numbers
        over the envelope of their two ranges.

        Args:
            sample: sample object providing ``cut_data`` and ``sampleName``.
            name: used in the canvas name, together with the sample name.
            vX: first variable (drawn in black, labelled ``options["xName"]``).
            vY: second variable (drawn in red, labelled ``options["yName"]``).
            plot_name: output name handed to ``setup.saveCanvas``.
            cat: key into ``sample.cut_data``; also used to build the
                category label.

        If ``binning.getBinrange`` yields nothing for a variable, the range
        is taken from the data of the sample and a matching config line is
        appended to ``new_variable_configs.txt``.
        """

        def rangeFromData(variable):
            # fall back to the observed [min, max] and log a config suggestion
            upper = max(sample.cut_data[cat][variable].values)
            lower = min(sample.cut_data[cat][variable].values)
            suggestion = "variables[\""+variable+"\"]\t\t\t= Variable(bin_range = [{},{}])\n".format(lower, upper)
            with open("new_variable_configs.txt", "a") as out:
                out.write(suggestion)
            return [lower, upper]

        # binning and ranges as configured
        nBinsX = binning.getNbins(vX)
        nBinsY = binning.getNbins(vY)
        rangeX = binning.getBinrange(vX)
        rangeY = binning.getBinrange(vY)

        # replace missing ranges by the ones observed in data
        if not rangeX:
            rangeX = rangeFromData(vX)
        if not rangeY:
            rangeY = rangeFromData(vY)

        # common binning of both histograms
        bins = int((nBinsX + nBinsY)/2.)
        lowEdge = min(rangeX[0],rangeY[0])
        highEdge = max(rangeX[1],rangeY[1])
        bin_range = [lowEdge, highEdge]

        data = sample.cut_data[cat]
        weights = data["weight"].values
        valuesX = data[vX].values
        valuesY = data[vY].values
        titlePrefix = cat+"_"+sample.sampleName+"_"

        hX = setup.setupHistogram(
                values      = valuesX,
                weights     = weights,
                nbins       = bins,
                bin_range   = bin_range,
                color       = ROOT.kBlack,
                xtitle      = titlePrefix+vX,
                ytitle      = setup.GetyTitle(self.options["lumiScale"]),
                filled      = False)

        hY = setup.setupHistogram(
                values      = valuesY,
                weights     = weights,
                nbins       = bins,
                bin_range   = bin_range,
                color       = ROOT.kRed,
                xtitle      = titlePrefix+vY,
                ytitle      = setup.GetyTitle(self.options["lumiScale"]),
                filled      = False)

        # canvas holding both histograms
        canvas = setup.drawHistsOnCanvas(
            hX, hY, self.options,
            canvasName = "[{}] {}".format(sample.sampleName, name))

        # legend; the second entry carries the KS score if activated
        legend = setup.getLegend()
        legend.AddEntry( hX, self.options["xName"], "L")

        labelY = self.options["yName"]
        if self.options["KSscore"]:
            KSscore = setup.calculateKSscore(hX, hY)
            labelY="#splitline{"+labelY+"}{KSscore = %.3f}"%(KSscore)
        legend.AddEntry( hY, labelY, "L")
        legend.Draw("same")

        # add lumi, category and (optionally) the private work label
        useRatio = self.options["ratio"]
        setup.printLumi(canvas, lumi = self.options["lumiScale"], ratio = useRatio)
        setup.printCategoryLabel(canvas, JTcut.getJTlabel(cat), ratio = useRatio)
        if self.options["privateWork"]:
            setup.printPrivateWork(canvas, ratio = self.options["ratio"])

        # write the plot
        setup.saveCanvas(canvas, plot_name)