def legend4Plot(plot, left = False):
    """Build a borderless, transparent legend for the visible items of *plot*.

    plot -- object exposing numItems(), nameOf(), getInvisible(),
            getObject() and getDrawOptions() (used here like a RooPlot).
    left -- when true the legend spans x = 0.2..0.55 (NDC), otherwise
            x = 0.60..0.92.

    Returns the TLegend.  Only items whose class inherits from TNamed get
    an entry; an item with an empty title is labelled with its name.  The
    lower edge of the legend is moved so that the box grows by 0.05 (NDC)
    per entry below y = 0.9.
    """
    # Horizontal extent depends on which side of the pad is requested.
    if left:
        xLow, xHigh = 0.2, 0.55
    else:
        xLow, xHigh = 0.60, 0.92
    legend = TLegend(xLow, 0.62, xHigh, 0.92, "", "NDC")

    # Cosmetics: no border, no fill, standard text font.
    legend.SetName('theLegend')
    legend.SetBorderSize(0)
    legend.SetLineColor(0)
    legend.SetFillColor(0)
    legend.SetFillStyle(0)
    legend.SetLineWidth(0)
    legend.SetLineStyle(0)
    legend.SetTextFont(42)
    legend.SetTextSize(.045)

    nEntries = 0
    for index in range(0, int(plot.numItems())):
        itemName = plot.nameOf(index)
        if plot.getInvisible(itemName):
            continue

        item = plot.getObject(index)
        label = item.GetTitle()
        if len(label) < 1:
            # Fall back to the item name when no title was set.
            label = itemName
        drawOpts = plot.getDrawOptions(itemName).Data()

        if not item.IsA().InheritsFrom('TNamed'):
            continue
        legend.AddEntry(item, label, drawOpts)
        nEntries += 1

    # Size the box to the number of entries that were actually added.
    legend.SetY1NDC(0.9 - 0.05*nEntries - 0.005)
    legend.SetY1(legend.GetY1NDC())
    return legend
def addPlots(plots):
    """Merge the corresponding items of several plots into one new plot.

    plots -- non-empty sequence of plot objects (used here like RooPlots)
             that contain identically named items.  The first plot
             defines which items are processed and supplies the draw
             options and the line/fill styling.

    Returns a new plot created with plots[0].emptyClone().  Per item type:
      * RooCurve     - curves are clipped with clipCurve(), chained
                       together and closed down to y = 0;
      * RooHist      - histograms are combined with addHists();
      * TGraphErrors - graphs are combined with addErrors();
      * TLine        - copied from the first plot and stretched up to the
                       current maximum of the output plot;
      * TLegend      - copied from the first plot and resized to its
                       number of primitives.
    Items of any other exact type are not carried over.  The y maximum of
    the result is raised by 30% and the y title is set to "Events / GeV".

    Ownership of every object handed to the output plot is released from
    Python (SetOwnership(..., False)).
    """
    from ROOT import RooCurve, RooHist, TLine, TLegend, RooPlot, RooAbsData, \
        SetOwnership, TGraphErrors

    firstPlot = plots[0]
    outplot = firstPlot.emptyClone(firstPlot.GetName())

    for item in range(0, int(firstPlot.numItems())):
        itemName = firstPlot.nameOf(item)
        firstItem = firstPlot.getObject(item)
        # Exact-type comparison is kept on purpose: subclasses (for
        # example other TLine-derived objects) were never carried over.
        itemType = type(firstItem)

        if itemType == RooCurve:
            fullCurve = clipCurve(firstItem)
            for plot in range(1, len(plots)):
                nextCurve = clipCurve(plots[plot].getCurve(itemName))
                fullCurve = RooCurve(fullCurve.GetName(), fullCurve.GetTitle(),
                                     fullCurve, nextCurve)
            # Close the outline: drop to zero at the last x, then return
            # to the first x along the axis.
            fullCurve.addPoint(fullCurve.GetX()[fullCurve.GetN() - 1], 0)
            fullCurve.addPoint(fullCurve.GetX()[0], 0)
            fullCurve.SetLineColor(firstItem.GetLineColor())
            fullCurve.SetLineStyle(firstItem.GetLineStyle())
            fullCurve.SetFillColor(firstItem.GetFillColor())
            fullCurve.SetFillStyle(firstItem.GetFillStyle())
            outplot.addPlotable(fullCurve,
                                firstPlot.getDrawOptions(itemName).Data())
            SetOwnership(fullCurve, False)

        elif itemType == RooHist:
            fullHist = firstItem
            for plot in range(1, len(plots)):
                nextHist = plots[plot].getHist(itemName)
                fullHist = addHists(fullHist, nextHist)
            fullHist.SetName(itemName)
            fullHist.SetTitle(firstItem.GetTitle())
            outplot.addPlotable(fullHist,
                                firstPlot.getDrawOptions(itemName).Data())
            SetOwnership(fullHist, False)

        elif itemType == TGraphErrors:
            fullErrors = firstItem
            for plot in range(1, len(plots)):
                nextErrors = plots[plot].findObject(itemName)
                fullErrors = addErrors(fullErrors, nextErrors)
            fullErrors.SetName(itemName)
            fullErrors.SetTitle(firstItem.GetTitle())
            outplot.addObject(fullErrors,
                              firstPlot.getDrawOptions(itemName).Data())
            SetOwnership(fullErrors, False)

        elif itemType == TLine:
            newLine = TLine(firstItem)
            newLine.SetY2(outplot.GetMaximum())
            SetOwnership(newLine, False)
            outplot.addObject(newLine)

        elif itemType == TLegend:
            newLeg = TLegend(firstItem)
            # Resize the copy: 0.04 (NDC) per primitive below y = 0.92.
            newLeg.SetY1NDC(0.92 -
                            0.04*newLeg.GetListOfPrimitives().GetSize() -
                            0.02)
            newLeg.SetY1(newLeg.GetY1NDC())
            SetOwnership(newLeg, False)
            outplot.addObject(newLeg)

    # Leave head-room above the tallest item.
    outplot.SetMaximum(outplot.GetMaximum() * 1.3)
    outplot.GetYaxis().SetTitle("Events / GeV")
    return outplot
def _diffAxisRange(yMax, yMin):
    """Return (low, high) y-axis limits for a data-minus-background plot.

    The limits always satisfy low <= high: one-sided content is anchored
    at zero, content that crosses zero gets a range symmetric about zero.
    Both limits carry a 20% margin.
    """
    if yMax <= 0 and yMin <= 0:
        return 1.2 * yMin, 0
    if yMax >= 0 and yMin >= 0:
        return 0, 1.2 * yMax
    bound = 1.2 * max(abs(yMax), abs(yMin))
    return -bound, bound


def _sumHistograms(histograms, name):
    """Return a detached clone named *name* holding the sum of *histograms*."""
    total = histograms[0].Clone(name)
    total.SetDirectory(0)
    for histogram in histograms[1:]:
        total.Add(histogram)
    return total


def _rebinIfAllowed(histogram, rebinFactor):
    """Rebin *histogram* in place unless that is explicitly excluded."""
    # don't rebin histograms which will have less than 5 bins or any
    # gen-matching histograms
    if (histogram.GetNbinsX() >= rebinFactor * 5
            and histogram.GetName().find("GenMatch") == -1):
        histogram.Rebin(rebinFactor)


def MakeOneDHist(pathToDir, distribution):
    """Fit a sum of template histograms to a target histogram and plot it.

    pathToDir    -- directory inside each sample ROOT file that holds the
                    channel sub-directories with the template histograms.
    distribution -- dict read with the keys 'name', 'channel',
                    'target_dataset', 'datasets' and 'iterations', and the
                    optional keys 'lowerLimit', 'upperLimit' and
                    'fixed_datasets'.

    The function writes three canvases ("<name>_FitFunction",
    "<name>_Before", "<name>_After") into
    outputFile:OSUAnalysis/<channel> and, when arguments.savePDFs is set,
    also saves PDFs.  It returns None.  It returns early, without
    producing anything, when the target file or target histogram is
    missing or when none of the requested template histograms is found.

    Relies on names defined at module level that are not visible in this
    block (arguments, condor_dir, outputFile, colors, labels, types, the
    header_*/topLeft_* coordinates, HeaderText, LumiText, ratioHistogram,
    plainTextString, ...).
    """
    numFittingSamples = 0

    HeaderLabel = TPaveLabel(header_x_left, header_y_bottom, header_x_right,
                             header_y_top, HeaderText, "NDC")
    HeaderLabel.SetTextAlign(32)
    HeaderLabel.SetBorderSize(0)
    HeaderLabel.SetFillColor(0)
    HeaderLabel.SetFillStyle(0)

    LumiLabel = TPaveLabel(topLeft_x_left, topLeft_y_bottom, topLeft_x_right,
                           topLeft_y_top, LumiText, "NDC")
    LumiLabel.SetBorderSize(0)
    LumiLabel.SetFillColor(0)
    LumiLabel.SetFillStyle(0)

    NormLabel = TPaveLabel()
    NormLabel.SetDrawOption("NDC")
    NormLabel.SetX1NDC(topLeft_x_left)
    NormLabel.SetX2NDC(topLeft_x_right)
    NormLabel.SetBorderSize(0)
    NormLabel.SetFillColor(0)
    NormLabel.SetFillStyle(0)
    NormText = ""
    if arguments.normalizeToUnitArea:
        NormText = "Scaled to unit area"
    elif arguments.normalizeToData:
        NormText = "MC scaled to data"
    NormLabel.SetLabel(NormText)

    YieldsLabel = TPaveText(0.39, 0.7, 0.59, 0.9, "NDC")
    YieldsLabel.SetBorderSize(0)
    YieldsLabel.SetFillColor(0)
    YieldsLabel.SetFillStyle(0)
    YieldsLabel.SetTextAlign(12)

    RatiosLabel = TPaveText()
    RatiosLabel.SetDrawOption("NDC")
    RatiosLabel.SetBorderSize(0)
    RatiosLabel.SetFillColor(0)
    RatiosLabel.SetFillStyle(0)
    RatiosLabel.SetTextAlign(32)

    Legend = TLegend()
    Legend.SetBorderSize(0)
    Legend.SetFillColor(0)
    Legend.SetFillStyle(0)

    fittingIntegral = 0
    scaleFactor = 1

    # Parallel lists: entry k of each describes the k-th histogram that
    # was actually found and will take part in the fit.
    HistogramsToFit = []
    FittingLegendEntries = []
    FittingHistogramDatasets = []

    TargetDataset = distribution['target_dataset']

    Stack_list = []
    Stack_list.append(THStack("stack_before", distribution['name']))
    Stack_list.append(THStack("stack_after", distribution['name']))

    # --- load the target histogram ------------------------------------
    fileName = condor_dir + "/" + distribution['target_dataset'] + ".root"
    if not os.path.exists(fileName):
        return
    inputFile = TFile(fileName)
    if inputFile.IsZombie() or not inputFile.GetNkeys():
        inputFile.Close()
        return

    targetPath = ("OSUAnalysis/" + distribution['channel'] + "/" +
                  distribution['name'])
    TargetObj = inputFile.Get(targetPath)
    if not TargetObj:
        # Get() yields a null object for a missing key; bail out instead
        # of failing on the Clone() call below.
        print("WARNING: Could not find histogram " + targetPath +
              " in file " + fileName + ". Will skip it and continue.")
        inputFile.Close()
        return
    Target = TargetObj.Clone()
    Target.SetDirectory(0)
    inputFile.Close()

    Target.SetMarkerStyle(20)
    Target.SetMarkerSize(0.8)
    Target.SetFillStyle(0)
    Target.SetLineColor(colors[TargetDataset])
    Target.SetLineStyle(1)
    Target.SetLineWidth(2)
    targetIntegral = Target.Integral()
    if (arguments.normalizeToUnitArea and Target.Integral() > 0):
        Target.Scale(1. / Target.Integral())
    if arguments.rebinFactor:
        _rebinIfAllowed(Target, int(arguments.rebinFactor))

    ### formatting target histogram and adding to legend
    Legend.AddEntry(Target, labels[TargetDataset], "LEP")

    if not outputFile.Get("OSUAnalysis"):
        outputFile.mkdir("OSUAnalysis")
    if not outputFile.Get("OSUAnalysis/" + distribution['channel']):
        outputFile.Get("OSUAnalysis").mkdir(distribution['channel'])

    # --- load the template histograms ---------------------------------
    for sample in distribution['datasets']:  # loop over different samples requested to be fit
        dataset_file = "%s/%s.root" % (condor_dir, sample)
        inputFile = TFile(dataset_file)
        HistogramObj = inputFile.Get(pathToDir + "/" +
                                     distribution['channel'] + "/" +
                                     distribution['name'])
        if not HistogramObj:
            print("WARNING: Could not find histogram " + pathToDir + "/" +
                  distribution['channel'] + "/" + distribution['name'] +
                  " in file " + dataset_file +
                  ". Will skip it and continue.")
            # Do not leave the file open when skipping this sample.
            inputFile.Close()
            continue
        Histogram = HistogramObj.Clone()
        Histogram.SetDirectory(0)
        inputFile.Close()
        if arguments.rebinFactor:
            _rebinIfAllowed(Histogram, int(arguments.rebinFactor))

        # Axis titles and plot title are taken from the last template read.
        xAxisLabel = Histogram.GetXaxis().GetTitle()
        unitBeginIndex = xAxisLabel.find("[")
        unitEndIndex = xAxisLabel.find("]")
        if unitBeginIndex != -1 and unitEndIndex != -1:  #x axis has a unit
            yAxisLabel = ("Entries / " +
                          str(Histogram.GetXaxis().GetBinWidth(1)) + " " +
                          xAxisLabel[unitBeginIndex + 1:unitEndIndex])
        else:
            yAxisLabel = ("Entries per bin (" +
                          str(Histogram.GetXaxis().GetBinWidth(1)) +
                          " width)")
        if not arguments.makeFancy:
            histoTitle = Histogram.GetTitle()
        else:
            histoTitle = ""

        legLabel = labels[sample]
        if (arguments.printYields):
            yieldHist = Histogram.Integral()
            legLabel = legLabel + " (%.1f)" % yieldHist
        FittingLegendEntries.append(legLabel)

        if (types[sample] == "bgMC"):
            numFittingSamples += 1
            fittingIntegral += Histogram.Integral()
            Histogram.SetLineStyle(1)
            if (arguments.noStack):
                Histogram.SetFillStyle(0)
                Histogram.SetLineColor(colors[sample])
                Histogram.SetLineWidth(2)
            else:
                Histogram.SetFillStyle(1001)
                Histogram.SetFillColor(colors[sample])
                Histogram.SetLineColor(1)
                Histogram.SetLineWidth(1)
        elif (types[sample] == "signalMC"):
            numFittingSamples += 1
            Histogram.SetFillStyle(0)
            Histogram.SetLineColor(colors[sample])
            Histogram.SetLineStyle(1)
            Histogram.SetLineWidth(2)

        if (arguments.normalizeToUnitArea and Histogram.Integral() > 0):
            Histogram.Scale(1. / Histogram.Integral())

        HistogramsToFit.append(Histogram)
        FittingHistogramDatasets.append(sample)

    if not HistogramsToFit:
        # Everything below indexes HistogramsToFit[0] and uses the axis
        # labels set inside the loop above, so there is nothing to do.
        print("WARNING: Could not find any histogram to fit for " +
              distribution['name'] + ". Will skip it and continue.")
        return

    #scaling histograms as per user's specifications
    if targetIntegral > 0 and fittingIntegral > 0:
        scaleFactor = targetIntegral / fittingIntegral
    for fittingHist in HistogramsToFit:
        if arguments.normalizeToData:
            fittingHist.Scale(scaleFactor)
        if arguments.normalizeToUnitArea and not arguments.noStack and fittingIntegral > 0:
            fittingHist.Scale(1. / fittingIntegral)
        elif arguments.normalizeToUnitArea and arguments.noStack and fittingHist.Integral() > 0:
            fittingHist.Scale(1. / fittingHist.Integral())

    # --- fit function ---------------------------------------------------
    numHists = len(HistogramsToFit)

    def fitf(x, par):
        # par[0:numHists] scale the bin contents, par[numHists:2*numHists]
        # multiply the bin errors (used to shift a template by +-1 sigma).
        xBin = HistogramsToFit[0].FindBin(x[0])
        value = 0.0
        for i in range(0, numHists):
            value += (par[i] * HistogramsToFit[i].GetBinContent(xBin) +
                      par[i + numHists] * HistogramsToFit[i].GetBinError(xBin))
        return value

    lowerLimit = Target.GetBinLowEdge(1)
    upperLimit = (Target.GetBinLowEdge(Target.GetNbinsX()) +
                  Target.GetBinWidth(Target.GetNbinsX()))
    if 'lowerLimit' in distribution:
        lowerLimit = distribution['lowerLimit']
    if 'upperLimit' in distribution:
        upperLimit = distribution['upperLimit']
    func = TF1("fit", fitf, lowerLimit, upperLimit, 2 * numHists)

    for i in range(0, numHists):
        # Look the dataset up through FittingHistogramDatasets: it stays
        # aligned with HistogramsToFit even when a sample was skipped,
        # which distribution['datasets'][i] does not.
        if ('fixed_datasets' in distribution and
                FittingHistogramDatasets[i] in distribution['fixed_datasets']):
            func.FixParameter(i, 1.0)
        else:
            func.SetParameter(i, 1.0)
        # func.SetParLimits (i, 0.0, 1.0e2) # comment this out so we don't have to pre-normalize the QCD input sample
        func.SetParName(i, labels[FittingHistogramDatasets[i]])

    shiftedScaleFactors = []
    if arguments.parametricErrors:
        # loop over all input histograms and shift them +- 1 sigma
        for i in range(0, numHists):
            sfs = []
            # -1 => -1 sigma, +1 => +1 sigma
            for j in [-1, 1]:
                # loop over the parameters holding the errors for each dataset, fixing all to 0
                for k in range(numHists, 2 * numHists):
                    func.FixParameter(k, 0)
                # fix the error of the dataset of interest to +-1
                func.FixParameter(i + numHists, j)
                # perform new fit
                for k in range(0, distribution['iterations'] - 1):
                    if j == -1:
                        print("Scale down " +
                              labels[FittingHistogramDatasets[i]] +
                              " iteration " + str(k + 1) + "...")
                    if j == 1:
                        print("Scale up " +
                              labels[FittingHistogramDatasets[i]] +
                              " iteration " + str(k + 1) + "...")
                    Target.Fit("fit", "QEMR0")
                Target.Fit("fit", "VEMR0")
                # save the new scale factors for each dataset
                for k in range(0, numHists):
                    sfs.append(func.GetParameter(k))
            shiftedScaleFactors.append(sfs)

    # reset the parameters with the errors of each dataset to 0
    for i in range(numHists, 2 * numHists):
        func.FixParameter(i, 0)
    # do the fit to get the central values
    for i in range(0, distribution['iterations'] - 1):
        print("Iteration " + str(i + 1) + "...")
        Target.Fit("fit", "QEMR0")
    Target.Fit("fit", "VEMR0")

    if arguments.parametricErrors:
        # make a list of the largest errors on each contribution by shifting any other contribution
        # NOTE(review): shiftedScaleFactors[i] holds the scale factors of
        # *all* datasets obtained while dataset i was shifted, yet each
        # one is compared with the central value of dataset i only.
        # Confirm that this is the intended definition of the error.
        parErrors = []
        # loop over all the datasets
        for i in range(0, numHists):
            centralValue = func.GetParameter(i)
            maxError = 0
            # find the maximum deviation from the central value and save that
            for shiftedScaleFactor in shiftedScaleFactors[i]:
                currentError = abs(shiftedScaleFactor - centralValue)
                if currentError > maxError:
                    maxError = currentError
            parErrors.append(maxError)

    # --- canvas showing the target with the fitted function -----------
    finalMax = 0
    if not arguments.noStack:
        for fittingHist in HistogramsToFit:
            finalMax += fittingHist.GetMaximum()
    else:
        for fittingHist in HistogramsToFit:
            if (fittingHist.GetMaximum() > finalMax):
                finalMax = fittingHist.GetMaximum()
    if (Target.GetMaximum() > finalMax):
        finalMax = Target.GetMaximum()
    Target.SetMaximum(1.1 * finalMax)
    Target.SetMinimum(0.0001)

    Canvas = TCanvas(distribution['name'] + "_FitFunction")
    Canvas.cd(1)
    Target.Draw()
    func.Draw("same")
    outputFile.cd("OSUAnalysis/" + distribution['channel'])
    Canvas.Write()
    if arguments.savePDFs:
        # NOTE(review): histogram, input_histograms and pdfFileName are
        # not defined in this function; presumably module-level names.
        if histogram == input_histograms[0]:
            Canvas.Print(pdfFileName + "(", "pdf")
        else:
            Canvas.Print(pdfFileName, "pdf")
    Target.SetStats(0)

    ### formatting bgMC histograms and adding to legend
    # The two lists are parallel, so pair them directly (last one first).
    legendOption = "L" if arguments.noStack else "F"
    for Histogram, legLabel in zip(reversed(HistogramsToFit),
                                   reversed(FittingLegendEntries)):
        Legend.AddEntry(Histogram, legLabel, legendOption)

    ### Drawing histograms to canvas
    makeRatioPlots = arguments.makeRatioPlots
    makeDiffPlots = arguments.makeDiffPlots
    yAxisMin = 0.0001
    if arguments.setYMin:
        yAxisMin = float(arguments.setYMin)

    ### Draw everything to the canvases !!!!
    for i in range(0, 2):  # 0 => before, 1 => after
        integrals = []
        ratios = []
        errors = []
        if i == 1:
            # loop over each dataset, saving its yield and the errors on it
            for j in range(0, numHists):
                integrals.append(HistogramsToFit[j].Integral())
                HistogramsToFit[j].Scale(func.GetParameter(j))
                ratios.append(func.GetParameter(j))
                errors.append(func.GetParError(j))

        if not arguments.noStack:
            for fittingHist in HistogramsToFit:
                Stack_list[i].Add(fittingHist)

        #creating the histogram to represent the statistical errors on the stack
        if not arguments.noStack:
            ErrorHisto = HistogramsToFit[0].Clone("errors")
            ErrorHisto.SetFillStyle(3001)
            ErrorHisto.SetFillColor(13)
            ErrorHisto.SetLineWidth(0)
            if i == 1:
                Legend.AddEntry(ErrorHisto, "Stat. Errors", "F")
            for Histogram in HistogramsToFit:
                if Histogram is not HistogramsToFit[0]:
                    ErrorHisto.Add(Histogram)

        if i == 0:
            Canvas = TCanvas(distribution['name'] + "_Before")
        if i == 1:
            Canvas = TCanvas(distribution['name'] + "_After")

        if makeRatioPlots or makeDiffPlots:
            Canvas.SetFillStyle(0)
            Canvas.Divide(1, 2)
            Canvas.cd(1)
            gPad.SetPad(0, 0.25, 1, 1)
            gPad.SetMargin(0.15, 0.05, 0.01, 0.07)
            gPad.SetFillStyle(0)
            gPad.Update()
            gPad.Draw()
            if arguments.setLogY:
                gPad.SetLogy()
            Canvas.cd(2)
            gPad.SetPad(0, 0, 1, 0.25)
            # format: gPad.SetMargin(l,r,b,t)
            gPad.SetMargin(0.15, 0.05, 0.4, 0.01)
            gPad.SetFillStyle(0)
            gPad.SetGridy(1)
            gPad.Update()
            gPad.Draw()
            Canvas.cd(1)

        ### finding the maximum value of anything going on the canvas, so we know how to set the y-axis
        finalMax = 0
        if numFittingSamples != 0 and not arguments.noStack:
            finalMax = (ErrorHisto.GetMaximum() +
                        ErrorHisto.GetBinError(ErrorHisto.GetMaximumBin()))
        else:
            for bgMCHist in HistogramsToFit:
                if (bgMCHist.GetMaximum() > finalMax):
                    finalMax = bgMCHist.GetMaximum()
        if (Target.GetMaximum() > finalMax):
            finalMax = (Target.GetMaximum() +
                        Target.GetBinError(Target.GetMaximumBin()))
        finalMax = 1.15 * finalMax
        if arguments.setYMax:
            finalMax = float(arguments.setYMax)

        if not arguments.noStack:  # draw stacked background samples
            Stack_list[i].SetTitle(histoTitle)
            Stack_list[i].Draw("HIST")
            Stack_list[i].GetXaxis().SetTitle(xAxisLabel)
            Stack_list[i].GetYaxis().SetTitle(yAxisLabel)
            Stack_list[i].SetMaximum(finalMax)
            Stack_list[i].SetMinimum(yAxisMin)
            if makeRatioPlots or makeDiffPlots:
                Stack_list[i].GetHistogram().GetXaxis().SetLabelSize(0)
            #draw shaded error bands
            ErrorHisto.Draw("A E2 SAME")
        else:  #draw the unstacked backgrounds
            HistogramsToFit[0].SetTitle(histoTitle)
            HistogramsToFit[0].Draw("HIST")
            HistogramsToFit[0].GetXaxis().SetTitle(xAxisLabel)
            HistogramsToFit[0].GetYaxis().SetTitle(yAxisLabel)
            HistogramsToFit[0].SetMaximum(finalMax)
            HistogramsToFit[0].SetMinimum(yAxisMin)
            for bgMCHist in HistogramsToFit:
                bgMCHist.Draw("A HIST SAME")
        Target.Draw("A E X0 SAME")

        #legend coordinates, empirically determined :-)
        x_left = 0.6761745
        x_right = 0.9328859
        y_max = 0.9
        entry_height = 0.05

        if (numFittingSamples != 0):  #then draw the data & bgMC legend
            numExtraEntries = 2  # count the target and (lack of) title
            Legend.SetX1NDC(x_left)
            numExtraEntries = numExtraEntries + 1  # count the stat. errors entry
            Legend.SetY1NDC(y_max - entry_height *
                            (numExtraEntries + numFittingSamples))
            Legend.SetX2NDC(x_right)
            Legend.SetY2NDC(y_max)
            Legend.Draw()
            RatiosLabel.SetX1NDC(x_left - 0.1)
            RatiosLabel.SetX2NDC(x_right)
            RatiosLabel.SetY2NDC(Legend.GetY1NDC() - 0.1)
            RatiosLabel.SetY1NDC(RatiosLabel.GetY2NDC() -
                                 entry_height * (numFittingSamples))

        # Deciding which text labels to draw and drawing them
        drawLumiLabel = False
        drawNormLabel = False
        offsetNormLabel = False
        drawHeaderLabel = False
        if not arguments.normalizeToUnitArea:  #don't draw the lumi label if there's no data and it's scaled to unit area
            drawLumiLabel = True
            # move the normalization label down before drawing if we drew the lumi. label
            offsetNormLabel = True
        if arguments.normalizeToUnitArea or arguments.normalizeToData:
            drawNormLabel = True
        if arguments.makeFancy:
            drawHeaderLabel = True
            drawLumiLabel = False

        # now that flags are set, draw the appropriate labels
        if drawLumiLabel:
            LumiLabel.Draw()
        if drawNormLabel:
            if offsetNormLabel:
                NormLabel.SetY1NDC(topLeft_y_bottom - topLeft_y_offset)
                NormLabel.SetY2NDC(topLeft_y_top - topLeft_y_offset)
            else:
                NormLabel.SetY1NDC(topLeft_y_bottom)
                NormLabel.SetY2NDC(topLeft_y_top)
            NormLabel.Draw()
        if drawHeaderLabel:
            HeaderLabel.Draw()

        # Summed background.  Nothing is added to the THStack when
        # noStack is set, so its GetStack() would be null in that case;
        # build the sum from the histograms directly instead.
        if arguments.noStack:
            BgSum = _sumHistograms(HistogramsToFit, "bgSum")
        else:
            BgSum = Stack_list[i].GetStack().Last()

        YieldsLabel.Clear()
        mcYield = BgSum.Integral()
        dataYield = Target.Integral()
        if i == 0:
            YieldsLabel.AddText("Before Fit to Data")
        if i == 1:
            YieldsLabel.AddText("After Fit to Data")
        YieldsLabel.AddText("data yield: " + '%.1f' % dataYield)
        YieldsLabel.AddText("bkgd yield: " + '%.1f' % mcYield)
        if mcYield != 0:
            YieldsLabel.AddText("data/bkgd: " + '%.2f' % (dataYield / mcYield))
        else:
            # An empty background would otherwise raise ZeroDivisionError.
            YieldsLabel.AddText("data/bkgd: n/a")

        if i == 1:
            for j in range(0, len(FittingLegendEntries)):
                if abs(ratios[j] - 1) < 0.001 and abs(errors[j]) < 0.001:  #then it probably was held fixed
                    continue
                if arguments.showFittedYields:
                    yield_ = ratios[j] * integrals[j]
                    yielderror_ = errors[j] * yield_
                    text = (FittingLegendEntries[j] + " yield: " +
                            '%.0f' % yield_ + ' #pm %.0f' % yielderror_)
                else:
                    text = (FittingLegendEntries[j] + " ratio: " +
                            '%.2f' % ratios[j] + ' #pm %.2f' % errors[j])
                text = text + " (fit)"
                if arguments.parametricErrors:
                    yield_ = ratios[j] * integrals[j]
                    yieldParError_ = parErrors[j] * yield_
                    if arguments.showFittedYields:
                        text += ' #pm %.2f' % yieldParError_
                    else:
                        text += ' #pm %.2f' % parErrors[j]
                    text = text + " (sys)"
                RatiosLabel.AddText(text)
        YieldsLabel.Draw()
        RatiosLabel.Draw()

        # drawing the ratio or difference plot if requested
        if (makeRatioPlots or makeDiffPlots):
            Canvas.cd(2)
            if makeRatioPlots:
                if arguments.ratioRelErrMax:
                    Comparison = ratioHistogram(Target, BgSum,
                                                arguments.ratioRelErrMax)
                else:
                    Comparison = ratioHistogram(Target, BgSum)
            elif makeDiffPlots:
                Comparison = Target.Clone("diff")
                Comparison.Add(BgSum, -1)
                Comparison.SetTitle("")
                Comparison.GetYaxis().SetTitle("Data-Bkgd")
            Comparison.GetXaxis().SetTitle(xAxisLabel)
            Comparison.GetYaxis().CenterTitle()
            Comparison.GetYaxis().SetTitleSize(0.1)
            Comparison.GetYaxis().SetTitleOffset(0.5)
            Comparison.GetXaxis().SetTitleSize(0.15)
            Comparison.GetYaxis().SetLabelSize(0.1)
            Comparison.GetXaxis().SetLabelSize(0.15)
            if makeRatioPlots:
                RatioYRange = 1.15
                if arguments.ratioYRange:
                    RatioYRange = float(arguments.ratioYRange)
                Comparison.GetYaxis().SetRangeUser(-1 * RatioYRange,
                                                   RatioYRange)
            elif makeDiffPlots:
                # Limits are always passed as (low, high); negative-only
                # content previously produced an inverted range.
                yLow, yHigh = _diffAxisRange(Comparison.GetMaximum(),
                                             Comparison.GetMinimum())
                Comparison.GetYaxis().SetRangeUser(yLow, yHigh)
            Comparison.GetYaxis().SetNdivisions(205)
            Comparison.Draw("E0")

        if i == 0:
            Canvas.Write(distribution['name'] + "_Before")
            if arguments.savePDFs:
                pathToDirString = plainTextString(pathToDir)
                Canvas.SaveAs(condor_dir + "/fitting_histogram_pdfs/" +
                              pathToDirString + "/" + distribution['name'] +
                              "_Before.pdf")
        if i == 1:
            Canvas.Write(distribution['name'] + "_After")
            if arguments.savePDFs:
                pathToDirString = plainTextString(pathToDir)
                Canvas.SaveAs(condor_dir + "/fitting_histogram_pdfs/" +
                              pathToDirString + "/" + distribution['name'] +
                              "_After.pdf")
def _stack_branches(tree, branch_names, entrystop):
    """Read *branch_names* from *tree* into one (events, branches) array."""
    columns = [tree.array(name, entrystop=entrystop) for name in branch_names]
    return np.vstack(columns).T


def _batch_normalize_columns(data, offset, scale):
    """Normalise every column of *data* with its own mean and variance.

    Each column is passed through the Keras backend batch_normalization
    with *offset* and *scale* as 4th and 5th positional arguments (named
    ``beta`` and ``gamma`` in the Keras backend signature).  Returns an
    array with the same (rows, columns) layout as *data*.
    """
    values = np.array(data)
    normalized = []
    for index in range(values.shape[1]):
        column = K.constant(values[:, index])
        normalized.append(
            K.eval(
                K.batch_normalization(column, K.mean(column), K.var(column),
                                      offset, scale)))
    return np.vstack(normalized).T


def train_and_apply():
    """Train a small dense autoencoder and plot one variable.

    Reads the MC-truth branches (training input) and the reconstructed
    branches (validation input) from "out_all.root", tree "outA/Tevts",
    normalises every variable, adds Gaussian noise, trains the
    autoencoder on the noisy training set and applies it to the noisy
    reconstructed set.  For the variable selected by options.branch
    (1-based) it draws the noisy input and the autoencoder output plus a
    ratio panel, and saves the canvas as "<branch>_comparision.png".

    Uses the module-level ``options`` object (maxevents, epochs, branch).
    Returns None.
    """
    np.random.seed(1)
    ROOT.gROOT.SetBatch()

    #Extract data from root file
    tree = uproot.open("out_all.root")["outA/Tevts"]
    branch_mc = [
        "MC_B_P", "MC_B_eta", "MC_B_phi", "MC_B_pt", "MC_D0_P", "MC_D0_eta",
        "MC_D0_phi", "MC_D0_pt", "MC_Dst_P", "MC_Dst_eta", "MC_Dst_phi",
        "MC_Dst_pt", "MC_Est_mu", "MC_M2_miss", "MC_mu_P", "MC_mu_eta",
        "MC_mu_phi", "MC_mu_pt", "MC_pis_P", "MC_pis_eta", "MC_pis_phi",
        "MC_pis_pt", "MC_q2"
    ]
    branch_rec = [
        "B_P", "B_eta", "B_phi", "B_pt", "D0_P", "D0_eta", "D0_phi", "D0_pt",
        "Dst_P", "Dst_eta", "Dst_phi", "Dst_pt", "Est_mu", "M2_miss", "mu_P",
        "mu_eta", "mu_phi", "mu_pt", "pis_P", "pis_eta", "pis_phi", "pis_pt",
        "q2"
    ]
    x_train = _stack_branches(tree, branch_mc, options.maxevents)
    x_test = _stack_branches(tree, branch_rec, options.maxevents)
    x_test = array2D_float(x_test)  #Different type of reconstruction variables

    #BN normalization
    # The values are passed positionally exactly as before: 0 lands on
    # the Keras ``beta`` (offset) parameter and 0.2 on ``gamma`` (scale).
    # The previous local names had the two swapped.
    bn_offset = 0
    bn_scale = 0.2
    x_train = _batch_normalize_columns(x_train, bn_offset, bn_scale)
    x_test = _batch_normalize_columns(x_test, bn_offset, bn_scale)

    #Add noise, remain to be improved
    noise = np.random.normal(loc=0.0, scale=0.01, size=x_train.shape)
    x_train_noisy = x_train + noise
    noise = np.random.normal(loc=0.0, scale=0.01, size=x_test.shape)
    x_test_noisy = x_test + noise
    x_train = np.clip(x_train, -1., 1.)
    x_test = np.clip(x_test, -1., 1.)
    x_train_noisy = np.clip(x_train_noisy, -1., 1.)
    x_test_noisy = np.clip(x_test_noisy, -1., 1.)

    # Network parameters
    input_shape = (x_train.shape[1], )
    batch_size = 128
    latent_dim = 2

    # Build the Autoencoder Model
    # First build the Encoder Model
    inputs = Input(shape=input_shape, name='encoder_input')
    x = inputs
    # Shape info needed to build Decoder Model
    shape = K.int_shape(x)
    # Generate the latent vector
    latent = Dense(latent_dim, name='latent_vector')(x)
    # Instantiate Encoder Model
    encoder = Model(inputs, latent, name='encoder')
    encoder.summary()

    # Build the Decoder Model
    latent_inputs = Input(shape=(latent_dim, ), name='decoder_input')
    x = Dense(shape[1])(latent_inputs)
    x = Reshape((shape[1], ))(x)
    outputs = Activation('tanh', name='decoder_output')(x)
    # Instantiate Decoder Model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()

    # Autoencoder = Encoder + Decoder
    # Instantiate Autoencoder Model
    autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
    autoencoder.summary()
    autoencoder.compile(loss='mse', optimizer='adam')

    # Train the autoencoder
    autoencoder.fit(x_train_noisy,
                    x_train,
                    validation_data=(x_test_noisy, x_test),
                    epochs=options.epochs,
                    batch_size=batch_size)

    # Predict the Autoencoder output from corrupted test information
    x_decoded = autoencoder.predict(x_test_noisy)

    # Draw Comparison Plots
    c = TCanvas("c", "c", 700, 700)
    fPads1 = TPad("pad1", "Run2", 0.0, 0.29, 1.00, 1.00)
    fPads2 = TPad("pad2", "", 0.00, 0.00, 1.00, 0.29)
    fPads1.SetBottomMargin(0.007)
    fPads1.SetLeftMargin(0.10)
    fPads1.SetRightMargin(0.03)
    fPads2.SetLeftMargin(0.10)
    fPads2.SetRightMargin(0.03)
    fPads2.SetBottomMargin(0.25)
    fPads1.Draw()
    fPads2.Draw()
    fPads1.cd()

    nbin = 50
    x_min = -1.
    x_max = 1.
    branch_index = options.branch - 1  # options.branch is 1-based
    lbin = str(float((x_max - x_min) / nbin))
    xtitle = branch_rec[branch_index]
    ytitle = "Events/" + lbin + "GeV"
    h_rec = TH1D("h_rec", "" + ";%s;%s" % (xtitle, ytitle), nbin, x_min,
                 x_max)
    h_rec.Sumw2()
    h_pre = TH1D("h_pre", "" + ";%s;%s" % (xtitle, ytitle), nbin, x_min,
                 x_max)
    h_pre.Sumw2()
    for i in range(x_test_noisy.shape[0]):
        h_rec.Fill(x_test_noisy[i][branch_index])
        h_pre.Fill(x_decoded[i][branch_index])
    h_rec = UnderOverFlow1D(h_rec)
    h_pre = UnderOverFlow1D(h_pre)
    maxY = TMath.Max(h_rec.GetMaximum(), h_pre.GetMaximum())

    h_rec.SetLineColor(2)
    h_rec.SetFillStyle(0)
    h_rec.SetLineWidth(2)
    h_rec.SetLineStyle(1)
    h_pre.SetLineColor(3)
    h_pre.SetFillStyle(0)
    h_pre.SetLineWidth(2)
    h_pre.SetLineStyle(1)
    h_rec.SetStats(0)
    h_pre.SetStats(0)
    h_rec.GetYaxis().SetRangeUser(0, maxY * 1.1)
    h_rec.Draw("HIST")
    h_pre.Draw("same HIST")
    h_rec.GetYaxis().SetTitleSize(0.06)
    h_rec.GetYaxis().SetTitleOffset(0.78)

    theLeg = TLegend(0.5, 0.45, 0.95, 0.82, "", "NDC")
    theLeg.SetName("theLegend")
    theLeg.SetBorderSize(0)
    theLeg.SetLineColor(0)
    theLeg.SetFillColor(0)
    theLeg.SetFillStyle(0)
    theLeg.SetLineWidth(0)
    theLeg.SetLineStyle(0)
    theLeg.SetTextFont(42)
    theLeg.SetTextSize(.05)
    theLeg.AddEntry(h_rec, "Reconstruction", "L")
    theLeg.AddEntry(h_pre, "Prediction", "L")
    theLeg.SetY1NDC(0.9 - 0.05 * 6 - 0.005)
    theLeg.SetY1(theLeg.GetY1NDC())
    fPads1.cd()
    theLeg.Draw()

    title = TLatex(
        0.91, 0.93, "AE prediction compare with reconstruction, epochs=" +
        str(options.epochs))
    title.SetNDC()
    title.SetTextSize(0.05)
    title.SetTextFont(42)
    title.SetTextAlign(31)
    title.SetLineWidth(2)
    title.Draw()

    # Ratio panel
    fPads2.cd()
    h_Ratio = h_pre.Clone("h_Ratio")
    h_Ratio.Divide(h_rec)
    h_Ratio.SetLineColor(1)
    h_Ratio.SetLineWidth(2)
    h_Ratio.SetMarkerStyle(8)
    h_Ratio.SetMarkerSize(0.7)
    h_Ratio.GetYaxis().SetRangeUser(0, 2)
    h_Ratio.GetYaxis().SetNdivisions(504, 0)
    h_Ratio.GetYaxis().SetTitle("Pre/Rec")
    h_Ratio.GetYaxis().SetTitleOffset(0.35)
    h_Ratio.GetYaxis().SetTitleSize(0.13)
    h_Ratio.GetYaxis().SetLabelSize(0.11)
    h_Ratio.GetXaxis().SetLabelSize(0.1)
    h_Ratio.GetXaxis().SetTitleOffset(0.8)
    h_Ratio.GetXaxis().SetTitleSize(0.14)
    h_Ratio.SetStats(0)
    axis1 = TGaxis(x_min, 1, x_max, 1, 0, 0, 0, "L")
    axis1.SetLineColor(1)
    axis1.SetLineWidth(1)

    # Every bin set by Divide() above is overwritten here.
    # NOTE(review): the loop stores D / B = reconstruction / prediction,
    # while the axis title (and the Divide call) say "Pre/Rec" - confirm
    # which orientation is intended.
    for i in range(1, h_Ratio.GetNbinsX() + 1, 1):
        D = h_rec.GetBinContent(i)
        eD = h_rec.GetBinError(i)
        if D == 0:
            eD = 0.92
        B = h_pre.GetBinContent(i)
        eB = h_pre.GetBinError(i)
        if B < 0.1 and eB >= B:
            eB = 0.92
        Err = 0.
        if B != 0.:
            Err = TMath.Sqrt((eD * eD) / (B * B) +
                             (D * D * eB * eB) / (B * B * B * B))
            h_Ratio.SetBinContent(i, D / B)
            h_Ratio.SetBinError(i, Err)
        if B == 0.:
            # Empty denominator: its substitute error takes its place.
            Err = TMath.Sqrt((eD * eD) / (eB * eB) +
                             (D * D * eB * eB) / (eB * eB * eB * eB))
            h_Ratio.SetBinContent(i, D / 0.92)
            h_Ratio.SetBinError(i, Err)
        if D == 0 and B == 0:
            # Both empty: push the point below the visible range.
            h_Ratio.SetBinContent(i, -1)
            h_Ratio.SetBinError(i, 0)
    h_Ratio.Draw("e0")
    axis1.Draw()

    c.SaveAs(branch_rec[branch_index] + "_comparision.png")