def __init__(self, filename, scalefactortxtfilename, treename, lumi,
             channel, savelabel):
    """Store the fit configuration and reset all result holders.

    Every argument is kept unchanged on the instance under a
    trailing-underscore attribute; ``filename`` is stored as
    ``filedict_`` (presumably a mapping of sample name to input file --
    confirm against the caller).
    """
    # Helper object that does the actual tree drawing.
    self.tmpplot = TreePlot()

    # Caller-supplied configuration.
    self.filedict_ = filename
    self.scalefactortxtfilename_ = scalefactortxtfilename
    self.treename_ = treename
    self.lumi_ = lumi
    self.channel_ = channel
    self.savelabel_ = savelabel

    # Mutable state, empty until the fit is run.
    self.batch_ = False
    self.constrains_ = []
    self.histograms_, self.multiplicitylabel_ = {}, {}

    # Fit outputs.
    self.fit_ = None
    self.qcdfraction_ = self.qcdfractionerror_ = 0.0
def __init__(self, CHANNEL):
    """Create the plotting helpers and reset all plot settings.

    CHANNEL is 'mu' or 'el'; it is handed to ``Samples`` and also kept
    on the instance as ``channel``.
    """
    self.channel = CHANNEL

    # Helper objects (constructed in the same relative order as before).
    self.thstack = THStack("MC", "MC")
    self.samples = Samples(CHANNEL)
    self.tmpplot = TreePlot()
    self.c1 = TCanvas()

    # Binning: number of bins and axis range.
    self.bin_ = 0
    self.min_ = self.max_ = 0.

    # Selection, weight and output settings.
    self.cutname_ = ""
    self.weightname_ = ""
    self.plotdir_ = ""
    self.setlogy_ = False

    # Per-sample containers.
    self.drawhistograms = {}
    self.multiplicitylabel = {}
def grabTree(filename):
    """Read a pickled object back from ``filename`` and return it.

    The file is opened in binary mode and closed again before returning,
    even if unpickling raises.

    NOTE: ``pickle.load`` can execute arbitrary code while loading; only
    use this on files this program wrote itself.
    """
    with open(filename, "rb") as fr:
        return pickle.load(fr)


if __name__ == '__main__':
    # Create the example data set.
    dataSet, labels = createDataSet()
    # Keep a separate copy of the labels for the classification call below;
    # the original list is handed to createTree() (presumably mutated
    # there -- verify).
    lebelsCopy = labels[:]
    # Learn / build the decision tree.
    tree = createTree(dataSet, labels)
    print(tree)
    # Draw the decision tree.
    TreePlot.createPlot(tree)
    # Serialize the tree structure to disk.
    storeTree(tree, "object.txt")
    # Read the tree structure back from the file.
    myTree = grabTree("object.txt")
    print(myTree)
    print(classifyDecisionTree(myTree, lebelsCopy, [1, 1]))
firstLabelIndex = labels.index(firstLabel) # 属性对应的index secondDict = tree[firstLabel] value = testData[firstLabelIndex] # 属性值 if type(secondDict[value]).__name__ == "dict": # 假如还是一棵树则递归 classLabel = classifyDecisionTree(secondDict[value], labels, testData) else: classLabel = secondDict[value] return classLabel if __name__ == '__main__': # 读取数据 lensesData, lensesLable = loadDataSet("lenses.txt") # 复制属性标签, # createTree()操作会影响传入的类别标签 lensesLableCopy = lensesLable[:] # 创建树 decisionTree = createTree(lensesData, lensesLableCopy) print(decisionTree) # 对树进行绘图 TreePlot.createPlot(decisionTree) # 进行预测 classLabel = classifyDecisionTree(decisionTree, lensesLable, ["young", "hyper", "yes", "normal"]) print(classLabel)
classCount[vote]=0 classCount[vote]+=1 sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True) return sortedClassCount[0][0] def createTree(dataSet,labels): classList=[example[-1] for example in dataSet] # 类别:男或女 if classList.count(classList[0])==len(classList): return classList[0] if len(dataSet[0])==1: return majorityCnt(classList) bestFeat=chooseBestFeatureToSplit(dataSet) #选择最优特征 bestFeatLabel=labels[bestFeat] myTree={bestFeatLabel:{}} #分类结果以字典形式保存 del(labels[bestFeat]) featValues=[example[bestFeat] for example in dataSet] uniqueVals=set(featValues) for value in uniqueVals: subLabels=labels[:] myTree[bestFeatLabel][value]=createTree(splitDataSet\ (dataSet,bestFeat,value),subLabels) return myTree if __name__=='__main__': dataSet, labels=createDataSet1() # 创造示列数据 mytree=createTree(dataSet, labels) # 输出决策树模型结果 print mytree TreePlot.createPlot(mytree)
__author__ = 'wanghao'

"""
the glasses examples
"""

import ID3Tree
import TreePlot

# Read the raw samples; the context manager closes the file even if
# reading fails (the handle used to be left open).
with open('./lenses.txt') as fr:
    DataList = fr.readlines()

# One sample per line, fields separated by tabs.
DataSet = [data.strip().split('\t') for data in DataList]

# Written as single-argument print calls so the output is the same text
# under Python 2 and Python 3 (two spaces: literal one + print separator).
print("The dateSet is  %s" % (DataSet,))

Labels = ['age', 'prescript', 'astigmatic', 'tearRate']
LenseTree = ID3Tree.createTree(DataSet, Labels)
print("the result ID3 Tree is  %s" % (LenseTree,))

# Draw the resulting tree.
TreePlot.createPlot(LenseTree)
class ProducePlot:
    """Draw a stacked MC vs. data plot with a (Data - MC) / MC panel.

    Typical use: construct with the channel, configure through the
    ``Set*`` methods, then call ``DrawTHStack`` once per variable.

    NOTE(review): ``self.thstack`` and ``self.drawhistograms`` are created
    once in ``__init__`` and only ever added to, so calling ``DrawTHStack``
    twice on the same instance looks like it stacks the second set of
    histograms on top of the first -- confirm whether one instance per
    plot is the intended usage.
    """

    def __init__(self, CHANNEL):
        """Create helper objects; CHANNEL = 'mu', or 'el'."""
        self.thstack = THStack("MC", "MC")
        self.samples = Samples(CHANNEL)
        self.drawhistograms = {}
        self.tmpplot = TreePlot()
        self.bin_ = 0
        self.min_ = 0.
        self.max_ = 0.
        self.cutname_ = ""
        self.weightname_ = ""
        self.plotdir_ = ""
        self.setlogy_ = False
        self.channel = CHANNEL
        self.multiplicitylabel = {}
        self.c1 = TCanvas()

    def SetBinMinMax(self, bin, min, max):
        """Set the binning; values are coerced with int()/float()."""
        self.bin_ = int(bin)
        self.min_ = float(min)
        self.max_ = float(max)

    def SetCutWeightName(self, cutname, weightname):
        """Set the selection string and the weight expression."""
        self.cutname_ = cutname
        self.weightname_ = weightname

    def SetFilePath(self, filepath):
        """Forward the input file path to the sample catalogue."""
        self.samples.SetFilePath(filepath)

    def SetLumiTree(self, lumi, treename):
        """Forward luminosity and tree name to the sample catalogue."""
        self.samples.SetLumi(lumi)
        self.samples.SetTreeName(treename)

    def SetPlotDir(self, plotdir):
        """Set the output directory (relative to the working directory)."""
        self.plotdir_ = plotdir

    def SetLogy(self, setlogy):
        """Enable/disable a logarithmic y axis on the main pad.

        Accepts the strings "True"/"False" (as before) and, additionally,
        the booleans True/False.  Any other value leaves the current
        setting unchanged.
        """
        if setlogy is True or setlogy == "True":
            self.setlogy_ = True
        elif setlogy is False or setlogy == "False":
            self.setlogy_ = False

    def SetChannel(self, Channel):
        """Override the channel label used in the banner and file names."""
        self.channel = Channel

    @staticmethod
    def _HasSamples(filenames, *names):
        """Return True when every name in ``names`` is a key of ``filenames``."""
        return all(name in filenames for name in names)

    @staticmethod
    def _SanitizeCutTitle(latexcuttitle):
        """Turn the cut title into a string usable inside a file name.

        The replacement words (including the spelling "euqal") are kept
        exactly as they were so that output file names do not change.
        """
        title = latexcuttitle.replace(" ", "_")
        title = title.replace("&&", "")
        title = title.replace("__", "_")
        title = title.replace(">", "large")
        title = title.replace("=", "euqal")
        title = title.replace("<", "small")
        return title.replace("#", "")

    @staticmethod
    def _BuildErrorBand(allmc1D):
        """Build the MC uncertainty band from the summed MC histogram.

        The band is statistical only: the bin error of ``allmc1D`` is used
        as both the lower and the upper y error, and half the bin width as
        the x error.  (The previous code also computed 15% ttbar, 30%
        W+jets and 2.2% luminosity terms but immediately overwrote the
        combined value with the statistical error, so they never entered
        the band.)
        """
        nbins = allmc1D.GetNbinsX()
        bins = list(range(1, nbins + 1))
        halfwidths = [0.5 * allmc1D.GetBinWidth(ibin) for ibin in bins]
        staterrors = [allmc1D.GetBinError(ibin) for ibin in bins]
        xvalue = array('d', [allmc1D.GetBinCenter(ibin) for ibin in bins])
        yvalue = array('d', [allmc1D.GetBinContent(ibin) for ibin in bins])
        xlefterror = array('d', halfwidths)
        xrighterror = array('d', halfwidths)
        ylowerror = array('d', staterrors)
        yhigherror = array('d', staterrors)
        band = TGraphAsymmErrors(nbins, xvalue, yvalue, xlefterror,
                                 xrighterror, ylowerror, yhigherror)
        band.SetName("MC Uncerntainty")
        band.SetFillColor(920 + 3)
        band.SetFillStyle(3008)
        return band

    def _FillHistograms(self, filenames, drawname, index, xtitle, ytitle,
                        scalefactortxtfilename):
        """Fill ``self.drawhistograms`` with one histogram per sample."""
        ttbarregionsignal = ""
        for filetype in list(filenames):
            cutname = self.cutname_
            # In the ttbar control plots, samples whose name contains "H"
            # are drawn without the last "&&"-separated cut term.
            if self.plotdir_ == "controlPlots_ttbar" and "H" in str(filetype):
                cutname = "&&".join(self.cutname_.split("&&")[:-1])
            self.tmpplot.SetBinMinMax(self.bin_, self.min_, self.max_)
            self.tmpplot.SetCutWeightName(cutname, self.weightname_)
            # Booked under the sample name and kept alive while DrawTrees
            # runs; presumably DrawTrees fills it by name -- TODO confirm.
            tmphistogram = TH1D(filetype, filetype,
                                self.bin_, self.min_, self.max_)
            if filetype != "data":
                scalefactor = "%s:%s" % (scalefactortxtfilename, filetype)
                lumi = self.samples.GetLumi()
            else:
                # Data is drawn with unit scale factor and luminosity.
                scalefactor = 1.0
                lumi = 1.0
            self.tmpplot.DrawTrees(filenames[filetype],
                                   self.samples.GetTreeName(), drawname,
                                   index, xtitle, ytitle, scalefactor, lumi,
                                   filetype, self.drawhistograms,
                                   self.multiplicitylabel, ttbarregionsignal)
            self.drawhistograms[filetype].SetStats(kFALSE)

    def DrawTHStack(self, drawname, index, xtitle, ytitle,
                    scalefactortxtfilename, n, ymin, latexcuttitle):
        """Draw the stack, data, signal and ratio panel and save the canvas.

        Parameters:
            drawname: expression handed to ``DrawTrees``; also part of the
                canvas name and the output file names.
            index: passed through to ``DrawTrees``.
            xtitle, ytitle: axis titles (``xtitle`` is also used for the
                ratio panel).
            scalefactortxtfilename: combined with each MC sample name as
                "<file>:<sample>" and passed to ``DrawTrees``.
            n: the stack maximum is multiplied by ``float(n)``.
            ymin: stack minimum, converted with ``float``.
            latexcuttitle: shown under the KS value in the ratio panel and,
                in sanitised form, used in the output file names.

        The samples "data", "TTbar" and "ggH600" must be present, as well
        as either Wbb/Wcc/Wlight or "WJets_Pythia"; otherwise a KeyError is
        raised.  The canvas is written as .png, .eps and .pdf into
        ``<cwd>/<plotdir>/``.
        """
        self.samples.SetFileNames()
        print("We are ploting the variable: " + drawname)
        print("We are using the cut: " + self.cutname_)
        print("We are using the weight: " + self.weightname_)

        filenames = self.samples.GetFileNames()
        self._FillHistograms(filenames, drawname, index, xtitle, ytitle,
                             scalefactortxtfilename)
        hists = self.drawhistograms

        # ---------------------------- Data ----------------------------
        data = hists["data"]
        data.SetMarkerStyle(20)
        data.SetMarkerSize(1)
        data.Sumw2()

        # ---------------------------- TTbar ---------------------------
        tt1D = hists["TTbar"]
        tt1D.SetFillColor(kRed + 1)

        # ---------------------------- W+jets --------------------------
        if self._HasSamples(filenames, "Wbb", "Wcc", "Wlight"):
            wjet1D = hists["Wbb"] + hists["Wcc"] + hists["Wlight"]
        else:
            wjet1D = hists["WJets_Pythia"]
        wjet1D.SetFillColor(kGreen - 3)

        # ---------------------------- QCD -----------------------------
        # Stays an empty histogram unless all four QCD samples exist.
        qcd1D = TH1D("qcd1D", "qcd1D", self.bin_, self.min_, self.max_)
        qcd1D.Sumw2()
        qcd1D.SetStats(kFALSE)
        if self._HasSamples(filenames, "QCDBCtoE3080", "QCDBCtoE80170",
                            "QCDEmEn3080", "QCDEmEn80170"):
            qcd1D = (hists["QCDBCtoE3080"] + hists["QCDBCtoE80170"]
                     + hists["QCDEmEn3080"] + hists["QCDEmEn80170"])
        qcd1D.SetFillColor(kYellow)

        # ---------------------------- Dibosons ------------------------
        diboson1D = TH1D("diboson1D", "diboson1D",
                         self.bin_, self.min_, self.max_)
        diboson1D.Sumw2()
        if self._HasSamples(filenames, "WW", "WZ", "ZZ"):
            diboson1D = hists["WW"] + hists["WZ"] + hists["ZZ"]
        elif self._HasSamples(filenames, "WW", "WZ"):
            diboson1D = hists["WW"] + hists["WZ"]
        diboson1D.SetStats(kFALSE)
        diboson1D.SetFillColor(kBlue)

        # ---------------------------- Single top ----------------------
        sumsttop1D = TH1D("sumsttop1D", "sumsttop1D",
                          self.bin_, self.min_, self.max_)
        sumsttop1D.Sumw2()
        if self._HasSamples(filenames, "tch", "tWch", "sch",
                            "tch_bar", "tWch_bar", "sch_bar"):
            sumsttop1D = (hists["tch"] + hists["tWch"] + hists["sch"]
                          + hists["tch_bar"] + hists["tWch_bar"]
                          + hists["sch_bar"])
        sumsttop1D.SetStats(kFALSE)
        sumsttop1D.SetFillColor(kMagenta)

        # ---------------------------- Z+jets --------------------------
        haszjets = "ZJets" in filenames
        zjet1D = TH1D("zjet1D", "zjet1D", self.bin_, self.min_, self.max_)
        zjet1D.Sumw2()
        if haszjets:
            zjet1D = hists["ZJets"]
        zjet1D.SetStats(kFALSE)
        zjet1D.SetFillColor(kAzure - 3)

        # ---------------------- (Data - MC) / MC ----------------------
        allmc1D = tt1D + wjet1D + diboson1D + sumsttop1D + zjet1D
        if qcd1D.GetEntries() > 0:
            allmc1D = allmc1D + qcd1D
        allmc1D.SetStats(kFALSE)

        dataminusmc1D = TH1D("dataminusmc1D", "dataminusmc1D",
                             self.bin_, self.min_, self.max_)
        dataminusmc1D.Sumw2()
        dataminusmc1D.Add(data, allmc1D, 1., -1.)
        dataminusmc1D.SetStats(kFALSE)

        dataovermc1D = TH1D("dataovermc1D", "dataovermc1D",
                            self.bin_, self.min_, self.max_)
        dataovermc1D.Sumw2()
        dataovermc1D.Divide(dataminusmc1D, allmc1D, 1, 1, "B")
        # Fix: this used to switch the stat box off on dataminusmc1D a
        # second time, leaving it enabled on the histogram actually drawn.
        dataovermc1D.SetStats(kFALSE)
        dataovermc1D.SetXTitle(xtitle)
        dataovermc1D.SetYTitle("(Data - MC) / MC")
        dataovermc1D.SetMarkerStyle(20)
        dataovermc1D.SetMarkerSize(1)
        dataovermc1D.SetTitle("")
        dataovermc1D.SetTitleSize(0.05, "X")
        dataovermc1D.SetTitleSize(0.05, "Y")
        dataovermc1D.GetYaxis().SetRangeUser(-1, 1)

        datamcksresult_ = data.KolmogorovTest(allmc1D)

        # ---------------------------- Error band ----------------------
        mc1Derror = self._BuildErrorBand(allmc1D)

        # ---------------------------- Stack ---------------------------
        self.thstack.SetHistogram(tt1D)
        if tt1D.GetEntries() > 0:
            self.thstack.Add(tt1D)
        if wjet1D.GetEntries() > 0:
            self.thstack.Add(wjet1D)
        if sumsttop1D.GetEntries() > 0:
            self.thstack.Add(sumsttop1D)
        if haszjets and zjet1D.GetEntries() > 0:
            self.thstack.Add(zjet1D)
        if diboson1D.GetEntries() > 0:
            self.thstack.Add(diboson1D)
        if qcd1D.GetEntries() > 0:
            self.thstack.Add(qcd1D)
        self.thstack.SetMaximum(float(n) * self.thstack.GetMaximum())
        self.thstack.SetMinimum(float(ymin))

        # ---------------------------- Legend --------------------------
        L = TLegend(0.7270115, 0.6510417, 0.9971264, 0.9300595)
        L.SetBorderSize(0)
        L.SetLineStyle(0)
        L.SetTextFont(42)
        L.SetFillStyle(0)
        L.SetMargin(0.12)
        L.SetTextSize(0.025)
        L.SetFillColor(10)
        if data.GetEntries() > 0:
            L.AddEntry(data, "Data", "lp")
        if tt1D.GetEntries() > 0:
            L.AddEntry(tt1D, "t#bar{t}", "f")
        if wjet1D.GetEntries() > 0:
            L.AddEntry(wjet1D, "W#rightarrowl#nu", "f")
        if haszjets and zjet1D.GetEntries() > 0:
            # "\\*" is the same text as the former "\*", minus the
            # invalid-escape warning.
            L.AddEntry(zjet1D, "Z/#gamma^{\\*}#rightarrowl^{+}l^{-}", "f")
        if qcd1D.GetEntries() > 0:
            L.AddEntry(qcd1D, "QCD", "f")
        if diboson1D.GetEntries() > 0:
            L.AddEntry(diboson1D, "Dibosons", "f")
        if sumsttop1D.GetEntries() > 0:
            L.AddEntry(sumsttop1D, "Single-Top", "f")
        signal = hists["ggH600"]
        if self.multiplicitylabel["ggH600"] != 1.0:
            L.AddEntry(signal,
                       "ggHWW 600 #times %d" % self.multiplicitylabel["ggH600"],
                       "f")
        else:
            L.AddEntry(signal, "ggHWW 600", "f")

        banner = TLatex(
            0.25, 0.88,
            ("#splitline{CMS Preliminary}{%.1f fb^{-1} at #sqrt{s}=8TeV %s+jets}"
             % (self.samples.GetLumi(), self.channel)))
        banner.SetNDC()
        banner.SetTextSize(0.035)
        tl = TLatex(0.25, 0.85,
                    ("#splitline{KS=%.4f}{%s}"
                     % (datamcksresult_, latexcuttitle)))
        tl.SetNDC()
        tl.SetTextSize(0.06)

        # ---------------------------- Canvas --------------------------
        self.c1 = TCanvas(drawname + self.cutname_, drawname + self.cutname_,
                          10, 10, 700, 700)
        self.c1.cd()
        pad1 = TPad("pad1", "pad1", 0.00, 0.25, 1.00, 0.97)  # main plot
        pad2 = TPad("pad2", "pad2", 0.00, 0.00, 1.00, 0.25)  # ratio panel
        pad1.SetFillColor(0)
        pad2.SetFillColor(0)
        pad1.Draw()
        pad2.Draw()
        pad1.SetTicks(1, 1)
        pad2.SetTicks(1, 1)
        pad2.SetGridx()
        pad2.SetGridy()

        pad1.cd()
        if self.setlogy_:
            pad1.SetLogy()
        self.thstack.Draw("hist")
        mc1Derror.Draw("2 same")
        data.Draw("Esame")
        signal.SetLineStyle(2)
        signal.Draw("histsame")
        L.Draw()
        banner.Draw()

        pad2.cd()
        dataovermc1D.Draw()
        tl.Draw()

        # ---------------------------- Output --------------------------
        PWD = os.getcwd() + "/"
        outdir = PWD + self.plotdir_
        if os.path.isdir(outdir):
            print("%s direcotry has aleady been created" % (self.plotdir_))
        else:
            # Replaces a shell "mkdir": no shell quoting issues, and
            # missing parent directories are created as well.
            os.makedirs(outdir)
        basename = (outdir + "/" + self.channel + "_" + drawname + "_"
                    + self._SanitizeCutTitle(latexcuttitle))
        for extension in (".png", ".eps", ".pdf"):
            self.c1.Print(basename + extension)
        self.c1.Update()
class QCDFractionFitter: def __init__(self, filename, scalefactortxtfilename, treename, lumi, channel, savelabel): self.tmpplot = TreePlot() self.batch_ = False self.filedict_ = filename self.scalefactortxtfilename_ = scalefactortxtfilename self.treename_ = treename self.histograms_ = {} self.multiplicitylabel_ = {} self.lumi_ = lumi self.channel_ = channel self.savelabel_ = savelabel self.constrains_ = [] self.fit_ = None self.qcdfraction_ = 0.0 self.qcdfractionerror_ = 0.0 def SetBatch(self): self.batch_ = True def SetVariableCutWeightName(self, variable, cutname, weightname): self.variable_ = variable self.cutname_ = cutname self.weightname_ = weightname def SetXtitleYtitle(self, xtitle, ytitle): self.xtitle_ = xtitle self.ytitle_ = ytitle def SetBinMinMax(self, bin, min, max): self.bin_ = bin self.min_ = min self.max_ = max def SetConstrainPartLabel(self, constrains): self.constrains_ = constrains def SetAnalysisCut(self, metcut): self.metcut_ = metcut def GetQCDFractionResult(self): return [self.qcdfraction_, self.qcdfractionerror_] def FitAndPlot(self, fitpattern): #####Prepare the Histogram to the TFractionFitter#################### print "We are Fitting the variable: " + self.variable_ print "We are using the cut: " + self.cutname_ print "We are using the weight: " + self.weightname_ ttbarregionsignal = "" for tmpfiletype in self.filedict_.keys(): tmpcutname = self.cutname_ self.tmpplot.SetBinMinMax(self.bin_, self.min_, self.max_) self.tmpplot.SetCutWeightName(tmpcutname, self.weightname_) if (tmpfiletype != "data" and tmpfiletype != "QCD" and tmpfiletype != "WJets"): self.tmpplot.DrawTrees( self.filedict_[tmpfiletype], self.treename_, self.variable_, 0, self.xtitle_, self.ytitle_, "%s:%s" % (self.scalefactortxtfilename_, tmpfiletype), self.lumi_, tmpfiletype, self.histograms_, self.multiplicitylabel_, ttbarregionsignal) elif (tmpfiletype == "WJets"): self.tmpplot.SetCutWeightName( tmpcutname, self.weightname_ + "*W_nParton_weight") self.tmpplot.DrawTrees( 
self.filedict_[tmpfiletype], self.treename_, self.variable_, 0, self.xtitle_, self.ytitle_, "%s:%s" % (self.scalefactortxtfilename_, tmpfiletype), self.lumi_, tmpfiletype, self.histograms_, self.multiplicitylabel_, ttbarregionsignal) elif (tmpfiletype == "data"): self.tmpplot.DrawTrees(self.filedict_[tmpfiletype], self.treename_, self.variable_, 0, self.xtitle_, self.ytitle_, 1.0, 1.0, tmpfiletype, self.histograms_, self.multiplicitylabel_, ttbarregionsignal) elif (tmpfiletype == "QCD"): self.tmpplot.SetCutWeightName( tmpcutname + "&&((EWK_W_2jets_l_tagjet2_deltaR<EWK_W_2jets_l_tagjet1_deltaR?EWK_W_2jets_l_tagjet2_deltaR:EWK_W_2jets_l_tagjet1_deltaR)>1.0)", self.weightname_) self.tmpplot.DrawTrees(self.filedict_[tmpfiletype], self.treename_, self.variable_, 0, self.xtitle_, self.ytitle_, 1.0, 1.0, tmpfiletype, self.histograms_, self.multiplicitylabel_, ttbarregionsignal) if fitpattern == "QCDfraction": wjet1D = TH1D("wjet1D", "wjet1D", self.bin_, self.min_, self.max_) firstbin = 1 lastbin = self.histograms_["data"].GetNbinsX() dataentries = self.histograms_["data"].Integral(firstbin, lastbin) print "data Entries: " + str(dataentries) self.histograms_["QCD"].Scale( dataentries / self.histograms_["QCD"].Integral(firstbin, lastbin + 1)) wjet1D = self.histograms_["WJets"] + self.histograms_[ "W1Jets"] + self.histograms_["W2Jets"] + self.histograms_[ "W3Jets"] + self.histograms_["W4Jets"] wjet1D.Scale(dataentries / wjet1D.Integral(firstbin, lastbin + 1)) constrainfaction = 0. 
tconstrain1D = TH1D("tconstrain", "tconstrain", self.bin_, self.min_, self.max_) if len(self.constrains_) > 0: for icons in self.constrains_: tconstrain1D.Add(self.histograms_[icons]) #tconstrain1D.Scale(dataentries/tconstrain1D.Integral(firstbin,lastbin)) constrainfaction = tconstrain1D.Integral(firstbin, lastbin) / dataentries print "TTbar constrainfaction: " + str(constrainfaction) tconstrain1D.Scale( dataentries / tconstrain1D.Integral(firstbin, lastbin + 1)) mc = TObjArray() mc.Add(self.histograms_["QCD"]) mc.Add(wjet1D) if len(self.constrains_) > 0: mc.Add(tconstrain1D) self.fit_ = TFractionFitter(self.histograms_["data"], mc) self.fit_.Constrain(1, 0.0, 1.0) self.fit_.Constrain(2, 0.0, 1.0) if len(self.constrains_) > 0: #self.fit_.Constrain(3,constrainfaction - 0.0001*constrainfaction,constrainfaction + 0.0001*constrainfaction) self.fit_.Constrain(3, 0.0, 1.0) self.fit_.SetRangeX(firstbin, lastbin) status = self.fit_.Fit() print "fit status: " + str(status) self.SetUpPlotEnviroment() if (self.batch_): gROOT.SetBatch() canvas = TCanvas("c1", "c1", 10, 10, 700, 700) lgnd = TLegend(0.6, 0.6, 0.90, 0.9) lgnd.SetFillColor(kWhite) result = self.fit_.GetPlot() result.SetFillColor(kYellow) self.histograms_["data"].SetMinimum(0) self.histograms_["data"].Draw("Ep") result.Draw("histsame") frac_value = array('d', [0.]) frac_error = array('d', [0.]) self.fit_.GetResult(0, frac_value, frac_error) integralanalysisbin = self.histograms_["data"].FindBin( self.metcut_) dataentriesanalysisbin = self.histograms_["data"].Integral( integralanalysisbin, lastbin) qcdintegralanalysisbin = self.histograms_["QCD"].Integral( integralanalysisbin, lastbin) wjetsintegralanalysisbin = wjet1D.Integral(integralanalysisbin, lastbin) qcdfrac_valuemetcut = frac_value[ 0] * qcdintegralanalysisbin / dataentriesanalysisbin print "QCD: frac_value= " + str(qcdfrac_valuemetcut) + "+/-" + str( frac_error[0]) + " NEvts=" + str( dataentries * qcdfrac_valuemetcut) + "+/-" + str( dataentries * 
frac_error[0]) self.qcdfraction_ = qcdfrac_valuemetcut #self.qcdfraction_ = frac_value[0] self.qcdfractionerror_ = frac_error[0] self.histograms_["QCD"].Scale(frac_value[0]) self.histograms_["QCD"].SetLineWidth(2) self.histograms_["QCD"].SetLineColor(kBlue) self.histograms_["QCD"].Draw("histesame") self.fit_.GetResult(1, frac_value, frac_error) wjetsfrac_valuemetcut = frac_value[ 0] * wjetsintegralanalysisbin / dataentriesanalysisbin print "WJets: frac_value= " + str( wjetsfrac_valuemetcut) + "+/-" + str( frac_error[0]) + " NEvts=" + str( dataentries * wjetsfrac_valuemetcut) + "+/-" + str( dataentries * frac_error[0]) #self.histograms_["WJets"].Scale(frac_value[0]) #self.histograms_["WJets"].SetLineWidth(2) #self.histograms_["WJets"].SetLineColor(kRed) #self.histograms_["WJets"].Draw("histesame") wjet1D.Scale(frac_value[0]) wjet1D.SetLineWidth(2) wjet1D.SetLineColor(kRed) wjet1D.Draw("histesame") if len(self.constrains_) > 0: self.fit_.GetResult(2, frac_value, frac_error) tconstrain1D.Scale(frac_value[0]) #tconstrain1D.Scale(frac_value[0]*dataentries/tconstrain1D.Integral(firstbin,lastbin+1)) tconstrain1D.SetLineWidth(2) tconstrain1D.SetLineColor(kGreen) tconstrain1D.Draw("histsame") lgnd.AddEntry(result, "TemplatePrediction", "f") lgnd.AddEntry(self.histograms_["data"], "data", "p") lgnd.AddEntry(self.histograms_["QCD"], "QCD", "l") #lgnd.AddEntry(self.histograms_["WJets"], "WJetsMC","l") lgnd.AddEntry(wjet1D, "WJetsMC", "l") if len(self.constrains_) > 0: #lgnd.AddEntry(tconstrain1D, "Fixed Processes", "l") lgnd.AddEntry(tconstrain1D, "Other Processes", "l") chi2ndof = -999.0 if len(self.constrains_) > 0: print "FitChiSquare/ndof= " + str(self.fit_.GetChisquare() / (self.bin_ - 3)) chi2ndof = self.fit_.GetChisquare() / (self.bin_ - 3) else: print "FitChiSquare/ndof= " + str(self.fit_.GetChisquare() / (self.bin_ - 2)) chi2ndof = self.fit_.GetChisquare() / (self.bin_ - 2) print "FitProbatility= " + str(self.fit_.GetProb()) latex = TLatex(0.60, 0.50, "#chi2/ndof=%s" 
% (round(chi2ndof, 2))) latex.SetNDC() latex.SetTextSize(0.035) self.histograms_["data"].SetMarkerStyle(20) self.histograms_["data"].SetMarkerSize(1) self.histograms_["data"].Draw("Esame") lgnd.Draw() latex.Draw() if len(self.constrains_) > 0: self.channel_ = self.channel_ + "constrainotherprocess" self.variable_ = self.variable_.replace(".", "_") self.savelabel_ = self.savelabel_.replace(".", "_") canvas.SaveAs("QCDFractionFit/" + self.channel_ + "_" + self.variable_ + self.savelabel_ + "_qcdfraction.png") canvas.SaveAs("QCDFractionFit/" + self.channel_ + "_" + self.variable_ + self.savelabel_ + "_qcdfraction.pdf") canvas.SaveAs("QCDFractionFit/" + self.channel_ + "_" + self.variable_ + self.savelabel_ + "_qcdfraction.eps") def SetUpPlotEnviroment(self): if os.path.isfile('tdrstyle.C'): gROOT.ProcessLine('.L tdrstyle.C') ROOT.setTDRStyle() print "Found tdrstyle.C file, using this style." HasCMSStyle = "True" if os.path.isfile('CMSTopStyle.cc'): gROOT.ProcessLine('.L CMSTopStyle.cc+') style = CMSTopStyle() style.setupICHEPv1() print "Found CMSTopStyle.cc file, use TOP style if requested in xml file."
# Report the size of the ID3 tree, then draw it.  The prints are written
# as single-argument calls so they emit the same text under Python 2 and
# Python 3 (two spaces: the literal one plus the former print separator).
print("---------------------------------------\n")

# Get the leaf Node
LeafNodeNum = ID3Tree.getNumLeafs(myTree)
print("This ID3 Tree leaf node num is  %s" % (LeafNodeNum,))
print("---------------------------------------\n")

# Get the depth of the ID3
MaxDepth = ID3Tree.getTreeDepth(myTree)
print("This ID3 tree max depth is  %s" % (MaxDepth,))
print("---------------------------------------\n")

# Plot the tree
TreePlot.createPlot(myTree)
print("---------------------------------------\n")
import os

from TestTrees import calcShannonEnt
from TestTrees import chooseBestFeatureToSplit
from TestTrees import createTree
import TestTrees
import TreePlot


def createDataSet():
    """Return the small example data set and its feature labels.

    Each row is ``[no surfacing, flippers, class]`` where the class is
    'yes' or 'no'; ``labels`` names the two feature columns.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


# Build the path with os.path.join instead of a hard-coded backslash: the
# old literal relied on the invalid escape "\l" and only named the intended
# file on Windows.  The context manager also closes the file again.
with open(os.path.join('matpilb', 'lenses.txt')) as fr:
    # One sample per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['ages', 'prescript', 'astigmatic', 'tearRate']
print(lenses)
lensesTree = TestTrees.createTree(lenses, lensesLabels)
TreePlot.createPlot(lensesTree)
'>=' + str(splite))] = CreateTree( son_data, label_lisan, label_lianxu) elif j[feature] < splite and i == 1: son_data.append(j) if len(son_data) == 0: return leaf(data) else: Tree[labels_word[feature]][str( '<' + str(splite))] = CreateTree( son_data, label_lisan, label_lianxu) else: for j in data: if j[feature] == label_value[feature][i]: son_data.append(j) if len(son_data) == 0: return leaf(data) else: Tree[labels_word[feature]][ label_value[feature][i]] = CreateTree( son_data, label_lisan, labels_lianxu) return Tree if __name__ == '__main__': Tree = CreateTree(dataSet, labels_lisan, labels_lianxu) TreePlot.createPlot(Tree)