Example #1
0
    def __init__(self, filename, scalefactortxtfilename, treename, lumi,
                 channel, savelabel):
        """Remember the fit inputs and reset every result holder.

        Each argument is stored unchanged on the instance: ``filename`` as
        ``filedict_``, ``scalefactortxtfilename`` as
        ``scalefactortxtfilename_``, ``treename`` as ``treename_``, ``lumi``
        as ``lumi_``, ``channel`` as ``channel_`` and ``savelabel`` as
        ``savelabel_``.
        """
        # Plotting helper and the batch switch (off by default).
        self.tmpplot = TreePlot()
        self.batch_ = False
        # Caller-supplied inputs.
        self.filedict_ = filename
        self.scalefactortxtfilename_ = scalefactortxtfilename
        self.treename_ = treename
        # Containers that start out empty.
        self.histograms_, self.multiplicitylabel_ = {}, {}
        self.lumi_, self.channel_, self.savelabel_ = lumi, channel, savelabel
        # No constraint list entries and no fitter object yet.
        self.constrains_, self.fit_ = [], None
        # Fit result and its error, both zero until a fit has been stored.
        self.qcdfraction_, self.qcdfractionerror_ = 0.0, 0.0
     def __init__(self,CHANNEL):#CHANNEL = 'mu', or 'el'
          """Create the helper objects and the default plot settings.

          CHANNEL is the lepton channel ('mu' or 'el'); it is passed to
          ``Samples`` and kept in ``self.channel``.
          """
          # Stack object for the MC histograms.
          self.thstack = THStack("MC","MC")
          self.samples = Samples(CHANNEL)
          self.drawhistograms = {}
          self.tmpplot = TreePlot()
          # Binning defaults: bin count, lower bound, upper bound.
          self.bin_, self.min_, self.max_ = 0, 0., 0.
          # Selection and weight expressions, empty until configured.
          self.cutname_, self.weightname_ = "", ""
          self.plotdir_, self.setlogy_ = "", False
          self.channel = CHANNEL
          self.multiplicitylabel = {}
          self.c1 = TCanvas()
def grabTree(filename):
    """Read a serialized (pickled) object back from a file.

    Args:
        filename: Path of the file that holds the pickled object.

    Returns:
        The object reconstructed by ``pickle.load``.

    Raises:
        IOError/OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # NOTE(review): unpickling can execute arbitrary code, so this must only
    # be used on files the program wrote itself or that are otherwise trusted.
    # The context manager closes the handle even when unpickling fails.
    with open(filename, "rb") as fr:
        return pickle.load(fr)


if __name__ == '__main__':

    # Create the example data set
    dataSet, labels = createDataSet()
    # Keep a copy of the labels for the classification call at the end
    lebelsCopy = labels[:]

    # Learn / build the decision tree
    tree = createTree(dataSet, labels)
    print(tree)

    # Plot the decision tree
    TreePlot.createPlot(tree)

    # Serialize the tree structure to a file
    storeTree(tree, "object.txt")
    # Read the tree structure back from the file
    myTree = grabTree("object.txt")
    print(myTree)
    print(classifyDecisionTree(myTree, lebelsCopy, [1, 1]))
Example #4
0
    firstLabelIndex = labels.index(firstLabel)  # 属性对应的index
    secondDict = tree[firstLabel]
    value = testData[firstLabelIndex]   #  属性值

    if type(secondDict[value]).__name__ == "dict":  # 假如还是一棵树则递归
        classLabel = classifyDecisionTree(secondDict[value], labels, testData)
    else:
        classLabel = secondDict[value]

    return classLabel


if __name__ == '__main__':
    # Read the data
    lensesData, lensesLable = loadDataSet("lenses.txt")

    # Copy the attribute labels:
    # createTree() modifies the label list that is passed in
    lensesLableCopy = lensesLable[:]

    # Build the tree
    decisionTree = createTree(lensesData, lensesLableCopy)
    print(decisionTree)

    # Plot the tree
    TreePlot.createPlot(decisionTree)

    # Run a prediction
    classLabel = classifyDecisionTree(decisionTree, lensesLable,
                                      ["young", "hyper", "yes", "normal"])
    print(classLabel)
Example #5
0
            classCount[vote]=0
        classCount[vote]+=1
    sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
    return sortedClassCount[0][0]

def createTree(dataSet,labels):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        dataSet: List of samples; the last element of each sample is its
            class value, the elements before it are feature values.
        labels: Feature names, indexed like the feature columns of
            ``dataSet``. The list is NOT modified by this function.

    Returns:
        A class value when the samples cannot or need not be split any
        further, otherwise ``{feature_label: {feature_value: subtree}}``.
    """
    classList = [example[-1] for example in dataSet]  # class of every sample
    # All samples share one class: this is a leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Only the class column is left: fall back to the majority class.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)  # pick the best feature
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}  # the result is stored as a dictionary
    # Drop the used feature from a private copy. The original deleted it
    # from ``labels`` itself, which silently corrupted the caller's list.
    subLabels = list(labels)
    del subLabels[bestFeat]
    for value in set(example[bestFeat] for example in dataSet):
        # Recursive calls no longer mutate their argument, so every branch
        # can share the same reduced label list.
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree



if __name__=='__main__':
    dataSet, labels=createDataSet1()  # create the example data
    mytree=createTree(dataSet, labels)  # build the decision tree model
    print mytree
    TreePlot.createPlot(mytree)
Example #6
0
__author__ = 'wanghao'


"""
    the glasses examples
"""

import ID3Tree
import TreePlot

fr = open('./lenses.txt')
DataList = fr.readlines()

DataSet = []
for data in DataList:
    DataSet.append(data.strip().split('\t'))

print "The dateSet is ", DataSet
Labels = ['age', 'prescript', 'astigmatic', 'tearRate']

LenseTree = ID3Tree.createTree(DataSet, Labels)
print "the result ID3 Tree is ", LenseTree

TreePlot.createPlot(LenseTree)
class ProducePlot:
      
     def __init__(self,CHANNEL):#CHANNEL = 'mu', or 'el'
          """Create the helper objects and the default plot settings.

          CHANNEL is the lepton channel ('mu' or 'el'); it is passed to
          ``Samples`` and kept in ``self.channel``.
          """
          # Stack that DrawTHStack fills with the MC histograms.
          self.thstack = THStack("MC","MC")
          self.samples = Samples(CHANNEL)
          self.drawhistograms = {}
          self.tmpplot = TreePlot()
          # Histogram binning: number of bins, lower edge, upper edge.
          self.bin_, self.min_, self.max_ = 0, 0., 0.
          # Selection and weight expressions, empty until configured.
          self.cutname_, self.weightname_ = "", ""
          self.plotdir_, self.setlogy_ = "", False
          self.channel = CHANNEL
          self.multiplicitylabel = {}
          self.c1 = TCanvas()

     def SetBinMinMax(self,bin,min,max):
          
          self.bin_ = int(bin)
          self.min_ = float(min)
          self.max_ = float(max)
      
     def SetCutWeightName(self,cutname,weightname):
          
          self.cutname_  = cutname
          self.weightname_ = weightname

     def SetFilePath(self,filepath):
          
          self.samples.SetFilePath(filepath)
      
     def SetLumiTree(self,lumi,treename):
          
          self.samples.SetLumi(lumi)
          self.samples.SetTreeName(treename)

     def SetPlotDir(self,plotdir):
          
          self.plotdir_ = plotdir
     
     def SetLogy(self,setlogy):
         
         if(setlogy == "False"):
           self.setlogy_ = False
         
         if(setlogy == "True"):
           self.setlogy_ = True

     def SetChannel(self,Channel):

         self.channel = Channel

     def DrawTHStack(self,drawname,index,xtitle,ytitle,scalefactortxtfilename,n,ymin,latexcuttitle):
         """Draw data against the stacked MC for one variable and save the plot.

         The upper pad shows the MC stack, an error band, the data points and
         the "ggH600" histogram; the lower pad shows (Data - MC) / MC. The
         canvas is written as .png, .eps and .pdf into ``self.plotdir_``
         below the current working directory.

         Parameters:
             drawname: variable expression to plot; also part of the canvas
                 name and of the output file name.
             index: passed through to ``TreePlot.DrawTrees``.
             xtitle, ytitle: axis titles passed to ``DrawTrees``; ``xtitle``
                 is also used for the ratio pad.
             scalefactortxtfilename: for MC samples it is combined with the
                 sample name as "<file>:<sample>" and handed to ``DrawTrees``.
             n: factor applied to the stack maximum (converted with float).
             ymin: minimum of the stack's y axis (converted with float).
             latexcuttitle: text drawn below the KS value; a sanitized copy
                 becomes part of the output file name.

         NOTE(review): the "data", "TTbar" and "ggH600" samples are indexed
         unconditionally, so they must be present among the file names.
         """
         
         self.samples.SetFileNames()
         
         print "We are ploting the variable: " + drawname

         print "We are using the cut: " + self.cutname_

         print "We are using the weight: " + self.weightname_
         
         ttbarregionsignal = ""
            
         # Fill one histogram per sample through the TreePlot helper;
         # self.drawhistograms[<sample>] is read right after each DrawTrees call.
         for tmpfiletype in self.samples.GetFileNames().keys():
             #print tmpfiletype
             tmpcutname = self.cutname_
             # In the ttbar control-plot directory, samples whose name contains
             # "H" are drawn without the last "&&" clause of the cut.
             if self.plotdir_ == "controlPlots_ttbar" and (str(tmpfiletype).find("H") != -1): tmpcutname = "&&".join(self.cutname_.split("&&")[:-1])
             self.tmpplot.SetBinMinMax(self.bin_,self.min_,self.max_)
             self.tmpplot.SetCutWeightName(tmpcutname,self.weightname_)
             # NOTE(review): tmphistogram is not referenced again in this method.
             tmphistogram = TH1D(tmpfiletype,tmpfiletype,self.bin_,self.min_,self.max_)
             if (tmpfiletype != "data"):
                 # MC: scale-factor spec "<file>:<sample>" and the luminosity.
                 self.tmpplot.DrawTrees(self.samples.GetFileNames()[tmpfiletype],self.samples.GetTreeName(),drawname,index,xtitle,ytitle,"%s:%s"%(scalefactortxtfilename,tmpfiletype),self.samples.GetLumi(),tmpfiletype,self.drawhistograms,self.multiplicitylabel,ttbarregionsignal)
             else:
                 # Data: 1.0 is passed in place of both scale factor and luminosity.
                 self.tmpplot.DrawTrees(self.samples.GetFileNames()[tmpfiletype],self.samples.GetTreeName(),drawname,index,xtitle,ytitle,1.0,1.0,tmpfiletype,self.drawhistograms,self.multiplicitylabel,ttbarregionsignal)
             self.drawhistograms[tmpfiletype].SetStats(kFALSE)
         
         ################################Data############################
         self.drawhistograms["data"].SetMarkerStyle(20)
         self.drawhistograms["data"].SetMarkerSize(1)
         self.drawhistograms["data"].Sumw2()
         ################################Data############################

         ##########TTbar###########################################
         # NOTE(review): the freshly booked tt1D is rebound two lines below,
         # so tt1D ends up being the "TTbar" histogram itself.
         tt1D = TH1D("tt1D","tt1D",self.bin_,self.min_,self.max_)
         self.drawhistograms["TTbar"].SetFillColor(kRed+1)
         tt1D = self.drawhistograms["TTbar"]
         tt1D.SetFillColor(kRed+1)
         ##########################################################
         
         #############WJet########################################
         wjet1D = TH1D("wjet1D","wjet1D",self.bin_,self.min_,self.max_)
         wjet1D.Sumw2()
         wjet1D.SetStats(kFALSE)

         # Use the flavour-split W samples when all three exist, otherwise the
         # inclusive "WJets_Pythia" sample (which must then be present).
         if(self.samples.GetFileNames().has_key("Wbb") and self.samples.GetFileNames().has_key("Wcc") and self.samples.GetFileNames().has_key("Wlight")):
            wjet1D = self.drawhistograms["Wbb"] + self.drawhistograms["Wcc"] + self.drawhistograms["Wlight"]
         else:
            wjet1D = self.drawhistograms["WJets_Pythia"]
         wjet1D.SetFillColor(kGreen-3)
         #############WJet########################################
         
         ###########QCD############################################
         # Stays an empty histogram unless all four QCD samples are present.
         qcd1D = TH1D("qcd1D","qcd1D",self.bin_,self.min_,self.max_)
         qcd1D.Sumw2()
         qcd1D.SetStats(kFALSE)
         if (self.samples.GetFileNames().has_key("QCDBCtoE3080") and self.samples.GetFileNames().has_key("QCDBCtoE80170") and self.samples.GetFileNames().has_key("QCDEmEn3080") and self.samples.GetFileNames().has_key("QCDEmEn80170")):
            qcd1D = self.drawhistograms["QCDBCtoE3080"] + self.drawhistograms["QCDBCtoE80170"] + self.drawhistograms["QCDEmEn3080"]  + self.drawhistograms["QCDEmEn80170"]
         qcd1D.SetFillColor(kYellow)
         ###########QCD############################################
         
         ############################DiBoson#######################################
         # WW+WZ+ZZ when all exist, WW+WZ as fallback, otherwise left empty.
         diboson1D = TH1D("diboson1D","diboson1D",self.bin_,self.min_,self.max_)
         diboson1D.Sumw2()
         if (self.samples.GetFileNames().has_key("WW") and self.samples.GetFileNames().has_key("WZ") and self.samples.GetFileNames().has_key("ZZ")):
            diboson1D = self.drawhistograms["WW"]+ self.drawhistograms["WZ"] + self.drawhistograms["ZZ"] 
         else:
            if(self.samples.GetFileNames().has_key("WW") and self.samples.GetFileNames().has_key("WZ")):
               diboson1D = self.drawhistograms["WW"]+ self.drawhistograms["WZ"]
         diboson1D.SetStats(kFALSE)
         diboson1D.SetFillColor(kBlue)
         ############################DiBoson#######################################
         
         #####################################SumSingleTop###########################
         # Sum of the six single-top samples; left empty if any one is missing.
         sumsttop1D = TH1D("sumsttop1D","sumsttop1D",self.bin_,self.min_,self.max_)
         sumsttop1D.Sumw2()
         if (self.samples.GetFileNames().has_key("tch") and self.samples.GetFileNames().has_key("tWch") and self.samples.GetFileNames().has_key("sch") and self.samples.GetFileNames().has_key("tch_bar") and self.samples.GetFileNames().has_key("tWch_bar") and self.samples.GetFileNames().has_key("sch_bar")):
            sumsttop1D = self.drawhistograms["tch"] + self.drawhistograms["tWch"] + self.drawhistograms["sch"] + self.drawhistograms["tch_bar"] + self.drawhistograms["tWch_bar"] + self.drawhistograms["sch_bar"]
         sumsttop1D.SetStats(kFALSE)
         sumsttop1D.SetFillColor(kMagenta)
         #####################################SumSingleTop###########################
         
         #####################################ZJet###################################
         zjet1D = TH1D("zjet1D","zjet1D",self.bin_,self.min_,self.max_)
         zjet1D.Sumw2()
         if (self.samples.GetFileNames().has_key("ZJets")):
            zjet1D = self.drawhistograms["ZJets"]
         zjet1D.SetStats(kFALSE)
         zjet1D.SetFillColor(kAzure-3)
         #####################################ZJet###################################

         # NOTE(review): mcbackground is not used further down in this method.
         mcbackground = [self.drawhistograms["TTbar"], wjet1D, qcd1D, diboson1D, sumsttop1D, zjet1D]

         #####################################Data-MC/MC############################
         # Total MC; the QCD histogram is only added when it has entries.
         allmc1D = TH1D("allmc1D","allmc1D",self.bin_,self.min_,self.max_)
         allmc1D.Sumw2()
         allmc1D = tt1D +  wjet1D + diboson1D + sumsttop1D +  zjet1D
         if(qcd1D.GetEntries() > 0):
            allmc1D = tt1D +  wjet1D + diboson1D + sumsttop1D +  zjet1D + qcd1D
         allmc1D.SetStats(kFALSE)

         # data - MC
         dataminusmc1D = TH1D("dataminusmc1D","dataminusmc1D",self.bin_,self.min_,self.max_)
         dataminusmc1D.Sumw2()
         dataminusmc1D.Add(self.drawhistograms["data"],allmc1D,1.,-1.)
         dataminusmc1D.SetStats(kFALSE)

         # (data - MC) / MC, divided with ROOT's "B" option
         dataovermc1D = TH1D("dataovermc1D","dataovermc1D",self.bin_,self.min_,self.max_)
         dataovermc1D.Sumw2()
         dataovermc1D.Divide(dataminusmc1D,allmc1D,1,1,"B")
         # NOTE(review): SetStats is called on dataminusmc1D a second time here;
         # possibly dataovermc1D was intended - confirm.
         dataminusmc1D.SetStats(kFALSE)
         dataovermc1D.SetXTitle(xtitle)
         dataovermc1D.SetYTitle("(Data - MC) / MC")
         dataovermc1D.SetMarkerStyle(20)
         dataovermc1D.SetMarkerSize(1)
         dataovermc1D.SetTitle("")
         dataovermc1D.SetTitleSize(0.05,"X")
         dataovermc1D.SetTitleSize(0.05,"Y")
         dataovermc1D.GetYaxis().SetRangeUser(-1,1)
         #####################################Data-MC/MC############################
         
         # Kolmogorov test of the data against the total MC; shown on the ratio pad.
         datamcksresult_ = self.drawhistograms["data"].KolmogorovTest(allmc1D)

         #####################################Error_Band###########################
         # Double arrays handed to TGraphAsymmErrors, filled from the lists below.
         xvalue = array('d')
         yvalue = array('d')
         xlefterror = array('d')
         xrighterror = array('d')
         ylowerror = array('d')
         yhigherror = array('d')

         xlist = []
         ylist = []
         xleftlist = []
         xrightlist = []
         ylowlist = []
         yhighlist = []

         # ROOT bins are numbered from 1 to GetNbinsX().
         for ibin in range(1,allmc1D.GetNbinsX() + 1):
             xlist.append(allmc1D.GetBinCenter(ibin))
             ylist.append(allmc1D.GetBinContent(ibin))
             xleftlist.append(0.5 * allmc1D.GetBinWidth(ibin))
             xrightlist.append(0.5 * allmc1D.GetBinWidth(ibin))
             ttbarerror = self.drawhistograms["TTbar"].GetBinContent(ibin) * 0.15
             wjeterror = wjet1D.GetBinContent(ibin) * 0.3
             lumierror = allmc1D.GetBinContent(ibin) * 0.022
             statisticerror = allmc1D.GetBinError(ibin)
             allerror = sqrt(pow(ttbarerror,2) + pow(wjeterror,2) + pow(lumierror,2) + pow(statisticerror,2))
             # NOTE(review): the combined error computed above is overwritten,
             # so the band shows the statistical error only - confirm intent.
             allerror = statisticerror             
             ylowlist.append(allerror)
             yhighlist.append(allerror)

         xvalue.fromlist(xlist)
         yvalue.fromlist(ylist)
         xlefterror.fromlist(xleftlist)
         xrighterror.fromlist(xrightlist)
         ylowerror.fromlist(ylowlist)
         yhigherror.fromlist(yhighlist)

         mc1Derror = TGraphAsymmErrors(allmc1D.GetNbinsX(),xvalue,yvalue,xlefterror,xrighterror,ylowerror,yhigherror)
         mc1Derror.SetName("MC Uncerntainty")
         mc1Derror.SetFillColor(920+3)
         mc1Derror.SetFillStyle(3008)
         #####################################Error_Band###########################
         
         # Only components with entries are added to the stack.
         # NOTE(review): self.thstack is created once in __init__ and is not
         # reset here, so a second call adds to the same stack - confirm.
         self.thstack.SetHistogram(self.drawhistograms["TTbar"])
         if(self.drawhistograms["TTbar"].GetEntries() > 0):
           self.thstack.Add(self.drawhistograms["TTbar"])
         if(wjet1D.GetEntries() > 0):
           self.thstack.Add(wjet1D)
         if(sumsttop1D.GetEntries() > 0):
            self.thstack.Add(sumsttop1D)
         if(self.samples.GetFileNames().has_key("ZJets")):
           if(zjet1D.GetEntries() > 0):
              self.thstack.Add(zjet1D)
         if(diboson1D.GetEntries() > 0):
           self.thstack.Add(diboson1D)
         if(qcd1D.GetEntries() > 0):
           self.thstack.Add(qcd1D)
         # Leave headroom above the stack: maximum scaled by n, minimum set to ymin.
         self.thstack.SetMaximum(float(n) * self.thstack.GetMaximum())
         self.thstack.SetMinimum(float(ymin))
                 
         #L = TLegend(0.66,0.65,0.93,0.93)
         L = TLegend(0.7270115,0.6510417,0.9971264,0.9300595)
         L.SetBorderSize(0)
         L.SetLineStyle(0)
         L.SetTextFont(42)
         L.SetFillStyle(0)
         L.SetMargin(0.12)
         L.SetTextSize(0.025)
         L.SetFillColor(10)
         L.SetBorderSize(0)
         # Legend entries follow the same "has entries" rule as the stack.
         if(self.drawhistograms["data"].GetEntries() > 0):
           L.AddEntry(self.drawhistograms["data"],"Data", "lp")
         if(self.drawhistograms["TTbar"].GetEntries() > 0):
           L.AddEntry(self.drawhistograms["TTbar"],"t#bar{t}", "f")
         if(wjet1D.GetEntries() > 0):
           L.AddEntry(wjet1D,"W#rightarrowl#nu", "f")
         if(self.samples.GetFileNames().has_key("ZJets")):
           if(zjet1D.GetEntries() > 0):
             L.AddEntry(zjet1D,"Z/#gamma^{\*}#rightarrowl^{+}l^{-}", "f")
         if(qcd1D.GetEntries() > 0):
           L.AddEntry(qcd1D,"QCD", "f")
         if(diboson1D.GetEntries() > 0):
           L.AddEntry(diboson1D,"Dibosons", "f")
         if(sumsttop1D.GetEntries() > 0):
           L.AddEntry(sumsttop1D,"Single-Top","f")
         # The "ggH600" entry shows its multiplier when that differs from 1.0.
         if(self.multiplicitylabel["ggH600"] != 1.0):
           L.AddEntry(self.drawhistograms["ggH600"],"ggHWW 600 #times %d"%self.multiplicitylabel["ggH600"],"f")
         else:
           L.AddEntry(self.drawhistograms["ggH600"],"ggHWW 600","f")

         banner = TLatex(0.25,0.88,("#splitline{CMS Preliminary}{%.1f fb^{-1} at #sqrt{s}=8TeV %s+jets}"%(self.samples.GetLumi(),self.channel)))
         banner.SetNDC()
         banner.SetTextSize(0.035)

         tl = TLatex(0.25,0.85,("#splitline{KS=%.4f}{%s}"%(datamcksresult_,latexcuttitle)))
         tl.SetNDC()
         tl.SetTextSize(0.06)

         # Two pads on one canvas: pad1 (upper part) for the stack, pad2
         # (lower quarter) for the ratio.
         self.c1 = TCanvas(drawname + self.cutname_,drawname + self.cutname_,10,10,700,700)
         self.c1.cd()
         pad1 = TPad("pad1","pad1",0.00,0.25,1.00,0.97)
         pad2 = TPad("pad2","pad2",0.00,0.00,1.00,0.25)
         pad1.SetFillColor(0)
         pad2.SetFillColor(0)
         pad1.Draw()
         pad2.Draw()
         pad1.SetTicks(1,1)
         pad2.SetTicks(1,1)
         pad2.SetGridx()
         pad2.SetGridy()

         pad1.cd()
         if(self.setlogy_):
            pad1.SetLogy()
         self.thstack.Draw("hist")
         mc1Derror.Draw("2 same")
         self.drawhistograms["data"].Draw("Esame")
         self.drawhistograms["ggH600"].SetLineStyle(2)
         self.drawhistograms["ggH600"].Draw("histsame")
         L.Draw()
         banner.Draw()
         
         pad2.cd()
         dataovermc1D.Draw()
         tl.Draw()
         
         PWD = os.getcwd() + "/"

         # Create the output directory below the working directory if needed.
         # NOTE(review): the directory name is interpolated into a shell command.
         if os.path.isdir(PWD + self.plotdir_):
            print "%s direcotry has aleady been created"%(self.plotdir_)
         else:
            os.system("mkdir %s"%(PWD + self.plotdir_))
         
         # Turn the cut title into something usable in a file name: blanks
         # become "_", "&&" and "#" are dropped, comparison signs are spelled out.
         tmplatexcuttitle = latexcuttitle
         tmplatexcuttitle = "_".join(tmplatexcuttitle.split(" "))
         tmplatexcuttitle = "".join(tmplatexcuttitle.split("&&"))
         tmplatexcuttitle = "_".join(tmplatexcuttitle.split("__"))
         tmplatexcuttitle = string.replace(tmplatexcuttitle,">","large")
         tmplatexcuttitle = string.replace(tmplatexcuttitle,"=","euqal")
         tmplatexcuttitle = string.replace(tmplatexcuttitle,"<","small")
         tmplatexcuttitle = string.replace(tmplatexcuttitle,"#","")
         
         self.c1.Print( PWD  + self.plotdir_ + "/" + self.channel + "_" + drawname + "_" +  tmplatexcuttitle + ".png")
         self.c1.Print( PWD  + self.plotdir_ + "/" + self.channel + "_" + drawname + "_" +  tmplatexcuttitle + ".eps")
         self.c1.Print( PWD  + self.plotdir_ + "/" + self.channel + "_" + drawname + "_" +  tmplatexcuttitle + ".pdf")
         self.c1.Update()
Example #8
0
class QCDFractionFitter:
    def __init__(self, filename, scalefactortxtfilename, treename, lumi,
                 channel, savelabel):
        """Remember the fit inputs and reset every result holder.

        Each argument is stored unchanged on the instance: ``filename`` as
        ``filedict_`` (FitAndPlot iterates its keys and indexes it by sample
        name), ``scalefactortxtfilename`` as ``scalefactortxtfilename_``,
        ``treename`` as ``treename_``, ``lumi`` as ``lumi_``, ``channel`` as
        ``channel_`` and ``savelabel`` as ``savelabel_``.
        """
        # Plotting helper and the batch switch (off by default).
        self.tmpplot = TreePlot()
        self.batch_ = False
        # Caller-supplied inputs.
        self.filedict_ = filename
        self.scalefactortxtfilename_ = scalefactortxtfilename
        self.treename_ = treename
        # Containers that start out empty.
        self.histograms_, self.multiplicitylabel_ = {}, {}
        self.lumi_, self.channel_, self.savelabel_ = lumi, channel, savelabel
        # No constrained templates and no fitter object yet.
        self.constrains_, self.fit_ = [], None
        # Fit result and its error, both zero until FitAndPlot stores them.
        self.qcdfraction_, self.qcdfractionerror_ = 0.0, 0.0

    def SetBatch(self):

        self.batch_ = True

    def SetVariableCutWeightName(self, variable, cutname, weightname):

        self.variable_ = variable
        self.cutname_ = cutname
        self.weightname_ = weightname

    def SetXtitleYtitle(self, xtitle, ytitle):

        self.xtitle_ = xtitle
        self.ytitle_ = ytitle

    def SetBinMinMax(self, bin, min, max):

        self.bin_ = bin
        self.min_ = min
        self.max_ = max

    def SetConstrainPartLabel(self, constrains):

        self.constrains_ = constrains

    def SetAnalysisCut(self, metcut):

        self.metcut_ = metcut

    def GetQCDFractionResult(self):

        return [self.qcdfraction_, self.qcdfractionerror_]

    def FitAndPlot(self, fitpattern):
        """Fill the histograms, run the template fit and save the plot.

        One histogram per entry of ``self.filedict_`` is filled through the
        TreePlot helper. When ``fitpattern`` is "QCDfraction", the "QCD"
        histogram, the summed W+jets histograms and (optionally) the sum of
        the ``self.constrains_`` histograms are used as templates of a
        ``TFractionFitter`` fit to "data". The QCD fraction above the
        analysis cut and the fit error are stored in ``self.qcdfraction_``
        and ``self.qcdfractionerror_``, and the canvas is saved as png, pdf
        and eps under "QCDFractionFit/".

        Any other ``fitpattern`` only fills the histograms.

        NOTE(review): the setters for variable/cut/weight, axis titles,
        binning and the analysis cut must have been called before; the
        corresponding attributes are not initialised in __init__.
        """

        #####Prepare the Histogram to the TFractionFitter####################
        print "We are Fitting the variable: " + self.variable_
        print "We are using the cut: " + self.cutname_
        print "We are using the weight: " + self.weightname_

        ttbarregionsignal = ""

        for tmpfiletype in self.filedict_.keys():

            tmpcutname = self.cutname_
            self.tmpplot.SetBinMinMax(self.bin_, self.min_, self.max_)
            self.tmpplot.SetCutWeightName(tmpcutname, self.weightname_)

            # Generic MC: scale-factor spec "<file>:<sample>" and luminosity.
            if (tmpfiletype != "data" and tmpfiletype != "QCD"
                    and tmpfiletype != "WJets"):
                self.tmpplot.DrawTrees(
                    self.filedict_[tmpfiletype], self.treename_,
                    self.variable_, 0, self.xtitle_, self.ytitle_,
                    "%s:%s" % (self.scalefactortxtfilename_, tmpfiletype),
                    self.lumi_, tmpfiletype, self.histograms_,
                    self.multiplicitylabel_, ttbarregionsignal)
            # "WJets": same as generic MC, with an extra factor in the weight.
            elif (tmpfiletype == "WJets"):
                self.tmpplot.SetCutWeightName(
                    tmpcutname, self.weightname_ + "*W_nParton_weight")
                self.tmpplot.DrawTrees(
                    self.filedict_[tmpfiletype], self.treename_,
                    self.variable_, 0, self.xtitle_, self.ytitle_,
                    "%s:%s" % (self.scalefactortxtfilename_, tmpfiletype),
                    self.lumi_, tmpfiletype, self.histograms_,
                    self.multiplicitylabel_, ttbarregionsignal)
            # Data: 1.0 is passed in place of both scale factor and luminosity.
            elif (tmpfiletype == "data"):
                self.tmpplot.DrawTrees(self.filedict_[tmpfiletype],
                                       self.treename_, self.variable_, 0,
                                       self.xtitle_, self.ytitle_, 1.0, 1.0,
                                       tmpfiletype, self.histograms_,
                                       self.multiplicitylabel_,
                                       ttbarregionsignal)
            # "QCD": like data (1.0, 1.0) but with an additional requirement
            # appended to the cut.
            elif (tmpfiletype == "QCD"):
                self.tmpplot.SetCutWeightName(
                    tmpcutname +
                    "&&((EWK_W_2jets_l_tagjet2_deltaR<EWK_W_2jets_l_tagjet1_deltaR?EWK_W_2jets_l_tagjet2_deltaR:EWK_W_2jets_l_tagjet1_deltaR)>1.0)",
                    self.weightname_)
                self.tmpplot.DrawTrees(self.filedict_[tmpfiletype],
                                       self.treename_, self.variable_, 0,
                                       self.xtitle_, self.ytitle_, 1.0, 1.0,
                                       tmpfiletype, self.histograms_,
                                       self.multiplicitylabel_,
                                       ttbarregionsignal)

        if fitpattern == "QCDfraction":

            # NOTE(review): this booked histogram is rebound to the sum of the
            # W samples a few lines below.
            wjet1D = TH1D("wjet1D", "wjet1D", self.bin_, self.min_, self.max_)

            firstbin = 1
            lastbin = self.histograms_["data"].GetNbinsX()

            dataentries = self.histograms_["data"].Integral(firstbin, lastbin)
            print "data Entries: " + str(dataentries)
            # Normalise the templates to the data yield.
            # NOTE(review): the template integrals run to lastbin + 1 while the
            # data integral stops at lastbin - confirm this is intended.
            self.histograms_["QCD"].Scale(
                dataentries /
                self.histograms_["QCD"].Integral(firstbin, lastbin + 1))
            # Requires the WJets and W1Jets..W4Jets histograms to exist.
            wjet1D = self.histograms_["WJets"] + self.histograms_[
                "W1Jets"] + self.histograms_["W2Jets"] + self.histograms_[
                    "W3Jets"] + self.histograms_["W4Jets"]
            wjet1D.Scale(dataentries / wjet1D.Integral(firstbin, lastbin + 1))

            # Optional third template: sum of the histograms named in
            # self.constrains_, normalised like the other templates.
            constrainfaction = 0.
            tconstrain1D = TH1D("tconstrain", "tconstrain", self.bin_,
                                self.min_, self.max_)
            if len(self.constrains_) > 0:
                for icons in self.constrains_:
                    tconstrain1D.Add(self.histograms_[icons])
                #tconstrain1D.Scale(dataentries/tconstrain1D.Integral(firstbin,lastbin))
                constrainfaction = tconstrain1D.Integral(firstbin,
                                                         lastbin) / dataentries
                print "TTbar constrainfaction: " + str(constrainfaction)
                tconstrain1D.Scale(
                    dataentries / tconstrain1D.Integral(firstbin, lastbin + 1))

            # Template order: QCD, W+jets, then the optional extra template.
            mc = TObjArray()
            mc.Add(self.histograms_["QCD"])
            mc.Add(wjet1D)

            if len(self.constrains_) > 0:
                mc.Add(tconstrain1D)

            self.fit_ = TFractionFitter(self.histograms_["data"], mc)
            # Limit the fitted fractions to the range [0, 1].
            self.fit_.Constrain(1, 0.0, 1.0)
            self.fit_.Constrain(2, 0.0, 1.0)
            if len(self.constrains_) > 0:
                #self.fit_.Constrain(3,constrainfaction - 0.0001*constrainfaction,constrainfaction + 0.0001*constrainfaction)
                self.fit_.Constrain(3, 0.0, 1.0)

            self.fit_.SetRangeX(firstbin, lastbin)

            status = self.fit_.Fit()

            print "fit status: " + str(status)

            self.SetUpPlotEnviroment()

            if (self.batch_):
                gROOT.SetBatch()

            canvas = TCanvas("c1", "c1", 10, 10, 700, 700)
            lgnd = TLegend(0.6, 0.6, 0.90, 0.9)
            lgnd.SetFillColor(kWhite)

            # Fitted prediction drawn underneath the data points.
            result = self.fit_.GetPlot()
            result.SetFillColor(kYellow)
            self.histograms_["data"].SetMinimum(0)
            self.histograms_["data"].Draw("Ep")
            result.Draw("histsame")

            # One-element arrays that GetResult fills with value and error.
            frac_value = array('d', [0.])
            frac_error = array('d', [0.])

            # Result 0 belongs to the first template (QCD).
            self.fit_.GetResult(0, frac_value, frac_error)

            # Integrals from the bin containing the analysis cut up to lastbin.
            integralanalysisbin = self.histograms_["data"].FindBin(
                self.metcut_)

            dataentriesanalysisbin = self.histograms_["data"].Integral(
                integralanalysisbin, lastbin)
            qcdintegralanalysisbin = self.histograms_["QCD"].Integral(
                integralanalysisbin, lastbin)
            wjetsintegralanalysisbin = wjet1D.Integral(integralanalysisbin,
                                                       lastbin)

            # Fitted fraction times (template yield / data yield) above the cut.
            qcdfrac_valuemetcut = frac_value[
                0] * qcdintegralanalysisbin / dataentriesanalysisbin

            print "QCD: frac_value= " + str(qcdfrac_valuemetcut) + "+/-" + str(
                frac_error[0]) + " NEvts=" + str(
                    dataentries * qcdfrac_valuemetcut) + "+/-" + str(
                        dataentries * frac_error[0])
            # The stored value is the fraction above the cut; the stored error
            # is the raw fit error of the full-range fraction.
            self.qcdfraction_ = qcdfrac_valuemetcut
            #self.qcdfraction_ = frac_value[0]
            self.qcdfractionerror_ = frac_error[0]
            self.histograms_["QCD"].Scale(frac_value[0])
            self.histograms_["QCD"].SetLineWidth(2)
            self.histograms_["QCD"].SetLineColor(kBlue)
            self.histograms_["QCD"].Draw("histesame")

            # Result 1 belongs to the second template (W+jets).
            self.fit_.GetResult(1, frac_value, frac_error)
            wjetsfrac_valuemetcut = frac_value[
                0] * wjetsintegralanalysisbin / dataentriesanalysisbin
            print "WJets: frac_value= " + str(
                wjetsfrac_valuemetcut) + "+/-" + str(
                    frac_error[0]) + " NEvts=" + str(
                        dataentries * wjetsfrac_valuemetcut) + "+/-" + str(
                            dataentries * frac_error[0])
            #self.histograms_["WJets"].Scale(frac_value[0])
            #self.histograms_["WJets"].SetLineWidth(2)
            #self.histograms_["WJets"].SetLineColor(kRed)
            #self.histograms_["WJets"].Draw("histesame")
            wjet1D.Scale(frac_value[0])
            wjet1D.SetLineWidth(2)
            wjet1D.SetLineColor(kRed)
            wjet1D.Draw("histesame")

            # Result 2 belongs to the optional third template.
            if len(self.constrains_) > 0:
                self.fit_.GetResult(2, frac_value, frac_error)
                tconstrain1D.Scale(frac_value[0])
                #tconstrain1D.Scale(frac_value[0]*dataentries/tconstrain1D.Integral(firstbin,lastbin+1))
                tconstrain1D.SetLineWidth(2)
                tconstrain1D.SetLineColor(kGreen)
                tconstrain1D.Draw("histsame")

            lgnd.AddEntry(result, "TemplatePrediction", "f")
            lgnd.AddEntry(self.histograms_["data"], "data", "p")
            lgnd.AddEntry(self.histograms_["QCD"], "QCD", "l")
            #lgnd.AddEntry(self.histograms_["WJets"], "WJetsMC","l")
            lgnd.AddEntry(wjet1D, "WJetsMC", "l")
            if len(self.constrains_) > 0:
                #lgnd.AddEntry(tconstrain1D, "Fixed Processes", "l")
                lgnd.AddEntry(tconstrain1D, "Other Processes", "l")

            # chi2 per degree of freedom: bins minus the number of templates
            # (3 with the extra template, 2 without).
            chi2ndof = -999.0
            if len(self.constrains_) > 0:
                print "FitChiSquare/ndof= " + str(self.fit_.GetChisquare() /
                                                  (self.bin_ - 3))
                chi2ndof = self.fit_.GetChisquare() / (self.bin_ - 3)
            else:
                print "FitChiSquare/ndof= " + str(self.fit_.GetChisquare() /
                                                  (self.bin_ - 2))
                chi2ndof = self.fit_.GetChisquare() / (self.bin_ - 2)
            print "FitProbatility= " + str(self.fit_.GetProb())

            latex = TLatex(0.60, 0.50, "#chi2/ndof=%s" % (round(chi2ndof, 2)))
            latex.SetNDC()
            latex.SetTextSize(0.035)

            # Redraw the data on top of the templates.
            self.histograms_["data"].SetMarkerStyle(20)
            self.histograms_["data"].SetMarkerSize(1)
            self.histograms_["data"].Draw("Esame")
            lgnd.Draw()
            latex.Draw()

            # NOTE(review): the next three statements modify instance state, so
            # the suffix is appended again on every further call.
            if len(self.constrains_) > 0:
                self.channel_ = self.channel_ + "constrainotherprocess"

            # Dots are replaced so they do not end up in the file name.
            self.variable_ = self.variable_.replace(".", "_")
            self.savelabel_ = self.savelabel_.replace(".", "_")

            # NOTE(review): the "QCDFractionFit/" directory is not created here.
            canvas.SaveAs("QCDFractionFit/" + self.channel_ + "_" +
                          self.variable_ + self.savelabel_ +
                          "_qcdfraction.png")
            canvas.SaveAs("QCDFractionFit/" + self.channel_ + "_" +
                          self.variable_ + self.savelabel_ +
                          "_qcdfraction.pdf")
            canvas.SaveAs("QCDFractionFit/" + self.channel_ + "_" +
                          self.variable_ + self.savelabel_ +
                          "_qcdfraction.eps")

    def SetUpPlotEnviroment(self):
        if os.path.isfile('tdrstyle.C'):
            gROOT.ProcessLine('.L tdrstyle.C')
            ROOT.setTDRStyle()
            print "Found tdrstyle.C file, using this style."
            HasCMSStyle = "True"
            if os.path.isfile('CMSTopStyle.cc'):
                gROOT.ProcessLine('.L CMSTopStyle.cc+')
                style = CMSTopStyle()
                style.setupICHEPv1()
                print "Found CMSTopStyle.cc file, use TOP style if requested in xml file."
print "---------------------------------------\n"

# Count the leaf nodes of the tree and report the number
LeafNodeNum = ID3Tree.getNumLeafs(myTree)
print "This ID3 Tree leaf node num is ", LeafNodeNum
print "---------------------------------------\n"


# Determine the depth of the ID3 tree and report it
MaxDepth = ID3Tree.getTreeDepth(myTree)
print "This ID3 tree max depth is ", MaxDepth
print "---------------------------------------\n"


# Plot the tree
TreePlot.createPlot(myTree)
print "---------------------------------------\n"













Example #10
0
from TestTrees import calcShannonEnt
from TestTrees import chooseBestFeatureToSplit
from TestTrees import createTree
import TestTrees
import TreePlot


def createDataSet():
    """Return the small example data set and its feature names.

    Returns:
        A ``(dataSet, labels)`` tuple. Every row of ``dataSet`` holds two
        feature values followed by the class ('yes' / 'no'); ``labels``
        names the two feature columns.
    """
    feature_names = ['no surfacing', 'flippers']
    rows = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    return rows, feature_names


# Read the tab-separated lenses samples. The raw string keeps the backslash
# literal (the same path value as before, without the invalid "\l" escape),
# and the context manager closes the file, which was left open before.
# NOTE(review): the backslash makes this a Windows-style relative path.
with open(r'matpilb\lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['ages', 'prescript', 'astigmatic', 'tearRate']
print(lenses)
# Build the decision tree from the samples and plot it.
lensesTree = TestTrees.createTree(lenses, lensesLabels)
TreePlot.createPlot(lensesTree)
Example #11
0
                            '>=' + str(splite))] = CreateTree(
                                son_data, label_lisan, label_lianxu)

                elif j[feature] < splite and i == 1:
                    son_data.append(j)
                    if len(son_data) == 0:
                        return leaf(data)
                    else:
                        Tree[labels_word[feature]][str(
                            '<' + str(splite))] = CreateTree(
                                son_data, label_lisan, label_lianxu)

        else:
            for j in data:
                if j[feature] == label_value[feature][i]: son_data.append(j)

            if len(son_data) == 0:
                return leaf(data)

            else:
                Tree[labels_word[feature]][
                    label_value[feature][i]] = CreateTree(
                        son_data, label_lisan, labels_lianxu)

    return Tree


if __name__ == '__main__':
    # Build the tree from the module-level data set and label lists, then plot it.
    Tree = CreateTree(dataSet, labels_lisan, labels_lianxu)
    TreePlot.createPlot(Tree)