class HistogramTest(unittest.TestCase):
    """Checks ``Histogram.add`` and ``Histogram.count`` on a small fixture."""

    def setUp(self):
        # Every test starts from a histogram that has seen "Apache" twice.
        self.h = Histogram()
        self.h.add("Apache")
        self.h.add("Apache")

    def test_add(self):
        # A unittest assertion (not a bare ``assert``) still runs under
        # ``python -O`` and reports the offending value on failure.
        self.assertIsNotNone(self.h.get_dict().get("Apache"))

    def test_count(self):
        # Two additions in setUp must be reported as a count of two.
        self.assertEqual(self.h.count("Apache"), 2)
def make_histo(reads):
    '''Given a dict of reads keyed by sequence, create a Histogram
    object summarising the distribution of duplicated reads.

    Every value of ``reads`` is added to the histogram.  Entries whose
    value is 1000 or more are also written to stdout as
    ``<key>TAB<value>``.  When ``ISATTY`` is true, a running count of
    processed entries is written to stderr every 100000 entries.

    Returns the populated Histogram.
    '''
    LOGGER.debug("making histogram...")
    total = 0
    histo = Histogram()
    # Iterating the mapping itself yields its keys lazily on both
    # Python 2 and Python 3; ``dict.iterkeys()`` exists only on Python 2.
    for read in reads:
        count = reads[read]
        histo.add(count)
        if count >= 1000:
            sys.stdout.write("%s\t%d\n" % (read, count))
        total += 1
        if total % 100000 == 0 and ISATTY:
            # "\r" keeps the progress counter on a single terminal line.
            sys.stderr.write("%9d\r" % (total, ))
    return histo
예제 #3
0
class Dictogram(dict):
    """Dict keyed by word, filled from the word list of a ``Histogram``.

    For every word that has a successor in ``histogram.words_list`` the
    entry ``self[word]`` is set to whatever ``Histogram.add(successor)``
    returns.
    """

    def __init__(self, text_file=None):
        """Build the mapping from ``text_file`` when one is given.

        Without a ``text_file`` the instance stays empty and
        ``self.histogram`` is ``None``.
        """
        self.histogram = None
        if text_file is None:
            return
        self.histogram = Histogram(text_file)
        self.create_dictogram()

    def create_dictogram(self):
        """Walk the word list and record each word's successor.

        Does nothing when no histogram is attached.  Stops at the last
        word, which has no successor.
        """
        if self.histogram is None:
            return

        for position, current in enumerate(self.histogram.words_list):
            following = position + 1
            # ``self.histogram`` is re-read on every pass because the
            # branch below may rebind it.
            if following > len(self.histogram.words_list) - 1:
                return
            successor = self.histogram.words_list[following]

            if current in self:
                # NOTE(review): this rebinds ``self.histogram`` to the stored
                # entry, so later passes read ``words_list`` from that entry
                # instead of the source histogram -- confirm this is intended.
                self.histogram = self[current]
                self[current] = self.histogram.add(successor)
            else:
                # Placeholder first, then replaced by the result of ``add``.
                # NOTE(review): the stored value is the return value of
                # ``add``; verify that ``add`` returns the histogram.
                self[current] = {}
                fresh = Histogram()
                self[current] = fresh.add(successor)
예제 #4
0
    def get_headers(self):
        """Tally the ``Server`` header of every ``link.php?id=`` link.

        Each matching link from ``self.parse_html()`` is requested with
        ``requests.head`` relative to ``self.url``.  The header value is
        printed, then counted under the first known server name it
        contains, or under "other servers" when none matches.  Any
        exception while handling a link prints a notice and moves on to
        the next link.

        Returns the resulting Histogram.
        """
        links = self.parse_html()
        known_servers = ["Apache", "nginx", "Zeus", "Microsoft-IIS"]
        tally = Histogram()

        for link in links:
            try:
                # The guard stays inside the try block so that a link of an
                # unexpected type is reported like any other failure.
                if link is None or "link.php?id=" not in link:
                    continue
                response = requests.head(self.url + link, timeout=3, allow_redirects=True)
                banner = response.headers["Server"]
                print(banner)
                # First known name contained in the banner, else the fallback.
                label = next(
                    (name for name in known_servers if name in banner),
                    "other servers",
                )
                tally.add(label)
            except Exception:
                print("The page is not responding...")
        return tally
예제 #5
0
파일: app.py 프로젝트: presian/HackBulgaria
def getServersDict(all_servers):
    """Count Apache, nginx and IIS occurrences among server strings.

    ``None`` entries are skipped.  The three checks are independent, so
    one string naming several servers is counted once for each of them.

    Returns the populated Histogram.
    """
    tally = Histogram()
    for entry in all_servers:
        if entry is None:
            continue
        lowered = entry.lower()
        if 'apache' in lowered:
            tally.add('Apache')
        if 'nginx' in lowered:
            tally.add('nginx')
        # IIS is matched against the upper-cased string, as before.
        if 'IIS' in entry.upper():
            tally.add('IIS')
    return tally
예제 #6
0
    def create_dictogram(self):
        """Record, for each word of the histogram, the word that follows it.

        Does nothing when ``self.histogram`` is ``None``.  Returns as soon
        as the current word has no successor in ``words_list``.
        """
        if self.histogram is None:
            return

        for idx, token in enumerate(self.histogram.words_list):
            successor_at = idx + 1
            # The length is re-read through ``self.histogram`` on every pass
            # because the ``else`` branch below may rebind that attribute.
            if successor_at > len(self.histogram.words_list) - 1:
                return
            upcoming = self.histogram.words_list[successor_at]

            if token not in self:
                # Placeholder first, then replaced by the result of ``add``.
                # NOTE(review): what gets stored is the return value of
                # ``add``; verify that ``add`` returns the histogram.
                self[token] = {}
                new_histogram = Histogram()
                self[token] = new_histogram.add(upcoming)
            else:
                # NOTE(review): rebinding ``self.histogram`` changes which
                # object supplies ``words_list`` on later passes -- confirm
                # this is intended.
                self.histogram = self[token]
                self[token] = self.histogram.add(upcoming)
예제 #7
0
class HistogramTests(unittest.TestCase):
    """Exercises ``add``, ``count`` and ``get_dict`` of ``Histogram``."""

    def setUp(self):
        # Server names shared by the tests, plus a fresh empty histogram.
        self.ap, self.ng, self.iis = 'Apache', 'nginx', 'IIS'
        self.h = Histogram()

    def _record(self, *names):
        # Add every given name to the histogram, in order.
        for name in names:
            self.h.add(name)

    def testAddRecord(self):
        # One distinct name gives one dictionary entry.
        self._record(self.ap)
        self.assertEqual(1, len(self.h.get_dict()))

    def testAddRecord_2(self):
        # Two distinct names give two dictionary entries.
        self._record(self.ap, self.ng)
        self.assertEqual(2, len(self.h.get_dict()))

    def testCountRecord(self):
        # A name added once is counted once.
        self._record(self.ap)
        self.assertEqual(1, self.h.count("Apache"))

    def testCountRecord_2(self):
        # A name added twice is counted twice.
        self._record(self.ap, self.ap)
        self.assertEqual(2, self.h.count("Apache"))

    def testCountRecord_3(self):
        # A name that was never added is reported as None.
        self._record(self.ap, self.ap, self.ng)
        self.assertEqual(None, self.h.count("IBM Web Server"))

    def testGetDict_4(self):
        # The dictionary view lists every name with its total.
        self._record(self.ap, self.ap, self.ng, self.ng, self.iis)
        self.assertEqual(self.h.get_dict(), {"Apache": 2, "nginx": 2, "IIS": 1})
예제 #8
0
    def scalePlots(self, outputDir):
        """Fit the 'vg' and 'qcd' template scales and apply them to the samples.

        Steps, in order:

        1. Build the fit target from the 'Observed' samples minus the 'EWK'
           samples, then subtract the 'EGFake' and 'JGFake' histograms.
        2. Fit the 'VGamma' and 'JLFake' templates to that target and keep
           the fitted scales and their errors as central values.
        3. Repeat the fit for 1000 toys with randomly shifted templates and
           derive asymmetric errors from the 16% / 84% entries of the sorted
           toy results, combined in quadrature with the central fit error.
        4. Scale the histograms and counters of the affected samples.

        outputDir -- directory object in which the 'PreTemplateFit',
                     'PostTemplateFit' and 'TemplateFitError' subdirectories
                     are looked up or created and written.

        Raises RuntimeError when the central fit returns a non-zero status.
        """
        # Look up groups by name.
        groups = dict([(g.name, g) for g in self.groups])
        fakeGroups = ['EGFake', 'JGFake']

        candClass = 'PhotonAnd' + self.lepton
        jlClass = 'PhotonAndFake' + self.lepton
        # NOTE(review): fakeClasses is not used anywhere else in this method.
        fakeClasses = ['ElePhotonAnd' + self.lepton, 'FakePhotonAnd' + self.lepton]

        ### CREATE TEMPLATES FROM SAMPLE HISTOGRAMS IN GROUPS
        
        observed = self.tempDef.generate(suffix = 'observed')
        fakeHistograms = dict([(gname, Histogram(self.tempDef, suffix = gname)) for gname in fakeGroups])
        vgHistogram = Histogram(self.tempDef, suffix = 'vg')
        qcdHistogram = Histogram(self.tempDef, suffix = 'qcd')

        # observed = sum of 'Observed' samples minus sum of 'EWK' samples.
        for sample in groups['Observed'].samples:
            observed.Add(sample.histograms[self.tempDef.name].hWeighted)
        for sample in groups['EWK'].samples:
            observed.Add(sample.histograms[self.tempDef.name].hWeighted, -1.)
        for gname in fakeGroups:
            for sample in groups[gname].samples:
                fakeHistograms[gname].add(sample.histograms[self.tempDef.name])
        # Only the VGamma samples of the candidate event class enter the vg template.
        for sample in groups['VGamma'].getSamples(candClass):
            vgHistogram.add(sample.histograms[self.tempDef.name])
        for sample in groups['JLFake'].samples:
            qcdHistogram.add(sample.histograms[self.tempDef.name])

        ### FIT FOR CENTRAL VALUES

        fitter = ROOT.TemplateChi2Fitter.singleton()
        
        # Fit parameters: (name, title, initial value, lower bound, upper bound).
        vgScale = ROOT.RooRealVar('vg', 'vg', 1., 0., 5.)
        qcdScale = ROOT.RooRealVar('qcd', 'qcd', 0.1, 0., 1.)

        # The fit target is the observed template with both fake templates subtracted.
        target = observed.Clone('target')
        for histo in fakeHistograms.values():
            target.Add(histo.hWeighted, -1.)

        fitter.setTarget(target)
        fitter.addTemplate(vgHistogram.hWeighted, 'vg', vgScale)
        fitter.addTemplate(qcdHistogram.hWeighted, 'qcd', qcdScale)

        # NOTE(review): target is deleted before fitter.plot()/fitter.fit() run;
        # presumably the fitter keeps its own copy -- confirm.
        target.Delete()

        # Write the plots before the fit ...
        directory = outputDir.GetDirectory('PreTemplateFit')
        if not directory:
            directory = outputDir.mkdir('PreTemplateFit')
        directory.cd()

        fitter.plot(directory)
        directory.Write()

        # A non-zero fit status aborts the whole procedure.
        if fitter.fit() != 0:
            raise RuntimeError('Template fit did not converge')

        # ... and again after it.
        directory = outputDir.GetDirectory('PostTemplateFit')
        if not directory:
            directory = outputDir.mkdir('PostTemplateFit')
        directory.cd()

        fitter.plot(directory)
        directory.Write()

        # Central values and fit errors, read before the toy fits reuse the variables.
        vgCentral = vgScale.getVal()
        vgCentralErr = vgScale.getError()
        qcdCentral = qcdScale.getVal()
        qcdCentralErr = qcdScale.getError()

        ### ERROR ESTIMATION

        directory = outputDir.GetDirectory('TemplateFitError')
        if not directory:
            directory = outputDir.mkdir('TemplateFitError')
        directory.cd()

#        tree = ROOT.TTree('toys', 'Toys')
#        vg = array.array('d', [0.])
#        qcd = array.array('d', [0.])
#        targetContents = array.array('d', [0.] * self.tempDef.nx)
#        targetErrors = array.array('d', [0.] * self.tempDef.nx)
#        vgContents = array.array('d', [0.] * self.tempDef.nx)
#        vgErrors = array.array('d', [0.] * self.tempDef.nx)

#        tree.Branch('vg', vg, 'vg/D')
#        tree.Branch('qcd', qcd, 'qcd/D')
#        tree.Branch('targetContents', targetContents, 'content[%d]/D' % self.tempDef.nx)
#        tree.Branch('targetErrors', targetErrors, 'error[%d]/D' % self.tempDef.nx)
#        tree.Branch('vgContents', vgContents, 'content[%d]/D' % self.tempDef.nx)
#        tree.Branch('vgErrors', vgErrors, 'error[%d]/D' % self.tempDef.nx)
            
        # Toy results: with the vg template shifted, and with it unshifted ("NoEff").
        vgScales = []
        qcdScales = []
        vgScalesNoEff = []
        qcdScalesNoEff = []

        def modifyTemplate(histogram, sigma, name = 'template'):
            # Return a clone of histogram.hWeighted in which every bin is moved by
            # sigma times the larger of its absolute distances to hScaleUp and
            # hScaleDown.
            result = histogram.hWeighted.Clone(name)
            for iX in range(1, result.GetNbinsX() + 1):
                err = max(abs(histogram.hScaleUp.GetBinContent(iX) - result.GetBinContent(iX)), abs(result.GetBinContent(iX) - histogram.hScaleDown.GetBinContent(iX)))
                result.SetBinContent(iX, result.GetBinContent(iX) + err * sigma)

            return result

        # NOTE(review): canvas is not referenced again in this method.
        canvas = ROOT.TCanvas('c1', 'c1')

        print 'Error evaluation with 1000 toys:'
        for iToy in range(1000):
            # Progress indicator, refreshed every 100 toys.
            if iToy % 100 == 0:
                sys.stdout.write('\r' + str(iToy))
                sys.stdout.flush()

            # One standard-normal shift per source for this toy.
            egScaleVar = random.gauss(0., 1.)
            jgScaleVar = random.gauss(0., 1.)
            effScaleVar = random.gauss(0., 1.)
                
            target = observed.Clone('target')

            egTemplate = modifyTemplate(fakeHistograms['EGFake'], egScaleVar)
            target.Add(egTemplate, -1.)
            egTemplate.Delete()

            jgTemplate = modifyTemplate(fakeHistograms['JGFake'], jgScaleVar)
            target.Add(jgTemplate, -1.)
            jgTemplate.Delete()

            vgTemplate = modifyTemplate(vgHistogram, effScaleVar, 'vg')

#            for iX in range(self.tempDef.nx):
#                targetContents[iX] = target.GetBinContent(iX + 1)
#                targetErrors[iX] = target.GetBinError(iX + 1)
#                vgContents[iX] = vgTemplate.GetBinContent(iX + 1)
#                vgErrors[iX] = vgTemplate.GetBinError(iX + 1)

            # First fit of the toy: shifted vg template.
            fitter.setTarget(target)
            fitter.addTemplate(vgTemplate, 'vg', vgScale)
            fitter.addTemplate(qcdHistogram.hWeighted, 'qcd', qcdScale)

            # NOTE(review): this continue skips the target.Delete() and
            # vgTemplate.Delete() calls further down for the failed toy.
            if fitter.fit(-1) != 0: continue

            vgScales.append(vgScale.getVal())
            qcdScales.append(qcdScale.getVal())

#            vg[0] = vgScale.getVal()
#            qcd[0] = qcdScale.getVal()
#            tree.Fill()

            vgTemplate.Delete()

            # Second fit of the toy: same target, vg template with zero shift.
            vgTemplate = modifyTemplate(vgHistogram, 0., 'vg')

            fitter.setTarget(target)
            fitter.addTemplate(vgTemplate, 'vg', vgScale)
            fitter.addTemplate(qcdHistogram.hWeighted, 'qcd', qcdScale)

            # NOTE(review): a failure here leaves this toy in vgScales/qcdScales
            # but not in the NoEff lists, and also skips the Delete() calls below.
            if fitter.fit(-1) != 0: continue

            vgScalesNoEff.append(vgScale.getVal())
            qcdScalesNoEff.append(qcdScale.getVal())

            target.Delete()
            vgTemplate.Delete()

        sys.stdout.write('\n')

        directory.cd()
#        tree.Write()

        for scales, scalesNoEff, central, centralErr, name in [(vgScales, vgScalesNoEff, vgCentral, vgCentralErr, 'VGamma'), (qcdScales, qcdScalesNoEff, qcdCentral, qcdCentralErr, 'QCD')]:
            # Upper/lower error: distance from the central value to the 84% / 16%
            # entry of the sorted toy results, added in quadrature to the fit error.
            # NOTE(review): indexing raises IndexError if the list is empty
            # (i.e. every toy fit failed).
            scales.sort()
            err = scales[int(len(scales) * 0.84)] - central
            errHigh = math.sqrt(err * err + centralErr * centralErr)
            err = central - scales[int(len(scales) * 0.16)]
            errLow = math.sqrt(err * err + centralErr * centralErr)

            # Single-point graph holding the central value and its asymmetric errors.
            graph = ROOT.TGraphAsymmErrors(1)
            graph.SetPoint(0, 0., central)
            graph.SetPointEYhigh(0, errHigh)
            graph.SetPointEYlow(0, errLow)
            directory.cd()
            graph.Write(name)

            # Same computation for the toys fitted with the unshifted vg template.
            scalesNoEff.sort()
            err = scalesNoEff[int(len(scalesNoEff) * 0.84)] - central
            errHigh = math.sqrt(err * err + centralErr * centralErr)
            err = central - scalesNoEff[int(len(scalesNoEff) * 0.16)]
            errLow = math.sqrt(err * err + centralErr * centralErr)

            graph = ROOT.TGraphAsymmErrors(1)
            graph.SetPoint(0, 0., central)
            graph.SetPointEYhigh(0, errHigh)
            graph.SetPointEYlow(0, errLow)
            directory.cd()
            graph.Write(name + 'NoEff')

            # NOTE(review): errHigh/errLow hold the NoEff values at this point, so
            # those are the errors applied to the VGamma samples below.
            if name == 'VGamma':
                vgErrHigh = errHigh
                vgErrLow = errLow

        ### SET SCALES AND ERRORS

        # VGamma candidate samples: nominal histogram scaled by the central value,
        # up/down histograms by central +/- the asymmetric error.
        for sample in groups['VGamma'].getSamples(candClass):
            count = sample.counter.GetBinContent(1)
            scaleError = max(vgErrHigh, vgErrLow)
            scaleHigh = vgCentral + vgErrHigh
            scaleLow = vgCentral - vgErrLow

            for histogram in sample.histograms.values():
                histogram.hWeighted.Scale(vgCentral)
                histogram.hScaleUp.Scale(scaleHigh)
                histogram.hScaleDown.Scale(scaleLow)

            # Counter bin 1 receives the scaled count, bin 2 the count times the
            # larger of the two scale errors.
            sample.counter.SetBinContent(1, count * vgCentral)
            sample.counter.SetBinContent(2, count * scaleError)

        # JLFake samples: all three histograms and counter bin 1 are scaled by the
        # central qcd value.
        for sample in groups['JLFake'].samples:
            for histogram in sample.histograms.values():
                histogram.hWeighted.Scale(qcdCentral)
                histogram.hScaleUp.Scale(qcdCentral)
                histogram.hScaleDown.Scale(qcdCentral)

            cont = sample.counter.GetBinContent(1)
            sample.counter.SetBinContent(1, cont * qcdCentral)

        # Fake groups: replace the up/down histograms by clones of the nominal one
        # (keeping their names) and zero counter bin 2.
        for gname in fakeGroups:
            for sample in groups[gname].samples:
                for histogram in sample.histograms.values():
                    name = histogram.hScaleUp.GetName()
                    histogram.hScaleUp.Delete()
                    histogram.hScaleUp = histogram.hWeighted.Clone(name)
                    name = histogram.hScaleDown.GetName()
                    histogram.hScaleDown.Delete()
                    histogram.hScaleDown = histogram.hWeighted.Clone(name)
                    
                sample.counter.SetBinContent(2, 0.)

        # Only the 'EWK' group and groups of category Group.SIGNAL are handled below.
        for group in self.groups:
            if group.name != 'EWK' and group.category != Group.SIGNAL: continue

            for sample in group.samples:
                # Read once per sample, before any counter bin is overwritten.
                count = sample.counter.GetBinContent(1)
                countError = sample.counter.GetBinContent(2)

                if sample.eventClass == jlClass:
                    # Samples of the jlClass event class: sign flipped and scaled by
                    # the central qcd value.
                    for histogram in sample.histograms.values():
                        histogram.hWeighted.Scale(-qcdCentral)
                        histogram.hScaleUp.Scale(-qcdCentral)
                        histogram.hScaleDown.Scale(-qcdCentral)

                        # The counter is set inside the histogram loop; every pass
                        # writes the same values computed from count/countError.
                        sample.counter.SetBinContent(1, -count * qcdCentral)
                        sample.counter.SetBinContent(2, -countError * qcdCentral)

                elif sample.eventClass != candClass:
                    # Any other non-candidate event class: sign flipped only.
                    for histogram in sample.histograms.values():
                        histogram.hWeighted.Scale(-1.)
                        histogram.hScaleUp.Scale(-1.)
                        histogram.hScaleDown.Scale(-1.)

                        # As above, written once per histogram with identical values.
                        sample.counter.SetBinContent(1, -count)
                        sample.counter.SetBinContent(2, -countError)