class HistogramTest(unittest.TestCase):
    """Basic checks of ``Histogram.add``, ``get_dict`` and ``count``."""

    def setUp(self):
        # Every test starts from a histogram that has seen "Apache" twice.
        self.h = Histogram()
        self.h.add("Apache")
        self.h.add("Apache")

    def test_add(self):
        """A key that was added is present in the dictionary view."""
        # Use the unittest assertion instead of a bare ``assert``: it is not
        # stripped when Python runs with -O and it reports a useful message.
        self.assertIsNotNone(self.h.get_dict().get("Apache"))

    def test_count(self):
        """Adding the same key twice yields a count of two."""
        self.assertEqual(self.h.count("Apache"), 2)
def make_histo(reads):
    '''Given a dict of reads keyed by sequence, create a Histogram
    object summarising the distribution of duplicated reads.

    Every value of ``reads`` (the per-sequence count) is added to the
    histogram.  Sequences whose count is 1000 or more are also written to
    stdout as "<sequence>TAB<count>".  When ISATTY is true a running total
    is written to stderr every 100000 sequences.

    Returns the populated Histogram.
    '''
    LOGGER.debug("making histogram...")

    histo = Histogram()

    # Iterate the mapping itself rather than calling iterkeys(): the result is
    # the same lazy key iteration on Python 2 and it also works on Python 3,
    # where dict.iterkeys() no longer exists.
    for total, read in enumerate(reads, 1):
        count = reads[read]  # single lookup, reused below
        histo.add(count)

        if count >= 1000:
            sys.stdout.write("%s\t%d\n" % (read, count))

        if total % 100000 == 0 and ISATTY:
            sys.stderr.write("%9d\r" % (total, ))

    return histo
class Dictogram(dict):
    """Dictionary mapping each word of a text to its follower histogram.

    ``self.histogram`` holds the Histogram built from ``text_file`` (or None
    when no file is given).  Each key is a word taken from
    ``self.histogram.words_list``; each value is whatever ``Histogram.add``
    returns after recording the word that follows that key.
    NOTE(review): this presumably means ``add`` returns the histogram
    itself -- confirm against the Histogram implementation.
    """

    def __init__(self, text_file=None):
        """Build the dictogram from ``text_file``; stay empty when it is None."""
        super(Dictogram, self).__init__()
        self.histogram = None
        if text_file is not None:
            self.histogram = Histogram(text_file)
            self.create_dictogram()

    def create_dictogram(self):
        """Record, for every word in the source text, the word that follows it.

        Does nothing when no source histogram is set.  The last word of the
        text has no follower and therefore contributes no entry of its own.
        """
        if self.histogram is None:
            return

        words = self.histogram.words_list
        # Walk consecutive (word, follower) pairs.  zip stops one element
        # before the end, which replaces the manual index bounds check.
        for word, next_word in zip(words, words[1:]):
            # Keep the per-word histogram in a local variable.  Storing it in
            # self.histogram (as the code used to) replaced the source
            # histogram whose words_list is still being traversed.
            if word in self:
                followers = self[word]
            else:
                followers = Histogram()
            self[word] = followers.add(next_word)
def get_headers(self):
    """Tally the ``Server`` response header of every "link.php?id=" link.

    Links come from ``self.parse_html()``.  Each matching link is requested
    with HEAD relative to ``self.url``.  The header value is printed and then
    counted under the first known server name it contains, or under
    "other servers" when it contains none of them.  Any exception raised
    while handling a link is reported on stdout and that link is skipped.

    Returns the Histogram holding the counts.
    """
    links = self.parse_html()
    known_servers = ["Apache", "nginx", "Zeus", "Microsoft-IIS"]
    tally = Histogram()

    for link in links:
        try:
            if link is None or "link.php?id=" not in link:
                continue

            response = requests.head(self.url + link, timeout=3, allow_redirects=True)
            banner = response.headers["Server"]
            print(banner)

            for name in known_servers:
                if name in banner:
                    tally.add(name)
                    break
            else:
                # No known name matched the header value.
                tally.add("other servers")
        except Exception:
            print("The page is not responding...")

    return tally
def getServersDict(all_servers):
    """Count Apache, nginx and IIS occurrences among server strings.

    ``None`` entries are ignored.  The three checks are independent, so one
    string can be counted under more than one name.  The match is
    case-insensitive.

    Returns the Histogram holding the counts.
    """
    tally = Histogram()

    for banner in all_servers:
        if banner is None:
            continue

        lowered = banner.lower()
        if 'apache' in lowered:
            tally.add('Apache')
        if 'nginx' in lowered:
            tally.add('nginx')
        if 'IIS' in banner.upper():
            tally.add('IIS')

    return tally
def create_dictogram(self):
    """Record, for every word in the source text, the word that follows it.

    Reads the word sequence from ``self.histogram.words_list`` and stores,
    under each word, the result of ``add(next_word)`` on that word's
    Histogram.  A new Histogram is created the first time a word is seen.
    NOTE(review): this presumably means ``add`` returns the histogram
    itself -- confirm against the Histogram implementation.

    Does nothing when ``self.histogram`` is None.  The last word of the text
    has no follower and therefore contributes no entry of its own.
    """
    if self.histogram is None:
        return

    words = self.histogram.words_list
    # Walk consecutive (word, follower) pairs.  zip stops one element before
    # the end, which replaces the manual index bounds check.
    for word, next_word in zip(words, words[1:]):
        # Keep the per-word histogram in a local variable.  Storing it in
        # self.histogram (as the code used to) replaced the source histogram
        # whose words_list is still being traversed.
        if word in self:
            followers = self[word]
        else:
            followers = Histogram()
        self[word] = followers.add(next_word)
class HistogramTests(unittest.TestCase):
    """Checks of Histogram.add, count and get_dict with server names."""

    def setUp(self):
        self.ap, self.ng, self.iis = 'Apache', 'nginx', 'IIS'
        self.h = Histogram()

    def _record(self, *names):
        # Add each given name to the histogram under test, in order.
        for name in names:
            self.h.add(name)

    def testAddRecord(self):
        self._record(self.ap)
        size = len(self.h.get_dict())
        self.assertEqual(1, size)

    def testAddRecord_2(self):
        self._record(self.ap, self.ng)
        size = len(self.h.get_dict())
        self.assertEqual(2, size)

    def testCountRecord(self):
        self._record(self.ap)
        self.assertEqual(1, self.h.count("Apache"))

    def testCountRecord_2(self):
        self._record(self.ap, self.ap)
        self.assertEqual(2, self.h.count("Apache"))

    def testCountRecord_3(self):
        # A name that was never added is expected to count as None.
        self._record(self.ap, self.ap, self.ng)
        self.assertEqual(None, self.h.count("IBM Web Server"))

    def testGetDict_4(self):
        self._record(self.ap, self.ap, self.ng, self.ng, self.iis)
        expected = {"Apache": 2, "nginx": 2, "IIS": 1}
        self.assertEqual(self.h.get_dict(), expected)
def scalePlots(self, outputDir):
    """Fit the 'vg' and 'qcd' template scales and apply them to the samples.

    Steps, in order:
      1. Build the target distribution: 'Observed' samples minus 'EWK'
         samples, then minus the 'EGFake' and 'JGFake' group histograms.
      2. Fit the summed 'VGamma' (candidate class only) and 'JLFake'
         templates to the target with ROOT.TemplateChi2Fitter; raise if the
         fit fails.
      3. Run 1000 toy fits with randomly shifted templates to estimate the
         spread of the two scale factors, and write one TGraphAsymmErrors
         per scale (with and without the 'vg' template shift).
      4. Scale the histograms and counters of the affected samples in place.

    outputDir is used through GetDirectory/mkdir; the sub-directories
    'PreTemplateFit', 'PostTemplateFit' and 'TemplateFitError' are created
    under it when missing.

    Raises RuntimeError when the central fit returns a non-zero status.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened listing -- confirm against the original file.
    """
    groups = dict([(g.name, g) for g in self.groups])

    fakeGroups = ['EGFake', 'JGFake']

    candClass = 'PhotonAnd' + self.lepton
    jlClass = 'PhotonAndFake' + self.lepton
    # NOTE(review): fakeClasses is not referenced again in this method.
    fakeClasses = ['ElePhotonAnd' + self.lepton, 'FakePhotonAnd' + self.lepton]

    ### CREATE TEMPLATES FROM SAMPLE HISTOGRAMS IN GROUPS

    observed = self.tempDef.generate(suffix = 'observed')
    fakeHistograms = dict([(gname, Histogram(self.tempDef, suffix = gname)) for gname in fakeGroups])
    vgHistogram = Histogram(self.tempDef, suffix = 'vg')
    qcdHistogram = Histogram(self.tempDef, suffix = 'qcd')

    for sample in groups['Observed'].samples:
        observed.Add(sample.histograms[self.tempDef.name].hWeighted)

    # EWK samples are added with weight -1, i.e. subtracted from observed.
    for sample in groups['EWK'].samples:
        observed.Add(sample.histograms[self.tempDef.name].hWeighted, -1.)

    for gname in fakeGroups:
        for sample in groups[gname].samples:
            fakeHistograms[gname].add(sample.histograms[self.tempDef.name])

    for sample in groups['VGamma'].getSamples(candClass):
        vgHistogram.add(sample.histograms[self.tempDef.name])

    for sample in groups['JLFake'].samples:
        qcdHistogram.add(sample.histograms[self.tempDef.name])

    ### FIT FOR CENTRAL VALUES

    fitter = ROOT.TemplateChi2Fitter.singleton()

    # Fit parameters: (name, title, initial value, lower bound, upper bound).
    vgScale = ROOT.RooRealVar('vg', 'vg', 1., 0., 5.)
    qcdScale = ROOT.RooRealVar('qcd', 'qcd', 0.1, 0., 1.)

    # Target = observed minus both fake-group templates.
    target = observed.Clone('target')
    for histo in fakeHistograms.values():
        target.Add(histo.hWeighted, -1.)

    fitter.setTarget(target)
    fitter.addTemplate(vgHistogram.hWeighted, 'vg', vgScale)
    fitter.addTemplate(qcdHistogram.hWeighted, 'qcd', qcdScale)

    # NOTE(review): target is deleted before plot()/fit() are called, so
    # setTarget presumably copies the histogram -- confirm in the fitter.
    target.Delete()

    directory = outputDir.GetDirectory('PreTemplateFit')
    if not directory:
        directory = outputDir.mkdir('PreTemplateFit')
    directory.cd()
    fitter.plot(directory)
    directory.Write()

    if fitter.fit() != 0:
        raise RuntimeError('Template fit did not converge')

    directory = outputDir.GetDirectory('PostTemplateFit')
    if not directory:
        directory = outputDir.mkdir('PostTemplateFit')
    directory.cd()
    fitter.plot(directory)
    directory.Write()

    # Keep the central results; the toy fits below reuse the same variables.
    vgCentral = vgScale.getVal()
    vgCentralErr = vgScale.getError()
    qcdCentral = qcdScale.getVal()
    qcdCentralErr = qcdScale.getError()

    ### ERROR ESTIMATION

    directory = outputDir.GetDirectory('TemplateFitError')
    if not directory:
        directory = outputDir.mkdir('TemplateFitError')
    directory.cd()

    # tree = ROOT.TTree('toys', 'Toys')
    # vg = array.array('d', [0.])
    # qcd = array.array('d', [0.])
    # targetContents = array.array('d', [0.] * self.tempDef.nx)
    # targetErrors = array.array('d', [0.] * self.tempDef.nx)
    # vgContents = array.array('d', [0.] * self.tempDef.nx)
    # vgErrors = array.array('d', [0.] * self.tempDef.nx)
    # tree.Branch('vg', vg, 'vg/D')
    # tree.Branch('qcd', qcd, 'qcd/D')
    # tree.Branch('targetContents', targetContents, 'content[%d]/D' % self.tempDef.nx)
    # tree.Branch('targetErrors', targetErrors, 'error[%d]/D' % self.tempDef.nx)
    # tree.Branch('vgContents', vgContents, 'content[%d]/D' % self.tempDef.nx)
    # tree.Branch('vgErrors', vgErrors, 'error[%d]/D' % self.tempDef.nx)

    # Fitted scales of the converged toys, with the 'vg' template shift ...
    vgScales = []
    qcdScales = []
    # ... and with the 'vg' template left unshifted (sigma = 0).
    vgScalesNoEff = []
    qcdScalesNoEff = []

    def modifyTemplate(histogram, sigma, name = 'template'):
        """Return a clone of histogram.hWeighted shifted by sigma * err per bin.

        err is the larger absolute difference between the bin content and
        the hScaleUp / hScaleDown bin contents.
        """
        result = histogram.hWeighted.Clone(name)
        for iX in range(1, result.GetNbinsX() + 1):
            err = max(abs(histogram.hScaleUp.GetBinContent(iX) - result.GetBinContent(iX)), abs(result.GetBinContent(iX) - histogram.hScaleDown.GetBinContent(iX)))
            result.SetBinContent(iX, result.GetBinContent(iX) + err * sigma)

        return result

    # NOTE(review): canvas is not referenced again in this method.
    canvas = ROOT.TCanvas('c1', 'c1')

    print 'Error evaluation with 1000 toys:'

    for iToy in range(1000):
        # Progress indicator, rewritten in place every 100 toys.
        if iToy % 100 == 0:
            sys.stdout.write('\r' + str(iToy))
            sys.stdout.flush()

        # One unit-Gaussian shift per varied template.
        egScaleVar = random.gauss(0., 1.)
        jgScaleVar = random.gauss(0., 1.)
        effScaleVar = random.gauss(0., 1.)

        target = observed.Clone('target')

        egTemplate = modifyTemplate(fakeHistograms['EGFake'], egScaleVar)
        target.Add(egTemplate, -1.)
        egTemplate.Delete()

        jgTemplate = modifyTemplate(fakeHistograms['JGFake'], jgScaleVar)
        target.Add(jgTemplate, -1.)
        jgTemplate.Delete()

        vgTemplate = modifyTemplate(vgHistogram, effScaleVar, 'vg')

        # for iX in range(self.tempDef.nx):
        #     targetContents[iX] = target.GetBinContent(iX + 1)
        #     targetErrors[iX] = target.GetBinError(iX + 1)
        #     vgContents[iX] = vgTemplate.GetBinContent(iX + 1)
        #     vgErrors[iX] = vgTemplate.GetBinError(iX + 1)

        fitter.setTarget(target)
        fitter.addTemplate(vgTemplate, 'vg', vgScale)
        fitter.addTemplate(qcdHistogram.hWeighted, 'qcd', qcdScale)

        # NOTE(review): both `continue` paths in this loop skip the Delete()
        # calls at the end of the iteration for target and vgTemplate.
        if fitter.fit(-1) != 0:
            continue

        vgScales.append(vgScale.getVal())
        qcdScales.append(qcdScale.getVal())

        # vg[0] = vgScale.getVal()
        # qcd[0] = qcdScale.getVal()
        # tree.Fill()

        vgTemplate.Delete()

        # Second fit of the same toy target with the unshifted 'vg' template.
        vgTemplate = modifyTemplate(vgHistogram, 0., 'vg')

        fitter.setTarget(target)
        fitter.addTemplate(vgTemplate, 'vg', vgScale)
        fitter.addTemplate(qcdHistogram.hWeighted, 'qcd', qcdScale)

        # A failure here leaves this toy in vgScales/qcdScales but not in the
        # NoEff lists, so the two sets of lists can differ in length.
        if fitter.fit(-1) != 0:
            continue

        vgScalesNoEff.append(vgScale.getVal())
        qcdScalesNoEff.append(qcdScale.getVal())

        target.Delete()
        vgTemplate.Delete()

    sys.stdout.write('\n')

    directory.cd()
    # tree.Write()

    for scales, scalesNoEff, central, centralErr, name in [(vgScales, vgScalesNoEff, vgCentral, vgCentralErr, 'VGamma'), (qcdScales, qcdScalesNoEff, qcdCentral, qcdCentralErr, 'QCD')]:
        # Take the entries at 84% and 16% of the sorted toy results
        # (presumably the +-1 sigma band) and combine each distance from the
        # central value in quadrature with the central fit error.
        # NOTE(review): an empty list (no converged toy) raises IndexError.
        scales.sort()

        err = scales[int(len(scales) * 0.84)] - central
        errHigh = math.sqrt(err * err + centralErr * centralErr)
        err = central - scales[int(len(scales) * 0.16)]
        errLow = math.sqrt(err * err + centralErr * centralErr)

        graph = ROOT.TGraphAsymmErrors(1)
        graph.SetPoint(0, 0., central)
        graph.SetPointEYhigh(0, errHigh)
        graph.SetPointEYlow(0, errLow)

        directory.cd()
        graph.Write(name)

        # Same computation for the toys fitted with the unshifted template.
        scalesNoEff.sort()

        err = scalesNoEff[int(len(scalesNoEff) * 0.84)] - central
        errHigh = math.sqrt(err * err + centralErr * centralErr)
        err = central - scalesNoEff[int(len(scalesNoEff) * 0.16)]
        errLow = math.sqrt(err * err + centralErr * centralErr)

        graph = ROOT.TGraphAsymmErrors(1)
        graph.SetPoint(0, 0., central)
        graph.SetPointEYhigh(0, errHigh)
        graph.SetPointEYlow(0, errLow)

        directory.cd()
        graph.Write(name + 'NoEff')

        # NOTE(review): errHigh/errLow hold the NoEff values at this point,
        # so these are the errors applied to the VGamma samples below.
        if name == 'VGamma':
            vgErrHigh = errHigh
            vgErrLow = errLow

    ### SET SCALES AND ERRORS

    for sample in groups['VGamma'].getSamples(candClass):
        count = sample.counter.GetBinContent(1)

        scaleError = max(vgErrHigh, vgErrLow)
        scaleHigh = vgCentral + vgErrHigh
        scaleLow = vgCentral - vgErrLow

        for histogram in sample.histograms.values():
            histogram.hWeighted.Scale(vgCentral)
            histogram.hScaleUp.Scale(scaleHigh)
            histogram.hScaleDown.Scale(scaleLow)

        # Counter bin 1 receives the scaled count; bin 2 receives the count
        # times the larger of the two scale errors.
        sample.counter.SetBinContent(1, count * vgCentral)
        sample.counter.SetBinContent(2, count * scaleError)

    # JLFake: nominal and up/down histograms all get the central qcd scale.
    for sample in groups['JLFake'].samples:
        for histogram in sample.histograms.values():
            histogram.hWeighted.Scale(qcdCentral)
            histogram.hScaleUp.Scale(qcdCentral)
            histogram.hScaleDown.Scale(qcdCentral)

        cont = sample.counter.GetBinContent(1)
        sample.counter.SetBinContent(1, cont * qcdCentral)

    # Fake groups: replace the up/down histograms with clones of the nominal
    # one (keeping their names) and zero counter bin 2.
    for gname in fakeGroups:
        for sample in groups[gname].samples:
            for histogram in sample.histograms.values():
                name = histogram.hScaleUp.GetName()
                histogram.hScaleUp.Delete()
                histogram.hScaleUp = histogram.hWeighted.Clone(name)
                name = histogram.hScaleDown.GetName()
                histogram.hScaleDown.Delete()
                histogram.hScaleDown = histogram.hWeighted.Clone(name)

            sample.counter.SetBinContent(2, 0.)

    # Only the 'EWK' group and groups of category Group.SIGNAL are processed.
    for group in self.groups:
        if group.name != 'EWK' and group.category != Group.SIGNAL:
            continue

        for sample in group.samples:
            count = sample.counter.GetBinContent(1)
            countError = sample.counter.GetBinContent(2)

            if sample.eventClass == jlClass:
                # jlClass samples: sign flipped and scaled by the qcd factor.
                for histogram in sample.histograms.values():
                    histogram.hWeighted.Scale(-qcdCentral)
                    histogram.hScaleUp.Scale(-qcdCentral)
                    histogram.hScaleDown.Scale(-qcdCentral)

                sample.counter.SetBinContent(1, -count * qcdCentral)
                sample.counter.SetBinContent(2, -countError * qcdCentral)

            elif sample.eventClass != candClass:
                # Any other non-candidate class: sign flipped only.
                for histogram in sample.histograms.values():
                    histogram.hWeighted.Scale(-1.)
                    histogram.hScaleUp.Scale(-1.)
                    histogram.hScaleDown.Scale(-1.)

                sample.counter.SetBinContent(1, -count)
                sample.counter.SetBinContent(2, -countError)