def concatSeq(genome_file, dir):
    """
    Merge the per-CDS ``*.faa`` fasta files found in ``dir`` into ``genome_file``.

    Any pre-existing ``genome_file`` is deleted first; the concatenation
    itself is a shell ``cat`` executed through ``util.runProcess``.
    """
    util.checkDir(dir)

    # Drop the output of a previous run before cat recreates it
    stale_output = os.path.exists(genome_file)
    if stale_output:
        os.remove(genome_file)

    util.runProcess("cat %s/*.faa > %s" % (dir, genome_file))
    logger.info("concatSeq finished")
def __init__(
    self,
    data,
    results,
    client=None,
    use_client=False,
    xcorr_append="",
    append="",
    create_symbolic_links=True,
):
    """
    Store the path templates used for raw data, cross-correlation and
    receiver-function files.

    The templates are plain ``%``-format strings built from ``data`` and
    ``results``; ``xcorr_append`` is appended to the ``xcorr`` directory
    name and ``append`` to the receiver-function templates.

    When ``create_symbolic_links`` is true, ``util.checkDir`` is called for
    the xcorr result and prep locations and the two directories are
    cross-linked (``to_results`` / ``to_prep``).

    Raises NotImplementedError when called on ``Data`` itself instead of a
    daughter class.
    """
    # Only daughter classes may be instantiated
    if self.__class__ == Data:
        raise NotImplementedError(
            "This function has to be called from or implemented by the daughter class."
        )

    self.data = data
    self.results = results
    self.client = client
    self.use_client = use_client
    self.stations = self.eventfile = None

    self.raw = data + "/raw/%s_%d_%03d.mseed"

    # Cross-correlation templates
    xcorr_prefix = "/xcorr%s" % xcorr_append
    prep_template = data + xcorr_prefix + "/prep/%s_%d_%03d"
    self.getstr = prep_template
    self.x_prep = prep_template
    xcorr_results = results + xcorr_prefix
    self.x_res = xcorr_results
    # (1) placeholders: period, correlation, filter, time
    # (2) placeholders: period, correlation, filter, number of stacks
    self.xcorr = xcorr_results + "/xcorr/%s_%s%s_%s"  # 1
    self.x_stack = xcorr_results + "/stack/%s_%s%s_stack%s"  # 2
    self.x_plot = xcorr_results + "/plots/%s_%s%s_%s"  # 1
    self.x_plot_stack = xcorr_results + "/plots_stack/%s_%s%s_stack%s"  # 2
    self.x_sac = xcorr_results + "/sac/%s_%s.SAC"

    # Receiver-function templates
    self.rf_events = data + "/receiver/events/%s_%s" + append  # M5.5_events
    self.rf_results = results + "/receiver/results/%s" + append
    self.rf_sac = self.rf_results + "/sac/%s_%s.SAC"

    if not create_symbolic_links:
        return

    util.checkDir(self.x_res + "/bla")
    try:
        util.checkDir(self.x_prep)
    except OSError:
        import warnings

        warnings.warn("Error with external HD")
        return

    # Cross-link the prep and result directories (only if the link is missing)
    prep_dir = os.path.dirname(self.x_prep)
    wanted_links = (
        (self.x_res, prep_dir + "/to_results"),
        (prep_dir, self.x_res + "/to_prep"),
    )
    for target, link_path in wanted_links:
        if not os.path.islink(link_path):
            os.symlink(target, link_path)
def topFastaHits(res_dir, extractedseq_dir):
    """
    Extract top fasta alignment hits that cover at least 80% of the length of
    both sequences with at least 30% identity.

    Every ``.fa`` result file found under ``res_dir`` is parsed with
    Biopython ``AlignIO`` (format ``fasta-m10``). For each selected hit the
    ungapped match sequence is written to ``<extractedseq_dir>/<match id>.faa``.

    Returns a dictionary keyed by ``"<query id>||<match id>"`` whose values
    are space-separated strings: score, identity, query region, query id,
    match region, match id.
    """
    # Identity cutoff for reciprocal searches
    ident_cutoff = 0.3
    # Length of hit cutoff for reciprocal searches
    len_cutoff = 0.8
    # Extracted sequence directory
    util.createDir(extractedseq_dir)
    # TODO Create MSP crunch file
    # Top hits dictionary
    fastahits_dict = {}
    # Loop over the fasta results
    util.checkDir(res_dir)
    for (path, _dirs, files) in os.walk(res_dir):
        for filename in files:
            if '.fa' not in filename:
                continue
            res_file = path + "/" + filename
            logger.info("Reading... " + res_file)
            # Read the fasta alignment results with biopython AlignIO fasta-m10;
            # the context manager closes the result file once it is consumed
            with open(res_file) as res_handle:
                for alignment in AlignIO.parse(res_handle, "fasta-m10", seq_count=2):
                    # Select the hit based on cutoffs
                    if float(alignment._annotations["sw_ident"]) < ident_cutoff:
                        continue
                    record_query = alignment[0]
                    record_match = alignment[1]
                    overlap = float(alignment._annotations["sw_overlap"])
                    query_cover = overlap / float(record_query.annotations["original_length"])
                    match_cover = overlap / float(record_match.annotations["original_length"])
                    # The hit must cover BOTH sequences, so it is rejected as
                    # soon as either coverage falls below the cutoff
                    if query_cover < len_cutoff or match_cover < len_cutoff:
                        continue
                    # Create SeqRecord of selected hit (alignment gaps removed)
                    extractedseq_record = SeqRecord(
                        seq=Seq(str(record_match.seq).replace('-', '')),
                        id=record_match.id,
                        description=res_file,
                    )
                    extractedseq_file = "%s/%s.faa" % (extractedseq_dir, record_match.id)
                    # Print match sequence of selected hit into fasta file
                    with open(extractedseq_file, "w") as output_handle:
                        SeqIO.write([extractedseq_record], output_handle, "fasta")
                    logger.info(" ...sequence extracted into %s" % extractedseq_file)
                    record_query_region = "%s-%s" % (record_query._al_start, record_query._al_stop)
                    record_match_region = "%s-%s" % (record_match._al_start, record_match._al_stop)
                    # add hit into dictionary
                    key = "%s||%s" % (record_query.id, record_match.id)
                    # value in MSP crunch format
                    value = "%s %s %s %s %s %s" % (
                        alignment._annotations["sw_score"],
                        alignment._annotations["sw_ident"],
                        record_query_region,
                        record_query.id,
                        record_match_region,
                        record_match.id,
                    )
                    fastahits_dict[key] = value
    logger.info("Extract fasta alignment hits finished")
    return fastahits_dict
def writeX(self, stream, *args, **kwargs):
    """
    Write a cross-correlation stream to the file named by ``self.getX``.

    :param stream: stream to write
    :param args: positional arguments forwarded unchanged to ``self.getX``
    :param kwargs: keyword arguments forwarded unchanged to ``self.getX``
    """
    target = self.getX(*args, **kwargs)
    # presumably prepares the directory of the target file -- see util.checkDir
    util.checkDir(target)
    stream.write(target, "Q")
def writeRFEvents(self, stream, station, time):
    """
    Write a stream of event traces to the receiver-function events file.

    The file name is ``self.rf_events`` filled in with ``station`` and
    ``time.year``.

    :param stream: stream to write
    :param station: station
    :param time: object with a ``year`` attribute (only the year is used)
    """
    template_values = (station, time.year)
    target = self.rf_events % template_values
    # presumably prepares the directory of the target file -- see util.checkDir
    util.checkDir(target)
    stream.write(target, "Q")
def writeX(self, stream, *args, **kwargs):
    """
    Write a cross-correlation stream to the file named by ``self.getX``.

    :param stream: stream to write
    :param args: positional arguments forwarded unchanged to ``self.getX``
    :param kwargs: keyword arguments forwarded unchanged to ``self.getX``
    """
    target = self.getX(*args, **kwargs)
    # presumably prepares the directory of the target file -- see util.checkDir
    util.checkDir(target)
    stream.write(target, 'Q')
def writeRFEvents(self, stream, station, time):
    """
    Write a stream of event traces to the receiver-function events file.

    The file name is ``self.rf_events`` filled in with ``station`` and
    ``time.year``.

    :param stream: stream to write
    :param station: station
    :param time: object with a ``year`` attribute (only the year is used)
    """
    template_values = (station, time.year)
    target = self.rf_events % template_values
    # presumably prepares the directory of the target file -- see util.checkDir
    util.checkDir(target)
    stream.write(target, "Q")
def translateSeq(dir):
    """
    Translate the nucleotide fasta files (``.ffn``) in ``dir`` into protein
    fasta files (``.faa``) with EMBOSS transeq.

    Each input ``<name>.ffn`` produces ``<name>.faa`` in the same directory,
    where ``<name>`` is the part of the file name before its first dot.

    transeq is invoked as
        transeq -sequence fasta::<in> -outseq fasta::<out> -table 11
    where
        -sequence  nucleotide sequence(s) filename and optional format
        -outseq    protein sequence set(s) filename and optional format
        -table     genetic code to use; 11 is the Bacterial table
                   (0 is Standard; the full list of tables is in the
                   transeq documentation)

    Online documentation: http://emboss.open-bio.org/wiki/Appdoc:Transeq
    """
    util.checkDir(dir)

    nucleotide_files = [name for name in os.listdir(dir) if '.ffn' in name]
    for infasta in nucleotide_files:
        basename = infasta.split(".")[0]
        outpep = basename + ".faa"
        util.runProcess(
            "transeq -sequence fasta::%s/%s -outseq fasta::%s/%s -table 11"
            % (dir, infasta, dir, outpep)
        )

    logger.info("Sequences translated.")
def runReciprocalFasta(seq_dir, genome_file, fasta_dir):
    """
    Run FASTA with the extracted in-house protein sequences (``seq_dir``)
    as queries against the new genome (``genome_file``).

    Results go to ``fasta_dir``. With ``IS_LSF`` set, the query files are
    renamed to ``refgenome_<n>.faa`` and a job array is submitted; otherwise
    ``fasta35`` is run once per query file.

    FASTA searches a protein or DNA sequence data bank
    version 35.04 Aug. 25, 2009
    W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
    """
    util.createDir(fasta_dir)
    # Check new genome
    util.checkFile(genome_file)
    # Check ref genome extracted sequences
    util.checkDir(seq_dir)
    res_dir = fasta_dir

    query_files = [name for name in os.listdir(seq_dir) if '.faa' in name]

    if not IS_LSF:
        # One local fasta35 run per extracted in-house sequence
        for seq_file in query_files:
            res_file = seq_file.split(".")[0] + ".fa"
            util.runProcess(
                "fasta35 -z 1 -Q -H -S -m 10 %s/%s %s > %s/%s"
                % (seq_dir, seq_file, genome_file, res_dir, res_file)
            )
            logger.info(seq_file)
        logger.info("Reciprocal Fasta finished")
        return

    # Rename the query sequences to refgenome_1.faa refgenome_2.faa ... so
    # the job array can address them by index; already renamed files keep
    # their name but still count towards the index
    for seq_num, seq_file in enumerate(query_files, 1):
        if 'refgenome_' in seq_file:
            continue
        os.rename(
            "%s/%s" % (seq_dir, seq_file),
            "%s/refgenome_%s.faa" % (seq_dir, seq_num),
        )

    # Submit bsub job array on refgenome_${LSB_JOBINDEX}.faa against mygenome
    bsub_dir = "bsub"
    util.checkDir(bsub_dir)
    cmd = "fasta35 -z 1 -Q -H -S -m 10 %s/refgenome_${LSB_JOBINDEX}.faa %s > %s/refgenome_${LSB_JOBINDEX}.fa" % (seq_dir, genome_file, res_dir)
    util.submitJobArray(jobname="genepy-recipfasta", jobnum=len(query_files), jobdir=bsub_dir, cmd=cmd)
    util.submitJobDependency('genepy-recipfasta')
    logger.info("Reciprocal Fasta on LSF finished")
def topReciprocalFastaHits(res_dir):
    """
    Extract top hits that cover at least 80% of the length of both sequences
    with at least 30% identity.

    Every ``.fa`` result file found under ``res_dir`` is parsed with
    Biopython ``AlignIO`` (format ``fasta-m10``).

    Returns a dictionary keyed by ``"<match id>||<query id>"`` (inverted
    with respect to the forward search so both dictionaries are comparable)
    whose values are space-separated strings: score, identity, query region,
    query id, match region, match id.
    """
    # Identity cutoff for reciprocal searches
    ident_cutoff = 0.3
    # Length of hit cutoff for reciprocal searches
    len_cutoff = 0.8
    # TODO Create MSP crunch file
    # Top hits dictionary
    fastahits_dict = {}
    # Loop over the fasta results
    util.checkDir(res_dir)
    for (path, _dirs, files) in os.walk(res_dir):
        for filename in files:
            if '.fa' not in filename:
                continue
            res_file = path + "/" + filename
            logger.info("Reading... " + res_file)
            # Read the fasta alignment results with biopython AlignIO fasta-m10;
            # the context manager closes the result file once it is consumed
            with open(res_file) as res_handle:
                for alignment in AlignIO.parse(res_handle, "fasta-m10", seq_count=2):
                    # Select the hit based on cutoffs
                    if float(alignment._annotations["sw_ident"]) < ident_cutoff:
                        continue
                    record_query = alignment[0]
                    record_match = alignment[1]
                    overlap = float(alignment._annotations["sw_overlap"])
                    query_cover = overlap / float(record_query.annotations["original_length"])
                    match_cover = overlap / float(record_match.annotations["original_length"])
                    # The hit must cover BOTH sequences, so it is rejected as
                    # soon as either coverage falls below the cutoff
                    if query_cover < len_cutoff or match_cover < len_cutoff:
                        continue
                    record_query_region = "%s-%s" % (record_query._al_start, record_query._al_stop)
                    record_match_region = "%s-%s" % (record_match._al_start, record_match._al_stop)
                    # add hit into dictionary;
                    # inverted key to be comparable with fasta hits
                    key = "%s||%s" % (record_match.id, record_query.id)
                    value = "%s %s %s %s %s %s" % (
                        alignment._annotations["sw_score"],
                        alignment._annotations["sw_ident"],
                        record_query_region,
                        record_query.id,
                        record_match_region,
                        record_match.id,
                    )
                    fastahits_dict[key] = value
    logger.info("Extract reciprocal fasta alignment hits finished")
    return fastahits_dict
def runHamapScan(seq_dir, hamap_dir):
    """
    Scan the protein sequences in ``seq_dir`` against the HAMAP profiles
    with pfscan, writing one ``.out`` file per sequence file to ``hamap_dir``.

    The profile library is read from ``hamap/hamap.prf`` next to this module.
    With ``IS_LSF`` set, the sequence files are renamed to
    ``mygenome_<n>.faa`` and a job array is submitted; otherwise pfscan is
    run once per sequence file.

    HAMAP: High-quality Automated and Manual Annotation of microbial Proteomes
    ftp download site: ftp://ftp.expasy.org/databases/hamap/

    pfscan compares a protein or nucleic acid sequence against a profile
    library. The result is an unsorted list of profile-sequence matches.
    download site: http://www.isrec.isb-sib.ch/ftp-server/pftools/pft2.3/
    """
    util.createDir(hamap_dir)
    util.checkDir(seq_dir)
    module_dir = os.path.dirname(__file__)
    hamap_profile_file = "%s/hamap/hamap.prf" % module_dir

    protein_files = [name for name in os.listdir(seq_dir) if '.faa' in name]

    if not IS_LSF:
        # One local pfscan run per new genome sequence file
        for seq_file in protein_files:
            res_file = seq_file.split(".")[0] + ".out"
            util.runProcess(
                "pfscan -klf %s/%s %s > %s/%s"
                % (seq_dir, seq_file, hamap_profile_file, hamap_dir, res_file)
            )
        logger.info("HAMAP scan finished")
        return

    # Rename new genome sequences for job array to be mygenome_1.faa
    # mygenome_2.faa ...; already renamed files keep their name but still
    # count towards the index
    for seq_num, seq_file in enumerate(protein_files, 1):
        if 'mygenome_' in seq_file:
            continue
        os.rename(
            "%s/%s" % (seq_dir, seq_file),
            "%s/mygenome_%s.faa" % (seq_dir, seq_num),
        )

    # Submit bsub job array on mygenome_${LSB_JOBINDEX}.faa against hamap profile
    bsub_dir = "bsub"
    util.checkDir(bsub_dir)
    cmd = "pfscan -klf %s/mygenome_${LSB_JOBINDEX}.faa %s > %s/mygenome_${LSB_JOBINDEX}.out" % (seq_dir, hamap_profile_file, hamap_dir)
    util.submitJobArray(jobname='genepy-hamap', jobnum=len(protein_files), jobdir=bsub_dir, cmd=cmd)
    util.submitJobDependency('genepy-hamap')
    logger.info("HAMAP scan on LSF finished")
def runFasta(seq_dir, genomes_dir, fasta_dir):
    """
    Run FASTA with the new genome protein sequences (``seq_dir``) as
    queries against every in-house genome (``.faa`` file) in ``genomes_dir``.

    One result sub-directory per in-house genome is created below
    ``fasta_dir``. With ``IS_LSF`` set, the query files are renamed to
    ``mygenome_<n>.faa`` and one job array per in-house genome is submitted;
    otherwise ``fasta35`` is run for every query/genome pair.

    FASTA searches a protein or DNA sequence data bank
    version 35.04 Aug. 25, 2009
    W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
    """
    util.createDir(fasta_dir)

    # List of in-house genomes, each with its own fasta results directory
    util.checkDir(genomes_dir)
    logger.info("Create fasta results directory for each in-house reference genome")
    genome_files = []
    for candidate in os.listdir(genomes_dir):
        if '.faa' not in candidate:
            continue
        genome_files.append(candidate)
        util.createDir("%s/%s" % (fasta_dir, candidate.split(".")[0]))
        logger.info(candidate)

    util.checkDir(seq_dir)
    query_files = [name for name in os.listdir(seq_dir) if '.faa' in name]

    if not IS_LSF:
        # Local run: every new genome sequence against every in-house genome
        for seq_file in query_files:
            res_file = seq_file.split(".")[0] + ".fa"
            for genome_file in genome_files:
                res_dir = "%s/%s" % (fasta_dir, genome_file.split(".")[0])
                util.runProcess(
                    "fasta35 -z 1 -Q -H -S -m 10 %s/%s %s/%s > %s/%s"
                    % (seq_dir, seq_file, genomes_dir, genome_file, res_dir, res_file)
                )
            logger.info(seq_file)
        logger.info("Fasta finished")
        return

    # Rename new genome sequences for job array to be mygenome_1.faa
    # mygenome_2.faa ...; already renamed files keep their name but still
    # count towards the index
    for seq_num, seq_file in enumerate(query_files, 1):
        if 'mygenome_' in seq_file:
            continue
        os.rename(
            "%s/%s" % (seq_dir, seq_file),
            "%s/mygenome_%s.faa" % (seq_dir, seq_num),
        )

    # Submit bsub job array on mygenome_${LSB_JOBINDEX}.faa against one
    # refgenome at a time
    bsub_dir = "bsub"
    util.checkDir(bsub_dir)
    for genome_file in genome_files:
        res_dir = "%s/%s" % (fasta_dir, genome_file.split(".")[0])
        cmd = "fasta35 -z 1 -Q -H -S -m 10 %s/mygenome_${LSB_JOBINDEX}.faa %s/%s > %s/mygenome_${LSB_JOBINDEX}.fa" % (seq_dir, genomes_dir, genome_file, res_dir)
        util.submitJobArray(jobname="genepy-fasta", jobnum=len(query_files), jobdir=bsub_dir, cmd=cmd)
    util.submitJobDependency('genepy-fasta')
    logger.info("Fasta on LSF finished")
def __init__(self, data, results, client=None, use_client=False,
             xcorr_append='', append='', create_symbolic_links=True):
    """
    Store the path templates used for raw data, cross-correlation and
    receiver-function files.

    The templates are plain ``%``-format strings built from ``data`` and
    ``results``; ``xcorr_append`` is appended to the ``xcorr`` directory
    name and ``append`` to the receiver-function templates.

    When ``create_symbolic_links`` is true, ``util.checkDir`` is called for
    the xcorr result and prep locations and the two directories are
    cross-linked (``to_results`` / ``to_prep``).

    Raises NotImplementedError when called on ``Data`` itself instead of a
    daughter class.
    """
    # Only daughter classes may be instantiated
    if self.__class__ == Data:
        raise NotImplementedError(
            'This function has to be called from or implemented by the daughter class.'
        )

    self.data = data
    self.results = results
    self.client = client
    self.use_client = use_client
    self.stations = self.eventfile = None

    self.raw = data + '/raw/%s_%d_%03d.mseed'

    # Cross-correlation templates
    xcorr_prefix = '/xcorr%s' % xcorr_append
    prep_template = data + xcorr_prefix + '/prep/%s_%d_%03d'
    self.getstr = prep_template
    self.x_prep = prep_template
    xcorr_results = results + xcorr_prefix
    self.x_res = xcorr_results
    # (1) placeholders: period, correlation, filter, time
    # (2) placeholders: period, correlation, filter, number of stacks
    self.xcorr = xcorr_results + '/xcorr/%s_%s%s_%s'  # 1
    self.x_stack = xcorr_results + '/stack/%s_%s%s_stack%s'  # 2
    self.x_plot = xcorr_results + '/plots/%s_%s%s_%s'  # 1
    self.x_plot_stack = xcorr_results + '/plots_stack/%s_%s%s_stack%s'  # 2
    self.x_sac = xcorr_results + '/sac/%s_%s.SAC'

    # Receiver-function templates
    self.rf_events = data + '/receiver/events/%s_%s' + append  # M5.5_events
    self.rf_results = results + '/receiver/results/%s' + append
    self.rf_sac = self.rf_results + '/sac/%s_%s.SAC'

    if not create_symbolic_links:
        return

    util.checkDir(self.x_res + '/bla')
    try:
        util.checkDir(self.x_prep)
    except OSError:
        import warnings

        warnings.warn('Error with external HD')
        return

    # Cross-link the prep and result directories (only if the link is missing)
    prep_dir = os.path.dirname(self.x_prep)
    wanted_links = (
        (self.x_res, prep_dir + '/to_results'),
        (prep_dir, self.x_res + '/to_prep'),
    )
    for target, link_path in wanted_links:
        if not os.path.islink(link_path):
            os.symlink(target, link_path)
def writeXEv(self, stream, *args, **kwargs):
    """
    Write ``stream`` to the file named by ``self.getXEv``.

    :param stream: stream to write
    :param args: positional arguments forwarded unchanged to ``self.getXEv``
    :param kwargs: keyword arguments forwarded unchanged to ``self.getXEv``
    """
    target = self.getXEv(*args, **kwargs)
    # presumably prepares the directory of the target file -- see util.checkDir
    util.checkDir(target)
    stream.write(target, 'Q')
def main():
    """
    Command line entry point: parse the options, prepare the genome data,
    run the requested searches and/or clean up previous results.

    Fasta file extensions used throughout:
      .ffn  untranslated nucleotide sequences for each CDS
      .faa  protein coding sequences (CDS)
      .fa   fasta alignment results
      .fna  whole genomic DNA sequences
      .frn  nucleotide sequences of RNA related features
    """
    parser = OptionParser(usage="usage: %prog [Options]")

    # Options taking an input FILE: (short flag, long flag, help, dest)
    file_options = (
        ("-d", "--dna", "input dna FILE in fasta format", "dna"),
        ("-t", "--tab", "input tab FILE in embl format", "tab"),
        ("-e", "--embl", "input embl FILE with CDS features in embl format", "embl"),
    )
    for short_flag, long_flag, help_text, dest in file_options:
        parser.add_option(short_flag, long_flag, metavar="FILE", help=help_text,
                          action="store", type="string", dest=dest)

    # Boolean switches: (long flag, help, dest)
    switch_options = (
        ("--genedb", "extract reference genome protein sequences from geneDB", "db"),
        ("--fasta", "run fasta against each extracted in-house genomes", "fasta"),
        ("--hamap", "run pfscan against HAMAP profiles", "hamap"),
        ("--clean", "delete all results without deleting reference genomes", "clean"),
        ("--deepclean", "delete all reference genomes and results", "deepclean"),
    )
    for long_flag, help_text, dest in switch_options:
        parser.add_option(long_flag, help=help_text, action="store_true", dest=dest)

    options, _args = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Print command line
    logger.debug("$ python " + "".join(arg + " " for arg in sys.argv))

    # >>> ---------------------------------------------------------------------
    # >>> DATA PREPARATION
    # >>> ---------------------------------------------------------------------

    # List of needed software
    for softname in soft_lists:
        util.checkSoft(softname)

    # Prepare new genome data
    if options.dna and options.tab and not options.embl:
        # dna + tab: build an embl file with CDS features first
        util.checkFile(options.dna)
        plain_emblfile = fasta2embl(options.dna)
        emblfile_withcds = concatFeatures(plain_emblfile, options.tab)
        splitSeq(mygenome_dir, emblfile_withcds, "CDS")
        translateSeq(mygenome_dir)
    elif not options.dna and not options.tab and options.embl:
        # embl file with CDS features given directly
        # (splitSeqWithBiopython does not work with testdata_01)
        splitSeq(mygenome_dir, options.embl, "CDS")
        translateSeq(mygenome_dir)
    elif not options.deepclean:
        util.checkDir(mygenome_dir)

    # Extract in house genomes from chado db
    if options.db:
        chadoDump(refgenomes_dir)
    elif not options.deepclean:
        util.checkDir(refgenomes_dir)

    # bsub output directory
    cleaning_requested = options.clean or options.deepclean
    if IS_LSF and not cleaning_requested:
        util.createDir(bsub_dir)

    # >>> ---------------------------------------------------------------------
    # >>> ORTHOLOG SEARCH
    # >>> ---------------------------------------------------------------------

    # Run fasta & reciprocal fasta
    if options.fasta:
        runFasta(mygenome_dir, refgenomes_dir, fasta_dir)
        fasta_hits = topFastaHits(fasta_dir, refgenomes_extractedseq_dir)
        concatSeq(mygenome_fastafile_allcds, mygenome_dir)
        runReciprocalFasta(refgenomes_extractedseq_dir, mygenome_fastafile_allcds, reciprocalfasta_dir)
        reciprocalfasta_hits = topReciprocalFastaHits(reciprocalfasta_dir)
        printMSPCrunch(fasta_hits, reciprocalfasta_hits)
        hits = getHits(fasta_hits, reciprocalfasta_hits)
        logger.info("ORTHOLOGS")
        logger.info(hits['ortholog'])
        logger.info("SIMILARITY")
        logger.info(hits['similarity'])
        transferFeatures(hits['ortholog'])

    # Run hamap scan
    if options.hamap:
        runHamapScan(mygenome_dir, hamap_dir)

    # >>> ---------------------------------------------------------------------
    # >>> CLEANING OUTPUT DATA
    # >>> ---------------------------------------------------------------------

    # Clean results before a re-run
    if options.clean:
        # fasta results
        for result_dir in (fasta_dir, reciprocalfasta_dir, refgenomes_extractedseq_dir):
            util.rmDir(result_dir)
        util.rmFile(mygenome_fastafile_allcds)
        # hamap results
        util.rmDir(hamap_dir)
        # bsub outputs
        if IS_LSF:
            util.rmDir(bsub_dir)

    # Deep clean - remove all
    if options.deepclean:
        for any_dir in (refgenomes_dir, mygenome_dir, fasta_dir,
                        reciprocalfasta_dir, refgenomes_extractedseq_dir):
            util.rmDir(any_dir)
        util.rmFile(mygenome_fastafile_allcds)
        util.rmDir(hamap_dir)
def PlotCompare(Config):
    """
    Draw one data/MC comparison plot per (cut, variable) and save it as PNG.

    For every cut in ``Config.CutList`` and every variable in
    ``Config.Vars[cut]`` the histograms returned by ``util.GetHists`` are
    drawn on a two-pad canvas: the upper pad shows the stacked histograms,
    their summed uncertainty band, the overlaid SINGLE histograms (scaled to
    the stack integral) and the data points; the lower pad shows the ratio
    of data to the summed stack. The plot is written to
    ``<plotdir><cut>/<var>.png``.

    :param Config: configuration object; the attributes read here are
        ``General`` ('input', 'ShowYields', 'lumi', 'plotdir'), ``CutList``,
        ``Vars``, ``fConfig`` ('xname', 'yname', optional 'logy') and ``Text``.
    """
    rootfile = rt.TFile(Config.General['input'], 'READ')
    for cut in Config.CutList:
        lg.logging('Processing cut %s' % (cut))
        for var in Config.Vars[cut]:
            lg.logging('\tProcessing var %s' % (var))
            rt.gROOT.ProcessLine('SetAtlasStyle()')
            canvas = RC.HtbCompCanvas()
            canvas.canvas.Draw()
            canvas.pad1.Draw()
            canvas.pad2.Draw()
            canvas.pad1.cd()

            fHists = util.GetHists(Config, rootfile, cut, var)
            hStack = rt.THStack('hStack', 'hStack')
            h_tot = None
            g_tot = None
            xtitle = Config.fConfig[cut][var]['xname']
            ytitle = Config.fConfig[cut][var]['yname']

            h_data = None
            hasData = False
            if fHists.DATA:
                h_data = fHists.DATA
                hasData = True

            # Stack the histograms and accumulate their sum in h_tot.
            # NOTE: h_tot stays None when fHists.STACK is empty, in which
            # case the Clone() below fails.
            i_color = 2
            for _name, hist in fHists.STACK.items():
                # skip colour indices 5 and 8, jump from 10 to 41
                if i_color == 5 or i_color == 8:
                    i_color += 1
                if i_color == 10:
                    i_color = 41
                hist.SetFillColor(i_color)
                hist.SetLineWidth(0)
                hist.SetLineColor(rt.kBlack)
                hStack.Add(hist)
                if h_tot is None:
                    h_tot = hist.Clone('allmc')
                else:
                    h_tot.Add(hist)
                i_color += 1

            b_ShowYields = Config.General['ShowYields']
            b_logy = False
            if 'logy' in Config.fConfig[cut][var]:
                b_logy = Config.fConfig[cut][var]['logy']

            rt.gStyle.SetEndErrorSize(4.0)

            # Empty copy of the total: only provides the axes of the upper pad
            h_dummy = h_tot.Clone('h_dummy')
            h_dummy.Scale(0)
            h_dummy.Draw('HIST')
            hStack.Draw('same HIST')

            # Uncertainty band of the summed stack
            g_tot = rt.TGraphAsymmErrors(h_tot)
            g_tot.SetFillStyle(3354)
            g_tot.SetFillColor(rt.kBlack)
            g_tot.SetLineColor(rt.kWhite)
            g_tot.SetLineWidth(0)
            g_tot.SetMarkerSize(0)
            g_tot.Draw('same E2')

            g_data = None
            if hasData:
                h_data.SetMarkerStyle(20)
                h_data.SetLineColor(rt.kBlack)
                h_data.SetLineWidth(2)
                h_data.SetMarkerSize(1.4)
                g_data = rt.TGraphAsymmErrors(h_data)
                g_data.SetMarkerSize(h_data.GetMarkerSize())
                g_data.SetMarkerColor(h_data.GetMarkerColor())
                g_data.SetMarkerStyle(h_data.GetMarkerStyle())
                g_data.SetLineWidth(h_data.GetLineWidth())
            else:
                # No data: use the summed stack itself as stand-in
                h_data = h_tot.Clone('dummyData')
                h_data.SetTitle('Asimov Data')
                g_data = rt.TGraphAsymmErrors(h_data)

            # Overlay the SINGLE histograms, each scaled to the stack integral
            if fHists.SINGLE:
                i_color = 2
                ntotal = h_tot.Integral()
                for _name, hist in fHists.SINGLE.items():
                    if i_color == 5:
                        i_color += 1
                    hist.SetLineColor(rt.TColor.GetColorBright(i_color))
                    hist.SetLineStyle(2)
                    hist.SetLineWidth(3)
                    nhist = hist.Integral()
                    if nhist != 0:
                        hist.Scale(ntotal / nhist)
                    hist.Draw('same HIST')
                    i_color += 1

            if hasData:
                g_data.Draw('same Ep1')

            h_dummy.GetXaxis().SetTitle(xtitle)
            h_dummy.GetYaxis().SetTitle(ytitle)
            # h_dummy.GetYaxis().SetTitleOffset(2.3)
            if b_logy:
                h_dummy.SetMinimum(0.1)
            else:
                h_dummy.SetMinimum(0)

            # Upper y-range: highest of stack total, data (if any) and the
            # SINGLE histograms normalised to the stack integral.
            ymax = h_tot.GetMaximum()
            if hasData:
                ymax = rt.TMath.Max(ymax, h_data.GetMaximum())
            if fHists.SINGLE:
                for _name, hist in fHists.SINGLE.items():
                    hist_integral = hist.Integral()
                    # An empty histogram cannot be normalised; skipping it
                    # avoids a division by zero (with and without data)
                    if hist_integral == 0:
                        continue
                    hist_max = hist.GetMaximum() * h_tot.Integral() / hist_integral
                    if ymax < hist_max:
                        ymax = hist_max
            if b_logy:
                # head-room factor on a log axis differs with/without data
                logy_headroom = 800 if hasData else 500
                h_dummy.SetMaximum(logy_headroom * ymax)
                canvas.pad1.SetLogy(True)
            else:
                h_dummy.SetMaximum(1.5 * ymax)

            canvas.pad1.RedrawAxis()
            canvas.pad1.SetTickx()
            canvas.pad1.SetTicky()

            # Legend: two side-by-side legends (names | yields) when yields
            # are shown, a single two-column legend otherwise
            legX1 = 1 - 0.41 * (596.0 / canvas.pad1.GetWw()) - 0.08
            legX2 = 0.91
            legXmid = legX1 + 0.5 * (legX2 - legX1)
            if b_ShowYields:
                legXmid = legX1 + 0.6 * (legX2 - legX1)
                leg = rt.TLegend(
                    legX1,
                    0.93 - (len(fHists.STACK) + len(fHists.SINGLE) + 2) * 0.04,
                    legXmid,
                    0.93
                )
                leg1 = rt.TLegend(legXmid, leg.GetY1(), legX2, leg.GetY2())
                leg.SetFillStyle(0)
                leg.SetBorderSize(0)
                leg.SetTextAlign(32)
                leg.SetTextFont(rt.gStyle.GetTextFont())
                leg.SetTextSize(rt.gStyle.GetTextSize() * 0.6)
                leg.SetMargin(0.22)
                leg1.SetFillStyle(0)
                leg1.SetBorderSize(0)
                leg1.SetTextAlign(32)
                leg1.SetTextFont(rt.gStyle.GetTextFont())
                leg1.SetTextSize(rt.gStyle.GetTextSize() * 0.6)
                leg1.SetMargin(0.0)
                if hasData:
                    leg.AddEntry(h_data, 'DATA', 'lep')
                    leg1.AddEntry(
                        None, str('%.1f' % (h_data.Integral())), ''
                    )
                if fHists.SINGLE:
                    for _name, hist in fHists.SINGLE.items():
                        leg.AddEntry(hist, _name, 'f')
                        leg1.AddEntry(
                            None, str('%.1f' % (hist.Integral())), ''
                        )
                if fHists.STACK:
                    for _name, hist in fHists.STACK.items():
                        leg.AddEntry(hist, _name, 'f')
                        leg1.AddEntry(
                            None, str('%.1f' % (hist.Integral())), ''
                        )
                leg.AddEntry(None, 'Total', '')
                leg1.AddEntry(None, str('%.1f' % (h_tot.Integral())), '')
                leg.AddEntry(g_tot, 'Uncertainty', 'f')
                leg1.AddEntry(None, ' ', '')
                leg.Draw()
                leg1.Draw()
            else:
                leg = rt.TLegend(
                    legX1,
                    0.93 - (
                        (len(fHists.STACK) + len(fHists.SINGLE) + 2) / 2
                    ) * 0.06,
                    legX2,
                    0.93
                )
                leg.SetNColumns(2)
                leg.SetFillStyle(0)
                leg.SetBorderSize(0)
                leg.SetTextAlign(32)
                leg.SetTextFont(rt.gStyle.GetTextFont())
                leg.SetTextSize(rt.gStyle.GetTextSize() * 0.55)
                leg.SetMargin(0.22)
                if hasData:
                    leg.AddEntry(h_data, 'DATA', 'lep')
                if fHists.SINGLE:
                    for _name, hist in fHists.SINGLE.items():
                        leg.AddEntry(hist, _name, 'f')
                if fHists.STACK:
                    for _name, hist in fHists.STACK.items():
                        leg.AddEntry(hist, _name, 'f')
                leg.AddEntry(g_tot, 'Uncertainty', 'f')
                leg.Draw()

            # Per-cut text labels followed by the fixed labels
            for textObj in Config.Text[cut]:
                canvas.DrawText(textObj)
            sqrts = {"text": "#sqrt{s} = 13TeV",
                     "xPos": 0.18, "yPos": 0.82,
                     "size": 0.035, "color": 1}
            atlas = {"text": "#bf{#it{ATLAS}} Work in Progress",
                     "xPos": 0.16, "yPos": 0.89,
                     "size": 0.06, "color": 1}
            lumi = {"text": "#intLdt =" + Config.General['lumi'] + " pb^{-1}",
                    "xPos": 0.32, "yPos": 0.82,
                    "size": 0.035, "color": 1}
            canvas.DrawText(sqrts)
            canvas.DrawText(atlas)
            canvas.DrawText(lumi)

            # ---- lower pad: ratio to the summed stack ----
            canvas.pad2.cd()
            canvas.pad2.GetFrame().SetY1(2)
            h_dummy2 = h_tot.Clone('h_dummy2')
            h_dummy2.Scale(0)
            h_dummy2.Draw('HIST')
            # h_dummy2.GetYaxis().SetTitleOffset(
            #     1.0 * h_dummy.GetYaxis().GetTitleOffset()
            # )
            h_ratio = h_data.Clone('h_ratio')

            # Denominator without errors, so that the ratio points carry
            # only the numerator's errors
            h_tot_noerr = h_tot.Clone('h_tot_noerr')
            for i_bin in range(1, h_tot_noerr.GetNbinsX() + 1):
                h_tot_noerr.SetBinError(i_bin, 0)

            # Relative uncertainty band: the band of the total divided bin
            # by bin by the total (graph points are 0-based, bins 1-based)
            g_ratio2 = g_tot.Clone('g_ratio2')
            for i_bin in range(1, h_tot_noerr.GetNbinsX() + 1):
                if h_tot_noerr.GetBinContent(i_bin) == 0:
                    continue
                g_ratio2.SetPoint(
                    i_bin - 1,
                    g_ratio2.GetX()[i_bin - 1],
                    g_ratio2.GetY()[i_bin - 1] /
                    h_tot_noerr.GetBinContent(i_bin)
                )
                g_ratio2.SetPointEXlow(
                    i_bin - 1, g_ratio2.GetEXlow()[i_bin - 1]
                )
                g_ratio2.SetPointEXhigh(
                    i_bin - 1, g_ratio2.GetEXhigh()[i_bin - 1]
                )
                g_ratio2.SetPointEYlow(
                    i_bin - 1,
                    g_ratio2.GetEYlow()[i_bin - 1] /
                    h_tot_noerr.GetBinContent(i_bin)
                )
                g_ratio2.SetPointEYhigh(
                    i_bin - 1,
                    g_ratio2.GetEYhigh()[i_bin - 1] /
                    h_tot_noerr.GetBinContent(i_bin)
                )

            h_dummy2.SetTitle('Data/MC')
            h_dummy2.GetYaxis().CenterTitle()
            h_dummy2.GetYaxis().SetTitle('Data/Bkg.')
            # h_dummy2.GetYaxis().SetLabelSize(
            #     1.0 * h_ratio.GetYaxis().GetLabelSize()
            # )
            h_dummy2.GetYaxis().SetLabelOffset(0.02)
            h_dummy.GetYaxis().SetLabelOffset(0.02)
            h_dummy2.GetYaxis().SetNdivisions(504, False)
            rt.gStyle.SetEndErrorSize(4.0)
            canvas.pad1.SetTicky()

            h_ratio.Divide(h_tot_noerr)
            h_ratio.SetMarkerStyle(20)
            h_ratio.SetMarkerSize(1.4)
            h_ratio.SetMarkerColor(rt.kBlack)
            h_ratio.SetLineWidth(2)
            g_ratio = rt.TGraphAsymmErrors(h_ratio)
            g_ratio.SetMarkerStyle(h_ratio.GetMarkerStyle())
            g_ratio.SetMarkerSize(h_ratio.GetMarkerSize())
            g_ratio.SetMarkerColor(h_ratio.GetMarkerColor())
            g_ratio.SetLineWidth(h_ratio.GetLineWidth())
            g_ratio.SetLineColor(h_ratio.GetLineColor())
            g_ratio.SetLineStyle(h_ratio.GetLineStyle())

            # Reference line at ratio == 1
            hline = rt.TLine(
                h_dummy2.GetXaxis().GetXmin(), 1,
                h_dummy2.GetXaxis().GetXmax(), 1
            )
            hline.SetLineColor(rt.kRed)
            hline.SetLineWidth(2)
            hline.SetLineStyle(2)
            if hasData:
                g_ratio.Draw('Ep1 same')
            hline.Draw()
            h_dummy2.SetMinimum(0.5)
            h_dummy2.SetMaximum(1.5)

            # The x title moves to the lower pad; axis text sizes are
            # rescaled separately for the two pads
            h_dummy2.GetXaxis().SetTitle(h_dummy.GetXaxis().GetTitle())
            # h_dummy2.GetXaxis().SetTitleOffset(5.0)
            h_dummy.GetXaxis().SetTitle('')
            h_dummy.GetXaxis().SetLabelSize(0)
            labelsize = h_dummy.GetYaxis().GetLabelSize()
            titlesize = h_dummy.GetYaxis().GetTitleSize()
            titleoffset = h_dummy.GetYaxis().GetTitleOffset()
            h_dummy.GetYaxis().SetLabelSize(0.7 * labelsize)
            h_dummy2.GetYaxis().SetLabelSize(1.5 * labelsize)
            h_dummy.GetYaxis().SetTitleSize(0.75 * titlesize)
            h_dummy2.GetYaxis().SetTitleSize(1.7 * titlesize)
            h_dummy2.GetXaxis().SetTitleSize(2.0 * titlesize)
            h_dummy2.GetXaxis().SetLabelSize(1.7 * labelsize)
            h_dummy2.GetYaxis().SetTitleOffset(0.45 * titleoffset)
            h_dummy.GetYaxis().SetTitleOffset(1.1 * titleoffset)
            h_dummy2.GetXaxis().SetLabelOffset(0.02)
            g_ratio2.Draw('same E2')
            canvas.pad2.RedrawAxis()

            # Save as <plotdir><cut>/<var>.png
            plotname = var + '.png'
            outDir = util.checkDir(Config.General['plotdir'])
            outDir = outDir + cut + '/'
            util.MakeDir(outDir)
            canvas.SavePrint(outDir + plotname)
            lg.logging('\t%s Done' % (var), 'SUCCESS')
            del canvas
        lg.logging('%s Done' % (cut), 'SUCCESS')
    rootfile.Close()
def writeXEv(self, stream, *args, **kwargs):
    """
    Write ``stream`` to the file named by ``self.getXEv``.

    :param stream: stream to write
    :param args: positional arguments forwarded unchanged to ``self.getXEv``
    :param kwargs: keyword arguments forwarded unchanged to ``self.getXEv``
    """
    target = self.getXEv(*args, **kwargs)
    # presumably prepares the directory of the target file -- see util.checkDir
    util.checkDir(target)
    stream.write(target, "Q")