def Index_Fasta(self):
    """
    Build (or refresh) the ``.fai`` index of the output fasta.

    Runs the configured samtools tool with ``faidx <outfastapath>`` and hands
    whatever ``execute`` returns to ``self.cleanUpExecution``.
    """
    fasta_path = self.outfastapath
    LE.info('Creating index {0}.fai.'.format(fasta_path))

    samtools = COMPASSCFG['tools']['samtools']
    faidx_args = "faidx {0}".format(fasta_path)
    execution_result = samtools.execute(append=faidx_args)
    self.cleanUpExecution(*execution_result)
def merge(self):
    """
    Merge the per-readgroup SAM alignments into one sorted BAM at ``self.output``.

    The header is copied from ``self.input`` and extended with a ``PG`` record
    for bwa plus ``CO`` lines recording the command lines that were run
    (``sys.argv`` and every entry of ``self.commandsHistory``). Records from
    each ``<readgroup>_alignment.sam`` file are written to a temporary
    unsorted BAM, skipping any record whose flag has bit 2048 set, and the
    result is then sorted into ``self.output``.

    Side effects: sets ``self.seqstat``; creates and then deletes
    ``<self.output>_unsorted.bam``.

    Raises ``IndexError`` if ``self.commandsHistory`` is empty.
    """
    LE.info("Merging SAMfiles from different readgroup mappings")

    # Bit 2048 (0x800) of the SAM FLAG field; records carrying it are dropped.
    supplementary_flag = 2048

    # The input file doubles as the header template. Its header already
    # contains the SQ entries, so a plain copy is all that is needed.
    with pysam.Samfile(self.input) as template:
        self.seqstat = self.generateSeqStats(self.input)
        newheaders = dict(template.header.items())

    # NOTE(review): the bwa version is hard-coded; confirm it matches the
    # binary that was actually run.
    newheaders["PG"] = [{
        "PN": "bwa",
        "VN": "0.7.10",
        "CL": self.commandsHistory[0]
    }]

    # De-duplicate existing comment lines while keeping their original
    # order, so the resulting header is reproducible between runs.
    comments = []
    seen = set()
    for line in newheaders.get("CO", []):
        if line not in seen:
            seen.add(line)
            comments.append(line)
    comments.append("CMD:{0}".format(" ".join(sys.argv)))
    comments.extend("CMD:{0}".format(cmd) for cmd in self.commandsHistory)
    newheaders["CO"] = comments

    LE.debug("Doing merge, writing in " + self.output)
    unsortedBamName = self.output + "_unsorted.bam"

    output = pysam.Samfile(unsortedBamName, "wb", header=newheaders)
    try:
        for readgroup in self.readgroups:
            with pysam.Samfile(readgroup + "_alignment.sam") as source:
                for read in source:
                    if not read.flag & supplementary_flag:
                        output.write(read)
    finally:
        # Always release the writer, even if a source file is unreadable.
        output.close()

    # pysam.sort receives self.output as its output prefix; the move below
    # relies on the sorted file landing at "<self.output>.bam".
    pysam.sort(unsortedBamName, self.output)
    os.unlink(unsortedBamName)
    shutil.move(self.output + ".bam", self.output)
def Fix_Fasta_Headers(self):
    """
    Write a copy of the input fasta whose records carry normalised names.

    The first record is named REFID and every later record REFID-<n>, where
    <n> is its 1-based position in the input (REFID-2, REFID-3, ...).
    Records are read from ``self.infasta`` and written to
    ``self.outfastapath``.
    """
    LE.info('Creating master fasta file {0}.'.format(self.outfastapath))

    def _renamed_records():
        """Lazily yield each input record after fixing id/name/description."""
        position = 0
        for record in SeqIO.parse(self.infasta, 'fasta'):
            position += 1
            # Only records after the first get a positional suffix.
            suffix = '' if position == 1 else '-{0}'.format(position)
            wanted = self.newrefid + suffix
            record.id = wanted
            if record.name != wanted:
                record.name = wanted
                record.description = '{0} {1} {2}'.format(
                    wanted, self.newrefid, record.description)
            yield record

    SeqIO.write(_renamed_records(), self.outfastapath, 'fasta')
# Default read-group header fields, used when no SAM header file is given.
parser.add_argument(
    '-dh',
    dest="headerinfo",
    help=
    "Default ummaped header, you must specify [readgroup,platform,lib,sample,SeqCentre] ex: -dh RG0045,ILLUMINA,LIB03,SN123,Sanger",
    default=None)
# The destination must be a plain identifier: the code below reads
# ``args.output``, which a dest of "output.bam" would never populate.
parser.add_argument(
    '-o',
    dest="output",
    help="Output BAM file (default: -)",
    default="-")
args = parser.parse_args()

LE.info("Input fastq files in {0} , {1}".format(args.fq1, args.fq2))
LE.info("Output bam in {0}".format(args.output))

# A literal "None" on the command line means "no default header info".
if args.headerinfo == "None":
    args.headerinfo = None

# Exactly one of the header file and the default header info must be given.
if bool(args.header) == bool(args.headerinfo):
    print(
        "You must specify either a header file for the SAM header or default header information (-H/-dh)"
    )
    sys.exit(-1)
calcLDforadjacentsites=options.bcftools_calcLDforadjacentsites, scaledsubstmutrate=options.bcftools_scaledsubstmutrate, indeltosubstratio=options.bcftools_indeltosubstratio, variantifprobltint=options.bcftools_variantifprobltint, typeofprior=options.bcftools_typeofprior, inbam=options.inbam, inref=options.ref_id, pileup_out=options.outpileup) try: c.runPileup() if c.annotate(): print "Error with annotation!" c.dumpStdError() sys.exit(-1) c.merge(options.output) c.clean() LE.info("Finished!") except: c.dumpStdError() c.clean() dump_exc() try: pass c.dumpStdError() c.clean() except: pass dump_exc()
def generateAll(self):
    """
    Run Fix_Fasta_Headers, Create_Indexes and Make_Repeat_Mask_Txt in that
    order, then log that everything completed.
    """
    pipeline = ("Fix_Fasta_Headers", "Create_Indexes", "Make_Repeat_Mask_Txt")
    for step_name in pipeline:
        # Look each step up only when it is about to run, as direct calls would.
        getattr(self, step_name)()
    LE.info("Everything went OKAY!")