def Map(inputBAM, inputReference, outputSAM, log, quantseqMapping, endtoendMapping, threads=1, parameter="--no-progress --slam-seq 2", outputSuffix="_ngm_slamdunk", trim5p=0, maxPolyA=-1, topn=1, sampleId=None, sampleName="NA", sampleType="NA", sampleTime=0, printOnly=False, verbose=True, force=False):
    """Build the ngm (NextGenMap) command line for one sample and pass it to run().

    Parameters
    ----------
    inputBAM, inputReference : paths handed to ngm via -q and -r.
    outputSAM : output path; a name ending in ".sam" yields SAM output,
        anything else adds the -b switch.
    log : file-like object receiving the skip message and passed on to run().
    quantseqMapping : when exactly True, the base options are replaced by
        "--no-progress".
    endtoendMapping : exactly True selects " -e ", otherwise " -l ".
    threads : value for -t.
    parameter : base option string that the remaining options are appended to.
    outputSuffix : accepted for interface compatibility; not used here.
    trim5p, maxPolyA, topn : add -5, --max-polya and "-n ... --strata" when
        above 0, -1 and 1 respectively.
    sampleId, sampleName, sampleType, sampleTime : read-group options.
    printOnly : forwarded to run() as dry.
    verbose : forwarded to run().
    force : forwarded to checkStep().

    Returns None.
    """
    # QuantSeq mode discards whatever base options the caller supplied.
    ngmOptions = "--no-progress" if quantseqMapping is True else parameter

    if trim5p > 0:
        ngmOptions += " -5 " + str(trim5p)

    if maxPolyA > -1:
        ngmOptions += " --max-polya " + str(maxPolyA)

    ngmOptions += " -e " if endtoendMapping is True else " -l "

    if sampleId != None:
        ngmOptions += " --rg-id " + str(sampleId)
        # NOTE(review): the flattened source does not show whether the
        # --rg-sm option depends on sampleId; it is assumed to be nested
        # here -- confirm against the original layout.
        if sampleName != "":
            ngmOptions += " --rg-sm " + sampleName + ":" + sampleType + ":" + str(sampleTime)

    if topn > 1:
        ngmOptions += " -n " + str(topn) + " --strata "

    # The freshness check is made against the ".bam" variant of the output name.
    expectedOutput = replaceExtension(outputSAM, ".bam")
    if not checkStep([inputReference, inputBAM], [expectedOutput], force):
        print("Skipped mapping for " + inputBAM, file=log)
        return

    # SAM output needs no extra switch; everything else is written as BAM.
    bamSwitch = "" if outputSAM.endswith(".sam") else " -b"
    command = (getBinary("ngm") + bamSwitch
               + " -r " + inputReference
               + " -q " + inputBAM
               + " -t " + str(threads)
               + " " + ngmOptions
               + " -o " + outputSAM)
    run(command, log, verbose=verbose, dry=printOnly)
def runSam2bam(inFile, outFile, log, index=True, sort=True, delinFile=False, onlyUnique=False, onlyProperPaired=False, filterMQ=0, L=None, threads=1, verbose=False, dry=False):
    """Convert inFile to BAM with "samtools view", optionally sort and index it.

    Parameters
    ----------
    inFile, outFile : input path and BAM output path.
    log : file-like object for the skip message; also passed to run().
    index : call pysamIndex(outFile) at the end.
    sort : re-sort outFile through a "<outFile>_tmp" intermediate.
    delinFile : remove inFile after conversion (not in dry mode). Also enables
        the skip shortcut when outFile exists and inFile is already gone.
    onlyUnique : raises a zero filterMQ to 1.
    onlyProperPaired : adds "-f 2".
    filterMQ : passed as "-q" when greater than 0.
    L : passed as "-L" when not None.
    threads : value for "-@".
    verbose, dry : forwarded to run().

    Returns None.
    """
    conversionAlreadyDone = delinFile and files_exist(outFile) and not files_exist(inFile)

    if conversionAlreadyDone:
        print("Skipping sam2bam for " + outFile, file=log)
    else:
        minMapQ = 1 if (onlyUnique and filterMQ == 0) else filterMQ

        # The outcome of run() is never inspected, so this flag stays True.
        conversionOk = True

        viewArgs = [getBinary("samtools"), "view", "-@", str(threads), "-Sb", "-o", outFile, inFile]
        if minMapQ > 0:
            viewArgs.extend(["-q", str(minMapQ)])
        if onlyProperPaired:
            viewArgs.extend(["-f", "2"])
        if L is not None:
            viewArgs.extend(["-L", L])
        run(" ".join(viewArgs), log, verbose=verbose, dry=dry)

        if sort:
            unsorted = outFile + "_tmp"
            # Move the unsorted BAM aside so the sorted result can take its name.
            if not dry:
                os.rename(outFile, unsorted)
            sortArgs = [getBinary("samtools"), "sort", "-@", str(threads), "-o", outFile, unsorted]
            run(" ".join(sortArgs), log, verbose=verbose, dry=dry)
            if conversionOk:
                removeFile(unsorted)

        if conversionOk and delinFile and not dry:
            removeFile(inFile)

    # NOTE(review): the flattened source does not show whether indexing
    # belongs to the conversion branch only; it is assumed to run in both
    # cases -- confirm against the original layout.
    if index:
        pysamIndex(outFile)
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength, readCount, seqError):
    """Run gensimreads.py and then getseqfrombed.py through shell().

    The first command writes its stdout to bedReads; the second reads bedReads
    and bed12Fasta and writes its stdout to faReads. Both send stderr to
    /dev/null. Whatever shell() returns is printed when, after stripping, it
    is longer than five characters.

    Parameters
    ----------
    bed12, bed12Fasta : input paths for the first and second command.
    explv : value for -e of gensimreads.py.
    bedReads, faReads : output paths of the first and second command.
    readLength : value for -l in both commands.
    readCount : value for -n of gensimreads.py.
    seqError : value for -r of getseqfrombed.py.

    Returns None.
    """
    # An earlier variant of this call also passed a position-bias file via -b;
    # that variant is disabled.
    positionsCmd = " ".join([
        getBinary("gensimreads.py"),
        "-l", str(readLength),
        "-e", explv,
        "-n", str(readCount),
        "--stranded", bed12,
        "2> /dev/null >", bedReads,
    ])
    positionsOut = shell(positionsCmd)
    if len(positionsOut.strip()) > 5:
        print(positionsOut)

    sequencesCmd = " ".join([
        getBinary("getseqfrombed.py"),
        "-f",
        "-r", str(seqError),
        "-l", str(readLength),
        bedReads,
        bed12Fasta,
        "2> /dev/null >", faReads,
    ])
    sequencesOut = shell(sequencesCmd)
    if len(sequencesOut.strip()) > 5:
        print(sequencesOut)
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength, readCount, seqError):
    """Generate read positions (bedReads) and read sequences (faReads).

    Two external scripts are executed one after the other via shell():
    gensimreads.py (stdout redirected to bedReads) and getseqfrombed.py
    (stdout redirected to faReads). stderr of both goes to /dev/null.

    Parameters
    ----------
    bed12 : input of gensimreads.py.
    bed12Fasta : second positional input of getseqfrombed.py.
    explv : passed as -e to gensimreads.py.
    bedReads : output of step one and input of step two.
    faReads : output of step two.
    readLength : passed as -l to both scripts.
    readCount : passed as -n to gensimreads.py.
    seqError : passed as -r to getseqfrombed.py.

    Returns None.
    """
    def runAndReport(command):
        # Echo what shell() returned only when it is more than a few characters.
        captured = shell(command)
        if len(captured.strip()) > 5:
            print(captured)

    # A former variant with a position-bias file (-b) is no longer used.
    runAndReport(
        getBinary("gensimreads.py")
        + " -l " + str(readLength)
        + " -e " + explv
        + " -n " + str(readCount)
        + " --stranded " + bed12
        + " 2> /dev/null > " + bedReads
    )

    runAndReport(
        getBinary("getseqfrombed.py")
        + " -f -r " + str(seqError)
        + " -l " + str(readLength)
        + " " + bedReads
        + " " + bed12Fasta
        + " 2> /dev/null > " + faReads
    )
def checkNextGenMapVersion():
    """Verify that the ngm binary reports the version this package expects.

    The binary returned by getBinary("ngm") is run through shellerr() with
    raiseError=False, and the first "X.Y.Z" number triple in the returned text
    is compared with __ngm_version__.

    Raises
    ------
    RuntimeError
        If no version number is found, or if it differs from __ngm_version__.
    """
    ngmHelp = shellerr(getBinary("ngm"), raiseError=False)

    # re.search picks up the complete, leftmost version number anywhere in the
    # text. The previous re.match with a greedy ".*" prefix only looked at the
    # first line and cut multi-digit major versions short ("10.5.5" was read
    # as "0.5.5").
    versionMatch = re.search(r'[0-9]+\.[0-9]+\.[0-9]+', ngmHelp)
    if versionMatch is None:
        raise RuntimeError('Could not get NextGenMap version. Please reinstall slamdunk package.')

    version = versionMatch.group(0)
    if version != __ngm_version__:
        raise RuntimeError('NextGenMap version expected: ' + __ngm_version__ + " but found " + version + ". Please reinstall slamdunk package.")
def SNPs(inputBAM, outputSNP, referenceFile, minVarFreq, minCov, minQual, log, printOnly=False, verbose=True, force=False):
    """Call SNPs by piping "samtools mpileup" into VarScan mpileup2snp.

    Parameters
    ----------
    inputBAM, referenceFile : inputs of samtools mpileup.
    outputSNP : file receiving VarScan's stdout. It is opened for writing
        (and therefore created or truncated) even when printOnly is set.
    minVarFreq, minCov : values for --min-var-freq and --min-coverage.
    minQual : accepted for interface compatibility; not used here.
    log : file-like object receiving the echoed commands, the skip message
        and the stderr of both child processes.
    printOnly : only echo the commands, do not start any process.
    verbose : echo the commands to log.
    force : forwarded to checkStep().

    Returns None.
    """
    if checkStep([inputBAM, referenceFile], [outputSNP], force):
        # The with-block guarantees the output handle is closed even if
        # starting one of the child processes fails.
        with open(outputSNP, 'w') as fileSNP:
            mpileupCmd = getBinary("samtools") + " mpileup -B -A -f " + referenceFile + " " + inputBAM
            if verbose:
                print(mpileupCmd, file=log)
            if not printOnly:
                mpileup = subprocess.Popen(mpileupCmd, shell=True, stdout=subprocess.PIPE, stderr=log)

            varscanCmd = "java -jar " + getBinary("VarScan.v2.4.1.jar") + " mpileup2snp --strand-filter 0 --output-vcf --min-var-freq " + str(minVarFreq) + " --min-coverage " + str(minCov) + " --variants 1"
            if verbose:
                print(varscanCmd, file=log)
            if not printOnly:
                varscan = subprocess.Popen(varscanCmd, shell=True, stdin=mpileup.stdout, stdout=fileSNP, stderr=log)
                # The parent does not read from the pipe itself; closing its
                # copy lets samtools see SIGPIPE if VarScan exits first.
                mpileup.stdout.close()
                varscan.wait()
                # Reap the producer as well so it does not linger as a zombie.
                mpileup.wait()
    else:
        print("Skipping SNP calling", file=log)
def SNPs(inputBAM, outputSNP, referenceFile, minVarFreq, minCov, minQual, log, printOnly=False, verbose=True, force=False):
    """Run the samtools mpileup | VarScan mpileup2snp pipeline into outputSNP.

    Parameters
    ----------
    inputBAM : alignment file given to samtools mpileup.
    outputSNP : destination of VarScan's stdout; opened with mode 'w' as soon
        as the step is not skipped, including in printOnly mode.
    referenceFile : reference passed to samtools mpileup via -f.
    minVarFreq : value for VarScan's --min-var-freq.
    minCov : value for VarScan's --min-coverage.
    minQual : unused by this function; kept so existing callers keep working.
    log : file-like object for echoed commands, the skip message and the
        stderr of both child processes.
    printOnly : echo the commands without executing them.
    verbose : echo the commands to log.
    force : forwarded to checkStep().

    Returns None.
    """
    if not checkStep([inputBAM, referenceFile], [outputSNP], force):
        print("Skipping SNP calling", file=log)
        return

    # Context manager closes the output handle on every exit path.
    with open(outputSNP, 'w') as fileSNP:
        mpileupCmd = getBinary("samtools") + " mpileup -B -A -f " + referenceFile + " " + inputBAM
        if verbose:
            print(mpileupCmd, file=log)

        mpileup = None
        if not printOnly:
            mpileup = subprocess.Popen(mpileupCmd, shell=True, stdout=subprocess.PIPE, stderr=log)

        varscanCmd = "java -jar " + getBinary("VarScan.v2.4.1.jar") + " mpileup2snp --strand-filter 0 --output-vcf --min-var-freq " + str(minVarFreq) + " --min-coverage " + str(minCov) + " --variants 1"
        if verbose:
            print(varscanCmd, file=log)

        if mpileup is not None:
            try:
                varscan = subprocess.Popen(varscanCmd, shell=True, stdin=mpileup.stdout, stdout=fileSNP, stderr=log)
            finally:
                # Drop the parent's copy of the pipe whether or not the
                # consumer started, so the producer is never left blocked
                # on a reader that does not exist.
                mpileup.stdout.close()
            varscan.wait()
            # Collect the producer's exit status to avoid a zombie process.
            mpileup.wait()
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Write the simulated UTR FASTA, its VCF, a BED12 of fragments and an expression profile.

    Steps, in order:
      1. Parse the UTRs from bed with parseUtrBedFile().
      2. Write a VCF 4.1 header to vcfFile and extract the UTR sequences from
         referenceFasta (BedTool.sequence with s=True, name=True).
      3. Copy every FASTA header to bed12Fasta; every sequence line is written
         as the result of simulateUTR(), which also receives the open VCF handle.
      4. For every record of BedIterator(bed) write one line to bed12 covering
         the last fragmentLength bases of the UTR, where fragmentLength is a
         random value in [150, 450) capped at the UTR length.
      5. Run genexplvprofile.py on bed12, redirecting its stdout to explv.

    Parameters
    ----------
    bed : input BED file with the UTRs.
    bed12, bed12Fasta, explv, vcfFile : output paths (see steps above).
    referenceFasta : reference used for sequence extraction.
    readLength : accepted for interface compatibility; not used here.
    polyALength, snpRate : forwarded to simulateUTR().

    Returns
    -------
    The sum of (end - start) over all lines written to bed12.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers make sure every output handle is closed even when
    # sequence extraction or simulation raises.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFile = BedTool(bed)
        bedFasta = bedFile.sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if not line:
                    # A blank line carries neither a header nor a sequence;
                    # indexing line[0] on it would raise IndexError.
                    continue
                if line.startswith(">"):
                    print(line, file=bed12FastaFile)
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)

    totalLength = 0
    minFragmentLength = 150
    maxFragmentLength = 450

    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()
            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength, 1)
            fragmentLength = min(fragmentLength, utrLength)
            # The fragment always ends at the last base of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength

            totalLength += (end - start)
            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    output = shell(getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength
def bamSort(outputBAM, log, newHeader, verbose):
    """Sort outputBAM in place with "samtools sort", optionally replacing its header.

    The unsorted data is first placed in "<outputBAM>_tmp": when newHeader is
    given, every read is copied into a new BAM written with that header;
    otherwise outputBAM is simply renamed. samtools then sorts the temporary
    file back into outputBAM and the temporary file is removed.

    Parameters
    ----------
    outputBAM : path of the BAM file to sort; overwritten with the result.
    log : passed to run().
    newHeader : header for the rewritten file, or None to keep the file as is.
    verbose : passed to run().

    Returns None.
    """
    tmp = outputBAM + "_tmp"

    if newHeader is not None:
        pyOutputBAM = pysam.AlignmentFile(outputBAM, "rb")
        try:
            pyTmp = pysam.AlignmentFile(tmp, "wb", header=newHeader)
            try:
                for read in pyOutputBAM:
                    pyTmp.write(read)
            finally:
                # Close the writer even if copying a read fails.
                pyTmp.close()
        finally:
            # Close the reader even if the writer could not be opened.
            pyOutputBAM.close()
    else:
        os.rename(outputBAM, tmp)

    # The sort is always executed (dry=False), regardless of any dry-run mode.
    run(" ".join([getBinary("samtools"), "sort", "-o", outputBAM, tmp]), log, verbose=verbose, dry=False)
    removeFile(tmp)
def bamSort(outputBAM, log, newHeader, verbose):
    """Re-sort outputBAM via a temporary copy, swapping in newHeader if given.

    Parameters
    ----------
    outputBAM : BAM file to sort; the sorted result is written to this path.
    log : passed to run().
    newHeader : when not None, all reads are copied into "<outputBAM>_tmp"
        using this header; when None, outputBAM is renamed to that path.
    verbose : passed to run().

    Returns None.
    """
    # Imported locally so this function does not rely on a module-level import.
    from contextlib import closing

    tmp = outputBAM + "_tmp"

    if newHeader is None:
        os.rename(outputBAM, tmp)
    else:
        # closing() calls close() on both handles on every exit path, so
        # neither file stays open when reading or writing a record fails.
        with closing(pysam.AlignmentFile(outputBAM, "rb")) as source:
            with closing(pysam.AlignmentFile(tmp, "wb", header=newHeader)) as target:
                for read in source:
                    target.write(read)

    sortCommand = " ".join([getBinary("samtools"), "sort", "-o", outputBAM, tmp])
    # dry is fixed to False: the sort always runs.
    run(sortCommand, log, verbose=verbose, dry=False)
    removeFile(tmp)
def _writeSimulatedUtrFasta(bed, referenceFasta, bed12Fasta, vcfFile, utrs, polyALength, snpRate):
    """Write the VCF header to vcfFile and the simulateUTR() output for every UTR to bed12Fasta."""
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFasta = BedTool(bed).sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if not line:
                    # Blank lines hold no record; line[0] would raise IndexError.
                    continue
                if line.startswith(">"):
                    # Header lines are copied unchanged and name the next sequence.
                    print(line, file=bed12FastaFile)
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)


def _writeFragmentBed12(bed, bed12, minFragmentLength=150, maxFragmentLength=450):
    """Write one 3'-end fragment per UTR of bed to bed12 and return the summed fragment length."""
    totalLength = 0
    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):
            utrLength = utr.getLength()
            fragmentLength = min(random.randrange(minFragmentLength, maxFragmentLength, 1), utrLength)
            # Fragments are anchored at the end of the UTR.
            start = max(0, utrLength - fragmentLength)
            end = utrLength
            totalLength += (end - start)
            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)
    return totalLength


def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Prepare all simulation inputs derived from a UTR BED file.

    Produces four files: vcfFile (VCF 4.1 header plus whatever simulateUTR()
    writes to the handle), bed12Fasta (FASTA headers with the sequences
    returned by simulateUTR()), bed12 (one fragment per UTR, covering its
    last 150-449 bases or the whole UTR if shorter) and explv (stdout of
    genexplvprofile.py run on bed12).

    Parameters
    ----------
    bed : input BED file with the UTRs.
    bed12, bed12Fasta, explv, vcfFile : output paths described above.
    referenceFasta : reference used by BedTool.sequence().
    readLength : accepted for interface compatibility; not used here.
    polyALength, snpRate : forwarded to simulateUTR().

    Returns
    -------
    The summed length (end - start) of all fragments written to bed12.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Both helpers use context managers, so no output handle is left open
    # when extraction or simulation raises.
    _writeSimulatedUtrFasta(bed, referenceFasta, bed12Fasta, vcfFile, utrs, polyALength, snpRate)
    totalLength = _writeFragmentBed12(bed, bed12)

    output = shell(getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength