Ejemplo n.º 1
0
def Map(inputBAM, inputReference, outputSAM, log, quantseqMapping, endtoendMapping, threads=1, parameter="--no-progress --slam-seq 2" , outputSuffix="_ngm_slamdunk", trim5p=0, maxPolyA=-1, topn=1, sampleId=None, sampleName="NA", sampleType="NA", sampleTime=0, printOnly=False, verbose=True, force=False):
    """Build an ``ngm`` command line from the given options and run it.

    Args:
        inputBAM: Read file handed to ngm via ``-q``.
        inputReference: Reference file handed to ngm via ``-r``.
        outputSAM: Output path handed to ngm via ``-o``. A name ending in
            ``.sam`` is run without ``-b``; anything else adds ``-b``.
        log: Open file object receiving log output.
        quantseqMapping: When ``True`` the base option string is replaced
            by ``--no-progress`` (the ``parameter`` argument is ignored).
        endtoendMapping: ``True`` adds ``-e``, otherwise ``-l`` is added.
        threads: Value passed via ``-t``.
        parameter: Base option string used when ``quantseqMapping`` is not
            ``True``.
        outputSuffix: Accepted but not used inside this function.
        trim5p: When > 0, passed via ``-5``.
        maxPolyA: When > -1, passed via ``--max-polya``.
        topn: When > 1, passed via ``-n`` together with ``--strata``.
        sampleId: When not ``None``, passed via ``--rg-id``.
        sampleName, sampleType, sampleTime: Joined with ``:`` and passed
            via ``--rg-sm`` when ``sampleId`` is set and ``sampleName`` is
            not empty.
        printOnly: Forwarded to ``run`` as ``dry``.
        verbose: Forwarded to ``run``.
        force: Forwarded to ``checkStep``.
    """
    # Collect option fragments; each fragment carries its own spacing so the
    # final string is a plain concatenation.
    fragments = ["--no-progress" if quantseqMapping is True else parameter]

    if trim5p > 0:
        fragments.append(" -5 " + str(trim5p))

    if maxPolyA > -1:
        fragments.append(" --max-polya " + str(maxPolyA))

    fragments.append(" -e " if endtoendMapping is True else " -l ")

    if sampleId is not None:
        fragments.append(" --rg-id " + str(sampleId))
        if sampleName != "":
            fragments.append(" --rg-sm " + sampleName + ":" + sampleType + ":" + str(sampleTime))

    if topn > 1:
        fragments.append(" -n " + str(topn) + " --strata ")

    ngmOptions = "".join(fragments)

    # checkStep decides whether the step has to run; the expected output is
    # the ".bam" variant of the output name.
    if not checkStep([inputReference, inputBAM], [replaceExtension(outputSAM, ".bam")], force):
        print("Skipped mapping for " + inputBAM, file=log)
        return

    # SAM output is requested by file extension; otherwise "-b" is added to
    # write BAM directly.
    referenceFlag = " -r " if outputSAM.endswith(".sam") else " -b -r "
    command = getBinary("ngm") + referenceFlag + inputReference + " -q " + inputBAM + " -t " + str(threads) + " " + ngmOptions + " -o " + outputSAM
    run(command, log, verbose=verbose, dry=printOnly)
Ejemplo n.º 2
0
def runSam2bam(inFile, outFile, log, index=True, sort=True, delinFile=False, onlyUnique=False, onlyProperPaired=False, filterMQ=0, L=None, threads=1, verbose=False, dry=False):
    """Convert ``inFile`` to BAM with ``samtools view``, optionally sort and index it.

    Args:
        inFile: Input alignment file handed to ``samtools view -Sb``.
        outFile: Path of the BAM file to produce.
        log: Open file object receiving log output.
        index: When true, ``pysamIndex(outFile)`` is called at the end. This
            happens on every path, including the skip path and dry runs.
        sort: When true, the converted file is renamed to ``outFile + "_tmp"``
            and sorted back into ``outFile`` with ``samtools sort``.
        delinFile: When true, ``inFile`` is removed after conversion (not in
            dry runs). If ``outFile`` already exists and ``inFile`` is gone,
            the conversion is skipped.
        onlyUnique: When true and ``filterMQ`` is 0, ``filterMQ`` is set to 1.
        onlyProperPaired: When true, adds ``-f 2``.
        filterMQ: When > 0, passed via ``-q``.
        L: When not ``None``, passed via ``-L``.
        threads: Passed via ``-@`` to both samtools calls.
        verbose: Forwarded to ``run``.
        dry: Forwarded to ``run``; also suppresses the rename and the removal
            of ``inFile``.
    """
    if delinFile and files_exist(outFile) and not files_exist(inFile):
        print("Skipping sam2bam for " + outFile, file=log)
    else:
        if onlyUnique and filterMQ == 0:
            filterMQ = 1

        cmd = [getBinary("samtools"), "view", "-@", str(threads), "-Sb"]
        if filterMQ > 0:
            cmd += ["-q", str(filterMQ)]
        if onlyProperPaired:
            cmd += ["-f", "2"]
        if L is not None:
            cmd += ["-L", L]
        # All options go BEFORE the input file. Arguments after the input are
        # positional (regions) for samtools view; trailing options only work
        # where getopt permutes argv.
        cmd += ["-o", outFile, inFile]
        run(" ".join(cmd), log, verbose=verbose, dry=dry)

        if sort:
            tmp = outFile + "_tmp"
            if not dry:
                os.rename(outFile, tmp)
            run(" ".join([getBinary("samtools"), "sort", "-@", str(threads), "-o", outFile, tmp]), log, verbose=verbose, dry=dry)
            removeFile(tmp)
        if delinFile and not dry:
            removeFile(inFile)

    if index:
        pysamIndex(outFile)
Ejemplo n.º 3
0
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength, readCount, seqError):
    """Run ``gensimreads.py`` and then ``getseqfrombed.py`` through ``shell``.

    The first command is given ``bed12`` and ``explv`` and its command string
    redirects stdout to ``bedReads``; the second is given ``bedReads`` and
    ``bed12Fasta`` and redirects stdout to ``faReads``. Both command strings
    send stderr to /dev/null. Whatever ``shell`` returns is printed when it is
    longer than 5 characters after stripping.
    """
    # NOTE: a positional-bias option ("-b ... sampleposbias.txt") used to be
    # passed to gensimreads.py and is currently disabled.
    simulateCmd = " ".join([
        getBinary("gensimreads.py"),
        "-l", str(readLength),
        "-e", explv,
        "-n", str(readCount),
        "--stranded", bed12,
        "2> /dev/null >", bedReads,
    ])
    simulateOut = shell(simulateCmd)
    if len(simulateOut.strip()) > 5:
        print(simulateOut)

    extractCmd = " ".join([
        getBinary("getseqfrombed.py"),
        "-f",
        "-r", str(seqError),
        "-l", str(readLength),
        bedReads,
        bed12Fasta,
        "2> /dev/null >", faReads,
    ])
    extractOut = shell(extractCmd)
    if len(extractOut.strip()) > 5:
        print(extractOut)
Ejemplo n.º 4
0
def simulateReads(bed12, bed12Fasta, explv, bedReads, faReads, readLength,
                  readCount, seqError):
    """Simulate reads in two steps via external scripts.

    Step one calls ``gensimreads.py`` on ``bed12`` (with ``explv``,
    ``readLength`` and ``readCount``) and redirects its stdout to
    ``bedReads``. Step two calls ``getseqfrombed.py`` on ``bedReads`` and
    ``bed12Fasta`` (with ``seqError`` and ``readLength``) and redirects its
    stdout to ``faReads``. stderr of both commands goes to /dev/null.
    """

    def runAndEcho(commandLine):
        # Echo whatever shell() hands back, unless it is (nearly) empty.
        captured = shell(commandLine)
        if len(captured.strip()) > 5:
            print(captured)

    quiet = " 2> /dev/null > "

    # NOTE: a positional-bias option ("-b ... sampleposbias.txt") used to be
    # passed to gensimreads.py and is currently disabled.
    gensimOptions = (" -l " + str(readLength) + " -e " + explv + " -n " +
                     str(readCount) + " --stranded " + bed12)
    runAndEcho(getBinary("gensimreads.py") + gensimOptions + quiet + bedReads)

    getseqOptions = (" -f -r " + str(seqError) + " -l " + str(readLength) +
                     " " + bedReads + " " + bed12Fasta)
    runAndEcho(getBinary("getseqfrombed.py") + getseqOptions + quiet + faReads)
Ejemplo n.º 5
0
def checkNextGenMapVersion():
    """Check that the ``ngm`` binary reports the expected version.

    Runs ``ngm`` through ``shellerr`` (with ``raiseError=False``), takes the
    first ``major.minor.patch`` token found in the returned text and compares
    it with ``__ngm_version__``.

    Raises:
        RuntimeError: If no version token can be found, or if the version
            found differs from ``__ngm_version__``.
    """
    ngmHelp = shellerr(getBinary("ngm"), raiseError = False)
    # Unanchored search instead of re.match('.*(...).*'): the greedy '.*'
    # prefix swallowed the leading digits of a multi-digit major version
    # ("10.5.12" was read as "0.5.12"), and re.match only ever looked at the
    # first line of the output.
    matchObj = re.search(r'[0-9]+\.[0-9]+\.[0-9]+', ngmHelp)
    if matchObj is None:
        raise RuntimeError('Could not get NextGenMap version. Please reinstall slamdunk package.')

    version = matchObj.group(0)
    if version != __ngm_version__:
        raise RuntimeError('NextGenMap version expected: ' + __ngm_version__ + " but found " + version + ". Please reinstall slamdunk package.")
Ejemplo n.º 6
0
def SNPs(inputBAM, outputSNP, referenceFile, minVarFreq, minCov, minQual, log, printOnly=False, verbose=True, force=False):
    """Pipe ``samtools mpileup`` into VarScan ``mpileup2snp`` and write the VCF to ``outputSNP``.

    Args:
        inputBAM: Alignment file handed to ``samtools mpileup``.
        outputSNP: Path of the file receiving VarScan's stdout. It is opened
            for writing whenever the step runs, including with ``printOnly``.
        referenceFile: Reference handed to mpileup via ``-f``.
        minVarFreq: Passed to VarScan via ``--min-var-freq``.
        minCov: Passed to VarScan via ``--min-coverage``.
        minQual: Accepted but not used inside this function.
        log: Open file object; receives the command lines (when ``verbose``)
            and the stderr of both child processes.
        printOnly: When true, the commands are only logged, not started.
        verbose: When true, both command lines are printed to ``log``.
        force: Forwarded to ``checkStep``.
    """
    if(checkStep([inputBAM, referenceFile], [outputSNP], force)):
        # The with-block closes the output file even if starting or waiting
        # for a child process raises.
        with open(outputSNP, 'w') as fileSNP:

            mpileupCmd = getBinary("samtools") + " mpileup -B -A -f " + referenceFile + " " + inputBAM
            if(verbose):
                print(mpileupCmd, file=log)
            if(not printOnly):
                mpileup = subprocess.Popen(mpileupCmd, shell=True, stdout=subprocess.PIPE, stderr=log)

            varscanCmd = "java -jar " + getBinary("VarScan.v2.4.1.jar") + " mpileup2snp  --strand-filter 0 --output-vcf --min-var-freq " + str(minVarFreq) + " --min-coverage " + str(minCov) + " --variants 1"
            if(verbose):
                print(varscanCmd, file=log)
            if(not printOnly):
                try:
                    varscan = subprocess.Popen(varscanCmd, shell=True, stdin=mpileup.stdout, stdout=fileSNP, stderr=log)
                    # Drop the parent's copy of the pipe so mpileup receives
                    # SIGPIPE if VarScan exits before consuming everything.
                    mpileup.stdout.close()
                    varscan.wait()
                finally:
                    # Closing twice is harmless; waiting reaps the mpileup
                    # child so it does not linger as a zombie.
                    mpileup.stdout.close()
                    mpileup.wait()
    else:
        print("Skipping SNP calling", file=log)
Ejemplo n.º 7
0
def SNPs(inputBAM, outputSNP, referenceFile, minVarFreq, minCov, minQual, log, printOnly=False, verbose=True, force=False):
    """Call SNPs by piping ``samtools mpileup`` into VarScan ``mpileup2snp``.

    VarScan's stdout is written to ``outputSNP``; stderr of both child
    processes goes to ``log``. The step only runs when ``checkStep`` says so.

    Args:
        inputBAM: Alignment file handed to ``samtools mpileup``.
        outputSNP: Output path; opened for writing whenever the step runs,
            including with ``printOnly``.
        referenceFile: Reference handed to mpileup via ``-f``.
        minVarFreq: Passed to VarScan via ``--min-var-freq``.
        minCov: Passed to VarScan via ``--min-coverage``.
        minQual: Accepted but not used inside this function.
        log: Open file object for command lines and child stderr.
        printOnly: When true, nothing is executed; commands are only logged.
        verbose: When true, both command lines are printed to ``log``.
        force: Forwarded to ``checkStep``.
    """
    if(checkStep([inputBAM, referenceFile], [outputSNP], force)):
        # Context manager: the output handle is released on every exit path.
        with open(outputSNP, 'w') as fileSNP:

            mpileupCmd = getBinary("samtools") + " mpileup -B -A -f " + referenceFile + " " + inputBAM
            if(verbose):
                print(mpileupCmd, file=log)
            if(not printOnly):
                mpileup = subprocess.Popen(mpileupCmd, shell=True, stdout=subprocess.PIPE, stderr=log)

            varscanCmd = "java -jar " + getBinary("VarScan.v2.4.1.jar") + " mpileup2snp  --strand-filter 0 --output-vcf --min-var-freq " + str(minVarFreq) + " --min-coverage " + str(minCov) + " --variants 1"
            if(verbose):
                print(varscanCmd, file=log)
            if(not printOnly):
                try:
                    varscan = subprocess.Popen(varscanCmd, shell=True, stdin=mpileup.stdout, stdout=fileSNP, stderr=log)
                    # The consumer now owns the read end; release ours so the
                    # producer sees SIGPIPE if the consumer exits early.
                    mpileup.stdout.close()
                    varscan.wait()
                finally:
                    # Idempotent close, then reap the producer process.
                    mpileup.stdout.close()
                    mpileup.wait()
    else:
        print("Skipping SNP calling", file=log)
Ejemplo n.º 8
0
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength, polyALength, explv, snpRate, vcfFile):
    """Write simulated UTR sequences, a matching BED12 file and an expression profile.

    Steps, in order:
      1. UTRs are parsed from ``bed`` and their sequences extracted from
         ``referenceFasta``; each sequence is passed through ``simulateUTR``
         and written to ``bed12Fasta``. ``vcfFile`` is created with a VCF
         header and handed to ``simulateUTR``.
      2. For every entry of ``bed`` one BED12 line is written to ``bed12``
         covering the last ``fragmentLength`` bases of the UTR (coordinates
         relative to the UTR itself), where ``fragmentLength`` is drawn from
         ``random.randrange(150, 450)`` and capped at the UTR length.
      3. ``genexplvprofile.py`` is run on ``bed12`` with its stdout
         redirected to ``explv``.

    Args:
        readLength: Accepted but not used inside this function.

    Returns:
        The summed length of all fragments written to ``bed12``.
    """
    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Context managers make sure both output files are closed even when
    # sequence extraction or simulateUTR raises.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFile = BedTool(bed)

        bedFasta = bedFile.sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if(line[0] == ">"):
                    # Header line: remember which UTR the next sequence belongs to.
                    print(line, file=bed12FastaFile)
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength, snpRate, vcf), file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450
    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):

            fragmentLength = random.randrange(minFragmentLength, maxFragmentLength, 1)
            fragmentLength = min(fragmentLength, utr.getLength())

            start = max(0, utr.getLength() - fragmentLength)
            end = utr.getLength()

            totalLength += (end - start)
            print(utr.name, start, end, utr.name, utr.score, "+", start, end, "255,0,0", "1", (end - start), 0, sep="\t", file=bed12File)

    output = shell(getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 + " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength
Ejemplo n.º 9
0
def bamSort(outputBAM, log, newHeader, verbose):
    """Sort ``outputBAM`` in place with ``samtools sort``.

    The unsorted data is first moved to ``outputBAM + "_tmp"``: by a plain
    rename when ``newHeader`` is ``None``, otherwise by copying every read
    into a new BAM file written with ``newHeader``. The temporary file is
    removed after sorting.

    Args:
        outputBAM: Path of the BAM file to sort; also the sort destination.
        log: Forwarded to ``run``.
        newHeader: Header for the rewritten file, or ``None`` to keep the
            file as it is.
        verbose: Forwarded to ``run``.
    """
    unsorted = outputBAM + "_tmp"

    if newHeader is None:
        # Nothing to rewrite: just move the file out of the way.
        os.rename(outputBAM, unsorted)
    else:
        source = pysam.AlignmentFile(outputBAM, "rb")
        target = pysam.AlignmentFile(unsorted, "wb", header=newHeader)
        for record in source:
            target.write(record)
        source.close()
        target.close()

    sortCmd = [getBinary("samtools"), "sort", "-o", outputBAM, unsorted]
    run(" ".join(sortCmd), log, verbose=verbose, dry=False)
    removeFile(unsorted)
Ejemplo n.º 10
0
def bamSort(outputBAM, log, newHeader, verbose):
    """Replace ``outputBAM`` with a sorted copy of itself.

    When ``newHeader`` is given, the reads are first copied into a temporary
    BAM file carrying that header; otherwise the file is simply renamed to
    the temporary name. ``samtools sort`` then writes the sorted result back
    to ``outputBAM`` and the temporary file is deleted. The sort is always
    executed (``dry=False``).
    """
    scratchPath = outputBAM + "_tmp"
    replaceHeader = newHeader is not None

    if replaceHeader:
        reader = pysam.AlignmentFile(outputBAM, "rb")
        writer = pysam.AlignmentFile(scratchPath, "wb", header=newHeader)
        for alignment in reader:
            writer.write(alignment)
        reader.close()
        writer.close()
    else:
        os.rename(outputBAM, scratchPath)

    samtools = getBinary("samtools")
    run(" ".join([samtools, "sort", "-o", outputBAM, scratchPath]), log, verbose=verbose, dry=False)
    removeFile(scratchPath)
Ejemplo n.º 11
0
def prepareUTRs(bed, bed12, bed12Fasta, referenceFasta, readLength,
                polyALength, explv, snpRate, vcfFile):
    """Prepare simulated UTR sequences, fragment annotation and expression levels.

    * Sequences for the entries of ``bed`` are extracted from
      ``referenceFasta``, passed through ``simulateUTR`` and written to
      ``bed12Fasta``. ``vcfFile`` is created with a VCF header and handed to
      ``simulateUTR``.
    * One BED12 line per entry of ``bed`` is written to ``bed12``. It spans
      the last ``fragmentLength`` bases of the UTR in UTR-local coordinates;
      ``fragmentLength`` comes from ``random.randrange(150, 450)`` and is
      capped at the UTR length.
    * ``genexplvprofile.py`` is run on ``bed12`` with stdout redirected to
      ``explv``.

    Args:
        readLength: Accepted but not used inside this function.

    Returns:
        Sum of the lengths of all fragments written to ``bed12``.
    """

    # Read utrs from BED file
    utrs = parseUtrBedFile(bed)

    # Both outputs are managed by with-blocks so they are closed even if
    # sequence extraction or simulateUTR fails part-way.
    with open(vcfFile, "w") as vcf:
        print("##fileformat=VCFv4.1", file=vcf)
        print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf)

        bedFile = BedTool(bed)

        bedFasta = bedFile.sequence(fi=referenceFasta, s=True, name=True)

        with open(bed12Fasta, "w") as bed12FastaFile:
            utrName = None
            for line in bedFasta.print_sequence().splitlines():
                if (line[0] == ">"):
                    # Header line: names the UTR of the following sequence.
                    print(line, file=bed12FastaFile)
                    utrName = line[1:]
                else:
                    print(simulateUTR(line, utrs[utrName], polyALength,
                                      snpRate, vcf),
                          file=bed12FastaFile)

    totalLength = 0

    minFragmentLength = 150
    maxFragmentLength = 450
    with open(bed12, "w") as bed12File:
        for utr in BedIterator(bed):

            fragmentLength = random.randrange(minFragmentLength,
                                              maxFragmentLength, 1)
            fragmentLength = min(fragmentLength, utr.getLength())

            start = max(0, utr.getLength() - fragmentLength)
            end = utr.getLength()

            totalLength += (end - start)
            print(utr.name,
                  start,
                  end,
                  utr.name,
                  utr.score,
                  "+",
                  start,
                  end,
                  "255,0,0",
                  "1", (end - start),
                  0,
                  sep="\t",
                  file=bed12File)

    output = shell(
        getBinary("genexplvprofile.py") + " --geometric 1 " + bed12 +
        " 2> /dev/null > " + explv)
    if len(output.strip()) > 5:
        print(output)

    return totalLength