Ejemplo n.º 1
0
def phaseVCF(vcfpath, phasevcfpath):
    """Phase a VCF file with Beagle and return the output file prefix.

    Args:
        vcfpath: Path to the input VCF. If it does not end in ``.vcf.gz`` it
            is passed to ``gzipFile`` first and ``.gz`` is appended to the
            path (assumes ``gzipFile`` writes ``<vcfpath>.gz`` -- TODO confirm).
        phasevcfpath: Only its directory part is used; the Beagle output is
            written into that directory.

    Returns:
        The output prefix (no directory): the input file name with a
        trailing ``.vcf.gz`` replaced by ``_phased``.
    """
    software_paths = params.GetSoftwarePath()
    java_path = software_paths[0]
    beagle_path = software_paths[1]

    print(" ___ phasing vcf file ___ ")
    if not vcfpath.endswith('.vcf.gz'):
        gzipFile(vcfpath)
        vcfpath = vcfpath + '.gz'

    vcffn = os.path.split(vcfpath)[1]
    outdir = os.path.split(phasevcfpath)[0]
    phasevcffn = sub('.vcf.gz$', '_phased', vcffn)

    # With no directory component, "/".join(['', name]) would yield "/name",
    # i.e. the filesystem root; use the bare prefix instead.
    if outdir:
        outprefix = "/".join([outdir, phasevcffn])
    else:
        outprefix = phasevcffn

    command = " ".join([
        java_path, "-Xmx4g -jar", beagle_path, "gt=" + vcfpath,
        "out=" + outprefix, "2> beagle.log"
    ])

    runCommand(command)
    return phasevcffn
Ejemplo n.º 2
0
def thinVCF(invcf, outvcf):
    """Run vcftools on ``invcf`` with ``--thin 50 --recode``.

    Args:
        invcf: Value passed to ``--vcf``.
        outvcf: Value passed to ``--out``.

    The vcftools executable is taken from index 4 of
    ``params.GetSoftwarePath()``.
    """
    vcftools_path = params.GetSoftwarePath()[4]
    argv = [vcftools_path, "--vcf", invcf, "--thin 50 --out", outvcf, "--recode"]
    runCommand(" ".join(argv))
Ejemplo n.º 3
0
def removeDupSambamba(bamrepairedfinalsortfn, tmpbams_path=''):
    """Run ``sambamba markdup --remove-duplicates`` on a sorted BAM.

    Args:
        bamrepairedfinalsortfn: Input BAM path.
        tmpbams_path: Accepted but not used by this function.

    Returns:
        The output path: the input with a trailing ``.sorted.bam`` replaced
        by ``.marked.bam``.
    """
    marked_fn = sub('.sorted.bam$', ".marked.bam", bamrepairedfinalsortfn)
    (_, _, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    argv = [
        sambamba_path,
        "markdup",
        "--remove-duplicates",
        "--nthreads",
        str(4),
        bamrepairedfinalsortfn,
        marked_fn,
    ]
    runCommand(" ".join(argv))
    return marked_fn
Ejemplo n.º 4
0
def extractPairedBAMfromROI(inbamfn, bedfn, outbamfn):
    """Extract paired reads overlapping the regions in ``bedfn``.

    Runs ``samtools view -b -f 0x0001 -L <bedfn> <inbamfn> > <outbamfn>``.
    ``-f 0x0001`` requires the SAM "paired" flag bit; ``-L`` restricts the
    output to the BED regions. The ``>`` redirect is part of the command
    string, so ``runCommand`` presumably executes it through a shell.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    argv = [samtools_path, "view -b -f 0x0001 -L", bedfn, inbamfn, ">", outbamfn]
    runCommand(" ".join(argv))
Ejemplo n.º 5
0
def extractPairedReadfromROI(inbamfn, bedfn, outbamfn, flag="either"):
    """Run ``bedtools pairtobed`` on a BAM against a BED file.

    Args:
        inbamfn: BAM passed to ``-abam``.
        bedfn: BED file passed to ``-b``.
        outbamfn: File that receives the command's standard output.
        flag: Value passed to ``-type`` (defaults to ``"either"``).

    Standard error of the command is redirected to ``bedtool.log`` in the
    current working directory.
    """
    (_, _, _, _, bedtools_path, _, _) = params.GetSoftwarePath()
    argv = [
        bedtools_path, "pairtobed -abam", inbamfn,
        "-b", bedfn,
        "-type", flag,
        ">", outbamfn,
        "2> bedtool.log",
    ]
    runCommand(" ".join(argv))
Ejemplo n.º 6
0
def removeIfEmpty(bamdir, file):
    """Delete ``bamdir/file`` if it is a BAM file containing no reads.

    Args:
        bamdir: Directory containing the file.
        file: File name; anything not ending in ``.bam`` is ignored.

    The check pipes ``samtools view`` into ``head -1 | wc -l`` and removes
    the file when that count is zero.
    """
    java_path, beagle_path, picard_path, samtools_path, bedtools_path, vcftools_path, sambamba_path = params.GetSoftwarePath()
    if not file.endswith(".bam"):
        return

    bampath = "/".join([bamdir, file])
    # Nothing to inspect or delete if the file is missing; skip the
    # subprocess entirely in that case.
    if not os.path.isfile(bampath):
        return

    # `head -1` stops after the first record, so large files are not
    # streamed in full just to learn that they are non-empty.
    command = " ".join([samtools_path, "view", bampath, "| head -1 | wc -l"])
    nline = subprocess.check_output(command, shell=True)
    if int(nline) == 0:
        os.remove(bampath)
Ejemplo n.º 7
0
def splitStrands(inbamfn):
    """Split a BAM into two files by SAM flag using ``samtools view -u -h``.

    Two files are written next to the input, named by replacing a trailing
    ``.bam`` in ``inbamfn``:

    * ``.read_pos.bam`` -- reads matching ``-f 33`` (0x1 paired + 0x20 mate
      on reverse strand).
    * ``.read_neg.bam`` -- reads matching ``-f 17`` (0x1 paired + 0x10 read
      on reverse strand).

    Nothing is returned.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    selections = (
        ("33", '.read_pos.bam'),
        ("17", '.read_neg.bam'),
    )
    commands = [
        " ".join([
            samtools_path, "view -u -h -f " + flag, inbamfn,
            ">", sub('.bam$', suffix, inbamfn),
        ])
        for flag, suffix in selections
    ]
    for command in commands:
        runCommand(command)
Ejemplo n.º 8
0
def find_unpaired_reads(inbamfn):
    """Write reads matching ``-f  0x0004`` to ``<inbamfn minus .bam>.unpairedfn.bam``.

    NOTE(review): SAM flag 0x4 means "read unmapped", whereas the function
    and output names say "unpaired" -- confirm which was intended.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    out_fn = sub('.bam$', '.unpairedfn.bam', inbamfn)
    argv = [samtools_path, "view -u -h -f  0x0004", inbamfn, ">", out_fn]
    runCommand(" ".join(argv))
Ejemplo n.º 9
0
def sortByName(inbamfn, outbamfn):
    """Run ``sambamba sort -n`` on ``inbamfn``, writing to ``outbamfn``.

    Does nothing when ``inbamfn`` is ``None``. The command line is printed
    before it is executed.
    """
    (_, _, _, _, _, _, sambamba_path) = params.GetSoftwarePath()

    if inbamfn is None:
        return

    sort_cmd = " ".join([sambamba_path, "sort -n", inbamfn, "-o", outbamfn])
    print(sort_cmd)
    runCommand(sort_cmd)
Ejemplo n.º 10
0
def splitBamByChr(inbamfn, path, chr):
    """Extract one region of ``inbamfn`` into ``<path>/<chr>.bam``.

    Args:
        inbamfn: Input BAM.
        path: Output directory.
        chr: Region passed to ``samtools view -bh``; when ``None`` the
            function returns without doing anything.
    """
    if chr is None:
        return

    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    region = str(chr)
    target = "/".join([path, region + ".bam"])
    runCommand(" ".join([samtools_path, "view -bh", inbamfn, region, ">", target]))
Ejemplo n.º 11
0
def subsample(bamfn1, bamfn2, samplingrate=0.5):
    """Subsample ``bamfn1`` into ``bamfn2`` with ``samtools view -s``.

    Args:
        bamfn1: Input BAM.
        bamfn2: Output BAM (receives the command's standard output).
        samplingrate: Value passed to ``-s``; may be a number or a string.
    """
    java_path, beagle_path, picard_path, samtools_path, bedtools_path, vcftools_path, sambamba_path = params.GetSoftwarePath()
    # str.join only accepts strings; the numeric default (0.5) would raise
    # TypeError if passed through unconverted.
    command = " ".join([samtools_path, "view -s", str(samplingrate), "-b", bamfn1, ">", bamfn2])
    runCommand(command)
Ejemplo n.º 12
0
def extractBAMfromROI_All(inbamfn, bedfn, outbamfn):
    """Extract all reads overlapping the regions in ``bedfn``.

    Runs ``samtools view -b -L <bedfn> <inbamfn> > <outbamfn>``.

    NOTE(review): this unpacks six values from ``params.GetSoftwarePath()``
    (no picard path) -- confirm this matches the tuple that function
    actually returns.
    """
    java_path, beagle_path, samtools_path, bedtools_path, vcftools_path, sambamba_path = params.GetSoftwarePath()
    # The unpacked name is `samtools_path`; the previous `samtool_path`
    # was undefined and raised NameError.
    command = " ".join(
        [samtools_path, "view -b -L", bedfn, inbamfn, ">", outbamfn])
    runCommand(command)
Ejemplo n.º 13
0
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """Merge every ``*.bam`` found under ``finalbamdir`` with ``sambamba merge``.

    Args:
        mergedBamfn: Output BAM passed to ``sambamba merge``.
        finalbamdir: Directory tree to search; the process also changes its
            working directory to it.

    Symlinked files are resolved to their real path and each distinct path
    is merged once. Inputs are listed most-recently-found first.
    """
    (_, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    bam_paths = []
    for root, _dirnames, filenames in os.walk(finalbamdir):
        for filename in fnmatch.filter(filenames, '*.bam'):
            candidate = os.path.join(root, filename)
            if os.path.islink(candidate):
                candidate = os.path.realpath(candidate)
            if candidate not in bam_paths:
                bam_paths.append(candidate)

    # Each newly found path goes in front of the list; every entry is
    # followed by a single space.
    inputs = "".join(bam + " " for bam in reversed(bam_paths))
    runCommand(" ".join([sambamba_path, "merge", mergedBamfn, inputs]))
Ejemplo n.º 14
0
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """Merge, or sort, the ``*.bam`` files found under ``finalbamdir``.

    Args:
        mergedBamfn: Output BAM used when more than one input is found.
        finalbamdir: Directory tree to search; the process also changes its
            working directory to it.

    * More than one distinct BAM: ``sambamba merge`` them into
      ``mergedBamfn``.
    * Exactly one BAM: sort it via ``sortBam`` into ``<name>.sort.bam`` and
      delete the unsorted file.
    * None: do nothing.
    """
    (_, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    bam_paths = []
    for root, _dirnames, filenames in os.walk(finalbamdir):
        for filename in fnmatch.filter(filenames, '*.bam'):
            candidate = os.path.join(root, filename)
            if os.path.islink(candidate):
                candidate = os.path.realpath(candidate)
            if candidate not in bam_paths:
                bam_paths.append(candidate)

    # Newest-found first, each entry followed by one space.
    inputs = "".join(bam + " " for bam in reversed(bam_paths))
    found = len(bam_paths)

    if found > 1:
        runCommand(" ".join([sambamba_path, "merge", mergedBamfn, inputs]))
    elif found == 1:
        only_bam = str(inputs.strip())
        sorted_bam = sub('.bam$', '.sort.bam', only_bam)
        sortBam(inputs, sorted_bam, finalbamdir)
        os.remove(only_bam)
Ejemplo n.º 15
0
def getStrands(inbamfn):
    """Split ``inbamfn`` by the 0x10 (reverse strand) SAM flag.

    Writes ``<name>_forward.bam`` (``-F 0x10``, flag not set) and
    ``<name>_reverse.bam`` (``-f 0x10``, flag set), where ``<name>`` is the
    input with a trailing ``.bam`` removed.

    NOTE(review): ``samtools view`` is invoked without ``-b``, so the
    outputs are presumably SAM text despite the ``.bam`` suffix -- confirm
    this is intended.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    forward_fn = sub('.bam$', '_forward.bam', inbamfn)
    reverse_fn = sub('.bam$', '_reverse.bam', inbamfn)
    commands = (
        " ".join([samtools_path, "view -F 0x10", inbamfn, ">", forward_fn]),
        " ".join([samtools_path, "view -f 0x10", inbamfn, ">", reverse_fn]),
    )
    for command in commands:
        runCommand(command)
Ejemplo n.º 16
0
def countReads(inbamfn):
    """Count the records printed by ``samtools view`` for ``inbamfn``.

    Returns:
        The output of ``wc -l`` as a string with all whitespace removed.
    """
    java_path, beagle_path, picard_path, samtools_path, bedtools_path, vcftools_path, sambamba_path = params.GetSoftwarePath()
    cmd = " ".join([samtools_path, "view", inbamfn, "|wc -l"])
    # universal_newlines=True makes the pipes text-mode, so `out` is a str
    # on both Python 2 and 3; with bytes, "".join(out.split()) raises
    # TypeError on Python 3.
    out, err = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                shell=True,
                                universal_newlines=True).communicate()
    return "".join(out.split())
Ejemplo n.º 17
0
def sortIndexBam(inbamfn, outbamfn):
    """Sort ``inbamfn`` into ``outbamfn`` with sambamba, then index the result.

    Runs ``sambamba sort <in> -o <out>`` followed by ``sambamba index <out>``.
    """
    (_, _, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    steps = (
        " ".join([sambamba_path, "sort", inbamfn, "-o", outbamfn]),
        " ".join([sambamba_path, "index", outbamfn]),
    )
    for step in steps:
        runCommand(step)
Ejemplo n.º 18
0
def splitPairAndStrands(inbamfn):
    """Split a BAM into four files by mate number and strand flags.

    Each output is produced with ``samtools view -u -h -f <flag>`` and named
    by replacing a trailing ``.bam`` in ``inbamfn``:

    * ``.read1_pos.bam`` -- 0x0061 (paired, mate reverse, first in pair)
    * ``.read1_neg.bam`` -- 0x0051 (paired, read reverse, first in pair)
    * ``.read2_pos.bam`` -- 0x0091 (paired, read reverse, second in pair)
    * ``.read2_neg.bam`` -- 0x00A1 (paired, mate reverse, second in pair)

    Nothing is returned.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    selections = (
        ("0x0061", '.read1_pos.bam'),
        ("0x0051", '.read1_neg.bam'),
        ("0x0091", '.read2_pos.bam'),
        ("0x00A1", '.read2_neg.bam'),
    )
    commands = [
        " ".join([
            samtools_path, "view -u -h -f " + flag, inbamfn,
            ">", sub('.bam$', suffix, inbamfn),
        ])
        for flag, suffix in selections
    ]
    for command in commands:
        runCommand(command)
Ejemplo n.º 19
0
def sortBam(inbamfn, outbamfn, tmpbams_path=''):
    """Sort ``inbamfn`` into ``outbamfn`` with ``sambamba sort``.

    Args:
        inbamfn: Input BAM.
        outbamfn: Output BAM passed to ``-o``.
        tmpbams_path: Optional directory for sambamba's temporary files;
            when empty, no ``--tmpdir`` option is passed.
    """
    java_path, beagle_path, picard_path, samtools_path, bedtools_path, vcftools_path, sambamba_path = params.GetSoftwarePath()
    argv = [sambamba_path, "sort", inbamfn, "-o", outbamfn]
    if tmpbams_path:
        # Option and value must form a single token; joining them with a
        # space produced an empty --tmpdir plus a stray positional argument.
        argv.append('--tmpdir=' + tmpbams_path)
    runCommand(" ".join(argv))
Ejemplo n.º 20
0
def extract_proper_paired_reads(inbamfn, properfn):
    """Filter ``inbamfn`` into ``properfn``, then delete ``inbamfn``.

    Runs ``samtools view -f 0x03 -bSq 30`` (flag bits 0x1 paired and 0x2
    proper pair required; ``-q 30`` sets a minimum mapping quality).

    The input file is removed after the command has been run.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    argv = [samtools_path, "view -f 0x03 -bSq 30", inbamfn, ">", properfn]
    runCommand(" ".join(argv))
    os.remove(inbamfn)
Ejemplo n.º 21
0
def getProperPairs(inbamfn, outbamfn):
    """Write reads of ``inbamfn`` with flag bits 0x0003 set to ``outbamfn``.

    Runs ``samtools view -u -h -f 0x0003``; 0x1 is "paired" and 0x2 is
    "proper pair" in the SAM flag field.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    argv = [samtools_path, "view -u -h -f 0x0003", inbamfn, ">", outbamfn]
    runCommand(" ".join(argv))
Ejemplo n.º 22
0
def thinVCF(invcf, outvcf):
    """Run vcftools on ``invcf`` with ``--thin 50 --recode``.

    Args:
        invcf: Value passed to ``--vcf``.
        outvcf: Value passed to ``--out``.
    """
    (_, _, _, _, _, vcftools_path, _) = params.GetSoftwarePath()
    argv = [vcftools_path, "--vcf", invcf, "--thin 50 --out", outvcf, "--recode"]
    runCommand(" ".join(argv))
Ejemplo n.º 23
0
def merge_bams(bamfn1, bamfn2, mergefn):
    """Merge two BAM files into ``mergefn`` using ``sambamba merge``.

    The merge is run with ``--nthreads 4``.
    """
    (_, _, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    argv = [sambamba_path, "merge", mergefn, bamfn1, bamfn2, "--nthreads", str(4)]
    runCommand(" ".join(argv))
Ejemplo n.º 24
0
def extractAllReadsfromROI(inbamfn, bedfn, outbamfn):
    """Run ``bedtools intersect`` on a BAM against a BED file.

    Args:
        inbamfn: BAM passed to ``-abam``.
        bedfn: BED file passed to ``-b``.
        outbamfn: File that receives the command's standard output.

    Standard error of the command is redirected to ``bedtool.log`` in the
    current working directory.
    """
    (_, _, _, _, bedtools_path, _, _) = params.GetSoftwarePath()
    argv = [
        bedtools_path, "intersect -abam", inbamfn,
        "-b", bedfn,
        ">", outbamfn,
        "2> bedtool.log",
    ]
    runCommand(" ".join(argv))
Ejemplo n.º 25
0
def merge_final(mergefn, finalbamdir):
    """Merge all BAMs in ``finalbamdir`` and replace the result with a deduplicated copy.

    Steps, in order:

    1. Change the working directory to ``finalbamdir``.
    2. ``sambamba merge <mergefn> *.bam`` (the glob is left for the shell
       to expand).
    3. ``sambamba markdup --remove-duplicates`` from ``mergefn`` into a
       ``.marked.bam`` sibling.
    4. Delete ``mergefn`` and its ``.bai`` index, then rename the marked
       BAM and its index into their place.
    """
    (_, _, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    marked_fn = sub('.bam$', ".marked.bam", mergefn)
    os.chdir(finalbamdir)

    threads = str(4)
    merge_cmd = " ".join([sambamba_path, "merge", mergefn, "*.bam", "--nthreads", threads])
    dedup_cmd = " ".join([
        sambamba_path, "markdup", "--remove-duplicates",
        "--nthreads", threads, mergefn, marked_fn,
    ])

    runCommand(merge_cmd)
    print(" ___ removing merged duplicates near breakpoints ___ ")
    runCommand(dedup_cmd)

    merged_index = mergefn + '.bai'
    os.remove(mergefn)
    os.remove(merged_index)
    os.rename(marked_fn, mergefn)
    os.rename(marked_fn + '.bai', merged_index)
Ejemplo n.º 26
0
def dedupBam(inbamfn, outbamfn):
    """Run ``samtools rmdup`` from ``inbamfn`` into ``outbamfn``."""
    java_path, beagle_path, picard_path, samtools_path, bedtools_path, vcftools_path, sambamba_path = params.GetSoftwarePath()
    # The unpacked name is `samtools_path`; the previous `samtool_path`
    # was undefined and raised NameError.
    command = " ".join([samtools_path, "rmdup", inbamfn, outbamfn])
    runCommand(command)
Ejemplo n.º 27
0
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """Merge, or sort, the ``*.bam`` files found under ``finalbamdir``.

    Args:
        mergedBamfn: Output BAM for the merge cases.
        finalbamdir: Directory tree to search; the process also changes its
            working directory to it.

    * More than one distinct BAM: ``sambamba merge`` them into
      ``mergedBamfn``.
    * Exactly one BAM ending in ``GAIN.bam``: merge it with the file of the
      same lower-cased name (``_gain`` removed) under
      ``params.GetSplitBamsPath()``.
    * Exactly one BAM ending in ``LOSS.bam``: sort it via ``sortBam`` into
      ``<name>.sort.bam`` and delete the unsorted file.
    * Otherwise: do nothing.
    """
    (_, _, _, _, _, _, sambamba_path) = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    bam_paths = []
    for root, _dirnames, filenames in os.walk(finalbamdir):
        for filename in fnmatch.filter(filenames, '*.bam'):
            candidate = os.path.join(root, filename)
            if os.path.islink(candidate):
                candidate = os.path.realpath(candidate)
            if candidate not in bam_paths:
                bam_paths.append(candidate)

    # Newest-found first, each entry followed by one space.
    inputs = "".join(bam + " " for bam in reversed(bam_paths))
    found = len(bam_paths)

    if found > 1:
        runCommand(" ".join([sambamba_path, "merge", mergedBamfn, inputs, "--nthreads", str(4)]))
        return
    if found != 1:
        return

    only_bam = str(inputs.strip())
    if only_bam.endswith("GAIN.bam"):
        fname = os.path.split(only_bam)[1]
        inbam_original = '/'.join([params.GetSplitBamsPath(), sub('_gain', '', fname.lower())])
        runCommand(" ".join([
            sambamba_path, "merge", mergedBamfn, inputs, inbam_original,
            "--nthreads", str(4),
        ]))
    elif only_bam.endswith("LOSS.bam"):
        sorted_bam = sub('.bam$', '.sort.bam', only_bam)
        sortBam(inputs, sorted_bam, finalbamdir)
        os.remove(only_bam)
Ejemplo n.º 28
0
def removeDupPicard(bamrepairedfinalsortfn, tmpbams_path=''):
    """Remove duplicates from a sorted BAM with Picard ``MarkDuplicates``.

    Args:
        bamrepairedfinalsortfn: Input BAM path.
        tmpbams_path: Accepted but not used by this function.

    Returns:
        The output BAM path: the input with a trailing
        ``.re_paired_final.sorted.bam`` replaced by
        ``.re_paired_final.marked.bam``. A ``.marked_metrics.txt`` file is
        named from the same stem and passed as ``M=``.
    """
    print(" ___ removing repaired duplicates ___ ")

    sorted_suffix = '.re_paired_final.sorted.bam$'
    marked_fn = sub(sorted_suffix, ".re_paired_final.marked.bam", bamrepairedfinalsortfn)
    metrics_fn = sub(sorted_suffix, ".marked_metrics.txt", bamrepairedfinalsortfn)

    (java_path, _, picard_path, _, _, _, _) = params.GetSoftwarePath()
    argv = [
        java_path, "-Xmx8g -jar", picard_path, "MarkDuplicates",
        "I=" + bamrepairedfinalsortfn,
        "O=" + marked_fn,
        "M=" + metrics_fn,
        "REMOVE_DUPLICATES=true",
        "ASSUME_SORTED=true",
    ]
    runCommand(" ".join(argv))
    return marked_fn
Ejemplo n.º 29
0
def splitPairs(inbamfn):
    """Split properly paired reads of ``inbamfn`` into first and second mates.

    Writes ``<name>_read1.bam`` (``-f 0x0043``: paired, proper pair, first
    in pair) and ``<name>_read2.bam`` (``-f 0x0083``: paired, proper pair,
    second in pair), where ``<name>`` is the input with a trailing ``.bam``
    removed.
    """
    (_, _, _, samtools_path, _, _, _) = params.GetSoftwarePath()
    read1_fn = sub('.bam$', '_read1.bam', inbamfn)
    read2_fn = sub('.bam$', '_read2.bam', inbamfn)
    commands = (
        " ".join([samtools_path, "view -u -h -f 0x0043", inbamfn, ">", read1_fn]),
        " ".join([samtools_path, "view -u -h -f 0x0083", inbamfn, ">", read2_fn]),
    )
    for command in commands:
        runCommand(command)