from vartools import getmyconfig

# Tool settings looked up in the 'Variation' section through
# getmyconfig.getConfig (presumably executable paths or launch commands --
# confirm against the configuration file).
gatk4 = getmyconfig.getConfig('Variation', 'gatk4')
samtools = getmyconfig.getConfig('Variation', 'samtools')
sniffles = getmyconfig.getConfig('Variation', 'Sniffles')


def tgs_snp_indel(ref, input, sample):
    """Build the GATK4 SNP/indel calling script for one sample.

    The returned text is a shell script; nothing is executed here. It
    contains, in order:

    1. ``HaplotypeCaller`` on ``<input>.sorted.bam`` writing ``<sample>.vcf``.
    2. Two ``SelectVariants`` calls splitting that VCF into
       ``<sample>.raw.snp.vcf`` and ``<sample>.raw.indel.vcf``.
    3. Two ``VariantFiltration`` calls tagging SNPs with ``AS_QD < 2.0``
       (filter name ``ASQD2``) and indels with ``AS_QD < 5.0`` (filter name
       ``ASQD5``).

    Args:
        ref: Reference value passed to every ``-R`` option.
        input: BAM name; every ``.rmdup.bam`` occurrence is removed and
            ``.sorted.bam`` is appended in the command.
        sample: Prefix used for all output file names.

    Returns:
        str: The script text, ending with a newline.
    """
    bam_prefix = input.replace('.rmdup.bam', '')
    # One entry per script line; the trailing empty entry yields the final
    # newline once the lines are joined.
    script_lines = [
        '{gatk4} HaplotypeCaller -R {ref} -I {input}.sorted.bam \\',
        ' --pcr-indel-model AGGRESSIVE \\',
        ' --annotation-group AS_StandardAnnotation \\',
        ' --minimum-mapping-quality 60 \\',
        '-O {sample}.vcf',
        '',
        '{gatk4} SelectVariants  -R {ref} -V {sample}.vcf --select-type-to-include SNP -O {sample}.raw.snp.vcf',
        '{gatk4} SelectVariants  -R {ref} -V {sample}.vcf --select-type-to-include INDEL -O {sample}.raw.indel.vcf',
        '',
        '{gatk4} VariantFiltration -R {ref} -V {sample}.raw.snp.vcf \\',
        '-filter "AS_QD < 2.0" --filter-name "ASQD2" \\',
        '-O {sample}.snps.gatk.vcf',
        '',
        '{gatk4} VariantFiltration -R {ref} -V {sample}.raw.indel.vcf \\',
        '-filter "AS_QD < 5.0" --filter-name "ASQD5" \\',
        '-O {sample}.indel.gatk.vcf',
        '',
    ]
    template = '\n'.join(script_lines)
    return template.format(
        gatk4=gatk4, ref=ref, input=bam_prefix, sample=sample)


def tgs_sv(sample,
Example #2
0
import os,sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('./'))))
from vartools import getmyconfig,make_freec_config

# Tool and helper-script settings looked up in the 'Variation' section through
# getmyconfig.getConfig (presumably executable/script paths -- confirm against
# the configuration file).
samtools = getmyconfig.getConfig('Variation', 'samtools')
bcftools = getmyconfig.getConfig('Variation', 'bcftools')
vcfutils = getmyconfig.getConfig('Variation','vcfutils')
gatk4 = getmyconfig.getConfig('Variation', 'gatk4')
breakdancer = getmyconfig.getConfig('Variation', 'BreakDancer')
bam2cfg = getmyconfig.getConfig('Variation','bam2cfg')
crest = getmyconfig.getConfig('Variation','Crest')
extractSClip = getmyconfig.getConfig('Variation','extractSClip')
cnvnator = getmyconfig.getConfig('Variation', 'CNVnator')
cnvnator2VCF = getmyconfig.getConfig('Variation','cnvnator2VCF')
control_freec = getmyconfig.getConfig('Variation','control_freec')
freec_WGS_config = getmyconfig.getConfig('Variation','freec_WGS_config')
freec_WES_config = getmyconfig.getConfig('Variation','freec_WES_config')
splitSNPindelVCF = getmyconfig.getConfig('Variation','splitSNPindelVCF')
makeGraph = getmyconfig.getConfig('Variation','makeGraph')

## known_site='--known-sites /path/to/ref1.vcf --known-sites /path/to/ref2.vcf ....'
def snp_indel_samtools(ref, input, sample, v_valling, bcftools_filter):
    outfile = ''
    #samtools_p = 'mpileup -C 50 -m 2 -F 0.002 -d 1000 -u -f'
    # vcfutils_p = 'varFilter -Q 20 -d 4 -D 1000'
    bcftools_mpileup = 'mpileup -d 1000 -Ov -f'
    bcftools_call = 'call -mv -Oz -o'
    ### Hard filtering
    if v_valling == 'single':  ## input is a single bam file
        outfile = """{bcftools} {bcftools_mpileup} {ref} {input_bam} | {bcftools} {bcftools_call} {sample}.all.vcf.gz
{bcftools} filter {bcftools_filter} {sample}.all.vcf.gz -o {sample}.filter.vcf.gz
Example #3
0
import os
import re
from vartools import parsering, getmyconfig

# Aligner and post-processing tool settings looked up in the 'Variation'
# section through getmyconfig.getConfig (presumably executable paths --
# confirm against the configuration file).
BWA = getmyconfig.getConfig('Variation', 'bwa')
minimap2 = getmyconfig.getConfig('Variation', 'minimap2')
ngml = getmyconfig.getConfig('Variation', 'ngml')
samtools = getmyconfig.getConfig('Variation', 'samtools')
#picard = getmyconfig.getConfig(('Variation','picard'))
gatk4 = getmyconfig.getConfig('Variation', 'gatk4')


class Mapping(object):
    """Hold the settings shared by one read-mapping run.

    The aligner name given by the caller is translated to the value held in
    the matching module-level setting (``BWA``, ``minimap2`` or ``ngml``).
    """

    def __init__(self, maptools, inputs, outputs, refs, parameters):
        """Store the run settings.

        Args:
            maptools: Aligner name; one of 'BWA', 'Minimap2' or 'NGMLR'.
            inputs: Input location; ``parse`` turns it into an absolute path.
            outputs: Output location; ``parse`` turns it into an absolute
                path.
            refs: Reference value, stored and returned unchanged.
            parameters: Aligner parameters, stored and returned unchanged.

        Raises:
            ValueError: If ``maptools`` is not one of the supported names.
        """
        if maptools == 'BWA':
            self.maptools = BWA
        elif maptools == 'Minimap2':
            self.maptools = minimap2
        elif maptools == 'NGMLR':
            self.maptools = ngml
        else:
            # Fail at construction instead of leaving self.maptools unset,
            # which would only surface later as an AttributeError in parse().
            raise ValueError(
                "unsupported maptools {!r}; expected 'BWA', 'Minimap2' or "
                "'NGMLR'".format(maptools))
        self.inputs = inputs
        self.outputs = outputs
        self.refs = refs
        self.parameters = parameters

    def parse(self):
        """Return the stored settings with absolute, '/'-terminated paths.

        Returns:
            tuple: ``(maptools, parameters, refs, input_path, out_path,
            outfile)`` where ``input_path`` and ``out_path`` are the absolute
            forms of ``inputs`` and ``outputs`` with a trailing '/', and
            ``outfile`` is a new empty list.
        """
        input_path = os.path.abspath(self.inputs) + '/'
        out_path = os.path.abspath(self.outputs) + '/'
        outfile = []
        return (self.maptools, self.parameters, self.refs, input_path,
                out_path, outfile)
Example #4
0
import os
from vartools import getmyconfig
# Settings looked up in the 'Variation' section through getmyconfig.getConfig.
# annovar_dir is used as the directory holding the ANNOVAR perl scripts.
annovar_dir = getmyconfig.getConfig('Variation', 'ANNOVAR')
gff3ToGenePred = getmyconfig.getConfig('Variation', 'gff3ToGenePred')
# ANNOVAR database directories, resolved relative to the parent of the
# directory that contains this file.
hg19_db = os.path.join(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
    'database/genomicsdb/hg19/annovar')
hg38_db = os.path.join(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
    'database/genomicsdb/hg38/annovar')


def annotation(tool, ref, vcf, gff3, out, species='hg19'):
    cmd = ''
    if tool == 'annovar':
        if species == 'hg19':
            cmd = """perl {annovar_dir}/convert2annovar.pl -format vcf4 {vcf} > {out}.avinput
perl {annovar_dir}/annotate_variation.pl -buildver hg19 -geneanno -dbtype refGene {out}.avinput {humandb} --outfile {out}
""".format(annovar_dir=annovar_dir, vcf=vcf, out=out, humandb=hg19_db)
        elif species == 'hg38':
            cmd = """perl {annovar_dir}/convert2annovar.pl -format vcf4 {vcf} > {out}.avinput
perl {annovar_dir}/annotate_variation.pl -buildver hg38 -geneanno -dbtype refGene {out}.avinput {humandb} --outfile {out}
""".format(annovar_dir=annovar_dir, vcf=vcf, out=out, humandb=hg38_db)
        else:
            cmd = """perl {annovar_dir}/convert2annovar.pl -format vcf4 {vcf} > {out}.avinput
{gff3ToGenePred} {gff3} {species}/{species}_refGene.txt
perl {annovar_dir}/retrieve_seq_from_fasta.pl --format refGene --seqfile {ref} {species}/{species}_refGene.txt --out {species}/{species}_efGeneMrna.fa
perl {annovar_dir}/annotate_variation.pl -dbtype refGene {out}.avinput {species} --outfile {out}
""".format(annovar_dir=annovar_dir,
            vcf=vcf,
            ref=ref,
Example #5
0
### Do VQSR or Hard-filtering for GATK4 pipelines
import os
import vartools.getmyconfig as getmyconfig
# GATK4 setting looked up in the 'Variation' section through
# getmyconfig.getConfig (presumably the launcher path -- confirm).
gatk4 = getmyconfig.getConfig('Variation', 'gatk4')


def vqsr(ref, vcf, vqsr_dir, out):
    vqsr_config = os.path.join(
        os.path.abspath(vqsr_dir),
        'vqsr_config.txt')  ### resource data for training
    with open(vqsr_config) as fh:
        snp_resource = []
        indel_resource = []
        for lines in fh:
            if lines.startswith('#'):
                continue
            if lines.startswith('SNP'):
                snp, args, snp_file = lines.strip().split(' ')
                new_snp_file = os.path.join(vqsr_dir, snp_file)
                snp_resource.append(args + ' ' + new_snp_file + ' \\')
            elif lines.startswith('INDEL'):
                indel, args, indel_file = lines.strip().split(' ')
                new_indel_file = os.path.join(vqsr_dir, indel_file)
                indel_resource.append(args + ' ' + new_indel_file + ' \\')
        snp_resource_all = '\n'.join(snp_resource).strip('\\')
        indel_resource_all = '\n'.join(indel_resource).strip('\\')
        cmd = """{gatk4} VariantRecalibrator -R {ref} -V {vcf} \\
{snp_resource_all} \\
--trust-all-polymorphic \\
-tranche 100.0 -tranche 99.95 -tranche 99.9 -tranche 99.8 -tranche 99.6 -tranche 99.5 -tranche 99.4 -tranche 99.3 -tranche 99.0 -tranche 98.0 -tranche 97.0 -tranche 90.0 \\
-an QD -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an SOR -an DP \\
import vartools.getmyconfig as getmyconfig

# Tool settings looked up in the 'Variation' section through
# getmyconfig.getConfig (presumably executable paths -- confirm).
bcftools = getmyconfig.getConfig('Variation', 'bcftools')
gatk4 = getmyconfig.getConfig('Variation', 'gatk4')


def merge(files, type, prefix, *gvcf_p):
    cmd = ''
    if type == 'vcf':
        combine_vcf = ' '.join([i + 'sort.gz' for i in files])
        for vcf in files:
            cmd = """{bcftools} view {vcf} -Oz -o {vcf}.gz
{bcftools} sort {vcf}.gz -o {vcf}.sort.gz
{bcftools} index {vcf}.sort.gz """.format(
                bcftools=bcftools,
                vcf=vcf,
            )
        cmd += """{bcftools} merge {combine_vcf} -o {prefix}.merged.vcf""".format(
            bcftools=bcftools, combine_vcf=combine_vcf, prefix=prefix)
    elif type == 'gvcf':
        combine_gvcf = ' '.join(['-V ' + i for i in files])
        mem, ref, genomicsdb, chr_list, b_size, map_file, reader_threads, num, tmp = gvcf_p[:]

        if len(files) < 1000:
            cmd = """{gatk4} --java-options "-Xmx{mem}g" CombineGVCFs -R {ref} {combine_gvcf} -O {prefix}.combined.g.vcf
{gatk4} --java-options "-Xmx{mem}g" GenotypeGVCFs -R {ref} -V {prefix}.combined.g.vcf -G StandardAnnotation -new-qual \\
-O {prefix}.combined.vcf""".format(gatk4=gatk4,
                                   mem=mem,
                                   ref=ref,
                                   combine_gvcf=combine_gvcf,
                                   prefix=prefix)