# NOTE(review): this line looks like a whole module whose newlines and
# indentation were collapsed into single spaces; it is left byte-identical
# here because it ends in a truncated `def tgs_sv(sample,` that cannot be
# reproduced as valid code from this view.
#
# Module header: reads the 'gatk4', 'samtools' and 'Sniffles' entries of the
# 'Variation' section through getmyconfig.getConfig.
#
# tgs_snp_indel(ref, input, sample) -> str
#   Builds (does not execute) the text of a GATK4 SNP/INDEL command sequence
#   and returns it.
#   - `input`: every occurrence of '.rmdup.bam' is removed from it and the
#     result is used as `-I {input}.sorted.bam`. The parameter name shadows
#     the `input` builtin inside the function.
#   - `ref` is passed as `-R` to every command; `sample` prefixes all outputs.
#   Commands in the template, in order:
#     1. HaplotypeCaller (--pcr-indel-model AGGRESSIVE, --annotation-group
#        AS_StandardAnnotation, --minimum-mapping-quality 60) -> {sample}.vcf
#     2. SelectVariants --select-type-to-include SNP   -> {sample}.raw.snp.vcf
#     3. SelectVariants --select-type-to-include INDEL -> {sample}.raw.indel.vcf
#     4. VariantFiltration -filter "AS_QD < 2.0" --filter-name "ASQD2"
#        -> {sample}.snps.gatk.vcf
#     5. VariantFiltration -filter "AS_QD < 5.0" --filter-name "ASQD5"
#        -> {sample}.indel.gatk.vcf
#   NOTE(review): each `\\` in the template is presumably meant to be followed
#   by a newline (shell line continuation) - confirm against the unmangled file.
from vartools import getmyconfig gatk4 = getmyconfig.getConfig('Variation', 'gatk4') samtools = getmyconfig.getConfig('Variation', 'samtools') sniffles = getmyconfig.getConfig('Variation', 'Sniffles') def tgs_snp_indel(ref, input, sample): ### gatk4 pipeline ### input_new = input.replace('.rmdup.bam', '') outfile = """{gatk4} HaplotypeCaller -R {ref} -I {input}.sorted.bam \\ --pcr-indel-model AGGRESSIVE \\ --annotation-group AS_StandardAnnotation \\ --minimum-mapping-quality 60 \\ -O {sample}.vcf {gatk4} SelectVariants -R {ref} -V {sample}.vcf --select-type-to-include SNP -O {sample}.raw.snp.vcf {gatk4} SelectVariants -R {ref} -V {sample}.vcf --select-type-to-include INDEL -O {sample}.raw.indel.vcf {gatk4} VariantFiltration -R {ref} -V {sample}.raw.snp.vcf \\ -filter "AS_QD < 2.0" --filter-name "ASQD2" \\ -O {sample}.snps.gatk.vcf {gatk4} VariantFiltration -R {ref} -V {sample}.raw.indel.vcf \\ -filter "AS_QD < 5.0" --filter-name "ASQD5" \\ -O {sample}.indel.gatk.vcf """.format(gatk4=gatk4, ref=ref, input=input_new, sample=sample) return outfile def tgs_sv(sample,
import os,sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('./')))) from vartools import getmyconfig,make_freec_config samtools = getmyconfig.getConfig('Variation', 'samtools') bcftools = getmyconfig.getConfig('Variation', 'bcftools') vcfutils = getmyconfig.getConfig('Variation','vcfutils') gatk4 = getmyconfig.getConfig('Variation', 'gatk4') breakdancer = getmyconfig.getConfig('Variation', 'BreakDancer') bam2cfg = getmyconfig.getConfig('Variation','bam2cfg') crest = getmyconfig.getConfig('Variation','Crest') extractSClip = getmyconfig.getConfig('Variation','extractSClip') cnvnator = getmyconfig.getConfig('Variation', 'CNVnator') cnvnator2VCF = getmyconfig.getConfig('Variation','cnvnator2VCF') control_freec = getmyconfig.getConfig('Variation','control_freec') freec_WGS_config = getmyconfig.getConfig('Variation','freec_WGS_config') freec_WES_config = getmyconfig.getConfig('Variation','freec_WES_config') splitSNPindelVCF = getmyconfig.getConfig('Variation','splitSNPindelVCF') makeGraph = getmyconfig.getConfig('Variation','makeGraph') ## known_site='--known-sites /path/to/ref1.vcf --known-sites /path/to/ref2.vcf ....' def snp_indel_samtools(ref, input, sample, v_valling, bcftools_filter): outfile = '' #samtools_p = 'mpileup -C 50 -m 2 -F 0.002 -d 1000 -u -f' # vcfutils_p = 'varFilter -Q 20 -d 4 -D 1000' bcftools_mpileup = 'mpileup -d 1000 -Ov -f' bcftools_call = 'call -mv -Oz -o' ### Hard filtering if v_valling == 'single': ## input is a single bam file outfile = """{bcftools} {bcftools_mpileup} {ref} {input_bam} | {bcftools} {bcftools_call} {sample}.all.vcf.gz {bcftools} filter {bcftools_filter} {sample}.all.vcf.gz -o {sample}.filter.vcf.gz
import os
import re
from vartools import parsering, getmyconfig

BWA = getmyconfig.getConfig('Variation', 'bwa')
minimap2 = getmyconfig.getConfig('Variation', 'minimap2')
ngml = getmyconfig.getConfig('Variation', 'ngml')
samtools = getmyconfig.getConfig('Variation', 'samtools')
#picard = getmyconfig.getConfig(('Variation','picard'))
gatk4 = getmyconfig.getConfig('Variation', 'gatk4')


class Mapping(object):
    """Hold the settings of one read-mapping job and normalise its paths.

    The aligner is chosen by name from the module-level configuration
    values ``BWA``, ``minimap2`` and ``ngml``.
    """

    def __init__(self, maptools, inputs, outputs, refs, parameters):
        """Store the job settings.

        Args:
            maptools: Aligner name; ``'BWA'``, ``'Minimap2'`` and ``'NGMLR'``
                are recognised. For any other value ``self.maptools`` is
                left unset, so a later ``parse()`` raises ``AttributeError``.
            inputs: Input directory path.
            outputs: Output directory path.
            refs: Reference value, stored as given.
            parameters: Aligner parameters, stored as given.
        """
        # Scan the known aligner names; the first match wins.
        for label, configured in (
            ('BWA', BWA),
            ('Minimap2', minimap2),
            ('NGMLR', ngml),
        ):
            if maptools == label:
                self.maptools = configured
                break

        self.inputs = inputs
        self.outputs = outputs
        self.refs = refs
        self.parameters = parameters

    def parse(self):
        """Return the job settings with absolute, slash-terminated paths.

        Returns:
            A 6-tuple ``(maptools, parameters, refs, input_path, out_path,
            outfile)`` where both paths are absolute and end in ``'/'`` and
            ``outfile`` is a new empty list.
        """
        source_dir = os.path.abspath(self.inputs) + '/'
        target_dir = os.path.abspath(self.outputs) + '/'
        collected = []
        return (
            self.maptools,
            self.parameters,
            self.refs,
            source_dir,
            target_dir,
            collected,
        )
import os from vartools import getmyconfig annovar_dir = getmyconfig.getConfig('Variation', 'ANNOVAR') gff3ToGenePred = getmyconfig.getConfig('Variation', 'gff3ToGenePred') hg19_db = os.path.join( os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'database/genomicsdb/hg19/annovar') hg38_db = os.path.join( os.path.abspath(os.path.dirname(os.path.dirname(__file__))), 'database/genomicsdb/hg38/annovar') def annotation(tool, ref, vcf, gff3, out, species='hg19'): cmd = '' if tool == 'annovar': if species == 'hg19': cmd = """perl {annovar_dir}/convert2annovar.pl -format vcf4 {vcf} > {out}.avinput perl {annovar_dir}/annotate_variation.pl -buildver hg19 -geneanno -dbtype refGene {out}.avinput {humandb} --outfile {out} """.format(annovar_dir=annovar_dir, vcf=vcf, out=out, humandb=hg19_db) elif species == 'hg38': cmd = """perl {annovar_dir}/convert2annovar.pl -format vcf4 {vcf} > {out}.avinput perl {annovar_dir}/annotate_variation.pl -buildver hg38 -geneanno -dbtype refGene {out}.avinput {humandb} --outfile {out} """.format(annovar_dir=annovar_dir, vcf=vcf, out=out, humandb=hg38_db) else: cmd = """perl {annovar_dir}/convert2annovar.pl -format vcf4 {vcf} > {out}.avinput {gff3ToGenePred} {gff3} {species}/{species}_refGene.txt perl {annovar_dir}/retrieve_seq_from_fasta.pl --format refGene --seqfile {ref} {species}/{species}_refGene.txt --out {species}/{species}_efGeneMrna.fa perl {annovar_dir}/annotate_variation.pl -dbtype refGene {out}.avinput {species} --outfile {out} """.format(annovar_dir=annovar_dir, vcf=vcf, ref=ref,
### Do VQSR or Hard-filtering for GATK4 pipelines import os import vartools.getmyconfig as getmyconfig gatk4 = getmyconfig.getConfig('Variation', 'gatk4') def vqsr(ref, vcf, vqsr_dir, out): vqsr_config = os.path.join( os.path.abspath(vqsr_dir), 'vqsr_config.txt') ### resource data for training with open(vqsr_config) as fh: snp_resource = [] indel_resource = [] for lines in fh: if lines.startswith('#'): continue if lines.startswith('SNP'): snp, args, snp_file = lines.strip().split(' ') new_snp_file = os.path.join(vqsr_dir, snp_file) snp_resource.append(args + ' ' + new_snp_file + ' \\') elif lines.startswith('INDEL'): indel, args, indel_file = lines.strip().split(' ') new_indel_file = os.path.join(vqsr_dir, indel_file) indel_resource.append(args + ' ' + new_indel_file + ' \\') snp_resource_all = '\n'.join(snp_resource).strip('\\') indel_resource_all = '\n'.join(indel_resource).strip('\\') cmd = """{gatk4} VariantRecalibrator -R {ref} -V {vcf} \\ {snp_resource_all} \\ --trust-all-polymorphic \\ -tranche 100.0 -tranche 99.95 -tranche 99.9 -tranche 99.8 -tranche 99.6 -tranche 99.5 -tranche 99.4 -tranche 99.3 -tranche 99.0 -tranche 98.0 -tranche 97.0 -tranche 90.0 \\ -an QD -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an SOR -an DP \\
import vartools.getmyconfig as getmyconfig bcftools = getmyconfig.getConfig('Variation', 'bcftools') gatk4 = getmyconfig.getConfig('Variation', 'gatk4') def merge(files, type, prefix, *gvcf_p): cmd = '' if type == 'vcf': combine_vcf = ' '.join([i + 'sort.gz' for i in files]) for vcf in files: cmd = """{bcftools} view {vcf} -Oz -o {vcf}.gz {bcftools} sort {vcf}.gz -o {vcf}.sort.gz {bcftools} index {vcf}.sort.gz """.format( bcftools=bcftools, vcf=vcf, ) cmd += """{bcftools} merge {combine_vcf} -o {prefix}.merged.vcf""".format( bcftools=bcftools, combine_vcf=combine_vcf, prefix=prefix) elif type == 'gvcf': combine_gvcf = ' '.join(['-V ' + i for i in files]) mem, ref, genomicsdb, chr_list, b_size, map_file, reader_threads, num, tmp = gvcf_p[:] if len(files) < 1000: cmd = """{gatk4} --java-options "-Xmx{mem}g" CombineGVCFs -R {ref} {combine_gvcf} -O {prefix}.combined.g.vcf {gatk4} --java-options "-Xmx{mem}g" GenotypeGVCFs -R {ref} -V {prefix}.combined.g.vcf -G StandardAnnotation -new-qual \\ -O {prefix}.combined.vcf""".format(gatk4=gatk4, mem=mem, ref=ref, combine_gvcf=combine_gvcf, prefix=prefix)