def parallel_clipoverlap(self, input_dir, output_dir, samples_list, bam_suffix="",
                         poolsize=None, samtools_dir=""):
    from RouToolPa.Tools.Samtools import SamtoolsV1
    SamtoolsV1.path = samtools_dir

    samples_to_handle = samples_list if samples_list else self.get_sample_list(input_dir)
    self.safe_mkdir(output_dir)

    options_list = []
    samtools_option_list = []
    for sample in samples_to_handle:
        sample_dir = "%s/%s/" % (output_dir, sample)
        self.safe_mkdir(sample_dir)
        # input BAMs are read from the per-sample subdirectories of input_dir
        input_bam = "%s/%s/%s%s.bam" % (input_dir, sample, sample, bam_suffix)
        output_bam = "%s/%s/%s.clipped.bam" % (output_dir, sample, sample)
        options_list.append(self.parse_options(input_bam, output_bam, poolsize=poolsize))
        samtools_option_list.append(output_bam)

    self.parallel_execute(options_list=options_list)
    SamtoolsV1.parallel_execute(options_list=samtools_option_list, cmd="samtools index")
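# Usage sketch (hypothetical paths and sample names; assumes an instance of this tool
# class, here called `bamutil`, with parallel execution configured):
#
#     bamutil.parallel_clipoverlap("alignment", "clipped", ["sampleA", "sampleB"],
#                                  bam_suffix=".mkdup", poolsize=1000)
#
# Each input BAM is expected at <input_dir>/<sample>/<sample><bam_suffix>.bam; the
# clipped BAM is written to <output_dir>/<sample>/<sample>.clipped.bam and then indexed.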
def mkdup(self, input_bam, output_prefix):
    output_bam = "%s.bam" % output_prefix
    stat_file = "%s.stat" % output_prefix
    options = self.parse_options(input_bam, output_bam, stat_file)
    self.execute(options=options)
    SamtoolsV1.index(output_bam)
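# For reference, this presumably wraps bamUtil's duplicate marking; a roughly
# equivalent shell invocation (flag names from bamUtil, paths hypothetical) would be:
#
#     bam dedup --in sample.bam --out sample.mkdup.bam --log sample.mkdup.stat
#
# followed by `samtools index sample.mkdup.bam`, which is what SamtoolsV1.index() does.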
                    default=1000, help="Maximum value to show. Default: 1000")
parser.add_argument("-g", "--logbase", action="store", dest="logbase", type=int, default=10,
                    help="Logbase to use for log-scaled histograms")
parser.add_argument("-e", "--extensions", action="store", dest="extensions",
                    type=lambda x: x.split(","), default=["png"],
                    help="Comma-separated list of extensions for histogram files. Default: png only")

args = parser.parse_args()

SamtoolsV1.draw_insert_size_distribution(args.input, args.output_prefix,
                                         width_of_bin=args.width_of_bins,
                                         max_insert_size=args.max_insert_size,
                                         min_insert_size=args.min_insert_size,
                                         extensions=args.extensions,
                                         separator=args.separator,
                                         logbase=args.logbase)
def clipoverlap(self, input, output, poolsize=None):
    from RouToolPa.Tools.Samtools import SamtoolsV1

    options = self.parse_options(input, output, poolsize=poolsize)
    self.execute(options=options, cmd="bam clipOverlap")
    SamtoolsV1.index(output)
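# The wrapped tool is bamUtil's clipOverlap, which soft-clips the overlapping portion
# of read pairs so overlapping bases are not double-counted downstream. A roughly
# equivalent shell call (paths hypothetical) would be:
#
#     bam clipOverlap --in sample.bam --out sample.clipped.bam --poolSize 1000
#
# --poolSize bounds the number of reads held in memory while waiting for mates.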
def get_insert_size_distribution(self, sample_directory, forward_files, reverse_files,
                                 estimated_insert_size, output_prefix, genome, genome_index,
                                 input_files_are_fasta=False, read_orientation="fr",
                                 parsing_mode="index_db", number_of_bins=100,
                                 genome_format="fasta", store_sam=False, aligner="bowtie2",
                                 aligner_binary_dir="", xlimit_for_histo=None):
    sample_dir = os.path.abspath(sample_directory)
    output_pref = "%s/%s" % (sample_dir, output_prefix)
    min_contig_len_threshold = 3 * estimated_insert_size
    region_bed_file = "%s/%s.contig.bed" % (sample_dir, output_prefix)

    self.make_region_bed_file_from_file(genome, region_bed_file,
                                        min_len=min_contig_len_threshold,
                                        parsing_mode=parsing_mode,
                                        input_format=genome_format)

    output_filtered_len_file = "%s.filtered.len" % output_pref
    output_all_len_file = "%s.all.len" % output_pref
    output_sam = "%s.sam" % output_pref
    output_bam = "%s.bam" % output_pref
    aligner_log = "%s.aligner.log" % output_pref

    forward_reads = forward_files if isinstance(forward_files, str) else ",".join(forward_files)
    reverse_reads = reverse_files if isinstance(reverse_files, str) else ",".join(reverse_files)

    bowtie_options = " --very-sensitive"
    bowtie_options += " -x %s" % genome_index
    bowtie_options += " -1 %s" % forward_reads
    bowtie_options += " -2 %s" % reverse_reads
    bowtie_options += " -p %i" % self.threads
    bowtie_options += " -X %i" % min_contig_len_threshold
    bowtie_options += " --%s" % read_orientation
    bowtie_options += " -f" if input_files_are_fasta else ""

    bowtie2_string = "bowtie2 %s 2>%s" % (bowtie_options, aligner_log)

    bwa_options = " mem"
    bwa_options += " -t %i" % self.threads
    bwa_options += " %s" % genome_index
    bwa_options += " %s %s" % (forward_reads, reverse_reads)

    bwa_string = "bwa %s" % bwa_options

    tee_string = "tee %s" % output_sam
    samtools_string = "samtools view -L %s -" % region_bed_file
    awk_string = "awk -F'\\t' '{ if ($9 > 0) print $9}'"

    if aligner == "bowtie2":
        aligner_string = bowtie2_string
    elif aligner == "bwa":
        aligner_string = bwa_string
    else:
        raise ValueError("Unrecognized aligner: %s" % aligner)

    aligner_string = "%s%s" % (self.check_path(aligner_binary_dir), aligner_string)

    final_string = "%s | %s | %s | %s > %s" % (aligner_string, tee_string, samtools_string,
                                               awk_string, output_filtered_len_file)
    full_len_string = "%s %s > %s" % (awk_string, output_sam, output_all_len_file)

    self.execute(cmd=final_string)
    self.execute(cmd=full_len_string)

    self.draw_histogram_from_file(output_filtered_len_file, output_pref,
                                  max_length=xlimit_for_histo if xlimit_for_histo else min_contig_len_threshold,
                                  number_of_bins=number_of_bins,
                                  xlabel="Insert size", ylabel="Number of fragments",
                                  title="Insert size distribution",
                                  extensions=("png", "svg"))

    SamtoolsV1.convert_sam_and_index(output_sam, output_bam)
    if not store_sam:
        os.remove(output_sam)
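# For illustration, with aligner="bowtie2" and output_prefix "smpl", final_string has
# roughly this shape (all paths hypothetical):
#
#     bowtie2 --very-sensitive -x idx -1 s_1.fq -2 s_2.fq -p 8 -X 1500 --fr 2>smpl.aligner.log \
#         | tee smpl.sam \
#         | samtools view -L smpl.contig.bed - \
#         | awk -F'\t' '{ if ($9 > 0) print $9}' > smpl.filtered.len
#
# tee keeps the full SAM on disk while the filtered insert sizes (TLEN, column 9,
# positive values only) stream into the .filtered.len file; contigs shorter than 3x the
# estimated insert size are excluded via the BED file to avoid edge effects.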
parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input", action="store", dest="input",
                    help="Input sam file. Default: stdin")
parser.add_argument("-o", "--output", action="store", dest="output",
                    help="Output file with reads. Default: stdout")
parser.add_argument("-r", "--read_name_file", action="store", dest="read_name_file", required=True,
                    help="File with full read names or their fragments")
parser.add_argument("-m", "--mode", action="store", dest="mode", default="include",
                    help="Output mode. Allowed: include(default), remove")
parser.add_argument("-c", "--comparison_mode", action="store", dest="comparison_mode", default="exact",
                    help="Read name comparison mode. Allowed: exact(default), partial")

args = parser.parse_args()

input_sam_fd = open(args.input, "r") if args.input else sys.stdin
output_sam_fd = open(args.output, "w") if args.output else sys.stdout

read_name_list = IdList(filename=args.read_name_file)

SamtoolsV1.get_reads_by_name(read_name_list, input_sam_fd, output_sam_fd,
                             mode=args.mode, search_mode=args.comparison_mode)

if args.input:
    input_sam_fd.close()
if args.output:
    output_sam_fd.close()
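# Example invocation (script and file names hypothetical), filtering a SAM stream down
# to the reads listed in names.ids:
#
#     samtools view -h sample.bam | get_reads_by_name.py -r names.ids -m include -c exact > subset.sam
#
# With -c partial, any read whose name contains a listed fragment matches.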
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse
from RouToolPa.Tools.Samtools import SamtoolsV1

parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input", action="store", dest="input",
                    help="Input sam file")
parser.add_argument("-o", "--output", action="store", dest="output",
                    help="Output file with read names")

args = parser.parse_args()

SamtoolsV1.get_read_names(args.input, args.output)
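# Example invocation (script and file names hypothetical):
#
#     ./get_read_names.py -i sample.sam -o sample.read_names.ids
#
# The script simply delegates to SamtoolsV1.get_read_names.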
                   sort_by_name=False,
                   max_per_sorting_thread_memory="10G")

if args.add_read_groups_by_picard:
    sorted_alignment_picard_groups = "%s.picard_groups.%s" % (args.prefix, args.alignment_format)
    AddOrReplaceReadGroups.add_read_groups(sorted_alignment, sorted_alignment_picard_groups,
                                           RGID=args.prefix, RGLB=args.prefix, RGPL=args.prefix,
                                           RGSM=args.prefix, RGPU=args.prefix)
    if args.alignment_format == "bam":
        SamtoolsV1.index(sorted_alignment_picard_groups if sorted_alignment_picard_groups else sorted_alignment)

MarkDuplicates.run(sorted_alignment_picard_groups if sorted_alignment_picard_groups else sorted_alignment,
                   final_alignment, duplicates_stat_file)

if args.alignment_format == "bam":
    SamtoolsV1.index(final_alignment)

"""
GenomeCov.get_coverage(final_alignment, genome_bed, coverage_file)
"""

if not args.retain_temp:
    os.remove(sorted_alignment)
    if args.add_read_groups_by_picard:
        os.remove(sorted_alignment_picard_groups)

if args.calculate_median_coverage or args.calculate_mean_coverage:
def align(self, genome_dir, forward_read_list, reverse_read_list=None, annotation_gtf=None,
          sample=None, feature_from_gtf_to_use_as_exon=None, exon_tag_to_use_as_transcript_id=None,
          exon_tag_to_use_as_gene_id=None, length_of_sequences_flanking_junction=None,
          junction_tab_file_list=None, three_prime_trim=None, five_prime_trim=None,
          adapter_seq_for_three_prime_clip=None, max_mismatch_percent_for_adapter_trimming=None,
          three_prime_trim_after_adapter_clip=None, output_type="BAM", sort_bam=True,
          max_memory_per_thread_for_bam_sorting="4G", include_unmapped_reads_in_bam=True,
          output_unmapped_reads=True, output_dir="./", two_pass_mode=False, max_intron_length=None):

    if reverse_read_list:
        if len(forward_read_list) != len(reverse_read_list):
            raise ValueError("Wrong read file pairing: the numbers of forward and reverse read files differ")

    options = " --runThreadN %i" % self.threads
    options += " --genomeDir %s" % os.path.abspath(genome_dir)
    options += " --sjdbGTFfile %s" % annotation_gtf if annotation_gtf else ""
    options += " --sjdbGTFtagExonParentTranscript %s" % exon_tag_to_use_as_transcript_id if exon_tag_to_use_as_transcript_id else ""
    options += " --sjdbGTFtagExonParentGene %s" % exon_tag_to_use_as_gene_id if exon_tag_to_use_as_gene_id else ""
    options += " --sjdbGTFfeatureExon %s" % feature_from_gtf_to_use_as_exon if feature_from_gtf_to_use_as_exon else ""
    options += " --sjdbOverhang %i" % length_of_sequences_flanking_junction if length_of_sequences_flanking_junction else ""
    options += (" --sjdbFileChrStartEnd %s" % (os.path.abspath(junction_tab_file_list)
                                               if isinstance(junction_tab_file_list, str)
                                               else " ".join(map(os.path.abspath, junction_tab_file_list)))) if junction_tab_file_list else ""

    forward_read_abs_path_list = [os.path.abspath(forward_read_list)] if isinstance(forward_read_list, str) \
        else list(map(os.path.abspath, forward_read_list))
    reverse_read_abs_path_list = ([os.path.abspath(reverse_read_list)] if isinstance(reverse_read_list, str)
                                  else list(map(os.path.abspath, reverse_read_list))) if reverse_read_list else None

    # wrap compressed read files in external extraction commands where necessary
    forward_read_abs_path_list = self.add_external_extraction_to_filelist(forward_read_abs_path_list)
    reverse_read_abs_path_list = self.add_external_extraction_to_filelist(reverse_read_abs_path_list) if reverse_read_list else None

    options += " --readFilesIn %s" % " ".join(forward_read_abs_path_list)
    options += " %s" % " ".join(reverse_read_abs_path_list) if reverse_read_abs_path_list else ""
    options += " --clip3pNbases %i" % three_prime_trim if three_prime_trim else ""
    options += " --clip5pNbases %i" % five_prime_trim if five_prime_trim else ""
    options += " --clip3pAdapterSeq %s" % adapter_seq_for_three_prime_clip if adapter_seq_for_three_prime_clip else ""
    options += " --clip3pAdapterMMp %f" % max_mismatch_percent_for_adapter_trimming if max_mismatch_percent_for_adapter_trimming else ""
    options += " --clip3pAfterAdapterNbases %i" % three_prime_trim_after_adapter_clip if three_prime_trim_after_adapter_clip else ""
    # STAR always emits an unsorted BAM here; coordinate sorting, if requested,
    # is delegated to samtools below instead of STAR's "SortedByCoordinate" mode
    options += " --outSAMtype %s %s" % (output_type, "Unsorted")
    options += " --outSAMunmapped Within" if include_unmapped_reads_in_bam else ""
    options += " --outReadsUnmapped Fastx" if output_unmapped_reads else ""
    # trailing slash ensures STAR writes into output_dir, matching the paths used below
    options += " --outFileNamePrefix %s/" % output_dir if output_dir else ""
    options += " --twopassMode Basic" if two_pass_mode else ""
    options += " --alignIntronMax %i" % max_intron_length if max_intron_length else ""

    self.execute(options)

    if sort_bam:
        print("\tSorting...")
        unsorted_bam = "%s/Aligned.out.bam" % output_dir
        sorted_bam = "%s/%s.bam" % (output_dir,
                                    "%s.sorted" % sample if sample else "Aligned.sortedByCoord.out")
        SamtoolsV1.threads = self.threads
        SamtoolsV1.sort(unsorted_bam, sorted_bam,
                        max_memory_per_thread=max_memory_per_thread_for_bam_sorting)
        print("\tIndexing bam file...")
        SamtoolsV1.index(sorted_bam)
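# For illustration, a basic paired-end call with two_pass_mode=True assembles roughly
# this STAR command line (index and read paths hypothetical):
#
#     STAR --runThreadN 8 --genomeDir /data/star_index \
#          --readFilesIn /data/s_1.fastq /data/s_2.fastq \
#          --outSAMtype BAM Unsorted --outSAMunmapped Within --outReadsUnmapped Fastx \
#          --outFileNamePrefix out/ --twopassMode Basic
#
# after which out/Aligned.out.bam is coordinate-sorted and indexed with samtools.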
def align(self, sample_dir, reference_index, aligner="bwa", sample_list=None, outdir="./",
          quality_score_type="phred33", read_suffix="", read_extension="fastq",
          alignment_format="bam", threads=None, mark_duplicates=True, platform="Illumina",
          add_read_groups_by_picard=False, gzipped_reads=False):
    self.init_tools(threads=threads)

    samples = self.get_sample_list(sample_dir, sample_list=sample_list)
    self.prepare_dirs(samples, outdir=outdir)

    if aligner == "bowtie2":
        aligner_tool = Bowtie2
    elif aligner == "bwa":
        aligner_tool = BWA
    else:
        raise ValueError("Unrecognized aligner: %s" % aligner)

    for sample in samples:
        read_prefix = "%s/%s/%s%s" % (sample_dir, sample, sample, read_suffix)
        forward_reads = "%s_1.%s%s" % (read_prefix, read_extension, ".gz" if gzipped_reads else "")
        reverse_reads = "%s_2.%s%s" % (read_prefix, read_extension, ".gz" if gzipped_reads else "")

        output_prefix = "%s/%s/%s" % (outdir, sample, sample)
        raw_alignment = "%s.%s" % (output_prefix, alignment_format)
        final_alignment = "%s.mkdup.%s" % (output_prefix, alignment_format)
        duplicates_stat_file = "%s.duplicates.stat" % output_prefix
        coverage_file = "%s.coverage.bed" % output_prefix

        sorted_alignment_picard_groups = None

        aligner_tool.align(reference_index, forward_reads_list=forward_reads,
                           reverse_reads_list=reverse_reads, unpaired_reads_list=None,
                           quality_score=quality_score_type, output_prefix=output_prefix,
                           output_format=alignment_format, read_group_name=sample,
                           PU="x", SM=sample, platform=platform, LB="x",
                           sort_by_coordinate=True, sort_by_name=False,
                           max_per_sorting_thread_memory=str(max(int(self.max_memory / self.threads), 1)) + "G")

        if add_read_groups_by_picard:
            sorted_alignment_picard_groups = "%s.picard_groups.%s" % (output_prefix, alignment_format)
            AddOrReplaceReadGroups.add_read_groups(raw_alignment, sorted_alignment_picard_groups,
                                                   RGID=sample, RGLB=sample, RGPL=platform,
                                                   RGSM=sample, RGPU=sample)
            if alignment_format == "bam":
                SamtoolsV1.index(sorted_alignment_picard_groups if sorted_alignment_picard_groups else raw_alignment)

        if mark_duplicates:
            MarkDuplicates.run(sorted_alignment_picard_groups if sorted_alignment_picard_groups else raw_alignment,
                               final_alignment, duplicates_stat_file)
            if alignment_format == "bam":
                SamtoolsV1.index(final_alignment)
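# Usage sketch (hypothetical paths; assumes a pipeline instance `pipeline` with
# threads and max_memory configured):
#
#     pipeline.align("samples/", "reference/genome", aligner="bwa",
#                    outdir="alignment/", mark_duplicates=True, gzipped_reads=True)
#
# Reads are expected as <sample_dir>/<sample>/<sample><read_suffix>_1.fastq.gz and
# ..._2.fastq.gz; per-sample output is <outdir>/<sample>/<sample>.mkdup.bam plus its index.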
SamtoolsV1.threads = args.threads

if args.prepare_bam or args.mix_ends:
    FileRoutines.safe_mkdir(FileRoutines.check_path(args.temp_dir))
    prepared_pe_bam_file = "%s.bam" % args.prepared_bam_prefix
    prepared_unpaired_bam_file = ("%s.unpaired.bam" % args.prepared_bam_prefix) if args.mix_ends else None
    """
    SamtoolsV1.prepare_bam_for_read_extraction(args.input, args.prepared_bam, temp_file_prefix=args.temp_dir,
                                               max_memory_per_thread=args.max_memory_per_thread)
    """
    SamtoolsV1.prepare_bam_for_read_extraction(args.input, prepared_pe_bam_file,
                                               temp_file_prefix=args.temp_dir,
                                               max_memory_per_thread=args.max_memory_per_thread,
                                               bam_file_to_write_unpaired_reads=prepared_unpaired_bam_file)
if args.paired:
    left_fastq = "%s_1.fastq" % args.out_prefix
    right_fastq = "%s_2.fastq" % args.out_prefix
    unpaired_fastq = "%s.unpaired.fastq" % args.out_prefix
else:
    left_fastq = "%s.fastq" % args.out_prefix
    right_fastq = None
    # defined in both branches so the mix_ends conversion below cannot hit a NameError
    unpaired_fastq = "%s.unpaired.fastq" % args.out_prefix

if args.mix_ends:
    BamToFastq.convert(prepared_unpaired_bam_file, unpaired_fastq, out_right_fastq=None)
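# prepare_bam_for_read_extraction is expected to group mates together (the usual
# approach is a name sort) so that paired FASTQ files stay in sync. A minimal sketch
# of the equivalent plain samtools commands (paths hypothetical):
#
#     samtools sort -n -T tmp/prefix -o prepared.bam input.bam
#     samtools fastq -1 out_1.fastq -2 out_2.fastq -s out.unpaired.fastq prepared.bam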
sample_list = args.samples if args.samples else Pipeline.get_sample_list(args.samples_dir)

FileRoutines.safe_mkdir(args.output_dir)

for sample in sample_list:
    print("Handling %s" % sample)
    sample_dir = "%s/%s/" % (args.samples_dir, sample)
    alignment_sample_dir = "%s/%s/" % (args.output_dir, sample)
    FileRoutines.safe_mkdir(alignment_sample_dir)
    filetypes, forward_files, reverse_files, se_files = FileRoutines.make_lists_forward_and_reverse_files(sample_dir)

    print("\tAligning reads...")

    STAR.align_miRNA(args.genome_dir, se_files, output_dir=alignment_sample_dir,
                     annotation_gtf=args.annotation_gtf if not args.genome_fasta else None,
                     max_memory_for_bam_sorting=args.max_memory_for_bam_sorting,
                     max_alignments_per_read=args.max_number_of_alignments_per_read,
                     no_soft_clip=args.enable_soft_clipping,
                     max_number_of_mismatches=args.max_number_of_mismatches,
                     max_relative_number_of_mismatches=args.max_relative_number_of_mismatches)

    print("\tIndexing bam file...")
    resulting_bam_file = "%s/Aligned.sortedByCoord.out.bam" % alignment_sample_dir
    SamtoolsV1.index(resulting_bam_file)
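# Expected layout sketch (hypothetical names): each sample keeps its single-end miRNA
# reads in its own directory, e.g. samples/liver_1/liver_1.fastq. STAR writes the
# per-sample coordinate-sorted BAM to <output_dir>/<sample>/Aligned.sortedByCoord.out.bam,
# which is the file indexed above.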
def call_variants(self, sample_dir, reference, merged_prefix, sample_list=None, outdir="./",
                  suffix=None, input="alignment", input_filetype="bam", threads=None,
                  mark_duplicates=False, known_variants_vcf=None, genotyping_mode="DISCOVERY",
                  output_mode="EMIT_VARIANTS_ONLY", stand_call_conf=30,
                  skip_base_score_recalibration=False, iteration_number=3,
                  SNP_QD=2.0, SNP_FS=30.0, SNP_MQ=40.0, SNP_MappingQualityRankSum=-12.5,
                  SNP_ReadPosRankSum=-8.0, indel_QD=2.0, indel_ReadPosRankSum=-20.0,
                  indel_FS=200.0, SNP_filter_name="ambiguous_snp", indel_filter_name="ambiguous_indel",
                  analyze_covariates=True, include_region_id_file=None, exclude_region_id_file=None):

    SamtoolsV1.check_for_fasta_index(reference)

    CreateSequenceDictionary.jar_path = self.Picard_dir
    CreateSequenceDictionary.check_for_fasta_dict(reference)

    for tool in (VariantFiltration,
                 MarkDuplicates,
                 RealignerTargetCreator,
                 IndelRealigner,
                 BaseRecalibrator,
                 PrintReads,
                 HaplotypeCaller,
                 SelectVariants,
                 GenotypeGVCFs,
                 CombineVariants):
        tool.threads = threads if threads else self.threads
        tool.max_memory = "%ig" % self.max_memory
        tool.jar_path = self.GATK_dir

    samples = self.get_sample_list(sample_dir, sample_list=sample_list)
    self.prepare_dirs(samples, outdir=outdir, include_alignment_dir=input == "reads")

    if input == "reads":
        pass
    elif input == "alignment":
        alignment_filename_prefix_template = "%%s%s" % suffix

    known_sites = known_variants_vcf
    # perform a single pass if base score recalibration is skipped
    iterations = 1 if skip_base_score_recalibration else iteration_number

    for sample in samples:
        if mark_duplicates:
            """
            java -Xmx100g -jar ~/tools/picard-tools-2.5.0/picard.jar MarkDuplicates \
                I=${bam} \
                O=${bam%bam}rmdup.bam \
                M=${bam}.mark_dup_metrics.txt
            java -jar ~/tools/picard-tools-2.5.0/picard.jar BuildBamIndex \
                INPUT=${bam%bam}rmdup.bam
            """
            pass
            # sample_alignment_prefix =
        else:
            sample_alignment_prefix = "%s/%s/%s" % (sample_dir, sample,
                                                    alignment_filename_prefix_template % sample)

        sample_alignment = "%s.%s" % (sample_alignment_prefix, input_filetype)
        sample_intervals_for_realignment = "%s.forIndelRealigner.intervals" % sample_alignment_prefix
        sample_realigned_bam = "%s.realigned.bam" % sample_alignment_prefix
        """
        RealignerTargetCreator.create(reference, sample_alignment,
                                      output=sample_intervals_for_realignment,
                                      known_indels_vcf=None, max_interval_size=None,
                                      min_reads_cov=None, mismatch_fraction=None,
                                      window_size=None, default_base_qualities=None)
        IndelRealigner.realign(reference, sample_alignment, sample_realigned_bam,
                               target_intervals=sample_intervals_for_realignment,
                               known_indels_vcf=None, model=None, lod_threshold=None,
                               entropy_threshold=None, max_cons=None,
                               max_size_for_movement=None, max_pos_move=None,
                               max_reads_for_cons=None, max_reads_for_realignment=None,
                               max_reads_in_memory=None, no_original_tags=False,
                               nway_out=False, default_base_qualities=None)
        """

    for iteration_index in range(0, iterations):
        gvcf_list = []

        merged_vcf_prefix = "%s/SNPcall/%s.iteration%i" % (outdir, merged_prefix, iteration_index)
        merged_raw_vcf_prefix = "%s.raw" % merged_vcf_prefix
        merged_raw_vcf = "%s.vcf" % merged_raw_vcf_prefix
        merged_raw_snp_vcf = "%s.raw.snp.vcf" % merged_vcf_prefix
        merged_with_filters_snp_vcf = "%s.with_filters.snp.vcf" % merged_vcf_prefix
        merged_filtered_snp_vcf = "%s.filtered.snp.vcf" % merged_vcf_prefix
        merged_raw_indel_vcf = "%s.raw.indel.vcf" % merged_vcf_prefix
        merged_with_filters_indel_vcf = "%s.with_filters.indel.vcf" % merged_vcf_prefix
        merged_filtered_indel_vcf = "%s.filtered.indel.vcf" % merged_vcf_prefix
        merged_filtered_combined_vcf = "%s.filtered.combined.vcf" % merged_vcf_prefix

        for sample in samples:
            vcf_prefix = "%s/SNPcall/%s/%s.iteration%i" % (outdir, sample, sample, iteration_index)
            gvcf = "%s.g.vcf" % vcf_prefix
            #raw_snp_vcf = "%s.raw.snp.gvcf" % vcf_prefix
            #raw_indel_vcf = "%s.raw.indel.gvcf" % vcf_prefix

            sample_alignment_prefix = "%s/%s/%s" % (sample_dir, sample,
                                                    alignment_filename_prefix_template % sample)
            sample_realigned_bam = "%s.realigned.bam" % sample_alignment_prefix
            # recalibration filenames are per sample and per iteration
            sample_recall_table = "%s.recall_data.iteration%i.grp" % (sample_alignment_prefix, iteration_index)
            sample_postrecall_table = "%s.postrecall_data.iteration%i.grp" % (sample_alignment_prefix, iteration_index)
            sample_recall_plots = "%s.recall.iteration%i.pdf" % (sample_alignment_prefix, iteration_index)
            sample_recall_csv = "%s.recall.iteration%i.csv" % (sample_alignment_prefix, iteration_index)
            sample_recalled_reads_bam = "%s.recal_reads.iteration%i.bam" % (sample_alignment_prefix, iteration_index)

            gvcf_list.append(gvcf)

            if ((not skip_base_score_recalibration) and known_sites is not None) or (iteration_index > 0):
                BaseRecalibrator.get_recalibration_table(reference, sample_realigned_bam,
                                                         sample_recall_table, known_sites,
                                                         include_region_id_file=include_region_id_file,
                                                         exclude_region_id_file=exclude_region_id_file)
                BaseRecalibrator.get_recalibration_table(reference, sample_realigned_bam,
                                                         sample_postrecall_table, known_sites,
                                                         BQSR=sample_recall_table,
                                                         include_region_id_file=include_region_id_file,
                                                         exclude_region_id_file=exclude_region_id_file)
                if analyze_covariates:
                    AnalyzeCovariates.plot_two_recall_table(reference, sample_recall_table,
                                                            sample_postrecall_table,
                                                            sample_recall_plots,
                                                            csv_out=sample_recall_csv)
                PrintReads.get_recalled_reads(reference, sample_realigned_bam,
                                              sample_recall_table, sample_recalled_reads_bam)
                #HaplotypeCaller.call(reference, sample_realigned, raw_vcf, genotyping_mode=genotyping_mode,
                #                     output_mode=output_mode, stand_emit_conf=stand_emit_conf,
                #                     stand_call_conf=stand_call_conf)
                # call on the recalibrated reads so that the gVCF for this sample exists
                # before the joint genotyping step below
                HaplotypeCaller.gvcf_call(reference, sample_recalled_reads_bam, gvcf,
                                          genotyping_mode=genotyping_mode, output_mode=output_mode,
                                          stand_call_conf=stand_call_conf,
                                          include_region_id_file=include_region_id_file,
                                          exclude_region_id_file=exclude_region_id_file)
            else:
                HaplotypeCaller.gvcf_call(reference, sample_realigned_bam, gvcf,
                                          genotyping_mode=genotyping_mode, output_mode=output_mode,
                                          stand_call_conf=stand_call_conf,
                                          include_region_id_file=include_region_id_file,
                                          exclude_region_id_file=exclude_region_id_file)

        GenotypeGVCFs.genotype(reference, gvcf_list, merged_raw_vcf_prefix)

        self.hardfilter_variants(reference, merged_raw_vcf, merged_vcf_prefix,
                                 SNP_QD=SNP_QD, SNP_FS=SNP_FS, SNP_MQ=SNP_MQ,
                                 SNP_MappingQualityRankSum=SNP_MappingQualityRankSum,
                                 SNP_ReadPosRankSum=SNP_ReadPosRankSum,
                                 indel_QD=indel_QD, indel_ReadPosRankSum=indel_ReadPosRankSum,
                                 indel_FS=indel_FS, SNP_filter_name=SNP_filter_name,
                                 indel_filter_name=indel_filter_name, threads=threads)
        """
        SelectVariants.select_variants(reference, merged_raw_vcf, merged_raw_snp_vcf,
                                       vartype="SNP", varfilter=None)
        SelectVariants.select_variants(reference, merged_raw_vcf, merged_raw_indel_vcf,
                                       vartype="INDEL", varfilter=None)
        VariantFiltration.filter_bad_SNP(reference, merged_raw_snp_vcf, merged_with_filters_snp_vcf,
                                         filter_name=SNP_filter_name, QD=SNP_QD, FS=SNP_FS, MQ=SNP_MQ,
                                         MappingQualityRankSum=SNP_MappingQualityRankSum,
                                         ReadPosRankSum=SNP_ReadPosRankSum)
        VariantFiltration.filter_bad_indel(reference, merged_raw_indel_vcf, merged_with_filters_indel_vcf,
                                           filter_name=indel_filter_name, QD=indel_QD,
                                           ReadPosRankSum=indel_ReadPosRankSum, FS=indel_FS)
        SelectVariants.remove_entries_with_filters(reference, merged_with_filters_snp_vcf,
                                                   merged_filtered_snp_vcf)
        SelectVariants.remove_entries_with_filters(reference, merged_with_filters_indel_vcf,
                                                   merged_filtered_indel_vcf)
        CombineVariants.combine_from_same_source(reference,
                                                 [merged_filtered_snp_vcf, merged_filtered_indel_vcf],
                                                 merged_filtered_combined_vcf)
        """
        # the filtered call set from this iteration seeds recalibration in the next one
        known_sites = merged_filtered_combined_vcf
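# The loop above bootstraps BQSR when no known variant set is available: iteration 0
# calls variants without recalibration (unless known_variants_vcf is supplied), the
# hard-filtered call set becomes known_sites, and later iterations recalibrate base
# qualities against it before calling again. A minimal sketch of that control flow:
#
#     known_sites = known_variants_vcf            # may be None on the first pass
#     for iteration in range(iterations):
#         recalibrate = known_sites is not None or iteration > 0
#         calls = call_all_samples(recalibrate, known_sites)   # hypothetical helper
#         known_sites = hard_filter(calls)                     # hypothetical helper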