Example #1
0
    def parallel_clipoverlap(self,
                             input_dir,
                             output_dir,
                             samples_list,
                             bam_suffix="",
                             poolsize=None,
                             samtools_dir=""):
        """
        Clip overlapping read pairs for a set of samples, then index the clipped BAMs.

        For every sample the input BAM is taken from
        <input_dir>/<sample>/<sample><bam_suffix>.bam and the result is written
        to <output_dir>/<sample>/<sample>.clipped.bam.

        :param input_dir: directory with one subdirectory per sample holding the input BAM
        :param output_dir: directory for results; it and the per-sample subdirectories
                           are created with self.safe_mkdir
        :param samples_list: samples to handle; when empty, the list is taken from
                             self.get_sample_list(input_dir)
        :param bam_suffix: text between the sample name and ".bam" in input file names
        :param poolsize: forwarded unchanged to self.parse_options
        :param samtools_dir: value assigned to SamtoolsV1.path before indexing
        """
        from RouToolPa.Tools.Samtools import SamtoolsV1
        SamtoolsV1.path = samtools_dir
        samples_to_handle = samples_list if samples_list else self.get_sample_list(
            input_dir)

        self.safe_mkdir(output_dir)

        options_list = []
        samtools_option_list = []

        for sample in samples_to_handle:
            self.safe_mkdir("%s/%s/" % (output_dir, sample))
            # Inputs live under input_dir, outputs under output_dir; the two
            # directories may be the same but do not have to be.
            input_bam = "%s/%s/%s%s.bam" % (input_dir, sample, sample,
                                            bam_suffix)
            output_bam = "%s/%s/%s.clipped.bam" % (output_dir, sample, sample)

            options_list.append(
                self.parse_options(input_bam, output_bam, poolsize=poolsize))
            # Each clipped BAM is the sole argument of its "samtools index" call.
            samtools_option_list.append(output_bam)

        self.parallel_execute(options_list=options_list)
        SamtoolsV1.parallel_execute(options_list=samtools_option_list,
                                    cmd="samtools index")
Example #2
0
    def mkdup(self, input_bam, output_prefix):
        """
        Run the tool on input_bam and index the BAM it produces.

        Output names are derived from output_prefix: "<output_prefix>.bam" for
        the alignment and "<output_prefix>.stat" for the statistics file.

        :param input_bam: alignment file handed to self.parse_options as input
        :param output_prefix: common prefix of the two output files
        """
        marked_bam = "%s.bam" % output_prefix
        metrics_file = "%s.stat" % output_prefix

        self.execute(
            options=self.parse_options(input_bam, marked_bam, metrics_file))
        SamtoolsV1.index(marked_bam)
Example #3
0
                    default=1000,
                    help="Maximum value to show. Default: 1000")
# Base of the logarithm used when a histogram is drawn on a log scale.
parser.add_argument("-g", "--logbase", action="store", dest="logbase",
                    type=int, default=10,
                    help="Logbase to use for log-scaled histograms")
# Output image formats; the comma-separated value is split into a list.
parser.add_argument("-e", "--extensions", action="store", dest="extensions",
                    type=lambda value: value.split(","), default=["png"],
                    help="Comma-separated list of extensions for histogram files. "
                         "Default: png only")

args = parser.parse_args()

# Every parsed option is forwarded to the drawing routine as-is.
SamtoolsV1.draw_insert_size_distribution(
    args.input,
    args.output_prefix,
    width_of_bin=args.width_of_bins,
    max_insert_size=args.max_insert_size,
    min_insert_size=args.min_insert_size,
    extensions=args.extensions,
    separator=args.separator,
    logbase=args.logbase)
Example #4
0
    def clipoverlap(self, input, output, poolsize=None):
        """
        Run "bam clipOverlap" for a single file and index the output.

        :param input: input file passed to self.parse_options
        :param output: output file; it is also handed to SamtoolsV1.index afterwards
        :param poolsize: forwarded unchanged to self.parse_options
        """
        # Imported locally, as in the rest of this wrapper.
        from RouToolPa.Tools.Samtools import SamtoolsV1

        self.execute(options=self.parse_options(input,
                                                output,
                                                poolsize=poolsize),
                     cmd="bam clipOverlap")
        SamtoolsV1.index(output)
Example #5
0
    def get_insert_size_distribution(self,
                                     sample_directory,
                                     forward_files,
                                     reverse_files,
                                     estimated_insert_size,
                                     output_prefix,
                                     genome,
                                     genome_index,
                                     input_files_are_fasta=False,
                                     read_orientation="fr",
                                     parsing_mode="index_db",
                                     number_of_bins=100,
                                     genome_format="fasta",
                                     store_sam=False,
                                     aligner="bowtie2",
                                     aligner_binary_dir="",
                                     xlimit_for_histo=None):
        """
        Align paired reads and collect the observed insert sizes.

        Steps, all using files named <sample_directory>/<output_prefix>.*:
          1. write a region BED file via self.make_region_bed_file_from_file
             with min_len = 3 * estimated_insert_size;
          2. run the aligner, keep its SAM output (tee) and, for alignments in
             the BED regions, store positive values of SAM column 9 in
             ".filtered.len";
          3. store positive column 9 values of the whole SAM in ".all.len";
          4. draw a histogram from the filtered values (png and svg);
          5. pass the SAM to SamtoolsV1.convert_sam_and_index and delete it
             unless store_sam is set.

        :param sample_directory: directory for all output files
        :param forward_files: forward read file (str) or a list of files
        :param reverse_files: reverse read file (str) or a list of files
        :param estimated_insert_size: rough insert size; tripled to get the
                                      length threshold and bowtie2 -X value
        :param output_prefix: basename prefix of the output files
        :param genome: genome file used to build the region BED file
        :param genome_index: aligner index
        :param input_files_are_fasta: add "-f" to the bowtie2 options
        :param read_orientation: bowtie2 orientation flag without dashes, e.g. "fr"
        :param parsing_mode: forwarded to self.make_region_bed_file_from_file
        :param number_of_bins: number of histogram bins
        :param genome_format: forwarded as input_format of the BED file helper
        :param store_sam: keep the intermediate SAM file
        :param aligner: "bowtie2" or "bwa"
        :param aligner_binary_dir: directory with the aligner binary
        :param xlimit_for_histo: histogram max_length; defaults to the length threshold
        :raises ValueError: if aligner is neither "bowtie2" nor "bwa"
        """
        sample_dir = os.path.abspath(sample_directory)
        output_pref = "%s/%s" % (sample_dir, output_prefix)
        min_contig_len_threshold = 3 * estimated_insert_size
        region_bed_file = "%s/%s.contig.bed" % (sample_dir, output_prefix)
        self.make_region_bed_file_from_file(genome,
                                            region_bed_file,
                                            min_len=min_contig_len_threshold,
                                            parsing_mode=parsing_mode,
                                            input_format=genome_format)

        output_filtered_len_file = "%s.filtered.len" % output_pref
        output_all_len_file = "%s.all.len" % output_pref
        output_sam = "%s.sam" % output_pref
        output_bam = "%s.bam" % output_pref
        # The template has to be filled in; left unformatted, the aligner log
        # would be written to a file literally named "%s.aligner.log".
        aligner_log = "%s.aligner.log" % output_pref

        # Both aligners take several files per mate as a comma-separated list.
        forward_reads = forward_files if isinstance(
            forward_files, str) else ",".join(forward_files)
        reverse_reads = reverse_files if isinstance(
            reverse_files, str) else ",".join(reverse_files)

        if aligner == "bowtie2":
            bowtie_options = "".join([
                " --very-sensitive",
                " -x %s" % genome_index,
                " -1 %s" % forward_reads,
                " -2 %s" % reverse_reads,
                " -p %i" % self.threads,
                " -X %i" % min_contig_len_threshold,
                " --%s" % read_orientation,
                " -f" if input_files_are_fasta else "",
            ])
            aligner_string = "bowtie2 %s 2>%s" % (bowtie_options, aligner_log)
        elif aligner == "bwa":
            bwa_options = "".join([
                " mem",
                " -t %i" % self.threads,
                " %s" % genome_index,
                " %s %s" % (forward_reads, reverse_reads),
            ])
            aligner_string = "bwa %s" % bwa_options
        else:
            raise ValueError("Unrecognized aligner: %s" % aligner)

        aligner_string = "%s%s" % (self.check_path(aligner_binary_dir),
                                   aligner_string)

        tee_string = "tee %s" % output_sam
        samtools_string = "samtools view -L %s -" % region_bed_file
        # Column 9 of a SAM record is printed when it is positive.
        awk_string = "awk -F'\\t' '{ if ($9 > 0) print $9}'"

        final_string = "%s | %s | %s | %s > %s" % (aligner_string, tee_string,
                                                   samtools_string, awk_string,
                                                   output_filtered_len_file)

        full_len_string = "%s %s > %s" % (awk_string, output_sam,
                                          output_all_len_file)

        self.execute(cmd=final_string)
        self.execute(cmd=full_len_string)

        self.draw_histogram_from_file(
            output_filtered_len_file,
            output_pref,
            max_length=xlimit_for_histo
            if xlimit_for_histo else min_contig_len_threshold,
            number_of_bins=number_of_bins,
            xlabel="Insert size",
            ylabel="Number of fragments",
            title="Insert size distribution",
            extensions=("png", "svg"))

        SamtoolsV1.convert_sam_and_index(output_sam, output_bam)

        if not store_sam:
            os.remove(output_sam)
Example #6
0


# Command-line interface: filter a SAM stream by a list of read names.
parser = argparse.ArgumentParser()

parser.add_argument("-i", "--input", action="store", dest="input",
                    help="Input sam file. Default: stdin")
parser.add_argument("-o", "--output", action="store", dest="output",
                    help="Output file with reads. Default: stdout")
parser.add_argument("-r", "--read_name_file", action="store", dest="read_name_file", required=True,
                    help="File with full read names or their fragments")
parser.add_argument("-m", "--mode", action="store", dest="mode", default="include",
                    help="Output mode. Allowed: include(default), remove")
parser.add_argument("-c", "--comparison_mode", action="store", dest="comparison_mode",
                    default="exact",
                    help="Read name comparison mode. Allowed: exact(default), partial")

args = parser.parse_args()

# Fall back to the standard streams when no file name is given. Files opened
# here are closed even if reading the name list or filtering fails; the
# standard streams are never closed.
input_sam_fd = open(args.input, "r") if args.input else sys.stdin
try:
    output_sam_fd = open(args.output, "w") if args.output else sys.stdout
    try:
        read_name_list = IdList(filename=args.read_name_file)
        SamtoolsV1.get_reads_by_name(read_name_list, input_sam_fd, output_sam_fd,
                                     mode=args.mode, search_mode=args.comparison_mode)
    finally:
        if args.output:
            output_sam_fd.close()
finally:
    if args.input:
        input_sam_fd.close()
Example #7
0
#!/usr/bin/env python
__author__ = 'Sergei F. Kliver'

import argparse
from RouToolPa.Tools.Samtools import SamtoolsV1



# Command-line interface: a SAM file to read and a file that receives the read names.
parser = argparse.ArgumentParser()

parser.add_argument("-i",
                    "--input",
                    action="store",
                    dest="input",
                    help="Input sam file")
parser.add_argument("-o",
                    "--output",
                    action="store",
                    dest="output",
                    help="Output file with read names")

args = parser.parse_args()

# Both paths are handed over exactly as given on the command line.
SamtoolsV1.get_read_names(args.input,
                          args.output)
Example #8
0
              sort_by_name=False,
              max_per_sorting_thread_memory="10G")

# Optionally rewrite the read groups of the sorted alignment; every read-group
# field (ID, LB, PL, SM, PU) is set to the run prefix.
if args.add_read_groups_by_picard:
    sorted_alignment_picard_groups = "%s.picard_groups.%s" % (
        args.prefix, args.alignment_format)
    AddOrReplaceReadGroups.add_read_groups(sorted_alignment,
                                           sorted_alignment_picard_groups,
                                           RGID=args.prefix,
                                           RGLB=args.prefix,
                                           RGPL=args.prefix,
                                           RGSM=args.prefix,
                                           RGPU=args.prefix)

# NOTE(review): within this excerpt sorted_alignment_picard_groups is assigned
# only in the branch above; presumably it is initialised (e.g. to None) earlier
# in the script, otherwise the expressions below raise NameError - confirm.
# The file with rewritten read groups is preferred over the plain sorted one.
if args.alignment_format == "bam":
    SamtoolsV1.index(sorted_alignment_picard_groups
                     if sorted_alignment_picard_groups else sorted_alignment)

# Mark duplicates in the chosen alignment, writing final_alignment and the
# statistics file.
MarkDuplicates.run(
    sorted_alignment_picard_groups if sorted_alignment_picard_groups else
    sorted_alignment, final_alignment, duplicates_stat_file)

# Index the final alignment as well when the output format is BAM.
if args.alignment_format == "bam":
    SamtoolsV1.index(final_alignment)
"""
GenomeCov.get_coverage(final_alignment, genome_bed, coverage_file)
if not args.retain_temp:
    os.remove(sorted_alignment)
    if args.add_read_groups_by_picard:
        os.remove(sorted_alignment_picard_groups)

if args.calculate_median_coverage or args.calculate_mean_coverage:
Example #9
0
    def align(self,
              genome_dir,
              forward_read_list,
              reverse_read_list=None,
              annotation_gtf=None,
              sample=None,
              feature_from_gtf_to_use_as_exon=None,
              exon_tag_to_use_as_transcript_id=None,
              exon_tag_to_use_as_gene_id=None,
              length_of_sequences_flanking_junction=None,
              junction_tab_file_list=None,
              three_prime_trim=None,
              five_prime_trim=None,
              adapter_seq_for_three_prime_clip=None,
              max_mismatch_percent_for_adapter_trimming=None,
              three_prime_trim_after_adapter_clip=None,
              output_type="BAM",
              sort_bam=True,
              max_memory_per_thread_for_bam_sorting="4G",
              include_unmapped_reads_in_bam=True,
              output_unmapped_reads=True,
              output_dir="./",
              two_pass_mode=False,
              max_intron_length=None):
        """
        Build the aligner option string, run the aligner and optionally sort and
        index its BAM output.

        The aligner is always asked for unsorted output ("--outSAMtype <type>
        Unsorted"); when sort_bam is set, <output_dir>/Aligned.out.bam is sorted
        with SamtoolsV1.sort and indexed with SamtoolsV1.index afterwards.

        :param genome_dir: directory with the genome index (--genomeDir)
        :param forward_read_list: forward read file (str) or list of files
        :param reverse_read_list: reverse read file (str) or list of files, optional
        :param annotation_gtf: value for --sjdbGTFfile
        :param sample: sample name; the sorted BAM is named "<sample>.sorted.bam",
                       or "Aligned.sortedByCoord.out.bam" when not given
        :param feature_from_gtf_to_use_as_exon: value for --sjdbGTFfeatureExon
        :param exon_tag_to_use_as_transcript_id: value for --sjdbGTFtagExonParentTranscript
        :param exon_tag_to_use_as_gene_id: value for --sjdbGTFtagExonParentGene
        :param length_of_sequences_flanking_junction: value for --sjdbOverhang
        :param junction_tab_file_list: file (str) or list of files for --sjdbFileChrStartEnd
        :param three_prime_trim: value for --clip3pNbases
        :param five_prime_trim: value for --clip5pNbases
        :param adapter_seq_for_three_prime_clip: value for --clip3pAdapterSeq
        :param max_mismatch_percent_for_adapter_trimming: value for --clip3pAdapterMMp
        :param three_prime_trim_after_adapter_clip: value for --clip3pAfterAdapterNbases
        :param output_type: first value of --outSAMtype
        :param sort_bam: sort and index the output with samtools after alignment
        :param max_memory_per_thread_for_bam_sorting: memory limit passed to SamtoolsV1.sort
        :param include_unmapped_reads_in_bam: add "--outSAMunmapped Within"
        :param output_unmapped_reads: add "--outReadsUnmapped Fastx"
        :param output_dir: value for --outFileNamePrefix and directory of the BAM files
        :param two_pass_mode: add "--twopassMode Basic"
        :param max_intron_length: value for --alignIntronMax
        :raises ValueError: if the numbers of forward and reverse read files differ
        """
        # Normalise both inputs to lists of absolute paths first. Comparing the
        # raw arguments would compare character counts when single paths are
        # given as strings and could reject a valid pair.
        forward_read_abs_path_list = [
            os.path.abspath(forward_read_list)
        ] if isinstance(forward_read_list, str) else list(
            map(os.path.abspath, forward_read_list))
        if reverse_read_list:
            reverse_read_abs_path_list = [
                os.path.abspath(reverse_read_list)
            ] if isinstance(reverse_read_list, str) else list(
                map(os.path.abspath, reverse_read_list))
            if len(forward_read_abs_path_list) != len(
                    reverse_read_abs_path_list):
                raise ValueError("Wrong read file pairing")
        else:
            reverse_read_abs_path_list = None

        options = " --runThreadN %i" % self.threads
        options += " --genomeDir %s" % os.path.abspath(genome_dir)
        options += (" --sjdbGTFfile %s" %
                    annotation_gtf) if annotation_gtf else ""
        options += (" --sjdbGTFtagExonParentTranscript %s" %
                    exon_tag_to_use_as_transcript_id
                    ) if exon_tag_to_use_as_transcript_id else ""
        options += (" --sjdbGTFtagExonParentGene %s" %
                    exon_tag_to_use_as_gene_id
                    ) if exon_tag_to_use_as_gene_id else ""
        options += (" --sjdbGTFfeatureExon %s" %
                    feature_from_gtf_to_use_as_exon
                    ) if feature_from_gtf_to_use_as_exon else ""

        options += (" --sjdbOverhang %i" %
                    length_of_sequences_flanking_junction
                    ) if length_of_sequences_flanking_junction else ""
        if junction_tab_file_list:
            if isinstance(junction_tab_file_list, str):
                junction_files = os.path.abspath(junction_tab_file_list)
            else:
                junction_files = " ".join(
                    map(os.path.abspath, junction_tab_file_list))
            options += " --sjdbFileChrStartEnd %s" % junction_files

        forward_read_abs_path_list = self.add_external_extraction_to_filelist(
            forward_read_abs_path_list)
        if reverse_read_abs_path_list is not None:
            reverse_read_abs_path_list = self.add_external_extraction_to_filelist(
                reverse_read_abs_path_list)

        options += " --readFilesIn %s" % " ".join(forward_read_abs_path_list)
        if reverse_read_abs_path_list:
            options += " %s" % " ".join(reverse_read_abs_path_list)

        options += (" --clip3pNbases %i" %
                    three_prime_trim) if three_prime_trim else ""
        options += (" --clip5pNbases %i" %
                    five_prime_trim) if five_prime_trim else ""
        options += (" --clip3pAdapterSeq %s" %
                    adapter_seq_for_three_prime_clip
                    ) if adapter_seq_for_three_prime_clip else ""
        options += (" --clip3pAdapterMMp %f" %
                    max_mismatch_percent_for_adapter_trimming
                    ) if max_mismatch_percent_for_adapter_trimming else ""
        options += (" --clip3pAfterAdapterNbases %i" %
                    three_prime_trim_after_adapter_clip
                    ) if three_prime_trim_after_adapter_clip else ""

        # Sorting is delegated to samtools below, so the aligner itself always
        # writes unsorted output.
        options += " --outSAMtype %s %s" % (output_type, "Unsorted")
        options += " --outSAMunmapped Within" if include_unmapped_reads_in_bam else ""
        options += " --outReadsUnmapped Fastx" if output_unmapped_reads else ""
        options += (" --outFileNamePrefix %s" %
                    output_dir) if output_dir else ""
        options += " --twopassMode Basic" if two_pass_mode else ""
        options += (" --alignIntronMax %i" %
                    max_intron_length) if max_intron_length else ""

        self.execute(options)

        if sort_bam:
            print("\tSorting...")
            unsorted_bam = "%s/Aligned.out.bam" % output_dir
            sorted_bam = "%s/%s.bam" % (output_dir,
                                        ("%s.sorted" % sample if sample else
                                         "Aligned.sortedByCoord.out"))
            SamtoolsV1.threads = self.threads
            SamtoolsV1.sort(
                unsorted_bam,
                sorted_bam,
                max_memory_per_thread=max_memory_per_thread_for_bam_sorting)

            print("\tIndexing bam file...")
            SamtoolsV1.index(sorted_bam)
Example #10
0
    def align(self,
              sample_dir,
              reference_index,
              aligner="bwa",
              sample_list=None,
              outdir="./",
              quality_score_type="phred33",
              read_suffix="",
              read_extension="fastq",
              alignment_format="bam",
              threads=None,
              mark_duplicates=True,
              platform="Illumina",
              add_read_groups_by_picard=False,
              gzipped_reads=False):
        """
        Align the paired reads of every sample, then optionally rewrite read
        groups, index and mark duplicates.

        Reads are expected at
        <sample_dir>/<sample>/<sample><read_suffix>_{1,2}.<read_extension>[.gz];
        results are written with the prefix <outdir>/<sample>/<sample>.

        :param sample_dir: directory with one subdirectory per sample
        :param reference_index: aligner index passed to the aligner's align()
        :param aligner: "bwa" or "bowtie2"
        :param sample_list: samples to handle; forwarded to self.get_sample_list
        :param outdir: output directory
        :param quality_score_type: quality encoding passed to the aligner
        :param read_suffix: text between the sample name and "_1"/"_2" in read file names
        :param read_extension: extension of read files without ".gz"
        :param alignment_format: output format; "bam" outputs are indexed
        :param threads: forwarded to self.init_tools
        :param mark_duplicates: run MarkDuplicates on the alignment
        :param platform: platform name used in read groups
        :param add_read_groups_by_picard: rewrite read groups with AddOrReplaceReadGroups
        :param gzipped_reads: read files carry an additional ".gz" extension
        :raises ValueError: if aligner is neither "bwa" nor "bowtie2"
        """
        self.init_tools(threads=threads)

        samples = self.get_sample_list(sample_dir, sample_list=sample_list)

        self.prepare_dirs(samples, outdir=outdir)

        if aligner == "bowtie2":
            aligner_tool = Bowtie2
        elif aligner == "bwa":
            aligner_tool = BWA
        else:
            raise ValueError("Unrecognized aligner: %s" % aligner)

        compression_extension = ".gz" if gzipped_reads else ""

        for sample in samples:
            read_prefix = "%s/%s/%s%s" % (sample_dir, sample, sample,
                                          read_suffix)
            forward_reads = "%s_1.%s%s" % (read_prefix, read_extension,
                                           compression_extension)
            reverse_reads = "%s_2.%s%s" % (read_prefix, read_extension,
                                           compression_extension)

            output_prefix = "%s/%s/%s" % (outdir, sample, sample)

            raw_alignment = "%s.%s" % (output_prefix, alignment_format)
            final_alignment = "%s.mkdup.%s" % (output_prefix, alignment_format)

            duplicates_stat_file = "%s.duplicates.stat" % output_prefix

            aligner_tool.align(
                reference_index,
                forward_reads_list=forward_reads,
                reverse_reads_list=reverse_reads,
                unpaired_reads_list=None,
                quality_score=quality_score_type,
                output_prefix=output_prefix,
                output_format=alignment_format,
                read_group_name=sample,
                PU="x",
                SM=sample,
                platform=platform,
                LB="x",
                sort_by_coordinate=True,
                sort_by_name=False,
                # Split the memory budget between sorting threads, at least 1G each.
                max_per_sorting_thread_memory=str(
                    max(int(self.max_memory / self.threads), 1)) + "G")

            # Downstream steps work on the file with rewritten read groups
            # when it was requested, otherwise on the raw alignment.
            alignment_to_process = raw_alignment
            if add_read_groups_by_picard:
                alignment_to_process = "%s.picard_groups.%s" % (
                    output_prefix, alignment_format)
                AddOrReplaceReadGroups.add_read_groups(
                    raw_alignment,
                    alignment_to_process,
                    RGID=sample,
                    RGLB=sample,
                    RGPL=platform,
                    RGSM=sample,
                    RGPU=sample)

            if alignment_format == "bam":
                SamtoolsV1.index(alignment_to_process)

            if mark_duplicates:
                MarkDuplicates.run(alignment_to_process, final_alignment,
                                   duplicates_stat_file)

                if alignment_format == "bam":
                    SamtoolsV1.index(final_alignment)
Example #11
0
SamtoolsV1.threads = args.threads

# Mixed-end extraction always needs a prepared BAM, so it implies preparation.
if args.prepare_bam or args.mix_ends:
    FileRoutines.safe_mkdir(FileRoutines.check_path(args.temp_dir))
    prepared_pe_bam_file = "%s.bam" % args.prepared_bam_prefix
    # Unpaired reads get a BAM of their own only in mixed-end mode.
    prepared_unpaired_bam_file = (
        "%s.unpaired.bam" %
        args.prepared_bam_prefix) if args.mix_ends else None
    SamtoolsV1.prepare_bam_for_read_extraction(
        args.input,
        prepared_pe_bam_file,
        temp_file_prefix=args.temp_dir,
        max_memory_per_thread=args.max_memory_per_thread,
        bam_file_to_write_unpaired_reads=prepared_unpaired_bam_file)

# The unpaired output name is needed whenever --mix_ends is set, whether or not
# the data are paired, so it is defined on every path.
unpaired_fastq = "%s.unpaired.fastq" % args.out_prefix
if args.paired:
    left_fastq = "%s_1.fastq" % args.out_prefix
    right_fastq = "%s_2.fastq" % args.out_prefix
else:
    left_fastq = "%s.fastq" % args.out_prefix
    right_fastq = None

if args.mix_ends:
    BamToFastq.convert(prepared_unpaired_bam_file,
                       unpaired_fastq,
                       out_right_fastq=None)
Example #12
0
# Samples named on the command line win; otherwise they are taken from the
# samples directory.
sample_list = args.samples or Pipeline.get_sample_list(args.samples_dir)

FileRoutines.safe_mkdir(args.output_dir)

for sample in sample_list:
    print("Handling %s" % sample)
    sample_dir = "%s/%s/" % (args.samples_dir, sample)
    alignment_sample_dir = "%s/%s/" % (args.output_dir, sample)
    FileRoutines.safe_mkdir(alignment_sample_dir)
    # Only the single-end file list is used for the alignment below.
    (filetypes, forward_files, reverse_files,
     se_files) = FileRoutines.make_lists_forward_and_reverse_files(sample_dir)

    print("\tAligning reads...")

    # The annotation is passed only when no genome fasta was supplied.
    # NOTE(review): no_soft_clip receives args.enable_soft_clipping as-is;
    # whether that is inverted depends on how the option is declared - confirm.
    STAR.align_miRNA(args.genome_dir,
                     se_files,
                     output_dir=alignment_sample_dir,
                     annotation_gtf=None
                     if args.genome_fasta else args.annotation_gtf,
                     max_memory_for_bam_sorting=args.max_memory_for_bam_sorting,
                     max_alignments_per_read=args.
                     max_number_of_alignments_per_read,
                     no_soft_clip=args.enable_soft_clipping,
                     max_number_of_mismatches=args.max_number_of_mismatches,
                     max_relative_number_of_mismatches=args.
                     max_relative_number_of_mismatches)

    print("\tIndexing bam file...")
    resulting_bam_file = "%s/Aligned.sortedByCoord.out.bam" % alignment_sample_dir
    SamtoolsV1.index(resulting_bam_file)
Example #13
0
    def call_variants(self,
                      sample_dir,
                      reference,
                      merged_prefix,
                      sample_list=None,
                      outdir="./",
                      suffix=None,
                      input="alignment",
                      input_filetype="bam",
                      threads=None,
                      mark_duplicates=False,
                      known_variants_vcf=None,
                      genotyping_mode="DISCOVERY",
                      output_mode="EMIT_VARIANTS_ONLY",
                      stand_call_conf=30,
                      skip_base_score_recalibration=False,
                      iteration_number=3,
                      SNP_QD=2.0,
                      SNP_FS=30.0,
                      SNP_MQ=40.0,
                      SNP_MappingQualityRankSum=-12.5,
                      SNP_ReadPosRankSum=-8.0,
                      indel_QD=2.0,
                      indel_ReadPosRankSum=-20.0,
                      indel_FS=200.0,
                      SNP_filter_name="ambiguous_snp",
                      indel_filter_name="ambiguous_indel",
                      analyze_covariates=True,
                      include_region_id_file=None,
                      exclude_region_id_file=None):
        """
        Iteratively call, genotype and hard-filter variants for a set of samples.

        In every iteration each sample is either recalibrated (BaseRecalibrator,
        optional AnalyzeCovariates, PrintReads) or passed to
        HaplotypeCaller.gvcf_call; after that GenotypeGVCFs.genotype and
        self.hardfilter_variants are run on the merged data, and the filtered
        combined VCF name becomes known_sites for the next iteration.

        :param sample_dir: directory with one subdirectory per sample
        :param reference: reference fasta; its index and sequence dictionary are checked first
        :param merged_prefix: basename prefix of merged VCFs in <outdir>/SNPcall/
        :param sample_list: samples to handle; forwarded to self.get_sample_list
        :param outdir: output directory
        :param suffix: text appended to the sample name to get the alignment file prefix
        :param input: "alignment" or "reads"
        :param input_filetype: extension of the per-sample alignment file
        :param threads: thread count for the tools; self.threads when not set
        :param mark_duplicates: see the review note in the per-sample loop
        :param known_variants_vcf: known sites for the first recalibration
        :param genotyping_mode: forwarded to HaplotypeCaller.gvcf_call
        :param output_mode: forwarded to HaplotypeCaller.gvcf_call
        :param stand_call_conf: forwarded to HaplotypeCaller.gvcf_call
        :param skip_base_score_recalibration: limits the run to one iteration and
                                              disables recalibration in it
        :param iteration_number: number of iterations when recalibration is not skipped
        :param SNP_QD, SNP_FS, SNP_MQ, SNP_MappingQualityRankSum, SNP_ReadPosRankSum,
               indel_QD, indel_ReadPosRankSum, indel_FS, SNP_filter_name,
               indel_filter_name: forwarded to self.hardfilter_variants
        :param analyze_covariates: run AnalyzeCovariates.plot_two_recall_table after recalibration
        :param include_region_id_file: forwarded to the recalibration and calling steps
        :param exclude_region_id_file: forwarded to the recalibration and calling steps
        """

        SamtoolsV1.check_for_fasta_index(reference)

        CreateSequenceDictionary.jar_path = self.Picard_dir
        CreateSequenceDictionary.check_for_fasta_dict(reference)

        # Give every GATK-based tool the same thread count, memory limit and jar location.
        for tool in VariantFiltration, \
                    MarkDuplicates, \
                    RealignerTargetCreator, \
                    IndelRealigner, \
                    BaseRecalibrator, \
                    PrintReads, \
                    HaplotypeCaller, \
                    SelectVariants, \
                    GenotypeGVCFs,\
                    CombineVariants:

            tool.threads = threads if threads else self.threads
            tool.max_memory = "%ig" % self.max_memory
            tool.jar_path = self.GATK_dir

        samples = self.get_sample_list(sample_dir, sample_list=sample_list)

        self.prepare_dirs(samples,
                          outdir=outdir,
                          include_alignment_dir=input == "reads")

        # NOTE(review): for input == "reads" nothing is done here, so
        # alignment_filename_prefix_template stays undefined and its later use
        # would raise NameError - the reads mode looks unimplemented; confirm.
        # NOTE(review): with the default suffix=None the template becomes
        # "%sNone"; callers presumably always pass a suffix - confirm.
        if input == "reads":
            pass
        elif input == "alignment":
            alignment_filename_prefix_template = "%%s%s" % suffix

        known_sites = known_variants_vcf

        iterations = 1 if skip_base_score_recalibration else iteration_number  # do only one(zero) iteration if skip base recalibration

        for sample in samples:
            # NOTE(review): the mark_duplicates branch only holds a string
            # literal with example commands and assigns nothing, so
            # sample_alignment_prefix is unset on that path and the lines after
            # the if/else would raise NameError - confirm intended behaviour.
            if mark_duplicates:
                """
                java -Xmx100g -jar ~/tools/picard-tools-2.5.0/picard.jar MarkDuplicates \
                     I=${bam} \
                     O=${bam%bam}rmdup.bam \
                      M=${bam}.mark_dup_metrics.txt

                java -jar ~/tools/picard-tools-2.5.0/picard.jar BuildBamIndex \
                        INPUT=${bam%bam}rmdup.bam

                """

                pass
                # sample_alignment_prefix =
            else:
                sample_alignment_prefix = "%s/%s/%s" % (
                    sample_dir, sample,
                    alignment_filename_prefix_template % sample)

            sample_alignment = "%s.%s" % (sample_alignment_prefix,
                                          input_filetype)
            sample_intervals_for_realignment = "%s.forIndelRealigner.intervals" % sample_alignment_prefix
            sample_realigned_bam = "%s.realigned.bam" % sample_alignment_prefix
            # The realignment calls below sit inside a string literal, i.e. they are disabled.
            """
            RealignerTargetCreator.create(reference, sample_alignment,
                                          output=sample_intervals_for_realignment,
                                          known_indels_vcf=None,
                                          max_interval_size=None,
                                          min_reads_cov=None,
                                          mismatch_fraction=None,
                                          window_size=None,
                                          default_base_qualities=None)

            IndelRealigner.realign(reference,
                                   sample_alignment,
                                   sample_realigned_bam,
                                   target_intervals=sample_intervals_for_realignment,
                                   known_indels_vcf=None, model=None, lod_threshold=None,
                                   entropy_threshold=None, max_cons=None,
                                   max_size_for_movement=None, max_pos_move=None, max_reads_for_cons=None,
                                   max_reads_for_realignment=None, max_reads_in_memory=None, no_original_tags=False,
                                   nway_out=False, default_base_qualities=None)
            """
        # NOTE(review): this loop is a sibling of the per-sample loop above, so
        # the sample_* file names built at the top of each iteration use
        # sample_alignment_prefix left over from another sample (the last one
        # of the preceding sample loop); inside the inner loop only the prefix
        # and the realigned BAM are recomputed, so all samples appear to share
        # the same recalibration table/plot/BAM paths - confirm.
        for iteration_index in range(0, iterations):
            gvcf_list = []

            sample_recall_table = "%s.recall_data.iteration%i.grp" % (
                sample_alignment_prefix, iteration_index)
            sample_postrecall_table = "%s.postrecall_data.iteration%i.grp" % (
                sample_alignment_prefix, iteration_index)
            sample_recall_plots = "%s.recall.iteration%i.pdf" % (
                sample_alignment_prefix, iteration_index)
            sample_recall_csv = "%s.recall.iteration%i.csv" % (
                sample_alignment_prefix, iteration_index)

            sample_recalled_reads_bam = "%s.recal_reads.iteration%i.bam" % (
                sample_alignment_prefix, iteration_index)

            # Names of the merged (all samples) VCF files of this iteration.
            merged_vcf_prefix = "%s/SNPcall/%s.iteration%i" % (
                outdir, merged_prefix, iteration_index)
            merged_raw_vcf_prefix = "%s.raw" % merged_vcf_prefix
            merged_raw_vcf = "%s.vcf" % merged_raw_vcf_prefix

            merged_raw_snp_vcf = "%s.raw.snp.vcf" % merged_vcf_prefix
            merged_with_filters_snp_vcf = "%s.with_filters.snp.vcf" % merged_vcf_prefix
            merged_filtered_snp_vcf = "%s.filtered.snp.vcf" % merged_vcf_prefix

            merged_raw_indel_vcf = "%s.raw.indel.vcf" % merged_vcf_prefix
            merged_with_filters_indel_vcf = "%s.with_filters.indel.vcf" % merged_vcf_prefix
            merged_filtered_indel_vcf = "%s.filtered.indel.vcf" % merged_vcf_prefix

            merged_filtered_combined_vcf = "%s.filtered.combined.vcf" % merged_vcf_prefix

            for sample in samples:
                vcf_prefix = "%s/SNPcall/%s/%s.iteration%i" % (
                    outdir, sample, sample, iteration_index)
                gvcf = "%s.g.vcf" % vcf_prefix
                #raw_snp_vcf = "%s.raw.snp.gvcf" % vcf_prefix
                #raw_indel_vcf = "%s.raw.indel.gvcf" % vcf_prefix

                sample_alignment_prefix = "%s/%s/%s" % (
                    sample_dir, sample,
                    alignment_filename_prefix_template % sample)
                sample_realigned_bam = "%s.realigned.bam" % sample_alignment_prefix

                gvcf_list.append(gvcf)

                # Recalibrate when known sites are available in a run that does
                # not skip recalibration, and in every iteration after the first.
                if ((not skip_base_score_recalibration)
                        and known_sites is not None) or (iteration_index > 0):

                    # First pass: table built from the realigned BAM.
                    BaseRecalibrator.get_recalibration_table(
                        reference,
                        sample_realigned_bam,
                        sample_recall_table,
                        known_sites,
                        include_region_id_file=include_region_id_file,
                        exclude_region_id_file=exclude_region_id_file)

                    # Second pass: same input with the first table supplied as BQSR.
                    BaseRecalibrator.get_recalibration_table(
                        reference,
                        sample_realigned_bam,
                        sample_postrecall_table,
                        known_sites,
                        BQSR=sample_recall_table,
                        include_region_id_file=include_region_id_file,
                        exclude_region_id_file=exclude_region_id_file)
                    if analyze_covariates:
                        AnalyzeCovariates.plot_two_recall_table(
                            reference,
                            sample_recall_table,
                            sample_postrecall_table,
                            sample_recall_plots,
                            csv_out=sample_recall_csv)

                    PrintReads.get_recalled_reads(reference,
                                                  sample_realigned_bam,
                                                  sample_recall_table,
                                                  sample_recalled_reads_bam)

                    #HaplotypeCaller.call(reference, sample_realigned, raw_vcf, genotyping_mode=genotyping_mode,
                    #                     output_mode=output_mode, stand_emit_conf=stand_emit_conf, stand_call_conf=stand_call_conf)
                    # NOTE(review): the gvcf_call on the recalibrated reads is
                    # inside a string literal, so this branch does not produce
                    # the gvcf that was appended to gvcf_list above - confirm.
                    """
                    HaplotypeCaller.gvcf_call(reference, sample_recalled_reads_bam, gvcf, genotyping_mode=genotyping_mode,
                                              output_mode=output_mode, stand_call_conf=stand_call_conf,
                                              include_region_id_file=include_region_id_file,
                                              exclude_region_id_file=exclude_region_id_file)
                    """
                else:

                    HaplotypeCaller.gvcf_call(
                        reference,
                        sample_realigned_bam,
                        gvcf,
                        genotyping_mode=genotyping_mode,
                        output_mode=output_mode,
                        stand_call_conf=stand_call_conf,
                        include_region_id_file=include_region_id_file,
                        exclude_region_id_file=exclude_region_id_file)

            # Joint genotyping of all per-sample gVCFs of this iteration.
            GenotypeGVCFs.genotype(reference, gvcf_list, merged_raw_vcf_prefix)

            self.hardfilter_variants(
                reference,
                merged_raw_vcf,
                merged_vcf_prefix,
                SNP_QD=SNP_QD,
                SNP_FS=SNP_FS,
                SNP_MQ=SNP_MQ,
                SNP_MappingQualityRankSum=SNP_MappingQualityRankSum,
                SNP_ReadPosRankSum=SNP_ReadPosRankSum,
                indel_QD=indel_QD,
                indel_ReadPosRankSum=indel_ReadPosRankSum,
                indel_FS=indel_FS,
                SNP_filter_name=SNP_filter_name,
                indel_filter_name=indel_filter_name,
                threads=threads)
            # The step-by-step filtering below is disabled (string literal);
            # self.hardfilter_variants above is called instead.
            """
            SelectVariants.select_variants(reference, merged_raw_vcf, merged_raw_snp_vcf, vartype="SNP", varfilter=None)
            SelectVariants.select_variants(reference, merged_raw_vcf, merged_raw_indel_vcf, vartype="INDEL", varfilter=None)

            VariantFiltration.filter_bad_SNP(reference, merged_raw_snp_vcf, merged_with_filters_snp_vcf,
                                             filter_name=SNP_filter_name,
                                             QD=SNP_QD, FS=SNP_FS, MQ=SNP_MQ,
                                             MappingQualityRankSum=SNP_MappingQualityRankSum,
                                             ReadPosRankSum=SNP_ReadPosRankSum)
            VariantFiltration.filter_bad_indel(reference, merged_raw_indel_vcf, merged_with_filters_indel_vcf,
                                               filter_name=indel_filter_name, QD=indel_QD,
                                               ReadPosRankSum=indel_ReadPosRankSum, FS=indel_FS)

            SelectVariants.remove_entries_with_filters(reference, merged_with_filters_snp_vcf, merged_filtered_snp_vcf)
            SelectVariants.remove_entries_with_filters(reference, merged_with_filters_indel_vcf, merged_filtered_indel_vcf)

            CombineVariants.combine_from_same_source(reference,
                                                     [merged_filtered_snp_vcf, merged_filtered_indel_vcf],
                                                     merged_filtered_combined_vcf)
            """
            # The filtered call set of this iteration serves as known sites for the next one.
            # NOTE(review): presumably self.hardfilter_variants writes a file with this name - confirm.
            known_sites = merged_filtered_combined_vcf