コード例 #1
0
    def filter_call(sample):
        """Coverage-filter a sample's VCF, then intersect it with the SNP file.

        Changes into the sample's directory (under ``mglobals.original_path``
        when ``mglobals.original`` is truthy, otherwise under
        ``mglobals.alternate_path``), runs the coverage-cutoff helper on
        ``sample + in_file_extension`` and sends the ``intersectBed`` output
        to ``sample + out_file_extension``.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        base_dir = (mglobals.original_path if mglobals.original
                    else mglobals.alternate_path)
        os.chdir(join(base_dir, sample))

        log.info('Filtering {0} by coverage'.format(sample))
        helpers.filter_vcf_by_coverage_cutoffs(
            vcf=sample + in_file_extension,
            cutoff_table=mglobals.coverage_cutoffs)

        snp_file = mglobals.current_snp_file
        log.info('Filtering {0} according to SNP file: {1}'.format(
            sample, snp_file))

        # '<sample>_covfil.vcf' is the output of the coverage helper above.
        covfil_vcf = sample + '_covfil.vcf'
        intersect_cmd = ['nice', '-n', '5', 'intersectBed',
                         '-a', covfil_vcf,
                         '-b', snp_file,
                         '-wa']

        with open(sample + out_file_extension, 'w') as sink:
            helpers.sub_call(intersect_cmd, stdout=sink)
    def tophat_call(sample, ref_fasta):
        """Align one sample's single-end reads against ``ref_fasta`` with TopHat.

        Changes into the sample's directory (under ``mglobals.original_path``
        when ``mglobals.original`` is truthy, otherwise under
        ``mglobals.alternate_path``) and runs ``tophat`` on
        ``<samples_path>/<sample>.fastq``, writing to ``<sample>_thout``.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
            ref_fasta: Reference fasta file name. Everything before its first
                '.' is passed to TopHat as the index base name.
        """
        sample_root = (mglobals.original_path if mglobals.original
                       else mglobals.alternate_path)
        os.chdir(join(sample_root, sample))

        ref_fasta_base = ref_fasta.split('.')[0]

        mismatches = '2'
        number_of_samples = len(mglobals.samples_list)
        # Floor division gives 0 when there are more samples than CPUs, and
        # TopHat needs a positive -p value, so never go below one thread.
        threads_per_sample = max(1, mglobals.cpu_count // number_of_samples)

        threads = str(threads_per_sample)
        log.info('threads per sample ' + threads)

        log.info('tophat: aligning sample {} with ref fasta {}'.format(sample, ref_fasta))
        tophat_params = ['nice', '-n', '5',
                         'tophat',
                         '-p', threads,
                         '-G', mglobals.dros_gtf,
                         '--transcriptome-index=../transcriptome_data/known',
                         '-N', mismatches,
                         '--b2-L', '20',
                         '--b2-N', '1',
                         # Same value is used for -N and --read-edit-dist.
                         '--read-edit-dist', mismatches,
                         '-o', (sample + '_thout'),
                         '--no-novel-juncs',
                         ref_fasta_base,
                         join(mglobals.samples_path, (sample + '.fastq'))]
        helpers.sub_call(tophat_params)

        log.info('tophat: finished analyzing sample: {} with ref fasta: {}'.format(sample, ref_fasta))
コード例 #3
0
    def variant_calls_call(sample):
        """Run VarScan ``mpileup2snp`` for a sample.

        Changes into the sample's directory (original or alternate tree,
        depending on ``mglobals.original``), then invokes VarScan on
        ``sample + in_file_extension`` with the file
        ``sample + out_file_extension`` passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        if not mglobals.original:
            sample_dir = join(mglobals.alternate_path, sample)
        else:
            sample_dir = join(mglobals.original_path, sample)
        os.chdir(sample_dir)

        log.info('Varscan: creating csv for: ' + sample)

        launcher = ['nice', '-n', '5', 'java', '-jar', mglobals.varscan_path]
        subcommand = ['mpileup2snp', sample + in_file_extension]
        thresholds = [
            '--min-coverage', '2',
            '--min-avg-qual', '20',
            '--strand-filter', '0',
            '--p-value', '1',
            '--min-var-freq', '1e-10',
            '--output-vcf', '1',
        ]
        varscan_command = launcher + subcommand + thresholds

        with open(sample + out_file_extension, 'w') as vcf_sink:
            helpers.sub_call(varscan_command, stdout=vcf_sink)
        log.info('varscan finished for: ' + sample)
    def filter_call(sample):
        """Filter a sample's VCF by coverage and by the current SNP file.

        Works inside the sample's directory, chosen from the original or the
        alternate tree according to ``mglobals.original``. The coverage helper
        is applied to ``sample + in_file_extension``; ``intersectBed`` is then
        run with ``sample + out_file_extension`` passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        if not mglobals.original:
            workdir = join(mglobals.alternate_path, sample)
        else:
            workdir = join(mglobals.original_path, sample)
        os.chdir(workdir)

        log.info("Filtering {0} by coverage".format(sample))
        input_vcf = sample + in_file_extension
        helpers.filter_vcf_by_coverage_cutoffs(
            vcf=input_vcf,
            cutoff_table=mglobals.coverage_cutoffs,
        )

        log.info(
            "Filtering {0} according to SNP file: {1}".format(
                sample, mglobals.current_snp_file
            )
        )
        niceness = ["nice", "-n", "5"]
        # The "-a" input is the output of the coverage helper above.
        intersect_args = [
            "intersectBed",
            "-a", sample + "_covfil.vcf",
            "-b", mglobals.current_snp_file,
            "-wa",
        ]
        destination = sample + out_file_extension
        with open(destination, "w") as handle:
            helpers.sub_call(niceness + intersect_args, stdout=handle)
    def variant_calls_call(sample):
        """Call SNPs for a sample with VarScan's ``mpileup2snp``.

        The working directory is switched to the sample's folder (under the
        original or alternate path, per ``mglobals.original``). VarScan reads
        ``sample + in_file_extension``; the file
        ``sample + out_file_extension`` is opened and passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        tree = mglobals.original_path if mglobals.original else mglobals.alternate_path
        os.chdir(join(tree, sample))

        log.info("Varscan: creating csv for: " + sample)

        # (flag, value) pairs, flattened into the argument list below.
        option_pairs = (
            ("--min-coverage", "2"),
            ("--min-avg-qual", "20"),
            ("--strand-filter", "0"),
            ("--p-value", "1"),
            ("--min-var-freq", "1e-10"),
            ("--output-vcf", "1"),
        )
        varscan_command = [
            "nice", "-n", "5",
            "java", "-jar", mglobals.varscan_path,
            "mpileup2snp",
            sample + in_file_extension,
        ]
        for flag, value in option_pairs:
            varscan_command.extend((flag, value))

        result_path = sample + out_file_extension
        with open(result_path, "w") as result:
            helpers.sub_call(varscan_command, stdout=result)
        log.info("varscan finished for: " + sample)
コード例 #6
0
    def tophat_call(sample, ref_fasta):
        """Align one sample's paired-end reads against ``ref_fasta`` with TopHat.

        Changes into the sample's directory (under ``mglobals.original_path``
        when ``mglobals.original`` is truthy, otherwise under
        ``mglobals.alternate_path``) and runs ``tophat`` on the
        ``<sample>_trim_R1.fastq`` / ``<sample>_trim_R2.fastq`` pair found in
        ``mglobals.samples_path``, writing to ``<sample>_thout``.

        Args:
            sample: Sample name. Its third '_'-separated field is passed to
                ``--mate-inner-dist``, so a name with fewer than three fields
                raises ``IndexError``.
            ref_fasta: Reference fasta file name. Everything before its first
                '.' is passed to TopHat as the index base name.
        """
        sample_root = (mglobals.original_path if mglobals.original
                       else mglobals.alternate_path)
        os.chdir(join(sample_root, sample))

        ref_fasta_base = ref_fasta.split('.')[0]

        mismatches = '5'
        mate_inner = sample.split('_')[2]
        number_of_samples = len(mglobals.samples_list)
        # Floor division gives 0 when there are more samples than CPUs, and
        # TopHat needs a positive -p value, so never go below one thread.
        threads_per_sample = max(1, mglobals.cpu_count // number_of_samples)

        threads = str(threads_per_sample)
        log.info('threads per sample ' + threads)

        log.info('tophat: aligning sample {} with ref fasta {}'.format(
            sample, ref_fasta))
        tophat_params = [
            'nice', '-n', '5',
            'tophat',
            '-p', threads,
            '-G', mglobals.dros_gtf,
            '--transcriptome-index=../transcriptome_data/known',
            '-N', mismatches,
            '--b2-L', '20',
            '--b2-N', '1',
            # Same value is used for -N and --read-edit-dist.
            '--read-edit-dist', mismatches,
            '-o', (sample + '_thout'),
            '--no-novel-juncs',
            '--mate-inner-dist', mate_inner,
            ref_fasta_base,
            join(mglobals.samples_path, (sample + '_trim_R1.fastq')),
            join(mglobals.samples_path, (sample + '_trim_R2.fastq')),
        ]
        helpers.sub_call(tophat_params)

        log.info(
            'tophat: finished analyzing sample: {} with ref fasta: {}'.format(
                sample, ref_fasta))
コード例 #7
0
    def trim_call(sample):
        """Trim a paired-end sample with Trimmomatic and link the results in.

        If both paired trimmed FASTQ files already exist under
        ``mglobals.trimmed_path`` they are reused. Otherwise Trimmomatic is
        run on ``<sample>_R1.fastq`` / ``<sample>_R2.fastq``, resolved
        relative to the current working directory. In both cases symlinks
        named after the two paired outputs are created in the current working
        directory.

        Args:
            sample: Sample name; used as file-name prefix.

        Raises:
            OSError: If a symlink cannot be created for any reason other than
                the link name already existing.
        """
        import errno

        def _link_into_cwd(target):
            # Symlink ``target`` into the current directory under its
            # basename. Each file is linked on its own so an existing link
            # for one read file cannot prevent the other from being linked.
            try:
                os.symlink(target, os.path.basename(target))
            except OSError as err:
                # Only "link already exists" is expected; anything else
                # (permissions, missing directory, ...) must surface.
                if err.errno != errno.EEXIST:
                    raise

        log.info('Trimming sample {}'.format(sample))

        trimmed_path = join(mglobals.trimmed_path, sample)

        p_trim_R1 = trimmed_path + '_trim_R1.fastq'
        u_trim_R1 = trimmed_path + '_u_trim_R1.fastq'
        p_trim_R2 = trimmed_path + '_trim_R2.fastq'
        u_trim_R2 = trimmed_path + '_u_trim_R2.fastq'

        if os.path.exists(p_trim_R1) and os.path.exists(p_trim_R2):
            log.info('Sample already trimmed, linking from trimmed_path')
        else:
            # cpu_count // 10 is optimal for ~40 samples but would probably
            # crash with a higher number.
            threads = str(mglobals.cpu_count // 10)

            log.info('Sample not already trimmed, trimming now')
            trim_params = [
                'nice', '-n', '5', 'java', '-jar', mglobals.trimmomatic_path,
                'PE', '-threads', threads, '-phred33', sample + '_R1.fastq',
                sample + '_R2.fastq', p_trim_R1, u_trim_R1, p_trim_R2,
                u_trim_R2, 'SLIDINGWINDOW:4:20', 'TRAILING:20', 'MINLEN:50'
            ]
            helpers.sub_call(trim_params)
            log.info('Finished trimming sample {}, linking in.'.format(sample))

        _link_into_cwd(p_trim_R1)
        _link_into_cwd(p_trim_R2)
コード例 #8
0
    def annotate_call(sample):
        """Annotate a sample's variants by intersecting them with the genes file.

        Changes into the sample's directory (original or alternate tree,
        depending on ``mglobals.original``) and runs ``intersectBed -wa -wb``
        with ``sample + in_file_extension`` as ``-a`` and
        ``mglobals.current_genes_file`` as ``-b``. The file
        ``sample + out_file_extension`` is passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        parent = (mglobals.original_path if mglobals.original
                  else mglobals.alternate_path)
        os.chdir(join(parent, sample))

        variants_file = sample + in_file_extension
        log.info('Annotating ' + variants_file + ' with ' +
                 mglobals.current_genes_file)

        niced = ['nice', '-n', '5']
        intersect = ['intersectBed',
                     '-a', variants_file,
                     '-b', mglobals.current_genes_file,
                     '-wa', '-wb']

        with open(sample + out_file_extension, 'w') as annotated:
            helpers.sub_call(niced + intersect, stdout=annotated)
    def annotate_call(sample):
        """Intersect a sample's variant file with the current genes file.

        Runs inside the sample's directory, located under the original or
        the alternate path according to ``mglobals.original``. The
        ``intersectBed`` call is given ``sample + out_file_extension`` as
        stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        if not mglobals.original:
            target_dir = join(mglobals.alternate_path, sample)
        else:
            target_dir = join(mglobals.original_path, sample)
        os.chdir(target_dir)

        log.info('Annotating ' + sample + in_file_extension + ' with ' + mglobals.current_genes_file)

        gtf_intersect_command = ['nice', '-n', '5', 'intersectBed']
        gtf_intersect_command += ['-a', sample + in_file_extension]
        gtf_intersect_command += ['-b', mglobals.current_genes_file]
        # -wa / -wb: report the original entries from both inputs.
        gtf_intersect_command += ['-wa', '-wb']

        annotated_path = sample + out_file_extension
        with open(annotated_path, 'w') as annotated_file:
            helpers.sub_call(gtf_intersect_command, stdout=annotated_file)
 def build_fastas_call(sample):
     """Build and index an alternate reference fasta for a sample.

     Working in ``<original_path>/<sample>``: de-duplicates the annotated
     VCF, fixes it up against ``<sample>.vcf``, runs GATK's
     ``FastaAlternateReferenceMaker`` against ``genome.fa``, fixes the
     resulting fasta, moves it to ``<alternate_path>/<sample>`` and builds
     a bowtie2 index there.

     Args:
         sample: Sample name; used as directory name and file-name prefix.
     """
     alternate_dir = join(mglobals.alternate_path, sample)
     scratch_vcf = sample + ".temp"
     fixed_vcf = sample + "_fix.vcf"
     unfixed_fasta = sample + "_unfixed.fa"
     # If you change this name, you need to change the alternate fastas
     # list as well.
     final_fasta = sample + ".fa"

     os.chdir(join(mglobals.original_path, sample))
     log.info("Beginning to build alternate fasta for: " + sample)

     log.info("Removing duplicated annotations (per transcript annotations)")
     helpers.remove_dups(input_f=sample + in_file_extension, output_f=scratch_vcf)

     log.info("Removing duplicate alleles and adding header")
     # The fact that the original vcf was named sample.vcf is hardcoded
     # here. Be careful.
     helpers.vcf_fix(template_f=sample + ".vcf", input_f=scratch_vcf, output_f=fixed_vcf)
     # The intermediate file is no longer needed.
     os.remove(scratch_vcf)

     log.info("Creating alternate fasta")
     gatk_command = ["nice", "-n", "5", "java", "-Xmx2g", "-jar", mglobals.gatk_path]
     gatk_command += ["-R", "genome.fa"]
     gatk_command += ["-T", "FastaAlternateReferenceMaker"]
     gatk_command += ["-o", unfixed_fasta]
     gatk_command += ["--variant", fixed_vcf]
     helpers.sub_call(gatk_command)

     log.info("Fixing gatk fasta")
     helpers.fasta_fix(input_f=unfixed_fasta, output_f=final_fasta)
     # Only the fixed fasta is kept.
     os.remove(unfixed_fasta)

     log.info("Moving new fasta to: " + alternate_dir)
     shutil.move(final_fasta, alternate_dir)

     log.info("Indexing new fasta")
     os.chdir(alternate_dir)
     helpers.sub_call(["bowtie2-build", "-f", final_fasta, sample])
    def pileup_call(sample, ref_fasta):
        """Run ``samtools mpileup`` on a sample's filtered alignments.

        Changes into the sample's directory (original or alternate tree,
        depending on ``mglobals.original``) and piles up
        ``<sample>_thout/filter.bam`` against ``ref_fasta``, with the file
        ``sample + out_file_extension`` passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
            ref_fasta: Reference fasta passed to ``samtools mpileup -f``.
        """
        root = (mglobals.original_path if mglobals.original
                else mglobals.alternate_path)
        os.chdir(join(root, sample))

        log.info('mpileup: creating .mpileup file for {} with ref fasta: {}'.format(sample, ref_fasta))

        bam_path = join(sample + '_thout', 'filter.bam')
        pileup_command = ['nice', '-n', '5', 'samtools', 'mpileup']
        pileup_command.extend(['-B', '-d10000000'])
        pileup_command.extend(['-f', ref_fasta])
        pileup_command.append(bam_path)

        pileup_path = sample + out_file_extension
        with open(pileup_path, 'w') as pileup_out:
            helpers.sub_call(pileup_command, stdout=pileup_out)
        log.info('mpileup: finished for {} with ref fasta: {}'.format(sample, ref_fasta))
コード例 #12
0
    def pileup_call(sample, ref_fasta):
        """Create the mpileup file for a sample with ``samtools mpileup``.

        Runs in the sample's directory under the original or alternate path
        (per ``mglobals.original``). Input is ``<sample>_thout/filter.bam``;
        the file ``sample + out_file_extension`` is passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
            ref_fasta: Reference fasta passed to ``samtools mpileup -f``.
        """
        if not mglobals.original:
            run_dir = join(mglobals.alternate_path, sample)
        else:
            run_dir = join(mglobals.original_path, sample)
        os.chdir(run_dir)

        start_msg = 'mpileup: creating .mpileup file for {} with ref fasta: {}'
        done_msg = 'mpileup: finished for {} with ref fasta: {}'

        log.info(start_msg.format(sample, ref_fasta))
        niced = ['nice', '-n', '5']
        mpileup = ['samtools', 'mpileup',
                   '-B', '-d10000000',
                   '-f', ref_fasta,
                   join(sample + '_thout', 'filter.bam')]

        with open(sample + out_file_extension, 'w') as sink:
            helpers.sub_call(niced + mpileup, stdout=sink)
        log.info(done_msg.format(sample, ref_fasta))
    def tophat_call(sample, ref_fasta):
        """Align one sample's single-end reads against ``ref_fasta`` with TopHat.

        Changes into the sample's directory (under ``mglobals.original_path``
        when ``mglobals.original`` is truthy, otherwise under
        ``mglobals.alternate_path``) and runs ``tophat`` on
        ``<samples_path>/<sample>.fastq``, writing to ``<sample>_thout``.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
            ref_fasta: Reference fasta file name. Everything before its first
                "." is passed to TopHat as the index base name.
        """
        if mglobals.original:
            os.chdir(join(mglobals.original_path, sample))
        else:
            os.chdir(join(mglobals.alternate_path, sample))

        ref_fasta_base = ref_fasta.split(".")[0]

        mismatches = "5"
        number_of_samples = len(mglobals.samples_list)
        # Floor division gives 0 when there are more samples than CPUs, and
        # TopHat needs a positive -p value, so never go below one thread.
        threads_per_sample = max(1, mglobals.cpu_count // number_of_samples)

        threads = str(threads_per_sample)
        log.info("threads per sample " + threads)

        log.info("tophat: aligning sample {} with ref fasta {}".format(sample, ref_fasta))
        tophat_params = [
            "nice",
            "-n",
            "5",
            "tophat",
            "-p",
            threads,
            "-G",
            mglobals.dros_gtf,
            "--transcriptome-index=../transcriptome_data/known",
            "-N",
            mismatches,
            "--b2-L",
            "20",
            "--b2-N",
            "1",
            # Same value is used for -N and --read-edit-dist.
            "--read-edit-dist",
            mismatches,
            "-o",
            (sample + "_thout"),
            "--no-novel-juncs",
            ref_fasta_base,
            join(mglobals.samples_path, (sample + ".fastq")),
        ]
        helpers.sub_call(tophat_params)

        log.info("tophat: finished analyzing sample: {} with ref fasta: {}".format(sample, ref_fasta))
    def trim_call(sample):
        """Trim a paired-end sample with Trimmomatic and link the results in.

        If both paired trimmed FASTQ files already exist under
        ``mglobals.trimmed_path`` they are reused. Otherwise Trimmomatic is
        run on ``<sample>_R1.fastq`` / ``<sample>_R2.fastq``, resolved
        relative to the current working directory. In both cases symlinks
        named after the two paired outputs are created in the current working
        directory.

        Args:
            sample: Sample name; used as file-name prefix.

        Raises:
            OSError: If a symlink cannot be created for any reason other than
                the link name already existing.
        """
        import errno

        def _link_into_cwd(target):
            # Symlink ``target`` into the current directory under its
            # basename. Each file is linked on its own so an existing link
            # for one read file cannot prevent the other from being linked.
            try:
                os.symlink(target, os.path.basename(target))
            except OSError as err:
                # Only "link already exists" is expected; anything else
                # (permissions, missing directory, ...) must surface.
                if err.errno != errno.EEXIST:
                    raise

        log.info('Trimming sample {}'.format(sample))

        trimmed_path = join(mglobals.trimmed_path, sample)

        p_trim_R1 = trimmed_path + '_trim_R1.fastq'
        u_trim_R1 = trimmed_path + '_u_trim_R1.fastq'
        p_trim_R2 = trimmed_path + '_trim_R2.fastq'
        u_trim_R2 = trimmed_path + '_u_trim_R2.fastq'

        if os.path.exists(p_trim_R1) and os.path.exists(p_trim_R2):
            log.info('Sample already trimmed, linking from trimmed_path')
        else:
            # cpu_count // 10 is optimal for ~40 samples but would probably
            # crash with a higher number.
            threads = str(mglobals.cpu_count//10)

            log.info('Sample not already trimmed, trimming now')
            trim_params = ['nice', '-n', '5',
                           'java', '-jar', mglobals.trimmomatic_path,
                           'PE',
                           '-threads', threads,
                           '-phred33',
                           sample + '_R1.fastq',
                           sample + '_R2.fastq',
                           p_trim_R1,
                           u_trim_R1,
                           p_trim_R2,
                           u_trim_R2,
                           'SLIDINGWINDOW:4:20',
                           'TRAILING:20',
                           'MINLEN:50']
            helpers.sub_call(trim_params)
            log.info('Finished trimming sample {}, linking in.'.format(sample))

        _link_into_cwd(p_trim_R1)
        _link_into_cwd(p_trim_R2)
    def filter_call(sample):
        """Apply coverage cutoffs to a sample's VCF and keep SNP-file overlaps.

        Switches to the sample's directory (original or alternate tree, per
        ``mglobals.original``), calls the coverage-cutoff helper on
        ``sample + in_file_extension``, then runs ``intersectBed`` with the
        file ``sample + out_file_extension`` passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        tree = mglobals.original_path if mglobals.original else mglobals.alternate_path
        os.chdir(join(tree, sample))

        coverage_msg = 'Filtering {0} by coverage'
        snp_msg = 'Filtering {0} according to SNP file: {1}'

        log.info(coverage_msg.format(sample))
        helpers.filter_vcf_by_coverage_cutoffs(
            cutoff_table=mglobals.coverage_cutoffs,
            vcf=sample + in_file_extension,
        )

        log.info(snp_msg.format(sample, mglobals.current_snp_file))
        dgrp_intersect_command = ['nice', '-n', '5', 'intersectBed']
        # "-a" is the output of the coverage helper called above.
        dgrp_intersect_command.extend(['-a', sample + '_covfil.vcf'])
        dgrp_intersect_command.extend(['-b', mglobals.current_snp_file])
        dgrp_intersect_command.append('-wa')

        filtered_path = sample + out_file_extension
        with open(filtered_path, 'w') as filtered:
            helpers.sub_call(dgrp_intersect_command, stdout=filtered)
    def annotate_call(sample):
        """Annotate a sample's variant file against ``mglobals.current_genes_file``.

        Changes into the sample's directory (original or alternate tree,
        depending on ``mglobals.original``) and runs ``intersectBed`` with
        the file ``sample + out_file_extension`` passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        tree = mglobals.original_path if mglobals.original else mglobals.alternate_path
        os.chdir(join(tree, sample))

        source_name = sample + in_file_extension
        log.info("Annotating " + source_name + " with " + mglobals.current_genes_file)

        gtf_intersect_command = ["nice", "-n", "5"] + [
            "intersectBed",
            "-a", source_name,
            "-b", mglobals.current_genes_file,
            "-wa",
            "-wb",
        ]

        with open(sample + out_file_extension, "w") as sink:
            helpers.sub_call(gtf_intersect_command, stdout=sink)
コード例 #17
0
 def build_fastas_call(sample):
     """Create the sample-specific alternate fasta and its bowtie2 index.

     Steps, all starting in ``<original_path>/<sample>``:
     remove duplicated annotations from the input VCF, fix the VCF using
     ``<sample>.vcf`` as template, run GATK ``FastaAlternateReferenceMaker``
     on ``genome.fa``, fix the produced fasta, move it into
     ``<alternate_path>/<sample>`` and run ``bowtie2-build`` there.

     Args:
         sample: Sample name; used as directory name and file-name prefix.
     """
     os.chdir(join(mglobals.original_path, sample))
     log.info('Beginning to build alternate fasta for: ' + sample)

     temp_vcf = sample + '.temp'
     fixed_vcf = sample + '_fix.vcf'

     log.info('Removing duplicated annotations (per transcript annotations)')
     helpers.remove_dups(output_f=temp_vcf,
                         input_f=sample + in_file_extension)

     log.info('Removing duplicate alleles and adding header')
     # The fact that the original vcf was named sample.vcf is hardcoded
     # here. Be careful.
     helpers.vcf_fix(output_f=fixed_vcf,
                     input_f=temp_vcf,
                     template_f=sample + '.vcf')
     # Drop the intermediate file.
     os.remove(temp_vcf)

     log.info('Creating alternate fasta')
     new_fasta = sample + '_unfixed.fa'
     java_gatk = ['nice', '-n', '5', 'java', '-Xmx2g', '-jar', mglobals.gatk_path]
     gatk_args = ['-R', 'genome.fa',
                  '-T', 'FastaAlternateReferenceMaker',
                  '-o', new_fasta,
                  '--variant', fixed_vcf]
     helpers.sub_call(java_gatk + gatk_args)

     log.info('Fixing gatk fasta')
     # If you change this name, you need to change the alternate fastas
     # list as well.
     final_fasta = sample + '.fa'
     helpers.fasta_fix(input_f=new_fasta, output_f=final_fasta)
     # Drop the unfixed fasta.
     os.remove(new_fasta)

     destination = join(mglobals.alternate_path, sample)
     log.info('Moving new fasta to: ' + destination)
     shutil.move(final_fasta, destination)

     log.info('Indexing new fasta')
     os.chdir(join(mglobals.alternate_path, sample))
     index_command = ['bowtie2-build', '-f', final_fasta, sample]
     helpers.sub_call(index_command)
 def build_fastas_call(sample):
     """Produce ``<sample>.fa`` from the sample's variants and index it.

     Runs in ``<original_path>/<sample>`` until the final fasta is moved
     to ``<alternate_path>/<sample>``, where ``bowtie2-build`` is run. The
     intermediate ``<sample>.temp`` and ``<sample>_unfixed.fa`` files are
     deleted along the way.

     Args:
         sample: Sample name; used as directory name and file-name prefix.
     """
     def in_sample(suffix):
         # File name for this sample with the given suffix.
         return sample + suffix

     os.chdir(join(mglobals.original_path, sample))
     log.info('Beginning to build alternate fasta for: ' + sample)
     fixed_vcf = in_sample('_fix.vcf')

     log.info('Removing duplicated annotations (per transcript annotations)')
     helpers.remove_dups(input_f=in_sample(in_file_extension),
                         output_f=in_sample('.temp'))

     log.info('Removing duplicate alleles and adding header')
     # The fact that the original vcf was named sample.vcf is hardcoded
     # here. Be careful.
     helpers.vcf_fix(template_f=in_sample('.vcf'),
                     input_f=in_sample('.temp'),
                     output_f=fixed_vcf)
     # Clean up the temporary file.
     os.remove(in_sample('.temp'))

     log.info('Creating alternate fasta')
     new_fasta = in_sample('_unfixed.fa')
     gatk_command = [
         'nice', '-n', '5',
         'java', '-Xmx2g', '-jar', mglobals.gatk_path,
         '-R', 'genome.fa',
         '-T', 'FastaAlternateReferenceMaker',
         '-o', new_fasta,
         '--variant', fixed_vcf,
     ]
     helpers.sub_call(gatk_command)

     log.info('Fixing gatk fasta')
     # If you change this name, you need to change the alternate fastas
     # list as well.
     final_fasta = in_sample('.fa')
     helpers.fasta_fix(input_f=new_fasta, output_f=final_fasta)
     # Clean up the unfixed version.
     os.remove(new_fasta)

     log.info('Moving new fasta to: ' + join(mglobals.alternate_path, sample))
     shutil.move(final_fasta, join(mglobals.alternate_path, sample))

     log.info('Indexing new fasta')
     os.chdir(join(mglobals.alternate_path, sample))
     helpers.sub_call(['bowtie2-build', '-f', final_fasta, sample])
    def variant_calls_call(sample):
        """Invoke VarScan ``mpileup2snp`` on a sample's input file.

        Changes into the sample's directory (under the original or alternate
        path, per ``mglobals.original``), then runs VarScan on
        ``sample + in_file_extension`` with the file
        ``sample + out_file_extension`` passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
        """
        if not mglobals.original:
            location = join(mglobals.alternate_path, sample)
        else:
            location = join(mglobals.original_path, sample)
        os.chdir(location)

        log.info('Varscan: creating csv for: ' + sample)

        varscan_command = ['nice', '-n', '5']
        varscan_command += ['java', '-jar', mglobals.varscan_path]
        varscan_command += ['mpileup2snp', sample + in_file_extension]
        varscan_command += ['--min-coverage', '2']
        varscan_command += ['--min-avg-qual', '20']
        varscan_command += ['--strand-filter', '0']
        varscan_command += ['--p-value', '1']
        varscan_command += ['--min-var-freq', '1e-10']
        varscan_command += ['--output-vcf', '1']

        calls_path = sample + out_file_extension
        with open(calls_path, 'w') as calls:
            helpers.sub_call(varscan_command, stdout=calls)
        log.info('varscan finished for: ' + sample)
    def pileup_call(sample, ref_fasta):
        """Generate a sample's mpileup output with ``samtools mpileup``.

        Changes into the sample's directory (original or alternate tree,
        depending on ``mglobals.original``) and piles up
        ``<sample>_thout/filter.bam`` against ``ref_fasta``. The file
        ``sample + out_file_extension`` is passed as stdout.

        Args:
            sample: Sample name; used as directory name and file-name prefix.
            ref_fasta: Reference fasta passed to ``samtools mpileup -f``.
        """
        tree = mglobals.original_path if mglobals.original else mglobals.alternate_path
        os.chdir(join(tree, sample))

        log.info("mpileup: creating .mpileup file for {} with ref fasta: {}".format(sample, ref_fasta))

        alignments = join(sample + "_thout", "filter.bam")
        pileup_command = [
            "nice", "-n", "5",
            "samtools", "mpileup",
            "-B",
            "-d10000000",
            "-f", ref_fasta,
            alignments,
        ]

        target_name = sample + out_file_extension
        with open(target_name, "w") as target:
            helpers.sub_call(pileup_command, stdout=target)
        log.info("mpileup: finished for {} with ref fasta: {}".format(sample, ref_fasta))