Exemple #1
0
def run_mutect_paired(
        normal_bam_file,
        tumour_bam_file,
        ref_genome_fasta_file,
        region,
        out_file,
        normal_name='normal',
        tumour_name='tumour'):
    """ Run `gatk Mutect2` on a tumour/normal pair of BAM files.

    :param normal_bam_file: Path of normal BAM file, passed with `-I`.
    :param tumour_bam_file: Path of tumour BAM file, passed with `-I`.
    :param ref_genome_fasta_file: Path of reference genome, passed with `-R`.
    :param region: Value passed with `-L`.
    :param out_file: Path passed with `-O`.
    :param normal_name: Value passed with `-normal`.
    :param tumour_name: Value passed with `-tumor`.
    """
    cli.execute(
        'gatk',
        'Mutect2',
        '-R', ref_genome_fasta_file,
        '-I', tumour_bam_file,
        '-tumor', tumour_name,
        '-I', normal_bam_file,
        '-normal', normal_name,
        '-L', region,
        '-O', out_file
    )

    # Remove the `.idx` file sitting next to the output, if there is one.
    index_path = out_file + '.idx'

    if os.path.exists(index_path):
        os.unlink(index_path)
Exemple #2
0
def rename_chroms(chrom_map_file, in_file, out_file):
    """ Rename chromosomes with `bcftools annotate --rename-chrs`.

    :param chrom_map_file: Path passed to `--rename-chrs`.
    :param in_file: Path of input file. If `in_file + '.csi'` does not exist, `bcftools index` is run first and
        the resulting `.csi` file is deleted afterwards.
    :param out_file: Path the result is directed to. Written with `-O b` when `_detect_file_type` reports 'bcf',
        otherwise with `-O z`.
    """
    index_file = in_file + '.csi'

    # Remember whether the index is ours so only a temporary one gets removed.
    created_index = not os.path.exists(index_file)

    if created_index:
        cli.execute('bcftools', 'index', in_file)

    output_flag = 'b' if _detect_file_type(out_file) == 'bcf' else 'z'

    cli.execute(
        'bcftools', 'annotate', '--rename-chrs', chrom_map_file, in_file,
        '|',
        'bcftools', 'view', '-O', output_flag,
        '>', out_file
    )

    if created_index:
        os.unlink(index_file)
Exemple #3
0
def split_vcf(in_file, out_file_callback, tmp_dir, split_size=int(1e5)):
    """ Split a VCF file into chunks with `SnpSift split`.

    :param in_file: Path of VCF file to split. It is hard linked into `tmp_dir` before splitting.
    :param out_file_callback: Object indexed with the zero-based chunk number to obtain the path the chunk is
        moved to.
    :param tmp_dir: Working directory. Removed if it already exists, created, and removed again at the end.
    :param split_size: Value passed to the `-l` option of `SnpSift split`.
    """
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)

    os.makedirs(tmp_dir)

    tmp_file = os.path.join(tmp_dir, os.path.basename(in_file))

    os.link(in_file, tmp_file)

    cmd = [
        'SnpSift',
        'split',
        '-l',
        split_size,
        tmp_file,
    ]

    cli.execute(*cmd)

    # Only the file name is cut at its first '.'. Splitting the full path
    # would truncate the prefix at a '.' inside a directory name and the glob
    # below would then miss every chunk.
    base_name = os.path.basename(tmp_file).split('.')[0]

    prefix = os.path.join(tmp_dir, base_name)

    # Drop the link first so it is not picked up by the glob.
    os.unlink(tmp_file)

    tmp_split_files = sorted(glob.glob(prefix + '*.vcf'))

    for idx, tmp_out_file in enumerate(tmp_split_files):
        out_file = out_file_callback[idx]

        shutil.move(tmp_out_file, out_file)

    shutil.rmtree(tmp_dir)
Exemple #4
0
def markdups(in_files, out_file, tmp_dir, threads=1):
    """ Merge files and mark duplicate reads with `sambamba markdup`.

        :param in_files: BAM input(s), expanded with `flatten_input` and placed before `out_file` on the command
            line. The BAM files must be coordinate sorted.
        :param out_file: Path where merged and duplicate marked BAM file will be written.
        :param tmp_dir: Path passed as `--tmpdir`. Removed before the run if it exists and removed after the run.
        :param threads: Number of threads to use.
    """
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)

    args = ['sambamba', 'markdup', '-t', threads, '--tmpdir', tmp_dir]

    # Fixed buffer and table sizes.
    args += [
        '--sort-buffer-size', 2048,
        '--io-buffer-size', 640,
        '--overflow-list-size', 1000000,
        '--hash-table-size', 262144,
    ]

    args += flatten_input(in_files)

    args += [out_file]

    cli.execute(*args)

    shutil.rmtree(tmp_dir)
Exemple #5
0
def build_coverage_file(normal_wig,
                        tumour_wig,
                        gc_wig,
                        mappability_wig,
                        out_file,
                        target_file=None):
    """ Run the packaged `build_titan_coverage.py` script.

    :param normal_wig: Path passed with `-n`.
    :param tumour_wig: Path passed with `-t`.
    :param gc_wig: Path passed with `-g`.
    :param mappability_wig: Path passed with `-m`.
    :param out_file: Path passed with `-o`.
    :param target_file: Optional path passed with `--target-bed-file`. Omitted from the command when None.
    """
    script_path = pkg_resources.resource_filename(
        'soil', 'wrappers/titan/scripts/build_titan_coverage.py')

    args = ['python', script_path]

    args += ['-n', normal_wig]
    args += ['-t', tumour_wig]
    args += ['-g', gc_wig]
    args += ['-m', mappability_wig]
    args += ['-o', out_file]

    if target_file is not None:
        args += ['--target-bed-file', target_file]

    cli.execute(*args)
Exemple #6
0
def bam_readcount(config, bam_type, bam, result, tmp_file):
    """ Run `bam-readcount` at one position and store the counts in `result`.

    :param config: Mapping providing 'map_q', 'base_q' and 'reference_genome'.
    :param bam_type: Prefix for the keys added to `result`.
    :param bam: Path of BAM file passed to `bam-readcount`.
    :param result: Mapping providing 'chr', 'pos' and 'alt' (concatenated as strings). Updated in place with
        `<bam_type>_coverage`, one `<bam_type>_<name>` entry per parsed field, and `<bam_type>_vaf`.
    :param tmp_file: Path the command output is directed to and then parsed from.
    """
    execute(
        'bam-readcount',
        '-d',
        50000,
        '-w',
        0,
        '--min-mapping-quality',
        config['map_q'],
        '--min-base-quality',
        config['base_q'],
        '-f',
        config['reference_genome'],
        bam,
        result['chr'] + ":" + result['pos'] + "-" + result['pos'],
        '>',
        tmp_file,
    )

    # Text mode: rows are split on str separators and their pieces are joined
    # into str keys, which fails with bytes rows on Python 3.
    with open(tmp_file, 'r') as temp:
        for row in temp:
            read_counts = row.split()

            coverage_key = bam_type + '_coverage'

            result[coverage_key] = float(read_counts[3])

            # Columns 6-10 hold ':'-separated fields; the first piece names
            # the entry and the second is its value.
            # NOTE(review): presumably one field per base - confirm against
            # the bam-readcount output format.
            for nucleotide_count in read_counts[5:10]:
                nucleotide_count_split = nucleotide_count.split(":")

                result[bam_type + '_' + nucleotide_count_split[0]] = float(
                    nucleotide_count_split[1])

            alt_key = bam_type + '_' + result['alt']

            result[bam_type + '_vaf'] = result[alt_key] / result[coverage_key]
def logd(log_density_csv, trace_log_density_pdf, docker_image=None):
    """ Run the `logd.R` script.

    :param log_density_csv: Path passed with `-f`.
    :param trace_log_density_pdf: Path passed with `-o`.
    :param docker_image: Forwarded to `execute` as the `docker_image` keyword.
    """
    script_args = ('-f', log_density_csv, '-o', trace_log_density_pdf)

    execute('logd.R', *script_args, docker_image=docker_image)
Exemple #8
0
def run_feature_counts(in_file, gene_gtf_file, out_file, tmp_dir):
    """ Run featureCounts command to get the number of reads covering genes.

    :param in_file: Path to WTS BAM file to count reads from.
    :param gene_gtf_file: Path to file with gene annotations in GTF format.
    :param out_file: Path where output will be written in tsv format.
    :param tmp_dir: Temporary directory for featureCounts. Removed if it already exists, recreated, and removed
        again at the end.
    """
    # Start from an empty working directory.
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)

    os.makedirs(tmp_dir)

    counts_path = os.path.join(tmp_dir, 'counts.tsv')

    cli.execute(
        'featureCounts',
        '-a', gene_gtf_file,
        '-o', counts_path,
        in_file
    )

    # Keep the counts table and discard everything else in the directory.
    shutil.move(counts_path, out_file)

    shutil.rmtree(tmp_dir)
Exemple #9
0
def run_transdecoder(in_file, out_cds_fasta_file, out_protein_fasta_file,
                     out_gff_file, tmp_dir):
    """ Run `TransDecoder.LongOrfs` and `TransDecoder.Predict` on a FASTA file.

    Both commands are run with `tmp_dir` as the working directory on a copy of the input named `cdna.fasta`.

    :param in_file: Path of input FASTA file. Copied into `tmp_dir`.
    :param out_cds_fasta_file: Path the `.transdecoder.cds` file is moved to.
    :param out_protein_fasta_file: Path the `.transdecoder.pep` file is moved to.
    :param out_gff_file: Path the `.transdecoder.gff3` file is moved to.
    :param tmp_dir: Working directory. Removed if it already exists, created, and removed again at the end.
    """
    # Make the path absolute so that everything derived from it still points
    # at the same place once the working directory has changed.
    tmp_dir = os.path.abspath(tmp_dir)

    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)

    os.makedirs(tmp_dir)

    tmp_in_file = os.path.join(tmp_dir, 'cdna.fasta')

    shutil.copyfile(in_file, tmp_in_file)

    old_wd = os.getcwd()

    os.chdir(tmp_dir)

    # Restore the working directory even if one of the commands fails.
    try:
        cmd = ['TransDecoder.LongOrfs', '-t', tmp_in_file]

        cli.execute(*cmd)

        cmd = ['TransDecoder.Predict', '-t', tmp_in_file]

        cli.execute(*cmd)

    finally:
        os.chdir(old_wd)

    shutil.move(tmp_in_file + '.transdecoder.cds', out_cds_fasta_file)

    shutil.move(tmp_in_file + '.transdecoder.pep', out_protein_fasta_file)

    shutil.move(tmp_in_file + '.transdecoder.gff3', out_gff_file)

    shutil.rmtree(tmp_dir)
Exemple #10
0
def LoLoPicker_somatic(config, tumour_bam, normal_bam, region_bed, temp_dir,
                       somatic_file):
    """ Run `LoLoPicker_somatic.py` and copy its raw variant file out.

    :param config: Mapping providing 'T_vaf_threshold', 'N_vaf_threshold', 'base_q', 'map_q' and
        'reference_genome'.
    :param tumour_bam: Path passed with `-t`.
    :param normal_bam: Path passed with `-n`.
    :param region_bed: Path passed with `-b`.
    :param temp_dir: Directory created here (must not already exist) and passed with `-o`.
    :param somatic_file: Path that `raw_somatic_varants.txt` from `temp_dir` is copied to.
    """
    os.makedirs(temp_dir)

    args = ['LoLoPicker_somatic.py']

    args += ['--tumoralteredratio', config['T_vaf_threshold']]
    args += ['--normalalteredratio', config['N_vaf_threshold']]
    args += ['--basequality', config['base_q']]
    args += ['--mappingquality', config['map_q']]
    args += ['-t', tumour_bam]
    args += ['-n', normal_bam]
    args += ['-r', config['reference_genome']]
    args += ['-b', region_bed]
    args += ['-o', temp_dir]

    execute(*args)

    raw_calls = os.path.join(temp_dir, 'raw_somatic_varants.txt')

    copyfile(raw_calls, somatic_file)
Exemple #11
0
def annotate_outputs(config, temp_space, input_file, output_txt):
    """ Convert a file with `convert2annovar.pl` and annotate it with `table_annovar.pl`.

    :param config: Mapping providing 'annovar' (directory holding the two scripts) and 'annovar_humandb'.
    :param temp_space: Directory created here (must not already exist) for intermediate files.
    :param input_file: Path passed to `convert2annovar.pl` with `-format vcf4`.
    :param output_txt: Path that `ANNO.hg19_multianno.txt` from `temp_space` is copied to.
    """
    os.makedirs(temp_space)

    annovar_dir = config['annovar']
    converted = os.path.join(temp_space, 'anno_in')

    # Step 1: convert the input, directing the output to the intermediate.
    execute(
        os.path.join(annovar_dir, 'convert2annovar.pl'),
        '-format', 'vcf4',
        '-allsample',
        '-withfreq',
        input_file,
        '>', converted)

    # Step 2: annotate the intermediate file.
    execute(
        os.path.join(annovar_dir, 'table_annovar.pl'),
        converted,
        config['annovar_humandb'],
        '-buildver', 'hg19',
        '-out', os.path.join(temp_space, 'ANNO'),
        '-remove',
        '-protocol', 'refGene,cytoBand',
        '-operation', 'g,r',
        '-nastring', '.',
        '-polish',
    )

    annotated = os.path.join(temp_space, 'ANNO.hg19_multianno.txt')

    copyfile(annotated, output_txt)
def data_exploration(straighten_jitter, output, docker_image=None):
    """ Run the `data_exploration.R` script.

    :param straighten_jitter: Path passed with `-f`.
    :param output: Path passed with `-o`.
    :param docker_image: Forwarded to `execute` as the `docker_image` keyword.
    """
    script_args = ('-f', straighten_jitter, '-o', output)

    execute('data_exploration.R', *script_args, docker_image=docker_image)
Exemple #13
0
def convert_msgf_to_pin(in_decoy_files, in_target_files, out_file, tmp_space):
    """ Run `msgf2pin` on sets of target and decoy files.

    :param in_decoy_files: Mapping from split id to path of a decoy file.
    :param in_target_files: Mapping from split id to path of a target file.
    :param out_file: Path passed with `-o`.
    :param tmp_space: Working directory. Removed if it already exists, created, and removed again at the end.
    """
    def stage_inputs(inputs, list_path, label):
        # Hard link each input as `<label>_<split id>.mzid` and write the
        # link paths, one per line, to `list_path`.
        linked = []

        for split_id, source in inputs.items():
            link_name = '{0}_{1}.mzid'.format(label, split_id)

            link_path = os.path.join(tmp_space, link_name)

            os.link(source, link_path)

            linked.append(link_path)

        with open(list_path, 'w') as fh:
            fh.write('\n'.join(linked))

    if os.path.exists(tmp_space):
        shutil.rmtree(tmp_space)

    os.makedirs(tmp_space)

    decoy_list = os.path.join(tmp_space, 'decoy.txt')

    stage_inputs(in_decoy_files, decoy_list, 'decoy')

    target_list = os.path.join(tmp_space, 'target.txt')

    stage_inputs(in_target_files, target_list, 'target')

    cli.execute('msgf2pin', target_list, decoy_list, '-o', out_file)

    shutil.rmtree(tmp_space)
Exemple #14
0
def trim_fastq(fastq_1, fastq_2, temp_dir, output_fastq_1, output_fastq_2):
    """ Run `trim_galore --paired` and copy the two trimmed files out.

    :param fastq_1: Path of first FASTQ file.
    :param fastq_2: Path of second FASTQ file.
    :param temp_dir: Directory created here (must not already exist) and passed as `--output_dir`.
    :param output_fastq_1: Destination for the file named after `fastq_1` with `.fastq.gz` replaced by
        `_val_1.fq.gz`.
    :param output_fastq_2: Destination for the file named after `fastq_2` with `.fastq.gz` replaced by
        `_val_2.fq.gz`.
    """
    os.makedirs(temp_dir)

    # The same clip length is used at both ends of both reads.
    clip = 13

    execute(
        'trim_galore',
        '--paired',
        '--output_dir', temp_dir,
        '--clip_R1', clip,
        '--three_prime_clip_R1', clip,
        '--clip_R2', clip,
        '--three_prime_clip_R2', clip,
        fastq_1,
        fastq_2
    )

    inputs = ((fastq_1, "_val_1.fq.gz"), (fastq_2, "_val_2.fq.gz"))

    trimmed_names = [
        os.path.basename(path).replace(".fastq.gz", suffix)
        for path, suffix in inputs
    ]

    for name, destination in zip(trimmed_names,
                                 (output_fastq_1, output_fastq_2)):
        copyfile(os.path.join(temp_dir, name), destination)
Exemple #15
0
def index_bam(input_bam, output_bai):
    """ Run `samtools index` on a BAM file.

    :param input_bam: Path of BAM file to index.
    :param output_bai: Path passed as the second argument of `samtools index`.
    """
    index_cmd = ['samtools', 'index', input_bam, output_bai]

    execute(*index_cmd)
Exemple #16
0
def count_fasta_bases(ref_genome_fasta_file, out_file):
    """ Run `countFastaBases` on a FASTA file.

    The executable is looked up with `soil.utils.file_system.find` under `$CONDA_PREFIX/share`.

    :param ref_genome_fasta_file: Path passed to `countFastaBases`.
    :param out_file: Path following the '>' token on the command line.
    """
    search_root = os.path.join(os.environ['CONDA_PREFIX'], 'share')

    executable = soil.utils.file_system.find('countFastaBases', search_root)

    cli.execute(executable, ref_genome_fasta_file, '>', out_file)
Exemple #17
0
def configure_iedb_module(in_sentinel, out_sentinel):
    """ Run `tcsh configure` in the directory that contains `in_sentinel`.

    The working directory of the process is changed and not restored.

    :param in_sentinel: Path whose parent directory becomes the working directory.
    :param out_sentinel: Path of an empty file created (or truncated) after the command has run.
    """
    os.chdir(os.path.dirname(in_sentinel))

    cli.execute('tcsh', 'configure')

    # Create the empty marker file.
    with open(out_sentinel, 'w'):
        pass
Exemple #18
0
def sort_bam(input_bam, output_sorted_bam):
    """ Run `samtools sort` on a BAM file.

    :param input_bam: Path of BAM file to sort.
    :param output_sorted_bam: Path passed with `-o`.
    """
    sort_cmd = ['samtools', 'sort', input_bam, '-o', output_sorted_bam]

    execute(*sort_cmd)
def error_rates_viz(fnr_csv, trace_fnr_pdf, box_fnr_pdf, docker_image=None):
    """ Run the `error_rates_viz.R` script.

    :param fnr_csv: Path passed with `-f`.
    :param trace_fnr_pdf: Path passed with `-t`.
    :param box_fnr_pdf: Path passed with `-b`.
    :param docker_image: Forwarded to `execute` as the `docker_image` keyword.
    """
    script_args = ('-f', fnr_csv, '-t', trace_fnr_pdf, '-b', box_fnr_pdf)

    execute('error_rates_viz.R', *script_args, docker_image=docker_image)
Exemple #20
0
def convert_gtf_to_gff_file(in_file, out_file):
    """ Run `cufflinks_gtf_to_alignment_gff3.pl` on a file.

    The script is looked up with `soil.utils.file_system.find` under `$CONDA_PREFIX/opt`.

    :param in_file: Path passed to the script.
    :param out_file: Path following the '>' token on the command line.
    """
    search_root = os.path.join(os.environ['CONDA_PREFIX'], 'opt')

    script_path = soil.utils.file_system.find(
        'cufflinks_gtf_to_alignment_gff3.pl', search_root)

    cli.execute('perl', script_path, in_file, '>', out_file)
Exemple #21
0
def sam_to_bam(input_sam, output_bam):
    """ Run `samtools view -bS` on a SAM file.

    :param input_sam: Path of SAM file to convert.
    :param output_bam: Path passed with `-o`.
    """
    view_cmd = ['samtools', 'view', '-bS', input_sam, '-o', output_bam]

    execute(*view_cmd)
Exemple #22
0
def vcf_annotate_outputs(config, temp_space, input_file, output_vcf):
    """ Annotate a file with `table_annovar.pl -vcfinput`.

    :param config: Mapping providing 'annovar' (directory holding the script) and 'annovar_humandb'.
    :param temp_space: Directory created here (must not already exist) for the annotation output.
    :param input_file: Path passed to `table_annovar.pl`.
    :param output_vcf: Path that `ANNO.hg19_multianno.vcf` from `temp_space` is copied to.
    """
    os.makedirs(temp_space)

    annovar_script = os.path.join(config['annovar'], 'table_annovar.pl')

    execute(
        annovar_script,
        input_file,
        config['annovar_humandb'],
        '-buildver', 'hg19',
        '-out', os.path.join(temp_space, 'ANNO'),
        '-remove',
        '-protocol', 'refGene,cytoBand',
        '-operation', 'g,r',
        '-nastring', '.',
        '-polish',
        '-vcfinput')

    annotated = os.path.join(temp_space, 'ANNO.hg19_multianno.vcf')

    copyfile(annotated, output_vcf)
Exemple #23
0
def fastq_to_sam(ref_genome, fastq_1, fastq_2, output_sam):
    """ Run `bwa mem` on a pair of FASTQ files.

    :param ref_genome: Path of reference passed to `bwa mem`.
    :param fastq_1: Path of first FASTQ file.
    :param fastq_2: Path of second FASTQ file.
    :param output_sam: Path following the '>' token on the command line.
    """
    align_cmd = ['bwa', 'mem', ref_genome, fastq_1, fastq_2]

    execute(*(align_cmd + ['>', output_sam]))
Exemple #24
0
def decompress(in_file, out_file):
    """ Decompress a gzip file, or copy the file when it is not gzip.

    :param in_file: Path of input file. Its type is determined with `_guess_file_type`.
    :param out_file: Path where the decompressed (or copied) file is written.
    """
    if _guess_file_type(in_file) == 'gz':
        cli.execute('gzip', '-cd', in_file, '>', out_file)

        return

    # Anything that is not gzip is copied as is.
    cli.execute('cp', in_file, out_file)
Exemple #25
0
def compress_vcf(in_file, out_file, index_file=None):
    """ Compress a VCF file using bgzip and optionally index the result.

    :param in_file: Path of uncompressed VCF file.
    :param out_file: Path where compressed VCF file will be written.
    :param index_file: When not None, `index_vcf` is called on `out_file` with this value as `index_file`.
    """
    bgzip_cmd = ['bgzip', '-c', in_file, '>', out_file]

    cli.execute(*bgzip_cmd)

    if index_file is None:
        return

    index_vcf(out_file, index_file=index_file)
Exemple #26
0
def merge_normal(config, input_bams, output_file, output_bai):
    """ Merge BAM files with `samtools merge -f` and index the result.

    :param config: Not used by this function.
    :param input_bams: Mapping whose values are paths of the BAM files to merge. The keys are ignored.
    :param output_file: Path of merged BAM file.
    :param output_bai: Path passed as the second argument of `samtools index`.
    """
    # `values()` exists on both Python 2 and 3, unlike `itervalues()`.
    normal_list = list(input_bams.values())

    cmd = ['samtools', 'merge', '-f', output_file]
    cmd.extend(normal_list)
    execute(*cmd)

    execute(
        'samtools',
        'index',
        output_file,
        output_bai,
    )
Exemple #27
0
def run_pileup2snp(in_file, out_file):
    """ Run `varscan pileup2snp` and gzip the output.

    :param in_file: Path passed to `varscan pileup2snp`.
    :param out_file: Path following the '>' token, after the `gzip` stage.
    """
    varscan_stage = ['varscan', 'pileup2snp', in_file]

    gzip_stage = ['gzip', '>', out_file]

    cli.execute(*(varscan_stage + ['|'] + gzip_stage))
Exemple #28
0
def mpileup2snp(in_file, out_file):
    """ Pipe a file into `varscan mpileup2snp --output-vcf 1`.

    :param in_file: Path of input file. When the file is empty, the output of `echo a` is piped in instead.
    :param out_file: Path following the '>' token on the command line.
    """
    if os.stat(in_file).st_size == 0:
        # NOTE(review): presumably a placeholder so that varscan still gets
        # some input for an empty file - confirm.
        cmd = ['echo', 'a']

    else:
        cmd = ['cat', in_file]

    # '|' must be its own token. Without the comma Python joins the adjacent
    # literals into the single argument '|varscan'.
    cmd.extend(
        ['|',
         'varscan', 'mpileup2snp', '--output-vcf', 1, '>', out_file])

    cli.execute(*cmd)
Exemple #29
0
def build_alignment_gff(cdna_fasta_file, gff_file, ref_gff_file, out_file):
    """ Run `cdna_alignment_orf_to_genome_orf.pl`.

    The script is looked up with `soil.utils.file_system.find` under `$CONDA_PREFIX/opt`.

    :param cdna_fasta_file: Path passed as the third script argument.
    :param gff_file: Path passed as the first script argument.
    :param ref_gff_file: Path passed as the second script argument.
    :param out_file: Path following the '>' token on the command line.
    """
    search_root = os.path.join(os.environ['CONDA_PREFIX'], 'opt')

    script_path = soil.utils.file_system.find(
        'cdna_alignment_orf_to_genome_orf.pl', search_root)

    script_args = [gff_file, ref_gff_file, cdna_fasta_file]

    cli.execute('perl', script_path, *(script_args + ['>', out_file]))
Exemple #30
0
def convert_gtf_to_cdna_fasta(in_file, ref_genome_fasta_file, out_file):
    """ Run `cufflinks_gtf_genome_to_cdna_fasta.pl`.

    The script is looked up with `soil.utils.file_system.find` under `$CONDA_PREFIX/opt`.

    :param in_file: Path passed as the first script argument.
    :param ref_genome_fasta_file: Path passed as the second script argument.
    :param out_file: Path following the '>' token on the command line.
    """
    search_root = os.path.join(os.environ['CONDA_PREFIX'], 'opt')

    script_path = soil.utils.file_system.find(
        'cufflinks_gtf_genome_to_cdna_fasta.pl', search_root)

    cli.execute(
        'perl', script_path, in_file, ref_genome_fasta_file,
        '>', out_file)