Code example #1
def create_transdecoder_workflow(in_gtf_file, ref_gtf_file,
                                 ref_genome_fasta_file, out_alignment_gff_file,
                                 out_cdna_fasta_file, out_cds_fasta_file,
                                 out_protein_fasta_file):

    sandbox = soil.utils.workflow.get_sandbox([
        'transdecoder',
    ])

    workflow = pypeliner.workflow.Workflow(default_sandbox=sandbox)

    workflow.transform(name='convert_gtf_to_cdna_fasta',
                       ctx={
                           'mem': 8,
                           'mem_retry_increment': 8,
                           'num_retry': 3
                       },
                       func=tasks.convert_gtf_to_cdna_fasta,
                       args=(
                           mgd.InputFile(in_gtf_file),
                           mgd.InputFile(ref_genome_fasta_file),
                           mgd.OutputFile(out_cdna_fasta_file),
                       ))

    workflow.transform(name='convert_gtf_to_gff',
                       ctx={
                           'mem': 8,
                           'mem_retry_increment': 8,
                           'num_retry': 3
                       },
                       func=tasks.convert_gtf_to_gff_file,
                       args=(
                           mgd.InputFile(in_gtf_file),
                           mgd.TempOutputFile('ref.gff'),
                       ))

    workflow.transform(
        name='run_transdecoder',
        ctx={
            'mem': 8,
            'mem_retry_increment': 8,
            'num_retry': 3
        },
        func=tasks.run_transdecoder,
        args=(
            mgd.InputFile(out_cdna_fasta_file),
            mgd.OutputFile(out_cds_fasta_file),
            mgd.OutputFile(out_protein_fasta_file),
            mgd.TempOutputFile('transdecoder.gff'),
            mgd.TempSpace('transdecoder_tmp'),
        ),
    )

    workflow.transform(name='build_alignment_gff',
                       ctx={
                           'mem': 8,
                           'mem_retry_increment': 8,
                           'num_retry': 3
                       },
                       func=tasks.build_alignment_gff,
                       args=(
                           mgd.InputFile(out_cdna_fasta_file),
                           mgd.TempInputFile('transdecoder.gff'),
                           mgd.TempInputFile('ref.gff'),
                           mgd.OutputFile(out_alignment_gff_file),
                       ))

    return workflow
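
The factory above only assembles a pypeliner workflow object; nothing runs until the workflow is handed to a pipeline driver. A minimal driver sketch, assuming the usual pypeliner.app.Pypeline entry point with a local execution backend (all file paths are placeholders, not from the project):

import pypeliner.app

# Assumed pypeliner config; keys such as 'tmpdir', 'maxjobs' and 'submit'
# control where intermediate files go and how jobs are dispatched.
config = {'tmpdir': './pipeline_tmp', 'maxjobs': 4, 'submit': 'local'}

pyp = pypeliner.app.Pypeline(config=config)

workflow = create_transdecoder_workflow(
    'transcripts.gtf',   # in_gtf_file (placeholder)
    'ref.gtf',           # ref_gtf_file (placeholder)
    'ref_genome.fa',     # ref_genome_fasta_file (placeholder)
    'alignment.gff',     # out_alignment_gff_file
    'cdna.fa',           # out_cdna_fasta_file
    'cds.fa',            # out_cds_fasta_file
    'protein.fa',        # out_protein_fasta_file
)

pyp.run(workflow)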
Code example #2
File: workflows.py  Project: aroth85/soil
def create_topiary_workflow(hla_alleles,
                            in_file,
                            out_file,
                            copy_pyensembl_cache_dir=False,
                            iedb_dir=None,
                            genome='GRCh37',
                            predictor='netmhc',
                            pyensembl_cache_dir=None):
    """ Run topiary.

    Parameters
    ----------
    hla_alleles: list
        List of HLA alleles, e.g. A*02:01.
    in_file: str
        Path to VCF file with variants.
    out_file: str
        Path where output will be written in tsv format.
    """
    sandbox = soil.utils.workflow.get_sandbox([
        'topiary',
    ])

    workflow = pypeliner.workflow.Workflow(default_sandbox=sandbox)

    workflow.setobj(obj=mgd.TempOutputObj('raw_hla_alleles'),
                    value=hla_alleles)

    workflow.setobj(obj=mgd.OutputChunks('pep_len'), value=[8, 9, 10, 11])

    workflow.transform(name='filter_hla_alleles',
                       func=tasks.filter_hla_alleles,
                       args=(mgd.TempInputObj('raw_hla_alleles'), ),
                       kwargs={
                           'iedb_dir': iedb_dir,
                           'predictor': predictor,
                       },
                       ret=mgd.TempOutputObj('hla_alleles'))

    workflow.transform(name='run_topiary',
                       axes=('pep_len', ),
                       ctx={
                           'mem': 8,
                           'mem_retry_increment': 4,
                           'num_retry': 3
                       },
                       func=tasks.run_topiary,
                       args=(mgd.TempInputObj('hla_alleles'),
                             mgd.InputFile(in_file),
                             mgd.TempOutputFile('raw.tsv', 'pep_len')),
                       kwargs={
                           'copy_pyensembl_cache_dir': copy_pyensembl_cache_dir,
                           'iedb_dir': iedb_dir,
                           'genome': genome,
                           'peptide_length': mgd.Template('{pep_len}', 'pep_len'),
                           'predictor': predictor,
                           'pyensembl_cache_dir': pyensembl_cache_dir
                       })

    workflow.transform(name='reformat_output',
                       axes=(),
                       func=tasks.reformat_output,
                       args=(mgd.TempInputFile('raw.tsv', 'pep_len'),
                             mgd.OutputFile(out_file)))

    return workflow
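
A hypothetical invocation that matches the docstring (alleles and paths are placeholders, and the iedb_dir value is an assumption about a local IEDB install); the returned workflow is executed with the same driver pattern sketched after code example #1:

workflow = create_topiary_workflow(
    ['A*02:01', 'B*07:02'],   # hla_alleles
    'variants.vcf',           # in_file: VCF with variants
    'epitopes.tsv',           # out_file: predictions written as TSV
    iedb_dir='/refs/iedb',    # placeholder path to a local IEDB install
    predictor='netmhc',
)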
Code example #3
def create_multiple_lane_align_workflow(fastq_files_1,
                                        fastq_files_2,
                                        ref_genome_dir,
                                        out_bam_file,
                                        add_xs_tag=False,
                                        align_threads=1,
                                        merge_threads=1,
                                        read_group_info=None,
                                        sort_threads=1):

    if read_group_info is None:
        read_group_info = {}

        for key in fastq_files_1:
            read_group_info[key] = None

    sandbox = soil.utils.workflow.get_sandbox(['sambamba', 'samtools'])

    workflow = pypeliner.workflow.Workflow(default_sandbox=sandbox)

    workflow.setobj(obj=mgd.TempOutputObj('read_group_info', 'lane'),
                    value=read_group_info)

    workflow.subworkflow(name='align',
                         axes=('lane', ),
                         func=create_align_workflow,
                         args=(
                             mgd.InputFile('R1.fq.gz',
                                           'lane',
                                           fnames=fastq_files_1),
                             mgd.InputFile('R2.fq.gz',
                                           'lane',
                                           fnames=fastq_files_2),
                             ref_genome_dir,
                             mgd.TempOutputFile('lane.bam', 'lane'),
                         ),
                         kwargs={
                             'add_xs_tag': add_xs_tag,
                             'align_threads': align_threads,
                             'read_group_info': mgd.TempInputObj('read_group_info', 'lane'),
                             'sort_threads': sort_threads,
                         })

    workflow.transform(name='markdups_and_merge',
                       axes=(),
                       ctx={
                           'mem': 24,
                           'mem_retry_increment': 8,
                           'num_retry': 3,
                           'threads': merge_threads
                       },
                       func=soil.wrappers.sambamba.tasks.markdups,
                       args=(
                           mgd.TempInputFile('lane.bam', 'lane'),
                           mgd.OutputFile(out_bam_file),
                           mgd.TempSpace('markdup_tmp'),
                       ),
                       kwargs={
                           'threads': merge_threads,
                       })

    workflow.commandline(name='index',
                         args=(
                             'samtools',
                             'index',
                             mgd.InputFile(out_bam_file),
                             mgd.OutputFile(out_bam_file + '.bai'),
                         ))
    return workflow
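
fastq_files_1 and fastq_files_2 are expected to be dicts keyed by lane (the keys populate the 'lane' axis), and read_group_info, when given, must use the same keys. A hypothetical setup with placeholder lane IDs, paths and read-group fields (the exact read-group dict shape is an assumption about what create_align_workflow accepts):

fastq_files_1 = {
    'L001': '/data/sample/L001_R1.fq.gz',
    'L002': '/data/sample/L002_R1.fq.gz',
}
fastq_files_2 = {
    'L001': '/data/sample/L001_R2.fq.gz',
    'L002': '/data/sample/L002_R2.fq.gz',
}
# If read_group_info is omitted, the workflow substitutes None per lane.
read_group_info = {
    'L001': {'ID': 'L001', 'SM': 'sample', 'PL': 'ILLUMINA'},  # assumed fields
    'L002': {'ID': 'L002', 'SM': 'sample', 'PL': 'ILLUMINA'},
}

workflow = create_multiple_lane_align_workflow(
    fastq_files_1,
    fastq_files_2,
    '/refs/genome_index_dir',   # ref_genome_dir (placeholder)
    'sample.bam',               # out_bam_file
    align_threads=8,
    merge_threads=4,
    read_group_info=read_group_info,
)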
Code example #4
def create_mutect_paired_workflow(normal_bam_file,
                                  tumour_bam_file,
                                  ref_genome_fasta_file,
                                  out_file,
                                  chromosomes=None,
                                  normal_name='normal',
                                  split_size=int(1e7),
                                  tumour_name='tumour'):

    normal_name = get_sample(normal_bam_file, normal_name)

    tumour_name = get_sample(tumour_bam_file, tumour_name)

    sandbox = soil.utils.workflow.get_sandbox(['bcftools', 'gatk', 'samtools'])

    workflow = pypeliner.workflow.Workflow(default_ctx=low_mem_ctx,
                                           default_sandbox=sandbox)

    workflow.setobj(obj=pypeliner.managed.TempOutputObj('config', 'regions'),
                    value=soil.utils.genome.get_bam_regions(
                        normal_bam_file, split_size, chromosomes=chromosomes))

    workflow.transform(name='run_mutect',
                       axes=('regions', ),
                       ctx=med_mem_ctx,
                       func=tasks.run_mutect_paired,
                       args=(mgd.InputFile(normal_bam_file),
                             mgd.InputFile(tumour_bam_file),
                             mgd.InputFile(ref_genome_fasta_file),
                             mgd.TempInputObj('config', 'regions'),
                             mgd.TempOutputFile('region.vcf', 'regions')),
                       kwargs={
                           'normal_name': normal_name,
                           'tumour_name': tumour_name
                       })

    workflow.transform(name='run_mutect_filter',
                       axes=('regions', ),
                       ctx=med_mem_ctx,
                       func=tasks.run_filter_mutect,
                       args=(mgd.TempInputFile('region.vcf', 'regions'),
                             mgd.TempOutputFile('flagged.vcf', 'regions')))

    workflow.transform(name='concatenate_vcfs',
                       func=soil.wrappers.samtools.tasks.concatenate_vcf,
                       args=(
                           mgd.TempInputFile('flagged.vcf', 'regions'),
                           mgd.TempOutputFile('merged.vcf.gz'),
                       ))

    workflow.commandline(name='filter_vcf',
                         ctx=low_mem_ctx,
                         args=(
                             'bcftools',
                             'view',
                             '-O',
                             'z',
                             '-f',
                             '.,PASS',
                             '-o',
                             mgd.OutputFile(out_file),
                             mgd.TempInputFile('merged.vcf.gz'),
                         ))

    return workflow
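
This example and code example #7 reference low_mem_ctx and med_mem_ctx, which are defined elsewhere in workflows.py and not shown here. A plausible sketch of their shape, inferred from the inline ctx dicts used in the other examples (the actual values may differ):

# Assumed definitions: module-level context dicts shared by the variant
# calling workflows; keys mirror the inline ctx dicts above.
low_mem_ctx = {'mem': 2, 'mem_retry_increment': 2, 'num_retry': 3}
med_mem_ctx = {'mem': 6, 'mem_retry_increment': 4, 'num_retry': 3}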
Code example #5
def create_index_ref_data_workflow(out_dir, cosmic=False, threads=1):
    """ Create index files for references.

    This workflow is extremely compute- and memory-heavy. It should be run on a cluster with large memory nodes
    available.
    """
    ref_data_paths = soil.ref_data.paths.SoilRefDataPaths(out_dir)

    sandbox = soil.utils.workflow.get_sandbox(
        ['bwa', 'bcftools', 'kallisto', 'picard', 'samtools', 'star'])

    workflow = pypeliner.workflow.Workflow(default_sandbox=sandbox)

    workflow.commandline(
        name='link_bwa_ref',
        args=('ln', mgd.InputFile(ref_data_paths.genome_fasta_file),
              mgd.OutputFile(ref_data_paths.bwa_genome_fasta_file)))

    workflow.transform(
        name='bwa_index_ref_genome',
        ctx={
            'mem': 8,
            'mem_retry_increment': 8,
            'num_retry': 3
        },
        func=soil.wrappers.bwa.tasks.index,
        args=(mgd.InputFile(ref_data_paths.bwa_genome_fasta_file),
              mgd.OutputFile(ref_data_paths.bwa_genome_fasta_file +
                             '.bwa_index.done')))

    workflow.subworkflow(
        name='build_bwa_mappability_file',
        func=tasks.mappability_wrapper,
        args=(mgd.InputFile(ref_data_paths.bwa_genome_fasta_file +
                            '.bwa_index.done'),
              mgd.OutputFile(ref_data_paths.genome_bwa_mappability_wig_file)),
        kwargs={
            'k': 100,
            'max_map_qual': 60,
            'threads': threads
        })

    workflow.commandline(
        name='link_star_ref',
        args=('ln', mgd.InputFile(ref_data_paths.genome_fasta_file),
              mgd.OutputFile(ref_data_paths.star_genome_fasta_file)))

    workflow.transform(
        name='star_index_ref_genome',
        ctx={
            'mem': 32,
            'mem_retry_increment': 16,
            'num_retry': 3,
            'threads': threads
        },
        func=soil.wrappers.star.tasks.index,
        args=(mgd.InputFile(ref_data_paths.star_genome_fasta_file),
              mgd.InputFile(ref_data_paths.gene_annotations_gtf_file),
              mgd.OutputFile(ref_data_paths.star_genome_fasta_file +
                             '.star_index.done')),
        kwargs={'threads': threads})

    workflow.transform(name='samtools_index_ref_genome',
                       func=soil.wrappers.samtools.tasks.index_fasta,
                       args=(mgd.InputFile(ref_data_paths.genome_fasta_file),
                             mgd.OutputFile(ref_data_paths.genome_fasta_file +
                                            '.fai')))

    workflow.commandline(
        name='build_ref_genome_dict',
        args=('picard', 'CreateSequenceDictionary', 'R={}'.format(
            mgd.InputFile(ref_data_paths.genome_fasta_file)), 'O={}'.format(
                mgd.OutputFile(
                    os.path.splitext(ref_data_paths.genome_fasta_file)[0] +
                    '.dict'))))

    workflow.transform(
        name='kallisto_index',
        ctx={
            'mem': 4,
            'mem_retry_increment': 4,
            'num_retry': 3
        },
        func=soil.wrappers.kallisto.tasks.build_index,
        args=(mgd.InputFile(ref_data_paths.transcriptome_fasta_file),
              mgd.OutputFile(ref_data_paths.kallisto_index_file)),
        kwargs={'kmer_length': 31})

    if cosmic:
        workflow.transform(
            name='index_cosmic',
            func=soil.wrappers.samtools.tasks.index_vcf,
            args=(mgd.InputFile(ref_data_paths.cosmic_vcf_file),
                  mgd.OutputFile(ref_data_paths.cosmic_vcf_file + '.tbi')))

    workflow.transform(name='index_dbsnp',
                       func=soil.wrappers.samtools.tasks.index_vcf,
                       args=(mgd.InputFile(ref_data_paths.dbsnp_vcf_file),
                             mgd.OutputFile(ref_data_paths.dbsnp_vcf_file +
                                            '.tbi')))

    return workflow
Code example #6
def create_download_ref_data_workflow(config,
                                      out_dir,
                                      cosmic=False,
                                      local_download=False):
    """ Download reference files.

    This workflow mainly retrieves files from the internet. There are some light to moderately heavy computational tasks
    as well.
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    ref_data_paths = soil.ref_data.paths.SoilRefDataPaths(out_dir)

    with open(ref_data_paths.config_file, 'w') as fh:
        yaml.dump(config, fh)

    if cosmic:
        cosmic_user = click.prompt('Please enter COSMIC user ID')

        cosmic_password = click.prompt('Please enter COSMIC password',
                                       hide_input=True)

    sandbox = soil.utils.workflow.get_sandbox(['bcftools', 'samtools'])

    workflow = pypeliner.workflow.Workflow(default_sandbox=sandbox)

    for key in config:
        if key.endswith('url') or key.endswith('urls'):
            workflow.setobj(obj=mgd.TempOutputObj(key), value=config[key])

    workflow.setobj(mgd.TempOutputObj('snpeff_url'),
                    value=config['snpeff']['url'])

    workflow.subworkflow(
        name='download_ref_gene_annotations',
        func=_create_download_decompress_concat_workflow,
        args=(mgd.TempInputObj('ref_gene_annotations_gtf_urls'),
              mgd.OutputFile(ref_data_paths.gene_annotations_gtf_file)),
        kwargs={'local_download': local_download})

    workflow.subworkflow(name='download_ref_genome',
                         func=_create_download_decompress_concat_workflow,
                         args=(mgd.TempInputObj('ref_genome_fasta_urls'),
                               mgd.TempOutputFile('raw_ref.fasta')),
                         kwargs={'local_download': local_download})

    workflow.transform(name='lexsort_ref_genome',
                       func=tasks.lex_sort_fasta,
                       args=(mgd.TempInputFile('raw_ref.fasta'),
                             mgd.OutputFile(ref_data_paths.genome_fasta_file)))

    workflow.subworkflow(name='download_ref_proteome',
                         func=_create_download_decompress_concat_workflow,
                         args=(mgd.TempInputObj('ref_proteome_fasta_urls'),
                               mgd.TempOutputFile('raw_ref_prot.fasta')),
                         kwargs={'local_download': local_download})

    workflow.transform(name='filter_bad_proteins',
                       func=tasks.filter_bad_proiteins,
                       args=(mgd.TempInputFile('raw_ref_prot.fasta'),
                             mgd.OutputFile(
                                 ref_data_paths.proteome_fasta_file)))

    workflow.subworkflow(
        name='download_ref_transcriptome',
        func=_create_download_decompress_concat_workflow,
        args=(mgd.TempInputObj('ref_transcriptome_fasta_urls'),
              mgd.OutputFile(ref_data_paths.transcriptome_fasta_file)),
        kwargs={'local_download': local_download})

    workflow.transform(name='download_dbsnp',
                       ctx={'local': local_download},
                       func=tasks.download,
                       args=(mgd.TempInputObj('dbsnp_url'),
                             mgd.OutputFile(ref_data_paths.dbsnp_vcf_file)))

    if cosmic:
        workflow.subworkflow(
            name='download_cosmic',
            func=_create_download_cosmic_workflow,
            args=(config['cosmic']['ref_genome_version'],
                  mgd.OutputFile(ref_data_paths.cosmic_vcf_file), cosmic_user,
                  cosmic_password),
            kwargs={'local_download': local_download})

    workflow.transform(name='download_snpeff_db',
                       ctx={'local': local_download},
                       func=tasks.download,
                       args=(mgd.TempInputObj('snpeff_url'),
                             mgd.TempOutputFile('snpeff.zip')))

    workflow.transform(
        name='unzip_snpeff',
        func=tasks.unzip_file,
        args=(mgd.TempInputFile('snpeff.zip'),
              mgd.OutputFile(
                  os.path.join(os.path.dirname(ref_data_paths.snpeff_data_dir),
                               'done.txt')), mgd.TempSpace('snpeff_tmp')))

    workflow.transform(name='download_genetic_map',
                       ctx={'local': local_download},
                       func=tasks.download,
                       args=(mgd.TempInputObj('genetic_map_txt_url'),
                             mgd.OutputFile(ref_data_paths.genetic_map_file)))

    workflow.subworkflow(
        name='ref_haplotype_panel',
        func=soil.ref_data.haplotype.workflows.create_eagle_ref_data_workflow,
        args=(mgd.TempInputObj('haplotype_vcf_template_url'),
              mgd.OutputFile(ref_data_paths.haplotypes_bcf)),
        kwargs={'local_download': local_download})

    workflow.transform(name='download_iedb_mhc_one',
                       ctx={'local': local_download},
                       func=tasks.download,
                       args=(mgd.TempInputObj('iedb_mhc_one_url'),
                             mgd.TempOutputFile('mhc1.tar.gz')))

    workflow.transform(name='extract_iedb_mhc_one',
                       func=tasks.extract_tar_file,
                       args=(mgd.TempInputFile('mhc1.tar.gz'),
                             mgd.OutputFile(
                                 os.path.join(ref_data_paths.iedb_mhc_one_dir,
                                              'extract.done'))))

    workflow.transform(name='config_iedb_mhc_one',
                       func=tasks.configure_iedb_module,
                       args=(mgd.InputFile(
                           os.path.join(ref_data_paths.iedb_mhc_one_dir,
                                        'extract.done')),
                             mgd.OutputFile(
                                 os.path.join(ref_data_paths.iedb_mhc_one_dir,
                                              'configure.done'))))

    workflow.transform(name='download_vep_cache',
                       ctx={'local': local_download},
                       func=tasks.download,
                       args=(mgd.TempInputObj('vep_cache_url'),
                             mgd.TempOutputFile('vep.tar.gz')))

    workflow.transform(name='extract_vep_cache',
                       func=tasks.extract_tar_file,
                       args=(mgd.TempInputFile('vep.tar.gz'),
                             mgd.OutputFile(
                                 os.path.join(ref_data_paths.vep_cache_dir,
                                              'homo_sapiens',
                                              'extract.done'))))

    workflow.subworkflow(name='download_vep_plugins',
                         func=_create_download_vep_plugins_workflow,
                         args=(mgd.TempInputObj('vep_plugins_urls'),
                               ref_data_paths.vep_plugins_dir),
                         kwargs={'local_download': local_download})

    workflow.setobj(obj=mgd.TempOutputObj('pyensembl_version'),
                    value=config['pyensembl']['version'])

    workflow.transform(name='download_pyensembl_cache',
                       ctx={'local': local_download},
                       func=tasks.download_pyensembl_cache,
                       args=(mgd.TempInputObj('pyensembl_version'),
                             mgd.OutputFile(
                                 os.path.join(
                                     ref_data_paths.pyensembl_cache_dir,
                                     'download.done'))),
                       sandbox=soil.utils.workflow.get_sandbox(['pyensembl']))

    return workflow
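
The config argument is consumed in two ways above: every top-level key ending in 'url' or 'urls' is registered as a TempOutputObj, and the nested snpeff, cosmic and pyensembl sections are read directly. A hypothetical fragment showing only the keys the workflow touches (all URLs and versions are placeholders):

# Hypothetical config; only the keys read by the workflow are shown.
config = {
    'ref_gene_annotations_gtf_urls': ['http://example.org/annotations.gtf.gz'],
    'ref_genome_fasta_urls': ['http://example.org/genome.fa.gz'],
    'ref_proteome_fasta_urls': ['http://example.org/proteome.fa.gz'],
    'ref_transcriptome_fasta_urls': ['http://example.org/transcriptome.fa.gz'],
    'dbsnp_url': 'http://example.org/dbsnp.vcf.gz',
    'genetic_map_txt_url': 'http://example.org/genetic_map.txt.gz',
    'haplotype_vcf_template_url': 'http://example.org/chr{chrom}.vcf.gz',
    'iedb_mhc_one_url': 'http://example.org/iedb_mhc_i.tar.gz',
    'vep_cache_url': 'http://example.org/vep_cache.tar.gz',
    'vep_plugins_urls': ['http://example.org/plugin.pm'],
    'snpeff': {'url': 'http://example.org/snpeff.zip'},
    'cosmic': {'ref_genome_version': 'GRCh37'},
    'pyensembl': {'version': '75'},
}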
Code example #7
File: workflows.py  Project: aroth85/soil
def create_vardict_paired_workflow(normal_bam_file,
                                   tumour_bam_file,
                                   ref_genome_fasta_file,
                                   out_file,
                                   chromosomes=None,
                                   split_size=int(5e6)):

    sandbox = soil.utils.workflow.get_sandbox(
        ['bcftools', 'samtools', 'vardict', 'vardict-java'])

    workflow = pypeliner.workflow.Workflow(default_ctx=low_mem_ctx,
                                           default_sandbox=sandbox)

    workflow.setobj(obj=pypeliner.managed.TempOutputObj('config', 'regions'),
                    value=soil.utils.genome.get_bam_regions(
                        normal_bam_file, split_size, chromosomes=chromosomes))

    workflow.transform(name='run_vardict',
                       axes=('regions', ),
                       ctx=med_mem_ctx,
                       func=tasks.run_vardict_paired,
                       args=(mgd.InputFile(normal_bam_file),
                             mgd.InputFile(tumour_bam_file),
                             mgd.InputFile(ref_genome_fasta_file),
                             mgd.TempInputObj('config', 'regions'),
                             mgd.TempOutputFile('call.tsv', 'regions')))

    workflow.transform(name='test_somatic',
                       axes=('regions', ),
                       func=tasks.run_test_somatic,
                       args=(mgd.TempInputFile('call.tsv', 'regions'),
                             mgd.TempOutputFile('somatic.tsv', 'regions')))

    workflow.transform(name='write_vcf',
                       axes=('regions', ),
                       func=tasks.run_build_paired_vcf,
                       args=(mgd.TempInputFile('somatic.tsv', 'regions'),
                             mgd.TempOutputFile('region.vcf', 'regions')))

    workflow.commandline(name='compress_vcf',
                         axes=('regions', ),
                         args=('bcftools', 'view', '-O', 'z', '-o',
                               mgd.TempOutputFile('region.vcf.gz', 'regions'),
                               mgd.TempInputFile('region.vcf', 'regions')))

    workflow.transform(name='concatenate_vcfs',
                       func=soil.wrappers.samtools.tasks.concatenate_vcf,
                       args=(
                           mgd.TempInputFile('region.vcf.gz', 'regions'),
                           mgd.TempOutputFile('merged.vcf.gz'),
                       ))

    workflow.commandline(name='filter_vcf',
                         args=(
                             'bcftools',
                             'view',
                             '-O',
                             'z',
                             '-f',
                             '.,PASS',
                             '-o',
                             mgd.TempOutputFile('filtered.vcf.gz'),
                             mgd.TempInputFile('merged.vcf.gz'),
                         ))

    workflow.commandline(name='filter_somatics',
                         args=('bcftools', 'filter', '-i',
                               'INFO/STATUS[0]="StrongSomatic"', '-O', 'z',
                               '-o', mgd.OutputFile(out_file),
                               mgd.TempInputFile('filtered.vcf.gz')))

    return workflow
Code example #8
File: workflows.py  Project: aroth85/soil
def create_mappability_workflow(
        ref_genome_fasta_file,
        out_file,
        k=100,
        max_map_qual=None,
        split_size=int(1e7),
        threads=1):

    sandbox = soil.utils.workflow.get_sandbox(['bwa', 'samtools', 'ucsc-bedgraphtobigwig'])

    workflow = pypeliner.workflow.Workflow(default_sandbox=sandbox)

    workflow.transform(
        name='split_fasta_by_chrom',
        func=tasks.split_fasta_by_chrom,
        args=(
            mgd.InputFile(ref_genome_fasta_file),
            mgd.TempOutputFile('chrom.fasta', 'chrom')
        )
    )

    workflow.transform(
        name='create_kmer_reads',
        axes=('chrom',),
        ctx={'mem': 4, 'mem_retry_increment': 2, 'num_retry': 3},
        func=tasks.create_kmer_reads,
        args=(
            mgd.TempInputFile('chrom.fasta', 'chrom'),
            mgd.TempOutputFile('reads.fa', 'chrom', 'kmer_group')
        ),
        kwargs={
            'k': k,
            'split_size': split_size
        }
    )

    workflow.transform(
        name='align_kmers',
        axes=('chrom', 'kmer_group'),
        ctx={'mem': 8, 'mem_retry_increment': 8, 'num_retry': 3, 'threads': threads},
        func=tasks.bwa_mem_align,
        args=(
            mgd.TempInputFile('reads.fa', 'chrom', 'kmer_group'),
            mgd.InputFile(ref_genome_fasta_file),
            mgd.TempOutputFile('aligned.bam', 'chrom', 'kmer_group')
        ),
        kwargs={
            'threads': threads
        }
    )

    workflow.transform(
        name='compute_mappability',
        axes=('chrom', 'kmer_group'),
        ctx={'mem': 4, 'mem_retry_increment': 2, 'num_retry': 3},
        func=tasks.compute_mappability,
        args=(
            mgd.TempInputFile('aligned.bam', 'chrom', 'kmer_group'),
            mgd.TempOutputFile('mappability.tsv', 'chrom', 'kmer_group')
        ),
        kwargs={
            'max_map_qual': max_map_qual,
        }
    )

    workflow.transform(
        name='compute_mappability_segs',
        axes=('chrom', 'kmer_group'),
        ctx={'mem': 4, 'mem_retry_increment': 2, 'num_retry': 3},
        func=tasks.compute_mappability_segs,
        args=(
            mgd.TempInputFile('mappability.tsv', 'chrom', 'kmer_group'),
            mgd.TempOutputFile('mappability_segs.tsv', 'chrom', 'kmer_group')
        )
    )

    workflow.transform(
        name='compute_chrom_mean_mappability',
        axes=('chrom',),
        ctx={'mem': 8, 'mem_retry_increment': 8, 'num_retry': 3},
        func=tasks.compute_chrom_mean_mappability,
        args=(
            mgd.TempInputFile('mappability_segs.tsv', 'chrom', 'kmer_group'),
            mgd.TempOutputFile('mean_mappability.tsv', 'chrom')
        )
    )

    workflow.transform(
        name='write_bed',
        ctx={'mem': 8, 'mem_retry_increment': 8, 'num_retry': 3},
        func=tasks.write_bed,
        args=(
            mgd.TempInputFile('mean_mappability.tsv', 'chrom'),
            mgd.TempOutputFile('mean_mappability.bed')
        )
    )

    workflow.transform(
        name='write_chrom_sizes',
        func=tasks.write_chrom_sizes,
        args=(
            mgd.InputFile(ref_genome_fasta_file),
            mgd.TempOutputFile('chrom_sizes.txt'),
        )
    )

    workflow.commandline(
        name='write_big_wig',
        args=(
            'bedGraphToBigWig',
            mgd.TempInputFile('mean_mappability.bed'),
            mgd.TempInputFile('chrom_sizes.txt'),
            mgd.OutputFile(out_file)
        )
    )

    return workflow
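
bedGraphToBigWig needs a two-column chromosome sizes file, which is what the write_chrom_sizes step produces. The project's tasks.write_chrom_sizes is not shown here; the following is a minimal hypothetical sketch of such a helper, assuming pysam is available, and is not the project's actual implementation:

import pysam


def write_chrom_sizes(ref_genome_fasta_file, out_file):
    """Write 'chrom<TAB>length' lines for every contig in an indexed FASTA."""
    fasta = pysam.FastaFile(ref_genome_fasta_file)
    try:
        with open(out_file, 'w') as fh:
            for chrom, length in zip(fasta.references, fasta.lengths):
                fh.write('{0}\t{1}\n'.format(chrom, length))
    finally:
        fasta.close()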