コード例 #1
0
def main():
    """
    This wrapper script will run the tool mutect within the  mutect docker
    container for the precision immuno project. The wrapper requires
    1. mutect
    2. java (For running mutect)
    3. twoBitToFa from the kent tools library (For extracting the reference
            genome in case indexing is required)
    4. lftp for downloading the cosmic vcf

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    """
    # NOTE: the docstring above is handed verbatim to prepare.parse_args() via
    # main.__doc__, so it is runtime text and must not be reworded casually.
    params = prepare.parse_args(main.__doc__, 'mutect', 'mutect_calls')
    # Timestamp layout shared by every log/error message emitted below
    time_fmt = '%I:%M %p %b %d, %Y'

    # params ERROR handling
    # The java heap size must end in a G or M unit suffix
    if not params.java_Xmx.endswith(('G', 'M')):
        raise pi_errors.ParameterError(
            dt.now().strftime(time_fmt) +
            ': Please use a suitable value for --Xmx.', params.logfile)
    params.java_executable = pi_errors.test_param_value(params.java_executable,
                                                        'java',
                                                        '--java',
                                                        params.logfile)
    params.mutect_jar = pi_errors.test_param_value(params.mutect_jar,
                                                   'Mutect jar',
                                                   '--mutect_jar',
                                                   params.logfile)
    #  If Indexing is required, does twoBitToFa point to a valid file?
    if params.index_location is None:
        params.tbtf_executable = pi_errors.test_param_value(
            params.tbtf_executable, 'twoBitToFa', '--twoBitToFa',
            params.logfile)
    #  Do the dbsnp and cosmic vcfs exist?
    if params.dbsnp_file == 'DOWNLOAD' or params.cosmic_file == 'DOWNLOAD':
        #  First ensure the vcf storage location has been provided
        if params.vcf_location is None:
            raise pi_errors.ParameterError(
                dt.now().strftime(time_fmt) +
                ': --vcf_location cannot be empty if either --cosmic, ' +
                '--dbsnp, or --genome_fasta are empty.', params.logfile)
        params.vcf_location = os.path.abspath(params.vcf_location)
        # Reuse a previously stored dbsnp vcf, otherwise download it
        if params.dbsnp_file == 'DOWNLOAD':
            stored_dbsnp = '/'.join([params.vcf_location, '00-All.vcf'])
            if os.path.exists(stored_dbsnp):
                params.dbsnp_file = stored_dbsnp
            else:
                params.dbsnp_file = prepare.download_vcf('dbsnp', params)
        # Reuse a previously stored cosmic vcf, otherwise download it
        if params.cosmic_file == 'DOWNLOAD':
            stored_cosmic = '/'.join([params.vcf_location,
                                      'Cosmic_sorted.vcf'])
            if os.path.exists(stored_cosmic):
                params.cosmic_file = stored_cosmic
            else:
                params.cosmic_file = prepare.download_vcf('cosmic', params)
    # Download genome fasta if required
    if params.genome_fasta == 'DOWNLOAD':
        if params.vcf_location is None:
            #  If params.vcf_location is None, set it to the output directory
            params.vcf_location = params.outdir
        #  Does the fasta exist in the vcf_location directory?
        stored_fasta = ''.join([params.vcf_location, '/',
                                params.genome_version, '.fa'])
        if os.path.exists(stored_fasta):
            params.genome_fasta = stored_fasta
        else:
            params.genome_fasta = prepare.get_genome(params.genome_version,
                                                     params.vcf_location,
                                                     params.tbtf_executable,
                                                     params.logfile)
    else:
        params.genome_fasta = pi_errors.test_param_value(params.genome_fasta,
                                                         'Genomic Fasta',
                                                         '--genome_fasta',
                                                         params.logfile)

    # Move to working directory before doing I/O intensive work
    os.chdir(params.working_dir)

    # Names of the raw call file and of the filtered copy written below
    raw_calls = ''.join([params.out_prefix, '.out'])
    kept_calls = ''.join([params.out_prefix, 'non_rejected.out'])

    # Call the program
    mutect_call = [params.java_executable, ''.join(['-Xmx', params.java_Xmx]),
                   '-jar', params.mutect_jar,
                   '-T', 'MuTect',
                   '-R', params.genome_fasta,
                   '--cosmic', params.cosmic_file,
                   '--dbsnp', params.dbsnp_file,
                   '--input_file:normal', params.norm_d_file,
                   '--input_file:tumor', params.tum_d_file,
                   '--out', raw_calls]
    return_value = call(mutect_call)
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime(time_fmt) +
            ': MuTect failed.', params.logfile)

    # Copy the call file, dropping every record whose field 50 is 'REJECT'
    with open(raw_calls, 'r') as mutect_file, \
            open(kept_calls, 'w') as nr_file:
        for line in mutect_file:
            line = line.strip()
            if not line:
                # A blank line has no field 50; skip it instead of crashing
                continue
            if line.startswith('#'):
                print(line, file=nr_file)
                continue
            if line.startswith('contig'):
                # The column header row is written out as a comment line
                print('#', line, sep='', file=nr_file)
                continue
            fields = line.split('\t')
            if fields[50] == 'REJECT':
                continue
            # Re-join the fields: printing the list object itself would emit
            # its Python repr rather than a tab-separated record
            print('\t'.join(fields), file=nr_file)

    print('PROGRESS ' + dt.now().strftime(time_fmt) + ': ' +
          'Mutect run completed. Finishing up...', file=params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime(time_fmt) + ': Process ' +
          'completed', file=params.logfile)
    params.logfile.close()
コード例 #2
0
def process_parameters(params):
    '''
    Conduct the error handling for all parameters passed to the program.

    Validates the executables and input files through
    pi_errors.test_param_value(), resolves every reference file that was
    requested as 'DOWNLOAD', checks the RADIA databases through
    test_database() and expands the 'all' chromosome shortcut. PARAMS is
    updated in place.

    :param params: namespace holding the parsed command line arguments
    :return: database_map, the mapping handed to every test_database() call
             (and to download_databases() when any value is 'DOWNLOAD')
    :raises pi_errors.ParameterError: if a download is requested but
             params.vcf_location is None
    '''
    #  Does the provided radia binary exist?
    params.radia_executable = pi_errors.test_param_value(
        params.radia_executable, 'radia', '--radia', params.logfile)
    #  Setup filterRadia.py, expected in the same directory as radia
    params.filter_radia_executable = '/'.join([os.path.split(
        params.radia_executable)[0], 'filterRadia.py'])
    params.filter_radia_executable = pi_errors.test_param_value(
        params.filter_radia_executable, 'filterradia', '--radia',
        params.logfile)
    #  Test input files
    params.tum_d_file = pi_errors.test_param_value(params.tum_d_file,
                                                   'Tumor DNA',
                                                   '--tum_dna_file',
                                                   params.logfile)
    params.norm_d_file = pi_errors.test_param_value(params.norm_d_file,
                                                    'Normal DNA',
                                                    '--norm_dna_file',
                                                    params.logfile)
    #  The tumor RNA input is optional
    if params.tum_r_file is not None:
        params.tum_r_file = pi_errors.test_param_value(params.tum_r_file,
                                                       'Tumor RNA',
                                                       '--tum_rna_file',
                                                       params.logfile)

    #  If you don't have a reference, you need twoBitToFasta
    if params.index_location is None:
        params.tbtf_executable = pi_errors.test_param_value(
            params.tbtf_executable, 'twoBitToFa', '--twoBitToFa',
            params.logfile)
    #  Are dbsnp or cosmic vcf required?
    if params.dbsnp_file == 'DOWNLOAD' or params.cosmic_file == 'DOWNLOAD' or \
            params.genome_fasta == 'DOWNLOAD':
        # Ensure the vcf storage location has been provided
        if params.vcf_location is None:
            raise pi_errors.ParameterError(
                dt.now().strftime('%I:%M %p %b %d, %Y') +
                ': --vcf_location cannot be empty if either --cosmic, ' +
                '--dbsnp, or --genome_fasta are empty.', params.logfile)
        params.vcf_location = os.path.abspath(params.vcf_location)
        # Reuse a previously stored dbsnp vcf, otherwise download it
        if params.dbsnp_file == 'DOWNLOAD':
            stored_dbsnp = '/'.join([params.vcf_location, '00-All.vcf'])
            if os.path.exists(stored_dbsnp):
                params.dbsnp_file = stored_dbsnp
            else:
                params.dbsnp_file = prepare.download_vcf('dbsnp', params)
        # Reuse a previously stored cosmic vcf, otherwise download it
        if params.cosmic_file == 'DOWNLOAD':
            stored_cosmic = '/'.join([params.vcf_location,
                                      'Cosmic_sorted.vcf'])
            if os.path.exists(stored_cosmic):
                params.cosmic_file = stored_cosmic
            else:
                params.cosmic_file = prepare.download_vcf('cosmic', params)
        # Download genome fasta if required
        if params.genome_fasta == 'DOWNLOAD' or not \
                os.path.exists(params.genome_fasta):
            stored_fasta = ''.join([params.vcf_location, '/',
                                    params.genome_version, '.fa'])
            if os.path.exists(stored_fasta):
                params.genome_fasta = stored_fasta
            else:
                # Use params.tbtf_executable: it is the twoBitToFa attribute
                # this function validates above.
                # NOTE(review): sys.stderr is passed here where the other
                # helpers in this function receive params.logfile - confirm
                # this is intended.
                params.genome_fasta = prepare.get_genome(
                    params.genome_version, params.vcf_location,
                    params.tbtf_executable, sys.stderr)
        else:
            params.genome_fasta = os.path.abspath(params.genome_fasta)
    #  Set up the value for rna_fasta
    if params.rna_fasta == 'GENOME_FASTA':
        params.rna_fasta = params.genome_fasta
    else:
        params.rna_fasta = pi_errors.test_param_value(params.rna_fasta,
                                                      'RNA Fasta',
                                                      '--rna_fasta',
                                                      params.logfile)
    #  Ensure the other databases are set up correctly
    #  The package path is 2 levels above the radia executable
    radia_pkg_path = os.path.split(os.path.split(params.radia_executable)[0])[0]
    database_map = defaultdict()
    test_database(params.blacklist, 'blacklist', radia_pkg_path,
                  params.vcf_location, 'data/hg19/blacklists/1000Genomes/' +
                  'phase1/', database_map)
    test_database(params.retrogenes, 'retrogenes', radia_pkg_path,
                  params.vcf_location, 'data/hg19/retroGenes/', database_map)
    #  Directory name spelled 'pseudoGenes' (was misspelled 'peudoGenes')
    test_database(params.pseudogenes, 'pseudogenes', radia_pkg_path,
                  params.vcf_location, 'data/hg19/pseudoGenes/', database_map)
    test_database(params.broad_targets, 'broad_targets', radia_pkg_path,
                  params.vcf_location, 'data/hg19/broadTargets/',
                  database_map)
    test_database(params.rna_blacklist, 'rna_blacklist', radia_pkg_path,
                  params.vcf_location, 'data/rnaGeneBlacklist.tab',
                  database_map)
    test_database(params.rna_family_blacklist, 'rna_family_blacklist',
                  radia_pkg_path, params.vcf_location,
                  'data/rnaGeneFamilyBlacklist.tab', database_map)
    #  If any of the above were returned as 'DOWNLOAD' then download the radia
    #  data folder to a temp directory from git and set the values for the
    #  invalid ones.
    if any(val == 'DOWNLOAD' for val in database_map.values()):
        download_databases(database_map, params.logfile)
    # if the -C all option was specified, expand params.chromosome
    if params.chromosome == 'all':
        # list(range(...)) keeps the concatenation valid on Python 3 as well,
        # where range() no longer returns a list
        params.chromosome = [''.join(['chr', str(i)])
                             for i in list(range(1, 23)) + ['X', 'Y']]
    return database_map
コード例 #3
0
def process_parameters(params):
    '''
    Conduct the error handling for all parameters passed to the program.

    Validates the executables and input files through
    pi_errors.test_param_value(), resolves every reference file that was
    requested as 'DOWNLOAD', checks the RADIA databases through
    test_database() and expands the 'all' chromosome shortcut. PARAMS is
    updated in place.

    :param params: namespace holding the parsed command line arguments
    :return: database_map, the mapping handed to every test_database() call
             (and to download_databases() when any value is 'DOWNLOAD')
    :raises pi_errors.ParameterError: if a download is requested but
             params.vcf_location is None
    '''
    #  Does the provided radia binary exist?
    params.radia_executable = pi_errors.test_param_value(
        params.radia_executable, 'radia', '--radia', params.logfile)
    #  Setup filterRadia.py, expected in the same directory as radia
    params.filter_radia_executable = '/'.join(
        [os.path.split(params.radia_executable)[0], 'filterRadia.py'])
    params.filter_radia_executable = pi_errors.test_param_value(
        params.filter_radia_executable, 'filterradia', '--radia',
        params.logfile)
    #  Test input files
    params.tum_d_file = pi_errors.test_param_value(params.tum_d_file,
                                                   'Tumor DNA',
                                                   '--tum_dna_file',
                                                   params.logfile)
    params.norm_d_file = pi_errors.test_param_value(params.norm_d_file,
                                                    'Normal DNA',
                                                    '--norm_dna_file',
                                                    params.logfile)
    #  The tumor RNA input is optional
    if params.tum_r_file is not None:
        params.tum_r_file = pi_errors.test_param_value(params.tum_r_file,
                                                       'Tumor RNA',
                                                       '--tum_rna_file',
                                                       params.logfile)

    #  If you don't have a reference, you need twoBitToFasta
    if params.index_location is None:
        params.tbtf_executable = pi_errors.test_param_value(
            params.tbtf_executable, 'twoBitToFa', '--twoBitToFa',
            params.logfile)
    #  Are dbsnp or cosmic vcf required?
    if params.dbsnp_file == 'DOWNLOAD' or params.cosmic_file == 'DOWNLOAD' or \
            params.genome_fasta == 'DOWNLOAD':
        # Ensure the vcf storage location has been provided
        if params.vcf_location is None:
            raise pi_errors.ParameterError(
                dt.now().strftime('%I:%M %p %b %d, %Y') +
                ': --vcf_location cannot be empty if either --cosmic, ' +
                '--dbsnp, or --genome_fasta are empty.', params.logfile)
        params.vcf_location = os.path.abspath(params.vcf_location)
        # Reuse a previously stored dbsnp vcf, otherwise download it
        if params.dbsnp_file == 'DOWNLOAD':
            stored_dbsnp = '/'.join([params.vcf_location, '00-All.vcf'])
            if os.path.exists(stored_dbsnp):
                params.dbsnp_file = stored_dbsnp
            else:
                params.dbsnp_file = prepare.download_vcf('dbsnp', params)
        # Reuse a previously stored cosmic vcf, otherwise download it
        if params.cosmic_file == 'DOWNLOAD':
            stored_cosmic = '/'.join(
                [params.vcf_location, 'Cosmic_sorted.vcf'])
            if os.path.exists(stored_cosmic):
                params.cosmic_file = stored_cosmic
            else:
                params.cosmic_file = prepare.download_vcf('cosmic', params)
        # Download genome fasta if required
        if params.genome_fasta == 'DOWNLOAD' or not \
                os.path.exists(params.genome_fasta):
            stored_fasta = ''.join(
                [params.vcf_location, '/', params.genome_version, '.fa'])
            if os.path.exists(stored_fasta):
                params.genome_fasta = stored_fasta
            else:
                # Use params.tbtf_executable: it is the twoBitToFa attribute
                # this function validates above.
                # NOTE(review): sys.stderr is passed here where the other
                # helpers in this function receive params.logfile - confirm
                # this is intended.
                params.genome_fasta = prepare.get_genome(
                    params.genome_version, params.vcf_location,
                    params.tbtf_executable, sys.stderr)
        else:
            params.genome_fasta = os.path.abspath(params.genome_fasta)
    #  Set up the value for rna_fasta
    if params.rna_fasta == 'GENOME_FASTA':
        params.rna_fasta = params.genome_fasta
    else:
        params.rna_fasta = pi_errors.test_param_value(params.rna_fasta,
                                                      'RNA Fasta',
                                                      '--rna_fasta',
                                                      params.logfile)
    #  Ensure the other databases are set up correctly
    #  The package path is 2 levels above the radia executable
    radia_pkg_path = os.path.split(os.path.split(
        params.radia_executable)[0])[0]
    database_map = defaultdict()
    test_database(params.blacklist, 'blacklist', radia_pkg_path,
                  params.vcf_location, 'data/hg19/blacklists/1000Genomes/' +
                  'phase1/', database_map)
    test_database(params.retrogenes, 'retrogenes', radia_pkg_path,
                  params.vcf_location, 'data/hg19/retroGenes/', database_map)
    #  Directory name spelled 'pseudoGenes' (was misspelled 'peudoGenes')
    test_database(params.pseudogenes, 'pseudogenes', radia_pkg_path,
                  params.vcf_location, 'data/hg19/pseudoGenes/', database_map)
    test_database(params.broad_targets, 'broad_targets', radia_pkg_path,
                  params.vcf_location, 'data/hg19/broadTargets/', database_map)
    test_database(params.rna_blacklist, 'rna_blacklist', radia_pkg_path,
                  params.vcf_location, 'data/rnaGeneBlacklist.tab',
                  database_map)
    test_database(params.rna_family_blacklist, 'rna_family_blacklist',
                  radia_pkg_path, params.vcf_location,
                  'data/rnaGeneFamilyBlacklist.tab', database_map)
    #  If any of the above were returned as 'DOWNLOAD' then download the radia
    #  data folder to a temp directory from git and set the values for the
    #  invalid ones.
    if any(val == 'DOWNLOAD' for val in database_map.values()):
        download_databases(database_map, params.logfile)
    # if the -C all option was specified, expand params.chromosome
    if params.chromosome == 'all':
        # list(range(...)) keeps the concatenation valid on Python 3 as well,
        # where range() no longer returns a list
        params.chromosome = [''.join(['chr', str(i)])
                             for i in list(range(1, 23)) + ['X', 'Y']]
    return database_map