Example #1
0
def main():
    '''
    This wrapper script will run the entire alignment pipeline for RNA from
    alignment of fastqs (to both the genome, and the transcriptome), to sorting
    and indexing. The wrapper can even download and produce STAR references if
    required. The wrapper requires
    1. STAR (For aligning reads)
    2. twoBitToFa from the kent tools library (For extracting the reference
                genome in case indexing is required)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # NOTE: main.__doc__ is handed to the argument parser, so the docstring
    # above is runtime text and not just documentation.
    params = prepare.parse_args(main.__doc__, 'star', 'STAR_alignment')

    # Validate the parameters.  The two values returned are used below as the
    # STAR executable and the index path.
    star_executable, index_path = process_parameters(params)

    # Switch to the working directory before the I/O intensive alignment
    os.chdir(params.working_dir)

    # Align the reads
    star_alignment(star_executable, index_path, params)

    # Move files from the temp directory to outdir, then record completion in
    # the log before closing it.
    prepare.move_output(params)
    finished_at = dt.now().strftime('%I:%M %p %b %d, %Y')
    print(''.join(['RESULT ', finished_at, ': Process completed']),
          file=params.logfile)
    params.logfile.close()
Example #2
0
def main():
    '''
    This wrapper script will run the entire alignment pipeline for RNA from
    alignment of fastqs (to both the genome, and the transcriptome), to sorting
    and indexing. The wrapper can even download and produce STAR references if
    required. The wrapper requires
    1. STAR (For aligning reads)
    2. twoBitToFa from the kent tools library (For extracting the reference
                genome in case indexing is required)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # The docstring is passed to the parser via main.__doc__ (runtime text).
    params = prepare.parse_args(main.__doc__, 'star', 'STAR_alignment')

    # Parameter error handling; the result is unpacked into the executable
    # and index path that star_alignment() receives.
    aligner_and_index = process_parameters(params)
    star_executable, index_path = aligner_and_index

    # Do the I/O heavy work from inside the working directory
    os.chdir(params.working_dir)

    # Align reads
    star_alignment(star_executable, index_path, params)

    # Move files from temp directory to outdir and log the final status
    prepare.move_output(params)
    completion_message = 'RESULT {0}: Process completed'.format(
        dt.now().strftime('%I:%M %p %b %d, %Y'))
    print(completion_message, file=params.logfile)
    params.logfile.close()
def main():
    """
    This wrapper script will run the tool snpeff for the precision immuno
    project. The wrapper requires
    1. snpeff

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.

    A standard snpeff database looks like
    <database folder>
        |-data
           |
           |-<reference name>
                 |-genes.gtf
                 |-sequences.fa
                 |-snpEffectPredictor.bin

    When using --index_location, provide <database folder> as mentioned above.
    DO NOT ADD THE THE data folder at the end of the file path.  If the database
    was made using this script, the snpEff.config file will be in
    <database folder> and will be used to run snpeff using --snpeff_reference
     GENOME_VERISON_custom.  If the database was created manually, and
    snpEff.config is not in <database folder>, it must be explicitly provided
    with --config and --snpeff_reference flags.

    NOTE: If you want to use a snpEff packaged database, use --config=PACKAGED
    and --snpeff_reference=<snpeff reference name>
    If used along with --require_index (instead of --index_location), the
    script download the database
    """
    # The docstring is passed to the parser via main.__doc__ (runtime text).
    params = prepare.parse_args(main.__doc__, 'snpeff', 'snpeffed')

    # Parameter error handling
    process_parameters(params)

    # A custom config file and database are only built when no index location
    # was supplied AND a snpeff packaged database is not being used.
    must_build_database = (params.index_location is None and
                           not params.use_snpeff_db)
    if must_build_database:
        create_custom_config_file(params)
        build_snpeff_database(params)

    # Switch to the working directory before the I/O intensive work
    os.chdir(params.working_dir)

    # Run snpeff and note the progress in the log
    run_snpeff(params)
    print('PROGRESS {0}: Snpeff run finished. Finishing up...'.format(
        dt.now().strftime('%I:%M %p %b %d, %Y')), file=params.logfile)

    # Move files from temp directory to outdir and log the final status
    prepare.move_output(params)
    print('RESULT {0}: Process completed'.format(
        dt.now().strftime('%I:%M %p %b %d, %Y')), file=params.logfile)
    params.logfile.close()
def main():
    """
    This wrapper script will run the tool cutadapt within the  cutadapt docker
    container for the precision immuno project. The wrapper requires
    1. cutadapt
    2. GNU sed (Tested on version 4.2.1)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    """
    # The docstring is passed to the parser via main.__doc__ (runtime text).
    params = prepare.parse_args(main.__doc__, 'cutadapt', 'adapter_fixed')

    # Parameter error handling: check the executable, then make sure both
    # adapters are made up solely of the characters A, C, T, G and N.
    params.cutadapt_executable = pi_errors.test_param_value(
        params.cutadapt_executable, 'cutadapt', '--cutadapt', params.logfile)
    valid_bases = set("ACTGN")
    if (not set(params.fwd_3pr_adapter) <= valid_bases or
            not set(params.rev_3pr_adapter) <= valid_bases):
        raise pi_errors.ParameterError(
            ''.join([dt.now().strftime('%I:%M %p %b %d, %Y'),
                     ': Adapter sequences can only contain A, C, T, G, and ',
                     'N.']),
            params.logfile)

    # Switch to the working directory before the I/O intensive work
    os.chdir(params.working_dir)

    # Remove adapter contamination
    print('PROGRESS {0}: Trimming adapters using cutadapt.'.format(
        dt.now().strftime('%I:%M %p %b %d, %Y')), file=params.logfile)
    trimmed_fwd = params.file_prefix + '_cutadapt_1.fastq'
    trimmed_rev = params.file_prefix + '_cutadapt_2.fastq'
    input_fwd = '/'.join([params.file_path, params.file_prefix + '_1.fastq'])
    input_rev = '/'.join([params.file_path, params.file_prefix + '_2.fastq'])
    cutadapt_call = [
        params.cutadapt_executable,    # base call
        '-a', params.fwd_3pr_adapter,  # Fwd read 3' adapter
        '-A', params.rev_3pr_adapter,  # Rev read 3' adapter
        '-m', '35',                    # Minimum size of read
        '-o', trimmed_fwd,
        '-p', trimmed_rev,
        input_fwd,
        input_rev,
    ]
    # Record the exact command line in the log before running it
    print(' '.join(cutadapt_call), file=params.logfile)
    if call(cutadapt_call):
        # Any non-zero exit status is treated as a failure
        raise pi_errors.MyRuntimeError(
            ''.join([dt.now().strftime('%I:%M %p %b %d, %Y'),
                     ': cutadapt failed']),
            params.logfile)

    # Move files from temp directory to outdir and log the final status
    prepare.move_output(params)
    print('RESULT {0}: Process completed'.format(
        dt.now().strftime('%I:%M %p %b %d, %Y')), file=params.logfile)
    params.logfile.close()
Example #5
0
def main():
    '''
    This wrapper script will run the gene expression analysis for RNA-seq data.
    The input material can be ither a bam file containing RNA-seq reads mapped
    to the genome (Using STAR or bowtie), or the RNA-Seq fastq files themselves.
    The wrapper can produce the rsem references if required. The wrapper
    requires the following
    1. rsem
    2. bowtie/bowtie2 - Only if input is fastq
    3. twoBitToFa from the kent tools library (For extracting the reference
                genome in case indexing is required)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # The docstring is passed to the parser via main.__doc__ (runtime text).
    params = prepare.parse_args(main.__doc__, 'rsem', 'RSEM_quant')

    # Parameter error handling; four values come back and are regrouped into
    # the (bowtie) and (index) pairs that rsem_calculate_expression() takes.
    (bowtie_path, bowtie_version,
     index_path, index_prefix) = process_parameters(params)

    # Switch to the working directory before the I/O intensive work
    os.chdir(params.working_dir)

    # Process with rsem
    rsem_executable = ''.join([params.rsem_path, '/rsem-calculate-expression'])
    bowtie_info = (bowtie_path, bowtie_version)
    index_info = (index_path, index_prefix)
    rsem_calculate_expression(rsem_executable, bowtie_info, index_info, params)

    # Move files from temp directory to outdir and log the final status
    prepare.move_output(params)
    finished_at = dt.now().strftime('%I:%M %p %b %d, %Y')
    print(''.join(['RESULT ', finished_at, ': Process completed']),
          file=params.logfile)
    params.logfile.close()
Example #6
0
def main():
    '''
    This wrapper script will run the gene expression analysis for RNA-seq data.
    The input material can be ither a bam file containing RNA-seq reads mapped
    to the genome (Using STAR or bowtie), or the RNA-Seq fastq files themselves.
    The wrapper can produce the rsem references if required. The wrapper
    requires the following
    1. rsem
    2. bowtie/bowtie2 - Only if input is fastq
    3. twoBitToFa from the kent tools library (For extracting the reference
                genome in case indexing is required)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # main.__doc__ is given to the parser, so the docstring is runtime text.
    params = prepare.parse_args(main.__doc__, 'rsem', 'RSEM_quant')

    # Parameter error handling.  The first two returned values form the
    # bowtie tuple and the last two the index tuple passed on below.
    checked = process_parameters(params)
    bowtie_path, bowtie_version, index_path, index_prefix = checked

    # Move to the working directory before doing the I/O intensive work
    os.chdir(params.working_dir)

    # Process with rsem
    rsem_calculate_expression(
        params.rsem_path + '/rsem-calculate-expression',
        (bowtie_path, bowtie_version),
        (index_path, index_prefix),
        params)

    # Move files from temp directory to outdir and log the final status
    prepare.move_output(params)
    completion_message = 'RESULT {0}: Process completed'.format(
        dt.now().strftime('%I:%M %p %b %d, %Y'))
    print(completion_message, file=params.logfile)
    params.logfile.close()
Example #7
0
def main():
    '''
    This wrapper script will run the entire alignment pipeline for genomic DNA
    (WGS or WXS) from alignment of fastqs, to sorting, indexing, and Read Group
    incorporation. The wrapper can even download and produce bwa references if
    required. The wrapper requires
    1. bwa (For aligning reads)
    2. java (For picard)
    3. picard tools (For read groups)
    4. samtools (For sam/bam manipulation)
    5. twoBitToFa from the kent tools library (For extracting the reference
            genome in case indexing is required)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # NOTE: the docstring above is passed to the argument parser through
    # main.__doc__, so it is runtime text.  Keep it as the first statement.

    def timestamp():
        # Single definition of the time format used in every log/error line
        return dt.now().strftime('%I:%M %p %b %d, %Y')

    def progress(message):
        # Write a PROGRESS line to the run log
        print('PROGRESS ' + timestamp() + ': ' + message, file=params.logfile)

    def run(command, failure, **streams):
        # Run an external command (optionally redirecting stdout/stderr) and
        # raise MyRuntimeError with the given message on a non-zero exit.
        if call(command, **streams) != 0:
            raise pi_errors.MyRuntimeError(timestamp() + ': ' + failure,
                                           params.logfile)

    #  Parse the arguments using prepare.parse_args()
    params = prepare.parse_args(main.__doc__, 'bwa', 'bwa_alignment')

    #  Params ERROR handling
    #  The memory option for java should be of the form Xmx10G or Xmx10M
    if not params.java_Xmx.endswith(('G', 'M')):
        raise pi_errors.ParameterError(
            timestamp() + ': Please use a suitable value for --Xmx.',
            params.logfile)
    params.bwa_executable = pi_errors.test_param_value(
        params.bwa_executable, 'bwa', '--bwa', params.logfile)
    params.samtools_executable = pi_errors.test_param_value(
        params.samtools_executable, 'samtools', '--samtools', params.logfile)
    params.java_executable = pi_errors.test_param_value(
        params.java_executable, 'java', '--java', params.logfile)
    #  If Indexing is required, does twoBitToFa point to a valid file?
    if params.index_location is None:
        params.tbtf_executable = pi_errors.test_param_value(
            params.tbtf_executable, 'twoBitToFa', '--twoBitToFa',
            params.logfile)
    if not params.picard_jar.endswith('jar'):
        raise pi_errors.ParameterError(
            timestamp() + ': Please specify a valid jar file for picard!',
            params.logfile)
    params.picard_jar = pi_errors.test_param_value(
        params.picard_jar, 'picard', '--picard_jar', params.logfile)

    if params.RGID is None:
        params.RGID = params.file_prefix
    # NOTE(review): params.RGID is defaulted above but is not used by any
    # command in this function.  bwa mem is run without -R, and the picard
    # call below hard-codes ID=1.  Confirm whether RGID should be passed on.

    # Check for indexes. If the user has specified that indexes need to
    # be created then do so.
    if params.index_location is None:
        progress('Indexing fasta...')
        if not os.path.exists(params.index_destination):
            prepare.py_mkdir(params.index_destination)
        index_path = params.index_destination
        # Use the twoBitToFa executable that was validated above
        # (params.tbtf_executable).
        genome_fasta = prepare.get_genome(params.genome_version, index_path,
                                          params.tbtf_executable,
                                          params.logfile)
        progress('   Running BWA index on fasta reference.')
        run([params.bwa_executable, 'index', genome_fasta],
            'bwa index failed.')
        progress('   Running samtools faidx.')
        run([params.samtools_executable, 'faidx', genome_fasta],
            'samtools faidx failed')
        index_prefix = genome_fasta
        progress('Indexing completed.')
    elif params.index_location.endswith('.fa'):
        # A fasta file was given directly.  Raise a real error rather than
        # using assert, which is stripped when python runs with -O.
        if not os.path.exists(params.index_location):
            raise pi_errors.InputFileError(
                timestamp() + ': Index file not found', params.logfile)
        index_prefix = params.index_location
    else:
        # A folder was given; it must contain exactly one .fa file
        fastas = [x for x in os.listdir(params.index_location) if
                  x.endswith(".fa")]
        if len(fastas) == 1:
            index_prefix = "".join([params.index_location, '/', fastas[0]])
        elif not fastas:
            raise pi_errors.InputFileError(
                timestamp() + ': No valid fasta found in provided index folder',
                params.logfile)
        else:
            raise pi_errors.InputFileError(
                timestamp() +
                ':Multiple fastas found in provided index folder. Try ' +
                'running with --index_location /path/to/file/filename.fa',
                params.logfile)

    # Move to working directory before doing I/O intensive alignment
    os.chdir(params.working_dir)
    prefix = params.file_prefix
    samtools = params.samtools_executable

    # Align reads to sam file
    progress('Aligning reads to reference.')
    bwa_call = [params.bwa_executable, 'mem',  # base call
                '-t', str(params.n),  # Number of threads
                index_prefix,  # bwa index
                ''.join([params.file_path, '/', prefix, '_1.fastq']),
                ''.join([params.file_path, '/', prefix, '_2.fastq'])]
    print(' '.join(bwa_call), file=params.logfile)
    with open(prefix + '.sam', 'w') as samfile, \
            open(prefix + '_bwa_log.txt', 'w') as bwa_log:
        run(bwa_call, 'bwa mem failed.', stdout=samfile, stderr=bwa_log)
    progress('Alignment completed. Converting to bam')

    # Convert the sam to a bam file.  A failed conversion is reported here
    # instead of surfacing later as a confusing downstream error.
    with open(prefix + '.bam', 'w') as bamfile:
        run([samtools, 'view', '-bS', prefix + '.sam'],
            'samtools view failed.', stdout=bamfile)
    call(['rm', prefix + '.sam'])
    progress('bam file created. Preparing file for inserting RG into header.')

    # Fix PG line.  universal_newlines=True makes check_output return text on
    # both Python 2 and Python 3 so the string operations below are valid.
    sam_header = check_output([samtools, 'view', '-H', prefix + '.bam'],
                              universal_newlines=True)
    sam_header = sam_header.strip().split('\n')  # Strip whitespace and separate
    # The last header line is taken to be the @PG line; split it by tab
    pg_line = sam_header[-1].split('\t')
    # Then remove the CL field from the PG line
    sam_header[-1] = '\t'.join([x for x in pg_line if not x.startswith('CL')])
    with open(prefix + '_sam.header', 'w') as hdr_file:
        print('\n'.join(sam_header), file=hdr_file)
    with open(prefix + '_fixPG.bam', 'w') as fixpg_bamfile:
        run([samtools, 'reheader', prefix + '_sam.header', prefix + '.bam'],
            'samtools reheader failed', stdout=fixpg_bamfile)
    call(['rm', prefix + '.bam', prefix + '_sam.header'])

    # Sort and Index the _fixPG.bam file
    run([samtools, 'sort', prefix + '_fixPG.bam', prefix + '_fixPG_sorted'],
        'samtools sort failed.')
    run([samtools, 'index', prefix + '_fixPG_sorted.bam'],
        'samtools index failed.')
    call(['rm', prefix + '_fixPG.bam'])
    progress('Inserting @RG tag into header.')

    # Reheader the indexed _fixPG_sorted.bam to prepare for mutect
    picard_call = [params.java_executable, ''.join(['-Xmx', params.java_Xmx]),
                   '-jar',  # Base java call
                   params.picard_jar,  # picard
                   'AddOrReplaceReadGroups',  # module
                   'CREATE_INDEX=true',
                   ''.join(['I=', prefix, '_fixPG_sorted.bam']),
                   ''.join(['O=', prefix, '_fixPG_sorted_reheader.bam']),
                   'SO=coordinate',
                   'ID=1',
                   ''.join(['LB=', prefix]),
                   'PL=ILLUMINA',
                   'PU=12345',
                   ''.join(['SM=', params.sample_type])]
    with open(prefix + '_picard_log.txt', 'w') as picard_log:
        run(picard_call, 'picard AddOrReplaceReadGroups failed.',
            stdout=picard_log)
    progress('@RG inserted. Indexing bam')

    # Index _fixPG_sorted_reheader.bam file
    run([samtools, 'index', prefix + '_fixPG_sorted_reheader.bam'],
        'samtools index failed.')
    # Remove intermediate files
    call(['rm', prefix + '_fixPG_sorted.bam',
          prefix + '_fixPG_sorted.bam.bai'])

    # This message goes to the run log like every other PROGRESS line
    progress('Alignment completed. Finishing up...')
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + timestamp() + ': Process completed',
          file=params.logfile)
    params.logfile.close()
Example #8
0
def main():
    """
    This wrapper script will run the tool radia within the  radia docker
    container for the precision immuno project. The wrapper requires
    1. radia
    2. snpeff (if the --use_snpeff flag is used)
    3. twoBitToFa from the kent tools library (For extracting the reference
            genome in case indexing is required)
    4. lftp for downloading the cosmic vcf

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.

    If you want to use a genome build other than hg19 then download cosmic
    and dbsnp vcfs manually and pass them to this program. This program
    currently only works with hg19 due to how cosmic and NCBI's ownload pages
    work. Other options may be made available in the future.

    The vcfs for the various databases are assumed to be in the parent folders
    of the radia executable (../data/*).  If the data isn't found is the parent
    directories, the program will search VCF_LOCATION before throwing a warning
    and continuing without the said database.  Only dbsnp and cosmic are
    downloaded if not present.
    """
    # NOTE: the docstring above is passed to the argument parser through
    # main.__doc__, so it is runtime text.
    # Parse the arguments using prepare.parse_args()
    params = prepare.parse_args(main.__doc__, 'radia', 'radia_calls')
    # params ERROR handling and processing.  The returned mapping is indexed
    # below by database name ('blacklist', 'retrogenes', ...).
    database_map = process_parameters(params)

    # Move to working directory before doing I/O intensive alignment
    os.chdir(params.working_dir)

    # Call the program, once per chromosome
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Starting radia run.', file=params.logfile)
    for chrom in params.chromosome:
        radia_call = [params.radia_executable]  # Base radia call
        radia_call.extend([params.out_prefix, chrom])
        radia_call.extend(['-n', params.norm_d_file])
        radia_call.extend(['-t', params.tum_d_file])
        if params.tum_r_file is not None:
            radia_call.extend(['-r', params.tum_r_file])
        # NOTE(review): params.rna_fasta is used even when no tumor RNA file
        # was given -- confirm it is always set by the parser.
        radia_call.append(''.join(['--rnaTumorFasta=', params.rna_fasta]))
        radia_call.extend(['-f', params.genome_fasta])
        radia_call.extend(['-o', ''.join([params.out_prefix, '_', chrom,
                                          '.vcf'])])
        radia_call.extend(['-i', params.genome_version])
        radia_call.extend(['-m', params.genome_fasta])
        radia_call.extend(['-d', params.data_source])
        radia_call.extend(['-q', params.seq_platform])
        # Pass the value of the disease parameter, not the literal text
        # 'params.disease'.
        radia_call.extend(['--disease', params.disease])
        radia_call.extend(['-l', '\"INFO\"'])
        radia_call.extend(['-g', ''.join([params.out_prefix, '_', chrom,
                                          '.log'])])
        return_value = call(radia_call)
        if return_value != 0:
            raise pi_errors.MyRuntimeError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': radia failed.', params.logfile)
    # Call radia filtering, once per chromosome
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Radia completed. Running FilterRadia now.', file=params.logfile)
    for chrom in params.chromosome:
        # NOTE(review): the filter step is launched with the same
        # params.radia_executable as the calling step above -- confirm that
        # this is the intended FilterRadia entry point.
        filter_radia_call = [params.radia_executable]  # Base filter radia call
        filter_radia_call.extend([params.out_prefix, chrom])
        filter_radia_call.append(''.join([params.out_prefix, '_', chrom,
                                          '_filtered.vcf']))
        filter_radia_call.append(params.working_dir)
        filter_radia_call.append(os.path.split(params.radia_executable)[0])
        filter_radia_call.extend(['-b', database_map['blacklist']])
        filter_radia_call.append('-d')
        filter_radia_call.extend(['-r', database_map['retrogenes']])
        filter_radia_call.extend(['-p', database_map['pseudogenes']])
        filter_radia_call.append('-c')
        filter_radia_call.extend(['-t', database_map['broad_targets']])
        if params.use_snpeff:
            filter_radia_call.extend(['-s', params.snpeff_jar])
            filter_radia_call.extend(['-e', params.genome_version])
            if not params.no_canonical:
                # Must be a plain string: a nested list inside the argument
                # vector makes subprocess.call raise a TypeError.
                filter_radia_call.append('--canonical')
        else:
            filter_radia_call.append('--noSnpEff')
        filter_radia_call.extend(['--rnaGeneBlckFile',
                                  database_map['rna_blacklist']])
        filter_radia_call.extend(['--rnaGeneFamilyBlckFile',
                                  database_map['rna_family_blacklist']])
        filter_radia_call.extend(['-f', params.genome_fasta])
        filter_radia_call.extend(['-l', '\"INFO\"'])
        filter_radia_call.extend(['-g', ''.join([params.out_prefix, '_', chrom,
                                                 '_filter.log'])])
        # Run the filter command built above (not the last radia_call left
        # over from the previous loop).
        return_value = call(filter_radia_call)
        if return_value != 0:
            raise pi_errors.MyRuntimeError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': FilterRadia failed.', params.logfile)
        # Remove this chromosome's unfiltered vcf and radia log
        call(['rm', ''.join([params.out_prefix, '_', chrom, '.log']),
              ''.join([params.out_prefix, '_', chrom, '.vcf'])])

    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Radia run completed. Finishing up...', file=params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': Process ' +
          'completed', file=params.logfile)
    params.logfile.close()
Example #9
0
def main():
    '''
    This wrapper script will run the entire alignment pipeline for genomic DNA
    (WGS or WXS) from alignment of fastqs, to sorting, indexing, and Read Group
    incorporation. The wrapper can even download and produce bwa references if
    required. The wrapper requires
    1. bwa (For aligning reads)
    2. java (For picard)
    3. picard tools (For read groups)
    4. samtools (For sam/bam manipulation)
    5. twoBitToFa from the kent tools library (For extracting the reference
            genome in case indexing is required)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # NOTE: the docstring above is handed to prepare.parse_args() below, so it
    # is runtime text and is deliberately left untouched.
    #
    # Overall flow: validate parameters -> locate (or build) the bwa index ->
    # bwa mem -> sam to bam -> strip CL from the @PG header line -> sort and
    # index -> picard AddOrReplaceReadGroups -> index -> move outputs.
    # Every failed external step raises pi_errors.MyRuntimeError.

    #  Parse the arguments using prepare.parse_args()
    params = prepare.parse_args(main.__doc__, 'bwa', 'bwa_alignment')

    #  Params ERROR handling
    #  The memory option for java should be of the form Xmx10G or Xmx10M
    if not (params.java_Xmx.endswith('G') or params.java_Xmx.endswith('M')):
        raise pi_errors.ParameterError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': Please use a suitable value for --Xmx.', params.logfile)
    params.bwa_executable = pi_errors.test_param_value(params.bwa_executable,
                                                       'bwa', '--bwa',
                                                       params.logfile)
    params.samtools_executable = pi_errors.test_param_value(
        params.samtools_executable, 'samtools', '--samtools', params.logfile)
    params.java_executable = pi_errors.test_param_value(
        params.java_executable, 'java', '--java', params.logfile)
    #  If Indexing is required, does twoBitToFa point to a valid file?
    if params.index_location is None:
        params.tbtf_executable = pi_errors.test_param_value(
            params.tbtf_executable, 'twoBitToFa', '--twoBitToFa',
            params.logfile)
    if not params.picard_jar.endswith('jar'):
        raise pi_errors.ParameterError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': Please specify a valid jar file for picard!', params.logfile)
    else:
        params.picard_jar = pi_errors.test_param_value(params.picard_jar,
                                                       'picard',
                                                       '--picard_jar',
                                                       params.logfile)

    # NOTE(review): RGID is defaulted here, but the picard call further down
    # uses a fixed 'ID=1' -- confirm whether RGID was meant to be used there.
    if params.RGID is None:
        params.RGID = params.file_prefix

    #read_group = ''.join(['\'@RG\\tID:', params.RGID, '\\tPL:ILLUMINA\\tSM:',
    #                      params.sample_type, '\''])
    # Check for indexes. If the user has specified that indexes need to
    # be created then do so.
    if params.index_location is None:
        print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
              'Indexing fasta...',
              file=params.logfile)
        if not os.path.exists(params.index_destination):
            prepare.py_mkdir(params.index_destination)
        index_path = params.index_destination
        # Use the twoBitToFa executable that was validated above
        # (params.tbtf_executable); the previous attribute name
        # (params.twoBitToFa_executable) is never set in this function.
        genome_fasta = prepare.get_genome(params.genome_version, index_path,
                                          params.tbtf_executable,
                                          params.logfile)
        print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ':    ' +
              'Running BWA index on fasta reference.',
              file=params.logfile)
        return_value = call([params.bwa_executable, 'index', genome_fasta])
        if return_value != 0:
            raise pi_errors.MyRuntimeError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': bwa index failed.', params.logfile)
        print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ':    ' +
              'Running samtools faidx.',
              file=params.logfile)
        return_value = call(
            [params.samtools_executable, 'faidx', genome_fasta])
        if return_value != 0:
            raise pi_errors.MyRuntimeError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': samtools faidx failed', params.logfile)
        # bwa is pointed at the fasta itself; its index files sit beside it.
        index_prefix = genome_fasta
        print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
              'Indexing completed.',
              file=params.logfile)
    else:
        if params.index_location.endswith('.fa'):
            # The user pointed directly at a fasta file.
            assert os.path.exists(
                params.index_location), 'Index file not found'
            index_prefix = params.index_location
        else:
            # The user pointed at a folder: it must hold exactly one .fa file.
            fastas = [
                x for x in os.listdir(params.index_location)
                if x.endswith(".fa")
            ]
            if len(fastas) == 1:
                index_prefix = "".join([params.index_location, '/', fastas[0]])
            elif len(fastas) == 0:
                raise pi_errors.InputFileError(
                    dt.now().strftime('%I:%M %p %b %d, %Y') + \
                    ': No valid fasta found in provided index folder',
                    params.logfile)
            else:
                raise pi_errors.InputFileError(
                    dt.now().strftime('%I:%M %p %b %d, %Y') + \
                    ':Multiple fastas found in provided index folder. Try ' + \
                    'running with --index_location /path/to/file/filename.fa',
                    params.logfile)

    # Move to working directory before doing I/O intensive alignment
    os.chdir(params.working_dir)

    # Align reads to sam file
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') +
          ': Aligning' + ' reads to reference.',
          file=params.logfile)
    bwa_call = [params.bwa_executable, 'mem']  # base call
    bwa_call.extend(['-t', str(params.n)])  # Number of threads
    #bwa_call.extend(['-R', read_group])  # Read group
    bwa_call.append(index_prefix)  # bwa index
    bwa_call.append(''.join(
        [params.file_path, '/', params.file_prefix, '_1.fastq']))
    bwa_call.append(''.join(
        [params.file_path, '/', params.file_prefix, '_2.fastq']))
    print(' '.join(bwa_call), file=params.logfile)
    # bwa writes the alignment to stdout and its chatter to stderr.
    with open(''.join([params.file_prefix, '.sam']), 'w') as samfile, \
            open(''.join([params.file_prefix, '_bwa_log.txt']), 'w') as logfile:
        return_value = call(bwa_call, stdout=samfile, stderr=logfile)
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': bwa mem failed.', params.logfile)
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Alignment completed. Converting to bam',
          file=params.logfile)
    # Convert the sam to a bam file
    with open(''.join([params.file_prefix, '.bam']), 'w') as bamfile:
        return_value = call([
            params.samtools_executable, 'view', '-bS', ''.join(
                [params.file_prefix, '.sam'])
        ],
                            stdout=bamfile)
    # Fail before the sam is deleted so that a broken conversion does not
    # throw away the only copy of the alignment.
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': samtools view failed.', params.logfile)
    call(['rm', ''.join([params.file_prefix, '.sam'])])
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') +
          ': bam file' +
          ' created. Preparing file for inserting RG into header.',
          file=params.logfile)
    # Fix PG line
    sam_header = check_output([
        params.samtools_executable, 'view', '-H',
        ''.join([params.file_prefix, '.bam'])
    ])
    sam_header = sam_header.strip().split(
        '\n')  # Strip whitespace and separate
    # The last header line is taken to be the @PG line.
    pg_line = sam_header[-1].split('\t')  # Grab @PG line + split by tab
    # Then remove the CL field from the PG line
    sam_header[-1] = '\t'.join([x for x in pg_line if not x.startswith('CL')])
    with open(''.join([params.file_prefix, '_sam.header']), 'w') as hdr_file:
        print('\n'.join(sam_header), file=hdr_file)
    with open(''.join([params.file_prefix, '_fixPG.bam']), 'w') as \
              fixpg_bamfile:
        return_value = call([
            params.samtools_executable, 'reheader', ''.join([
                params.file_prefix, '_sam.header'
            ]), ''.join([params.file_prefix, '.bam'])
        ],
                            stdout=fixpg_bamfile)
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': samtools reheader failed', params.logfile)
    call([
        'rm', ''.join([params.file_prefix, '.bam']),
        ''.join([params.file_prefix, '_sam.header'])
    ])
    # Sort and Index the _fixPG.bam file
    return_value = call([
        params.samtools_executable, 'sort',
        ''.join([params.file_prefix,
                 '_fixPG.bam']), ''.join([params.file_prefix, '_fixPG_sorted'])
    ])
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': samtools sort failed.', params.logfile)
    return_value = call([
        params.samtools_executable, 'index',
        ''.join([params.file_prefix, '_fixPG_sorted.bam'])
    ])
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': samtools index failed.', params.logfile)
    call(['rm', ''.join([params.file_prefix, '_fixPG.bam'])])
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Inserting @RG tag into header.',
          file=params.logfile)
    # Reheader the indexed _fixPG_sorted.bam to prepare for mutect
    picard_call = [
        params.java_executable, ''.join(['-Xmx', params.java_Xmx]), '-jar'
    ]  #  Base java call
    picard_call.append(params.picard_jar)  # picard
    picard_call.append('AddOrReplaceReadGroups')  # module
    picard_call.append('CREATE_INDEX=true')
    picard_call.append(''.join(['I=', params.file_prefix,
                                '_fixPG_sorted.bam']))
    picard_call.append(''.join(
        ['O=', params.file_prefix, '_fixPG_sorted_reheader.bam']))
    picard_call.append('SO=coordinate')
    picard_call.append('ID=1')
    picard_call.append(''.join(['LB=', params.file_prefix]))
    picard_call.append('PL=ILLUMINA')
    picard_call.append('PU=12345')
    picard_call.append(''.join(['SM=', params.sample_type]))
    with open(''.join([params.file_prefix, '_picard_log.txt']),
              'w') as logfile:
        return_value = call(picard_call, stdout=logfile)
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': picard AddOrReplaceReadGroups failed.', params.logfile)
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': @RG ' +
          'inserted. Indexing bam',
          file=params.logfile)
    # Index _fixPG_sorted_reheader.bam file
    return_value = call([
        params.samtools_executable, 'index',
        ''.join([params.file_prefix, '_fixPG_sorted_reheader.bam'])
    ])
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': samtools index failed.', params.logfile)
    # Remove intermediate files
    call([
        'rm', ''.join([params.file_prefix, '_fixPG_sorted.bam']),
        ''.join([params.file_prefix, '_fixPG_sorted.bam.bai'])
    ])

    # Write to the log file (file=), like every other progress message; a
    # positional params.logfile would be printed to stdout instead.
    print(
        'PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
        'Alignment completed. Finishing up...', file=params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': Process ' +
          'completed',
          file=params.logfile)
    params.logfile.close()
    # NOTE(review): everything from here to the end of this function looks
    # like a fragment of a separate MHCI prediction wrapper that ended up
    # attached to this one -- confirm and relocate/remove. As written it runs
    # after params.logfile has already been closed above, and it reads
    # peplen_filenames, which is not assigned anywhere earlier in this
    # function.
    #  set up the strip allele regex
    strip_allele_regex = re.compile('[\*:-]')
    # One prediction run per (allele, peptide length) pair.
    for allele in params.alleles:
        for peptide_length in params.peplen:
            #  Setup the output file
            # strip_allele replaces '*', ':' and '-' with '_'; it is computed
            # here but the output file name below is built from the raw
            # allele instead.
            strip_allele = re.sub(strip_allele_regex, '_', allele)
            #  Setup the call
            mhc_i_call = ['python', params.mhc_executable] #  base call
            mhc_i_call.append(params.pred_meth) #  prediction method
            mhc_i_call.append(allele) #  Allele
            # presumably peptide_length is already a string here, since it is
            # passed straight into the argument list -- verify against caller
            mhc_i_call.append(peptide_length)
            mhc_i_call.append(peplen_filenames[peptide_length])
            # NOTE(review): the file name does not include the peptide
            # length, so each length reopens (mode 'w') the same file for a
            # given allele -- confirm this overwrite is intended.
            mhc_outfile_name = ''.join([params.out_prefix, '_', allele, '.tsv'])
            # Predictions go to the .tsv via stdout; stderr goes to the log.
            with open(mhc_outfile_name, 'w') as mhc_outfile:
                return_value = call(mhc_i_call, stdout=mhc_outfile,
                                    stderr=params.logfile)
            if return_value != 0:
                raise pi_errors.MyRuntimeError(
                    dt.now().strftime('%I:%M %p %b %d, %Y') + \
                    ': MHCI prediction failed.', params.logfile)
    
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': Process ' +
          'completed', file=params.logfile)
    params.logfile.close()


if __name__ == "__main__":
    # Run the wrapper and hand whatever main() returns to the interpreter as
    # the process exit status.
    exit_status = main()
    sys.exit(exit_status)
def main():
    '''
    This wrapper script will run the tool PHLAT within the phlat docker
    container for the precision immuno project. The wrapper requires:
    1. PHLAT.py
    2. bowtie2
    3. gdown.pl (For donwloading the PHLAT Index - available from
       https://raw.githubusercontent.com/Nanolx/patchimage/master/tools/gdown.pl)

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    '''
    # The docstring above is passed to prepare.parse_args() and is therefore
    # kept exactly as it was.

    def timestamp():
        # Current time in the format shared by all log and error messages.
        return dt.now().strftime('%I:%M %p %b %d, %Y')

    # Collect the command line options
    params = prepare.parse_args(main.__doc__, 'phlat', 'MHC_typing')

    # Validate the executables. --phlat may be given as the directory that
    # holds PHLAT.py, in which case the script name is appended first.
    if not params.phlat_executable.endswith('PHLAT.py'):
        params.phlat_executable = params.phlat_executable + '/' + 'PHLAT.py'
    params.phlat_executable = pi_errors.test_param_value(
        params.phlat_executable, 'PHLAT', '--phlat', params.logfile)
    params.bowtie2_executable = pi_errors.test_param_value(
        params.bowtie2_executable, 'bowtie2', '--bowtie2', params.logfile)
    # Two levels above PHLAT.py; later handed to PHLAT through -e.
    phlat_dir = os.path.dirname(os.path.dirname(params.phlat_executable))
    params.gdownpl_executable = pi_errors.test_param_value(
        params.gdownpl_executable, 'gdown.pl', '--gdownpl', params.logfile)

    if params.index_location is not None:
        # An index folder was supplied: it has to contain the bowtie2 index.
        params.index_location = os.path.abspath(params.index_location)
        expected_index_file = params.index_location + '/ucsc.artHLA.1.bt2'
        if not os.path.exists(expected_index_file):
            raise pi_errors.InputFileError(
                timestamp() + ': Index file not found.', params.logfile)
        index_path = params.index_location
    else:
        # No index folder supplied: fetch and unpack the index archive.
        print('PROGRESS ' + timestamp() + ': ' + 'Downloading Indexes...',
              file=params.logfile)
        params.index_destination = os.path.abspath(params.index_destination)
        if not os.path.exists(params.index_destination):
            prepare.py_mkdir(params.index_destination)
        index_url = ('https://drive.google.com/uc?export=download'
                     '&confirm=yAjx&id=0Bz-w5tutuZIYY3h5YlMzTjhnbGM')
        tarball = params.index_destination + '/index4phlat.tar.gz'
        getindex_call = [params.gdownpl_executable, index_url, tarball]
        print(getindex_call, file=params.logfile)
        if call(getindex_call) != 0:
            raise pi_errors.MyRuntimeError(
                timestamp() +
                ': Could not download indexes. Try manually downloading.',
                params.logfile)
        untar_status = call(
            ['tar', '-C', params.index_destination, '-zxvf', tarball])
        if untar_status != 0:
            raise pi_errors.MyRuntimeError(
                timestamp() + ': Index4phlat could not be extracted.',
                params.logfile)
        # The archive is no longer needed once it has been unpacked.
        call(['rm', tarball])
        index_path = params.index_destination + '/' + 'index4phlat'
        print('PROGRESS ' + timestamp() + ': ' + 'Indexes Downloaded.',
              file=params.logfile)

    # Move to working directory before doing I/O intensive alignment
    os.chdir(params.working_dir)

    print('PROGRESS ' + timestamp() + ':    ' + 'Begining MHC Haplotyping',
          file=params.logfile)
    fastq_stem = params.file_path + '/' + params.file_prefix
    system_call = [
        '/usr/bin/env', 'python2.7', '-O', params.phlat_executable,
        '-1', fastq_stem + '_1.fastq',             # Fq1
        '-2', fastq_stem + '_2.fastq',             # Fq2
        '-index', index_path,                      # Index files
        '-b2url', params.bowtie2_executable,       # Bowtie2
        '-tag', ''.join([params.out_prefix]),      # DNA/RNA
        '-e', phlat_dir,                           # Phlat directory home
        '-o', params.outdir,                       # Output directory
        '-p', str(params.n),                       # Number of threads
    ]

    # Run PHLAT
    if call(system_call) != 0:
        raise pi_errors.MyRuntimeError(
            timestamp() + ': MHC Haplotyping failed.', params.logfile)
    print('PROGRESS ' + timestamp() + ': ' +
          'Alignment completed. Finishing up...', file=params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + timestamp() + ': Process ' + 'completed',
          file=params.logfile)
    params.logfile.close()
def main():
    """
    This wrapper script will run the tool mutect within the  mutect docker
    container for the precision immuno project. The wrapper requires
    1. mutect
    2. java (For running mutect)
    3. twoBitToFa from the kent tools library (For extracting the reference
            genome in case indexing is required)
    4. lftp for downloading the cosmic vcf

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    """
    # NOTE: the docstring above is handed to prepare.parse_args() below, so it
    # is runtime text and is deliberately left untouched.
    #
    # Overall flow: validate parameters -> resolve (or download) the dbsnp /
    # cosmic vcfs and the genome fasta -> run MuTect -> write a copy of the
    # MuTect output without the REJECT rows -> move outputs.

    # Parse the arguments using prepare.parse_args()
    params = prepare.parse_args(main.__doc__, 'mutect', 'mutect_calls')
    # params ERROR handling
    #  The memory option for java must end in G or M
    if not (params.java_Xmx.endswith('G') or params.java_Xmx.endswith('M')):
        raise pi_errors.ParameterError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': Please use a suitable value for --Xmx.', params.logfile)
    params.java_executable = pi_errors.test_param_value(params.java_executable,
                                                        'java',
                                                        '--java',
                                                        params.logfile)
    params.mutect_jar = pi_errors.test_param_value(params.mutect_jar,
                                                   'Mutect jar',
                                                   '--mutect_jar',
                                                   params.logfile)
    #  If Indexing is required, does twoBitToFa point to a valid file?
    if params.index_location is None:
        params.tbtf_executable = pi_errors.test_param_value(
            params.tbtf_executable, 'twoBitToFa', '--twoBitToFa',
            params.logfile)
    #  Do the dbsnp and cosmic vcfs exist?
    if params.dbsnp_file == 'DOWNLOAD' or params.cosmic_file == 'DOWNLOAD':
        #  First ensure the vcf storage location has been provided
        if params.vcf_location is None:
            raise pi_errors.ParameterError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': --vcf_location cannot be empty if either --cosmic, ' + \
                '--dbsnp, or --genome_fasta are empty.', params.logfile)
        else:
            params.vcf_location = os.path.abspath(params.vcf_location)
        # Download dbsnp file if required (reuse a copy already on disk)
        if params.dbsnp_file == 'DOWNLOAD':
            if os.path.exists('/'.join([params.vcf_location, '00-All.vcf'])):
                params.dbsnp_file = '/'.join([params.vcf_location,
                                              '00-All.vcf'])
            else:
                params.dbsnp_file = prepare.download_vcf('dbsnp', params)
        # Download cosmic file if required (reuse a copy already on disk)
        if params.cosmic_file == 'DOWNLOAD':
            if os.path.exists('/'.join([params.vcf_location,
                                        'Cosmic_sorted.vcf'])):
                params.cosmic_file = '/'.join([params.vcf_location,
                                               'Cosmic_sorted.vcf'])
            else:
                params.cosmic_file = prepare.download_vcf('cosmic', params)
    # Download genome fasta if required
    if params.genome_fasta == 'DOWNLOAD':
        if params.vcf_location is None:
            #  If params.vcf_location is None, set it to the output directory
            params.vcf_location = params.outdir
        #  Does the fasta exist in the vcf_location directory?
        if os.path.exists(''.join([params.vcf_location, '/',
                                   params.genome_version, '.fa'])):
            params.genome_fasta = ''.join([params.vcf_location, '/',
                                           params.genome_version, '.fa'])
        else:
            params.genome_fasta = prepare.get_genome(params.genome_version,
                                                     params.vcf_location,
                                                     params.tbtf_executable,
                                                     params.logfile)
    else:
        params.genome_fasta = pi_errors.test_param_value(params.genome_fasta,
                                                         'Genomic Fasta',
                                                         '--genome_fasta',
                                                         params.logfile)

    # Move to working directory before doing I/O intensive work
    os.chdir(params.working_dir)

    # Call the program
    mutect_call = [params.java_executable, ''.join(['-Xmx', params.java_Xmx]),
                   '-jar'] #  Base java call
    mutect_call.append(params.mutect_jar)
    mutect_call.extend(['-T', 'MuTect'])
    mutect_call.extend(['-R', params.genome_fasta])
    mutect_call.extend(['--cosmic', params.cosmic_file])
    mutect_call.extend(['--dbsnp', params.dbsnp_file])
    mutect_call.extend(['--input_file:normal', params.norm_d_file])
    mutect_call.extend(['--input_file:tumor', params.tum_d_file])
    mutect_call.extend(['--out', ''.join([params.out_prefix, '.out'])])
    return_value = call(mutect_call)
    if return_value != 0:
        raise pi_errors.MyRuntimeError(
            dt.now().strftime('%I:%M %p %b %d, %Y') + \
            ': MuTect failed.', params.logfile)

    # Copy the MuTect output, dropping every row flagged REJECT.
    with open(''.join([params.out_prefix, '.out']), 'r') as mutect_file, \
            open(''.join([params.out_prefix, 'non_rejected.out']), 'w') as \
            nr_file:
        for line in mutect_file:
            line = line.strip()
            # Comment lines are copied through unchanged.
            if line.startswith('#'):
                print(line, file=nr_file)
                continue
            # The column header row (starts with 'contig') is turned into a
            # comment line.
            if line.startswith('contig'):
                print('#', line, sep='', file=nr_file)
                continue
            line = line.split('\t')
            # Column 51 (index 50) is tested for the REJECT flag.
            if line[50] == 'REJECT':
                continue
            else:
                # Re-join the fields: printing the list itself would write
                # its repr ("['a', 'b', ...]") because sep only separates
                # multiple print arguments.
                print('\t'.join(line), file=nr_file)

    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Mutect run completed. Finishing up...', file=params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': Process ' +
          'completed', file=params.logfile)
    params.logfile.close()
Example #13
0
def main():
    """
    This wrapper script will run the tool radia within the  radia docker
    container for the precision immuno project. The wrapper requires
    1. radia
    2. snpeff (if the --use_snpeff flag is used)
    3. twoBitToFa from the kent tools library (For extracting the reference
            genome in case indexing is required)
    4. lftp for downloading the cosmic vcf

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.

    If you want to use a genome build other than hg19 then download cosmic
    and dbsnp vcfs manually and pass them to this program. This program
    currently only works with hg19 due to how cosmic and NCBI's ownload pages
    work. Other options may be made available in the future.

    The vcfs for the various databases are assumed to be in the parent folders
    of the radia executable (../data/*).  If the data isn't found is the parent
    directories, the program will search VCF_LOCATION before throwing a warning
    and continuing without the said database.  Only dbsnp and cosmic are
    downloaded if not present.
    """
    # NOTE: the docstring above is handed to prepare.parse_args() below, so it
    # is runtime text and is deliberately left untouched.
    #
    # Overall flow: validate parameters -> run radia once per chromosome ->
    # run the radia filter once per chromosome and delete that chromosome's
    # unfiltered .vcf/.log -> move outputs.

    # Parse the arguments using prepare.parse_args()
    params = prepare.parse_args(main.__doc__, 'radia', 'radia_calls')
    # params ERROR handling and processing
    # database_map is indexed below by database name ('blacklist',
    # 'retrogenes', ...) to obtain the value passed to each filter option.
    database_map = process_parameters(params)

    # Move to working directory before doing I/O intensive alignment
    os.chdir(params.working_dir)

    # Call the program
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Starting radia run.',
          file=params.logfile)
    for chrom in params.chromosome:
        radia_call = [params.radia_executable]  #  Base radia call
        radia_call.extend([params.out_prefix, chrom])
        radia_call.extend(['-n', params.norm_d_file])
        radia_call.extend(['-t', params.tum_d_file])
        if params.tum_r_file is not None:
            radia_call.extend(['-r', params.tum_r_file])
        radia_call.append(''.join(['--rnaTumorFasta=', params.rna_fasta]))
        radia_call.extend(['-f', params.genome_fasta])
        radia_call.extend(
            ['-o', ''.join([params.out_prefix, '_', chrom, '.vcf'])])
        radia_call.extend(['-i', params.genome_version])
        radia_call.extend(['-m', params.genome_fasta])
        radia_call.extend(['-d', params.data_source])
        radia_call.extend(['-q', params.seq_platform])
        # Pass the value of the option, not the literal text
        # 'params.disease'.
        radia_call.extend(['--disease', params.disease])
        radia_call.extend(['-l', '\"INFO\"'])
        radia_call.extend(
            ['-g', ''.join([params.out_prefix, '_', chrom, '.log'])])
        return_value = call(radia_call)
        if return_value != 0:
            raise pi_errors.MyRuntimeError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': radia failed.', params.logfile)
    # Call radia filtering
    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Radia completed. Running FilterRadia now.',
          file=params.logfile)
    for chrom in params.chromosome:
        # NOTE(review): the filter step starts from the same executable as
        # the radia step above -- confirm that params.radia_executable is the
        # right entry point for filtering.
        filter_radia_call = [params.radia_executable
                             ]  #  Base filter radia call
        filter_radia_call.extend([params.out_prefix, chrom])
        filter_radia_call.append(''.join(
            [params.out_prefix, '_', chrom, '_filtered.vcf']))
        filter_radia_call.append(params.working_dir)
        filter_radia_call.append(os.path.split(params.radia_executable)[0])
        filter_radia_call.extend(['-b', database_map['blacklist']])
        filter_radia_call.extend(['-d'])
        filter_radia_call.extend(['-r', database_map['retrogenes']])
        filter_radia_call.extend(['-p', database_map['pseudogenes']])
        filter_radia_call.extend(['-c'])
        filter_radia_call.extend(['-t', database_map['broad_targets']])
        if params.use_snpeff:
            filter_radia_call.extend(['-s', params.snpeff_jar])
            filter_radia_call.extend(['-e', params.genome_version])
            if not params.no_canonical:
                # Append the flag itself; a nested list inside the argument
                # list is not a valid argument for call().
                filter_radia_call.append('--canonical')
        else:
            filter_radia_call.append('--noSnpEff')
        filter_radia_call.extend(
            ['--rnaGeneBlckFile', database_map['rna_blacklist']])
        filter_radia_call.extend(
            ['--rnaGeneFamilyBlckFile', database_map['rna_family_blacklist']])
        filter_radia_call.extend(['-f', params.genome_fasta])
        filter_radia_call.extend(['-l', '\"INFO\"'])
        filter_radia_call.extend(
            ['-g', ''.join([params.out_prefix, '_', chrom, '_filter.log'])])
        # Run the filter command assembled above (previously the stale
        # radia_call left over from the first loop was executed again).
        return_value = call(filter_radia_call)
        if return_value != 0:
            raise pi_errors.MyRuntimeError(
                dt.now().strftime('%I:%M %p %b %d, %Y') + \
                ': FilterRadia failed.', params.logfile)
        # Drop the unfiltered per-chromosome outputs; best-effort cleanup.
        call([
            'rm', ''.join([params.out_prefix, '_', chrom, '.log']),
            ''.join([params.out_prefix, '_', chrom, '.vcf'])
        ])

    print('PROGRESS ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': ' +
          'Radia run completed. Finishing up...',
          file=params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': Process ' +
          'completed',
          file=params.logfile)
    params.logfile.close()
def main():
    """
    This wrapper script will run the IEDB tools within the  cutadapt docker
    container for the precision immuno project. The wrapper requires
    1. IEDB tools for MHCI prediction - http://tools.iedb.org/mhci
    2. netMHCIIpan - In case IEDB tools fails
    3. python

    This script requires an input file (--file_prefix) that contains
                        (2 * PEPLEN - 1)-mer
    fasta records for analysis. FILE_PREFIX must be a .faa file.

    Unless specified, the program will look for default executables on $PATH.
    The program DOES NOT look for jar files and they are required to be
    passed during execution.
    """
    # NOTE: the docstring above is handed to prepare.parse_args() at runtime,
    # so its text is deliberately left untouched here.
    #
    # Flow: for every allele, run the IEDB predictor with stdout captured in
    # a per-allele .tsv file; if IEDB exits non-zero, retry with netMHCIIpan.
    # Raises pi_errors.MyRuntimeError if the fallback fails as well.

    # Parse the arguments using prepare.parse_args()
    params = prepare.parse_args(main.__doc__, 'mhc', 'mhci_predictions')

    #  Params ERROR handling
    #  The returned value is passed to both predictors as the input peptide
    #  file (presumably the validated path of the .faa file -- confirm against
    #  process_parameters).
    pepfilename = process_parameters(params)

    # Move to working directory before doing I/O intensive work
    os.chdir(params.working_dir)

    #  set up the different allele regexes
    strip_allele_regex = re.compile(r'[\*:/-]')  # For strip allele
    dpqa_allele_regex_1 = re.compile(r'[\*:]')   # For DPA and DQA if
    dpqa_allele_regex_2 = re.compile(r'/')       # netMHCIIpan is used
    for allele in params.alleles:
        #  Strip allele converts HLA-DRB1*15:01 to HLA_DRB1_15_01 and
        #  HLA-DQA1*01:02/DQB1*03:02 to HLA_DQA1_01_02_DQB1_03_02
        strip_allele = strip_allele_regex.sub('_', allele)
        #  Setup the output file. The stripped allele is used because the raw
        #  allele may contain '/', '*' and ':' ('/' in particular would be
        #  treated as a directory separator and make the path unopenable).
        mhc_outfile_name = ''.join(
            [params.out_prefix, '_', strip_allele, '.tsv'])
        # NOTE(review): peptide_length is not passed to either predictor, so
        # every iteration repeats the same call and rewrites the same output
        # file. Loop kept to preserve the original run count -- confirm intent.
        for peptide_length in params.peplen:
            #  Setup the call
            mhc_ii_call = ['python', params.mhc_executable]  # base call
            mhc_ii_call.append(params.pred_meth)  # prediction method
            mhc_ii_call.append(allele)  # Allele
            mhc_ii_call.append(pepfilename)
            with open(mhc_outfile_name, 'w') as mhc_outfile:
                return_value = call(mhc_ii_call, stdout=mhc_outfile,
                                    stderr=params.logfile)
            if return_value == 0:
                continue
            print('WARNING: IEDBtools failed.  Attempting netMHCIIpan',
                  file=params.logfile)
            #  netmHCIIpan needs a different formatting for allele
            #  HLA-DQA1*01:02/DQB1*03:02 should be HLA-DQA10102-DQB10302
            #  HLA-DRB1*15:01 should be DRB1_1501. DP and DQ are similar
            #  A separate name is used so that the loop variable `allele`
            #  keeps its IEDB formatting for subsequent iterations.
            if allele.startswith('HLA-DQ') or allele.startswith('HLA-DP'):
                netmhc_allele = dpqa_allele_regex_1.sub('', allele)
                netmhc_allele = dpqa_allele_regex_2.sub('-', netmhc_allele)
            else:
                # Easier than starting from allele
                netmhc_allele = strip_allele[4:]
            netmhc_ii_call = [params.netmhciipan_executable]
            netmhc_ii_call.extend(['-a', netmhc_allele])  # Allele
            #  Every argv entry must be a string; an int here makes
            #  subprocess raise TypeError before the tool is even started.
            netmhc_ii_call.extend(['-xls', '1'])
            netmhc_ii_call.extend(['-xlsfile', mhc_outfile_name])
            netmhc_ii_call.extend(['-f', pepfilename])
            #  Run the fallback command (not the IEDB command that just failed)
            return_value = call(netmhc_ii_call, stderr=params.logfile)
            if return_value != 0:
                raise pi_errors.MyRuntimeError(
                    dt.now().strftime('%I:%M %p %b %d, %Y') + \
                    ': MHCII prediction failed.', params.logfile)
    # Move files from temp directory to outdir
    prepare.move_output(params)
    print('RESULT ' + dt.now().strftime('%I:%M %p %b %d, %Y') + ': Process ' +
          'completed', file=params.logfile)
    params.logfile.close()