Example #1
def run_merge_paired_reads(input, output, params=None):
    """Merge R1 and R2 ends into interleaving file for bfast to work on.
    
    """
    
    # Update input and output from global config object
    params = config['merge_paired_reads_params']
    input = sorted(input)
    params['input_read1'] = input[0]
    params['input_read2'] = input[1]
    params['output'] = output
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir'] 
    
    cmd = "python %(exec)s --gzipped -1 %(input_read1)s -2  %(input_read2)s | gzip -c > %(output)s" % params
    # cmd = "python %(exec)s -1 %(input_read1)s -2  %(input_read2)s | gzip -c > %(output)s" % params
    
    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="mergepairs",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #2
def run_sickle(input, output, output_singles, params=None):
    """Run sickle to trim ends of reads based on sequence quality and length.
    
    Also gzips the output to save space.
    
    """
    
    input = sorted(input)
    params['input_read1'] = input[0]
    params['input_read2'] = input[1]

    params['input_read1_base'] = os.path.splitext(os.path.basename(input[0]))[0]
    params['input_read2_base'] = os.path.splitext(os.path.basename(input[1]))[0]

    # Strip the .gz suffix; sickle writes uncompressed output, which we gzip below.
    params['output_read1'] = output[0].split(".gz")[0]
    params['output_read2'] = output[1].split(".gz")[0]
    params['output_singles'] = output_singles

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = ("module load %(modules)s\n"
           "%(exec)s pe -t sanger -l %(length)s -q %(quality)s -f %(input_read1)s -r %(input_read2)s -o %(output_read1)s -p %(output_read2)s -s %(output_singles)s\n"
           "gzip %(output_read1)s &\n" 
           "gzip %(output_read2)s &\n" 
           "wait" % params)
    
    logger.debug("cmd = %s" % (cmd,))
    
    job_id = utils.safe_qsub_run(cmd, jobname="sickle",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #3
def run_write_recalibrated_bam(input, output, bqsr_file, params=None):
    """GATK write BAM file with recalibrated base scores.

    """
    
    params['input'] = input
    params['output'] = output
    params['bqsr_file'] = bqsr_file
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "module load %(modules)s\n" % params
    cmd += ("java -Xmx%(maxjheap)s -Djava.io.tmpdir=%(tmp_dir)s -jar %(jar_file)s "
            "-T PrintReads -I %(input)s -R %(reference_fasta)s -BQSR %(bqsr_file)s "
            "-o %(output)s" % params)

    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="writerecal",
                                 nodes=params['qsub_nodes'],
                                 # params=params['qsub_params'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #4
def run_realign_indel_creator(input, output, params=None):
    """First part of GATK recalibration.

    """

    realign_params = config['gatk_realigner_target_creator_params']
    realign_params['input'] = input
    realign_params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    knowns = ""
    for known in config['gatk_realigner_target_creator_params']['known_files']:
        knowns += "--known %s " % known

    realign_params['knowns'] = knowns
    
    cmd = "java -Xmx%(maxjheap)s -jar %(jar_file)s -nt %(threads)s -R %(reference_fasta)s -T RealignerTargetCreator -o %(output)s %(knowns)s" % realign_params

    job_id = utils.safe_qsub_run(cmd, jobname="realignTC",
                                 nodes=realign_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #5
def run_indel_realigner(input, output, params=None):
    """Second part of GATK recalibration.

    """

    realign_params = config['gatk_indel_realigner_params']
    realign_params['input'] = input
    realign_params['output'] = output

    knowns = ""
    for known in config['gatk_realigner_target_creator_params']['known_files']:
        knowns += "-known %s " % known

    realign_params['knowns'] = knowns
    realign_params['target_intervals'] = config['gatk_realigner_target_creator_params']['output_file']
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "java -Xmx%(maxjheap)s -Djava.io.tmpdir=%(tmp_dir)s -jar %(jar_file)s -I %(input)s -R %(reference_fasta)s -T IndelRealigner -targetIntervals %(target_intervals)s -o %(output)s %(knowns)s --consensusDeterminationModel KNOWNS_ONLY -LOD 0.4" % realign_params

    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="indelRe",
                                 nodes=realign_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #6
def run_mark_duplicates(input, output, params=None):
    """Set up and run the Picard MarkDuplicates program.
    
    """

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    # # Update input and output from global config object
    # params = config['picard_markduplicates_params']

    params['input'] = input
    params['output'] = output
    params['metrics_file'] = "%s.metrics" % output

    cmd = "module load %(modules)s\n" % params
    cmd += ("java -Xmx%(maxjheap)s -Djava.io.tmpdir=%(tmp_dir)s -jar %(jar_file)s "
            "INPUT=%(input)s OUTPUT=%(output)s METRICS_FILE=%(metrics_file)s CREATE_INDEX=true" 
            "OPTICAL_DUPLICATE_PIXEL_DISTANCE=%(optical_duplicate_pixel_distance)s" % params)
    
    job_id = utils.safe_qsub_run(cmd, jobname="markdups",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #7
def run_base_score_recalibrator(input, output, params=None):
    """GATK base score recalibration.

    """
    
    recal_params = config['gatk_base_score_recal_params']
    recal_params['input'] = input
    recal_params['output'] = output

    knowns = ""
    for known in config['gatk_base_score_recal_params']['known_files']:
        knowns += "-knownSites %s " % known

    recal_params['knowns'] = knowns
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "java -Xmx%(maxjheap)s -Djava.io.tmpdir=%(tmp_dir)s -jar %(jar_file)s -T BaseRecalibrator -I %(input)s -R %(reference_fasta)s -o %(output)s %(knowns)s" % realign_params

    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="bsRecal",
                                 nodes=recal_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #8
def run_index_splitbam(input, output, params=None):
    """Run samtools index on bam file.
    
    """

    # Update input and output from global config object
    bamindex_params = config['bamindex_params']
    bamindex_params['input'] = input
    bamindex_params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "%(exec)s index %(input)s %(output)s" % bamindex_params

    job_id = utils.safe_qsub_run(cmd, jobname="splitbamindex",
                                 nodes=bamindex_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #9
def run_mutect(input, output, text_file, params=None):
    """Run mutect.

    """

    params = config['mutect_params']

    assert len(input) == 2, "Not the right number of input files (should be 2, received %i)" % len(input)

    # Figure out which file is normal, which is tumor...should be alphabetical?

    input = sorted(input)
    params['input_normal'] = input[0]
    params['input_tumor'] = input[1]

    params['vcf_file'] = output
    params['call_stats_file'] = text_file
    ## params['coverage_file'] = coverage_file

    params['log_file'] = config['mutect_params']['output_dir']

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "%(java_exec)s -Xmx%(maxjheap)s -Djava.io.tmpdir=%(tmp_dir)s -jar %(jar_file)s --analysis_type MuTect --dbsnp %(dbsnp_file)s --cosmic %(cosmic_file)s --input_file:normal %(input_normal)s --input_file:tumor %(input_tumor)s --reference_sequence %(reference_fasta)s --out %(call_stats_file)s --vcf %(vcf_file)s --enable_extended_output" % params

    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="mutect",
                                 nodes=params['qsub_nodes'],
                                 params=params['qsub_params'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #10
def run_sort_sam(input, output, sample_name=None, index=None):
    """Run Picard SortSam to convert to sorted bam file.

    """

    params = dict(sample_name=sample_name, index=index)
    
    # Update input and output from global config object
    picard_params = config['picard_sortsam_params']
    picard_params['input'] = input
    picard_params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmdline = "--maxjheap=%(maxjheap)s --jar=%(jar_file)s --input=%(input)s --output=%(output)s --sort_order=%(sort_order)s SortSam " % picard_params

    picard_cmd = "python -m ccrngspy.tasks.Picard %s" % cmdline

    # stdout, stderr = utils.safe_run(picard_cmd, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))
    
    logger.debug("params = %s" % (params, ))
    job_id = utils.safe_qsub_run(picard_cmd, jobname="sortsam",
                                 nodes=picard_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    

    logger.debug("job_id = %s" % (job_id,))
Example #11
def run_mergebam(input, output, patient_id=None):
    """Merge all bam files for a single patient together (this includes matched tumor and normal into one file).
    The separate tumor/normal sample info is encoded in the read groups of the BAM files.
    This is done for future steps (GATK recommends that tumor/normal paired data is run through re-align/re-calibrate step together).
    """
    
    params = dict(patient_id=patient_id)
        
    # Update input and output from global config object
    mergebam_params = config['mergebam_params']
    mergebam_params['input'] = " ".join(input)
    mergebam_params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmdline = "--maxjheap=%(maxjheap)s --jar=%(jar_file)s --input %(input)s --output=%(output)s --sort_order=%(sort_order)s MergeSamFiles --use_threading=true" % mergebam_params

    cmd = "python -m ccrngspy.tasks.Picard %s" % cmdline
    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="mb%s" % (params['patient_id']),
                                 nodes=mergebam_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #12
def run_fastqc(input, output, params=None):
    """Set up and run the fastqc program.
    
    """

    params = config['fastqc_params']

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    if 'casava' not in params:
        logger.info("Casava parameter not specified, assuming false.")
        params['casava'] = False

    fastqc_task = FastQC.FastQC(input_files=input, output_directory=params['output_dir'],
                                casava=params['casava'], threads=params['threads'])

    cmd = fastqc_task.make_command()

    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="fastqc",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #13
def run_bwa_sampe(input, output, sample_name, params=None):
    """Run bwa sampe.


    """

    params['output'] = output
    params['sai_R1'] = input[0][0]
    params['sai_R2'] = input[1][0]

    # For some reason, the -f param didn't work with samtools sort,
    # so we need to use the prefix version (without the .bam suffix).
    
    (params['fastq_R1'], params['fastq_R2']) = input[0][1]
        
    params['read_group_string'] = bwa_helpers.sample_name_to_read_group_string(sample_name=sample_name, samples=samples, config=config)
    cmd = "module load %(modules)s\n" % params
    cmd += ('bwa sampe -r "%(read_group_string)s" %(reference_fasta)s %(sai_R1)s %(sai_R2)s %(fastq_R1)s %(fastq_R2)s | '
            'samtools view -Sb - > %(output)s' % params)
    
    logger.debug("cmd = %s" % (cmd))
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    job_id = utils.safe_qsub_run(cmd, jobname="bwa_sampe",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #14
def run_tophat(input, output, params=None):
    """Run Tophat on paired reads.
    
    """
    
    # Let a parser argument handle setting up arguments and options
    parser = argparse.ArgumentParser()
    
    # Add Tophat arguments
    tophat = Tophat.TophatRunner()
    parser = tophat.argparse(parser)
    
    # Update input and output from global config object
    tophat_params = config['tophat_params']
    tophat_params['input'] = input

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']

    ## fastq read files
    tophat_params['file1'] = input[0]
    tophat_params['file2'] = input[1]
    tophat_params['sample'] = params['sample']
    tophat_params['output'] = output

    logger.debug('tophat_params = %s' % (tophat_params, ))

    cmdline = '--bowtie_index=%(bowtie_index)s -1 %(file1)s -2 %(file2)s -o %(output)s --threads=%(threads)s --other_params="%(other_params)s"' % tophat_params    
    # tophat_cmd = "python -m ccrngspy.tasks.Tophat %s" % cmdline

    args = parser.parse_args(shlex.split(cmdline))
    logger.debug("cmdline = %s" % (shlex.split(cmdline), ))


    tophat.set_options(args)
    
    tophat_command = tophat.make_command()
    
    logger.debug("cmd = %s" % (tophat_command, ))
    logger.debug("params = %s" % (params, ))
    
    # job_stdout, job_stderr = utils.safe_qsub_run(tophat_command, jobname="tophat_%s" % params['sample'],
    #                                              nodes=tophat_params['qsub_nodes'],
    #                                              params="-v np=%(threads)s" % tophat_params,
    #                                              stdout=stdout, stderr=stderr)
    # logger.debug("stdout = %s, stderr = %s" % (job_stdout, job_stderr))

    job_stdout = utils.safe_qsub_run(tophat_command, jobname="tophat_%s" % params['sample'],
                                     nodes=tophat_params['qsub_nodes'],
                                     params="-v np=%(threads)s" % tophat_params,
                                     stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s" % (job_stdout))
Example #15
def run_bfast_postprocess(input, output, flowcell_id=None, sample_name=None, index=None):
    """Run postprocess.
    
    """

    params = dict(sample_name=sample_name, index=index)

    # Let a parser argument handle setting up arguments and options
    parser = argparse.ArgumentParser()
    
    # Add BFAST arguments
    bfast = BFAST.BFASTBase()

    # Use bfast provided by PATH; set using the module functions.
    bfast.set_exec("bfast")

    parser = bfast.argparse(parser)
    
    # Update input and output from global config object
    bfast_params = config['bfast_postprocess_params']
    bfast_params['input'] = input
    bfast_params['output'] = output
    read_group_id = read_group_id_dict[sample_name]

    read_group_string = "@RG\tID:%s\tPL:ILLUMINA\tPU:%s\tLB:%s\tSM:%s" % (read_group_id, flowcell_id, config['general_params']['library_name'], sample_name)

    read_group_tempfile = tempfile.NamedTemporaryFile(mode="w", dir=bfast_params['scratch_dir'], delete=True)
    read_group_tempfile.write("%s\n" % read_group_string)
    read_group_tempfile.flush()

    bfast_params['read_group_string'] = read_group_tempfile.name

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    cmdline = ("--bfast_threads=%(threads)s --bfast_output_file=%(output)s "
               "--bfast_reference_fasta=%(reference_fasta)s postprocess "
               "--bfast_read_group_string=%(read_group_string)s "
               "--bfast_aligned_file=%(input)s" % bfast_params)

    args = parser.parse_args(cmdline.split(" "))
    
    bfast_command ="module load %(modules)s\n" % bfast_params
    bfast_command += bfast.make_postprocess_command(args)

    # stdout, stderr = utils.safe_run(bfast_command, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))

    job_id = utils.safe_qsub_run(bfast_command, jobname="bfpp%s%s" % (params['sample_name'], params['index']),
                                 nodes=bfast_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #16
def run_bfast_match(input, output, params=None):
    """Run match.
    
    """
    
    # Let a parser argument handle setting up arguments and options
    parser = argparse.ArgumentParser()
    
    # Add BFAST arguments
    bfast = BFAST.BFASTBase()
    
    # Use bfast provided by PATH; set using the module functions.
    bfast.set_exec("bfast")
    
    parser = bfast.argparse(parser)
    
    # Update input and output from global config object
    bfast_params = config['bfast_match_params']
    bfast_params['input'] = input
    bfast_params['output'] = output
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    bfast_params['sample'] = params['sample_name']
    bfast_params['bfast_ref_index'] = params['bfast_ref_index']
    
    cmdline = ("--bfast_temp_dir=%(temp_dir)s --bfast_threads=%(threads)s "
               "--bfast_reference_fasta=%(reference_fasta)s "
               "--bfast_output_file=%(output)s match --bfast_gzipped "
               "--bfast_reads_file=%(input)s --bfast_space=0 "
               "--bfast_main_indexes=%(bfast_ref_index)s" % bfast_params)
    
    args = parser.parse_args(cmdline.split(" "))
    
    bfast_command = "module load %(modules)s\n"  % bfast_params
    bfast_command += bfast.make_match_command(args)
    
    logger.debug("cmd = %s" % (bfast_command,))
    
    # stdout, stderr = utils.safe_run(bfast_command, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))
    
    job_id = utils.safe_qsub_run(bfast_command, jobname="bfmatch",
                                 nodes=bfast_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #17
def run_sort_sam(input, output, params=None):
    """Set up and run the Picard SortSam program.

    This task works differently from the others; instead of calling the program directly
    by writing out the command line string needed to run it, it runs a python script
    that calls the main function of ccrngspy.tasks.Picard. This is because the Picard code
    is based on the Galaxy wrapper for Picard and doesn't work exactly the same as the
    rest.

    2012-03-30 I will consider re-writing it so that it is consistent. (dailykm)
    
    """
    
    # # Let a parser argument handle setting up arguments and options
    # parser = argparse.ArgumentParser()
    
    # # Add Picard arguments
    # picard = Picard.PicardBase()
    # parser = picard.argparse(parser)

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']

    # Update input and output from global config object
    picard_params = config['picard_sortsam_params']

    picard_params['input'] = input
    picard_params['output'] = output

    logger.debug("picard_params = %s" % (picard_params,))
    # Set up using the default arguments, specifying the input and output files since they are required!
    cmdline = "--maxjheap=%(maxjheap)s --jar=%(jar_file)s --input=%(input)s --output=%(output)s --sort_order=%(sort_order)s SortSam" % picard_params

    picard_cmd = "python -m ccrngspy.tasks.Picard %s" % cmdline

    # stdout, stderr = utils.safe_run(picard_cmd, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))
    
    logger.debug("params = %s" % (params, ))
    job_stdout = utils.safe_qsub_run(picard_cmd, jobname="sort_sam",
                                     nodes=picard_params['qsub_nodes'],
                                     stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s" % (job_stdout))
Example #18
def run_sort_sam(input, output, params=None):
    """Set up and run the Picard SortSam program.

    This task works differently from the others; instead of calling the program directly
    by writing out the command line string needed to run it, it runs a python script
    that calls the main function of ccrngspy.tasks.Picard. This is because the Picard code
    is based on the Galaxy wrapper for Picard and doesn't work exactly the same as the
    rest.

    2012-03-30 I will consider re-writing it so that it is consistent. (dailykm)
    
    """
    
    # # Let a parser argument handle setting up arguments and options
    # parser = argparse.ArgumentParser()
    
    # # Add Picard arguments
    # picard = Picard.PicardBase()
    # parser = picard.argparse(parser)

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']

    # Update input and output from global config object
    picard_params = config['picard_sortsam_params']

    picard_params['input'] = input
    picard_params['output'] = output

    logger.debug("picard_params = %s" % (picard_params,))
    # Set up using the default arguments, specifying the input and output files since they are required!
    cmdline = "--jar=%(jar_file)s --input=%(input)s --output=%(output)s --sort_order=%(sort_order)s SortSam" % picard_params

    picard_cmd = "python -m ccrngspy.tasks.Picard %s" % cmdline

    # stdout, stderr = utils.safe_run(picard_cmd, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))
    
    logger.debug("params = %s" % (params, ))
    job_stdout, job_stderr = utils.safe_qsub_run(picard_cmd, jobname="sort_sam",
                                                 nodes=picard_params['qsub_nodes'],
                                                 stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s, stderr = %s" % (job_stdout, job_stderr))
Example #19
def run_collect_rnaseq_metrics(input, output, sample):
    """Set up and run the Picard CollectRnaSeqMetrics program.

    This task works differently from the others; instead of calling the program directly
    by writing out the command line string needed to run it, it runs a python script
    that calls the main function of ccrngspy.tasks.Picard. This is because the Picard code
    is based on the Galaxy wrapper for Picard and doesn't work exactly the same as the
    rest.

    2012-03-30 I will consider re-writing it so that it is consistent. (dailykm)

    """
    
    # # Let a parser argument handle setting up arguments and options
    # parser = argparse.ArgumentParser()
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']

    # Update input and output from global config object
    picard_params = config['picard_params']
    picard_params['input'] = input
    picard_params['output'] = output
    
    # Set up using the default arguments, specifying the input and output files since they are required!
    cmdline = "--jar=%(jar_file)s --input=%(input)s --output=%(output)s --ref_flat=%(ref_flat)s --ref_file=%(ref_file)s CollectRnaSeqMetrics --minimum_length=%(minimum_length)s --chart_output=%(chart_output)s --metric_accumulation_level=%(metric_accumulation_level)s --stop_after=%(stop_after)s" % picard_params

    # args = parser.parse_args(cmdline.split())
    
    # # Run the function for collecting RNASeq metrics
    # args.func(args)
    
    picard_cmd = "python -m ccrngspy.tasks.Picard %s" % cmdline

    # stdout, stderr = utils.safe_run(picard_cmd, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))

    job_stdout, job_stderr = utils.safe_qsub_run(picard_cmd, jobname="rum_%s" % sample,
                                                 nodes=picard_params['qsub_nodes'],
                                                 stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s, stderr = %s" % (job_stdout, job_stderr))
Example #20
def run_collect_rnaseq_metrics(input, output, sample):
    """Set up and run the Picard CollectRnaSeqMetrics program.

    This task works differently from the others; instead of calling the program directly
    by writing out the command line string needed to run it, it runs a python script
    that calls the main function of ccrngspy.tasks.Picard. This is because the Picard code
    is based on the Galaxy wrapper for Picard and doesn't work exactly the same as the
    rest.

    2012-03-30 I will consider re-writing it so that it is consistent. (dailykm)

    """
    
    # # Let a parser argument handle setting up arguments and options
    # parser = argparse.ArgumentParser()
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']

    # Update input and output from global config object
    picard_params = config['picard_params']
    picard_params['input'] = input
    picard_params['output'] = output
    
    # Set up using the default arguments, specifying the input and output files since they are required!
    cmdline = "--maxjheap=%(maxjheap)s --jar=%(jar_file)s --input=%(input)s --output=%(output)s --ref_flat=%(ref_flat)s --ref_file=%(ref_file)s CollectRnaSeqMetrics --minimum_length=%(minimum_length)s --chart_output=%(chart_output)s --metric_accumulation_level=%(metric_accumulation_level)s --stop_after=%(stop_after)s" % picard_params

    # args = parser.parse_args(cmdline.split())
    
    # # Run the function for collecting RNASeq metrics
    # args.func(args)
    
    picard_cmd = "python -m ccrngspy.tasks.Picard %s" % cmdline

    # stdout, stderr = utils.safe_run(picard_cmd, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))

    job_stdout = utils.safe_qsub_run(picard_cmd, jobname="rnaseqmet_%s" % sample,
                                     nodes=picard_params['qsub_nodes'],
                                     stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s" % (job_stdout))
Example #21
def run_flagstat(input, output, params=None):
    """Run samtools flagstat on bam file.
    
    """
    
    params['input'] = input
    params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "module load %(modules)s\n" % params
    cmd += "samtools flagstat %(input)s > %(output)s" % params

    job_id = utils.safe_qsub_run(cmd, jobname="flagstat",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #22
def run_cleansam(input, output, params=None):
    """Clean up BAM file.
    """
    
    params['input'] = input
    params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    cmd = "module load %(modules)s\n" % params
    cmd += ("java -Xmx%(maxjheap)s -Djava.io.tmpdir=%(tmp_dir)s -jar %(jar_file)s "
            "INPUT=%(input)s OUTPUT=%(output)s CREATE_INDEX=true" % params)

    job_id = utils.safe_qsub_run(cmd, jobname="cleansam",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #23
def run_fastqc(input, output, params=None):
    """Set up and run fastqc.
    
    """

    # Let a parser argument handle setting up arguments and options
    parser = argparse.ArgumentParser()
    
    # Add FastQC arguments
    fastqc = FastQC.FastQC()
    parser = fastqc.argparse(parser)
    
    # Update input and output from global config object
    fastqc_params = config['fastqc_params']
    fastqc_params['input'] = input

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']
    
    cmdline = "--outdir=%(output_dir)s --threads=%(threads)s %(input)s" % fastqc_params

    args = parser.parse_args(cmdline.split())
    fastqc.set_options(args)

    # Final command to run
    fastqc_command = fastqc.make_command()
    
    # if fastqc_params['run_type'] == 'remote':
    #     stdout, stderr = utils.safe_qsub_run(fastqc_command, jobname="run_fastqc")
    # elif fastqc_params['run_type'] == 'local':
    job_stdout, job_stderr = utils.safe_qsub_run(fastqc_command, jobname="fastqc",
                                                 nodes=fastqc_params['qsub_nodes'],
                                                 stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s, stderr = %s" % (job_stdout, job_stderr))

    # post task, touch output file!
    open(output, mode="w").close()
Example #24
def run_rum(input, output, params=None):
    """Run RUM on paired reads.
    
    """
    
    # Let a parser argument handle setting up arguments and options
    parser = argparse.ArgumentParser()
    
    # Add RUM arguments
    rum = RUM.RUMrunner()
    parser = rum.argparse(parser)
    
    # Update input and output from global config object
    rum_params = config['rum_params']
    rum_params['input'] = input

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['log_file_dir']
    stderr = config['general_params']['log_file_dir']

    ## fastq read files
    rum_params['file1'] = input[0]
    rum_params['file2'] = input[1]
    rum_params['sample'] = params['sample']
    
    cmdline = "--rum_config_file=%(config_file)s --rum_run_name=%(sample)s --rum_outdir=%(output_dir)s/%(sample)s --rum_read_files %(file1)s %(file2)s --rum_chunks=%(chunks)s --rum_ram=%(ram_per_chunk)s" % rum_params
    args = parser.parse_args(cmdline.split())

    rum.set_options(args)
    
    rum_command = rum.make_command()

    # stdout, stderr = utils.safe_run(rum_command, shell=False)
    # logger.debug("stdout = %s, err = %s" % (stdout, stderr))

    job_stdout, job_stderr = utils.safe_qsub_run(rum_command, jobname="rum_%s" % params['sample'],
                                                 nodes=rum_params['qsub_nodes'], params="-l walltime=168:00:00",
                                                 stdout=stdout, stderr=stderr)
    
    logger.debug("stdout = %s, stderr = %s" % (job_stdout, job_stderr))
Example #25
def run_bwa_aln(input, output, params=None):
    """Run bwa on individual gzipped fastq files.

    """
    
    params['input'] = " ".join(input)
    params['output'] = output
    
    cmd = "module load %(modules)s\n" % params
    cmd += "bwa aln -t %(threads)s %(reference_fasta)s %(input)s > %(output)s" % params

    logger.debug("cmd = %s" % (cmd))

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    job_id = utils.safe_qsub_run(cmd, jobname="bwa_aln",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #26
def run_filterbam(input, output, params=None):
    """
    
    """
    
    params['input'] = input
    params['output'] = output
    
    cmd = "module load %(modules)s\n" % params
    cmd += "samtools view -h -F uUfd -q 1 -b %(input)s > %(output)s" % params
    
    logger.debug("cmd = %s" % (cmd))
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    job_id = utils.safe_qsub_run(cmd, jobname="filterbam",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #27
def run_index_splitbam(input, output, params=None):
    """Run samtools index on bam file.
    
    """

        
    # Update input and output from global config object
    params['input'] = input
    params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "module load %(modules)s\n" % params
    cmd += "samtools index %(input)s %(output)s" % params

    job_id = utils.safe_qsub_run(cmd, jobname="splitbamindex",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #28
def run_merge_bfastmatch(input, output, sample_name=None, index=None):
    """Merge bfast match files using bfast bmfmerge utility with default parameters.

    """
    
    # Update input and output from global config object
    merge_params = config['bfast_mergematch_params']
    merge_params['input'] = " ".join(input)
    merge_params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = ("module load %(modules)s\n"
           "%(exec)s %(input)s > %(output)s" % merge_params)

    job_id = utils.safe_qsub_run(cmd, jobname="mergebfmatch",
                                 nodes=merge_params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #29
def run_sortbam(input, output, output_prefix, params=None):
    """Sort bam file.


    """
    
    params['input'] = input
    params['output'] = output_prefix
    
    cmd = "module load %(modules)s\n" % params
    cmd = ('samtools sort -m 500M -@ %(threads)s %(input)s %(output)s' % params)
    
    logger.debug("cmd = %s" % (cmd))
    
    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']
    
    job_id = utils.safe_qsub_run(cmd, jobname="sortbam",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #30
def run_split_bam(input, output, params=None):
    """Split BAM files into separate tumor/normal files based on read group.

    """

    params['input'] = input
    ## params['output_dir'] = config['gatk_base_score_recal_params']['output_dir']

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = '%(exec)s %(input)s --output_dir=%(output_dir)s ' % params
    # Deliberately not interpolated with params: %(SM)s is presumably expanded by the
    # split tool itself, naming each output file after the SM read-group tag.
    cmd += '--output_file_expr="%(SM)s.bam"'

    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="splitbam",
                                 nodes=params['qsub_nodes'],
                                 # params=params['qsub_params'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #31
def run_bam2cfg(input, output, params=None):
    """Create breakdancer config files.
    
    """

    # Update input and output from global config object
    params['input'] = input
    params['output'] = output

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "module load %(modules)s\n" % params
    cmd += "%(exec)s -g -h %(input)s > %(output)s" % params
    
    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="bam2cfg",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))
Example #32
def run_breakdancer(input, output, bedfile, fastqfile, params=None):
    """Run breakdancer, only to find intra-chromosomal structural variations (-t param).
    
    """

    params['input'] = input
    params['output'] = output
    params['bedfile'] = bedfile
    params['fastqfile'] = fastqfile

    # Output dir for qsub stdout and stderr
    stdout = config['general_params']['stdout_log_file_dir']
    stderr = config['general_params']['stderr_log_file_dir']

    cmd = "module load %(modules)s\n" % params
    cmd += "%(exec)s -g %(bedfile)s -d %(fastqfile)s %(transchrom)s -r %(min_reads)s %(input)s > %(output)s" % params
    
    logger.debug("cmd = %s" % (cmd,))

    job_id = utils.safe_qsub_run(cmd, jobname="breakdancer",
                                 nodes=params['qsub_nodes'],
                                 stdout=stdout, stderr=stderr)
    
    logger.debug("job_id = %s" % (job_id,))