Beispiel #1
0
def generate_trim_commands(forward_seqs, reverse_seqs, map_file,
                           out_dir, parameters):
    """Builds one ``atropos trim`` command line per sample

    Parameters
    ----------
    forward_seqs : list of str
        Filepaths of the forward reads
    reverse_seqs : list of str
        Filepaths of the reverse reads
    map_file : str
        Filepath of the mapping file
    out_dir : str
        Directory under which the trimmed read files are written
    parameters : dict
        The command's parameters, keyed by parameter name

    Returns
    -------
    cmds : list of str
        One atropos command per entry of ``samples``, in the same order
    samples : list of tup
        The 4-tuples (run prefix, sample name, fwd read fp, rev read fp)
        produced by make_read_pairs_per_sample

    Notes
    -----
    Output files are named after the run prefix (``<run_prefix>.R1.fastq.gz``
    and ``<run_prefix>.R2.fastq.gz``), not after the sample name.

    A sample whose reverse read filepath is None gets a single-end command
    (``-se``); every other sample gets a paired-end command (``-pe1`` /
    ``-pe2``). make_read_pairs_per_sample is currently expected to require
    matched pairs, so the single-end branch mainly caters for samples that
    end up without reverse reads, for example after trimming and QC.
    """
    # pair up filenames, samples, and run prefixes
    samples = make_read_pairs_per_sample(forward_seqs, reverse_seqs, map_file)

    # the option string is identical for every sample, so render it once
    atropos_opts = _format_params(parameters, ATROPOS_PARAMS)

    cmds = []
    for run_prefix, _sample, fwd_fp, rev_fp in samples:
        fwd_out = join(out_dir, '%s.R1.fastq.gz' % run_prefix)

        if rev_fp is None:
            # single-end: only the forward output/input are passed
            cmds.append("atropos trim %s -o %s  -se %s"
                        % (atropos_opts, fwd_out, fwd_fp))
            continue

        rev_out = join(out_dir, '%s.R2.fastq.gz' % run_prefix)
        cmds.append('atropos trim %s -o %s -p %s -pe1 %s -pe2 %s'
                    % (atropos_opts, fwd_out, rev_out, fwd_fp, rev_fp))

    return cmds, samples
Beispiel #2
0
    def test_format_filter_params(self):
        """_format_params renders self.params with the BOWTIE2_PARAMS map"""
        # the expected string embeds the QC_FILTER_DB_DP value directly in
        # front of 'phix/phix' (no path separator is inserted here)
        db_dir = os.environ["QC_FILTER_DB_DP"]

        observed = _format_params(self.params, BOWTIE2_PARAMS)
        expected = '-p 1 -x %sphix/phix' % db_dir

        self.assertEqual(observed, expected)
Beispiel #3
0
def generate_filter_commands(forward_seqs, reverse_seqs, map_file, out_dir,
                             temp_dir, parameters):
    """Builds one QC_Filter shell pipeline per sample

    Parameters
    ----------
    forward_seqs : list of str
        Filepaths of the forward reads
    reverse_seqs : list of str
        Filepaths of the reverse reads
    map_file : str
        Filepath of the mapping file
    out_dir : str
        Directory receiving the final gzipped fastq files
    temp_dir : str
        Directory receiving the intermediate bam and fastq files
    parameters : dict
        The command's parameters, keyed by parameter name. Must contain the
        'Number of threads' key, which is passed to samtools sort and pigz.

    Returns
    -------
    cmds : list of str
        One command string per entry of ``samples``, in the same order
    samples : list of tup
        The 4-tuples (run prefix, sample name, fwd read fp, rev read fp)
        produced by make_read_pairs_per_sample

    Notes
    -----
    Each command chains, in order: ``bowtie2`` piped into ``samtools view``,
    ``samtools sort``, ``bedtools bamtofastq`` and two ``pigz`` calls. All
    file names are derived from the sample name, not from the run prefix.

    Every sample is rendered with the same paired-end template; the reverse
    read filepath is interpolated after ``-2`` exactly as received, so this
    function relies on make_read_pairs_per_sample providing matched pairs.
    """
    # pair up filenames, samples, and run prefixes
    samples = make_read_pairs_per_sample(forward_seqs, reverse_seqs, map_file)

    # both values are the same for every sample, so resolve them once
    bowtie2_opts = _format_params(parameters, BOWTIE2_PARAMS)
    n_threads = parameters['Number of threads']

    pipeline = (
        'bowtie2 {params} --very-sensitive -1 {fwd_ip} -2 {rev_ip}'
        ' | samtools view -f 12 -F 256 -b -o {bow_op}; '
        'samtools sort -T {sample_path} -@ {thrds} -n -o {sam_op} '
        '{sam_un_op}; '
        'bedtools bamtofastq -i {sam_op} -fq {bedtools_op_one} '
        '-fq2 {bedtools_op_two}; '
        'pigz -p {thrds} -c {bedtools_op_one} > {gz_op_one}; '
        'pigz -p {thrds} -c {bedtools_op_two} > {gz_op_two};')

    cmds = []
    for _run_prefix, sample, fwd_fp, rev_fp in samples:
        # samtools view writes this file and samtools sort reads it back
        unsorted_bam = join(temp_dir, '%s.unsorted.bam' % sample)
        sorted_bam = join(temp_dir, '%s.bam' % sample)
        sort_tmp_prefix = join(temp_dir, '%s' % sample)

        # uncompressed intermediates live in temp_dir ...
        r1_fastq = join(temp_dir, '%s.R1.trimmed.filtered.fastq' % sample)
        r2_fastq = join(temp_dir, '%s.R2.trimmed.filtered.fastq' % sample)
        # ... and the compressed results in out_dir
        r1_gz = join(out_dir, '%s.R1.trimmed.filtered.fastq.gz' % sample)
        r2_gz = join(out_dir, '%s.R2.trimmed.filtered.fastq.gz' % sample)

        cmds.append(pipeline.format(
            params=bowtie2_opts,
            thrds=n_threads,
            fwd_ip=fwd_fp,
            rev_ip=rev_fp,
            bow_op=unsorted_bam,
            sample_path=sort_tmp_prefix,
            sam_op=sorted_bam,
            sam_un_op=unsorted_bam,
            bedtools_op_one=r1_fastq,
            bedtools_op_two=r2_fastq,
            gz_op_one=r1_gz,
            gz_op_two=r2_gz))

    return cmds, samples
Beispiel #4
0
 def test_format_trim_params(self):
     """_format_params renders self.params with the ATROPOS_PARAMS map"""
     # expected option string, split on option boundaries for readability
     expected = ('-A GATCGGAAGAGCGTCGTGTAGGGAAAGGAGTGT '
                 '--adapter GATCGGAAGAGCACACGTCTGAACTCCAGTCAC '
                 '--max-n 80 --minimum-length 80 '
                 '--pair-filter any --quality-cutoff 15 '
                 '--threads 5 --trim-n')

     observed = _format_params(self.params, ATROPOS_PARAMS)

     self.assertEqual(observed, expected)