예제 #1
0
    def test_make_read_pairs_per_sample_match_fwd_2match(self):
        """Forward-only input with two 's2_' files raises ValueError."""
        # Dump the mapping file to a temporary path and register that path
        # in self._clean_up_files
        tmp_fd, map_fp = mkstemp()
        close(tmp_fd)
        with open(map_fp, 'w') as map_fh:
            map_fh.write(MAPPING_FILE)
        self._clean_up_files.append(map_fp)

        # Two of the three forward files start with 's2_'; there are no
        # reverse files at all
        forward = ['./folder/s3_S013_L001_R1.fastq.gz',
                   './folder/s2_S011_L001_R1.fastq.gz',
                   './folder/s2_S009_L001_R1.fastq.gz']
        reverse = []

        self.assertRaises(ValueError, make_read_pairs_per_sample,
                          forward, reverse, map_fp)
예제 #2
0
    def test_make_read_pairs_per_sample_match_fwd_rev(self):
        """Matching forward/reverse files yield the expected 4-tuples."""
        # Dump the mapping file to a temporary path and register that path
        # in self._clean_up_files
        tmp_fd, map_fp = mkstemp()
        close(tmp_fd)
        with open(map_fp, 'w') as map_fh:
            map_fh.write(MAPPING_FILE)
        self._clean_up_files.append(map_fp)

        # Inputs are listed s3, s2, s1; the expected result below is listed
        # s1, s2, s3
        fp_template = './folder/%s_L001_%s.fastq.gz'
        prefixes = ['s3_S013', 's2_S011', 's1_S009']
        forward = [fp_template % (prefix, 'R1') for prefix in prefixes]
        reverse = [fp_template % (prefix, 'R2') for prefix in prefixes]

        expected = [
            ('s1', 'SKB8.640193',
             './folder/s1_S009_L001_R1.fastq.gz',
             './folder/s1_S009_L001_R2.fastq.gz'),
            ('s2', 'SKD8.640184',
             './folder/s2_S011_L001_R1.fastq.gz',
             './folder/s2_S011_L001_R2.fastq.gz'),
            ('s3', 'SKB7.640196',
             './folder/s3_S013_L001_R1.fastq.gz',
             './folder/s3_S013_L001_R2.fastq.gz'),
        ]

        observed = make_read_pairs_per_sample(forward, reverse, map_fp)

        self.assertEqual(observed, expected)
예제 #3
0
def generate_trim_commands(forward_seqs, reverse_seqs, map_file,
                           out_dir, parameters):
    """Builds one ``atropos trim`` command per sample

    Parameters
    ----------
    forward_seqs : list of str
        Filepaths of the forward reads
    reverse_seqs : list of str
        Filepaths of the reverse reads
    map_file : str
        Filepath of the mapping file
    out_dir : str
        Directory in which the trimmed ``<run_prefix>.R1.fastq.gz`` /
        ``<run_prefix>.R2.fastq.gz`` outputs are placed
    parameters : dict
        The command's parameters, keyed by parameter name

    Returns
    -------
    cmds : list of str
        The atropos commands, one per entry of `samples`
    samples : list of tup
        The 4-tuples (run prefix, sample name, fwd read fp, rev read fp)
        returned by make_read_pairs_per_sample

    Notes
    -----
    An entry whose reverse read filepath is None produces a single-end
    command (``-se``); any other entry produces a paired-end command
    (``-pe1``/``-pe2`` with a second ``-p`` output).
    """
    # Pair up file names, samples and run prefixes
    samples = make_read_pairs_per_sample(forward_seqs, reverse_seqs, map_file)
    param_string = _format_params(parameters, ATROPOS_PARAMS)

    cmds = []
    for run_prefix, _, f_fp, r_fp in samples:
        fwd_out = join(out_dir, '%s.R1.fastq.gz' % run_prefix)
        if r_fp is not None:
            rev_out = join(out_dir, '%s.R2.fastq.gz' % run_prefix)
            cmd = 'atropos trim %s -o %s -p %s -pe1 %s -pe2 %s' % (
                param_string, fwd_out, rev_out, f_fp, r_fp)
        else:
            cmd = "atropos trim %s -o %s  -se %s" % (
                param_string, fwd_out, f_fp)
        cmds.append(cmd)

    return cmds, samples
예제 #4
0
def generate_filter_commands(forward_seqs, reverse_seqs, map_file, out_dir,
                             temp_dir, parameters):
    """Builds one bowtie2/samtools/bedtools/pigz pipeline per sample

    Parameters
    ----------
    forward_seqs : list of str
        Filepaths of the forward reads
    reverse_seqs : list of str
        Filepaths of the reverse reads
    map_file : str
        Filepath of the mapping file
    out_dir : str
        Directory receiving the final
        ``<sample>.R{1,2}.trimmed.filtered.fastq.gz`` files
    temp_dir : str
        Directory receiving the intermediate BAM files and the
        uncompressed filtered FASTQ files
    parameters : dict
        The command's parameters, keyed by parameter name. Must contain
        'Number of threads'

    Returns
    -------
    cmds : list of str
        The QC_Filter commands, one per entry of `samples`
    samples : list of tup
        The 4-tuples (run prefix, sample name, fwd read fp, rev read fp)
        returned by make_read_pairs_per_sample

    Notes
    -----
    Every command is built in paired-end form: the reverse read filepath
    is always passed to bowtie2 via ``-2``; there is no single-end branch
    in this function.
    """
    # Pair up file names, samples and run prefixes
    samples = make_read_pairs_per_sample(forward_seqs, reverse_seqs, map_file)
    param_string = _format_params(parameters, BOWTIE2_PARAMS)
    threads = parameters['Number of threads']

    pipeline = (
        'bowtie2 {params} --very-sensitive -1 {fwd_ip} -2 {rev_ip}'
        ' | samtools view -f 12 -F 256 -b -o {bow_op}; '
        'samtools sort -T {sample_path} -@ {thrds} -n -o {sam_op} '
        '{sam_un_op}; '
        'bedtools bamtofastq -i {sam_op} -fq {bedtools_op_one} '
        '-fq2 {bedtools_op_two}; '
        'pigz -p {thrds} -c {bedtools_op_one} > {gz_op_one}; '
        'pigz -p {thrds} -c {bedtools_op_two} > {gz_op_two};')

    cmds = []
    # All output names are keyed on the sample name; the run prefix is
    # not used here
    for _, sample, f_fp, r_fp in samples:
        # The unsorted BAM is both the `samtools view` output and the
        # `samtools sort` input
        unsorted_bam = join(temp_dir, '%s.unsorted.bam' % sample)
        fastq_one = join(temp_dir, '%s.R1.trimmed.filtered.fastq' % sample)
        fastq_two = join(temp_dir, '%s.R2.trimmed.filtered.fastq' % sample)
        gz_one = join(out_dir, '%s.R1.trimmed.filtered.fastq.gz' % sample)
        gz_two = join(out_dir, '%s.R2.trimmed.filtered.fastq.gz' % sample)

        cmds.append(pipeline.format(
            params=param_string,
            thrds=threads,
            fwd_ip=f_fp,
            rev_ip=r_fp,
            bow_op=unsorted_bam,
            sample_path=join(temp_dir, '%s' % sample),
            sam_op=join(temp_dir, '%s.bam' % sample),
            sam_un_op=unsorted_bam,
            bedtools_op_one=fastq_one,
            bedtools_op_two=fastq_two,
            gz_op_one=gz_one,
            gz_op_two=gz_two))

    return cmds, samples
예제 #5
0
def shogun(qclient, job_id, parameters, out_dir):
    """Run Shogun with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run Shogun. Must contain 'input' (the
        artifact id), which is removed from the dict in place
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        On success: True, the list of ArtifactInfo and an empty string.
        On failure of any command: False, None and the error message
    """
    # Step 1 get the rest of the information needed to run Shogun
    qclient.update_job_step(job_id, "Step 1 of 5: Collecting information")
    # Read the artifact id and drop it from the parameters in one go
    artifact_id = parameters.pop('input')

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    qiime_map = prep_info['qiime-map']

    # Step 2 converting to fna
    qclient.update_job_step(job_id,
                            "Step 2 of 5: Converting to FNA for Shogun")

    # Intermediate files live in a temporary directory under out_dir that
    # is removed when the block exits (including on early return)
    with TemporaryDirectory(dir=out_dir, prefix='shogun_') as temp_dir:
        # 'raw_reverse_seqs' may be absent; fall back to no reverse reads
        rs = fps.get('raw_reverse_seqs', [])
        samples = make_read_pairs_per_sample(fps['raw_forward_seqs'], rs,
                                             qiime_map)

        # Combining files
        comb_fp = generate_fna_file(temp_dir, samples)

        # Formatting parameters
        parameters = _format_params(parameters, SHOGUN_PARAMS)

        # Step 3 align
        # NOTE(review): this message has `{0}` filled in before being
        # handed to _run_commands while the Step 4 message does not --
        # confirm which form _run_commands expects
        align_cmd = generate_shogun_align_commands(comb_fp, temp_dir,
                                                   parameters)
        sys_msg = "Step 3 of 5: Aligning FNA with Shogun (%d/{0})".format(
            len(align_cmd))
        success, msg = _run_commands(qclient, job_id, align_cmd, sys_msg,
                                     'Shogun Align')

        if not success:
            return False, None, msg

        # Step 4 taxonomic profile
        sys_msg = "Step 4 of 5: Taxonomic profile with Shogun (%d/{0})"
        assign_cmd, profile_fp = generate_shogun_assign_taxonomy_commands(
            temp_dir, parameters)
        success, msg = _run_commands(qclient, job_id, assign_cmd, sys_msg,
                                     'Shogun taxonomy assignment')
        if not success:
            return False, None, msg

        # Step 5 convert the profile to BIOM
        # Report the step description itself: `msg` at this point is only
        # the result message of the previous, successful command run
        sys_msg = "Step 5 of 5: Converting output to BIOM"
        qclient.update_job_step(job_id, sys_msg)
        # The BIOM file is written to out_dir, not to temp_dir
        output = run_shogun_to_biom(profile_fp, [None, None, None, True],
                                    out_dir, 'profile')

        ainfo = [
            ArtifactInfo('Shogun Alignment Profile', 'BIOM',
                         [(output, 'biom')])
        ]

    return True, ainfo, ""
예제 #6
0
def shogun(qclient, job_id, parameters, out_dir):
    """Run the seven-step Shogun workflow for one artifact

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run Shogun. Must contain 'input' (the
        artifact id), which is removed from the dict in place
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        On success: True, the list of ArtifactInfo and an empty string.
        On failure of any command: False, None and the error message
    """
    # Step 1: collect what is needed to run Shogun
    qclient.update_job_step(job_id, "Step 1 of 7: Collecting information")
    artifact_id = parameters.pop('input')

    # Artifact filepaths
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Artifact metadata
    prep_id = artifact_info['prep_information'][0]
    prep_info = qclient.get('/qiita_db/prep_template/%s/' % prep_id)
    qiime_map = prep_info['qiime-map']

    # Step 2: convert to fna
    qclient.update_job_step(job_id,
                            "Step 2 of 7: Converting to FNA for Shogun")

    taxonomy_levels = ['genus', 'species', 'strain']
    functional_levels = ['species']
    # Name fragments of the functional tables that get converted to BIOM
    functional_tables = [
        "kegg.modules.coverage", "kegg.modules",
        "kegg.pathways.coverage", "kegg.pathways", "kegg", "normalized"
    ]
    redist_biom_outputs = []
    func_biom_outputs = []

    # Intermediate files live in a temporary directory under out_dir that
    # is removed when the block exits (including on early return)
    with TemporaryDirectory(dir=out_dir, prefix='shogun_') as temp_dir:
        # 'raw_reverse_seqs' may be absent; fall back to no reverse reads
        reverse_fps = fps.get('raw_reverse_seqs', [])
        samples = make_read_pairs_per_sample(fps['raw_forward_seqs'],
                                             reverse_fps, qiime_map)

        # Combine all reads into a single file
        comb_fp = generate_fna_file(temp_dir, samples)

        parameters = _format_params(parameters, SHOGUN_PARAMS)

        # Step 3: align
        sys_msg = "Step 3 of 7: Aligning FNA with Shogun (%d/{0})"
        align_cmd = generate_shogun_align_commands(comb_fp, temp_dir,
                                                   parameters)
        success, msg = _run_commands(qclient, job_id, align_cmd, sys_msg,
                                     'Shogun Align')
        if not success:
            return False, None, msg

        # Step 4: taxonomic profile
        sys_msg = "Step 4 of 7: Taxonomic profile with Shogun (%d/{0})"
        assign_cmd, profile_fp = generate_shogun_assign_taxonomy_commands(
            temp_dir, parameters)
        success, msg = _run_commands(qclient, job_id, assign_cmd, sys_msg,
                                     'Shogun taxonomy assignment')
        if not success:
            return False, None, msg

        # Step 5: redistribute the profile at each taxonomy level
        sys_msg = "Step 5 of 7: Redistributed profile with Shogun (%d/{0})"
        redist_fps = []
        for level in taxonomy_levels:
            redist_cmd, redist_fp = generate_shogun_redist_commands(
                profile_fp, temp_dir, parameters, level)
            redist_fps.append(redist_fp)
            success, msg = _run_commands(qclient, job_id, redist_cmd,
                                         sys_msg, 'Shogun redistribute')
            if not success:
                return False, None, msg

        # Step 6: functional profile; func_fp keeps the output of the last
        # level processed
        sys_msg = "Step 6 of 7: Functional profile with Shogun (%d/{0})"
        func_fp = ''
        for level in functional_levels:
            func_cmd, func_fp = generate_shogun_functional_commands(
                profile_fp, temp_dir, parameters, level)
            success, msg = _run_commands(qclient, job_id, func_cmd, sys_msg,
                                         'Shogun functional')
            if not success:
                return False, None, msg

        # Step 7: convert results to BIOM, written to out_dir
        sys_msg = "Step 7 of 7: Converting results to BIOM (%d/{0})"

        # Redistributed files first
        for redist_fp, level in zip(redist_fps, taxonomy_levels):
            biom_cmd, biom_fp = generate_biom_conversion_commands(
                redist_fp, out_dir, level, 'redist')
            success, msg = _run_commands(qclient, job_id, biom_cmd, sys_msg,
                                         'Redistribute Biom conversion')
            if not success:
                return False, None, msg
            redist_biom_outputs.append(biom_fp)

        # Then the functional files found under func_fp
        for level in functional_levels:
            for table in functional_tables:
                biom_in_fp = join(func_fp,
                                  "profile.%s.%s.txt" % (level, table))
                biom_cmd, biom_fp = generate_biom_conversion_commands(
                    biom_in_fp, out_dir, level, table)
                success, msg = _run_commands(qclient, job_id, biom_cmd,
                                             sys_msg,
                                             ' Functional Biom conversion')
                if not success:
                    return False, None, msg
                func_biom_outputs.append(biom_fp)

    ainfo = [
        ArtifactInfo('Functional Predictions', 'BIOM', func_biom_outputs),
        ArtifactInfo('Taxonomic Predictions', 'BIOM', redist_biom_outputs)
    ]

    return True, ainfo, ""
예제 #7
0
def shogun(qclient, job_id, parameters, out_dir):
    """Run Shogun with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run Shogun. Must contain 'input' (the
        artifact id), which is removed from the dict in place
    out_dir : str
        The path to the job's output directory; all intermediate and
        final files are written here

    Returns
    -------
    bool, list, str
        On success: True, the list of ArtifactInfo and an empty string.
        On failure of any command: False, None and the error message
    """
    # Step 1 get the rest of the information needed to run Shogun
    qclient.update_job_step(job_id, "Step 1 of 6: Collecting information")
    # Read the artifact id and drop it from the parameters in one go
    artifact_id = parameters.pop('input')

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    qiime_map = prep_info['qiime-map']

    # Step 2 converting to fna
    qclient.update_job_step(job_id,
                            "Step 2 of 6: Converting to FNA for Shogun")

    # 'raw_reverse_seqs' may be absent; fall back to no reverse reads
    rs = fps.get('raw_reverse_seqs', [])
    samples = make_read_pairs_per_sample(fps['raw_forward_seqs'], rs,
                                         qiime_map)

    # Combining files
    comb_fp = generate_fna_file(out_dir, samples)

    # Formatting parameters; from here on `parameters` is the formatted
    # result, which is indexed by 'aligner' and 'threads' below
    parameters = _format_params(parameters, SHOGUN_PARAMS)

    # Step 3 align
    # NOTE(review): this message has `{0}` filled in before being handed
    # to _run_commands while the Step 4 and Step 6 messages do not --
    # confirm which form _run_commands expects
    align_cmd = generate_shogun_align_commands(comb_fp, out_dir, parameters)
    sys_msg = "Step 3 of 6: Aligning FNA with Shogun (%d/{0})".format(
        len(align_cmd))
    success, msg = _run_commands(qclient, job_id, align_cmd, sys_msg,
                                 'Shogun Align')

    if not success:
        return False, None, msg

    # Step 4 taxonomic profile
    sys_msg = "Step 4 of 6: Taxonomic profile with Shogun (%d/{0})"
    assign_cmd, profile_fp = generate_shogun_assign_taxonomy_commands(
        out_dir, parameters)
    success, msg = _run_commands(qclient, job_id, assign_cmd, sys_msg,
                                 'Shogun taxonomy assignment')
    if not success:
        return False, None, msg

    # Step 5 compress the alignment and convert the profile to BIOM
    # Report the step description itself: `msg` at this point is only the
    # result message of the previous, successful command run
    sys_msg = "Step 5 of 6: Compressing and converting alignment to BIOM"
    qclient.update_job_step(job_id, sys_msg)
    alignment_fp = join(
        out_dir, 'alignment.%s.%s' %
        (parameters['aligner'], ALN2EXT[parameters['aligner']]))
    xz_cmd = 'xz -9 -T%s %s' % (parameters['threads'], alignment_fp)
    std_out, std_err, return_value = system_call(xz_cmd)
    if return_value != 0:
        error_msg = ("Error during %s:\nStd out: %s\nStd err: %s"
                     "\n\nCommand run was:\n%s" %
                     (sys_msg, std_out, std_err, xz_cmd))
        return False, None, error_msg
    output = run_shogun_to_biom(profile_fp, [None, None, None, True], out_dir,
                                'profile')

    # The compressed alignment ('<alignment_fp>.xz') is attached to the
    # profile artifact as a 'log' file
    ainfo = [
        ArtifactInfo('Shogun Alignment Profile', 'BIOM',
                     [(output, 'biom'), ('%s.xz' % alignment_fp, 'log')])
    ]

    # Step 6 redistribute profile
    sys_msg = "Step 6 of 6: Redistributed profile with Shogun (%d/{0})"
    levels = ['phylum', 'genus', 'species']
    redist_fps = []
    for level in levels:
        redist_cmd, output = generate_shogun_redist_commands(
            profile_fp, out_dir, parameters, level)
        redist_fps.append(output)
        success, msg = _run_commands(qclient, job_id, redist_cmd, sys_msg,
                                     'Shogun redistribute')
        if not success:
            return False, None, msg

    # Converting redistributed files to biom, one artifact per level
    # NOTE(review): this call passes five positional arguments while the
    # profile conversion above passes four -- confirm against the
    # signature of run_shogun_to_biom
    for redist_fp, level in zip(redist_fps, levels):
        biom_in = ["redist", None, '', True]
        output = run_shogun_to_biom(redist_fp, biom_in, out_dir, level,
                                    'redist')
        aname = 'Taxonomic Predictions - %s' % level
        ainfo.append(ArtifactInfo(aname, 'BIOM', [(output, 'biom')]))

    return True, ainfo, ""