Esempio n. 1
0
def filter(qclient, job_id, parameters, out_dir):
    """Run the QC_Filter (Bowtie2 filtering) commands on an artifact

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values of the job. Must contain the key 'input' with
        the id of the artifact to process; the remaining values are
        forwarded to `generate_filter_commands`. The dictionary received
        from the caller is not modified.
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        Whether the job succeeded, the artifact information built by
        `_per_sample_ainfo` (None on failure) and the error message
        (empty string on success)
    """
    # Step 1 get the rest of the information need to run Bowtie2
    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
    # Work on a shallow copy so the caller's dictionary keeps its 'input'
    # entry and can be reused
    parameters = dict(parameters)
    artifact_id = parameters.pop('input')

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    qiime_map = prep_info['qiime-map']

    # Step 2 generating command
    qclient.update_job_step(job_id, "Step 2 of 4: Generating"
                            " QC_Filter commands")
    # Reverse reads are optional; an empty list means forward reads only
    rs = fps.get('raw_reverse_seqs', [])
    # Creating temporary directory for intermediate files
    with TemporaryDirectory(dir=out_dir, prefix='filter_') as temp_dir:
        commands, samples = generate_filter_commands(fps['raw_forward_seqs'],
                                                     rs, qiime_map, out_dir,
                                                     temp_dir, parameters)

        # Step 3 execute filtering command
        # "{0}" is filled with the total here; "%d" is left in the template
        # for _run_commands
        msg = "Step 3 of 4: Executing QC_Filter job (%d/{0})".format(
            len(commands))
        success, msg = _run_commands(qclient, job_id, commands, msg,
                                     'QC_Filter')
        if not success:
            return False, None, msg

    # Step 4 generating artifacts
    # The status used to be built but never sent; report it so the job
    # shows its last step
    qclient.update_job_step(job_id, "Step 4 of 4: Generating new artifacts")
    suffixes = ['%s.R1.fastq.gz', '%s.R2.fastq.gz']
    prg_name = 'Filtering'
    file_type_name = 'Filtered files'
    ainfo = _per_sample_ainfo(out_dir, samples, suffixes, prg_name,
                              file_type_name, bool(rs))

    return True, ainfo, ""
Esempio n. 2
0
def shogun(qclient, job_id, parameters, out_dir):
    """Run Shogun alignment and taxonomic profiling and export it as BIOM

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values of the job. Must contain the key 'input' with
        the id of the artifact to process; the remaining values are
        formatted with `_format_params` and `SHOGUN_PARAMS`. The dictionary
        received from the caller is not modified.
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        Whether the job succeeded, a list with the single
        'Shogun Alignment Profile' ArtifactInfo (None on failure) and the
        error message (empty string on success)
    """
    # Step 1 get the rest of the information need to run Shogun
    qclient.update_job_step(job_id, "Step 1 of 5: Collecting information")
    # Work on a shallow copy so the caller's dictionary keeps its 'input'
    # entry and can be reused
    parameters = dict(parameters)
    artifact_id = parameters.pop('input')

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    qiime_map = prep_info['qiime-map']

    # Step 2 converting to fna
    qclient.update_job_step(job_id,
                            "Step 2 of 5: Converting to FNA for Shogun")

    with TemporaryDirectory(dir=out_dir, prefix='shogun_') as temp_dir:
        # Reverse reads are optional; an empty list means forward reads only
        rs = fps.get('raw_reverse_seqs', [])
        samples = make_read_pairs_per_sample(fps['raw_forward_seqs'], rs,
                                             qiime_map)

        # Combining files
        comb_fp = generate_fna_file(temp_dir, samples)

        # Formatting parameters
        parameters = _format_params(parameters, SHOGUN_PARAMS)

        # Step 3 align
        # "{0}" is filled with the total here; "%d" is left in the template
        # for _run_commands
        align_cmd = generate_shogun_align_commands(comb_fp, temp_dir,
                                                   parameters)
        sys_msg = "Step 3 of 5: Aligning FNA with Shogun (%d/{0})".format(
            len(align_cmd))
        success, msg = _run_commands(qclient, job_id, align_cmd, sys_msg,
                                     'Shogun Align')
        if not success:
            return False, None, msg

        # Step 4 taxonomic profile
        # The commands are generated first so the total can be filled in,
        # as done for step 3 (the template used to keep a literal "{0}")
        assign_cmd, profile_fp = generate_shogun_assign_taxonomy_commands(
            temp_dir, parameters)
        sys_msg = ("Step 4 of 5: Taxonomic profile with Shogun "
                   "(%d/{0})").format(len(assign_cmd))
        success, msg = _run_commands(qclient, job_id, assign_cmd, sys_msg,
                                     'Shogun taxonomy assignment')
        if not success:
            return False, None, msg

        # Step 5 converting to BIOM
        # Report the step message itself: `msg` holds the message returned
        # by the previous _run_commands call, not the status to show
        sys_msg = "Step 5 of 5: Converting output to BIOM"
        qclient.update_job_step(job_id, sys_msg)
        # The BIOM is written to out_dir, not temp_dir, so it is still
        # there once the temporary directory is gone
        output = run_shogun_to_biom(profile_fp, [None, None, None, True],
                                    out_dir, 'profile')

        ainfo = [
            ArtifactInfo('Shogun Alignment Profile', 'BIOM',
                         [(output, 'biom')])
        ]

    return True, ainfo, ""
Esempio n. 3
0
def shogun(qclient, job_id, parameters, out_dir):
    """Run the Shogun taxonomic and functional profiling pipeline

    Aligns the reads, builds the taxonomic profile, redistributes it at the
    genus, species and strain levels, builds the species functional profile
    and converts the redistributed and functional tables to BIOM.

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values of the job. Must contain the key 'input' with
        the id of the artifact to process; the remaining values are
        formatted with `_format_params` and `SHOGUN_PARAMS`. The dictionary
        received from the caller is not modified.
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        Whether the job succeeded, a list with the 'Functional Predictions'
        and 'Taxonomic Predictions' ArtifactInfo objects (None on failure)
        and the error message (empty string on success)
    """
    # Step 1 get the rest of the information need to run Shogun
    qclient.update_job_step(job_id, "Step 1 of 7: Collecting information")
    # Work on a shallow copy so the caller's dictionary keeps its 'input'
    # entry and can be reused
    parameters = dict(parameters)
    artifact_id = parameters.pop('input')

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    qiime_map = prep_info['qiime-map']

    # Step 2 converting to fna
    qclient.update_job_step(job_id,
                            "Step 2 of 7: Converting to FNA for Shogun")

    redist_levels = ['genus', 'species', 'strain']
    func_levels = ['species']
    # Suffixes of the functional tables that are converted to BIOM
    func_to_biom_fps = [
        "kegg.modules.coverage", "kegg.modules",
        "kegg.pathways.coverage", "kegg.pathways", "kegg", "normalized"
    ]

    # In every step below "{0}" of the progress template is filled with the
    # number of commands before it is handed to _run_commands, which is left
    # the "%d" placeholder; the templates used to be passed with a literal
    # "{0}".
    with TemporaryDirectory(dir=out_dir, prefix='shogun_') as temp_dir:
        # Reverse reads are optional; an empty list means forward reads only
        rs = fps.get('raw_reverse_seqs', [])
        samples = make_read_pairs_per_sample(fps['raw_forward_seqs'], rs,
                                             qiime_map)

        # Combining files
        comb_fp = generate_fna_file(temp_dir, samples)

        # Formatting parameters
        parameters = _format_params(parameters, SHOGUN_PARAMS)

        # Step 3 align
        align_cmd = generate_shogun_align_commands(comb_fp, temp_dir,
                                                   parameters)
        sys_msg = "Step 3 of 7: Aligning FNA with Shogun (%d/{0})".format(
            len(align_cmd))
        success, msg = _run_commands(qclient, job_id, align_cmd, sys_msg,
                                     'Shogun Align')
        if not success:
            return False, None, msg

        # Step 4 taxonomic profile
        assign_cmd, profile_fp = generate_shogun_assign_taxonomy_commands(
            temp_dir, parameters)
        sys_msg = ("Step 4 of 7: Taxonomic profile with Shogun "
                   "(%d/{0})").format(len(assign_cmd))
        success, msg = _run_commands(qclient, job_id, assign_cmd, sys_msg,
                                     'Shogun taxonomy assignment')
        if not success:
            return False, None, msg

        # Step 5 redistribute profile
        step_msg = "Step 5 of 7: Redistributed profile with Shogun (%d/{0})"
        redist_fps = []
        for level in redist_levels:
            redist_cmd, output = generate_shogun_redist_commands(
                profile_fp, temp_dir, parameters, level)
            redist_fps.append(output)
            success, msg = _run_commands(
                qclient, job_id, redist_cmd,
                step_msg.format(len(redist_cmd)), 'Shogun redistribute')
            if not success:
                return False, None, msg

        # Step 6 functional profile
        step_msg = "Step 6 of 7: Functional profile with Shogun (%d/{0})"
        func_fp = ''
        for level in func_levels:
            func_cmd, output = generate_shogun_functional_commands(
                profile_fp, temp_dir, parameters, level)
            # Used below as the directory holding the functional tables
            func_fp = output
            success, msg = _run_commands(
                qclient, job_id, func_cmd,
                step_msg.format(len(func_cmd)), 'Shogun functional')
            if not success:
                return False, None, msg

        # Step 7 converting results to biom
        step_msg = "Step 7 of 7: Converting results to BIOM (%d/{0})"
        func_biom_outputs = []
        redist_biom_outputs = []
        # Converting redistributed files to biom
        for redist_fp, level in zip(redist_fps, redist_levels):
            biom_cmd, output = generate_biom_conversion_commands(
                redist_fp, out_dir, level, 'redist')
            success, msg = _run_commands(
                qclient, job_id, biom_cmd,
                step_msg.format(len(biom_cmd)),
                'Redistribute Biom conversion')
            if not success:
                return False, None, msg
            redist_biom_outputs.append(output)

        # Converting functional files to biom
        for level in func_levels:
            for biom_in in func_to_biom_fps:
                biom_in_fp = join(func_fp,
                                  "profile.%s.%s.txt" % (level, biom_in))
                biom_cmd, output = generate_biom_conversion_commands(
                    biom_in_fp, out_dir, level, biom_in)
                success, msg = _run_commands(
                    qclient, job_id, biom_cmd,
                    step_msg.format(len(biom_cmd)),
                    ' Functional Biom conversion')
                if not success:
                    return False, None, msg
                func_biom_outputs.append(output)

    # NOTE(review): the conversion outputs are handed to ArtifactInfo as-is;
    # confirm they already have the filepath format ArtifactInfo expects
    func_files_type_name = 'Functional Predictions'
    redist_files_type_name = 'Taxonomic Predictions'
    ainfo = [
        ArtifactInfo(func_files_type_name, 'BIOM', func_biom_outputs),
        ArtifactInfo(redist_files_type_name, 'BIOM', redist_biom_outputs)
    ]

    return True, ainfo, ""
Esempio n. 4
0
def shogun(qclient, job_id, parameters, out_dir):
    """Run Shogun and build the alignment and taxonomic BIOM artifacts

    Aligns the reads, builds the taxonomic profile, compresses the alignment
    with xz, converts the profile to BIOM and redistributes the profile at
    the phylum, genus and species levels. All files are written to
    `out_dir`.

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values of the job. Must contain the key 'input' with
        the id of the artifact to process; the remaining values are
        formatted with `_format_params` and `SHOGUN_PARAMS`. The formatted
        values must provide 'aligner' and 'threads'. The dictionary
        received from the caller is not modified.
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list, str
        Whether the job succeeded, the list of ArtifactInfo objects (None
        on failure) and the error message (empty string on success). The
        list holds 'Shogun Alignment Profile' followed by one
        'Taxonomic Predictions - <level>' per redistribution level.
    """
    # Step 1 get the rest of the information need to run Shogun
    qclient.update_job_step(job_id, "Step 1 of 6: Collecting information")
    # Work on a shallow copy so the caller's dictionary keeps its 'input'
    # entry and can be reused
    parameters = dict(parameters)
    artifact_id = parameters.pop('input')

    # Get the artifact filepath information
    artifact_info = qclient.get("/qiita_db/artifacts/%s/" % artifact_id)
    fps = artifact_info['files']

    # Get the artifact metadata
    prep_info = qclient.get('/qiita_db/prep_template/%s/' %
                            artifact_info['prep_information'][0])
    qiime_map = prep_info['qiime-map']

    # Step 2 converting to fna
    qclient.update_job_step(job_id,
                            "Step 2 of 6: Converting to FNA for Shogun")

    # Reverse reads are optional; an empty list means forward reads only
    rs = fps.get('raw_reverse_seqs', [])
    samples = make_read_pairs_per_sample(fps['raw_forward_seqs'], rs,
                                         qiime_map)

    # Combining files
    comb_fp = generate_fna_file(out_dir, samples)

    # Formatting parameters
    parameters = _format_params(parameters, SHOGUN_PARAMS)

    # Step 3 align
    # "{0}" is filled with the total here; "%d" is left in the template for
    # _run_commands
    align_cmd = generate_shogun_align_commands(comb_fp, out_dir, parameters)
    sys_msg = "Step 3 of 6: Aligning FNA with Shogun (%d/{0})".format(
        len(align_cmd))
    success, msg = _run_commands(qclient, job_id, align_cmd, sys_msg,
                                 'Shogun Align')
    if not success:
        return False, None, msg

    # Step 4 taxonomic profile
    # The commands are generated first so the total can be filled in, as
    # done for step 3 (the template used to keep a literal "{0}")
    assign_cmd, profile_fp = generate_shogun_assign_taxonomy_commands(
        out_dir, parameters)
    sys_msg = ("Step 4 of 6: Taxonomic profile with Shogun "
               "(%d/{0})").format(len(assign_cmd))
    success, msg = _run_commands(qclient, job_id, assign_cmd, sys_msg,
                                 'Shogun taxonomy assignment')
    if not success:
        return False, None, msg

    # Step 5 compress the alignment and convert the profile to BIOM
    # Report the step message itself: `msg` holds the message returned by
    # the previous _run_commands call, not the status to show
    sys_msg = "Step 5 of 6: Compressing and converting alignment to BIOM"
    qclient.update_job_step(job_id, sys_msg)
    alignment_fp = join(
        out_dir, 'alignment.%s.%s' %
        (parameters['aligner'], ALN2EXT[parameters['aligner']]))
    xz_cmd = 'xz -9 -T%s %s' % (parameters['threads'], alignment_fp)
    std_out, std_err, return_value = system_call(xz_cmd)
    if return_value != 0:
        error_msg = ("Error during %s:\nStd out: %s\nStd err: %s"
                     "\n\nCommand run was:\n%s" %
                     (sys_msg, std_out, std_err, xz_cmd))
        return False, None, error_msg
    output = run_shogun_to_biom(profile_fp, [None, None, None, True], out_dir,
                                'profile')

    # The compressed alignment ('<alignment_fp>.xz') is attached as a log
    ainfo = [
        ArtifactInfo('Shogun Alignment Profile', 'BIOM',
                     [(output, 'biom'), ('%s.xz' % alignment_fp, 'log')])
    ]

    # Step 6 redistribute profile
    step_msg = "Step 6 of 6: Redistributed profile with Shogun (%d/{0})"
    levels = ['phylum', 'genus', 'species']
    redist_fps = []
    for level in levels:
        redist_cmd, output = generate_shogun_redist_commands(
            profile_fp, out_dir, parameters, level)
        redist_fps.append(output)
        success, msg = _run_commands(
            qclient, job_id, redist_cmd,
            step_msg.format(len(redist_cmd)), 'Shogun redistribute')
        if not success:
            return False, None, msg

    # Converting redistributed files to biom, one artifact per level
    for redist_fp, level in zip(redist_fps, levels):
        biom_in = ["redist", None, '', True]
        output = run_shogun_to_biom(redist_fp, biom_in, out_dir, level,
                                    'redist')
        aname = 'Taxonomic Predictions - %s' % level
        ainfo.append(ArtifactInfo(aname, 'BIOM', [(output, 'biom')]))

    return True, ainfo, ""