Beispiel #1
0
def perform_hmmer(fasta, query_profiles, pfam, session):
    """Run a hmmer search of Pfam profiles against a sequence database.

    Args:
        fasta: Sequence database passed to ``run_hmmsearch`` as the search
            target (presumably a path to a FASTA file -- confirm with caller).
        query_profiles: List, Pfam profiles to search for; resolved to real
            Pfam accessions with ``get_full_accession_number``.
        pfam: Location of the Pfam database, as accepted by
            ``check_pfam_db``.
        session: Object whose ``queries`` attribute is overwritten with the
            resolved Pfam accessions.
    Returns:
        Whatever ``parse_hmmer_output`` produces for the search results, or
        ``None`` when none of ``query_profiles`` resolves to a valid profile.

    """
    LOG.info("Starting hmmer search")

    # Make sure we can find hmmfetch and hmmsearch on PATH
    helpers.get_program_path(["hmmfetch", "hmmsearch"])

    # Find Pfam database (.dat and .hmm)
    hmm, dat = check_pfam_db(pfam)

    # Find real Pfam accessions; they are stored on the session so the caller
    # can see which profiles were actually used
    session.queries = get_full_accession_number(dat, query_profiles)

    # Nothing to search for: report it and hand back None explicitly
    if not session.queries:
        LOG.error("No valid profiles could be selected")
        return None

    # Extract HMM profiles from database
    query = fetch_profiles(hmm, dat, session.queries)

    # Run search
    results = run_hmmsearch(hmm, fasta, query)

    # Parse results and return
    return parse_hmmer_output(results)
Beispiel #2
0
def preform_hmmer(database, query_profiles, database_pfam):
    """Drive a complete hmmer search, from profile lookup to parsed hits.

    Args:
        database_pfam: String, path to the Pfam database
        database: String, path to the sequence database, in fasta or gbk format
        query_profiles: List, Pfam profiles that should be searched
    Returns:
        The parsed hits, as returned by parse_hmmer_output

    """
    # The HMMER executables are looked up before anything else is done
    helpers.get_program_path(["hmmfetch", "hmmsearch"])

    LOG.info("Starting hmmer search")

    # Check the Pfam database before it is used
    check_pfam_db(database_pfam)

    # Fetch the requested profiles from the Pfam database
    profile_keys = fetch_profiles(database_pfam, query_profiles)

    # Search the sequence database with the fetched profiles
    raw_output = run_hmmsearch(database_pfam, database, profile_keys)

    # Parse the hmmsearch output into hit objects
    return parse_hmmer_output(raw_output)
Beispiel #3
0
def test_get_program_path_not_found(monkeypatch):
    """Expect ValueError when shutil.which finds nothing for the alias."""
    # Stub shutil.which so that every lookup comes back empty
    monkeypatch.setattr(shutil, "which", lambda alias: None)

    with pytest.raises(ValueError):
        helpers.get_program_path(["alias"])
Beispiel #4
0
def test_get_program_path(monkeypatch):
    """Expect get_program_path to return the path reported by shutil.which."""
    expected = "test_path"

    # Stub shutil.which so that every lookup resolves to the same path
    monkeypatch.setattr(shutil, "which", lambda alias: expected)

    found = helpers.get_program_path(["alias"])
    assert found == expected
Beispiel #5
0
def diamond(fasta,
            database,
            max_evalue=0.01,
            min_identity=30,
            min_coverage=50,
            cpus=1):
    """Run a local DIAMOND blastp search and return its tabular output.

    Arguments:
        fasta (str): Path to FASTA format query file
        database (str): Path to DIAMOND database generated with cblaster makedb
        max_evalue (float): Maximum e-value threshold
        min_identity (float): Minimum identity (%) cutoff
        min_coverage (float): Minimum coverage (%) cutoff
        cpus (int): Number of CPU threads for DIAMOND to use
    Returns:
        list: Decoded stdout of the search split on "\\n"; output that ends
        with a newline therefore yields a final empty string.
    Raises:
        subprocess.CalledProcessError: If DIAMOND exits with a non-zero status.
    """
    executable = helpers.get_program_path(["diamond", "diamond-aligner"])
    LOG.debug("diamond path: %s", executable)

    # Output format 6 followed by the columns to report for every hit
    output_format = [
        "6",
        "qseqid",
        "sseqid",
        "pident",
        "qcovhsp",
        "evalue",
        "bitscore",
    ]

    # Key order is left untouched, since the command is built from this mapping
    parameters = {
        "args": [executable, "blastp"],
        "--query": fasta,
        "--db": database,
        "--id": str(min_identity),
        "--evalue": str(max_evalue),
        "--outfmt": output_format,
        "--threads": str(cpus),
        "--query-cover": str(min_coverage),
        "--max-hsps": "1",
    }

    command = helpers.form_command(parameters)
    LOG.debug("Parameters: %s", command)

    # stderr is discarded; only stdout is captured for the result table
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True,
    )

    return completed.stdout.decode().split("\n")
Beispiel #6
0
def diamond_makedb(fasta, name):
    """Build a DIAMOND database from protein sequences in a FASTA file.

    Args:
        fasta (str): Path to FASTA file containing protein sequences.
        name (str): Name for DIAMOND database.
    """
    executable = helpers.get_program_path(["diamond", "diamond-aligner"])
    command = [executable, "makedb", "--in", fasta, "--db", name]

    # All console output is discarded and the exit status is not checked
    subprocess.run(
        command,
        stderr=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
    )