def perform_hmmer(fasta, query_profiles, pfam, session):
    """Run a complete HMMER search and return the parsed hits.

    Args:
        fasta: Path to the sequence database to search; handed to
            run_hmmsearch as the search target.
        query_profiles: List of Pfam profiles to search for.
        pfam: Path to the Pfam database; handed to check_pfam_db.
        session: Session object whose ``queries`` attribute is overwritten
            with the accessions resolved from ``query_profiles``.

    Returns:
        Whatever parse_hmmer_output produces for the search results, or
        None when no valid profile could be selected.
    """
    LOG.info("Starting hmmer search")

    # Called only for its check: both HMMER programs must be locatable.
    helpers.get_program_path(["hmmfetch", "hmmsearch"])

    # Locate the two Pfam database files (.hmm and .dat).
    hmm_path, dat_path = check_pfam_db(pfam)

    # Resolve the requested profiles to real Pfam accessions and remember
    # them on the session.
    session.queries = get_full_accession_number(dat_path, query_profiles)

    if session.queries:
        # Extract the selected profiles, search with them, parse the output.
        profiles = fetch_profiles(hmm_path, dat_path, session.queries)
        search_output = run_hmmsearch(hmm_path, fasta, profiles)
        return parse_hmmer_output(search_output)

    LOG.error("No valid profiles could be selected")
    return None
def preform_hmmer(
    database,
    query_profiles,
    database_pfam,
):
    """Run a complete HMMER search and return the parsed hits.

    Args:
        database: String, path to the sequence database (FASTA or GenBank
            format) that is searched.
        query_profiles: List of Pfam profiles to search for.
        database_pfam: String, path to the Pfam database.

    Returns:
        The value returned by parse_hmmer_output for the search results.
    """
    # Called only for its check: both HMMER programs must be locatable
    # before any work is started.
    helpers.get_program_path(["hmmfetch", "hmmsearch"])
    LOG.info("Starting hmmer search")

    # Validate the Pfam database; the return value is not used here.
    check_pfam_db(database_pfam)

    # Fetch the requested profiles, search the sequence database with them,
    # then parse the raw hmmsearch output.
    profile_keys = fetch_profiles(database_pfam, query_profiles)
    search_output = run_hmmsearch(database_pfam, database, profile_keys)
    return parse_hmmer_output(search_output)
def test_get_program_path_not_found(monkeypatch):
    """get_program_path raises ValueError when no alias can be resolved."""
    # Stub shutil.which so that every lookup comes back empty.
    monkeypatch.setattr(shutil, "which", lambda alias: None)

    with pytest.raises(ValueError):
        helpers.get_program_path(["alias"])
def test_get_program_path(monkeypatch):
    """get_program_path returns the path reported by shutil.which."""
    expected = "test_path"

    # Stub shutil.which so that every alias resolves to the same fake path.
    monkeypatch.setattr(shutil, "which", lambda alias: "test_path")

    found = helpers.get_program_path(["alias"])
    assert found == expected
def diamond(fasta, database, max_evalue=0.01, min_identity=30, min_coverage=50, cpus=1):
    """Run a local DIAMOND blastp search of a query file against a database.

    Arguments:
        fasta (str): Path to FASTA format query file
        database (str): Path to DIAMOND database generated with cblaster makedb
        max_evalue (float): Maximum e-value threshold
        min_identity (float): Minimum identity (%) cutoff
        min_coverage (float): Minimum coverage (%) cutoff
        cpus (int): Number of CPU threads for DIAMOND to use
    Returns:
        list: Lines of the DIAMOND result table. The decoded output is split
        on "\n", so output ending in a newline yields a final empty string.
    Raises:
        subprocess.CalledProcessError: If DIAMOND exits with a non-zero status.
    """
    # The executable may be installed under either name.
    program = helpers.get_program_path(["diamond", "diamond-aligner"])
    LOG.debug("diamond path: %s", program)

    # Tabular output (format 6) restricted to the columns listed here.
    output_columns = [
        "6",
        "qseqid",
        "sseqid",
        "pident",
        "qcovhsp",
        "evalue",
        "bitscore",
    ]
    parameters = {
        "args": [program, "blastp"],
        "--query": fasta,
        "--db": database,
        "--id": str(min_identity),
        "--evalue": str(max_evalue),
        "--outfmt": output_columns,
        "--threads": str(cpus),
        "--query-cover": str(min_coverage),
        "--max-hsps": "1",
    }

    command = helpers.form_command(parameters)
    LOG.debug("Parameters: %s", command)

    # Capture stdout only; DIAMOND's stderr output is discarded.
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    table_text = completed.stdout.decode()
    return table_text.split("\n")
def diamond_makedb(fasta, name):
    """Build a DIAMOND database from a FASTA file of protein sequences.

    Args:
        fasta (str): Path to FASTA file containing protein sequences.
        name (str): Name for DIAMOND database.
    Raises:
        subprocess.CalledProcessError: If ``diamond makedb`` exits with a
            non-zero status.
    """
    # The executable may be installed under either name.
    program = helpers.get_program_path(["diamond", "diamond-aligner"])

    # Both output streams are discarded, so the exit status is the only
    # failure signal; check=True turns a failed build into an exception
    # instead of silently leaving no usable database behind.
    subprocess.run(
        [program, "makedb", "--in", fasta, "--db", name],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )