def test_invalid_input():
    """Joining a run configured with invalid parameters raises ValueError."""
    blast_app = blast.BlastWebApp("blastn", dna_seq, obey_rules=False)
    # Deliberately invalid settings
    blast_app.set_word_size(-20)
    blast_app.set_substitution_matrix("FOOBAR")
    blast_app.start()
    # The error is expected while waiting for the result, not before
    with pytest.raises(ValueError):
        blast_app.join(timeout=300)
def test_no_hit():
    """A search for the short query below must yield no alignments."""
    blast_app = blast.BlastWebApp(
        "blastn", "ACTGTACGAAACTCGGCGTA", obey_rules=False
    )
    blast_app.set_word_size(20)
    blast_app.start()
    blast_app.join(timeout=300)
    found = blast_app.get_alignments()
    # The search is expected to come back empty
    assert len(found) == 0
def test_tblastn():
    """The best tblastn hit must reproduce the protein query on both rows."""
    blast_app = blast.BlastWebApp("tblastn", prot_seq, obey_rules=False)
    blast_app.set_max_expect_value(200)
    blast_app.start()
    blast_app.join(timeout=300)
    # BLAST should find original sequence as best hit
    best_hit = blast_app.get_alignments()[0]
    assert prot_seq == best_hit.sequences[0]
    assert prot_seq == best_hit.sequences[1]
def test_hit_with_selenocysteine():
    """Run blastp against Swiss-Prot; the run must finish without raising."""
    # Sequence is taken from issue #344
    fasta_path = os.path.join(data_dir("sequence"), "selenocysteine.fasta")
    query = seqio.load_sequence(fasta_path)

    # Expect hit containing selenocysteine when searching Swiss-Prot
    app = blast.BlastWebApp("blastp", query, "swissprot")
    app.start()
    # No AlphabetError should be raised here
    app.join()
def test_tblastx():
    """
    The best tblastx hit must equal the complete translation of the
    reverse complement of the DNA query on both alignment rows.
    """
    blast_app = blast.BlastWebApp("tblastx", dna_seq, obey_rules=False)
    blast_app.set_max_expect_value(100)
    blast_app.start()
    blast_app.join(timeout=300)
    # BLAST should find original sequence as best hit
    best_hit = blast_app.get_alignments()[0]
    print(best_hit.sequences[0])
    print(best_hit.sequences[1])
    expected = dna_seq.reverse().complement().translate(complete=True)
    assert expected == best_hit.sequences[0]
    assert expected == best_hit.sequences[1]
import biotite
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.graphics as graphics
import biotite.application.muscle as muscle
import biotite.application.blast as blast
import biotite.database.entrez as entrez
import matplotlib.pyplot as plt

# Download sequence of Streptococcus pyogenes Cas9
file_name = entrez.fetch(
    "Q99ZW2", biotite.temp_dir(), "fa", "protein", "fasta"
)
file = fasta.FastaFile.read(file_name)
ref_seq = fasta.get_sequence(file)

# Find homologous proteins using NCBI Blast
# Search only the UniProt/SwissProt database
blast_app = blast.BlastWebApp(
    "blastp", ref_seq, "swissprot", obey_rules=False
)
blast_app.start()
blast_app.join()
alignments = blast_app.get_alignments()

# Keep the IDs of all hits scoring above 200
hits = [ali.hit_id for ali in alignments if ali.score > 200]

# Fetch each hit as FASTA and parse its sequence, in hit order
hit_seqs = [
    fasta.get_sequence(
        fasta.FastaFile.read(
            entrez.fetch(hit, biotite.temp_dir(), "fa", "protein", "fasta")
        )
    )
    for hit in hits
]
#
# Finding homologous sequences with BLAST
# ---------------------------------------
#
# .. currentmodule:: biotite.application.blast
#
# The :mod:`biotite.application.blast` subpackage provides an
# interface to NCBI BLAST: the :class:`BlastWebApp` class.
# Let's dive directly into the code: we try to find
# sequences homologous to the miniprotein *TC5b*:

import biotite.application.blast as blast
import biotite.sequence as seq

tc5b_seq = seq.ProteinSequence("NLYIQWLKDGGPSSGRPPPS")
app = blast.BlastWebApp("blastp", tc5b_seq)
app.start()
app.join()
alignments = app.get_alignments()
best_ali = alignments[0]
# Print the alignment itself, followed by an empty line
print(best_ali, end="\n\n")
# Print the remaining properties of the best hit, one per line
for label, value in (
    ("HSP position in query: ", best_ali.query_interval),
    ("HSP position in hit: ", best_ali.hit_interval),
    ("Score: ", best_ali.score),
    ("E-value: ", best_ali.e_value),
    ("Hit UID: ", best_ali.hit_id),
    ("Hit name: ", best_ali.hit_definition),
):
    print(label, value)

########################################################################
# This was too simple for BLAST:
def test_invalid_query():
    """
    Constructing a ``BlastWebApp`` with the query string
    ``"ABCDEFGHIJKLMNOP"`` must raise ``ValueError`` for both the
    ``blastn`` and the ``blastp`` program.
    """
    for program in ("blastn", "blastp"):
        # The constructor call itself is expected to raise, so the
        # result is intentionally not bound to a name
        with pytest.raises(ValueError):
            blast.BlastWebApp(program, "ABCDEFGHIJKLMNOP", obey_rules=False)
def test_file_input():
    """A ``BlastWebApp`` can be constructed from a FASTA file path."""
    fasta_path = os.path.join(data_dir("sequence"), "prot.fasta")
    # Only construction is exercised here; the app is never started
    app = blast.BlastWebApp("blastp", fasta_path, obey_rules=False)
import biotite.application.clustalo as clustalo
import biotite.database.rcsb as rcsb
import biotite.database.entrez as entrez

# Get structure and sequence
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif"))
sequence = pdbx.get_sequence(pdbx_file)[0]
# 'use_author_fields' is set to false,
# to ensure that values in the 'res_id' annotation point to the sequence
structure = pdbx.get_structure(
    pdbx_file, model=1, use_author_fields=False
)
# Keep only the amino acid residues
structure = structure[struc.filter_amino_acids(structure)]

# Identity threshold for a sequence to be counted as homologous sequence
# NOTE(review): the constant name is misspelled ("THESHOLD"); it is kept
# unchanged here because code outside this section may reference it
IDENTITY_THESHOLD = 0.4

# Find homologous proteins in SwissProt via BLAST
app = blast.BlastWebApp("blastp", sequence, database="swissprot")
app.start()
app.join()
alignments = app.get_alignments()

# The query itself is always the first entry of each list
hit_seqs = [sequence]
hit_ids = ["Query"]
hit_starts = [1]
for ali in alignments:
    identity = align.get_sequence_identity(ali)
    # Skip hits at or below the threshold as well as exact matches:
    # do not include the exact same sequence -> identity < 1.0
    if not IDENTITY_THESHOLD < identity < 1.0:
        continue
    hit_seqs.append(ali.sequences[1])
    hit_ids.append(ali.hit_id)
    hit_starts.append(ali.hit_interval[0])

# Perform MSA
#
# Finding homologous sequences with BLAST
# ---------------------------------------
#
# .. currentmodule:: biotite.application.blast
#
# The :mod:`biotite.application.blast` subpackage provides an
# interface to NCBI BLAST: the :class:`BlastWebApp` class.
# Let's dive directly into the code: we try to find
# sequences homologous to the miniprotein *TC5b*:

import biotite.application.blast as blast
import biotite.sequence as seq

tc5b_seq = seq.ProteinSequence("NLYIQWLKDGGPSSGRPPPS")
app = blast.BlastWebApp("blastp", tc5b_seq)
app.start()
app.join()
alignments = app.get_alignments()
best_ali = alignments[0]
# Print the alignment itself, followed by an empty line
print(best_ali, end="\n\n")
# Print the remaining properties of the best hit, one per line
for label, value in (
    ("HSP position in query: ", best_ali.query_interval),
    ("HSP position in hit: ", best_ali.hit_interval),
    ("Score: ", best_ali.score),
    ("E-value: ", best_ali.e_value),
    ("Hit UID: ", best_ali.hit_id),
    ("Hit name: ", best_ali.hit_definition),
):
    print(label, value)

########################################################################
# This was too simple for BLAST: