Esempio n. 1
0
def test_invalid_input():
    """
    A BLAST job configured with invalid parameters must raise a
    ``ValueError`` when its result is awaited via ``join()``.
    """
    blast_app = blast.BlastWebApp("blastn", dna_seq, obey_rules=False)
    # Deliberately pass a negative word size and an unknown matrix name
    blast_app.set_word_size(-20)
    blast_app.set_substitution_matrix("FOOBAR")
    blast_app.start()
    # The error is expected while waiting for the job, not at setup
    with pytest.raises(ValueError):
        blast_app.join(timeout=300)
Esempio n. 2
0
def test_no_hit():
    """
    Search with a 20 nucleotide query and a word size of 20 and
    check that the returned list of alignments is empty.
    """
    blast_app = blast.BlastWebApp(
        "blastn", "ACTGTACGAAACTCGGCGTA", obey_rules=False
    )
    # The word size equals the length of the query
    blast_app.set_word_size(20)
    blast_app.start()
    blast_app.join(timeout=300)
    found = blast_app.get_alignments()
    # No hit is expected for this query
    assert len(found) == 0
Esempio n. 3
0
def test_tblastn():
    """
    Run a ``tblastn`` search with the protein test sequence and check
    that both sequences of the first alignment equal the query.
    """
    blast_app = blast.BlastWebApp("tblastn", prot_seq, obey_rules=False)
    blast_app.set_max_expect_value(200)
    blast_app.start()
    blast_app.join(timeout=300)
    best_hit = blast_app.get_alignments()[0]
    # The best hit should reproduce the query on both sides
    # of the alignment
    assert prot_seq == best_hit.sequences[0]
    assert prot_seq == best_hit.sequences[1]
Esempio n. 4
0
def test_hit_with_selenocysteine():
    """
    Run a ``blastp`` search against Swiss-Prot with a query for which
    a hit containing selenocysteine is expected.
    The test passes if waiting for the result does not raise.
    """
    # The query sequence is taken from issue #344
    fasta_path = os.path.join(data_dir("sequence"), "selenocysteine.fasta")
    query = seqio.load_sequence(fasta_path)

    # A hit containing selenocysteine is expected in Swiss-Prot
    app = blast.BlastWebApp("blastp", query, "swissprot")
    app.start()
    # Joining must not raise an AlphabetError
    app.join()
Esempio n. 5
0
def test_tblastx():
    """
    Run a ``tblastx`` search with the DNA test sequence and check that
    both sequences of the first alignment equal the complete
    translation of the reverse complement of the query.
    """
    blast_app = blast.BlastWebApp("tblastx", dna_seq, obey_rules=False)
    blast_app.set_max_expect_value(100)
    blast_app.start()
    blast_app.join(timeout=300)
    best_hit = blast_app.get_alignments()[0]
    # Show the aligned sequences in the captured test output
    print(best_hit.sequences[0])
    print(best_hit.sequences[1])
    # The best hit is compared against the translated
    # reverse complement of the query
    expected = dna_seq.reverse().complement().translate(complete=True)
    assert expected == best_hit.sequences[0]
    assert expected == best_hit.sequences[1]
Esempio n. 6
0
import biotite
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.graphics as graphics
import biotite.application.muscle as muscle
import biotite.application.blast as blast
import biotite.database.entrez as entrez
import matplotlib.pyplot as plt

# Fetch the sequence of Streptococcus pyogenes Cas9 (accession Q99ZW2)
# as FASTA file and read the sequence from it
file_name = entrez.fetch(
    "Q99ZW2", biotite.temp_dir(), "fa", "protein", "fasta"
)
file = fasta.FastaFile.read(file_name)
ref_seq = fasta.get_sequence(file)

# Look for homologous proteins with NCBI BLAST,
# restricted to the UniProt/SwissProt database
blast_app = blast.BlastWebApp(
    "blastp", ref_seq, "swissprot", obey_rules=False
)
blast_app.start()
blast_app.join()
alignments = blast_app.get_alignments()

# Keep the IDs of those hits that score above 200
hits = [ali.hit_id for ali in alignments if ali.score > 200]

# Download and read the sequence that belongs to each kept hit ID
hit_seqs = []
for hit in hits:
    file_name = entrez.fetch(
        hit, biotite.temp_dir(), "fa", "protein", "fasta"
    )
    file = fasta.FastaFile.read(file_name)
    hit_seqs.append(fasta.get_sequence(file))
Esempio n. 7
0
#
# Finding homologous sequences with BLAST
# ---------------------------------------
#
# .. currentmodule:: biotite.application.blast
#
# The :mod:`biotite.application.blast` subpackage provides an
# interface to NCBI BLAST: the :class:`BlastWebApp` class.
# Let's dive directly into the code and try to find
# homologous sequences to the miniprotein *TC5b*:

import biotite.application.blast as blast
import biotite.sequence as seq

# Query: the sequence of the miniprotein TC5b
tc5b_seq = seq.ProteinSequence("NLYIQWLKDGGPSSGRPPPS")

# Start the BLAST search and wait until it has finished
app = blast.BlastWebApp("blastp", tc5b_seq)
app.start()
app.join()

# Only the first alignment of the result is inspected
alignments = app.get_alignments()
best_ali = alignments[0]
print(best_ali)
print()
# Report the annotations of the first alignment, one per line
for label, value in (
    ("HSP position in query: ", best_ali.query_interval),
    ("HSP position in hit: ", best_ali.hit_interval),
    ("Score: ", best_ali.score),
    ("E-value: ", best_ali.e_value),
    ("Hit UID: ", best_ali.hit_id),
    ("Hit name: ", best_ali.hit_definition),
):
    print(label, value)

########################################################################
# This was too simple for BLAST:
Esempio n. 8
0
def test_invalid_query():
    """
    Creating a ``BlastWebApp`` with the query string
    ``"ABCDEFGHIJKLMNOP"`` must raise a ``ValueError`` for both
    the ``blastn`` and the ``blastp`` program.
    """
    for program in ("blastn", "blastp"):
        # The constructor itself is expected to reject the query,
        # so the created object is not bound to a name
        with pytest.raises(ValueError):
            blast.BlastWebApp(program, "ABCDEFGHIJKLMNOP", obey_rules=False)
Esempio n. 9
0
def test_file_input():
    """
    Create a ``BlastWebApp`` with the path of a FASTA file as query.
    The test passes if the construction does not raise.
    """
    fasta_path = os.path.join(data_dir("sequence"), "prot.fasta")
    # NOTE(review): the app is only constructed and never started here;
    # confirm that no further checks were intended
    blast_app = blast.BlastWebApp("blastp", fasta_path, obey_rules=False)
import biotite.application.clustalo as clustalo
import biotite.database.rcsb as rcsb
import biotite.database.entrez as entrez

# Fetch the mmCIF file of entry 1GUU and read the first sequence from it
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif"))
sequence = pdbx.get_sequence(pdbx_file)[0]
# 'use_author_fields' is disabled, so that the values in the
# 'res_id' annotation point to the sequence
structure = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False)
# Keep only the amino acid residues of the structure
structure = structure[struc.filter_amino_acids(structure)]

# A hit counts as homologous sequence
# only if its identity exceeds this threshold
IDENTITY_THESHOLD = 0.4
# Search SwissProt for homologous proteins via BLAST
app = blast.BlastWebApp("blastp", sequence, database="swissprot")
app.start()
app.join()
alignments = app.get_alignments()

# The query itself is always the first entry of each list
hit_seqs = [sequence]
hit_ids = ["Query"]
hit_starts = [1]
for ali in alignments:
    identity = align.get_sequence_identity(ali)
    # Skip hits below the threshold and also hits that are
    # identical to the query (identity of 1.0)
    if not (IDENTITY_THESHOLD < identity < 1.0):
        continue
    hit_seqs.append(ali.sequences[1])
    hit_ids.append(ali.hit_id)
    hit_starts.append(ali.hit_interval[0])

# Perform MSA
Esempio n. 11
0
# 
# Finding homologous sequences with BLAST
# ---------------------------------------
#
# .. currentmodule:: biotite.application.blast
#
# The :mod:`biotite.application.blast` subpackage provides an
# interface to NCBI BLAST: the :class:`BlastWebApp` class.
# Let's dive directly into the code and try to find
# homologous sequences to the miniprotein *TC5b*:

import biotite.application.blast as blast
import biotite.sequence as seq

# Query: the sequence of the miniprotein TC5b
tc5b_seq = seq.ProteinSequence("NLYIQWLKDGGPSSGRPPPS")

# Start the BLAST search and wait until it has finished
app = blast.BlastWebApp("blastp", tc5b_seq)
app.start()
app.join()

# Only the first alignment of the result is inspected
alignments = app.get_alignments()
best_ali = alignments[0]
print(best_ali)
print()
# Report the annotations of the first alignment, one per line
for label, value in (
    ("HSP position in query: ", best_ali.query_interval),
    ("HSP position in hit: ", best_ali.hit_interval),
    ("Score: ", best_ali.score),
    ("E-value: ", best_ali.e_value),
    ("Hit UID: ", best_ali.hit_id),
    ("Hit name: ", best_ali.hit_definition),
):
    print(label, value)

########################################################################
# This was too simple for BLAST: