Ejemplo n.º 1
0
def test_saving_single(suffix):
    """
    Check that a single sequence can be saved with
    ``seqio.save_sequence()`` to a file with the given extension.

    The reference sequence is loaded from ``random.fasta`` in the
    ``sequence`` test data directory and written to a temporary file
    whose name ends with ``.<suffix>``.
    The test is skipped if writing fails with a ``PermissionError``.

    Parameters
    ----------
    suffix
        File extension (without the leading dot) that is appended to
        the name of the temporary output file.
    """
    ref_sequence = seqio.load_sequence(
        join(data_dir("sequence"), "random.fasta")
    )
    # The context manager closes the temporary file on every exit path,
    # including the skip below, so the file handle is never leaked
    with NamedTemporaryFile("w+", suffix=f".{suffix}") as temp:
        try:
            seqio.save_sequence(temp.name, ref_sequence)
        except PermissionError:
            # This error might occur on AppVeyor
            pytest.skip("Permission is denied")
Ejemplo n.º 2
0
def test_hit_with_selenocysteine():
    """
    Run a ``blastp`` web search against ``swissprot`` with the query
    loaded from ``selenocysteine.fasta`` and wait for it to finish.

    The test contains no explicit assertion: it passes if starting and
    joining the application does not raise an exception.
    """
    # The query sequence originates from issue #344
    fasta_path = os.path.join(data_dir("sequence"), "selenocysteine.fasta")
    query = seqio.load_sequence(fasta_path)

    # The Swiss-Prot search is expected to yield a hit containing
    # selenocysteine; joining must not raise an AlphabetError
    app = blast.BlastWebApp("blastp", query, "swissprot")
    app.start()
    app.join()
Ejemplo n.º 3
0
def test_genbank(file_name):
    """
    Simply test whether reading or writing a GenBank/GenPept file
    raises an exception.

    The sequence is loaded from `file_name` in the ``sequence`` test
    data directory and saved to a temporary ``.gb`` file.
    The test is skipped if writing fails with a ``PermissionError``.

    Parameters
    ----------
    file_name
        Name of the input file, relative to the ``sequence`` test data
        directory.
    """
    sequence = seqio.load_sequence(join(data_dir("sequence"), file_name))
    # The context manager closes the temporary file on every exit path,
    # including the skip below, so the file handle is never leaked
    with NamedTemporaryFile("w+", suffix=".gb") as temp:
        try:
            seqio.save_sequence(temp.name, sequence)
        except PermissionError:
            # This error might occur on AppVeyor
            pytest.skip("Permission is denied")
Ejemplo n.º 4
0
from matplotlib.patches import Rectangle
from matplotlib.ticker import MultipleLocator
import biotite
import biotite.sequence as seq
import biotite.sequence.io as seqio
import biotite.sequence.io.genbank as gb
import biotite.sequence.align as align
import biotite.database.entrez as entrez
import biotite.application.tantan as tantan

# Fetch the entry 'NC_000932' from the 'Nucleotide' database in FASTA
# format into the system's temporary directory and load the sequence
# from the value returned by the fetch
# (the chloroplast genome, judging by the variable name)
fasta_file = entrez.fetch("NC_000932",
                          tempfile.gettempdir(),
                          "fasta",
                          db_name="Nucleotide",
                          ret_type="fasta")
chloroplast_seq = seqio.load_sequence(fasta_file)

# Same procedure for the entry 'NC_000911'
# (the bacterial genome, judging by the variable name);
# note that 'fasta_file' is rebound here
fasta_file = entrez.fetch("NC_000911",
                          tempfile.gettempdir(),
                          "fasta",
                          db_name="Nucleotide",
                          ret_type="fasta")
bacterium_seq = seqio.load_sequence(fasta_file)

########################################################################
# For the *k-mer* matching step the genome of the cyanobacterium is
# indexed into a :class:`KmerTable`.
# As homologous regions between both genomes may also appear on the
# complementary strand, both the original genome sequence and its
# reverse complement are indexed.
# Two additional techniques are used here:
Ejemplo n.º 5
0
#
# In the next step we map each read to its respective position
# in the reference genome.
# An additional challenge is to find the correct sense of the read:
# In the library preparation both sense and complementary DNA are
# produced from the virus RNA.
# For this reason we need to create a complementary copy for each read
# and map both strands to the reference genome.
# Later the *wrong* strand is discarded.

# Fetch the reference SARS-CoV-2 genome as GenBank file and load it
orig_genome_file = entrez.fetch(
    "NC_045512",
    tempfile.gettempdir(),
    "gb",
    db_name="Nucleotide",
    ret_type="gb",
)
orig_genome = seqio.load_sequence(orig_genome_file)

# Pair each read with its reverse complement and flatten the pairs
# into a single list: [read_0, compl_0, read_1, compl_1, ...]
compl_reads = list(itertools.chain.from_iterable(
    (read, read.reverse(False).complement()) for read in reads
))

########################################################################
# To map the reads to their corresponding positions in the reference
# genome, we need to align them to it.
# Although we could use :func:`align_optimal()`
# (Needleman-Wunsch algorithm [4]_) for this purpose, aligning this
# large number of reads to even a small virus genome would take hours.
#
# Instead we choose a heuristic alignment approach, similar to the
# method used by software like *BLAST* [5]_:
Ejemplo n.º 6
0
def test_loading_single(path):
    """
    Check that the sequence loaded from `path` is equal to the
    reference sequence loaded from ``random.fasta`` in the ``sequence``
    test data directory.
    """
    reference_path = join(data_dir("sequence"), "random.fasta")
    expected = seqio.load_sequence(reference_path)
    loaded = seqio.load_sequence(path)
    assert expected == loaded