Esempio n. 1
0
def test_array_conversion(path, model, hybrid36):
    """
    Check that a structure survives a round trip through a
    :class:`PDBFile`.

    The structure is read from `path`, written into a fresh
    :class:`PDBFile` and read back again.
    Box, bonds, annotations and coordinates of both structures are
    compared.
    If `hybrid36` is set and the structure contains residue IDs
    below 1, writing is expected to raise a :class:`ValueError`
    instead.
    """
    # The module-level functions 'get_structure()' and
    # 'set_structure()' are used on purpose,
    # so these thin wrappers are tested as well
    source_file = pdb.PDBFile.read(path)
    try:
        original = pdb.get_structure(source_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The file cannot be parsed into an AtomArrayStack,
        # as the models contain different numbers of atoms
        # -> skip this test case
        return

    if hybrid36 and (original.res_id < 1).any():
        expected_message = (
            "Only positive integers can be converted "
            "into hybrid-36 notation"
        )
        with pytest.raises(ValueError, match=expected_message):
            target_file = pdb.PDBFile()
            pdb.set_structure(target_file, original, hybrid36=hybrid36)
        return

    target_file = pdb.PDBFile()
    pdb.set_structure(target_file, original, hybrid36=hybrid36)
    restored = pdb.get_structure(target_file, model=model)

    if original.box is not None:
        assert np.allclose(original.box, restored.box)
    assert original.bonds == restored.bonds
    for category in original.get_annotation_categories():
        expected = original.get_annotation(category).tolist()
        actual = restored.get_annotation(category).tolist()
        assert expected == actual
    assert original.coord.tolist() == restored.coord.tolist()
Esempio n. 2
0
def test_array_conversion(path, single_model, hybrid36):
    """
    Check that a structure survives a round trip through a
    :class:`PDBFile`.

    Depending on `single_model`, either model 1 or all models
    (``model=None``) are read from `path`.
    The structure is written into a fresh :class:`PDBFile` and read
    back; box, bonds, annotations and coordinates must agree.
    If `hybrid36` is set and the structure contains residue IDs
    below 1, writing is expected to raise a :class:`ValueError`
    instead.
    """
    if single_model:
        model = 1
    else:
        model = None

    # The module-level functions 'get_structure()' and
    # 'set_structure()' are used on purpose,
    # so these thin wrappers are tested as well
    source_file = pdb.PDBFile.read(path)
    original = pdb.get_structure(source_file, model=model)

    if hybrid36 and (original.res_id < 1).any():
        expected_message = (
            "Only positive integers can be converted "
            "into hybrid-36 notation"
        )
        with pytest.raises(ValueError, match=expected_message):
            target_file = pdb.PDBFile()
            pdb.set_structure(target_file, original, hybrid36=hybrid36)
        return

    target_file = pdb.PDBFile()
    pdb.set_structure(target_file, original, hybrid36=hybrid36)
    restored = pdb.get_structure(target_file, model=model)

    if original.box is not None:
        assert np.allclose(original.box, restored.box)
    assert original.bonds == restored.bonds
    for category in original.get_annotation_categories():
        expected = original.get_annotation(category).tolist()
        actual = restored.get_annotation(category).tolist()
        assert expected == actual
    assert original.coord.tolist() == restored.coord.tolist()
Esempio n. 3
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``"1l2y"`` from the RCSB in the given `format`
    and check that a structure can be parsed from the result.

    If `as_file_like` is set, no target directory is given to
    :func:`fetch()`; otherwise the biotite temporary directory is used.
    Formats other than ``"pdb"``, ``"pdbx"`` and ``"mmtf"`` are only
    fetched, not parsed.
    """
    def parse(file_class, get_structure, source):
        # Read 'source' into an empty file object
        # and extract the structure from it
        structure_file = file_class()
        structure_file.read(source)
        get_structure(structure_file)

    target_dir = None if as_file_like else biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    if format == "pdb":
        parse(pdb.PDBFile, pdb.get_structure, fetched)
    elif format == "pdbx":
        parse(pdbx.PDBxFile, pdbx.get_structure, fetched)
    elif format == "mmtf":
        parse(mmtf.MMTFFile, mmtf.get_structure, fetched)
Esempio n. 4
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``"1l2y"`` from the RCSB in the given `format`
    and check that its content can be parsed.

    If `as_file_like` is set, no target directory is given to
    :func:`fetch()`; otherwise the biotite temporary directory is used.
    Structure formats are parsed into a structure, while for
    ``"fasta"`` at least one sequence must be present.
    """
    target_dir = None if as_file_like else biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    if format == "pdb":
        pdb.get_structure(pdb.PDBFile.read(fetched))
    elif format == "pdbx":
        pdbx.get_structure(pdbx.PDBxFile.read(fetched))
    elif format == "mmtf":
        mmtf.get_structure(mmtf.MMTFFile.read(fetched))
    elif format == "fasta":
        sequences = fasta.get_sequences(fasta.FastaFile.read(fetched))
        # Test if the file contains any sequences
        assert len(sequences) > 0
Esempio n. 5
0
def load_structure(fpath, chain=None):
    """
    Load one chain of the first model of a structure file, with
    solvent atoms removed.

    Args:
        fpath: filepath to either pdb or cif file; the format is chosen
            from the end of the path ('cif' or 'pdb')
        chain: the chain id; if None, the first chain found is used
    Returns:
        biotite.structure.AtomArray
    Raises:
        ValueError: if the path ends with neither 'cif' nor 'pdb',
            if no chains remain after removing the solvent, or if
            the requested chain is not present
    """
    if fpath.endswith('cif'):
        with open(fpath) as fin:
            pdbxf = pdbx.PDBxFile.read(fin)
        structure = pdbx.get_structure(pdbxf, model=1)
    elif fpath.endswith('pdb'):
        with open(fpath) as fin:
            pdbf = pdb.PDBFile.read(fin)
        structure = pdb.get_structure(pdbf, model=1)
    else:
        # Without this branch 'structure' would stay unbound and the
        # function would fail below with an unrelated UnboundLocalError
        raise ValueError(
            f'Unsupported file format for {fpath}: '
            'expected a cif or pdb file'
        )
    issolvent = filter_solvent(structure)
    structure = structure[~issolvent]
    chains = get_chains(structure)
    print(f'Found {len(chains)} chains:', chains, '\n')
    # Explicit length check: 'chains' may be an array,
    # for which truthiness is ambiguous
    if len(chains) == 0:
        raise ValueError('No chains found in the input file.')
    if chain is None:
        chain = chains[0]
    if chain not in chains:
        raise ValueError(f'Chain {chain} not found in input file')
    structure = structure[structure.chain_id == chain]
    print(f'Loaded chain {chain}\n')
    return structure
Esempio n. 6
0
def test_get_model_count():
    """
    Check that :func:`get_model_count()` agrees with the stack depth
    of the structure parsed from the same file.
    """
    structure_path = join(data_dir("structure"), "1l2y.pdb")
    pdb_file = pdb.PDBFile.read(structure_path)
    # The module-level function is used on purpose,
    # so the thin wrapper around the method
    # 'get_model_count()' is tested as well
    counted_models = pdb.get_model_count(pdb_file)
    parsed_structure = pdb.get_structure(pdb_file)
    assert counted_models == parsed_structure.stack_depth()
Esempio n. 7
0
def test_array_conversion(path, single_model, hybrid36):
    """
    Check that a structure survives a round trip through a
    :class:`PDBFile`.

    Depending on `single_model`, either model 1 or all models
    (``model=None``) are read from `path`.
    The structure is written into a fresh :class:`PDBFile`, using the
    given `hybrid36` setting, and read back; box, bonds, annotations
    and coordinates must agree.
    """
    if single_model:
        model = 1
    else:
        model = None

    source_file = pdb.PDBFile()
    source_file.read(path)
    # The module-level functions 'get_structure()' and
    # 'set_structure()' are used on purpose,
    # so these thin wrappers are tested as well
    original = pdb.get_structure(source_file, model=model)

    target_file = pdb.PDBFile()
    pdb.set_structure(target_file, original, hybrid36=hybrid36)
    restored = pdb.get_structure(target_file, model=model)

    if original.box is not None:
        assert np.allclose(original.box, restored.box)
    assert original.bonds == restored.bonds
    for category in original.get_annotation_categories():
        expected = original.get_annotation(category).tolist()
        actual = restored.get_annotation(category).tolist()
        assert expected == actual
    assert original.coord.tolist() == restored.coord.tolist()
Esempio n. 8
0
def test_id_overflow():
    """
    Check how atom and residue IDs of a structure with 100000 atoms
    are written to a PDB file.

    Written as a regular PDB file, a :class:`UserWarning` is expected,
    the file must still be readable and the atom ID in the last line
    is expected to be 1.
    Written in hybrid-36 notation, no warning may be raised and the
    last line must contain the expected hybrid-36 encoded IDs.
    """
    # Imported locally, as the module-level imports are not visible here
    import warnings

    # Create an atom array >= 100k atoms
    length = 100000
    a = struc.AtomArray(length)
    a.coord = np.zeros(a.coord.shape)
    a.chain_id = np.full(length, "A")
    # Create residue IDs over 10000
    a.res_id = np.arange(1, length + 1)
    a.res_name = np.full(length, "GLY")
    a.hetero = np.full(length, False)
    a.atom_name = np.full(length, "CA")
    a.element = np.full(length, "C")

    # The context manager closes the temporary file
    # even if one of the assertions below fails
    with TemporaryFile("w+") as temp:
        # Write array to pdb file and make sure a warning is thrown
        with pytest.warns(UserWarning):
            pdb_file = pdb.PDBFile()
            pdb_file.set_structure(a)
            pdb_file.write(temp)

        # Assert file can be read properly
        temp.seek(0)
        a2 = pdb.get_structure(pdb.PDBFile.read(temp))
        assert a2.array_length() == a.array_length()

        # Manually check if the written atom id is correct
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = int(last_line.split()[1])
        assert atom_id == 1

    with TemporaryFile("w+") as temp:
        # Write array as hybrid-36 pdb file: no warning should be thrown
        # 'pytest.warns(None)' is deprecated, so every warning is
        # turned into an error instead, which fails the test
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            tmp_pdb_file = pdb.PDBFile()
            tmp_pdb_file.set_structure(a, hybrid36=True)
            tmp_pdb_file.write(temp)

        # Manually check if the output is written as correct hybrid-36
        temp.seek(0)
        last_line = temp.readlines()[-1]
        fields = last_line.split()
        atom_id = fields[1]
        assert atom_id == "A0000"
        # The first character of this field is skipped
        # (presumably the chain ID directly preceding the residue ID)
        res_id = fields[4][1:]
        assert res_id == "BXG0"
Esempio n. 9
0
def plot_rna(pdb_id, axes):
    """
    Plot the nucleotide secondary structure of a PDB entry.

    The PDB file is fetched from the RCSB into the temporary
    directory, the base pairs of its nucleotides and their pseudoknot
    order are computed and the result is drawn onto `axes`.

    Parameters
    ----------
    pdb_id
        The ID passed to :func:`rcsb.fetch()`.
        It is also used as title of the plot.
    axes
        The object the plot is drawn on.
        It is passed to
        :func:`graphics.plot_nucleotide_secondary_structure()`
        and its ``set_title()`` method is called
        (presumably a Matplotlib ``Axes``).
    """
    # Download the PDB file and read the structure;
    # only the first entry of the parsed structure is used
    structure_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    structure_file = pdb.PDBFile.read(structure_path)
    atoms = pdb.get_structure(structure_file)[0]
    nucleotides = atoms[struc.filter_nucleotides(atoms)]

    # Compute the base pairs, express them as residue positions
    # and determine their pseudoknot order
    raw_pairs = struc.base_pairs(nucleotides)
    base_pairs = struc.get_residue_positions(
        nucleotides, raw_pairs.flatten()
    ).reshape(raw_pairs.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    pair_count = base_pairs.shape[0]

    # Solid lines by default, other linestyles for
    # pseudoknot orders 1 and 2
    linestyles = np.full(pair_count, '-', dtype=object)
    linestyles[pseudoknot_order == 1] = '--'
    linestyles[pseudoknot_order == 2] = ':'

    # Exactly mapped nucleotides get an upper case one-letter-code,
    # all other nucleotides a lower case one-letter-code
    base_labels = [
        code if exact else code.lower()
        for code, exact in map(
            struc.map_nucleotide, struc.residue_iter(nucleotides)
        )
    ]

    # Canonical Watson-Crick base pairs get a darker orange,
    # all other base pairs keep the lighter orange
    canonical_pairs = (["A", "U"], ["C", "G"])
    colors = np.full(pair_count, biotite.colors['brightorange'])
    for i, (pos1, pos2) in enumerate(base_pairs):
        pair_labels = sorted([base_labels[pos1], base_labels[pos2]])
        if pair_labels in canonical_pairs:
            colors[i] = biotite.colors["dimorange"]

    # Plot the secondary structure
    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Use the PDB ID to label each plot
    axes.set_title(pdb_id, loc="left")
Esempio n. 10
0
def test_bond_parsing():
    """
    Check that the bonds parsed from a PDB file are the same as those
    obtained from :func:`connect_via_residue_names()`, ignoring the
    bond order.
    """
    # A structure with CONECT records is chosen to test these as well
    structure_path = join(data_dir("structure"), "3o5r.pdb")
    atoms = pdb.get_structure(
        pdb.PDBFile.read(structure_path), model=1, include_bonds=True
    )
    parsed_bonds = atoms.bonds

    # Only the bonded atom pairs are compared,
    # hence the bond order is removed from the reference
    expected_bonds = struc.connect_via_residue_names(atoms)
    expected_bonds.remove_bond_order()

    assert parsed_bonds.as_set() == expected_bonds.as_set()
Esempio n. 11
0
# Code source: Tom David Müller
# License: BSD 3 clause

from tempfile import gettempdir
import biotite
import biotite.structure.io.pdb as pdb
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.structure.graphics as graphics
import matplotlib.pyplot as plt
import numpy as np

# Download the PDB file and read the structure
pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
atom_array = pdb.get_structure(pdb_file)[0]
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

# Compute the base pairs and the Leontis-Westhof nomenclature
base_pairs = struc.base_pairs(nucleotides)
glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs)
edges = struc.base_pairs_edge(nucleotides, base_pairs)
base_pairs = struc.get_residue_positions(
    nucleotides, base_pairs.flatten()).reshape(base_pairs.shape)

# Get the one-letter-codes of the bases
base_labels = []
for base in struc.residue_iter(nucleotides):
    base_labels.append(base.res_name[0])

# Color canonical Watson-Crick base pairs with a darker orange and