Ejemplo n.º 1
0
def test_base_pairs_forward(nuc_sample_array, basepairs, unique_bool):
    """
    Run ``base_pairs()`` on the sample structure and compare the
    residue IDs of the detected pairs with the reference ``basepairs``.

    ``unique_bool`` is forwarded as the ``unique`` argument of
    ``base_pairs()``.
    """
    detected_pairs = struc.base_pairs(nuc_sample_array, unique=unique_bool)
    check_residue_starts(detected_pairs, nuc_sample_array)
    # The detected pairs index into the atom array; look up the
    # residue IDs they belong to before comparing with the reference
    detected_res_ids = nuc_sample_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
Ejemplo n.º 2
0
def test_pseudoknots(nuc_sample_array):
    """
    Check the output of :func:`pseudoknots()`.

    The base pairs of the sample structure are computed and passed to
    ``pseudoknots()``. Each returned solution is checked for the
    expected number of order-one and order-two base pairs and for the
    expected order of each individual base pair.
    """
    # Residue ID pairs that are known to have pseudoknot order one
    known_order_one = [{2, 74}, {58, 72}, {59, 71}, {60, 70}]
    # Residue ID pairs that may have either order one or order two
    known_order_one_or_two = [{9, 48}, {10, 49}]
    # Half of the ambiguous pairs is expected in each of the two orders
    ambiguous_share = len(known_order_one_or_two) / 2
    expected_order_one = len(known_order_one) + ambiguous_share
    expected_order_two = ambiguous_share

    base_pairs = struc.base_pairs(nuc_sample_array)
    solutions = struc.pseudoknots(base_pairs)

    # With default scoring parameters the sample structure should
    # yield two optimal solutions
    assert len(solutions) == 2

    for solution in solutions:
        # Every base pair gets an order and the number of base pairs
        # per order matches the expectation
        assert len(base_pairs) == len(solution)
        assert np.count_nonzero(solution == 1) == expected_order_one
        assert np.count_nonzero(solution == 2) == expected_order_two
        assert np.max(solution) == 2

        # Each individual base pair must have a permitted order
        pair_res_ids = nuc_sample_array[base_pairs].res_id
        for res_id_pair, order in zip(pair_res_ids, solution):
            if order == 1:
                pair = set(res_id_pair)
                assert (pair in known_order_one
                        or pair in known_order_one_or_two)
            elif order == 2:
                pair = set(res_id_pair)
                assert pair in known_order_one_or_two
Ejemplo n.º 3
0
def test_base_pairs_reordered(nuc_sample_array, seed):
    """
    Test the function base_pairs with a structure whose atoms are not
    in the RCSB order.

    The atoms inside each residue of the sample structure are shuffled
    with a random permutation seeded by ``seed``, while the order of
    the residues is kept. The base pairs computed for the shuffled
    structure must be equal to those computed for the original
    structure.
    """
    np.random.seed(seed)

    # Rebuild the structure residue by residue, shuffling the atoms of
    # each residue
    shuffled_array = struc.AtomArray(0)
    for residue in struc.residue_iter(nuc_sample_array):
        n_atoms = residue.array_length()
        atom_order = np.random.choice(
            np.arange(n_atoms), n_atoms, replace=False
        )
        shuffled_array += residue[..., atom_order]

    # array_equal() also covers the case that both runs find a
    # different number of base pairs: It evaluates to False instead of
    # relying on an elementwise comparison of differently shaped arrays
    assert np.array_equal(
        struc.base_pairs(nuc_sample_array),
        struc.base_pairs(shuffled_array),
    )
Ejemplo n.º 4
0
def test_base_pairs_forward_no_hydrogen(nuc_sample_array, basepairs):
    """
    Run ``base_pairs()`` on the sample structure after all atoms with
    the element ``"H"`` were removed and compare the residue IDs of
    the detected pairs with the reference ``basepairs``.
    """
    heavy_atom_mask = nuc_sample_array.element != "H"
    heavy_atom_array = nuc_sample_array[heavy_atom_mask]

    detected_pairs = struc.base_pairs(heavy_atom_array)
    check_residue_starts(detected_pairs, heavy_atom_array)
    detected_res_ids = heavy_atom_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
Ejemplo n.º 5
0
def plot_rna(pdb_id, axes):
    """
    Plot the secondary structure of the nucleotides of a PDB entry.

    The entry is fetched as PDB file into the temporary directory, its
    base pairs and their pseudoknot order are computed and the result
    is drawn with ``graphics.plot_nucleotide_secondary_structure()``.

    Parameters
    ----------
    pdb_id
        The ID passed to ``rcsb.fetch()``. It is also used as title of
        the plot.
    axes
        The axes object the plot is drawn on.
    """
    # Fetch the entry and keep only the nucleotides of the first
    # element of the parsed structure
    structure_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    atom_array = pdb.get_structure(pdb.PDBFile.read(structure_path))[0]
    nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

    # Base pairs are converted into residue positions, keeping the
    # shape of the array; the first returned pseudoknot solution is used
    pair_indices = struc.base_pairs(nucleotides)
    flat_positions = struc.get_residue_positions(
        nucleotides, pair_indices.flatten()
    )
    base_pairs = flat_positions.reshape(pair_indices.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]

    # Solid line by default, dashed for order one, dotted for order two
    pair_count = base_pairs.shape[0]
    linestyles = np.full(pair_count, '-', dtype=object)
    linestyles[pseudoknot_order == 1] = '--'
    linestyles[pseudoknot_order == 2] = ':'

    # Upper case one-letter code for exactly mapped nucleotides,
    # lower case one-letter code for all other nucleotides
    base_labels = []
    for base in struc.residue_iter(nucleotides):
        code, is_exact = struc.map_nucleotide(base)
        base_labels.append(code if is_exact else code.lower())

    # Canonical Watson-Crick base pairs get the darker orange, all
    # other base pairs keep the lighter orange
    watson_crick_pairs = (["A", "U"], ["C", "G"])
    colors = np.full(pair_count, biotite.colors['brightorange'])
    for i, (pos1, pos2) in enumerate(base_pairs):
        pair_labels = sorted([base_labels[pos1], base_labels[pos2]])
        if pair_labels in watson_crick_pairs:
            colors[i] = biotite.colors["dimorange"]

    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Label the plot with the PDB ID
    axes.set_title(pdb_id, loc="left")
Ejemplo n.º 6
0
def test_base_pairs_reverse(nuc_sample_array, basepairs, unique_bool):
    """
    Run ``base_pairs()`` on the sample structure with reversed residue
    order and compare the residue IDs of the detected pairs with the
    reference ``basepairs``.

    ``unique_bool`` is forwarded as the ``unique`` argument of
    ``base_pairs()``.
    """
    # Append the residues in reversed order to an empty atom array
    flipped_array = struc.AtomArray(0)
    residues = struc.residue_iter(nuc_sample_array)
    for residue in reversed_iterator(residues):
        flipped_array = flipped_array + residue

    detected_pairs = struc.base_pairs(flipped_array, unique=unique_bool)
    check_residue_starts(detected_pairs, flipped_array)
    detected_res_ids = flipped_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
Ejemplo n.º 7
0
def test_base_pairs_reverse_no_hydrogen(nuc_sample_array, basepairs):
    """
    Run ``base_pairs()`` on the sample structure after all atoms with
    the element ``"H"`` were removed and the residue order was
    reversed. The residue IDs of the detected pairs are compared with
    the reference ``basepairs``.
    """
    heavy_atom_array = nuc_sample_array[nuc_sample_array.element != "H"]

    # Append the residues in reversed order to an empty atom array
    flipped_array = struc.AtomArray(0)
    residues = struc.residue_iter(heavy_atom_array)
    for residue in reversed_iterator(residues):
        flipped_array = flipped_array + residue

    detected_pairs = struc.base_pairs(flipped_array)
    check_residue_starts(detected_pairs, flipped_array)
    detected_res_ids = flipped_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
Ejemplo n.º 8
0
def test_base_pairs_incomplete_structure(nuc_sample_array):
    """
    Remove atoms belonging to the pyrimidine / purine ring of each base
    and the ``O2`` atom contained in pyrimidine bases.

    Test that no base pairs are detected, as all bases have fewer than
    3 atoms in common with their implemented reference base, and that
    an ``IncompleteStructureWarning`` is raised.
    """
    removed_atom_names = [
        'N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N7', 'C8', 'N9', 'O2'
    ]
    keep_mask = ~np.isin(nuc_sample_array.atom_name, removed_atom_names)
    stripped_array = nuc_sample_array[keep_mask]

    with pytest.warns(struc.IncompleteStructureWarning):
        detected_pairs = struc.base_pairs(stripped_array)
        assert len(detected_pairs) == 0
Ejemplo n.º 9
0
def test_base_pairs_glycosidic_bond(pdb_id):
    """
    Test the function ``base_pairs_glycosidic_bond``. Each test
    structure is a crystal structure onto which hydrogens were added
    using Gromacs force fields. The reference data was taken from the
    NDB-database annotations and parsed as json array.
    """
    reference_structure, reference_gly_bonds = get_reference(pdb_id, "sugar")

    # Compute the base pairs and their glycosidic bond orientations
    pairs = struc.base_pairs(reference_structure)
    orientations = struc.base_pairs_glycosidic_bond(reference_structure, pairs)

    for pair, computed_orientation in zip(pairs, orientations):
        res_ids = reference_structure[pair].res_id
        ref_index = get_reference_index(res_ids, reference_gly_bonds)
        if ref_index is None:
            # Base pairs for which no reference index is found are
            # not checked
            continue
        # The third column of the reference entry holds the value the
        # expected orientation is constructed from
        expected_orientation = struc.GlycosidicBond(
            reference_gly_bonds[ref_index, 2]
        )
        assert expected_orientation == computed_orientation
Ejemplo n.º 10
0
def test_base_pairs_edge(pdb_id):
    """
    Test the function ``base_pairs_edge``. Each test structure is a
    crystal structure onto which hydrogens were added using Gromacs
    force fields. The reference data was taken from the NDB-database
    annotations and parsed as json array.
    """
    reference_structure, reference_edges = get_reference(pdb_id, "edges")

    # Compute the base pairs and the edges of each pair
    pairs = struc.base_pairs(reference_structure)
    computed_edges = struc.base_pairs_edge(reference_structure, pairs)

    for pair, pair_edges in zip(pairs, computed_edges):
        res_ids = reference_structure[pair].res_id
        ref_index = get_reference_index(res_ids, reference_edges)
        if ref_index is None:
            # Base pairs for which no reference index is found are
            # not checked
            continue
        # The third and fourth column of the reference entry hold the
        # reference edges of the pair
        expected_edges = [
            reference_edges[ref_index, 2],
            reference_edges[ref_index, 3],
        ]
        check_edge_plausibility(
            reference_structure, pair, expected_edges, pair_edges
        )
Ejemplo n.º 11
0
import biotite
import biotite.structure.io.pdb as pdb
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.structure.graphics as graphics
import matplotlib.pyplot as plt
import numpy as np

# Download the PDB file and read the structure
# NOTE(review): gettempdir is not part of the visible import block -
# confirm that `from tempfile import gettempdir` exists in the file
pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
# Only the first element of the parsed structure is used
atom_array = pdb.get_structure(pdb_file)[0]
# Restrict the structure to the atoms selected by the nucleotide filter
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

# Compute the base pairs and the Leontis-Westhof nomenclature
base_pairs = struc.base_pairs(nucleotides)
# Glycosidic bonds and edges are computed from the original base pair
# array, before it is overwritten with residue positions below
glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs)
edges = struc.base_pairs_edge(nucleotides, base_pairs)
# Convert the base pairs into residue positions, keeping the shape of
# the array
base_pairs = struc.get_residue_positions(
    nucleotides, base_pairs.flatten()).reshape(base_pairs.shape)

# Get the one-letter-codes of the bases
# The residue name of the first atom of each residue is used as label
base_labels = []
for base in struc.residue_iter(nucleotides):
    base_labels.append(base.res_name[0])

# Color canonical Watson-Crick base pairs with a darker orange and
# non-canonical base pairs with a lighter orange
colors = np.full(base_pairs.shape[0], biotite.colors['brightorange'])
for i, (base1, base2) in enumerate(base_pairs):
    name1 = base_labels[base1]
Ejemplo n.º 12
0
# Scale the axes to the number of residues: the x-axis gets a margin of
# 0.5 on both sides, the y-axis spans half the number of residues
# NOTE(review): `ax`, `ticker`, `residue_ids` and `residue_names` are
# defined outside this excerpt - presumably a matplotlib axes, the
# `matplotlib.ticker` module and per-residue sequences; confirm
ax.set_xlim(0.5, len(residue_ids) + 0.5)
ax.set_ylim(0, len(residue_ids) / 2 + 0.5)
ax.set_aspect("equal")
# Place a major tick at every multiple of 3 on the x-axis
ax.xaxis.set_major_locator(ticker.MultipleLocator(3))
ax.tick_params(axis='both', which='major', labelsize=8)
# No ticks on the y-axis
ax.set_yticks([])

# Remove the frame
plt.box(False)

# Plot the residue names in order
# Each name is centered horizontally at its residue ID at y = 0
for residue_name, residue_id in zip(residue_names, residue_ids):
    ax.text(residue_id, 0, residue_name, ha='center', fontsize=8)

# Draw the arcs between basepairs
for base1, base2 in struc.base_pairs(nucleotides):
    arc_center = (np.mean(
        (nucleotides.res_id[base1], nucleotides.res_id[base2])), 1.5)
    arc_diameter = abs(nucleotides.res_id[base2] - nucleotides.res_id[base1])
    name1 = nucleotides.res_name[base1]
    name2 = nucleotides.res_name[base2]
    if sorted([name1, name2]) in [["A", "U"], ["C", "G"]]:
        color = biotite.colors["dimorange"]
    else:
        color = biotite.colors["brightorange"]
    arc = Arc(arc_center,
              arc_diameter,
              arc_diameter,
              180,
              theta1=180,
              theta2=0,