def test_base_pairs_forward(nuc_sample_array, basepairs, unique_bool):
    """
    Check :func:`base_pairs()` on the unmodified sample structure.

    ``unique_bool`` is forwarded as the ``unique`` argument. The
    computed pairs are handed to ``check_residue_starts()`` and the
    residue IDs of the paired atoms are compared against the reference
    ``basepairs`` via ``check_output()``.
    """
    detected_pairs = struc.base_pairs(nuc_sample_array, unique=unique_bool)
    check_residue_starts(detected_pairs, nuc_sample_array)

    detected_res_ids = nuc_sample_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
def test_pseudoknots(nuc_sample_array):
    """
    Verify the pseudoknot orders reported by :func:`pseudoknots()`
    for the base pairs of the sample structure.
    """
    # Base pairs (as sets of residue IDs) known to have order 1
    order_one_pairs = [{2, 74}, {58, 72}, {59, 71}, {60, 70}]
    # Base pairs that are of order 1 in one solution and of order 2
    # in the other one
    ambiguous_pairs = [{9, 48}, {10, 49}]
    # In each solution half of the ambiguous pairs fall into each order
    ambiguous_half = len(ambiguous_pairs) / 2
    expected_order_one = len(order_one_pairs) + ambiguous_half
    expected_order_two = ambiguous_half

    base_pairs = struc.base_pairs(nuc_sample_array)
    solutions = struc.pseudoknots(base_pairs)

    # With default scoring parameters two optimal solutions are
    # expected for the sample structure
    assert len(solutions) == 2

    for solution in solutions:
        # One order per base pair, with the expected number of pairs
        # in each order
        assert len(base_pairs) == len(solution)
        assert np.count_nonzero(solution == 1) == expected_order_one
        assert np.count_nonzero(solution == 2) == expected_order_two
        assert np.max(solution) == 2

        # Every pair of order 1 or 2 must be one of the known pairs
        # that are allowed to have this order
        pair_res_ids = nuc_sample_array[base_pairs].res_id
        for res_ids, order in zip(pair_res_ids, solution):
            if order == 1:
                allowed_pairs = order_one_pairs + ambiguous_pairs
            elif order == 2:
                allowed_pairs = ambiguous_pairs
            else:
                # Other orders are not checked pair by pair
                continue
            assert set(res_ids) in allowed_pairs
def test_base_pairs_reordered(nuc_sample_array, seed):
    """
    Check that :func:`base_pairs()` gives the same result when the
    atoms within each residue are not in the RCSB order.
    """
    np.random.seed(seed)

    # Rebuild the structure residue by residue, shuffling the atoms
    # inside each residue
    shuffled_array = struc.AtomArray(0)
    for residue in struc.residue_iter(nuc_sample_array):
        atom_count = residue.array_length()
        shuffled_indices = np.random.choice(
            np.arange(atom_count), atom_count, replace=False
        )
        shuffled_array += residue[..., shuffled_indices]

    expected_pairs = struc.base_pairs(nuc_sample_array)
    shuffled_pairs = struc.base_pairs(shuffled_array)
    assert np.all(expected_pairs == shuffled_pairs)
def test_base_pairs_forward_no_hydrogen(nuc_sample_array, basepairs):
    """
    Check :func:`base_pairs()` on the sample structure after all
    atoms with element ``"H"`` have been removed.
    """
    heavy_atom_mask = nuc_sample_array.element != "H"
    heavy_atoms = nuc_sample_array[heavy_atom_mask]

    detected_pairs = struc.base_pairs(heavy_atoms)
    check_residue_starts(detected_pairs, heavy_atoms)

    detected_res_ids = heavy_atoms[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
def plot_rna(pdb_id, axes):
    """
    Plot the nucleotide secondary structure of a PDB entry.

    The entry is fetched from the RCSB into the temporary directory,
    its base pairs and their pseudoknot order are computed, and the
    result is drawn onto `axes`, which is titled with `pdb_id`.
    """
    # Fetch and parse the entry; index 0 of the parsed structure is
    # used (presumably the first model)
    structure_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    structure = pdb.get_structure(pdb.PDBFile.read(structure_path))[0]
    nucleotides = structure[struc.filter_nucleotides(structure)]

    # Base pairs are computed as atom indices and then converted
    # into residue positions, keeping the array shape
    pair_atom_indices = struc.base_pairs(nucleotides)
    residue_positions = struc.get_residue_positions(
        nucleotides, pair_atom_indices.flatten()
    )
    base_pairs = residue_positions.reshape(pair_atom_indices.shape)
    # Only the first pseudoknot solution is plotted
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    pair_count = base_pairs.shape[0]

    # Solid lines by default, dashed for order 1, dotted for order 2
    linestyles = np.full(pair_count, "-", dtype=object)
    linestyles[pseudoknot_order == 1] = "--"
    linestyles[pseudoknot_order == 2] = ":"

    # Upper case one-letter-code for exactly mapped nucleotides,
    # lower case for all others
    base_labels = []
    for base in struc.residue_iter(nucleotides):
        one_letter_code, exact = struc.map_nucleotide(base)
        base_labels.append(
            one_letter_code if exact else one_letter_code.lower()
        )

    # Canonical Watson-Crick pairs get the darker orange,
    # every other pair keeps the lighter orange
    watson_crick_pairs = [["A", "U"], ["C", "G"]]
    colors = np.full(pair_count, biotite.colors["brightorange"])
    for row, (first, second) in enumerate(base_pairs):
        pair_names = sorted((base_labels[first], base_labels[second]))
        if pair_names in watson_crick_pairs:
            colors[row] = biotite.colors["dimorange"]

    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Label the plot with the PDB ID
    axes.set_title(pdb_id, loc="left")
def test_base_pairs_reverse(nuc_sample_array, basepairs, unique_bool):
    """
    Check :func:`base_pairs()` on the sample structure with the
    order of its residues reversed.

    ``unique_bool`` is forwarded as the ``unique`` argument.
    """
    # Concatenate the residues starting from the last one
    residues_back_to_front = reversed_iterator(
        struc.residue_iter(nuc_sample_array)
    )
    reversed_array = struc.AtomArray(0)
    for residue in residues_back_to_front:
        reversed_array = reversed_array + residue

    detected_pairs = struc.base_pairs(reversed_array, unique=unique_bool)
    check_residue_starts(detected_pairs, reversed_array)

    detected_res_ids = reversed_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
def test_base_pairs_reverse_no_hydrogen(nuc_sample_array, basepairs):
    """
    Check :func:`base_pairs()` on the sample structure after removing
    all atoms with element ``"H"`` and reversing the order of the
    residues.
    """
    heavy_atoms = nuc_sample_array[nuc_sample_array.element != "H"]

    # Concatenate the residues starting from the last one
    residues_back_to_front = reversed_iterator(
        struc.residue_iter(heavy_atoms)
    )
    reversed_array = struc.AtomArray(0)
    for residue in residues_back_to_front:
        reversed_array = reversed_array + residue

    detected_pairs = struc.base_pairs(reversed_array)
    check_residue_starts(detected_pairs, reversed_array)

    detected_res_ids = reversed_array[detected_pairs].res_id
    check_output(detected_res_ids, basepairs)
def test_base_pairs_incomplete_structure(nuc_sample_array):
    """
    Strip the atoms of the pyrimidine / purine ring from each base,
    as well as the ``O2`` atom contained in pyrimidine bases.

    No base pair should be detected afterwards, as all bases have
    less than 3 atoms in common with their implemented reference
    base. An ``IncompleteStructureWarning`` is expected to be raised.
    """
    removed_atom_names = [
        'N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N7', 'C8', 'N9', 'O2'
    ]
    is_removed = np.isin(nuc_sample_array.atom_name, removed_atom_names)
    stripped_array = nuc_sample_array[~is_removed]

    with pytest.warns(struc.IncompleteStructureWarning):
        detected_pairs = struc.base_pairs(stripped_array)
        assert len(detected_pairs) == 0
def test_base_pairs_glycosidic_bond(pdb_id):
    """
    Test the function ``base_pairs_glycosidic_bond``.

    Each test structure is a crystal structure onto which hydrogens
    were added using Gromacs force fields. The reference data was
    taken from the NDB-database annotations and parsed as json array.
    """
    # Load the structure and its reference annotation
    reference_structure, reference_gly_bonds = get_reference(pdb_id, "sugar")

    # Compute the base pairs and their glycosidic bond orientations
    pairs = struc.base_pairs(reference_structure)
    orientations = struc.base_pairs_glycosidic_bond(
        reference_structure, pairs
    )

    # Compare each base pair that has a reference entry
    for pair, pair_orientation in zip(pairs, orientations):
        pair_res_ids = reference_structure[pair].res_id
        index = get_reference_index(pair_res_ids, reference_gly_bonds)
        if index is None:
            # No reference entry was found for this base pair
            continue
        reference_orientation = struc.GlycosidicBond(
            reference_gly_bonds[index, 2]
        )
        assert reference_orientation == pair_orientation
def test_base_pairs_edge(pdb_id):
    """
    Test the function ``base_pairs_edge``.

    Each test structure is a crystal structure onto which hydrogens
    were added using Gromacs force fields. The reference data was
    taken from the NDB-database annotations and parsed as json array.
    """
    # Load the structure and its reference annotation
    reference_structure, reference_edges = get_reference(pdb_id, "edges")

    # Compute the base pairs and the interacting edge of each base
    pairs = struc.base_pairs(reference_structure)
    edges = struc.base_pairs_edge(reference_structure, pairs)

    # Check plausibility for each base pair that has a reference entry
    for pair, pair_edges in zip(pairs, edges):
        pair_res_ids = reference_structure[pair].res_id
        index = get_reference_index(pair_res_ids, reference_edges)
        if index is None:
            # No reference entry was found for this base pair
            continue
        pair_reference_edges = [
            reference_edges[index, 2],
            reference_edges[index, 3],
        ]
        check_edge_plausibility(
            reference_structure, pair, pair_reference_edges, pair_edges
        )
import biotite import biotite.structure.io.pdb as pdb import biotite.database.rcsb as rcsb import biotite.structure as struc import biotite.structure.graphics as graphics import matplotlib.pyplot as plt import numpy as np # Download the PDB file and read the structure pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir()) pdb_file = pdb.PDBFile.read(pdb_file_path) atom_array = pdb.get_structure(pdb_file)[0] nucleotides = atom_array[struc.filter_nucleotides(atom_array)] # Compute the base pairs and the Leontis-Westhof nomenclature base_pairs = struc.base_pairs(nucleotides) glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs) edges = struc.base_pairs_edge(nucleotides, base_pairs) base_pairs = struc.get_residue_positions( nucleotides, base_pairs.flatten()).reshape(base_pairs.shape) # Get the one-letter-codes of the bases base_labels = [] for base in struc.residue_iter(nucleotides): base_labels.append(base.res_name[0]) # Color canonical Watson-Crick base pairs with a darker orange and # non-canonical base pairs with a lighter orange colors = np.full(base_pairs.shape[0], biotite.colors['brightorange']) for i, (base1, base2) in enumerate(base_pairs): name1 = base_labels[base1]
ax.set_xlim(0.5, len(residue_ids) + 0.5) ax.set_ylim(0, len(residue_ids) / 2 + 0.5) ax.set_aspect("equal") ax.xaxis.set_major_locator(ticker.MultipleLocator(3)) ax.tick_params(axis='both', which='major', labelsize=8) ax.set_yticks([]) # Remove the frame plt.box(False) # Plot the residue names in order for residue_name, residue_id in zip(residue_names, residue_ids): ax.text(residue_id, 0, residue_name, ha='center', fontsize=8) # Draw the arcs between basepairs for base1, base2 in struc.base_pairs(nucleotides): arc_center = (np.mean( (nucleotides.res_id[base1], nucleotides.res_id[base2])), 1.5) arc_diameter = abs(nucleotides.res_id[base2] - nucleotides.res_id[base1]) name1 = nucleotides.res_name[base1] name2 = nucleotides.res_name[base2] if sorted([name1, name2]) in [["A", "U"], ["C", "G"]]: color = biotite.colors["dimorange"] else: color = biotite.colors["brightorange"] arc = Arc(arc_center, arc_diameter, arc_diameter, 180, theta1=180, theta2=0,