def test_remove_pbc_selections(multi_model):
    """
    Smoke test for the `selection` parameter of `remove_pbc()`.

    No results are asserted here: the test only checks that the
    different kinds of selections run without raising an exception
    and that selecting an atom more than once raises a `ValueError`.
    """
    structure_path = join(data_dir("structure"), "3o5r.mmtf")
    atoms = load_structure(structure_path)
    if multi_model:
        atoms = struc.stack([atoms, atoms])

    # No selection at all
    struc.remove_pbc(atoms)

    # Selection given as the chain ID of the first atom
    first_chain_id = atoms.chain_id[0]
    struc.remove_pbc(atoms, first_chain_id)

    # Selection given as a single mask
    amino_acid_mask = struc.filter_amino_acids(atoms)
    struc.remove_pbc(atoms, amino_acid_mask)

    # Selection given as a list of masks
    separate_masks = [
        struc.filter_amino_acids(atoms),
        (atoms.res_name == "FK5"),
    ]
    struc.remove_pbc(atoms, separate_masks)

    # Expect an error when an atom is selected multiple times
    with pytest.raises(ValueError):
        struc.remove_pbc(
            atoms,
            [struc.filter_amino_acids(atoms), (atoms.atom_name == "CA")]
        )
def test_remove_pbc_unsegmented():
    """
    Check that `remove_pbc()` leaves an unsegmented structure
    untouched, if the structure lies entirely inside the box.

    The solvent is excluded, due to the high distances between its
    atoms.
    """
    original = load_structure(join(data_dir("structure"), "3o5r.mmtf"))

    # Shift the structure, so that its centroid lies at half of the
    # box diagonal
    centroid = struc.centroid(original)
    shift = np.diag(original.box) / 2 - centroid
    original = struc.translate(original, shift)

    # Drop the solvent atoms
    is_solvent = struc.filter_solvent(original)
    original = original[~is_solvent]

    processed = struc.remove_pbc(original)

    assert original.equal_annotation_categories(processed)
    assert np.allclose(original.coord, processed.coord)
def test_remove_pbc_restore(multi_model, translation_vector):
    """
    Translate a structure by `translation_vector`, wrap it back into
    its box and check that `remove_pbc()` restores the adjacency
    matrices of the untouched structure.
    """
    CUTOFF = 5.0

    def get_matrices(array):
        """
        Create a periodic and non-periodic adjacency matrix.
        """
        def adjacency(atoms, periodic):
            cell_list = struc.CellList(atoms, CUTOFF, periodic=periodic)
            return cell_list.create_adjacency_matrix(CUTOFF)

        if isinstance(array, struc.AtomArray):
            matrix = adjacency(array, False)
            matrix_pbc = adjacency(array, True)
        elif isinstance(array, struc.AtomArrayStack):
            # One matrix per model
            matrix = np.array(
                [adjacency(model, False) for model in array]
            )
            matrix_pbc = np.array(
                [adjacency(model, True) for model in array]
            )
        return matrix, matrix_pbc

    def assert_equal_matrices(array, matrix1, matrix2, periodic):
        """
        Due to numerical instability, entries in both matrices might
        be different, when the distance of atoms is almost equal to
        the cutoff distance of the matrix.
        This function checks, whether two atoms with unequal entries
        in the matrices are near the cutoff distance.
        """
        differing = np.where(matrix1 != matrix2)
        for position in zip(*differing):
            if len(position) == 2:
                # multi_model = False -> AtomArray
                m = None
                i, j = position
                box = array.box if periodic else None
                distance = struc.distance(array[i], array[j], box=box)
            elif len(position) == 3:
                # multi_model = True -> AtomArrayStack
                m, i, j = position
                box = array.box[m] if periodic else None
                distance = struc.distance(
                    array[m, i], array[m, j], box=box
                )
            try:
                assert distance == pytest.approx(CUTOFF, abs=1e-4)
            except AssertionError:
                # Report the offending atom pair before failing
                print(f"Model {m}, Atoms {i} and {j}")
                raise

    stack = load_structure(join(data_dir("structure"), "1gya.mmtf"))
    # Give each model a diagonal box that exceeds the coordinate
    # extent by 10 in each dimension ...
    stack.box = np.array([
        np.diag(coord.max(axis=0) - coord.min(axis=0) + 10)
        for coord in stack.coord
    ])
    # ... and shift the coordinates, so that the minimum coordinate
    # of each model is at 5 in each dimension
    lower_corner = np.min(stack.coord, axis=-2, keepdims=True)
    stack.coord -= lower_corner - 5
    array = stack if multi_model else stack[0]

    # Use adjacency matrices instead of pairwise distances
    # for computational efficiency
    ref_matrix, ref_matrix_pbc = get_matrices(array)

    array = struc.translate(array, translation_vector)
    array.coord = struc.move_inside_box(array.coord, array.box)
    moved_matrix, moved_matrix_pbc = get_matrices(array)
    # The translation and the periodic move should not
    # alter PBC-aware pairwise distances
    assert_equal_matrices(array, ref_matrix_pbc, moved_matrix_pbc, True)
    # Non-PBC-aware distances should change,
    # otherwise the atoms do not go over the periodic boundary
    # and the test does not make sense
    with pytest.raises(AssertionError):
        assert_equal_matrices(array, ref_matrix, moved_matrix, False)

    array = struc.remove_pbc(array)
    restored_matrix, restored_matrix_pbc = get_matrices(array)
    # Both adjacency matrices should be equal to the original ones,
    # as the structure should be completely restored
    assert_equal_matrices(array, ref_matrix_pbc, restored_matrix_pbc, True)
    assert_equal_matrices(array, ref_matrix, restored_matrix, False)
def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and CA RMSF of the protein in an MD
    trajectory and optionally write the RMSD and RMSF values to files.

    Three figures are created and shown via ``plt.show()``:
    RMSD over time, radius of gyration over time and RMSF per residue.

    Parameters
    ----------
    topology
        Structure file given to ``strucio.load_structure()``.
        It is used as template for the trajectory.
    xtc_traj
        Trajectory file given to ``XTCFile.read()``.
    start_frame : int, optional
        Forwarded as ``start`` to ``XTCFile.read()``.
    stop_frame : int, optional
        Last frame to be read (inclusive).
        If omitted, ``None`` is forwarded as ``stop`` to
        ``XTCFile.read()``.
    write_dat_files : bool, optional
        If true, the tab-separated files ``rmsd.dat`` and ``rmsf.dat``
        are written into the current working directory.
    """
    def draw_curve(x, y, x_limits, y_limits, x_label, y_label):
        # All three figures share the same layout and color
        figure = plt.figure(figsize=(6, 3))
        ax = figure.add_subplot(111)
        ax.plot(x, y, color=biotite.colors["dimorange"])
        ax.set_xlim(*x_limits)
        ax.set_ylim(*y_limits)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        figure.tight_layout()

    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)
    # The structure still has water and ions, that are not needed for
    # our calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a
    # boolean mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame' is inclusive, hence the reader gets 'stop_frame + 1';
    # 'None' must be passed through unchanged instead of raising a
    # TypeError on 'None + 1'
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(
        xtc_traj, atom_i=np.where(protein_mask)[0],
        start=start_frame, stop=stop
    )
    trajectory = xtc_file.get_structure(template)
    # Get simulation time for plotting purposes
    time = xtc_file.get_time()

    trajectory = struc.remove_pbc(trajectory)
    # The first model is the reference for superimposition and RMSD
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)
    draw_curve(
        time, rmsd, (time[0], time[-1]), (0, 2),
        "Time (ps)", "RMSD (Å)"
    )

    radius = struc.gyration_radius(trajectory)
    draw_curve(
        time, radius, (time[0], time[-1]), (14.0, 14.5),
        "Time (ps)", "Radius of gyration (Å)"
    )

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    res_count = struc.get_residue_count(trajectory)
    draw_curve(
        np.arange(1, res_count + 1), rmsf, (1, res_count), (0, 1.5),
        "Residue", "RMSF (Å)"
    )

    if write_dat_files:
        # Write RMSD *.dat file
        # The number of labels is taken from the data itself, so it
        # also fits if frame limits are omitted
        frame_count = len(rmsd)
        if start_frame is None:
            frames = np.arange(frame_count, dtype=int)
        else:
            # NOTE(review): the labels start at 'start_frame - 1' and
            # the first label is forced to 0 - kept as in the previous
            # implementation, confirm the intended frame numbering
            frames = np.arange(
                start_frame - 1, start_frame - 1 + frame_count, dtype=int
            )
            frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(
            data=rmsf, index=residue_names, columns=["RMSF Values"]
        )
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')

    plt.show()
# This gives us the additional option that allows us to select the # coordinates belonging to the amino acids. xtc_file = xtc.XTCFile.read(traj_file_path, atom_i=np.where(protein_mask)[0]) trajectory = xtc_file.get_structure(template) # Get simulation time for plotting purposes time = xtc_file.get_time() ######################################################################## # Since the MD simulation used periodic boundaries, the protein might be # segmented over the box boundary. # For further analysis we need to reassemble the protein chain into a # whole molecule, without periodic boundaries. # in *Gromacs* we could have used ``gmx trjconv`` for this, but this # problem can be handled in *Biotite*, too. trajectory = struc.remove_pbc(trajectory) ######################################################################## # Now our trajectory is ready for some analysis! # At first we want to see if the simulation converged. # For this purpose we take the RMSD of a frame compared to the initial # model as measure. In order to calculate the RMSD we must # superimpose all models onto a reference, in this case we also choose # the initial structure. trajectory, transform = struc.superimpose(trajectory[0], trajectory) rmsd = struc.rmsd(trajectory[0], trajectory) figure = plt.figure(figsize=(6, 3)) ax = figure.add_subplot(111) ax.plot(time, rmsd, color=biotite.colors["dimorange"])
array = struc.AtomArray(length=100)
print(array.box)
array.box = box
print(array.box)
file_path = rcsb.fetch("3o5r", "mmtf", biotite.temp_dir())
array = strucio.load_structure(file_path)
print(array.box)

########################################################################
# When loading a trajectory from an MD simulation, the molecules are
# often fragmented over the periodic boundary.
# While a lot of analysis functions can handle such periodic boundary
# conditions automatically, some require completed molecules.
# In this case you should use :func:`remove_pbc()`.

array = struc.remove_pbc(array)

########################################################################
# Structure analysis
# ------------------
#
# This package would be almost useless, if there wasn't some means to
# analyze your structures.
# Therefore, *Biotite* offers a bunch of functions for this purpose,
# ranging from simple bond angle and length measurements to more
# complex characteristics, like accessible surface area and
# secondary structure.
# The following section will introduce you to some of these functions,
# which should be applied to that good old structure of *TC5b*.
#
# The examples shown in this section do not represent the full spectrum