def test_dssp(path):
    """
    Compare the secondary structure annotated by the local DSSP
    application with the annotation stored in an MMTF file.

    Only the chain sharing the chain ID of the first non-hetero atom
    of model 1 is checked.
    The test returns early, without asserting anything, if all
    secondary structure codes selected for that chain are ``-1``.

    Parameters
    ----------
    path : str
        Path of the MMTF file to be tested.
    """
    # Maps the integer codes of the 'secStructList' field
    # to one-letter secondary structure codes
    sec_struct_codes = {
        0: "I",
        1: "S",
        2: "H",
        3: "E",
        4: "G",
        5: "B",
        6: "T",
        7: "C",
    }

    mmtf_file = mmtf.MMTFFile.read(path)
    array = mmtf.get_structure(mmtf_file, model=1)
    # Ignore hetero atoms
    array = array[~array.hetero]
    first_chain_id = array.chain_id[0]
    chain = array[array.chain_id == first_chain_id]
    n_residues = struc.get_residue_count(chain)

    # Secondary structure annotation in PDB use also DSSP
    # -> compare PDB and local DSSP
    # NOTE(review): the slice assumes that the first 'n_residues'
    # entries of 'secStructList' belong to the selected chain
    sse = mmtf_file["secStructList"]
    sse = sse[:n_residues]
    if (sse == -1).all():
        # First chain is not a polypeptide chain (presumably DNA/RNA)
        # DSSP not applicable -> return
        return
    sse = np.array([sec_struct_codes[code] for code in sse], dtype="U1")

    sse_from_app = DsspApp.annotate_sse(chain)
    # PDB uses different DSSP version -> slight differences possible
    # -> only 90% must be identical
    assert np.count_nonzero(sse_from_app == sse) / len(sse) > 0.9
def test_get_residues(array):
    """
    Check that `get_residues()` yields the expected residue IDs and
    residue names for the given structure and that the number of
    returned residues agrees with `get_residue_count()`.

    Parameters
    ----------
    array
        The structure under test; it is expected to contain the 20
        residues listed below, numbered from 1 to 20.
    """
    expected_ids = list(range(1, 21))
    expected_names = [
        "ASN", "LEU", "TYR", "ILE", "GLN",
        "TRP", "LEU", "LYS", "ASP", "GLY",
        "GLY", "PRO", "SER", "SER", "GLY",
        "ARG", "PRO", "PRO", "PRO", "SER",
    ]

    res_ids, res_names = struc.get_residues(array)

    assert res_ids.tolist() == expected_ids
    assert res_names.tolist() == expected_names
    assert len(res_ids) == struc.get_residue_count(array)
def plot_rna(pdb_id, axes):
    """
    Fetch a structure from the RCSB PDB and draw the secondary
    structure of its nucleotides onto the given axes.

    Base pairs are drawn with a line style that depends on their
    pseudoknot order and with a color that depends on whether they
    are canonical Watson-Crick pairs.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure; it is also used as plot title.
    axes
        The Matplotlib axes to plot on.
    """
    # Fetch the PDB file into the temporary directory
    # and use the first model of the structure
    file_path = rcsb.fetch(pdb_id, "pdb", gettempdir())
    structure_file = pdb.PDBFile.read(file_path)
    first_model = pdb.get_structure(structure_file)[0]
    nucleotides = first_model[struc.filter_nucleotides(first_model)]

    # Find the base pairs, express them as residue positions
    # and determine their pseudoknot order
    pair_indices = struc.base_pairs(nucleotides)
    base_pairs = struc.get_residue_positions(
        nucleotides, pair_indices.flatten()
    ).reshape(pair_indices.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    n_pairs = base_pairs.shape[0]

    # The line style of each bond reflects its pseudoknot order:
    # solid by default, dashed for order 1, dotted for order 2
    linestyles = np.full(n_pairs, '-', dtype=object)
    for order, style in ((1, '--'), (2, ':')):
        linestyles[pseudoknot_order == order] = style

    # Upper case one-letter-code for canonical nucleotides,
    # lower case one-letter-code for non-canonical nucleotides
    base_labels = [
        code if exact else code.lower()
        for code, exact in map(
            struc.map_nucleotide, struc.residue_iter(nucleotides)
        )
    ]

    # Canonical Watson-Crick base pairs get the darker orange,
    # all other base pairs keep the lighter orange
    colors = np.full(n_pairs, biotite.colors['brightorange'])
    watson_crick_pairs = (["A", "U"], ["C", "G"])
    for pair_i, (pos1, pos2) in enumerate(base_pairs):
        labels = sorted([base_labels[pos1], base_labels[pos2]])
        if labels in watson_crick_pairs:
            colors[pair_i] = biotite.colors["dimorange"]

    # Draw the secondary structure
    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Each plot is labeled with its PDB ID
    axes.set_title(pdb_id, loc="left")
atom_name == "CA"] ca_trajectory_kinase_right = trajectory_kinase_right[:, trajectory_kinase_right. atom_name == "CA"] rmsf_kinase_left = struc.rmsf(struc.average(ca_trajectory_kinase_left), ca_trajectory_kinase_left) rmsf_upper_kinase_left = rmsf_kinase_left.max() * 1.1 rmsf_kinase_right = struc.rmsf(struc.average(ca_trajectory_kinase_right), ca_trajectory_kinase_right) rmsf_upper_kinase_right = rmsf_kinase_right.max() * 1.1 fig, (ax1, ax2) = plt.subplots(2, 1) res_count = struc.get_residue_count(trajectory_kinase_left) ax1.plot(np.arange(1, res_count + 1) + 2801, rmsf_kinase_left, color=biotite.colors["dimorange"]) ax1.set_title("Kinase Left") #ax1.axvline(3828, ls="--", color="k") #ax1.axvline(3838, ls="--", color="k") ax1.set_xlim(2801 + 1, 2801 + res_count) ax1.set_ylim(0, rmsf_upper_kinase_left) ax1.set_xlabel("Residue") ax1.set_ylabel("RMSF (Å)") res_count = struc.get_residue_count(trajectory_kinase_right) ax2.plot(np.arange(1, res_count + 1) + 2801, rmsf_kinase_right, color=biotite.colors["dimorange"])
annotation += "W" elif edge == 2: annotation += "H" else: annotation += "S" base_labels[base] = annotation # Create a matplotlib pyplot fig, ax = plt.subplots(figsize=(8.0, 8.0)) # Plot the secondary structure graphics.plot_nucleotide_secondary_structure( ax, base_labels, base_pairs, struc.get_residue_count(nucleotides), bond_color=colors) # Display the plot plt.show() ######################################################################## # The sarcin-ricin loop is part of the 23s rRNA and is considered # crucial to the ribosome‘s activity. The incorporation of the # Leontis-Westhof nomenclature into the 2D-plot shows how the individual # base pairs are oriented and how their glycosidic bonds are oriented # relative to each other. # # This visualization enables one to see a pattern that cannot be # communicated through the 2D structure alone. The upper part of the # sarcin-ricin loop consists of only cis (c) oriented glycosidic bonds.
import biotite.structure as struc import biotite.structure.io as strucio import biotite.structure.io.xtc as xtc from biotite.application.dssp import DsspApp # Put here the path of the downloaded files templ_file_path = "../../download/lysozyme_md.pdb" traj_file_path = "../../download/lysozyme_md.xtc" xtc_file = xtc.XTCFile.read(traj_file_path) traj = xtc_file.get_structure(template=strucio.load_structure(templ_file_path)) time = xtc_file.get_time() traj = traj[:, struc.filter_amino_acids(traj)] # DSSP does not assign an SSE to the last residue -> -1 sse = np.empty((traj.shape[0], struc.get_residue_count(traj) - 1), dtype='U1') for idx, frame in enumerate(traj): app = DsspApp(traj[idx]) app.start() app.join() sse[idx] = app.get_sse() # Matplotlib needs numbers to assign colors correctly def sse_to_num(sse): num = np.empty(sse.shape, dtype=int) num[sse == 'C'] = 0 num[sse == 'E'] = 1 num[sse == 'B'] = 2 num[sse == 'S'] = 3 num[sse == 'T'] = 4
else: annotation = "t" if edge == 1: annotation += "W" elif edge == 2: annotation += "H" else: annotation += "S" base_labels[base] = annotation # Create a matplotlib pyplot fig, ax = plt.subplots(figsize=(8.0, 8.0)) # Plot the secondary structure graphics.plot_nucleotide_secondary_structure( ax, base_labels, base_pairs, struc.get_residue_count(nucleotides), bond_color=colors ) # Display the plot plt.show() ######################################################################## # The sarcin-ricin loop is part of the 23s rRNA and is considered # crucial to the ribosome‘s activity. The incorporation of the # Leontis-Westhof nomenclature into the 2D-plot shows how the individual # base pairs are oriented and how their glycosidic bonds are oriented # relative to each other. # # This visualization enables one to see a pattern that cannot be # communicated through the 2D structure alone. The upper part of the
def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot the RMSD, the radius of gyration and the per-residue RMSF of
    the protein in an MD trajectory.

    Three figures are created and finally shown via ``plt.show()``.

    Parameters
    ----------
    topology : str
        Path of the structure file that is loaded as template for the
        trajectory.
    xtc_traj : str
        Path of the XTC trajectory file.
    start_frame : int, optional
        Passed as ``start`` to the trajectory reader.
        By default no start limit is given.
    stop_frame : int, optional
        The last frame to be included: ``stop_frame + 1`` is passed as
        ``stop`` to the trajectory reader.
        By default no stop limit is given.
    write_dat_files : bool, optional
        If true, the RMSD and RMSF values are additionally written as
        tab-separated tables into ``rmsd.dat`` and ``rmsf.dat`` in the
        current working directory.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for
    # our calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a
    # boolean mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame' is inclusive -> only add 1 if a limit is given,
    # as 'None + 1' would raise a TypeError
    stop = None if stop_frame is None else stop_frame + 1
    # NOTE(review): this relies on 'read()' filling the instance in
    # place - confirm against the installed Biotite version
    xtc_file = XTCFile()
    xtc_file.read(
        xtc_traj,
        atom_i=np.where(protein_mask)[0],
        start=start_frame,
        stop=stop,
    )
    trajectory = xtc_file.get_structure(template)
    # Get simulation time for plotting purposes
    time = xtc_file.get_time()

    trajectory = struc.remove_pbc(trajectory)
    # The transformation itself is not needed
    trajectory, _ = struc.superimpose(trajectory[0], trajectory)

    # RMSD of each frame with respect to the first frame
    rmsd = struc.rmsd(trajectory[0], trajectory)
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    # Radius of gyration of each frame
    radius = struc.gyration_radius(trajectory)
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    # RMSF of the CA atoms with respect to the average model
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(
        np.arange(1, res_count + 1), rmsf,
        color=biotite.colors["dimorange"]
    )
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # The index is derived from the number of frames actually
        # read, so it always fits the RMSD values, even if no frame
        # limits are given
        first_label = 0 if start_frame is None else start_frame - 1
        frames = np.arange(
            first_label, first_label + len(rmsd), dtype=int
        )
        # NOTE(review): the first label is always forced to 0;
        # kept as found, confirm that this is intended
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(
            data=rmsf, index=residue_names, columns=["RMSF Values"]
        )
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')

    plt.show()
# during the entire simulation.
#
# Let's have a look at single amino acids:
# Which residues fluctuate most?
# For answering this question we calculate the RMSF
# (Root mean square fluctuation).
# It is similar to the RMSD, but instead of averaging over the atoms
# and looking at each time step, we average over the time and look at
# each residue.
# Usually the average model is taken as reference
# (compared to the starting model for RMSD).
#
# Since side chain atoms fluctuate quite a lot, they are not suitable
# for evaluation of the residue flexibility. Therefore, we consider only
# CA atoms.

# Select the CA atoms in all models
ca_mask = trajectory.atom_name == "CA"
ca_trajectory = trajectory[:, ca_mask]
# The average model of the CA atoms is the reference for the RMSF
average_ca = struc.average(ca_trajectory)
rmsf = struc.rmsf(average_ca, ca_trajectory)

figure = plt.figure(figsize=(6, 3))
ax = figure.add_subplot(1, 1, 1)
res_count = struc.get_residue_count(trajectory)
# Residues are numbered starting from 1 on the x-axis
residue_numbers = np.arange(1, res_count + 1)
ax.plot(residue_numbers, rmsf, color=biotite.colors["dimorange"])
ax.set_xlim(1, res_count)
ax.set_ylim(0, 1.5)
ax.set_xlabel("Residue")
ax.set_ylabel("RMSF (Å)")
figure.tight_layout()

plt.show()