def test_rmsd(stack, as_coord):
    """
    Compare the RMSD of the first model in the `stack` fixture to all
    models, and to the second model alone, against hard-coded values.

    If `as_coord` is true, the ``coord`` attribute of the stack is
    handed to :func:`rmsd()` instead of the stack itself.
    """
    models = stack.coord if as_coord else stack
    reference = models[0]

    # Reference versus every model -> one value per model
    rmsd_to_all = struc.rmsd(reference, models)
    assert rmsd_to_all.tolist() == pytest.approx(
        [0.0, 25.98076211, 51.96152423]
    )

    # Reference versus a single model -> a single value
    rmsd_to_second = struc.rmsd(reference, models[1])
    assert rmsd_to_second == pytest.approx(25.9807621135)
def test_superimposition_stack(ca_only):
    """
    Load a multi-model structure whose models are not (optimally)
    superimposed onto each other, superimpose all later models onto
    the first one and expect the RMSD to improve.

    If `ca_only` is true, only the atoms named ``"CA"`` are used for
    the superimposition.
    """
    stack = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    fixed, mobile = stack[0], stack[1:]
    mask = (mobile.atom_name == "CA") if ca_only else None

    fitted, _ = struc.superimpose(fixed, mobile, mask)

    rmsd_after = struc.rmsd(fixed, fitted)
    rmsd_before = struc.rmsd(fixed, mobile)
    if ca_only:
        # The superimpositions are better for most cases than the
        # superimpositions in the structure file
        # -> Use average
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
    else:
        # The superimpositions are better than the superimpositions
        # in the structure file
        assert (rmsd_after < rmsd_before).all()
def test_superimposition_array(path):
    """
    Rotate and translate a copy of the first model of a PDBx file and
    superimpose it back onto the original using the ``"CA"`` atoms.

    Both the fitted structure returned by :func:`superimpose()` and the
    one obtained from :func:`superimpose_apply()` with the returned
    transformation are expected to match the original.
    """
    cif_file = pdbx.PDBxFile()
    cif_file.read(path)
    fixed = pdbx.get_structure(cif_file, model=1)

    # Displace a copy, so the original stays untouched
    displaced = struc.rotate(fixed.copy(), (1, 2, 3))
    displaced = struc.translate(displaced, (1, 2, 3))

    ca_mask = (displaced.atom_name == "CA")
    fitted, transformation = struc.superimpose(fixed, displaced, ca_mask)
    assert struc.rmsd(fixed, fitted) == pytest.approx(0)

    refitted = struc.superimpose_apply(displaced, transformation)
    assert struc.rmsd(fixed, refitted) == pytest.approx(0)
def test_superimposition_array(path):
    """
    Rotate and translate a copy of a structure, so that copy and
    original are not superimposed anymore.
    Superimposing the copy back onto the original is then expected to
    give an almost perfect match, both for the structure returned by
    :func:`superimpose()` and for the one obtained by applying the
    returned transformation via :func:`superimpose_apply()`.
    """
    fixed = strucio.load_structure(path, model=1)

    # Displace a copy, so the original stays untouched
    displaced = struc.rotate(fixed.copy(), (1, 2, 3))
    displaced = struc.translate(displaced, (1, 2, 3))

    fitted, transformation = struc.superimpose(fixed, displaced)
    assert struc.rmsd(fixed, fitted) == pytest.approx(0, abs=6e-4)

    refitted = struc.superimpose_apply(displaced, transformation)
    assert struc.rmsd(fixed, refitted) == pytest.approx(0, abs=6e-4)
def test_superimposition_stack(ca_only):
    """
    Read all models of ``1l2y.cif``, superimpose every later model
    onto the first one and expect the RMSD to the first model to
    improve compared to the coordinates stored in the file.

    If `ca_only` is true, only the atoms named ``"CA"`` are used for
    the superimposition.
    """
    cif_file = pdbx.PDBxFile()
    cif_file.read(join(data_dir, "1l2y.cif"))
    stack = pdbx.get_structure(cif_file)

    fixed, mobile = stack[0], stack[1:]
    mask = (mobile.atom_name == "CA") if ca_only else None

    fitted, _ = struc.superimpose(fixed, mobile, mask)

    rmsd_after = struc.rmsd(fixed, fitted)
    rmsd_before = struc.rmsd(fixed, mobile)
    if ca_only:
        # The superimpositions are better for most cases than the
        # superimpositions in the structure file
        # -> Use average
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
    else:
        # The superimpositions are better than the superimpositions
        # in the structure file
        assert (rmsd_after < rmsd_before).all()
# Export the frame at index 1 twice: once as a copy of the template
# model that carries the frame's coordinates, once as the trajectory
# frame itself
print(" ... writing frame[1] ... ")
frame_1 = template_model.copy()
frame_1.coord = trajectory[1].coord
save_structure("frame_1_coord.pdb", frame_1)
save_structure("frame_1.pdb", trajectory[1])
print(" ... done ... ")

# Same export for the last frame of the trajectory
print(" ... writing end frame ...")
frame_end = template_model.copy()
frame_end.coord = trajectory[-1].coord
save_structure("frame_end_coord.pdb", frame_end)
save_structure("frame_end.pdb", trajectory[-1])
print(" ... done ... ")

# RMSD of each frame to the first frame and radius of gyration
# of the complete trajectory
# NOTE(review): no superimposition of `trajectory` is visible in this
# section before the RMSD is taken - confirm it happens earlier
rmsd_overall = struc.rmsd(trajectory[0], trajectory)
radius_overall = struc.gyration_radius(trajectory)

# kinase left
# Superimpose onto the first frame before measuring RMSD and radius
trajectory_kinase_left, transform = struc.superimpose(
    trajectory_kinase_left[0], trajectory_kinase_left)
rmsd_kinase_left = struc.rmsd(trajectory_kinase_left[0], trajectory_kinase_left)
radius_kinase_left = struc.gyration_radius(trajectory_kinase_left)

# kinase right
# Superimpose onto the first frame before measuring RMSD and radius
trajectory_kinase_right, transform = struc.superimpose(
    trajectory_kinase_right[0], trajectory_kinase_right)
rmsd_kinase_right = struc.rmsd(trajectory_kinase_right[0], trajectory_kinase_right)
radius_kinase_right = struc.gyration_radius(trajectory_kinase_right)
# Therefore we simply determine the symbol # from the first character in the atom name # Since hydrogens may have leading numbers we simply ignore numbers for i in range(template.array_length()): template.element[i] = re.sub(r"\d", "", template.atom_name[i])[0] trajectory = strucio.load_structure(traj_file_path, template=template) ######################################################################## # At first we want to see if the simulation converged. # For this purpose we take the RMSD of a frame compared to the starting # structure as measure. In order to calculate the RMSD we must # superimpose all models onto a reference, in this case we choose the # starting structure. trajectory, transform = struc.superimpose(template, trajectory) rmsd = struc.rmsd(template, trajectory) # Simulation was 1000 ps long time = np.linspace(0, 1000, len(trajectory)) figure = plt.figure(figsize=(6,3)) ax = figure.add_subplot(111) ax.plot(time, rmsd, color=biotite.colors["dimorange"]) ax.set_xlim(0,1000) ax.set_xlabel("Time (ps)") ax.set_ylabel("RMSD (Angstrom)") figure.tight_layout() ######################################################################## # As we can see the simulation seems to converge already in the # beginning of the simulation. After a few ps the RMSD stays in a range
def test_docking(flexible):
    """
    Test :class:`VinaApp` for the case of docking biotin to
    streptavidin.
    The output binding pose should be very similar to the pose in the
    PDB structure.

    If `flexible` is true, the residues with ID 23 and 88 are passed
    as flexible side chains and their docked conformation is compared
    to the input conformation.
    Otherwise the receptor coordinates of every returned model are
    expected to be identical to the input coordinates.
    """
    # A structure of a streptavidin-biotin complex
    mmtf_file = mmtf.MMTFFile.read(join(data_dir("application"), "2rtg.mmtf"))
    structure = mmtf.get_structure(
        mmtf_file, model=1, extra_fields=["charge"], include_bonds=True
    )
    structure = structure[structure.chain_id == "B"]
    receptor = structure[struc.filter_amino_acids(structure)]
    ref_ligand = structure[structure.res_name == "BTN"]
    ref_ligand_coord = ref_ligand.coord

    ligand = info.residue("BTN")
    # Remove hydrogen atom that is missing in ref_ligand
    ligand = ligand[ligand.atom_name != "HO2"]

    if flexible:
        # Two residues within the binding pocket: ASN23, SER88
        flexible_mask = np.isin(receptor.res_id, (23, 88))
    else:
        flexible_mask = None

    app = VinaApp(
        ligand, receptor, struc.centroid(ref_ligand), [20, 20, 20],
        flexible=flexible_mask
    )
    # Fixed seed for reproducible results
    app.set_seed(0)
    app.start()
    app.join()

    test_ligand_coord = app.get_ligand_coord()
    test_receptor_coord = app.get_receptor_coord()
    energies = app.get_energies()
    # One energy value per model
    assert len(test_ligand_coord) == len(energies)
    assert len(test_receptor_coord) == len(energies)

    assert np.all(energies < 0)

    # Select best binding pose
    test_ligand_coord = test_ligand_coord[0]
    # An atom is dropped, if any of its coordinates is NaN
    not_nan_mask = ~np.isnan(test_ligand_coord).any(axis=-1)
    ref_ligand_coord = ref_ligand_coord[not_nan_mask]
    test_ligand_coord = test_ligand_coord[not_nan_mask]
    # Check if at least one atom is preserved
    # After selecting a single pose and masking, the first axis counts
    # the atoms; the second axis is the coordinate axis and is never
    # empty, so it must not be used for this check
    assert test_ligand_coord.shape[0] > 0
    rmsd = struc.rmsd(ref_ligand_coord, test_ligand_coord)
    # The deviation of the best pose from the real conformation
    # should be less than 1 Å
    assert rmsd < 1.0

    if flexible:
        # Select best binding pose
        test_receptor_coord = test_receptor_coord[0]
        not_nan_mask = ~np.isnan(test_receptor_coord).any(axis=-1)
        ref_receptor_coord = receptor[not_nan_mask]
        test_receptor_coord = test_receptor_coord[not_nan_mask]
        # Check if at least one atom is preserved
        # (first axis = atoms, see the ligand check)
        assert test_receptor_coord.shape[0] > 0
        # The flexible residues should have a maximum deviation of 1 Å
        # from the original conformation
        assert np.max(
            struc.distance(test_receptor_coord, ref_receptor_coord)
        ) < 1.0
    else:
        ref_receptor_coord = receptor.coord
        for model_coord in test_receptor_coord:
            assert np.array_equal(model_coord, ref_receptor_coord)
# For comparison of the docked pose with the experimentally determined # reference conformation, the atom order of both must be exactly the # same # Therefore, all atoms, that are additional in one of both models, # e.g. carboxy or nonpolar hydrogen atoms, are removed... docked_ligand = docked_ligand[ ..., np.isin(docked_ligand.atom_name, ref_ligand.atom_name)] docked_ligand = docked_ligand[..., info.standardize_order(docked_ligand)] # ...and the atom order is standardized ref_ligand = ref_ligand[np.isin(ref_ligand.atom_name, docked_ligand.atom_name)] ref_ligand = ref_ligand[info.standardize_order(ref_ligand)] # Calculate the RMSD of the docked models to the correct binding mode # No superimposition prior to RMSD calculation, as we want to see # conformation differences with respect to the binding pocket rmsd = struc.rmsd(ref_ligand, docked_ligand) # Evaluate correlation between RMSD and binding energies correlation, p_value = spearmanr(energies, rmsd) figure, ax = plt.subplots(figsize=(8.0, 6.0)) ax.set_title(f"$r_s$ = {correlation:.2f} ($p$ = {p_value*100:.1f}%)") ax.scatter(energies, rmsd, marker="+", color="black") ax.set_xlabel("Energy (kcal/mol)") ax.set_ylabel("RMSD (Å)") figure.tight_layout() plt.show() ######################################################################## # For this specific case *AutoDock Vina* shows only a low Spearman # correlation between the RMSD of the calculated models to the
def test_rmsd(stack):
    """
    Compare the RMSD of the first model in the `stack` fixture to all
    models, and to the second model alone, against hard-coded values.
    """
    reference = stack[0]

    # Reference versus every model -> one value per model
    rmsd_to_all = struc.rmsd(reference, stack)
    assert rmsd_to_all.tolist() == pytest.approx(
        [0.0, 25.98076211, 51.96152423]
    )

    # Reference versus a single model -> a single value
    rmsd_to_second = struc.rmsd(reference, stack[1])
    assert rmsd_to_second == pytest.approx(25.9807621135)
def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and per-residue RMSF of the protein
    part of a trajectory.

    Three figures are created and finally displayed via ``plt.show()``:
    the RMSD of each frame to the first frame, the radius of gyration
    of each frame, and the RMSF of the ``"CA"`` atoms relative to their
    averaged coordinates.

    Parameters
    ----------
    topology
        Structure file passed to ``strucio.load_structure()``.
        Its amino acid atoms serve as template for the trajectory.
    xtc_traj
        Trajectory file read via ``XTCFile``.
    start_frame, stop_frame : int, optional
        The first and the last frame to be read.
        `stop_frame` is inclusive, i.e. ``stop_frame + 1`` is given to
        the reader as stop value.
        If ``None``, ``None`` is handed to the reader for the
        respective bound.
    write_dat_files : bool, optional
        If true, the RMSD and RMSF values are additionally written as
        tab-separated tables to ``rmsd.dat`` and ``rmsf.dat``.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame' is inclusive, the reader gets 'stop_frame + 1';
    # keep an unset bound as None instead of failing on 'None + 1'
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(xtc_traj, atom_i=np.where(protein_mask)[0],
                  start=start_frame, stop=stop)
    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    trajectory = struc.remove_pbc(trajectory)

    # RMSD of each frame to the first frame after superimposition
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    # Radius of gyration of each frame
    radius = struc.gyration_radius(trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(np.arange(1, res_count + 1), rmsf,
            color=biotite.colors["dimorange"])
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # The number of labels follows the number of RMSD values,
        # so the table is valid for unset frame bounds as well
        if start_frame is None:
            frames = np.arange(len(rmsd), dtype=int)
        else:
            frames = np.arange(
                start_frame - 1, start_frame - 1 + len(rmsd), dtype=int
            )
            # The first label is always set to 0
            frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf, index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')

    plt.show()
# whole molecule, without periodic boundaries. # in *Gromacs* we could have used ``gmx trjconv`` for this, but this # problem can be handled in *Biotite*, too. trajectory = struc.remove_pbc(trajectory) ######################################################################## # Now our trajectory is ready for some analysis! # At first we want to see if the simulation converged. # For this purpose we take the RMSD of a frame compared to the initial # model as measure. In order to calculate the RMSD we must # superimpose all models onto a reference, in this case we also choose # the initial structure. trajectory, transform = struc.superimpose(trajectory[0], trajectory) rmsd = struc.rmsd(trajectory[0], trajectory) figure = plt.figure(figsize=(6, 3)) ax = figure.add_subplot(111) ax.plot(time, rmsd, color=biotite.colors["dimorange"]) ax.set_xlim(time[0], time[-1]) ax.set_ylim(0, 2) ax.set_xlabel("Time (ps)") ax.set_ylabel("RMSD (Å)") figure.tight_layout() ######################################################################## # As we can see the simulation seems to converge already early in the # simulation. # After a about 200 ps the RMSD stays in a range of approx. 2 - 3 Å. #
# that. # It represents the deviation for each atom in all models relative # to a reference model, which is usually the averaged structure. # Since we are only interested in the backbone flexibility, we consider # only CA atoms. # Before we can calculate a reasonable RMSF, we have to superimpose each # model on a reference model (we choose the first model), # which minimizes the *root mean square deviation* (RMSD). stack = strucio.load_structure(file_path) # We consider only CA atoms stack = stack[:, stack.atom_name == "CA"] # Superimposing all models of the structure onto the first model stack, transformation_tuple = struc.superimpose(stack[0], stack) print("RMSD for each model to first model:") print(struc.rmsd(stack[0], stack)) # Calculate the RMSF relative to average of all models rmsf = struc.rmsf(struc.average(stack), stack) # Plotting stuff plt.plot(np.arange(1, 21), rmsf) plt.xlim(0, 20) plt.xticks(np.arange(1, 21)) plt.xlabel("Residue") plt.ylabel("RMSF") plt.show() ######################################################################## # As you can see, both terminal residues are most flexible. # # Calculating accessible surface area # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^