Esempio n. 1
0
def test_rmsd(stack, as_coord):
    """
    Compare the output of :func:`struc.rmsd()` with precomputed
    reference values, once for all models of `stack` and once for a
    single model.

    If `as_coord` is true, the ``coord`` attribute of `stack` is used
    as input instead of `stack` itself.
    """
    subject = stack.coord if as_coord else stack

    # RMSD of each model with respect to the first model
    expected_per_model = [0.0, 25.98076211, 51.96152423]
    per_model_rmsd = struc.rmsd(subject[0], subject)
    assert per_model_rmsd.tolist() == pytest.approx(expected_per_model)

    # RMSD between the first and the second model only
    pairwise_rmsd = struc.rmsd(subject[0], subject[1])
    assert pairwise_rmsd == pytest.approx(25.9807621135)
Esempio n. 2
0
def test_superimposition_stack(ca_only):
    """
    Load a multi-model structure whose models are not (optimally)
    superimposed onto each other, superimpose all remaining models
    onto the first one and expect the RMSD to improve.

    If `ca_only` is true, only the ``CA`` atoms are used for fitting.
    """
    stack = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    fixed, mobile = stack[0], stack[1:]
    mask = (mobile.atom_name == "CA") if ca_only else None

    fitted, transformation = struc.superimpose(fixed, mobile, mask)

    rmsd_after = struc.rmsd(fixed, fitted)
    rmsd_before = struc.rmsd(fixed, mobile)
    if not ca_only:
        # Fitting on all atoms: every single model must be closer to
        # the reference than in the structure file
        assert (rmsd_after < rmsd_before).all()
    else:
        # Fitting on CA atoms only: the superimposition is better
        # than in the structure file for most, but not necessarily
        # all, models -> compare the averages
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
Esempio n. 3
0
def test_superimposition_array(path):
    """
    Rotate and translate a copy of the first model read from `path`,
    superimpose the copy back onto the original using the ``CA``
    atoms and expect a vanishing RMSD, both for the returned fitted
    structure and for a re-application of the returned transformation.
    """
    source_file = pdbx.PDBxFile()
    source_file.read(path)
    fixed = pdbx.get_structure(source_file, model=1)

    # Displace a copy, so that both structures do not overlap anymore
    mobile = struc.translate(
        struc.rotate(fixed.copy(), (1, 2, 3)),
        (1, 2, 3)
    )
    ca_mask = mobile.atom_name == "CA"

    fitted, transformation = struc.superimpose(fixed, mobile, ca_mask)
    rmsd_from_fit = struc.rmsd(fixed, fitted)
    assert rmsd_from_fit == pytest.approx(0)

    # The returned transformation must reproduce the fitted structure
    fitted = struc.superimpose_apply(mobile, transformation)
    rmsd_from_apply = struc.rmsd(fixed, fitted)
    assert rmsd_from_apply == pytest.approx(0)
Esempio n. 4
0
def test_superimposition_array(path):
    """
    Displace a copy of a structure by a rotation and a translation,
    then superimpose the copy onto the original structure.
    The result is expected to match the original almost perfectly,
    both when taken directly from the superimposition and when the
    returned transformation is applied again.
    """
    # Absolute tolerance for the RMSD, which should ideally be 0
    tolerance = 6e-4

    fixed = strucio.load_structure(path, model=1)
    mobile = struc.translate(
        struc.rotate(fixed.copy(), (1, 2, 3)),
        (1, 2, 3)
    )

    fitted, transformation = struc.superimpose(fixed, mobile)
    rmsd_from_fit = struc.rmsd(fixed, fitted)
    assert rmsd_from_fit == pytest.approx(0, abs=tolerance)

    fitted = struc.superimpose_apply(mobile, transformation)
    rmsd_from_apply = struc.rmsd(fixed, fitted)
    assert rmsd_from_apply == pytest.approx(0, abs=tolerance)
Esempio n. 5
0
def test_superimposition_stack(ca_only):
    """
    Read all models of ``1l2y.cif``, superimpose every model but the
    first onto the first model and expect a smaller RMSD than the one
    obtained from the coordinates stored in the file.

    If `ca_only` is true, only the ``CA`` atoms are used for fitting.
    """
    source_file = pdbx.PDBxFile()
    source_file.read(join(data_dir, "1l2y.cif"))
    stack = pdbx.get_structure(source_file)

    fixed, mobile = stack[0], stack[1:]
    mask = (mobile.atom_name == "CA") if ca_only else None
    fitted, transformation = struc.superimpose(fixed, mobile, mask)

    rmsd_after = struc.rmsd(fixed, fitted)
    rmsd_before = struc.rmsd(fixed, mobile)
    if not ca_only:
        # Fitting on all atoms: each model must improve individually
        assert (rmsd_after < rmsd_before).all()
    else:
        # Fitting on CA atoms only: the superimposition is better
        # than in the structure file for most cases
        # -> compare the averages
        assert np.mean(rmsd_after) < np.mean(rmsd_before)
Esempio n. 6
0
# Export the frame at index 1 in two variants for comparison:
# a copy of the template model carrying the coordinates of the frame
# ('frame_1_coord.pdb') and the frame as taken directly from the
# trajectory ('frame_1.pdb')
print(" ... writing frame[1] ... ")
frame_1 = template_model.copy()
frame_1.coord = trajectory[1].coord
save_structure("frame_1_coord.pdb", frame_1)
save_structure("frame_1.pdb", trajectory[1])
print(" ... done ... ")

# Export the last frame in the same two variants
print(" ... writing end frame ...")
frame_end = template_model.copy()
frame_end.coord = trajectory[-1].coord
save_structure("frame_end_coord.pdb", frame_end)
save_structure("frame_end.pdb", trajectory[-1])
print(" ... done ... ")

# RMSD relative to the first frame and radius of gyration
# for the complete trajectory
# NOTE(review): no superimposition of `trajectory` is visible in this
# excerpt - confirm that it happens earlier, if it is intended
rmsd_overall = struc.rmsd(trajectory[0], trajectory)
radius_overall = struc.gyration_radius(trajectory)

# kinase left
# Superimpose all frames of the selection onto its first frame,
# then measure the RMSD relative to that frame and the radius of gyration
trajectory_kinase_left, transform = struc.superimpose(
    trajectory_kinase_left[0], trajectory_kinase_left)
rmsd_kinase_left = struc.rmsd(trajectory_kinase_left[0],
                              trajectory_kinase_left)
radius_kinase_left = struc.gyration_radius(trajectory_kinase_left)

# kinase right
# Same procedure as for the left kinase selection
# (`transform` is overwritten by this call)
trajectory_kinase_right, transform = struc.superimpose(
    trajectory_kinase_right[0], trajectory_kinase_right)
rmsd_kinase_right = struc.rmsd(trajectory_kinase_right[0],
                               trajectory_kinase_right)
radius_kinase_right = struc.gyration_radius(trajectory_kinase_right)
Esempio n. 7
0
# Therefore, we simply derive the element symbol
# from the first character of the atom name.
# Since hydrogen atom names may have leading numbers,
# all digits are removed before the first character is taken
for i in range(template.array_length()):
    template.element[i] = re.sub(r"\d", "", template.atom_name[i])[0]
# Load the trajectory, using the prepared structure as template
trajectory = strucio.load_structure(traj_file_path, template=template)

########################################################################
# First, we want to see whether the simulation converged.
# For this purpose we take the RMSD of each frame compared to the
# starting structure as measure. In order to calculate the RMSD we must
# superimpose all models onto a reference; in this case we choose the
# starting structure.

trajectory, transform = struc.superimpose(template, trajectory)
rmsd = struc.rmsd(template, trajectory)
# Simulation was 1000 ps long
# -> spread the frames evenly over this time span for the x-axis
time = np.linspace(0, 1000, len(trajectory))

# Plot the RMSD over the simulation time
figure = plt.figure(figsize=(6,3))
ax = figure.add_subplot(111)
ax.plot(time, rmsd, color=biotite.colors["dimorange"])
ax.set_xlim(0,1000)
ax.set_xlabel("Time (ps)")
ax.set_ylabel("RMSD (Angstrom)")
figure.tight_layout()


########################################################################
# As we can see the simulation seems to converge already in the
# beginning of the simulation. After a few ps the RMSD stays in a range
Esempio n. 8
0
def test_docking(flexible):
    """
    Test :class:`VinaApp` for the case of docking biotin to
    streptavidin.
    The output binding pose should be very similar to the pose in the
    PDB structure.

    Parameters
    ----------
    flexible : bool
        If true, the receptor residues with the residue IDs 23 and 88
        are passed as flexible residues to :class:`VinaApp` and the
        returned receptor coordinates are compared to the original
        conformation within a tolerance.
        Otherwise the receptor is docked without flexible residues
        and the returned receptor coordinates must be equal to the
        input coordinates.
    """
    # A structure of a streptavidin-biotin complex
    mmtf_file = mmtf.MMTFFile.read(join(data_dir("application"), "2rtg.mmtf"))
    structure = mmtf.get_structure(mmtf_file,
                                   model=1,
                                   extra_fields=["charge"],
                                   include_bonds=True)
    structure = structure[structure.chain_id == "B"]
    receptor = structure[struc.filter_amino_acids(structure)]
    ref_ligand = structure[structure.res_name == "BTN"]
    ref_ligand_coord = ref_ligand.coord

    ligand = info.residue("BTN")
    # Remove hydrogen atom that is missing in ref_ligand
    ligand = ligand[ligand.atom_name != "HO2"]

    if flexible:
        # Two residues within the binding pocket: ASN23, SER88
        flexible_mask = np.isin(receptor.res_id, (23, 88))
    else:
        flexible_mask = None

    # The search box is centered at the position of the reference ligand
    app = VinaApp(ligand,
                  receptor,
                  struc.centroid(ref_ligand), [20, 20, 20],
                  flexible=flexible_mask)
    app.set_seed(0)
    app.start()
    app.join()

    test_ligand_coord = app.get_ligand_coord()
    test_receptor_coord = app.get_receptor_coord()
    energies = app.get_energies()
    # One energy value per model
    assert len(test_ligand_coord) == len(energies)
    assert len(test_receptor_coord) == len(energies)

    assert np.all(energies < 0)

    # Select best binding pose
    test_ligand_coord = test_ligand_coord[0]
    # Ignore atoms, for which at least one coordinate is NaN
    not_nan_mask = ~np.isnan(test_ligand_coord).any(axis=-1)
    ref_ligand_coord = ref_ligand_coord[not_nan_mask]
    test_ligand_coord = test_ligand_coord[not_nan_mask]
    # Check if at least one atom is preserved
    # After selection of a single model the atoms are in the first
    # dimension; the second dimension holds the xyz components and
    # would therefore never be empty
    assert test_ligand_coord.shape[0] > 0
    rmsd = struc.rmsd(ref_ligand_coord, test_ligand_coord)
    # The deviation of the best pose from the real conformation
    # should be less than 1 Å
    assert rmsd < 1.0

    if flexible:
        # Select best binding pose
        test_receptor_coord = test_receptor_coord[0]
        not_nan_mask = ~np.isnan(test_receptor_coord).any(axis=-1)
        ref_receptor_coord = receptor[not_nan_mask]
        test_receptor_coord = test_receptor_coord[not_nan_mask]
        # Check if at least one atom is preserved
        # (atoms are in the first dimension, see above)
        assert test_receptor_coord.shape[0] > 0
        # The flexible residues should have a maximum deviation of 1 Å
        # from the original conformation
        assert np.max(struc.distance(test_receptor_coord,
                                     ref_receptor_coord)) < 1.0
    else:
        # Without flexible residues the receptor must be unchanged
        # in every model
        ref_receptor_coord = receptor.coord
        for model_coord in test_receptor_coord:
            assert np.array_equal(model_coord, ref_receptor_coord)
Esempio n. 9
0
# For comparison of the docked pose with the experimentally determined
# reference conformation, the atom order of both must be exactly the
# same.
# Therefore, all atoms that are present in only one of both models,
# e.g. carboxy or nonpolar hydrogen atoms, are removed and the atom
# order is standardized - first for the docked models...
docked_ligand = docked_ligand[
    ..., np.isin(docked_ligand.atom_name, ref_ligand.atom_name)]
docked_ligand = docked_ligand[..., info.standardize_order(docked_ligand)]
# ...and then for the reference ligand
ref_ligand = ref_ligand[np.isin(ref_ligand.atom_name, docked_ligand.atom_name)]
ref_ligand = ref_ligand[info.standardize_order(ref_ligand)]

# Calculate the RMSD of the docked models to the correct binding mode
# No superimposition prior to RMSD calculation, as we want to see
# conformation differences with respect to the binding pocket
rmsd = struc.rmsd(ref_ligand, docked_ligand)

# Evaluate correlation between RMSD and binding energies
correlation, p_value = spearmanr(energies, rmsd)

# Scatter plot of the RMSD against the binding energy;
# the title shows the correlation coefficient and the p-value in percent
figure, ax = plt.subplots(figsize=(8.0, 6.0))
ax.set_title(f"$r_s$ = {correlation:.2f} ($p$ = {p_value*100:.1f}%)")
ax.scatter(energies, rmsd, marker="+", color="black")
ax.set_xlabel("Energy (kcal/mol)")
ax.set_ylabel("RMSD (Å)")
figure.tight_layout()
plt.show()

########################################################################
# For this specific case *AutoDock Vina* shows only a low Spearman
# correlation between the RMSD of the calculated models to the
Esempio n. 10
0
def test_rmsd(stack):
    """
    Compare the output of :func:`struc.rmsd()` with precomputed
    reference values, once for all models of `stack` and once for a
    single model.
    """
    reference = stack[0]

    # RMSD of each model with respect to the first model
    expected_per_model = [0.0, 25.98076211, 51.96152423]
    per_model_rmsd = struc.rmsd(reference, stack)
    assert per_model_rmsd.tolist() == pytest.approx(expected_per_model)

    # RMSD between the first and the second model only
    pairwise_rmsd = struc.rmsd(reference, stack[1])
    assert pairwise_rmsd == pytest.approx(25.9807621135)
Esempio n. 11
0
def rmsf_plot(topology,
              xtc_traj,
              start_frame=None,
              stop_frame=None,
              write_dat_files=None):
    """
    Plot the RMSD, the radius of gyration and the CA RMSF for the
    amino acid residues of a trajectory.

    Three figures are created and shown via ``plt.show()``.

    Parameters
    ----------
    topology
        The structure file passed to ``strucio.load_structure()``.
        The loaded structure is used as template for the trajectory.
    xtc_traj
        The trajectory file passed to ``XTCFile.read()``.
    start_frame : int, optional
        The first frame to be read.
        The value is passed unchanged as ``start`` to the reader.
    stop_frame : int, optional
        The last frame to be read (inclusive).
        If ``None``, no upper limit is passed to the reader.
    write_dat_files : bool, optional
        If true, the RMSD values are written to ``rmsd.dat`` and the
        RMSF values to ``rmsf.dat`` as tab-separated tables.
        Both paths are relative to the current working directory.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame' is inclusive, while the 'stop' of the reader is not
    # An omitted 'stop_frame' must stay 'None' instead of raising
    # a TypeError in the addition
    read_stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(xtc_traj,
                  atom_i=np.where(protein_mask)[0],
                  start=start_frame,
                  stop=read_stop)

    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    # Make the molecule whole and superimpose all frames onto the
    # first one, before the RMSD relative to the first frame is measured
    trajectory = struc.remove_pbc(trajectory)
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    radius = struc.gyration_radius(trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    # RMSF of the CA atoms relative to the average structure
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(np.arange(1, res_count + 1),
            rmsf,
            color=biotite.colors["dimorange"])
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # The labels are derived from the number of frames that were
        # actually read, so omitted frame limits are supported
        # If both limits are given, the labels run from
        # 'start_frame - 1' to 'stop_frame - 1'
        first_label = 0 if start_frame is None else start_frame - 1
        frames = np.arange(first_label, first_label + len(rmsd), dtype=int)
        # The first label is always set to 0
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        # NOTE(review): assumes exactly one CA atom per residue, so
        # that 'rmsf' and 'residue_names' have equal length - confirm
        df1 = pd.DataFrame(data=rmsf,
                           index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')
    plt.show()
Esempio n. 12
0
# whole molecule, without periodic boundaries.
# In *Gromacs* we could have used ``gmx trjconv`` for this, but this
# problem can be handled in *Biotite*, too.

# Remove the segmentation caused by the periodic boundary condition
trajectory = struc.remove_pbc(trajectory)

########################################################################
# Now our trajectory is ready for some analysis!
# First, we want to see whether the simulation converged.
# For this purpose we take the RMSD of each frame compared to the
# initial model as measure. In order to calculate the RMSD we must
# superimpose all models onto a reference; in this case we also choose
# the initial structure.

trajectory, transform = struc.superimpose(trajectory[0], trajectory)
rmsd = struc.rmsd(trajectory[0], trajectory)

# Plot the RMSD over the simulation time
figure = plt.figure(figsize=(6, 3))
ax = figure.add_subplot(111)
ax.plot(time, rmsd, color=biotite.colors["dimorange"])
ax.set_xlim(time[0], time[-1])
ax.set_ylim(0, 2)
ax.set_xlabel("Time (ps)")
ax.set_ylabel("RMSD (Å)")
figure.tight_layout()

########################################################################
# As we can see the simulation seems to converge already early in the
# simulation.
# After about 200 ps the RMSD stays in a range of approx. 2 - 3 Å.
#
Esempio n. 13
0
# that.
# It represents the deviation for each atom in all models relative
# to a reference model, which is usually the averaged structure.
# Since we are only interested in the backbone flexibility, we consider
# only CA atoms.
# Before we can calculate a reasonable RMSF, we have to superimpose each
# model on a reference model (we choose the first model),
# which minimizes the *root mean square deviation* (RMSD).

# Load all models of the structure file
stack = strucio.load_structure(file_path)
# We consider only CA atoms
stack = stack[:, stack.atom_name == "CA"]
# Superimposing all models of the structure onto the first model
stack, transformation_tuple = struc.superimpose(stack[0], stack)
print("RMSD for each model to first model:")
print(struc.rmsd(stack[0], stack))
# Calculate the RMSF relative to average of all models
rmsf = struc.rmsf(struc.average(stack), stack)
# Plot the RMSF for each residue
# NOTE(review): the x-values are hard-coded to 1..20, i.e. the
# structure is assumed to contain 20 CA atoms - confirm for `file_path`
plt.plot(np.arange(1, 21), rmsf)
plt.xlim(0, 20)
plt.xticks(np.arange(1, 21))
plt.xlabel("Residue")
plt.ylabel("RMSF")
plt.show()

########################################################################
# As you can see, both terminal residues are most flexible.
#
# Calculating accessible surface area
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^