Ejemplo n.º 1
0
def test_remove_pbc_selections(multi_model):
    """
    Smoke test for the `selection` parameter of `remove_pbc()`.

    No results are asserted: the test only checks that the accepted
    selection variants run without an exception and that overlapping
    selections raise a `ValueError`.
    """
    structure_path = join(data_dir("structure"), "3o5r.mmtf")
    array = load_structure(structure_path)
    if multi_model:
        array = struc.stack([array, array])

    # Selection variants that must be accepted without an exception
    struc.remove_pbc(array)
    struc.remove_pbc(array, array.chain_id[0])
    struc.remove_pbc(array, struc.filter_amino_acids(array))
    amino_acids_and_fk5 = [
        struc.filter_amino_acids(array),
        (array.res_name == "FK5"),
    ]
    struc.remove_pbc(array, amino_acids_and_fk5)

    # Expect an error when an atom is selected multiple times
    overlapping_selections = [
        struc.filter_amino_acids(array),
        (array.atom_name == "CA"),
    ]
    with pytest.raises(ValueError):
        struc.remove_pbc(array, overlapping_selections)
Ejemplo n.º 2
0
def test_remove_pbc_unsegmented():
    """
    A structure that lies entirely in the box is not segmented,
    so `remove_pbc()` should leave it unaltered.
    The solvent is excluded, due to high distances between each atom.
    """
    structure_path = join(data_dir("structure"), "3o5r.mmtf")
    ref_array = load_structure(structure_path)

    # Shift the structure so that its centroid sits at half the
    # box diagonal, i.e. center it in the box
    shift = np.diag(ref_array.box) / 2 - struc.centroid(ref_array)
    ref_array = struc.translate(ref_array, shift)

    # Drop the solvent atoms
    solvent_mask = struc.filter_solvent(ref_array)
    ref_array = ref_array[~solvent_mask]

    array = struc.remove_pbc(ref_array)

    assert ref_array.equal_annotation_categories(array)
    assert np.allclose(ref_array.coord, array.coord)
Ejemplo n.º 3
0
def test_remove_pbc_restore(multi_model, translation_vector):
    """
    Translate a structure, move it back into its box and check that
    `remove_pbc()` restores the original structure.

    The comparison is based on periodic and non-periodic adjacency
    matrices computed before the translation, after the move into the
    box and after the call of `remove_pbc()`.
    """
    CUTOFF = 5.0

    def adjacency_matrix(atoms, periodic):
        """
        Build the adjacency matrix of a single model for `CUTOFF`.
        """
        cell_list = struc.CellList(atoms, CUTOFF, periodic=periodic)
        return cell_list.create_adjacency_matrix(CUTOFF)

    def get_matrices(array):
        """
        Create a non-periodic and a periodic adjacency matrix
        (in this order).
        For a stack, the matrices of all models are stacked.
        """
        if isinstance(array, struc.AtomArray):
            matrix = adjacency_matrix(array, False)
            matrix_pbc = adjacency_matrix(array, True)
        elif isinstance(array, struc.AtomArrayStack):
            matrix = np.array(
                [adjacency_matrix(model, False) for model in array]
            )
            matrix_pbc = np.array(
                [adjacency_matrix(model, True) for model in array]
            )
        return matrix, matrix_pbc

    def assert_equal_matrices(array, matrix1, matrix2, periodic):
        """
        Due to numerical instability, entries in both matrices might
        be different, when the distance of atoms is almost equal to
        the cutoff distance of the matrix.
        This function checks whether two atoms with unequal entries
        in the matrices are near the cutoff distance.
        """
        mismatches = np.where(matrix1 != matrix2)
        # Each position is one index tuple of a differing entry
        for position in zip(*mismatches):
            if len(position) == 2:
                # multi_model = False -> AtomArray
                m = None
                i, j = position
                box = array.box if periodic else None
                distance = struc.distance(array[i], array[j], box=box)
            elif len(position) == 3:
                # multi_model = True -> AtomArrayStack
                m, i, j = position
                box = array.box[m] if periodic else None
                distance = struc.distance(array[m, i], array[m, j], box=box)
            try:
                assert distance == pytest.approx(CUTOFF, abs=1e-4)
            except AssertionError:
                # Report the offending atom pair before failing
                print(f"Model {m}, Atoms {i} and {j}")
                raise

    stack = load_structure(join(data_dir("structure"), "1gya.mmtf"))
    # Give each model a diagonal box that is 10 larger than the
    # coordinate range of the model in each dimension...
    extents = np.max(stack.coord, axis=-2) - np.min(stack.coord, axis=-2)
    stack.box = np.array([np.diag(extent + 10) for extent in extents])
    # ...and shift each model so that its minimum coordinates are at 5
    lower_corner = np.min(stack.coord, axis=-2)[:, np.newaxis, :]
    stack.coord -= lower_corner - 5
    array = stack if multi_model else stack[0]

    # Use adjacency matrices instead of pairwise distances
    # for computational efficiency
    ref_matrix, ref_matrix_pbc = get_matrices(array)

    array = struc.translate(array, translation_vector)
    array.coord = struc.move_inside_box(array.coord, array.box)
    moved_matrix, moved_matrix_pbc = get_matrices(array)
    # The translation and the periodic move should not
    # alter PBC-aware pairwise distances
    assert_equal_matrices(array, ref_matrix_pbc, moved_matrix_pbc, True)
    # Non-PBC-aware distances should change,
    # otherwise the atoms do not go over the periodic boundary
    # and the test does not make sense
    with pytest.raises(AssertionError):
        assert_equal_matrices(array, ref_matrix, moved_matrix, False)

    array = struc.remove_pbc(array)
    restored_matrix, restored_matrix_pbc = get_matrices(array)
    # Both adjacency matrices should be equal to the original ones,
    # as the structure should be completely restored
    assert_equal_matrices(array, ref_matrix_pbc, restored_matrix_pbc, True)
    assert_equal_matrices(array, ref_matrix, restored_matrix, False)
Ejemplo n.º 4
0
def rmsf_plot(topology,
              xtc_traj,
              start_frame=None,
              stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and per-residue RMSF of the protein
    in an XTC trajectory.

    Only the atoms selected by `filter_amino_acids()` are read from the
    trajectory.
    Periodic boundary conditions are removed and all frames are
    superimposed onto the first frame before the analysis.
    Three figures are created and shown via ``plt.show()``.

    Parameters
    ----------
    topology : str
        Path of the structure file, passed to
        ``strucio.load_structure()``, that is used as template for the
        trajectory.
    xtc_traj : str
        Path of the XTC trajectory file.
    start_frame : int, optional
        Passed as ``start`` to ``XTCFile.read()``.
        If omitted, no lower bound is given to the reader.
    stop_frame : int, optional
        ``stop_frame + 1`` is passed as ``stop`` to ``XTCFile.read()``.
        If omitted, no upper bound is given to the reader.
    write_dat_files : bool, optional
        If true, the RMSD values are written to ``rmsd.dat`` and the
        RMSF values to ``rmsf.dat`` (tab-separated) in the current
        working directory.

    Notes
    -----
    The y-axis limits of all three plots are hard-coded.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # `stop_frame` may be omitted: only add the offset to an actual
    # value, otherwise `None + 1` would raise a TypeError
    stop = None if stop_frame is None else stop_frame + 1

    xtc_file = XTCFile()
    xtc_file.read(xtc_traj,
                  atom_i=np.where(protein_mask)[0],
                  start=start_frame,
                  stop=stop)

    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    # Reassemble molecules that are segmented over the box boundary,
    # then superimpose every frame onto the first one
    trajectory = struc.remove_pbc(trajectory)
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    radius = struc.gyration_radius(trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    # Fluctuation of each CA atom around its average position
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(np.arange(1, res_count + 1),
            rmsf,
            color=biotite.colors["dimorange"])
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # The index length follows the number of frames actually read,
        # so it always matches the RMSD data
        n_frames = len(rmsd)
        if start_frame is None:
            frames = np.arange(n_frames, dtype=int)
        else:
            # Same labels as range(start_frame - 1, stop_frame),
            # with the first label forced to 0
            first_label = start_frame - 1
            frames = np.arange(first_label,
                               first_label + n_frames,
                               dtype=int)
            frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf,
                           index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')
    plt.show()
Ejemplo n.º 5
0
# Reading the trajectory via ``XTCFile.read()`` gives us the additional
# `atom_i` option that allows us to select the coordinates belonging to
# the amino acids (the atom indices where `protein_mask` is true).
xtc_file = xtc.XTCFile.read(traj_file_path, atom_i=np.where(protein_mask)[0])
trajectory = xtc_file.get_structure(template)
# Get simulation time for plotting purposes
time = xtc_file.get_time()

########################################################################
# Since the MD simulation used periodic boundaries, the protein might be
# segmented over the box boundary.
# For further analysis we need to reassemble the protein chain into a
# whole molecule, without periodic boundaries.
# In *Gromacs* we could have used ``gmx trjconv`` for this, but this
# problem can be handled in *Biotite*, too.

trajectory = struc.remove_pbc(trajectory)

########################################################################
# Now our trajectory is ready for some analysis!
# At first we want to see if the simulation converged.
# For this purpose we take the RMSD of a frame compared to the initial
# model as measure. In order to calculate the RMSD we must
# superimpose all models onto a reference, in this case we also choose
# the initial structure.

trajectory, transform = struc.superimpose(trajectory[0], trajectory)
rmsd = struc.rmsd(trajectory[0], trajectory)

# Plot the RMSD against the simulation time
figure = plt.figure(figsize=(6, 3))
ax = figure.add_subplot(111)
ax.plot(time, rmsd, color=biotite.colors["dimorange"])
Ejemplo n.º 6
0
# Show the box of a freshly created atom array, before any box is
# assigned to it
array = struc.AtomArray(length=100)
print(array.box)
# Assign a box and show it again
# NOTE(review): `box` is not defined in this excerpt; it is expected to
# be created earlier in the script
array.box = box
print(array.box)
# Show the box of a structure loaded from a file fetched from the RCSB
file_path = rcsb.fetch("3o5r", "mmtf", biotite.temp_dir())
array = strucio.load_structure(file_path)
print(array.box)

########################################################################
# When loading a trajectory from an MD simulation, the molecules are
# often fragmented over the periodic boundary.
# While a lot of analysis functions can handle such periodic boundary
# conditions automatically, some require completed molecules.
# In this case you should use :func:`remove_pbc()`.

array = struc.remove_pbc(array)

########################################################################
# Structure analysis
# ------------------
#
# This package would be almost useless if there weren't some means to
# analyze your structures.
# Therefore, *Biotite* offers a bunch of functions for this purpose,
# ranging from simple bond angle and length measurements to more
# complex characteristics, like accessible surface area and
# secondary structure.
# The following section will introduce you to some of these functions,
# which should be applied to that good old structure of *TC5b*.
#
# The examples shown in this section do not represent the full spectrum