Exemple #1
0
def test_masked_superimposition(seed):
    """
    Take two models of the same structure and superimpose based on a
    single, randomly chosen atom.
    Since two atoms can be superimposed perfectly, the distance between
    the atom in both models should be 0.
    This is checked for the structure returned by ``superimpose()``
    as well as for the structure obtained by re-applying the returned
    transformation with ``superimpose_apply()``.
    """

    path = join(data_dir, "1l2y.mmtf")
    fixed = strucio.load_structure(path, model=1)
    mobile = strucio.load_structure(path, model=2)

    # Create random mask for a single atom
    np.random.seed(seed)
    mask = np.full(fixed.array_length(), False)
    mask[np.random.randint(fixed.array_length())] = True

    # The distance between the atom in both models should not be
    # already 0 prior to superimposition
    assert struc.distance(fixed[mask], mobile[mask])[0] \
        != pytest.approx(0, abs=5e-4)

    fitted, transformation = struc.superimpose(fixed, mobile, mask)

    assert struc.distance(fixed[mask], fitted[mask])[0] \
        == pytest.approx(0, abs=5e-4)

    fitted = struc.superimpose_apply(mobile, transformation)

    # Without the 'assert' this comparison would be evaluated and
    # discarded, i.e. the re-applied transformation would go unchecked
    assert struc.distance(fixed[mask], fitted[mask])[0] \
        == pytest.approx(0, abs=5e-4)
Exemple #2
0
def test_superimposition_stack(ca_only):
    """
    Load a multi-model structure whose models are not (optimally)
    superimposed onto each other, superimpose all models but the
    first one onto the first model and expect that the RMSD to the
    first model improves.
    If `ca_only` is true, only the CA atoms are used for fitting.
    """
    models = strucio.load_structure(join(data_dir, "1l2y.mmtf"))
    fixed, mobile = models[0], models[1:]
    # No mask means that all atoms are used for the superimposition
    mask = (mobile.atom_name == "CA") if ca_only else None

    fitted, _ = struc.superimpose(fixed, mobile, mask)

    rmsd_fitted = struc.rmsd(fixed, fitted)
    rmsd_unfitted = struc.rmsd(fixed, mobile)
    if not ca_only:
        # Every single model must be superimposed better than in
        # the structure file
        assert (rmsd_fitted < rmsd_unfitted).all()
    else:
        # When fitting on CA atoms only, the all-atom RMSD is better
        # for most but not all models
        # -> Compare the average instead
        assert np.mean(rmsd_fitted) < np.mean(rmsd_unfitted)
Exemple #3
0
def test_superimposition_array(path):
    """
    Read the first model from a PDBx file, rotate and translate a copy
    of it and superimpose the copy back onto the original based on the
    CA atoms.
    The RMSD is expected to be approximately 0, both for the directly
    returned structure and after re-applying the transformation.
    """
    cif_file = pdbx.PDBxFile()
    cif_file.read(path)
    fixed = pdbx.get_structure(cif_file, model=1)

    # Displace a copy, so that both structures do not overlap anymore
    mobile = struc.translate(
        struc.rotate(fixed.copy(), (1, 2, 3)), (1, 2, 3)
    )

    ca_mask = (mobile.atom_name == "CA")
    fitted, transformation = struc.superimpose(fixed, mobile, ca_mask)
    assert struc.rmsd(fixed, fitted) == pytest.approx(0)

    refitted = struc.superimpose_apply(mobile, transformation)
    assert struc.rmsd(fixed, refitted) == pytest.approx(0)
Exemple #4
0
def test_superimposition_array(path):
    """
    Displace a copy of a structure by a rotation and a translation,
    so that original and copy do not overlap anymore.
    Superimposing the copy onto the original should then restore an
    almost perfect match, both for the directly returned structure
    and after re-applying the returned transformation.
    """
    tolerance = 6e-4

    fixed = strucio.load_structure(path, model=1)
    mobile = struc.translate(
        struc.rotate(fixed.copy(), (1, 2, 3)), (1, 2, 3)
    )

    fitted, transformation = struc.superimpose(fixed, mobile)
    assert struc.rmsd(fixed, fitted) == pytest.approx(0, abs=tolerance)

    refitted = struc.superimpose_apply(mobile, transformation)
    assert struc.rmsd(fixed, refitted) == pytest.approx(0, abs=tolerance)
Exemple #5
0
def test_superimposition_stack(ca_only):
    """
    Read all models from a PDBx file, superimpose all models but the
    first one onto the first model and expect that the RMSD to the
    first model improves compared to the coordinates in the file.
    If `ca_only` is true, only the CA atoms are used for fitting.
    """
    cif_file = pdbx.PDBxFile()
    cif_file.read(join(data_dir, "1l2y.cif"))
    models = pdbx.get_structure(cif_file)
    fixed, mobile = models[0], models[1:]
    # No mask means that all atoms are used for the superimposition
    mask = (mobile.atom_name == "CA") if ca_only else None

    fitted, _ = struc.superimpose(fixed, mobile, mask)

    rmsd_fitted = struc.rmsd(fixed, fitted)
    rmsd_unfitted = struc.rmsd(fixed, mobile)
    if not ca_only:
        # Every single model must be superimposed better than in
        # the structure file
        assert (rmsd_fitted < rmsd_unfitted).all()
    else:
        # When fitting on CA atoms only, the all-atom RMSD is better
        # for most but not all models
        # -> Compare the average instead
        assert np.mean(rmsd_fitted) < np.mean(rmsd_unfitted)
# Write the second trajectory frame (index 1) in two variants
# NOTE(review): `frame_1` is created outside this excerpt - presumably
# a copy of `template_model` carrying the coordinates of
# `trajectory[1]`, analogous to `frame_end` below; confirm
save_structure("frame_1_coord.pdb", frame_1)
save_structure("frame_1.pdb", trajectory[1])
print(" ... done ... ")

print(" ... writing end frame ...")
# Copy of the template model carrying the coordinates of the last frame
frame_end = template_model.copy()
frame_end.coord = trajectory[-1].coord
save_structure("frame_end_coord.pdb", frame_end)
save_structure("frame_end.pdb", trajectory[-1])
print(" ... done ... ")

# RMSD of each frame to the first frame and radius of gyration of each
# frame, both for the complete trajectory
# (no superimposition is performed in this excerpt before the RMSD)
rmsd_overall = struc.rmsd(trajectory[0], trajectory)
radius_overall = struc.gyration_radius(trajectory)

# kinase left
# Superimpose the sub-trajectory onto its own first frame before
# measuring the RMSD to that frame
trajectory_kinase_left, transform = struc.superimpose(
    trajectory_kinase_left[0], trajectory_kinase_left)
rmsd_kinase_left = struc.rmsd(trajectory_kinase_left[0],
                              trajectory_kinase_left)
radius_kinase_left = struc.gyration_radius(trajectory_kinase_left)

# kinase right
# Same procedure as for the left kinase; `transform` is overwritten
trajectory_kinase_right, transform = struc.superimpose(
    trajectory_kinase_right[0], trajectory_kinase_right)
rmsd_kinase_right = struc.rmsd(trajectory_kinase_right[0],
                               trajectory_kinase_right)
radius_kinase_right = struc.gyration_radius(trajectory_kinase_right)

# Two plots stacked vertically (2 rows, 1 column)
figure, (ax1, ax2) = plt.subplots(2, 1)

# Upper plot: RMSD of the left kinase over time
ax1.plot(time, rmsd_kinase_left, color=biotite.colors["dimorange"])
ax1.set_xlim(time[0], time[-1])
Exemple #7
0
                                       coord[np.newaxis, :])
            clashed = distances < vdw_radii_mean
            for clash_atom1, clash_atom2 in zip(*np.where(clashed)):
                if clash_atom1 == clash_atom2:
                    # Ignore distance of an atom to itself
                    continue
                if (clash_atom1, clash_atom2) not in bond_list:
                    # Nonbonded atoms clash
                    # -> structure is not accepted
                    accepted = False
    rotamer_coord[i] = coord
# Combine the residue annotations with the collected rotamer coordinates
rotamers = struc.from_template(residue, rotamer_coord)

### Superimpose backbone onto first model for better visualization ###
# Only the backbone atoms are used for fitting;
# the returned transformation is not needed
rotamers, _ = struc.superimpose(rotamers[0],
                                rotamers,
                                atom_mask=struc.filter_backbone(rotamers))

### Visualize rotamers ###
# One RGB color per atom, chosen by element;
# atoms of any other element keep the initial (0, 0, 0), i.e. black
colors = np.zeros((residue.array_length(), 3))
colors[residue.element == "H"] = (0.8, 0.8, 0.8)  # gray
colors[residue.element == "C"] = (0.0, 0.8, 0.0)  # green
colors[residue.element == "N"] = (0.0, 0.0, 0.8)  # blue
colors[residue.element == "O"] = (0.8, 0.0, 0.0)  # red

# For consistency, each subplot has the same box size
# -> Half of the largest coordinate extent along x, y or z,
#    taken over all rotamers and atoms
coord = rotamers.coord
size = np.array([
    coord[:, :, 0].max() - coord[:, :, 0].min(), coord[:, :, 1].max() -
    coord[:, :, 1].min(), coord[:, :, 2].max() - coord[:, :, 2].min()
]).max() * 0.5
Exemple #8
0
# Gromacs does not set the element symbol in its PDB files
# Therefore we simply determine the symbol
# from the first character in the atom name
# Since hydrogens may have leading numbers we simply ignore numbers
for i in range(template.array_length()):
    # Strip all digits, then take the first remaining character
    template.element[i] = re.sub(r"\d", "", template.atom_name[i])[0]
# The template is passed along when loading the trajectory file
trajectory = strucio.load_structure(traj_file_path, template=template)

########################################################################
# At first we want to see if the simulation converged.
# For this purpose we take the RMSD of a frame compared to the starting
# structure as measure. In order to calculate the RMSD we must
# superimpose all models onto a reference, in this case we choose the
# starting structure.

# The returned transformation is not used in this excerpt
trajectory, transform = struc.superimpose(template, trajectory)
rmsd = struc.rmsd(template, trajectory)
# Simulation was 1000 ps long
# -> Spread the frames evenly over this time span
time = np.linspace(0, 1000, len(trajectory))

figure = plt.figure(figsize=(6,3))
ax = figure.add_subplot(111)
ax.plot(time, rmsd, color=biotite.colors["dimorange"])
ax.set_xlim(0,1000)
ax.set_xlabel("Time (ps)")
ax.set_ylabel("RMSD (Angstrom)")
figure.tight_layout()

########################################################################
# As we can see the simulation seems to converge already in the
# Temporary output file for the superimposed Ku structure
# NOTE(review): `ku_dna_file` used below is defined outside this
# excerpt - presumably created the same way; confirm
ku_file = biotite.temp_file("ku.cif")

# Download and parse structure files
file = rcsb.fetch("1JEY", "mmtf", biotite.temp_dir())
ku_dna = strucio.load_structure(file)
file = rcsb.fetch("1JEQ", "mmtf", biotite.temp_dir())
ku = strucio.load_structure(file)
# Remove DNA and water
# (only chains A and B are kept for the DNA-containing structure)
ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")]
ku_dna = ku_dna[~struc.filter_solvent(ku_dna)]
ku = ku[~struc.filter_solvent(ku)]
# The structures have a differing amount of atoms missing
# at the start and end of the structure
# -> Find common structure
ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)]
ku_common = ku[struc.filter_intersection(ku, ku_dna)]
# Superimpose
# Only the CA atoms are used for fitting; the fitted structure returned
# here is replaced below, only the transformation is kept
ku_superimposed, transformation = struc.superimpose(
    ku_dna_common, ku_common, (ku_common.atom_name == "CA"))
# We do not want the cropped structures
# -> apply superimposition on structures before intersection filtering
ku_superimposed = struc.superimpose_apply(ku, transformation)
# Write PDBx files as input for PyMOL
# One file per structure, each with its own data block name
cif_file = pdbx.PDBxFile()
pdbx.set_structure(cif_file, ku_dna, data_block="ku_dna")
cif_file.write(ku_dna_file)
cif_file = pdbx.PDBxFile()
pdbx.set_structure(cif_file, ku_superimposed, data_block="ku")
cif_file.write(ku_file)
# Visualization with PyMOL...
# biotite_static_image = ku_superimposition.png
Exemple #10
0
def rmsf_plot(topology,
              xtc_traj,
              start_frame=None,
              stop_frame=None,
              write_dat_files=None):
    """
    Plot the RMSD, the radius of gyration and the per-residue RMSF of
    the protein in a trajectory.

    Three figures are created and finally displayed via ``plt.show()``.
    Optionally, the RMSD and RMSF values are written to the
    tab-separated files ``rmsd.dat`` and ``rmsf.dat`` in the current
    working directory.

    Parameters
    ----------
    topology
        The structure file that is loaded as template for the
        trajectory via ``strucio.load_structure()``.
    xtc_traj
        The trajectory file that is read by ``XTCFile.read()``.
    start_frame : int, optional
        Passed as ``start`` to ``XTCFile.read()``.
        By default, no start is given.
    stop_frame : int, optional
        ``stop_frame + 1`` is passed as ``stop`` to ``XTCFile.read()``.
        By default, no stop is given.
    write_dat_files : bool, optional
        If true, ``rmsd.dat`` and ``rmsf.dat`` are written.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame + 1' would raise a TypeError for the default 'None'
    # -> only shift the limit if one is actually given
    stop = None if stop_frame is None else stop_frame + 1

    xtc_file = XTCFile()
    xtc_file.read(xtc_traj,
                  atom_i=np.where(protein_mask)[0],
                  start=start_frame,
                  stop=stop)

    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    # Superimpose all frames onto the first one before measuring the
    # RMSD to it
    trajectory = struc.remove_pbc(trajectory)
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    radius = struc.gyration_radius(trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    # Fluctuation of each CA atom around its average position
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(np.arange(1, res_count + 1),
            rmsf,
            color=biotite.colors["dimorange"])
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # The frame labels start at 'start_frame - 1' and the first
        # label is always set to 0; the number of labels follows the
        # number of RMSD values, so missing frame limits are supported
        # NOTE(review): the '- 1' offset and the forced first label are
        # kept from the original implementation - confirm the intended
        # frame numbering
        first_frame = 0 if start_frame is None else start_frame - 1
        frames = np.arange(first_frame, first_frame + len(rmsd), dtype=int)
        if len(frames) > 0:
            frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf,
                           index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')
    plt.show()
def assemble_peptide(sequence):
    """
    Assemble a peptide chain, including bonds, from an amino acid
    sequence.

    For each residue a backbone (N, CA, C, O, H) is constructed from
    bond length constants, the side chain is taken from the residue
    returned by ``info.residue()`` and the residue is attached to the
    previous one.
    Finally a second N-terminal hydrogen (H2) and the C-terminal
    hydroxyl group (OXT, HXT) are added.

    Parameters
    ----------
    sequence
        The one-letter amino acid symbols of the peptide.
        Each element is converted with
        ``ProteinSequence.convert_letter_1to3()``.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide in chain ``"A"``, with residue IDs
        starting at 1.
    """
    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    peptide = struc.AtomArray(length=0)

    # The connection angle alternates between consecutive residues
    for res_id, res_name, connect_angle in zip(
            np.arange(1, len(res_names) + 1),
            res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        atom_n = struc.Atom([0.0, 0.0, 0.0], atom_name="N", element="N")

        atom_ca = struc.Atom([0.0, N_CA_LENGTH, 0.0],
                             atom_name="CA",
                             element="C")

        coord_c = calculate_atom_coord_by_z_rotation(atom_ca.coord,
                                                     atom_n.coord, 120,
                                                     CA_C_LENGTH)
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")

        coord_o = calculate_atom_coord_by_z_rotation(atom_c.coord,
                                                     atom_ca.coord, 120,
                                                     C_O_DOUBLE_LENGTH)
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")

        coord_h = calculate_atom_coord_by_z_rotation(atom_n.coord,
                                                     atom_ca.coord, -120,
                                                     N_H_LENGTH)
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)])
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.atom_name,
            ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"])]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # Reconnect the side chain atoms that are bonded to CA
        # Glycine has neither CB nor HA, but HA2 and HA3 instead
        # -> only add bonds to those atoms that actually exist
        # NOTE(review): proline still gets the amide hydrogen and no
        # N-CD bond here - confirm whether proline needs to be supported
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for partner_name in ("CB", "HA", "HA2", "HA3"):
            partner_indices = np.where(residue.atom_name == partner_name)[0]
            if len(partner_indices) > 0:
                residue.bonds.add_bond(index_ca, partner_indices[0],
                                       struc.BondType.SINGLE)
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where((peptide.res_id == res_id - 1)
                                     & (peptide.atom_name == "CA"))[0][0]
            index_prev_c = np.where((peptide.res_id == res_id - 1)
                                    & (peptide.atom_name == "C"))[0][0]
            index_curr_n = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "N"))[0][0]
            index_curr_c = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "C"))[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry
            # -> move the residue so that its N sits at the position
            #    calculated relative to the previous C and CA
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH)
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n)

            # Add bond between previous C and current N
            peptide.bonds.add_bond(index_prev_c, index_curr_n,
                                   struc.BondType.SINGLE)

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(atom_n.coord, atom_h.coord,
                                                  -120, N_H_LENGTH)
    atom_h2 = struc.Atom(coord_h2,
                         chain_id="A",
                         res_id=1,
                         res_name=atom_h.res_name,
                         atom_name="H2",
                         element="H")
    # H2 is prepended, i.e. it gets index 0 and the N of the first
    # residue moves to index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # 'connect_angle' still holds the value of the last loop iteration
    last_id = len(sequence)
    index_c = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "C"))[0][0]
    index_o = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "O"))[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(peptide.coord[index_c],
                                                   peptide.coord[index_o],
                                                   connect_angle, C_O_LENGTH)
    coord_hxt = calculate_atom_coord_by_z_rotation(coord_oxt,
                                                   peptide.coord[index_c],
                                                   connect_angle, O_H_LENGTH)
    atom_oxt = struc.Atom(coord_oxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="OXT",
                          element="O")
    atom_hxt = struc.Atom(coord_hxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="HXT",
                          element="H")
    # OXT and HXT are appended, i.e. they get the indices -2 and -1
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide
# For further analysis we need to reassemble the protein chain into a
# whole molecule, without periodic boundaries.
# in *Gromacs* we could have used ``gmx trjconv`` for this, but this
# problem can be handled in *Biotite*, too.

trajectory = struc.remove_pbc(trajectory)

########################################################################
# Now our trajectory is ready for some analysis!
# At first we want to see if the simulation converged.
# For this purpose we take the RMSD of a frame compared to the initial
# model as measure. In order to calculate the RMSD we must
# superimpose all models onto a reference, in this case we also choose
# the initial structure.

# The returned transformation is not used in this excerpt
trajectory, transform = struc.superimpose(trajectory[0], trajectory)
rmsd = struc.rmsd(trajectory[0], trajectory)

figure = plt.figure(figsize=(6, 3))
ax = figure.add_subplot(111)
ax.plot(time, rmsd, color=biotite.colors["dimorange"])
ax.set_xlim(time[0], time[-1])
# NOTE(review): the text below mentions an RMSD of approx. 2 - 3 Å,
# which would lie outside of this y-axis range - confirm
ax.set_ylim(0, 2)
ax.set_xlabel("Time (ps)")
ax.set_ylabel("RMSD (Å)")
figure.tight_layout()

########################################################################
# As we can see the simulation seems to converge already early in the
# simulation.
# After about 200 ps the RMSD stays in a range of approx. 2 - 3 Å.
Exemple #13
0
# Now we want to calculate a measure of flexibility for each residue in
# *TC5b*. The *root mean square fluctuation* (RMSF) is a good value for
# that.
# It represents the deviation for each atom in all models relative
# to a reference model, which is usually the averaged structure.
# Since we are only interested in the backbone flexibility, we consider
# only CA atoms.
# Before we can calculate a reasonable RMSF, we have to superimpose each
# model on a reference model (we choose the first model),
# which minimizes the *root mean square deviation* (RMSD).

stack = strucio.load_structure(file_path)
# We consider only CA atoms
stack = stack[:, stack.atom_name == "CA"]
# Superimposing all models of the structure onto the first model
# (the returned transformation is not used in this excerpt)
stack, transformation_tuple = struc.superimpose(stack[0], stack)
print("RMSD for each model to first model:")
print(struc.rmsd(stack[0], stack))
# Calculate the RMSF relative to average of all models
rmsf = struc.rmsf(struc.average(stack), stack)
# Plotting stuff
# The x-axis values are hard-coded for 20 residues (1 to 20)
plt.plot(np.arange(1, 21), rmsf)
plt.xlim(0, 20)
plt.xticks(np.arange(1, 21))
plt.xlabel("Residue")
plt.ylabel("RMSF")
plt.show()

########################################################################
# As you can see, both terminal residues are most flexible.
#
Exemple #14
0
    template_dimer = strucio.load_structure(
        "dimer_refined/pk_mono_sur_di_0001_000001_0001.pdb")

    print(" ... loading XTC files ... ")
    xtc_dimer = xtc.XTCFile()
    xtc_dimer.read("dimers_ordered_by_cleaned.xtc")  #, 1, 10)
    print(" ... done ... ")
    print("")
    print("")

    trajectory_dimer = xtc_dimer.get_structure(template_dimer)

    pkcs_start = trajectory_dimer[0][trajectory_dimer[0].chain_id == 'A']
    pkcs_start = pkcs_start[pkcs_start.atom_name == "CA"]
    trajectory_dimer, transform = struc.superimpose(
        trajectory_dimer[0], trajectory_dimer,
        (trajectory_dimer[0].chain_id == 'A')
        & (trajectory_dimer[0].atom_name == 'CA'))

    trajectory_dimer_ca = trajectory_dimer[:,
                                           (trajectory_dimer.atom_name == "CA")
                                           &
                                           ((trajectory_dimer.res_id < 3206) |
                                            (trajectory_dimer.res_id > 3226))]
    trajectory_dimer_activesite = trajectory_dimer[:, (
        trajectory_dimer.res_id >= 3747)
                                                   & (trajectory_dimer.
                                                      res_id <= 4015)]
    trajectory_dimer_survivin_1 = trajectory_dimer[:,
                                                   trajectory_dimer.chain_id ==
                                                   'B']
    trajectory_dimer_survivin_2 = trajectory_dimer[:,