Esempio n. 1
0
def test_get_residues(array):
    """
    Check the residue IDs and names reported by ``struc.get_residues()``
    for the given structure, and that the number of reported residues
    agrees with ``struc.get_residue_count()``.
    """
    expected_ids = list(range(1, 21))
    expected_names = [
        "ASN", "LEU", "TYR", "ILE", "GLN", "TRP", "LEU", "LYS", "ASP", "GLY",
        "GLY", "PRO", "SER", "SER", "GLY", "ARG", "PRO", "PRO", "PRO", "SER"
    ]

    ids, names = struc.get_residues(array)

    assert ids.tolist() == expected_ids
    assert names.tolist() == expected_names
    # Both residue-level functions must agree on the number of residues
    assert len(ids) == struc.get_residue_count(array)
Esempio n. 2
0
def get_reference_from_structure(
        structure_path: str,
        positions: t.Optional[t.Container[int]] = None) -> str:
    """
    Build a sequence string from the residues of a structure file.

    The structure at `structure_path` is loaded, its residues are
    listed and each residue name is translated via
    ``AminoAcidDict().aa_dict``. The translated entries are
    concatenated in residue order.

    Parameters
    ----------
    structure_path : str
        Path of the structure file to load.
    positions : container of int, optional
        If given, only residues whose residue ID is contained in
        `positions` contribute to the result.

    Returns
    -------
    str
        The concatenated mapped residue names.

    Raises
    ------
    KeyError
        If a residue name is missing from the mapping.
    """
    # presumably maps three-letter residue names to one-letter codes
    name_to_symbol = AminoAcidDict().aa_dict
    structure = io.load_structure(structure_path)
    symbols = [
        name_to_symbol[res_name]
        for res_id, res_name in zip(*bst.get_residues(structure))
        if positions is None or res_id in positions
    ]
    return "".join(symbols)
def analyze_chirality(array):
    """
    Determine an enantiomer value for every amino acid residue.

    Only backbone atoms and ``CB`` atoms of amino acid residues are
    considered. For each residue with exactly four such atoms, their
    coordinates are passed to ``get_enantiomer()``; every other residue
    (e.g. glycine, which has no ``CB``) keeps the value ``0``.

    Parameters
    ----------
    array
        The structure to analyze.

    Returns
    -------
    ndarray, dtype=int
        One value per residue, in the order given by
        ``struc.get_residues()``.
    """
    # Restrict to amino acids, then to backbone + CB atoms
    amino_acids = array[struc.filter_amino_acids(array)]
    selection_mask = (
        (amino_acids.atom_name == "CB") | struc.filter_backbone(amino_acids)
    )
    selection = amino_acids[selection_mask]

    res_ids, _ = struc.get_residues(selection)
    # Zero-initialized: residues without chirality keep this value
    enantiomers = np.zeros(len(res_ids), dtype=int)
    # NOTE(review): atoms are selected by residue ID only, so residues
    # sharing an ID (e.g. in different chains) would be merged here
    for index, res_id in enumerate(res_ids):
        coord = selection.coord[selection.res_id == res_id]
        if len(coord) == 4:
            enantiomers[index] = get_enantiomer(*coord)
    return enantiomers
Esempio n. 4
0
def test_mass():
    """
    Test whether the mass looked up for a residue name agrees with the
    mass computed from the residue's atoms in a loaded structure.
    """
    array = load_structure(join(data_dir, "1l2y.mmtf"))[0]
    _, res_names = struc.get_residues(array)

    hydrogen_mass = strucinfo.mass("H")
    oxygen_mass = strucinfo.mass("O")
    water_mass = hydrogen_mass * 2 + oxygen_mass

    # Mass of water must be subtracted from each name-based mass
    masses = [strucinfo.mass(name) - water_mass for name in res_names]
    # C-terminus normally has an additional oxygen atom
    masses[-1] += oxygen_mass

    ref_masses = [strucinfo.mass(res) for res in struc.residue_iter(array)]

    differences = [mass - ref for mass, ref in zip(masses, ref_masses)]
    mass_diff = np.abs(np.array(differences))

    # Up to three additional/missing hydrogens are allowed
    # (protonation state) ...
    assert (mass_diff // hydrogen_mass <= 3).all()
    # ... and nothing but whole hydrogens may account for the difference
    assert np.allclose((mass_diff % hydrogen_mass), 0, atol=5e-3)
Esempio n. 5
0
# Distance used both as the cell size of the cell list
# and as the contact radius
THRESHOLD_DISTANCE = 4.0

# Fetch and load structure
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)

# Separate structure into the DNA and the two identical protein chains
# ('== False' is an element-wise comparison on the annotation array)
dna = structure[np.isin(structure.chain_id, ["A", "B"])
                & (structure.hetero == False)]
protein_l = structure[(structure.chain_id == "L")
                      & (structure.hetero == False)]
protein_r = structure[(structure.chain_id == "R")
                      & (structure.hetero == False)]
# Quick check if the two protein chains are really identical:
# 'get_residues()' returns an '(ids, names)' tuple, so taking 'len()'
# of its result would always give 2 and the check could never fail.
# Compare the actual number of residues instead.
assert struc.get_residue_count(protein_l) \
    == struc.get_residue_count(protein_r)

# Fast identification of contacts via a cell list:
# The cell list is initialized with the coordinates of the DNA
# and later provided with the atom coordinates of the two protein chains
cell_list = struc.CellList(dna, cell_size=THRESHOLD_DISTANCE)

# Sets to store the residue IDs of contact residues
# for each protein chain
id_set_l = set()
id_set_r = set()

# Each protein chain is paired with the set meant to collect its
# contact residue IDs
# NOTE(review): in the visible part of the loop 'res_id_set' is not
# filled yet -- the loop body presumably continues beyond this excerpt
for protein, res_id_set in zip((protein_l, protein_r), (id_set_l, id_set_r)):
    # For each atom in the protein chain,
    # find all atoms in the DNA that are in contact with it
    contacts = cell_list.get_atoms(protein.coord, radius=THRESHOLD_DISTANCE)
Esempio n. 6
0
                            # Read the complete file content into one string
                            content = ''.join(f.readlines())

                            # A query is stored as a tuple of the SQL text
                            # (with '?' placeholders) and its parameter tuple;
                            # the timestamp is the current naive local time
                            query = 'INSERT INTO interfaces_cif (dimer_id, cif_file, insert_time) VALUES(?,?,?)', (
                                dimer_id, content,
                                datetime.strftime(datetime.now(),
                                                  "%Y-%m-%d %H:%M:%S"))
                            sql_queries.append(query)

                        # Delete the '<dimer_id>.cif' file from the working
                        # directory
                        os.remove("%s.cif" % dimer_id)

                        # Comma-separated residue names (second element of
                        # the 'get_residues()' result) of the atoms in
                        # 'extInterface[4]' belonging to chain 'comb[0]'
                        seq_1 = ", ".join(
                            map(
                                str,
                                list(
                                    struc.get_residues(extInterface[4][
                                        extInterface[4].chain_id == comb[0]])
                                    [1])))
                        # Same for chain 'comb[1]'
                        seq_2 = ", ".join(
                            map(
                                str,
                                list(
                                    struc.get_residues(extInterface[4][
                                        extInterface[4].chain_id == comb[1]])
                                    [1])))

                        # Queue the insert of both chain sequences, again as
                        # an (SQL text, parameters) tuple
                        query = 'INSERT INTO interfaces_seq (dimer_id, pdb_id, chain_1, chain_2, sequence_1, sequence_2, insert_time) VALUES(?,?,?,?,?,?,?)', (
                            dimer_id, pdb_id, comb[0], comb[1], seq_1, seq_2,
                            datetime.strftime(datetime.now(),
                                              "%Y-%m-%d %H:%M:%S"))
                        sql_queries.append(query)
Esempio n. 7
0
def _plot_series(x, y, xlim, ylim, xlabel, ylabel):
    """Create a new 6x3 figure showing `y` over `x` as a single line."""
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(x, y, color=biotite.colors["dimorange"])
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    figure.tight_layout()


def rmsf_plot(topology,
              xtc_traj,
              start_frame=None,
              stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and RMSF of the protein in a
    trajectory.

    Three figures are created and shown: RMSD over time, radius of
    gyration over time, and the RMSF of the ``CA`` atoms per residue.
    Optionally the RMSD and RMSF values are written to ``rmsd.dat``
    and ``rmsf.dat`` in the current working directory.

    Parameters
    ----------
    topology
        Structure file that is loaded as template for the trajectory.
    xtc_traj
        The XTC trajectory file to read.
    start_frame : int, optional
        First frame to read. By default reading starts at the
        beginning of the trajectory.
    stop_frame : int, optional
        Last frame to read (the reader is given ``stop_frame + 1`` as
        its stop value). By default the trajectory is read to its end.
    write_dat_files : bool, optional
        If true, ``rmsd.dat`` and ``rmsf.dat`` are written as
        tab-separated tables.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # Only add 1 when a stop frame was actually given:
    # 'None + 1' would raise a TypeError for the default argument
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(xtc_traj,
                  atom_i=np.where(protein_mask)[0],
                  start=start_frame,
                  stop=stop)

    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    trajectory = struc.remove_pbc(trajectory)
    # The transformation itself is not needed
    trajectory, _ = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)
    _plot_series(time, rmsd, (time[0], time[-1]), (0, 2),
                 "Time (ps)", "RMSD (Å)")

    radius = struc.gyration_radius(trajectory)
    _plot_series(time, radius, (time[0], time[-1]), (14.0, 14.5),
                 "Time (ps)", "Radius of gyration (Å)")

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    res_count = struc.get_residue_count(trajectory)
    _plot_series(np.arange(1, res_count + 1), rmsf, (1, res_count), (0, 1.5),
                 "Residue", "RMSF (Å)")

    if write_dat_files:
        # Write RMSD *.dat file
        # Fall back to the frames that were actually read when no
        # explicit frame range was given
        first_frame = 0 if start_frame is None else start_frame
        if stop_frame is None:
            last_frame = first_frame + len(rmsd) - 1
        else:
            last_frame = stop_frame
        frames = np.array(range(first_frame - 1, last_frame), dtype=int)
        # NOTE(review): kept from the original implementation; the first
        # index label is forced to 0 -- intent should be confirmed
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf,
                           index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')
    plt.show()
Esempio n. 8
0
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.sequence.graphics as graphics
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.patches import Arc
import numpy as np

# Fetch the PDB entry into the temporary directory, parse it and take
# the first element of the returned structure
pdb_file_path = rcsb.fetch("4p5j", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
atom_array = pdb.get_structure(pdb_file)[0]

# Keep only the nucleotide atoms
nucleotide_mask = struc.filter_nucleotides(atom_array)
nucleotides = atom_array[nucleotide_mask]

# Residue IDs and residue names of the nucleotides
residue_ids, residue_names = struc.get_residues(nucleotides)
residue_count = len(residue_ids)

# Figure and axes for the plot
fig, ax = plt.subplots(figsize=(8.0, 4.5))

# One x-unit per residue with half a unit of padding on both sides;
# the y-range spans half the residue count
ax.set_xlim(0.5, residue_count + 0.5)
ax.set_ylim(0, residue_count / 2 + 0.5)
ax.set_aspect("equal")
# Major x-tick at every multiple of 3, no y-ticks
ax.xaxis.set_major_locator(ticker.MultipleLocator(3))
ax.tick_params(axis="both", which="major", labelsize=8)
ax.set_yticks([])

# Hide the frame around the axes
plt.box(False)