Python get_residues Exemples, biotite.structure.get_residues Python Exemples

Exemple #1

0

Afficher le fichier

def test_get_residues(array):
    """
    Check the residue IDs and names reported by ``struc.get_residues()``
    against the expected 20-residue sequence, and check that the number
    of residues agrees with ``struc.get_residue_count()``.
    """
    expected_ids = [*range(1, 21)]
    expected_names = [
        "ASN", "LEU", "TYR", "ILE", "GLN",
        "TRP", "LEU", "LYS", "ASP", "GLY",
        "GLY", "PRO", "SER", "SER", "GLY",
        "ARG", "PRO", "PRO", "PRO", "SER",
    ]

    res_ids, res_names = struc.get_residues(array)

    assert res_ids.tolist() == expected_ids
    assert res_names.tolist() == expected_names
    assert len(res_ids) == struc.get_residue_count(array)

Exemple #2

0

Afficher le fichier

def get_reference_from_structure(
        structure_path: str,
        positions: t.Optional[t.Container[int]] = None) -> str:
    """
    Build a sequence string from the residues of a structure file.

    The structure at `structure_path` is loaded, its residue names are
    translated through ``AminoAcidDict().aa_dict`` and the translated
    codes are concatenated in residue order.
    If `positions` is given, only residues whose residue ID is contained
    in `positions` contribute to the result.
    """
    code_of = AminoAcidDict().aa_dict
    res_ids, res_names = bst.get_residues(io.load_structure(structure_path))
    if positions is None:
        selected_names = res_names
    else:
        selected_names = [
            name for res_id, name in zip(res_ids, res_names)
            if res_id in positions
        ]
    return "".join(code_of[name] for name in selected_names)

Exemple #3

0

Afficher le fichier

Fichier : residue_chirality.py Projet : simoneperazzoli/biotite

def analyze_chirality(array):
    """
    Determine an enantiomer value for each amino acid residue.

    Parameters
    ----------
    array : AtomArray
        The structure to analyze. Only amino acid residues are
        considered.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        One value per amino acid residue: the result of
        ``get_enantiomer()`` for residues with exactly four selected
        atoms (backbone atoms + CB), otherwise 0.
    """
    # Filter backbone + CB
    array = array[struc.filter_amino_acids(array)]
    array = array[(array.atom_name == "CB") | (struc.filter_backbone(array))]
    # Residues without exactly four selected atoms
    # (e.g. glycine -> no CB -> no chirality) keep the initial value 0
    enantiomers = np.zeros(struc.get_residue_count(array), dtype=int)
    # Iterate over the residues themselves instead of selecting atoms by
    # residue ID: the same residue ID may appear in several chains,
    # which would merge the atoms of distinct residues
    for i, residue in enumerate(struc.residue_iter(array)):
        coord = residue.coord
        if len(coord) == 4:
            enantiomers[i] = get_enantiomer(coord[0], coord[1], coord[2],
                                            coord[3])
    return enantiomers

Exemple #4

0

Afficher le fichier

Fichier : test_info.py Projet : avestamh/biotite

def test_mass():
    """
    Verify that the tabulated mass of each residue agrees with the
    summed mass of the atoms that residue actually contains in the
    structure, apart from a few hydrogen atoms.
    """
    h_mass = strucinfo.mass("H")
    o_mass = strucinfo.mass("O")

    array = load_structure(join(data_dir, "1l2y.mmtf"))[0]
    res_names = struc.get_residues(array)[1]

    # The tabulated residue masses include one water molecule
    # that must be subtracted
    water_mass = h_mass * 2 + o_mass
    masses = []
    for res_name in res_names:
        masses.append(strucinfo.mass(res_name) - water_mass)
    # The C-terminal residue normally carries an additional oxygen atom
    masses[-1] += o_mass

    ref_masses = [strucinfo.mass(res) for res in struc.residue_iter(array)]

    mass_diff = np.array(
        [abs(mass - ref_mass) for mass, ref_mass in zip(masses, ref_masses)]
    )
    # Depending on the protonation state, up to three hydrogen atoms
    # may be additional/missing...
    assert (mass_diff // h_mass <= 3).all()
    # ...but the difference must be a whole number of hydrogen masses
    assert np.allclose(mass_diff % h_mass, 0, atol=5e-3)

Exemple #5

0

Afficher le fichier

# Distance threshold used both as the cell size of the cell list
# and as the search radius for contacts
THRESHOLD_DISTANCE = 4.0

# Fetch and load structure
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)

# Separate structure into the DNA and the two identical protein chains
# (hetero atoms are excluded in each selection)
dna = structure[np.isin(structure.chain_id, ["A", "B"])
                & ~structure.hetero]
protein_l = structure[(structure.chain_id == "L")
                      & ~structure.hetero]
protein_r = structure[(structure.chain_id == "R")
                      & ~structure.hetero]
# Quick check if the two protein chains are really identical:
# compare the number of residues.
# 'len(struc.get_residues(...))' would be the length of the returned
# (ids, names) tuple, which is the same for any structure
assert struc.get_residue_count(protein_l) \
    == struc.get_residue_count(protein_r)

# Fast identification of contacts via a cell list:
# The cell list is initialized with the coordinates of the DNA
# and later provided with the atom coordinates of the two protein chains
cell_list = struc.CellList(dna, cell_size=THRESHOLD_DISTANCE)

# Sets to store the residue IDs of contact residues
# for each protein chain
id_set_l = set()
id_set_r = set()

for protein, res_id_set in zip((protein_l, protein_r), (id_set_l, id_set_r)):
    # For each atom in the protein chain,
    # find all atoms in the DNA that are in contact with it
    contacts = cell_list.get_atoms(protein.coord, radius=THRESHOLD_DISTANCE)

Exemple #6

0

Afficher le fichier

                            # Read the complete content of 'f' into a single string
                            # NOTE(review): 'f' is opened outside the visible
                            # fragment - presumably the '<dimer_id>.cif' file
                            # removed below; confirm
                            content = ''.join(f.readlines())

                            # Queue a parameterized INSERT as a (statement, parameters)
                            # tuple, timestamped with the current local time
                            query = 'INSERT INTO interfaces_cif (dimer_id, cif_file, insert_time) VALUES(?,?,?)', (
                                dimer_id, content,
                                datetime.strftime(datetime.now(),
                                                  "%Y-%m-%d %H:%M:%S"))
                            sql_queries.append(query)

                        # Delete the '<dimer_id>.cif' file from the working directory
                        os.remove("%s.cif" % dimer_id)

                        # Comma-separated residue names of the atoms in
                        # 'extInterface[4]' that belong to chain 'comb[0]'
                        # (index [1] selects the names returned by get_residues())
                        seq_1 = ", ".join(
                            map(
                                str,
                                list(
                                    struc.get_residues(extInterface[4][
                                        extInterface[4].chain_id == comb[0]])
                                    [1])))
                        # Same for chain 'comb[1]'
                        seq_2 = ", ".join(
                            map(
                                str,
                                list(
                                    struc.get_residues(extInterface[4][
                                        extInterface[4].chain_id == comb[1]])
                                    [1])))

                        # Queue the INSERT for both residue name sequences,
                        # again as a (statement, parameters) tuple
                        query = 'INSERT INTO interfaces_seq (dimer_id, pdb_id, chain_1, chain_2, sequence_1, sequence_2, insert_time) VALUES(?,?,?,?,?,?,?)', (
                            dimer_id, pdb_id, comb[0], comb[1], seq_1, seq_2,
                            datetime.strftime(datetime.now(),
                                              "%Y-%m-%d %H:%M:%S"))
                        sql_queries.append(query)

Exemple #7

0

Afficher le fichier

def rmsf_plot(topology,
              xtc_traj,
              start_frame=None,
              stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and RMSF of the protein in a
    trajectory.

    Three figures are created (RMSD over time, radius of gyration over
    time, RMSF of the CA atoms per residue) and displayed via
    ``plt.show()``.

    Parameters
    ----------
    topology
        The structure file that is loaded as template for the
        trajectory.
    xtc_traj
        The XTC trajectory file to read.
    start_frame : int, optional
        Passed as `start` to the trajectory reader.
        By default the reader's own default is used.
    stop_frame : int, optional
        ``stop_frame + 1`` is passed as `stop` to the trajectory reader.
        By default no stop is given to the reader.
    write_dat_files : bool, optional
        If true, the RMSD and RMSF values are additionally written as
        tab-separated tables to ``rmsd.dat`` and ``rmsf.dat`` in the
        current working directory.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    xtc_file = XTCFile()
    xtc_file.read(xtc_traj,
                  atom_i=np.where(protein_mask)[0],
                  start=start_frame,
                  # 'None + 1' would raise a TypeError for the default
                  stop=None if stop_frame is None else stop_frame + 1)

    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    trajectory = struc.remove_pbc(trajectory)
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, rmsd, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(0, 2)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("RMSD (Å)")
    figure.tight_layout()

    radius = struc.gyration_radius(trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(time, radius, color=biotite.colors["dimorange"])
    ax.set_xlim(time[0], time[-1])
    ax.set_ylim(14.0, 14.5)
    ax.set_xlabel("Time (ps)")
    ax.set_ylabel("Radius of gyration (Å)")
    figure.tight_layout()

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)

    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    res_count = struc.get_residue_count(trajectory)
    ax.plot(np.arange(1, res_count + 1),
            rmsf,
            color=biotite.colors["dimorange"])
    ax.set_xlim(1, res_count)
    ax.set_ylim(0, 1.5)
    ax.set_xlabel("Residue")
    ax.set_ylabel("RMSF (Å)")
    figure.tight_layout()

    if write_dat_files:
        # Write RMSD *.dat file
        # One frame label per RMSD value, counted from 'start_frame - 1';
        # without a start frame the labels are counted from 0
        first_frame = 0 if start_frame is None else start_frame - 1
        frames = np.arange(first_frame, first_frame + len(rmsd), dtype=int)
        # The first label is always 0
        # NOTE(review): presumably marks the reference frame - confirm
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf,
                           index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')
    plt.show()

Exemple #8

0

Afficher le fichier

import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.sequence.graphics as graphics
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.patches import Arc
import numpy as np

# Download the PDB file of entry 4p5j into the temporary directory
# and read the structure
# NOTE(review): 'gettempdir' and 'pdb' are not among the imports visible
# in this snippet - confirm they are imported elsewhere
pdb_file_path = rcsb.fetch("4p5j", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
# Use only the first element of the returned structure
atom_array = pdb.get_structure(pdb_file)[0]
# Keep only the atoms selected by the nucleotide filter
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

# Get the residue ids and residue names of the nucleotides
residue_ids, residue_names = struc.get_residues(nucleotides)

# Create a matplotlib figure with a single axes
fig, ax = plt.subplots(figsize=(8.0, 4.5))

# Set up the axes:
# The x-axis has one unit per residue, the y-axis spans half that range;
# with the equal aspect ratio both axes use the same scale
ax.set_xlim(0.5, len(residue_ids) + 0.5)
ax.set_ylim(0, len(residue_ids) / 2 + 0.5)
ax.set_aspect("equal")
# Major ticks on the x-axis at every multiple of 3, no ticks on the y-axis
ax.xaxis.set_major_locator(ticker.MultipleLocator(3))
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_yticks([])

# Remove the frame
plt.box(False)