def test_get_residues(array):
    """
    Check the residue IDs and names reported for the test structure.

    ``array`` is the structure under test (presumably supplied by a
    fixture defined elsewhere).  It is expected to contain 20 residues
    that are numbered consecutively, starting at 1.
    """
    expected_ids = [*range(1, 21)]
    expected_names = (
        "ASN LEU TYR ILE GLN TRP LEU LYS ASP GLY "
        "GLY PRO SER SER GLY ARG PRO PRO PRO SER"
    ).split()

    res_ids, res_names = struc.get_residues(array)

    assert res_ids.tolist() == expected_ids
    assert res_names.tolist() == expected_names
    # The separately computed residue count must agree with the
    # number of returned residue IDs
    assert len(res_ids) == struc.get_residue_count(array)
def get_reference_from_structure(
        structure_path: str,
        positions: t.Optional[t.Container[int]] = None) -> str:
    """
    Build a sequence string from the residues of a structure file.

    :param structure_path: Path handed to ``io.load_structure``.
    :param positions: Residue IDs to keep. With ``None`` (the default)
        every residue is used.
    :return: The concatenation of the ``AminoAcidDict().aa_dict``
        entries looked up by residue name, in residue order
        (presumably one-letter amino acid codes -- confirm against
        ``AminoAcidDict``).
    """
    code_by_name = AminoAcidDict().aa_dict
    res_ids, res_names = bst.get_residues(io.load_structure(structure_path))

    codes = []
    for res_id, res_name in zip(res_ids, res_names):
        # Skip residues the caller did not ask for
        if positions is not None and res_id not in positions:
            continue
        codes.append(code_by_name[res_name])
    return "".join(codes)
def analyze_chirality(array):
    """
    Determine the enantiomer value of each amino acid residue.

    The structure is reduced to amino acid residues and, within those,
    to the atoms selected by ``struc.filter_backbone`` plus the ``CB``
    atoms.  Each residue is then evaluated on its own.

    Residues are visited with ``struc.residue_iter`` instead of being
    selected via ``res_id``, so residues that share an ID (e.g. in
    different chains) are not merged into one atom selection.

    Parameters
    ----------
    array
        The structure to analyze.  It must support boolean mask
        indexing and provide ``atom_name`` and ``coord``.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        One value per residue: the result of ``get_enantiomer`` for
        residues with exactly four selected atoms, ``0`` otherwise.
    """
    # Filter backbone + CB
    array = array[struc.filter_amino_acids(array)]
    array = array[(array.atom_name == "CB") | struc.filter_backbone(array)]

    # Residues without a value keep the initial 0
    enantiomers = np.zeros(struc.get_residue_count(array), dtype=int)
    for i, residue in enumerate(struc.residue_iter(array)):
        coord = residue.coord
        # Anything other than four atoms (e.g. glycine, which lacks
        # CB) has no chirality to evaluate
        if len(coord) == 4:
            enantiomers[i] = get_enantiomer(
                coord[0], coord[1], coord[2], coord[3]
            )
    return enantiomers
def test_mass():
    """
    Check that the mass looked up by residue name matches the mass
    computed from the atoms of the corresponding residue.

    The comparison tolerates a difference of up to three hydrogen
    masses per residue.
    """
    atoms = load_structure(join(data_dir, "1l2y.mmtf"))[0]
    res_names = struc.get_residues(atoms)[1]

    hydrogen_mass = strucinfo.mass("H")
    oxygen_mass = strucinfo.mass("O")
    water_mass = hydrogen_mass * 2 + oxygen_mass

    # The mass of one water molecule is subtracted from each
    # name-based residue mass
    expected_masses = [strucinfo.mass(name) - water_mass for name in res_names]
    # The C-terminus normally carries an additional oxygen atom
    expected_masses[-1] += oxygen_mass

    atom_based_masses = [
        strucinfo.mass(residue) for residue in struc.residue_iter(atoms)
    ]

    differences = [
        expected - actual
        for expected, actual in zip(expected_masses, atom_based_masses)
    ]
    mass_diff = np.abs(np.array(differences))

    # Up to three additional/missing hydrogens are allowed
    # (protonation state) ...
    hydrogen_count = mass_diff // hydrogen_mass
    assert (hydrogen_count <= 3).all()
    # ... and apart from whole hydrogens the masses must agree
    leftover = mass_diff % hydrogen_mass
    assert np.allclose(leftover, 0, atol=5e-3)
# Distance used both as cell size of the cell list and as contact radius
THRESHOLD_DISTANCE = 4.0

# Fetch and load structure
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)

# Separate structure into the DNA and the two identical protein chains,
# ignoring all hetero atoms
non_hetero = ~structure.hetero
dna = structure[np.isin(structure.chain_id, ["A", "B"]) & non_hetero]
protein_l = structure[(structure.chain_id == "L") & non_hetero]
protein_r = structure[(structure.chain_id == "R") & non_hetero]

# Quick check if the two protein chains are really identical:
# 'get_residues()' returns an '(ids, names)' tuple, whose length is
# always 2, so the number of residues is compared instead
assert (
    struc.get_residue_count(protein_l) == struc.get_residue_count(protein_r)
)

# Fast identification of contacts via a cell list:
# The cell list is initialized with the coordinates of the DNA
# and later provided with the atom coordinates of the two protein chains
cell_list = struc.CellList(dna, cell_size=THRESHOLD_DISTANCE)

# Sets to store the residue IDs of contact residues
# for each protein chain
id_set_l = set()
id_set_r = set()

for protein, res_id_set in zip((protein_l, protein_r), (id_set_l, id_set_r)):
    # For each atom in the protein chain,
    # find all atoms in the DNA that are in contact with it
    contacts = cell_list.get_atoms(protein.coord, radius=THRESHOLD_DISTANCE)
# Format of the 'insert_time' column values
timestamp_format = "%Y-%m-%d %H:%M:%S"

# Queue the insertion of the complete content of the open file 'f'
# (presumably the CIF file of this dimer -- opened outside this section)
content = f.read()
query = (
    'INSERT INTO interfaces_cif (dimer_id, cif_file, insert_time) VALUES(?,?,?)',
    (dimer_id, content, datetime.now().strftime(timestamp_format)),
)
sql_queries.append(query)
# The CIF file on disk is not needed anymore
os.remove("{}.cif".format(dimer_id))

# Residue names of both chains of the pair, each as comma separated string
interface_atoms = extInterface[4]
names_1 = struc.get_residues(
    interface_atoms[interface_atoms.chain_id == comb[0]]
)[1]
seq_1 = ", ".join(str(name) for name in names_1)
names_2 = struc.get_residues(
    interface_atoms[interface_atoms.chain_id == comb[1]]
)[1]
seq_2 = ", ".join(str(name) for name in names_2)

# Queue the insertion of both sequences
query = (
    'INSERT INTO interfaces_seq (dimer_id, pdb_id, chain_1, chain_2, sequence_1, sequence_2, insert_time) VALUES(?,?,?,?,?,?,?)',
    (
        dimer_id, pdb_id, comb[0], comb[1], seq_1, seq_2,
        datetime.now().strftime(timestamp_format),
    ),
)
sql_queries.append(query)
def _plot_series(x, y, xlim, ylim, xlabel, ylabel):
    """Draw one line plot with fixed axis limits on a new 6 x 3 figure."""
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(x, y, color=biotite.colors["dimorange"])
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    figure.tight_layout()
    return figure


def rmsf_plot(topology, xtc_traj, start_frame=None, stop_frame=None,
              write_dat_files=None):
    """
    Plot RMSD, radius of gyration and RMSF of the protein in a trajectory.

    Three figures are created and shown via ``plt.show()``: the RMSD
    and the radius of gyration over time, and the RMSF of the ``CA``
    atoms per residue.  The axis limits of all plots are fixed values,
    so data outside of them is not visible.

    Parameters
    ----------
    topology
        Passed to ``strucio.load_structure`` to obtain the template
        structure.
    xtc_traj
        Passed to ``XTCFile.read`` as the trajectory to load.
    start_frame : int, optional
        Forwarded as ``start`` to the trajectory reader.  ``None``
        leaves the choice to the reader.
    stop_frame : int, optional
        Last frame to load.  The reader is given ``stop_frame + 1`` as
        ``stop``, i.e. ``stop_frame`` itself is meant to be included.
        ``None`` is forwarded unchanged.
    write_dat_files : bool, optional
        If true, the RMSD values are written to ``rmsd.dat`` and the
        RMSF values to ``rmsf.dat`` (tab separated, relative to the
        current working directory).
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)
    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame + 1' must not be evaluated for the default 'None'
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(
        xtc_traj,
        atom_i=np.where(protein_mask)[0],
        start=start_frame,
        stop=stop,
    )
    trajectory = xtc_file.get_structure(template)
    # Get simulation time for plotting purposes
    time = xtc_file.get_time()

    trajectory = struc.remove_pbc(trajectory)
    # Only the superimposed trajectory is needed, not the transformation
    trajectory, _ = struc.superimpose(trajectory[0], trajectory)

    rmsd = struc.rmsd(trajectory[0], trajectory)
    _plot_series(
        time, rmsd,
        xlim=(time[0], time[-1]), ylim=(0, 2),
        xlabel="Time (ps)", ylabel="RMSD (Å)",
    )

    radius = struc.gyration_radius(trajectory)
    _plot_series(
        time, radius,
        xlim=(time[0], time[-1]), ylim=(14.0, 14.5),
        xlabel="Time (ps)", ylabel="Radius of gyration (Å)",
    )

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    res_count = struc.get_residue_count(trajectory)
    _plot_series(
        np.arange(1, res_count + 1), rmsf,
        xlim=(1, res_count), ylim=(0, 1.5),
        xlabel="Residue", ylabel="RMSF (Å)",
    )

    if write_dat_files:
        # Write RMSD *.dat file
        # The labels start at 'start_frame - 1' (0 if no start frame is
        # given); one label per loaded frame keeps index and data aligned
        first_label = 0 if start_frame is None else start_frame - 1
        frames = np.arange(first_label, first_label + len(rmsd), dtype=int)
        # The first label is always forced to 0
        frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(
            data=rmsf, index=residue_names, columns=["RMSF Values"]
        )
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')

    plt.show()
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import biotite.sequence.graphics as graphics
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.patches import Arc
import numpy as np


# Download the PDB file into the temporary directory and read it
pdb_file_path = rcsb.fetch("4p5j", "pdb", gettempdir())
pdb_file = pdb.PDBFile.read(pdb_file_path)
# Keep only the first entry of the parsed structure
# (presumably the first model) and reduce it to its nucleotides
atom_array = pdb.get_structure(pdb_file)[0]
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]

# Get the residue names and residue ids of the nucleotides
residue_ids, residue_names = struc.get_residues(nucleotides)
residue_count = len(residue_ids)

# Create a matplotlib pyplot
fig, ax = plt.subplots(figsize=(8.0, 4.5))

# Setup the axis: the x-axis has room for one position per residue,
# the y-axis is half as high
ax.set_xlim(0.5, residue_count + 0.5)
ax.set_ylim(0, residue_count / 2 + 0.5)
ax.set_aspect("equal")
ax.xaxis.set_major_locator(ticker.MultipleLocator(3))
ax.tick_params(axis='both', which='major', labelsize=8)
ax.set_yticks([])

# Remove the frame
plt.box(False)