def create(pdb_id, directory, include_gro):
    """
    Download one PDB entry and derive the additional test file formats.

    The entry is always (re-)downloaded as ``.pdb``, ``.cif`` and
    ``.mmtf`` into `directory`.
    The PDB file is then loaded and saved again as ``.npz`` and, if
    requested, converted to ``.gro`` via the external ``editconf``
    program.

    Parameters
    ----------
    pdb_id : str
        The PDB ID; it is also used as base name of all created files.
    directory : str
        The directory the files are written to.
    include_gro : bool
        If true, a ``.gro`` file is created in addition.
    """
    # Existing downloads are replaced (overwrite=True)
    for suffix in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, suffix, directory, overwrite=True)

    pdb_path = join(directory, pdb_id + ".pdb")
    try:
        structure = strucio.load_structure(pdb_path)
    except biotite.InvalidFileError:
        # Structure probably contains multiple models with different
        # number of atoms
        # -> Cannot load AtomArrayStack
        # -> Skip writing GRO and NPZ file
        return

    # Create *.npz file
    strucio.save_structure(join(directory, pdb_id + ".npz"), structure)

    if not include_gro:
        return

    # Create *.gro file using GROMACS
    # Writing the loaded structure to a temporary PDB file first is
    # meant to clean it -> remove inscodes and altlocs
    temp_pdb = biotite.temp_file("pdb")
    strucio.save_structure(temp_pdb, structure)
    gro_path = join(directory, pdb_id + ".gro")
    # The output of the conversion program is discarded and its exit
    # status is not checked
    command = ["editconf", "-f", temp_pdb, "-o", gro_path]
    subprocess.run(
        command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
def api_route():
    """
    Return the secondary structure annotations for a PDB entry as JSON.

    The request arguments ``pdb_id`` (default ``"1Q2W"``) and ``format``
    (default ``"mmtf"``) select the file that is fetched from the RCSB.
    The secondary structure is computed with three methods
    (``mmtf_sec``, ``dssp_sec``, ``psea_sec``).
    A method that fails contributes an empty list instead of aborting
    the whole request.

    Returns
    -------
    response
        The result of ``jsonify()`` with the keys ``sequence``,
        ``mmtf``, ``dssp``, ``psea`` and ``diffs``.
    """
    pdb_id = request.args.get("pdb_id", "1Q2W")
    file_format = request.args.get("format", "mmtf")
    # NOTE(review): the requested format is forwarded to fetch(), but
    # the file is always parsed as MMTF below - confirm that other
    # formats are not expected to work here
    file_name = rcsb.fetch(pdb_id, file_format, biotite.temp_dir())
    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(file_name)

    # Insertion order defines the order of the keys in the response
    predictors = {
        "mmtf": mmtf_sec,
        "dssp": dssp_sec,
        "psea": psea_sec,
    }
    structs = {}
    for method, predict in predictors.items():
        try:
            structs[method] = predict(mmtf_file).tolist()
        except Exception:
            # Best effort: each method falls back to its own empty
            # result, without touching the results of the other methods
            # (previously a P-SEA failure cleared the DSSP result and
            # left the P-SEA result undefined)
            structs[method] = []

    return jsonify(
        sequence=list(mmtf_file["entityList"][0]["sequence"]),
        **structs,
        diffs=diff_all(**structs),
    )
def create(pdb_id, directory, include_gro):
    """
    Download one PDB entry and optionally convert it to a GRO file.

    The entry is fetched as ``.pdb``, ``.cif`` and ``.mmtf`` into
    `directory`.
    If `include_gro` is true, the PDB file is additionally converted
    into a ``.gro`` file via ``gmx editconf``.

    Parameters
    ----------
    pdb_id : str
        The PDB ID; it is also used as base name of all created files.
    directory : str
        The directory the files are written to.
    include_gro : bool
        If true, a ``.gro`` file is created in addition.
    """
    for suffix in ("pdb", "cif", "mmtf"):
        rcsb.fetch(pdb_id, suffix, directory)

    if not include_gro:
        return

    # Create *.gro file using GROMACS
    # Loading the PDB file and writing it to a temporary file is meant
    # to clean it -> remove inscodes and altlocs
    structure = strucio.load_structure(join(directory, pdb_id + ".pdb"))
    temp_pdb = biotite.temp_file("pdb")
    strucio.save_structure(temp_pdb, structure)
    gro_path = join(directory, pdb_id + ".gro")
    # The output of GROMACS is discarded and its exit status is not
    # checked
    command = ["gmx", "editconf", "-f", temp_pdb, "-o", gro_path]
    subprocess.run(
        command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
def get_diameter(pdb_id):
    """
    Compute the diameter of a protein structure.

    The diameter is the largest distance between any two atoms that
    belong to amino acid residues.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure, which is fetched in MMTF format.

    Returns
    -------
    diameter : float
        The maximum pairwise atom distance.
    """
    structure = strucio.load_structure(
        rcsb.fetch(pdb_id, "mmtf", gettempdir())
    )
    # Keep amino acid residues only
    protein = structure[struc.filter_amino_acids(structure)]
    positions = protein.coord
    # Broadcasting yields the difference vector for each pair of atoms
    deltas = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    squared_distances = (deltas * deltas).sum(axis=-1)
    # The square root is only required for the largest value
    return np.sqrt(squared_distances.max())
def plot_rna(pdb_id, axes):
    """
    Plot the secondary structure of the nucleotides of a PDB entry.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure; it is also used as plot title.
    axes : Axes
        The axes the secondary structure is plotted on.
    """
    # Fetch the entry and take the nucleotides of the first model
    structure_file = pdb.PDBFile.read(rcsb.fetch(pdb_id, "pdb", gettempdir()))
    first_model = pdb.get_structure(structure_file)[0]
    nucleotides = first_model[struc.filter_nucleotides(first_model)]

    # Base pairs are converted into pairs of residue positions
    atom_pairs = struc.base_pairs(nucleotides)
    base_pairs = struc.get_residue_positions(
        nucleotides, atom_pairs.flatten()
    ).reshape(atom_pairs.shape)
    pseudoknot_order = struc.pseudoknots(base_pairs)[0]
    pair_count = base_pairs.shape[0]

    # The line style of each bond encodes its pseudoknot order
    linestyles = np.full(pair_count, '-', dtype=object)
    for order, style in ((1, '--'), (2, ':')):
        linestyles[pseudoknot_order == order] = style

    # Upper case one-letter-code for canonical nucleotides,
    # lower case one-letter-code for non-canonical nucleotides
    base_labels = []
    for base in struc.residue_iter(nucleotides):
        code, is_exact = struc.map_nucleotide(base)
        base_labels.append(code if is_exact else code.lower())

    # Canonical Watson-Crick base pairs get a darker orange than the
    # default lighter orange of the non-canonical base pairs
    watson_crick = (["A", "U"], ["C", "G"])
    colors = np.full(pair_count, biotite.colors['brightorange'])
    for i, (base1, base2) in enumerate(base_pairs):
        if sorted([base_labels[base1], base_labels[base2]]) in watson_crick:
            colors[i] = biotite.colors["dimorange"]

    graphics.plot_nucleotide_secondary_structure(
        axes,
        base_labels,
        base_pairs,
        struc.get_residue_count(nucleotides),
        pseudoknot_order=pseudoknot_order,
        bond_linestyle=linestyles,
        bond_color=colors,
        # Margin to compensate for reduced axis limits in shared axis
        border=0.13,
    )

    # Each plot is labeled with its PDB ID
    axes.set_title(pdb_id, loc="left")
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and check that a
    structure can be parsed from the result.

    If `as_file_like` is true, no target directory is given to
    ``fetch()``, otherwise the temporary directory is used.
    """
    if as_file_like:
        path = None
    else:
        path = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, path, overwrite=True)

    # File class and structure getter for each tested format
    parsers = {
        "pdb": (pdb.PDBFile, pdb.get_structure),
        "pdbx": (pdbx.PDBxFile, pdbx.get_structure),
        "mmtf": (mmtf.MMTFFile, mmtf.get_structure),
    }
    # For any other format only the download itself is tested
    if format in parsers:
        file_class, get_structure = parsers[format]
        parsed = file_class()
        parsed.read(fetched)
        get_structure(parsed)
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and check that the
    result can be parsed.

    If `as_file_like` is true, no target directory is given to
    ``fetch()``, otherwise the temporary directory is used.
    """
    if as_file_like:
        path = None
    else:
        path = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, path, overwrite=True)

    # File class and structure getter for each structure format
    structure_parsers = {
        "pdb": (pdb.PDBFile, pdb.get_structure),
        "pdbx": (pdbx.PDBxFile, pdbx.get_structure),
        "mmtf": (mmtf.MMTFFile, mmtf.get_structure),
    }
    if format in structure_parsers:
        file_class, get_structure = structure_parsers[format]
        get_structure(file_class.read(fetched))
    elif format == "fasta":
        fasta_file = fasta.FastaFile.read(fetched)
        # Test if the file contains any sequences
        assert len(fasta.get_sequences(fasta_file)) > 0
def test_search_sequence():
    """
    Search the RCSB for sequences similar to the one of ``1l2y`` and
    check that each hit fulfills the requested minimum sequence
    identity to the query sequence.
    """
    IDENTITY_CUTOFF = 0.9

    pdbx_file = pdbx.PDBxFile.read(join(data_dir("structure"), "1l2y.cif"))
    ref_sequence = pdbx.get_sequence(pdbx_file)[0]
    query = rcsb.SequenceQuery(
        ref_sequence, "protein", min_identity=IDENTITY_CUTOFF
    )
    test_ids = rcsb.search(query)
    # Without any hit the loop below would pass without testing
    # anything; the query sequence is taken from a PDB entry itself,
    # so at least one hit is expected
    assert len(test_ids) > 0

    # The substitution matrix is the same for every alignment
    matrix = align.SubstitutionMatrix.std_protein_matrix()
    for pdb_id in test_ids:
        fasta_file = fasta.FastaFile.read(rcsb.fetch(pdb_id, "fasta"))
        test_sequence = fasta.get_sequence(fasta_file)
        alignment = align.align_optimal(
            ref_sequence, test_sequence, matrix, terminal_penalty=False
        )[0]
        identity = align.get_sequence_identity(alignment, mode="shortest")
        assert identity >= IDENTITY_CUTOFF
# License: BSD 3 clause import biotite import biotite.structure as struc import biotite.structure.io as strucio import biotite.structure.io.pdbx as pdbx import biotite.database.rcsb as rcsb import numpy as np # The output file names # Modify these values for actual file output ku_dna_file = biotite.temp_file("ku_dna.cif") ku_file = biotite.temp_file("ku.cif") # Download and parse structure files file = rcsb.fetch("1JEY", "mmtf", biotite.temp_dir()) ku_dna = strucio.load_structure(file) file = rcsb.fetch("1JEQ", "mmtf", biotite.temp_dir()) ku = strucio.load_structure(file) # Remove DNA and water ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")] ku_dna = ku_dna[~struc.filter_solvent(ku_dna)] ku = ku[~struc.filter_solvent(ku)] # The structures have a differing amount of atoms missing # at the the start and end of the structure # -> Find common structure ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)] ku_common = ku[struc.filter_intersection(ku, ku_dna)] # Superimpose ku_superimposed, transformation = struc.superimpose( ku_dna_common, ku_common, (ku_common.atom_name == "CA"))
Downloading structure files from the *RCSB PDB* is quite easy: Simply specify the PDB ID, the file format and the target directory for the :func:`fetch()` function and you are done. The function even returns the path to the downloaded file, so you can just load it via the other *Biotite* subpackages (more on this later). We will download a protein structure of the miniprotein *TC5b* (PDB: 1L2Y) into a temporary directory. """ from os.path import relpath import biotite import biotite.database.rcsb as rcsb file_path = rcsb.fetch("1l2y", "pdb", biotite.temp_dir()) print(relpath(file_path)) ######################################################################## # In case you want to download multiple files, you are able to specify a # list of PDB IDs, which in turn gives you a list of file paths. # Download files in the more modern mmCIF format file_paths = rcsb.fetch(["1l2y", "1aki"], "cif", biotite.temp_dir()) print([relpath(file_path) for file_path in file_paths]) ######################################################################## # By default :func:`fetch()` checks whether the file to be fetched # already exists in the directory, and downloads it, if it does not # exist yet. # If you want to download files irrespectively, set :obj:`overwrite` to
capsid from *Paramecium bursaria Chlorella virus type 1* - a homo-5040-mer! First, we will check which assemblies are available to us. """ # Code source: Patrick Kunzmann # License: BSD 3 clause import numpy as np import biotite.structure as struc import biotite.structure.io.pdbx as pdbx import biotite.structure.io as strucio import biotite.database.rcsb as rcsb pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1M4X", "mmcif")) assemblies = pdbx.list_assemblies(pdbx_file) print("ID name") print() for assembly_id, name in assemblies.items(): print(f"{assembly_id:2} {name}") ######################################################################## # ``'complete icosahedral assembly'`` sounds good. # In fact, often the first assembly is the complete one. # Hence, the :func:`get_assembly()` function builds the first assembly # by default. # Since we know the ID we want (``'1'``), we will provide it to this # function anyway. # It returns the chosen assembly as :class:`AtomArray`.
[116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51], [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98], [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88], [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01], [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82], [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16], [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74], [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19], [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90], [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23], [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91], ]) # Fetch animal lysoyzme structures lyso_files = rcsb.fetch(["1REX", "1AKI", "1DKJ", "1GD6"], format="mmtf", target_path=biotite.temp_dir()) organisms = ["H. sapiens", "G. gallus", "C. viginianus", "B. mori"] # Create a PB sequence from each structure pb_seqs = [] for file_name in lyso_files: file = mmtf.MMTFFile() file.read(file_name) # Take only the first model into account array = mmtf.get_structure(file, model=1) # Remove everything but the first protein chain array = array[struc.filter_amino_acids(array)] array = array[array.chain_id == array.chain_id[0]] # Calculate backbone dihedral angles,
# Rendering settings for the cartoon representation
ammolite.cmd.set("cartoon_side_chain_helper", 1)
ammolite.cmd.set("cartoon_oval_length", 0.8)
ammolite.cmd.set("depth_cue", 0)
ammolite.cmd.set("valence", 0)

#----------------------------------------------------------------------#

# Define colors used later
# Each color is registered under the name it has in 'biotite.colors'
ammolite.cmd.set_color("lightorange", to_rgb(biotite.colors["lightorange"]))
ammolite.cmd.set_color("lightgreen", to_rgb(biotite.colors["lightgreen"]))
ammolite.cmd.set_color("darkgreen", to_rgb(biotite.colors["darkgreen"]))

#----------------------------------------------------------------------#

# Fetch and load cytochrome C structure and remove water
# Only the first model is used and the bonds are loaded as well
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("1C75", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
cyt_c = structure[structure.res_name != "HOH"]

pymol_cyt_c = ammolite.PyMOLObject.from_structure(cyt_c)

#----------------------------------------------------------------------#

# Style protein
protein_mask = struc.filter_amino_acids(cyt_c)

pymol_cyt_c.show_as("cartoon", protein_mask)
# Only the carbon atoms of the protein are colored
pymol_cyt_c.color("lightgreen", protein_mask & (cyt_c.element == "C"))

#----------------------------------------------------------------------#
def build_patterns(structfam, folder):
    """
    Determine the most frequent secondary structure pattern of a family
    of structures.

    For each entry of `structfam` the MMTF file is fetched, the
    secondary structure of the selected chain and residue range is read
    from the ``"secStructList"`` field and condensed into a pattern of
    consecutive secondary structure elements, once with 8 classes and
    once with 3 classes.
    The most frequent pattern of each kind is stored via ``push()`` in
    ``{folder}/data.pt`` and returned.

    Parameters
    ----------
    structfam : iterable of tuple
        Each element is unpacked into ``(pdb, c, start, end)``:
        the PDB ID, the chain ID and the first and last index
        (inclusive) of the considered range.
    folder : str
        The folder containing the ``data.pt`` file the result is
        pushed to.

    Returns
    -------
    c_pattern3, n_pattern3, c_pattern8, n_pattern8, occ_sum3, occ_sum8
        The most frequent 3-class and 8-class pattern, each as string
        and as list of class indices, followed by the dictionaries
        mapping each observed pattern string to its number of
        occurrences.
        If `structfam` yields no entry, a tuple of four ``None`` values
        is returned instead.
    """
    # NOTE(review): the early return below yields 4 values, while the
    # regular return yields 6 values - confirm what callers unpack
    patterns = []
    for pdb, c, start, end in tqdm(structfam):
        file_name = rcsb.fetch(pdb, "mmtf", biotite.temp_dir())
        mmtf_file = mmtf.MMTFFile()
        mmtf_file.read(file_name)
        array = mmtf.get_structure(mmtf_file, model=1)
        tk_dimer = array[struc.filter_amino_acids(array)]
        # The chain ID corresponding to each residue
        # NOTE(review): the residue starts are computed for the filtered
        # 'tk_dimer', but index the unfiltered 'array' - this is only
        # equivalent, if no filtered atom precedes an amino acid; confirm
        chain_id_per_res = array.chain_id[struc.get_residue_starts(tk_dimer)]
        sse = mmtf_file["secStructList"]
        # Truncate to the number of residues, then select the chain and
        # the requested range ('end' is inclusive)
        sse = sse[:chain_id_per_res.shape[0]][chain_id_per_res == c]
        sse = np.array(sse[start:end + 1])
        # The modulo wraps the code -1 to the index 7
        sse = np.array([sec_struct_codes[code % 8] for code in sse], dtype="U1")
        # 8-class pattern:
        # The difference of consecutive one-hot rows is -1 in the column
        # of the class that ends at this position and 1 in the column of
        # the class that begins
        sse8 = to_onehot([dssp_codes[x] for x in sse], (None, 8))
        dss8 = (sse8[1:] - sse8[:-1])
        cls = to_onehot(np.where(dss8 == -1)[1], (None, 8)).T
        # Rows of 'bbox': the positions where 'dss8' is 1 and -1,
        # followed by the one-hot encoded class that ends
        bbox = np.array(
            [np.where(dss8 == 1)[0], np.where(dss8 == -1)[0], *cls]).T
        pat8 = np.argmax(bbox[:, 2:], 1)
        # 3-class pattern: same procedure after mapping the codes via
        # 'dssp_to_abc'
        sse3 = to_onehot([abc_codes[dssp_to_abc[x]] for x in sse], (None, 3))
        dss3 = (sse3[1:] - sse3[:-1])
        cls = to_onehot(np.where(dss3 == -1)[1], (None, 3)).T
        bbox = np.array(
            [np.where(dss3 == 1)[0], np.where(dss3 == -1)[0], *cls]).T
        pat3 = np.argmax(bbox[:, 2:], 1)
        patterns.append((pat3, pat8))
    if len(patterns) == 0:
        print("No pattern find")
        return None, None, None, None
    # Each pattern is kept as string ('c_...') and as list of class
    # indices ('n_...')
    # NOTE(review): 'weights' is never used afterwards
    c_patterns3, n_patterns3, c_patterns8, n_patterns8, weights = [], [], [], [], []
    for pat3, pat8 in patterns:
        char_pat8 = "".join([sec_struct_codes[x] for x in pat8])
        char_pat3 = "".join(["abc"[x] for x in pat3])
        c_patterns8.append(char_pat8)
        n_patterns8.append(list(pat8))
        c_patterns3.append(char_pat3)
        n_patterns3.append(list(pat3))
    # Number of occurrences of each pattern string and the
    # (string, index list) pair belonging to it
    occ_sum8 = dict()
    occ_sum3 = dict()
    correspondings8 = dict()
    correspondings3 = dict()
    for c8, n8, c3, n3 in zip(c_patterns8, n_patterns8, c_patterns3, n_patterns3):
        # Structures with an empty 3-class pattern are not counted
        if len(c3) == 0:
            continue
        # Pad the patterns, so that each one begins and ends with
        # "c" (index 2) or "C" (index 7), respectively
        if c3[0] != "c":
            c3 = "c" + c3
            n3 = [2] + n3
        if c3[-1] != "c":
            c3 = c3 + "c"
            n3 = n3 + [2]
        if c8[0] != "C":
            c8 = "C" + c8
            n8 = [7] + n8
        if c8[-1] != "C":
            c8 = c8 + "C"
            n8 = n8 + [7]
        if c8 not in occ_sum8.keys():
            occ_sum8[c8] = 0
            correspondings8[c8] = c8, n8
        occ_sum8[c8] += 1
        if c3 not in occ_sum3.keys():
            occ_sum3[c3] = 0
            correspondings3[c3] = c3, n3
        occ_sum3[c3] += 1
    # Choose the pattern with the highest number of occurrences
    # NOTE(review): if every pattern was skipped above, the dictionaries
    # are empty and max() raises a ValueError - confirm this cannot
    # happen
    c_pattern8, n_pattern8 = correspondings8[max(occ_sum8, key=occ_sum8.get)]
    c_pattern3, n_pattern3 = correspondings3[max(occ_sum3, key=occ_sum3.get)]
    push(f"{folder}/data.pt", "pattern", (c_pattern3, n_pattern3, c_pattern8, n_pattern8))
    return c_pattern3, n_pattern3, c_pattern8, n_pattern8, occ_sum3, occ_sum8
.. currentmodule:: biotite.database.rcsb Downloading structure files from the *RCSB PDB* is quite easy: Simply specify the PDB ID, the file format and the target directory for the :func:`fetch()` function and you are done. The function returns the path to the downloaded file, so you can simply load the file via the other *Biotite* subpackages (more on this later). We will download a protein structure of the miniprotein *TC5b* (PDB: 1L2Y) into a temporary directory. """ from tempfile import gettempdir import biotite.database.rcsb as rcsb file_path = rcsb.fetch("1l2y", "pdb", gettempdir()) print(file_path) ######################################################################## # In case you want to download multiple files, you are able to specify a # list of PDB IDs, which in turn gives you a list of file paths. # Download files in the more modern mmCIF format file_paths = rcsb.fetch(["1l2y", "1aki"], "cif", gettempdir()) print([file_path for file_path in file_paths]) ######################################################################## # By default :func:`fetch()` checks whether the file to be fetched # already exists in the directory and downloads it, if it does not # exist yet. # If you want to download files irrespectively, set :obj:`overwrite` to
def plot_gaps(pdb_id, chain_id, ax):
    """
    Plot which residues of a chain are resolved in a structure.

    Each residue ID from 1 up to the last residue ID of the chain is
    drawn as a colored segment: red, if no atom with this residue ID
    exists, yellow, if the residue exists but is named ``UNK``, and
    green otherwise.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure, which is fetched in MMTF format.
    chain_id : str
        The chain to be considered.
    ax : Axes
        The axes the segments are drawn on.
    """
    # Download and parse structure file
    path = rcsb.fetch(pdb_id, "mmtf", gettempdir())
    atom_array = strucio.load_structure(path)
    # Consider only one chain
    atom_array = atom_array[atom_array.chain_id == chain_id]

    # One state per residue ID, 'states[i]' belongs to residue ID i+1:
    # 0 -> not existing ('red')
    # 1 -> existing but polyalanine ('yellow')
    # 2 -> existing ('green')
    states = np.zeros(atom_array.res_id[-1], dtype=int)
    for i in range(len(states)):
        # Get array for only one residue ID
        residue = atom_array[atom_array.res_id == i + 1]
        if len(residue) == 0:
            states[i] = 0
        elif residue.res_name[0] == "UNK":
            states[i] = 1
        else:
            states[i] = 2

    # Find the intervals for each state as (start, stop, state),
    # where 'stop' is exclusive
    state_intervals = []
    curr_start = 0
    for i in range(1, len(states)):
        if states[i] != states[i - 1]:
            state_intervals.append((curr_start, i, states[curr_start]))
            curr_start = i
    if len(states) > 0:
        # The last interval extends to the end of the array:
        # Using the last index as stop would omit the last residue
        state_intervals.append(
            (curr_start, len(states), states[curr_start])
        )

    # Draw the state intervals as colored rectangles
    state_colors = {0: "firebrick", 1: "gold", 2: "forestgreen"}
    for start, stop, state in state_intervals:
        # A residue ID is the index + 1 and each residue is centered
        # on its residue ID
        ax.add_patch(
            Rectangle(
                (start + 1 - 0.5, 0), stop - start, 1,
                edgecolor="None", facecolor=state_colors[int(state)]
            )
        )

    # Some other visual stuff
    for side in ("left", "bottom", "right", "top"):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_xlim(0.5, len(states) + 0.5)
    ax.set_ylim(0, 2)
# Code source: Patrick Kunzmann # License: BSD 3 clause import numpy as np import matplotlib.pyplot as plt from scipy.stats import spearmanr import biotite.structure as struc import biotite.structure.info as info import biotite.structure.io.mmtf as mmtf import biotite.structure.graphics as graphics import biotite.database.rcsb as rcsb import biotite.application.autodock as autodock # Get the receptor structure # and the original 'correct' conformation of the ligand mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf")) structure = mmtf.get_structure( # Include formal charge for accurate partial charge calculation mmtf_file, model=1, include_bonds=True, extra_fields=["charge"]) # The asymmetric unit describes a streptavidin homodimer # However, we are only interested in a single monomer structure = structure[structure.chain_id == "B"] receptor = structure[struc.filter_amino_acids(structure)] ref_ligand = structure[structure.res_name == "BTN"] ref_ligand_center = struc.centroid(ref_ligand) # Independently, get the ligand without optimized conformation
} # Converter for the DSSP secondary structure elements # to the classical ones dssp_to_abc = { "I": "c", "S": "c", "H": "a", "E": "b", "G": "c", "B": "b", "T": "c", "C": "c" } # Fetch and load structure file_name = rcsb.fetch("1QGD", "mmtf", biotite.temp_dir()) mmtf_file = mmtf.MMTFFile() mmtf_file.read(file_name) array = mmtf.get_structure(mmtf_file, model=1) # Transketolase homodimer tk_dimer = array[struc.filter_amino_acids(array)] # Transketolase monomer tk_mono = tk_dimer[tk_dimer.chain_id == "A"] # The chain ID corresponding to each residue chain_id_per_res = array.chain_id[struc.get_residue_starts(tk_dimer)] sse = mmtf_file["secStructList"] sse = sse[sse != -1] sse = sse[chain_id_per_res == "A"] sse = np.array([sec_struct_codes[code] for code in sse if code != -1], dtype="U1")
- a homo-5040-mer! First, we will check which assemblies are available to us. """ # Code source: Patrick Kunzmann # License: BSD 3 clause import numpy as np import biotite.structure as struc import biotite.structure.io.pdbx as pdbx import biotite.structure.io as strucio import biotite.database.rcsb as rcsb pdbx_file = pdbx.PDBxFile() pdbx_file.read(rcsb.fetch("1M4X", "mmcif")) assemblies = pdbx.list_assemblies(pdbx_file) print("ID name") print() for assembly_id, name in assemblies.items(): print(f"{assembly_id:2} {name}") ######################################################################## # ``'complete icosahedral assembly'`` sounds good. # In fact, often the first assembly is the complete one. # Hence, the :func:`get_assembly()` function builds the first assembly # by default. # Since we know the ID we want (``'1'``), we will provide it to this # function anyway. # It returns the chosen assembly as :class:`AtomArray`.
""" # Code source: Patrick Kunzmann # License: BSD 3 clause from tempfile import gettempdir import biotite.structure as struc import biotite.structure.io as strucio import biotite.database.rcsb as rcsb import matplotlib.pyplot as plt import numpy as np from matplotlib import colors import scipy.stats as sts # Download and parse file file = rcsb.fetch("3vkh", "cif", gettempdir()) atom_array = strucio.load_structure(file) # Calculate backbone dihedral angles # from one of the two identical chains in the asymmetric unit phi, psi, omega = struc.dihedral_backbone( atom_array[atom_array.chain_id == "A"]) # Conversion from radians into degree phi *= 180 / np.pi psi *= 180 / np.pi # Remove invalid values (NaN) at first and last position phi = phi[1:-1] psi = psi[1:-1] # Plot density figure = plt.figure() ax = figure.add_subplot(111)
def analyze_chirality(array):
    """
    Determine the enantiomer of each amino acid residue.

    Parameters
    ----------
    array : AtomArray
        The structure to be analyzed.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        For each residue the value returned by ``get_enantiomer()``.
        The value is 0 for residues where the backbone atoms and the
        CB atom do not sum up to exactly 4 atoms, e.g. glycine.
    """
    # Only the backbone and the CB atom of amino acids are relevant
    protein = array[struc.filter_amino_acids(array)]
    relevant_mask = (protein.atom_name == "CB") | (struc.filter_backbone(protein))
    protein = protein[relevant_mask]

    res_ids, _ = struc.get_residues(protein)
    # Zero is the default for residues without chirality
    enantiomers = np.zeros(len(res_ids), dtype=int)
    for index, res_id in enumerate(res_ids):
        atom_coord = protein.coord[protein.res_id == res_id]
        if len(atom_coord) == 4:
            # The four atom coordinates are passed in their order
            # in the structure
            enantiomers[index] = get_enantiomer(*atom_coord)
    return enantiomers


# Fetch and parse structure file
file = rcsb.fetch("1l2y", "mmtf", gettempdir())
stack = strucio.load_structure(file)
# The analysis is performed on the first model only
array = stack[0]
# Get enantiomers
print("1l2y ", analyze_chirality(array))
# Reflected structures have opposite enantiomers
# Test via reflection at x-y-plane, z -> -z
array_reflect = array.copy()
array_reflect.coord[:, 2] *= -1
print("1l2y (reflected)", analyze_chirality(array_reflect))
def test_fetch_invalid(format):
    """
    Check that fetching a non-existing PDB ID raises a `RequestError`.
    """
    with pytest.raises(RequestError):
        # The return value is irrelevant, as the call is expected
        # to raise
        rcsb.fetch("xxxx", format, biotite.temp_dir(), overwrite=True)
def analyze_chirality(array):
    """
    Determine the enantiomer of each amino acid residue.

    Parameters
    ----------
    array : AtomArray
        The structure to be analyzed.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        For each residue the value returned by ``get_enantiomer()``.
        The value is 0 for residues where the backbone atoms and the
        CB atom do not sum up to exactly 4 atoms, e.g. glycine.
    """
    # Only the backbone and the CB atom of amino acids are relevant
    protein = array[struc.filter_amino_acids(array)]
    relevant_mask = (protein.atom_name == "CB") | (struc.filter_backbone(protein))
    protein = protein[relevant_mask]

    res_ids, _ = struc.get_residues(protein)
    # Zero is the default for residues without chirality
    enantiomers = np.zeros(len(res_ids), dtype=int)
    for index, res_id in enumerate(res_ids):
        atom_coord = protein.coord[protein.res_id == res_id]
        if len(atom_coord) == 4:
            # The four atom coordinates are passed in their order
            # in the structure
            enantiomers[index] = get_enantiomer(*atom_coord)
    return enantiomers


# Fetch and parse structure file
file = rcsb.fetch("1l2y", "mmtf", biotite.temp_dir())
stack = strucio.load_structure(file)
# The analysis is performed on the first model only
array = stack[0]
# Get enantiomers
print("1l2y ", analyze_chirality(array))
# Reflected structures have opposite enantiomers
# Test via reflection at x-y-plane, z -> -z
array_reflect = array.copy()
array_reflect.coord[:, 2] *= -1
print("1l2y (reflected)", analyze_chirality(array_reflect))
# Code source: Patrick Kunzmann
# License: BSD 3 clause

import numpy as np
import biotite.structure as struc
import biotite.structure.io.mmtf as mmtf
import biotite.database.rcsb as rcsb


# The maximum distance between an atom in the repressor and an atom in
# the DNA for them to be considered 'in contact'
THRESHOLD_DISTANCE = 4.0


# Fetch and load structure
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)


# Separate structure into the DNA and the two identical protein chains
# Hetero residues are excluded from all three selections
dna = structure[
    np.isin(structure.chain_id, ["A", "B"]) & ~structure.hetero
]
protein_l = structure[
    (structure.chain_id == "L") & ~structure.hetero
]
protein_r = structure[
    (structure.chain_id == "R") & ~structure.hetero
]
# Quick check if the two protein chains are really identical
# get_residues() returns a tuple of residue IDs and residue names:
# The number of residues is the length of one of its elements,
# the length of the tuple itself is always 2
assert len(struc.get_residues(protein_l)[0]) \
    == len(struc.get_residues(protein_r)[0])

# Fast identification of contacts via a cell list:
# The cell list is initialized with the coordinates of the DNA
# and later provided with the atom coordinates of the two protein chains
# Code source: Tom David Müller # License: BSD 3 clause from tempfile import gettempdir import biotite import biotite.structure.io.pdb as pdb import biotite.database.rcsb as rcsb import biotite.structure as struc import biotite.sequence.graphics as graphics import matplotlib.pyplot as plt import matplotlib.ticker as ticker from matplotlib.patches import Arc import numpy as np # Download the PDB file and read the structure pdb_file_path = rcsb.fetch("4p5j", "pdb", gettempdir()) pdb_file = pdb.PDBFile.read(pdb_file_path) atom_array = pdb.get_structure(pdb_file)[0] nucleotides = atom_array[struc.filter_nucleotides(atom_array)] # Get the residue names and residue ids of the nucleotides residue_ids = [] residue_names = [] for residue in struc.residue_iter(nucleotides): mapped_nucleotide, exact_match = struc.map_nucleotide(residue) if mapped_nucleotide is None: continue residue_ids.append(residue[0].res_id) if exact_match: residue_names.append(mapped_nucleotide) else:
######################################################################### # Now that the raw data is prepared, we can load a protein structure for # which we will display the glycosylation. # Here we choose the glycosylated peroxidase *4CUO*, as it contains a # lot of glycans. # # The resulting plot makes only sense for a single protein chain. # In this case the peroxidase structure has only one chain, but since # this script should also work for any other structure, we filter out # a single one. PDB_ID = "4CUO" CHAIN_ID = "A" mmtf_file = mmtf.MMTFFile.read(rcsb.fetch(PDB_ID, "mmtf")) structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True) structure = structure[structure.chain_id == CHAIN_ID] # We will need these later: # An array containing all residue IDs belonging to amino acids amino_acid_res_ids = np.unique(structure.res_id[~structure.hetero]) # A dictionary mapping residue IDs to their residue names ids_to_names = { res_id: res_name for res_id, res_name in zip(structure.res_id, structure.res_name) } ######################################################################## # To determine which residues (including the saccharides) are connected # with each other, we will use a graph representation:
######################################################################## # As test case a structure of a *cysteine knot* protein is used, # specifically the squash trypsin inhibitor *EETI-II* # (PDB: `2IT7 <http://www.rcsb.org/structure/2IT7>`_). # This motif is famous for its three characteristic disulfide bridges # forming a 'knot'. # However, the loaded MMTF file already has information about the # covalent bonds - including the disulfide bridges. # To have a proper test case, all disulfide bonds are removed from the # structure and we pretend that the structure never had information # about the disulfide bonds. # For later verification that the implemented function wroks correctly, # the disulfide bonds, that are removed, are printed out. mmtf_file = mmtf.MMTFFile.read( rcsb.fetch("2IT7", "mmtf", biotite.temp_dir()) ) knottin = mmtf.get_structure(mmtf_file, include_bonds=True, model=1) sulfide_indices = np.where( (knottin.res_name == "CYS") & (knottin.atom_name == "SG") )[0] for i, j, _ in knottin.bonds.as_array(): if i in sulfide_indices and j in sulfide_indices: print(knottin[i]) print(knottin[j]) print() knottin.bonds.remove_bond(i,j) ######################################################################## # Now the sanitized structure is put into the disulfide detection # function.
and the selecivity filter of the channel protein KcsA (PDB: 2KB1). The structure was resolved using NMR, so multiple models are present in the structure. Hence, we can also calculate the frequency of each bond. """ # Code source: Daniel Bauer # License: BSD 3 clause import biotite import matplotlib.pyplot as plt import biotite.structure as struc import biotite.structure.io as strucio import biotite.database.rcsb as rcsb file_name = rcsb.fetch("2KB1", "mmtf", biotite.temp_dir()) stack = strucio.load_structure(file_name) # Four identical chains, consider only chain A chain_a = stack[:, stack.chain_id == "A"] # Selection for p-helix p_helix = (chain_a.res_id >= 40) & (chain_a.res_id <= 52) # Selection for selectivity filter sf = (chain_a.res_id >= 53) & (chain_a.res_id <= 58) # Calculate the hydrogen bonds and the frequency of each bond triplets, mask = struc.hbond(chain_a, selection1=p_helix, selection2=sf) freq = struc.hbond_frequency(mask) # Create names of bonds label = "{d_resid}{d_resnm}-{d_a} -- {a_resid}{a_resnm}-{a_a}" names = [label.format(
""" # Code source: Tom David Müller # License: BSD 3 clause from tempfile import gettempdir import biotite import biotite.structure.io.pdb as pdb import biotite.database.rcsb as rcsb import biotite.structure as struc import biotite.structure.graphics as graphics import matplotlib.pyplot as plt import numpy as np # Download the PDB file and read the structure pdb_file_path = rcsb.fetch("6ZYB", "pdb", gettempdir()) pdb_file = pdb.PDBFile.read(pdb_file_path) atom_array = pdb.get_structure(pdb_file)[0] nucleotides = atom_array[struc.filter_nucleotides(atom_array)] # Compute the base pairs and the Leontis-Westhof nomenclature base_pairs = struc.base_pairs(nucleotides) glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs) edges = struc.base_pairs_edge(nucleotides, base_pairs) base_pairs = struc.get_residue_positions( nucleotides, base_pairs.flatten()).reshape(base_pairs.shape) # Get the one-letter-codes of the bases base_labels = [] for base in struc.residue_iter(nucleotides): base_labels.append(base.res_name[0])
""" # Code source: Patrick Kunzmann # License: BSD 3 clause import biotite import biotite.structure as struc import biotite.structure.io as strucio import biotite.database.rcsb as rcsb import matplotlib.pyplot as plt import numpy as np from matplotlib import colors import scipy.stats as sts # Download and parse file file = rcsb.fetch("3vkh", "cif", biotite.temp_dir()) atom_array = strucio.load_structure(file) # Calculate backbone dihedral angles # from one of the two identical chains in the asymmetric unit phi, psi, omega = struc.dihedral_backbone( atom_array[atom_array.chain_id == "A"]) # Conversion from radians into degree phi *= 180 / np.pi psi *= 180 / np.pi # Remove invalid values (NaN) at first and last position phi = phi[1:-1] psi = psi[1:-1] # Plot density figure = plt.figure() ax = figure.add_subplot(111)