def test_connect_via_distances():
    """
    Check that the bond list derived from interatomic distances equals
    the bond list stored in the MMTF file.
    """
    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(join(data_dir, "1l2y.mmtf"))
    structure = mmtf.get_structure(mmtf_file, include_bonds=True, model=1)

    # The reference bonds do not describe the protonated/deprotonated
    # termini properly -> restrict the comparison to the inner residues
    inner_residue_mask = (structure.res_id > 1) & (structure.res_id < 20)
    structure = structure[inner_residue_mask]

    # Keep only the two index columns, so that every reference bond
    # is of type BondType.ANY
    deposited_bonds = structure.bonds
    ref_bonds = struc.BondList(
        deposited_bonds.get_atom_count(),
        deposited_bonds.as_array()[:, :2]
    )

    test_bonds = struc.connect_via_distances(structure)

    assert test_bonds == ref_bonds
def test_modification(bond_list):
    """
    Check on a known example that adding and removing bonds changes a
    `BondList` in the expected way.
    """
    # Bond exists already
    bond_list.add_bond(3, 1)
    # Bond exists already as well -> its bond type is updated
    bond_list.add_bond(1, 3, 1)
    # Same bond again, this time addressed via negative atom indices
    bond_list.add_bond(-6, -4, 1)
    # New bond
    bond_list.add_bond(4, 1)
    # Existing bond -> gets removed
    bond_list.remove_bond(4, 0)
    # Non-existing bond -> nothing happens
    bond_list.remove_bond(0, 3)
    # Remove multiple bonds at once, one of them is not in the list
    bonds_to_remove = struc.BondList(10, np.array([(1, 0), (1, 2), (8, 9)]))
    bond_list.remove_bonds(bonds_to_remove)

    expected_bonds = [
        [1, 3, 1],
        [3, 4, 0],
        [4, 6, 0],
        [1, 4, 0],
    ]
    assert bond_list.as_array().tolist() == expected_bonds
def convert_to_atom_array(chempy_model, include_bonds=False):
    """
    Create an :class:`AtomArray` from a :class:`chempy.models.Indexed`
    object.

    Besides the mandatory annotations, the optional annotation
    categories ``b_factor``, ``occupancy``, ``charge`` and
    ``altloc_id`` are set in the returned :class:`AtomArray`.
    Atoms are taken over as they are, i.e. no filtering by *altloc* ID
    takes place.

    Parameters
    ----------
    chempy_model : Indexed
        The ``chempy`` model.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` built from the bonds of the
        model is attached to the returned atom array.

    Returns
    -------
    atom_array : AtomArray
        The converted structure.
    """
    model_atoms = chempy_model.atom
    model_bonds = chempy_model.bond
    n_atoms = len(model_atoms)

    def required(attr_name, dtype):
        # Attribute that every atom of the model must provide
        return np.array(
            [getattr(atom, attr_name) for atom in model_atoms], dtype=dtype
        )

    def optional(attr_name, fallback, dtype):
        # Attribute that may be missing -> use the fallback value then
        return np.array(
            [getattr(atom, attr_name, fallback) for atom in model_atoms],
            dtype=dtype
        )

    atom_array = struc.AtomArray(n_atoms)

    # Mandatory annotation arrays
    atom_array.chain_id = required("chain", "U3")
    atom_array.res_id = required("resi_number", int)
    atom_array.ins_code = required("ins_code", "U1")
    atom_array.res_name = required("resn", "U3")
    atom_array.hetero = required("hetatm", bool)
    atom_array.atom_name = required("name", "U6")
    atom_array.element = required("symbol", "U2")

    # Optional annotation arrays
    atom_array.set_annotation("b_factor", optional("b", 0, float))
    atom_array.set_annotation("occupancy", optional("q", 1.0, float))
    atom_array.set_annotation("charge", optional("formal_charge", 0, int))
    atom_array.set_annotation("altloc_id", optional("alt", "", "U1"))

    # Coordinates
    atom_array.coord = required("coord", np.float32)

    # Bonds: one row of (first atom index, second atom index, order)
    # per bond of the model
    if include_bonds:
        bond_rows = [
            (bond.index[0], bond.index[1], bond.order)
            for bond in model_bonds
        ]
        atom_array.bonds = struc.BondList(
            n_atoms, np.array(bond_rows, dtype=np.uint32)
        )

    return atom_array
def create_residue_dict(components_pdbx_file_path, msgpack_file_path):
    """
    Convert the components of a PDBx components file into atom arrays
    with bonds and store them as dictionary in a *MessagePack* file.

    Components without a ``chem_comp_atom`` category or without usable
    coordinates are skipped.

    Parameters
    ----------
    components_pdbx_file_path : str
        Path of the PDBx file containing the components.
    msgpack_file_path : str
        Path of the *MessagePack* file to be written.
    """
    # Coordinate columns in order of preference (x, y, z each)
    coord_key_sets = (
        (
            "pdbx_model_Cartn_x_ideal",
            "pdbx_model_Cartn_y_ideal",
            "pdbx_model_Cartn_z_ideal",
        ),
        ("model_Cartn_x", "model_Cartn_y", "model_Cartn_z"),
    )

    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(components_pdbx_file_path)
    components = pdbx_file.get_block_names()
    n_components = len(components)
    residue_dict = {}

    for count, component in enumerate(components):
        # Progress indicator, overwritten in place
        print(
            f"{component:3} {int(count/n_components*100):>3d}%", end="\r"
        )

        try:
            # Some entries use invalid quotation for the component name
            cif_general = pdbx_file.get_category(
                "chem_comp", block=component
            )
        except ValueError:
            cif_general = None
        cif_atoms = pdbx_file.get_category(
            "chem_comp_atom", block=component, expect_looped=True
        )
        cif_bonds = pdbx_file.get_category(
            "chem_comp_bond", block=component, expect_looped=True
        )
        if cif_atoms is None:
            continue

        residue = struc.AtomArray(len(list(cif_atoms.values())[0]))
        residue.res_name = cif_atoms["comp_id"]
        residue.atom_name = cif_atoms["atom_id"]
        residue.element = cif_atoms["type_symbol"]
        residue.add_annotation("charge", int)
        # An unknown charge ('?') is stored as 0
        residue.charge = np.array(
            [0 if value == "?" else int(value)
             for value in cif_atoms["charge"]]
        )
        # Without general information the component counts as hetero
        if cif_general is None or cif_general["type"] == "NON-POLYMER":
            residue.hetero[:] = True
        else:
            residue.hetero[:] = False

        # For some entries only 'model_Cartn',
        # for some entries only 'pdbx_model_Cartn_ideal' and
        # for some entries none of them is defined
        for coord_keys in coord_key_sets:
            try:
                for dim, key in enumerate(coord_keys):
                    residue.coord[:, dim] = cif_atoms[key]
            except (KeyError, ValueError):
                # Try the next set of coordinate columns
                continue
            break
        else:
            # If none of them is defined, skip this component
            continue

        bonds = struc.BondList(residue.array_length())
        if cif_bonds is not None:
            bond_columns = zip(
                cif_bonds["atom_id_1"],
                cif_bonds["atom_id_2"],
                cif_bonds["value_order"],
                cif_bonds["pdbx_aromatic_flag"],
            )
            for name_1, name_2, order, aromatic_flag in bond_columns:
                # Translate atom names into indices of the atom array
                index_1 = np.where(residue.atom_name == name_1)[0][0]
                index_2 = np.where(residue.atom_name == name_2)[0][0]
                bonds.add_bond(
                    index_1, index_2, BOND_ORDERS[order, aromatic_flag]
                )
        residue.bonds = bonds

        residue_dict[component] = array_to_dict(residue)

    with open(msgpack_file_path, "wb") as msgpack_file:
        msgpack.dump(residue_dict, msgpack_file)
def test_merge(bond_list):
    """
    Check on a known example that merging two `BondList` objects gives
    the expected combined bonds.
    """
    other_list = struc.BondList(8, np.array([(4, 6), (6, 7)]))
    merged_list = bond_list.merge(other_list)

    expected_bonds = [
        [0, 1, 0],
        [1, 2, 0],
        [1, 3, 0],
        [3, 4, 0],
        [0, 4, 0],
        [4, 6, 0],
        [6, 7, 0],
    ]
    assert merged_list.as_array().tolist() == expected_bonds
def bond_list():
    """
    Create the example `BondList` used by the tests: 7 atoms and
    8 index pairs, some of which describe the same bond repeatedly
    or with swapped atom order.
    """
    atom_pairs = [
        (0, 1),
        (2, 1),
        (3, 1),
        (3, 4),
        (3, 1),
        (1, 2),
        (4, 0),
        (6, 4),
    ]
    return struc.BondList(7, np.array(atom_pairs))
PNG_SIZE = (800, 800) ######################################################################## mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("3EGZ", "mmtf")) structure = mmtf.get_structure(mmtf_file, model=1) aptamer = structure[struc.filter_nucleotides(structure)] # Coarse graining: Represent each nucleotide using its C3' atom aptamer = aptamer[aptamer.atom_name == "C3'"] # Connect consecutive nucleotides indices = np.arange(aptamer.array_length()) aptamer.bonds = struc.BondList( aptamer.array_length(), np.stack((indices[:-1], indices[1:]), axis=-1) ) pymol_obj = ammolite.PyMOLObject.from_structure(aptamer) pymol_obj.show("sticks") pymol_obj.show("spheres") pymol_obj.color("black") ammolite.cmd.set("stick_color", "red") ammolite.cmd.set("stick_radius", 0.5) ammolite.cmd.set("sphere_scale", 1.0) ammolite.cmd.set("sphere_quality", 4) # Adjust camera pymol_obj.orient() pymol_obj.zoom(buffer=10) ammolite.cmd.rotate("z", 90)
def assemble_peptide(sequence):
    """
    Build an all-atom model of a peptide from its amino acid sequence.

    For each residue a backbone (N, CA, C, O, H) is constructed in the
    x-y-plane, the side chain is taken from the residue dataset
    (``info.residue()``) after superimposing it onto that backbone,
    and the residue is attached to the growing chain via a peptide
    bond.
    Finally, the N-terminal hydrogen (H2) and the C-terminal hydroxyl
    group (OXT, HXT) are added.

    Parameters
    ----------
    sequence : str or sequence of str
        The amino acids of the peptide as one-letter codes.
        Must support ``len()``.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide, including its :class:`BondList`.
        All atoms are assigned to chain ``"A"`` and the residue IDs
        start at 1.

    Raises
    ------
    ValueError
        If `sequence` is empty.

    Notes
    -----
    NOTE(review): every residue gets an amide hydrogen on the backbone
    nitrogen, so proline probably needs special handling - confirm.
    """
    # An empty sequence has no termini to cap:
    # fail early with a clear message instead of an obscure error later
    if len(sequence) == 0:
        raise ValueError(
            "Cannot assemble a peptide from an empty sequence"
        )

    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    peptide = struc.AtomArray(length=0)

    # The connection angle alternates between consecutive residues
    for res_id, res_name, connect_angle in zip(
            np.arange(1, len(res_names) + 1),
            res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        atom_n = struc.Atom(
            [0.0, 0.0, 0.0], atom_name="N", element="N"
        )
        atom_ca = struc.Atom(
            [0.0, N_CA_LENGTH, 0.0], atom_name="CA", element="C"
        )
        coord_c = calculate_atom_coord_by_z_rotation(
            atom_ca.coord, atom_n.coord, 120, CA_C_LENGTH
        )
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")
        coord_o = calculate_atom_coord_by_z_rotation(
            atom_c.coord, atom_ca.coord, 120, C_O_DOUBLE_LENGTH
        )
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")
        coord_h = calculate_atom_coord_by_z_rotation(
            atom_n.coord, atom_ca.coord, -120, N_H_LENGTH
        )
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)]
        )
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.atom_name,
            ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"]
        )]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # Reconnect the side chain atoms that were bonded to CA.
        # Glycine has no CB and its alpha hydrogens are named
        # HA2/HA3 instead of HA -> only bond the atoms that exist
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for ca_partner_name in ("CB", "HA", "HA2", "HA3"):
            partner_indices = np.where(
                residue.atom_name == ca_partner_name
            )[0]
            if len(partner_indices) > 0:
                residue.bonds.add_bond(
                    index_ca, partner_indices[0], struc.BondType.SINGLE
                )
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where(
                (peptide.res_id == res_id - 1) &
                (peptide.atom_name == "CA")
            )[0][0]
            index_prev_c = np.where(
                (peptide.res_id == res_id - 1) &
                (peptide.atom_name == "C")
            )[0][0]
            index_curr_n = np.where(
                (peptide.res_id == res_id) &
                (peptide.atom_name == "N")
            )[0][0]
            index_curr_c = np.where(
                (peptide.res_id == res_id) &
                (peptide.atom_name == "C")
            )[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry:
            # translate the residue so that its N is at the position
            # required by the peptide bond to the previous C
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH
            )
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n
                )

            # Add bond between previous C and current N
            peptide.bonds.add_bond(
                index_prev_c, index_curr_n, struc.BondType.SINGLE
            )

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(
        atom_n.coord, atom_h.coord, -120, N_H_LENGTH
    )
    atom_h2 = struc.Atom(
        coord_h2,
        chain_id="A", res_id=1, res_name=atom_h.res_name,
        atom_name="H2", element="H"
    )
    # H2 is prepended -> it gets index 0 and the first N gets index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # 'connect_angle' still holds the value of the last residue
    last_id = len(sequence)
    index_c = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "C")
    )[0][0]
    index_o = np.where(
        (peptide.res_id == last_id) & (peptide.atom_name == "O")
    )[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(
        peptide.coord[index_c], peptide.coord[index_o],
        connect_angle, C_O_LENGTH
    )
    coord_hxt = calculate_atom_coord_by_z_rotation(
        coord_oxt, peptide.coord[index_c],
        connect_angle, O_H_LENGTH
    )
    atom_oxt = struc.Atom(
        coord_oxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="OXT", element="O"
    )
    atom_hxt = struc.Atom(
        coord_hxt,
        chain_id="A", res_id=last_id, res_name=peptide[index_c].res_name,
        atom_name="HXT", element="H"
    )
    # OXT and HXT are appended -> they get the indices -2 and -1
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide
# A :class:`BondList` is created by passing a :class:`ndarray` # containing pairs of integers, where each integer represents an index # in a corresponding atom array and the pairs indicate which atoms share # a bond. # Addtionally, it is required to specifiy the number of atoms in the # atom array. import biotite.structure as struc array = struc.array([ struc.Atom([0, 0, 0], atom_name="N"), struc.Atom([0, 0, 0], atom_name="CA"), struc.Atom([0, 0, 0], atom_name="C"), struc.Atom([0, 0, 0], atom_name="CB") ]) print("Atoms:", array.atom_name) bond_list = struc.BondList(len(array), np.array([[1, 0], [1, 2], [1, 3]])) print("Bonds (indices):") print(bond_list.as_array()) print("Bonds (atoms names):") print(array.atom_name[bond_list.as_array()[:, :2]]) ca_bonds, ca_bond_types = bond_list.get_bonds(1) print("Bonds of CA:", array.atom_name[ca_bonds]) ######################################################################## # When you look at the internal :class:`ndarray` # (as given by :func:`BondList.as_array()`), you see a third column # containging zeros. # This column describes each bond with values from the :class:`BondType` # enum: *0* correponds to ``BondType.ANY``, which means that the type of # the bond is undefined. # This makes sense, since we did not define the bond types, when we
# containing pairs of integers, where each integer represents an index # in a corresponding atom array. # The pairs indicate which atoms share a bond. # Addtionally, it is required to specifiy the number of atoms in the # atom array. import biotite.structure as struc array = struc.array([ struc.Atom([0, 0, 0], atom_name="N"), struc.Atom([0, 0, 0], atom_name="CA"), struc.Atom([0, 0, 0], atom_name="C"), struc.Atom([0, 0, 0], atom_name="CB") ]) print("Atoms:", array.atom_name) bond_list = struc.BondList(array.array_length(), np.array([[1, 0], [1, 2], [1, 3]])) print("Bonds (indices):") print(bond_list.as_array()) print("Bonds (atoms names):") print(array.atom_name[bond_list.as_array()[:, :2]]) ca_bonds, ca_bond_types = bond_list.get_bonds(1) print("Bonds of CA:", array.atom_name[ca_bonds]) ######################################################################## # When you look at the internal :class:`ndarray` # (as given by :func:`BondList.as_array()`), you see a third column # containging zeros. # This column describes each bond with values from the :class:`BondType` # enum: *0* correponds to ``BondType.ANY``, which means that the type of # the bond is undefined. # This makes sense, since we did not define the bond types, when we
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb
import ammolite

# NOTE(review): presumably the pixel size of the rendered image;
# it is not used within this section
PNG_SIZE = (800, 800)

########################################################################

assembly = pdbx.get_assembly(
    pdbx.PDBxFile.read(rcsb.fetch("1XI4", "cif")),
    model=1
)

########################################################################

# Structure contains only CA
# Bonds are not required for visualization -> empty bond list
assembly.bonds = struc.BondList(assembly.array_length())

########################################################################

# General configuration
ammolite.cmd.bg_color("white")
ammolite.cmd.set("cartoon_side_chain_helper", 1)
ammolite.cmd.set("cartoon_oval_length", 0.8)
ammolite.cmd.set("depth_cue", 0)
ammolite.cmd.set("valence", 0)

########################################################################

pymol_obj = ammolite.PyMOLObject.from_structure(assembly)
pymol_obj.show_as("spheres")