예제 #1
0
def test_get_atoms(cell_size):
    """
    Check :class:`CellList` queries against hand-computed results.

    Five atoms are placed on the z-axis at the integer positions 0 to 4
    and a cell list with the given `cell_size` is built from them.
    The atom indices returned by ``get_atoms()`` are then compared with
    the known neighbors for a single position, for multiple positions
    sharing one radius and for multiple positions with individual radii.
    """
    n_atoms = 5
    atoms = struc.AtomArray(length=n_atoms)
    atoms.coord = np.array([[0, 0, z] for z in range(n_atoms)])
    cells = struc.CellList(atoms, cell_size=cell_size)

    # Single position, scalar radius: (query position, radius, expected)
    single_queries = [
        (np.array([0, 0, 0.1]), 1, [0, 1]),
        (np.array([0, 0, 1.1]), 1, [1, 2]),
        (np.array([0, 0, 1.1]), 2, [0, 1, 2, 3]),
    ]
    for position, radius, expected in single_queries:
        assert cells.get_atoms(position, radius).tolist() == expected

    positions = np.array([[0, 0, 0.1],
                          [0, 0, 1.1],
                          [0, 0, 4.1]])

    # Multiple positions, one shared radius
    # Entries equal to -1 are dropped before the comparison
    # (presumably padding values of the returned index array)
    found = cells.get_atoms(positions, 2)
    assert found[found != -1].tolist() == [0, 1, 2,
                                           0, 1, 2, 3,
                                           3, 4]

    # Multiple positions, one radius per position
    radii = np.array([1.0, 2.0, 3.0])
    found = cells.get_atoms(positions, radii)
    assert found[found != -1].tolist() == [0, 1,
                                           0, 1, 2, 3,
                                           2, 3, 4]
예제 #2
0
def array():
    """
    Build an :class:`AtomArray` that contains many different molecules
    whose atoms are randomly interleaved.

    Twenty molecules are drawn (with a fixed random seed) from a small
    set of residue names and concatenated, each one getting its own
    residue ID.
    Afterwards all atoms are shuffled, so the atoms belonging to one
    molecule are no longer adjacent in the returned :class:`AtomArray`.
    """
    mol_names = [
        "ARG",  # Molecule with multiple branches
        "TRP",  # Molecule with a cycle
        "GLC",  # Molecule with a cycle
        "NA",  # A single atom
        "ATP"  # Larger molecule
    ]
    n_molecules = 20

    # Fixed seed -> the same molecules and the same shuffling each time
    np.random.seed(0)

    chosen_names = np.random.choice(mol_names, n_molecules)
    atom_array = struc.AtomArray(0)
    for res_id, mol_name in enumerate(chosen_names, start=1):
        molecule = info.residue(mol_name)
        molecule.res_id[:] = res_id
        atom_array += molecule

    # Draw every atom index exactly once, in random order
    n_atoms = atom_array.array_length()
    shuffled_indices = np.random.choice(
        np.arange(n_atoms), n_atoms, replace=False
    )
    return atom_array[shuffled_indices]
예제 #3
0
def test_standardize_order(multi_model, seed):
    """
    Shuffle the atoms within each residue of a structure and check that
    ``standardize_order()`` brings them back into the original order.

    Depending on `multi_model` either the complete stack or only its
    first model is used.
    Hydrogen atoms are excluded from the final equality check.
    """
    structure_path = join(data_dir("structure"), "1l2y.mmtf")
    original = load_structure(structure_path)
    if not multi_model:
        original = original[0]
    # The box is not preserved when concatenating atom arrays later
    # This would complicate the atom array equality later
    original.box = None

    # Randomly reorder the atoms in each residue
    np.random.seed(seed)
    reordered = (
        struc.AtomArrayStack(original.stack_depth(), 0)
        if multi_model
        else struc.AtomArray(0)
    )
    for residue in struc.residue_iter(original):
        n_atoms = residue.array_length()
        shuffled = np.random.choice(
            np.arange(n_atoms), n_atoms, replace=False
        )
        reordered += residue[..., shuffled]

    # Restore the original PDB standard order
    standard_order = strucinfo.standardize_order(reordered)
    restored = reordered[..., standard_order]

    assert restored.shape == original.shape
    heavy_restored = restored[..., restored.element != "H"]
    heavy_original = original[..., original.element != "H"]
    assert heavy_restored == heavy_original
예제 #4
0
def test_id_overflow():
    """
    Test writing a structure whose atom and residue IDs exceed the
    ranges covered by the regular PDB ID columns.

    An atom array with 100000 atoms, each in its own residue, is written
    twice:

    - As regular PDB file: a :class:`UserWarning` is expected, the file
      must still be readable and the last written atom ID is expected
      to be ``1``.
    - As hybrid-36 PDB file: no warning is expected and the last line
      must contain the expected hybrid-36 encoded atom and residue IDs.
    """
    # Imported locally, as the module-level imports are not part of
    # this block
    import warnings

    # Create an atom array >= 100k atoms
    length = 100000
    a = struc.AtomArray(length)
    a.coord = np.zeros(a.coord.shape)
    a.chain_id = np.full(length, "A")
    # Create residue IDs over 10000
    a.res_id = np.arange(1, length + 1)
    a.res_name = np.full(length, "GLY")
    a.hetero = np.full(length, False)
    a.atom_name = np.full(length, "CA")
    a.element = np.full(length, "C")

    # The context manager closes the temporary file even if one of the
    # assertions below fails
    with TemporaryFile("w+") as temp:
        # Write stack to pdb file and make sure a warning is thrown
        with pytest.warns(UserWarning):
            pdb_file = pdb.PDBFile()
            pdb_file.set_structure(a)
            pdb_file.write(temp)

        # Assert file can be read properly
        temp.seek(0)
        a2 = pdb.get_structure(pdb.PDBFile.read(temp))
        assert a2.array_length() == a.array_length()

        # Manually check if the written atom id is correct
        temp.seek(0)
        last_line = temp.readlines()[-1]
        atom_id = int(last_line.split()[1])
        assert atom_id == 1

    with TemporaryFile("w+") as temp:
        # Write stack as hybrid-36 pdb file: no warning should be thrown
        # ``pytest.warns(None)`` is not accepted by current pytest
        # releases, hence the warnings are recorded via the standard
        # library
        with warnings.catch_warnings(record=True) as record:
            # Make sure no warning is hidden by an active filter
            warnings.simplefilter("always")
            tmp_pdb_file = pdb.PDBFile()
            tmp_pdb_file.set_structure(a, hybrid36=True)
            tmp_pdb_file.write(temp)
        assert len(record) == 0

        # Manually check if the output is written as correct hybrid-36
        temp.seek(0)
        last_line = temp.readlines()[-1]
        fields = last_line.split()
        atom_id = fields[1]
        assert atom_id == "A0000"
        # The first character of this field is skipped
        # (presumably the chain ID fused with the residue ID - confirm)
        res_id = fields[4][1:]
        assert res_id == "BXG0"
예제 #5
0
def test_base_pairs_reverse_no_hydrogen(nuc_sample_array, basepairs):
    """
    Test ``base_pairs()`` on a hydrogen-free structure with reversed
    residue order.

    All hydrogen atoms are removed from the sample structure first.
    Then a new atom array is assembled from its residues in reverse
    order and the residue IDs of the detected base pairs are compared
    with the reference `basepairs`.
    """
    heavy_atoms = nuc_sample_array[nuc_sample_array.element != "H"]

    # Rebuild the structure residue by residue, last residue first
    flipped = struc.AtomArray(0)
    for residue in reversed_iterator(struc.residue_iter(heavy_atoms)):
        flipped = flipped + residue

    pair_indices = base_pairs(flipped)
    paired_res_ids = flipped[pair_indices].res_id
    check_output(paired_res_ids, basepairs)
예제 #6
0
def test_base_pairs_reverse(nuc_sample_array, basepairs, unique_bool):
    """
    Test ``base_pairs()`` on a structure with reversed residue order.

    A new atom array is assembled from the residues of the sample
    structure in reverse order.
    The residue IDs of the base pairs detected in it, using the given
    `unique_bool` as ``unique`` parameter, are compared with the
    reference `basepairs`.
    """
    # Rebuild the structure residue by residue, last residue first
    flipped = struc.AtomArray(0)
    for residue in reversed_iterator(struc.residue_iter(nuc_sample_array)):
        flipped = flipped + residue

    pair_indices = base_pairs(flipped, unique=unique_bool)
    paired_res_ids = flipped[pair_indices].res_id
    check_output(paired_res_ids, basepairs)
예제 #7
0
def test_base_pairs_reordered(nuc_sample_array, seed):
    """
    Check that ``base_pairs()`` does not depend on the order of atoms
    within a residue.

    The atoms of each residue are randomly shuffled, using `seed` for
    reproducibility, and the base pairs found in the shuffled structure
    must be equal to those found in the unmodified one.
    """
    shuffled_array = struc.AtomArray(0)
    np.random.seed(seed)

    # Randomly reorder the atoms in each residue
    for residue in struc.residue_iter(nuc_sample_array):
        n_atoms = residue.array_length()
        order = np.random.choice(np.arange(n_atoms), n_atoms, replace=False)
        shuffled_array += residue[..., order]

    reference_pairs = struc.base_pairs(nuc_sample_array)
    shuffled_pairs = struc.base_pairs(shuffled_array)
    assert np.all(reference_pairs == shuffled_pairs)
예제 #8
0
def convert_to_atom_array(chempy_model, include_bonds=False):
    """
    Create an :class:`AtomArray` from a :class:`chempy.models.Indexed`
    object.

    In addition to the mandatory annotations, the returned
    :class:`AtomArray` carries the optional annotation categories
    ``b_factor``, ``occupancy``, ``charge`` and ``altloc_id``.
    If an atom of the model lacks the attribute backing one of these
    optional categories, a default value is used for that atom.
    No *altloc* ID filtering is performed.

    Parameters
    ----------
    chempy_model : Indexed
        The ``chempy`` model.
    include_bonds : bool, optional
        If set to true, an associated :class:`BondList` will be created
        for the returned atom array.

    Returns
    -------
    atom_array : AtomArray
        The converted structure.
    """
    atoms = chempy_model.atom
    bonds = chempy_model.bond
    n_atoms = len(atoms)

    atom_array = struc.AtomArray(n_atoms)

    # Mandatory annotations:
    # (annotation name, attribute of the model atom, dtype)
    mandatory = (
        ("chain_id", "chain", "U3"),
        ("res_id", "resi_number", int),
        ("ins_code", "ins_code", "U1"),
        ("res_name", "resn", "U3"),
        ("hetero", "hetatm", bool),
        ("atom_name", "name", "U6"),
        ("element", "symbol", "U2"),
    )
    for annotation, attribute, dtype in mandatory:
        values = [getattr(atom, attribute) for atom in atoms]
        setattr(atom_array, annotation, np.array(values, dtype=dtype))

    # Optional annotations:
    # (category, attribute of the model atom, fallback value, dtype)
    optional = (
        ("b_factor", "b", 0, float),
        ("occupancy", "q", 1.0, float),
        ("charge", "formal_charge", 0, int),
        ("altloc_id", "alt", "", "U1"),
    )
    for category, attribute, fallback, dtype in optional:
        values = [getattr(atom, attribute, fallback) for atom in atoms]
        atom_array.set_annotation(category, np.array(values, dtype=dtype))

    # Coordinates
    atom_array.coord = np.array(
        [atom.coord for atom in atoms], dtype=np.float32
    )

    # Bonds: one row (first atom index, second atom index, order) each
    if include_bonds:
        bond_rows = [
            [bond.index[0], bond.index[1], bond.order] for bond in bonds
        ]
        atom_array.bonds = struc.BondList(
            n_atoms, np.array(bond_rows, dtype=np.uint32)
        )

    return atom_array
예제 #9
0
def create_residue_dict(components_pdbx_file_path, msgpack_file_path):
    """
    Convert a PDBx file of chemical components into a *MessagePack*
    file of residue dictionaries.

    For each block of the input file an :class:`AtomArray` including
    bonds is created, converted via ``array_to_dict()`` and stored under
    the block name.
    Components without a ``chem_comp_atom`` category or without usable
    coordinates are skipped.
    The progress is printed to *stdout*.

    Parameters
    ----------
    components_pdbx_file_path : str
        Path of the PDBx file to read the components from.
    msgpack_file_path : str
        Path of the *MessagePack* file to be written.
    """
    # Coordinate columns in order of preference: (x, y, z)
    # For some entries only 'model_Cartn',
    # for some entries only 'pdbx_model_Cartn_ideal' and
    # for some entries none of them is defined
    coord_key_sets = (
        (
            "pdbx_model_Cartn_x_ideal",
            "pdbx_model_Cartn_y_ideal",
            "pdbx_model_Cartn_z_ideal",
        ),
        ("model_Cartn_x", "model_Cartn_y", "model_Cartn_z"),
    )

    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(components_pdbx_file_path)
    components = pdbx_file.get_block_names()
    residue_dict = {}

    for i, component in enumerate(components):
        print(f"{component:3}   {int(i/len(components)*100):>3d}%", end="\r")
        try:
            # Some entries use invalid quotation for the component name
            general_info = pdbx_file.get_category(
                "chem_comp", block=component
            )
        except ValueError:
            general_info = None
        atom_info = pdbx_file.get_category(
            "chem_comp_atom", block=component, expect_looped=True
        )
        bond_info = pdbx_file.get_category(
            "chem_comp_bond", block=component, expect_looped=True
        )
        if atom_info is None:
            continue

        # All columns have the same length -> take the first one
        n_atoms = len(list(atom_info.values())[0])
        array = struc.AtomArray(n_atoms)

        array.res_name = atom_info["comp_id"]
        array.atom_name = atom_info["atom_id"]
        array.element = atom_info["type_symbol"]
        array.add_annotation("charge", int)
        # An unknown charge ('?') is stored as 0
        array.charge = np.array(
            [0 if c == "?" else int(c) for c in atom_info["charge"]]
        )
        # Without general information the component counts as hetero
        is_hetero = general_info is None \
            or bool(general_info["type"] == "NON-POLYMER")
        array.hetero[:] = is_hetero

        # Use the first set of coordinate columns that can be assigned
        for key_x, key_y, key_z in coord_key_sets:
            try:
                array.coord[:, 0] = atom_info[key_x]
                array.coord[:, 1] = atom_info[key_y]
                array.coord[:, 2] = atom_info[key_z]
            except (KeyError, ValueError):
                continue
            break
        else:
            # If none of them is defined, skip this component
            continue

        bond_list = struc.BondList(array.array_length())
        if bond_info is not None:
            bond_columns = zip(
                bond_info["atom_id_1"],
                bond_info["atom_id_2"],
                bond_info["value_order"],
                bond_info["pdbx_aromatic_flag"],
            )
            for name_1, name_2, order, aromatic_flag in bond_columns:
                # Translate the atom names into atom indices
                index_1 = np.where(array.atom_name == name_1)[0][0]
                index_2 = np.where(array.atom_name == name_2)[0][0]
                bond_list.add_bond(
                    index_1, index_2, BOND_ORDERS[order, aromatic_flag]
                )
        array.bonds = bond_list

        residue_dict[component] = array_to_dict(array)

    with open(msgpack_file_path, "wb") as msgpack_file:
        msgpack.dump(residue_dict, msgpack_file)
예제 #10
0
def assemble_peptide(sequence):
    """
    Assemble a peptide chain, including bonds, from an amino acid
    sequence.

    For each residue a backbone (``N``, ``CA``, ``C``, ``O``, ``H``) is
    constructed from fixed bond lengths and 120 degree angles.
    The reference residue obtained from ``info.residue()`` is
    superimposed onto this backbone and its non-backbone atoms are
    appended as side chain.
    The residues are connected via a bond between the ``C`` of the
    previous and the ``N`` of the current residue, with the connection
    angle alternating between 120 and -120 degrees.
    Residues with even residue ID are additionally rotated by
    180 degrees about their ``N``-``C`` axis.
    Finally, an ``H2`` atom is added to the first residue and an
    ``OXT``/``HXT`` hydroxyl group to the last one.

    Parameters
    ----------
    sequence : sized iterable of str
        The one-letter amino acid codes of the peptide.
        Each element is converted via
        ``seq.ProteinSequence.convert_letter_1to3()``.

    Returns
    -------
    peptide : AtomArray
        The assembled peptide with chain ID ``"A"``, residue IDs
        starting at 1 and an associated :class:`BondList`.

    Raises
    ------
    ValueError
        If `sequence` is empty.
    """
    # The terminal atoms added at the end need at least one residue
    if len(sequence) == 0:
        raise ValueError("Cannot assemble a peptide from an empty sequence")

    res_names = [seq.ProteinSequence.convert_letter_1to3(r) for r in sequence]
    peptide = struc.AtomArray(length=0)

    for res_id, res_name, connect_angle in zip(
            np.arange(1,
                      len(res_names) + 1), res_names,
            itertools.cycle([120, -120])):
        # Create backbone
        atom_n = struc.Atom([0.0, 0.0, 0.0], atom_name="N", element="N")

        atom_ca = struc.Atom([0.0, N_CA_LENGTH, 0.0],
                             atom_name="CA",
                             element="C")

        coord_c = calculate_atom_coord_by_z_rotation(atom_ca.coord,
                                                     atom_n.coord, 120,
                                                     CA_C_LENGTH)
        atom_c = struc.Atom(coord_c, atom_name="C", element="C")

        coord_o = calculate_atom_coord_by_z_rotation(atom_c.coord,
                                                     atom_ca.coord, 120,
                                                     C_O_DOUBLE_LENGTH)
        atom_o = struc.Atom(coord_o, atom_name="O", element="O")

        coord_h = calculate_atom_coord_by_z_rotation(atom_n.coord,
                                                     atom_ca.coord, -120,
                                                     N_H_LENGTH)
        atom_h = struc.Atom(coord_h, atom_name="H", element="H")

        backbone = struc.array([atom_n, atom_ca, atom_c, atom_o, atom_h])
        backbone.res_id[:] = res_id
        backbone.res_name[:] = res_name

        # Add bonds between backbone atoms
        bonds = struc.BondList(backbone.array_length())
        bonds.add_bond(0, 1, struc.BondType.SINGLE)  # N-CA
        bonds.add_bond(1, 2, struc.BondType.SINGLE)  # CA-C
        bonds.add_bond(2, 3, struc.BondType.DOUBLE)  # C-O
        bonds.add_bond(0, 4, struc.BondType.SINGLE)  # N-H
        backbone.bonds = bonds

        # Get residue from dataset
        residue = info.residue(res_name)
        # Superimpose backbone of residue
        # with backbone created previously
        _, transformation = struc.superimpose(
            backbone[struc.filter_backbone(backbone)],
            residue[struc.filter_backbone(residue)])
        residue = struc.superimpose_apply(residue, transformation)
        # Remove backbone atoms from residue because they are already
        # existing in the backbone created previously
        side_chain = residue[~np.isin(
            residue.
            atom_name, ["N", "CA", "C", "O", "OXT", "H", "H2", "H3", "HXT"])]

        # Assemble backbone with side chain (including HA)
        # and set annotation arrays
        residue = backbone + side_chain
        # Connect CA to those of its substituents that exist in this
        # residue: not every residue has both a 'CB' and a 'HA' atom
        # (e.g. glycine, whose alpha hydrogen atoms are expected to be
        # named 'HA2' and 'HA3' in the reference data), so an
        # unconditional lookup would fail with an IndexError
        index_ca = np.where(residue.atom_name == "CA")[0][0]
        for substituent_name in ("CB", "HA", "HA2", "HA3"):
            matches = np.where(residue.atom_name == substituent_name)[0]
            if len(matches) > 0:
                residue.bonds.add_bond(index_ca, matches[0],
                                       struc.BondType.SINGLE)
        # NOTE(review): only bonds to CA are restored here; bonds from
        # side chain atoms to other backbone atoms are presumably lost
        # when the backbone atoms are filtered out - confirm
        residue.chain_id[:] = "A"
        residue.res_id[:] = res_id
        residue.res_name[:] = res_name
        peptide += residue

        # Connect current residue to existing residues in the chain
        if res_id > 1:
            index_prev_ca = np.where((peptide.res_id == res_id - 1)
                                     & (peptide.atom_name == "CA"))[0][0]
            index_prev_c = np.where((peptide.res_id == res_id - 1)
                                    & (peptide.atom_name == "C"))[0][0]
            index_curr_n = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "N"))[0][0]
            index_curr_c = np.where((peptide.res_id == res_id)
                                    & (peptide.atom_name == "C"))[0][0]
            curr_residue_mask = peptide.res_id == res_id

            # Adjust geometry:
            # Translate the current residue, so that its N is placed at
            # the position calculated from the previous C and CA
            curr_coord_n = calculate_atom_coord_by_z_rotation(
                peptide.coord[index_prev_c], peptide.coord[index_prev_ca],
                connect_angle, C_N_LENGTH)
            peptide.coord[curr_residue_mask] -= peptide.coord[index_curr_n]
            peptide.coord[curr_residue_mask] += curr_coord_n
            # Adjacent residues should show in opposing directions
            # -> rotate residues with even residue ID by 180 degrees
            if res_id % 2 == 0:
                coord_n = peptide.coord[index_curr_n]
                coord_c = peptide.coord[index_curr_c]
                peptide.coord[curr_residue_mask] = struc.rotate_about_axis(
                    atoms=peptide.coord[curr_residue_mask],
                    axis=coord_c - coord_n,
                    angle=np.deg2rad(180),
                    support=coord_n)

            # Add bond between previous C and current N
            peptide.bonds.add_bond(index_prev_c, index_curr_n,
                                   struc.BondType.SINGLE)

    # Add N-terminal hydrogen
    atom_n = peptide[(peptide.res_id == 1) & (peptide.atom_name == "N")][0]
    atom_h = peptide[(peptide.res_id == 1) & (peptide.atom_name == "H")][0]
    coord_h2 = calculate_atom_coord_by_z_rotation(atom_n.coord, atom_h.coord,
                                                  -120, N_H_LENGTH)
    atom_h2 = struc.Atom(coord_h2,
                         chain_id="A",
                         res_id=1,
                         res_name=atom_h.res_name,
                         atom_name="H2",
                         element="H")
    # H2 is prepended: it gets index 0 and the N of the first residue,
    # which is the first atom of that residue, moves to index 1
    peptide = struc.array([atom_h2]) + peptide
    peptide.bonds.add_bond(0, 1, struc.BondType.SINGLE)  # H2-N

    # Add C-terminal hydroxyl group
    # 'connect_angle' intentionally keeps the value from the last
    # iteration of the loop above
    last_id = len(sequence)
    index_c = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "C"))[0][0]
    index_o = np.where((peptide.res_id == last_id)
                       & (peptide.atom_name == "O"))[0][0]
    coord_oxt = calculate_atom_coord_by_z_rotation(peptide.coord[index_c],
                                                   peptide.coord[index_o],
                                                   connect_angle, C_O_LENGTH)
    coord_hxt = calculate_atom_coord_by_z_rotation(coord_oxt,
                                                   peptide.coord[index_c],
                                                   connect_angle, O_H_LENGTH)
    atom_oxt = struc.Atom(coord_oxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="OXT",
                          element="O")
    atom_hxt = struc.Atom(coord_hxt,
                          chain_id="A",
                          res_id=last_id,
                          res_name=peptide[index_c].res_name,
                          atom_name="HXT",
                          element="H")
    # OXT and HXT are appended at the end, so 'index_c' stays valid and
    # the new atoms are addressed via the negative indices -2 and -1
    peptide = peptide + struc.array([atom_oxt, atom_hxt])
    peptide.bonds.add_bond(index_c, -2, struc.BondType.SINGLE)  # C-OXT
    peptide.bonds.add_bond(-2, -1, struc.BondType.SINGLE)  # OXT-HXT

    return peptide
예제 #11
0
print("Cell:")
print(cell)

########################################################################
# An atom array can have an associated box, which is used by functions
# that consider periodic boundary conditions.
# Atom array stacks require a *(m,3,3)*-shaped :class:`ndarray`
# that contains the box vectors for each model.
# The box is accessed via the `box` attribute, which is ``None`` by
# default.
# When a structure is loaded from a structure file, the box described
# in the file is used automatically.

import biotite.database.rcsb as rcsb
import biotite.structure.io as strucio
# A newly created atom array has no box
array = struc.AtomArray(length=100)
print(array.box)
# Assign the box vectors created earlier in this script
array.box = box
print(array.box)
# Fetch a structure file and print the box of the loaded structure
file_path = rcsb.fetch("3o5r", "mmtf", biotite.temp_dir())
array = strucio.load_structure(file_path)
print(array.box)

########################################################################
# When a trajectory from an MD simulation is loaded, the molecules are
# often fragmented over the periodic boundary.
# While many analysis functions can handle such periodic boundary
# conditions automatically, some require complete molecules.
# In this case you should use :func:`remove_pbc()`.

array = struc.remove_pbc(array)