Ejemplo n.º 1
0
def test_dihedral_backbone_result(file_name):
    """
    Compare the backbone dihedral angles calculated by
    :func:`dihedral_backbone()` with the angles calculated by MDTraj.

    The comparison is performed separately for each peptide chain in
    the first model of the given MMTF file.
    """
    import mdtraj

    mmtf_file = mmtf.MMTFFile.read(file_name)
    array = mmtf.get_structure(mmtf_file, model=1)
    array = array[struc.filter_amino_acids(array)]
    if array.array_length() == 0:
        # Structure contains no protein
        # -> determination of backbone angles makes no sense
        return

    for chain in struc.chain_iter(array):
        print("Chain: ", chain.chain_id[0])
        if len(struc.check_res_id_continuity(chain)) != 0:
            # Do not test discontinuous chains
            # Note that this ends the entire test: the chains following
            # the discontinuous one are not tested either
            return
        test_phi, test_psi, test_ome = struc.dihedral_backbone(chain)

        # The context manager closes the temporary file,
        # even if writing or loading the structure fails
        with NamedTemporaryFile("w+", suffix=".pdb") as temp:
            strucio.save_structure(temp.name, chain)
            traj = mdtraj.load(temp.name)
        _, ref_phi = mdtraj.compute_phi(traj)
        _, ref_psi = mdtraj.compute_psi(traj)
        _, ref_ome = mdtraj.compute_omega(traj)
        # The trajectory contains a single frame
        ref_phi, ref_psi, ref_ome = ref_phi[0], ref_psi[0], ref_ome[0]

        # The first phi and the last psi/omega value of the test result
        # are excluded from the comparison
        assert test_phi[1:] == pytest.approx(ref_phi, abs=1e-5, rel=5e-3)
        assert test_psi[:-1] == pytest.approx(ref_psi, abs=1e-5, rel=5e-3)
        assert test_ome[:-1] == pytest.approx(ref_ome, abs=1e-5, rel=5e-3)
Ejemplo n.º 2
0
def test_get_assembly(single_model):
    """
    Check for each assembly of the test structure that
    :func:`get_assembly()` yields as many peptide chains as given in
    the ``_pdbx_struct_assembly.oligomeric_count`` field.
    """
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(join(data_dir, "1f2n.cif"))

    category = pdbx_file.get_category(
        "pdbx_struct_assembly", expect_looped=True
    )

    # A single model is expected to give an AtomArray,
    # all models are expected to give an AtomArrayStack
    if single_model:
        model, expected_type = 1, struc.AtomArray
    else:
        model, expected_type = None, struc.AtomArrayStack

    # Test each available assembly
    assembly_ids = category["id"]
    ref_counts = category["oligomeric_count"]
    for assembly_id, ref_count in zip(assembly_ids, ref_counts):
        assembly = pdbx.get_assembly(
            pdbx_file, assembly_id=assembly_id, model=model
        )
        protein_mask = struc.filter_amino_acids(assembly)
        chain_count = struc.get_chain_count(assembly[..., protein_mask])

        assert isinstance(assembly, expected_type)
        assert chain_count == int(ref_count)
Ejemplo n.º 3
0
def test_hbond_structure(pdb_id):
    """
    Compare the hydrogen bonds found by :func:`hbond()` with the
    hydrogen bonds found by :func:`mdtraj.baker_hubbard()` for the
    structure with the given PDB ID.
    """
    file_name = join(data_dir("structure"), pdb_id + ".mmtf")

    array = load_structure(file_name)
    # Only consider amino acids for consistency
    # with bonded hydrogen detection in MDTraj
    array = array[..., struc.filter_amino_acids(array)]
    if isinstance(array, struc.AtomArrayStack):
        # For consistency with MDTraj 'S' cannot be acceptor element
        # https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/hbond.py#L365
        # The mask returned for a stack is not required for this test
        triplets, _ = struc.hbond(array, acceptor_elements=("O", "N"))
    else:
        triplets = struc.hbond(array, acceptor_elements=("O", "N"))

    # Compare with MDTraj
    import mdtraj
    # Save to new pdb file for consistent treatment of inscode/altloc
    # in MDTraj
    # The context manager closes the temporary file,
    # even if writing or loading the structure fails
    with NamedTemporaryFile("w+", suffix=".pdb") as temp:
        save_structure(temp.name, array)
        traj = mdtraj.load(temp.name)
    triplets_ref = mdtraj.baker_hubbard(traj, freq=0, periodic=False)

    # Both packages may use different order
    # -> use set for comparison
    triplets_set = {tuple(tri) for tri in triplets}
    triplets_ref_set = {tuple(tri) for tri in triplets_ref}
    assert triplets_set == triplets_ref_set
Ejemplo n.º 4
0
def test_dihedral_backbone_result(file_name):
    """
    Check the backbone dihedral angles from :func:`dihedral_backbone()`
    against the angles MDTraj computes, chain by chain, for the first
    model of the given MMTF file.
    """
    import mdtraj

    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(file_name)
    structure = mmtf.get_structure(mmtf_file, model=1)
    peptide = structure[struc.filter_amino_acids(structure)]
    for chain in struc.chain_iter(peptide):
        print("Chain: ", chain.chain_id[0])
        discontinuities = struc.check_id_continuity(chain)
        if len(discontinuities) != 0:
            # A discontinuous chain ends the test
            return
        test_phi, test_psi, test_ome = struc.dihedral_backbone(chain)

        # MDTraj reads the chain from a temporary PDB file
        pdb_path = biotite.temp_file("pdb")
        strucio.save_structure(pdb_path, chain)
        traj = mdtraj.load(pdb_path)
        reference = []
        for compute in (
            mdtraj.compute_phi, mdtraj.compute_psi, mdtraj.compute_omega
        ):
            # Only the angles are required, not the atom indices
            _, angles = compute(traj)
            reference.append(angles)
        # Take the angles of the first frame
        ref_phi, ref_psi, ref_ome = [angles[0] for angles in reference]

        assert test_phi[1:] == pytest.approx(ref_phi, abs=1e-5, rel=5e-3)
        assert test_psi[:-1] == pytest.approx(ref_psi, abs=1e-5, rel=5e-3)
        assert test_ome[:-1] == pytest.approx(ref_ome, abs=1e-5, rel=5e-3)
Ejemplo n.º 5
0
def psea_sec(file):
    """
    Annotate the secondary structure elements of chain ``"A"``
    using :func:`annotate_sse()`.

    Parameters
    ----------
    file
        The file object that is passed to ``mmtf.get_structure()``.
        Only the first model is used.

    Returns
    -------
    sse
        The result of :func:`annotate_sse()` for chain ``"A"``.
    """
    # The annotation runs on the complete first model;
    # the chain is selected via the 'chain_id' parameter
    array = mmtf.get_structure(file, model=1)
    return struc.annotate_sse(array, chain_id="A")
Ejemplo n.º 6
0
def dssp_sec(file):
    """
    Annotate the secondary structure elements of the amino acids in
    chain ``"A"`` with DSSP and translate the DSSP codes via the
    ``dssp_to_abc`` mapping.

    Only the first model of `file` is used.
    The result is a NumPy array of single-character strings.
    """
    structure = mmtf.get_structure(file, model=1)
    protein = structure[struc.filter_amino_acids(structure)]
    chain_a = protein[protein.chain_id == "A"]

    dssp_sse = dssp.DsspApp.annotate_sse(chain_a)
    translated = [dssp_to_abc[element] for element in dssp_sse]
    return np.array(translated, dtype="U1")
Ejemplo n.º 7
0
def test_outside_location():
    """
    Querying a cell list at a position far away from all atoms
    must give an empty result.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    protein = structure[struc.filter_amino_acids(structure)]
    cell_list = struc.CellList(protein, cell_size=5)
    # A position 100 units below the minimum coordinate
    # in each dimension
    far_away = protein.coord.min(axis=0) - 100
    neighbors = cell_list.get_atoms(far_away, 5)
    # Expect empty array
    assert len(neighbors) == 0
Ejemplo n.º 8
0
def test_remove_pbc_selections(multi_model):
    """
    Run :func:`remove_pbc()` with different values for the `selection`
    parameter.
    Apart from the expected error for overlapping selections,
    this test only checks that no exception is raised.
    """
    structure = load_structure(join(data_dir("structure"), "3o5r.mmtf"))
    if multi_model:
        structure = struc.stack([structure, structure])

    amino_acid_mask = struc.filter_amino_acids(structure)
    fk5_mask = structure.res_name == "FK5"
    ca_mask = structure.atom_name == "CA"

    # No selection, a chain ID, a single mask and multiple masks
    struc.remove_pbc(structure)
    struc.remove_pbc(structure, structure.chain_id[0])
    struc.remove_pbc(structure, amino_acid_mask)
    struc.remove_pbc(structure, [amino_acid_mask, fk5_mask])
    # Expect error when selecting an atom multiple times
    with pytest.raises(ValueError):
        struc.remove_pbc(structure, [amino_acid_mask, ca_mask])
Ejemplo n.º 9
0
def mmtf_sec(file):
    """
    Read the secondary structure elements stored in an MMTF file
    for chain ``"A"`` and translate them via the ``sec_struct_codes``
    and ``dssp_to_abc`` mappings.

    Parameters
    ----------
    file
        The MMTF file object.
        It is passed to ``mmtf.get_structure()`` and its
        ``"secStructList"`` field is read.

    Returns
    -------
    sse : ndarray, dtype="U1"
        The translated secondary structure elements of chain ``"A"``.
    """
    array = mmtf.get_structure(file, model=1)
    tk_dimer = array[struc.filter_amino_acids(array)]

    # The chain ID corresponding to each residue
    # The residue starts are indices into 'tk_dimer',
    # hence the chain IDs must be taken from 'tk_dimer' as well
    chain_id_per_res = tk_dimer.chain_id[struc.get_residue_starts(tk_dimer)]
    sse = file["secStructList"]
    # Remove the entries without secondary structure information
    sse = sse[sse != -1]
    sse = sse[: len(chain_id_per_res)][chain_id_per_res == "A"]
    sse = np.array([sec_struct_codes[code] for code in sse], dtype="U1")
    sse = np.array([dssp_to_abc[e] for e in sse], dtype="U1")

    return sse
Ejemplo n.º 10
0
def get_diameter(pdb_id):
    """
    Fetch the structure with the given PDB ID and return the largest
    distance between any two of its amino acid atoms.
    """
    mmtf_path = rcsb.fetch(pdb_id, "mmtf", gettempdir())
    structure = strucio.load_structure(mmtf_path)
    # Keep only the amino acids
    protein = structure[struc.filter_amino_acids(structure)]
    positions = protein.coord
    # Difference vectors between all pairs of atoms
    displacement = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    # Squared length of each difference vector
    squared = np.sum(displacement*displacement, axis=-1)
    # The diameter is the largest pairwise distance
    return np.sqrt(np.max(squared))
Ejemplo n.º 11
0
def test_protOr_radii():
    """
    Assert that ProtOr VdW radii (except hydrogen) can be calculated for
    all atoms in the given structure, since the structure (1GYA)
    does only contain standard amino acids after the removal of
    glycosylation.
    This means, that none of the resulting radii should be ``None``.
    """
    array = load_structure(join(data_dir("structure"), "1gya.mmtf"))
    array = array[..., array.element != "H"]
    array = array[..., struc.filter_amino_acids(array)]
    for res_name, atom_name in zip(array.res_name, array.atom_name):
        radius = strucinfo.vdw_radius_protor(res_name, atom_name)
        # Identity comparison is the proper check for None
        assert radius is not None
        assert isinstance(radius, float)
Ejemplo n.º 12
0
def Diameter(TheFile):
    '''
    Estimate the diameter of a protein structure as twice the largest
    distance of an amino acid atom from the geometric center.
    This requires less memory than computing all pairwise distances,
    thus it is suitable for big structures.
    The result is rounded to three decimal places.
    '''
    structure = strucio.load_structure(TheFile)
    # Keep only the atoms belonging to amino acids
    protein = structure[struc.filter_amino_acids(structure)]
    positions = protein.coord
    # Geometric center of all remaining atoms
    centroid = np.mean(positions, axis=0)
    # Distance of each atom from the center
    radial_dist = np.sqrt(np.sum((positions - centroid)**2, axis=-1))
    diameter = 2 * radial_dist.max()
    return (round(diameter, 3))
Ejemplo n.º 13
0
def analyze_chirality(array):
    """
    Determine the enantiomer of each amino acid residue.

    Parameters
    ----------
    array : AtomArray
        The structure to analyze.
        Atoms that do not belong to amino acids are ignored.

    Returns
    -------
    enantiomers : ndarray, dtype=int
        For each residue the value returned by ``get_enantiomer()``.
        The value is 0 for each residue that does not have exactly
        four backbone/CB atoms (e.g. glycine).
    """
    # Filter backbone + CB
    array = array[struc.filter_amino_acids(array)]
    array = array[(array.atom_name == "CB") | (struc.filter_backbone(array))]
    # Iterate over each residue
    # The residues are selected via their start indices instead of
    # their residue IDs, as residue IDs may occur multiple times,
    # e.g. in different chains
    starts = struc.get_residue_starts(array)
    stops = np.append(starts[1:], array.array_length())
    enantiomers = np.zeros(len(starts), dtype=int)
    for i, (start, stop) in enumerate(zip(starts, stops)):
        coord = array.coord[start:stop]
        if len(coord) != 4:
            # Glycine -> no chirality
            enantiomers[i] = 0
        else:
            enantiomers[i] = get_enantiomer(coord[0], coord[1], coord[2],
                                            coord[3])
    return enantiomers
Ejemplo n.º 14
0
def test_adjacency_matrix(cell_size, threshold):
    """
    The adjacency matrix created by a :class:`CellList` must be equal
    to the adjacency matrix derived from a brute-force distance matrix.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    protein = structure[struc.filter_amino_acids(structure)]

    cell_list = struc.CellList(protein, cell_size=cell_size)
    test_matrix = cell_list.create_adjacency_matrix(threshold)

    # Brute-force distance matrix
    positions = protein.coord
    displacement = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    # Convert to float64 to avoid erroneous warning
    # https://github.com/ContinuumIO/anaconda-issues/issues/9129
    displacement = displacement.astype(np.float64)
    distance = np.sqrt(np.sum(displacement**2, axis=-1))
    # Atoms are adjacent, if their distance is within the threshold
    ref_matrix = (distance <= threshold)

    assert test_matrix.tolist() == ref_matrix.tolist()
Ejemplo n.º 15
0
def test_get_assembly(model):
    """
    Check for each assembly of the test structure that
    :func:`get_assembly()` yields as many peptide chains as given in
    the ``_pdbx_struct_assembly.oligomeric_count`` field.
    In addition, the number of atoms in the assembly must be a multiple
    of the number of atoms in the structure obtained via
    :func:`get_structure()`.
    """
    pdbx_file = pdbx.PDBxFile.read(join(data_dir("structure"), "1f2n.cif"))
    category = pdbx_file.get_category(
        "pdbx_struct_assembly", expect_looped=True
    )

    all_models = model is None
    # All models are expected to give an AtomArrayStack,
    # a single model is expected to give an AtomArray
    expected_type = struc.AtomArrayStack if all_models else struc.AtomArray

    # Test each available assembly
    assembly_ids = category["id"]
    ref_counts = category["oligomeric_count"]
    for assembly_id, ref_count in zip(assembly_ids, ref_counts):
        print("Assembly ID:", assembly_id)
        try:
            assembly = pdbx.get_assembly(
                pdbx_file, assembly_id=assembly_id, model=model
            )
        except biotite.InvalidFileError:
            if not all_models:
                raise
            # The file cannot be parsed into an AtomArrayStack,
            # as the models contain different numbers of atoms
            # -> skip this test case
            return

        protein_mask = struc.filter_amino_acids(assembly)
        chain_count = struc.get_chain_count(assembly[..., protein_mask])

        assert isinstance(assembly, expected_type)
        assert chain_count == int(ref_count)

        # The atom count of the entire assembly should be a multiple
        # of the atom count of a monomer
        monomer_atom_count = pdbx.get_structure(pdbx_file).array_length()
        assert assembly.array_length() % monomer_atom_count == 0
Ejemplo n.º 16
0
def interacting_pairs(structure_path: str,
                      distance_threshold: float,
                      atom_name: str = 'CA',
                      positions: t.Optional[t.Iterable[int]] = None):
    """
    Finds residues in structure within distance threshold.
    :param structure_path: path to a structure file
    :param distance_threshold: max distance between elements (non-inclusive)
    :param atom_name: filter atoms to this names (CA, CB, and so on)
    :param positions: filter positions to the ones in this list
    :return: numpy array with shape (N, 2) where N is a number of interacting pairs;
        each row contains the residue IDs of the two interacting atoms
    """
    st = io.load_structure(structure_path)
    ca = st[(st.atom_name == atom_name) & bst.filter_amino_acids(st)]
    if positions is not None:
        ca = ca[np.isin(ca.res_id, list(positions))]
    atom_count = ca.array_length()
    if atom_count < 2:
        # No pair can be formed
        return np.empty((0, 2), dtype=ca.res_id.dtype)
    pairs_idx = np.array(list(combinations(range(atom_count), 2)))
    # Derive the residue IDs from the same atom indices that are used for
    # the distance measurement, so both arrays always have the same length
    # and order, even if residue IDs are duplicated or not sorted
    pairs = ca.res_id[pairs_idx]
    dist = bst.index_distance(ca, pairs_idx)
    return pairs[dist < distance_threshold]
Ejemplo n.º 17
0
def test_coarse_grained(pdb_id):
    """
    Compare the SASA calculated with the ``"ProtOr"`` radii to the
    SASA calculated with the ``"Single"`` radii.
    The comparison is performed on the SASA summed up for each residue.
    """
    mmtf_path = join(data_dir("structure"), pdb_id + ".mmtf")
    structure = mmtf.get_structure(mmtf.MMTFFile.read(mmtf_path), model=1)
    protein = structure[struc.filter_amino_acids(structure)]

    def residue_sasa(vdw_radii):
        # Sum of the atom SASA values in each residue, ignoring NaN
        atom_sasa = struc.sasa(protein, vdw_radii=vdw_radii)
        return struc.apply_residue_wise(protein, atom_sasa, np.nansum)

    test_sasa = residue_sasa("ProtOr")
    ref_sasa = residue_sasa("Single")

    def agreeing_fraction(rtol):
        # Fraction of values that are close to the reference
        close = np.isclose(test_sasa, ref_sasa, rtol=rtol, atol=1)
        return np.count_nonzero(close) / len(test_sasa)

    # More than 90% of the values
    # must have less than 10% SASA difference
    assert agreeing_fraction(1e-1) > 0.9
    # More than 98% of the values
    # must have less than 40% SASA difference
    assert agreeing_fraction(4e-1) > 0.98
Ejemplo n.º 18
0
def DiameterA(TheFile):
    '''
    Find the exact diameter of a protein structure, i.e. the largest
    distance between any two amino acid atoms.
    The coordinate array is broadcast against itself to calculate all
    pairwise difference vectors.
    For N atoms this creates an array with N x N x 3 elements, so the
    memory requirement grows quadratically with the number of atoms and
    a MemoryError can be expected for big structures.
    The result is rounded to three decimal places.
    '''
    # Get atom coordinates
    atom_array = strucio.load_structure(TheFile)
    # Remove all non-amino acids atoms
    atom_array = atom_array[struc.filter_amino_acids(atom_array)]
    # Coordinates as a NumPy array
    coord = atom_array.coord
    # Calculate all pairwise difference vectors
    diff = coord[:, np.newaxis, :] - coord[np.newaxis, :, :]
    # Calculate absolute of difference vectors -> square distances
    sq_dist = np.sum(diff * diff, axis=-1)
    # Maximum distance is diameter
    diameter = np.sqrt(np.max(sq_dist))
    return (round(diameter, 3))
Ejemplo n.º 19
0
def build_patterns(structfam, folder):
    """
    Derive the most frequent secondary structure pattern from a set of
    structures, both in an 8-letter and a 3-letter representation.

    For each ``(pdb, c, start, end)`` entry of `structfam` the MMTF file
    is fetched from the RCSB, the ``"secStructList"`` field is read,
    restricted to chain `c` and sliced from `start` to `end`
    (inclusive).
    From this a pattern of secondary structure classes is derived.
    The most frequent pattern is stored via ``push()`` in
    ``"{folder}/data.pt"`` under the name ``"pattern"``.

    Returns
    -------
    Either the 6-tuple ``(c_pattern3, n_pattern3, c_pattern8,
    n_pattern8, occ_sum3, occ_sum8)``, or a 4-tuple of ``None``,
    if `structfam` yields no pattern.

    NOTE(review): The two return paths have a different number of
    elements - callers need to handle both.
    """
    patterns = []
    for pdb, c, start, end in tqdm(structfam):
        file_name = rcsb.fetch(pdb, "mmtf", biotite.temp_dir())
        mmtf_file = mmtf.MMTFFile()
        mmtf_file.read(file_name)

        array = mmtf.get_structure(mmtf_file, model=1)
        tk_dimer = array[struc.filter_amino_acids(array)]

        # The chain ID corresponding to each residue
        # NOTE(review): The residue starts are indices into 'tk_dimer',
        # but they are used to index 'array.chain_id'.
        # This presumably only gives the intended chain IDs, if the
        # amino acid atoms precede all other atoms - verify.
        chain_id_per_res = array.chain_id[struc.get_residue_starts(tk_dimer)]

        sse = mmtf_file["secStructList"]
        # Restrict to the residues of chain 'c' ...
        sse = sse[:chain_id_per_res.shape[0]][chain_id_per_res == c]
        # ... and to the requested range (inclusive 'end')
        sse = np.array(sse[start:end + 1])
        # 'code % 8' maps the code -1 to 7
        sse = np.array([sec_struct_codes[code % 8] for code in sse],
                       dtype="U1")

        # 8-letter pattern:
        # 'dss8' is the difference between the encodings of consecutive
        # residues (assumes 'to_onehot' returns an array with one row
        # per residue - TODO confirm)
        sse8 = to_onehot([dssp_codes[x] for x in sse], (None, 8))
        dss8 = (sse8[1:] - sse8[:-1])
        cls = to_onehot(np.where(dss8 == -1)[1], (None, 8)).T
        # One row per element: position of the 1 entries, position of the
        # -1 entries, followed by the class encoding
        bbox = np.array(
            [np.where(dss8 == 1)[0],
             np.where(dss8 == -1)[0], *cls]).T
        # Class index of each element
        pat8 = np.argmax(bbox[:, 2:], 1)

        # 3-letter pattern: same procedure after translation of the
        # codes via 'dssp_to_abc'
        sse3 = to_onehot([abc_codes[dssp_to_abc[x]] for x in sse], (None, 3))
        dss3 = (sse3[1:] - sse3[:-1])
        cls = to_onehot(np.where(dss3 == -1)[1], (None, 3)).T
        bbox = np.array(
            [np.where(dss3 == 1)[0],
             np.where(dss3 == -1)[0], *cls]).T
        pat3 = np.argmax(bbox[:, 2:], 1)
        patterns.append((pat3, pat8))
    if len(patterns) == 0:
        print("No pattern find")
        return None, None, None, None
    # Patterns as strings ('c_') and as lists of class indices ('n_')
    # NOTE(review): 'weights' is never used in this function
    c_patterns3, n_patterns3, c_patterns8, n_patterns8, weights = [], [], [], [], []
    for pat3, pat8 in patterns:
        char_pat8 = "".join([sec_struct_codes[x] for x in pat8])
        char_pat3 = "".join(["abc"[x] for x in pat3])
        c_patterns8.append(char_pat8)
        n_patterns8.append(list(pat8))
        c_patterns3.append(char_pat3)
        n_patterns3.append(list(pat3))
    # Number of occurrences of each pattern string
    occ_sum8 = dict()
    occ_sum3 = dict()

    # Maps each pattern string to its (string, index list) tuple
    correspondings8 = dict()
    correspondings3 = dict()
    for c8, n8, c3, n3 in zip(c_patterns8, n_patterns8, c_patterns3,
                              n_patterns3):
        if len(c3) == 0:
            # Ignore empty patterns
            continue
        # Make sure that each pattern starts and ends with "c"/"C"
        # (index 2 in the 3-letter and index 7 in the 8-letter pattern)
        if c3[0] != "c":
            c3 = "c" + c3
            n3 = [2] + n3
        if c3[-1] != "c":
            c3 = c3 + "c"
            n3 = n3 + [2]
        if c8[0] != "C":
            c8 = "C" + c8
            n8 = [7] + n8
        if c8[-1] != "C":
            c8 = c8 + "C"
            n8 = n8 + [7]
        if c8 not in occ_sum8.keys():
            occ_sum8[c8] = 0
            correspondings8[c8] = c8, n8
        occ_sum8[c8] += 1
        if c3 not in occ_sum3.keys():
            occ_sum3[c3] = 0
            correspondings3[c3] = c3, n3
        occ_sum3[c3] += 1

    # Select the most frequent pattern
    # NOTE(review): If all patterns are empty, the dictionaries are
    # empty and 'max()' raises a ValueError
    c_pattern8, n_pattern8 = correspondings8[max(occ_sum8, key=occ_sum8.get)]
    c_pattern3, n_pattern3 = correspondings3[max(occ_sum3, key=occ_sum3.get)]

    push(f"{folder}/data.pt", "pattern",
         (c_pattern3, n_pattern3, c_pattern8, n_pattern8))

    return c_pattern3, n_pattern3, c_pattern8, n_pattern8, occ_sum3, occ_sum8
Ejemplo n.º 20
0
from matplotlib.lines import Line2D
from matplotlib import colors
import matplotlib as mpl
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.structure.io.xtc as xtc
from biotite.application.dssp import DsspApp

# Put here the path of the downloaded files
templ_file_path = "../../download/lysozyme_md.pdb"
traj_file_path = "../../download/lysozyme_md.xtc"

# Load the trajectory, the PDB file serves as template for it
xtc_file = xtc.XTCFile.read(traj_file_path)
traj = xtc_file.get_structure(template=strucio.load_structure(templ_file_path))
time = xtc_file.get_time()
# Keep only the amino acid atoms in each frame
traj = traj[:, struc.filter_amino_acids(traj)]

# DSSP does not assign an SSE to the last residue -> -1
sse = np.empty((traj.shape[0], struc.get_residue_count(traj) - 1), dtype='U1')
# Run DSSP separately for each frame of the trajectory
for idx, frame in enumerate(traj):
    app = DsspApp(frame)
    app.start()
    app.join()
    sse[idx] = app.get_sse()


# Matplotlib needs numbers to assign colors correctly
def sse_to_num(sse):
    num = np.empty(sse.shape, dtype=int)
    num[sse == 'C'] = 0
    num[sse == 'E'] = 1
Ejemplo n.º 21
0
def test_docking(flexible):
    """
    Test :class:`VinaApp` for the case of docking biotin to
    streptavidin.
    The output binding pose should be very similar to the pose in the
    PDB structure.
    If `flexible` is true, two residues within the binding pocket
    are set flexible.
    """
    # A structure of a streptavidin-biotin complex
    mmtf_file = mmtf.MMTFFile.read(join(data_dir("application"), "2rtg.mmtf"))
    structure = mmtf.get_structure(mmtf_file,
                                   model=1,
                                   extra_fields=["charge"],
                                   include_bonds=True)
    structure = structure[structure.chain_id == "B"]
    receptor = structure[struc.filter_amino_acids(structure)]
    ref_ligand = structure[structure.res_name == "BTN"]
    ref_ligand_coord = ref_ligand.coord

    ligand = info.residue("BTN")
    # Remove hydrogen atom that is missing in ref_ligand
    ligand = ligand[ligand.atom_name != "HO2"]

    if flexible:
        # Two residues within the binding pocket: ASN23, SER88
        flexible_mask = np.isin(receptor.res_id, (23, 88))
    else:
        flexible_mask = None

    app = VinaApp(ligand,
                  receptor,
                  struc.centroid(ref_ligand), [20, 20, 20],
                  flexible=flexible_mask)
    app.set_seed(0)
    app.start()
    app.join()

    test_ligand_coord = app.get_ligand_coord()
    test_receptor_coord = app.get_receptor_coord()
    energies = app.get_energies()
    # One energy value per model
    assert len(test_ligand_coord) == len(energies)
    assert len(test_receptor_coord) == len(energies)

    assert np.all(energies < 0)

    # Select best binding pose
    test_ligand_coord = test_ligand_coord[0]
    # Ignore the atoms that have NaN coordinates in the docking result
    not_nan_mask = ~np.isnan(test_ligand_coord).any(axis=-1)
    ref_ligand_coord = ref_ligand_coord[not_nan_mask]
    test_ligand_coord = test_ligand_coord[not_nan_mask]
    # Check if at least one atom is preserved
    # The first dimension is the atom dimension,
    # the second dimension always has the length 3
    assert test_ligand_coord.shape[0] > 0
    rmsd = struc.rmsd(ref_ligand_coord, test_ligand_coord)
    # The deviation of the best pose from the real conformation
    # should be less than 1 Å
    assert rmsd < 1.0

    if flexible:
        # Select best binding pose
        test_receptor_coord = test_receptor_coord[0]
        not_nan_mask = ~np.isnan(test_receptor_coord).any(axis=-1)
        ref_receptor_coord = receptor.coord[not_nan_mask]
        test_receptor_coord = test_receptor_coord[not_nan_mask]
        # Check if at least one atom is preserved
        assert test_receptor_coord.shape[0] > 0
        # The flexible residues should have a maximum deviation of 1 Å
        # from the original conformation
        assert np.max(struc.distance(test_receptor_coord,
                                     ref_receptor_coord)) < 1.0
    else:
        # Without flexible residues the receptor must be unchanged
        # in every model
        ref_receptor_coord = receptor.coord
        for model_coord in test_receptor_coord:
            assert np.array_equal(model_coord, ref_receptor_coord)
Ejemplo n.º 22
0
# Fetch animal lysozyme structures
# The files are downloaded in MMTF format into Biotite's temporary
# directory
lyso_files = rcsb.fetch(["1REX", "1AKI", "1DKJ", "1GD6"],
                        format="mmtf",
                        target_path=biotite.temp_dir())
# One organism name for each of the fetched structures
organisms = ["H. sapiens", "G. gallus", "C. viginianus", "B. mori"]

# Create a PB sequence from each structure
pb_seqs = []
for file_name in lyso_files:
    file = mmtf.MMTFFile()
    file.read(file_name)
    # Take only the first model into account
    array = mmtf.get_structure(file, model=1)
    # Remove everything but the first protein chain
    array = array[struc.filter_amino_acids(array)]
    array = array[array.chain_id == array.chain_id[0]]

    # Calculate backbone dihedral angles,
    # as the PBs are determined from them
    phi, psi, omega = struc.dihedral_backbone(array)
    # A PB requires the 8 phi/psi angles of 5 amino acids,
    # centered on the amino acid to calculate the PB for
    # Hence, the PBs are not defined for the two amino acids
    # at each terminus
    # Each row holds the angles for one PB,
    # the columns are filled with shifted slices of the angle arrays
    pb_angles = np.full((len(phi) - 4, 8), np.nan)
    pb_angles[:, 0] = psi[:-4]
    pb_angles[:, 1] = phi[1:-3]
    pb_angles[:, 2] = psi[1:-3]
    pb_angles[:, 3] = phi[2:-2]
    pb_angles[:, 4] = psi[2:-2]
Ejemplo n.º 23
0
def test_amino_acid_filter(sample_array):
    """
    The amino acid filter must select exactly 982 atoms
    of the sample structure.
    """
    amino_acid_mask = struc.filter_amino_acids(sample_array)
    filtered = sample_array[amino_acid_mask]
    assert len(filtered) == 982
Ejemplo n.º 24
0
# Index of the normal mode that is visualized:
# '-1' selects the last (and most significant) mode
MODE = -1
# Number of frames (models) for each oscillation
FRAMES = 60
# Maximum oscillation amplitude of an atom
# (Only the relative lengths of the ANM's eigenvectors are meaningful,
# their absolute values have no significance)
MAX_AMPLITUDE = 5

# Load the first model of the structure
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch(PDB_ID, "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)

# Select the amino acids of the first chain
is_amino_acid = struc.filter_amino_acids(structure)
is_first_chain = (structure.chain_id == structure.chain_id[0])
protein_chain = structure[is_amino_acid & is_first_chain]
# Select the CA atoms
ca = protein_chain[protein_chain.atom_name == "CA"]

# Load the eigenvectors for the CA atoms
# After transposing, the first axis indicates the mode
# and the second axis indicates the vector component
all_vectors = np.loadtxt(VECTOR_FILE, delimiter=",").T
# The last 6 modes are movements of the entire system,
# as a system with N atoms has only 3N - 6 degrees of freedom
# -> discard them
vectors = all_vectors[:-6]
# Vectors of the selected mode as (n,3) array
mode_vectors = vectors[MODE].reshape((-1, 3))
# Rescale, so that the largest vector has the length 'MAX_AMPLITUDE'
Ejemplo n.º 25
0
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.application.blast as blast
import biotite.application.clustalo as clustalo
import biotite.database.rcsb as rcsb
import biotite.database.entrez as entrez

# Get structure and sequence
# The first sequence in the file is used as query for the search
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif"))
sequence = pdbx.get_sequence(pdbx_file)[0]
# 'use_author_fields' is set to false,
# to ensure that values in the 'res_id' annotation point to the sequence
structure = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False)
structure = structure[struc.filter_amino_acids(structure)]

# Identity threshold for a sequence to be counted as homologous sequence
# NOTE(review): The name is misspelled ('THESHOLD'), it is kept as is,
# since the constant is referenced further below
IDENTITY_THESHOLD = 0.4
# Find homologous proteins in SwissProt via BLAST
app = blast.BlastWebApp("blastp", sequence, database="swissprot")
app.start()
app.join()
alignments = app.get_alignments()
# The hit lists start with the query sequence itself
hit_seqs = [sequence]
hit_ids = ["Query"]
hit_starts = [1]
for ali in alignments:
    identity = align.get_sequence_identity(ali)
    # Do not include the exact same sequence -> identity < 1.0
    if identity > IDENTITY_THESHOLD and identity < 1.0:
Ejemplo n.º 26
0
import biotite.database.rcsb as rcsb
import biotite.application.autodock as autodock

# Get the receptor structure
# and the original 'correct' conformation of the ligand
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf"))
structure = mmtf.get_structure(
    # Include formal charge for accurate partial charge calculation
    mmtf_file,
    model=1,
    include_bonds=True,
    extra_fields=["charge"])
# The asymmetric unit describes a streptavidin homodimer
# However, we are only interested in a single monomer
structure = structure[structure.chain_id == "B"]
# The receptor comprises only the amino acids of the chain
receptor = structure[struc.filter_amino_acids(structure)]

# The biotin molecule bound in the structure is the reference ligand,
# its centroid is used as center of the search space
ref_ligand = structure[structure.res_name == "BTN"]
ref_ligand_center = struc.centroid(ref_ligand)

# Independently, get the ligand without optimized conformation
# from the chemical components dictionary
ligand = info.residue("BTN")

# Search for a binding mode in a 20 Å radius
# of the original ligand position
app = autodock.VinaApp(ligand, receptor, ref_ligand_center, [20, 20, 20])
# For reproducibility
app.set_seed(0)
# This is the maximum number:
# Vina may find less interesting binding modes
Ejemplo n.º 27
0
import biotite.database.rcsb as rcsb
import ammolite

# Size that is passed to 'ammolite.show()'
PNG_SIZE = (800, 800)
# NOTE(review): Not used in the visible part of the script
BACKBONE_ATOMS = ["N", "C", "O", "HA"]

########################################################################

# Load the first model of the structure including its bonds
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2RTG", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
# Select one monomer of the dimer
structure = structure[structure.chain_id == structure.chain_id[0]]
# Remove water and ions
structure = structure[~struc.filter_solvent(structure)
                      & ~struc.filter_monoatomic_ions(structure)]
# Masks for the protein (amino acids) and the biotin ligand
strep_mask = struc.filter_amino_acids(structure)
biotin_mask = (structure.res_name == "BTN")

# Transfer the structure to PyMOL and style it:
# The protein is shown as half-transparent cartoon,
# the carbon atoms of protein and ligand get different colors
pymol_obj = ammolite.PyMOLObject.from_structure(structure)
pymol_obj.cartoon("loop", strep_mask)
pymol_obj.set("cartoon_transparency", 0.5)
pymol_obj.color("salmon", strep_mask & (structure.element == "C"))
pymol_obj.color("forest", biotin_mask & (structure.element == "C"))
# Focus the view on the ligand
pymol_obj.zoom(biotin_mask, buffer=5.0)
ammolite.show(PNG_SIZE)

########################################################################

# Find the hydrogen bonds between the protein and the ligand
bonds = struc.hbond(structure, strep_mask, biotin_mask)

res_ids = []
Ejemplo n.º 28
0
def rmsf_plot(topology,
              xtc_traj,
              start_frame=None,
              stop_frame=None,
              write_dat_files=None):
    """
    Plot the RMSD and the radius of gyration over time as well as the
    RMSF of the CA atoms for the protein in a trajectory.

    Parameters
    ----------
    topology
        Path of the structure file that serves as template for the
        trajectory.
    xtc_traj
        Path of the XTC trajectory file.
    start_frame, stop_frame : int, optional
        The first and the last frame to read.
        By default, these values are not restricted.
    write_dat_files : bool, optional
        If true, the RMSD values are written into ``rmsd.dat`` and the
        RMSF values are written into ``rmsf.dat`` in the working
        directory.
    """
    # Gromacs does not set the element symbol in its PDB files,
    # but Biotite guesses the element names from the atom names,
    # emitting a warning
    template = strucio.load_structure(topology)

    # The structure still has water and ions, that are not needed for our
    # calculations, we are only interested in the protein itself
    # These are removed for the sake of computational speed using a boolean
    # mask
    protein_mask = struc.filter_amino_acids(template)
    template = template[protein_mask]
    residue_names = struc.get_residues(template)[1]

    # 'stop_frame' is passed on incremented by one
    # (presumably because the 'stop' of 'read()' is exclusive)
    # 'None' must be kept as it is, as it cannot be incremented
    stop = None if stop_frame is None else stop_frame + 1
    xtc_file = XTCFile()
    xtc_file.read(xtc_traj,
                  atom_i=np.where(protein_mask)[0],
                  start=start_frame,
                  stop=stop)

    trajectory = xtc_file.get_structure(template)

    time = xtc_file.get_time()  # Get simulation time for plotting purposes

    trajectory = struc.remove_pbc(trajectory)
    # Superimpose all frames onto the first frame
    trajectory, transform = struc.superimpose(trajectory[0], trajectory)
    rmsd = struc.rmsd(trajectory[0], trajectory)
    _plot_line(time, rmsd,
               (time[0], time[-1]), (0, 2),
               "Time (ps)", "RMSD (Å)")

    radius = struc.gyration_radius(trajectory)
    _plot_line(time, radius,
               (time[0], time[-1]), (14.0, 14.5),
               "Time (ps)", "Radius of gyration (Å)")

    # In all models, mask the CA atoms
    ca_trajectory = trajectory[:, trajectory.atom_name == "CA"]
    rmsf = struc.rmsf(struc.average(ca_trajectory), ca_trajectory)
    res_count = struc.get_residue_count(trajectory)
    _plot_line(np.arange(1, res_count + 1), rmsf,
               (1, res_count), (0, 1.5),
               "Residue", "RMSF (Å)")

    if write_dat_files:
        # Write RMSD *.dat file
        if start_frame is None or stop_frame is None:
            # The frame range is not completely known
            # -> simply enumerate the frames that were read
            frames = np.arange(len(rmsd))
        else:
            frames = np.array(range(start_frame - 1, stop_frame), dtype=int)
            frames[0] = 0
        df = pd.DataFrame(data=rmsd, index=frames, columns=["RMSD Values"])
        df.index.name = 'Frames'
        df.to_csv('rmsd.dat', header=True, index=True, sep='\t', mode='w')

        # Write RMSF *.dat file
        df1 = pd.DataFrame(data=rmsf,
                           index=residue_names,
                           columns=["RMSF Values"])
        df1.index.name = 'Residues'
        df1.to_csv('rmsf.dat', header=True, index=True, sep='\t', mode='w')
    plt.show()


def _plot_line(x, y, xlim, ylim, xlabel, ylabel):
    """
    Create a new figure containing a single line plot with the given
    axis limits and axis labels.
    """
    figure = plt.figure(figsize=(6, 3))
    ax = figure.add_subplot(111)
    ax.plot(x, y, color=biotite.colors["dimorange"])
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    figure.tight_layout()
Ejemplo n.º 29
0
# NOTE(review): 'templ_file_path' must be defined before this point.
# The same file is loaded a second time further below ('protein').
template_model = strucio.load_structure(templ_file_path)
#templ_file_path = "output/npt.gro"
#traj_file_path  = "output/kinase_dimer_md_center.xtc"
#traj_file_path  = "output/kinase_dimer_nowater_fit.xtc"
#traj_file_path  = "output/kinase_dimer_md.xtc"
traj_file_path = "output/dimer_nopbc_cluster_fit.xtc"

# Gromacs does not set the element symbol in its PDB files,
# but Biotite guesses the element names from the atom names,
# emitting a warning
protein = strucio.load_structure(templ_file_path)
# The structure still has water and ions, that are not needed for our
# calculations, we are only interested in the protein itself
# These are removed for the sake of computational speed using a boolean
# mask
protein_mask = struc.filter_amino_acids(protein)
template = protein[protein_mask]
# We could have loaded the trajectory also with
# 'strucio.load_structure()', but in this case we only want to load
# those coordinates that belong to the already selected atoms of the
# template structure.
# Hence, we use the 'XTCFile' class directly to load the trajectory
# This gives us the additional option that allows us to select the
# coordinates belonging to the amino acids.

print(" .. loading trajectory ...")
xtc_file = xtc.XTCFile()
#xtc_file.read(traj_file_path, 0, 10, atom_i=np.where(protein_mask)[0])
# 'atom_i' takes the indices of the atoms selected by the protein mask
xtc_file.read(traj_file_path, atom_i=np.where(protein_mask)[0])
#xtc_file.read(traj_file_path)
Ejemplo n.º 30
0
    "S": "c",
    "H": "a",
    "E": "b",
    "G": "c",
    "B": "b",
    "T": "c",
    "C": "c"
}

# Fetch and load structure
file_name = rcsb.fetch("1QGD", "mmtf", biotite.temp_dir())
mmtf_file = mmtf.MMTFFile()
mmtf_file.read(file_name)
array = mmtf.get_structure(mmtf_file, model=1)
# Transketolase homodimer
tk_dimer = array[struc.filter_amino_acids(array)]
# Transketolase monomer
tk_mono = tk_dimer[tk_dimer.chain_id == "A"]

# The chain ID corresponding to each residue
# The residue starts are indices into 'tk_dimer',
# hence the chain IDs must be taken from 'tk_dimer' as well
chain_id_per_res = tk_dimer.chain_id[struc.get_residue_starts(tk_dimer)]
sse = mmtf_file["secStructList"]
# Remove the entries without secondary structure information
sse = sse[sse != -1]
sse = sse[chain_id_per_res == "A"]
# Translate the MMTF codes into DSSP letters
# and the DSSP letters into the 'a', 'b', 'c' notation
sse = np.array([sec_struct_codes[code] for code in sse], dtype="U1")
sse = np.array([dssp_to_abc[e] for e in sse], dtype="U1")


# Helper function to convert secondary structure array to annotation
# and visualize it