Exemple #1
0
def test_conversion(path, model):
    """
    Round-trip a structure through a freshly written PDBx file and
    expect box, bonds, annotations and coordinates to be preserved,
    even when the ``auth_atom_id`` column is missing on re-read.
    """
    source_file = pdbx.PDBxFile.read(path)

    try:
        ref_atoms = pdbx.get_structure(source_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The models contain different numbers of atoms, so the file
        # cannot be parsed into an AtomArrayStack
        # -> skip this test case
        return

    roundtrip_file = pdbx.PDBxFile()
    pdbx.set_structure(roundtrip_file, ref_atoms, data_block="test")

    # Drop one optional 'auth' column, so that reading has to fall
    # back to the corresponding 'label' column
    atom_site = roundtrip_file.get_category("atom_site", "test")
    atom_site.pop("auth_atom_id")
    roundtrip_file.set_category("atom_site", atom_site, "test")

    test_atoms = pdbx.get_structure(roundtrip_file, model=model)

    assert ref_atoms.array_length() > 0
    if ref_atoms.box is not None:
        assert np.allclose(ref_atoms.box, test_atoms.box)
    assert ref_atoms.bonds == test_atoms.bonds
    for name in ref_atoms.get_annotation_categories():
        ref_values = ref_atoms.get_annotation(name).tolist()
        test_values = test_atoms.get_annotation(name).tolist()
        assert ref_values == test_values
    assert ref_atoms.coord.tolist() == test_atoms.coord.tolist()
Exemple #2
0
def test_extra_fields():
    """
    Check that the optional annotations ``atom_id``, ``b_factor``,
    ``occupancy`` and ``charge`` survive writing a structure into a
    new PDBx file and reading it back.

    The file is read and round-tripped only once; the per-annotation
    assertions run before the overall equality check, so a failure
    points at the offending annotation.
    """
    def parse(cif_file):
        # A fresh list per call keeps both reads independent
        return pdbx.get_structure(
            cif_file,
            extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
        )

    path = join(data_dir("structure"), "1l2y.cif")
    stack1 = parse(pdbx.PDBxFile.read(path))

    pdbx_file = pdbx.PDBxFile()
    pdbx.set_structure(pdbx_file, stack1, data_block="test")
    stack2 = parse(pdbx_file)

    assert stack1.ins_code.tolist() == stack2.ins_code.tolist()
    assert stack1.atom_id.tolist() == stack2.atom_id.tolist()
    # Floating point annotations are compared with tolerance
    assert stack1.b_factor.tolist() == approx(stack2.b_factor.tolist())
    assert stack1.occupancy.tolist() == approx(stack2.occupancy.tolist())
    assert stack1.charge.tolist() == stack2.charge.tolist()
    assert stack1 == stack2
Exemple #3
0
def test_conversion(path, model):
    """
    Write a parsed structure into a new PDBx file, read it back and
    expect box, bonds, annotations and coordinates to be unchanged.
    """
    original_file = pdbx.PDBxFile.read(path)

    try:
        expected = pdbx.get_structure(original_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The models contain different numbers of atoms, so the file
        # cannot be parsed into an AtomArrayStack
        # -> skip this test case
        return

    rewritten_file = pdbx.PDBxFile()
    pdbx.set_structure(rewritten_file, expected, data_block="test")
    actual = pdbx.get_structure(rewritten_file, model=model)

    if expected.box is not None:
        assert np.allclose(expected.box, actual.box)
    assert expected.bonds == actual.bonds
    for name in expected.get_annotation_categories():
        expected_values = expected.get_annotation(name).tolist()
        actual_values = actual.get_annotation(name).tolist()
        assert expected_values == actual_values
    assert expected.coord.tolist() == actual.coord.tolist()
Exemple #4
0
def test_extra_fields():
    """
    Expect a stack with extra annotations to be equal to itself after
    being written into a new PDBx file and read back.
    """
    def parse(cif_file):
        # A fresh list per call, exactly as two literal lists would be
        return pdbx.get_structure(
            cif_file,
            extra_fields=["atom_id", "b_factor", "occupancy", "charge"]
        )

    source_file = pdbx.PDBxFile()
    source_file.read(join(data_dir, "1l2y.cif"))
    original = parse(source_file)

    target_file = pdbx.PDBxFile()
    pdbx.set_structure(target_file, original, data_block="test")
    restored = parse(target_file)

    assert original == restored
Exemple #5
0
def test_conversion(path, single_model):
    """
    Round-trip a structure through a new PDBx file and expect
    identical annotations and coordinates.

    With `single_model` only the first model is used, otherwise all
    models are requested.
    """
    if single_model:
        model = 1
    else:
        model = None

    source_file = pdbx.PDBxFile()
    source_file.read(path)
    expected = pdbx.get_structure(source_file, model=model)

    target_file = pdbx.PDBxFile()
    pdbx.set_structure(target_file, expected, data_block="test")
    actual = pdbx.get_structure(target_file, model=model)

    for name in expected.get_annotation_categories():
        expected_values = expected.get_annotation(name).tolist()
        actual_values = actual.get_annotation(name).tolist()
        assert expected_values == actual_values
    assert expected.coord.tolist() == actual.coord.tolist()
Exemple #6
0
def test_fetch(format, as_file_like):
    """
    Fetch entry ``1l2y`` in the given format, either as file-like
    object or into the temporary directory, and parse its structure.
    """
    target_dir = None if as_file_like else biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    # Select the I/O module and an empty file object for the format
    if format == "pdb":
        io_module, structure_file = pdb, pdb.PDBFile()
    elif format == "pdbx":
        io_module, structure_file = pdbx, pdbx.PDBxFile()
    elif format == "mmtf":
        io_module, structure_file = mmtf, mmtf.MMTFFile()
    else:
        # Other formats are fetched only
        return

    structure_file.read(fetched)
    io_module.get_structure(structure_file)
Exemple #7
0
def test_array_conversion(path):
    """
    Write a structure into a PDBQT file, read it back and expect
    coordinates and all annotations except ``element`` to be restored.
    """
    pdbx_file = pdbx.PDBxFile.read(path)
    ref_structure = pdbx.get_structure(pdbx_file,
                                       model=1,
                                       extra_fields=["charge"])
    ref_structure.bonds = struc.connect_via_residue_names(ref_structure)

    pdbqt_file = pdbqt.PDBQTFile()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Ignore warnings about atoms not parametrized
        mask = pdbqt.set_structure(pdbqt_file, ref_structure)
    # Restrict the reference to the atoms selected by the returned mask
    ref_structure = ref_structure[mask]

    # The context manager closes the temporary file even if writing
    # or parsing raises
    with TemporaryFile("r+") as temp:
        pdbqt_file.write(temp)
        temp.seek(0)
        pdbqt_file = pdbqt.PDBQTFile.read(temp)
        test_structure = pdbqt.get_structure(pdbqt_file, model=1)

    assert np.allclose(test_structure.coord, ref_structure.coord)
    for category in test_structure.get_annotation_categories():
        if category == "element":
            # PDBQT uses special atom types, which replace the usual
            # elements
            # -> there cannot be equality of the 'element' annotation
            continue
        try:
            assert np.array_equal(test_structure.get_annotation(category),
                                  ref_structure.get_annotation(category))
        except AssertionError:
            # Name the failing category before propagating the failure
            print(f"Inequality in '{category}' category")
            raise
Exemple #8
0
def test_PDBx_consistency(format):
    """
    Expect a trajectory file (TRR or XTC) to yield the same
    annotations and approximately the same coordinates as the
    corresponding mmCIF file.

    Raises
    ------
    ValueError
        If `format` is neither ``"trr"`` nor ``"xtc"``.
    """
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(join(data_dir, "1l2y.cif"))
    array1 = pdbx.get_structure(pdbx_file)
    # Trajectory files carry no annotations, so a template is required
    template = pdbx.get_structure(pdbx_file, model=1)
    if format == "trr":
        traj_file = trr.TRRFile()
        traj_file.read(join(data_dir, "1l2y.trr"))
    elif format == "xtc":
        traj_file = xtc.XTCFile()
        traj_file.read(join(data_dir, "1l2y.xtc"))
    else:
        # Fail clearly instead of hitting an unbound 'traj_file' below
        raise ValueError(f"Unsupported trajectory format '{format}'")
    array2 = traj_file.get_structure(template)
    for cat in array1.get_annotation_categories():
        assert array1.get_annotation(cat).tolist() == \
               array2.get_annotation(cat).tolist()
    # The coordinates do not depend on the category
    # -> compare them once, after the loop
    assert array1.coord == pytest.approx(array2.coord)
Exemple #9
0
def load_structure(fpath, chain=None):
    """
    Load a single chain of the first model from a structure file,
    with solvent atoms removed.

    Args:
        fpath: filepath to either pdb or cif file; the format is chosen
            from the filename suffix
        chain: the chain id; if omitted, the first chain found is used
    Returns:
        biotite.structure.AtomArray
    Raises:
        ValueError: if the file suffix is neither 'cif' nor 'pdb', if
            the file contains no chains or if the requested chain is
            not present
    """
    if fpath.endswith('cif'):
        with open(fpath) as fin:
            pdbxf = pdbx.PDBxFile.read(fin)
        structure = pdbx.get_structure(pdbxf, model=1)
    elif fpath.endswith('pdb'):
        with open(fpath) as fin:
            pdbf = pdb.PDBFile.read(fin)
        structure = pdb.get_structure(pdbf, model=1)
    else:
        # Without this branch 'structure' would be unbound below and
        # the caller would get an unrelated error
        raise ValueError(
            f'Unsupported file format: {fpath} (expected cif or pdb)'
        )
    issolvent = filter_solvent(structure)
    structure = structure[~issolvent]
    chains = get_chains(structure)
    print(f'Found {len(chains)} chains:', chains, '\n')
    if len(chains) == 0:
        raise ValueError('No chains found in the input file.')
    if chain is None:
        chain = chains[0]
    if chain not in chains:
        raise ValueError(f'Chain {chain} not found in input file')
    structure = structure[structure.chain_id == chain]
    print(f'Loaded chain {chain}\n')
    return structure
Exemple #10
0
def test_fetch(format, as_file_like):
    """
    Fetch entry ``1l2y`` in the given format, either as file-like
    object or into the temporary directory, and parse its content.
    """
    target_dir = None if as_file_like else biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    if format == "fasta":
        fasta_file = fasta.FastaFile.read(fetched)
        # Test if the file contains any sequences
        assert len(fasta.get_sequences(fasta_file)) > 0
        return

    # Structure formats: select the I/O module and its file class
    if format == "pdb":
        io_module, file_class = pdb, pdb.PDBFile
    elif format == "pdbx":
        io_module, file_class = pdbx, pdbx.PDBxFile
    elif format == "mmtf":
        io_module, file_class = mmtf, mmtf.MMTFFile
    else:
        # Other formats are fetched only
        return

    io_module.get_structure(file_class.read(fetched))
Exemple #11
0
def test_command(command_name, kwargs):
    """
    Run the named :class:`PyMOLObject` command with the given keyword
    arguments on an object created from ``1l2y`` and expect no
    exception.
    """
    reset()
    cif_file = pdbx.PDBxFile.read(join(data_dir, "1l2y.cif"))
    atoms = pdbx.get_structure(cif_file)
    atoms.bonds = struc.connect_via_residue_names(atoms)
    pymol_obj = PyMOLObject.from_structure(atoms)
    # Look the command up on the class and pass the object explicitly
    getattr(PyMOLObject, command_name)(pymol_obj, **kwargs)
Exemple #12
0
def test_superimposition_array(path):
    """
    Rotate and translate a copy of a structure and expect that
    superimposing it onto the original, based on the CA atoms,
    gives an RMSD of approximately zero.
    """
    cif_file = pdbx.PDBxFile()
    cif_file.read(path)
    fixed = pdbx.get_structure(cif_file, model=1)

    # Displace a copy of the structure
    rotated = struc.rotate(fixed.copy(), (1, 2, 3))
    mobile = struc.translate(rotated, (1, 2, 3))

    ca_mask = (mobile.atom_name == "CA")
    fitted, transformation = struc.superimpose(fixed, mobile, ca_mask)
    assert struc.rmsd(fixed, fitted) == pytest.approx(0)

    # Applying the returned transformation must give the same result
    refitted = struc.superimpose_apply(mobile, transformation)
    assert struc.rmsd(fixed, refitted) == pytest.approx(0)
Exemple #13
0
def test_pdbx_consistency(path, single_model):
    """
    Expect a PDB file and the mmCIF file with the same base name to
    yield identical annotations and coordinates.
    """
    if single_model:
        model = 1
    else:
        model = None
    base_name = splitext(path)[0]
    cif_path = base_name + ".cif"

    pdb_file = pdb.PDBFile()
    pdb_file.read(path)
    from_pdb = pdb_file.get_structure(model=model)

    cif_file = pdbx.PDBxFile()
    cif_file.read(cif_path)
    from_cif = pdbx.get_structure(cif_file, model=model)

    for name in from_pdb.get_annotation_categories():
        pdb_values = from_pdb.get_annotation(name).tolist()
        cif_values = from_cif.get_annotation(name).tolist()
        assert pdb_values == cif_values
    assert from_pdb.coord.tolist() == from_cif.coord.tolist()
Exemple #14
0
def test_both_directions(path, state):
    """
    Convert a structure into a PyMOL object and back and expect
    annotations, coordinates and bonds to be restored.
    """
    ref_array = pdbx.get_structure(pdbx.PDBxFile.read(path), model=state)
    ref_array.bonds = struc.connect_via_residue_names(ref_array)

    reset()
    pymol_object = PyMOLObject.from_structure(ref_array)
    test_array = pymol_object.to_structure(state=state, include_bonds=True)

    for name in ref_array.get_annotation_categories():
        is_equal = (
            test_array.get_annotation(name) == ref_array.get_annotation(name)
        )
        assert is_equal.all()
    assert np.allclose(test_array.coord, ref_array.coord)
    assert test_array.bonds == ref_array.bonds
Exemple #15
0
def test_pdbx_consistency(path, single_model):
    """
    Expect an MMTF file and the mmCIF file with the same base name to
    yield identical annotations and coordinates that agree within
    ``1e-3``.

    With `single_model` only the first model is parsed, otherwise all
    models are requested.
    """
    # 'single_model=True' must select a single model (model 1),
    # 'None' requests all models
    model = 1 if single_model else None
    cif_path = splitext(path)[0] + ".cif"
    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(path)
    a1 = mmtf.get_structure(mmtf_file, model=model)
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(cif_path)
    a2 = pdbx.get_structure(pdbx_file, model=model)
    for category in a1.get_annotation_categories():
        assert a1.get_annotation(category).tolist() == \
               a2.get_annotation(category).tolist()
    # The coordinates are compared with a tolerance, not exactly
    assert a1.coord.flatten().tolist() == \
           approx(a2.coord.flatten().tolist(), abs=1e-3)
Exemple #16
0
def test_to_biotite(path, altloc, state):
    """
    Load a file directly into PyMOL, convert the object into a
    structure and expect the same annotations (except ``altloc_id``)
    and coordinates as obtained from parsing the file.
    """
    ref_array = pdbx.get_structure(
        pdbx.PDBxFile.read(path), model=state, altloc=altloc
    )

    reset()
    cmd.load(path, "test")
    test_array = PyMOLObject("test").to_structure(state=state, altloc=altloc)

    for name in ref_array.get_annotation_categories():
        if name == "altloc_id":
            # This annotation is excluded from the comparison
            continue
        is_equal = (
            test_array.get_annotation(name) == ref_array.get_annotation(name)
        )
        assert is_equal.all()
    assert np.allclose(test_array.coord, ref_array.coord)
Exemple #17
0
def test_pdbx_consistency(path, single_model):
    """
    Expect an MMTF file and the mmCIF file with the same base name to
    yield the same box, identical annotations and coordinates that
    agree within ``1e-3``.

    With `single_model` only the first model is parsed, otherwise all
    models are requested.
    """
    # 'single_model=True' must select a single model (model 1),
    # 'None' requests all models
    model = 1 if single_model else None
    cif_path = splitext(path)[0] + ".cif"
    mmtf_file = mmtf.MMTFFile.read(path)
    a1 = mmtf.get_structure(mmtf_file, model=model)
    pdbx_file = pdbx.PDBxFile.read(cif_path)
    a2 = pdbx.get_structure(pdbx_file, model=model)
    # Sometimes mmCIF files can have 'cell' entry
    # but corresponding MMTF file has not 'unitCell' entry
    # -> Do not assert for dummy entry in mmCIF file
    # (all vector elements = {0, 1})
    if a2.box is not None and not ((a2.box == 0) | (a2.box == 1)).all():
        assert np.allclose(a1.box, a2.box)
    for category in a1.get_annotation_categories():
        assert a1.get_annotation(category).tolist() == \
               a2.get_annotation(category).tolist()
    # The coordinates are compared with a tolerance, not exactly
    assert a1.coord.flatten().tolist() == \
           approx(a2.coord.flatten().tolist(), abs=1e-3)
Exemple #18
0
def test_get_assembly(model):
    """
    Test whether :func:`get_assembly()` yields as many peptide chains
    as stated in the ``_pdbx_struct_assembly.oligomeric_count`` field.
    In addition, check that the number of atoms in the entire assembly
    is a multiple of the number of atoms in a monomer.
    """
    cif_path = join(data_dir("structure"), "1f2n.cif")
    pdbx_file = pdbx.PDBxFile.read(cif_path)

    assemblies = pdbx_file.get_category("pdbx_struct_assembly",
                                        expect_looped=True)
    # A stack is expected if no specific model is requested
    if model is None:
        expected_type = struc.AtomArrayStack
    else:
        expected_type = struc.AtomArray

    # Test each available assembly
    assembly_infos = zip(assemblies["id"], assemblies["oligomeric_count"])
    for assembly_id, ref_oligomer_count in assembly_infos:
        print("Assembly ID:", assembly_id)
        try:
            assembly = pdbx.get_assembly(pdbx_file,
                                         assembly_id=assembly_id,
                                         model=model)
        except biotite.InvalidFileError:
            if model is not None:
                raise
            # The models contain different numbers of atoms, so the
            # file cannot be parsed into an AtomArrayStack
            # -> skip this test case
            return
        amino_acid_mask = struc.filter_amino_acids(assembly)
        protein_assembly = assembly[..., amino_acid_mask]
        test_oligomer_count = struc.get_chain_count(protein_assembly)

        assert isinstance(assembly, expected_type)
        assert test_oligomer_count == int(ref_oligomer_count)

        # The atom count of the entire assembly should be a multiple
        # of the atom count of a monomer
        monomer_atom_count = pdbx.get_structure(pdbx_file).array_length()
        assert assembly.array_length() % monomer_atom_count == 0
Exemple #19
0
def test_pdbx_consistency(path, model):
    """
    Expect an MMTF file and the mmCIF file with the same base name to
    yield the same box and annotations, and coordinates that agree
    within ``1e-3``.
    Differences in the ``hetero`` annotation are only accepted for
    residues with a peptide, DNA or RNA link type.
    """
    cif_path = splitext(path)[0] + ".cif"
    mmtf_file = mmtf.MMTFFile.read(path)
    try:
        from_mmtf = mmtf.get_structure(mmtf_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The models contain different numbers of atoms, so the file
        # cannot be parsed into an AtomArrayStack
        # -> skip this test case
        return

    cif_file = pdbx.PDBxFile.read(cif_path)
    from_cif = pdbx.get_structure(cif_file, model=model)

    # Sometimes mmCIF files can have 'cell' entry
    # but corresponding MMTF file has not 'unitCell' entry
    # -> Do not assert for dummy entry in mmCIF file
    # (all vector elements = {0, 1})
    cif_box = from_cif.box
    if cif_box is not None and not ((cif_box == 0) | (cif_box == 1)).all():
        assert np.allclose(from_mmtf.box, cif_box)

    # MMTF might assign some residues, that PDBx assigns as 'hetero',
    # as 'non-hetero' if they are RNA/DNA or peptide linking
    if from_mmtf.hetero.tolist() != from_cif.hetero.tolist():
        allowed_link_types = [
            "L-PEPTIDE LINKING", "PEPTIDE LINKING", "DNA LINKING",
            "RNA LINKING"
        ]
        conflict_mask = from_mmtf.hetero != from_cif.hetero
        for res_name in np.unique(from_mmtf.res_name[conflict_mask]):
            assert info.link_type(res_name) in allowed_link_types

    # Test the remaining categories
    for name in from_mmtf.get_annotation_categories():
        if name == "hetero":
            continue
        mmtf_values = from_mmtf.get_annotation(name).tolist()
        cif_values = from_cif.get_annotation(name).tolist()
        assert mmtf_values == cif_values
    assert from_mmtf.coord.flatten().tolist() == \
           approx(from_cif.coord.flatten().tolist(), abs=1e-3)
Exemple #20
0
def test_superimposition_stack(ca_only):
    """
    Superimpose all but the first model of ``1l2y`` onto the first
    model and expect a lower RMSD than for the models as given in the
    file.
    """
    cif_file = pdbx.PDBxFile()
    cif_file.read(join(data_dir, "1l2y.cif"))
    stack = pdbx.get_structure(cif_file)
    fixed, mobile = stack[0], stack[1:]

    mask = (mobile.atom_name == "CA") if ca_only else None
    fitted, transformation = struc.superimpose(fixed, mobile, mask)

    if not ca_only:
        # The superimpositions are better than the superimpositions
        # in the structure file
        assert (struc.rmsd(fixed, fitted) < struc.rmsd(fixed, mobile)).all()
    else:
        # The superimpositions are better for most cases than the
        # superimpositions in the structure file
        # -> Use average
        fitted_mean = np.mean(struc.rmsd(fixed, fitted))
        file_mean = np.mean(struc.rmsd(fixed, mobile))
        assert fitted_mean < file_mean
Exemple #21
0
def test_to_pymol(path):
    """
    Convert a parsed structure into a chempy model and compare it,
    atom by atom, with the model PyMOL creates when loading the same
    file.
    """
    reset()
    cmd.load(path, "test")
    ref_model = cmd.get_model("test", 1)

    atom_array = pdbx.get_structure(
        pdbx.PDBxFile.read(path),
        model=1,
        extra_fields=["b_factor", "occupancy", "charge"],
    )
    test_model = convert_to_chempy_model(atom_array)

    test_atoms = test_model.atom
    # Only reference atoms without or with altloc 'A' are compared
    ref_atoms = [a for a in ref_model.atom if a.alt in ("", "A")]
    assert len(test_atoms) == len(ref_atoms)

    for test_atom, ref_atom in zip(test_atoms, ref_atoms):
        for attr in ("symbol", "name", "resn", "ins_code", "resi_number"):
            assert getattr(test_atom, attr) == getattr(ref_atom, attr)
        # Floating point attributes are compared with tolerance
        for attr in ("b", "q"):
            assert getattr(test_atom, attr) \
                == pytest.approx(getattr(ref_atom, attr))
        for attr in ("hetatm", "chain"):
            assert getattr(test_atom, attr) == getattr(ref_atom, attr)
        assert test_atom.coord == pytest.approx(ref_atom.coord)
Exemple #22
0
def test_select(random_seed):
    """
    Expect the selection created by :meth:`PyMOLObject.where()` from a
    random boolean mask to address exactly the masked atoms.
    """
    reset()

    cif_file = pdbx.PDBxFile.read(join(data_dir, "1l2y.cif"))
    array = pdbx.get_structure(cif_file, model=1)
    # Add bonds to avoid warning
    array.bonds = struc.connect_via_residue_names(array)
    atom_count = array.array_length()

    # The B factor indicates whether the selection was correctly
    # applied: it starts at 0 for every atom
    array.set_annotation("b_factor", np.zeros(atom_count))
    pymol_object = PyMOLObject.from_structure(array)

    np.random.seed(random_seed)
    ref_mask = np.random.choice([False, True], atom_count)

    # The method that is actually tested
    test_selection = pymol_object.where(ref_mask)
    # Set B factor of all masked atoms to 1
    cmd.alter(test_selection, "b=1.0")
    # Get the mask back again from the B factor
    restored = pymol_object.to_structure(state=1)
    test_mask = (restored.b_factor == 1.0)

    assert np.array_equal(test_mask, ref_mask)
Exemple #23
0
def test_pdbx_consistency(path, model):
    """
    Expect a PDB file and the mmCIF file with the same base name to
    yield the same box, bonds, annotations and coordinates.
    """
    cif_path = splitext(path)[0] + ".cif"
    pdb_file = pdb.PDBFile.read(path)
    try:
        from_pdb = pdb_file.get_structure(model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The models contain different numbers of atoms, so the file
        # cannot be parsed into an AtomArrayStack
        # -> skip this test case
        return

    from_cif = pdbx.get_structure(pdbx.PDBxFile.read(cif_path), model=model)

    if from_cif.box is not None:
        assert np.allclose(from_pdb.box, from_cif.box)
    assert from_pdb.bonds == from_cif.bonds
    for name in from_pdb.get_annotation_categories():
        pdb_values = from_pdb.get_annotation(name).tolist()
        cif_values = from_cif.get_annotation(name).tolist()
        assert pdb_values == cif_values
    assert from_pdb.coord.tolist() == from_cif.coord.tolist()
Exemple #24
0
# :class:`ndarray`.
# Setting/adding a category in the file is done in a similar way:

cif_file["audit_author"] = {
    "name": ["Doe, Jane", "Doe, John"],
    "pdbx_ordinal": ["1", "2"]
}

########################################################################
# In most applications only the structure itself
# (stored in the *atom_site* category) is relevant.
# :func:`get_structure()` and :func:`set_structure()` are convenience
# functions that are used to convert the
# ``atom_site`` category into an atom array (stack) and vice versa.

tc5b = pdbx.get_structure(cif_file)
# Do some fancy stuff
pdbx.set_structure(cif_file, tc5b)

########################################################################
# :func:`get_structure()` automatically creates an
# :class:`AtomArrayStack`, even if the file actually contains only a
# single model.
# If you would like to have an :class:`AtomArray` instead, you have to
# specify the :obj:`model` parameter.
#
# .. currentmodule:: biotite.structure.io.mmtf
#
# If you want to parse a large batch of structure files or you have to
# load very large structure files, the usage of PDB or mmCIF files might
# be too slow for your requirements.
Exemple #25
0
# :class:`ndarray`.
# Setting/adding a category in the file is done in a similar way:

file["audit_author"] = {
    "name": ["Doe, Jane", "Doe, John"],
    "pdbx_ordinal": ["1", "2"]
}

########################################################################
# In most applications only the structure itself
# (stored in the *atom_site* category) is relevant.
# :func:`get_structure()` and :func:`set_structure()` are convenience
# functions that are used to convert the
# *atom_site* category into an atom array (stack) and vice versa.

tc5b = pdbx.get_structure(file)
# Do some fancy stuff
pdbx.set_structure(file, tc5b)

########################################################################
# :func:`get_structure()` automatically creates an
# :class:`AtomArrayStack`, even if the file actually contains only a
# single model.
# If you would like to have an :class:`AtomArray` instead, you have to
# specify the :obj:`model` parameter.
#
# .. currentmodule:: biotite.structure.io.mmtf
#
# If you want to parse a large batch of structure files or you have to
# load very large structure files, the usage of PDB or mmCIF files might
# be too slow for your requirements. In this case you probably might
Exemple #26
0
from os.path import join
import numpy as np
import pytest
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
from ammolite import PyMOLObject, reset
from .util import data_dir

# Module-level structure and selections that are referenced by the
# parametrized command arguments below
pdbx_file = pdbx.PDBxFile.read(join(data_dir, "1l2y.cif"))
structure = pdbx.get_structure(pdbx_file)
# Selection given as a mask derived from the residue IDs
mask = structure.res_id < 10
# Selection given as a string
# NOTE(review): presumably a PyMOL selection expression; it appears to
# include residue 10, while 'mask' stops at residue 9 - confirm that
# the two selections are not required to be equivalent
expr = "resi 1-10"


@pytest.mark.parametrize(
    "command_name, kwargs",
    [
        ("alter", {
            "selection": mask,
            "expression": "chain='B'",
        }),
        ("cartoon", {
            "type": "tube",
        }),
        ("cartoon", {
            "type": "tube",
            "selection": expr,
        }),
        ("cartoon", {
            "type": "tube",
            "selection": mask,
Exemple #27
0
def test_get_model_count():
    """
    Expect :func:`get_model_count()` to report the stack depth of the
    structure parsed from the same file.
    """
    cif_path = join(data_dir("structure"), "1l2y.cif")
    pdbx_file = pdbx.PDBxFile.read(cif_path)
    assert (
        pdbx.get_model_count(pdbx_file)
        == pdbx.get_structure(pdbx_file).stack_depth()
    )
import biotite.structure.io.pdbx as pdbx
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.application.blast as blast
import biotite.application.clustalo as clustalo
import biotite.database.rcsb as rcsb
import biotite.database.entrez as entrez

# Get structure and sequence
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif"))
# Only the first sequence in the file is used as query
sequence = pdbx.get_sequence(pdbx_file)[0]
# 'use_author_fields' is set to false,
# to ensure that values in the 'res_id' annotation point to the sequence
structure = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False)
# Restrict the structure to the atoms selected by the amino acid filter
structure = structure[struc.filter_amino_acids(structure)]

# Identity threshold for a sequence to be counted as homologous sequence
# NOTE(review): 'THESHOLD' is presumably a typo for 'THRESHOLD';
# renaming requires updating every use of this constant
IDENTITY_THESHOLD = 0.4
# Find homologous proteins in SwissProt via BLAST
app = blast.BlastWebApp("blastp", sequence, database="swissprot")
app.start()
app.join()
alignments = app.get_alignments()
# The hit lists start with the query sequence itself
hit_seqs = [sequence]
hit_ids = ["Query"]
hit_starts = [1]
for ali in alignments:
    identity = align.get_sequence_identity(ali)
    # Do not include the exact same sequence -> identity < 1.0
Exemple #29
0
# Benchmark the parsing of a mmCIF file given as an argument

import sys
import time
import biotite.structure.io.pdbx as pdbx

# Path of the mmCIF file to be parsed, given as first argument
mmcif_filepath = sys.argv[1]

# 'perf_counter()' is a monotonic high-resolution clock,
# whereas 'time()' may jump if the system clock is adjusted
start = time.perf_counter()
file = pdbx.PDBxFile()
file.read(mmcif_filepath)
pdbx.get_structure(file)
end = time.perf_counter()

# Elapsed time in seconds
print(end - start)
Exemple #30
0
from tempfile import NamedTemporaryFile
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb
import numpy as np

# Temporary files serve as default output targets
ku_dna_file = NamedTemporaryFile(suffix=".cif")
ku_file = NamedTemporaryFile(suffix=".cif")
# The output file names
# Modify these values for actual file output
ku_dna_file_name = ku_dna_file.name
ku_file_name = ku_file.name

# Download and parse structure files
# Only the first element of each parsed structure is used
ku_dna = pdbx.get_structure(pdbx.PDBxFile.read(rcsb.fetch("1JEY", "cif")))[0]
ku = pdbx.get_structure(pdbx.PDBxFile.read(rcsb.fetch("1JEQ", "cif")))[0]
# Remove DNA and water
# -> keep only chains A and B and drop the solvent
ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")]
ku_dna = ku_dna[~struc.filter_solvent(ku_dna)]
ku = ku[~struc.filter_solvent(ku)]
# The structures have a differing amount of atoms missing
# at the start and end of the structure
# -> Find common structure
ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)]
ku_common = ku[struc.filter_intersection(ku, ku_dna)]
# Superimpose
# The superimposition is based on the CA atoms only
ku_superimposed, transformation = struc.superimpose(
    ku_dna_common, ku_common, (ku_common.atom_name == "CA"))
# We do not want the cropped structures
# -> apply superimposition on original structures