コード例 #1
0
ファイル: test_pdbx.py プロジェクト: Discngine/biotite
def test_conversion(path, model):
    """
    Round-trip a structure through a newly created :class:`PDBxFile`
    and check that box, bonds, annotations and coordinates are
    unchanged, although the ``auth_atom_id`` column is removed from
    the written ``atom_site`` category before reading it again.
    """
    source_file = pdbx.PDBxFile.read(path)

    try:
        ref_atoms = pdbx.get_structure(source_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The file cannot be parsed into an AtomArrayStack,
        # as the models contain different numbers of atoms
        # -> skip this test case
        return

    roundtrip_file = pdbx.PDBxFile()
    pdbx.set_structure(roundtrip_file, ref_atoms, data_block="test")

    # Drop one optional 'auth' column, so that reading has to fall
    # back to the label fields
    atom_site = roundtrip_file.get_category("atom_site", "test")
    atom_site.pop("auth_atom_id")
    roundtrip_file.set_category("atom_site", atom_site, "test")

    test_atoms = pdbx.get_structure(roundtrip_file, model=model)

    assert ref_atoms.array_length() > 0
    if ref_atoms.box is not None:
        assert np.allclose(ref_atoms.box, test_atoms.box)
    assert ref_atoms.bonds == test_atoms.bonds
    for name in ref_atoms.get_annotation_categories():
        ref_values = ref_atoms.get_annotation(name).tolist()
        test_values = test_atoms.get_annotation(name).tolist()
        assert ref_values == test_values
    assert ref_atoms.coord.tolist() == test_atoms.coord.tolist()
コード例 #2
0
ファイル: test_pdbx.py プロジェクト: Discngine/biotite
def test_extra_fields():
    """
    Check that the optional annotations requested via ``extra_fields``
    survive a round trip through a newly created :class:`PDBxFile`.

    The structure is read from ``1l2y.cif``, written into an empty
    file object and parsed again; both results must agree.
    """
    # Requested identically for both reads, so it is defined only once
    extra_fields = ["atom_id", "b_factor", "occupancy", "charge"]

    path = join(data_dir("structure"), "1l2y.cif")
    pdbx_file = pdbx.PDBxFile.read(path)
    stack1 = pdbx.get_structure(pdbx_file, extra_fields=extra_fields)

    pdbx_file = pdbx.PDBxFile()
    pdbx.set_structure(pdbx_file, stack1, data_block="test")

    stack2 = pdbx.get_structure(pdbx_file, extra_fields=extra_fields)

    # Compare annotation by annotation first, so that a failure names
    # the offending field; the floating point fields are compared
    # approximately
    assert stack1.ins_code.tolist() == stack2.ins_code.tolist()
    assert stack1.atom_id.tolist() == stack2.atom_id.tolist()
    assert stack1.b_factor.tolist() == approx(stack2.b_factor.tolist())
    assert stack1.occupancy.tolist() == approx(stack2.occupancy.tolist())
    assert stack1.charge.tolist() == stack2.charge.tolist()
    # Finally compare the complete stacks
    assert stack1 == stack2
コード例 #3
0
def test_conversion(path, model):
    """
    Round-trip a structure through a newly created :class:`PDBxFile`
    and check that box, bonds, annotations and coordinates are
    unchanged.
    """
    source_file = pdbx.PDBxFile.read(path)

    try:
        ref_atoms = pdbx.get_structure(source_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The file cannot be parsed into an AtomArrayStack,
        # as the models contain different numbers of atoms
        # -> skip this test case
        return

    roundtrip_file = pdbx.PDBxFile()
    pdbx.set_structure(roundtrip_file, ref_atoms, data_block="test")
    test_atoms = pdbx.get_structure(roundtrip_file, model=model)

    if ref_atoms.box is not None:
        assert np.allclose(ref_atoms.box, test_atoms.box)
    assert ref_atoms.bonds == test_atoms.bonds
    for name in ref_atoms.get_annotation_categories():
        ref_values = ref_atoms.get_annotation(name).tolist()
        test_values = test_atoms.get_annotation(name).tolist()
        assert ref_values == test_values
    assert ref_atoms.coord.tolist() == test_atoms.coord.tolist()
コード例 #4
0
def test_extra_fields():
    """
    Check that a stack parsed with extra fields from ``1l2y.cif`` is
    equal to the stack obtained after writing it into a new
    :class:`PDBxFile` and parsing it again.
    """
    requested = ["atom_id", "b_factor", "occupancy", "charge"]

    source_file = pdbx.PDBxFile()
    source_file.read(join(data_dir, "1l2y.cif"))
    stack1 = pdbx.get_structure(source_file, extra_fields=requested)

    roundtrip_file = pdbx.PDBxFile()
    pdbx.set_structure(roundtrip_file, stack1, data_block="test")
    stack2 = pdbx.get_structure(roundtrip_file, extra_fields=requested)

    assert stack1 == stack2
コード例 #5
0
def test_conversion(path, single_model):
    """
    Round-trip a structure through a newly created :class:`PDBxFile`
    and check that annotations and coordinates are unchanged.
    """
    # Only the first model is requested for the single model case
    if single_model:
        model = 1
    else:
        model = None

    source_file = pdbx.PDBxFile()
    source_file.read(path)
    ref_atoms = pdbx.get_structure(source_file, model=model)

    roundtrip_file = pdbx.PDBxFile()
    pdbx.set_structure(roundtrip_file, ref_atoms, data_block="test")
    test_atoms = pdbx.get_structure(roundtrip_file, model=model)

    for name in ref_atoms.get_annotation_categories():
        ref_values = ref_atoms.get_annotation(name).tolist()
        test_values = test_atoms.get_annotation(name).tolist()
        assert ref_values == test_values
    assert ref_atoms.coord.tolist() == test_atoms.coord.tolist()
コード例 #6
0
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and parse the result
    with the reader that belongs to this format.

    No target directory is passed to :func:`fetch()` if `as_file_like`
    is true, otherwise the Biotite temporary directory is used.
    """
    if as_file_like:
        target_dir = None
    else:
        target_dir = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    # Formats other than the three below are fetched, but not parsed
    if format == "pdb":
        structure_file = pdb.PDBFile()
        structure_file.read(fetched)
        pdb.get_structure(structure_file)
    elif format == "pdbx":
        structure_file = pdbx.PDBxFile()
        structure_file.read(fetched)
        pdbx.get_structure(structure_file)
    elif format == "mmtf":
        structure_file = mmtf.MMTFFile()
        structure_file.read(fetched)
        mmtf.get_structure(structure_file)
コード例 #7
0
def test_array_conversion(path):
    """
    Write a structure parsed from an mmCIF file into a PDBQT file,
    read it again and check that coordinates and annotations (except
    ``element``) are equal to the reference.
    """
    pdbx_file = pdbx.PDBxFile.read(path)
    ref_structure = pdbx.get_structure(pdbx_file,
                                       model=1,
                                       extra_fields=["charge"])
    ref_structure.bonds = struc.connect_via_residue_names(ref_structure)

    pdbqt_file = pdbqt.PDBQTFile()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Ignore warnings about atoms not parametrized
        mask = pdbqt.set_structure(pdbqt_file, ref_structure)
    # Restrict the reference to the atoms selected by the returned mask
    ref_structure = ref_structure[mask]

    # The context manager closes the temporary file,
    # even if writing, reading or parsing raises an exception
    with TemporaryFile("r+") as temp:
        pdbqt_file.write(temp)
        # Rewind, so the file can be read from its start
        temp.seek(0)
        pdbqt_file = pdbqt.PDBQTFile.read(temp)
        test_structure = pdbqt.get_structure(pdbqt_file, model=1)

    assert np.allclose(test_structure.coord, ref_structure.coord)
    for category in test_structure.get_annotation_categories():
        if category == "element":
            # PDBQT uses special atom types, which replace the usual
            # elements
            # -> there cannot be equality of the 'element' annotation
            continue
        try:
            assert np.array_equal(test_structure.get_annotation(category),
                                  ref_structure.get_annotation(category))
        except AssertionError:
            # Name the failing category before propagating the failure
            print(f"Inequality in '{category}' category")
            raise
コード例 #8
0
def test_PDBx_consistency(format):
    """
    Check that the structure obtained from a trajectory file of
    ``1l2y`` has the same annotations and approximately the same
    coordinates as the structure parsed from ``1l2y.cif``.

    Parameters
    ----------
    format : str
        The trajectory format, either ``"trr"`` or ``"xtc"``.

    Raises
    ------
    ValueError
        If `format` is none of the supported formats.
    """
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(join(data_dir, "1l2y.cif"))
    array1 = pdbx.get_structure(pdbx_file)
    # The first model is passed as template to the trajectory file
    template = pdbx.get_structure(pdbx_file, model=1)
    if format == "trr":
        traj_file = trr.TRRFile()
        traj_file.read(join(data_dir, "1l2y.trr"))
    elif format == "xtc":
        traj_file = xtc.XTCFile()
        traj_file.read(join(data_dir, "1l2y.xtc"))
    else:
        # Fail with a clear message instead of an unbound 'traj_file'
        raise ValueError(f"Unsupported trajectory format '{format}'")
    array2 = traj_file.get_structure(template)
    for cat in array1.get_annotation_categories():
        assert array1.get_annotation(cat).tolist() == \
               array2.get_annotation(cat).tolist()
    # The coordinates do not depend on the annotation category
    # -> compare them once, outside of the loop
    assert array1.coord == pytest.approx(array2.coord)
コード例 #9
0
ファイル: util.py プロジェクト: facebookresearch/esm
def load_structure(fpath, chain=None):
    """
    Load the first model of a structure file and reduce it to a single
    chain, after removing the atoms selected by ``filter_solvent``.

    Args:
        fpath: filepath to either pdb or cif file
        chain: the chain id; if None, the first chain found is used
    Returns:
        biotite.structure.AtomArray
    Raises:
        ValueError: if the file name ends with neither 'cif' nor 'pdb',
            if no chains are found, or if the requested chain is not
            present in the file
    """
    if fpath.endswith('cif'):
        with open(fpath) as fin:
            pdbxf = pdbx.PDBxFile.read(fin)
        structure = pdbx.get_structure(pdbxf, model=1)
    elif fpath.endswith('pdb'):
        with open(fpath) as fin:
            pdbf = pdb.PDBFile.read(fin)
        structure = pdb.get_structure(pdbf, model=1)
    else:
        # Without this branch 'structure' would stay unbound and the
        # function would fail later with an unrelated error
        raise ValueError(
            f'Unsupported file type: {fpath} (expected a cif or pdb file)'
        )
    issolvent = filter_solvent(structure)
    structure = structure[~issolvent]
    chains = get_chains(structure)
    print(f'Found {len(chains)} chains:', chains, '\n')
    if len(chains) == 0:
        raise ValueError('No chains found in the input file.')
    if chain is None:
        chain = chains[0]
    if chain not in chains:
        raise ValueError(f'Chain {chain} not found in input file')
    structure = structure[structure.chain_id == chain]
    print(f'Loaded chain {chain}\n')
    return structure
コード例 #10
0
ファイル: test_rcsb.py プロジェクト: simoneperazzoli/biotite
def test_fetch(format, as_file_like):
    """
    Fetch the entry ``1l2y`` in the given format and parse the result
    with the reader that belongs to this format.

    No target directory is passed to :func:`fetch()` if `as_file_like`
    is true, otherwise the Biotite temporary directory is used.
    """
    if as_file_like:
        target_dir = None
    else:
        target_dir = biotite.temp_dir()
    fetched = rcsb.fetch("1l2y", format, target_dir, overwrite=True)

    if format == "pdb":
        pdb.get_structure(pdb.PDBFile.read(fetched))
    elif format == "pdbx":
        pdbx.get_structure(pdbx.PDBxFile.read(fetched))
    elif format == "mmtf":
        mmtf.get_structure(mmtf.MMTFFile.read(fetched))
    elif format == "fasta":
        fasta_file = fasta.FastaFile.read(fetched)
        # Test if the file contains any sequences
        assert len(fasta.get_sequences(fasta_file)) > 0
コード例 #11
0
ファイル: test_command.py プロジェクト: wlsong/ammolite
def test_command(command_name, kwargs):
    """
    Call the :class:`PyMOLObject` method with the given name and
    keyword arguments on an object created from ``1l2y.cif``.
    The test only checks that the call does not raise.
    """
    reset()
    cif_path = join(data_dir, "1l2y.cif")
    structure = pdbx.get_structure(pdbx.PDBxFile.read(cif_path))
    structure.bonds = struc.connect_via_residue_names(structure)
    pymol_obj = PyMOLObject.from_structure(structure)
    # The method is looked up on the class,
    # hence the object is passed explicitly as first argument
    getattr(PyMOLObject, command_name)(pymol_obj, **kwargs)
コード例 #12
0
def test_superimposition_array(path):
    """
    Rotate and translate a copy of a structure and check that
    superimposing it onto the original, based on the ``CA`` atoms,
    gives an RMSD of approximately zero.
    The same is checked for applying the returned transformation.
    """
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(path)
    fixed = pdbx.get_structure(pdbx_file, model=1)

    rotated = struc.rotate(fixed.copy(), (1, 2, 3))
    mobile = struc.translate(rotated, (1, 2, 3))

    ca_mask = (mobile.atom_name == "CA")
    fitted, transformation = struc.superimpose(fixed, mobile, ca_mask)
    assert struc.rmsd(fixed, fitted) == pytest.approx(0)

    refitted = struc.superimpose_apply(mobile, transformation)
    assert struc.rmsd(fixed, refitted) == pytest.approx(0)
コード例 #13
0
def test_pdbx_consistency(path, single_model):
    """
    Check that a PDB file and the mmCIF file with the same base name
    give equal annotations and coordinates.
    """
    # Only the first model is requested for the single model case
    if single_model:
        model = 1
    else:
        model = None
    base_name, _ = splitext(path)
    cif_path = base_name + ".cif"

    pdb_file = pdb.PDBFile()
    pdb_file.read(path)
    pdb_atoms = pdb_file.get_structure(model=model)

    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(cif_path)
    pdbx_atoms = pdbx.get_structure(pdbx_file, model=model)

    for name in pdb_atoms.get_annotation_categories():
        pdb_values = pdb_atoms.get_annotation(name).tolist()
        pdbx_values = pdbx_atoms.get_annotation(name).tolist()
        assert pdb_values == pdbx_values
    assert pdb_atoms.coord.tolist() == pdbx_atoms.coord.tolist()
コード例 #14
0
ファイル: test_conversion.py プロジェクト: wlsong/ammolite
def test_both_directions(path, state):
    """
    Convert a structure into a :class:`PyMOLObject` and back again and
    check that annotations, coordinates and bonds are preserved.
    """
    pdbx_file = pdbx.PDBxFile.read(path)
    ref_array = pdbx.get_structure(pdbx_file, model=state)
    ref_array.bonds = struc.connect_via_residue_names(ref_array)

    reset()
    pymol_object = PyMOLObject.from_structure(ref_array)
    test_array = pymol_object.to_structure(state=state, include_bonds=True)

    for cat in ref_array.get_annotation_categories():
        matches = test_array.get_annotation(cat) == ref_array.get_annotation(cat)
        assert matches.all()
    assert np.allclose(test_array.coord, ref_array.coord)
    assert test_array.bonds == ref_array.bonds
コード例 #15
0
def test_pdbx_consistency(path, single_model):
    """
    Check that an MMTF file and the mmCIF file with the same base name
    give equal annotations and approximately equal coordinates.
    """
    # The mapping was inverted before (None for 'single_model'):
    # the single model case requests the first model,
    # otherwise no model is specified
    model = 1 if single_model else None
    cif_path = splitext(path)[0] + ".cif"
    mmtf_file = mmtf.MMTFFile()
    mmtf_file.read(path)
    a1 = mmtf.get_structure(mmtf_file, model=model)
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(cif_path)
    a2 = pdbx.get_structure(pdbx_file, model=model)
    for category in a1.get_annotation_categories():
        assert a1.get_annotation(category).tolist() == \
               a2.get_annotation(category).tolist()
    # The coordinates are only required to agree within 1e-3
    assert a1.coord.flatten().tolist() == \
           approx(a2.coord.flatten().tolist(), abs=1e-3)
コード例 #16
0
ファイル: test_conversion.py プロジェクト: wlsong/ammolite
def test_to_biotite(path, altloc, state):
    """
    Load a structure file into PyMOL and check that the structure
    converted from the PyMOL object has the same annotations (except
    ``altloc_id``) and coordinates as the one parsed directly from the
    file.
    """
    pdbx_file = pdbx.PDBxFile.read(path)
    ref_array = pdbx.get_structure(pdbx_file, model=state, altloc=altloc)

    reset()
    cmd.load(path, "test")
    test_array = PyMOLObject("test").to_structure(state=state, altloc=altloc)

    for cat in ref_array.get_annotation_categories():
        if cat == "altloc_id":
            # This annotation is excluded from the comparison
            continue
        matches = test_array.get_annotation(cat) == ref_array.get_annotation(cat)
        assert matches.all()
    assert np.allclose(test_array.coord, ref_array.coord)
コード例 #17
0
def test_pdbx_consistency(path, single_model):
    """
    Check that an MMTF file and the mmCIF file with the same base name
    give equal annotations, approximately equal coordinates and, if
    the mmCIF box is not a dummy entry, matching boxes.
    """
    # The mapping was inverted before (None for 'single_model'):
    # the single model case requests the first model,
    # otherwise no model is specified
    model = 1 if single_model else None
    cif_path = splitext(path)[0] + ".cif"
    mmtf_file = mmtf.MMTFFile.read(path)
    a1 = mmtf.get_structure(mmtf_file, model=model)
    pdbx_file = pdbx.PDBxFile.read(cif_path)
    a2 = pdbx.get_structure(pdbx_file, model=model)
    # Sometimes mmCIF files can have 'cell' entry
    # but corresponding MMTF file has not 'unitCell' entry
    # -> Do not assert for dummy entry in mmCIF file
    # (all vector elements = {0, 1})
    if a2.box is not None and not ((a2.box == 0) | (a2.box == 1)).all():
        assert np.allclose(a1.box, a2.box)
    for category in a1.get_annotation_categories():
        assert a1.get_annotation(category).tolist() == \
               a2.get_annotation(category).tolist()
    # The coordinates are only required to agree within 1e-3
    assert a1.coord.flatten().tolist() == \
           approx(a2.coord.flatten().tolist(), abs=1e-3)
コード例 #18
0
ファイル: test_pdbx.py プロジェクト: Discngine/biotite
def test_get_assembly(model):
    """
    Test whether the :func:`get_assembly()` function produces the same
    number of peptide chains as the
    ``_pdbx_struct_assembly.oligomeric_count`` field indicates.
    Furthermore, check if the number of atoms in the entire assembly
    is a multiple of the numbers of atoms in a monomer.
    """

    cif_path = join(data_dir("structure"), "1f2n.cif")
    pdbx_file = pdbx.PDBxFile.read(cif_path)

    assembly_category = pdbx_file.get_category("pdbx_struct_assembly",
                                               expect_looped=True)
    assembly_ids = assembly_category["id"]
    ref_counts = assembly_category["oligomeric_count"]
    # Test each available assembly
    for assembly_id, ref_oligomer_count in zip(assembly_ids, ref_counts):
        print("Assembly ID:", assembly_id)
        try:
            assembly = pdbx.get_assembly(pdbx_file,
                                         assembly_id=assembly_id,
                                         model=model)
        except biotite.InvalidFileError:
            if model is not None:
                raise
            # The file cannot be parsed into an AtomArrayStack,
            # as the models contain different numbers of atoms
            # -> skip this test case
            return
        amino_acid_mask = struc.filter_amino_acids(assembly)
        protein_assembly = assembly[..., amino_acid_mask]
        test_oligomer_count = struc.get_chain_count(protein_assembly)

        # A stack is expected if no model is given,
        # otherwise a single array
        expected_type = struc.AtomArrayStack if model is None \
                        else struc.AtomArray
        assert isinstance(assembly, expected_type)
        assert test_oligomer_count == int(ref_oligomer_count)

        # The atom count of the entire assembly should be a multiple
        # of the atom count of a monomer
        monomer_atom_count = pdbx.get_structure(pdbx_file).array_length()
        assert assembly.array_length() % monomer_atom_count == 0
コード例 #19
0
def test_pdbx_consistency(path, model):
    """
    Check that an MMTF file and the mmCIF file with the same base name
    give the same structure: matching boxes (unless the mmCIF box is a
    dummy entry), equal annotations and approximately equal
    coordinates.
    Differences in the ``hetero`` annotation are tolerated for
    residues with a peptide, DNA or RNA link type.
    """
    base_name, _ = splitext(path)
    cif_path = base_name + ".cif"
    mmtf_file = mmtf.MMTFFile.read(path)
    try:
        a1 = mmtf.get_structure(mmtf_file, model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The file cannot be parsed into an AtomArrayStack,
        # as the models contain different numbers of atoms
        # -> skip this test case
        return

    pdbx_file = pdbx.PDBxFile.read(cif_path)
    a2 = pdbx.get_structure(pdbx_file, model=model)

    # Sometimes mmCIF files can have 'cell' entry
    # but corresponding MMTF file has not 'unitCell' entry
    # -> Do not assert for dummy entry in mmCIF file
    # (all vector elements = {0, 1})
    if a2.box is not None:
        is_dummy_box = ((a2.box == 0) | (a2.box == 1)).all()
        if not is_dummy_box:
            assert np.allclose(a1.box, a2.box)

    # MMTF might assign some residues, that PDBx assigns as 'hetero',
    # as 'non-hetero' if they are RNA/DNA or peptide linking
    if a1.hetero.tolist() != a2.hetero.tolist():
        allowed_link_types = [
            "L-PEPTIDE LINKING", "PEPTIDE LINKING", "DNA LINKING",
            "RNA LINKING"
        ]
        conflict_residues = np.unique(a1.res_name[a1.hetero != a2.hetero])
        for res in conflict_residues:
            assert info.link_type(res) in allowed_link_types

    # Test the remaining categories
    for category in a1.get_annotation_categories():
        if category == "hetero":
            continue
        assert a1.get_annotation(category).tolist() == \
               a2.get_annotation(category).tolist()
    flat_coord1 = a1.coord.flatten().tolist()
    flat_coord2 = a2.coord.flatten().tolist()
    assert flat_coord1 == approx(flat_coord2, abs=1e-3)
コード例 #20
0
def test_superimposition_stack(ca_only):
    """
    Superimpose all but the first model of ``1l2y.cif`` onto the first
    model and check that the RMSD gets smaller compared to the
    coordinates from the file.
    """
    pdbx_file = pdbx.PDBxFile()
    pdbx_file.read(join(data_dir, "1l2y.cif"))
    stack = pdbx.get_structure(pdbx_file)
    fixed, mobile = stack[0], stack[1:]

    # Restrict the fit to the CA atoms if requested
    mask = (mobile.atom_name == "CA") if ca_only else None
    fitted, _ = struc.superimpose(fixed, mobile, mask)

    rmsd_fitted = struc.rmsd(fixed, fitted)
    rmsd_from_file = struc.rmsd(fixed, mobile)
    if ca_only:
        # The superimpositions are better for most cases than the
        # superimpositions in the structure file
        # -> Use average
        assert np.mean(rmsd_fitted) < np.mean(rmsd_from_file)
    else:
        # The superimpositions are better than the superimpositions
        # in the structure file
        assert (rmsd_fitted < rmsd_from_file).all()
コード例 #21
0
ファイル: test_conversion.py プロジェクト: wlsong/ammolite
def test_to_pymol(path):
    """
    Check that the chempy model converted from an atom array has the
    same atoms as the model PyMOL creates when loading the same file.
    """
    reset()
    cmd.load(path, "test")
    pymol_model = cmd.get_model("test", 1)

    pdbx_file = pdbx.PDBxFile.read(path)
    extra_fields = ["b_factor", "occupancy", "charge"]
    atom_array = pdbx.get_structure(
        pdbx_file, model=1, extra_fields=extra_fields
    )
    converted_model = convert_to_chempy_model(atom_array)

    test_atoms = converted_model.atom
    # Only reference atoms with an 'alt' value of '' or 'A' are compared
    ref_atoms = [
        atom for atom in pymol_model.atom if atom.alt in ("", "A")
    ]
    assert len(test_atoms) == len(ref_atoms)
    for test_atom, ref_atom in zip(test_atoms, ref_atoms):
        assert test_atom.symbol == ref_atom.symbol
        assert test_atom.name == ref_atom.name
        assert test_atom.resn == ref_atom.resn
        assert test_atom.ins_code == ref_atom.ins_code
        assert test_atom.resi_number == ref_atom.resi_number
        # Floating point attributes are compared approximately
        assert test_atom.b == pytest.approx(ref_atom.b)
        assert test_atom.q == pytest.approx(ref_atom.q)
        assert test_atom.hetatm == ref_atom.hetatm
        assert test_atom.chain == ref_atom.chain
        assert test_atom.coord == pytest.approx(ref_atom.coord)
コード例 #22
0
def test_select(random_seed):
    """
    Check that the selection created by :meth:`PyMOLObject.where()`
    from a random boolean mask addresses exactly the masked atoms.
    The B factor is used as marker for the selected atoms.
    """
    reset()

    cif_path = join(data_dir, "1l2y.cif")
    array = pdbx.get_structure(pdbx.PDBxFile.read(cif_path), model=1)
    # Add bonds to avoid warning
    array.bonds = struc.connect_via_residue_names(array)

    # Use B factor as indicator if the selection was correctly applied
    atom_count = array.array_length()
    array.set_annotation("b_factor", np.zeros(atom_count))
    pymol_object = PyMOLObject.from_structure(array)

    np.random.seed(random_seed)
    ref_mask = np.random.choice([False, True], array.array_length())

    # The method that is actually tested
    test_selection = pymol_object.where(ref_mask)
    # Set B factor of all masked atoms to 1
    cmd.alter(test_selection, "b=1.0")
    # Get the mask back again from the B factor
    altered_array = pymol_object.to_structure(state=1)
    test_mask = (altered_array.b_factor == 1.0)

    assert np.array_equal(test_mask, ref_mask)
コード例 #23
0
def test_pdbx_consistency(path, model):
    """
    Check that a PDB file and the mmCIF file with the same base name
    give the same box, bonds, annotations and coordinates.
    """
    base_name, _ = splitext(path)
    cif_path = base_name + ".cif"
    pdb_file = pdb.PDBFile.read(path)
    try:
        pdb_atoms = pdb_file.get_structure(model=model)
    except biotite.InvalidFileError:
        if model is not None:
            raise
        # The file cannot be parsed into an AtomArrayStack,
        # as the models contain different numbers of atoms
        # -> skip this test case
        return

    pdbx_file = pdbx.PDBxFile.read(cif_path)
    pdbx_atoms = pdbx.get_structure(pdbx_file, model=model)

    if pdbx_atoms.box is not None:
        assert np.allclose(pdb_atoms.box, pdbx_atoms.box)
    assert pdb_atoms.bonds == pdbx_atoms.bonds
    for name in pdb_atoms.get_annotation_categories():
        pdb_values = pdb_atoms.get_annotation(name).tolist()
        pdbx_values = pdbx_atoms.get_annotation(name).tolist()
        assert pdb_values == pdbx_values
    assert pdb_atoms.coord.tolist() == pdbx_atoms.coord.tolist()
コード例 #24
0
# :class:`ndarray`.
# Setting/adding a category in the file is done in a similar way:

# The category is given column-wise: "name" and "pdbx_ordinal" each
# list one value per author
cif_file["audit_author"] = {
    "name": ["Doe, Jane", "Doe, John"],
    "pdbx_ordinal": ["1", "2"]
}

########################################################################
# In most applications only the structure itself
# (stored in the *atom_site* category) is relevant.
# :func:`get_structure()` and :func:`set_structure()` are convenience
# functions that are used to convert the
# ``atom_site`` category into an atom array (stack) and vice versa.

tc5b = pdbx.get_structure(cif_file)
# Placeholder: modify the atom array stack here,
# before it is written back into the file object
pdbx.set_structure(cif_file, tc5b)

########################################################################
# :func:`get_structure()` automatically creates an
# :class:`AtomArrayStack`, even if the file actually contains only a
# single model.
# If you would like to have an :class:`AtomArray` instead, you have to
# specify the :obj:`model` parameter.
#
# .. currentmodule:: biotite.structure.io.mmtf
#
# If you want to parse a large batch of structure files or you have to
# load very large structure files, the usage of PDB or mmCIF files might
# be too slow for your requirements.
コード例 #25
0
# :class:`ndarray`.
# Setting/adding a category in the file is done in a similar way:

# The category is given column-wise: "name" and "pdbx_ordinal" each
# list one value per author
file["audit_author"] = {
    "name": ["Doe, Jane", "Doe, John"],
    "pdbx_ordinal": ["1", "2"]
}

########################################################################
# In most applications only the structure itself
# (stored in the *atom_site* category) is relevant.
# :func:`get_structure()` and :func:`set_structure()` are convenience
# functions that are used to convert the
# *atom_site* category into an atom array (stack) and vice versa.

tc5b = pdbx.get_structure(file)
# Placeholder: modify the atom array stack here,
# before it is written back into the file object
pdbx.set_structure(file, tc5b)

########################################################################
# :func:`get_structure()` automatically creates an
# :class:`AtomArrayStack`, even if the file actually contains only a
# single model.
# If you would like to have an :class:`AtomArray` instead, you have to
# specify the :obj:`model` parameter.
#
# .. currentmodule:: biotite.structure.io.mmtf
#
# If you want to parse a large batch of structure files or you have to
# load very large structure files, the usage of PDB or mmCIF files might
# be too slow for your requirements. In this case you probably might
コード例 #26
0
ファイル: test_command.py プロジェクト: wlsong/ammolite
from os.path import join
import numpy as np
import pytest
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
from ammolite import PyMOLObject, reset
from .util import data_dir

# Module-level data used by the parametrization below:
# the structure parsed from '1l2y.cif' and two kinds of selections
pdbx_file = pdbx.PDBxFile.read(join(data_dir, "1l2y.cif"))
structure = pdbx.get_structure(pdbx_file)
# Selection given as boolean mask over the atoms
# NOTE(review): the mask covers 'res_id' below 10, while the expression
# below reads 'resi 1-10' - confirm that the difference is intended
mask = structure.res_id < 10
# Selection given as selection expression string
expr = "resi 1-10"


@pytest.mark.parametrize(
    "command_name, kwargs",
    [
        ("alter", {
            "selection": mask,
            "expression": "chain='B'",
        }),
        ("cartoon", {
            "type": "tube",
        }),
        ("cartoon", {
            "type": "tube",
            "selection": expr,
        }),
        ("cartoon", {
            "type": "tube",
            "selection": mask,
コード例 #27
0
ファイル: test_pdbx.py プロジェクト: Discngine/biotite
def test_get_model_count():
    """
    Check that :func:`get_model_count()` gives the same number of
    models as the depth of the stack parsed from ``1l2y.cif``.
    """
    cif_path = join(data_dir("structure"), "1l2y.cif")
    pdbx_file = pdbx.PDBxFile.read(cif_path)
    test_model_count = pdbx.get_model_count(pdbx_file)
    ref_stack = pdbx.get_structure(pdbx_file)
    assert test_model_count == ref_stack.stack_depth()
コード例 #28
0
import biotite.structure.io.pdbx as pdbx
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.application.blast as blast
import biotite.application.clustalo as clustalo
import biotite.database.rcsb as rcsb
import biotite.database.entrez as entrez

# Get structure and sequence
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("1GUU", "mmcif"))
# Only the first of the sequences obtained from the file is used
sequence = pdbx.get_sequence(pdbx_file)[0]
# 'use_author_fields' is set to false,
# to ensure that values in the 'res_id' annotation point to the sequence
structure = pdbx.get_structure(pdbx_file, model=1, use_author_fields=False)
# Keep only the atoms selected by the amino acid filter
structure = structure[struc.filter_amino_acids(structure)]

# Identity threshold for a sequence to be counted as homologous sequence
# NOTE(review): the name misspells 'THRESHOLD'; it is left unchanged,
# because its uses lie outside of this excerpt
IDENTITY_THESHOLD = 0.4
# Find homologous proteins in SwissProt via BLAST
app = blast.BlastWebApp("blastp", sequence, database="swissprot")
# Start the BLAST job and join it before requesting the alignments
app.start()
app.join()
alignments = app.get_alignments()
# The query sequence is the first entry in each of the hit lists
hit_seqs = [sequence]
hit_ids = ["Query"]
hit_starts = [1]
for ali in alignments:
    identity = align.get_sequence_identity(ali)
    # Do not include the exact same sequence -> identity < 1.0
コード例 #29
0
ファイル: parse_mmcif.py プロジェクト: wojdyr/pdb-benchmarks
# Benchmark the parsing of a mmCIF file given as an argument

import sys
import time
import biotite.structure.io.pdbx as pdbx

# Path of the mmCIF file to parse, given as first command line argument
mmcif_filepath = sys.argv[1]

# time.perf_counter() is a monotonic high-resolution clock for measuring
# intervals, whereas time.time() may jump when the system clock is
# adjusted
start = time.perf_counter()
pdbx_file = pdbx.PDBxFile()
pdbx_file.read(mmcif_filepath)
pdbx.get_structure(pdbx_file)
end = time.perf_counter()

# Elapsed time for reading and parsing, in seconds
print(end - start)
コード例 #30
0
from tempfile import NamedTemporaryFile
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.database.rcsb as rcsb
import numpy as np

# Temporary files are used as output files by default
ku_dna_file = NamedTemporaryFile(suffix=".cif")
ku_file = NamedTemporaryFile(suffix=".cif")
# The output file names
# Modify these values for actual file output
ku_dna_file_name = ku_dna_file.name
ku_file_name = ku_file.name

# Download and parse structure files
# Only the first element of each parsed structure is used
ku_dna = pdbx.get_structure(pdbx.PDBxFile.read(rcsb.fetch("1JEY", "cif")))[0]
ku = pdbx.get_structure(pdbx.PDBxFile.read(rcsb.fetch("1JEQ", "cif")))[0]
# Remove DNA and water
# (only chains 'A' and 'B' of the DNA-bound structure are kept)
ku_dna = ku_dna[(ku_dna.chain_id == "A") | (ku_dna.chain_id == "B")]
ku_dna = ku_dna[~struc.filter_solvent(ku_dna)]
ku = ku[~struc.filter_solvent(ku)]
# The structures have a differing amount of atoms missing
# at the start and end of the structure
# -> Find common structure
ku_dna_common = ku_dna[struc.filter_intersection(ku_dna, ku)]
ku_common = ku[struc.filter_intersection(ku, ku_dna)]
# Superimpose
# The fit is restricted to the 'CA' atoms of the common structure
ku_superimposed, transformation = struc.superimpose(
    ku_dna_common, ku_common, (ku_common.atom_name == "CA"))
# We do not want the cropped structures
# -> apply superimposition on original structures