Ejemplo n.º 1
0
 def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
     """Build a single site from a residue list and an mmCIF file.

     The residue list is first passed through ``PdbSite._cleanup_list``.
     The file at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true (so the parser's mmCIF dictionary can be kept)
     and with ``FastMMCIFParser`` otherwise. Every residue is handed the
     parsed structure (when it is truthy) and added to the site. With
     ``annotate`` the site additionally receives the structure and the
     mmCIF dictionary, and ``find_ligands`` is called on it.

     Returns the new site, or ``None`` (after a ``RuntimeWarning``) when
     parsing raises ``TypeError`` or ``PDBConstructionException``.
     """
     mmcif_dict = {}
     # Collapse residues that are listed under several function locations
     reslist = PdbSite._cleanup_list(reslist)
     site = cls()
     site.parent_entry = parent_entry
     # Only the full parser exposes the mmCIF dictionary used for annotation
     parser_cls = MMCIFParser if annotate else FastMMCIFParser
     try:
         parser = parser_cls(QUIET=True)
         structure = parser.get_structure('', cif_path)
         if annotate:
             mmcif_dict = parser._mmcif_dict
     except (TypeError, PDBConstructionException):
         warnings.warn(
             'Could not build site from residue list. Check entry',
             RuntimeWarning)
         return
     for residue in reslist:
         if structure:
             residue.add_structure(structure)
         site.add(residue)
     if not annotate:
         return site
     site.parent_structure = structure
     site.mmcif_dict = mmcif_dict
     site.find_ligands()
     return site
Ejemplo n.º 2
0
    def testModels(self):
        """Parse a multi-model mmCIF file and check models and peptides."""
        full_parser = MMCIFParser(QUIET=1)
        quick_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = full_parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = quick_parser.get_structure("example", "PDB/1LCD.cif")

        # Both parsers must see all three models
        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 3)

        expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

        for builder in (PPBuilder(), CaPPBuilder()):
            # The model column of the file must end up in serial_num
            for model_id in range(3):
                self.assertEqual(structure[model_id].serial_num, model_id + 1)

            # First allow non-standard amino acids, then repeat with the
            # strict (standard residues only) setting; the same single
            # peptide is expected either way.
            for standard_only in (False, True):
                peptides = builder.build_peptides(structure[0], standard_only)
                self.assertEqual(len(peptides), 1)
                peptide = peptides[0]
                # Start and end residue numbers
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Sequence type, alphabet and content
                sequence = peptide.get_sequence()
                self.assertIsInstance(sequence, Seq)
                self.assertEqual(sequence.alphabet, generic_protein)
                self.assertEqual(expected_sequence, str(sequence))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = full_parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
Ejemplo n.º 3
0
 def __init__(self, file_path, config):
     """Store the configuration and parse the structure file.

     The parser is picked from the file extension: ``.pdb`` and ``.ent``
     files go through ``PDBParser``, ``.cif`` files through
     ``FastMMCIFParser``. For any other extension ``self.bio_struct``
     stays ``None`` and an error is logged.

     Args:
         file_path: path of the structure file, as a string.
         config: configuration object, stored unchanged on ``self.config``.
     """
     self.config = config
     pdb_id = uf.parse_pdb_id_from_file(file_path)
     self.bio_struct = None
     if file_path.endswith(('.pdb', '.ent')):
         self.bio_struct = PDBParser(QUIET=True).get_structure(
             pdb_id, file_path)
     elif file_path.endswith('.cif'):
         self.bio_struct = FastMMCIFParser(QUIET=True).get_structure(
             pdb_id, file_path)
     # Identity check: "no structure parsed" is exactly the None sentinel
     if self.bio_struct is None:
         logging.error(
             "{} Flipper unable to parse structure file: {}".format(
                 pdb_id, file_path))
Ejemplo n.º 4
0
def fetchStructure(pdbid:str, custom_path='default') -> Structure:
    """
    Parse the mmCIF file for <pdbid> and return the resulting structure.

    By default the file is looked up at
    ``$STATIC_ROOT/<PDBID>/<PDBID>.cif`` (``pdbid`` upper-cased, with
    ``STATIC_ROOT`` read from the environment). When ``custom_path`` is
    anything other than the sentinel ``'default'`` it is used as the file
    path instead.

    Raises ``FileNotFoundError`` (after printing the path) when no file
    exists at the resolved location. If the default location is used while
    ``STATIC_ROOT`` is unset, ``os.getenv`` yields ``None`` and
    ``path.join`` raises ``TypeError``.
    """
    if custom_path != 'default':
        pathToFile = custom_path
    else:
        rcsb_id = pdbid.upper()
        pathToFile = path.join(os.getenv('STATIC_ROOT'), rcsb_id, rcsb_id + '.cif')

    if not path.exists(pathToFile):
        print(f"File does not exist at the provided path {pathToFile}")
        raise FileNotFoundError(pathToFile)
    parser = FastMMCIFParser(QUIET=True)
    return parser.get_structure(pdbid.upper(), pathToFile)
Ejemplo n.º 5
0
 def build_all(cls,
               reslist,
               reference_site,
               parent_entry,
               cif_path,
               annotate=True,
               redundancy_cutoff=None):
     """Build all sites from a list of catalytic residues.

     The file at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true (keeping the parser's mmCIF dictionary) and with
     ``FastMMCIFParser`` otherwise. The residue list is cleaned up,
     expanded through ``PdbSite._get_assembly_residues``, turned into
     seeds, and one site is built per seed via ``cls.build``. Redundant
     sites are then removed using ``redundancy_cutoff``, ligands and
     annotations are attached when requested, and the result is passed to
     ``PdbSite._mark_unclustered``.

     Returns a list of PdbSite objects; the list is empty (after a
     ``RuntimeWarning``) when parsing raises ``TypeError`` or
     ``PDBConstructionException``.
     """
     # Map structure objects in every residue
     sites = []
     mmcif_dict = dict()
     try:
         if annotate:
             parser = MMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
             mmcif_dict = parser._mmcif_dict
         else:
             parser = FastMMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
     except (TypeError, PDBConstructionException):
         # The category belongs to warn(), not to str.format(): passing it
         # to format() silently dropped it and emitted a UserWarning.
         warnings.warn('Could not parse structure {}'.format(cif_path),
                       RuntimeWarning)
         return sites
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     # We want all equivalent residues from identical assembly chains
     reslist = PdbSite._get_assembly_residues(reslist, structure)
     # Get seeds to build active sites
     seeds = PdbSite._get_seeds(reslist)
     # Build a site from each seed
     for seed in seeds:
         sites.append(cls.build(seed, reslist, reference_site,
                                parent_entry))
     # Reduce redundancy
     sites = PdbSite._remove_redundant_sites(sites,
                                             cutoff=redundancy_cutoff)
     # Add ligands and annotations
     if annotate and structure:
         for site in sites:
             site.parent_structure = structure
             site.mmcif_dict = mmcif_dict
             site.find_ligands()
     # Flag unclustered sites
     PdbSite._mark_unclustered(sites)
     return sites
Ejemplo n.º 6
0
    def test_with_anisotrop(self):
        """Compare both mmCIF parsers on a file with anisotropic B factors."""
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = fast_parser.get_structure("example", "PDB/4CUP.cif")

        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expectations shared by the atom lists of both parsers
        first_five_names = ['N', 'CA', 'C', 'O', 'CB']
        first_coord = numpy.array([50.346, 19.287, 17.288],
                                  dtype=numpy.float32)
        first_anisou = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
            dtype=numpy.float32)
        cb_anisou = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
            dtype=numpy.float32)

        for atom_list in (s_atoms, f_atoms):
            leading = atom_list[:5]
            head = atom_list[0]
            self.assertSequenceEqual([a.get_name() for a in leading],
                                     first_five_names)
            self.assertSequenceEqual([a.get_id() for a in leading],
                                     first_five_names)
            self.assertSequenceEqual([a.get_fullname() for a in leading],
                                     first_five_names)
            self.assertSequenceEqual([a.get_occupancy() for a in leading],
                                     [1., 1., 1., 1., 1.])
            self.assertIsInstance(head.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(head.get_coord(), first_coord)
            self.assertEqual(head.get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(head.get_anisou(), first_anisou)
            # This atom is always taken from the fast parser's structure,
            # on both passes of the loop.
            atom_937 = list(f_structure[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(), cb_anisou)
Ejemplo n.º 7
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O with both mmCIF parsers."""
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        # Expected (first residue, last residue, sequence) of each peptide
        # in the strict build: MSE 151 at the start is ignored and the
        # chain breaks at MSE 185 and at MSE 214,215.
        strict_fragments = (
            (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
            (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
            (216, 220, "TACQG"),
        )

        for builder in (PPBuilder(), CaPPBuilder()):
            # ==========================================================
            # serial_num must carry the model column of the file
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # Relaxed build: non-standard amino acids are allowed
            relaxed = builder.build_peptides(structure[0], False)
            f_relaxed = builder.build_peptides(f_structure[0], False)

            self.assertEqual(len(relaxed), 1)
            self.assertEqual(len(f_relaxed), 1)

            # Start and end positions, for each parser in turn
            for peptide in (relaxed[0], f_relaxed[0]):
                self.assertEqual(peptide[0].get_id()[1], 151)
                self.assertEqual(peptide[-1].get_id()[1], 220)

            sequence = relaxed[0].get_sequence()
            f_sequence = f_relaxed[0].get_sequence()

            # Equal sequences mean only one of them needs detailed checks
            self.assertEqual(sequence, f_sequence)

            self.assertIsInstance(sequence, Seq)
            self.assertEqual(sequence.alphabet, generic_protein)

            # Non-standard MSE residues show up as M here
            self.assertEqual(
                "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                "NANPDCKTILKALGPGATLEEMMTACQG",
                str(sequence),
            )

            # ==========================================================
            # Strict build: standard amino acids only
            strict = builder.build_peptides(structure[0], True)
            self.assertEqual(len(strict), 3)

            for peptide, (first, last, residues) in zip(strict, strict_fragments):
                self.assertEqual(peptide[0].get_id()[1], first)
                self.assertEqual(peptide[-1].get_id()[1], last)
                fragment = peptide.get_sequence()
                self.assertIsInstance(fragment, Seq)
                self.assertEqual(fragment.alphabet, generic_protein)
                self.assertEqual(residues, str(fragment))

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        first_five_names = ["N", "CA", "C", "O", "CB"]
        first_coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)

        for atom_list in (s_atoms, f_atoms):
            self.assertEqual(len(atom_list), 644)
            leading = atom_list[:5]
            head = atom_list[0]
            self.assertSequenceEqual([a.get_name() for a in leading], first_five_names)
            self.assertSequenceEqual([a.get_id() for a in leading], first_five_names)
            self.assertSequenceEqual(
                [a.get_fullname() for a in leading], first_five_names
            )
            self.assertSequenceEqual(
                [a.get_occupancy() for a in leading], [1.0, 1.0, 1.0, 1.0, 1.0]
            )
            self.assertIsInstance(head.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(head.get_coord(), first_coord)

            self.assertEqual(head.get_bfactor(), 18.03)
            # No atom is expected to carry anisotropic data in this file
            for atom in atom_list:
                self.assertIsNone(atom.get_anisou())
Ejemplo n.º 8
0
 def test_point_mutations_fast(self):
     """Run the shared point-mutation checks with a quiet FastMMCIFParser."""
     fast_parser = FastMMCIFParser(QUIET=True)
     self._run_point_mutation_tests(fast_parser)
Ejemplo n.º 9
0
# Version 2.0
# It is compatible with Python 3.6
#
# Author: Monika Wiech

import gzip
import glob
import os
import sys
import time

# Select the parser class used by the rest of this script. The fast parser
# is active; the commented-out lines are the switch to the full MMCIFParser.
# from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.MMCIFParser import FastMMCIFParser
# parser = MMCIFParser()
# Module-level parser instance, created once at import time.
parser = FastMMCIFParser()

# Data location (relative to the working directory) plus the directory
# limits and running counter -- presumably caps on how many group and
# structure directories get processed; confirm against the code using them.
root_dir = './'
data_dir = '{}data/'.format(root_dir)
grp_dir_limit, str_dir_limit = 100, 1000
grp_dir_count = 0

# Logging file names: a '%s' template for the base name and one extension
# per output kind.
log_name = 'tester__%s'
err_ext, log_ext, out_ext = '.err', '.log', '.out'

Ejemplo n.º 10
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O.

        The file is parsed with both MMCIFParser and FastMMCIFParser. The
        relaxed peptide build is compared between the two parsers; the
        strict build (standard amino acids only) is checked on the
        MMCIFParser structure.
        """

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num,
                             structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            # assertIsInstance reports the actual type on failure, unlike
            # assertTrue(isinstance(...)) which only says "False is not true"
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual(
                "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            # First fragment
            pp = polypeptides[0]
            self.assertEqual(pp[0].get_id()[1], 152)
            self.assertEqual(pp[-1].get_id()[1], 184)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))

            # Second fragment
            pp = polypeptides[1]
            self.assertEqual(pp[0].get_id()[1], 186)
            self.assertEqual(pp[-1].get_id()[1], 213)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))

            # Third fragment
            pp = polypeptides[2]
            self.assertEqual(pp[0].get_id()[1], 216)
            self.assertEqual(pp[-1].get_id()[1], 220)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TACQG", str(s))
Ejemplo n.º 11
0
def openStructutre(pdbid: str, cifpath: str) -> Structure:
    """Parse the mmCIF file at ``cifpath`` quietly and return it as ``pdbid``."""
    cif_parser = FastMMCIFParser(QUIET=True)
    return cif_parser.get_structure(pdbid, cifpath)
Ejemplo n.º 12
0
from asyncio import run


def root_self(rootname: str = '') -> str:
    """Locate the project root, register it, and return its path.

    The root is the absolute path of this file cut off right after the
    first occurrence of ``rootname``. It is appended to ``sys.path`` and
    the ``.env`` file directly inside it is loaded with ``load_dotenv``.

    Args:
        rootname: name of the project directory to look for in this file's
            absolute path.

    Returns:
        The computed root path (previously the function returned ``None``
        despite its ``-> str`` annotation).
    """
    here = os.path.abspath(__file__)
    # NOTE(review): if rootname does not occur in the path, find() gives -1
    # and the slice below is not a meaningful directory -- behaviour kept
    # as in the original; confirm whether that case should be an error.
    root = here[:here.find(rootname) + len(rootname)]
    sys.path.append(root)
    load_dotenv(os.path.join(root, '.env'))
    return root


# Append the 'ribxz' project root to sys.path and load its .env file.
# Presumably this has to run before the ciftools import below so that the
# package can be found -- confirm against the project layout.
root_self('ribxz')

from ciftools.Neoget import _neoget

# Quiet mmCIF parser used by the processing loop further down.
prs = FastMMCIFParser(QUIET=True)

# NOTE(review): this name shadows the standard-library `io` module for the
# rest of the script.
io = MMCIFIO()

for pdbid in [
        '1vy4',
]:
    pdbid = pdbid.upper()
    struct: Structure = prs.get_structure(f'{pdbid}', f'{pdbid}.cif')
    for chain in struct[0].child_list:
        strand_id = chain.id
        nomclass_result = _neoget(
            f"""match (r:RibosomeStructure{{rcsb_id: "{pdbid.upper()}"}})-[]-(rp:RibosomalProtein)-[]-(n:NomenclatureClass)
        where rp.entity_poly_strand_id  = "{strand_id}" return n.class_id""")
        print(nomclass_result)
        if len(nomclass_result) > 0: