Ejemplo n.º 1
0
    def test_with_anisotrop(self):
        """Parse 4CUP with both mmCIF parsers and compare atom-level data.

        Checks the model count, the first five atoms (names, ids, full names,
        occupancies), the first atom's coordinates, B factor and anisotropic
        B factors, and the anisotropic B factors of one atom deeper in
        chain A. Every check is run against the output of each parser.
        """
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = fast_parser.get_structure("example", "PDB/4CUP.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expected values are the same for both parsers, so build them once.
        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        coord = numpy.array([50.346, 19.287, 17.288], dtype=numpy.float32)
        first_anisou = numpy.array([0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904], dtype=numpy.float32)
        atom_937_anisou = numpy.array([1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221], dtype=numpy.float32)

        # Pair each atom list with the structure it came from: the residue
        # lookup at the end must be done on that parser's own output,
        # otherwise only the fast parser would ever be checked there.
        for parsed, atoms in [(structure, s_atoms), (f_structure, f_atoms)]:
            self.assertSequenceEqual([a.get_name() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_id() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)
            self.assertEqual(atoms[0].get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(atoms[0].get_anisou(), first_anisou)
            # CB atom of the 115th residue object (index 114) in chain A.
            atom_937 = list(parsed[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(), atom_937_anisou)
Ejemplo n.º 2
0
    def testModels(self):
        """Check model handling for mmCIF files that hold several models."""
        std_parser = MMCIFParser(QUIET=1)
        quick_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = std_parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = quick_parser.get_structure("example", "PDB/1LCD.cif")

        # Both parsers must report the same number of models.
        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 3)

        expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

        for builder in (PPBuilder(), CaPPBuilder()):
            # serial_num (the model column) must be stored as 1, 2, 3 for
            # the models at index 0, 1, 2.
            for index, serial in enumerate((1, 2, 3)):
                self.assertEqual(structure[index].serial_num, serial)

            # First pass (False) allows non-standard amino acids, where
            # non-standard MSE are shown as M; second pass (True) is the
            # strict version with only standard amino acids. The same
            # single peptide is expected either way.
            for aa_only in (False, True):
                peptides = builder.build_peptides(structure[0], aa_only)
                self.assertEqual(len(peptides), 1)
                peptide = peptides[0]
                # Start and end residue numbers
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Sequence type, alphabet and content
                sequence = peptide.get_sequence()
                self.assertIsInstance(sequence, Seq)
                self.assertEqual(sequence.alphabet, generic_protein)
                self.assertEqual(expected_sequence, str(sequence))

        # 2OFG holds several models of differing lengths; parsing it used
        # to make the tests fail.
        structure = std_parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
Ejemplo n.º 3
0
    def testModels(self):
        """Test file with multiple models.

        Parses 1LCD with both mmCIF parsers, checks the model count and the
        stored model serial numbers, then builds polypeptides from the first
        model with each polypeptide builder (lenient and strict) and checks
        their bounds and sequence. Finally checks that 2OFG parses into
        three models.
        """
        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        self.assertEqual(len(structure), 3)
        self.assertEqual(len(f_structure), 3)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(structure[1].serial_num, 2)
            self.assertEqual(structure[2].serial_num, 3)
            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 1)
            pp = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 1)
            self.assertEqual(pp[-1].get_id()[1], 51)
            # Check the sequence
            s = pp.get_sequence()
            # assertIsInstance reports the actual type on failure, unlike
            # assertTrue(isinstance(...)) which only says "False is not true".
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            # Here non-standard MSE are shown as M
            self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR",
                             str(s))
            # ==========================================================
            # Now try strict version with only standard amino acids
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 1)
            pp = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 1)
            self.assertEqual(pp[-1].get_id()[1], 51)
            # Check the sequence
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR",
                             str(s))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
Ejemplo n.º 4
0
 def __init__(self, file_path, config):
     """Parse the structure file and keep the result on the instance.

     :param file_path: path of the structure file; the parser is chosen
         from its suffix (``.pdb`` / ``.ent`` use ``PDBParser``, ``.cif``
         uses ``FastMMCIFParser``).
     :param config: configuration object, stored as ``self.config``.

     For any other suffix ``self.bio_struct`` stays ``None`` and an error
     is logged; no exception is raised here.
     """
     self.config = config
     pdb_id = uf.parse_pdb_id_from_file(file_path)
     self.bio_struct = None
     if file_path.endswith(('.pdb', '.ent')):
         self.bio_struct = PDBParser(QUIET=True).get_structure(
             pdb_id, file_path)
     elif file_path.endswith('.cif'):
         self.bio_struct = FastMMCIFParser(QUIET=True).get_structure(
             pdb_id, file_path)
     # Identity check: "== None" would go through the object's __eq__.
     if self.bio_struct is None:
         logging.error(
             "{} Flipper unable to parse structure file: {}".format(
                 pdb_id, file_path))
Ejemplo n.º 5
0
 def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
     """Build a site object straight from a list of residues.

     The mmCIF file at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true (its ``_mmcif_dict`` is kept for the site) and
     with ``FastMMCIFParser`` otherwise. If parsing raises ``TypeError``
     or ``PDBConstructionException`` a ``RuntimeWarning`` is issued and
     ``None`` is returned.
     """
     # Collapse residues listed more than once (multiple function locations)
     unique_residues = PdbSite._cleanup_list(reslist)
     new_site = cls()
     new_site.parent_entry = parent_entry
     annotations = {}
     try:
         parser_class = MMCIFParser if annotate else FastMMCIFParser
         cif_parser = parser_class(QUIET=True)
         parsed = cif_parser.get_structure('', cif_path)
         if annotate:
             annotations = cif_parser._mmcif_dict
     except (TypeError, PDBConstructionException):
         warnings.warn(
             'Could not build site from residue list. Check entry',
             RuntimeWarning)
         return None
     for residue in unique_residues:
         # Only attach the structure when it is truthy
         if parsed:
             residue.add_structure(parsed)
         new_site.add(residue)
     if not annotate:
         return new_site
     new_site.parent_structure = parsed
     new_site.mmcif_dict = annotations
     new_site.find_ligands()
     return new_site
Ejemplo n.º 6
0
    def test_with_anisotrop(self):
        """Compare MMCIFParser and FastMMCIFParser output for 4CUP.

        Covers the model count, the leading five atoms (name, id, full name,
        occupancy), the first atom's coordinates, B factor and anisotropic
        B factors, plus the anisotropic B factors of a CB atom further down
        chain A. Each parser's result is checked against the same values.
        """
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = fast_parser.get_structure("example", "PDB/4CUP.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        self.assertEqual(len(s_atoms), len(f_atoms))

        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        coord = numpy.array([50.346, 19.287, 17.288], dtype=numpy.float32)
        anisou_first = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
            dtype=numpy.float32)
        anisou_937 = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
            dtype=numpy.float32)

        # Iterate over (structure, atoms) pairs so the chain/residue lookup
        # below uses the structure that produced the atom list; looking it
        # up in f_structure on every pass would leave the standard parser
        # unchecked for that atom.
        for source, atoms in ((structure, s_atoms), (f_structure, f_atoms)):
            head = atoms[:5]
            self.assertSequenceEqual([a.get_name() for a in head],
                                     atom_names)
            self.assertSequenceEqual([a.get_id() for a in head],
                                     atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in head],
                                     atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in head],
                                     [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)
            self.assertEqual(atoms[0].get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(atoms[0].get_anisou(),
                                             anisou_first)
            # CB atom of the residue object at index 114 of chain A.
            atom_937 = list(source[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(),
                                             anisou_937)
Ejemplo n.º 7
0
def fetchStructure(pdbid:str, custom_path='default') -> Structure:
    """
    Parse the mmCIF file for <pdbid> with FastMMCIFParser and return the structure.

    The file is read from <custom_path> when one is given; otherwise from
    $STATIC_ROOT/<PDBID>/<PDBID>.cif, where STATIC_ROOT is taken from the
    environment and <PDBID> is the upper-cased id.

    Raises FileNotFoundError (carrying the path) when the file does not exist.
    If STATIC_ROOT is unset and no custom path is given, os.getenv returns
    None and path.join raises TypeError.
    """
    if custom_path != 'default':
        pathToFile = custom_path
    else:
        entry = pdbid.upper()
        pathToFile = path.join(os.getenv('STATIC_ROOT'), entry, entry + '.cif')

    if not path.exists(pathToFile):
        print(f"File does not exist at the provided path {pathToFile}")
        raise FileNotFoundError(pathToFile)
    # The structure id is always the upper-cased PDB id, even for custom paths.
    parser = FastMMCIFParser(QUIET=True)
    return parser.get_structure(pdbid.upper(), pathToFile)
Ejemplo n.º 8
0
 def build_all(cls,
               reslist,
               reference_site,
               parent_entry,
               cif_path,
               annotate=True,
               redundancy_cutoff=None):
     """Build all sites from a list of catalytic residues.

     The mmCIF file at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true (keeping its ``_mmcif_dict``) and with
     ``FastMMCIFParser`` otherwise. One site is built per seed via
     ``cls.build``, redundant sites are removed using
     ``redundancy_cutoff``, and, when annotating, each site receives the
     parsed structure, the mmCIF dictionary and its ligands.

     Returns a list of PdbSite objects. If parsing raises ``TypeError``
     or ``PDBConstructionException`` a ``RuntimeWarning`` is issued and
     an empty list is returned.
     """
     sites = []
     mmcif_dict = {}
     try:
         if annotate:
             parser = MMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
             mmcif_dict = parser._mmcif_dict
         else:
             parser = FastMMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
     except (TypeError, PDBConstructionException):
         # The category is an argument of warn(), not of str.format();
         # passing it to format() silently dropped it.
         warnings.warn('Could not parse structure {}'.format(cif_path),
                       RuntimeWarning)
         return sites
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     # We want all equivalent residues from identical assembly chains
     reslist = PdbSite._get_assembly_residues(reslist, structure)
     # Get seeds to build active sites
     seeds = PdbSite._get_seeds(reslist)
     # Build a site from each seed
     for seed in seeds:
         sites.append(cls.build(seed, reslist, reference_site,
                                parent_entry))
     # Reduce redundancy
     sites = PdbSite._remove_redundant_sites(sites,
                                             cutoff=redundancy_cutoff)
     # Add ligands and annotations
     if annotate and structure:
         for site in sites:
             site.parent_structure = structure
             site.mmcif_dict = mmcif_dict
             site.find_ligands()
     # Flag unclustered sites
     PdbSite._mark_unclustered(sites)
     return sites
Ejemplo n.º 9
0
# Version 2.0
# It is compatible with Python 3.6
#
# Author: Monika Wiech

import gzip
import glob
import os
import sys
import time

# Select the mmCIF parser class used by this script. FastMMCIFParser is the
# active choice; the commented-out lines show the MMCIFParser alternative.
# from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.MMCIFParser import FastMMCIFParser
# parser = MMCIFParser()
# Module-level parser instance, created once when the script is loaded.
parser = FastMMCIFParser()

# Directory layout: the data directory sits directly under the root directory.
root_dir = './'
data_dir = f'{root_dir}data/'

# Limits and running counter for directory processing
# (NOTE(review): exact meaning of "grp"/"str" is not visible here - confirm).
grp_dir_limit, str_dir_limit = 100, 1000
grp_dir_count = 0

# Logging: base-name template (one %s placeholder) and the file extensions
# for the error, log and output files.
log_name = 'tester__%s'
err_ext, log_ext, out_ext = '.err', '.log', '.out'

Ejemplo n.º 10
0
class StructureBuilder:
    """Build Flipper structures and neighbor networks from a structure file.

    The constructor parses the file with Bio.PDB and keeps the result in
    ``self.bio_struct``; the other methods convert that parsed structure
    into Flipper objects.
    """

    def __init__(self, file_path, config):
        """Parse ``file_path`` and store the configuration.

        :param file_path: path of the structure file; ``.pdb`` / ``.ent``
            files are read with ``PDBParser``, ``.cif`` files with
            ``FastMMCIFParser``.
        :param config: mapping of configuration parameters; this class reads
            ``open_gap_threshold``, ``neighbors_range`` and
            ``long_short_threshold`` from it.

        For any other suffix ``self.bio_struct`` stays ``None`` and an error
        is logged; no exception is raised here, so later calls that use
        ``self.bio_struct`` will fail.
        """
        self.config = config
        pdb_id = uf.parse_pdb_id_from_file(file_path)
        self.bio_struct = None
        if file_path.endswith(('.pdb', '.ent')):
            self.bio_struct = PDBParser(QUIET=True).get_structure(
                pdb_id, file_path)
        elif file_path.endswith('.cif'):
            self.bio_struct = FastMMCIFParser(QUIET=True).get_structure(
                pdb_id, file_path)
        # Identity check: "== None" would go through the object's __eq__.
        if self.bio_struct is None:
            logging.error(
                "{} Flipper unable to parse structure file: {}".format(
                    pdb_id, file_path))

    def add_residue(self, fl_chain, res):
        """Convert ``res`` into a FlipperResidue and append it to ``fl_chain``.

        In a non DNA/RNA chain the residue is only stored when it has a
        ``CA`` atom; in a DNA/RNA chain it is always stored and tagged with
        the uniprot id ``"DNA-RNA"``.
        """
        # create an empty residue
        fl_res = FlipperResidue()
        # extract res info
        fl_res.pdb_id = res_pdb_id(res)
        fl_res.model_id = res_model_id(res)
        fl_res.pdb_index = res_pdb_index(res)
        fl_res.pdb_insertion_code = res_insertion_code(res)
        fl_res.chain_id = fl_chain.id_label
        fl_res.name3 = res.get_resname()
        # keep FlipperResidue's default one-letter name when the three-letter
        # name has no mapping (the original comment says the default is 'X')
        n = uf.aa_3to1.get(fl_res.name3)
        if n:
            fl_res.name1 = n
        if not fl_chain.rna_dna_chain:
            # collect the coordinates of every atom, and the alpha carbon
            # separately
            for atom in res:
                fl_res.atoms_coord.append(atom.get_coord())
                if atom.id == 'CA':
                    fl_res.c_alpha_coord = atom.get_coord()
            if fl_res.c_alpha_coord is not None:
                # residues are inserted in order, so the position is the
                # current length of the list
                fl_res.pos_in_chain = len(fl_chain.residues)
                # link this residue with the previous one when their alpha
                # carbons are closer than the gap threshold
                if fl_chain.residues and uf.distance_3D(
                        fl_chain.residues[-1].c_alpha_coord,
                        fl_res.c_alpha_coord
                ) < self.config["open_gap_threshold"]:
                    fl_chain.residues[-1].has_next = True
                    fl_res.has_prev = True
        else:
            # DNA or RNA chain: set the uniprot identifier to "DNA-RNA"
            fl_res.uniprot_id = "DNA-RNA"
        # store the residue and index it by its string index
        if fl_res.c_alpha_coord is not None or fl_chain.rna_dna_chain:
            fl_chain.string_index_map[fl_res.string_index()] = len(
                fl_chain.residues)
            fl_chain.residues.append(fl_res)

    def build_structure(self, pdb_id, model_id=0):
        """Return a FlipperStructure built from model ``model_id``.

        :param pdb_id: identifier given to the new structure.
        :param model_id: key of the model inside the parsed structure
            (defaults to 0).
        """
        model = self.bio_struct[model_id]
        fl_struct = FlipperStructure(pdb_id, model.id)
        for chain in model:
            # create a Flipper chain with the same id
            fl_chain = FlipperChain(chain.id)
            # flag DNA-RNA chains
            if is_DNA(chain):
                fl_chain.rna_dna_chain = True
            for residue in chain:
                # only residues accepted by is_good_res are added
                if is_good_res(residue):
                    self.add_residue(fl_chain, residue)
            fl_struct.chains[chain.id] = fl_chain
        return fl_struct

    def make_neighbors(self, fl_struct):
        """Return the NeighborsNet of ``fl_struct``.

        For every residue of every non DNA/RNA chain, residues within
        ``config["neighbors_range"]`` of any of its atoms are classified as
        short range (same chain, closer than ``config["long_short_threshold"]``
        positions and no gap in between), long range (same chain otherwise)
        or inter chain neighbors.
        """
        nn = NeighborsNet()
        # NeighborSearch is built from the atoms of the whole parsed
        # structure, i.e. of every model
        ns = NeighborSearch(list(self.bio_struct.get_atoms()))
        for fl_chain in fl_struct.get_chains():
            if fl_chain.rna_dna_chain:
                continue
            for fl_res in fl_chain.residues:
                # add a default entry
                nn.add_default(fl_res)
                # neighbors already recorded for this residue (the search is
                # repeated for each of its atoms); a set gives O(1) lookups
                already_have = set()
                for atom_coord in fl_res.atoms_coord:
                    for res in ns.search(atom_coord,
                                         self.config["neighbors_range"],
                                         level='R'):
                        # skip bad residues and residues of other models,
                        # since the search covers all models
                        if not (is_good_res(res)
                                and fl_res.model_id == res_model_id(res)):
                            continue
                        full_id = res.get_full_id()
                        if full_id in already_have:
                            continue
                        # look up the Flipper residue associated with res
                        neighbor_chain = fl_struct.chains[res_chain_id(res)]
                        pos_2 = neighbor_chain.string_index_map.get(
                            res_string_index(res))
                        if pos_2 is None:
                            continue
                        fl_res_2 = neighbor_chain.residues[pos_2]
                        already_have.add(full_id)
                        # a different chain means an inter chain neighbor
                        if not fl_res.chain_id == fl_res_2.chain_id:
                            nn.add_inter(fl_res, fl_res_2)
                            continue
                        # a residue is not its own neighbor
                        if fl_res.pos_in_chain == fl_res_2.pos_in_chain:
                            continue
                        # close in sequence and not separated by a gap:
                        # short range neighbor, otherwise long range
                        if abs(fl_res.pos_in_chain - fl_res_2.pos_in_chain
                               ) < self.config["long_short_threshold"] \
                                and not fl_chain.have_gaps(fl_res, fl_res_2):
                            nn.add_short(fl_res, fl_res_2)
                        else:
                            nn.add_long(fl_res, fl_res_2)

        return nn
Ejemplo n.º 11
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O.

        Parses the file with both mmCIF parsers, compares the polypeptide
        built from each when non-standard amino acids are allowed, and then
        checks the three fragments obtained from the standard parser's
        structure in strict mode.
        """

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        # (first residue number, last residue number, sequence) of each
        # fragment expected in strict mode.
        strict_fragments = [
            (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
            (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
            (216, 220, "TACQG"),
        ]

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num,
                             structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            # assertIsInstance reports the offending type on failure
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual(
                "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            for pp, (start, end, sequence) in zip(polypeptides,
                                                  strict_fragments):
                self.assertEqual(pp[0].get_id()[1], start)
                self.assertEqual(pp[-1].get_id()[1], end)
                s = pp.get_sequence()
                self.assertIsInstance(s, Seq)
                self.assertEqual(s.alphabet, generic_protein)
                self.assertEqual(sequence, str(s))
Ejemplo n.º 12
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O and check atom-level data.

        Both mmCIF parsers read the file. The polypeptides built from each
        are compared in lenient mode, the strict-mode fragments of the
        standard parser's structure are checked, and finally the atoms
        produced by each parser are checked (count, leading atom names,
        occupancies, first coordinates and B factor, no anisotropic data).
        """

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            # assertIsInstance gives a descriptive message on failure
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            # Expected (start, end, sequence) of the first, second and
            # third fragment, in order.
            expected = (
                (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
                (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
                (216, 220, "TACQG"),
            )
            for pp, (first, last, text) in zip(polypeptides, expected):
                self.assertEqual(pp[0].get_id()[1], first)
                self.assertEqual(pp[-1].get_id()[1], last)
                s = pp.get_sequence()
                self.assertIsInstance(s, Seq)
                self.assertEqual(s.alphabet, generic_protein)
                self.assertEqual(text, str(s))

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)

        for atoms in [s_atoms, f_atoms]:
            self.assertEqual(len(atoms), 644)
            self.assertSequenceEqual([a.get_name() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_id() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)

            self.assertEqual(atoms[0].get_bfactor(), 18.03)
            # No atom is expected to carry anisotropic B factors here
            for atom in atoms:
                self.assertIsNone(atom.get_anisou())
Ejemplo n.º 13
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O.

        The file is read with MMCIFParser and FastMMCIFParser. In lenient
        mode the polypeptide from each parser is checked and the two
        sequences compared; in strict mode the three fragments built from
        the standard parser's structure are checked.
        """

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            # Prefer assertIsInstance: its failure message names the type
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            # Residue-number bounds and sequence expected for each fragment
            bounds = [(152, 184), (186, 213), (216, 220)]
            sequences = [
                "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
                "TETLLVQNANPDCKTILKALGPGATLEE",
                "TACQG",
            ]
            for pp, (start, end), sequence in zip(polypeptides, bounds,
                                                  sequences):
                self.assertEqual(pp[0].get_id()[1], start)
                self.assertEqual(pp[-1].get_id()[1], end)
                s = pp.get_sequence()
                self.assertIsInstance(s, Seq)
                self.assertEqual(s.alphabet, generic_protein)
                self.assertEqual(sequence, str(s))
Ejemplo n.º 14
0
 def test_point_mutations_fast(self):
     """Run the shared point-mutation checks against a quiet FastMMCIFParser."""
     quiet_fast_parser = FastMMCIFParser(QUIET=True)
     self._run_point_mutation_tests(quiet_fast_parser)
Ejemplo n.º 15
0
def openStructutre(pdbid: str, cifpath: str) -> Structure:
    """Parse an mmCIF file with a quiet FastMMCIFParser.

    Args:
        pdbid: Identifier passed to ``get_structure`` as its first argument.
        cifpath: Path of the mmCIF file, passed as the second argument.

    Returns:
        Whatever ``FastMMCIFParser.get_structure`` returns for the file.
    """
    quiet_parser = FastMMCIFParser(QUIET=True)
    return quiet_parser.get_structure(pdbid, cifpath)
Ejemplo n.º 16
0
    def test_parsers(self):
        """Compare MMCIFParser and FastMMCIFParser on PDB/1A8O.cif.

        Checks that both parsers produce a single model, that polypeptides
        built with PPBuilder and CaPPBuilder have the expected residue ranges
        and sequences, and that the atoms carry the expected names,
        occupancies, coordinates and B-factor with no anisotropic data.
        """
        std_parser = MMCIFParser()
        quick_parser = FastMMCIFParser()

        cif_file = "PDB/1A8O.cif"
        structure = std_parser.get_structure("example", cif_file)
        f_structure = quick_parser.get_structure("example", cif_file)

        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 1)

        # Here non-standard MSE are shown as M
        full_sequence = (
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
            "NANPDCKTILKALGPGATLEEMMTACQG"
        )
        # Expected strict-mode fragments as
        # (first residue number, last residue number, sequence).
        strict_expected = (
            (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
            (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
            (216, 220, "TACQG"),
        )

        for builder in (PPBuilder(), CaPPBuilder()):
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # First pass: non-standard amino acids are allowed, so each
            # parser's model should give exactly one polypeptide.
            lenient = builder.build_peptides(structure[0], False)
            f_lenient = builder.build_peptides(f_structure[0], False)

            self.assertEqual(len(lenient), 1)
            self.assertEqual(len(f_lenient), 1)

            whole = lenient[0]
            f_whole = f_lenient[0]

            # Start and end positions must agree for both parsers.
            for chain in (whole, f_whole):
                self.assertEqual(chain[0].get_id()[1], 151)
                self.assertEqual(chain[-1].get_id()[1], 220)

            # Sequence check; comparing the two sequences to each other means
            # only one of them needs the detailed assertions below.
            whole_seq = whole.get_sequence()
            f_whole_seq = f_whole.get_sequence()

            self.assertEqual(whole_seq, f_whole_seq)

            self.assertIsInstance(whole_seq, Seq)
            self.assertEqual(whole_seq.alphabet, generic_protein)
            self.assertEqual(full_sequence, str(whole_seq))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            strict = builder.build_peptides(structure[0], True)
            self.assertEqual(len(strict), 3)

            for idx, (start, stop, residues) in enumerate(strict_expected):
                fragment = strict[idx]
                self.assertEqual(fragment[0].get_id()[1], start)
                self.assertEqual(fragment[-1].get_id()[1], stop)
                fragment_seq = fragment.get_sequence()
                self.assertIsInstance(fragment_seq, Seq)
                self.assertEqual(fragment_seq.alphabet, generic_protein)
                self.assertEqual(residues, str(fragment_seq))

        atom_lists = [list(structure.get_atoms()), list(f_structure.get_atoms())]
        expected_names = ["N", "CA", "C", "O", "CB"]
        expected_coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)

        for atoms in atom_lists:
            self.assertEqual(len(atoms), 644)

            leading = atoms[:5]
            self.assertSequenceEqual([a.get_name() for a in leading], expected_names)
            self.assertSequenceEqual([a.get_id() for a in leading], expected_names)
            self.assertSequenceEqual(
                [a.get_fullname() for a in leading], expected_names
            )
            self.assertSequenceEqual(
                [a.get_occupancy() for a in leading], [1.0] * 5
            )

            first_atom = atoms[0]
            self.assertIsInstance(first_atom.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(first_atom.get_coord(), expected_coord)
            self.assertEqual(first_atom.get_bfactor(), 18.03)

            # No atom in this file is expected to carry anisotropic data.
            for atom in atoms:
                self.assertIsNone(atom.get_anisou())
Ejemplo n.º 17
0
from asyncio import run


def root_self(rootname: str = '') -> str:
    """Return the project root path and register it for imports and dotenv.

    The root is the prefix of this file's absolute path that ends with the
    first occurrence of ``rootname``. That prefix is appended to ``sys.path``
    and ``<root>/.env`` is handed to ``load_dotenv``.

    Args:
        rootname: Substring of this file's absolute path that marks the end
            of the project root. With the default empty string the computed
            root is the empty string.

    Returns:
        The computed root path.

    Raises:
        ValueError: If ``rootname`` does not occur in this file's absolute
            path.
    """
    here = os.path.abspath(__file__)
    marker_at = here.find(rootname)
    if marker_at < 0:
        # str.find returns -1 on a miss; slicing with that value would
        # silently produce a meaningless prefix and pollute sys.path.
        raise ValueError(
            f"root name {rootname!r} not found in path {here!r}")
    root = here[:marker_at + len(rootname)]
    sys.path.append(root)
    load_dotenv(os.path.join(root, '.env'))
    return root


root_self('ribxz')

from ciftools.Neoget import _neoget

prs = FastMMCIFParser(QUIET=True)

io = MMCIFIO()

for pdbid in [
        '1vy4',
]:
    pdbid = pdbid.upper()
    struct: Structure = prs.get_structure(f'{pdbid}', f'{pdbid}.cif')
    for chain in struct[0].child_list:
        strand_id = chain.id
        nomclass_result = _neoget(
            f"""match (r:RibosomeStructure{{rcsb_id: "{pdbid.upper()}"}})-[]-(rp:RibosomalProtein)-[]-(n:NomenclatureClass)
        where rp.entity_poly_strand_id  = "{strand_id}" return n.class_id""")
        print(nomclass_result)
        if len(nomclass_result) > 0: