def test_conversion(self):
    """Parse 1LCD.cif, write it out as PDB, parse that again and compare.

    The round trip must keep the number of models and, atom by atom,
    the same names and elements as the structure read from mmCIF.
    """
    import os

    cif_parser = MMCIFParser(QUIET=1)
    cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

    pdb_writer = PDBIO()
    pdb_writer.set_structure(cif_struct)
    # mkstemp hands back an open descriptor; close it immediately (the
    # writer reopens the file by name) and delete the file afterwards.
    filenumber, filename = tempfile.mkstemp()
    os.close(filenumber)
    try:
        pdb_writer.save(filename)

        pdb_parser = PDBParser(QUIET=1)
        pdb_struct = pdb_parser.get_structure("example_pdb", filename)
    finally:
        os.remove(filename)

    # comparisons
    self.assertEqual(len(pdb_struct), len(cif_struct))

    # The reference lists must come from the mmCIF structure; building
    # both sides from pdb_struct would make the assertions vacuous.
    pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
    cif_atom_names = [a.name for a in cif_struct.get_atoms()]
    self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
    self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

    pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
    cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
    self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
Example #2
0
def get_info_mmcif(file):
    """Compute per-chain barycenters and residue distances to them.

    The first model of the mmCIF file is used. Each residue is
    represented by its CA atom when present, otherwise by the centroid
    of all its atoms.

    Args:
        file: path of the mmCIF file; the text before the first '.' is
            used as the structure id.

    Returns:
        A tuple ``(bary, enf)`` of dicts keyed by chain object:
        ``bary[chain]`` is the mean (x, y, z) of the residue points and
        ``enf[chain]`` is the list of distances from each residue point
        to that barycenter.
    """
    parser = MMCIFParser()
    structure = parser.get_structure(file.split('.')[0], file)
    bary = {}
    enf = {}
    for chain in structure[0]:
        points = []
        for residue in chain:
            if residue.has_id('CA'):
                points.append(residue['CA'].get_coord())
            else:
                # No alpha carbon: fall back to the centroid of the atoms
                # (an atom-less residue keeps the origin as placeholder).
                atom_coords = [atom.get_coord() for atom in residue]
                if atom_coords:
                    points.append(np.mean(atom_coords, axis=0))
                else:
                    points.append(np.zeros(3))
        points = np.asarray(points, dtype=float).reshape(-1, 3)
        # Average over residues (axis 0), giving one value per coordinate.
        bary[chain] = points.mean(axis=0) if len(points) else np.zeros(3)
        enf[chain] = [np.linalg.norm(point - bary[chain]) for point in points]
    return bary, enf
def CIF2PDB(ciffile, pdbfile, verbose=False):
    """Convert an mmCIF file into a PDB file and return the output path.

    Chains are renamed through ``rename_chains`` before writing. If that
    raises ``OutOfChainsError`` an error is logged and the process exits
    with status 1. With ``verbose`` set, every chain whose name changed
    is logged.
    """
    # The parser requires a structure id; use the first four characters
    # of the given path, or a placeholder when it is too short.
    if len(ciffile) > 4:
        strucid = ciffile[:4]
    else:
        strucid = "1xxx"

    structure = MMCIFParser().get_structure(strucid, ciffile)

    try:
        chainmap = rename_chains(structure)
    except OutOfChainsError:
        logging.error("Too many chains to represent in PDB format")
        sys.exit(1)

    if verbose:
        # chainmap is iterated as (new, old) pairs.
        renamed = ((old, new) for new, old in chainmap.items() if new != old)
        for old, new in renamed:
            logging.info("Renaming chain {0} to {1}".format(old, new))

    # TODO What happens with large structures?
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdbfile)

    return pdbfile
    def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
        """Parse an MMTF and an mmCIF file and check the structures agree.

        Atoms, residues (per chain and overall), chain count and model
        count are compared; atom and residue comparison is delegated to
        ``check_atoms`` / ``check_residues`` via instance attributes.
        """
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure(mmtf_filename)
        mmcif_struct = MMCIFParser().get_structure("4CUP", cif_filename)

        # Atom-level comparison.
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        self.check_atoms()

        # Chain by chain residue comparison (chain counts must match first).
        mmcif_chains = list(mmcif_struct.get_chains())
        mmtf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(mmcif_chains), len(mmtf_chains))
        for cif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
            self.mmcif_res = list(cif_chain.get_residues())
            self.mmtf_res = list(mmtf_chain.get_residues())
            self.check_residues()

        # Whole-structure residue comparison.
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        self.check_residues()

        # Same number of models on both sides.
        cif_model_count = len(list(mmcif_struct.get_models()))
        mmtf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(cif_model_count, mmtf_model_count)
Example #5
0
 def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
     """Build a site directly from a list of residues.

     The residue list is first passed through ``PdbSite._cleanup_list``.
     The structure at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true (also keeping its mmCIF dictionary) and with
     ``FastMMCIFParser`` otherwise. Returns the populated site, or
     ``None`` (after a ``RuntimeWarning``) if parsing fails.
     """
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     site = cls()
     site.parent_entry = parent_entry
     mmcif_dict = dict()
     parser_class = MMCIFParser if annotate else FastMMCIFParser
     try:
         parser = parser_class(QUIET=True)
         structure = parser.get_structure('', cif_path)
         if annotate:
             mmcif_dict = parser._mmcif_dict
     except (TypeError, PDBConstructionException):
         warnings.warn(
             'Could not build site from residue list. Check entry',
             RuntimeWarning)
         return None
     for residue in reslist:
         if structure:
             residue.add_structure(structure)
         site.add(residue)
     if not annotate:
         return site
     site.parent_structure = structure
     site.mmcif_dict = mmcif_dict
     site.find_ligands()
     return site
Example #6
0
 def test_insertions(self):
     """Check polypeptide building on a file with residue insertion codes."""
     # Expected sequence of the first polypeptide segment.
     refseq = (
         "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
         "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
         "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
         "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
     )
     parser = MMCIFParser(QUIET=1)
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = parser.get_structure("example", "PDB/4ZHL.cif")
     first_model = structure[0]
     for builder in (PPBuilder(), CaPPBuilder()):
         # Non-standard amino acids are allowed (second argument False).
         peptides = builder.build_peptides(first_model, False)
         self.assertEqual(len(peptides), 2)
         segment = peptides[0]
         # Residue numbers at both ends of the first segment.
         self.assertEqual(segment[0].get_id()[1], 16)
         self.assertEqual(segment[-1].get_id()[1], 244)
         # Sequence type, alphabet and content.
         sequence = segment.get_sequence()
         self.assertIsInstance(sequence, Seq)
         self.assertEqual(sequence.alphabet, generic_protein)
         self.assertEqual(refseq, str(sequence))
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` once and keep the result.

     The parsed structure (given the id 'STR') is stored in
     ``self.structure`` so it does not have to be parsed again.
     An example file is included in the repository (7ahl.cif).
     """
     cif_parser = MMCIFParser()
     self.structure = cif_parser.get_structure('STR', path)
    def test_filehandle(self):
        """Test if the parser can handle file handle as well as filename."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # Use a context manager so the handle is closed even when
        # parsing or the assertion fails.
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)
Example #9
0
def CifAtomIterator(source):
    """Return SeqRecord objects for each chain in an mmCIF file.

    Argument source is a file-like object or a path to a file.

    The sequences are derived from the 3D structure (_atom_site.* fields)
    in the mmCIF file.

    Unrecognised three letter amino acid codes (e.g. "CSD") from HETATM entries
    are converted to "X" in the sequence.

    In addition to information from the PDB header (which is the same for all
    records), the following chain specific information is placed in the
    annotation:

    record.annotations["residues"] = List of residue ID strings
    record.annotations["chain"] = Chain ID (typically A, B ,...)
    record.annotations["model"] = Model ID (typically zero)

    Where amino acids are missing from the structure, as indicated by residue
    numbering, the sequence is filled in with 'X' characters to match the size
    of the missing region, and None is included as the corresponding entry in
    the list record.annotations["residues"].

    This function uses the Bio.PDB module to do most of the hard work. The
    annotation information could be improved but this extra parsing should be
    done in parse_pdb_header, not this module.

    This gets called internally via Bio.SeqIO for the atom based interpretation
    of the PDB file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("PDB/1A8O.cif", "cif-atom"):
    ...     print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    Equivalently,

    >>> with open("PDB/1A8O.cif") as handle:
    ...     for record in CifAtomIterator(handle):
    ...         print("Record id %s, chain %s" % (record.id, record.annotations["chain"]))
    ...
    Record id 1A8O:A, chain A

    """
    # TODO - Add record.annotations to the doctest, esp the residues (not working?)

    # Only import parser when needed, to avoid/delay NumPy dependency in SeqIO
    from Bio.PDB.MMCIFParser import MMCIFParser

    cif_parser = MMCIFParser()
    structure = cif_parser.get_structure(None, source)

    # Fall back to a placeholder id when the header carries no id code.
    idcode = structure.header["idcode"]
    if idcode:
        pdb_id = idcode
    else:
        warnings.warn("Could not determine the PDB ID.",
                      BiopythonParserWarning)
        pdb_id = "????"
    yield from AtomIterator(pdb_id, structure)
Example #10
0
    def testModels(self):
        """Test files with multiple models.

        For 1LCD the model serial numbers are checked, then both
        polypeptide builders must give one identical peptide in
        permissive and strict mode. 2OFG is only checked for its
        model count.
        """
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1LCD.cif")
        self.assertEqual(len(structure), 3)
        # Check that serial_num (model column) is stored properly; this
        # does not depend on the builder, so it is checked once.
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)

        expected_seq = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # First allow non-standard amino acids (False), then the
            # strict version with only standard amino acids (True).
            # The same result is expected in both modes.
            for aa_only in (False, True):
                polypeptides = ppbuild.build_peptides(structure[0], aa_only)
                self.assertEqual(len(polypeptides), 1)
                pp = polypeptides[0]
                # Check the start and end positions
                self.assertEqual(pp[0].get_id()[1], 1)
                self.assertEqual(pp[-1].get_id()[1], 51)
                # Check the sequence
                s = pp.get_sequence()
                self.assertIsInstance(s, Seq)
                self.assertEqual(s.alphabet, generic_protein)
                self.assertEqual(expected_seq, str(s))

        parser = MMCIFParser()
        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` once and keep the result.

     The structure is parsed under the id 'PHA-L' and stored in
     ``self.structure`` for re-use by the other methods.
     An example file is included in the repository (7ahl.cif).
     """
     self.structure = MMCIFParser().get_structure('PHA-L', path)
Example #12
0
    def test_conversion_not_preserve_numbering(self):
        """Convert mmCIF to PDB and renumber atom serials."""
        import os

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        # mkstemp returns an open descriptor: close it (the writer opens
        # the file by name) and remove the temporary file when done.
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            pdb_writer.save(filename, preserve_atom_numbering=False)
        finally:
            os.remove(filename)
Example #13
0
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` once and keep the result.

     The parsed structure is stored in ``self.structure`` so the other
     methods can re-use it instead of parsing the file again.
     An example file is included in the repository (7ahl.cif).
     """
     # The first argument is only the id given to the parsed structure.
     structure_id = 'some structure string here, e.g. 7AHL'
     self.structure = MMCIFParser().get_structure(structure_id, path)
Example #14
0
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` once and keep the result.

     The parsed structure (id "Structure") is stored in
     ``self.structure`` for re-use by the other methods.
     An example file is included in the repository (7ahl.cif).
     """
     # parser object for reading in structure in CIF format
     CIF_PARSER = MMCIFParser()
     # Parse the structure once and re-use it in the functions below
     self.structure = CIF_PARSER.get_structure("Structure", path)
     # NOTE(review): this looks like a leftover debug print; it is kept
     # because it is observable output -- confirm and remove if unwanted.
     print(self.get_number_of_water_molecules("D"))
Example #15
0
def clean_pdb(file_input, file_output, chain_to_keep, parameters):
    """Filter a structure read from mmCIF and write it back as mmCIF.

    The structure from ``file_input`` is passed through ``remove_chains``
    (with ``chain_to_keep``) and ``remove_extra_atoms`` (with
    ``parameters``), then saved to ``file_output``.
    """
    # Structure id: the input path minus its last four characters, upper-cased.
    structure_id = file_input[:-4].upper()
    structure = MMCIFParser().get_structure(structure_id, file_input)

    structure = remove_extra_atoms(
        remove_chains(structure, chain_to_keep), parameters)

    writer = MMCIFIO()
    writer.set_structure(structure)
    writer.save(file_output)
Example #16
0
    def test_conversion_preserve_numbering(self):
        """Convert mmCIF to PDB and preserve original serial numbering.

        Saving with ``preserve_atom_numbering=True`` is expected to
        raise ``ValueError`` for this structure.
        """
        import os

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        # mkstemp returns an open descriptor: close it (the writer opens
        # the file by name) and remove the temporary file when done.
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            with self.assertRaises(ValueError):
                pdb_writer.save(filename, preserve_atom_numbering=True)
        finally:
            os.remove(filename)
Example #17
0
def get_STR(filePath, fileType='pdb'):
    """Load ``filePath`` according to ``fileType``.

    "pdb" returns the structure produced by ``MMCIFParser``; "cif"
    returns the ``MMCIF2Dict`` mapping of the file. Any other value
    raises ``TypeError``.
    """
    # NOTE(review): the "pdb" branch uses the mmCIF parser -- confirm
    # this is intended rather than a PDB-format parser.
    if fileType == "pdb":
        return MMCIFParser(QUIET=True).get_structure("pdb", filePath)

    if fileType != "cif":
        raise TypeError("%s is not a valid fileType" % fileType)

    return MMCIF2Dict(filePath)
Example #18
0
 def test_parser(self):
     """Extract polypeptides from 1A8O.

     Both polypeptide builders are run in permissive mode (one peptide
     expected) and in strict mode (three fragments expected).
     """
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     # Check that serial_num (model column) is stored properly; this
     # does not depend on the builder, so it is checked once.
     self.assertEqual(structure[0].serial_num, 1)

     # (first residue number, last residue number, sequence) per fragment
     # in strict mode: MSE 151 at the start is ignored and the chain
     # breaks at MSE 185 and at MSE 214,215.
     strict_fragments = [
         (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
         (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
         (216, 220, "TACQG"),
     ]
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # ==========================================================
         # First try allowing non-standard amino acids,
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 1)
         pp = polypeptides[0]
         # Check the start and end positions
         self.assertEqual(pp[0].get_id()[1], 151)
         self.assertEqual(pp[-1].get_id()[1], 220)
         # Check the sequence
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         # Here non-standard MSE are shown as M
         self.assertEqual(
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))
         # ==========================================================
         # Now try strict version with only standard amino acids
         polypeptides = ppbuild.build_peptides(structure[0], True)
         self.assertEqual(len(polypeptides), 3)
         for pp, (start, end, sequence) in zip(polypeptides,
                                               strict_fragments):
             self.assertEqual(pp[0].get_id()[1], start)
             self.assertEqual(pp[-1].get_id()[1], end)
             s = pp.get_sequence()
             self.assertIsInstance(s, Seq)
             self.assertEqual(s.alphabet, generic_protein)
             self.assertEqual(sequence, str(s))
Example #19
0
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` and prepare lookup helpers.

     Stores the parsed structure, its first model, and a dictionary
     from upper-cased residue names to one-letter codes in which 'HOH'
     maps to the empty string.
     An example file is included in the repository (7ahl.cif).
     """
     # parser object for reading in structure in CIF format
     self.structure = MMCIFParser(QUIET=True).get_structure('structure', path)
     self.model = self.structure[0]

     # Later sources override earlier ones on identical upper-cased keys.
     lookup = {}
     for table in (protein_letters_3to1, {'HOH': ''}):
         for name, letter in table.items():
             lookup[name.upper()] = letter
     self.residue_dict = lookup
def get_atoms(file):
    """Return, per chain of the first model, the CA positions of residues.

    The result is a list with one inner list per chain; each inner list
    holds ``(residue name, CA vector)`` tuples for the residues that
    have a 'CA' atom. The text before the first '.' of ``file`` is used
    as the structure id.
    """
    structure = MMCIFParser().get_structure(file.split('.')[0], file)
    return [
        [
            (residue.get_resname(), residue['CA'].get_vector())
            for residue in chain
            if residue.has_id('CA')
        ]
        for chain in structure[0]
    ]
Example #21
0
    def addStruct(self,
                  secondPDBfileName,
                  outPDBfileName=None,
                  useModel=False):
        """Join a second structure to the one already loaded.

        With ``useModel`` the models of the second structure are appended
        as new models, numbered after the last existing one. Otherwise
        its chains are added to the first model, after
        ``_renameChainsIfNeed`` has been applied to the second structure.
        If ``outPDBfileName`` is provided, the combined structure is
        written to that file.
        """
        # The structure id is taken from the output file name when given,
        # otherwise from the input file name (first four characters).
        if outPDBfileName is not None:
            idSource = outPDBfileName
        else:
            idSource = secondPDBfileName
        pdbID = os.path.splitext(os.path.basename(idSource))[0][:4]

        # read new structure
        if secondPDBfileName.endswith((".pdb", ".ent")):
            parser = PDBParser(PERMISSIVE=self.permissive)
        else:
            parser = MMCIFParser()

        struct2 = parser.get_structure(pdbID, secondPDBfileName)

        if useModel:
            # Continue numbering after the last existing model. The
            # defaults cover a structure without models (previously a
            # NameError): the first added model then gets id 0, serial 1.
            modelNumber = 0
            modelID = -1
            for model in self.structure:
                modelNumber = model.serial_num
                modelID = model.id
            for model in struct2:
                modelNumber += 1
                modelID += 1
                model.detach_parent()
                model.serial_num = modelNumber
                model.id = modelID
                self.structure.add(model)
        else:
            self._renameChainsIfNeed(struct2)

            for model in struct2:
                for chain in model:
                    chain.detach_parent()
                    self.structure[0].add(chain)

        # create new output file
        if outPDBfileName is not None:
            self.write(outPDBfileName)
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` and index its chains.

     Stores a ``PPBuilder``, the parsed structure, and a dictionary
     mapping each chain id to its chain object.
     An example file is included in the repository (7ahl.cif).
     """
     # parser object for reading in structure in CIF format
     CIF_PARSER: MMCIFParser = MMCIFParser()
     self.ppb = PPBuilder()
     self.structure = CIF_PARSER.get_structure('structure', path)
     # Dict needs both a key and a value type; keys are chain ids
     # (presumably strings -- confirm against the parser's chain ids).
     self.chains: Dict[str, Entity] = {
         chain.id: chain for chain in self.structure.get_chains()
     }
    def _load_structure_file(self, input_pdb_path, cache_dir, pdb_server,
                             file_format):
        """Load a structure from a local file or a "pdb:" reference.

        When ``input_pdb_path`` contains "pdb:", the file is retrieved
        through ``MMBPDBList`` (a "pdb:ID.biounit" form additionally
        selects a biounit and requires ``pdb_server == 'mmb'``);
        otherwise the path is used as given. The parser is chosen from
        the presence of '.pdb' or '.cif' in the resolved path.

        Sets ``self.st`` and ``self.headers`` (and ``self.biounit`` for
        biounit requests).

        Returns:
            'pdb' or 'cif', the detected input format.

        Raises:
            WrongServerError: biounit requested with a non-'mmb' server.
            UnknownFileTypeError: path contains neither '.pdb' nor '.cif'.
            ParseError: the parser raised ValueError or
                PDBConstructionException.
        """
        if "pdb:" in input_pdb_path:
            # MMBPDBList child defaults to Bio.PDB.PDBList if MMB server is not selected
            pdbl = MMBPDBList(pdb=cache_dir, server=pdb_server)
            if '.' in input_pdb_path:
                # Form "pdb:ID.biounit"; the slice below drops the first
                # four characters (assumes the "pdb:" prefix is at the start).
                [pdbid, biounit] = input_pdb_path.split('.')
                input_pdb_path = pdbid[4:].upper()
                if pdb_server != 'mmb':
                    raise WrongServerError
                real_pdb_path = pdbl.retrieve_pdb_file(input_pdb_path,
                                                       file_format='pdb',
                                                       biounit=biounit)
                self.biounit = biounit
            else:
                input_pdb_path = input_pdb_path[4:].upper()
                # NOTE(review): this reads self.file_format while the next
                # check uses the file_format parameter -- confirm they are
                # meant to be the same value.
                real_pdb_path = pdbl.retrieve_pdb_file(
                    input_pdb_path, file_format=self.file_format)
                if file_format == 'pdb':
                    # change file name to id.pdb
                    os.rename(real_pdb_path, input_pdb_path + ".pdb")
                    real_pdb_path = input_pdb_path + ".pdb"
        else:
            real_pdb_path = input_pdb_path

        # Substring test on the whole path, not only on its extension.
        if '.pdb' in real_pdb_path:
            parser = PDBParser(PERMISSIVE=1)
            input_format = 'pdb'
        elif '.cif' in real_pdb_path:
            parser = MMCIFParser()
            input_format = 'cif'
        else:
            raise UnknownFileTypeError(input_pdb_path)

        # Installs an 'ignore' filter for BiopythonWarning; it is not
        # scoped with catch_warnings, so it stays in effect afterwards.
        warnings.simplefilter('ignore', BiopythonWarning)

        try:
            self.st = parser.get_structure('st', real_pdb_path)
        except ValueError as err:
            raise ParseError('ValueError', err)
        except PDBConstructionException as err:
            raise ParseError('PDBBuildError', err)
        # Header information comes from a format-specific reader.
        if input_format == 'pdb':
            self.headers = parse_pdb_header(real_pdb_path)
        else:
            self.headers = MMCIF2Dict(real_pdb_path)

        return input_format
    def setUp(self):
        """Parse 1LCD from both its PDB and its mmCIF file.

        The resulting structures are stored as ``self.pdbo`` and
        ``self.cifo``; construction warnings are ignored.
        """
        # Silence!
        warnings.simplefilter("ignore", PDBConstructionWarning)

        pdbparser = PDBParser(QUIET=1)
        cifparser = MMCIFParser(QUIET=1)

        # Test data lives in the "PDB" folder next to this module.
        data_dir = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "PDB")

        self.pdbo = pdbparser.get_structure(
            "pdb", os.path.join(data_dir, "1LCD.pdb"))
        self.cifo = cifparser.get_structure(
            "pdb", os.path.join(data_dir, "1LCD.cif"))
Example #25
0
def read_inputs(in_file, file_format, curr_model, chains):
    """Read a structure file and extract its coordinates.

    Args:
        in_file: path of the structure file, or "-" to read it from stdin.
        file_format: "pdb", "mmcif"/"cif", "mmtf", "mae"/"maegz"
            (case-insensitive); when falsy it is inferred from the text
            after the last '.' in the file name.
        curr_model: model number; the read is rejected when it is larger
            than the number of coordinate sets.
        chains: passed through to the coordinate extractor.

    Returns:
        ``(np.array(coords), info)`` on success, ``(None, None)`` when the
        format is unrecognised, stdin cannot be re-attached to a terminal,
        or there is nothing to show.
    """
    # Infer file format from extension
    file_format = file_format or os.path.basename(in_file).rsplit(".", 1)[-1]

    # Handle stdin
    if in_file == "-":
        contents = sys.stdin.read()
        struct_file = StringIO(contents)
        try:
            # Redirect stdin from pipe back to terminal
            sys.stdin = open("/dev/tty", "r")
        except OSError:
            # Only a failure to open the terminal device is expected here;
            # anything else (e.g. KeyboardInterrupt) must propagate.
            print(
                "Piping structures not supported on this system (no /dev/tty)")
            return None, None
    else:
        struct_file = in_file

    fmt = file_format.lower()
    # Coordinate extractor; left unset means the Biopython default.
    get_coords = None

    if fmt == "pdb":
        from Bio.PDB import PDBParser
        p = PDBParser()
        struc = p.get_structure("", struct_file)
    elif fmt in ("mmcif", "cif"):
        from Bio.PDB.MMCIFParser import MMCIFParser
        p = MMCIFParser()
        struc = p.get_structure("", struct_file)
    elif fmt == "mmtf":
        from Bio.PDB.mmtf import MMTFParser
        struc = MMTFParser.get_structure(struct_file)
    elif fmt in ("mae", "maegz"):
        from schrodinger import structure
        struc = list(structure.StructureReader(struct_file))
        get_coords = get_coords_schrodinger
    else:
        print("Unrecognised file format")
        return None, None

    # Use Biopython parser by default
    if get_coords is None:
        get_coords = get_coords_biopython

    coords, info = get_coords(struc, chains)

    if coords is None or curr_model > len(coords):
        print("Nothing to show")
        return None, None

    return np.array(coords), info
Example #26
0
 def test_write(self):
     """Write 1A8O to MMTF and check the decoded content of the file."""
     structure = MMCIFParser().get_structure("1A8O", "PDB/1A8O.cif")
     writer = MMTFIO()
     writer.set_structure(structure)
     handle, path = tempfile.mkstemp()
     os.close(handle)
     try:
         writer.save(path)
         # The written file must also be readable as a structure.
         MMTFParser.get_structure(path)
         decoded = mmtf.parse(path)

         # Identifier and overall counts.
         for attribute, expected in (
             ("structure_id", "1A8O"),
             ("num_models", 1),
             ("num_chains", 2),
             ("num_groups", 158),
             ("num_atoms", 644),
         ):
             self.assertEqual(getattr(decoded, attribute), expected)

         # Every per-atom list has one entry per atom.
         for attribute in (
             "x_coord_list",
             "y_coord_list",
             "z_coord_list",
             "b_factor_list",
             "occupancy_list",
         ):
             self.assertEqual(len(getattr(decoded, attribute)), 644)

         self.assertEqual(decoded.x_coord_list[5], 20.022)
         self.assertEqual(set(decoded.ins_code_list), {"\x00"})
         self.assertEqual(set(decoded.alt_loc_list), {"\x00"})
         self.assertEqual(list(decoded.atom_id_list), list(range(1, 645)))
         self.assertEqual(list(decoded.sequence_index_list),
                          list(range(70)) + [-1] * 88)

         # Chain and group bookkeeping.
         self.assertEqual(decoded.chain_id_list, ["A", "B"])
         self.assertEqual(decoded.chain_name_list, ["A", "A"])
         self.assertEqual(decoded.chains_per_model, [2])
         self.assertEqual(len(decoded.group_list), 21)
         self.assertEqual(len(decoded.group_id_list), 158)
         self.assertEqual(len(decoded.group_type_list), 158)
         self.assertEqual(decoded.groups_per_chain, [70, 88])

         # Entities: one polymer and one water entry.
         self.assertEqual(len(decoded.entity_list), 2)
         polymer, water = decoded.entity_list[0], decoded.entity_list[1]
         self.assertEqual(polymer["type"], "polymer")
         self.assertEqual(polymer["chainIndexList"], [0])
         self.assertEqual(
             polymer["sequence"],
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
         )
         self.assertEqual(water["type"], "water")
         self.assertEqual(water["chainIndexList"], [1])
         self.assertEqual(water["sequence"], "")
     finally:
         os.remove(path)
Example #27
0
 def build_all(cls,
               reslist,
               reference_site,
               parent_entry,
               cif_path,
               annotate=True,
               redundancy_cutoff=None):
     """Build all sites from a list of catalytic residues.

     The structure at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true (also keeping its mmCIF dictionary) and with
     ``FastMMCIFParser`` otherwise. One site is built per seed, the
     sites are passed through ``_remove_redundant_sites`` with
     ``redundancy_cutoff``, and, when annotating, ligands are looked up.

     Returns a list of PdbSite objects; the list is empty (after a
     ``RuntimeWarning``) if the structure cannot be parsed.
     """
     # Map structure objects in every residue
     sites = []
     mmcif_dict = dict()
     try:
         if annotate:
             parser = MMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
             mmcif_dict = parser._mmcif_dict
         else:
             parser = FastMMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
     except (TypeError, PDBConstructionException):
         # The warning category belongs to warn(), not to str.format().
         warnings.warn('Could not parse structure {}'.format(cif_path),
                       RuntimeWarning)
         return sites
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     # We want all equivalent residues from identical assembly chains
     reslist = PdbSite._get_assembly_residues(reslist, structure)
     # Get seeds to build active sites
     seeds = PdbSite._get_seeds(reslist)
     # Build a site from each seed
     for seed in seeds:
         sites.append(cls.build(seed, reslist, reference_site,
                                parent_entry))
     # Reduce redundancy
     sites = PdbSite._remove_redundant_sites(sites,
                                             cutoff=redundancy_cutoff)
     # Add ligands and annotations
     if annotate and structure:
         for site in sites:
             site.parent_structure = structure
             site.mmcif_dict = mmcif_dict
             site.find_ligands()
     # Flag unclustered sites
     PdbSite._mark_unclustered(sites)
     return sites
Example #28
0
    def test_header(self):
        """Check the header fields the parser fills in for two files."""
        parser = MMCIFParser()

        # (file, ((header key, expected value), ...)) checked in order.
        cases = (
            ("PDB/a_structure.cif", (
                ("idcode", ""),
                ("head", ""),
                ("deposition_date", ""),
                ("structure_method", ""),
                ("resolution", 0.0),
            )),
            ("PDB/1A8O.cif", (
                ("idcode", "1A8O"),
                ("head", "Viral protein"),
                ("deposition_date", ""),
                ("structure_method", "X-RAY DIFFRACTION"),
                ("resolution", 1.7),
            )),
        )
        for cif_path, expected_fields in cases:
            structure = parser.get_structure("example", cif_path)
            for key, expected in expected_fields:
                self.assertEqual(expected, structure.header[key])
Example #29
0
    def read(self, fileName):
        """Parse ``fileName`` and store the resulting structure.

        Names ending in ".pdb" or ".ent" go through the PDB parser,
        everything else through the mmCIF parser; each parser is created
        on first use and cached on the instance. Sets ``self.type``,
        ``self.structure`` and ``self._readDone``.
        """
        # biopython assigns an ID to any read structure: the first four
        # characters of the base name, or a placeholder if it is short.
        baseName = os.path.basename(fileName)
        structure_id = "1xxx" if len(baseName) <= 4 else baseName[:4]

        if fileName.endswith((".pdb", ".ent")):
            if self.pdbParser is None:
                self.pdbParser = PDBParser(PERMISSIVE=self.permissive)
            self.type = self.PDB
            parser = self.pdbParser
        else:
            if self.cifParser is None:
                self.cifParser = MMCIFParser()
            self.type = self.CIF
            parser = self.cifParser

        self.structure = parser.get_structure(structure_id, fileName)
        self._readDone = True
def get_descriptors(file):
    """Return one descriptor dict per residue of the first model.

    Every dict has a "name" entry (the residue name). Residues with a
    'CA' atom also get "coord" (the CA vector) and "hse" (the first two
    values of the ``HSExposureCB`` entry for that residue). The text
    before the first '.' of ``file`` is used as the structure id.
    """
    structure = MMCIFParser().get_structure(file.split('.')[0], file)
    model = structure[0]
    hse = HSExposureCB(model)
    descriptors = []
    for chain in model:
        for residue in chain:
            entry = {}
            entry["name"] = residue.get_resname()
            if residue.has_id('CA'):
                entry["coord"] = residue['CA'].get_vector()
                exposure = hse[(chain.id, residue.id)]
                entry["hse"] = (exposure[0], exposure[1])
            descriptors.append(entry)
    return descriptors