def test_loop_keyword_case_insensitive(self):
        """The ``loop_`` keyword parses the same regardless of its letter case.

        The same document is parsed with ``loop_`` spelled in several
        cases and every result must equal the lower-case baseline.  As a
        control, changing the case of a data name must give a different
        dictionary.
        """
        test_data = """\
            data_verbatim_test
            _test_key_value foo # Ignore this comment
            loop_
            _test_loop
            a b c d # Ignore this comment
            e f g

        """

        def parse(text):
            # Dedent so the indented literal above starts in column 1.
            return MMCIF2Dict(io.StringIO(textwrap.dedent(text)))

        mmcif_dict = parse(test_data)

        for keyword in ("LOOP_", "looP_"):
            mmcif_dict2 = parse(test_data.replace("loop_", keyword))
            self.assertDictEqual(mmcif_dict, mmcif_dict2)

        # "_loop" only occurs inside the data name "_test_loop", so this
        # replacement renames a key instead of touching the keyword.
        mmcif_dict2 = parse(test_data.replace("_loop", "_LOOP"))
        self.assertNotEqual(mmcif_dict, mmcif_dict2)
Example #2
0
 def loadPDB_CIF_format(pdbId, filePath, pdbl):
     """Return the mmCIF dictionary for ``pdbId``, fetching the file if needed.

     The file is looked up as ``<filePath><pdbId>.cif``.  If reading it
     raises ``IOError``, ``pdbl.retrieve_pdb_file`` is asked to download
     it into ``filePath`` and the parse is attempted once more.
     """
     cif_path = '%s%s.cif' % (filePath, pdbId)
     try:
         return MMCIF2Dict(cif_path)
     except IOError:
         # No readable local copy yet: download it, then parse below.
         pdbl.retrieve_pdb_file(pdbId, file_format='mmCif', pdir=filePath)
     return MMCIF2Dict(cif_path)
    def test_token_after_multiline(self):
        """A token may follow the end of a multi-line string on the same line."""
        expected = {
            "data_": "test",
            "_key1": ["foo bar"],
            "_key2": ["value 2"]
        }
        well_formed = "data_test _key1\n;foo bar\n; _key2 'value 2'\n"
        self.assertEqual(MMCIF2Dict(io.StringIO(well_formed)), expected)

        # Nothing separates the closing ";" from the "#", which must be
        # reported as missing whitespace.
        malformed = io.StringIO(
            "data_test _key1\n;foo bar\n;# missing space here")
        with self.assertRaisesRegex(ValueError, "Missing whitespace"):
            MMCIF2Dict(malformed)
def parse_cif(label):
    """Read ``<label>.cif`` and return its site annotations, if any.

    Returns a tuple ``(siteseq, siteinfo, HasSite)``:

    - ``siteseq``: array with 4 rows built from the ``_struct_site_gen``
      items ``auth_comp_id``, ``auth_asym_id``, ``auth_seq_id`` and
      ``site_id`` (in that order).
    - ``siteinfo``: array with 2 rows built from ``_struct_site.id`` and
      ``_struct_site.details``.
    - ``HasSite``: ``True`` when ``_struct_site.id`` is present.

    Without ``_struct_site.id`` both arrays are empty and the flag is
    ``False``.
    """
    mmcif_dict = MMCIF2Dict(label + '.cif')

    # No site records documented in this file.
    if "_struct_site.id" not in mmcif_dict:
        return np.asarray([]), np.asarray([]), False

    gen_items = (
        "_struct_site_gen.auth_comp_id",
        "_struct_site_gen.auth_asym_id",
        "_struct_site_gen.auth_seq_id",
        "_struct_site_gen.site_id",
    )
    siteseq = np.asarray(tuple(mmcif_dict[item] for item in gen_items))
    siteseq.shape = (4, -1)

    # Identifier and free-text description of each site.
    site_items = ("_struct_site.id", "_struct_site.details")
    siteinfo = np.asarray(tuple(mmcif_dict[item] for item in site_items))
    siteinfo.shape = (2, -1)

    return siteseq, siteinfo, True
Example #5
0
 def get_info_from_mmcif(self, key):
     """Return the value stored under ``key`` in this entry's mmCIF file.

     The file is read from ``mmcifs/<self.code>.mmcif``.  The lookup is
     best-effort: if reading or parsing fails, a message is printed and
     ``None`` is returned.  ``None`` is also returned when the key is
     absent, because the lookup uses ``.get``.
     """
     cifname = 'mmcifs/' + self.code + '.mmcif'
     try:
         return MMCIF2Dict(cifname).get(key)
     except Exception:
         # print() with a single argument behaves the same on Python 2
         # and 3; the original print statement is a syntax error on 3.
         print('no %s in %s' % (key, self.code))
         return None
Example #6
0
    def get_structure(self, structure_id, file):
        """Parse ``file`` and return the structure built from it.

        The parameter order differs from BioPython's implementation (it is
        reversed, as structure_id is optional).

        :param structure_id: if None, taken from mmcif (`_entry.id`)
        :param file: a file-like object or a file name
        :return: Bio.PDB.Structure
        """
        with warnings.catch_warnings():
            if self.QUIET:
                warnings.filterwarnings("ignore",
                                        category=PDBConstructionWarning)
            parsed = MMCIF2Dict(file)
            self._mmcif_dict = parsed

            # Fall back to the lower-cased first `_entry.id` value.
            if structure_id is None:
                structure_id = parsed['_entry.id'][0].lower()

            self._build_structure(structure_id)
            header = self._get_header()
            self._structure_builder.set_header(header)

        return self._structure_builder.get_structure()
 def test_get_mmcif_seq(self):
     """Check the canonical per-chain sequences extracted from 4nuv.cif."""
     mmcif_dict = MMCIF2Dict('./tests/pdb/4nuv.cif')
     sequence = pdbtools.get_mmcif_canonical_seq(mmcif_dict)

     # Chains C and D are expected to share one sequence, A and B another.
     short_seq = 'GPTGTENSSQLDFEDVWNSSYGVNDSFPDGDYGA'
     long_seq = ''.join([
         'ASNTVMKNCNYKRKRRERDWDCNTKKDVCIPDRRYQLCMKELTNLVNNTDT',
         'NFHRDITFRKLYLKRKLIYDAAVEGDLLLKLNNYRYNKDFCKDIRWSLGDF',
         'GDIIMGTDMEGIGYSKVVENNLRSIFGTDEKAQQRRKQWWNESKAQIWTAM',
         'MYSVKKRLKGNFIWICKLNVAVNIEPQIYRWIREWGRDYVSELPTEVQKLK',
         'EKCDGKINYTDKKVCKVPPCQNACKSYDQWITRKKNQWDVLSNKFISVKNA',
         'EKVQTAGIVTPYDILKQELDEFNEVAFENEINKRDGAYIELCVCSVEEAKK',
         'NTQEVVTNVDN',
     ])
     to_match = {
         'C': short_seq,
         'D': short_seq,
         'A': long_seq,
         'B': long_seq,
     }
     self.assertDictEqual(sequence, to_match)
Example #8
0
 def get_structure(self, structure_id, filename):
     """Parse an mmCIF file and return the structure built from it.

     Arguments:
      - structure_id - id handed to ``self._build_structure``
      - filename - passed unchanged to ``MMCIF2Dict``

     When ``self.QUIET`` is true, ``PDBConstructionWarning`` is ignored
     while the file is parsed and the structure is built.
     """
     with warnings.catch_warnings():
         if self.QUIET:
             warnings.filterwarnings("ignore", category=PDBConstructionWarning)
         # These calls must stay inside the context manager: the filter
         # installed above is discarded as soon as the block exits, so
         # running them afterwards would never silence anything.
         self._mmcif_dict = MMCIF2Dict(filename)
         self._build_structure(structure_id)
     return self._structure_builder.get_structure()
def fasta(t1, pos):
    """Build the residue string for one chain from its gzipped mmCIF file.

    ``t1`` is sliced as ``t1[0:4]`` (PDB code) and ``t1[5:]`` (chain).
    The archive ``<pathmmcif>/<pdb[1:3]>/<pdb>.cif.gz`` is unpacked into
    the scratch file ``pdbprocess1.cif``, parsed, and handed to
    ``seqres_atom_map`` together with ``chain`` and ``pos``.

    Returns ``(sequence, idmap[1])`` where ``sequence`` concatenates the
    entries ``idmap[0][(str(k), chain)]`` for k = 1..len(idmap[0]), with a
    newline after every 100th entry.  On any failure the string ``"NA"``
    is returned instead of a tuple.
    """
    pdb = t1[0:4]
    chain = t1[5:]

    try:
        fol = pdb[1:3]
        pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
        # Context managers close both handles even if the copy fails.
        with gzip.open(pdbfile, "rb") as tar, \
                open("pdbprocess1.cif", "wb") as out:
            out.write(tar.read())

        mmcif = MMCIF2Dict("pdbprocess1.cif")
        idmap1 = seqres_atom_map(mmcif, chain, pos)
        residues = idmap1[0]

        pieces = []
        for k1 in range(1, len(residues) + 1):
            res = residues[("{}".format(k1), chain)]
            if k1 % 100 == 0:
                pieces.append("{}\n".format(res))
            else:
                pieces.append("{}".format(res))

        return ("".join(pieces), idmap1[1])

    except Exception:
        # Deliberate best-effort: callers receive the "NA" sentinel.
        return "NA"
 def test_quotefix(self):
     """Helix ids containing quote characters must be parsed verbatim."""
     mmcif = MMCIF2Dict("PDB/1MOM_min.cif")
     self.assertEqual(len(mmcif.keys()), 21)

     expected_helix_ids = [
         "A", "A'", "B", "C", "B'", "D", "E", "C'", "F",
         "G", "H", "D'", "E'", "A'\"", "BC", "CD", "DE",
     ]
     self.assertEqual(
         mmcif["_struct_conf.pdbx_PDB_helix_id"], expected_helix_ids
     )
    def test_splitline(self):
        """Exercise ``_splitline`` on quoting, comment and error cases."""
        mmcif = MMCIF2Dict("PDB/4Q9R_min.cif")

        def check(cases):
            for line, expected in cases:
                self.assertEqual(list(mmcif._splitline(line)), expected)

        # Plain tokens and quoted tokens.
        check([
            ("foo bar", ["foo", "bar"]),
            ("  foo bar  ", ["foo", "bar"]),
            ("'foo' bar", ["foo", "bar"]),
            ('foo "bar"', ["foo", "bar"]),
            ("foo 'bar a' b", ["foo", "bar a", "b"]),
            ("foo 'bar'a' b", ["foo", "bar'a", "b"]),
            ('foo "bar\' a" b', ["foo", "bar' a", "b"]),
            ("foo '' b", ["foo", "", "b"]),
            ("foo bar' b", ["foo", "bar'", "b"]),
            ("foo bar b'", ["foo", "bar", "b'"]),
        ])

        # A hash (#) starts a comment iff it is preceded by whitespace or is
        # at the beginning of a line:
        # https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#lex
        check([
            ("foo#bar", ["foo#bar"]),
            ("foo #bar", ["foo"]),
            ("foo# bar", ["foo#", "bar"]),
            ("#foo bar", []),
        ])

        # These inputs must be rejected with ValueError.
        for bad_line in ("foo 'bar", "foo 'ba'r  ", "foo \"bar'"):
            self.assertRaises(ValueError, list, mmcif._splitline(bad_line))

        # quotes are allowed if not followed by whitespace
        check([
            ("foo b'ar'", ["foo", "b'ar'"]),
            ("foo 'b'ar'", ["foo", "b'ar"]),
        ])
Example #12
0
 def test_MMCIF2dict(self):
     """Parse PDB/1A8O.cif and compare the key count and three items with fixtures."""
     filename = "PDB/1A8O.cif"
     mmcif = MMCIF2Dict(filename)
     # Total number of keys expected in the parsed dictionary.
     self.assertEqual(len(mmcif.keys()), 575)
     # Looped item: expected as a list of residue-name strings.
     self.assertEqual(mmcif['_entity_poly_seq.mon_id'], ['MSE', 'ASP', 'ILE', 'ARG', 'GLN', 'GLY', 'PRO', 'LYS', 'GLU', 'PRO', 'PHE', 'ARG', 'ASP', 'TYR', 'VAL', 'ASP', 'ARG', 'PHE', 'TYR', 'LYS', 'THR', 'LEU', 'ARG', 'ALA', 'GLU', 'GLN', 'ALA', 'SER', 'GLN', 'GLU', 'VAL', 'LYS', 'ASN', 'TRP', 'MSE', 'THR', 'GLU', 'THR', 'LEU', 'LEU', 'VAL', 'GLN', 'ASN', 'ALA', 'ASN', 'PRO', 'ASP', 'CYS', 'LYS', 'THR', 'ILE', 'LEU', 'LYS', 'ALA', 'LEU', 'GLY', 'PRO', 'GLY', 'ALA', 'THR', 'LEU', 'GLU', 'GLU', 'MSE', 'MSE', 'THR', 'ALA', 'CYS', 'GLN', 'GLY'])
     # Looped item: coordinates are compared as strings, not floats.
     self.assertEqual(mmcif['_atom_site.Cartn_x'], ['19.594', '20.255', '20.351', '19.362', '19.457', '20.022', '21.718', '21.424', '21.554', '21.835', '21.947', '21.678', '23.126', '23.098', '23.433', '22.749', '22.322', '22.498', '21.220', '20.214', '23.062', '24.282', '23.423', '25.429', '21.280', '20.173', '20.766', '21.804', '19.444', '18.724', '18.011', '17.416', '16.221', '15.459', '15.824', '20.116', '20.613', '20.546', '19.488', '19.837', '20.385', '19.526', '18.365', '20.090', '21.675', '21.698', '20.859', '20.729', '20.260', '19.435', '20.158', '19.512', '18.993', '20.056', '20.300', '21.486', '22.285', '23.286', '24.155', '23.025', '22.117', '21.236', '20.159', '19.231', '23.152', '24.037', '23.563', '22.398', '24.086', '25.003', '24.858', '23.861', '25.748', '24.459', '24.089', '23.580', '24.111', '25.415', '26.116', '25.852', '22.544', '21.960', '22.965', '22.928', '20.793', '19.999', '19.234', '20.019', '18.495', '19.286', '18.523', '23.861', '24.870', '25.788', '26.158', '25.684', '26.777', '26.215', '27.235', '28.136', '28.155', '29.030', '26.137', '26.994', '26.279', '26.880', '27.408', '28.345', '28.814', '28.620', '24.992', '24.151', '24.025', '24.139', '22.787', '21.629', '21.657', '20.489', '20.571', '19.408', '19.450', '18.365', '23.839', '23.720', '24.962', '24.853', '23.502', '23.661', '22.120', '26.137', '27.387', '27.511', '27.925', '28.595', '28.723', '28.016', '29.545', '27.136', '27.202', '26.238', '26.585', '26.850', '27.835', '27.667', '26.352', '25.494', '25.797', '24.325', '25.037', '23.984', '24.456', '24.305', '22.761', '21.538', '21.301', '20.586', '20.130', '19.415', '19.186', '25.033', '25.526', '26.755', '27.015', '25.771', '24.608', '23.508', '24.583', '22.406', '23.490', '22.406', '21.326', '27.508', '28.691', '28.183', '28.705', '29.455', '30.787', '31.428', '32.618', '33.153', '27.116', '26.508', '25.826', '25.827', '25.475', '26.150', '24.741', '25.264', '24.587', '25.587', '25.302', '23.789', '22.707', '21.787', 
'21.910', '26.767', '27.806', '28.299', '28.656', '29.006', '28.944', '30.295', '30.744', '30.326', '29.441', '30.787', '28.332', '28.789', '27.943', '28.374', '28.803', '26.740', '25.833', '25.775', '24.998', '24.425', '24.354', '24.816', '24.535', '25.454', '26.601', '26.645', '25.240', '24.885', '27.391', '28.884', '29.200', '28.729', '29.998', '24.438', '23.066', '23.001', '23.824', '22.370', '22.035', '21.831', '21.174', '20.852', '20.917', '19.638', '20.949', '20.315', '18.908', '18.539', '20.262', '19.688', '20.414', '21.592', '19.714', '18.136', '16.775', '16.738', '15.875', '16.101', '15.478', '14.341', '13.247', '14.542', '17.668', '17.730', '18.064', '17.491', '18.754', '18.932', '18.279', '18.971', '19.343', '18.126', '17.905', '20.444', '21.777', '22.756', '24.069', '24.913', '17.344', '16.136', '15.146', '14.599', '15.468', '16.242', '17.164', '15.865', '14.932', '14.017', '14.495', '13.700', '13.904', '13.254', '12.332', '13.484', '11.975', '12.666', '14.303', '12.641', '14.280', '13.452', '15.793', '16.368', '16.285', '16.053', '17.815', '17.939', '17.221', '18.427', '16.438', '16.375', '14.950', '14.778', '16.869', '18.228', '16.791', '13.947', '12.529', '12.045', '11.151', '11.625', '11.950', '11.054', '11.086', '10.326', '12.589', '12.177', '13.076', '12.888', '11.978', '13.202', '10.883', '14.054', '14.963', '15.702', '15.846', '15.935', '15.286', '16.327', '14.580', '16.162', '16.876', '15.961', '16.391', '17.402', '18.238', '19.553', '18.506', '14.695', '13.703', '13.270', '13.262', '12.460', '11.372', '12.854', '12.954', '12.503', '13.541', '13.184', '12.008', '10.830', '10.505', '10.626', '10.093', '14.820', '15.887', '16.443', '17.416', '17.014', '16.627', '15.451', '17.619', '15.830', '16.248', '15.758', '14.809', '15.689', '16.404', '16.005', '14.639', '14.122', '17.109', '17.396', '16.559', '18.588', '14.018', '12.706', '12.516', '11.536', '12.617', '13.288', '14.522', '13.454', '13.383', '13.351', '12.406', '14.564', '14.482', '13.353', 
'15.552', '14.378', '14.488', '13.443', '12.968', '15.902', '16.144', '13.061', '12.087', '10.746', '10.157', '11.879', '11.014', '11.003', '10.171', '10.269', '10.273', '9.002', '9.101', '8.227', '8.612', '8.611', '7.224', '10.191', '10.458', '10.518', '9.916', '11.791', '11.677', '12.184', '12.967', '11.222', '11.377', '10.082', '9.885', '12.416', '13.824', '14.764', '14.287', '9.214', '7.937', '7.048', '6.294', '7.230', '7.828', '7.618', '8.090', '7.916', '7.189', '6.419', '6.871', '6.391', '6.449', '7.815', '8.305', '7.481', '7.371', '9.788', '10.832', '12.217', '10.789', '6.886', '6.080', '6.922', '8.149', '6.294', '7.024', '7.912', '7.680', '5.901', '4.734', '4.839', '8.952', '9.861', '10.886', '11.642', '10.910', '11.884', '13.285', '13.524', '11.599', '14.199', '15.563', '16.391', '16.022', '16.290', '16.498', '15.473', '17.509', '18.426', '18.875', '19.012', '19.645', '20.773', '20.264', '21.920', '19.082', '19.510', '18.471', '18.816', '19.784', '21.035', '20.954', '19.902', '21.955', '17.199', '16.109', '16.001', '15.690', '14.787', '14.776', '13.539', '13.220', '12.888', '16.301', '16.274', '17.413', '17.209', '16.429', '15.284', '15.332', '13.844', '18.606', '19.764', '19.548', '19.922', '21.047', '21.507', '23.105', '22.645', '18.915', '18.636', '17.640', '17.807', '18.050', '18.998', '17.730', '16.631', '15.593', '16.104', '15.685', '14.486', '17.033', '17.572', '18.985', '19.634', '17.525', '15.855', '19.451', '20.802', '21.001', '20.066', '21.152', '20.421', '20.725', '21.768', '19.817', '22.226', '22.536', '23.683', '24.328', '23.949', '15.165', '19.774', '22.152', '12.938', '23.499', '17.568', '13.544', '15.524', '31.249', '11.999', '14.511', '7.439', '19.303', '17.114', '21.867', '17.573', '26.151', '20.974', '20.796', '28.370', '29.565', '21.248', '25.744', '8.691', '30.789', '30.905', '28.623', '24.935', '23.462', '9.924', '28.729', '13.579', '23.652', '25.631', '17.799', '23.547', '16.363', '24.125', '33.063', '29.209', '10.391', '12.221', 
'18.997', '16.360', '27.915', '28.158', '21.975', '27.069', '30.148', '21.196', '8.864', '13.228', '18.577', '20.526', '25.758', '7.838', '20.569', '13.009', '19.229', '17.655', '30.445', '9.014', '3.398', '31.603', '16.543', '12.037', '7.261', '5.607', '23.532', '30.701', '32.300', '34.351', '9.450', '29.476', '13.681', '26.728', '10.004', '30.553', '23.569', '10.927', '17.983', '8.191', '32.095', '11.520', '13.249', '15.919', '11.187', '16.743'])
     # NOTE(review): this item is compared against a bare string, not a list -- confirm this matches the MMCIF2Dict version in use.
     self.assertEqual(mmcif['_struct_ref.pdbx_seq_one_letter_code'], 'GARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQTGSEELRSLYNTIAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKAQQAAADTGNNSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQGVGGPGHKARVLAEAMSQVTNPATIMIQKGNFRNQRKTVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGHQMKDCTERQANFLGKIWPSHKGRPGNFLQSRPEPTAPPEESFRFGEETTTPSQKQEPIDKELYPLASLRSLFGSDPSSQ')
Example #13
0
def readPDBFile(filename):
    """Return four summary values read from an mmCIF file.

    The returned tuple holds, in order: the integer sum of the
    ``_struct_sheet.number_strands`` entries, then the raw values of
    ``_struct_site.pdbx_num_residues``,
    ``_refine_hist.number_atoms_total`` and ``_exptl.method``.
    """
    mmcif_dict = MMCIF2Dict(filename)
    strand_counts = mmcif_dict['_struct_sheet.number_strands']
    nbres = mmcif_dict['_struct_site.pdbx_num_residues']
    nbatoms = mmcif_dict['_refine_hist.number_atoms_total']
    res = mmcif_dict['_exptl.method']

    total_strands = sum([int(count) for count in strand_counts])
    return total_strands, nbres, nbatoms, res
Example #14
0
 def test_underscores(self):
     """Values that start with an underscore must not be treated as keys."""
     mmcif = MMCIF2Dict("PDB/4Q9R_min.cif")
     self.assertEqual(len(mmcif.keys()), 5)

     expected_items = [
         '_atom_site.B_iso_or_equiv',
         '_atom_site.Cartn_x',
         '_atom_site.Cartn_y',
         '_atom_site.Cartn_z',
     ]
     self.assertEqual(mmcif['_pdbx_audit_revision_item.item'],
                      expected_items)
Example #15
0
 def test_quotefix(self):
     """Helix ids containing quote characters must be parsed verbatim."""
     mmcif = MMCIF2Dict("PDB/1MOM_min.cif")
     self.assertEqual(len(mmcif.keys()), 21)

     expected_helix_ids = [
         "A", "A'", "B", "C", "B'", "D", "E", "C'", "F",
         "G", "H", "D'", "E'", "A'\"", "BC", "CD", "DE",
     ]
     self.assertEqual(mmcif['_struct_conf.pdbx_PDB_helix_id'],
                      expected_helix_ids)
Example #16
0
def split_file(filename):
    """
    Split a file that holds several mmCIF data blocks and parse each one.

    MMCIF2Dict from biopython does not know how to parse this type of file,
    because it is like multiple mmcif files in one file, so the text is cut
    at every block header and each piece is parsed on its own.

    The file is read as UTF-8; if that raises UnicodeDecodeError it is read
    again as UTF-16.

    :param filename: path of the multi-block file
    :return: list of mmcifdict ligands, one per data block
    """

    def _read_text(encoding):
        with open(filename, mode='r', encoding=encoding) as f:
            return f.read()

    try:
        file_string = _read_text('utf-8')
    except UnicodeDecodeError:
        file_string = _read_text('utf-16')

    # NOTE(review): "[\r\n?|\n]" is a character class (one of CR, LF, "?"
    # or "|"), not the alternation "(\r\n?|\n)" it resembles.  Kept as is
    # to preserve behaviour -- confirm the intent before changing it.
    ligands = re.split('([\r\n?|\n]#[\r\n?|\n]data_.*?[\r\n?|\n]#)', file_string)

    # The pattern has a capture group, so after the leading text the result
    # alternates header, body, header, body, ...; re-join each pair.
    parts = ligands[1:]
    return [MMCIF2Dict(io.StringIO(x + y)) for x, y in zip(parts[0::2], parts[1::2])]
Example #17
0
    def test_mmcifio_write_dict(self):
        """Save an mmCIF dictionary, load it again and expect equal content."""
        original = MMCIF2Dict(self.mmcif_file)
        self.io.set_dict(original)

        # mkstemp returns an open descriptor; only the path is needed.
        descriptor, tmp_path = tempfile.mkstemp()
        os.close(descriptor)
        try:
            self.io.save(tmp_path)
            reloaded = MMCIF2Dict(tmp_path)

            original_keys = sorted(original.keys())
            self.assertEqual(original_keys, sorted(reloaded.keys()))
            for key in original_keys:
                self.assertEqual(original[key], reloaded[key])
        finally:
            os.remove(tmp_path)
Example #18
0
def main():
    """Write residue sequences for a range of ``pdb_seqres.txt`` entries.

    ``sys.argv[1]`` and ``sys.argv[2]`` give the start and end line index
    into ``pdb_seqres.txt``; an end of ``"END"``/``"end"`` means the last
    line.  Every second line in that range is read; its first token (minus
    ``>``) is sliced into a PDB code (chars 0-3) and a chain (chars 5+).

    For each entry the gzipped mmCIF file is unpacked into a scratch file,
    parsed and passed to ``seqres_atom_map``; the mapped residues are
    written to ``PDB_fasta.txt`` as a ``>``-prefixed header line followed
    by the sequence, which has a newline after every 100th entry.  Entries
    that fail for any reason are reported with ``FILE NOT_FOUND`` and
    skipped.
    """
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    pathfasta = "/Users/tarunkhanna/Documents/Bioinformatics/file_links/"

    with open("{}/pdb_seqres.txt".format(pathfasta), "r") as f:
        ft = f.readlines()

    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(ft)
    end = int(end)
    start = int(start)

    # Scratch file name carries the start index (presumably so parallel
    # runs with different ranges do not collide -- TODO confirm).
    scratch = "pdbprocess1{}.cif".format(start)

    # The with-block guarantees the output is flushed and closed.
    with open("PDB_fasta.txt", "w") as g:
        for k in range(start, end, 2):
            print(k, len(ft))
            ft1 = ft[k].split()
            t1 = ft1[0].strip(">")
            pdb = t1[0:4]
            chain = t1[5:]

            try:
                fol = pdb[1:3]
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
                with gzip.open(pdbfile, "rb") as tar, \
                        open(scratch, "wb") as out:
                    out.write(tar.read())

                mmcif = MMCIF2Dict(scratch)
                idmap1 = seqres_atom_map(mmcif, chain)

                pieces = []
                for k1 in range(1, len(idmap1) + 1):
                    res = idmap1[("{}".format(k1), chain)]
                    # Wrap on the residue index k1.  The original tested the
                    # outer line index k, which is constant for the whole
                    # sequence.
                    if k1 % 100 == 0:
                        pieces.append("{}\n".format(res))
                    else:
                        pieces.append("{}".format(res))

                g.write(">{}\n".format(t1))
                g.write("{}\n".format("".join(pieces)))

            except Exception:
                # Best-effort batch run: report and move to the next entry.
                print("FILE NOT_FOUND")
    def __init__(self, path):
        """
            Initialize every PDB_Parser with a path to a structure-file in CIF format.
            An example file is included in the repository (7ahl.cif).
            Tip: the parsed result is stored on the instance so it does not
            have to be parsed again & again ...
        """
        parsed = MMCIF2Dict(path)
        # NOTE(review): if MMCIF2Dict iterates like a dict, list() keeps only
        # its keys and drops every value.  An earlier variant stored
        # CIF_PARSER.get_structure('name', path) here instead -- confirm
        # which one is intended.
        self.structure = list(parsed)
Example #20
0
def get_STR(filePath, fileType='pdb'):
    """Load ``filePath`` in the representation selected by ``fileType``.

    - ``"pdb"``: the structure returned by
      ``MMCIFParser(QUIET=True).get_structure("pdb", filePath)``.
    - ``"cif"``: the dictionary returned by ``MMCIF2Dict(filePath)``.

    Any other ``fileType`` raises ``TypeError``.
    """
    # NOTE(review): the "pdb" branch also goes through MMCIFParser --
    # confirm that is intended for files in PDB format.
    if fileType == "pdb":
        parser = MMCIFParser(QUIET=True)
        return parser.get_structure("pdb", filePath)
    elif fileType == "cif":
        return MMCIF2Dict(filePath)

    raise TypeError("%s is not a valid fileType" % fileType)
Example #21
0
def assign(filename, dict):
    """Add strand-id -> entity-description entries to ``dict`` and return it.

    Entry ``n`` of ``_entity_poly.pdbx_strand_id`` is used as the key and
    entry ``n`` of ``_entity.pdbx_description`` as its value.  ``dict`` is
    modified in place.
    """
    mmcif_dict = MMCIF2Dict(filename)
    descriptions = mmcif_dict['_entity.pdbx_description']
    strand_ids = mmcif_dict['_entity_poly.pdbx_strand_id']

    # NOTE(review): the two items come from different categories and are
    # paired purely by position -- confirm they always line up.
    for index, strand in enumerate(strand_ids):
        dict[strand] = descriptions[index]
    return dict
Example #22
0
    def readLowLevel(self, fileName):
        """ Return a dictionary with all mmcif fields. you should parse them
            Example: get the list of the y coordinates of all atoms
              dict = readLowLevel("kk.cif")
              y_list = dict['_atom_site.Cartn_y']

            Files whose name ends with ".pdb" are not supported: a message
            is printed and None is returned.
        """

        if fileName.endswith(".pdb"):
            print("Low level access to PDB is not implemented")
            # Return explicitly; falling through used to hit an unbound
            # local and raise UnboundLocalError.
            return None

        return MMCIF2Dict(fileName)
def determine_transmembrane_domains(filename):
    """
    Collect, per chain, the domains that ``topconn_run`` reports for the
    chain's sequence.

    Sequences come from ``_entity_poly.pdbx_seq_one_letter_code_can`` and
    the chains they belong to from ``_entity_poly.pdbx_strand_id`` (a
    comma-separated list per sequence).  When the assembly item is a list,
    only chains found in its first entry are kept.

    Returns a dict mapping chain id -> list of domains, or ``None`` when
    the single available sequence is ``"?"``.
    """
    chain_trans_dom = {}

    mmcif_dict = MMCIF2Dict(filename)
    chains = mmcif_dict['_entity_poly.pdbx_strand_id']
    seqs = mmcif_dict['_entity_poly.pdbx_seq_one_letter_code_can']
    assembly = mmcif_dict['_pdbx_struct_assembly_gen.asym_id_list']

    def _register(strand_ids, dom):
        # Attach one domain to every chain named in the comma-separated ids.
        for lett in strand_ids.split(","):
            if lett == "?":
                continue
            chain_trans_dom.setdefault(lett, []).append(dom)

    # The items may hold a list (several entities) or one plain string.
    if isinstance(seqs, list):
        # enumerate keeps seqs and chains aligned even when a sequence is
        # skipped; the original only advanced its index for usable
        # sequences, shifting every later one onto the wrong chains.
        for i, seq in enumerate(seqs):
            if seq is None or seq in ("\n", "\t", "", "?"):
                continue
            for dom in topconn_run(seq):
                _register(chains[i], dom)
    else:
        if seqs == "?":
            return None
        for dom in topconn_run(seqs):
            _register(chains, dom)

    if isinstance(assembly, list):
        # NOTE(review): this walks assembly[0] character by character, so it
        # only matches single-character chain ids -- confirm whether it
        # should split on "," instead.
        true_chain_trans_dom = {}
        for chain in assembly[0]:
            if chain in chain_trans_dom:
                true_chain_trans_dom[chain] = chain_trans_dom[chain]
    else:
        true_chain_trans_dom = chain_trans_dom

    print(true_chain_trans_dom)

    logging.info('There are {} chains with transmembrane dom: {}'.format(
        len(true_chain_trans_dom), true_chain_trans_dom.keys()))
    return true_chain_trans_dom
Example #24
0
 def __init__(self, path):
     """
         Initialize every PDB_Parser with a path to a structure-file in CIF format.
         An example file is included in the repository (7ahl.cif).
         The structure is parsed once here and stored on the instance so the
         other methods can re-use it.
     """
     # The file's own `_entry.id` value is used as the structure id.
     entry_id = MMCIF2Dict(path)['_entry.id']
     parser = PDB_Parser.CIF_PARSER
     self.structure = parser.get_structure(entry_id, path)
Example #25
0
    def mmcif_dict(self):
        '''Return the mmCIF dictionary, parsing the file on first use.

        Only applicable if using an mmCIF file.

        Returns:
            dict: A dictionary containing mmCIF data.

        Raises:
            TypeError: If this object is not backed by an mmCIF file.
        '''
        if not self._mmcif:
            raise TypeError("Not an mmCIF file!")

        # Parse lazily and keep the result for later calls.
        if self._mmcif_dict is None:
            self._mmcif_dict = MMCIF2Dict(self.pdb_file())
        return self._mmcif_dict
Example #26
0
    def get_structure(self, structure_id, filename):
        """Return the structure.

        Arguments:
         - structure_id - string, the id that will be used for the structure
         - filename - name of the mmCIF file OR an open filehandle

        When ``self.QUIET`` is true, ``PDBConstructionWarning`` is ignored
        while the file is parsed and the structure is built.
        """
        with warnings.catch_warnings():
            if self.QUIET:
                warnings.filterwarnings("ignore", category=PDBConstructionWarning)
            # These calls must stay inside the context manager: the filter
            # installed above is discarded as soon as the block exits, so
            # running them afterwards would never silence anything.
            self._mmcif_dict = MMCIF2Dict(filename)
            self._build_structure(structure_id)
        return self._structure_builder.get_structure()
Example #27
0
    def test_inline_comments(self):
        """Comments may begin outside of column 1."""
        document = textwrap.dedent(u"""\
            data_verbatim_test
            _test_key_value foo # Ignore this comment
            loop_
            _test_loop
            a b c d # Ignore this comment
            e f g

        """)
        mmcif_dict = MMCIF2Dict(io.StringIO(document))

        # Neither trailing comment may leak into the parsed values.
        self.assertEqual(mmcif_dict["_test_key_value"], "foo")
        self.assertEqual(mmcif_dict["_test_loop"], list("abcdefg"))
Example #28
0
def find_ligand_annotations(cif_path, ligands):
    """
    Return the ligand binding-site annotations found in a PDB structure's
    mmCIF file, if there are any.

    :Param cif_path: path to PDB structure in mmCIF format
    :Param ligands: list of ligands
    :return known_interfaces: list of tuples of known interfaces
                                [(pbid, position, chain, type), ...]
                              or None when nothing is found
    """
    mmcif_dict = MMCIF2Dict(cif_path)
    # The id is the four characters before a 4-character suffix of the path.
    structure_id = cif_path[-8:-4]
    ligands = set(ligands)

    try:
        binding_site_details = mmcif_dict['_struct_site.details']
        binding_site_ids = mmcif_dict['_struct_site.id']
    except KeyError:
        print('No interface annotations found for:\n', cif_path, '\n\n')
        return None

    # Pick the site whose description mentions a ligand name longer than
    # one character.  There is no early exit, so the last matching site
    # wins.
    site_id = ''
    for site, detail in zip(binding_site_ids, binding_site_details):
        if any(word in ligands and len(word) > 1 for word in detail.split()):
            site_id = site

    if site_id == '':
        print('No ligand annotations found for: \n', cif_path, '\n\n')
        return None

    print(site_id)

    # Per-residue rows of the binding-site table.
    positions = mmcif_dict['_struct_site_gen.label_seq_id']
    chains = mmcif_dict['_struct_site_gen.label_asym_id']
    res_ids = mmcif_dict['_struct_site_gen.label_comp_id']
    sites = mmcif_dict['_struct_site_gen.site_id']

    # Keep rows of the chosen site, dropping multi-character residue ids
    # that are not a substring of 'AUCG'.
    known_interfaces = [
        (structure_id, position, chain, 'ligand')
        for position, chain, res_id, site in zip(positions, chains, res_ids,
                                                 sites)
        if site == site_id
        and not (len(res_id) > 1 and res_id not in 'AUCG')
    ]

    return known_interfaces or None
def all_seqres_pdb_map():
    """Map the residue indices listed in ``distinct_mutants.txt``.

    Each input line is split on ``", "``.  The first field, stripped of
    list punctuation, is an identifier whose chars 0-3 are the PDB code;
    the remaining fields are integer residue indices.  For every line the
    gzipped mmCIF file of that PDB code is unpacked into
    ``pdbprocess.cif``, parsed and passed to ``seqres_atom_map``; each
    index is then replaced by ``idmap[index][0]``.

    One line per successfully processed entry is written to
    ``distinct_mutants_pdb.txt``, holding the printed list
    ``[identifier, mapped, ...]``.  Entries that fail for any reason are
    reported with ``FILE NOT FOUND`` and skipped.
    """
    # PATH FOR THE PDB/mmCIF FILES
    import gzip

    pathmmcif = "/bmm/data/pdbmmcif/data/structures/all/mmCIF"

    with open("distinct_mutants.txt", "r") as dis:
        ht = dis.readlines()

    # The with-block guarantees the output is flushed and closed.
    with open("distinct_mutants_pdb.txt", "w") as h:
        for k, line in enumerate(ht):
            mu = line.split(', ')

            # Same character set as before; the backslash is now escaped
            # properly instead of relying on the invalid escape "\,".
            pdbid = mu[0].strip("[|\\,|'|]")
            pdb = pdbid[0:4]  # PDB NAME

            print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))

            try:
                pdbfile = "{}/{}.cif.gz".format(pathmmcif, pdb)
                with gzip.open(pdbfile, "rb") as tar, \
                        open("pdbprocess.cif", "wb") as out:
                    out.write(tar.read())

                mmcif = MMCIF2Dict("pdbprocess.cif")
                idmap = seqres_atom_map(mmcif)

                reslist = [pdbid]
                for field in mu[1:]:
                    id1 = int(field.strip("[|'|]|\n"))
                    reslist.append("{}".format(idmap[id1][0]))

                h.write("{}\n".format(reslist))
            except Exception:
                # Best-effort batch run: report and move to the next entry.
                print("FILE NOT FOUND")
 def test_inline_comments(self):
     """Comments may begin outside of column 1 if preceded by whitespace."""
     document = "".join([
         "data_verbatim_test\n",
         "_test_key_value_1 foo # Ignore this comment\n",
         "_test_key_value_2 foo#NotIgnored\n",
         "loop_\n",
         "_test_loop\n",
         "a b c d # Ignore this comment\n",
         "e f g\n",
         "\n",
     ])
     mmcif_dict = MMCIF2Dict(io.StringIO(document))

     # " #" starts a comment; a "#" glued to a token is part of the value.
     self.assertEqual(mmcif_dict["_test_key_value_1"], ["foo"])
     self.assertEqual(mmcif_dict["_test_key_value_2"], ["foo#NotIgnored"])
     self.assertEqual(mmcif_dict["_test_loop"], list("abcdefg"))