Ejemplo n.º 1
0
    def __init__(self, pdb_data, chainA='A', chainB='B'):
        """Set up the buried surface area (BSA) feature calculator.

        Freesasa is required for this feature. Since Freesasa version
        2.0.3 the Python bindings are shipped as a separate module,
        which can be installed with
        >>> pip install freesasa

        Args:
            pdb_data (list(byte) or str): pdb data or pdb filename
            chainA (str, optional): name of the first chain
            chainB (str, optional): name of the second chain

        Example:
        >>> bsa = BSA('1AK4.pdb')
        >>> bsa.get_structure()
        >>> bsa.get_contact_residue_sasa()
        >>> bsa.sql._close()
        """
        self.pdb_data = pdb_data
        # pdb2sql interface object built from the same pdb input
        self.sql = pdb2sql.interface(self.pdb_data)
        self.chains_label = [chainA, chainB]

        # containers for the computed feature values (two distinct dicts)
        self.feature_data, self.feature_data_xyz = {}, {}

        # switch freesasa to its `nowarnings` verbosity level
        freesasa.setVerbosity(freesasa.nowarnings)
Ejemplo n.º 2
0
    def test_database_consistency(self):
        """Verify initializing interface with an updated pdb2sql database."""
        source_db = pdb2sql(self.pdb)
        source_db.update_column('temp', [99] * 10)
        expected = source_db.get('*')

        # the interface built from the modified database must expose
        # exactly the same rows
        self.db = interface(source_db)
        self.assertEqual(expected, self.db.get('*'))
Ejemplo n.º 3
0
 def test_get_contact_atoms_exludeH(self):
     """Verify get_contact_atoms(excludeH=True)."""
     db = interface('pdb/3CRO_H.pdb')
     contact_atoms = db.get_contact_atoms(excludeH=True)

     self.assertIsInstance(contact_atoms, dict)
     self.assertEqual(len(contact_atoms), 2)
     self.assertEqual(list(contact_atoms), ['A', 'B'])
     # expected number of contact atoms per chain
     for chain, count in (('A', 341), ('B', 333)):
         self.assertEqual(len(contact_atoms[chain]), count)
Ejemplo n.º 4
0
 def test_get_contact_residues_default(self):
     """Verify get_contact_residues with its default arguments."""
     self.db = interface(pdb2sql(self.pdb))
     contact_residues = self.db.get_contact_residues()

     self.assertIsInstance(contact_residues, dict)
     self.assertEqual(len(contact_residues), 2)
     self.assertEqual(list(contact_residues), ['A', 'B'])
     # expected number of contact residues per chain
     for chain, count in (('A', 20), ('B', 20)):
         self.assertEqual(len(contact_residues[chain]), count)
Ejemplo n.º 5
0
 def test_get_contact_residues_exludeH(self):
     """Verify get_contact_residues(excludeH=True) over all chains."""
     db = interface('pdb/3CRO_H.pdb')
     contact_residues = db.get_contact_residues(allchains=True, excludeH=True)

     self.assertIsInstance(contact_residues, dict)
     self.assertEqual(len(contact_residues), 4)
     self.assertEqual(list(contact_residues), ['A', 'B', 'L', 'R'])
     # expected number of contact residues per chain
     expected_counts = (('A', 20), ('B', 20), ('L', 47), ('R', 48))
     for chain, count in expected_counts:
         self.assertEqual(len(contact_residues[chain]), count)
Ejemplo n.º 6
0
 def test_get_contact_residues_alltrue(self):
     """Verify get_contact_residues with every boolean option enabled."""
     db = interface('pdb/3CRO_H.pdb')
     options = dict(allchains=True, only_backbone_atoms=True, excludeH=True)
     with self.assertWarns(UserWarning) as ex:
         contact_residues = db.get_contact_residues(**options)

     # two warnings are expected, carrying the empty-query message
     self.assertEqual(len(ex.warnings), 2)
     self.assertEqual(ex.warning.args[0], 'SQL query get an empty')

     self.assertIsInstance(contact_residues, dict)
     self.assertEqual(len(contact_residues), 4)
     self.assertEqual(list(contact_residues), ['A', 'B', 'L', 'R'])
     # expected number of contact residues per chain
     expected_counts = (('A', 0), ('B', 0), ('L', 9), ('R', 8))
     for chain, count in expected_counts:
         self.assertEqual(len(contact_residues[chain]), count)
Ejemplo n.º 7
0
 def test_get_contact_atoms_alltrue(self):
     """Verify get_contact_atoms with every boolean option enabled."""
     db = interface('pdb/3CRO_H.pdb')
     options = dict(allchains=True,
                    extend_to_residue=True,
                    only_backbone_atoms=True,
                    excludeH=True)
     with self.assertWarns(UserWarning) as ex:
         contact_atoms = db.get_contact_atoms(**options)

     # two warnings are expected, carrying the empty-query message
     self.assertEqual(len(ex.warnings), 2)
     self.assertEqual(ex.warning.args[0], 'SQL query get an empty')

     self.assertIsInstance(contact_atoms, dict)
     self.assertEqual(len(contact_atoms), 4)
     self.assertEqual(list(contact_atoms), ['A', 'B', 'L', 'R'])
     # Reference selection in pymol:
     # `select catoms, name CA+C+N+O and byres((chain L and name CA+C+N+O )
     #  within 8.5 of (chain R and name CA+C+N+O))`
     expected_counts = (('A', 0), ('B', 0), ('L', 36), ('R', 32))
     for chain, count in expected_counts:
         self.assertEqual(len(contact_atoms[chain]), count)
Ejemplo n.º 8
0
    def get_feature_value(self, contact_only=True):
        """Compute the SASA-weighted PSSM feature and store it on the instance.

        For every (residue, value) pair of ``self.res_data`` /
        ``self.pssm_data`` the stored feature is ``value * sasa`` where
        ``sasa`` is looked up in ``self.sasa``. Results are written to
        ``self.feature_data['pssm']`` (keyed by residue) and
        ``self.feature_data_xyz['pssm']`` (keyed by chain index followed
        by the coordinates of the residue CB atom).

        Args:
            contact_only (bool, optional): if True, keep only residues
                returned by the contact query (cutoff 5.5) for chains
                'A' and 'B'. Defaults to True.
        """
        sql = pdb2sql.interface(self.pdbfile)
        try:
            xyz_info = sql.get('chainID,resSeq,resName', name='CB')
            xyz = sql.get('x,y,z', name='CB')
            contact_residue = sql.get_contact_residue(cutoff=5.5)
        finally:
            # always release the database, even when a query fails
            sql._close()

        # map (chainID, resSeq, resName) -> CB coordinates
        xyz_dict = {tuple(info): pos for pos, info in zip(xyz, xyz_info)}
        contact_residue = contact_residue["A"] + contact_residue["B"]

        pssm_data_xyz = {}
        pssm_data = {}

        for res, data in zip(self.res_data, self.pssm_data):

            if contact_only and res not in contact_residue:
                continue

            res_key = tuple(res)
            if res_key not in xyz_dict:
                printif([res_key, ' not found in the pdbfile'], self.debug)
                continue

            # chain 'A' is encoded as 0 and chain 'B' as 1
            chain = {'A': 0, 'B': 1}[res[0]]
            key = tuple([chain] + xyz_dict[res_key])
            sasa = self.sasa[res_key]

            pssm_data[res] = [data * sasa]
            pssm_data_xyz[key] = [data * sasa]

        # if we have no contact atoms, store one zero-valued placeholder
        # at the origin for each chain index
        if not pssm_data_xyz:
            pssm_data_xyz[(0, 0., 0., 0.)] = [0.0]
            pssm_data_xyz[(1, 0., 0., 0.)] = [0.0]

        self.feature_data['pssm'] = pssm_data
        self.feature_data_xyz['pssm'] = pssm_data_xyz
Ejemplo n.º 9
0
    def __init__(self, pdb_data, chainA='A', chainB='B'):
        """Set up the residue contact feature between polar/apolar/charged residues.

        Args:
            pdb_data (list(byte) or str): pdb data or pdb filename
            chainA (str, optional): name of the first chain
            chainB (str, optional): name of the second chain

        Example:
        >>> rcd = ResidueDensity('1EWY_100w.pdb')
        >>> rcd.get(cutoff=5.5)
        >>> rcd.extract_features()
        """
        self.pdb_data = pdb_data
        # pdb2sql interface object built from the same pdb input
        self.sql = pdb2sql.interface(self.pdb_data)
        self.chains_label = [chainA, chainB]

        # containers for the computed feature values (two distinct dicts)
        self.feature_data, self.feature_data_xyz = {}, {}

        # residue type table taken from the project configuration
        self.residue_types = config.AA_properties
Ejemplo n.º 10
0
    def __init__(self, pdb_data, chain1='A', chain2='B'):
        """Set up the residue contact feature between polar/apolar/charged residues.

        Args:
            pdb_data (list(byte) or str): pdb data or pdb filename
            chain1 (str): First chain ID. Defaults to 'A'
            chain2 (str): Second chain ID. Defaults to 'B'

        Example:
            >>> rcd = ResidueDensity('1EWY_100w.pdb')
            >>> rcd.get(cutoff=5.5)
            >>> rcd.extract_features()
        """
        self.pdb_data = pdb_data
        # pdb2sql interface object built from the same pdb input
        self.sql = pdb2sql.interface(self.pdb_data)

        # chain IDs are kept both as a pair and as individual attributes
        self.chains_label = [chain1, chain2]
        self.chain1, self.chain2 = chain1, chain2

        # containers for the computed feature values (two distinct dicts)
        self.feature_data, self.feature_data_xyz = {}, {}

        # residue type table taken from the project configuration
        self.residue_types = config.AA_properties
Ejemplo n.º 11
0
    def __init__(self,
                 pdb=None,
                 pssm=None,
                 contact_distance=8.5,
                 internal_contact_distance=3,
                 pssm_align='res',
                 biopython=False):
        """Class from which Residue features are computed

        The constructor builds the graph and its node/edge features
        immediately, printing the time spent in each of the three steps.

        Args:
            pdb (str, optional): path to pdb file. Defaults to None.
                A real path is required: the graph name is derived from it.
            pssm (str, optional): path to pssm file. Defaults to None.
            contact_distance (float, optional): cutoff distance for external edges. Defaults to 8.5.
            internal_contact_distance (int, optional): cutoff distance for internal edges. Defaults to 3.
            pssm_align (str, optional): forwarded as ``style`` to
                ``PSSM.PSSM_aligned``. Defaults to 'res'.
            biopython (bool, optional): stored as ``self.biopython``.
                Defaults to False.
        """
        super().__init__()

        self.type = 'residue'
        self.pdb = pdb
        # graph name = pdb file name without directory and extension
        self.name = os.path.splitext(os.path.basename(pdb))[0]

        if pssm is not None:
            self.pssm, self.ic = PSSM.PSSM_aligned(pssm, style=pssm_align)
        else:
            self.pssm, self.ic = None, None

        self.contact_distance = contact_distance
        self.internal_contact_distance = internal_contact_distance

        self.residue_charge = {
            'CYS': -0.64,
            'HIS': -0.29,
            'ASN': -1.22,
            'GLN': -1.22,
            'SER': -0.80,
            'THR': -0.80,
            'TYR': -0.80,
            'TRP': -0.79,
            'ALA': -0.37,
            'PHE': -0.37,
            'GLY': -0.37,
            'ILE': -0.37,
            'VAL': -0.37,
            'MET': -0.37,
            'PRO': 0.0,
            'LEU': -0.37,
            'GLU': -1.37,
            'ASP': -1.37,
            'LYS': -0.36,
            'ARG': -1.65,
        }

        # integer index of each residue type (0..19)
        self.residue_names = {
            'CYS': 0,
            'HIS': 1,
            'ASN': 2,
            'GLN': 3,
            'SER': 4,
            'THR': 5,
            'TYR': 6,
            'TRP': 7,
            'ALA': 8,
            'PHE': 9,
            'GLY': 10,
            'ILE': 11,
            'VAL': 12,
            'MET': 13,
            'PRO': 14,
            'LEU': 15,
            'GLU': 16,
            'ASP': 17,
            'LYS': 18,
            'ARG': 19,
        }

        self.residue_polarity = {
            'CYS': 'polar',
            'HIS': 'polar',
            'ASN': 'polar',
            'GLN': 'polar',
            'SER': 'polar',
            'THR': 'polar',
            'TYR': 'polar',
            'TRP': 'polar',
            'ALA': 'apolar',
            'PHE': 'apolar',
            'GLY': 'apolar',
            'ILE': 'apolar',
            'VAL': 'apolar',
            'MET': 'apolar',
            'PRO': 'apolar',
            'LEU': 'apolar',
            'GLU': 'neg_charged',
            'ASP': 'neg_charged',
            # lysine has a basic side chain and is positively charged,
            # like arginine (it was previously labelled 'neg_charged')
            'LYS': 'pos_charged',
            'ARG': 'pos_charged',
        }

        # column of each residue type in a PSSM row
        self.pssm_pos = {
            'CYS': 4,
            'HIS': 8,
            'ASN': 2,
            'GLN': 5,
            'SER': 15,
            'THR': 16,
            'TYR': 18,
            'TRP': 17,
            'ALA': 0,
            'PHE': 13,
            'GLY': 7,
            'ILE': 9,
            'VAL': 19,
            'MET': 12,
            'PRO': 14,
            'LEU': 10,
            'GLU': 6,
            'ASP': 3,
            'LYS': 11,
            'ARG': 1,
        }

        self.polarity_encoding = {
            'apolar': 0,
            'polar': 1,
            'neg_charged': 2,
            'pos_charged': 3,
        }

        self.biopython = biopython

        # check if external execs are installed
        self.check_execs()

        # create the sqldb
        db = interface(self.pdb)
        try:
            # get the graphs
            t0 = time()
            self.get_graph(db)
            print('Graph %f' % (time() - t0))

            # get the nodes/edge attributes
            t0 = time()
            self.get_node_features(db)
            print('Node %f' % (time() - t0))

            t0 = time()
            self.get_edge_features()
            print('Edge %f' % (time() - t0))
        finally:
            # close the db even if one of the steps above fails
            db._close()
Ejemplo n.º 12
0
    def construct_graph(self, verbose=False, print_res_pairs=False):
        """Construct the graph corresponding to a given PDB

        Residue contact pairs are queried from ``self.pdbfile`` with
        ``self.cutoff``. Residues whose name is not in
        ``self.resmap_inv`` or that are missing from
        ``self.aligned_pssm`` are dropped (each one is printed and a
        warning is issued). The result is stored in ``self.nodes``
        (list of residues) and ``self.edges`` (list of ``[i, j]`` index
        pairs into ``self.nodes``).

        Args:
            verbose (bool, optional): print for debug (currently unused)
            print_res_pairs (bool, optional): print the residue contact pairs for debug
        """

        db = interface(self.pdbfile)
        try:
            res_contact_pairs = db.get_contact_residues(
                cutoff=self.cutoff,
                allchains=True,
                return_contact_pairs=True)
        finally:
            # the database is only needed for this single query
            db._close()

        # tag the non residues
        keys_to_pop = []
        for res in res_contact_pairs:
            if res[2] not in self.resmap_inv:
                keys_to_pop.append(res)
                print(res)
                warnings.warn('--> Residue not valid')

        # tag the ones that are not in PSSM
        for res in res_contact_pairs:
            if res not in self.aligned_pssm:
                keys_to_pop.append(res)
                print(res)
                warnings.warn('--> Residue not found in PSSM file')

        # Remove the residue (a residue may have been tagged twice)
        for res in keys_to_pop:
            res_contact_pairs.pop(res, None)

        # get a list of residues of chain B
        # automatically remove the ones that are not proper res
        # and the ones that are not in the PSSM
        nodesB = []
        for k, reslist in list(res_contact_pairs.items()):
            tmp_reslist = []
            for res in reslist:
                if res[2] in self.resmap_inv and res in self.aligned_pssm:
                    nodesB.append(res)
                    tmp_reslist.append(res)
                else:
                    print(res)
                    warnings.warn(
                        '--> Residue not found in PSSM file or Residue not recognized'
                    )
            res_contact_pairs[k] = tmp_reslist

        nodesB = sorted(set(nodesB))

        # make a list of nodes
        self.nodes = list(res_contact_pairs.keys()) + nodesB

        # position of the first occurrence of every node, which is what
        # list.index would return, without rescanning the list per edge
        node_index = {}
        for idx, node in enumerate(self.nodes):
            node_index.setdefault(node, idx)

        # get the edge numbering
        self.edges = []
        for key, val in res_contact_pairs.items():
            ind1 = node_index[key]
            for v in val:
                self.edges.append([ind1, node_index[v]])

        if print_res_pairs:
            for k, vs in res_contact_pairs.items():
                print(k)
                for v in vs:
                    print('\t\t', v)
Ejemplo n.º 13
0
    def get_feature_value(self, cutoff=5.5):
        """Compute the PSSM feature values of the interface residues.

        For every interface residue that has an entry in ``self.pssm``,
        each value of that entry is stored under the matching name of
        ``self.feature_names`` in ``self.feature_data`` (keyed by
        residue) and ``self.feature_data_xyz`` (keyed by chain index
        followed by the anchor atom coordinates).

        Args:
            cutoff (float, optional): cutoff passed to the contact
                residue query. Defaults to 5.5.

        Raises:
            ValueError: if no interface residue is found, or if none of
                the interface residues has a pssm value.
        """

        sql = pdb2sql.interface(self.pdb_file)
        try:
            # set anchors for all residues and get their xyz:
            # CB for every residue, CA for GLY
            xyz_info = sql.get('chainID,resSeq,resName', name='CB')
            xyz_info += sql.get(
                'chainID,resSeq,resName', name='CA', resName='GLY')

            xyz = sql.get('x,y,z', name='CB')
            xyz += sql.get('x,y,z', name='CA', resName='GLY')

            # get interface contact residues
            # ctc_res = {"A":[chain 1 residues], "B": [chain2 residues]}
            ctc_res = sql.get_contact_residues(cutoff=cutoff)
        finally:
            # always release the database, even when a query fails
            sql._close()

        xyz_dict = {tuple(info): pos for pos, info in zip(xyz, xyz_info)}
        ctc_res = ctc_res["A"] + ctc_res["B"]

        # handle with small interface or no interface
        total_res = len(ctc_res)
        if total_res == 0:
            raise ValueError(
                f"{self.mol_name}: No interface residue found with the "
                f"cutoff {cutoff}Å."
                f" Failed to calculate the features of FullPSSM/PSSM_IC.")
        elif total_res < 5:  # this is an empirical value
            warnings.warn(
                f"{self.mol_name}: Only {total_res} interface residues found"
                f" with cutoff {cutoff}Å. Be careful with"
                f" using the features FullPSSM/PSSM_IC")

        # check if interface residues have pssm values; the residues
        # that do are kept in contact order so that the feature dicts
        # are filled in a reproducible order
        pssm_res_set = set(self.pssm.keys())
        ctc_res_with_pssm = [res for res in ctc_res if res in pssm_res_set]
        ctc_res_wo_pssm = set(ctc_res) - pssm_res_set
        if not ctc_res_with_pssm:
            raise ValueError(
                f"{self.mol_name}: All interface residues have no pssm values."
                f" Check residue chainID/ID/name consistency "
                f"between PDB and PSSM files")
        if ctc_res_wo_pssm:
            warnings.warn(
                f"{self.mol_name}: The following interface residues have "
                f" no pssm value:\n {ctc_res_wo_pssm}")

        # get feature values
        for res in ctc_res_with_pssm:
            # chain 'A' is encoded as 0 and chain 'B' as 1
            chain = {'A': 0, 'B': 1}[res[0]]
            key = tuple([chain] + xyz_dict[res])
            for name, value in zip(self.feature_names, self.pssm[res]):
                # Make sure the feature_names and pssm[res] have
                # consistent order of the 20 residue types
                # name: PSSM_ALA
                # value: -3.0
                # res: ('B', 573, 'HIS')
                # key: (1, -19.346, 6.156, -3.44)
                self.feature_data[name][res] = [value]
                self.feature_data_xyz[name][key] = [value]
Ejemplo n.º 14
0
    def read_pdb(self):
        """Create a SQL database from the 'complex' entry of the molecule group."""
        # `[()]` reads the full value stored under the 'complex' key
        complex_data = self.molgrp['complex'][()]
        self.sqldb = pdb2sql.interface(complex_data)
Ejemplo n.º 15
0
 def setUp(self):
     """Build the interface fixture from the 3CRO test structure."""
     pdb_path = 'pdb/3CRO.pdb'
     self.pdb = pdb_path
     self.db = interface(pdb_path)
Ejemplo n.º 16
0
    def __init__(self, pdb=None, pssm=None,
                 contact_distance=8.5, internal_contact_distance=3,
                 pssm_align='res'):
        """Class from which Residue features are computed

        The constructor builds the graph and its node/edge features
        immediately, printing the time spent in each of the three steps.

        Args:
            pdb (str, optional): path to pdb file. Defaults to None.
                A real path is required: the graph name is derived from it.
            pssm (str, optional): path to pssm file. Defaults to None.
            contact_distance (float, optional): cutoff distance for external edges. Defaults to 8.5.
            internal_contact_distance (int, optional): cutoff distance for internal edges. Defaults to 3.
            pssm_align (str, optional): forwarded as ``style`` to
                ``PSSM.PSSM_aligned``. Defaults to 'res'.
        """

        super().__init__()

        self.type = 'residue'
        self.pdb = pdb
        # graph name = pdb file name without directory and extension
        self.name = os.path.splitext(os.path.basename(pdb))[0]

        if pssm is not None:
            self.pssm, self.ic = PSSM.PSSM_aligned(
                pssm, style=pssm_align)
        else:
            self.pssm, self.ic = None, None

        self.contact_distance = contact_distance
        self.internal_contact_distance = internal_contact_distance

        self.residue_charge = {
            'CYS': -0.64, 'HIS': -0.29, 'ASN': -1.22, 'GLN': -1.22,
            'SER': -0.80, 'THR': -0.80, 'TYR': -0.80, 'TRP': -0.79,
            'ALA': -0.37, 'PHE': -0.37, 'GLY': -0.37, 'ILE': -0.37,
            'VAL': -0.37, 'MET': -0.37, 'PRO': 0.0, 'LEU': -0.37,
            'GLU': -1.37, 'ASP': -1.37, 'LYS': -0.36, 'ARG': -1.65,
        }

        # integer index of each residue type; the indices must be the
        # contiguous range 0..19 (ARG was previously 20, skipping 19)
        self.residue_names = {
            'CYS': 0, 'HIS': 1, 'ASN': 2, 'GLN': 3,
            'SER': 4, 'THR': 5, 'TYR': 6, 'TRP': 7,
            'ALA': 8, 'PHE': 9, 'GLY': 10, 'ILE': 11,
            'VAL': 12, 'MET': 13, 'PRO': 14, 'LEU': 15,
            'GLU': 16, 'ASP': 17, 'LYS': 18, 'ARG': 19,
        }

        self.residue_polarity = {
            'CYS': 'polar', 'HIS': 'polar', 'ASN': 'polar', 'GLN': 'polar',
            'SER': 'polar', 'THR': 'polar', 'TYR': 'polar', 'TRP': 'polar',
            'ALA': 'apolar', 'PHE': 'apolar', 'GLY': 'apolar', 'ILE': 'apolar',
            'VAL': 'apolar', 'MET': 'apolar', 'PRO': 'apolar', 'LEU': 'apolar',
            'GLU': 'charged', 'ASP': 'charged', 'LYS': 'charged', 'ARG': 'charged',
        }

        # column of each residue type in a PSSM row
        self.pssm_pos = {
            'CYS': 4, 'HIS': 8, 'ASN': 2, 'GLN': 5,
            'SER': 15, 'THR': 16, 'TYR': 18, 'TRP': 17,
            'ALA': 0, 'PHE': 13, 'GLY': 7, 'ILE': 9,
            'VAL': 19, 'MET': 12, 'PRO': 14, 'LEU': 10,
            'GLU': 6, 'ASP': 3, 'LYS': 11, 'ARG': 1,
        }

        self.polarity_encoding = {
            'apolar': 0, 'polar': -1, 'charged': 1}

        # give every unordered pair of polarity codes its own integer id,
        # numbered in order of first appearance
        self.edge_polarity_encoding, iencod = {}, 0
        for v1 in self.polarity_encoding.values():
            for v2 in self.polarity_encoding.values():
                key = tuple(np.sort([v1, v2]))
                if key not in self.edge_polarity_encoding:
                    self.edge_polarity_encoding[key] = iencod
                    iencod += 1

        # check if external execs are installed
        self.check_execs()

        # create the sqldb
        db = interface(self.pdb)
        try:
            # get the graphs
            t0 = time()
            self.get_graph(db)
            print('Graph %f' % (time() - t0))

            # get the nodes/edge attributes
            t0 = time()
            self.get_node_features(db)
            print('Node %f' % (time() - t0))

            t0 = time()
            self.get_edge_features()
            print('Edge %f' % (time() - t0))
        finally:
            # close the db even if one of the steps above fails
            db._close()