Esempio n. 1
0
    def compute_residue_pairs_ref(self,
                                  cutoff=5.0,
                                  save_file=True,
                                  filename=None):
        """Compute the residue contact pairs of the reference conformation.

        The reference structure ``self.ref`` is loaded with ``pdb2sql``
        (using ``sqlfile='mol2.db'``) and queried with
        ``get_contact_residue`` (hydrogens excluded).

        Args:
            cutoff (float, optional): cutoff for the contact atoms
            save_file (bool, optional): if True, pickle the residue pairs to
                a file and return None; if False, return them instead
            filename (str, optional): name of the pickle file. If None, the
                name is the part of ``self.ref`` before its first '.'
                followed by 'residue_contact_pairs.pckl'

        Returns:
            dict or None: definition of the residue pairs when ``save_file``
            is False, None when the pairs were written to disk
        """
        sql_ref = pdb2sql(self.ref, sqlfile='mol2.db')
        try:
            residue_pairs_ref = sql_ref.get_contact_residue(
                cutoff=cutoff, return_contact_pairs=True, excludeH=True)
        finally:
            # always release the database, even if the query fails
            sql_ref.close()

        if not save_file:
            return residue_pairs_ref

        if filename is None:
            # NOTE(review): split('.')[0] cuts at the FIRST dot, so a path
            # such as './x.pdb' yields an empty stem -- confirm intended
            filename = self.ref.split('.')[0] + 'residue_contact_pairs.pckl'

        # save as pickle; the context manager closes the file even if
        # pickling raises
        with open(filename, 'wb') as f:
            pickle.dump(residue_pairs_ref, f)
        return None
Esempio n. 2
0
    def compute_izone(self, cutoff=5.0, save_file=True, filename=None):
        """Compute the zones for i-RMSD calculations.

        The contact atoms of the reference ``self.ref`` are obtained from
        ``pdb2sql.get_contact_atoms`` (extended to whole residues, backbone
        atoms only) and reduced to the sorted set of unique
        (chainID, resSeq) pairs.

        Args:
            cutoff (float, optional): cutoff for the contact atoms
            save_file (bool, optional): if True, write the zones to a file
                and return None; if False, return them as a dict
            filename (str, optional): name of the zone file. If None, the
                name is the part of ``self.ref`` before its first '.'
                followed by '.izone'

        Returns:
            dict or None: i-zone definition mapping each chainID to the list
            of its residue numbers when ``save_file`` is False, None when
            the zones were written to disk
        """
        sql_ref = pdb2sql(self.ref)
        try:
            contact_ref = sql_ref.get_contact_atoms(
                cutoff=cutoff,
                extend_to_residue=True,
                return_only_backbone_atoms=True)
            index_contact_ref = contact_ref[0] + contact_ref[1]

            # unique (chainID, resSeq) pairs of the contact atoms, sorted
            residues = sorted({
                tuple(data)
                for data in sql_ref.get('chainID,resSeq',
                                        rowID=index_contact_ref)
            })
        finally:
            # always release the database, even if a query fails
            sql_ref.close()

        if save_file:
            if filename is None:
                # NOTE(review): split('.')[0] cuts at the FIRST dot, so a
                # path such as './x.pdb' yields an empty stem -- confirm
                filename = self.ref.split('.')[0] + '.izone'

            # one single-residue zone per line; the context manager closes
            # the file even if a write fails
            with open(filename, 'w') as f:
                for chain, num in residues:
                    f.write('zone %s%d-%s%d\n' % (chain, num, chain, num))
            return None

        # group the residue numbers by chain
        resData = {}
        for chain, num in residues:
            resData.setdefault(chain, []).append(num)
        return resData
Esempio n. 3
0
    def compute_Fnat_pdb2sql(self, cutoff=5.0):
        """Slow method to compute the Fnat using pdb2sql.

        The residue contact pairs of the decoy (``self.decoy``) and of the
        reference (``self.ref``) are extracted with
        ``pdb2sql.get_contact_residue`` (hydrogens excluded). Fnat is the
        number of distinct reference pairs also found in the decoy divided
        by the number of reference pairs.

        Args:
            cutoff (float, optional): cutoff for the contact atoms

        Returns:
            float: Fnat value for the conformation

        Raises:
            ZeroDivisionError: if the reference has no contact pair
        """
        # create the sql
        sql_decoy = pdb2sql(self.decoy)
        sql_ref = None
        try:
            sql_ref = pdb2sql(self.ref)

            # get the contact residue pairs of the decoy
            residue_pairs_decoy = sql_decoy.get_contact_residue(
                cutoff=cutoff, return_contact_pairs=True, excludeH=True)

            # get the contact residue pairs of the ref
            residue_pairs_ref = sql_ref.get_contact_residue(
                cutoff=cutoff, return_contact_pairs=True, excludeH=True)
        finally:
            # always release the databases, even if a query fails
            sql_decoy.close()
            if sql_ref is not None:
                sql_ref.close()

        # flatten {resA: [resB, ...]} into a list of (resA, resB) pairs
        data_pair_decoy = [(resA, resB)
                           for resA, resB_list in residue_pairs_decoy.items()
                           for resB in resB_list]
        data_pair_ref = [(resA, resB)
                         for resA, resB_list in residue_pairs_ref.items()
                         for resB in resB_list]

        # number of residue pairs that ref and decoy have in common
        nCommon = len(set(data_pair_ref).intersection(data_pair_decoy))

        # normalize by the number of reference pairs
        return nCommon / len(data_pair_ref)
Esempio n. 4
0
    def compute_lzone(self, save_file=True, filename=None):
        """Compute the zone for L-RMSD calculation.

        The zone is made of the residues of the longest chain of the
        reference ``self.ref``, i.e. the chain among 'A' and 'B' with the
        most atoms (chain 'A' when both have the same number of atoms).

        Args:
            save_file (bool, optional): if True, write the zone to a file
                and return None; if False, return it as a dict
            filename (str, optional): name of the zone file. If None, the
                name is the part of ``self.ref`` before its first '.'
                followed by '.lzone'

        Returns:
            dict or None: definition of the zone mapping the chainID to the
            list of its residue numbers when ``save_file`` is False, None
            when the zone was written to disk
        """
        sql_ref = pdb2sql(self.ref)
        try:
            nA = len(sql_ref.get('x,y,z', chainID='A'))
            nB = len(sql_ref.get('x,y,z', chainID='B'))

            # detect which chain is the longest (ties go to chain A)
            long_chain = 'B' if nA < nB else 'A'

            # unique (chainID, resSeq) pairs of the long chain, sorted
            residues = sorted({
                tuple(data)
                for data in sql_ref.get('chainID,resSeq', chainID=long_chain)
            })
        finally:
            # always release the database, even if a query fails
            sql_ref.close()

        if save_file:
            if filename is None:
                # NOTE(review): split('.')[0] cuts at the FIRST dot, so a
                # path such as './x.pdb' yields an empty stem -- confirm
                filename = self.ref.split('.')[0] + '.lzone'

            # one single-residue zone per line; the context manager closes
            # the file even if a write fails
            with open(filename, 'w') as f:
                for chain, num in residues:
                    f.write('zone %s%d-%s%d\n' % (chain, num, chain, num))
            return None

        # group the residue numbers by chain
        resData = {}
        for chain, num in residues:
            resData.setdefault(chain, []).append(num)
        return resData
Esempio n. 5
0
    def __init__(self,
                 pdbfile,
                 param_charge=None,
                 param_vdw=None,
                 patch_file=None,
                 contact_distance=8.5,
                 root_export='./',
                 individual_directory=False,
                 verbose=False):
        '''
        Compute the Coulomb, van der Waals interaction and charges

        The constructor stores the arguments, loads the pdb file in a
        pdb2sql database (``self.sqldb``), then calls in order
        ``read_charge_file``, ``read_patch`` (only if a patch file is
        given), ``read_vdw_file`` and ``get_contact_atoms``.

        Args:

            pdbfile (str): pdb file of the molecule

            param_charge (str): file name of the force field file containing the charges e.g. protein-allhdg5.4_new.top. Must be of the format:

                * CYM  atom O   type=O      charge=-0.500 end
                * ALA    atom N   type=NH1     charge=-0.570 end

            param_vdw (str): file name of the force field file containing the vdw parameters e.g  protein-allhdg5.4_new.param. Must be of the format

                * NONBonded  CYAA    0.105   3.750       0.013    3.750
                * NONBonded  CCIS    0.105   3.750       0.013    3.750


            patch_file (str): file name of a valid patch file for the parameters e.g. patch.top
                         The way we handle the patching is very manual and should be
                         made more automatic. If None, no patch is read and
                         ``self.patch`` is set to None

            contact_distance (float): the maximum distance between 2 contact atoms

            root_export (str): root directory where the feature file will be exported (deprecated ?)

            individual_directory (bool): stored as ``self.individual_directory``;
                         not used by the constructor itself

            verbose (bool): stored as ``self.verbose``; not used by the
                         constructor itself


        Example :

        >>> pdb = '1AK4_100w.pdb'
        >>>
        >>> # get the force field included in deeprank
        >>> # if another FF has been used to compute the ref
        >>> # change also this path to the correct one
        >>> FF = pkg_resources.resource_filename('deeprank.features','') + '/forcefield/'
        >>>
        >>> # declare the feature calculator instance
        >>> atfeat = AtomicFeature(pdb,
        >>>                        param_charge = FF + 'protein-allhdg5-4_new.top',
        >>>                        param_vdw    = FF + 'protein-allhdg5-4_new.param',
        >>>                        patch_file   = FF + 'patch.top')
        >>> # assign parameters
        >>> atfeat.assign_parameters()
        >>>
        >>> # only compute the pair interactions here
        >>> atfeat.evaluate_pair_interaction(save_interactions=test_name)
        >>>
        >>> # close the db
        >>> atfeat.sqldb.close()
        '''

        super().__init__("Atomic")

        # set a few things
        self.pdbfile = pdbfile
        self.param_charge = param_charge
        self.param_vdw = param_vdw
        self.patch_file = patch_file
        self.contact_distance = contact_distance
        self.individual_directory = individual_directory
        self.verbose = verbose

        # a few constants
        # NOTE(review): eps0 and c look like the permittivity and the
        # Coulomb prefactor used by the interaction routines -- confirm
        # their units against the code that reads them
        self.eps0 = 1
        self.c = 332.0636

        # directory to export
        self.root_export = root_export

        # read the pdb as an sql
        self.sqldb = pdb2sql(self.pdbfile)

        # read the force field
        self.read_charge_file()

        # the patch is optional: only read it when a file was given
        if patch_file is not None:
            self.patch = self.read_patch()
        else:
            self.patch = None

        # read the vdw param file
        self.read_vdw_file()

        # get the contact atoms
        self.get_contact_atoms()
Esempio n. 6
0
    def compute_irmsd_pdb2sql(self,
                              cutoff=10,
                              method='svd',
                              izone=None,
                              exportpath=None):
        """Slow method to compute the i-rmsd

        Require the precalculation of the izone. A dedicated routine is implemented to compute the izone
        if izone is not given in argument the routine will compute them automatically

        i-RMSD is computed by selecting the contact residues of the reference
        (cutoff of 10A by default), finding their counterpart atoms in the
        decoy, aligning the two sets as best as possible and computing the RMSD.
        Reference atoms without a counterpart in the decoy are ignored.

        Ref : DockQ: A Quality Measure for Protein-Protein Docking Models
              http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879

        Args:
            cutoff (float, optional): cutoff for the contact atoms
            method (str, optional): Method to align the fragments ('svd','quaternion')
            izone (None, optional): file name of the zone. if None the zones will be calculated first
            exportpath (str, optional): directory where the aligned pdbs are
                exported as 'irmsd_decoy.pdb' and 'irmsd_ref.pdb'

        Returns:
            float: i-RMSD value of the conformation

        Raises:
            ValueError: if no chain is left in the matched decoy or
                reference atoms
        """

        # create the sql
        sql_decoy = pdb2sql(self.decoy)
        sql_ref = None
        try:
            sql_ref = pdb2sql(self.ref)

            # get the row indexes of the contact atoms of the reference
            if izone is None:
                contact_ref = sql_ref.get_contact_atoms(
                    cutoff=cutoff,
                    extend_to_residue=True,
                    return_only_backbone_atoms=False)
                index_contact_ref = contact_ref[0] + contact_ref[1]
            else:
                index_contact_ref = self.get_izone_rowID(
                    sql_ref, izone, return_only_backbone_atoms=False)

            # get the xyz and atom identifier of the ref contact atoms
            xyz_contact_ref = sql_ref.get('x,y,z', rowID=index_contact_ref)
            data_contact_ref = sql_ref.get('chainID,resSeq,resName,name',
                                           rowID=index_contact_ref)

            # get the xyz and atom identifier of all the decoy atoms
            xyz_decoy = sql_decoy.get('x,y,z')
            data_decoy = sql_decoy.get('chainID,resSeq,resName,name')

            # map each decoy atom identifier to the position of its first
            # occurrence (same result as list.index, without rescanning
            # the whole decoy for every reference atom)
            decoy_position = {}
            for pos, atom in enumerate(data_decoy):
                decoy_position.setdefault(tuple(atom), pos)

            # loop through the ref atoms
            # if the atom is in the decoy     -> keep its decoy xyz/index
            # if the atom is not in the decoy -> mark it for removal
            xyz_contact_decoy = []
            index_contact_decoy = []
            missing = set()
            for iat, atom in enumerate(data_contact_ref):
                pos = decoy_position.get(tuple(atom))
                if pos is None:
                    missing.add(iat)
                else:
                    index_contact_decoy.append(pos)
                    xyz_contact_decoy.append(xyz_decoy[pos])

            # remove the ref atoms that have no counterpart in the decoy
            if missing:
                xyz_contact_ref = [
                    xyz for iat, xyz in enumerate(xyz_contact_ref)
                    if iat not in missing
                ]
                index_contact_ref = [
                    ind for iat, ind in enumerate(index_contact_ref)
                    if iat not in missing
                ]

            # check that we still have atoms in the chains
            # NOTE(review): the message suggests both chains are required,
            # but '< 1' only rejects the case where no chain is left;
            # confirm whether '< 2' was intended
            chain_decoy = list(
                set(sql_decoy.get('chainID', rowID=index_contact_decoy)))
            chain_ref = list(
                set(sql_ref.get('chainID', rowID=index_contact_ref)))

            if len(chain_decoy) < 1 or len(chain_ref) < 1:
                raise ValueError(
                    'Error in i-rmsd: only one chain represented in one chain')

            # get the translation vectors of the two sets of contact atoms
            tr_decoy = self.get_trans_vect(xyz_contact_decoy)
            tr_ref = self.get_trans_vect(xyz_contact_ref)

            # translate everything
            xyz_contact_decoy = self.translation(xyz_contact_decoy, tr_decoy)
            xyz_contact_ref = self.translation(xyz_contact_ref, tr_ref)

            # get the ideal rotation matrix
            # to superimpose the decoy contact atoms on the ref ones
            U = self.get_rotation_matrix(xyz_contact_decoy,
                                         xyz_contact_ref,
                                         method=method)

            # rotate the entire fragment
            xyz_contact_decoy = self.rotation_matrix(xyz_contact_decoy,
                                                     U,
                                                     center=False)

            # compute the RMSD
            irmsd = self.get_rmsd(xyz_contact_decoy, xyz_contact_ref)

            # export the pdb for verifications
            if exportpath is not None:

                # update the sql database
                sql_decoy.update_xyz(xyz_contact_decoy,
                                     index=index_contact_decoy)
                sql_ref.update_xyz(xyz_contact_ref, index=index_contact_ref)

                sql_decoy.exportpdb(exportpath + '/irmsd_decoy.pdb',
                                    index=index_contact_decoy)
                sql_ref.exportpdb(exportpath + '/irmsd_ref.pdb',
                                  index=index_contact_ref)
        finally:
            # close the db, also when an error is raised above
            sql_decoy.close()
            if sql_ref is not None:
                sql_ref.close()

        return irmsd
Esempio n. 7
0
    def compute_lrmsd_pdb2sql(self, exportpath=None, method='svd'):
        """Slow routine to compute the L-RMSD.

        This routine parse the PDB directly using pdb2sql.
        L-RMSD is computed by aligning the longest chain of the decoy to the one of the reference
        and computing the RMSD of the shortest chain between decoy and reference.

        When a chain does not have the same number of atoms in the decoy
        and in the reference, ``get_identical_atoms`` is used to select the
        atoms of that chain in both structures.

        Ref : DockQ: A Quality Measure for Protein-Protein Docking Models
              http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879

        Args:
            exportpath (str, optional): directory where the aligned pdbs are
                exported as 'lrmsd_decoy.pdb' and 'lrmsd_aligned.pdb'
            method (str, optional): Method to align the fragments ('svd','quaternion')

        Returns:
            float: L-RMSD value of the conformation
        """

        # create the sql
        sql_decoy = pdb2sql(self.decoy, sqlfile='decoy.db')
        sql_ref = None
        try:
            sql_ref = pdb2sql(self.ref, sqlfile='ref.db')

            # extract the pos of chains A
            xyz_decoy_A = np.array(sql_decoy.get('x,y,z', chainID='A'))
            xyz_ref_A = np.array(sql_ref.get('x,y,z', chainID='A'))

            # extract the pos of chains B
            xyz_decoy_B = np.array(sql_decoy.get('x,y,z', chainID='B'))
            xyz_ref_B = np.array(sql_ref.get('x,y,z', chainID='B'))

            # check the lengths: fall back on the common atoms when the
            # decoy and the ref do not have the same number of atoms
            if len(xyz_decoy_A) != len(xyz_ref_A):
                xyz_decoy_A, xyz_ref_A = self.get_identical_atoms(
                    sql_decoy, sql_ref, 'A')

            if len(xyz_decoy_B) != len(xyz_ref_B):
                xyz_decoy_B, xyz_ref_B = self.get_identical_atoms(
                    sql_decoy, sql_ref, 'B')

            # detect which chain is the longest
            # NOTE(review): on a tie chain B is taken as the long chain
            # here, whereas compute_lzone picks chain A -- confirm
            nA, nB = len(xyz_decoy_A), len(xyz_decoy_B)
            if nA > nB:
                xyz_decoy_long, xyz_ref_long = xyz_decoy_A, xyz_ref_A
                xyz_decoy_short, xyz_ref_short = xyz_decoy_B, xyz_ref_B
            else:
                xyz_decoy_long, xyz_ref_long = xyz_decoy_B, xyz_ref_B
                xyz_decoy_short, xyz_ref_short = xyz_decoy_A, xyz_ref_A

            # get the translation vectors from the long chains
            tr_decoy = self.get_trans_vect(xyz_decoy_long)
            tr_ref = self.get_trans_vect(xyz_ref_long)

            # translate both chains of the decoy
            xyz_decoy_short = self.translation(xyz_decoy_short, tr_decoy)
            xyz_decoy_long = self.translation(xyz_decoy_long, tr_decoy)

            # translate both chains of the ref
            xyz_ref_short = self.translation(xyz_ref_short, tr_ref)
            xyz_ref_long = self.translation(xyz_ref_long, tr_ref)

            # get the ideal rotation matrix
            # to superimpose the long chains
            U = self.get_rotation_matrix(xyz_decoy_long,
                                         xyz_ref_long,
                                         method=method)

            # apply that rotation to the short chain of the decoy
            xyz_decoy_short = self.rotation_matrix(xyz_decoy_short,
                                                   U,
                                                   center=False)

            # compute the RMSD of the short chains
            lrmsd = self.get_rmsd(xyz_decoy_short, xyz_ref_short)

            # export the pdb for verifications
            if exportpath is not None:

                # extract the pos of the dimer
                xyz_decoy = np.array(sql_decoy.get('x,y,z'))
                xyz_ref = np.array(sql_ref.get('x,y,z'))

                # translate
                xyz_ref = self.translation(xyz_ref, tr_ref)
                xyz_decoy = self.translation(xyz_decoy, tr_decoy)

                # rotate decoy
                xyz_decoy = self.rotation_matrix(xyz_decoy, U, center=False)

                # update the sql database
                sql_decoy.update_xyz(xyz_decoy)
                sql_ref.update_xyz(xyz_ref)

                # export
                sql_decoy.exportpdb(exportpath + '/lrmsd_decoy.pdb')
                sql_ref.exportpdb(exportpath + '/lrmsd_aligned.pdb')
        finally:
            # close the db, also when an error is raised above
            sql_decoy.close()
            if sql_ref is not None:
                sql_ref.close()

        return lrmsd