コード例 #1
0
ファイル: dockingHandler.py プロジェクト: MMSB-MOBI/DockingPP
    def loadScores(self, score_file: str):
        """Read rescoring values back from a file serialized by serializeRescoring.

        The first line of the file is skipped. The second line is a tab-separated
        header whose columns after the first one are the score names. Every
        following line holds a pose index (1-based) followed by one value per
        score; each value is stored as a float in the ``rescoring`` mapping of
        the corresponding pose. The number of pose lines read is recorded as the
        number of rescored poses.

        Args:
            score_file (str): Path to score file

        Raises:
            error.IncompatiblePoseNumber: Raises if the file contains more pose lines than there are loaded poses.
        """
        typecheck.validFile(score_file)
        with open(score_file) as handle:
            handle.readline()  # leading comment line, ignored
            # Header line: first column labels the pose index, the rest are score names.
            score_names = handle.readline().rstrip().split("\t")[1:]
            nb_poses = 0
            for nb_poses, pose_line in enumerate(handle, start=1):
                if nb_poses > len(self.poses):
                    raise error.IncompatiblePoseNumber(
                        f"There are more poses in scores file than in loaded zdock results.")

                pose_index = int(pose_line.split("\t", 1)[0])
                values = pose_line.rstrip().split("\t")[1:]

                for column, name in enumerate(score_names):
                    value = float(values[column])
                    # Pose indexes in the file start at 1, list positions at 0.
                    self.poses[pose_index - 1].rescoring[name] = value
        self._nb_rescored_poses = nb_poses
コード例 #2
0
ファイル: dockingHandler.py プロジェクト: MMSB-MOBI/DockingPP
    def getRankedPoses(self, score: str, nb_poses: int = -1) -> List['DockingPP.pose.Pose']:
        """Return the first nb_poses poses ordered by the requested score.

        Args:
            score (str): Score to rank by. The special value "original" returns
                the poses in their stored order, without sorting.
            nb_poses (int): Number of poses to get. With -1 (the default), the
                number of poses that have a contact map is used.

        Raises:
            error.IncompatiblePoseNumber: Raises if you try to rank more poses than rescored poses
                (not checked for the "original" score).

        Returns:
            List[DockingPP.pose.Pose]: List of poses in decreasing score order
        """
        limit = self._nb_cmap_poses if nb_poses == -1 else nb_poses

        if score == "original":
            # Stored order is the original ranking; nothing to sort.
            return self.poses[:limit]

        if limit > self._nb_rescored_poses:
            raise error.IncompatiblePoseNumber(
                f"Try to rank {limit} but only {self._nb_rescored_poses} have been rescored.")

        ranked = list(self.poses[:limit])
        # Stable sort: poses with equal scores keep their stored relative order.
        ranked.sort(key=lambda pose: pose.getScore(score), reverse=True)
        return ranked
コード例 #3
0
ファイル: dockingHandler.py プロジェクト: MMSB-MOBI/DockingPP
    def computeFrequencies(self, nb_poses: int = -1):
        """Build the ``freq`` attribute from the first nb_poses poses that have a contact map.

        The contact map has to be computed before calling this method. The
        selected poses are handed to DockingPP.frequencies.Frequencies and the
        resulting object is stored in ``self.freq``.

        Contact frequencies: for each contact between residue i of the ligand and
        residue j of the receptor, count and relative frequency among poses.

        Frequencies of residues at interface: for each residue of the ligand and
        each residue of the receptor, count and relative frequency of the number
        of times the residue appears at the interface among poses.

        Args:
            nb_poses (int): Number of poses to compute frequencies on (optional).
                With -1 (the default), all poses that have a contact map are used.

        Raises:
            error.ContactMapNotComputed: Raise if no contact map has been computed yet.
            error.IncompatiblePoseNumber: Raise if you try to compute frequency with more poses than poses with contact map.

        Examples:
            Compute frequencies for 1BJ1 complex with 50 poses

            >>> DH.computeFrequencies(50)
            >>> DH.freq.rel_frequencies_contact
            {(312, 171): 0.5, (262, 170): 0.66, (313, 172): 0.5, (266, 5): 0.22, (320, 168): 0.64, (314, 161): 0.5, (259, 167): 0.34 ... }
            >>> DH.freq.rel_frequencies_residue
            {'ligand': {129: 0.74, 4: 0.7, 5: 0.68, 8: 0.74, 160: 0.5, 161: 0.5, 162: 0.74, 163: 0.54, 164: 0.72, 165: 0.7, 166: 0.66, 167: 0.72, 168: 0.7 ... }, 'receptor': {259: 0.48, 262: 1.0, 263: 0.66, 264: 1.0, 265: 0.7, 266: 1.0, 267: 0.66, 269: 0.62, 271: 0.94, 31: 0.9 ... }}

        """
        requested = self._nb_cmap_poses if nb_poses == -1 else nb_poses

        # Logged before validation, so a rejected request still shows up in the log.
        logging.info(f"== Compute frequencies ==\nNumber of poses: {requested}")

        if not self._raw_contact_map:
            raise error.ContactMapNotComputed(
                "Contact map doesn't exist. Call computeContactMap first.")

        available = self._nb_cmap_poses
        if requested > available:
            raise error.IncompatiblePoseNumber(
                f"You try to compute frequencies for {requested} but only {available} have contact map.")

        selected_poses = self.cmap_poses[:requested]
        self.freq = Frequencies(selected_poses)
コード例 #4
0
    def computeContactMap(self,
                          nb_threads: int,
                          nb_poses: int = -1,
                          distance: float = 5):
        """Compute the contact map for the given number of poses, split across threads.

        The work is cut into ``nb_threads`` consecutive chunks; each chunk is
        handed to ``self._lcmap_thread`` in its own thread. Once every thread has
        finished, the per-thread results are concatenated in thread order and
        passed to ``self._decodeContactMap``. The actual contact computation is
        delegated to ``_lcmap_thread`` (not visible here; the original
        documentation says it relies on the ccmap module).

        ``self._nb_cmap_poses`` is set to ``nb_poses`` before the threads start.

        Args:
            nb_threads (int): Number of threads to compute contact map.
                Note: a value of 0 makes the chunk-width division below fail
                with ZeroDivisionError.
            nb_poses (int): Number of poses to compute contact map. With -1
                (the default), all loaded poses are used and the count is printed.
            distance (float, optional): Distance (in Angstrom) below which two residues are considered in contact. Defaults to 5.

        Raises:
            error.IncompatiblePoseNumber: Raise if you want to compute on more poses than loaded.
            error.PdbNotSet: Raise if the ligand or the receptor is not set.
        """

        if nb_poses == -1:
            nb_poses = len(self.poses)
            print("Number of poses : ", nb_poses)

        if nb_poses > len(self.poses):
            raise error.IncompatiblePoseNumber(
                f"You try to compute contact map on {nb_poses} and only {len(self.poses)} are loaded"
            )
        logging.info(
            f"== Compute contact map ==\nnumber of threads : {nb_threads}\nnumber of poses : {nb_poses}\ndistance : {distance}"
        )

        if not self.ligand:
            raise error.PdbNotSet("Ligand is not set. ")

        if not self.receptor:
            raise error.PdbNotSet("Receptor is not set. ")

        self._nb_cmap_poses = nb_poses
        # One result slot per thread; presumably _lcmap_thread stores its
        # result in output[i] - TODO confirm against the helper.
        output = [None for i in range(nb_threads)]
        threadPool = []

        nb_to_keep: int = nb_poses
        nb_split: int = nb_threads
        # Chunk size per thread, rounded towards zero; the remainder goes to the last thread.
        nWidth = int(nb_to_keep / nb_split)

        # Cut the index range [0, nb_poses) into one consecutive slice per thread.
        for i in range(nb_split):
            top = (i + 1) * nWidth
            if i == (nb_split - 1):
                # Last thread also takes the poses left over by the integer division.
                top += nb_to_keep % nb_split

            # Collect the atomDictorize attribute of every element of this thread's slice.
            # NOTE(review): reclist is built from self.ligand and liglist from
            # self.receptor - the names look crossed; confirm against the
            # parameter order expected by _lcmap_thread.
            reclist = list(
                map(lambda x: x.atomDictorize, self.ligand[i * nWidth:top]))
            liglist = list(
                map(lambda x: x.atomDictorize, self.receptor[i * nWidth:top]))

            threadPool.append(
                threading.Thread(target=self._lcmap_thread,
                                 args=(i, reclist, liglist, output, distance)))

        for th in threadPool:
            th.start()

        # Wait for every worker before reading the shared output list.
        for th in threadPool:
            th.join()

        # Flatten the per-thread results in thread order. A slot that is still
        # None (thread did not store a result) would raise TypeError here.
        ccmap_result = [pose for thread in output for pose in thread]
        self._decodeContactMap(ccmap_result)
コード例 #5
0
ファイル: loader.py プロジェクト: MMSB-MOBI/DockingPP
def loadZdock(zdock_results: str,
              nb_pose: int = -1) -> 'DockingPP.dockingHandler.DockingHandler':
    """Load zdock results into DockingHandler object

    The four header lines are validated and parsed first (grid dimension and
    step, initial Euler angles, then one line each for the receptor and the
    ligand whose last three fields are read as coordinates). Every following
    line is parsed as a pose: three Euler angles and three integer grid
    translations. When the initial Euler angles are not (0, 0, 0), the initial
    rotation is combined with each pose rotation. Translations above half the
    grid dimension are wrapped to negative values, then multiplied by
    ``-1 * step``.

    Args:
        zdock_results (str): path to zdock results file
        nb_pose (int, optional): number of poses to load. If -1, all poses will be loaded. Defaults to -1.
            Any other value that never matches a pose index (0, other negative
            numbers, or more than the file contains) ends in
            error.IncompatiblePoseNumber after the whole file has been read.

    Raises:
        error.ZdockFormatError: Raise if error is detected in zdock format
        error.IncompatiblePoseNumber: Raise if you try to load more poses than possible.

    Returns:
        DockingPP.dockingHandler.DockingHandler: DockingHandler object for this zdock results.

    """
    typecheck.validFile(zdock_results)  # Check if the file exists
    logging.info(
        f"== Load zDock results ==\n path : {zdock_results}\n number of poses : {'All' if nb_pose == -1 else nb_pose}"
    )
    reL1 = r'^([\d]+)[\s]+([\d\.]+)[\s]*$'
    reL2 = r'^[\s]*([\.\d-]+)[\s]+([\d\.-]+)[\s]+([\.\d-]+)[\s]*$'
    reL3 = r'^[\s]*([\S]+)[\s]+([\.\d-]+)[\s]+([\d\.-]+)[\s]+([\.\d-]+)[\s]*$'

    # Compiled once: this pattern is applied to every pose line.
    pose_pattern = re.compile(
        r'^([\d\.-]+)[\s]+([\d\.-]+)[\s]+([\d\.-]+)[\s]+([\d]+)[\s]+([\d]+)[\s]+([\d]+)[\s]+.*'
    )

    with open(zdock_results, 'r') as f:
        # Check header lines formatting
        re_line1 = re.match(reL1, f.readline())
        re_line2 = re.match(reL2, f.readline())
        re_line3 = re.match(reL3, f.readline())
        re_line4 = re.match(reL3, f.readline())

        if not re_line1:
            raise error.ZdockFormatError("Line 1 has wrong format")
        if not re_line2:
            raise error.ZdockFormatError("Line 2 has wrong format")
        if not re_line3:
            raise error.ZdockFormatError("Line 3 has wrong format")
        if not re_line4:
            raise error.ZdockFormatError("Line 4 has wrong format")

        # Set docking_collection attributes
        grid_dimension = int(re_line1.group(1))
        step = float(re_line1.group(2))
        initial_euler = tuple(float(angle) for angle in re_line2.groups())
        # First group of lines 3 and 4 is a non-numeric field; only the
        # three trailing numbers are kept.
        baryRec = tuple(float(coord) for coord in re_line3.groups()[1:])
        baryLig = tuple(float(coord) for coord in re_line4.groups()[1:])

        docking_collection = DockingHandlerZdock(grid_dimension, step,
                                                 initial_euler, baryRec,
                                                 baryLig)

        # The initial rotation is the same for every pose: decide once whether
        # it applies and build its matrix a single time instead of per line.
        needs_rotation = initial_euler != (0, 0, 0)
        rand_rot = trans_matrix(*initial_euler) if needs_rotation else None

        nb_loaded = 0
        # Parse poses lines; pose indexes start at 1.
        for pose_index, line in enumerate(f, start=1):
            m = pose_pattern.match(line)
            if not m:
                raise error.ZdockFormatError("A pose line has wrong format")
            fields = m.groups()
            euler = tuple(float(angle) for angle in fields[:3])

            if needs_rotation:
                # Combine pose and initial rotations into one matrix, then
                # recover the combined angles.
                double = trans_matrix(*euler).dot(rand_rot)
                euler = eulerFromMatrix(double)

            # Grid translations above half the grid wrap around to negative
            # offsets; the result is then scaled by -step (same computation as
            # the original implementation).
            translation = tuple(
                -1 * (t - grid_dimension if t > grid_dimension / 2 else t) * step
                for t in map(int, fields[3:6]))

            docking_collection.addPose(pose_index, euler, translation)
            nb_loaded = pose_index

            if nb_pose == pose_index:
                return docking_collection

        if nb_pose == -1:
            return docking_collection

        # Report the number of poses actually read (the loop counter used
        # previously was one past that number).
        raise error.IncompatiblePoseNumber(
            f"You ask too much poses, only {nb_loaded} are present in the result file."
        )
コード例 #6
0
ファイル: dockingHandler.py プロジェクト: MMSB-MOBI/DockingPP
    def rescorePoses(self, nb_poses: int, type_score: str):
        """Rescore N poses according to given type_score. A new score will be computed for each pose

        All arguments are validated before any state is changed: the number of
        rescored poses is only updated once the pose count and the score name
        have been accepted, so a rejected call leaves the previous count intact.

        Args:
            nb_poses (int): Number of poses to rescore
            type_score (str): Score to use

                Available type score
                    * CONSRANK_U : sum of relative frequencies of each contact of the pose
                    * CONSRANK : CONSRANK_U normalised by number of contacts in the pose
                    * contact_log_sum : sum of log of relative frequencies of each contact of the pose
                    * contact_square_sum : sum of square of relative frequencies of each contact of the pose
                    * residue_sum : sum of relative frequencies of each interface residue (ligand and receptor) of the pose
                    * residue_sum_ligand : sum of relative frequencies for ligand interface residues
                    * residue_sum_receptor : sum of relative frequencies for receptor interface residues
                    * residue_average : residues_sum normalised by number of interface residues in the pose
                    * residue_average_ligand : residues_sum normalised by number of interface residues in the pose, just for ligand residues
                    * residue_average_receptor : residues_sum normalised by number of interface residues in the pose, just for receptor residues
                    * residue_log_sum : sum of log of relative frequencies of each interface residue of the pose
                    * residue_square_sum : sum of square of relative frequencies of each interface residue of the pose
                    * all : to compute all above scores

        Raises:
            error.IncompatiblePoseNumber: Raise if you try to rescore more poses than poses with contact map
            error.FrequenciesNotComputed: Raise if frequencies have not been computed yet.
            error.InvalidScore: Raise if you give an invalid type score.

        Examples:
            For 1BJ1, rescore 2000 poses with all scores and display the first 2

            >>> DH.rescorePoses(2000, type_score = "all")
            >>> for p in DH.poses[:2]:
            >>>     print(p.index, p.rescoring)
            1 {'CONSRANK_U': 38.180000000000014, 'CONSRANK': 0.4061702127659576, 'contact_log_sum': -94.55436888131206, 'contact_square_sum': 17.90280000000001, 'residue_sum': 42.06, 'residue_average': 0.7378947368421053, 'residue_log_sum': -20.625622826512405, 'residues_square_sum': 33.8476}
            2 {'CONSRANK_U': 38.00000000000001, 'CONSRANK': 0.44186046511627913, 'contact_log_sum': -75.18567402702058, 'contact_square_sum': 18.285600000000002, 'residue_sum': 40.12, 'residue_average': 0.7569811320754717, 'residue_log_sum': -17.346104879674645, 'residues_square_sum': 32.776}

        """
        logging.info(
            f'== Rescore poses ==\nNumber of poses : {nb_poses}\n Scores : {type_score}')

        if nb_poses > self._nb_cmap_poses:
            raise error.IncompatiblePoseNumber(
                f"Impossible to rescore {nb_poses} poses, only {self._nb_cmap_poses} have contact map")

        if not self.freq:
            raise error.FrequenciesNotComputed(
                "Frequencies doesn't exist. Call computeFrequencies first.")

        available_scores = self.freq.available_scores
        if type_score == "all":
            scores_to_compute = available_scores
        elif type_score in available_scores:
            scores_to_compute = {type_score: available_scores[type_score]}
        else:
            raise error.InvalidScore(f"{type_score} score is not valid")

        # Record the new count only after validation succeeded, but before
        # iterating: presumably self.rescored_poses is derived from this
        # counter - TODO confirm against its definition.
        self._nb_rescored_poses = nb_poses

        for pose in self.rescored_poses:
            for score, score_info in scores_to_compute.items():
                pose.computeScore(score, *score_info)