def loadScores(self, score_file: str):
    """Load scores from a score file serialized by serializeRescoring.

    The first line of the file is skipped (comments). The second line is a
    tab-separated header: its first column is ignored and the remaining
    columns are the score names. Each following non-blank line holds a
    1-based pose index followed by one value per score name. Values are
    converted to float and stored in the ``rescoring`` attribute of the
    matching pose, and the number of loaded lines is stored in
    ``_nb_rescored_poses``.

    Args:
        score_file (str): Path to score file

    Raises:
        error.IncompatiblePoseNumber: Raises if you try to load scores for
            more poses than loaded, or if a line references a pose index
            outside the loaded poses.
        IndexError: Raises if a line holds fewer values than the header
            declares score names.
    """
    typecheck.validFile(score_file)

    with open(score_file) as f:
        f.readline()  # discard the first line, which only holds comments
        header = f.readline().rstrip()  # second line is the header line
        score_names = header.split("\t")[1:]  # first column is the pose index

        nb_poses = 0
        for pose_line in f:
            # Tolerate blank lines (typically a trailing newline) instead of
            # crashing on int("") and counting them as poses.
            if not pose_line.strip():
                continue

            nb_poses += 1
            if nb_poses > len(self.poses):
                raise error.IncompatiblePoseNumber(
                    "There are more poses in scores file than in loaded zdock results.")

            fields = pose_line.rstrip().split("\t")
            pose_index = int(fields[0])
            # Without this guard an index <= 0 would silently address a pose
            # from the end of the list through negative indexing.
            if not 1 <= pose_index <= len(self.poses):
                raise error.IncompatiblePoseNumber(
                    f"Pose index {pose_index} in scores file does not match any loaded pose.")

            values = fields[1:]
            # zip() would silently truncate a short row, so keep failing loudly.
            if len(values) < len(score_names):
                raise IndexError(
                    f"Pose {pose_index} has {len(values)} score values but "
                    f"{len(score_names)} are declared in the header.")

            rescoring = self.poses[pose_index - 1].rescoring
            for name, value in zip(score_names, values):
                rescoring[name] = float(value)

    self._nb_rescored_poses = nb_poses
def getRankedPoses(self, score: str, nb_poses: int = -1) -> List['DockingPP.pose.Pose']:
    """Return the first ``nb_poses`` poses, ordered by the requested score.

    Only the first ``nb_poses`` poses of ``self.poses`` are considered. With
    the special score ``"original"`` they are returned in their stored
    order; for any other score they are sorted on ``pose.getScore(score)``.

    Args:
        score (str): Score to rank by, or "original" to keep the stored order
        nb_poses (int): Number of poses to get. -1 (default) means the
            number of poses that have a contact map.

    Raises:
        error.IncompatiblePoseNumber: Raises if you try to rank more poses
            than rescored poses (not checked for the "original" score).

    Returns:
        List[DockingPP.pose.Pose]: List of poses in decreasing score order
    """
    limit = self._nb_cmap_poses if nb_poses == -1 else nb_poses

    # The original order needs no rescoring, so it is never size-checked.
    if score == "original":
        return self.poses[:limit]

    if limit > self._nb_rescored_poses:
        raise error.IncompatiblePoseNumber(
            f"Try to rank {limit} but only {self._nb_rescored_poses} have been rescored.")

    def score_of(pose):
        return pose.getScore(score)

    ranked = list(self.poses[:limit])
    ranked.sort(key=score_of, reverse=True)
    return ranked
def computeFrequencies(self, nb_poses: int = -1):
    """Compute contact and interface-residue frequencies over the given poses.

    The contact map has to be computed before calling this method.

    Contact frequencies: for each contact between residue i of the ligand
    and residue j of the receptor, the count and the relative frequency
    among poses are computed.

    Frequencies of residues at interface: for each residue of the ligand and
    each residue of the receptor, the count and the relative frequency of
    the number of times the residue appears at the interface among poses
    are computed.

    The result is stored in the ``freq`` attribute, a
    DockingPP.frequencies.Frequencies object built from the first
    ``nb_poses`` entries of ``cmap_poses``.

    Args:
        nb_poses (int): Number of poses to compute frequencies on
            (optional). -1 (default) means every pose with a contact map.

    Raises:
        error.ContactMapNotComputed: Raise if the contact map has not been
            computed yet.
        error.IncompatiblePoseNumber: Raise if you try to compute frequency
            with more poses than poses with contact map.

    Examples:
        Compute frequencies for 1BJ1 complex with 50 poses

        >>> DH.computeFrequencies(50)
        >>> DH.freq.rel_frequencies_contact
        {(312, 171): 0.5, (262, 170): 0.66, (313, 172): 0.5, (266, 5): 0.22, (320, 168): 0.64, (314, 161): 0.5, (259, 167): 0.34 ... }
        >>> DH.freq.rel_frequencies_residue
        {'ligand': {129: 0.74, 4: 0.7, 5: 0.68, 8: 0.74, 160: 0.5, 161: 0.5, 162: 0.74, 163: 0.54, 164: 0.72, 165: 0.7, 166: 0.66, 167: 0.72, 168: 0.7 ... }, 'receptor': {259: 0.48, 262: 1.0, 263: 0.66, 264: 1.0, 265: 0.7, 266: 1.0, 267: 0.66, 269: 0.62, 271: 0.94, 31: 0.9 ... }}
    """
    nb_poses = self._nb_cmap_poses if nb_poses == -1 else nb_poses

    logging.info(f"== Compute frequencies ==\nNumber of poses: {nb_poses}")

    # Frequencies are derived from the contact map, so it must exist first.
    if not self._raw_contact_map:
        raise error.ContactMapNotComputed(
            "Contact map doesn't exist. Call computeContactMap first.")

    if nb_poses > self._nb_cmap_poses:
        raise error.IncompatiblePoseNumber(
            f"You try to compute frequencies for {nb_poses} but only {self._nb_cmap_poses} have contact map.")

    selected_poses = self.cmap_poses[:nb_poses]
    self.freq = Frequencies(selected_poses)
def computeContactMap(self, nb_threads: int, nb_poses: int = -1, distance: float = 5):
    """Compute the contact map for the given poses and distance.

    The work is split into ``nb_threads`` contiguous chunks, each handled by
    ``_lcmap_thread`` in its own thread. The per-thread results are then
    concatenated in thread order and handed to ``_decodeContactMap``.

    Args:
        nb_threads (int): Number of threads to compute contact map (>= 1)
        nb_poses (int): Number of poses to compute contact map. -1 (default)
            means all loaded poses.
        distance (float, optional): Distance (in Angstrom) below which two
            residues are considered in contact. Defaults to 5.

    Raises:
        ValueError: Raise if nb_threads is lower than 1 or nb_poses is
            negative (other than the -1 sentinel).
        error.IncompatiblePoseNumber: Raise if you want to compute on more
            poses than loaded.
        error.PdbNotSet: Raise if the ligand or the receptor is not set.
    """
    # nb_threads == 0 used to end in ZeroDivisionError and a negative value
    # silently produced an empty contact map; reject both up front.
    if nb_threads < 1:
        raise ValueError(f"nb_threads must be at least 1, got {nb_threads}")
    if nb_poses < -1:
        raise ValueError(f"nb_poses must be positive or -1, got {nb_poses}")

    if nb_poses == -1:
        nb_poses = len(self.poses)
    print("Number of poses : ", nb_poses)

    if nb_poses > len(self.poses):
        raise error.IncompatiblePoseNumber(
            f"You try to compute contact map on {nb_poses} and only {len(self.poses)} are loaded"
        )

    logging.info(
        f"== Compute contact map ==\nnumber of threads : {nb_threads}\nnumber of poses : {nb_poses}\ndistance : {distance}"
    )

    if not self.ligand:
        raise error.PdbNotSet("Ligand is not set. ")
    if not self.receptor:
        raise error.PdbNotSet("Receptor is not set. ")

    self._nb_cmap_poses = nb_poses

    # One result slot per thread; presumably _lcmap_thread stores its result
    # in output[i]. A slot left at None makes the flattening below fail.
    output = [None] * nb_threads

    # Integer split: every thread gets chunk_size items and the last one
    # also takes the remainder.
    chunk_size, remainder = divmod(nb_poses, nb_threads)

    thread_pool = []
    for i in range(nb_threads):
        start = i * chunk_size
        stop = start + chunk_size
        if i == nb_threads - 1:
            stop += remainder

        # NOTE(review): the original locals were named reclist (built from
        # self.ligand) and liglist (built from self.receptor). The positional
        # order below is unchanged; confirm it matches _lcmap_thread.
        ligand_chunk = [item.atomDictorize for item in self.ligand[start:stop]]
        receptor_chunk = [item.atomDictorize for item in self.receptor[start:stop]]

        thread_pool.append(
            threading.Thread(
                target=self._lcmap_thread,
                args=(i, ligand_chunk, receptor_chunk, output, distance)))

    for th in thread_pool:
        th.start()
    for th in thread_pool:
        th.join()

    # Flatten in thread order so results line up with the input order.
    ccmap_result = [pose for chunk in output for pose in chunk]
    self._decodeContactMap(ccmap_result)
def loadZdock(zdock_results: str, nb_pose: int = -1) -> 'DockingPP.dockingHandler.DockingHandler':
    """Load zdock results into DockingHandler object

    The four header lines are validated first: line 1 holds the grid
    dimension and the step, line 2 the initial euler angles, lines 3 and 4 a
    label followed by three coordinates (stored as the receptor and ligand
    barycenters respectively). Every following line is parsed as a pose
    (three euler angles, three integer grid translations) and added to the
    handler with a 1-based index.

    Args:
        zdock_results (str): path to zdock results file
        nb_pose (int, optional): number of poses to load. If -1, all poses will be loaded. Defaults to -1.

    Raises:
        error.ZdockFormatError: Raise if error is detected in zdock format
        error.IncompatiblePoseNumber: Raise if you try to load more poses than possible.

    Returns:
        DockingPP.dockingHandler.DockingHandler: DockingHandler object for this zdock results.
    """
    typecheck.validFile(zdock_results)  # Check if the file exists

    logging.info(
        f"== Load zDock results ==\n path : {zdock_results}\n number of poses : {'All' if nb_pose == -1 else nb_pose}"
    )

    reL1 = r'^([\d]+)[\s]+([\d\.]+)[\s]*$'
    reL2 = r'^[\s]*([\.\d-]+)[\s]+([\d\.-]+)[\s]+([\.\d-]+)[\s]*$'
    reL3 = r'^[\s]*([\S]+)[\s]+([\.\d-]+)[\s]+([\d\.-]+)[\s]+([\.\d-]+)[\s]*$'
    reZPOSE = r'^([\d\.-]+)[\s]+([\d\.-]+)[\s]+([\d\.-]+)[\s]+([\d]+)[\s]+([\d]+)[\s]+([\d]+)[\s]+.*'

    with open(zdock_results, 'r') as f:
        # Check header lines formatting
        re_line1 = re.match(reL1, f.readline())
        re_line2 = re.match(reL2, f.readline())
        re_line3 = re.match(reL3, f.readline())
        re_line4 = re.match(reL3, f.readline())

        if not re_line1:
            raise error.ZdockFormatError("Line 1 has wrong format")
        if not re_line2:
            raise error.ZdockFormatError("Line 2 has wrong format")
        if not re_line3:
            raise error.ZdockFormatError("Line 3 has wrong format")
        if not re_line4:
            raise error.ZdockFormatError("Line 4 has wrong format")

        # Set docking_collection attributes
        grid_dimension = int(re_line1.group(1))
        step = float(re_line1.group(2))
        initial_euler = tuple(float(v) for v in re_line2.groups())
        # Group 1 of lines 3 and 4 is a label; the coordinates follow it.
        baryRec = tuple(float(v) for v in re_line3.groups()[1:])
        baryLig = tuple(float(v) for v in re_line4.groups()[1:])

        docking_collection = DockingHandlerZdock(
            grid_dimension, step, initial_euler, baryRec, baryLig)

        # The initial rotation is the same for every pose: build its matrix
        # once instead of once per pose line.
        apply_initial_rotation = initial_euler != (0, 0, 0)
        if apply_initial_rotation:
            rand_rot = trans_matrix(*initial_euler)

        # Parse poses lines
        nb_loaded = 0
        for line in f:
            m = re.match(reZPOSE, line)
            if not m:
                raise error.ZdockFormatError("A pose line has wrong format")

            fields = m.groups()
            euler = (float(fields[0]), float(fields[1]), float(fields[2]))
            if apply_initial_rotation:
                # Combine the pose rotation with the initial rotation into
                # one matrix, then recover the combined angles.
                pose_rot = trans_matrix(*euler)
                double = pose_rot.dot(rand_rot)
                euler = eulerFromMatrix(double)

            _translation = [int(fields[3]), int(fields[4]), int(fields[5])]
            # Grid offsets above half the grid are wrapped to negative values.
            translation = tuple([
                t - grid_dimension if t > grid_dimension / 2 else t
                for t in _translation
            ])
            # Copy Julia's calculations
            translation = tuple([-1 * t * step for t in translation])

            nb_loaded += 1
            docking_collection.addPose(nb_loaded, euler, translation)
            if nb_pose == nb_loaded:
                return docking_collection

    if nb_pose == -1:
        return docking_collection

    # Report the number of poses actually parsed; the former message used the
    # next pose index, which is one too many.
    raise error.IncompatiblePoseNumber(
        f"You ask too much poses, only {nb_loaded} are present in the result file."
    )
def rescorePoses(self, nb_poses: int, type_score: str):
    """Rescore N poses according to given type_score. A new score will be computed for each pose

    The requested score is validated before any state is changed, so a
    failed call leaves the number of rescored poses untouched.

    Args:
        nb_poses (int): Number of poses to rescore
        type_score (str): Score to use

    Available type score
        * CONSRANK_U : sum of relative frequencies of each contact of the pose
        * CONSRANK : CONSRANK_U normalised by number of contacts in the pose
        * contact_log_sum : sum of log of relative frequencies of each contact of the pose
        * contact_square_sum : sum of square of relative frequencies of each contact of the pose
        * residue_sum : sum of relative frequencies of each interface residue (ligand and receptor) of the pose
        * residue_sum_ligand : sum of relative frequencies for ligand interface residues
        * residue_sum_receptor : sum of relative frequencies for receptor interface residues
        * residue_average : residues_sum normalised by number of interface residues in the pose
        * residue_average_ligand : residues_sum normalised by number of interface residues in the pose, just for ligand residues
        * residue_average_receptor : residues_sum normalised by number of interface residues in the pose, just for receptor residues
        * residue_log_sum : sum of log of relative frequencies of each interface residue of the pose
        * residue_square_sum : sum of square of relative frequencies of each interface residue of the pose
        * all : to compute all above scores

    Raises:
        error.IncompatiblePoseNumber: Raise if you try to rescore more poses than poses with contact map
        error.FrequenciesNotComputed: Raise if frequencies have not been computed yet.
        error.InvalidScore: Raise if you give an invalid type score.

    Examples:
        For 1BJ1, rescore 2000 poses with all scores and display the first 2

        >>> DH.rescorePoses(2000, type_score = "all")
        >>> for p in DH.poses[:2]:
        >>>     print(p.index, p.rescoring)
        1 {'CONSRANK_U': 38.180000000000014, 'CONSRANK': 0.4061702127659576, 'contact_log_sum': -94.55436888131206, 'contact_square_sum': 17.90280000000001, 'residue_sum': 42.06, 'residue_average': 0.7378947368421053, 'residue_log_sum': -20.625622826512405, 'residues_square_sum': 33.8476}
        2 {'CONSRANK_U': 38.00000000000001, 'CONSRANK': 0.44186046511627913, 'contact_log_sum': -75.18567402702058, 'contact_square_sum': 18.285600000000002, 'residue_sum': 40.12, 'residue_average': 0.7569811320754717, 'residue_log_sum': -17.346104879674645, 'residues_square_sum': 32.776}
    """
    logging.info(
        f'== Rescore poses ==\nNumber of poses : {nb_poses}\n Scores : {type_score}')

    if nb_poses > self._nb_cmap_poses:
        raise error.IncompatiblePoseNumber(
            f"Impossible to rescore {nb_poses} poses, only {self._nb_cmap_poses} have contact map")

    if not self.freq:
        raise error.FrequenciesNotComputed(
            "Frequencies doesn't exist. Call computeFrequencies first.")

    # Resolve (and validate) the scores before touching any state.
    available_scores = self.freq.available_scores
    if type_score == "all":
        scores_to_compute = available_scores
    elif type_score not in available_scores:
        raise error.InvalidScore(f"{type_score} score is not valid")
    else:
        scores_to_compute = {type_score: available_scores[type_score]}

    # Must be set before iterating: self.rescored_poses is read below and
    # presumably depends on this counter.
    self._nb_rescored_poses = nb_poses

    for pose in self.rescored_poses:
        for score, score_info in scores_to_compute.items():
            pose.computeScore(score, *score_info)