def computeScore(self, name_score: str, *score_info):
    """Compute one score and store it in ``self.rescoring`` under ``name_score``.

    Args:
        name_score (str): Name of the score; used as the key in
            ``self.rescoring``.
        *score_info: Stored information describing the score, positionally:

            * ``score_info[0]`` (required): global type of the score,
              ``"contacts"`` or ``"residues"``.
            * ``score_info[1]`` (required): function used to compute the
              score. It is called with ``self.contacts`` for a contacts
              score, and with ``(self.residues_interface, score_info[2])``
              for a residues score.
            * ``score_info[2]`` (required for ``"residues"``): list of
              entities to compute with (``["ligand"]``, ``["receptor"]`` or
              both ``["ligand", "receptor"]``).

    Raises:
        error.InvalidArgument: If the score type or score function is
            missing, or if a residues score is given without its roles.
        error.InvalidScore: If the global type of the score is neither
            ``"contacts"`` nor ``"residues"``.
    """
    # Fail with a descriptive error instead of a bare IndexError when the
    # score definition is malformed.
    if len(score_info) < 2:
        raise error.InvalidArgument(
            f"Error in Frequencies.available_scores definitions. Need score type and score function for score {name_score}"
        )

    type_score, score_fn = score_info[0], score_info[1]

    if type_score == "contacts":
        score = score_fn(self.contacts)
    elif type_score == "residues":
        if len(score_info) <= 2:
            raise error.InvalidArgument(
                f"Error in Frequencies.available_scores definitions. Need roles argument for score {name_score}"
            )
        score = score_fn(self.residues_interface, score_info[2])
    else:
        raise error.InvalidScore(
            f"{type_score} is invalid. Choose contacts or residues")

    self.rescoring[name_score] = score
def serializeRescoring(self, output_file: str, type_score: List[str] = ["residue_sum", "residue_average", "residue_log_sum", "residue_square_sum", "CONSRANK_U", "CONSRANK", "contact_log_sum", "contact_square_sum"]):
    """Write rescoring results to a file.

    The first line is a comment that summarises the number of poses used.
    The second line is the header: pose index followed by the score names.
    Each following line holds the scores of one rescored pose.

    Args:
        output_file (str): File to write results to.
        type_score (List[str], optional): Scores to write, in the order
            they should appear in the file. The string ``"all"`` selects
            every score in ``self.freq.available_scores``. Defaults to
            ["residue_sum", "residue_average", "residue_log_sum",
            "residue_square_sum", "CONSRANK_U", "CONSRANK",
            "contact_log_sum", "contact_square_sum"].

    Raises:
        error.InvalidScore: If a requested score is not in
            ``self.freq.available_scores``.
        error.RescoringNotComputed: If no pose has been rescored yet.
    """
    # NOTE: the default list is only read, never mutated, so sharing it
    # between calls is harmless here.
    if type_score == "all":
        scores_to_write = self.freq.available_scores
    else:
        scores_to_write = {}
        for score in type_score:
            if score not in self.freq.available_scores:
                raise error.InvalidScore(f"{score} score is not valid")
            scores_to_write[score] = self.freq.available_scores[score]

    if not self._nb_rescored_poses:
        raise error.RescoringNotComputed(
            "Poses has not been rescored, call rescorePoses")

    # Context manager guarantees the file is closed even if serialising a
    # pose raises part-way through.
    with open(output_file, "w") as out:
        out.write(
            f"#Rescoring of {self._nb_rescored_poses} poses with frequencies computed on {self.freq.nb_poses_used} poses.\n")
        out.write("#Pose\t" + "\t".join(scores_to_write) + "\n")
        for p in self.rescored_poses:
            out.write(f"{p.index}{p.serializeScores(scores_to_write)}\n")

    logging.info(f"Scores writes to {output_file}")
def getScore(self, score) -> float:
    """Return the stored value of a single score.

    Args:
        score: Name of the score to look up in ``self.rescoring``.

    Raises:
        error.InvalidScore: If the score is not present in
            ``self.rescoring`` (invalid name or not computed yet).

    Returns:
        float: The score value.
    """
    if score in self.rescoring:
        return self.rescoring[score]
    raise error.InvalidScore(
        f"{score} score is invalid or not computed")
def serializeScores(self, scores: List[str]) -> str:
    """Build the string representation of the requested scores.

    Args:
        scores (List[str]): Names of the scores to serialise.

    Raises:
        error.InvalidScore: If one of the scores is not present in
            ``self.rescoring`` (invalid name or not computed yet).

    Returns:
        str: The score values in the order given by ``scores``, each one
        preceded by a tab character. Empty string when ``scores`` is empty.
    """
    fields = []
    for name in scores:
        try:
            value = self.rescoring[name]
        except KeyError:
            raise error.InvalidScore(
                f"{name} score is invalid or not computed")
        fields.append(f"\t{value}")
    return "".join(fields)
def getRankedClusterRepresentatives(self, ranked_by: str) -> List[Tuple[int, 'DockingPP.pose.Pose']]:
    """Get clusters representatives in decreasing order of given score

    Args:
        ranked_by (str): Score whose clusters are requested; must be a key
            of ``self.clusters``.

    Raises:
        error.ClustersNotComputed : Raise when clusters are not computed
        error.InvalidScore: Raise if score is not computed or invalid

    Returns:
        List[Tuple[int, DockingPP.pose.Pose]]: List of tuple where first
        element is the cluster number (starting at 0) and second is the
        representative pose.

    Examples:
        For 1BJ1, get CONSRANK_U clusters representatives and display the first 2 indexes

        >>> representatives = DH.getRankedClusterRepresentatives("CONSRANK_U")
        >>> for clust in representatives[:2]:
        >>>     print("cluster", clust[0], "representative index", clust[1].index)
        cluster 0 representative index 16
        cluster 1 representative index 74
    """
    if not self.clusters:
        raise error.ClustersNotComputed(
            "Clusters have not been computed. Call clusterPoses first")
    if ranked_by not in self.clusters:
        raise error.InvalidScore(
            f"{ranked_by} cluster is invalid or not computed")

    # Cluster numbers are simply the positions in the stored sequence.
    return list(enumerate(self.clusters[ranked_by]))
def rescorePoses(self, nb_poses: int, type_score: str):
    """Rescore N poses according to given type_score. A new score will be computed for each pose

    Args:
        nb_poses (int): Number of poses to rescore
        type_score (str): Score to use

    Available type score
        * CONSRANK_U : sum of relative frequencies of each contact of the pose
        * CONSRANK : CONSRANK_U normalised by number of contacts in the pose
        * contact_log_sum : sum of log of relative frequencies of each contact of the pose
        * contact_square_sum : sum of square of relative frequencies of each contact of the pose
        * residue_sum : sum of relative frequencies of each interface residue (ligand and receptor) of the pose
        * residue_sum_ligand : sum of relative frequencies for ligand interface residues
        * residue_sum_receptor : sum of relative frequencies for receptor interface residues
        * residue_average : residue_sum normalised by number of interface residues in the pose
        * residue_average_ligand : residue_sum normalised by number of interface residues in the pose, just for ligand residues
        * residue_average_receptor : residue_sum normalised by number of interface residues in the pose, just for receptor residues
        * residue_log_sum : sum of log of relative frequencies of each interface residue of the pose
        * residue_square_sum : sum of square of relative frequencies of each interface residue of the pose
        * all : to compute all above scores

    Raises:
        error.IncompatiblePoseNumber: Raise if you try to rescore more poses than poses with contact map
        error.FrequenciesNotComputed: Raise if frequencies have not been computed.
        error.InvalidScore: Raise if you give an invalid type score.

    Examples:
        For 1BJ1, rescore 2000 poses with all scores and display the first 2

        >>> DH.rescorePoses(2000, type_score = "all")
        >>> for p in DH.poses[:2]:
        >>>     print(p.index, p.rescoring)
        1 {'CONSRANK_U': 38.180000000000014, 'CONSRANK': 0.4061702127659576, 'contact_log_sum': -94.55436888131206, 'contact_square_sum': 17.90280000000001, 'residue_sum': 42.06, 'residue_average': 0.7378947368421053, 'residue_log_sum': -20.625622826512405, 'residues_square_sum': 33.8476}
        2 {'CONSRANK_U': 38.00000000000001, 'CONSRANK': 0.44186046511627913, 'contact_log_sum': -75.18567402702058, 'contact_square_sum': 18.285600000000002, 'residue_sum': 40.12, 'residue_average': 0.7569811320754717, 'residue_log_sum': -17.346104879674645, 'residues_square_sum': 32.776}
    """
    logging.info(
        f'== Rescore poses ==\nNumber of poses : {nb_poses}\n Scores : {type_score}')

    if nb_poses > self._nb_cmap_poses:
        raise error.IncompatiblePoseNumber(
            f"Impossible to rescore {nb_poses} poses, only {self._nb_cmap_poses} have contact map")

    if not self.freq:
        raise error.FrequenciesNotComputed(
            "Frequencies doesn't exist. Call computeFrequencies first.")

    if type_score == "all":
        scores_to_compute = self.freq.available_scores
    else:
        if type_score not in self.freq.available_scores:
            raise error.InvalidScore(f"{type_score} score is not valid")
        scores_to_compute = {
            type_score: self.freq.available_scores[type_score]}

    # Record the number of rescored poses only once every argument has been
    # validated, so a rejected call does not leave the object claiming that
    # poses were rescored. It must still be set before reading
    # self.rescored_poses below.
    self._nb_rescored_poses = nb_poses

    for pose in self.rescored_poses:
        for score, score_info in scores_to_compute.items():
            pose.computeScore(score, *score_info)