def _ui_callback(self, descr, pdb, cid, seq_marker):
    """Gather every matcher result for one query into a single dict.

    Args:
        descr: Key into ``self.matchers`` selecting the primary matcher.
        pdb, cid, seq_marker: Passed unchanged to ``calculate_single`` and
            to ``self.segment_calculater.query``.

    Returns:
        dict with the keys

        * ``'residues'``: each entry of ``return_val.res`` translated through
          ``AA3_to_AA1``; names missing from that mapping become ``'X'``.
        * ``'alphabet'``: the residue names themselves; names missing from
          ``AA3_to_AA1`` are replaced by ``"TYR"``.
        * ``'per_point'``: ``{sno: values}`` from the matcher for ``descr``.
        * ``'generic'``: ``defaultdict`` of ``{descr_local: {sno: values}}``
          with one entry per matcher in ``self.matchers_generic`` that
          returned at least one item.
        * ``'segment'``: result of ``self.segment_calculater.query``, or
          ``None`` if that call raised.

    Raises:
        Exception: if ``descr`` is not a key of ``self.matchers``.
    """
    if descr not in self.matchers:
        error_msg = (f"{descr} not found in list of matches. "
                     f"This is an internal error.")
        raise Exception(error_msg)

    return_val = calculate_single(pdb, cid, seq_marker)
    point_scores = self.matchers[descr].query(return_val)
    # Matcher results are keyed by index into this array of sno values.
    sno_values = return_val.sno.values
    residues = return_val.res

    output = dict()
    output['residues'] = [
        AA3_to_AA1[res] if res in AA3_to_AA1 else 'X'
        for res in residues.values
    ]
    # NOTE(review): why "TYR" is the stand-in for unknown residue names is
    # not explained anywhere in this block.
    output['alphabet'] = [
        res if res in AA3_to_AA1 else "TYR" for res in residues
    ]

    output['per_point'] = {
        sno_values[term]: values for term, values in point_scores.items()
    }

    output['generic'] = defaultdict(dict)
    for descr_local, matcher in self.matchers_generic.items():
        generic_scores = matcher.query(return_val)
        generic_sno = return_val.sno.values
        for term, values in generic_scores.items():
            output['generic'][descr_local][generic_sno[term]] = values

    # Segment scoring is best-effort: a failure is reported and the rest of
    # the output is still returned. Only ``Exception`` is caught so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        segmented_scores = self.segment_calculater.query(
            descr, pdb, cid, seq_marker)
    except Exception:
        print(f"segment_calculater.query failed with error"
              f" {traceback.format_exc()}")
        output['segment'] = None
    else:
        output['segment'] = segmented_scores
    return output
def _ui_callback(pdb, cid, seq_marker, matcher):
    """Score one descriptor with ``matcher`` and key the result by sno.

    ``calculate_single(pdb, cid, seq_marker)`` builds the descriptor,
    ``matcher.query`` scores it, and each key of the matcher's result is
    translated through the descriptor's ``sno`` values.

    Returns:
        ``(0, per_point)`` where ``per_point`` maps each sno value to the
        matcher's entry for that position. The first element is always 0.
    """
    descriptor = calculate_single(pdb, cid, seq_marker)
    matches = matcher.query(descriptor)
    sno_lookup = descriptor.sno.values
    per_point = {sno_lookup[idx]: hits for idx, hits in matches.items()}
    return 0, per_point


# Disabled scratch driver: a prototype of the segment search with hard-coded
# window sizes (5, then 3) and rank cutoffs (10 and 30).
#
# pdb = '1a72'
# cid = 'A'
# seq_marker = '187'
# matcher = get_matcher()
#
# ret_code, output = _ui_callback(pdb, cid, seq_marker, matcher)
#
# score_map = [dict() for __ in range(len(output))]
# top_candidates = set()
# top_5_scores = np.ones(len(output), dtype=float)
#
#
# for i, value in enumerate(output.values()):
#     for j, (identifier, score) in enumerate(value):
#         id_str = (identifier[0], identifier[1], identifier[2])
#         if j < 10:
#             top_candidates.add(id_str)
#         if j < 30:
#             top_5_scores[i] = min(top_5_scores[i], score)
#         score_map[i][id_str] = score
#
# segment_length = 5
# best_score_arr = []
#
# for i in range(len(output) - segment_length):
#     max_score, max_cand = 0, None
#     for candidate in top_candidates:
#         cand_score = 0
#         for j in range(segment_length):
#             try:
#                 cand_score += score_map[i+j][candidate]
#             except KeyError:
#                 continue
#         if cand_score > max_score:
#             max_score = cand_score
#             max_cand = candidate
#     best_score_arr.append([max_score, max_cand])
#
# best_i = np.argmax([i[0] for i in best_score_arr])
# best_candidate = best_score_arr[best_i][1]
# # extend on both ends
# # to the left
# left_i, right_i = best_i, best_i+segment_length
# while left_i >= 0:
#     if score_map[left_i][best_candidate] < top_5_scores[left_i]:
#         break
#     left_i -= 1
#
# while right_i < len(output):
#     if score_map[right_i][best_candidate] < top_5_scores[right_i]:
#         break
#     right_i += 1
#
# candidate_output = [None for __ in range(len(output))]
# for i in range(left_i, right_i+1):
#     candidate_output[i] = [best_candidate, score_map[i][best_candidate],
#                            best_score_arr[best_i][0]]
#
# segment_length = 3
#
# while left_i >= segment_length:
#     seg_left, seg_right = left_i - segment_length, left_i
#     max_score, max_cand = 0, None
#     for candidate in top_candidates:
#         cand_score = 0
#         for j in range(seg_left, seg_right):
#             try:
#                 cand_score += score_map[j][candidate]
#             except KeyError:
#                 continue
#         if cand_score > max_score:
#             max_score = cand_score
#             max_cand = candidate
#
#     while seg_left >= 0:
#         if score_map[seg_left][max_cand] < top_5_scores[seg_left]:
#             break
#         seg_left -= 1
#     seg_left = max(seg_left, 0)
#     for i in range(seg_left, seg_right):
#         candidate_output[i] = [max_cand, score_map[i][max_cand], max_score]
#     left_i = seg_left
#
# while right_i <= len(output) - segment_length:
#     seg_left, seg_right = right_i, right_i+segment_length
#     max_score, max_cand = 0, None
#     for candidate in top_candidates:
#         cand_score = 0
#         for j in range(seg_left, seg_right):
#             try:
#                 cand_score += score_map[j][candidate]
#             except KeyError:
#                 continue
#         if cand_score > max_score:
#             max_score = cand_score
#             max_cand = candidate
#
#     while seg_right < len(output):
#         if score_map[seg_right][max_cand] < top_5_scores[seg_right]:
#             break
#         seg_right += 1
#
#     for i in range(seg_left, seg_right):
#         candidate_output[i] = [max_cand, score_map[i][max_cand], max_score]
#     right_i = seg_right
#
# for term in candidate_output:
#     print(term)
def query(self, descr, pdb, cid, seq_marker):
    """Assign a best-matching candidate to each position of a descriptor.

    The descriptor built by ``calculate_single(pdb, cid, seq_marker)`` is
    scored with ``self.matchers[descr]``. The window of
    ``self.segment_len`` consecutive positions with the highest summed
    candidate score seeds the result; the seed is widened on both sides
    until its candidate's score falls below the per-position cutoff. The
    positions left and right of the seed are then covered by further
    windows of ``self.extension`` positions, each widened the same way.

    Args:
        descr: Key into ``self.matchers``.
        pdb, cid, seq_marker: Passed unchanged to ``calculate_single``.

    Returns:
        dict keyed by the descriptor's sno values, in matcher order. Each
        value is ``[candidate, score, segment_score]`` where ``candidate``
        is the 3-tuple built from the first three fields of a matcher
        identifier (or ``None`` if no candidate scored above zero in the
        window), or ``[]`` for positions that no segment covered.

    Raises:
        Exception: if ``calculate_single`` fails (original error chained).
        KeyError: if ``descr`` is not in ``self.matchers``, or if the seed
            candidate has no score at a position probed while widening or
            filling the seed segment.
        ValueError: from ``np.argmax`` when there is no seed window at all,
            i.e. the number of positions is not larger than
            ``self.segment_len``.
    """
    matcher = self.matchers[descr]
    try:
        return_val = calculate_single(pdb, cid, seq_marker)
    except Exception as err:
        msg = f"query failed with error {traceback.format_exc()}"
        raise Exception(msg) from err

    p_all_sno = matcher.query(return_val)
    sno_list = return_val.sno.values
    # Re-key the matcher output from relative index to actual sno value.
    sno_probs = {
        sno_list[relative_sno]: probs
        for relative_sno, probs in p_all_sno.items()
    }
    descr_length = len(sno_probs)

    # score_map[i]      : {candidate: score} for position i
    # top_candidates    : candidates ranked within the first
    #                     ``num_top_candidates`` entries at any position
    # top_n_scores[i]   : smallest score among the first
    #                     ``extension_cutoff`` entries at position i
    #                     (capped at 1.0); used as the widening cutoff
    # NOTE(review): this presumes each ``probs`` list is ordered
    # best-first -- confirm against the matcher.
    score_map = [dict() for __ in range(descr_length)]
    top_candidates = set()
    top_n_scores = np.ones(descr_length, dtype=float)
    for i, probs in enumerate(sno_probs.values()):
        for j, (identifier, score) in enumerate(probs):
            id_str = (identifier[0], identifier[1], identifier[2])
            if j < self.num_top_candidates:
                top_candidates.add(id_str)
            if j < self.extension_cutoff:
                top_n_scores[i] = min(top_n_scores[i], score)
            score_map[i][id_str] = score

    candidate_output = [[] for __ in range(descr_length)]

    def best_in_window(start, stop):
        """Return (summed score, candidate) of the best candidate over
        positions [start, stop); positions lacking the candidate add 0."""
        max_score, max_cand = 0, None
        for candidate in top_candidates:
            cand_score = sum(
                score_map[j][candidate]
                for j in range(start, stop)
                if candidate in score_map[j]
            )
            if cand_score > max_score:
                max_score, max_cand = cand_score, candidate
        return max_score, max_cand

    def below_cutoff(pos, candidate):
        """True when candidate has a score at pos that is under the cutoff.
        A candidate with no score at pos does not stop the widening."""
        scores = score_map[pos]
        return candidate in scores and scores[candidate] < top_n_scores[pos]

    def fill(start, stop, candidate, segment_score):
        """Record candidate for positions [start, stop); 0.01 stands in for
        positions where the candidate has no score."""
        for pos in range(start, stop):
            candidate_output[pos] = [
                candidate,
                score_map[pos].get(candidate, 0.01),
                segment_score,
            ]

    # --- seed segment -----------------------------------------------------
    best_scores = [
        best_in_window(i, i + self.segment_len)
        for i in range(descr_length - self.segment_len)
    ]
    # int() turns the numpy integer into a plain index.
    best_i = int(np.argmax([entry[0] for entry in best_scores]))
    best_score, best_candidate = best_scores[best_i]

    # Widen the seed on both ends while the candidate stays at or above
    # the cutoff.
    left_i, right_i = best_i, best_i + self.segment_len
    while left_i > 0:
        if score_map[left_i][best_candidate] < top_n_scores[left_i]:
            break
        left_i -= 1
    while right_i < descr_length - 1:
        if score_map[right_i][best_candidate] < top_n_scores[right_i]:
            break
        right_i += 1

    for i in range(left_i, right_i + 1):
        candidate_output[i] = [
            best_candidate, score_map[i][best_candidate], best_score
        ]

    # --- cover the positions left of the seed -------------------------------
    while left_i >= self.extension:
        seg_left, seg_right = left_i - self.extension, left_i
        max_score, max_cand = best_in_window(seg_left, seg_right)
        # score_map is a list, so the position is tested by range only;
        # a membership test of the index against the list is never true
        # and would let every segment run to position 0.
        while seg_left >= 0 and not below_cutoff(seg_left, max_cand):
            seg_left -= 1
        seg_left = max(seg_left, 0)
        fill(seg_left, seg_right, max_cand, max_score)
        left_i = seg_left

    # --- cover the positions right of the seed ------------------------------
    # Same tolerance for missing candidates as on the left-hand side.
    while right_i <= descr_length - self.extension:
        seg_left, seg_right = right_i, right_i + self.extension
        max_score, max_cand = best_in_window(seg_left, seg_right)
        while (seg_right < descr_length
               and not below_cutoff(seg_right, max_cand)):
            seg_right += 1
        fill(seg_left, seg_right, max_cand, max_score)
        right_i = seg_right

    return dict(zip(sno_probs.keys(), candidate_output))