コード例 #1
0
    def _ui_callback(self, descr, pdb, cid, seq_marker):
        """Collect every score the UI needs for one descriptor request.

        Args:
            descr: Key into ``self.matchers`` selecting the primary matcher.
            pdb, cid, seq_marker: Passed unchanged to ``calculate_single``
                and to ``self.segment_calculater.query``.

        Returns:
            dict with the keys:
                'residues': one-letter codes looked up in ``AA3_to_AA1``;
                    residues missing from that table become 'X'.
                'alphabet': the original residue names; residues missing
                    from ``AA3_to_AA1`` are replaced by "TYR".
                'per_point': result of the primary matcher, re-keyed by the
                    descriptor's ``sno`` values.
                'generic': ``defaultdict(dict)`` holding, per entry of
                    ``self.matchers_generic``, that matcher's result
                    re-keyed the same way.
                'segment': result of ``self.segment_calculater.query``, or
                    ``None`` if that call raised.

        Raises:
            Exception: if ``descr`` is not a key of ``self.matchers``.
        """
        if descr not in self.matchers:
            error_msg = f"{descr} not found in list of matches. " \
                        f"This is an internal error."
            raise Exception(error_msg)

        return_val = calculate_single(pdb, cid, seq_marker)
        p_all_sno = self.matchers[descr].query(return_val)
        # Matcher results are keyed by position; this array translates a
        # position into the descriptor's own `sno` value.
        return_val_sno = return_val.sno.values
        residues = return_val.res

        output = dict()
        output['residues'] = [
            AA3_to_AA1[res] if res in AA3_to_AA1 else 'X'
            for res in residues.values
        ]
        output['alphabet'] = [
            res if res in AA3_to_AA1 else "TYR"
            for res in residues
        ]
        output['per_point'] = {
            return_val_sno[term]: values
            for term, values in p_all_sno.items()
        }

        output['generic'] = defaultdict(dict)
        for descr_local, matcher in self.matchers_generic.items():
            generic_scores = matcher.query(return_val)
            # Assign through the defaultdict so that a matcher returning
            # nothing leaves no entry behind.
            for term, values in generic_scores.items():
                output['generic'][descr_local][return_val_sno[term]] = values

        # Segment scoring is best-effort: a failure is reported and the
        # remaining output is still returned. Only Exception is caught so
        # that KeyboardInterrupt / SystemExit are not swallowed.
        try:
            segmented_scores = self.segment_calculater.query(
                descr, pdb, cid, seq_marker)
        except Exception:
            print(f"segment_calculater.query failed with error"
                  f" {traceback.format_exc()}")
            output['segment'] = None
        else:
            output['segment'] = segmented_scores
        return output
コード例 #2
0
def _ui_callback(pdb, cid, seq_marker, matcher):
    """Score one descriptor with ``matcher`` and key the result by ``sno``.

    The descriptor comes from ``calculate_single(pdb, cid, seq_marker)``.
    ``matcher.query`` returns a mapping keyed by position; each position is
    translated through the descriptor's ``sno`` values.

    Returns:
        A ``(0, per_point)`` tuple, where ``per_point`` maps each ``sno``
        value to whatever the matcher returned for that position.
    """
    descriptor = calculate_single(pdb, cid, seq_marker)
    matches_by_position = matcher.query(descriptor)
    sno_values = descriptor.sno.values
    per_point = {
        sno_values[position]: matches
        for position, matches in matches_by_position.items()
    }
    return 0, per_point


# pdb = '1a72'
# cid = 'A'
# seq_marker = '187'
# matcher = get_matcher()
#
# ret_code, output = _ui_callback(pdb, cid, seq_marker, matcher)
#
# score_map = [dict() for __ in range(len(output))]
# top_candidates = set()
# top_5_scores = np.ones(len(output), dtype=float)
#
#
# for i, value in enumerate(output.values()):
#     for j, (identifier, score) in enumerate(value):
#         id_str = (identifier[0], identifier[1], identifier[2])
#         if j < 10:
#             top_candidates.add(id_str)
#         if j < 30:
#             top_5_scores[i] = min(top_5_scores[i], score)
#         score_map[i][id_str] = score
#
# segment_length = 5
# best_score_arr = []
#
# for i in range(len(output) - segment_length):
#     max_score, max_cand = 0, None
#     for candidate in top_candidates:
#         cand_score = 0
#         for j in range(segment_length):
#             try:
#                 cand_score += score_map[i+j][candidate]
#             except KeyError:
#                 continue
#         if cand_score > max_score:
#             max_score = cand_score
#             max_cand = candidate
#     best_score_arr.append([max_score, max_cand])
#
# best_i = np.argmax([i[0] for i in best_score_arr])
# best_candidate = best_score_arr[best_i][1]
# # extend on both ends
# # to the left
# left_i, right_i = best_i, best_i+segment_length
# while left_i >= 0:
#     if score_map[left_i][best_candidate] < top_5_scores[left_i]:
#         break
#     left_i -= 1
#
# while right_i < len(output):
#     if score_map[right_i][best_candidate] < top_5_scores[right_i]:
#         break
#     right_i += 1
#
# candidate_output = [None for __ in range(len(output))]
# for i in range(left_i, right_i+1):
#     candidate_output[i] = [best_candidate, score_map[i][best_candidate],
#                            best_score_arr[best_i][0]]
#
# segment_length = 3
#
# while left_i >= segment_length:
#     seg_left, seg_right = left_i - segment_length, left_i
#     max_score, max_cand = 0, None
#     for candidate in top_candidates:
#         cand_score = 0
#         for j in range(seg_left, seg_right):
#             try:
#                 cand_score += score_map[j][candidate]
#             except KeyError:
#                 continue
#         if cand_score > max_score:
#             max_score = cand_score
#             max_cand = candidate
#
#     while seg_left >= 0:
#         if score_map[seg_left][max_cand] < top_5_scores[seg_left]:
#             break
#         seg_left -= 1
#     seg_left = max(seg_left, 0)
#     for i in range(seg_left, seg_right):
#         candidate_output[i] = [max_cand, score_map[i][max_cand], max_score]
#     left_i = seg_left
#
# while right_i <= len(output) - segment_length:
#     seg_left, seg_right = right_i, right_i+segment_length
#     max_score, max_cand = 0, None
#     for candidate in top_candidates:
#         cand_score = 0
#         for j in range(seg_left, seg_right):
#             try:
#                 cand_score += score_map[j][candidate]
#             except KeyError:
#                 continue
#         if cand_score > max_score:
#             max_score = cand_score
#             max_cand = candidate
#
#     while seg_right < len(output):
#         if score_map[seg_right][max_cand] < top_5_scores[seg_right]:
#             break
#         seg_right += 1
#
#     for i in range(seg_left, seg_right):
#         candidate_output[i] = [max_cand, score_map[i][max_cand], max_score]
#     right_i = seg_right
#
# for term in candidate_output:
#     print(term)
コード例 #3
0
    def query(self, descr, pdb, cid, seq_marker):
        """Assign a best-matching candidate to the positions of a descriptor.

        The descriptor is computed with ``calculate_single`` and scored by
        ``self.matchers[descr]``. A primary segment is the window of
        ``self.segment_len`` positions in which a single candidate reaches
        the highest summed score; it is then grown in both directions while
        that candidate stays at or above the per-position cutoff. The
        positions left and right of it are covered by further windows of
        ``self.extension`` positions, each grown in the same way.

        Args:
            descr: Key into ``self.matchers``.
            pdb, cid, seq_marker: Passed unchanged to ``calculate_single``.

        Returns:
            dict keyed by the descriptor's ``sno`` values, in matcher order.
            Each value is ``[candidate, score, segment_score]``, or ``[]``
            for a position that no segment covered. ``candidate`` is a
            3-tuple built from the matcher's identifier, or ``None`` when no
            candidate scored above 0 in a window.

        Raises:
            KeyError: if ``descr`` is not in ``self.matchers``, or if the
                primary candidate has no score at a position inspected
                while the primary segment is grown or written out.
            Exception: if ``calculate_single`` fails; the original error is
                chained as ``__cause__``.
            ValueError: if the descriptor has no more than
                ``self.segment_len`` positions, so no window can be placed.
        """
        matcher = self.matchers[descr]
        try:
            return_val = calculate_single(pdb, cid, seq_marker)
        except Exception as err:
            msg = f"query failed with error {traceback.format_exc()}"
            raise Exception(msg) from err

        p_all_sno = matcher.query(return_val)
        sno_list = return_val.sno.values
        # Re-key the matcher output from relative position to actual sno.
        sno_probs = {
            sno_list[relative_sno]: probs
            for relative_sno, probs in p_all_sno.items()
        }

        descr_length = len(sno_probs)
        score_map = [dict() for __ in range(descr_length)]
        top_candidates = set()
        # Per-position cutoff: the lowest score among the first
        # `extension_cutoff` entries, never above the initial 1.0.
        top_n_scores = np.ones(descr_length, dtype=float)

        # NOTE(review): entries are presumably ranked best-first; the code
        # itself only relies on their enumeration order -- confirm.
        for i, probs in enumerate(sno_probs.values()):
            for j, (identifier, score) in enumerate(probs):
                id_str = (identifier[0], identifier[1], identifier[2])
                if j < self.num_top_candidates:
                    top_candidates.add(id_str)
                if j < self.extension_cutoff:
                    top_n_scores[i] = min(top_n_scores[i], score)
                score_map[i][id_str] = score

        # Placeholder score reported when the chosen candidate has no entry
        # at a position (value taken over from the existing left-hand pass).
        missing_score = 0.01

        def best_in_window(start, stop):
            """Return (score, candidate) with the highest summed score over
            positions [start, stop); (0, None) if nothing scores above 0."""
            max_score, max_cand = 0, None
            for candidate in top_candidates:
                cand_score = 0
                for j in range(start, stop):
                    # Positions where the candidate is absent add nothing.
                    cand_score += score_map[j].get(candidate, 0)
                if cand_score > max_score:
                    max_score = cand_score
                    max_cand = candidate
            return max_score, max_cand

        def below_cutoff(pos, candidate):
            """True if `candidate` has a score at `pos` under the cutoff."""
            scores_here = score_map[pos]
            return candidate in scores_here \
                and scores_here[candidate] < top_n_scores[pos]

        best_scores = [
            best_in_window(i, i + self.segment_len)
            for i in range(descr_length - self.segment_len)
        ]
        if not best_scores:
            raise ValueError(
                f"descriptor has {descr_length} positions; more than "
                f"{self.segment_len} are needed to place a segment")
        best_i = int(np.argmax([score for score, __ in best_scores]))
        best_score, best_candidate = best_scores[best_i]

        # Grow the primary segment on both ends. These lookups are left
        # unguarded on purpose: a missing primary candidate raises KeyError.
        left_i, right_i = best_i, best_i + self.segment_len
        while left_i > 0:
            if score_map[left_i][best_candidate] < top_n_scores[left_i]:
                break
            left_i -= 1
        while right_i < descr_length - 1:
            if score_map[right_i][best_candidate] < top_n_scores[right_i]:
                break
            right_i += 1

        candidate_output = [[] for __ in range(descr_length)]
        for i in range(left_i, right_i + 1):
            candidate_output[i] = [
                best_candidate, score_map[i][best_candidate], best_score
            ]

        # Cover everything left of the primary segment, window by window.
        while left_i >= self.extension:
            seg_left, seg_right = left_i - self.extension, left_i
            max_score, max_cand = best_in_window(seg_left, seg_right)
            # score_map is a list, so the loop condition already bounds the
            # index; only the candidate's presence needs checking. Stop at
            # the first position where it falls under the cutoff.
            while seg_left >= 0:
                if below_cutoff(seg_left, max_cand):
                    break
                seg_left -= 1
            seg_left = max(seg_left, 0)
            for i in range(seg_left, seg_right):
                candidate_output[i] = [
                    max_cand,
                    score_map[i].get(max_cand, missing_score),
                    max_score,
                ]
            left_i = seg_left

        # Same on the right, with the same handling of missing candidates.
        while right_i <= descr_length - self.extension:
            seg_left, seg_right = right_i, right_i + self.extension
            max_score, max_cand = best_in_window(seg_left, seg_right)
            while seg_right < descr_length:
                if below_cutoff(seg_right, max_cand):
                    break
                seg_right += 1
            for i in range(seg_left, seg_right):
                candidate_output[i] = [
                    max_cand,
                    score_map[i].get(max_cand, missing_score),
                    max_score,
                ]
            right_i = seg_right

        return dict(zip(sno_probs.keys(), candidate_output))