Beispiel #1
0
 def _compute_fdr_score(self, conf):
     """Return the score threshold at which the decoy/target ratio meets ``conf.fdr_value``.

     Stores ``conf`` on ``self.conf``, loads the PSMs of every candidate
     result file through ``self._load_result`` and ranks them by descending
     ``pep.score``.  The ranked list is then trimmed from its low-scoring
     end, one PSM at a time, until the ratio of remaining decoys to
     remaining targets is at most ``conf.fdr_value``.

     Returns:
         ``0.0`` when no PSM was loaded; the lowest score in the list when
         the complete list already satisfies the limit; otherwise the score
         of the last PSM that had to be dropped.  When the limit is never
         reached, the best score plus ``1.0`` is returned.
     """
     self.conf = conf
     result_helper = CResultFunction(conf)
     candidate_files = result_helper.find_res_file(conf)
     print("[Info] Candidate file list", candidate_files)

     psm_list = []
     for candidate_path in candidate_files:
         psm_list.extend(self._load_result(candidate_path))
     psm_list.sort(key=lambda psm: psm[1].score, reverse=True)

     # Every PSM is either a decoy or a target, so one count yields the other.
     all_num = len(psm_list)
     decoy_num = sum(1 for (_title, _pep, decoy, _ranker) in psm_list if decoy)
     target_num = all_num - decoy_num
     print("[Info] #PSMs is {0}, #Target is {1} and #Decoy is {2}".format(all_num, target_num, decoy_num))
     if not psm_list:
         return 0.0

     # Fallback threshold: strictly above the best score in the list.
     score_t = psm_list[0][1].score + 1.0

     # The untrimmed list may already be within the limit.
     if target_num > 0 and float(decoy_num) / target_num <= conf.fdr_value:
         return psm_list[-1][1].score

     dropped_targets = dropped_decoys = 0
     for (_title, pep, decoy, _ranker) in reversed(psm_list):
         if decoy:
             dropped_decoys += 1
         else:
             dropped_targets += 1
         targets_left = target_num - dropped_targets
         if targets_left == 0:
             # No target remains; use a sentinel ratio instead of dividing by zero.
             fdr_value = 100
         else:
             fdr_value = float(decoy_num - dropped_decoys) / targets_left
         if fdr_value <= conf.fdr_value:
             return pep.score
     return score_t
Beispiel #2
0
    def _write_fdr_result(self, conf, fdr_score):
        """Write the filtered single-peptide PSM and peptide tables.

        Each file returned by ``find_single_res_file`` is read as a
        tab-separated table whose first line is the header.  A row is kept
        when column 8 equals ``"True"`` and its score (column 5) is greater
        than ``fdr_score``; reading of a file stops at the first such row
        whose score is not above the threshold (presumably rows are sorted by
        descending score -- verify against the writer of these files).

        Two files are written into ``conf.result_folder``: one row per
        spectrum title (column 0) and one row per peptide key (columns 2 and
        4 plus the rounded value of column 3), each ordered by descending
        score.  The path of the PSM file is stored in
        ``conf.single_res_path``.
        """
        res_helper = CResultFunction(conf)
        taskid = res_helper.get_taskid(conf, False)
        res_file_list = res_helper.find_single_res_file(conf)

        head_line = ""
        title_score, title_line = {}, {}
        pep_score, pep_line = {}, {}
        for res_path in res_file_list:
            with open(res_path) as f:
                lines = f.readlines()
            head_line = lines[0].strip()
            for raw_line in lines[1:]:
                line = raw_line.strip()
                tmp = line.split('\t')
                title = tmp[0]
                sq_key = '@'.join(
                    (tmp[2], tmp[4], str(int(float(tmp[3]) + 0.5))))
                if tmp[8] != "True":
                    continue
                s = float(tmp[5])
                if s <= fdr_score:
                    break
                title_score[title] = s
                title_line[title] = line
                # Keep only the best-scoring row for each peptide key.
                best_so_far = pep_score.get(sq_key)
                if best_so_far is None or best_so_far < s:
                    pep_score[sq_key] = s
                    pep_line[sq_key] = line

        # PSM level: one row per spectrum title, best score first.
        ranked_titles = sorted(title_score, key=title_score.get, reverse=True)
        write_path = os.path.join(conf.result_folder,
                                  conf.SINGLE_PSM + '-' + taskid + '.txt')
        conf.single_res_path = write_path
        with open(write_path, 'w') as f:
            f.write(head_line + '\n')
            f.writelines(title_line[t] + '\n' for t in ranked_titles)

        # Peptide level: one row per peptide key, best score first.
        ranked_keys = sorted(pep_score, key=pep_score.get, reverse=True)
        write_path = os.path.join(conf.result_folder,
                                  conf.SINGLE_PEP + '-' + taskid + '.txt')
        with open(write_path, 'w') as f:
            f.write(head_line + '\n')
            f.writelines(pep_line[k] + '\n' for k in ranked_keys)
Beispiel #3
0
 def _rerank_result(self, conf):
     """Re-score the PSMs of all result files through the SVM helper methods.

     The PSMs of every file returned by ``find_res_file`` are collected and
     handed to ``self._write_svm_file``.  ``self._svm_classify`` is then run
     with ``conf.MODEL_PATH``, the scores are read back with
     ``self._get_svm_score`` and passed, together with the result file list,
     to ``self._update_psm_score``.

     The feature file is written to ``conf.INPUT_SVM_FOLDER`` under the task
     id; the score file uses the same path with ``".txt"`` appended.
     """
     helper = CResultFunction(conf)
     source_files = helper.find_res_file(conf)
     psms = []
     for source_path in source_files:
         psms.extend(helper.load_psm(source_path))

     svm_input_path = os.path.join(conf.INPUT_SVM_FOLDER, helper.get_taskid(conf))
     print('aa', conf.INPUT_SVM_FOLDER, svm_input_path)
     self._write_svm_file(psms, svm_input_path)

     svm_output_path = svm_input_path + ".txt"
     self._svm_classify(svm_input_path, conf.MODEL_PATH, svm_output_path)

     scores = self._get_svm_score(svm_output_path)
     # One score is expected per PSM that was written to the feature file.
     assert len(scores) == len(psms)
     self._update_psm_score(source_files, scores)
Beispiel #4
0
 def _load_result(self, path):
     """Return the rank-1 PSMs loaded from ``path`` as ``(title, pep, decoy, ranker)``.

     Candidates are grouped by spectrum title.  A rank-1 target is returned
     unchanged.  A rank-1 decoy is reported as a target (``decoy`` is
     ``False``) when any candidate of the same title is a target with
     exactly the same ``score``; otherwise it is reported with ``decoy``
     set to ``True``.
     """
     res_helper = CResultFunction(self.conf)

     # Group the candidates of each spectrum, preserving the load order.
     by_title = {}
     for (title, pep, decoy, ranker, _delta_score) in res_helper.load_psm(path):
         by_title.setdefault(title, []).append((pep, decoy, ranker))

     psm_list = []
     for title, candidates in by_title.items():
         for (pep, decoy, ranker) in candidates:
             if ranker != 1:
                 continue
             if decoy:
                 # A decoy that ties with a target of the same spectrum is
                 # not counted as a decoy.
                 decoy = not any(
                     other_pep.score == pep.score and not other_decoy
                     for (other_pep, other_decoy, _other_rank) in candidates
                 )
             psm_list.append((title, pep, decoy, ranker))
     return psm_list
Beispiel #5
0
 def _load_result(self, path):
     """Return whatever ``CResultFunction.load_single_psm`` loads from ``path``.

     The helper is built from ``self.conf``, so ``self.conf`` must be set
     before this method is called.
     """
     return CResultFunction(self.conf).load_single_psm(path)
Beispiel #6
0
    def _write_fdr_result(self, conf, fdr_score):
        """Write the filtered cross-link PSM/peptide tables and their pLabel files.

        Each file returned by ``find_res_file(conf)`` is read as a
        tab-separated table whose first line is the header.  A row is kept
        when at least one of the ``;``-separated accessions in column 11 does
        not start with ``"REV_"`` and its score (column 12) is greater than
        ``fdr_score``.  Reading of a file stops at the first target row whose
        score is not above the threshold (presumably rows are sorted by
        descending score -- verify against the writer of these files).
        Empty files and blank rows are skipped.

        Four files are written into ``conf.result_folder``:

        * ``<conf.XLINK_PSM>-<taskid>.txt`` / ``.plabel`` -- one entry per
          spectrum title (column 0), ordered by descending score;
        * ``<conf.XLINK_PEP>-<taskid>.txt`` / ``.plabel`` -- one entry per
          peptide key (columns 2, 3, 4 and 6), keeping the best-scoring row
          of each key, ordered by descending score.

        The pLabel entries and the ``[Modification]`` section are copied from
        the files returned by ``find_res_file(conf, label_flag=True)``.

        Raises:
            KeyError: if a kept spectrum title has no ``name=`` entry in any
                of the pLabel input files.
        """
        res_func = CResultFunction(conf)
        taskid = res_func.get_taskid(conf, False)
        res_file_list = res_func.find_res_file(conf)
        res_plabel_file_list = res_func.find_res_file(conf, label_flag=True)

        head_line = ""
        title_score, title_line = {}, {}
        pep_score, pep_line = {}, {}
        for res_path in res_file_list:
            with open(res_path) as f:
                lines = f.readlines()
            if not lines:
                # An empty result file has neither a header nor rows.
                continue
            head_line = lines[0].strip()
            for raw_line in lines[1:]:
                line = raw_line.strip()
                if not line:
                    continue
                tmp = line.split('\t')
                title = tmp[0]
                sq_key = '@'.join((tmp[2], tmp[3], tmp[4], tmp[6]))
                # A row is a decoy only if every accession is a reversed one.
                if all(ac.startswith("REV_") for ac in tmp[11].split(';')):
                    continue
                s = float(tmp[12])
                if s <= fdr_score:
                    break
                title_score[title] = s
                title_line[title] = line
                if sq_key not in pep_score or pep_score[sq_key] < s:
                    pep_score[sq_key] = s
                    pep_line[sq_key] = line

        # Collect the pLabel line of every kept spectrum.  Peptide-level
        # titles are always a subset of the PSM-level titles, so a single
        # lookup table serves both output files.
        title_plabel_line = {}
        modification_lines = []
        for plabel_path in res_plabel_file_list:
            with open(plabel_path) as f:
                lines = f.readlines()
            for i, raw_line in enumerate(lines):
                if raw_line.startswith("name="):
                    # Split on the first '=' only: spectrum titles may
                    # themselves contain '=' (e.g. "scan=123").
                    title = raw_line.strip().partition('=')[2].strip()
                    if title in title_line:
                        title_plabel_line[title] = lines[i + 1].strip()
                elif raw_line.startswith("[Modification]"):
                    # The section runs until the first line without '='.
                    modification_lines = []
                    for mod_line in lines[i + 1:]:
                        if '=' not in mod_line:
                            break
                        modification_lines.append(mod_line.strip())

        def write_table(write_path, rows):
            """Write the header followed by ``rows``, one per line."""
            with open(write_path, 'w') as f:
                f.write(head_line + '\n')
                f.writelines(row + '\n' for row in rows)

        def write_plabel(write_path, titles):
            """Write a pLabel file with one ``[SpectrumN]`` entry per title."""
            with open(write_path, 'w') as f:
                f.write("[FilePath]\n")
                f.write("File_Path=" + ";".join(conf.mgf_file_list) + "\n")
                f.write("[Modification]\n")
                for mod_line in modification_lines:
                    f.write(mod_line + '\n')
                f.write("[xlink]\n")
                f.write("xlink=UAA\n")
                f.write("[Total]\n")
                f.write("total=" + str(len(titles)) + '\n')
                for spec_id, title in enumerate(titles, start=1):
                    f.write("[Spectrum" + str(spec_id) + "]\n")
                    f.write("name=" + title + "\n")
                    f.write(title_plabel_line[title] + "\n")

        # PSM level: one entry per spectrum title, best score first.
        psm_titles = sorted(title_score, key=title_score.get, reverse=True)
        psm_base = os.path.join(conf.result_folder, conf.XLINK_PSM + '-' + taskid)
        write_table(psm_base + '.txt', [title_line[t] for t in psm_titles])
        write_plabel(psm_base + '.plabel', psm_titles)

        # Peptide level: one entry per peptide key, best score first.
        pep_keys = sorted(pep_score, key=pep_score.get, reverse=True)
        pep_rows = [pep_line[k] for k in pep_keys]
        pep_titles = [row.split('\t')[0] for row in pep_rows]
        pep_base = os.path.join(conf.result_folder, conf.XLINK_PEP + '-' + taskid)
        write_table(pep_base + '.txt', pep_rows)
        write_plabel(pep_base + '.plabel', pep_titles)