def getBY(pep, stop):
    """Build the N-terminal (b) and C-terminal (y) mass ladders of a peptide.

    The peptide is cut after position 1, 2, ..., ``stop - 1``.  For every cut
    the mass of the left part minus ``H2O`` goes into the b ladder and the
    mass of the right part goes into the y ladder.  The mass of the whole
    peptide is then appended to the end of the b ladder and placed at the
    front of the y ladder.

    Args:
        pep: peptide sequence (sliceable, accepted by ``calcuSeqMass``).
        stop: exclusive upper bound for the cut position.

    Returns:
        A pair ``(bPeaks, yPeaks)`` as produced by ``np.round(..., 2)``.
    """
    nTermMasses = []
    cTermMasses = []
    for cut in range(1, stop):
        # Left part first, then right part: same evaluation order per cut.
        nTermMasses.append(calcuSeqMass(pep[:cut]) - H2O)
        cTermMasses.append(calcuSeqMass(pep[cut:]))

    # Whole-peptide mass closes the b ladder and opens the y ladder.
    wholeMass = calcuSeqMass(pep)
    bLadder = nTermMasses + [wholeMass]
    yLadder = [wholeMass] + cTermMasses
    return np.round(bLadder, 2), np.round(yLadder, 2)
def MILP(tagInfo, pepCandidate, oriMe, oriIe, pcMass, scanNo):
    """Run ``solveLP`` on every candidate peptide of a tag and keep the best.

    Args:
        tagInfo: indexable tag record; ``tagInfo[0]`` is the tag string and
            ``tagInfo[2]`` the tag peak positions,
            e.g. ``('KPPE', 1.19104, [1, 19, 47, 65, 87])``.
        pepCandidate: mapping from tag string to a list of peptide strings.
        oriMe: experimental masses, forwarded to ``solveLP`` unchanged.
        oriIe: experimental intensities.  NOTE: the first two entries are
            overwritten with 10 *in place*, so the caller's object changes.
        pcMass: precursor mass.
        scanNo: scan number, stored in the returned record.

    Returns:
        ``[scanNo, ma, xa, obj, tag, pep]`` for the candidate with the
        largest ``obj``, or ``[]`` when no candidate produced a solution.
    """
    tag, tagPos = tagInfo[0], tagInfo[2]

    # Fix the intensity of the first two peaks before solving.
    oriIe[0] = 10
    oriIe[1] = 10

    solved = []
    # ``+ []`` iterates over a shallow copy of the candidate list.
    for candidate in pepCandidate[tag] + []:
        # Rounded to 2 decimals; the original author notes that the rounding
        # is vital and may otherwise cause "not solved".
        massShift = round(pcMass - calcuSeqMass(candidate), 2)
        if abs(massShift) > 350:
            continue

        ma, xa, obj = solveLP(tag, candidate, tagPos, massShift, oriMe, oriIe,
                              Nmod=1, c=0.01, tau=0.01, thres=0.8)
        if len(ma) != 0:
            solved.append([scanNo, ma, xa, obj, tag, candidate])

    if not solved:
        return []

    # Highest objective value first (record[3] is ``obj``).
    ranked = sorted(solved, key=lambda record: -record[3])
    return ranked[0]
#y y = calcuSeqMass(right) yPeaks.append(y) #pc pepMass = calcuSeqMass(pep) bPeaks.append(pepMass) yPeaks.insert(0, pepMass) return np.round(bPeaks, 2), np.round(yPeaks, 2) ### TEST FUNCTION ### if __name__ == '__main__': pep = 'ANSPEK' #len 17 M = np.round(calcuSeqMass('ANSWEK') - calcuSeqMass(pep), 2) Nmod = 1 c = 0.001 # print(len(pep)) oriMe = [ 587.23] # oriMe = [0.,18.0105647,109.06473715,111.08071477,112.06435608,114.08025432,115.06338562,119.07467942,125.04877431,126.04283791,128.05822142,128.09559656,128.10553056,129.079721,135.05481089,138.0422402,138.07958107,139.0038664,140.05803308,146.10481195,148.038291,151.02754085,154.07464435,156.08941387,157.08605002,158.06935664,166.07362147,167.04950552,168.05422195,172.08538933,174.11215465,175.11467489,180.12634465,182.06844393,182.1413172,183.06420839,185.0802605,186.08374106,194.06797759,194.10675601,198.0999655,198.11839187,199.09765841,204.05397525,209.08106336,211.13264746,218.06970983,220.07682563,221.11586611,224.03740801,224.09137233,224.16187647,225.0363074,225.14919587,226.09460378,234.10912859,235.09381595,236.08123885,237.0746344,238.08935368,239.08801967,239.12672017,243.15918796,249.11149184,250.09384443,253.14889793,254.10141466,254.13968907,257.1392921,263.09142894,265.15560674,267.12324416,268.15923942,271.13732208,271.16352537,273.17609454,277.10733769,281.0427366,294.09319538,295.11669415,296.12353377,297.13191761,298.05142602,299.05563148,306.13128885,323.14850624,332.09897482,334.1279978,336.18624706,340.2106139,352.0771323,354.18813394,368.11160628,382.23610362,394.18896559,399.2602712,403.72702956,407.18657048,408.9290613,418.77233263,423.21191599,444.78403437,458.23409648,458.29012158,459.22749904,459.29095988,459.77853021,460.25747706,463.17317778,484.27691613,496.22751572,503.20043746,517.28021687,522.24091475,535.28853989,536.31483135,537.24979425,557.54355059,566.31168012,578.15983342,591.2709217,593.27249101,607.35714653,608.37032728,633.28453237,643.32898018,678.38416062,679.39328
58,685.35902657,688.33702065,689.34855098,735.41392007,761.37544554,786.40853884,788.41524851,806.45077357,806.45707755,814.39547242,858.42572707,935.48349048,936.48937142,1032.55035093,1129.60358695,1368.70725389,1479.30568205] oriIe = [100] # Iave = np.average(oriIe) # Me = np.round([oriMe[i] for i in range(len(oriMe)) if i not in info[2] and oriIe[i] >= 0.5*Iave], 2) # Ie = np.round([oriIe[i] for i in range(len(oriIe)) if i not in info[2] and oriIe[i] >= 0.5*Iave], 2) Me = oriMe Ie = oriIe S = 6 E = len(Me)
def _ionTriples(peaks, h2oPeaks, nh3Peaks, window):
    """Zip the plain, -H2O and -NH3 series restricted to the slice ``window``."""
    return list(zip(peaks[window], h2oPeaks[window], nh3Peaks[window]))


def _scorePepCandidate(mzs, reliableTag, xTag, pep, numOfPepCand, pcMass,
                       massTol, scanNo):
    """Score one candidate peptide against one tag.

    Returns the 12-field PSM tuple described in ``findPtm`` or ``None`` when
    the candidate is rejected.
    """
    tag, tagScore, index = reliableTag[0], reliableTag[1], reliableTag[2]
    lOfTag = len(xTag)

    # A leading '5' marks a two-character N-terminal token; it is collapsed
    # to the single placeholder residue 'x' whose mass is looked up from the
    # '5<AA>' entry of the shared residue-mass table.
    if pep[0] == '5':
        xPep = 'x' + pep[2:]
        N_AA_RES_MASS['x'] = N_AA_RES_MASS['5' + pep[1]]
    else:
        xPep = pep
    lOfPep = len(xPep)

    pcMassDiff = pcMass - calcuSeqMass(xPep)
    if pcMassDiff < -160:
        # Peptide is heavier than the precursor by more than 160: rejected.
        return None

    (bPeaks, yPeaks, bH2OPeaks, yH2OPeaks,
     bNH3Peaks, yNH3Peaks, _) = getTheoPeaks(xPep)

    # If the tag does not occur in the forward sequence, try the reversed
    # sequence and read the y series as the "left" series.
    isPepReversed = xPep.find(xTag) == -1
    if isPepReversed:
        xPep = xPep[::-1]
    alignPos = xPep.find(xTag)
    if alignPos == -1:
        # Tag occurs in neither orientation.  Without this guard the code
        # below would index the ion series with negative positions.
        return None

    # A tag aligned at the very start of the peptide must start at peak 0
    # (forward) or peak 1 (reversed), and vice versa; otherwise the
    # candidate is obviously wrong.
    expectedFirstPeak = 1 if isPepReversed else 0
    if (alignPos == 0) != (index[0] == expectedFirstPeak):
        return None

    if isPepReversed:
        lPeaks, lH2OPeaks, lNH3Peaks = yPeaks, yH2OPeaks, yNH3Peaks
        rPeaks, rH2OPeaks, rNH3Peaks = bPeaks, bH2OPeaks, bNH3Peaks
    else:
        lPeaks, lH2OPeaks, lNH3Peaks = bPeaks, bH2OPeaks, bNH3Peaks
        rPeaks, rH2OPeaks, rNH3Peaks = yPeaks, yH2OPeaks, yNH3Peaks

    tagSpan = range(alignPos, alignPos + lOfTag + 1)
    if alignPos + lOfTag == len(lPeaks):
        # Diagnostic dump: in this situation the last index of ``tagSpan``
        # is one past the end of ``lPeaks``, so the lookup below will fail.
        print('scanNo', scanNo)
        print(reliableTag, pep)
        print(lPeaks)
        print('Theo i', list(tagSpan))
        print('Expe i', list(index))

    lIonMassTheo = np.array([lPeaks[i] for i in tagSpan])
    lIonMassExpe = np.array([mzs[i] for i in index])
    coverageDenom = lOfPep - lOfTag + 1

    if abs(pcMassDiff) < massTol:
        # Precursor mass already matches: unmodified ("clean") peptide.
        originIons = (lPeaks + lH2OPeaks + lNH3Peaks
                      + rPeaks + rH2OPeaks + rNH3Peaks)
        numMod = getModIons(originIons, mzs, massTol)
        coverage = numMod / coverageDenom
        return (tag, pep, pcMassDiff, coverage, '-', '-', '-', '-',
                tagScore, coverage * tagScore, numOfPepCand, 'clean')

    # Mean offset between the observed and theoretical tag peaks.
    alignMassDiff = np.average(lIonMassExpe - lIonMassTheo)

    isModInL = 0
    if abs(pcMassDiff - alignMassDiff) < massTol:
        # The tag peaks are shifted by the whole mass difference, so the
        # modification sits to the left of the tag.
        leftResidual = lIonMassExpe[0] - pcMassDiff - (
            calcuSeqMass(xPep[0:alignPos]) - (not isPepReversed) * H2O)
        if leftResidual > 0.1:
            return None
        rightResidual = pcMass - lIonMassExpe[-1] - (
            calcuSeqMass(xPep[alignPos + lOfTag + 1:]) - isPepReversed * H2O)
        if rightResidual > 0.1:
            return None

        modPosL, modPosR, isModInL = 1, alignPos, 1
        oriLIonsForMod = _ionTriples(lPeaks, lH2OPeaks, lNH3Peaks,
                                     slice(modPosL, modPosR))
        oriRIonsForMod = _ionTriples(rPeaks, rH2OPeaks, rNH3Peaks,
                                     slice(lOfPep - modPosR + 1, None))
        # The "Cov" lists are only used for ion coverage, so they hold the
        # unshifted masses.
        oriLIonsForCov = _ionTriples(lPeaks, lH2OPeaks, lNH3Peaks,
                                     slice(modPosR + lOfTag + 2, None))
        oriRIonsForCov = _ionTriples(rPeaks, rH2OPeaks, rNH3Peaks,
                                     slice(1, lOfPep - modPosR))
    elif abs(alignMassDiff) < massTol:
        # The tag peaks are unshifted, so the modification sits to the
        # right of the tag.
        modPosL, modPosR = alignPos + lOfTag + 1, lOfPep
        oriLIonsForMod = _ionTriples(lPeaks, lH2OPeaks, lNH3Peaks,
                                     slice(modPosL, modPosR))
        oriRIonsForMod = _ionTriples(rPeaks, rH2OPeaks, rNH3Peaks,
                                     slice(1, lOfPep - modPosL + 1))
        if modPosL - lOfTag - 1 < 1:
            # Tag starts at position 0: nothing lies to its left, and the
            # slice bound below would turn negative.
            oriLIonsForCov = []
        else:
            oriLIonsForCov = _ionTriples(lPeaks, lH2OPeaks, lNH3Peaks,
                                         slice(1, modPosL - lOfTag - 2))
        oriRIonsForCov = _ionTriples(rPeaks, rH2OPeaks, rNH3Peaks,
                                     slice(lOfPep - modPosL + 2, None))
    else:
        # Neither single-modification placement fits; combinations of
        # several modifications are not handled.
        return None

    # 0.02 is a fixed tolerance handed to getModIonsV2 -- presumably the
    # fragment mass tolerance; confirm against getModIonsV2.
    numMod, matchedLions, matchedRions, modPosScore = getModIonsV2(
        isPepReversed, isModInL, lOfPep, oriLIonsForMod, oriRIonsForMod,
        oriLIonsForCov, oriRIonsForCov, mzs, 0.02, pcMassDiff, modPosL,
        modPosR)

    maxScore = max(modPosScore) if len(modPosScore) != 0 else -100
    coverage = numMod / coverageDenom
    sign = 1 if maxScore >= 0 else -1
    finalScore = coverage * (1 + sign * sqrt(abs(maxScore) / 10)) * tagScore

    if isPepReversed:
        # Report results in the orientation of the original peptide.
        return (tag, pep, pcMassDiff, coverage, matchedRions, matchedLions,
                modPosScore[::-1], maxScore, tagScore, finalScore,
                numOfPepCand, 'mod')
    return (tag, pep, pcMassDiff, coverage, matchedLions, matchedRions,
            modPosScore, maxScore, tagScore, finalScore, numOfPepCand, 'mod')


def findPtm(mzs, reliableTags, pepCandDict, pcMass, tol, scanNo):
    """Find the best peptide-spectrum match for every reliable tag.

    Args:
        mzs: experimental peak masses, indexed by the tag's peak indices.
        reliableTags: iterable of tag records ``(tag, score, peakIndices)``,
            e.g. ``['GAHQA', 0.97929, [13, 37, 68, 94, 113, 121]]``.
        pepCandDict: mapping from tag string (the original tag, not the
            'x'-substituted form) to its candidate peptide strings.
        pcMass: precursor mass.
        tol: relative tolerance; the absolute tolerance is ``pcMass * tol``.
        scanNo: scan number, used only in diagnostic output.

    Returns:
        A list with at most one tuple per tag (the one with the highest
        final score).  Tuple layout: ``(tag, pep, pcMassDiff, coverage,
        matchedLeft, matchedRight, modPosScore, maxScore, tagScore,
        finalScore, numOfPepCand, kind)`` where ``kind`` is ``'clean'`` or
        ``'mod'``; for ``'clean'`` the four middle fields are ``'-'``.

    Side effects:
        Writes ``N_AA_RES_MASS['x']`` while scoring and resets it to 0
        before returning, also when an exception propagates.
    """
    massTol = pcMass * tol
    totalPsms = []
    try:
        for reliableTag in reliableTags:
            tag = reliableTag[0]
            # Same '5<AA>' -> 'x' substitution as applied to the peptides.
            xTag = 'x' + tag[2:] if tag[0] == '5' else tag

            pepCand = pepCandDict[tag]  # keyed by the original tag
            numOfPepCand = len(pepCand)
            if numOfPepCand == 0:
                continue

            psms = []
            for pep in pepCand:
                psm = _scorePepCandidate(mzs, reliableTag, xTag, pep,
                                         numOfPepCand, pcMass, massTol,
                                         scanNo)
                if psm is not None:
                    psms.append(psm)

            if psms:
                # psm[-3] is finalScore; keep only the top hit per tag.
                totalPsms.append(max(psms, key=lambda psm: psm[-3]))
    finally:
        N_AA_RES_MASS['x'] = 0
    return totalPsms
# pep = 'GGRGRGGSGGGGGGGGGGYNR' #len 17 # M = np.round(1747.821 - calcuSeqMass(pep), 4) # oriMe = [0.,18.0105647,111.00082069,111.04356845,111.08071138,114.04330469,114.08025084,115.06338211,116.45430554,118.04148758,125.09577792,126.04283406,131.0701086,135.05781821,135.06820122,136.07230558,154.07463965,156.06512767,157.08415942,158.59409431,171.06364534,171.10002212,174.1120808,181.36545468,183.06420281,192.08767958,196.09463461,200.08957124,201.07525638,211.09453892,212.98556487,213.96599043,228.08469638,228.12088645,230.99716954,231.35294829,237.07486192,240.08641801,249.3142561,253.11814117,254.01213526,254.10102091,254.99568265,262.65628345,271.12845708,272.02111907,273.00804971,277.10732927,277.1689414,278.01155603,281.46112195,282.97598203,283.0030007,284.00027666,285.10816041,288.15337677,296.02166492,297.00985079,298.17488901,304.61008029,314.032544,315.03646584,324.11405596,349.19839718,354.12427068,372.22956909,381.13521963,411.69896667,417.16950824,434.1984367,439.2024681,440.96899527,451.21961125,452.96726173,453.69307486,454.95589666,464.95481676,467.98490195,469.27874233,470.97691596,474.19337856,477.96986678,478.96315778,482.00297068,482.98132341,495.98650458,496.97518817,505.9724038,506.9686466,508.24244922,508.98559196,519.98798966,521.96848252,537.99692664,565.2597999,577.26168557,582.12941845,605.25692936,619.78825837,620.96385761,622.27346266,623.29549789,634.30113699,673.63794208,679.30817257,681.33371655,696.32132465,709.35907348,719.30547649,730.34335038,736.32715298,752.36941029,764.86221172,766.39268717,772.86696134,787.364157,793.33998625,795.36264365,795.85324762,823.39345353,842.51568493,850.37999819,867.41129035,880.42338618,907.39224476,909.42186808,923.41991068,964.41199826,1021.43037277,1041.94288076,1108.47208417,1158.29706401,1165.48283769,1222.48427001,1223.54056867,1296.60630005,1431.66675695,1459.67398655,1488.26156553,1573.72078003,1702.76124492,1730.42396347,1732.83344368,1747.80349019] # oriIe = 
[100.,100.,1586.86572266,1752.42297363,15437.13574219,20004.76757812,11482.04296875,3442.8996582,1914.1171875,1742.01257324,1835.63427734,5731.39648438,2496.375,5026.12988281,75814.03125,2734.19458008,1990.99523926,5082.33349609,15637.28222656,1742.00634766,19708.42773438,4450.51464844,88419.09375,1935.61560059,5800.00439453,7222.87451172,2145.99755859,10229.91015625,2408.93310547,3275.27807617,3745.68286133,1929.87780762,12649.35058594,4314.44482422,3420.35400391,2009.16369629,6188.68457031,13247.20800781,2419.02124023,5464.91064453,16238.03710938,14824.93554688,38919.9140625,2905.51269531,38457.79858398,5787.04589844,43988.8515625,4420.95996094,3357.03393555,10646.03710938,2338.21191406,2196.34619141,103756.4140625,2510.81713867,12517.73046875,13092.73925781,46437.515625,4191.86865234,4324.19335938,2323.81860352,266567.15625,17657.64453125,3811.5012207,3068.89111328,3243.8684082,2778.6875,3809.23413086,3096.32080078,5893.57568359,3829.56445312,3125.60693359,3854.43847656,8797.0234375,11386.72265625,3476.53466797,13191.08203125,3623.1796875,3715.80859375,10099.88574219,80991.546875,3506.66845703,3472.93920898,25689.99414062,5165.82666016,3339.53613281,59891.4375,14341.09277344,4785.09863281,27716.79492188,22385.83886719,12473.34765625,20237.89648438,2231.0327582,4861.31103516,61516.01074219,4026.47167969,22344.48779297,3410.88549805,5261.54248047,2925.50390625,54564.21484375,15108.80175781,4364.31054688,2956.31323242,72494.67578125,3696.88867188,3393.80737305,5086.66845703,3800.76513672,1870.66209628,99883.50585938,3228.67749023,3421.53491211,3544.75732422,5579.84619141,4675.30684709,63106.95117188,14048.41162109,4330.73046875,4404.84082031,3475.28833008,33477.421875,2656.15235552,4869.25683594,42599.26538086,7173.54986129,4586.73242188,25374.23339844,14721.12207031,7180.92236328,4528.99902344,3696.78442383,8429.45751953,12055.06738281,5509.26855469,40763.38183594,19754.48413086,9992.99804688,6681.04390258,16614.14208984,101825.1328125,6004.81160639,7579.6171875,17
2453.29003906] # info = ['KEASE', 2.14067, [1, 6, 18, 21, 22, 25]] # tag = 'KEASE' #len 5 # pep = 'NIIHGSDSVESAEK' #len 17 # M = np.round(1564.6759989388029 - calcuSeqMass(pep), 4) # oriMe = [0.,18.0105647,121.37300288,128.09582401,129.07975696,135.0548485,146.10485262,153.03558372,154.11133774,165.05435416,182.10610114,194.08095638,199.13257277,227.12704738,228.13082824,250.14299453,253.58389582,257.13542864,275.14658306,287.1160001,340.21070823,346.18397155,433.219159,449.27360994,477.27462565,562.2571007,572.34626142,612.22765897,669.28714694,678.82136031,733.35503771,748.36405111,755.39988212,782.32757547,782.45238696,782.84782022,783.38942476,915.10395516,932.41808752,981.42409873,984.38132333,989.41638926,1033.47726896,1034.42489298,1039.40542792,1087.42526202,1088.44798984,1126.49807406,1131.47260636,1173.53754165,1224.47650329,1225.45388239,1239.55098589,1240.58796187,1338.54220464] # oriIe = [10.,10.,2095.98681641,14821.42382812,6496.18554688,3628.47875977,15323.0078125,2136.98291016,3003.93115234,2760.078125,2973.38818359,4544.21533203,71381.97387695,62484.85546875,3560.70019531,6725.92089844,3248.91552734,11309.79296875,14107.03710938,2842.72998047,16371.76171875,6507.3828125,21063.45898438,5840.49414062,23573.01269531,14695.60644531,3462.01635742,4105.28808594,4713.28466797,3565.05932617,5740.57226562,14450.00439453,3860.7265625,6624.84423828,13438.55761719,6095.36816406,12635.94335938,4217.63232422,6262.31640625,6408.62988281,3706.63134766,67289.140625,4723.42089844,4157.40185547,3902.04223633,15352.87304688,4538.69140625,47601.01464844,3730.91552734,4011.34204102,14127.5390625,5302.15039062,10784.76171875,6087.14550781,6647.65304114] tagPos = [1, 57, 122, 174, 234] tag = 'RHPK' #len 5 pep = 'PATGGVKKPHR' #len 17 M = np.round(1174.6869923075321 - calcuSeqMass(pep), 2) oriMe = [ 0., 18.0105647, 109.06445244, 109.07127586, 111.08024439, 111.08721895, 112.0640422, 113.09542854, 114.0425237, 114.07985323, 115.06297881, 121.06374278, 122.08477158, 
126.07911527, 128.0952883, 128.10384956, 128.13159477, 129.08083726, 129.09069122, 129.11603514, 129.39530763, 130.06610906, 130.07448313, 130.08177063, 137.05785164, 138.07926955, 139.07430689, 140.05144697, 140.05939428, 140.07211138, 140.09440123, 140.14208615, 141.08872501, 142.07406469, 142.11141715, 144.07905744, 144.08906456, 148.01546924, 149.01943796, 154.07429701, 154.08529077, 155.0685203, 156.08906202, 156.10023453, 157.0854639, 158.06924375, 161.09416954, 165.05393632, 166.02658253, 166.0736974, 168.0331278, 168.05189485, 168.05815122, 168.08953995, 170.0696791, 172.08507656, 174.07889275, 174.11187351, 176.1163807, 179.08049809, 184.08442624, 184.12265578, 185.11597764, 186.09991524, 188.08023484, 197.07971247, 199.05961928, 203.1262047, 206.11654026, 207.09849885, 207.13552927,
if __name__ == '__main__': #scan 2559 # info = ['RPKHA', 2.21487, [1, 30, 74, 95, 115, 122], [0.99433, 0.95308, 0.92334, 0.60139, 0.46782]] # tag = 'RPKHA' #len 5 # pep = 'ANSPEKPPEAGAAHKPR' #len 17 # M = np.round(1835.8617 - calcuSeqMass(pep), 2) # oriMe = [0.,18.0105647,109.06473715,111.08071477,112.06435608,114.08025432,115.06338562,119.07467942,125.04877431,126.04283791,128.05822142,128.09559656,128.10553056,129.079721,135.05481089,138.0422402,138.07958107,139.0038664,140.05803308,146.10481195,148.038291,151.02754085,154.07464435,156.08941387,157.08605002,158.06935664,166.07362147,167.04950552,168.05422195,172.08538933,174.11215465,175.11467489,180.12634465,182.06844393,182.1413172,183.06420839,185.0802605,186.08374106,194.06797759,194.10675601,198.0999655,198.11839187,199.09765841,204.05397525,209.08106336,211.13264746,218.06970983,220.07682563,221.11586611,224.03740801,224.09137233,224.16187647,225.0363074,225.14919587,226.09460378,234.10912859,235.09381595,236.08123885,237.0746344,238.08935368,239.08801967,239.12672017,243.15918796,249.11149184,250.09384443,253.14889793,254.10141466,254.13968907,257.1392921,263.09142894,265.15560674,267.12324416,268.15923942,271.13732208,271.16352537,273.17609454,277.10733769,281.0427366,294.09319538,295.11669415,296.12353377,297.13191761,298.05142602,299.05563148,306.13128885,323.14850624,332.09897482,334.1279978,336.18624706,340.2106139,352.0771323,354.18813394,368.11160628,382.23610362,394.18896559,399.2602712,403.72702956,407.18657048,408.9290613,418.77233263,423.21191599,444.78403437,458.23409648,458.29012158,459.22749904,459.29095988,459.77853021,460.25747706,463.17317778,484.27691613,496.22751572,503.20043746,517.28021687,522.24091475,535.28853989,536.31483135,537.24979425,557.54355059,566.31168012,578.15983342,591.2709217,593.27249101,607.35714653,608.37032728,633.28453237,643.32898018,678.38416062,679.3932858,685.35902657,688.33702065,689.34855098,735.41392007,761.37544554,786.40853884,788.41524851,806.45077357,806.4
5707755,814.39547242,858.42572707,935.48349048,936.48937142,1032.55035093,1129.60358695,1368.70725389,1479.30568205] # oriIe = [100.,100.,50984.32922363,3809.109375,2306.6159668,5314.91357422,2579.53540039,3139.07763672,2467.86914062,2276.90454102,3453.28051758,136401.14941406,2319.47314453,1698.35290527,1938.17016602,2631.86474609,12246.24804688,2605.19555664,40734.47412109,3902.30908203,4279.87304688,1719.0880127,3556.63549805,2026.9074707,11330.1640625,1809.06738281,21692.05664062,2757.60810092,62426.22705078,3085.65893555,45902.75390625,2209.88061523,3898.46044922,3210.00732422,4211.43115234,14962.79492188,67565.1796875,2692.60986328,3744.77612305,12327.90625,39294.45703125,3088.62646484,5118.65087891,1893.62573242,4903.51416016,3936.1706543,3614.46801758,1949.84423828,2299.40014648,27021.66748047,3145.37890625,2017.95812988,1511.13686383,4550.68652344,56683.58398438,2128.54052734,2511.3112793,3032.13427734,37685.37890625,44676.45507812,5422.15366658,30062.12304688,5624.63037109,5186.14404297,2425.80834961,2289.65014648,26173.08203125,3775.6328125,2449.55175781,4420.24902344,9957.17480469,5527.33789062,2858.21508789,3724.21484375,95169.13183594,2536.89746094,3764.97924805,8965.71582031,13863.42675781,61295.21484375,4440.36376953,4450.20751953,31534.3046875,6990.07707943,5131.52197266,9363.34277344,16712.00195312,5617.76855469,4158.87646484,3166.03515625,25123.30419922,26726.77661133,8489.78417969,3350.17993164,4638.48779297,76344.75878906,1442.76731939,3709.70507812,2278.56005859,12924.49707031,17091.14257812,5428.41455078,11907.24707031,34054.12109375,22672.29492188,6374.68061045,8732.01660156,3207.80175781,15680.27832031,3539.17773438,4177.74902344,3005.44995117,1403.2338213,4422.99755859,3177.26904297,70716.09960938,2637.95922852,2841.29492188,937.40952746,3304.51928711,14451.515625,3665.07407339,29876.078125,4580.52001953,5101.67529297,3504.13330078,9301.46069336,1628.84573606,3464.54330459,10092.69726562,29998.69775391,56795.84179688,3723.59204102,2943.3752
4414,16281.10498047,7999.83862305,55655.21777344,3018.19091797,12362.83666992,20335.50976562,5027.06152344,30604.64208984,69743.92749023,9698.70068359,4851.537531] # scan 2162 info = ['KGYNTSG', 2.33913, [1, 16, 26, 49, 53, 61, 70, 77]] tag = 'KGYNTSG' #len 5 pep = 'GNRGSGGGGGGGGQGSTNYGK' #len 17 M = np.round(1808.813644 - calcuSeqMass(pep), 2) oriMe = [0.,18.0105647,114.0437317,125.04743772,126.04284405,127.51880106,128.05822766,128.09566399,128.10275319,129.07972729,131.07011899,135.05481747,135.06836974,138.04380249,143.56348755,144.05286577,146.10651189,151.05067091,168.05228167,171.05292018,171.06356033,182.06845278,183.06421728,185.08038809,185.1164035,188.09053491,203.12655074,204.12922353,209.08107352,228.08544281,237.07488063,240.08643696,242.10210444,245.11458703,254.10104096,272.02114053,273.00820529,282.99870643,285.1081829,294.09801905,297.10751446,299.11976129,302.13147867,317.63325521,323.12083955,324.11964317,339.11897542,355.19973777,359.14186802,366.18859324,412.2165026,463.20400807,470.17019005,480.23443422,490.4198605,494.21403735,495.98569219,499.24966587,522.20687635,526.26095803,556.27071739,581.28080674,583.28454618,600.2768776,602.29387356,603.32143677,613.29330042,625.28330473,633.23909965,642.30612938,668.31445008,670.30942974,682.31558327,699.35349729,707.31588363,708.30055792,709.32320674,725.34026058,727.33072184,756.37175703,757.35738547,784.34983918,813.3803373,814.37342265,835.37953607,836.37391951,841.37639305,853.38069952,854.40039403,870.40534918,871.40678742,876.36599771,893.40022507,898.39399117,899.41931551,910.42276072,911.40585163,927.43273176,955.42253813,967.43486299,968.43719643,1024.47681245,1082.46823895,1083.48398652,1410.47881458,1458.72015257] oriIe = 
[10000.,10000.,23663.72070312,2070.2512207,9866.23828125,1532.9276123,3009.8515625,17794.87109375,1601.11047363,5279.53759766,13140.00585938,4752.70800781,4357.19970703,1834.73986816,1616.38012695,3583.80712891,15741.52148438,1521.63378906,4337.85595703,1543.13720703,20506.13476562,1859.58447266,5127.00341797,26925.15429688,6043.93066406,11790.91113281,69780.203125,2937.8828125,2354.609375,17608.52148438,2702.0456543,3410.44213867,14696.90820312,3696.49975586,2801.25585938,3546.95214844,4197.734375,3059.00854492,9101.58789062,2510.57617188,3317.16357422,9397.31445312,2695.72436523,2270.17822266,2600.55664062,5896.64404297,5284.51416016,4545.0078125,3034.2253418,59773.46777344,3519.49707031,9283.94628906,3117.35522461,42480.81933594,2680.54907227,4648.22021484,4696.72363281,5815.05859375,10968.79101562,4560.55859375,21042.68310547,50215.27734375,2594.38754248,8395.13476562,16116.25097656,10612.85546875,37821.61816406,4506.07617188,4679.87646484,5426.12939453,50546.27832031,53908.20898438,5984.30615234,11079.15527344,13515.89160156,5167.60888672,2874.39001436,260327.18164062,84599.24998336,18572.94921875,4851.91943359,73276.41992188,17182.9140625,3676.09741211,5712.79980469,41724.78125,59759.65332031,15239.90039062,4483.49121094,15130.37890625,3349.77563477,3844.19091797,15820.02392578,34633.23828125,13536.65527344,27923.48632812,11882.15332031,4527.93359375,53507.99414062,23473.953125,4982.15478516,19537.03466797,3364.98510742,4052.8996582,6269.70588922,6687.66244922] # info = ['RAEASS', 2.14067, [1, 16, 33, 46, 48, 54]] # tag = 'RAEAS' #len 5 # pep = 'YCCSSAEAR' #len 17 # M = np.round(1102.4186164721568 - calcuSeqMass(pep), 3) # oriMe = 
[0.,18.0105647,119.73026358,128.0957962,129.07972894,132.03563143,132.04317049,135.05783063,135.06821101,136.52871708,138.04380426,146.10482094,148.0383001,156.05393708,163.06245009,166.0494976,174.1123786,177.0576116,180.07317406,180.08933679,184.37186319,189.04638903,206.10535664,212.02659922,215.05566812,220.07683912,224.03692827,225.0363212,226.03264934,229.05162515,238.08879925,239.08765952,240.08534856,245.14775771,292.06447505,294.09802279,295.10123546,299.05564978,300.05422893,306.06957627,316.08484235,320.06454752,323.09361729,334.09309796,369.11613158,370.11117471,374.193791,392.11856735,445.23215661,453.9691977,483.12281858,489.95511277,508.17804697,514.2488376,532.26132894,534.17098685,596.11424804,602.2798232,619.29056696,688.30272287,703.61051285,761.31558395,762.29196516,779.32066528,780.18945215,922.327704,939.35378107,1202.56591854] # oriIe = [100.,100.,1488.79516602,2891.8840332,2252.53173828,31964.53320312,1869.0045166,5753.4140625,170084.59375,1740.05334473,3283.22924805,1569.33862305,2649.56640625,2544.34912109,1946.57641602,5518.625,23621.296875,20562.39257812,4790.86669922,157891.38671875,1786.01660156,1812.9798584,21292.9855957,2480.27148438,2528.73461914,4780.66162109,17975.11352539,1391.41874806,3464.19824219,2581.36669922,40231.76953125,24841.29296875,20790.7890625,48971.60571289,3608.00585938,5999.57568359,36226.69287109,5311.22038575,9774.31152344,23797.93554688,3883.03295898,10822.81738281,70286.94726562,3432.72412109,4901.64648438,3897.99487305,9936.56054688,19272.67773438,26320.26196289,13670.28613281,4819.95214844,2940.31787109,3025.94702148,3340.93017578,58243.09570312,5810.14892578,16636.00585938,2472.44057294,156043.91015625,3634.99072266,3317.86328125,4854.65429688,18503.37109375,360029.42382812,3252.44287109,3343.76171875,68632.34179688,7724.65283203] # info = ['RAEASS', 2.14067, [1, 7, 20, 28, 41]] # tag = 'RAAE' #len 5 # pep = 'IEMEMEAAR' #len 17 # M = np.round(1110.4685 - calcuSeqMass(pep), 3) # oriMe = 
[0.,18.0105647,140.09469744,157.08606231,166.07408461,168.08983021,172.08540279,174.11188103,180.08936033,184.0848552,194.06799276,196.1209108,198.09974832,210.17735855,212.07971048,214.13079773,225.11372512,230.07131918,242.12726294,243.12663389,245.14780138,248.08268917,251.12758779,255.40097287,258.06641181,269.13807494,276.07560574,312.17857363,316.1865782,322.15970746,325.16778481,340.17476519,383.22027308,387.10792307,388.11090005,389.16163551,405.12110933,411.20629788,427.22058561,431.20792635,433.11522825,445.23071742,518.19938766,528.26424643,534.14750526,555.17171856,555.32020193,556.156081,556.29217764,592.26922873,640.29849161,642.30586789,657.31633977,699.33450813,703.29123452,721.30748743,724.39434691,731.30131867,740.34985642,770.35861581,771.35789668,777.31811277,804.34961916,806.34868987,841.39995807,868.33420783,915.36688851,933.39461392,938.20743999,942.4445067,979.38214585,997.39123645,998.37961359,1278.59637155]
def testSome(scanNoList):
    """Run tag extraction and candidate-peptide lookup on selected MS2 scans.

    For every scan number the raw spectrum is de-isotoped, sequence tags are
    extracted and filtered by score, the top tag is shortened by two residues
    and the peptides compatible with that tag are collected.  Intermediate
    results are reported with ``print``.

    The function reads the module-level ``ms2SpecDict`` and ``allPeps`` and
    the constants ``TAG_SCORE_THRES``, ``MAX_TAGS_NUM`` and ``MS1_TOL``.
    Index 0 of a ``ms2SpecDict`` entry is used as the precursor mass and
    index 1 as the raw spectrum.

    NOTE: the PTM search step (``MILP`` / ``findPtm``) is currently disabled,
    so ``ptmRes`` is always empty and the returned list stays empty.

    Args:
        scanNoList: iterable of scan numbers (keys of ``ms2SpecDict``).

    Returns:
        list: the non-empty PTM search results, one per scan.
    """
    psms = []
    for scanNo in scanNoList:
        scanEntry = ms2SpecDict[scanNo]
        pcMass = scanEntry[0]
        rawSpec = scanEntry[1]

        mzs, intes = deIsotope(rawSpec)
        # Explicit length test: mzs may be an array, whose truth value is
        # ambiguous.
        if len(mzs) == 0:
            print('mzs is none')
            continue

        # Each tag looks like [sequence, score, peak-index path, edge scores],
        # e.g. ['GAHQA', 0.97929, [13, 37, 68, 94, 113, 121], [0.03988, ...]].
        tags = extractTags(mzs, intes)
        print('raw Tags', tags)
        print('len of raw tags', len(tags))

        hasGoodTag = any(tag[1] >= TAG_SCORE_THRES for tag in tags)

        reliableTags = []
        for tag in tags:
            # Once a good tag exists, stop at the first tag below the
            # threshold (relies on tags being ordered best-first -- TODO
            # confirm against extractTags).
            if hasGoodTag and tag[1] < TAG_SCORE_THRES:
                break
            reliableTags.append(tuple(tag[0:3]))
            if len(reliableTags) == MAX_TAGS_NUM:
                break
        if not reliableTags:
            print('reliableTags number = 0 ')
            continue

        reliableTags = cleanUpTags(reliableTags)
        print('reliable tags',reliableTags)

        # Drop the last two residues (and their peaks) of the top tag.
        topSeq, topScore, topPath = reliableTags[0][0:3]
        cutTag = (topSeq[0:-2], topScore, topPath[0:-2])
        reliableTags[0] = cutTag

        # Only the (shortened) top tag is searched.  A dict comprehension
        # gives every key its own list, unlike dict.fromkeys(keys, []).
        activeTags = [reliableTags[0]]
        pepCandDict = {tagInfo[0]: [] for tagInfo in activeTags}

        # Tags for which a peptide matches the precursor mass within MS1_TOL.
        tagWithZeroPep = []
        # This part is quite time consuming.
        for tag in pepCandDict:
            pepList = []
            for i in getFeasiblePepIndex(tag):
                pep = allPeps[i]
                if abs(pcMass - calcuSeqMass(pep)) < pcMass * MS1_TOL:
                    # A mass-matching peptide replaces all other candidates.
                    pepList = [pep]
                    tagWithZeroPep.append(tag)
                    break
                pepList.append(pep)
            pepCandDict[tag] = pepList

        if tagWithZeroPep:
            pepCandDict = {key: val for key, val in pepCandDict.items()
                           if key in tagWithZeroPep}
        pepCandDict = {key: val for key, val in pepCandDict.items() if val}
        if not pepCandDict:
            continue

        for tagInfo in activeTags:
            # A path starting at peak 1 is matched against the reversed
            # peptide (presumably a C-terminal tag -- TODO confirm).
            if tagInfo[2][0] == 1:
                tagSeq = tagInfo[0]
                pepCandDict[tagSeq] = [pep for pep in pepCandDict[tagSeq]
                                       if pep[::-1].startswith(tagSeq)]
        print('pepCandDict',pepCandDict)

        ptmRes = []
        # NOTE: PTM search is disabled; re-enable one of the calls below.
        # ptmRes = MILP(reliableTags[0], pepCandDict, mzs, intes, pcMass, scanNo)
        # ptmRes = findPtm(mzs, reliableTags, pepCandDict, pcMass, MS1_TOL, scanNo)
        if len(ptmRes) > 0:
            psms.append(ptmRes)
        print('scanNo %d finished' % scanNo)
    return psms
def _aggregateTagScores(tagEntries):
    """Merge tag entries that share a sequence into ``{seq: [kind, score, count]}``.

    The first entry of a sequence sets ``kind`` (``t[1]``) and the initial
    score (``t[2]``); every repeat adds 10% of its score and bumps the count.
    """
    scoreDict = {}
    for t in tagEntries:
        seq = t[0]
        if seq in scoreDict:
            scoreDict[seq][1] += 0.1 * t[2]
            scoreDict[seq][2] += 1
        else:
            scoreDict[seq] = [t[1], t[2], 1]
    return scoreDict


def _compileTagRules(scoreDict):
    """Turn ``scoreDict`` into ``(patterns, gain, penalty)`` tuples, one per tag.

    kind 0 matches the tag as written, kind 1 matches the reversed tag (both
    with a 1.1x gain); any other kind matches either orientation with the
    plain score.  The penalty is always ``score / count``.
    """
    rules = []
    for tag, (kind, score, count) in scoreDict.items():
        if kind == 0:
            patterns, gain = (tag,), score * 1.1
        elif kind == 1:
            patterns, gain = (tag[::-1],), score * 1.1
        else:
            patterns, gain = (tag, tag[::-1]), score
        rules.append((patterns, gain, score / count))
    return rules


def _scorePeptide(pep, tagRules):
    """Add each rule's gain when a pattern occurs in ``pep``, else subtract its penalty."""
    pscore = 0
    # Plain sequential accumulation keeps the floating-point result
    # independent of how sum() is implemented.
    for patterns, gain, penalty in tagRules:
        if any(pattern in pep for pattern in patterns):
            pscore += gain
        else:
            pscore -= penalty
    return pscore


def testSomeTag3(scanNoList):
    """Rank the reference peptide of each scan using short sequence tags.

    For every scan the spectrum is de-isotoped, tags are extracted and
    filtered with ``getValidTagLs(tags, [3])`` (presumably length-3 tags),
    and every peptide in ``allPeps`` whose mass is within 300 of the
    precursor mass is scored against those tags.  The position of the
    peptide stored in ``truthDict[scanNo]`` in the descending score order is
    recorded.

    The function reads the module-level ``ms2SpecDict``, ``allPeps`` and
    ``truthDict``.  Side effect: the reference peptide is appended to
    ``allPeps`` when it is not already present.

    Args:
        scanNoList: iterable of scan numbers (keys of ``ms2SpecDict`` and
            ``truthDict``).

    Returns:
        list: ``[scanNo, rank, pepSize]`` per scan, where ``rank`` is the
        0-based position of the reference peptide and ``pepSize`` the number
        of peptides that passed the mass filter.  Scans with an empty
        de-isotoped spectrum, or whose reference peptide fails the mass
        filter, contribute no entry.
    """
    scanPepRank = []
    for scanNo in scanNoList:
        scanEntry = ms2SpecDict[scanNo]
        pcMass = scanEntry[0]
        rawSpec = scanEntry[1]

        mzs, intes = deIsotope(rawSpec)
        # Explicit length test: mzs may be an array, whose truth value is
        # ambiguous.
        if len(mzs) == 0:
            print('mzs is none')
            continue

        tags = extractTags(mzs, intes)
        tag3 = getValidTagLs(tags, [3])

        scoreDict = _aggregateTagScores(tag3)
        print(scoreDict)
        # Precompute everything that does not depend on the peptide.
        tagRules = _compileTagRules(scoreDict)

        # Make sure the reference peptide takes part in the ranking.
        truthPep = truthDict[scanNo]
        if truthPep not in allPeps:
            allPeps.append(truthPep)

        pepScore = []
        for pep in allPeps:
            if abs(pcMass - calcuSeqMass(pep)) > 300:
                continue
            pepScore.append([pep, _scorePeptide(pep, tagRules)])
        pepSize = len(pepScore)

        resList = sorted(pepScore, key=lambda x: x[1], reverse=True)
        for rank, (pep, _score) in enumerate(resList):
            if pep == truthPep:
                scanPepRank.append([scanNo, rank, pepSize])
                print('scan finished', scanNo, rank, pepSize)
                print(resList[0:5])
                break
    return scanPepRank