def getPairedAndUnpairedSpectra(dtaDir, dtaList, Nmod, Cmod, ppm=5, cutOff=0.1, verbose=False):
    """Split a list of DTA files into light/heavy pairs and unpaired spectra.

    Two spectra are candidates for pairing when the absolute difference of
    their precursor masses is within ``epsilon`` of ``Nmod + Cmod``, where
    ``epsilon = ppm * 1e-6 * max(precMass1, precMass2)``.  A candidate is
    accepted when the ratio returned by ``SA.getSharedPeaksRatio`` is
    strictly greater than ``cutOff``.

    Args:
        dtaDir: Not used by this function; kept so existing callers work.
        dtaList: Sequence of DTA file paths.
        Nmod: Mass offset passed to ``SA.getNandCIons`` (added to ``Cmod``
            to obtain the expected precursor mass difference).
        Cmod: Mass offset passed to ``SA.getNandCIons``.
        ppm: Tolerance in parts per million.
        cutOff: Minimum (exclusive) shared peaks ratio for a pair.
        verbose: When true, print a line per pair found / unpaired file.

    Returns:
        ``(specPairs, unpairedSpecs)``.  ``specPairs`` is a list of
        ``(ratio, lowerMassDta, higherMassDta)`` tuples; ``unpairedSpecs``
        lists, in input order, the files that take part in no pair.
    """
    delta = Nmod + Cmod
    numSpecs = len(dtaList)

    # Read each precursor mass exactly once instead of re-reading file j for
    # every i in the pairwise scan below.
    precMasses = [DataFile.getPrecMassAndCharge(dtaPath)[0] for dtaPath in dtaList]

    specPairs = []
    unpairedSpecs = []
    # Indices that belong to at least one accepted pair, whether they were
    # the first or the second member.  Tracking only the first member would
    # wrongly report the second member of a pair as unpaired later on.
    pairedInds = set()

    for i in range(numSpecs):
        precMass1 = precMasses[i]
        spec1 = DataFile.getMassIntPairs(dtaList[i])
        for j in range(i + 1, numSpecs):
            precMass2 = precMasses[j]
            epsilon = ppm * 10 ** -6 * max(precMass1, precMass2)
            if not (np.abs(np.abs(precMass1 - precMass2) - delta) < epsilon):
                continue

            spec2 = DataFile.getMassIntPairs(dtaList[j])
            # The spectrum with the smaller precursor mass goes first.
            if precMass1 < precMass2:
                firstSpec, secondSpec = spec1, spec2
            else:
                firstSpec, secondSpec = spec2, spec1
            N, C = SA.getNandCIons(firstSpec, secondSpec, Nmod, Cmod, epsilon=epsilon)
            ratio = SA.getSharedPeaksRatio(firstSpec, secondSpec, N, C)

            if ratio > cutOff:
                if verbose:
                    print('Pair found %s %s' % (dtaList[i], dtaList[j]))
                pairedInds.add(i)
                pairedInds.add(j)
                if precMass2 < precMass1:
                    specPairs.append((ratio, dtaList[j], dtaList[i]))
                else:
                    specPairs.append((ratio, dtaList[i], dtaList[j]))

        # Every pair involving i has been examined by now: partners with a
        # smaller index were handled in earlier outer iterations.
        if i not in pairedInds:
            unpairedSpecs.append(dtaList[i])
            if verbose:
                print('No pairs for %s' % (dtaList[i],))

    return specPairs, unpairedSpecs
def getClusterPairingStats(lightSpecs, heavySpecs, lightPrecMass, pairConfig, epSTD=0.01):
    """Merge two spectrum clusters, pair their ions and summarise the pairing.

    Both clusters are merged with ``SA.mergeSpectra`` (tolerance
    ``2 * epSTD``) and the paired ions are obtained from
    ``getAllPairedIonsDict``.  For every entry of that dictionary a mean
    delta mass is computed through ``PN.ProbNetwork.deltaRules`` and filed
    under a pair-type string of the form ``<light ion types>_<heavy ion types>``.

    Returns:
        dict with the keys 'Cluster Paired PRM Information',
        'Shared Peaks Ratio', 'Pair Type Stats', 'Light Merged Spec',
        'Heavy Merged Spec' and 'Num Paired Ions'.
    """
    mergeTolerance = 2 * epSTD
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=mergeTolerance)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=mergeTolerance)
    allPairedIonsDict = getAllPairedIonsDict(lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig, epSTD)

    # One bucket and one counter per pair type known to the module.
    specPairedPRMs = dict((knownType, []) for knownType in pairTypes)
    pairTypeCount = dict((knownType, 0) for knownType in pairTypes)

    numLightInds = 0
    numHeavyInds = 0
    for heavyIons in allPairedIonsDict:
        lightIons = allPairedIonsDict[heavyIons]
        baseMass = lightPrecMass - Constants.mods['H+'] - Constants.mods['H2O']
        rules = PN.ProbNetwork.deltaRules

        # Light ions are evaluated without terminal mods, heavy ions with the
        # mods of the pair configuration; light entries come first.
        deltaMasses = [
            rules[ion[1]](baseMass, lightMergedSpec[ion[0]][0], 0, 0)
            for ion in lightIons
        ]
        deltaMasses.extend(
            rules[ion[1]](baseMass, heavyMergedSpec[ion[0]][0], pairConfig['NMod'], pairConfig['CMod'])
            for ion in heavyIons
        )

        lightTypes = ''.join([ion[1] for ion in lightIons])
        heavyTypes = ''.join([ion[1] for ion in heavyIons])
        pairTypeString = lightTypes + '_' + heavyTypes

        meanDelta = sum(deltaMasses) / len(deltaMasses)
        specPairedPRMs[pairTypeString].append((meanDelta, (lightIons, heavyIons)))
        pairTypeCount[pairTypeString] += 1
        numLightInds += len(lightIons)
        numHeavyInds += len(heavyIons)

    numPairedIons = numLightInds + numHeavyInds
    totalPeaks = lightMergedSpec.shape[0] + heavyMergedSpec.shape[0]
    sharedPeaksRatio = float(numPairedIons) / totalPeaks

    return {
        'Cluster Paired PRM Information': specPairedPRMs,
        'Shared Peaks Ratio': sharedPeaksRatio,
        'Pair Type Stats': pairTypeCount,
        'Light Merged Spec': lightMergedSpec,
        'Heavy Merged Spec': heavyMergedSpec,
        'Num Paired Ions': numPairedIons,
    }
def getAllPairedIonsDict(lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig, epSTD=0.01):
    """Combine same-terminus and cross-paired ion tables into one dictionary.

    ``SA.getNandCIons`` and ``SA.getCrossPairedIons`` are both run on the two
    merged spectra with the pair configuration's 'NMod' / 'CMod' values and a
    tolerance of ``2 * epSTD``.  Each resulting table is labelled with a pair
    of ion types via ``SA.prepIonTableForAddition``, the tables are added in
    two groups, each group is passed through ``SA.reverseDict``, and the two
    reversed dictionaries are added together.
    """
    nMod = pairConfig['NMod']
    cMod = pairConfig['CMod']
    tolerance = 2 * epSTD

    NTermTable, CTermTable = SA.getNandCIons(
        lightMergedSpec, heavyMergedSpec, Nmod=nMod, Cmod=cMod, epsilon=tolerance)
    NCrossTable, CCrossTable = SA.getCrossPairedIons(
        lightMergedSpec, heavyMergedSpec, lightPrecMass, Nmod=nMod, Cmod=cMod, epsilon=tolerance)

    # Label every table with its two ion types before merging.
    bToB = SA.prepIonTableForAddition(NTermTable, ['b', 'b'])
    yToY = SA.prepIonTableForAddition(CTermTable, ['y', 'y'])
    yToB = SA.prepIonTableForAddition(NCrossTable, ['y', 'b'])
    bToY = SA.prepIonTableForAddition(CCrossTable, ['b', 'y'])

    firstGroup = SA.reverseDict(SA.addDicts(bToB, bToY))
    secondGroup = SA.reverseDict(SA.addDicts(yToB, yToY))
    return SA.addDicts(firstGroup, secondGroup)
def getPairs(pairs, xVals):
    """Compute SVM classification data for each cluster pair and queue it.

    For every ``pair`` the spectra of the cluster ``samePeptideClusters[pair[0]]``
    and of ``samePeptideClusters[pair[1]]`` are loaded and merged, and the
    output of ``SA.getSpectraPairInfoForSVMClassification`` is handed to
    ``xVals.put`` wrapped in a one-element list.  Relies on the module-level
    names ``scanFDict``, ``samePeptideClusters``, ``paramsDict`` and
    ``pairConfig``.

    Returns:
        The ``xVals`` object that was passed in.
    """
    for pair in pairs:
        lightCluster = samePeptideClusters[pair[0]]
        heavyCluster = samePeptideClusters[pair[1]]

        lightSpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in lightCluster]
        heavySpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in heavyCluster]

        # Tolerance scales with the mean precursor mass of the first cluster.
        lightPrecMass = np.average(np.array([scanFDict[scanF]['precMass'] for scanF in lightCluster]))
        ppmStd = float(paramsDict['ppmstd']['value'])
        epSTD = ppmStd * 10 ** -6 * lightPrecMass
        tolerance = 2 * epSTD

        lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=tolerance)
        heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=tolerance)
        svmClassificationData = SA.getSpectraPairInfoForSVMClassification(
            lightMergedSpec, heavyMergedSpec, lightPrecMass,
            NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=tolerance)
        xVals.put([svmClassificationData])

    return xVals
def getAlignmentRatios(scanInfoFName, dtaDir, delta, epsilon=0.02):
    """Score every pair of scans whose second field differs by ``delta``.

    Scan records come from ``DataFile.getScanInfo``; field 0 of a record is
    used as the scan number and field 1 is converted to float and compared.
    A pair (i < j) is scored when ``abs(abs(m_i - m_j) - delta) < epsilon``.
    The spectra are then loaded from ``dtaDir + '244.NNNN.NNNN.1.dta'``
    (plain string concatenation, so ``dtaDir`` must already end with a path
    separator) and passed to ``SA.getSharedPeaksRatio``.

    Side effects:
        Prints progress and every scored pair, and pickles the result list
        to 'heavylightpairs.txt' in the current working directory.

    Returns:
        List of ``(ratio, scanRecord_i, scanRecord_j)`` tuples.
    """
    scanInfo = DataFile.getScanInfo(scanInfoFName)
    # NOTE(review): the directory listing is not used below; the call is kept
    # in case callers rely on it failing early for a bad dtaDir - confirm.
    dtaNames = DataFile.getDTAFNamesInDir(dtaDir)
    scansToUse = scanInfo

    numScans = len(scansToUse)
    # Parse the compared field once per scan rather than once per pair.
    precMasses = [float(scan[1]) for scan in scansToUse]
    dtaTemplate = '244.%(scanF)04i.%(scanF)04i.1.dta'

    ratios = []
    for i in range(numScans):
        if i + 1 < numScans:
            # Report an actual percentage (the label says "percent").
            print('%s percent done' % str(100.0 * i / numScans))
        for j in range(i + 1, numScans):
            if np.abs(np.abs(precMasses[i] - precMasses[j]) - delta) < epsilon:
                dta1 = dtaTemplate % {'scanF': int(scansToUse[i][0])}
                dta2 = dtaTemplate % {'scanF': int(scansToUse[j][0])}
                spec1 = DataFile.getMassIntPairs(dtaDir + dta1)
                spec2 = DataFile.getMassIntPairs(dtaDir + dta2)
                ratio = SA.getSharedPeaksRatio(precMasses[i], spec1, precMasses[j], spec2, epsilon)
                print('%s %s %s' % (ratio, scansToUse[i], scansToUse[j]))
                ratios.append((ratio, scansToUse[i], scansToUse[j]))

    # Pickle streams are bytes: open the file in binary mode so the dump is
    # portable across platforms and Python versions.
    with open('heavylightpairs.txt', 'wb') as fout:
        pickle.dump(ratios, fout)

    return ratios
def getSamePeptideClusters(precMassClusters, scanFDict, svmModel, svmRange, ppmSTD=5, cutOff=0):
    """Refine precursor-mass clusters using pairwise SVM predictions.

    Single-member clusters are passed through unchanged.  For larger
    clusters, classification data is computed for every pair of member
    spectra, normalised with ``svmRange`` and labelled with ``svmModel``.
    A distance matrix (initialised to -2) receives, for each pair, element 1
    of the pair's classification data when the predicted label is 1 and -1
    otherwise; the matrix is then handed to
    ``heirarchicalClusteringAverageLinkage`` together with ``cutOff``.

    Returns:
        List of clusters (single-member inputs plus the clustering results).
    """
    trueClusters = []
    for cluster in precMassClusters:
        numScans = len(cluster)
        if numScans == 1:
            trueClusters.append(cluster)
            continue

        specs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in cluster]
        dMatrix = np.ones((numScans, numScans)) * -2

        # All unordered index pairs (first < second), in row-major order.
        pairIndex = [(first, second)
                     for first in range(numScans)
                     for second in range(first + 1, numScans)]

        xVals = []
        for first, second in pairIndex:
            precMass = scanFDict[cluster[first]]['precMass']
            epSTD = ppmSTD * 10 ** -6 * precMass
            xVals.append(SA.getSpectraPairInfoForSVMClassification(
                specs[first], specs[second], precMass, NMod=0, CMod=0, epsilon=2 * epSTD))

        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLabs = svmutil.svm_predict([0] * len(xValsNorm), xValsNorm, svmModel)[0]

        for ind, pLab in enumerate(pLabs):
            # Scale distances by 4: totalTICRatio, 1: TotalSharedPeaksRatio
            row, col = pairIndex[ind]
            if pLab == 1:
                distance = xVals[ind][1]
            else:
                distance = -1
            dMatrix[row][col] = distance
            dMatrix[col][row] = distance

        singletons = [[scanF] for scanF in cluster]
        trueClusters.extend(heirarchicalClusteringAverageLinkage(singletons, dMatrix, cutOff=cutOff))

    return trueClusters
def getSharedPRMs(prmLadder1, prmLadder2, epsilon=0.5):
    """Match two PRM ladders through ``SA.getPairedIons`` with zero offset.

    ``prmLadder1`` is bucketed into a hash table keyed by
    ``np.round(mass / epsilon)``; when two entries share a key the later one
    replaces the earlier.  ``prmLadder2`` is placed in column 0 of a
    two-column zero array and both are passed to ``SA.getPairedIons`` with
    ``delta=0.0``.

    Returns:
        ``[]`` when nothing was paired; otherwise a tuple built, in sorted
        key order, from the first element of the second transposed row of
        each paired-ion entry.
    """
    hashTable = dict(
        (np.round(prmLadder1[ind] / epsilon), [(ind, prmLadder1[ind])])
        for ind in range(prmLadder1.size)
    )

    ladder2Table = np.zeros((prmLadder2.size, 2))
    ladder2Table[:, 0] = prmLadder2

    pairedIonData = SA.getPairedIons(hashTable, ladder2Table, delta=0.0, epsilon=epsilon)

    # list(zip(...)) keeps the transpose subscriptable on Python 2 and 3.
    sharedPRMs = [list(zip(*pairedIonData[key]))[1]
                  for key in sorted(pairedIonData.keys())]
    if not sharedPRMs:
        return []
    return list(zip(*sharedPRMs))[0]
def getSharedPeaksRatio(lightPath, heavyPath, epsilon):
    """Return the shared peaks ratio of the spectra stored at two paths.

    Both files are read with ``DataFile.getMassIntPairs``; the ion tables
    come from ``SA.getNandCIons`` using 'NMod' / 'CMod' of the module-level
    ``pairConfig`` and the given ``epsilon``.
    """
    lightSpectrum, heavySpectrum = [
        DataFile.getMassIntPairs(path) for path in (lightPath, heavyPath)
    ]
    nIons, cIons = SA.getNandCIons(
        lightSpectrum, heavySpectrum,
        pairConfig['NMod'], pairConfig['CMod'],
        epsilon=epsilon)
    return SA.getSharedPeaksRatio(lightSpectrum, heavySpectrum, nIons, cIons)
# Script fragment: set up per-pair-configuration helpers, build one SVM
# feature entry per cluster pair, classify all pairs, then prepare the
# sequence map used for the heavy member of a pair.
# NOTE(review): this fragment was reconstructed at column 0; confirm the
# enclosing indentation against the surrounding script.
addEnds = DNS.getSpectrumGraphEndpointInitFunction(pairConfig['NStatic'], pairConfig['CStatic'], paramsDict['Enzyme']['specificity'])
termModHash = Constants.getTermModHashForPairConfig(pairConfig)
# The model path comes from the pair configuration; the range file shares
# the model's base name with a '.range' extension.
svmModel = svmutil.svm_load_model(parent + pairConfig['Model'])
svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range')
xVals = []
# xVals = getPairsThread(pairs)
for pair in pairs:
    # pair[0] / pair[1] index samePeptideClusters for the light / heavy cluster.
    lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
    lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))
    # Tolerance scales with the mean precursor mass of the light cluster.
    epSTD = options.ppmstd * 10 ** -6 * lightPrecMass
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)
    svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
    xVals += [svmClassificationData]
# Normalise with the loaded ranges, then predict; the labels passed to
# svm_predict are all-zero placeholders and only element 0 of the result
# is kept.
xValsNorm = svmutil.normalize_instances(xVals, svmRange)
pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]
print 'Pairs found. Time taken:', time.time() - t1, '\n'
# Deep copy so the terminal mod symbols below do not alter the shared seqMap entry.
heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']
def getSharedPeaksRatio(lightPath, heavyPath, epsilon, Nmod=17.0265, Cmod=-16.0187):
    """Return the shared peaks ratio of the spectra stored at two paths.

    Args:
        lightPath: Path of the first spectrum, read with
            ``DataFile.getMassIntPairs``.
        heavyPath: Path of the second spectrum, read the same way.
        epsilon: Tolerance forwarded to ``SA.getNandCIons``.
        Nmod: Third positional argument to ``SA.getNandCIons``.  Defaults to
            17.0265, the value this function previously hard-coded.
        Cmod: Fourth positional argument to ``SA.getNandCIons``.  Defaults to
            -16.0187, the value this function previously hard-coded.

    Returns:
        Whatever ``SA.getSharedPeaksRatio`` returns for the two spectra and
        the ion tables computed from them.
    """
    lightPairs = DataFile.getMassIntPairs(lightPath)
    heavyPairs = DataFile.getMassIntPairs(heavyPath)
    N, C = SA.getNandCIons(lightPairs, heavyPairs, Nmod, Cmod, epsilon=epsilon)
    return SA.getSharedPeaksRatio(lightPairs, heavyPairs, N, C)
def getSharedPeaksRatio(lightPairs, heavyPairs, pairConfig, epsilon):
    """Return the shared peaks ratio of two already-loaded spectra.

    The ion tables are computed by ``SA.getNandCIons`` from the 'NMod' and
    'CMod' entries of ``pairConfig`` with tolerance ``epsilon`` and then
    forwarded to ``SA.getSharedPeaksRatio``.
    """
    nMod = pairConfig['NMod']
    cMod = pairConfig['CMod']
    ionTables = SA.getNandCIons(lightPairs, heavyPairs, nMod, cMod, epsilon=epsilon)
    nIons, cIons = ionTables
    return SA.getSharedPeaksRatio(lightPairs, heavyPairs, nIons, cIons)
# Script fragment: write one labelled feature line per pair of spectra that
# share a precursor-mass cluster, then start iterating the pair
# configurations.
# NOTE(review): this fragment was reconstructed at column 0; confirm the
# enclosing indentation against the surrounding script.
for cluster in precMassClusters:
    # A single-member cluster has no pairs to compare.
    if len(cluster) == 1:
        continue
    specs = []
    for scanF in cluster:
        specs += [DataFile.getMassIntPairs(scanFDict[scanF]['dta'])]
    for i in range(len(cluster)):
        for j in range(i+1, len(cluster)):
            # Only pairs where both scans have an entry for progName can be labelled.
            if cluster[i] in processedInfo[progName] and cluster[j] in processedInfo[progName]:
                epSTD = options.ppmstd * 10 ** -6 * scanFDict[cluster[i]]['precMass']
                SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(specs[i], specs[j], scanFDict[cluster[i]]['precMass'], NMod=0, CMod=0, epsilon=2*epSTD)
                seq1 = processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']]
                seq2 = processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']]
                # Label is 1 when both scans carry the same peptide, -1 otherwise.
                xVal = 1 if seq1 == seq2 else -1
                # Line layout: "<label> <key>:<value> ... # Scans <pep1>, <scan1> - <pep2>, <scan2>"
                # (looks like libsvm sparse input with a trailing comment - confirm).
                clusterOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # Scans %s, %i - %s, %i\n' % (processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']], cluster[i], processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']], cluster[j]))
clusterOut.close()
# NOTE(review): the body of this loop may continue beyond the visible fragment.
for pairConfigName in paramsDict['Pair Configurations']:
    pairConfig = paramsDict['Pair Configurations'][pairConfigName]
    # Expected precursor mass difference between the two members of a pair.
    delta = pairConfig['NMod'] + pairConfig['CMod']
    deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd)
lightSpecs = [ DataFile.getMassIntPairs(scanFDict[lightScanF]["dta"]) for lightScanF in samePeptideClusters[pair[0]] ] heavySpecs = [ DataFile.getMassIntPairs(scanFDict[heavyScanF]["dta"]) for heavyScanF in samePeptideClusters[pair[1]] ] lightPrecMass = np.average( np.array([scanFDict[lightScanF]["precMass"] for lightScanF in samePeptideClusters[pair[0]]]) ) # heavyPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[1]]])) # print lightPrecMass, heavyPrecMass # print samePeptideClusters[pair[0]], samePeptideClusters[pair[1]] epSTD = options.ppmstd * 10 ** -6 * lightPrecMass lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2 * epSTD) heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2 * epSTD) """ NTermTable, CTermTable = SA.getNandCIons(lightMergedSpec, heavyMergedSpec, Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD) NCrossTable, CCrossTable = SA.getCrossPairedIons(lightMergedSpec, heavyMergedSpec, lightPrecMass, Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD) NTermIonDict = prepIonTableForAddition(NTermTable, ['b', 'b']) CTermIonDict = prepIonTableForAddition(CTermTable, ['y', 'y']) NCrossIonDict = prepIonTableForAddition(NCrossTable, ['y', 'b']) CCrossIonDict = prepIonTableForAddition(CCrossTable, ['b', 'y']) allPairedIonsDict = addDicts(reverseDict(addDicts(NTermIonDict, CCrossIonDict)), reverseDict(addDicts(NCrossIonDict, CTermIonDict))) symLightInds = set() symHeavyInds = set() totalLightInds = set()