def getSamePeptideClusters(precMassClusters, scanFDict, svmModel, svmRange, ppmSTD=5, cutOff=0):
    """Split precursor-mass clusters into sub-clusters using pairwise SVM predictions.

    A cluster holding exactly one scan is passed through unchanged. For any
    other cluster, every unordered pair of its spectra is converted into a
    feature vector by SA.getSpectraPairInfoForSVMClassification, all pairs are
    classified in a single svmutil.svm_predict call, and a symmetric pair
    matrix built from the results is handed to
    heirarchicalClusteringAverageLinkage together with cutOff.

    Args:
        precMassClusters: iterable of clusters; each cluster is an indexable
            sequence of scan identifiers used as keys into scanFDict.
        scanFDict: maps a scan identifier to a record from which the 'dta'
            and 'precMass' entries are read.
        svmModel: model object passed to svmutil.svm_predict.
        svmRange: scaling information passed to svmutil.normalize_instances.
        ppmSTD: multiplied by 1e-6 and by the precursor mass of the first
            spectrum of each pair to obtain epSTD; 2 * epSTD is passed as
            epsilon to the pair-feature routine.
        cutOff: forwarded unchanged to heirarchicalClusteringAverageLinkage.

    Returns:
        A list of clusters. Single-scan input clusters appear as the very
        same objects; the remaining entries are whatever
        heirarchicalClusteringAverageLinkage returns for each larger cluster.
    """
    trueClusters = []
    for cluster in precMassClusters:
        clusterSize = len(cluster)
        if clusterSize == 1:
            trueClusters.append(cluster)
            continue

        specs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in cluster]

        # Every cell starts at -2; only the off-diagonal cells are overwritten below.
        dMatrix = np.ones((clusterSize, clusterSize)) * -2

        pairIndex = []
        xVals = []
        # The last member never acts as the first element of a pair, so the
        # outer loop stops one short; this keeps the hoisted 'precMass'
        # lookup limited to the same records the pairwise loop always read.
        for i in range(clusterSize - 1):
            precMass = scanFDict[cluster[i]]['precMass']
            epSTD = ppmSTD * 10 ** -6 * precMass
            for j in range(i + 1, clusterSize):
                xVals.append(SA.getSpectraPairInfoForSVMClassification(
                    specs[i], specs[j], precMass, NMod=0, CMod=0, epsilon=2 * epSTD))
                pairIndex.append((i, j))

        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        # The first argument is a list of placeholder labels, one per instance.
        pLabs = svmutil.svm_predict([0] * len(xValsNorm), xValsNorm, svmModel)[0]

        for k, pLab in enumerate(pLabs):
            i, j = pairIndex[k]
            # Pairs predicted as label 1 get feature 1 of xVals (read from
            # xVals, not xValsNorm); all other pairs get -1.
            # Original note: "Scale distances by 4: totalTICRatio, 1: TotalSharedPeaksRatio"
            # NOTE(review): presumably feature 1 is TotalSharedPeaksRatio - confirm against SA.
            dMatrix[i, j] = dMatrix[j, i] = xVals[k][1] if pLab == 1 else -1

        trueClusters.extend(heirarchicalClusteringAverageLinkage(
            [[scanF] for scanF in cluster], dMatrix, cutOff=cutOff))

    return trueClusters
def getScanScoreDictSVM(LADSSeqInfo, seqEntry, scanFDict, svmModel, svmRange, pairConfig, PNet, desired_feats=None):
    """Pick, for each scan, the PSM with the highest SVM probability estimate.

    Feature lists for every (scan, PSM) combination come from
    getSpectrumAndPSMFeatureDict. For each scan, one feature dict per PSM in
    LADSSeqInfo[seqEntry] is built (keys are 1-based), normalized with
    svmutil.normalize_instances and scored with svmutil.svm_predict using the
    '-b 1' option. The PSM whose first probability-estimate column is largest
    is recorded.

    Args:
        LADSSeqInfo: mapping indexed by seqEntry that yields the list of PSMs;
            each PSM is indexed as PSM[0], PSM[1], PSM[2] and sliced as PSM[:2].
        seqEntry: key into LADSSeqInfo.
        scanFDict, pairConfig, PNet: forwarded to getSpectrumAndPSMFeatureDict.
        svmModel: model object passed to svmutil.svm_predict.
        svmRange: scaling information passed to svmutil.normalize_instances.
        desired_feats: optional sequence of 1-based positions into each
            feature list; when given, only those features are used, renumbered
            1..len(desired_feats) in the given order. When None, all features
            are used.

    Returns:
        dict mapping each scan to
        {'Seq': (PSM[1], PSM[2]), 'Raw Score': PSM[0], 'Post Score': prob}
        for the selected PSM.
    """
    scanScoreDict = {}
    spectrumAndPSMSpecificFeatureDict = getSpectrumAndPSMFeatureDict(LADSSeqInfo, seqEntry, scanFDict, pairConfig, PNet)

    # Now get PSM with highest rank score for each scan
    fullPSMList = LADSSeqInfo[seqEntry]
    # NOTE(review): lightScans and heavyScans are not bound anywhere in this
    # function, so they must resolve as module-level names at call time -
    # confirm they are defined, or derive them from seqEntry if that is the intent.
    for scan in lightScans + heavyScans:
        xVals = []
        for PSM in fullPSMList:
            featureList = spectrumAndPSMSpecificFeatureDict[(scan, PSM[:2])]
            # Identity test: '!= None' is evaluated element-wise when a NumPy
            # array is passed and cannot be used as a truth value.
            if desired_feats is not None:
                xVals.append(dict((i + 1, featureList[feat - 1]) for i, feat in enumerate(desired_feats)))
            else:
                xVals.append(dict((i + 1, value) for i, value in enumerate(featureList)))

        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        # Element [2] of the svm_predict result holds one row of estimates per
        # instance; the first column is used (the second column was an
        # alternative left commented out in the original code).
        probEstimates = svmutil.svm_predict([0] * len(xValsNorm), xValsNorm, svmModel, '-b 1')[2]
        probs = [estimate[0] for estimate in probEstimates]

        highestProbInd = np.argmax(probs)
        bestPSM = fullPSMList[highestProbInd]
        scanScoreDict[scan] = {
            'Seq': (bestPSM[1], bestPSM[2]),
            'Raw Score': bestPSM[0],
            'Post Score': probs[highestProbInd],
        }

    return scanScoreDict
# NOTE(review): this fragment's enclosing scope is outside the visible chunk;
# xVals, pairs, options, pairConfig, svmRange, svmModel, t1, seqMap, paramsDict,
# pairConfigName, outFile and cols are all expected to be bound before it runs.

# Build one SVM feature vector per candidate pair of clusters.
for pair in pairs:
    # pair[0] and pair[1] index samePeptideClusters; each selected cluster is a
    # collection of scan keys into scanFDict (the "light" and "heavy" sides).
    lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
    # Average precursor mass over the scans of the light cluster only.
    lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))

    # options.ppmstd scaled by 1e-6 and by the averaged light precursor mass
    # (presumably a ppm value converted to mass units - confirm).
    epSTD = options.ppmstd * 10 ** -6 * lightPrecMass
    # Both sides are merged with the same tolerance, 2 * epSTD.
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)

    # Pair features use the N-/C-terminal modification values from pairConfig.
    svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
    xVals += [svmClassificationData]

# Normalize all pair features at once and predict one label per pair; the first
# svm_predict argument is a list of placeholder labels.
xValsNorm = svmutil.normalize_instances(xVals, svmRange)
pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]

print 'Pairs found. Time taken:', time.time() - t1, '\n'

# Work on a deep copy so the shared seqMap entry is not modified, then set the
# terminal modification symbols from the current pair configuration.
heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']

# hyperParameters = PNet.getHyperParameters(pairConfigName)
# ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(hyperParameters['minedge'], hyperParameters['ambigopen'], hyperParameters['ambigextend'])
# ppmPenaltyFun = DNS.getPPMPenaltyFun(hyperParameters['ppmstd'], hashedAAs, hyperParameters['minedge'], hyperParameters['ppmpen'], 0, epStep)

# The un-normalized feature vectors (xVals) and the predicted labels (pLab) are
# both handed to the sequencing step.
getSequencingThread(pairs, xVals, paramsDict, outFile, cols, pLab)
# for i, pair in enumerate(pairs):
#     if pLab[i] == -1:
#         continue
) possPairs = [ (lightScanF, heavyScanF) for lightScanF in samePeptideClusters[pair[0]] for heavyScanF in samePeptideClusters[pair[1]] ] # possPairsList += [set(possPairs)] y += [1 if any([pair in progPairs[pairConfigName] for pair in possPairs]) else -1] # x += [{1: totalSharedPeaksRatio, 2: singleSymSharedPeaksRatio, 3: scanFDict[pair[0]]['precMass']}] x += [SVMClassificationInfo] # pairs[pairConfigName][pair] = getSharedPeaksRatio(lightSpec, heavySpec, pairConfig, epsilon) # print pair, pairs[pairConfigName][pair] x = svmutil.normalize_instances(x, svmRange) pLab = svmutil.svm_predict(y, x, svmModel)[0] pairs[pairConfigName] = {"test labels": pLab, "true labels": y, "pairs": testedDeltaPairs} times[pairConfigName] = time.time() - startTime # for i, pair in enumerate(testedPairs): # print pairs['same']['test labels'][i], pairs['same']['true labels'][i], pairs['same']['tested pairs'][i] # print processedInfo[progName][pairs['same']['tested pairs'][i][0]]['Peptide'], processedInfo[progName][pairs['same']['tested pairs'][i][1]]['Peptide'] for pairConfigName in paramsDict["Pair Configurations"]: truePairedScanFs = set() for pair in progPairs[pairConfigName]: truePairedScanFs.add(pair[0]) truePairedScanFs.add(pair[1]) # print 'number of true paired scanFs', len(truePairedScanFs)