def getSamePeptideClusters(precMassClusters, scanFDict, svmModel, svmRange, ppmSTD=5, cutOff=0):
    """Refine precursor-mass clusters using pairwise SVM predictions.

    Clusters holding exactly one item are copied to the result unchanged.
    For every other cluster, each unordered pair of member spectra is
    described by SA.getSpectraPairInfoForSVMClassification, the feature
    records are normalized with svmRange and labelled by svmModel, and the
    resulting pairwise score matrix is handed to
    heirarchicalClusteringAverageLinkage, whose output clusters are
    appended to the result.

    Arguments:
        precMassClusters -- iterable of clusters; each cluster is a sequence
            of keys into scanFDict.
        scanFDict -- mapping from those keys to dicts that provide at least
            the 'dta' and 'precMass' entries.
        svmModel -- model forwarded to svmutil.svm_predict.
        svmRange -- ranges forwarded to svmutil.normalize_instances.
        ppmSTD -- ppm value; the epsilon used for a pair is
            2 * ppmSTD * 10 ** -6 * precMass of the pair's first spectrum.
        cutOff -- forwarded to heirarchicalClusteringAverageLinkage.

    Returns:
        A list of clusters: the untouched singletons plus whatever
        heirarchicalClusteringAverageLinkage returns for the larger ones.
    """
    trueClusters = []
    for cluster in precMassClusters:
        if len(cluster) == 1:
            # Nothing to compare against: keep the singleton as-is.
            trueClusters.append(cluster)
            continue

        # print 'testing cluster', cluster
        numSpecs = len(cluster)
        specs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in cluster]

        # -2 is the fill value; entries that are never scored below (the
        # diagonal in particular) keep it.
        dMatrix = np.ones((numSpecs, numSpecs)) * -2

        pairIndex = []
        xVals = []
        for i in range(numSpecs):
            for j in range(i + 1, numSpecs):
                # The tolerance is derived from the first spectrum of the pair only.
                precMass = scanFDict[cluster[i]]['precMass']
                epSTD = ppmSTD * 10 ** -6 * precMass
                xVals.append(SA.getSpectraPairInfoForSVMClassification(
                    specs[i], specs[j], precMass, NMod=0, CMod=0, epsilon=2 * epSTD))
                pairIndex.append((i, j))

        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLabs = svmutil.svm_predict([0] * len(xValsNorm), xValsNorm, svmModel)[0]
        # print pLabs

        for (i, j), pLab, features in zip(pairIndex, pLabs, xVals):
            # Feature keys, per the original note: 4 is totalTICRatio,
            # 1 is TotalSharedPeaksRatio.  Pairs labelled 1 by the SVM are
            # scored with feature 1; every other pair gets -1.
            score = features[1] if pLab == 1 else -1
            dMatrix[i][j] = dMatrix[j][i] = score

        trueClusters.extend(heirarchicalClusteringAverageLinkage(
            [[scanF] for scanF in cluster], dMatrix, cutOff=cutOff))

    return trueClusters
def getPairs(pairs, xVals):
    """Push one SVM feature record per light/heavy cluster pair onto xVals.

    For each pair, the spectra of both referenced clusters are loaded and
    merged with SA.mergeSpectra, and the record returned by
    SA.getSpectraPairInfoForSVMClassification is passed to xVals.put wrapped
    in a one-element list.

    Reads the module-level names scanFDict, samePeptideClusters, paramsDict
    and pairConfig.

    Arguments:
        pairs -- iterable of two-item sequences; item 0 and item 1 index
            samePeptideClusters (light and heavy cluster respectively).
        xVals -- object exposing put(); presumably a queue -- confirm
            against the caller.

    Returns:
        The xVals object that was passed in.
    """
    for pair in pairs:
        lightScans = samePeptideClusters[pair[0]]
        lightSpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in lightScans]

        heavyScans = samePeptideClusters[pair[1]]
        heavySpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in heavyScans]

        # The light cluster's mean precursor mass sets the tolerance for the whole pair.
        lightMasses = [scanFDict[scanF]['precMass'] for scanF in lightScans]
        lightPrecMass = np.average(np.array(lightMasses))

        ppmStd = float(paramsDict['ppmstd']['value'])
        epSTD = ppmStd * 10 ** -6 * lightPrecMass
        tolerance = 2 * epSTD

        mergedLight = SA.mergeSpectra(lightSpecs, epsilon=tolerance)
        mergedHeavy = SA.mergeSpectra(heavySpecs, epsilon=tolerance)

        pairFeatures = SA.getSpectraPairInfoForSVMClassification(
            mergedLight,
            mergedHeavy,
            lightPrecMass,
            NMod=pairConfig['NMod'],
            CMod=pairConfig['CMod'],
            epsilon=tolerance,
        )
        xVals.put([pairFeatures])

    return xVals
svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range') xVals = [] # xVals = getPairsThread(pairs) for pair in pairs: lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]] heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]] lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]])) epSTD = options.ppmstd * 10 ** -6 * lightPrecMass lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD) heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD) svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD) xVals += [svmClassificationData] xValsNorm = svmutil.normalize_instances(xVals, svmRange) pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0] print 'Pairs found. Time taken:', time.time() - t1, '\n' heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test']) heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol'] heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol'] # hyperParameters = PNet.getHyperParameters(pairConfigName) # ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(hyperParameters['minedge'], hyperParameters['ambigopen'], hyperParameters['ambigextend']) # ppmPenaltyFun = DNS.getPPMPenaltyFun(hyperParameters['ppmstd'], hashedAAs, hyperParameters['minedge'], hyperParameters['ppmpen'], 0, epStep)
# Dump one training line per spectrum pair of every multi-member cluster:
# a +1/-1 label, the 'index:value' features, and a trailing '# Scans' note.
for cluster in precMassClusters:
    if len(cluster) == 1:
        continue

    specs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in cluster]

    for i in range(len(cluster)):
        for j in range(i + 1, len(cluster)):
            # A pair can only be labelled when both members have an entry
            # in processedInfo[progName].
            if cluster[i] not in processedInfo[progName] or cluster[j] not in processedInfo[progName]:
                continue

            epSTD = options.ppmstd * 10 ** -6 * scanFDict[cluster[i]]['precMass']
            SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(
                specs[i],
                specs[j],
                scanFDict[cluster[i]]['precMass'],
                NMod=0,
                CMod=0,
                epsilon=2 * epSTD,
            )

            seq1 = processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']]
            seq2 = processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']]

            # Label is +1 when both entries carry the same 'Peptide' value.
            if seq1 == seq2:
                xVal = 1
            else:
                xVal = -1

            outFields = [str(xVal)]
            outFields += ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]
            clusterOut.write(
                ' '.join(outFields)
                + ' # Scans %s, %i - %s, %i\n' % (seq1, cluster[i], seq2, cluster[j])
            )

clusterOut.close()

for pairConfigName in paramsDict['Pair Configurations']:
    pairConfig = paramsDict['Pair Configurations'][pairConfigName]
    # The delta passed to the pair search is the sum of the N- and C-terminal mod values.
    delta = pairConfig['NMod'] + pairConfig['CMod']
    deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd)
symLightInds.add(ion[0]) for ion in heavyIons: totalHeavyInds.add(ion[0]) for ion in allPairedIonsDict[heavyIons]: totalLightInds.add(ion[0]) totalNumPeaks = float(lightMergedSpec.shape[0] + heavyMergedSpec.shape[0]) totalSharedPeaksRatio = (len(totalLightInds) + len(totalHeavyInds))/totalNumPeaks singleSymSharedPeaksRatio = (len(symLightInds) + len(symHeavyInds))/totalNumPeaks doubleSymSharedPeaksRatio = (len(doubleSymLightInds) + len(doubleSymHeavyInds))/totalNumPeaks """ SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification( lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig["NMod"], pairConfig["CMod"], epsilon=2 * epSTD, ) possPairs = [ (lightScanF, heavyScanF) for lightScanF in samePeptideClusters[pair[0]] for heavyScanF in samePeptideClusters[pair[1]] ] # possPairsList += [set(possPairs)] y += [1 if any([pair in progPairs[pairConfigName] for pair in possPairs]) else -1] # x += [{1: totalSharedPeaksRatio, 2: singleSymSharedPeaksRatio, 3: scanFDict[pair[0]]['precMass']}] x += [SVMClassificationInfo] # pairs[pairConfigName][pair] = getSharedPeaksRatio(lightSpec, heavySpec, pairConfig, epsilon)