Exemple #1
0
def getSamePeptideClusters(precMassClusters, scanFDict, svmModel, svmRange, ppmSTD=5, cutOff=0):
    """Refine precursor-mass clusters using pairwise SVM predictions.

    Clusters holding exactly one scan are passed through unchanged. For any
    other cluster, every unordered pair of its spectra is converted into an
    SVM feature vector, classified with svmModel, and the resulting pairwise
    matrix is handed to heirarchicalClusteringAverageLinkage.

    precMassClusters -- iterable of clusters; each cluster is a sequence of
                        keys into scanFDict
    scanFDict        -- maps a scan key to a dict whose 'dta' and 'precMass'
                        entries are read here
    svmModel         -- model passed to svmutil.svm_predict
    svmRange         -- range data passed to svmutil.normalize_instances
    ppmSTD           -- ppm figure scaled by the precursor mass to obtain the
                        epsilon used when comparing two spectra
    cutOff           -- forwarded unchanged to the hierarchical clustering

    Returns the list of refined clusters.
    """
    refinedClusters = []
    for cluster in precMassClusters:
        if len(cluster) == 1:
            # Nothing to compare against; keep the singleton as it is.
            refinedClusters.append(cluster)
            continue

        size = len(cluster)
        spectra = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in cluster]

        # Every cell starts at -2; only off-diagonal cells are overwritten below.
        pairMatrix = np.ones((size, size)) * -2

        indexPairs = []
        pairFeatures = []
        for first in range(size):
            for second in range(first + 1, size):
                # The tolerance is derived from the first scan's precursor mass.
                firstPrecMass = scanFDict[cluster[first]]['precMass']
                tolerance = ppmSTD * 10 ** -6 * firstPrecMass
                pairFeatures.append(
                    SA.getSpectraPairInfoForSVMClassification(
                        spectra[first], spectra[second], firstPrecMass,
                        NMod=0, CMod=0, epsilon=2 * tolerance))
                indexPairs.append((first, second))

        normalized = svmutil.normalize_instances(pairFeatures, svmRange)
        predictedLabels = svmutil.svm_predict([0] * len(normalized), normalized, svmModel)[0]

        for k, label in enumerate(predictedLabels):
            row, col = indexPairs[k]
            if label == 1:
                # Pairs predicted as label 1 are scored with feature 1 of the
                # un-normalised vector (the original note names it
                # TotalSharedPeaksRatio).
                value = pairFeatures[k][1]
            else:
                value = -1
            # Keep the matrix symmetric.
            pairMatrix[row][col] = value
            pairMatrix[col][row] = value

        startingClusters = [[scanF] for scanF in cluster]
        refinedClusters += heirarchicalClusteringAverageLinkage(startingClusters, pairMatrix, cutOff=cutOff)

    return refinedClusters
def getScanScoreDictSVM(LADSSeqInfo, seqEntry, scanFDict, svmModel, svmRange, pairConfig, PNet, desired_feats=None):
    """Choose, for every scan, the PSM to which the SVM gives the highest probability.

    LADSSeqInfo   -- mapping indexed by seqEntry; LADSSeqInfo[seqEntry] is the
                     list of candidate PSMs. Each PSM is indexed as PSM[0]
                     (reported as 'Raw Score') and PSM[1], PSM[2] (reported
                     together as 'Seq'); PSM[:2] is part of the feature key.
    seqEntry      -- key into LADSSeqInfo
    scanFDict, pairConfig, PNet
                  -- forwarded unchanged to getSpectrumAndPSMFeatureDict
    svmModel      -- model passed to svmutil.svm_predict (called with '-b 1')
    svmRange      -- range data passed to svmutil.normalize_instances
    desired_feats -- optional sequence of 1-based feature numbers; when given,
                     only those features are used and they are renumbered
                     1..len(desired_feats) in the given order. When None, all
                     features are used in their original order.

    Returns a dict mapping each scan to
    {'Seq': (PSM[1], PSM[2]), 'Raw Score': PSM[0], 'Post Score': probability}
    for the PSM with the highest probability.
    """
    scanScoreDict = {}

    spectrumAndPSMSpecificFeatureDict = getSpectrumAndPSMFeatureDict(LADSSeqInfo, seqEntry, scanFDict, pairConfig, PNet)
    # Now get the PSM with the highest probability for each scan
    fullPSMList = LADSSeqInfo[seqEntry]
    # NOTE(review): lightScans and heavyScans are not bound anywhere in this
    # function, so they are resolved from an outer scope at call time.
    # Confirm they are defined there (they may be meant to come from seqEntry).
    for scan in lightScans + heavyScans:
        xVals = []
        for PSM in fullPSMList:
            featureList = spectrumAndPSMSpecificFeatureDict[(scan, PSM[:2])]
            # Identity test: '!= None' would compare element-wise if
            # desired_feats were a numpy array.
            if desired_feats is not None:
                xVals.append(dict((i + 1, featureList[feat - 1]) for i, feat in enumerate(desired_feats)))
            else:
                xVals.append(dict((i + 1, feature) for i, feature in enumerate(featureList)))

        xValsNorm = svmutil.normalize_instances(xVals, svmRange)

        # Element [2] of the prediction result holds one row of values per
        # instance; transpose it and keep the first column. list() makes the
        # indexing work whether zip returns a list or an iterator.
        # NOTE(review): which class the first column refers to depends on the
        # model's label order -- confirm (the original kept a commented-out
        # variant that read column 1 instead).
        pVals = svmutil.svm_predict([0] * len(xValsNorm), xValsNorm, svmModel, '-b 1')[2]
        probs = list(zip(*pVals))[0]

        highestProbInd = np.argmax(probs)
        bestPSM = fullPSMList[highestProbInd]
        scanScoreDict[scan] = {'Seq': (bestPSM[1], bestPSM[2]), 'Raw Score': bestPSM[0], 'Post Score': probs[highestProbInd]}

    return scanScoreDict
        for pair in pairs:
            lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
            heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
            lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))

            epSTD = options.ppmstd * 10 ** -6 * lightPrecMass

            lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
            heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)


            svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
            xVals += [svmClassificationData]
        
        
        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]
        
        print 'Pairs found. Time taken:', time.time() - t1, '\n'
        heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
        heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
        heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']
        
#        hyperParameters = PNet.getHyperParameters(pairConfigName)
#        ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(hyperParameters['minedge'], hyperParameters['ambigopen'], hyperParameters['ambigextend'])
#        ppmPenaltyFun = DNS.getPPMPenaltyFun(hyperParameters['ppmstd'], hashedAAs, hyperParameters['minedge'], hyperParameters['ppmpen'], 0, epStep)

        getSequencingThread(pairs, xVals, paramsDict, outFile, cols, pLab)
        # for i, pair in enumerate(pairs):
        #     if pLab[i] == -1:
        #         continue
Exemple #4
0
            )

            possPairs = [
                (lightScanF, heavyScanF)
                for lightScanF in samePeptideClusters[pair[0]]
                for heavyScanF in samePeptideClusters[pair[1]]
            ]
            #            possPairsList += [set(possPairs)]
            y += [1 if any([pair in progPairs[pairConfigName] for pair in possPairs]) else -1]
            # x += [{1: totalSharedPeaksRatio, 2: singleSymSharedPeaksRatio, 3: scanFDict[pair[0]]['precMass']}]
            x += [SVMClassificationInfo]

        #            pairs[pairConfigName][pair] = getSharedPeaksRatio(lightSpec, heavySpec, pairConfig, epsilon)
        #            print pair, pairs[pairConfigName][pair]

        x = svmutil.normalize_instances(x, svmRange)
        pLab = svmutil.svm_predict(y, x, svmModel)[0]
        pairs[pairConfigName] = {"test labels": pLab, "true labels": y, "pairs": testedDeltaPairs}
        times[pairConfigName] = time.time() - startTime

    #    for i, pair in enumerate(testedPairs):
    #        print pairs['same']['test labels'][i], pairs['same']['true labels'][i], pairs['same']['tested pairs'][i]
    #        print processedInfo[progName][pairs['same']['tested pairs'][i][0]]['Peptide'], processedInfo[progName][pairs['same']['tested pairs'][i][1]]['Peptide']

    for pairConfigName in paramsDict["Pair Configurations"]:
        truePairedScanFs = set()
        for pair in progPairs[pairConfigName]:
            truePairedScanFs.add(pair[0])
            truePairedScanFs.add(pair[1])

        #        print 'number of true paired scanFs', len(truePairedScanFs)