Example #1
0
def getSamePeptideClusters(precMassClusters, scanFDict, svmModel, svmRange, ppmSTD=5, cutOff=0):
    """Split precursor-mass clusters into sub-clusters using SVM pair predictions.

    A cluster with exactly one member is passed through untouched. For any
    other cluster, every pair (i, j) with i < j of its members is described
    by SA.getSpectraPairInfoForSVMClassification, the descriptions are
    normalized with svmRange and labelled by svmModel, and the resulting
    pairwise matrix is handed to heirarchicalClusteringAverageLinkage.

    Args:
        precMassClusters: iterable of clusters; each cluster is an indexable
            collection of keys into scanFDict.
        scanFDict: mapping from scan key to a record providing 'dta' and
            'precMass' entries.
        svmModel: model object forwarded to svmutil.svm_predict.
        svmRange: range data forwarded to svmutil.normalize_instances.
        ppmSTD: ppm value; ppmSTD * 1e-6 * precMass of the first scan of a
            pair is doubled and passed as epsilon to the comparison.
        cutOff: forwarded as cutOff to the hierarchical clustering call.

    Returns:
        A list holding the untouched singleton clusters together with
        whatever clusters the hierarchical clustering call produced for the
        larger ones, in input order.
    """
    refined = []
    for members in precMassClusters:
        size = len(members)
        if size == 1:
            # Nothing to compare against: keep the singleton as it is.
            refined.append(members)
            continue

        spectra = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in members]

        # Every entry starts at -2; only the off-diagonal pairs are overwritten below.
        dMatrix = np.ones((size, size)) * -2

        indexPairs = []
        features = []
        for row in range(size):
            for col in range(row + 1, size):
                # The tolerance is derived from the precursor mass of the first scan of the pair.
                rowPrecMass = scanFDict[members[row]]['precMass']
                tolerance = ppmSTD * 10 ** -6 * rowPrecMass
                pairInfo = SA.getSpectraPairInfoForSVMClassification(
                    spectra[row], spectra[col], rowPrecMass,
                    NMod=0, CMod=0, epsilon=2*tolerance)
                features.append(pairInfo)
                indexPairs.append((row, col))

        normalized = svmutil.normalize_instances(features, svmRange)
        dummyLabels = [0] * len(normalized)
        predicted = svmutil.svm_predict(dummyLabels, normalized, svmModel)[0]

        for k, label in enumerate(predicted):
            # Feature 1 is used as the matrix value for pairs predicted as 1
            # (per the original author's note: 1 = TotalSharedPeaksRatio,
            # 4 = totalTICRatio); all other pairs get -1.
            if label == 1:
                value = features[k][1]
            else:
                value = -1
            row, col = indexPairs[k]
            dMatrix[row][col] = value
            dMatrix[col][row] = value

        startingGroups = [[scanF] for scanF in members]
        refined.extend(heirarchicalClusteringAverageLinkage(startingGroups, dMatrix, cutOff=cutOff))

    return refined
def getPairs(pairs, xVals):
    """Compute SVM classification data for each light/heavy cluster pair.

    For every pair, the spectra of the light cluster (pair[0]) and of the
    heavy cluster (pair[1]) are loaded and merged, then compared with
    SA.getSpectraPairInfoForSVMClassification. Each result is wrapped in a
    one-element list and handed to xVals.put.

    Note: this function reads scanFDict, samePeptideClusters, paramsDict and
    pairConfig from the enclosing scope; they are not parameters.

    Args:
        pairs: iterable of (lightIndex, heavyIndex) entries used to index
            samePeptideClusters.
        xVals: object with a put() method that collects the results
            (presumably a queue -- confirm against the caller).

    Returns:
        The same xVals object that was passed in.
    """
    for pair in pairs:
        lightCluster = samePeptideClusters[pair[0]]
        lightSpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in lightCluster]

        heavyCluster = samePeptideClusters[pair[1]]
        heavySpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in heavyCluster]

        # The light cluster's mean precursor mass drives the mass tolerance.
        lightMasses = np.array([scanFDict[scanF]['precMass'] for scanF in lightCluster])
        lightPrecMass = np.average(lightMasses)

        epSTD = (float(paramsDict['ppmstd']['value'])) * 10 ** -6 * lightPrecMass
        tolerance = 2*epSTD

        lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=tolerance)
        heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=tolerance)

        pairInfo = SA.getSpectraPairInfoForSVMClassification(
            lightMergedSpec, heavyMergedSpec, lightPrecMass,
            NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=tolerance)
        xVals.put([pairInfo])

    return xVals
        svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range')
        
        xVals = []
        # xVals = getPairsThread(pairs)
        for pair in pairs:
            lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
            heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
            lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))

            epSTD = options.ppmstd * 10 ** -6 * lightPrecMass

            lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
            heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)


            svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
            xVals += [svmClassificationData]
        
        
        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]
        
        print 'Pairs found. Time taken:', time.time() - t1, '\n'
        heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
        heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
        heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']
        
#        hyperParameters = PNet.getHyperParameters(pairConfigName)
#        ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(hyperParameters['minedge'], hyperParameters['ambigopen'], hyperParameters['ambigextend'])
#        ppmPenaltyFun = DNS.getPPMPenaltyFun(hyperParameters['ppmstd'], hashedAAs, hyperParameters['minedge'], hyperParameters['ppmpen'], 0, epStep)
    
    for cluster in precMassClusters:
        if len(cluster) == 1:
            continue

        specs = []
        for scanF in cluster:
            specs += [DataFile.getMassIntPairs(scanFDict[scanF]['dta'])]

        for i in range(len(cluster)):
            for j in range(i+1, len(cluster)):
                if cluster[i] in processedInfo[progName] and cluster[j] in processedInfo[progName]:
                    epSTD = options.ppmstd * 10 ** -6 * scanFDict[cluster[i]]['precMass']
                
                    SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(specs[i], specs[j], scanFDict[cluster[i]]['precMass'], NMod=0, CMod=0, epsilon=2*epSTD)
                    seq1 = processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']]
                    seq2 = processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']]

                    xVal = 1 if seq1 == seq2 else -1
                    clusterOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # Scans %s, %i - %s, %i\n' % (processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']], cluster[i], processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']], cluster[j]))

    clusterOut.close()

    
    for pairConfigName in paramsDict['Pair Configurations']:
        pairConfig = paramsDict['Pair Configurations'][pairConfigName]

        delta = pairConfig['NMod'] + pairConfig['CMod']
        deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd)
Example #5
0
                        symLightInds.add(ion[0])
                        
                for ion in heavyIons:
                    totalHeavyInds.add(ion[0])
                for ion in allPairedIonsDict[heavyIons]:
                    totalLightInds.add(ion[0])

            totalNumPeaks = float(lightMergedSpec.shape[0] + heavyMergedSpec.shape[0])
            totalSharedPeaksRatio = (len(totalLightInds) + len(totalHeavyInds))/totalNumPeaks
            singleSymSharedPeaksRatio = (len(symLightInds) + len(symHeavyInds))/totalNumPeaks
            doubleSymSharedPeaksRatio = (len(doubleSymLightInds) + len(doubleSymHeavyInds))/totalNumPeaks
            """
            SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(
                lightMergedSpec,
                heavyMergedSpec,
                lightPrecMass,
                pairConfig["NMod"],
                pairConfig["CMod"],
                epsilon=2 * epSTD,
            )

            possPairs = [
                (lightScanF, heavyScanF)
                for lightScanF in samePeptideClusters[pair[0]]
                for heavyScanF in samePeptideClusters[pair[1]]
            ]
            #            possPairsList += [set(possPairs)]
            y += [1 if any([pair in progPairs[pairConfigName] for pair in possPairs]) else -1]
            # x += [{1: totalSharedPeaksRatio, 2: singleSymSharedPeaksRatio, 3: scanFDict[pair[0]]['precMass']}]
            x += [SVMClassificationInfo]

        #            pairs[pairConfigName][pair] = getSharedPeaksRatio(lightSpec, heavySpec, pairConfig, epsilon)