Exemple #1
0
def getClusterPairingStats(lightSpecs, heavySpecs, lightPrecMass, pairConfig, epSTD = 0.01):
    """Merge a light and a heavy spectrum cluster and summarise their paired ions.

    Both clusters are merged with SA.mergeSpectra (tolerance 2*epSTD), the
    paired ions are looked up with getAllPairedIonsDict, and every pairing is
    bucketed by its pair-type string.

    Parameters:
        lightSpecs    -- spectra of the light cluster, passed to SA.mergeSpectra
        heavySpecs    -- spectra of the heavy cluster, passed to SA.mergeSpectra
        lightPrecMass -- precursor mass of the light cluster
        pairConfig    -- mapping; the 'NMod' and 'CMod' entries are read here and
                         the whole mapping is forwarded to getAllPairedIonsDict
        epSTD         -- mass tolerance; merging uses twice this value

    Returns a dict with the keys:
        'Cluster Paired PRM Information' -- pair type -> list of
            (mean delta mass, (light ions, heavy ions)) tuples
        'Shared Peaks Ratio' -- paired ion count divided by the total number of
            rows in both merged spectra (0.0 when both merged spectra are empty)
        'Pair Type Stats'    -- pair type -> number of pairings of that type
        'Light Merged Spec', 'Heavy Merged Spec' -- the merged spectra
        'Num Paired Ions'    -- total number of light plus heavy paired ions

    Raises KeyError if a pairing yields a pair-type string that is not listed
    in the module-level pairTypes.
    """
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)

    allPairedIonsDict = getAllPairedIonsDict(lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig, epSTD)

    specPairedPRMs = {}
    pairTypeCount = {}
    for pairType in pairTypes:
        specPairedPRMs[pairType] = []
        pairTypeCount[pairType] = 0

    # Loop invariants: the mass handed to every delta rule and the rule table.
    pepMass = lightPrecMass - Constants.mods['H+'] - Constants.mods['H2O']
    deltaRules = PN.ProbNetwork.deltaRules

    numLightInds = 0
    numHeavyInds = 0
    # Keys of allPairedIonsDict are the heavy ions of a pairing, values the
    # matching light ions; each ion is indexed as (row in merged spectrum, ion type).
    for heavyIons in allPairedIonsDict:
        lightIons = allPairedIonsDict[heavyIons]
        deltaMasses = []
        lightTypes = []
        heavyTypes = []
        for ion in lightIons:
            lightTypes.append(ion[1])
            # Light ions are evaluated with zero terminal mods.
            deltaMasses.append(deltaRules[ion[1]](pepMass, lightMergedSpec[ion[0]][0], 0, 0))
        for ion in heavyIons:
            heavyTypes.append(ion[1])
            deltaMasses.append(deltaRules[ion[1]](pepMass, heavyMergedSpec[ion[0]][0], pairConfig['NMod'], pairConfig['CMod']))

        pairTypeString = ''.join(lightTypes) + '_' + ''.join(heavyTypes)
        specPairedPRMs[pairTypeString].append((sum(deltaMasses)/len(deltaMasses), (lightIons, heavyIons)))
        pairTypeCount[pairTypeString] += 1

        numLightInds += len(lightIons)
        numHeavyInds += len(heavyIons)

    numPairedIons = numLightInds + numHeavyInds
    totalPeaks = lightMergedSpec.shape[0] + heavyMergedSpec.shape[0]
    # Two empty merged spectra share nothing; avoid dividing by zero.
    if totalPeaks:
        sharedPeaksRatio = float(numPairedIons)/totalPeaks
    else:
        sharedPeaksRatio = 0.0

    return {'Cluster Paired PRM Information': specPairedPRMs, 'Shared Peaks Ratio': sharedPeaksRatio, 'Pair Type Stats': pairTypeCount, 'Light Merged Spec': lightMergedSpec, 'Heavy Merged Spec': heavyMergedSpec, 'Num Paired Ions': numPairedIons}
def getPairs(pairs, xVals):
    """Put SVM classification features for each light/heavy cluster pair on xVals.

    For every pair, pair[0] and pair[1] index the module-level
    samePeptideClusters to select the light and heavy scans. The scans'
    'dta' entries are loaded with DataFile.getMassIntPairs, both clusters are
    merged, and the features from SA.getSpectraPairInfoForSVMClassification
    are handed to xVals.put wrapped in a one-element list.

    Parameters:
        pairs -- iterable of indexable items (light cluster key, heavy cluster key)
        xVals -- object exposing put(); presumably a Queue -- confirm with caller

    Returns xVals itself.
    """
    for pair in pairs:
        lightScans = samePeptideClusters[pair[0]]
        lightSpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in lightScans]

        heavyScans = samePeptideClusters[pair[1]]
        heavySpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in heavyScans]

        # The light precursor mass is the mean over all scans of the light cluster.
        lightPrecMasses = np.array([scanFDict[scanF]['precMass'] for scanF in lightScans])
        lightPrecMass = np.average(lightPrecMasses)

        # The configured ppm standard deviation is scaled to an absolute mass.
        ppmSTD = float(paramsDict['ppmstd']['value'])
        epSTD = ppmSTD * 10 ** -6 * lightPrecMass

        mergedLight = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
        mergedHeavy = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)

        pairFeatures = SA.getSpectraPairInfoForSVMClassification(
            mergedLight,
            mergedHeavy,
            lightPrecMass,
            NMod=pairConfig['NMod'],
            CMod=pairConfig['CMod'],
            epsilon=2*epSTD)
        xVals.put([pairFeatures])

    return xVals
        addEnds = DNS.getSpectrumGraphEndpointInitFunction(pairConfig['NStatic'], pairConfig['CStatic'], paramsDict['Enzyme']['specificity'])
        termModHash = Constants.getTermModHashForPairConfig(pairConfig)
        
        svmModel = svmutil.svm_load_model(parent + pairConfig['Model'])
        svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range')
        
        xVals = []
        # xVals = getPairsThread(pairs)
        for pair in pairs:
            lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
            heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
            lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))

            epSTD = options.ppmstd * 10 ** -6 * lightPrecMass

            lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
            heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)


            svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
            xVals += [svmClassificationData]
        
        
        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]
        
        print 'Pairs found. Time taken:', time.time() - t1, '\n'
        heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
        heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
        heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']
        
            # Get all possible true pairings from database search results
            uniquePeptideDict = defaultdict(lambda: {'light': [], 'heavy': []})
            for scanF in precMassClusters[pair[0]]:
                if scanF in processedInfo[progName]:
                    uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['light'] += [scanF]

            for scanF in precMassClusters[pair[1]]:
                if scanF in processedInfo[progName]:
                    uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['heavy'] += [scanF]


            mergedSpecDictLight = {}
            mergedSpecDictHeavy = {}
            for peptide in uniquePeptideDict:
                if len(uniquePeptideDict[peptide]['light']) > 0:
                    mergedSpecDictLight[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in uniquePeptideDict[peptide]['light']], epsilon = 2*epSTD)
                if len(uniquePeptideDict[peptide]['heavy']) > 0:
                    mergedSpecDictHeavy[peptide] = SA.mergeSpectra([DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in uniquePeptideDict[peptide]['heavy']], epsilon = 2*epSTD)                

            for lightPept in mergedSpecDictLight:
                for heavyPept in mergedSpecDictHeavy:

                    xVal = 1 if lightPept == heavyPept else -1
                    SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(mergedSpecDictLight[lightPept], mergedSpecDictHeavy[heavyPept], scanFDict[precMassClusters[pair[0]][0]]['precMass'], pairConfig['NMod'], pairConfig['CMod'], epsilon=2*epSTD)

                    pairsOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # light peptide: %s, %s - heavy scans: %s, %s\n' % (lightPept, str(uniquePeptideDict[lightPept]['light']), heavyPept, str(uniquePeptideDict[heavyPept]['heavy'])))


        pairsOut.close()