Beispiel #1
0
def getPairedAndUnpairedSpectra(dtaDir, dtaList, Nmod, Cmod, ppm=5, cutOff=0.1, verbose=False):
    specPairs = []
    unpairedSpecs = []
    delta = Nmod + Cmod
    for i in range(len(dtaList)):
        paired = False
        precMass1 = DataFile.getPrecMassAndCharge(dtaList[i])[0]
        spec1 = DataFile.getMassIntPairs(dtaList[i])
        for j in range(i + 1, len(dtaList)):
            precMass2 = DataFile.getPrecMassAndCharge(dtaList[j])[0]
            epsilon = ppm * 10 ** -6 * max(precMass1, precMass2)
            if np.abs(np.abs(precMass1 - precMass2) - delta) < epsilon:
                spec2 = DataFile.getMassIntPairs(dtaList[j])
                if precMass1 < precMass2:
                    N, C = SA.getNandCIons(spec1, spec2, Nmod, Cmod, epsilon=epsilon)
                    ratio = SA.getSharedPeaksRatio(spec1, spec2, N, C)
                else:
                    N, C = SA.getNandCIons(spec2, spec1, Nmod, Cmod, epsilon=epsilon)
                    ratio = SA.getSharedPeaksRatio(spec2, spec1, N, C)
                if ratio > cutOff:
                    if verbose:
                        print 'Pair found', dtaList[i], dtaList[j]
                    paired = True
                    specs = (dtaList[i], dtaList[j])
                    lightInd = int(precMass2 < precMass1)
                    specPairs.extend([(ratio, specs[lightInd], specs[1 - lightInd])])
        
        if not paired:
            unpairedSpecs.extend([dtaList[i]])
            if verbose:
                print 'No pairs for', dtaList[i]
    return specPairs, unpairedSpecs
Beispiel #2
0
def getClusterPairingStats(lightSpecs, heavySpecs, lightPrecMass, pairConfig, epSTD = 0.01):
    """Merge a light and a heavy cluster and summarise how their ions pair up.

    Both spectrum lists are merged with ``SA.mergeSpectra`` (tolerance
    ``2*epSTD``) and the paired ions are obtained from
    ``getAllPairedIonsDict``.  For every entry of that dictionary the delta
    masses produced by ``PN.ProbNetwork.deltaRules`` are averaged and filed
    under a pair-type string of the form ``<light ion types>_<heavy ion types>``.

    Returns:
        A dict with the keys 'Cluster Paired PRM Information',
        'Shared Peaks Ratio', 'Pair Type Stats', 'Light Merged Spec',
        'Heavy Merged Spec' and 'Num Paired Ions'.
    """
    mergeTol = 2*epSTD
    lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=mergeTol)
    heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=mergeTol)

    allPairedIonsDict = getAllPairedIonsDict(lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig, epSTD)

    # Seed both result tables with every pair type listed in the module-level
    # pairTypes collection; a pair type outside it raises KeyError below.
    specPairedPRMs = {}
    pairTypeCount = {}
    for knownType in pairTypes:
        specPairedPRMs[knownType] = []
        pairTypeCount[knownType] = 0

    numLightInds = 0
    numHeavyInds = 0
    for heavyIons in allPairedIonsDict:
        lightIons = allPairedIonsDict[heavyIons]
        lightTypes = []
        heavyTypes = []
        deltaMasses = []

        # Each ion is indexed as (row in the merged spectrum, ion type).
        for ion in lightIons:
            lightTypes.append(ion[1])
            rule = PN.ProbNetwork.deltaRules[ion[1]]
            deltaMasses.append(rule(lightPrecMass-Constants.mods['H+']-Constants.mods['H2O'], lightMergedSpec[ion[0]][0], 0, 0))
        for ion in heavyIons:
            heavyTypes.append(ion[1])
            rule = PN.ProbNetwork.deltaRules[ion[1]]
            deltaMasses.append(rule(lightPrecMass-Constants.mods['H+']-Constants.mods['H2O'], heavyMergedSpec[ion[0]][0], pairConfig['NMod'], pairConfig['CMod']))

        pairTypeString = ''.join(lightTypes) + '_' + ''.join(heavyTypes)
        specPairedPRMs[pairTypeString].append((sum(deltaMasses)/len(deltaMasses), (lightIons, heavyIons)))
        pairTypeCount[pairTypeString] += 1

        numLightInds += len(lightIons)
        numHeavyInds += len(heavyIons)

    numPairedIons = numLightInds + numHeavyInds
    # Paired ions as a fraction of all rows in the two merged spectra.
    sharedPeaksRatio = float(numPairedIons)/(lightMergedSpec.shape[0] + heavyMergedSpec.shape[0])

    return {
        'Cluster Paired PRM Information': specPairedPRMs,
        'Shared Peaks Ratio': sharedPeaksRatio,
        'Pair Type Stats': pairTypeCount,
        'Light Merged Spec': lightMergedSpec,
        'Heavy Merged Spec': heavyMergedSpec,
        'Num Paired Ions': numPairedIons,
    }
Beispiel #3
0
def getAllPairedIonsDict(lightMergedSpec, heavyMergedSpec, lightPrecMass, pairConfig, epSTD=0.01):
    """Combine the same-terminus and cross-terminus ion pairings of two spectra.

    ``SA.getNandCIons`` and ``SA.getCrossPairedIons`` are each run with the
    pair configuration's 'NMod'/'CMod' shifts and a tolerance of ``2*epSTD``.
    The four resulting tables are labelled with their ion types, merged two at
    a time, reversed with ``SA.reverseDict`` and merged into one dictionary.
    """
    modKwargs = {
        'Nmod': pairConfig['NMod'],
        'Cmod': pairConfig['CMod'],
        'epsilon': 2*epSTD,
    }
    NTermTable, CTermTable = SA.getNandCIons(lightMergedSpec, heavyMergedSpec, **modKwargs)
    NCrossTable, CCrossTable = SA.getCrossPairedIons(lightMergedSpec, heavyMergedSpec, lightPrecMass, **modKwargs)

    # Tag every table with its ion-type pair before merging.
    NTermIonDict = SA.prepIonTableForAddition(NTermTable, ['b', 'b'])
    CTermIonDict = SA.prepIonTableForAddition(CTermTable, ['y', 'y'])
    NCrossIonDict = SA.prepIonTableForAddition(NCrossTable, ['y', 'b'])
    CCrossIonDict = SA.prepIonTableForAddition(CCrossTable, ['b', 'y'])

    # Helper calls stay in their original order in case they modify their
    # arguments in place.
    firstReversed = SA.reverseDict(SA.addDicts(NTermIonDict, CCrossIonDict))
    secondReversed = SA.reverseDict(SA.addDicts(NCrossIonDict, CTermIonDict))
    return SA.addDicts(firstReversed, secondReversed)
def getPairs(pairs, xVals):
    """Compute SVM classification data for each cluster pair and queue it.

    Relies on the module-level ``scanFDict``, ``samePeptideClusters``,
    ``paramsDict`` and ``pairConfig``.  For each pair the spectra of both
    clusters are merged and the result of
    ``SA.getSpectraPairInfoForSVMClassification`` is passed, wrapped in a
    one-element list, to ``xVals.put``.

    Returns:
        The ``xVals`` object that was passed in.
    """
    for pair in pairs:
        lightCluster = samePeptideClusters[pair[0]]
        lightSpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in lightCluster]

        heavyCluster = samePeptideClusters[pair[1]]
        heavySpecs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in heavyCluster]

        # The mean precursor mass of the light cluster stands for the pair.
        lightMasses = [scanFDict[scanF]['precMass'] for scanF in lightCluster]
        lightPrecMass = np.average(np.array(lightMasses))

        # Scale the configured ppm value by the light precursor mass.
        epSTD = float(paramsDict['ppmstd']['value']) * 10 ** -6 * lightPrecMass
        tolerance = 2*epSTD

        lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=tolerance)
        heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=tolerance)

        svmClassificationData = SA.getSpectraPairInfoForSVMClassification(
            lightMergedSpec,
            heavyMergedSpec,
            lightPrecMass,
            NMod=pairConfig['NMod'],
            CMod=pairConfig['CMod'],
            epsilon=tolerance,
        )
        xVals.put([svmClassificationData])

    return xVals
Beispiel #5
0
def getAlignmentRatios(scanInfoFName, dtaDir, delta, epsilon=0.02):
    scanInfo = DataFile.getScanInfo(scanInfoFName)
    dtaNames = DataFile.getDTAFNamesInDir(dtaDir)
    
    scansToUse = scanInfo
    """
    for i in range(len(scanInfo) - 1):
        if (int(scanInfo[i][0]) + 1 == int(scanInfo[i+1][0])):
            if (scanInfo[i][1] == scanInfo[i+1][1]):
                scansToUse += [scanInfo[i]]
        else:
            scansToUse += [scanInfo[i]]
    """
    ratios = []
    goodRatios = []
    for i in range(len(scansToUse)):
        for j in range(i + 1, len(scansToUse)):
            if j == i + 1:
                print '%s percent done' % str(float(i) / len(scansToUse))
            if np.abs(np.abs(float(scansToUse[i][1]) - float(scansToUse[j][1])) - delta) < epsilon:
                dta1 = '244.%(scanF)04i.%(scanF)04i.1.dta' % {'scanF': int(scansToUse[i][0])}
                dta2 = '244.%(scanF)04i.%(scanF)04i.1.dta' % {'scanF': int(scansToUse[j][0])}
                spec1 = DataFile.getMassIntPairs(dtaDir + dta1)
                spec2 = DataFile.getMassIntPairs(dtaDir + dta2)
                ratio = SA.getSharedPeaksRatio(float(scansToUse[i][1]), spec1, float(scansToUse[j][1]), spec2, epsilon)
                print ratio, scansToUse[i], scansToUse[j]
                ratios.extend([(ratio, scansToUse[i], scansToUse[j])])

    with open('heavylightpairs.txt', 'w') as fout:
        pickle.dump(ratios, fout)
    return ratios
Beispiel #6
0
def getSamePeptideClusters(precMassClusters, scanFDict, svmModel, svmRange, ppmSTD=5, cutOff=0):
    """Split each precursor-mass cluster into sub-clusters using an SVM.

    Single-scan clusters are passed through unchanged.  For larger clusters
    every scan pair is described with
    ``SA.getSpectraPairInfoForSVMClassification``, normalised and classified
    with ``svmutil``.  The predictions fill a pairwise matrix that is handed
    to ``heirarchicalClusteringAverageLinkage`` together with ``cutOff``.

    Returns:
        The list of resulting clusters.
    """
    trueClusters = []
    for cluster in precMassClusters:
        if len(cluster) == 1:
            trueClusters.append(cluster)
            continue

        numScans = len(cluster)
        specs = [DataFile.getMassIntPairs(scanFDict[scanF]['dta']) for scanF in cluster]

        # Every unordered pair (i < j), in row-major order.
        pairIndex = [(i, j) for i in range(numScans) for j in range(i + 1, numScans)]

        xVals = []
        for i, j in pairIndex:
            precMass = scanFDict[cluster[i]]['precMass']
            epSTD = ppmSTD * 10 ** -6 * precMass
            xVals.append(SA.getSpectraPairInfoForSVMClassification(specs[i], specs[j], precMass, NMod=0, CMod=0, epsilon=2*epSTD))

        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLabs = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]

        # Entries never written keep the initial value of -2.
        dMatrix = np.ones((numScans, numScans)) * -2
        for k, pLab in enumerate(pLabs):
            row, col = pairIndex[k]
            # Scale distances by 4: totalTICRatio, 1: TotalSharedPeaksRatio
            if pLab == 1:
                score = xVals[k][1]
            else:
                score = -1
            dMatrix[row][col] = dMatrix[col][row] = score

        trueClusters.extend(heirarchicalClusteringAverageLinkage([[scanF] for scanF in cluster], dMatrix, cutOff=cutOff))

    return trueClusters
Beispiel #7
0
def getSharedPRMs(prmLadder1, prmLadder2, epsilon=0.5):
    """Return the values of the two PRM ladders matched by ``SA.getPairedIons``.

    ``prmLadder1`` is binned into a hash table keyed by
    ``round(mass / epsilon)``; a later mass that lands in an occupied bin
    replaces the earlier one.  ``prmLadder2`` is placed in the first column of
    a two-column array and both are given to ``SA.getPairedIons`` with a delta
    of 0.0.

    Returns:
        A tuple taken from the matched data, or an empty list when nothing
        matched.
    """
    hashTable = {}
    for idx in range(prmLadder1.size):
        mass = prmLadder1[idx]
        hashTable[np.round(mass / epsilon)] = [(idx, mass)]

    # The second column is left at zero.
    ladder2Table = np.zeros((prmLadder2.size, 2))
    ladder2Table[:, 0] = prmLadder2
    pairedIonData = SA.getPairedIons(hashTable, ladder2Table, delta=0.0, epsilon=epsilon)

    # Transpose each entry and keep its second row, visiting keys in order.
    sharedPRMs = [list(zip(*pairedIonData[key]))[1] for key in sorted(pairedIonData)]

    if not sharedPRMs:
        return []
    return list(zip(*sharedPRMs))[0]
def getSharedPeaksRatio(lightPath, heavyPath, epsilon):
    """Load two spectra from disk and return their shared-peaks ratio.

    The N- and C-terminal shifts come from the module-level ``pairConfig``
    ('NMod' and 'CMod').
    """
    lightSpec = DataFile.getMassIntPairs(lightPath)
    heavySpec = DataFile.getMassIntPairs(heavyPath)
    nMod = pairConfig['NMod']
    cMod = pairConfig['CMod']
    nIons, cIons = SA.getNandCIons(lightSpec, heavySpec, nMod, cMod, epsilon=epsilon)
    return SA.getSharedPeaksRatio(lightSpec, heavySpec, nIons, cIons)
        # NOTE(review): excerpt from the body of a larger routine. Its `def`
        # line and the definitions of pairConfig, paramsDict, parent, pairs,
        # scanFDict, samePeptideClusters, options, t1, seqMap and
        # pairConfigName are outside this excerpt.

        # Helpers derived from the pair configuration: an endpoint-init
        # function built from the static terminal mods and the enzyme
        # specificity, and a terminal-mod hash.
        addEnds = DNS.getSpectrumGraphEndpointInitFunction(pairConfig['NStatic'], pairConfig['CStatic'], paramsDict['Enzyme']['specificity'])
        termModHash = Constants.getTermModHashForPairConfig(pairConfig)
        
        # Load the SVM model named by the pair configuration, plus the range
        # file that shares its path with the extension replaced by '.range'.
        svmModel = svmutil.svm_load_model(parent + pairConfig['Model'])
        svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range')
        
        xVals = []
        # xVals = getPairsThread(pairs)
        # Build one classification record per candidate pair of clusters.
        for pair in pairs:
            lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
            heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
            # Mean precursor mass over the scans of the light cluster.
            lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))

            # Scale the ppm option by the light precursor mass.
            epSTD = options.ppmstd * 10 ** -6 * lightPrecMass

            # Merge each cluster's spectra with a tolerance of 2*epSTD.
            lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2*epSTD)
            heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2*epSTD)


            svmClassificationData = SA.getSpectraPairInfoForSVMClassification(lightMergedSpec, heavyMergedSpec, lightPrecMass, NMod=pairConfig['NMod'], CMod=pairConfig['CMod'], epsilon=2*epSTD)
            xVals += [svmClassificationData]
        
        
        # Normalise the records with the loaded ranges and keep the first
        # element of svm_predict's result; the label list passed in is all
        # zeros.
        xValsNorm = svmutil.normalize_instances(xVals, svmRange)
        pLab = svmutil.svm_predict([0]*len(xValsNorm), xValsNorm, svmModel)[0]
        
        print 'Pairs found. Time taken:', time.time() - t1, '\n'
        # Independent copy of the 'LADS Unit Test' sequence map whose terminal
        # mods are replaced by this pair configuration's mod symbols.
        heavySeqMap = copy.deepcopy(seqMap['LADS Unit Test'])
        heavySeqMap['Mods']['N-Term'] = paramsDict['Pair Configurations'][pairConfigName]['NModSymbol']
        heavySeqMap['Mods']['C-Term'] = paramsDict['Pair Configurations'][pairConfigName]['CModSymbol']
        
def getSharedPeaksRatio(lightPath, heavyPath, epsilon, Nmod=17.0265, Cmod=-16.0187):
    """Load two spectra from disk and return their shared-peaks ratio.

    Args:
        lightPath: File read with ``DataFile.getMassIntPairs`` as the light
            spectrum.
        heavyPath: File read with ``DataFile.getMassIntPairs`` as the heavy
            spectrum.
        epsilon: Tolerance handed to ``SA.getNandCIons``.
        Nmod: N-terminal mass shift handed to ``SA.getNandCIons``.  Defaults
            to the value that used to be hard-coded here.
        Cmod: C-terminal mass shift handed to ``SA.getNandCIons``.  Defaults
            to the value that used to be hard-coded here.

    Returns:
        The value returned by ``SA.getSharedPeaksRatio`` for the two spectra.
    """
    lightPairs = DataFile.getMassIntPairs(lightPath)
    heavyPairs = DataFile.getMassIntPairs(heavyPath)
    N, C = SA.getNandCIons(lightPairs, heavyPairs, Nmod, Cmod, epsilon=epsilon)
    return SA.getSharedPeaksRatio(lightPairs, heavyPairs, N, C)
def getSharedPeaksRatio(lightPairs, heavyPairs, pairConfig, epsilon):
    """Return the shared-peaks ratio of two already-loaded spectra.

    The N- and C-terminal shifts are read from ``pairConfig`` ('NMod' and
    'CMod') and passed to ``SA.getNandCIons``.
    """
    nMod = pairConfig['NMod']
    cMod = pairConfig['CMod']
    nIons, cIons = SA.getNandCIons(lightPairs, heavyPairs, nMod, cMod, epsilon=epsilon)
    return SA.getSharedPeaksRatio(lightPairs, heavyPairs, nIons, cIons)
    
    # NOTE(review): excerpt from the body of a larger routine. Its `def` line
    # and the definitions of precMassClusters, scanFDict, processedInfo,
    # progName, progDict, infoMap, options, clusterOut and paramsDict are
    # outside this excerpt.

    # Write one labelled record to clusterOut for each scan pair inside a
    # precursor-mass cluster, provided both scans have an entry in
    # processedInfo[progName].
    for cluster in precMassClusters:
        # A single-scan cluster has no pairs to describe.
        if len(cluster) == 1:
            continue

        specs = []
        for scanF in cluster:
            specs += [DataFile.getMassIntPairs(scanFDict[scanF]['dta'])]

        for i in range(len(cluster)):
            for j in range(i+1, len(cluster)):
                if cluster[i] in processedInfo[progName] and cluster[j] in processedInfo[progName]:
                    # Scale the ppm option by the first scan's precursor mass.
                    epSTD = options.ppmstd * 10 ** -6 * scanFDict[cluster[i]]['precMass']
                
                    SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(specs[i], specs[j], scanFDict[cluster[i]]['precMass'], NMod=0, CMod=0, epsilon=2*epSTD)
                    seq1 = processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']]
                    seq2 = processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']]

                    # Despite its name, xVal is the label: 1 when both scans
                    # have the same peptide entry, -1 otherwise.
                    xVal = 1 if seq1 == seq2 else -1
                    # Record layout: label, 'key:value' items in sorted key
                    # order, then a trailing '# Scans ...' note with both
                    # peptide entries and scan identifiers.
                    clusterOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # Scans %s, %i - %s, %i\n' % (processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']], cluster[i], processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']], cluster[j]))

    clusterOut.close()

    
    # For every configured pair type, look for clusters whose precursor
    # masses differ by the sum of the N- and C-terminal shifts.
    for pairConfigName in paramsDict['Pair Configurations']:
        pairConfig = paramsDict['Pair Configurations'][pairConfigName]

        delta = pairConfig['NMod'] + pairConfig['CMod']
        deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd)
Beispiel #13
0
            # NOTE(review): excerpt from the body of a loop whose header is
            # outside this view; pair, scanFDict, samePeptideClusters and
            # options are defined elsewhere.

            # Load the spectra of every scan in the light (pair[0]) and heavy
            # (pair[1]) clusters.
            lightSpecs = [
                DataFile.getMassIntPairs(scanFDict[lightScanF]["dta"]) for lightScanF in samePeptideClusters[pair[0]]
            ]
            heavySpecs = [
                DataFile.getMassIntPairs(scanFDict[heavyScanF]["dta"]) for heavyScanF in samePeptideClusters[pair[1]]
            ]
            # Mean precursor mass over the scans of the light cluster.
            lightPrecMass = np.average(
                np.array([scanFDict[lightScanF]["precMass"] for lightScanF in samePeptideClusters[pair[0]]])
            )
            #           heavyPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[1]]]))
            #           print lightPrecMass, heavyPrecMass
            #           print samePeptideClusters[pair[0]], samePeptideClusters[pair[1]]
            # Scale the ppm option by the light precursor mass.
            epSTD = options.ppmstd * 10 ** -6 * lightPrecMass

            # Merge each cluster's spectra with a tolerance of 2 * epSTD.
            lightMergedSpec = SA.mergeSpectra(lightSpecs, epsilon=2 * epSTD)
            heavyMergedSpec = SA.mergeSpectra(heavySpecs, epsilon=2 * epSTD)

            """
            NTermTable, CTermTable = SA.getNandCIons(lightMergedSpec, heavyMergedSpec, Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD)
            NCrossTable, CCrossTable = SA.getCrossPairedIons(lightMergedSpec, heavyMergedSpec, lightPrecMass, Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD)

            NTermIonDict = prepIonTableForAddition(NTermTable, ['b', 'b'])
            CTermIonDict = prepIonTableForAddition(CTermTable, ['y', 'y'])
            NCrossIonDict = prepIonTableForAddition(NCrossTable, ['y', 'b'])
            CCrossIonDict = prepIonTableForAddition(CCrossTable, ['b', 'y'])
            
            allPairedIonsDict = addDicts(reverseDict(addDicts(NTermIonDict, CCrossIonDict)), reverseDict(addDicts(NCrossIonDict, CTermIonDict)))
            symLightInds = set()
            symHeavyInds = set()
            totalLightInds = set()