# NOTE(review): this first statement starts at column 0 while the statements below it are
# indented 20 columns; its leading whitespace was presumably lost before this view -- confirm
# against the original file. The enclosing loops over i and j are outside this view.
# Build the feature mapping for the spectra pair (specs[i], specs[j]), passing the precursor
# mass recorded for scan cluster[i], NMod=0, CMod=0 and a tolerance of twice epSTD.
SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(specs[i], specs[j], scanFDict[cluster[i]]['precMass'], NMod=0, CMod=0, epsilon=2*epSTD)
                    # Peptide strings reported by search program progName for the two scans.
                    seq1 = processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']]
                    seq2 = processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']]

                    # Class label: +1 when both scans got the identical peptide string, -1 otherwise.
                    xVal = 1 if seq1 == seq2 else -1
                    # One output line per pair: the label, then 'key:value' features in ascending
                    # key order (keys are formatted with %i, values with %f), then a trailing
                    # '# Scans ...' note holding both peptide strings and cluster[i] / cluster[j].
                    # presumably the libsvm sparse training-file layout -- TODO confirm.
                    clusterOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # Scans %s, %i - %s, %i\n' % (processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']], cluster[i], processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']], cluster[j]))

    # clusterOut is opened outside this view; it is closed here after the loops finish.
    clusterOut.close()

    
    # For every configured pair type, find cluster pairs whose precursor masses differ by the
    # configured shift and collect the peptides the database search assigned to each side.
    # NOTE(review): this loop is cut off at the end of the visible source.
    for pairConfigName in paramsDict['Pair Configurations']:
        pairConfig = paramsDict['Pair Configurations'][pairConfigName]

        # Total mass shift searched for is the sum of the configured NMod and CMod values.
        delta = pairConfig['NMod'] + pairConfig['CMod']
        deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd)

        # One output file per pair configuration: '<output>_<configName>.txt'.
        # NOTE(review): no matching close() for pairsOut is visible in this view.
        pairsOut = open(options.output + '_' + pairConfigName + '.txt', 'w')

        for pair in deltaPairs:

            # Scale the ppm setting by the precursor mass of the first scan in the pair's
            # first cluster (ppmstd * 1e-6 * precMass).
            epSTD = options.ppmstd * 10 ** -6 * scanFDict[precMassClusters[pair[0]][0]]['precMass']
            
            # Get all possible true pairings from database search results
            # Each new key starts with its own empty 'light' and 'heavy' scan lists.
            uniquePeptideDict = defaultdict(lambda: {'light': [], 'heavy': []})
            # Scans of the first cluster (pair[0]) that have a search result are filed under
            # 'light', keyed by the result of An.stripModifications on their peptide string.
            # presumably noRemove=['#'] keeps '#' markers in the key -- TODO confirm.
            for scanF in precMassClusters[pair[0]]:
                if scanF in processedInfo[progName]:
                    uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['light'] += [scanF]

            # Same membership test for the second cluster (pair[1]); the body of this branch
            # lies beyond the visible source.
            for scanF in precMassClusters[pair[1]]:
                if scanF in processedInfo[progName]:
# Example no. 2 (separator between independent code samples; not part of the program)
# 0
    # First pass: cluster the entries of dtaList with Analytics.findSamePrecMassClusters,
    # using the ppm setting from the command-line options.
    precMassClusters = Analytics.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)
#    print 'precMassClusters', precMassClusters
    # Second pass: refine those clusters with the cluster SVM model and ranges; the cutoff is
    # read from the 'Cluster Configuration' section of the parameters and converted to float.
    samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=float(paramsDict['Cluster Configuration']['cutoff']))
#    samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
#    samePeptideClusters = An.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)

    # To test without any clustering
    #samePeptideClusters = [[scanF] for scanF in scanFDict]
    
    # For every configured pair type, locate candidate cluster pairs and load the SVM model
    # used for that configuration.
    # NOTE(review): this loop is cut off at the end of the visible source, and the print
    # statement below is Python 2 syntax.
    for pairConfigName in paramsDict['Pair Configurations']:
        
        print 'Getting heavy-light pairs for %s' % (pairConfigName,)
        # Start time for this configuration; where it is read is outside this view.
        t1 = time.time()

        pairConfig = paramsDict['Pair Configurations'][pairConfigName]
        # Candidate pairs of clusters for a shift of NMod + CMod; each pair is indexed below
        # as positions into samePeptideClusters.
        pairs = Analytics.findDeltaPairsClusters(samePeptideClusters, scanFDict, pairConfig['NMod']+pairConfig['CMod'], ppm=options.ppmstd)
        # Helpers built from the configuration's static terminal settings and the enzyme
        # specificity; their use is outside this view.
        addEnds = DNS.getSpectrumGraphEndpointInitFunction(pairConfig['NStatic'], pairConfig['CStatic'], paramsDict['Enzyme']['specificity'])
        termModHash = Constants.getTermModHashForPairConfig(pairConfig)
        
        # Model path is 'parent' + the configured model name; the ranges file is the same
        # path with its extension replaced by '.range'.
        svmModel = svmutil.svm_load_model(parent + pairConfig['Model'])
        svmRange = svmutil.load_ranges(parent + os.path.splitext(pairConfig['Model'])[0] + '.range')
        
        xVals = []
        # xVals = getPairsThread(pairs)
        for pair in pairs:
            # Load the spectra of every scan in the first (light) and second (heavy) cluster.
            lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
            heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
            # Mean precursor mass over all scans of the light cluster.
            lightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))

            # Scale the ppm setting by that mean mass (ppmstd * 1e-6 * lightPrecMass).
            epSTD = options.ppmstd * 10 ** -6 * lightPrecMass
# Example no. 3 (separator between independent code samples; not part of the program)
# 0
        # Precision as a percentage truncated (not rounded) to two decimal places:
        # TP / (TP + FP) is scaled by 10000, cut to an int, then divided by 100.0.
        # NOTE(review): the opening 'try:' and anything before this line are outside this view.
        precision = int(float(TP) / (TP + FP) * 10000) / float(100)
    except ZeroDivisionError:
        # Reached from the line above when TP + FP is 0; precision is then reported as 100.
        precision = 100
    # Tab-separated summary: a header row preceded by a blank line, then the values.
    # 'sensitivity' is computed outside this view.
    outFile.write("\n" + "\t".join(["TP", "FP", "TN", "FN", "Sensitivity", "Precision"]) + "\n")
    outFile.write("\t".join([str(val) for val in [TP, FP, TN, FN, sensitivity, precision]]) + "\n")

    # For every configured pair type, load its SVM model, find candidate cluster pairs and
    # keep only the ones that the database search results can be checked against.
    # NOTE(review): the loop body may continue past the end of the visible source.
    for pairConfigName in paramsDict["Pair Configurations"]:
        # Fresh per-configuration result mapping; how it is filled is outside this view.
        pairs[pairConfigName] = {}
        pairConfig = paramsDict["Pair Configurations"][pairConfigName]
        startTime = time.time()

        # The ranges file shares the model's path with the extension replaced by ".range".
        svmModel = svmutil.svm_load_model(pairConfig["Model"])
        svmRange = svmutil.load_ranges(os.path.splitext(pairConfig["Model"])[0] + ".range")

        # Total mass shift searched for is the sum of the configured NMod and CMod values.
        delta = pairConfig["NMod"] + pairConfig["CMod"]
        deltaPairs = An.findDeltaPairsClusters(samePeptideClusters, scanFDict, delta, ppm=options.ppmstd)
        # Report how many candidate pairs were found before any filtering.
        outFile.write(
            "\nTotal number of cluster pairs considered for pair Config %s (including pairs not reported by database search results): %i\n"
            % (pairConfigName, len(deltaPairs))
        )
        x, y = [], []
        #       testedDeltaPairedScanFs = set()
        #       testedPairs = set()
        #       possPairsList = []
        testedDeltaPairs = []
        for pair in deltaPairs:
            # Skip the pair unless BOTH clusters contain at least one scan that has an
            # entry in processedInfo[progName].
            if not (
                any([scanF in processedInfo[progName] for scanF in samePeptideClusters[pair[0]]])
                and any([scanF in processedInfo[progName] for scanF in samePeptideClusters[pair[1]]])
            ):
                continue