SVMClassificationInfo = SA.getSpectraPairInfoForSVMClassification(specs[i], specs[j], scanFDict[cluster[i]]['precMass'], NMod=0, CMod=0, epsilon=2*epSTD) seq1 = processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']] seq2 = processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']] xVal = 1 if seq1 == seq2 else -1 clusterOut.write(' '.join([str(xVal)] + ['%i:%f' % (key, SVMClassificationInfo[key]) for key in sorted(SVMClassificationInfo)]) + ' # Scans %s, %i - %s, %i\n' % (processedInfo[progName][cluster[i]][infoMap[progDict[progName]]['Peptide']], cluster[i], processedInfo[progName][cluster[j]][infoMap[progDict[progName]]['Peptide']], cluster[j])) clusterOut.close() for pairConfigName in paramsDict['Pair Configurations']: pairConfig = paramsDict['Pair Configurations'][pairConfigName] delta = pairConfig['NMod'] + pairConfig['CMod'] deltaPairs = An.findDeltaPairsClusters(precMassClusters, scanFDict, delta, ppm=options.ppmstd) pairsOut = open(options.output + '_' + pairConfigName + '.txt', 'w') for pair in deltaPairs: epSTD = options.ppmstd * 10 ** -6 * scanFDict[precMassClusters[pair[0]][0]]['precMass'] # Get all possible true pairings from database search results uniquePeptideDict = defaultdict(lambda: {'light': [], 'heavy': []}) for scanF in precMassClusters[pair[0]]: if scanF in processedInfo[progName]: uniquePeptideDict[An.stripModifications(processedInfo[progName][scanF][infoMap[progDict[progName]]['Peptide']], noRemove=['#'])]['light'] += [scanF] for scanF in precMassClusters[pair[1]]: if scanF in processedInfo[progName]:
# Cluster the spectra: first by precursor mass (ppm tolerance from the command
# line), then refine with the cluster SVM model using the cutoff configured
# under 'Cluster Configuration' in the params file.
precMassClusters = Analytics.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)
# print 'precMassClusters', precMassClusters
samePeptideClusters = Analytics.getSamePeptideClusters(
    precMassClusters,
    scanFDict,
    clusterSVMModel,
    clusterSVMRanges,
    ppmSTD=options.ppmstd,
    cutOff=float(paramsDict['Cluster Configuration']['cutoff']),
)
# Debugging alternatives kept from the original (disabled):
#   hard-coded cutoff:
#     samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
#     samePeptideClusters = An.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
#   to test without any clustering:
#     samePeptideClusters = [[scanF] for scanF in scanFDict]

for pairConfigName in paramsDict['Pair Configurations']:
    # Single parenthesised argument: prints the same text whether `print` is
    # the Python 2 statement or the print function.
    print('Getting heavy-light pairs for %s' % (pairConfigName,))
    t1 = time.time()  # start time for this configuration; not read in the visible part

    pairConfig = paramsDict['Pair Configurations'][pairConfigName]

    # Candidate (light, heavy) cluster index pairs whose mass difference
    # matches the combined N- and C-terminal modification mass.
    delta = pairConfig['NMod'] + pairConfig['CMod']
    pairs = Analytics.findDeltaPairsClusters(
        samePeptideClusters, scanFDict, delta, ppm=options.ppmstd
    )

    addEnds = DNS.getSpectrumGraphEndpointInitFunction(
        pairConfig['NStatic'],
        pairConfig['CStatic'],
        paramsDict['Enzyme']['specificity'],
    )
    termModHash = Constants.getTermModHashForPairConfig(pairConfig)

    # The SVM model path is relative to `parent`; its range file shares the
    # model's base name with a '.range' extension.
    svmModel = svmutil.svm_load_model(parent + pairConfig['Model'])
    svmRange = svmutil.load_ranges(
        parent + os.path.splitext(pairConfig['Model'])[0] + '.range'
    )

    xVals = []
    # xVals = getPairsThread(pairs)
    for pair in pairs:
        # pair[0] / pair[1] index the light / heavy cluster in samePeptideClusters.
        lightCluster = samePeptideClusters[pair[0]]
        lightSpecs = [
            DataFile.getMassIntPairs(scanFDict[lightScanF]['dta'])
            for lightScanF in lightCluster
        ]
        heavyCluster = samePeptideClusters[pair[1]]
        heavySpecs = [
            DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta'])
            for heavyScanF in heavyCluster
        ]

        # Mean precursor mass over the light cluster, then the ppm tolerance
        # (options.ppmstd parts per million) scaled by that mass.
        lightPrecMasses = np.array(
            [scanFDict[lightScanF]['precMass'] for lightScanF in lightCluster]
        )
        lightPrecMass = np.average(lightPrecMasses)
        epSTD = options.ppmstd * 10 ** -6 * lightPrecMass
precision = int(float(TP) / (TP + FP) * 10000) / float(100) except ZeroDivisionError: precision = 100 outFile.write("\n" + "\t".join(["TP", "FP", "TN", "FN", "Sensitivity", "Precision"]) + "\n") outFile.write("\t".join([str(val) for val in [TP, FP, TN, FN, sensitivity, precision]]) + "\n") for pairConfigName in paramsDict["Pair Configurations"]: pairs[pairConfigName] = {} pairConfig = paramsDict["Pair Configurations"][pairConfigName] startTime = time.time() svmModel = svmutil.svm_load_model(pairConfig["Model"]) svmRange = svmutil.load_ranges(os.path.splitext(pairConfig["Model"])[0] + ".range") delta = pairConfig["NMod"] + pairConfig["CMod"] deltaPairs = An.findDeltaPairsClusters(samePeptideClusters, scanFDict, delta, ppm=options.ppmstd) outFile.write( "\nTotal number of cluster pairs considered for pair Config %s (including pairs not reported by database search results): %i\n" % (pairConfigName, len(deltaPairs)) ) x, y = [], [] # testedDeltaPairedScanFs = set() # testedPairs = set() # possPairsList = [] testedDeltaPairs = [] for pair in deltaPairs: if not ( any([scanF in processedInfo[progName] for scanF in samePeptideClusters[pair[0]]]) and any([scanF in processedInfo[progName] for scanF in samePeptideClusters[pair[1]]]) ): continue