def getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoSeq, deNovoUnmodSeq, refSeq, alignedIndsMap, deNovoAmbigEdges=None):
    """Compare a de novo sequence against a reference with modified spans masked.

    Walks the intervals of ``modList`` in sorted order and rebuilds the
    reference sequence, replacing every retained interval with a single 'X'
    placeholder and recording a matching ambiguous edge. The rebuilt
    sequence is then compared with ``deNovoSeq`` through
    ``An.comparePeptideResults`` (ppm=10).

    An interval is left untouched (the reference residues are kept) when:
      * the first field of its first annotation contains 'Isobaric', or
      * that field contains 'Insertion' and the interval lies within two
        positions of either end of ``deNovoUnmodSeq``.

    Args:
        modList: dict keyed by (start, end) interval tuples; each value is a
            sequence whose first entry is indexed at [0] (mod type string)
            and [3] (value stored in the ambiguous edge).
        newRefEndInds: dict with 'start' (slice start into ``refSeq``) and
            'end' (offset added to ``len(refSeq)`` for the slice stop --
            presumably zero or negative; confirm with the caller).
        deNovoSeq: de novo sequence handed to the comparison as-is.
        deNovoUnmodSeq: only its length is used, for the end-proximity test.
        refSeq: reference sequence string that is sliced and masked.
        alignedIndsMap: dict with 'De Novo' and 'Ref' entries mapping
            interval endpoints to indices in the respective sequences.
        deNovoAmbigEdges: ambiguous edges for ``deNovoSeq``; a fresh empty
            list is used when omitted.

    Returns:
        Tuple of the first two elements of the comparison result (accuracy
        and precision judging by this function's name -- confirm against
        An.comparePeptideResults).
    """
    # A None sentinel avoids sharing one default list across all calls.
    if deNovoAmbigEdges is None:
        deNovoAmbigEdges = []

    prevIntervalStart = newRefEndInds['start']
    seqParts = []
    tempAmbigEdges = []

    for interval in sorted(modList):
        modInfo = modList[interval][0]
        modType = modInfo[0]

        # Isobaric substitutions keep the reference residues unchanged.
        if 'Isobaric' in modType:
            continue

        # Insertions hugging either terminus of the de novo peptide are
        # likewise not masked.
        if 'Insertion' in modType:
            deNovoInds = alignedIndsMap['De Novo']
            nearStart = deNovoInds[interval[0]] < 2
            if nearStart or (len(deNovoUnmodSeq) - deNovoInds[interval[1]]) < 2:
                continue

        # Emit the reference up to the interval, then one 'X' for the span.
        seqParts.append(refSeq[prevIntervalStart:alignedIndsMap['Ref'][interval[0]]])
        seqParts.append('X')
        tempAmbigEdges.append((0, modInfo[3]))
        # NOTE(review): the cursor is advanced only for masked intervals so
        # that skipped intervals keep their reference residues -- confirm
        # this matches the intended nesting of the original code.
        prevIntervalStart = alignedIndsMap['Ref'][interval[1]]

    # Remainder of the reference after the last masked interval.
    seqParts.append(refSeq[prevIntervalStart:(len(refSeq) + newRefEndInds['end'])])
    tempSeq = ''.join(seqParts)

    comp = An.comparePeptideResults(deNovoSeq, tempSeq, ambigEdges1=deNovoAmbigEdges, ambigEdges2=tempAmbigEdges, ppm=10)
    return comp[0], comp[1]
# scanScoreDict = getScanScoreDictRankBoost(LADSSeqInfo, seqEntry, scanFDict, rankModel, pairConfigurations['lightdimethyl_heavydimethyl'], PNet) # scanScoreDict = getScanScoreDictClusterNormScore(LADSSeqInfo, seqEntry) for i, scan in enumerate(lightScans): scanData = {'ScanF': scan} lightSeq = An.preprocessSequence(scanScoreDict[scan]['Seq'][0], seqMap, ambigEdges=scanScoreDict[scan]['Seq'][1]) scanData['LADS Sequence'] = lightSeq scanData['LADS Ambig Edges'] = scanScoreDict[scan]['Seq'][1] scanData['LADS Raw Score'] = scanScoreDict[scan]['Raw Score'] scanData['LADS Post Score'] = scanScoreDict[scan]['Post Score'] scanData['M+H'] = scanFDict[scan]['precMass'] try: comp = An.comparePeptideResults(lightSeq, SEQUESTMASCOTResults[scan]['Peptide'], ambigEdges1=scanScoreDict[scan]['Seq'][1], ambigEdges2=[], ppm=20) scanData['SEQUEST XCorr'] = SEQUESTMASCOTResults[scan]['SEQUEST XCorr'] scanData['MASCOT Ion Score'] = SEQUESTMASCOTResults[scan]['MASCOT Ion Score'] scanData['SEQUEST MASCOT Sequence'] = SEQUESTMASCOTResults[scan]['Peptide'] scanData['Accuracy'] = comp[0] scanData['Precision'] = comp[1] except KeyError: scanData['SEQUEST XCorr'] = None scanData['MASCOT Ion Score'] = None scanData['SEQUEST MASCOT Sequence'] = None scanData['Accuracy'] = None scanData['Precision'] = None outFile.write('\t'.join([str(scanData[col]) for col in cols]) + '\n') for i, scan in enumerate(heavyScans):
# Add path score (and normalized variants), delta rank, delta score, number of negative PRMs, and minimum node score for spectrum to feature list pathScore = spectrumOrderedScoreStats[i]['Path Scores'][PSMIndexDict[PSM[:2]]] numNegativePRMs = spectrumOrderedScoreStats[i]['Num Negative PRMs'][PSMIndexDict[PSM[:2]]] spectrumSpecificFeatureList += [pathScore, pathScore/peptLength, pathScore/scoreStats[PSM[:2]]['Maximum Path Score'], -spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]], spectrumOrderedScoreStats[i]['Delta Scores'][PSMIndexDict[PSM[:2]]], numNegativePRMs, numNegativePRMs/float(peptLength-1), spectrumOrderedScoreStats[i]['Min Node Scores'][PSMIndexDict[PSM[:2]]]] # Add mass deviation from true peptide mass to feature list precMass = scanFDict[scan]['precMass'] spectrumSpecificFeatureList += [abs(truePMs[PSM[:2]] + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)] peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[i], prmLadders[PSM[:2]], pairedPRMLadder, PNet) addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumSpecificFeatureList, peptLength) addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumSpecificFeatureList) spectrumSpecificFeatureList += [precMass, getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength] comp = An.comparePeptideResults(lightSeq, SEQUESTMASCOTResults[scan]['Peptide'], ambigEdges1=PSM[2], ambigEdges2=[], ppm=5) acc, prec = comp[0], comp[1] # print lightSeq, SEQUESTMASCOTResults[scan]['Peptide'] # print "acc, prec", acc, prec if prec < 1: rank = -1 else: rank = 1 writeFeatures(spectrumSpecificFeatureList, rank, 1, outFile, comment="Scan %i From DTA Directory %s" % (int(scan), dtadir)) # printFeatures(featureNames, spectrumSpecificFeatureList) for j, scan in enumerate(heavyScans): if int(scan) not in SEQUESTMASCOTResults:
for i in getAllScanF(processedInfo): scanData = {} scanData['ScanF'] = i for progName in progNames: for col in dbDict[progDict[progName]]['cols']: try: scanData[progName + ' ' + col] = processedInfo[progName][i][col] except KeyError: scanData[progName + ' ' + col] = None progPept = getPeptideData(progName, progDict, i) for sprogName in consensi[progName]: sProgPept = getPeptideData(sprogName, progDict, i) if progPept and sProgPept: comp = An.comparePeptideResults(progPept[0], sProgPept[0], ambigEdges1=progPept[1], ambigEdges2=sProgPept[1]) else: comp = (None, None, None) for j, item in enumerate(['Precision', 'Accuracy', 'Consensus']): scanData[progName + ' ' + sprogName + ' ' + item] = comp[j] outFile.write('\t'.join([str(scanData[col]) for col in cols]) + '\n') print '\nScan Number %i \n' % i print '\t'.join(['Program', 'Peptide', 'Reference', 'Score', 'Obs M+H']) for progName in progNames: peptide = str(scanData[progName + ' ' + dbDict['infoMap'][progDict[progName]]['Peptide']]) score = dbDict['infoMap'][progDict[progName]]['Score'] + ': ' + str(scanData[progName + ' ' + dbDict['infoMap'][progDict[progName]]['Score']]) try: MH = str(scanData[progName + ' ' + dbDict['infoMap'][progDict[progName]]['Obs M+H']]) except KeyError: