def getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoSeq, deNovoUnmodSeq, refSeq, alignedIndsMap, deNovoAmbigEdges=None):
    """Compare a de novo sequence against a reference with modified regions masked.

    A masked copy of ``refSeq`` is built in which every retained modification
    interval is replaced by a single 'X' placeholder, and one ambiguous edge
    ``(0, modList[interval][0][3])`` is recorded per placeholder.  The masked
    sequence is then compared to ``deNovoSeq`` with
    ``An.comparePeptideResults`` (ppm=10).

    An interval is left unmasked (skipped) when the type string of its first
    modification entry, ``modList[interval][0][0]``:
      * contains 'Isobaric', or
      * contains 'Insertion' and the interval lies fewer than 2 positions from
        either end of ``deNovoUnmodSeq`` (per ``alignedIndsMap['De Novo']``).

    Parameters:
        modList: dict keyed by ``(start, end)`` interval tuples; each value is
            a sequence whose first entry is indexed at [0] (type string) and
            [3] (second element of the ambiguous edge -- presumably the
            modification mass; verify against the caller).
        newRefEndInds: dict with 'start' (slice start into ``refSeq``) and
            'end' (offset added to ``len(refSeq)`` for the slice end, so
            presumably zero or negative -- TODO confirm).
        deNovoSeq: de novo sequence passed through to the comparison.
        deNovoUnmodSeq: only its length is used, for the terminal-insertion check.
        refSeq: reference sequence string that is sliced and masked.
        alignedIndsMap: dict with 'De Novo' and 'Ref' entries mapping interval
            endpoints to indices in the respective sequences.
        deNovoAmbigEdges: ambiguous edges for ``deNovoSeq``; defaults to a
            fresh empty list on each call.

    Returns:
        Tuple of the first two items returned by ``An.comparePeptideResults``
        (accuracy and precision, going by this function's name).
    """
    # A literal [] default would be one list shared by every call; use a
    # sentinel so state cannot leak between calls via the callee.
    if deNovoAmbigEdges is None:
        deNovoAmbigEdges = []

    prevIntervalStart = newRefEndInds['start']
    seqPieces = []
    tempAmbigEdges = []
    for interval in sorted(modList):
        modInfo = modList[interval][0]
        modType = modInfo[0]
        if 'Isobaric' in modType:
            continue
        if 'Insertion' in modType:
            # Insertions within two positions of either terminus of the
            # de novo sequence are not masked.
            deNovoInds = alignedIndsMap['De Novo']
            if deNovoInds[interval[0]] < 2 or (len(deNovoUnmodSeq) - deNovoInds[interval[1]]) < 2:
                continue

        # Keep the untouched reference stretch, then stand in one 'X' for the
        # modified region.
        seqPieces.append(refSeq[prevIntervalStart:alignedIndsMap['Ref'][interval[0]]])
        seqPieces.append('X')
        tempAmbigEdges.append((0, modInfo[3]))
        prevIntervalStart = alignedIndsMap['Ref'][interval[1]]

    # Remainder of the reference after the last masked interval.
    seqPieces.append(refSeq[prevIntervalStart:(len(refSeq) + newRefEndInds['end'])])
    tempSeq = ''.join(seqPieces)

    comp = An.comparePeptideResults(deNovoSeq, tempSeq, ambigEdges1=deNovoAmbigEdges, ambigEdges2=tempAmbigEdges, ppm=10)
    return comp[0], comp[1]
#        scanScoreDict = getScanScoreDictRankBoost(LADSSeqInfo, seqEntry, scanFDict, rankModel, pairConfigurations['lightdimethyl_heavydimethyl'], PNet)
#        scanScoreDict = getScanScoreDictClusterNormScore(LADSSeqInfo, seqEntry)

        for i, scan in enumerate(lightScans):

            scanData = {'ScanF': scan}
                        
            lightSeq = An.preprocessSequence(scanScoreDict[scan]['Seq'][0], seqMap, ambigEdges=scanScoreDict[scan]['Seq'][1])
            scanData['LADS Sequence'] = lightSeq
            scanData['LADS Ambig Edges'] = scanScoreDict[scan]['Seq'][1]
            scanData['LADS Raw Score'] = scanScoreDict[scan]['Raw Score']
            scanData['LADS Post Score'] = scanScoreDict[scan]['Post Score']
            scanData['M+H'] = scanFDict[scan]['precMass']

            try:
                comp = An.comparePeptideResults(lightSeq, SEQUESTMASCOTResults[scan]['Peptide'], ambigEdges1=scanScoreDict[scan]['Seq'][1], ambigEdges2=[], ppm=20)            
                scanData['SEQUEST XCorr'] = SEQUESTMASCOTResults[scan]['SEQUEST XCorr']
                scanData['MASCOT Ion Score'] = SEQUESTMASCOTResults[scan]['MASCOT Ion Score']
                scanData['SEQUEST MASCOT Sequence'] = SEQUESTMASCOTResults[scan]['Peptide']
                scanData['Accuracy'] = comp[0]
                scanData['Precision'] = comp[1]
            except KeyError:
                scanData['SEQUEST XCorr'] = None
                scanData['MASCOT Ion Score'] = None
                scanData['SEQUEST MASCOT Sequence'] = None
                scanData['Accuracy'] = None
                scanData['Precision'] = None
                
            outFile.write('\t'.join([str(scanData[col]) for col in cols]) + '\n')
            
        for i, scan in enumerate(heavyScans):
                    # Add path score (and normalized variants), delta rank, delta score, number of negative PRMs, and minimum node score for spectrum to feature list
                    pathScore = spectrumOrderedScoreStats[i]['Path Scores'][PSMIndexDict[PSM[:2]]]
                    numNegativePRMs = spectrumOrderedScoreStats[i]['Num Negative PRMs'][PSMIndexDict[PSM[:2]]]
                    spectrumSpecificFeatureList += [pathScore, pathScore/peptLength, pathScore/scoreStats[PSM[:2]]['Maximum Path Score'], -spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]], spectrumOrderedScoreStats[i]['Delta Scores'][PSMIndexDict[PSM[:2]]], numNegativePRMs, numNegativePRMs/float(peptLength-1), spectrumOrderedScoreStats[i]['Min Node Scores'][PSMIndexDict[PSM[:2]]]]
                    
                    # Add mass deviation from true peptide mass to feature list
                    precMass = scanFDict[scan]['precMass']
                    spectrumSpecificFeatureList += [abs(truePMs[PSM[:2]] + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)]
                    
                    peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[i], prmLadders[PSM[:2]], pairedPRMLadder, PNet)
                    addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumSpecificFeatureList, peptLength)
                    addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumSpecificFeatureList)

                    spectrumSpecificFeatureList += [precMass, getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
                    
                    comp = An.comparePeptideResults(lightSeq, SEQUESTMASCOTResults[scan]['Peptide'], ambigEdges1=PSM[2], ambigEdges2=[], ppm=5)
                    acc, prec = comp[0], comp[1]

#                    print lightSeq, SEQUESTMASCOTResults[scan]['Peptide']
#                    print "acc, prec", acc, prec

                    if prec < 1:
                        rank = -1
                    else:
                        rank = 1

                    writeFeatures(spectrumSpecificFeatureList, rank, 1, outFile, comment="Scan %i From DTA Directory %s" % (int(scan), dtadir))
#                    printFeatures(featureNames, spectrumSpecificFeatureList)

                for j, scan in enumerate(heavyScans):
                    if int(scan) not in SEQUESTMASCOTResults:
# Example no. 4
# 0
    
    for i in getAllScanF(processedInfo):
        scanData = {}
        scanData['ScanF'] = i
        for progName in progNames:
            for col in dbDict[progDict[progName]]['cols']:
                try:
                    scanData[progName + ' ' + col] = processedInfo[progName][i][col]
                except KeyError:
                    scanData[progName + ' ' + col] = None
            progPept = getPeptideData(progName, progDict, i)

            for sprogName in consensi[progName]:
                sProgPept = getPeptideData(sprogName, progDict, i)
                if progPept and sProgPept:
                    comp = An.comparePeptideResults(progPept[0], sProgPept[0], ambigEdges1=progPept[1], ambigEdges2=sProgPept[1])
                else:
                    comp = (None, None, None)
                for j, item in enumerate(['Precision', 'Accuracy', 'Consensus']):
                    scanData[progName + ' ' + sprogName + ' ' + item] = comp[j]

        outFile.write('\t'.join([str(scanData[col]) for col in cols]) + '\n')
        print '\nScan Number %i \n' % i 
        print '\t'.join(['Program', 'Peptide', 'Reference', 'Score', 'Obs M+H'])
        
        for progName in progNames:
            peptide = str(scanData[progName + ' ' + dbDict['infoMap'][progDict[progName]]['Peptide']])
            score = dbDict['infoMap'][progDict[progName]]['Score'] + ': ' + str(scanData[progName + ' ' + dbDict['infoMap'][progDict[progName]]['Score']])
            try:
                MH = str(scanData[progName + ' ' + dbDict['infoMap'][progDict[progName]]['Obs M+H']])
            except KeyError: