Example #1
0
 def sequenceDTAs(self):
     """Sequence the currently selected scan entry and display the results.

     Looks up the entry selected in ``self._pairedScanListbox``, builds a
     spectrum graph with ``DNS.initializeSpectrumGraph`` (paired mode when
     the entry's ``'heavy'`` value is not ``'N/A'``, unpaired otherwise),
     extracts candidate paths with ``DNS.getSpectrumGraphPaths`` and fills
     the first ``self._numseq`` rows of ``self._seqScoreData`` in descending
     score order. Rows without a candidate are cleared. The outcome and
     elapsed time are reported through ``self._seqStatus``.
     """
     curPairedScanData = self._indexedPairData[int(self._pairedScanListbox.curselection()[0])]
     t1 = time.time()
     isPaired = curPairedScanData['heavy'] != 'N/A'
     lightDTA = self._scanFDict[curPairedScanData['light']]['dta']
     if isPaired:
         pairConfigName = curPairedScanData['pair configuration']
         pairConfig = self._paramsDict['Pair Configurations'][pairConfigName]
         # Copy of the sequence map carrying the terminal mod symbols of the
         # pair configuration; used below to derive the heavy sequence.
         heavySeqMap = copy.deepcopy(self._seqMap)
         heavySeqMap['Mods']['N-Term'] = pairConfig['NModSymbol']
         heavySeqMap['Mods']['C-Term'] = pairConfig['CModSymbol']
         sharedInfo, starts, ends, deltas, termModHash, specs, G = DNS.initializeSpectrumGraph(
             self._pnet, self._paramsDict, lightDTA,
             heavyPath=self._scanFDict[curPairedScanData['heavy']]['dta'],
             ppm=self._ppm, usePaired=True, pairConfigName=pairConfigName, verbose=False)
         precMass = sharedInfo['lightPrecMass']
     else:
         sharedInfo, starts, ends, deltas, termModHash, specs, G = DNS.initializeSpectrumGraph(
             self._pnet, self._paramsDict, lightDTA, ppm=self._ppm, verbose=False)
         precMass = sharedInfo['precMass']

     epsilon = self._ppm * precMass * 10 ** -6
     # Mass handed to the graph routines: precursor mass minus H+ and H2O.
     pepMass = precMass - Constants.mods['H+'] - Constants.mods['H2O']
     paths, subG = DNS.getSpectrumGraphPaths(
         G, deltas, specs, starts, ends, pepMass,
         termModHash=termModHash, unknownPenalty=self._ambigpenalty,
         maxEdge=self._maxedge, minEdge=self._minedge,
         subGraphCut=self._subgraphcut, subAlpha=0.3, alpha=self._alpha,
         epsilon=epsilon, aas=self._aas, verbose=False)
     seqTime = time.time() - t1
     if not paths:
         self._seqStatus.set('ERROR: No Sequences Found! Time wasted: %f seconds' % seqTime)
         return

     # Each path is indexed as (score, nodes, ...).
     seqs = [DNS.getSequenceFromNodes(subG, path[1], pepMass, termModHash) for path in paths]
     scores = [path[0] for path in paths]
     # Indices of the candidates, best score first.
     Ord = np.argsort(-1 * np.array(scores))

     # NOTE(review): ambigEdges accumulates across all ranked sequences, as in
     # the original code -- confirm this is what An.preprocessSequence expects.
     ambigEdges = []
     for j in range(self._numseq):
         try:
             # Raises IndexError when fewer than j + 1 candidates exist; the
             # handler below then clears this display row.
             rank = Ord[j]
             seq = seqs[rank]
             for i, residue in enumerate(seq):
                 if isinstance(residue, tuple):
                     # Tuple entries are ambiguous edges; record them and
                     # leave a '-' placeholder in the sequence.
                     ambigEdges.append(residue)
                     seq[i] = '-'

             curSeq = ''.join(seq)
             curSeq = An.preprocessSequence(curSeq, self._seqMap, ambigEdges=ambigEdges)
             if j == 0 and isPaired:
                 # Pre-bind so the error message below still has a sequence to
                 # report when preprocessSequence itself raises KeyError
                 # (previously this surfaced as an UnboundLocalError).
                 curHeavySeq = curSeq
                 try:
                     curHeavySeq = An.preprocessSequence(curSeq, heavySeqMap, replaceExistingTerminalMods=True, ambigEdges=ambigEdges)
                     # Called only for validation: a KeyError marks the heavy
                     # sequence as invalid.
                     An.getAllAAs(curHeavySeq, ambigEdges=ambigEdges)
                     self._seqStatus.set('Paired Sequencing Successful! Heavy Sequence: %s. Time taken: %f seconds' % (curHeavySeq, seqTime))
                 except KeyError:
                     self._seqStatus.set('ERROR: Heavy Sequence %s is not a valid sequence! Time wasted: %f seconds' % (curHeavySeq, seqTime))
             elif j == 0:
                 self._seqStatus.set('Unpaired Sequencing Successful! Time taken: %f seconds' % (seqTime))

             # Remove the labels from the previous run before redrawing.
             for labelInst in self._seqScoreData[j]['seq'].children.values():
                 labelInst.destroy()
             self.displayConfColoredSequence(subG, self._seqScoreData[j]['seq'], paths[rank][1], curSeq, ambigEdges=ambigEdges)
             self._seqScoreData[j]['score'].set(str(scores[rank]))
         except IndexError:
             for labelInst in self._seqScoreData[j]['seq'].children.values():
                 labelInst.destroy()
             self._seqScoreData[j]['score'].set('')
Example #2
0
def combineSpectraCompleteLinkage(masses1, masses2, epsilon=0.04):
    """Merge two mass lists, replacing each cluster of masses by its mean.

    The two inputs are pooled and sorted, then grouped by
    ``DNS.getClustersCompleteLinkage`` using ``epsilon``. Clusters with more
    than one member contribute their arithmetic mean; other clusters
    contribute their members unchanged. Returns a sorted numpy array.
    """
    pooled = np.sort(np.append(masses1, masses2))
    merged = []
    for group in DNS.getClustersCompleteLinkage(pooled, epsilon):
        if len(group) > 1:
            merged.append(sum(group) / len(group))
        else:
            merged.extend(group)

    return np.sort(np.array(merged))
def getSequencing(pair, sharedPeaks, paramsDict, outFile, res):
    global print_lock, spectrum_lock

    result = []

    scanData = {}
    lightSpecs = [DataFile.getMassIntPairs(scanFDict[lightScanF]['dta']) for lightScanF in samePeptideClusters[pair[0]]]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[heavyScanF]['dta']) for heavyScanF in samePeptideClusters[pair[1]]]
    precMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in samePeptideClusters[pair[0]]]))
    
    epMean = options.ppmsyserror * precMass * 10**-6
    epSTD = options.ppmstd * precMass * 10**-6
                
    scanData['shared peaks ratio'] = sharedPeaks

    s1 = time.time()
    sharedInfo, starts, ends, deltas, G = DNS.prepPairedSpectrumGraph(lightSpecs, heavySpecs, precMass, addEnds, ppmSTD=options.ppmstd, Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], verbose=options.verbose)
    scanData['M+H'] = precMass
    
    specs = []
    for massIntPairs in lightSpecs:
        specs += [PN.Spectrum(PNet, precMass, Nmod=0.0, Cmod=0.0, epsilon=2*epSTD, spectrum=massIntPairs)]
    for massIntPairs in heavySpecs:
        specs += [PN.Spectrum(PNet, precMass + pairConfig['NMod'] + pairConfig['CMod'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)]
    for spec in specs:
        spec.initializeNoiseModel()

    # with spectrum_lock:
    temp = DNS.getSpectrumGraphDataThread(G, deltas, specs, starts, ends, precMass - Constants.mods['H+'] - Constants.mods['H2O'], ambigPenaltyFun, ppmPenaltyFun, hashedAAs, termModHash=termModHash, maxEdge=options.maxedge, minEdge=options.minedge, subGraphCut=options.subgraphcut, subAlpha=0.3, alpha=options.alpha, epMean=epMean, epSTD=epSTD, epStep=epStep, verbose=options.verbose)
    temp_scan = temp[0]
    peps = temp[1]
    scanData.update(temp_scan)
    
    scanData['pair configuration'] = pairConfigName

    with print_lock:
        print 'Now sequencing light scan(s) %s, heavy scan(s) %s with shared peaks ratio %f \n' % (str(samePeptideClusters[pair[0]]), str(samePeptideClusters[pair[1]]), scanData['shared peaks ratio'])
        # out.append('Now sequencing light scan(s) ' + str(samePeptideClusters[pair[0]]) + ', heavy scan(s) ' + str(samePeptideClusters[pair[1]]) + ' with shared peaks ratio ' + str(scanData['shared peaks ratio']) + ' \n' )
        Ord = np.argsort(-1 * np.array(scanData['over_scores']))
        if scanData['blind'] == 0:
            for i in range(min(Ord.size, 10)):
                try:
                    print 'Score: ', peps[0][Ord[i]], 'Seq: ', ''.join(peps[1][Ord[i]])
                    # out.append('Score: ' + str(peps[0][Ord[i]]) + ' Seq: ' + ''.join(peps[1][Ord[i]]))
                except TypeError:
                    print 'Score: ', peps[0][Ord[i]], 'Seq: ', peps[1][Ord[i]]
                    # out.append('Score: ' + str(peps[0][Ord[i]]) + ' Seq: ' + str(peps[1][Ord[i]]))
        elif scanData['blind'] == 1:
            for i in range(min(Ord.size, maxNum)):
                try:
                    print 'Score: ', peps[0][Ord[i]], 'Seq: ', ''.join(peps[1][Ord[i]][0]), 'Mod Names: ', peps[2][Ord[i]][1]
                    # out.append('Score: ' + str(peps[0][Ord[i]]) + ' Seq: ' + ''.join(peps[1][Ord[i]][0]) + ' Mod Names: ' + peps[2][Ord[i]][1])
                except TypeError:
                    print 'Score: ', peps[0][Ord[i]], 'Seq: ', peps[1][Ord[i]][0], 'Mod Names: ', peps[2][1]
                    # out.append('Score: ' + str(peps[0][Ord[i]]) + ' Seq: ' + peps[1][Ord[i]][0] +  ' Mod Names: ' + peps[2][1])
        
        scanData['sequencing time'] = time.time() - s1
        print '\nTime Taken:', time.time() - s1, '\n'    
    # out.append('\nTime Taken: ' + str(time.time() - s1) + '\n')

    if validateHeavySequence(scanData['seq'], heavySeqMap, scanData['ambiguous edges']):
        for scanF in samePeptideClusters[pair[0]] + samePeptideClusters[pair[1]]:
            scanFDict[scanF]['sequenced'] = True
        if options.output:
            for pair in [(lightScanF, heavyScanF) for lightScanF in samePeptideClusters[pair[0]] for heavyScanF in samePeptideClusters[pair[1]]]:
                scanData['light scan'] = int(pair[0])
                scanData['heavy scan'] = int(pair[1])                  
                # outFile.write('\t'.join([str(scanData[col]) for col in cols]) + '\n')
                # print str(scanData[col])
                res.append([str(scanData[col]) for col in cols])
        else:
            print 'WARNING: Invalid sequence! Unsuccessful sequencing of %s and %s with pair configuration %s' % (str(samePeptideClusters[pair[0]]), str(samePeptideClusters[pair[1]]), pairConfigName)

    exit(0)
        # NOTE(review): this line is indented deeper than the statements that
        # follow, and the enclosing header is not visible in this excerpt.
        # Column keys; their upper-cased forms make up the header row below.
        cols = ['light scan', 'heavy scan', 'pair configuration', 'M+H', 'score', 'seq', 'epsilon', 'ambiguous edges', 'num ambig edges']
    
    # When an output path was given, open it for writing and emit the
    # tab-separated header row. The handle is not closed in this excerpt.
    if options.output:
        outFile = open(options.output, 'w')
        outFile.write('\t'.join([col.upper() for col in cols]) + '\n')
    
    # Build the PN.ProbNetwork from the config and model given in options.
    PNet = PN.ProbNetwork(options.config, options.model)

    # Collect every .dta file directly under options.dtadir and hand the list
    # to getScanFDict.
    dtaList = glob.glob(options.dtadir + '/*.dta')
    scanFDict = getScanFDict(dtaList)
    
    # NOTE(review): 300 is passed where a later variant of this setup passes
    # options.minedge -- meaning of the argument is not visible here; confirm.
    aas = Constants.addPepsToAADict(300)
    hashedAAs = Constants.hashAAsEpsilonRange(aas, epStep, maxEp)
    
    # Penalty functions built from the command-line options.
    ambigOpenPenalty = 0
    ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(options.minedge, ambigOpenPenalty, options.ambigpenalty)
    ppmPenaltyFun = DNS.getPPMPenaltyFun(options.ppmstd, hashedAAs, options.minedge, options.ppmpenalty, options.ppmsyserror, epStep)
    
    print 'Getting Clusters'
    # `parent` is the absolute path of the parent of the current working
    # directory. The model path from paramsDict is appended by plain string
    # concatenation, so it presumably starts with a path separator -- confirm.
    parent = os.path.abspath(os.pardir)
    clusterSVMModel = svmutil.svm_load_model(parent + paramsDict['Cluster Configuration']['model'])
    # The ranges file shares the model's path with the extension replaced by '.range'.
    clusterSVMRanges = svmutil.load_ranges(parent + os.path.splitext((paramsDict['Cluster Configuration']['model']))[0] + '.range')

    # Group scans by precursor mass first, then derive the same-peptide
    # clusters from those groups using the SVM model and configured cutoff.
    precMassClusters = Analytics.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)
#    print 'precMassClusters', precMassClusters
    samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=float(paramsDict['Cluster Configuration']['cutoff']))
#    samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
#    samePeptideClusters = An.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)

    # To test without any clustering
    #samePeptideClusters = [[scanF] for scanF in scanFDict]
    # When an output path was given, open it for writing and emit the
    # tab-separated, upper-cased header row built from `cols`. The handle is
    # not closed in this excerpt.
    if options.output:
        outFile = open(options.output, "w")
        outFile.write("\t".join([col.upper() for col in cols]) + "\n")

    # Start time of the setup phase; not read again within this excerpt.
    t1 = time.time()
    print "Configuring LADS for sequencing..."
    # One PN.ProbNetwork per model entry ("etd" and "hcd") in paramsDict["Models"].
    ETDPNet = PN.ProbNetwork(paramsDict["Models"]["etd"]["config"], paramsDict["Models"]["etd"]["model"])
    HCDPNet = PN.ProbNetwork(paramsDict["Models"]["hcd"]["config"], paramsDict["Models"]["hcd"]["model"])
    # Collect every .dta file directly under options.dtadir and hand the list
    # to getScanFDict.
    dtaList = glob.glob(options.dtadir + "/*.dta")
    scanFDict = getScanFDict(dtaList)
    aas = Constants.addPepsToAADict(options.minedge)
    hashedAAs = Constants.hashAAsEpsilonRange(aas, epStep, maxEp)

    # Penalty functions built from the command-line options.
    ambigOpenPenalty = 0
    ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(options.minedge, ambigOpenPenalty, options.ambigpenalty)
    ppmPenaltyFun = DNS.getPPMPenaltyFun(
        options.ppmstd, hashedAAs, options.minedge, options.ppmpenalty, options.ppmsyserror, epStep
    )

    # Endpoint-initialisation function built from the N- and C-terminal mod
    # values and the enzyme specificity from paramsDict.
    # NOTE(review): np.array(<dict>.values()) relies on Python 2 returning a
    # list from dict.values().
    addEnds = DNS.getSpectrumGraphEndpointInitFunction(
        np.array(Constants.NTermMods.values()),
        np.array(Constants.CTermMods.values()),
        paramsDict["Enzyme"]["specificity"],
    )
    # Deep copies are passed so that Constants.NTermMods / CTermMods cannot be
    # modified through the arguments.
    termModHash = Constants.createTermModHashAAs(
        N=copy.deepcopy(Constants.NTermMods), C=copy.deepcopy(Constants.CTermMods)
    )
    print "Getting Pairs..."
    pairs = getCIDETDPairs(scanFDict)
Example #6
0
 # Split the .dta files into paired and unpaired spectra. The expected mass
 # difference passed as `delta` is the sum of the N- and C-terminal mods.
 (paired, unpaired) = Analytics.getPairedAndUnpairedSpectra(options.dtaDir, dtaList, delta=(options.Nmod + options.Cmod), ppm=options.ppm, cutOff=options.pairCutoff)
 if options.verbose:
     # t1 is set before this excerpt begins.
     t2 = time.time()
     print 'Finished getting paired spectra. Time taken: ', t2 - t1
     print 'Starting Sequencing'
 
 aas = Constants.addPepsToAADict(options.minEdge)
 # Each entry of `paired` is indexed as (shared peaks ratio, light, heavy).
 for pair in paired:
     (lightSpec, heavySpec) = pair[1:]
     if options.verbose:
         print 'Now sequencing %s %s with shared peaks ratio %f' % (lightSpec, heavySpec, pair[0])
         # s1 is only assigned in verbose mode and only read in verbose mode below.
         s1 = time.time()
         
     heavyPath = heavySpec
     lightPath = lightSpec
     sharedInfo = DNS.getPairedSpectraInfoForSequencing(lightPath, heavyPath, options.verbose)
     # The mass passed on is the light precursor mass minus H+ and H2O; the
     # return value of the call is discarded here.
     DNS.sequencePairedSpectra(sharedInfo['NInd'], sharedInfo['CInd'], sharedInfo['lightPairs'], sharedInfo['heavyPairs'], sharedInfo['lightPrecMass'] - Constants.mods['H+'] - Constants.mods['H2O'], PNet, alpha=options.alpha, unknownPenalty=options.ambigEdgePenalty, maxEdge=options.maxEdge, minEdge=options.minEdge, Nmod=options.Nmod, Cmod=options.Cmod, aas=aas, verbose=options.verbose)
     
     if options.verbose:
         print 'Time taken:', time.time() - s1 
 
 for spec in unpaired:
     if options.verbose:
         print 'Now sequencing unpaired spectrum %s' % spec
         s1 = time.time()
         
     precMass = DataFile.getPrecMassAndCharge(spec)[0]
     pairs = DataFile.getMassIntPairs(spec)
     DNS.sequenceSingleSpectrum(pairs, precMass - Constants.mods['H+'] - Constants.mods['H2O'], PNet, alpha=options.alpha, unknownPenalty=options.ambigEdgePenalty, maxEdge=options.maxEdge, minEdge=options.minEdge, aas=aas, verbose=options.verbose)
     
     if options.verbose: