Example #1
 def setPixmapVisibility(self, bool_):
     """
     draw other peaks than the xcms peak
     
     """
     if not self.pixmaps and bool_:
         ppm = 1. if self.ref.sample.kind=='MRM' else self.ref.sample.ppm
         chrom = self.ref.sample.massExtraction(self.ref.mass(), ppm, asChromatogram=True) \
         if self.flags == 'peak' else self.ref
         chrom.findNonXCMSPeaks()
         for p in chrom.peaks.ipeaks():
             if self.flags == 'peak':
                 diff=(p.height*10)/100
                 if abs(p.height-self.ref.height) < diff:
                     continue #we assume that they are the same peaks
             pix=PeakIndicator(p, icon='flags')
             #self.connect(pix, SIGNAL("highlightRequested"), c.setHighlighted)
             self.connect(pix, SIGNAL('updateBarPlot'), self.barPlot.setPeakGroup)
             pix.setPos(p.rt, p.height + (p.height * 10) / 100.)
             pix.setZValue(1000)
             self.pixmaps.append(pix)
             self.pw.addItem(pix)                
     if self.pixmaps:
         for t in self.pixmaps:
             t.setVisible(bool_)
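
The 10% height tolerance used above decides whether a detected peak counts as "the same" as the reference XCMS peak. A minimal, self-contained sketch of that check; the Peak tuple and the numbers are illustrative, not part of the widget code:

from collections import namedtuple

Peak = namedtuple('Peak', ['rt', 'height'])

def is_same_peak(candidate, reference, rel_tol=0.10):
    # treat two peaks as identical when their heights differ by less
    # than rel_tol (10%) of the candidate height, as in the loop above
    return abs(candidate.height - reference.height) < rel_tol * candidate.height

ref = Peak(rt=120.5, height=1000.0)
for p in (Peak(121.0, 950.0), Peak(130.0, 400.0)):
    print("%s -> %s" % (p, 'skipped' if is_same_peak(p, ref) else 'kept'))
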
Example #2
 def setPixmapVisibility(self, bool_):
     """
     draw other peaks than the xcms peak
     
     """
     if not self.pixmaps and bool_:
         ppm = 1. if self.ref.sample.kind == 'MRM' else self.ref.sample.ppm
         chrom = self.ref.sample.massExtraction(self.ref.mass(), ppm, asChromatogram=True) \
         if self.flags == 'peak' else self.ref
         chrom.findNonXCMSPeaks()
         for p in chrom.peaks.ipeaks():
             if self.flags == 'peak':
                 diff = (p.height * 10) / 100
                 if abs(p.height - self.ref.height) < diff:
                     continue  #we assume that they are the same peaks
             pix = PeakIndicator(p, icon='flags')
             #self.connect(pix, SIGNAL("highlightRequested"), c.setHighlighted)
             self.connect(pix, SIGNAL('updateBarPlot'),
                          self.barPlot.setPeakGroup)
             pix.setPos(p.rt, p.height + (p.height * 10) / 100.)
             pix.setZValue(1000)
             self.pixmaps.append(pix)
             self.pw.addItem(pix)
     if self.pixmaps:
         for t in self.pixmaps:
             t.setVisible(bool_)
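
The connection above uses the old-style, string-based PyQt4 signal API. A small sketch contrasting it with the new-style API, assuming PyQt4 is available; EmitterDemo and its signal are purely illustrative and not part of the original code:

from PyQt4.QtCore import QCoreApplication, QObject, SIGNAL, pyqtSignal

class EmitterDemo(QObject):
    updated = pyqtSignal(object)          # new-style signal

def handler(payload):
    print("received: %r" % (payload,))

app = QCoreApplication([])
demo = EmitterDemo()

# old-style, as in the example: the signal is identified by its name only
QObject.connect(demo, SIGNAL('updateBarPlot'), handler)
demo.emit(SIGNAL('updateBarPlot'), {'peak': 42})

# new-style equivalent: attribute access, checked when connecting
demo.updated.connect(handler)
demo.updated.emit({'peak': 42})
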
Example #3
 def drawSpectrumByTime(self, t, sample):
     if not sample or not t:
         print "unknown error..."
         return
     spectra = sample.spectraInRTRange(t.x(), t.x() - 2., t.x() + 2.)
     if not spectra:
         print "No spectrum found at this retention time"
         return
     closest = sorted(spectra, key=lambda x: abs(t.x() - x.rtmin))[0]
     self.subsidiaryWidget.pw.clear()
     self.subsidiaryWidget._plotting([closest])
     self.subsidiaryWidget.pw.setTitle("Spectrum@%s" % (str(closest.rtmin)))
     self.subsidiaryWidget.show()
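
Selecting the spectrum whose rtmin is closest to the clicked time is the core of drawSpectrumByTime. A standalone sketch of that selection (Spectrum and the retention times are invented); min() with a key avoids sorting the whole list just to take its first element:

from collections import namedtuple

Spectrum = namedtuple('Spectrum', ['rtmin'])

def closest_spectrum(spectra, rt):
    # same criterion as sorted(spectra, key=...)[0] above, without the full sort
    return min(spectra, key=lambda s: abs(rt - s.rtmin))

spectra = [Spectrum(10.2), Spectrum(11.9), Spectrum(12.4)]
print(closest_spectrum(spectra, 12.0))    # Spectrum(rtmin=11.9)
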
Example #4
 def drawSpectrumByTime(self, t, sample):
     if not sample or not t:
         print "unknown error..."            
         return
     spectra = sample.spectraInRTRange(t.x(), t.x()-2., t.x()+2.)
     if not spectra:
         print "No spectrum found at this retention time"
         return
     closest = sorted(spectra, key=lambda x: abs(t.x()-x.rtmin))[0]
     self.subsidiaryWidget.pw.clear()        
     self.subsidiaryWidget._plotting([closest])
     self.subsidiaryWidget.pw.setTitle("Spectrum@%s"%(str(closest.rtmin)))                        
     self.subsidiaryWidget.show()
Example #5
def clusteringBASIC(peaks, adds, **k):
    
    if not peaks:
        return
    t=time.clock()
    errorRt = k.get('rtError', 6)
    #ppm = float(kwargs.get('ppm'))/10**6
    ppm = k.get('ppm')
    if ppm is None:
        try:        
            ppm = peaks[0].sample.ppm/1e6
        except AttributeError:
            print "No value found for ppm setting to 10/1E6"
            ppm = 10./1e6
    #mode = k.get('mode', 'HighRes')
    resolveConflicts=k.get('resolveConflicts', False)
    addsToCheck=np.array(adds.keys())
    
    adductsFound = MSPeakList()    
    for i, p in enumerate(peaks):
        a = MSClusterList()        
        for v in addsToCheck:
            m = p.mz+v[0]
            match = peaks.peaksInMZRTRange(m, p.rt, errorRt, deltam= 2 * ppm * m)
            if match is None or not match:
                continue
            #take the closest in mass
            goodP = sorted(match, key=lambda x:abs(x.mz - (p.mz + v[0])))[0]
            #if goodP in set(adductsFound):
            #    if resolveConflicts:
            #        pass
            #else:
            if goodP is p:
                continue
            a.append(goodP)
            goodP.parentPeak=p
            adductsFound.append(goodP)
        p.fragCluster = MSPeakList(set(a))  # the set() removes duplicates
        
#    def clusterComparison(list_):#receive a list of peak with clusters identified
#        """
#        return the best peak
#        WARNING: p_ydata and p_.y_data are None
#        TODO: 
#        
#        """        
#        sortedList = sorted(list_, key=lambda x: len(x.fragCluster))
#        longest=len(sortedList[-1].fragCluster)
#        sameSizePeaks=MSPeakList()        
#        
#        for p in sortedList:
#            if len(p.fragCluster) == longest:
#                sameSizePeaks.append(p)
#        
#        if len(sameSizePeaks) == 1:
#            return sameSizePeaks[0]
#        corr=np.array([0.] * len(sameSizePeaks))
#        #for i, p in enumerate(sameSizePeaks):
#        #    for p_ in p.fragCluster:
#        #        corr[i] += r_coef(p_.y_data, p.y_data)
#        m=max_f(corr)
#        return sameSizePeaks[np.where(corr == m)[0][0]]
#        
#        if resolveConflicts:
#            for add in set(adductsFound):
#                if len(add.parentPeak) <= 1:
#                    #print "%s belong to several fragCluster"%str(add)
#                    continue
#                #print "%s belong to several fragCluster"%str(add)
#                goodParent=clusterComparison(add.parentPeak)
#                #if goodParent is not None:
#                #    add.parentPeak = [goodParent]            
#                
#                for parent in add.parentPeak:
#                    if parent != goodParent:
#                        try:
#                            parent.fragCluster.remove(add)
#                        except ValueError:
#                            print "Error removing %s from fragCluster of %s"%(str(add), str(parent))
#                add.parentPeak = [goodParent] #the same of constructing a list 'toRemove then remove
#                #print "after removing len add.parentPeak", len(add.parentPeak)
    print "TiemElapsed: %s"%str(time.clock()-t)
    return peaks, adductsFound
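
The inner loop of clusteringBASIC looks, for every candidate adduct shift, for the peak closest in m/z inside a combined retention-time and 2*ppm mass window. A self-contained sketch of that matching step, with an invented Peak tuple and illustrative numbers:

from collections import namedtuple

Peak = namedtuple('Peak', ['mz', 'rt'])

def find_adduct(peaks, base, mz_shift, ppm=10e-6, rt_tol=6.0):
    # candidate peaks must fall inside the rt window and a 2*ppm m/z window
    target = base.mz + mz_shift
    window = 2 * ppm * target
    matches = [p for p in peaks
               if p is not base
               and abs(p.rt - base.rt) < rt_tol
               and abs(p.mz - target) < window]
    # take the closest in mass, as in the original sort
    return min(matches, key=lambda p: abs(p.mz - target)) if matches else None

peaks = [Peak(180.063, 100.0), Peak(203.052, 100.5), Peak(219.026, 180.0)]
print(find_adduct(peaks, peaks[0], 22.989))    # illustrative adduct mass shift
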
Example #6
def clusteringCAMERA(peaks, adducts, **kwargs):
    """
    arguments needed:
        error_rt:rt_ drift
        ppm:precision
        useCorrelation: if we calculate correlations
    """
    t=time.clock()
    #unpack parameters
    error_rt = kwargs.get('rtError', 6)
    #ppm = float(kwargs.get('ppm'))/10**6
    ppm=peaks[0].sample.ppm/1e6
    mode=kwargs.get('mode', 'HighRes')
    resolveConflicts=kwargs.get('resolveConflicts', False)
    peaks_with_iso=peaks
    print "peaklist length",len(peaks)
    adducts_to_check=np.array(adducts.keys())
    #===========================================================================
    #START CAMERA ALGORITHM
    print ("RT Grouping ...")
    #RT_peak=peaks_with_iso.rtClustering(error_rt)
    #3,find for each peak peaks which matches with retention time
    rtPeak =[]
    for i, peak in enumerate(peaks_with_iso.ipeaks()):
        
        l=MSPeakList()
        l.addPeak(peak)
        for j, peak_ in enumerate(peaks_with_iso.ipeaks()):
            if i!=j:
                if abs(peak.rt - peak_.rt) < error_rt:
                    l.append(peak_)
        isIncluded=False
        index=[]        
        for k, rtClust in enumerate(rtPeak):
            if set(l) <= set(rtClust):  # l is already included in an existing rt cluster
                isIncluded=True
                break
            
            if set(rtClust) <= (set(l)):
                index.append(k)
                #break
        #del rtPeak[index]
        rtPeak= [rtPeak[i] for i in xrange(len(rtPeak)) if i not in index] 
        if not isIncluded:        
            rtPeak.append(MSPeakList(l))
                #isIncluded=True
            #else:
            #    if rtClust.__eq__(l):
            #        rtPeak[k]=l
            #        break                    
                    #isIncluded=True
        #if not isIncluded:
            #l.sort(key=lambda x:x.mass())
            
    
#    with open('test1.txt', 'w') as f:
#        for r in rtPeak:
#            s=""
#            for i, p in enumerate(r):
#                s+=str(p)+';' if i<len(r)-1 else str(p)+'\n'
#            f.write(s)

    #EXPERIMENTAL CODE            
#    cl=[]
#    for cluster in rtPeak:
#        list_=[];datapoints={}
#        for i, p in enumerate(cluster):
#            correspondingPeaks=set()
#            correspondingPeaks.add(p)
#            for j in xrange(i+1, len(cluster)):
#                #put caching on that to avoid recalculation each time of the datapoints
#                try:
#                    r=r_coef(list(datapoints[p]), list(datapoints[cluster[j]]))
#                except KeyError:
#                    y, y_= None, None                    
#                    try:
#                        y=datapoints[p]                        
#                    except KeyError:                        
#                        x, y= massExtractionBisectAlgo(p.sample,p.mass(), ppm)
#                        datapoints[p]=y
#                        
#                    try:
#                        y_=datapoints[cluster[j]]
#                    except KeyError:
#                        x, y_= massExtractionBisectAlgo(cluster[j].sample, cluster[j].mass(), ppm)
#                        datapoints[cluster[j]]=y_
#                    r=r_coef(y, y_)
#                if r >= threshold:
#                    correspondingPeaks.add(cluster[j])
#            list_.append(correspondingPeaks)
#        
#        for i, p in enumerate(list_):
#            for j in xrange(i+1, len(list_)):
#                if list_[j].issubset(p):
#                    continue
#                else:
#                    cl.append(MSPeakList(list(p)))
    #merging step again
#    print "cluster length, same without replicates",len(cl), len(set(map(set, [x for x in cl])))
#    with open('test2.txt', 'w') as f:
#        for r in cl:
#            s=""
#            for i, p in enumerate(r):
#                s+=str(p)+';' if i<len(r)-1 else str(p)+'\n'
#            f.write(s)
#            
    #END EXPERIMENTAL CODE
    print 'len RTpeak', len(rtPeak)
    print ("Creating possible M0...")
    #Cython code
    finalList = massGenPerGroup(rtPeak, adducts_to_check, ppm)   
    print("Mapping of calculated mass on peaklist...")
    #4,see if one matches with peak in the raw peaklist
    goodPeak=[]#list will contain good peak per rtCluster    
    for i, dic in enumerate(finalList):
        matchingMass=defaultdict(list)
        for mass in dic.iterkeys():
            p = rtPeak[i].peaksInMZRange(mass, deltam=mass * ppm if mode == 'HighRes' else 1.)  # warning: rtPeak[i] is not necessarily sorted
            if not p:
                continue
            peak=sorted(p, key=lambda x:abs(mass - x.mass()))[0]
            #if peak not in matchingMass.keys():#may avoid this to see if one peak appears several times !then do 'set'
            #    matchingMass[peak]=[]                    
            matchingMass[peak] += dic[mass]
        goodPeak.append(matchingMass)
    
    #start new stuffs here
    print ("Merging informations...")
    #conflicts=False
    adds = MSPeakList()  # object for storing the adducts found
    newGoodPeaks=defaultdict(list)#{}
    for peaksInOneRtGroup in goodPeak:
        for peak in peaksInOneRtGroup.iterkeys():
            newGoodPeaks[peak] += peaksInOneRtGroup[peak]            
    for p in newGoodPeaks.iterkeys():
        p.fragCluster=MSClusterList(list(set(newGoodPeaks[p])))
        for f in p.fragCluster:
            f.parentPeak.append(p)
        adds += p.fragCluster
    finalPeaks=MSPeakList(newGoodPeaks.keys())
        
    print ("Resolving conflicts if any...")
    #removing peak that appears many times that is to say in different clusters
    def clusterComparison(list_):  # receives a list of peaks whose fragCluster is already set
        """
        Return the best parent peak.
        WARNING: p_.y_data values are None, so the correlation step below is commented out.
        """
        sortedList = sorted(list_, key=lambda x: len(x.fragCluster))
        longest=len(sortedList[-1].fragCluster)
        sameSizePeaks=MSPeakList()        
        
        for p in sortedList:
            if len(p.fragCluster) == longest:
                sameSizePeaks.append(p)
        
        if len(sameSizePeaks) == 1:
            return sameSizePeaks[0]
        corr=np.array([0.] * len(sameSizePeaks))
        #for i, p in enumerate(sameSizePeaks):
        #    for p_ in p.fragCluster:
        #        corr[i] += r_coef(p_.y_data, p.y_data)
        m=max_f(corr)
        return sameSizePeaks[np.where(corr == m)[0][0]]
                
    if resolveConflicts:
        for add in set(adds):
            if len(add.parentPeak) <= 1:
                #print "%s belong to several fragCluster"%str(add)
                continue
            #print "%s belong to several fragCluster"%str(add)
            goodParent=clusterComparison(add.parentPeak)
            #if goodParent is not None:
            #    add.parentPeak = [goodParent]            
            
            for parent in add.parentPeak:
                if parent != goodParent:
                    try:
                        parent.fragCluster.remove(add)
                    except ValueError:
                        print "Error removing %s from fragCluster of %s"%(str(add), str(parent))
            add.parentPeak = [goodParent]  # equivalent to building a 'toRemove' list and then removing
            #print "after removing len add.parentPeak", len(add.parentPeak)
                    

    #make the annotation
    for peak in finalPeaks.ipeaks():
        for f in peak.fragCluster:
            #results = makeAnnotations(adducts_to_check, adducts, f.mass(), ppm)
            for annot in adducts.iterkeys():
                p = f.mass() / annot[1] + annot[0]
                diff = peak.mass()*ppm if mode =='HighRes' else 1
                if peak.mass() > p-diff and peak.mass() < p+diff:
                    f.annotation[annot]=adducts[annot]
                    break
    finalPeaks=checkingSons(finalPeaks)
    #5,second filter, correlation on the isotopic cluster between samples
#    if useCorrelation:
#        print "Calculating correlation between samples..."
#        interSamplesCorr(spl, **kwargs)
#        print  "Calculating correlation intra sample..."
#        intraSampleCorr(spl)
#    #6 merging
    print "Merging interesting peaks"
    for peak in peaks_with_iso.ipeaks():  # warning: merging must leave out the peaks used to construct this peak
        if peak not in finalPeaks and peak not in adds:#matching_peaks:
            finalPeaks.append(peak) #matching_peaks to
    if not finalPeaks:
        print ("no cluster found, please increase the ppm, or rt drift parameters")
    print ("finished, time elapsed:",time.clock()-t)
    return MSPeakList(sorted(finalPeaks, key=lambda x: x.mass())), adds  # checkingSons(finalPeaks), adds
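
The RT grouping step above builds, for every peak, the set of peaks within the rt tolerance, then keeps a group only if it is not contained in one already stored (and drops stored groups that the new one contains). A minimal sketch of that pruning logic with plain Python sets standing in for MSPeakList; the retention times are invented:

def rt_groups(rts, rt_tol=6.0):
    groups = []
    for rt in rts:
        group = frozenset(r for r in rts if abs(r - rt) < rt_tol)
        if any(group <= g for g in groups):
            continue                                        # already covered by a larger group
        groups = [g for g in groups if not g <= group]      # drop groups the new one absorbs
        groups.append(group)
    return groups

print(rt_groups([100.0, 102.0, 103.0, 250.0, 251.0]))
# two groups: {100.0, 102.0, 103.0} and {250.0, 251.0}
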
Example #7
def isotopicPeakListFinder(peaks, isomasses, **kwargs):
    """
    assign an isotopic cluster for each peak, and try to find an idms
     we may use a system like the CAMERA algorithm to see...
    input:
        list of peak must an obj.MSPeakList object
        clusterLength = 6  never go to six in LOW_RES
                        size expected of an isotopic cluster
        rtError: maximum drift of the retention time
        decreaseOrder: allow or not allow that the successive peak of the isotopic cluster
                        intensity are going down, can be confusing for finding idms
    output:
        two MSPeakList, the first one corresponding to the peaks with an isotopic cluster
        and the other one all peaks belonging to an isotopic cluster
    """
   
    #unpacking parameters
    print "Isotopic cluster calculation..."
    
    rtError = np.float(kwargs.get('rtError', 6))
    ppm=np.float(peaks[0].sample.ppm/1e6)
    MAX_GAP_ALLOWED = np.int(len(isomasses))
    decreaseOrder = kwargs.get('decreaseOrder', True)  #we use the less restrictive...
    mode = kwargs.get('mode', 'HighRes')  # default matches the 'HighRes' checks below
    #sort isomasses
    #isomasses = sorted(isomasses, key=lambda x:x[0])
    
    peaks_with_iso =MSPeakList()               
    peaks_without_iso = MSPeakList()  # peaks for which no isotopic cluster was found
    list_iso = set()#MSPeakList()

    t = time.clock()    
    
    for peak in peaks.ipeaks():#iterating over peaks
        
        if peak in list_iso:
            continue  # skip peaks already assigned to an isotopic cluster
        
        isoCluster= MSClusterList()
        gap = 0
        #isos = resolutionAdjustment(isomasses, peak.mass()*ppm) if mode=='HighRes' else isomasses
        for i, isomass in enumerate(sorted(isomasses, key=lambda x:x[0])):
            #pic = _getMatchingPeaks(peaks, peak, isomass[0], ppm, rtError)
            
            mass=isomass[0]
            massToCheck=peak.mass()+mass
                        
            p = peaks.peaksInMZRange(massToCheck, deltam=ppm*massToCheck if mode=='HighRes' else 1.) #deltart
            matchingRtPeaks = MSPeakList()#will contain all matching peak in rt
            for pk in p.ipeaks():
                if pk != peak:
                    if abs(peak.rt - pk.rt) <= rtError:
                        matchingRtPeaks.append(pk)
            
            if matchingRtPeaks:
         
                pic = sorted(matchingRtPeaks, key=lambda pics: abs(pics.mass()-peak.mass()))[0] #take the closest in mass
                if pic is not None:
                    if decreaseOrder:  # each successive peak area must be smaller than the previous one
                        #if isoCluster:
                        areaToCompare=isoCluster[-1].area if isoCluster else peak.area 
                        if areaToCompare < pic.area:  # idms found ???
                            break
                      
                    if pic not in list_iso:#pic not in isoCluster and
                        isoCluster.append(pic)
                        list_iso.add(pic)
            else:
                gap+=1
                if gap >=MAX_GAP_ALLOWED:
                    break
        
        # #set parent for all peaks found
        if isoCluster:
            for pics in isoCluster:
                #pics.parentPeak=peak
                pics.parentPeak.append(peak)
            peak.isoCluster = isoCluster
            peaks_with_iso.addPeak(peak)
        else:
            peaks_without_iso.addPeak(peak)
      
    
#    for p in peaks.ipeaks():
#        if p not in peaks_with_iso and p not in list_iso:
#            peaks_without_iso.addPeak(p)
    
    
    print time.clock()-t
    print "peaks with isotopes: " ,len(peaks_with_iso)
    print "list isotopes: " ,len(list_iso)
    print "peaks without isotopes: " ,len(peaks_without_iso)
    return peaks_with_iso+peaks_without_iso, list_iso
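
isotopicPeakListFinder walks the sorted isotope mass offsets, accepts the closest matching peak per offset, and gives up once too many offsets found no match. A simplified, self-contained version of that gap-limited walk; the masses and the tolerance are invented, and plain floats stand in for peak objects:

def isotope_cluster(base_mz, peak_mzs, iso_offsets, tol=0.01, max_gap=2):
    cluster, gap = [], 0
    for offset in sorted(iso_offsets):
        target = base_mz + offset
        hits = [m for m in peak_mzs if abs(m - target) <= tol]
        if hits:
            cluster.append(min(hits, key=lambda m: abs(m - target)))  # closest in mass
        else:
            gap += 1                 # misses accumulate, as in the original
            if gap >= max_gap:
                break
    return cluster

print(isotope_cluster(200.0, [201.003, 202.007, 205.0], [1.003, 2.006, 3.010]))
# [201.003, 202.007]
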
Example #8
def inSpectraFinder(peaks, isomasses, **k):
    """
    complementary algorihtm to look for isotopic 
    cluster in the sepctra rather than in the peak
    list, should be more effective
    """

    ppm=np.float(peaks[0].sample.ppm/1e6)        
    decreaseOrder = k.get('decreaseOrder', False) #we use the less restrictive...
    mode=peaks[0].sample.kind    
    MAX_GAP_ALLOWED = np.int(k.get('gap', 0) + 1)  # maximum number of missing isotopes tolerated
    isomasses = sorted(isomasses, key=lambda x:x[0])
    
    for peak in peaks.ipeaks():#generator
        isores = []  # list of per-spectrum result dicts
        for i, s in enumerate(peak.ispectra()):
            
            gap=np.int(0)
            isos = defaultdict(float)#OrderedDict()
            #for i in range(1,3):#some testing if we are close or far
            p=s.massPeakInRange(peak.mass(), ppm * peak.mass())#list pairs(mass, intensity)
            
            if not p:
                #count+=1
                continue
        
            pmass, pintensity = sorted(p, key=lambda x: abs(x[0]-peak.mass()))[0]            
            
            #print "setting the base peak"
            isos[pmass]+=pintensity#= #setting the base peak
            #isores.append(matched)            
            
            if mode == 'HighRes':  # spelled consistently so this branch can actually trigger
                errormass = pmass * ppm  # ppm is already divided by 1e6 above
                adjustedIsos = resolutionAdjustment(isomasses, errormass)
            else:
                adjustedIsos = isomasses
            
            for isomass in adjustedIsos:
                #for i in range(1,3):
                m=s.massPeakInRange(pmass+isomass[0], (pmass+isomass[0])*ppm)
                    #if m:break
                #if m.size:
                if m:
                    mass = sorted(m, key=lambda x: abs(x[0]-(pmass+isomass[0])))[0]
                    if decreaseOrder:#majority
                        if isos[isos.keys()[-1]] < mass[1]:
                        #if isores[-1]<mass[1]:
                            break#idms found ???
                    isos[mass[0]]+=mass[1] #= #mass:intensity pairs 
                    #isores.append(mass)
                else:#no peak found in this spectra
                    gap+=1
                    if gap >MAX_GAP_ALLOWED:
                        break
            isores.append(isos) 
        #isores=resolutionAdjustment(isores, ppm, adaptError=True)
        if isores:
            s=sorted(isores, key=lambda x:len(x))
            ref, isoext = s[-1], s[:-1] #take the longest one
            #if ref:
            #isoext = [x for x in isores if x != ref] #extraction
            for d in isoext:  # iterate over the remaining per-spectrum dictionaries
                
                checked=set()
                
                for mp in d.iterkeys():#iterates over masses
                    mp_included =False
                    
                    for mprime in ref.iterkeys():#iterate over reference
                        t = ppm * max(mprime,mp) if mode =='HighRes' else 2
                        if abs(mprime - mp) < t:  # masses this close are considered identical
                            
                            if mprime in checked:
                                continue
                            #if mprime not in checked:
                            ref[mprime]+=d[mp]
                            checked.add(mprime)
                            mp_included=True
                            break
                    
                    if not mp_included:
                        ref[mp]+=d[mp]
            #setting the isotopes
            peak.isoSpectra = ref.items()  # [(key, val) for key, val in ref.iteritems()]
        else:
            print ("no masspeak found in corresponding spectra for peak %s"%str(peak))
            print ("This peak may not be good !")
            peak.isGood=False
    return peaks
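
The merge at the end of inSpectraFinder folds every smaller per-spectrum mass->intensity dict into the longest one, matching masses within a tolerance. A condensed sketch of that idea with made-up numbers; the bookkeeping that stops one reference mass from absorbing two peaks of the same spectrum is left out here:

def merge_isotope_dicts(dicts, tol=0.01):
    dicts = sorted(dicts, key=len)
    ref, rest = dict(dicts[-1]), dicts[:-1]     # the longest dict is the reference
    for d in rest:
        for m, intensity in d.items():
            close = [rm for rm in ref if abs(rm - m) < tol]
            if close:
                ref[min(close, key=lambda rm: abs(rm - m))] += intensity
            else:
                ref[m] = intensity              # unmatched mass starts a new entry
    return ref

spectra = [{100.001: 5.0, 101.004: 2.0},
           {100.002: 4.0},
           {100.000: 6.0, 101.003: 3.0, 102.007: 1.0}]
print(merge_isotope_dicts(spectra))
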
Example #9
def clusteringBASIC(peaks, adds, **k):

    if not peaks:
        return
    t = time.clock()
    errorRt = k.get('rtError', 6)
    #ppm = float(kwargs.get('ppm'))/10**6
    ppm = k.get('ppm')
    if ppm is None:
        try:
            ppm = peaks[0].sample.ppm / 1e6
        except AttributeError:
            print "No value found for ppm setting to 10/1E6"
            ppm = 10. / 1e6
    #mode = k.get('mode', 'HighRes')
    resolveConflicts = k.get('resolveConflicts', False)
    addsToCheck = np.array(adds.keys())

    adductsFound = MSPeakList()
    for i, p in enumerate(peaks):
        a = MSClusterList()
        for v in addsToCheck:
            m = p.mz + v[0]
            match = peaks.peaksInMZRTRange(m,
                                           p.rt,
                                           errorRt,
                                           deltam=2 * ppm * m)
            if match is None or not match:
                continue
            #take the closest in mass
            goodP = sorted(match, key=lambda x: abs(x.mz - (p.mz + v[0])))[0]
            #if goodP in set(adductsFound):
            #    if resolveConflicts:
            #        pass
            #else:
            if goodP is p:
                continue
            a.append(goodP)
            goodP.parentPeak = p
            adductsFound.append(goodP)
        p.fragCluster = MSPeakList(set(a))  # the set() removes duplicates

#    def clusterComparison(list_):#receive a list of peak with clusters identified
#        """
#        return the best peak
#        WARNING: p_ydata and p_.y_data are None
#        TODO:
#
#        """
#        sortedList = sorted(list_, key=lambda x: len(x.fragCluster))
#        longest=len(sortedList[-1].fragCluster)
#        sameSizePeaks=MSPeakList()
#
#        for p in sortedList:
#            if len(p.fragCluster) == longest:
#                sameSizePeaks.append(p)
#
#        if len(sameSizePeaks) == 1:
#            return sameSizePeaks[0]
#        corr=np.array([0.] * len(sameSizePeaks))
#        #for i, p in enumerate(sameSizePeaks):
#        #    for p_ in p.fragCluster:
#        #        corr[i] += r_coef(p_.y_data, p.y_data)
#        m=max_f(corr)
#        return sameSizePeaks[np.where(corr == m)[0][0]]
#
#        if resolveConflicts:
#            for add in set(adductsFound):
#                if len(add.parentPeak) <= 1:
#                    #print "%s belong to several fragCluster"%str(add)
#                    continue
#                #print "%s belong to several fragCluster"%str(add)
#                goodParent=clusterComparison(add.parentPeak)
#                #if goodParent is not None:
#                #    add.parentPeak = [goodParent]
#
#                for parent in add.parentPeak:
#                    if parent != goodParent:
#                        try:
#                            parent.fragCluster.remove(add)
#                        except ValueError:
#                            print "Error removing %s from fragCluster of %s"%(str(add), str(parent))
#                add.parentPeak = [goodParent] #the same of constructing a list 'toRemove then remove
#                #print "after removing len add.parentPeak", len(add.parentPeak)
    print "TiemElapsed: %s" % str(time.clock() - t)
    return peaks, adductsFound
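
The deltam arguments above all derive from a relative ppm value. A quick sketch of how a ppm setting translates into an absolute m/z window; the 10 ppm fallback matches the default used when the sample carries no ppm attribute, and the masses are arbitrary:

def mz_window(mz, ppm):
    # absolute half-width (in Da) of a +/- ppm window around mz
    return ppm * mz

ppm = 10. / 1e6
for mz in (100.0, 500.0, 1000.0):
    print("m/z %6.1f -> +/- %.4f Da (2*ppm*m = %.4f)"
          % (mz, mz_window(mz, ppm), 2 * mz_window(mz, ppm)))
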
Example #10
def clusteringCAMERA(peaks, adducts, **kwargs):
    """
    arguments needed:
        error_rt:rt_ drift
        ppm:precision
        useCorrelation: if we calculate correlations
    """
    t = time.clock()
    #unpack parameters
    error_rt = kwargs.get('rtError', 6)
    #ppm = float(kwargs.get('ppm'))/10**6
    ppm = peaks[0].sample.ppm / 1e6
    mode = kwargs.get('mode', 'HighRes')
    resolveConflicts = kwargs.get('resolveConflicts', False)
    peaks_with_iso = peaks
    print "peaklist length", len(peaks)
    adducts_to_check = np.array(adducts.keys())
    #===========================================================================
    #START CAMERA ALGORITHM
    print("RT Grouping ...")
    #RT_peak=peaks_with_iso.rtClustering(error_rt)
    #3,find for each peak peaks which matches with retention time
    rtPeak = []
    for i, peak in enumerate(peaks_with_iso.ipeaks()):

        l = MSPeakList()
        l.addPeak(peak)
        for j, peak_ in enumerate(peaks_with_iso.ipeaks()):
            if i != j:
                if abs(peak.rt - peak_.rt) < error_rt:
                    l.append(peak_)
        isIncluded = False
        index = []
        for k, rtClust in enumerate(rtPeak):
            if set(l) <= set(rtClust):  # l is already included in an existing rt cluster
                isIncluded = True
                break

            if set(rtClust) <= (set(l)):
                index.append(k)
                #break
        #del rtPeak[index]
        rtPeak = [rtPeak[i] for i in xrange(len(rtPeak)) if i not in index]
        if not isIncluded:
            rtPeak.append(MSPeakList(l))
            #isIncluded=True
            #else:
            #    if rtClust.__eq__(l):
            #        rtPeak[k]=l
            #        break
            #isIncluded=True
        #if not isIncluded:
        #l.sort(key=lambda x:x.mass())


#    with open('test1.txt', 'w') as f:
#        for r in rtPeak:
#            s=""
#            for i, p in enumerate(r):
#                s+=str(p)+';' if i<len(r)-1 else str(p)+'\n'
#            f.write(s)

#EXPERIMENTAL CODE
#    cl=[]
#    for cluster in rtPeak:
#        list_=[];datapoints={}
#        for i, p in enumerate(cluster):
#            correspondingPeaks=set()
#            correspondingPeaks.add(p)
#            for j in xrange(i+1, len(cluster)):
#                #put caching on that to avoid recalculation each time of the datapoints
#                try:
#                    r=r_coef(list(datapoints[p]), list(datapoints[cluster[j]]))
#                except KeyError:
#                    y, y_= None, None
#                    try:
#                        y=datapoints[p]
#                    except KeyError:
#                        x, y= massExtractionBisectAlgo(p.sample,p.mass(), ppm)
#                        datapoints[p]=y
#
#                    try:
#                        y_=datapoints[cluster[j]]
#                    except KeyError:
#                        x, y_= massExtractionBisectAlgo(cluster[j].sample, cluster[j].mass(), ppm)
#                        datapoints[cluster[j]]=y_
#                    r=r_coef(y, y_)
#                if r >= threshold:
#                    correspondingPeaks.add(cluster[j])
#            list_.append(correspondingPeaks)
#
#        for i, p in enumerate(list_):
#            for j in xrange(i+1, len(list_)):
#                if list_[j].issubset(p):
#                    continue
#                else:
#                    cl.append(MSPeakList(list(p)))
#merging step again
#    print "cluster length, same without replicates",len(cl), len(set(map(set, [x for x in cl])))
#    with open('test2.txt', 'w') as f:
#        for r in cl:
#            s=""
#            for i, p in enumerate(r):
#                s+=str(p)+';' if i<len(r)-1 else str(p)+'\n'
#            f.write(s)
#
#END EXPERIMENTAL CODE
    print 'len RTpeak', len(rtPeak)
    print("Creating possible M0...")
    #Cython code
    finalList = massGenPerGroup(rtPeak, adducts_to_check, ppm)
    print("Mapping of calculated mass on peaklist...")
    #4,see if one matches with peak in the raw peaklist
    goodPeak = []  #list will contain good peak per rtCluster
    for i, dic in enumerate(finalList):
        matchingMass = defaultdict(list)
        for mass in dic.iterkeys():
            p = rtPeak[i].peaksInMZRange(
                mass, deltam=mass * ppm if mode == 'HighRes' else 1.)  # warning: rtPeak[i] is not necessarily sorted
            if not p:
                continue
            peak = sorted(p, key=lambda x: abs(mass - x.mass()))[0]
            #if peak not in matchingMass.keys():#may avoid this to see if one peak appears several times !then do 'set'
            #    matchingMass[peak]=[]
            matchingMass[peak] += dic[mass]
        goodPeak.append(matchingMass)

    #start new stuffs here
    print("Merging informations...")
    #conflicts=False
    adds = MSPeakList()  # object for storing the adducts found
    newGoodPeaks = defaultdict(list)  #{}
    for peaksInOneRtGroup in goodPeak:
        for peak in peaksInOneRtGroup.iterkeys():
            newGoodPeaks[peak] += peaksInOneRtGroup[peak]
    for p in newGoodPeaks.iterkeys():
        p.fragCluster = MSClusterList(list(set(newGoodPeaks[p])))
        for f in p.fragCluster:
            f.parentPeak.append(p)
        adds += p.fragCluster
    finalPeaks = MSPeakList(newGoodPeaks.keys())

    print("Resolving conflicts if any...")

    #removing peak that appears many times that is to say in different clusters
    def clusterComparison(list_):  # receives a list of peaks whose fragCluster is already set
        """
        Return the best parent peak.
        WARNING: p_.y_data values are None, so the correlation step below is commented out.
        """
        sortedList = sorted(list_, key=lambda x: len(x.fragCluster))
        longest = len(sortedList[-1].fragCluster)
        sameSizePeaks = MSPeakList()

        for p in sortedList:
            if len(p.fragCluster) == longest:
                sameSizePeaks.append(p)

        if len(sameSizePeaks) == 1:
            return sameSizePeaks[0]
        corr = np.array([0.] * len(sameSizePeaks))
        #for i, p in enumerate(sameSizePeaks):
        #    for p_ in p.fragCluster:
        #        corr[i] += r_coef(p_.y_data, p.y_data)
        m = max_f(corr)
        return sameSizePeaks[np.where(corr == m)[0][0]]

    if resolveConflicts:
        for add in set(adds):
            if len(add.parentPeak) <= 1:
                #print "%s belong to several fragCluster"%str(add)
                continue
            #print "%s belong to several fragCluster"%str(add)
            goodParent = clusterComparison(add.parentPeak)
            #if goodParent is not None:
            #    add.parentPeak = [goodParent]

            for parent in add.parentPeak:
                if parent != goodParent:
                    try:
                        parent.fragCluster.remove(add)
                    except ValueError:
                        print "Error removing %s from fragCluster of %s" % (
                            str(add), str(parent))
            add.parentPeak = [goodParent]  # equivalent to building a 'toRemove' list and then removing
            #print "after removing len add.parentPeak", len(add.parentPeak)

    #make the annotation
    for peak in finalPeaks.ipeaks():
        for f in peak.fragCluster:
            #results = makeAnnotations(adducts_to_check, adducts, f.mass(), ppm)
            for annot in adducts.iterkeys():
                p = f.mass() / annot[1] + annot[0]
                diff = peak.mass() * ppm if mode == 'HighRes' else 1
                if peak.mass() > p - diff and peak.mass() < p + diff:
                    f.annotation[annot] = adducts[annot]
                    break
    finalPeaks = checkingSons(finalPeaks)
    #5,second filter, correlation on the isotopic cluster between samples
    #    if useCorrelation:
    #        print "Calculating correlation between samples..."
    #        interSamplesCorr(spl, **kwargs)
    #        print  "Calculating correlation intra sample..."
    #        intraSampleCorr(spl)
    #    #6 merging
    print "Merging interesting peaks"
    for peak in peaks_with_iso.ipeaks():  # warning: merging must leave out the peaks used to construct this peak
        if peak not in finalPeaks and peak not in adds:  #matching_peaks:
            finalPeaks.append(peak)  #matching_peaks to
    if not finalPeaks:
        print("no cluster found; try increasing the ppm or rt drift parameters")
    print("finished, time elapsed:", time.clock() - t)
    return MSPeakList(sorted(finalPeaks, key=lambda x: x.mass())), adds  # checkingSons(finalPeaks), adds
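
The "Merging information" step above relies on defaultdict(list) to fold the per-RT-group {peak: annotations} dicts into one mapping. A tiny sketch of that pattern with string stand-ins for the real peak objects:

from collections import defaultdict

per_group = [{'peakA': ['M+H'], 'peakB': ['M+Na']},
             {'peakA': ['M+K']},
             {'peakC': ['M+H']}]

merged = defaultdict(list)
for group in per_group:
    for peak, annotations in group.items():
        merged[peak] += annotations   # list concatenation; the original deduplicates later with set()

print(dict(merged))   # e.g. {'peakA': ['M+H', 'M+K'], 'peakB': ['M+Na'], 'peakC': ['M+H']}
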
Example #11
def isotopicPeakListFinder(peaks, isomasses, **kwargs):
    """
    assign an isotopic cluster for each peak, and try to find an idms
     we may use a system like the CAMERA algorithm to see...
    input:
        list of peak must an obj.MSPeakList object
        clusterLength = 6  never go to six in LOW_RES
                        size expected of an isotopic cluster
        rtError: maximum drift of the retention time
        decreaseOrder: allow or not allow that the successive peak of the isotopic cluster
                        intensity are going down, can be confusing for finding idms
    output:
        two MSPeakList, the first one corresponding to the peaks with an isotopic cluster
        and the other one all peaks belonging to an isotopic cluster
    """

    #unpacking parameters
    print "Isotopic cluster calculation..."

    rtError = np.float(kwargs.get('rtError', 6))
    ppm = np.float(peaks[0].sample.ppm / 1e6)
    MAX_GAP_ALLOWED = np.int(len(isomasses))
    decreaseOrder = kwargs.get('decreaseOrder',
                               True)  #we use the less restrictive...
    mode = kwargs.get('mode', 'HighRes')  # default matches the 'HighRes' checks below
    #sort isomasses
    #isomasses = sorted(isomasses, key=lambda x:x[0])

    peaks_with_iso = MSPeakList()
    peaks_without_iso = MSPeakList()  # peaks for which no isotopic cluster was found
    list_iso = set()  #MSPeakList()

    t = time.clock()

    for peak in peaks.ipeaks():  #iterating over peaks

        if peak in list_iso:
            continue  # skip peaks already assigned to an isotopic cluster

        isoCluster = MSClusterList()
        gap = 0
        #isos = resolutionAdjustment(isomasses, peak.mass()*ppm) if mode=='HighRes' else isomasses
        for i, isomass in enumerate(sorted(isomasses, key=lambda x: x[0])):
            #pic = _getMatchingPeaks(peaks, peak, isomass[0], ppm, rtError)

            mass = isomass[0]
            massToCheck = peak.mass() + mass

            p = peaks.peaksInMZRange(
                massToCheck,
                deltam=ppm * massToCheck if mode == 'HighRes' else 1.)  # deltart
            matchingRtPeaks = MSPeakList()  # will contain all matching peak in rt
            for pk in p.ipeaks():
                if pk != peak:
                    if abs(peak.rt - pk.rt) <= rtError:
                        matchingRtPeaks.append(pk)

            if matchingRtPeaks:

                pic = sorted(matchingRtPeaks,
                             key=lambda pics: abs(pics.mass() - peak.mass()))[0]  # take the closest in mass
                if pic is not None:
                    if decreaseOrder:  # each successive peak area must be smaller than the previous one
                        #if isoCluster:
                        areaToCompare = isoCluster[-1].area if isoCluster else peak.area
                        if areaToCompare < pic.area:  # idms found ???
                            break

                    if pic not in list_iso:  #pic not in isoCluster and
                        isoCluster.append(pic)
                        list_iso.add(pic)
            else:
                gap += 1
                if gap >= MAX_GAP_ALLOWED:
                    break

        # #set parent for all peaks found
        if isoCluster:
            for pics in isoCluster:
                #pics.parentPeak=peak
                pics.parentPeak.append(peak)
            peak.isoCluster = isoCluster
            peaks_with_iso.addPeak(peak)
        else:
            peaks_without_iso.addPeak(peak)

#    for p in peaks.ipeaks():
#        if p not in peaks_with_iso and p not in list_iso:
#            peaks_without_iso.addPeak(p)

    print time.clock() - t
    print "peaks with isotopes: ", len(peaks_with_iso)
    print "list isotopes: ", len(list_iso)
    print "peaks without isotopes: ", len(peaks_without_iso)
    return peaks_with_iso + peaks_without_iso, list_iso
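
One small aside on the loop above: sorted(isomasses, key=lambda x: x[0]) is recomputed for every peak even though isomasses never changes. Sorting once before the loop gives the same iteration order; the tuples below are hypothetical:

from operator import itemgetter

isomasses = [(2.006, 'M+2'), (1.003, 'M+1'), (3.010, 'M+3')]
sorted_isomasses = sorted(isomasses, key=itemgetter(0))   # sort once, reuse for every peak

for isomass in sorted_isomasses:
    print(isomass)
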
Example #12
def inSpectraFinder(peaks, isomasses, **k):
    """
    complementary algorihtm to look for isotopic 
    cluster in the sepctra rather than in the peak
    list, should be more effective
    """

    ppm = np.float(peaks[0].sample.ppm / 1e6)
    decreaseOrder = k.get('decreaseOrder',
                          False)  #we use the less restrictive...
    mode = peaks[0].sample.kind
    MAX_GAP_ALLOWED = np.int(k.get('gap', 0) + 1)  # maximum number of missing isotopes tolerated
    isomasses = sorted(isomasses, key=lambda x: x[0])

    for peak in peaks.ipeaks():  #generator
        isores = []  # list of per-spectrum result dicts
        for i, s in enumerate(peak.ispectra()):

            gap = np.int(0)
            isos = defaultdict(float)  #OrderedDict()
            #for i in range(1,3):#some testing if we are close or far
            p = s.massPeakInRange(peak.mass(), ppm *
                                  peak.mass())  #list pairs(mass, intensity)

            if not p:
                #count+=1
                continue

            pmass, pintensity = sorted(
                p, key=lambda x: abs(x[0] - peak.mass()))[0]

            #print "setting the base peak"
            isos[pmass] += pintensity  #= #setting the base peak
            #isores.append(matched)

            if mode == 'HighRes':  # spelled consistently so this branch can actually trigger
                errormass = pmass * ppm  # ppm is already divided by 1e6 above
                adjustedIsos = resolutionAdjustment(isomasses, errormass)
            else:
                adjustedIsos = isomasses

            for isomass in adjustedIsos:
                #for i in range(1,3):
                m = s.massPeakInRange(pmass + isomass[0],
                                      (pmass + isomass[0]) * ppm)
                #if m:break
                #if m.size:
                if m:
                    mass = sorted(m,
                                  key=lambda x: abs(x[0] -
                                                    (pmass + isomass[0])))[0]
                    if decreaseOrder:  #majority
                        if isos[isos.keys()[-1]] < mass[1]:
                            #if isores[-1]<mass[1]:
                            break  #idms found ???
                    isos[mass[0]] += mass[1]  #= #mass:intensity pairs
                    #isores.append(mass)
                else:  #no peak found in this spectra
                    gap += 1
                    if gap > MAX_GAP_ALLOWED:
                        break
            isores.append(isos)
        #isores=resolutionAdjustment(isores, ppm, adaptError=True)
        if isores:
            s = sorted(isores, key=lambda x: len(x))
            ref, isoext = s[-1], s[:-1]  #take the longest one
            #if ref:
            #isoext = [x for x in isores if x != ref] #extraction
            for d in isoext:  # iterate over the remaining per-spectrum dictionaries

                checked = set()

                for mp in d.iterkeys():  #iterates over masses
                    mp_included = False

                    for mprime in ref.iterkeys():  #iterate over reference
                        t = ppm * max(mprime, mp) if mode == 'HighRes' else 2
                        if abs(mprime - mp) < t:  # masses this close are considered identical

                            if mprime in checked:
                                continue
                            #if mprime not in checked:
                            ref[mprime] += d[mp]
                            checked.add(mprime)
                            mp_included = True
                            break

                    if not mp_included:
                        ref[mp] += d[mp]
            #setting the isotopes
            peak.isoSpectra = ref.items()  # [(key, val) for key, val in ref.iteritems()]
        else:
            print("no masspeak found in corresponding spectra for peak %s" %
                  str(peak))
            print("This peak may not be good !")
            peak.isGood = False
    return peaks