Example #1
0
def clusteringBASIC(peaks, adds, **k):
    """
    Attach to every peak the peaks found at its adduct/fragment mass offsets.

    For each peak ``p`` and each key ``v`` of ``adds`` the target mass
    ``p.mz + v[0]`` is searched with ``peaks.peaksInMZRTRange`` around
    ``p.rt``. Among the hits, the one closest in m/z to the target is kept,
    unless that hit is ``p`` itself.

    input:
        peaks: peak container; it is indexed, iterated and must provide
               ``peaksInMZRTRange`` (presumably an MSPeakList -- TODO confirm)
        adds: mapping whose keys are indexable; only ``key[0]`` (the mass
              offset) is read here, the values are not used
        rtError: (keyword, default 6) retention time tolerance handed to
                 ``peaksInMZRTRange``
        ppm: (keyword) relative mass tolerance, used exactly as given.
             When omitted, ``peaks[0].sample.ppm / 1e6`` is used, or
             10/1e6 if that attribute does not exist.
             NOTE(review): a caller-supplied value is NOT divided by 1e6,
             unlike the sample value -- confirm callers pass a fraction.
        resolveConflicts: (keyword) still accepted but ignored; conflict
             resolution between competing parents is not implemented.
    output:
        None when ``peaks`` is empty, otherwise the tuple
        ``(peaks, adductsFound)`` where ``adductsFound`` is an MSPeakList
        receiving one entry per accepted match (so a peak matched several
        times appears several times).
    side effects:
        every peak gets a ``fragCluster`` attribute (MSPeakList built from
        the de-duplicated matches); every matched peak gets ``parentPeak``
        set to the peak it was matched from (a later match overwrites an
        earlier one).
    """
    if not peaks:
        return None

    # time.clock() was removed in Python 3.8; default_timer exists on both
    # Python 2 and Python 3. The value is only used for the timing message.
    from timeit import default_timer
    start = default_timer()

    errorRt = k.get('rtError', 6)
    ppm = k.get('ppm')
    if ppm is None:
        try:
            ppm = peaks[0].sample.ppm / 1e6
        except AttributeError:
            print("No value found for ppm setting to 10/1E6")
            ppm = 10. / 1e6

    # list(...) keeps this working when keys() returns a view (Python 3)
    addsToCheck = np.array(list(adds.keys()))

    adductsFound = MSPeakList()
    for p in peaks:
        a = MSClusterList()
        for v in addsToCheck:
            m = p.mz + v[0]
            match = peaks.peaksInMZRTRange(m, p.rt, errorRt,
                                           deltam=2 * ppm * m)
            if not match:  # None or empty: nothing at this offset
                continue
            # take the closest in mass to the expected target
            goodP = min(match, key=lambda x: abs(x.mz - m))
            if goodP is p:
                continue
            a.append(goodP)
            goodP.parentPeak = p
            adductsFound.append(goodP)
        p.fragCluster = MSPeakList(set(a))  # set() prevents duplicates

    # TODO: a peak claimed by several parents is not arbitrated; the last
    # parent processed wins (see the resolveConflicts keyword).
    print("TiemElapsed: %s" % str(default_timer() - start))
    return peaks, adductsFound
Example #2
0
def clusteringBASIC(peaks, adds, **k):
    """
    Attach to every peak the peaks found at its adduct/fragment mass offsets.

    For each peak ``p`` and each key ``v`` of ``adds`` the target mass
    ``p.mz + v[0]`` is searched with ``peaks.peaksInMZRTRange`` around
    ``p.rt``. Among the hits, the one closest in m/z to the target is kept,
    unless that hit is ``p`` itself.

    input:
        peaks: peak container; it is indexed, iterated and must provide
               ``peaksInMZRTRange`` (presumably an MSPeakList -- TODO confirm)
        adds: mapping whose keys are indexable; only ``key[0]`` (the mass
              offset) is read here, the values are not used
        rtError: (keyword, default 6) retention time tolerance handed to
                 ``peaksInMZRTRange``
        ppm: (keyword) relative mass tolerance, used exactly as given.
             When omitted, ``peaks[0].sample.ppm / 1e6`` is used, or
             10/1e6 if that attribute does not exist.
             NOTE(review): a caller-supplied value is NOT divided by 1e6,
             unlike the sample value -- confirm callers pass a fraction.
        resolveConflicts: (keyword) still accepted but ignored; conflict
             resolution between competing parents is not implemented.
    output:
        None when ``peaks`` is empty, otherwise the tuple
        ``(peaks, adductsFound)`` where ``adductsFound`` is an MSPeakList
        receiving one entry per accepted match (so a peak matched several
        times appears several times).
    side effects:
        every peak gets a ``fragCluster`` attribute (MSPeakList built from
        the de-duplicated matches); every matched peak gets ``parentPeak``
        set to the peak it was matched from (a later match overwrites an
        earlier one).
    """
    if not peaks:
        return None

    # time.clock() was removed in Python 3.8; default_timer exists on both
    # Python 2 and Python 3. The value is only used for the timing message.
    from timeit import default_timer
    start = default_timer()

    errorRt = k.get('rtError', 6)
    ppm = k.get('ppm')
    if ppm is None:
        try:
            ppm = peaks[0].sample.ppm / 1e6
        except AttributeError:
            print("No value found for ppm setting to 10/1E6")
            ppm = 10. / 1e6

    # list(...) keeps this working when keys() returns a view (Python 3)
    addsToCheck = np.array(list(adds.keys()))

    adductsFound = MSPeakList()
    for p in peaks:
        a = MSClusterList()
        for v in addsToCheck:
            m = p.mz + v[0]
            match = peaks.peaksInMZRTRange(m,
                                           p.rt,
                                           errorRt,
                                           deltam=2 * ppm * m)
            if not match:  # None or empty: nothing at this offset
                continue
            # take the closest in mass to the expected target
            goodP = min(match, key=lambda x: abs(x.mz - m))
            if goodP is p:
                continue
            a.append(goodP)
            goodP.parentPeak = p
            adductsFound.append(goodP)
        p.fragCluster = MSPeakList(set(a))  # set() prevents duplicates

    # TODO: a peak claimed by several parents is not arbitrated; the last
    # parent processed wins (see the resolveConflicts keyword).
    print("TiemElapsed: %s" % str(default_timer() - start))
    return peaks, adductsFound
Example #3
0
def isotopicPeakListFinder(peaks, isomasses, **kwargs):
    """
    Assign an isotopic cluster to each peak.

    Peaks are visited in the order given by ``peaks.ipeaks()``. For each
    peak not already claimed as an isotope, the mass offsets of
    ``isomasses`` are tried in increasing order: peaks lying in the mass
    window around ``peak.mass() + offset`` and within ``rtError`` of the
    peak's retention time are collected, and the one closest in mass to
    that expected value is the candidate isotope.

    input:
        peaks: list of peaks, must be an obj.MSPeakList object
        isomasses: sequence of items whose first element is the mass
                   offset of an isotope
        rtError: (keyword, default 6) maximum drift of the retention time
        decreaseOrder: (keyword, default True) when true, the search for
                   the current peak stops as soon as a candidate has a
                   larger area than the previous cluster member (or than
                   the peak itself when the cluster is still empty)
        mode: (keyword, default 'HighRes', compared case-insensitively)
                   'HighRes' uses a mass window of ppm * expected mass,
                   with ppm read from ``peaks[0].sample.ppm``; any other
                   value uses a fixed window of 1.
    output:
        a tuple ``(all_peaks, list_iso)``:
        all_peaks is the concatenation of the peaks that received an
        isotopic cluster and the visited peaks that did not; list_iso is
        the set of all peaks that were put into an isotopic cluster.
        For an empty ``peaks`` an empty MSPeakList and an empty set are
        returned.
    side effects:
        each peak with a cluster gets ``isoCluster`` set; that peak is
        appended to the ``parentPeak`` of every member of its cluster.
    """
    # unpacking parameters
    print("Isotopic cluster calculation...")

    if not peaks:
        # peaks[0] below would raise on an empty list
        return MSPeakList(), set()

    # time.clock() was removed in Python 3.8; default_timer exists on both
    # Python 2 and Python 3. The value is only used for the timing message.
    from timeit import default_timer

    rtError = float(kwargs.get('rtError', 6))
    ppm = float(peaks[0].sample.ppm / 1e6)
    decreaseOrder = kwargs.get('decreaseOrder', True)
    # The former default 'Highres' never matched the 'HighRes' test, so the
    # ppm window was silently replaced by the fixed window of 1.
    highRes = str(kwargs.get('mode', 'HighRes')).lower() == 'highres'

    # loop invariant: sort the offsets once instead of once per peak
    sortedIsomasses = sorted(isomasses, key=lambda x: x[0])
    # with the limit equal to the number of offsets, the gap test below can
    # only trigger on the last offset
    MAX_GAP_ALLOWED = len(sortedIsomasses)

    peaks_with_iso = MSPeakList()
    peaks_without_iso = MSPeakList()  # visited peaks that got no cluster
    list_iso = set()  # every peak already assigned to some cluster

    t = default_timer()

    for peak in peaks.ipeaks():
        if peak in list_iso:
            continue  # already an isotope of an earlier peak

        isoCluster = MSClusterList()
        gap = 0
        peakMass = peak.mass()
        for isomass in sortedIsomasses:
            massToCheck = peakMass + isomass[0]
            deltam = ppm * massToCheck if highRes else 1.
            inMassRange = peaks.peaksInMZRange(massToCheck, deltam=deltam)

            # keep only the candidates that also match in retention time
            matchingRtPeaks = MSPeakList()
            for pk in inMassRange.ipeaks():
                if pk != peak and abs(peak.rt - pk.rt) <= rtError:
                    matchingRtPeaks.append(pk)

            if not matchingRtPeaks:
                gap += 1
                if gap >= MAX_GAP_ALLOWED:
                    break
                continue

            # take the closest in mass to the expected isotope mass (the
            # distance used to be measured from the parent mass, which
            # favoured the lightest candidate of the window)
            pic = min(matchingRtPeaks,
                      key=lambda c: abs(c.mass() - massToCheck))

            if decreaseOrder:  # areas must not increase along the cluster
                areaToCompare = isoCluster[-1].area if isoCluster else peak.area
                if areaToCompare < pic.area:  # idms found ???
                    break

            if pic not in list_iso:
                isoCluster.append(pic)
                list_iso.add(pic)

        # set parent for all peaks found
        if isoCluster:
            for pics in isoCluster:
                pics.parentPeak.append(peak)
            peak.isoCluster = isoCluster
            peaks_with_iso.addPeak(peak)
        else:
            peaks_without_iso.addPeak(peak)

    # the doubled space reproduces the output of the former print statements
    print(default_timer() - t)
    print("peaks with isotopes:  %d" % len(peaks_with_iso))
    print("list isotopes:  %d" % len(list_iso))
    print("peaks without isotopes:  %d" % len(peaks_without_iso))
    return peaks_with_iso + peaks_without_iso, list_iso
Example #4
0
def isotopicPeakListFinder(peaks, isomasses, **kwargs):
    """
    Assign an isotopic cluster to each peak.

    Peaks are visited in the order given by ``peaks.ipeaks()``. For each
    peak not already claimed as an isotope, the mass offsets of
    ``isomasses`` are tried in increasing order: peaks lying in the mass
    window around ``peak.mass() + offset`` and within ``rtError`` of the
    peak's retention time are collected, and the one closest in mass to
    that expected value is the candidate isotope.

    input:
        peaks: list of peaks, must be an obj.MSPeakList object
        isomasses: sequence of items whose first element is the mass
                   offset of an isotope
        rtError: (keyword, default 6) maximum drift of the retention time
        decreaseOrder: (keyword, default True) when true, the search for
                   the current peak stops as soon as a candidate has a
                   larger area than the previous cluster member (or than
                   the peak itself when the cluster is still empty)
        mode: (keyword, default 'HighRes', compared case-insensitively)
                   'HighRes' uses a mass window of ppm * expected mass,
                   with ppm read from ``peaks[0].sample.ppm``; any other
                   value uses a fixed window of 1.
    output:
        a tuple ``(all_peaks, list_iso)``:
        all_peaks is the concatenation of the peaks that received an
        isotopic cluster and the visited peaks that did not; list_iso is
        the set of all peaks that were put into an isotopic cluster.
        For an empty ``peaks`` an empty MSPeakList and an empty set are
        returned.
    side effects:
        each peak with a cluster gets ``isoCluster`` set; that peak is
        appended to the ``parentPeak`` of every member of its cluster.
    """
    # unpacking parameters
    print("Isotopic cluster calculation...")

    if not peaks:
        # peaks[0] below would raise on an empty list
        return MSPeakList(), set()

    # time.clock() was removed in Python 3.8; default_timer exists on both
    # Python 2 and Python 3. The value is only used for the timing message.
    from timeit import default_timer

    rtError = float(kwargs.get('rtError', 6))
    ppm = float(peaks[0].sample.ppm / 1e6)
    decreaseOrder = kwargs.get('decreaseOrder', True)
    # The former default 'Highres' never matched the 'HighRes' test, so the
    # ppm window was silently replaced by the fixed window of 1.
    highRes = str(kwargs.get('mode', 'HighRes')).lower() == 'highres'

    # loop invariant: sort the offsets once instead of once per peak
    sortedIsomasses = sorted(isomasses, key=lambda x: x[0])
    # with the limit equal to the number of offsets, the gap test below can
    # only trigger on the last offset
    MAX_GAP_ALLOWED = len(sortedIsomasses)

    peaks_with_iso = MSPeakList()
    peaks_without_iso = MSPeakList()  # visited peaks that got no cluster
    list_iso = set()  # every peak already assigned to some cluster

    t = default_timer()

    for peak in peaks.ipeaks():
        if peak in list_iso:
            continue  # already an isotope of an earlier peak

        isoCluster = MSClusterList()
        gap = 0
        peakMass = peak.mass()
        for isomass in sortedIsomasses:
            massToCheck = peakMass + isomass[0]
            deltam = ppm * massToCheck if highRes else 1.
            inMassRange = peaks.peaksInMZRange(massToCheck, deltam=deltam)

            # keep only the candidates that also match in retention time
            matchingRtPeaks = MSPeakList()
            for pk in inMassRange.ipeaks():
                if pk != peak and abs(peak.rt - pk.rt) <= rtError:
                    matchingRtPeaks.append(pk)

            if not matchingRtPeaks:
                gap += 1
                if gap >= MAX_GAP_ALLOWED:
                    break
                continue

            # take the closest in mass to the expected isotope mass (the
            # distance used to be measured from the parent mass, which
            # favoured the lightest candidate of the window)
            pic = min(matchingRtPeaks,
                      key=lambda c: abs(c.mass() - massToCheck))

            if decreaseOrder:  # areas must not increase along the cluster
                areaToCompare = isoCluster[-1].area if isoCluster else peak.area
                if areaToCompare < pic.area:  # idms found ???
                    break

            if pic not in list_iso:
                isoCluster.append(pic)
                list_iso.add(pic)

        # set parent for all peaks found
        if isoCluster:
            for pics in isoCluster:
                pics.parentPeak.append(peak)
            peak.isoCluster = isoCluster
            peaks_with_iso.addPeak(peak)
        else:
            peaks_without_iso.addPeak(peak)

    # the doubled space reproduces the output of the former print statements
    print(default_timer() - t)
    print("peaks with isotopes:  %d" % len(peaks_with_iso))
    print("list isotopes:  %d" % len(list_iso))
    print("peaks without isotopes:  %d" % len(peaks_without_iso))
    return peaks_with_iso + peaks_without_iso, list_iso