def clusteringBASIC(peaks, adds, **k):
    """
    Attach to every peak the peaks that sit at a known mass shift from it.

    For each peak ``p`` and each key ``v`` of ``adds`` the peak list is
    queried (``peaks.peaksInMZRTRange``) around ``p.mz + v[0]``; among the
    hits the one closest in mass to that target is kept, unless it is
    ``p`` itself.

    NOTE(review): in the visible part of this file a second definition of
    ``clusteringBASIC`` follows this one; being later, it is the one bound
    to the name once the module is loaded.

    input:
        peaks: peak collection; must be iterable, indexable and provide
            ``peaksInMZRTRange``
        adds: mapping whose keys are indexable, the mass shift being at
            index 0 (values are not used here)
        rtError: keyword, retention time tolerance handed to
            ``peaksInMZRTRange`` (default 6)
        ppm: keyword, mass tolerance. It is used exactly as given, while
            the fallback ``peaks[0].sample.ppm`` is divided by 1e6 --
            presumably callers are expected to pass an already scaled
            value; TODO confirm against callers
        resolveConflicts: accepted but ignored, the conflict resolution
            step was never enabled

    output:
        None when ``peaks`` is empty, otherwise the tuple
        (peaks, adductsFound). adductsFound receives one entry per match,
        so the same peak may appear several times.

    side effects:
        ``parentPeak`` is overwritten on every matched peak and
        ``fragCluster`` is set on every peak of ``peaks``.
    """
    if not peaks:
        # kept as is: empty input yields None, not a tuple
        return
    t = time.clock()
    errorRt = k.get('rtError', 6)
    ppm = k.get('ppm')
    if ppm is None:
        try:
            ppm = peaks[0].sample.ppm / 1e6
        except AttributeError:
            print("No value found for ppm setting to 10/1E6")
            ppm = 10. / 1e6

    # Plain list of the keys: wrapping them in a numpy array would coerce
    # keys mixing numbers and text into strings and break the mass sum.
    shifts = list(adds.keys())
    adductsFound = MSPeakList()
    for p in peaks:
        cluster = MSClusterList()
        for v in shifts:
            m = p.mz + v[0]
            match = peaks.peaksInMZRTRange(m, p.rt, errorRt,
                                           deltam=2 * ppm * m)
            if not match:  # covers None as well as an empty result
                continue
            # take the closest in mass to the expected position
            goodP = min(match, key=lambda x: abs(x.mz - m))
            if goodP is p:
                continue
            cluster.append(goodP)
            goodP.parentPeak = p
            adductsFound.append(goodP)
        p.fragCluster = MSPeakList(set(cluster))  # prevent from duplicates

    # TODO: peaks claimed by several parents are not arbitrated; each
    # match simply overwrites parentPeak.
    print("TiemElapsed: %s" % str(time.clock() - t))
    return peaks, adductsFound
def clusteringBASIC(peaks, adds, **k):
    """
    Attach to every peak the peaks that sit at a known mass shift from it.

    For each peak ``p`` and each key ``v`` of ``adds`` the peak list is
    queried (``peaks.peaksInMZRTRange``) around ``p.mz + v[0]``; among the
    hits the one closest in mass to that target is kept, unless it is
    ``p`` itself.

    NOTE(review): an earlier definition of ``clusteringBASIC`` precedes
    this one in the file; this later definition replaces it when the
    module is loaded.

    input:
        peaks: peak collection; must be iterable, indexable and provide
            ``peaksInMZRTRange``
        adds: mapping whose keys are indexable, the mass shift being at
            index 0 (values are not used here)
        rtError: keyword, retention time tolerance handed to
            ``peaksInMZRTRange`` (default 6)
        ppm: keyword, mass tolerance. It is used exactly as given, while
            the fallback ``peaks[0].sample.ppm`` is divided by 1e6 --
            presumably callers are expected to pass an already scaled
            value; TODO confirm against callers
        resolveConflicts: accepted but ignored, the conflict resolution
            step was never enabled

    output:
        None when ``peaks`` is empty, otherwise the tuple
        (peaks, adductsFound). adductsFound receives one entry per match,
        so the same peak may appear several times.

    side effects:
        ``parentPeak`` is overwritten on every matched peak and
        ``fragCluster`` is set on every peak of ``peaks``.
    """
    if not peaks:
        # kept as is: empty input yields None, not a tuple
        return
    t = time.clock()
    errorRt = k.get('rtError', 6)
    ppm = k.get('ppm')
    if ppm is None:
        try:
            ppm = peaks[0].sample.ppm / 1e6
        except AttributeError:
            print("No value found for ppm setting to 10/1E6")
            ppm = 10. / 1e6

    # Plain list of the keys: wrapping them in a numpy array would coerce
    # keys mixing numbers and text into strings and break the mass sum.
    addsToCheck = list(adds.keys())
    adductsFound = MSPeakList()
    for p in peaks:
        a = MSClusterList()
        for v in addsToCheck:
            m = p.mz + v[0]
            match = peaks.peaksInMZRTRange(m, p.rt, errorRt,
                                           deltam=2 * ppm * m)
            if not match:  # covers None as well as an empty result
                continue
            # take the closest in mass to the expected position
            goodP = min(match, key=lambda x: abs(x.mz - m))
            if goodP is p:
                continue
            a.append(goodP)
            goodP.parentPeak = p
            adductsFound.append(goodP)
        p.fragCluster = MSPeakList(set(a))  # prevent from duplicates

    # TODO: peaks claimed by several parents are not arbitrated; each
    # match simply overwrites parentPeak.
    print("TiemElapsed: %s" % str(time.clock() - t))
    return peaks, adductsFound
def isotopicPeakListFinder(peaks, isomasses, **kwargs):
    """
    assign an isotopic cluster to each peak, and try to find an idms

    Every peak not already claimed as an isotope is taken as a possible
    cluster head. For each isotopic mass shift (ascending), the peak
    closest to ``peak.mass() + shift`` within the mass window and the
    retention time tolerance is looked up and appended to the cluster.

    NOTE(review): in the visible part of this file a second definition of
    ``isotopicPeakListFinder`` follows this one; being later, it is the
    one bound to the name once the module is loaded.

    input:
        peaks: peak collection providing ``ipeaks`` and ``peaksInMZRange``
            (presumably an obj.MSPeakList); the mass tolerance is read
            from ``peaks[0].sample.ppm``
        isomasses: sequence of indexable items, mass shift at index 0;
            its length is also the number of misses tolerated per peak
        rtError: keyword, maximum drift of the retention time (default 6)
        decreaseOrder: keyword (default True); when true the search for
            the current peak stops as soon as a candidate has a larger
            area than the previous cluster member (or than the peak
            itself for the first member)
        mode: keyword, 'HighRes' (default, case-insensitive) uses a
            window of ppm * mass, anything else a fixed window of 1.

    output:
        a tuple: the concatenation of the peaks with and without an
        isotopic cluster, and the set of all peaks put in some cluster

    side effects:
        ``isoCluster`` is set on peaks owning a cluster and the owner is
        appended to ``parentPeak`` of each cluster member.
    """
    print("Isotopic cluster calculation...")
    rtError = float(kwargs.get('rtError', 6))
    # nothing is searched on empty input, so the tolerance is irrelevant
    ppm = float(peaks[0].sample.ppm / 1e6) if peaks else 0.
    MAX_GAP_ALLOWED = len(isomasses)
    decreaseOrder = kwargs.get('decreaseOrder', True)
    mode = kwargs.get('mode', 'HighRes')
    # the former default 'Highres' never matched the 'HighRes' test
    highRes = hasattr(mode, 'lower') and mode.lower() == 'highres'
    shifts = sorted(isomasses, key=lambda x: x[0])  # invariant, sort once

    peaks_with_iso = MSPeakList()
    peaks_without_iso = MSPeakList()
    list_iso = set()
    t = time.clock()
    for peak in peaks.ipeaks():
        if peak in list_iso:
            continue  # already an isotope of another peak
        isoCluster = MSClusterList()
        gap = 0
        for isomass in shifts:
            massToCheck = peak.mass() + isomass[0]
            deltam = ppm * massToCheck if highRes else 1.
            inMass = peaks.peaksInMZRange(massToCheck, deltam=deltam)
            matchingRtPeaks = MSPeakList()  # hits also matching in rt
            for pk in inMass.ipeaks():
                if pk != peak and abs(peak.rt - pk.rt) <= rtError:
                    matchingRtPeaks.append(pk)

            if not matchingRtPeaks:
                # a miss is counted here, never resolved with the match
                # left over from a previous shift
                gap += 1
                if gap >= MAX_GAP_ALLOWED:
                    break
                continue

            # take the closest in mass to the expected isotope position
            pic = min(matchingRtPeaks,
                      key=lambda c: abs(c.mass() - massToCheck))
            if decreaseOrder:
                if isoCluster:
                    areaToCompare = isoCluster[-1].area
                else:
                    areaToCompare = peak.area
                if areaToCompare < pic.area:  # idms found ???
                    break
            if pic not in list_iso:
                isoCluster.append(pic)
                list_iso.add(pic)

        if isoCluster:
            # set parent for all peaks found
            for member in isoCluster:
                member.parentPeak.append(peak)
            peak.isoCluster = isoCluster
            peaks_with_iso.addPeak(peak)
        else:
            peaks_without_iso.addPeak(peak)

    print(time.clock() - t)
    print("%s %s" % ("peaks with isotopes: ", len(peaks_with_iso)))
    print("%s %s" % ("list isotopes: ", len(list_iso)))
    print("%s %s" % ("peaks without isotopes: ", len(peaks_without_iso)))
    return peaks_with_iso + peaks_without_iso, list_iso
def isotopicPeakListFinder(peaks, isomasses, **kwargs):
    """
    assign an isotopic cluster to each peak, and try to find an idms

    Every peak not already claimed as an isotope is taken as a possible
    cluster head. For each isotopic mass shift (ascending), the peak
    closest to ``peak.mass() + shift`` within the mass window and the
    retention time tolerance is looked up and appended to the cluster.

    NOTE(review): an earlier definition of ``isotopicPeakListFinder``
    precedes this one in the file; this later definition replaces it when
    the module is loaded.

    input:
        peaks: peak collection providing ``ipeaks`` and ``peaksInMZRange``
            (presumably an obj.MSPeakList); the mass tolerance is read
            from ``peaks[0].sample.ppm``
        isomasses: sequence of indexable items, mass shift at index 0;
            its length is also the number of misses tolerated per peak
        rtError: keyword, maximum drift of the retention time (default 6)
        decreaseOrder: keyword (default True); when true the search for
            the current peak stops as soon as a candidate has a larger
            area than the previous cluster member (or than the peak
            itself for the first member)
        mode: keyword, 'HighRes' (default, case-insensitive) uses a
            window of ppm * mass, anything else a fixed window of 1.

    output:
        a tuple: the concatenation of the peaks with and without an
        isotopic cluster, and the set of all peaks put in some cluster

    side effects:
        ``isoCluster`` is set on peaks owning a cluster and the owner is
        appended to ``parentPeak`` of each cluster member.
    """
    print("Isotopic cluster calculation...")
    rtError = float(kwargs.get('rtError', 6))
    # nothing is searched on empty input, so the tolerance is irrelevant
    ppm = float(peaks[0].sample.ppm / 1e6) if peaks else 0.
    MAX_GAP_ALLOWED = len(isomasses)
    decreaseOrder = kwargs.get('decreaseOrder', True)
    mode = kwargs.get('mode', 'HighRes')
    # the former default 'Highres' never matched the 'HighRes' test
    highRes = hasattr(mode, 'lower') and mode.lower() == 'highres'
    sortedIsomasses = sorted(isomasses, key=lambda x: x[0])  # sort once

    peaks_with_iso = MSPeakList()
    peaks_without_iso = MSPeakList()
    list_iso = set()
    t = time.clock()
    for peak in peaks.ipeaks():
        if peak in list_iso:
            continue  # already an isotope of another peak
        isoCluster = MSClusterList()
        gap = 0
        for isomass in sortedIsomasses:
            massToCheck = peak.mass() + isomass[0]
            deltam = ppm * massToCheck if highRes else 1.
            p = peaks.peaksInMZRange(massToCheck, deltam=deltam)
            matchingRtPeaks = MSPeakList()  # hits also matching in rt
            for pk in p.ipeaks():
                if pk != peak and abs(peak.rt - pk.rt) <= rtError:
                    matchingRtPeaks.append(pk)

            if not matchingRtPeaks:
                # a miss is counted here, never resolved with the match
                # left over from a previous shift
                gap += 1
                if gap >= MAX_GAP_ALLOWED:
                    break
                continue

            # take the closest in mass to the expected isotope position
            pic = min(matchingRtPeaks,
                      key=lambda c: abs(c.mass() - massToCheck))
            if decreaseOrder:
                if isoCluster:
                    areaToCompare = isoCluster[-1].area
                else:
                    areaToCompare = peak.area
                if areaToCompare < pic.area:  # idms found ???
                    break
            if pic not in list_iso:
                isoCluster.append(pic)
                list_iso.add(pic)

        if isoCluster:
            # set parent for all peaks found
            for pics in isoCluster:
                pics.parentPeak.append(peak)
            peak.isoCluster = isoCluster
            peaks_with_iso.addPeak(peak)
        else:
            peaks_without_iso.addPeak(peak)

    print(time.clock() - t)
    print("%s %s" % ("peaks with isotopes: ", len(peaks_with_iso)))
    print("%s %s" % ("list isotopes: ", len(list_iso)))
    print("%s %s" % ("peaks without isotopes: ", len(peaks_without_iso)))
    return peaks_with_iso + peaks_without_iso, list_iso