def annotate_ms2scans(self):
    """Annotate stored MS2 scans with theoretical fragments and score them.

    For every peptide / charge / activation key found in
    ``self.ms2_spectra`` this method:

    * builds the list of theoretical "by" fragments and of their H2O / NH3
      neutral-loss variants for charge states 1..z, each entry being
      ``[m/z, label]``;
    * stores those entries, followed by one ``(m/z, label)`` tuple for the
      intact precursor ("M" fragment) at charge z, in
      ``self.ms2_annotation[peptide][z][activ]``;
    * writes the score returned by ``XCorr.get_xcorr`` into
      ``scan_obj['scan_info']['Xcorr']`` for every scan of that entry.

    The elapsed wall-clock time is printed at the end.  Returns ``None``.
    """
    t0 = timer.time()
    self.ms2_annotation = {}
    xcorr = XCorr()

    for peptide in self.ms2_spectra.keys():
        self.ms2_annotation[peptide] = {}

        # The fragments depend only on the peptide, so generate them once
        # instead of once per charge / activation combination.
        sequence = mspy.sequence(peptide)
        fragments = mspy.mod_proteo.fragment(sequence, "by")
        # The water loss must be spelled "H2O" (letter O); the previous
        # "H20" (digit zero) is not the formula of water.
        fragments_loss = mspy.mod_proteo.fragmentlosses(
            fragments, ["H2O", "NH3"])
        precursor = mspy.mod_proteo.fragment(sequence, "M")[0]

        for z in self.ms2_spectra[peptide].keys():
            self.ms2_annotation[peptide][z] = {}
            precursor_charge = int(z)

            for activ in self.ms2_spectra[peptide][z].keys():
                fragment_mzs = []
                fragment_loss_mzs = []
                for z2 in range(1, precursor_charge + 1):
                    charge_tag = ' ' + str(z2) + '+'
                    # Index 1 of mz() is used throughout; presumably the
                    # average m/z -- TODO confirm against mspy.
                    for fragment in fragments:
                        fragment_mzs.append([
                            fragment.mz(charge=z2)[1],
                            fragment.format('f ') + charge_tag,
                        ])
                    for fragment in fragments_loss:
                        fragment_loss_mzs.append([
                            fragment.mz(charge=z2)[1],
                            fragment.format('f ') + charge_tag,
                        ])

                # Build the annotation as a NEW list.  Assigning
                # fragment_mzs itself and extending it in place would leak
                # the loss and precursor entries into the fragment list
                # that is handed to the XCorr scorer below.
                annotation = fragment_mzs + fragment_loss_mzs
                annotation.append((
                    precursor.mz(charge=precursor_charge)[1],
                    precursor.format('f ' + ' ' + str(z) + '+'),
                ))
                self.ms2_annotation[peptide][z][activ] = annotation

                ion_mzs = [entry[0] for entry in fragment_mzs]
                loss_mzs = [entry[0] for entry in fragment_loss_mzs]
                for scan_obj in self.ms2_spectra[peptide][z][activ]:
                    scan_obj['scan_info']['Xcorr'] = xcorr.get_xcorr(
                        scan_obj['peaklist'],
                        scan_obj['scan_info']['precursorMZ'],
                        precursor_charge,
                        ion_mzs,
                        loss_mzs)

    t1 = timer.time() - t0
    print('Calculated annotations for MS2 scans in %s ' % t1)
def generate_pattern_objects(peptides, charge_min, charge_max):
    """Build sequence objects and theoretical patterns for each peptide.

    Args:
        peptides: iterable of peptide strings accepted by ``mspy.sequence``.
        charge_min: lowest charge state to compute (inclusive).
        charge_max: highest charge state to compute (inclusive).

    Returns:
        A two-element list ``[sequences, patterns]`` where ``sequences``
        maps each peptide to its ``mspy.sequence`` object and ``patterns``
        maps each peptide to a ``{charge: pattern}`` dict produced by
        ``mspy.pattern(formula, charge=charge, real=False)``.
    """
    sequences = {}
    patterns = {}
    for pep in peptides:
        seq_obj = mspy.sequence(pep)
        sequences[pep] = seq_obj
        by_charge = {}
        for charge in range(charge_min, charge_max + 1):
            by_charge[charge] = mspy.pattern(
                seq_obj.formula(), charge=charge, real=False)
        patterns[pep] = by_charge
    return [sequences, patterns]
def generate_peptide_list(self):
    """Generates list of all possible peptides and their profiles.

    Digests ``self.sequence`` with the 'Non-Specific' rule (using
    ``self.max_length`` as the ``miscleavage`` argument) and, for every
    resulting peptide and every charge from ``self.charge_min`` to
    ``self.charge_max`` inclusive, computes the pattern of the peptide's
    compound.  Patterns whose first point lies above ``self.mz_min`` and
    whose last point lies below ``self.mz_max`` are stored, keyed by
    ``peptide.format()`` and then by the charge as a string:

    * ``self.peptide_indexed_iso_dist``    -> the full pattern
    * ``self.peptide_indexed_iso_maxpeak`` -> first element (index 0) of
      the pattern point with the largest second element (index 1)

    The elapsed wall-clock time is printed at the end.
    """
    started = timer.time()

    parent = mspy.sequence(self.sequence)
    candidates = mspy.mod_proteo.digest(
        parent, 'Non-Specific', miscleavage=self.max_length)

    self.peptide_indexed_iso_maxpeak = {}
    self.peptide_indexed_iso_dist = {}

    for candidate in candidates:
        compound = mspy.obj_compound.compound(candidate.formula())
        label = candidate.format()

        dist_by_charge = {}
        maxpeak_by_charge = {}
        self.peptide_indexed_iso_dist[label] = dist_by_charge
        self.peptide_indexed_iso_maxpeak[label] = maxpeak_by_charge

        for charge in range(self.charge_min, self.charge_max + 1):
            pattern = compound.pattern(charge=charge, real=False)
            inside_window = (pattern[0][0] > self.mz_min
                             and pattern[-1][0] < self.mz_max)
            if not inside_window:
                continue
            tallest = max(pattern, key=lambda point: point[1])
            charge_key = str(charge)
            maxpeak_by_charge[charge_key] = tallest[0]
            dist_by_charge[charge_key] = pattern

    elapsed = timer.time() - started
    print('Produced peptide isotopic distributions in %s ' % elapsed)
def pepFrag(seq, X, Y, plotCanvas, annotation=None, plotSpec=True,
            absTol=3000):
    """Match theoretical peptide fragments to a spectrum and plot the result.

    The fragments returned by ``doFrag`` for ``seq`` are compared with the
    peaks in ``X``: for each fragment the two peaks neighbouring its
    insertion position are examined and the closer one lying within
    ``absTol`` ppm is taken as the match.  Matched fragments are drawn on
    ``plotCanvas.ax`` in a per-series colour and their ppm errors are
    plotted on ``plotCanvas.ax2``.

    NOTE(review): this file appears to define ``pepFrag`` a second time
    further down with the same signature; the later definition would
    replace this one when the module is loaded.

    Args:
        seq: peptide sequence string; passed to ``mspy.sequence`` and also
            used as the text label on the plot.
        X: numpy array of peak positions (presumably m/z -- confirm with
            callers).
        Y: numpy array of peak heights, same length as ``X``.
        plotCanvas: object exposing ``ax``, ``ax2``, ``format_labels()``
            and ``draw()``.
        annotation: optional string appended to ``seq`` in the text label.
        plotSpec: when true, the raw spectrum is drawn in black first.
        absTol: matching tolerance in ppm (default 3000, the value that
            was previously hard-coded).

    Returns:
        None.  The canvas is redrawn as a side effect.
    """
    ax1 = plotCanvas.ax
    ax2 = plotCanvas.ax2

    # Sort the spectrum by position so searchsorted() can be used below.
    sortInd = X.argsort()
    X = X[sortInd]
    Y = Y[sortInd]

    ans, series = doFrag(mspy.sequence(seq))
    fragRange = N.arange(len(series))

    # Item 0 of each fragment record is compared against the series index
    # below and item 4 is used as the fragment position.
    fragType = N.array([item[0] for item in ans])
    fragMZ = N.array([item[4] for item in ans])

    fragMZInd = fragMZ.argsort()
    fragMZ = fragMZ[fragMZInd]
    fragType = fragType[fragMZInd]
    # Unmatched fragments keep a height of 1; matches get the peak height.
    fragYVals = N.ones_like(fragMZ)

    ppmErrs = []
    ppmErrType = []
    for i, frag in enumerate(fragMZ):
        # Candidate peaks: the one at the insertion point and the one
        # before it, both clamped to valid indices.
        foundInd = X.searchsorted(frag)
        prevInd = 0 if foundInd == 0 else foundInd - 1
        if foundInd >= len(X):
            foundInd -= 1
            prevInd -= 1
        foundInd = max(foundInd, 0)
        prevInd = max(prevInd, 0)

        foundMZ = X[foundInd]
        prevMZ = X[prevInd]
        foundDiff = foundMZ - frag
        prevDiff = prevMZ - frag
        foundDiffOk = N.abs(foundDiff) < (foundMZ * absTol * 1E-6)
        prevDiffOk = N.abs(prevDiff) < (prevMZ * absTol * 1E-6)

        # Pick the in-tolerance candidate; when both qualify the strictly
        # closer one wins and ties go to the previous peak.
        if foundDiffOk and (not prevDiffOk or
                            N.abs(foundDiff) < N.abs(prevDiff)):
            bestInd, bestDiff = foundInd, foundDiff
        elif prevDiffOk:
            bestInd, bestDiff = prevInd, prevDiff
        else:
            continue

        ppmErrs.append([frag, bestDiff / frag * 1E6])
        ppmErrType.append(fragType[i])
        fragYVals[i] = Y[bestInd]

    # Plot the lines for the spectrum before the matched fragments.
    if plotSpec:
        ax1.vlines(X, 0, Y, colors='k', alpha=1.0)

    ppmErrType = N.array(ppmErrType)
    # reshape keeps the column slice valid when nothing was matched.
    errs = N.array(ppmErrs).reshape(-1, 2)[:, 1]

    tempColors = ['r', 'b', 'g', 'y', 'm', 'b', 'k']
    tempMarkers = ['o', 's', 'd', '^', 'h', 'p', 'o', 'd', 's']

    n = 1
    tempErrs = []  # keeps errMax defined when there are no series at all
    for frag in fragRange:
        # Wrap around so more series than colours/markers cannot overrun.
        color = tempColors[frag % len(tempColors)]
        marker = tempMarkers[frag % len(tempMarkers)]

        fragInd = N.where(fragType == frag)[0]
        tempFrag = fragMZ[fragInd]
        errInd = N.where(ppmErrType == frag)[0]
        tempErrs = errs[errInd]
        # Stack each series' errors above the previous series.
        tempErrY = N.arange(n, len(tempErrs) + n)
        n += len(tempErrs)
        ax2.plot(tempErrs, tempErrY, linestyle='None', marker=marker,
                 color=color, ms=5, alpha=0.6)
        tempInt = fragYVals[fragInd]
        ax1.vlines(tempFrag, 0, tempInt, colors=color,
                   linestyles='solid', alpha=0.8)

    # NOTE(review): errMax is derived from the LAST series' error count,
    # and axvline's ymax is normally an axes fraction -- both kept as they
    # were; confirm the intent.
    errMax = n - len(tempErrs) + 1
    ax2.axvline(ymax=errMax, color='k', ls='--')
    # Limits are passed positionally: the previous keywords were misspelt
    # ("xman" / "yman"), so the upper limits were never applied.
    if len(errs) > 0:
        errLim = N.abs(errs).max() * 1.1
        ax2.set_xlim(-errLim, errLim)
    ax2.set_ylim(0, errMax)

    if annotation is not None:
        textTag = seq + annotation
    else:
        textTag = seq
    ax1.text(0.03, 0.95, textTag, fontsize=10,
             bbox=dict(facecolor='yellow', alpha=0.1),
             transform=ax1.transAxes, va='top')

    # Only fragments whose matched peak height exceeds 1 get labelled.
    matchedInd = N.where(fragYVals > 1)[0]
    labelPeaks(fragMZ[matchedInd], fragYVals[matchedInd], ax1, yCutoff=10)

    plotCanvas.format_labels()
    plotCanvas.draw()
def pepFrag(seq, X, Y, plotCanvas, annotation=None, plotSpec=True,
            absTol=3000):
    """Match theoretical peptide fragments to a spectrum and plot the result.

    The fragments returned by ``doFrag`` for ``seq`` are compared with the
    peaks in ``X``: for each fragment the two peaks neighbouring its
    insertion position are examined and the closer one lying within
    ``absTol`` ppm is taken as the match.  Matched fragments are drawn on
    ``plotCanvas.ax`` in a per-series colour and their ppm errors are
    plotted on ``plotCanvas.ax2``.

    NOTE(review): this file appears to contain an earlier definition of
    ``pepFrag`` with the same signature; being the later one, this
    definition would be the one in effect after the module is loaded.

    Args:
        seq: peptide sequence string; passed to ``mspy.sequence`` and also
            used as the text label on the plot.
        X: numpy array of peak positions (presumably m/z -- confirm with
            callers).
        Y: numpy array of peak heights, same length as ``X``.
        plotCanvas: object exposing ``ax``, ``ax2``, ``format_labels()``
            and ``draw()``.
        annotation: optional string appended to ``seq`` in the text label.
        plotSpec: when true, the raw spectrum is drawn in black first.
        absTol: matching tolerance in ppm (default 3000, the value that
            was previously hard-coded).

    Returns:
        None.  The canvas is redrawn as a side effect.
    """
    ax1 = plotCanvas.ax
    ax2 = plotCanvas.ax2

    # Sort the spectrum by position so searchsorted() can be used below.
    sortInd = X.argsort()
    X = X[sortInd]
    Y = Y[sortInd]

    ans, series = doFrag(mspy.sequence(seq))
    fragRange = N.arange(len(series))

    # Item 0 of each fragment record is compared against the series index
    # below and item 4 is used as the fragment position.
    fragType = N.array([item[0] for item in ans])
    fragMZ = N.array([item[4] for item in ans])

    fragMZInd = fragMZ.argsort()
    fragMZ = fragMZ[fragMZInd]
    fragType = fragType[fragMZInd]
    # Unmatched fragments keep a height of 1; matches get the peak height.
    fragYVals = N.ones_like(fragMZ)

    ppmErrs = []
    ppmErrType = []
    for i, frag in enumerate(fragMZ):
        # Candidate peaks: the one at the insertion point and the one
        # before it, both clamped to valid indices.
        foundInd = X.searchsorted(frag)
        prevInd = 0 if foundInd == 0 else foundInd - 1
        if foundInd >= len(X):
            foundInd -= 1
            prevInd -= 1
        foundInd = max(foundInd, 0)
        prevInd = max(prevInd, 0)

        foundMZ = X[foundInd]
        prevMZ = X[prevInd]
        foundDiff = foundMZ - frag
        prevDiff = prevMZ - frag
        foundDiffOk = N.abs(foundDiff) < (foundMZ * absTol * 1E-6)
        prevDiffOk = N.abs(prevDiff) < (prevMZ * absTol * 1E-6)

        # Pick the in-tolerance candidate; when both qualify the strictly
        # closer one wins and ties go to the previous peak.
        if foundDiffOk and (not prevDiffOk or
                            N.abs(foundDiff) < N.abs(prevDiff)):
            bestInd, bestDiff = foundInd, foundDiff
        elif prevDiffOk:
            bestInd, bestDiff = prevInd, prevDiff
        else:
            continue

        ppmErrs.append([frag, bestDiff / frag * 1E6])
        ppmErrType.append(fragType[i])
        fragYVals[i] = Y[bestInd]

    # Plot the lines for the spectrum before the matched fragments.
    if plotSpec:
        ax1.vlines(X, 0, Y, colors='k', alpha=1.0)

    ppmErrType = N.array(ppmErrType)
    # reshape keeps the column slice valid when nothing was matched.
    errs = N.array(ppmErrs).reshape(-1, 2)[:, 1]

    tempColors = ['r', 'b', 'g', 'y', 'm', 'b', 'k']
    tempMarkers = ['o', 's', 'd', '^', 'h', 'p', 'o', 'd', 's']

    n = 1
    tempErrs = []  # keeps errMax defined when there are no series at all
    for frag in fragRange:
        # Wrap around so more series than colours/markers cannot overrun.
        color = tempColors[frag % len(tempColors)]
        marker = tempMarkers[frag % len(tempMarkers)]

        fragInd = N.where(fragType == frag)[0]
        tempFrag = fragMZ[fragInd]
        errInd = N.where(ppmErrType == frag)[0]
        tempErrs = errs[errInd]
        # Stack each series' errors above the previous series.
        tempErrY = N.arange(n, len(tempErrs) + n)
        n += len(tempErrs)
        ax2.plot(tempErrs, tempErrY, linestyle='None', marker=marker,
                 color=color, ms=5, alpha=0.6)
        tempInt = fragYVals[fragInd]
        ax1.vlines(tempFrag, 0, tempInt, colors=color,
                   linestyles='solid', alpha=0.8)

    # NOTE(review): errMax is derived from the LAST series' error count,
    # and axvline's ymax is normally an axes fraction -- both kept as they
    # were; confirm the intent.
    errMax = n - len(tempErrs) + 1
    ax2.axvline(ymax=errMax, color='k', ls='--')
    # Limits are passed positionally: the previous keywords were misspelt
    # ("xman" / "yman"), so the upper limits were never applied.
    if len(errs) > 0:
        errLim = N.abs(errs).max() * 1.1
        ax2.set_xlim(-errLim, errLim)
    ax2.set_ylim(0, errMax)

    if annotation is not None:
        textTag = seq + annotation
    else:
        textTag = seq
    ax1.text(0.03, 0.95, textTag, fontsize=10,
             bbox=dict(facecolor='yellow', alpha=0.1),
             transform=ax1.transAxes, va='top')

    # Only fragments whose matched peak height exceeds 1 get labelled.
    matchedInd = N.where(fragYVals > 1)[0]
    labelPeaks(fragMZ[matchedInd], fragYVals[matchedInd], ax1, yCutoff=10)

    plotCanvas.format_labels()
    plotCanvas.draw()