def test_mzml():
    """
    Runs a series of checks on the mzML class using the 'MultiTest' data file

    Checks the function numbers that were pulled, the per-chromatogram and
    per-scan decorators (together with the attributes and cvparam methods),
    scan summing, and indexing of the object by scan number and by time.
    Raises a ValueError naming the first check that fails and writes
    ' PASS' to stdout if every check succeeds.
    """
    sys.stdout.write('Testing mzML class...')
    from _classes._mzML import mzML
    mzml = mzML('MultiTest', verbose=False)
    # compare a sorted list: dictionary key order is not guaranteed, and on
    # Python 3 keys() returns a view that never compares equal to a list
    if sorted(mzml.functions.keys()) != [1, 3, 4]:
        raise ValueError('Did not pull the correct functions')

    @mzml._foreachchrom
    def testperchrom(chromatogram):
        attr = mzml.attributes(chromatogram)
        return attr['id']

    if testperchrom() != [
            u'TIC',
            u'SRM SIC Q1=200 Q3=100 function=2 offset=0',
    ]:
        raise ValueError('For each chromatogram or attributes function failed')

    @mzml._foreachscan
    def testperspec(spectrum):
        p = mzml.cvparam(spectrum)
        return p["MS:1000016"]

    if testperspec() != [
            0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
            0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
            0.0847499967, 0.20341666, 0.322033346, 0.440683335,
    ]:
        raise ValueError('For each scan or cvparam function failed')

    if sum(mzml.sum_scans()[1]) != 162806964:
        raise ValueError('sum_scans function failed')
    if sum((mzml[2])[1]) != 6742121:  # integer index
        raise ValueError('scan indexing failed')
    if sum((mzml[0.01])[1]) != 56270834:  # float index
        raise ValueError('time indexing failed')
    sys.stdout.write(' PASS\n')
def test_mzml():
    """
    Runs a series of checks on the mzML class using the 'MultiTest' data file

    Checks the function numbers that were pulled, the per-chromatogram and
    per-scan decorators (together with the attributes and cvparam methods),
    scan summing, and indexing of the object by scan number and by time.
    Raises a ValueError naming the first check that fails and writes
    ' PASS' to stdout if every check succeeds.
    """
    sys.stdout.write('Testing mzML class...')
    from _classes._mzML import mzML
    mzml = mzML('MultiTest', verbose=False)
    # compare a sorted list: dictionary key order is not guaranteed, and on
    # Python 3 keys() returns a view that never compares equal to a list
    if sorted(mzml.functions.keys()) != [1, 3, 4]:
        raise ValueError('Did not pull the correct functions')

    @mzml._foreachchrom
    def testperchrom(chromatogram):
        attr = mzml.attributes(chromatogram)
        return attr['id']

    if testperchrom() != [
            u'TIC',
            u'SRM SIC Q1=200 Q3=100 function=2 offset=0',
    ]:
        raise ValueError('For each chromatogram or attributes function failed')

    @mzml._foreachscan
    def testperspec(spectrum):
        p = mzml.cvparam(spectrum)
        return p["MS:1000016"]

    if testperspec() != [
            0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
            0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
            0.0847499967, 0.20341666, 0.322033346, 0.440683335,
    ]:
        raise ValueError('For each scan or cvparam function failed')

    if sum(mzml.sum_scans()[1]) != 162806964:
        raise ValueError('sum_scans function failed')
    if sum((mzml[2])[1]) != 6742121:  # integer index
        raise ValueError('scan indexing failed')
    if sum((mzml[0.01])[1]) != 56270834:  # float index
        raise ValueError('time indexing failed')
    sys.stdout.write(' PASS\n')
def sumspectra(filename, start=None, end=None, excel=None):
    """
    Sums spectra from a raw file and writes the summed spectrum to an excel file

    input: filename, start, end, excel
    filename:
        name of the raw file (handed to the mzML class)
    start:
        first scan to sum
        default None: the first value of the 'sr' entry of function 1, plus one
    end:
        last scan to sum
        default None: the second value of the 'sr' entry of function 1, plus one
    excel:
        name of the workbook to write to
        default None: the workbook is named after filename
    """
    from _classes._mzML import mzML
    from _classes._XLSX import XLSX
    from _classes._ScriptTime import ScriptTime
    st = ScriptTime()
    st.printstart()
    mzml = mzML(filename)  # create mzML object
    if start is None:
        start = mzml.functions[1]['sr'][0] + 1
    if end is None:
        end = mzml.functions[1]['sr'][1] + 1
    x, y = mzml.sum_scans(start=start, end=end)
    # the excel argument used to be silently ignored; honour it when given
    if excel is None:
        excel = filename
    xlfile = XLSX(excel, create=True)
    xlfile.writespectrum(x, y, 'summed spectra (scans %d-%d)' % (start, end))
    xlfile.save()
    st.printend()
plt.savefig('../{OUTFILE}.png'.format(OUTFILE=outputFile + str(minFilter)), bbox_inches='tight') ################################## ############################################################### #MAIN ############################################################### #_mzML processing variables filename = 'HZ-140516_HOTKEYMSMS 1376 II.raw' # raw or mzml file name fillzeros = True # fills spectrum with zeros decpl = 1 # number of decimal places to track mzrange = None # mzrange to track sr = 'all' # scan range to track mzml = mzML(filename, verbose=True) #EDESI Plot Production variable minFilter = 20 # minFilter intensity value threshold = 1156 # threshold of peak height for Breakdown tracing plotBreakdown = True # Construct Plot with Breakdown? plotZoom = True # Construct Plot with Zoom in region of interest? (Autozoom) msmsfns = [] for func in mzml.functions: # identify MSMS functions in the provided file if mzml.functions[func][ 'type'] == 'MS' and mzml.functions[func]['level'] > 1: msmsfns.append(func) if len( msmsfns ) > 1: # if there is more than one msms function, ask the user which one to process
def mia(filename, dec=0):
    """
    MS/MS interpreter assistant

    Sums all scans of the supplied file, picks peaks from the summed spectrum,
    prints a table of the m/z differences between every pair of peaks, prints
    the differences that match an integer key of the common losses dictionary,
    and shows the spectrum with the picked peaks annotated.

    filename:
        name of the file to process (handed to the mzML class)
    dec:
        not used by the current implementation (losses are matched as integers)
    """
    import numpy as np
    from _classes._mzML import mzML

    def indexes(x, y, thres=0.3, min_dist=None):
        '''
        !!!!! based on PeakUtils https://bitbucket.org/lucashnegri/peakutils
        Peak detection routine.

        Finds the peaks in *y* by taking its first order difference. By using
        *thres* and *min_dist* parameters, it is possible to reduce the number of
        detected peaks. *y* must be signed.

        Parameters
        ----------
        x : list or ndarray
        y : list or ndarray (signed)
            1D amplitude data to search for peaks.
        thres : float between [0., 1.]
            Normalized threshold. Only the peaks with amplitude higher than the
            threshold will be detected.
        min_dist : int
            minimum x distance between each detected peak

        Returns
        -------
        ndarray
            Array containing the indexes of the peaks that were detected
        '''
        if isinstance(y, np.ndarray) and np.issubdtype(y.dtype, np.unsignedinteger):
            raise ValueError("y must be signed")
        y = np.asarray(y)  # converts to numpy array if not already
        thres = thres * (np.max(y) - np.min(y)) + np.min(y)  # scale the threshold to the y range

        # find the peaks by using the first order difference
        dy = np.diff(y)  # differences between neighbouring data points
        peaks = np.where(
            (np.hstack([dy, 0.]) < 0.)  # falls after the point
            & (np.hstack([0., dy]) > 0.)  # rises before the point
            & (y > thres)
        )[0]

        if peaks.size > 1 and min_dist is not None:  # if there are peaks and a minimum distance has been supplied
            highest = peaks[np.argsort(y[peaks])][::-1]  # peak indexes, most intense first
            rem = np.ones(y.size, dtype=bool)  # True marks an index that is not (or no longer) a peak
            rem[peaks] = False

            for peak in highest:
                if not rem[peak]:  # if the peak hasn't already been removed
                    ind = x[peak]
                    # find slice based on x values and min_dist
                    l = max(0, np.searchsorted(x, ind - min_dist))
                    r = min(len(y) - 1, np.searchsorted(x, ind + min_dist))
                    rem[slice(l, r)] = True  # remove everything in the window
                    rem[peak] = False  # then keep the peak itself
            peaks = np.arange(y.size)[~rem]
        return peaks

    def com_loss(dec=0, custom_losses=None):
        """takes a common loss dictionary and reduces the keys to the specified decimal place"""
        from _classes.common_losses import losses, stored_dec
        if dec > stored_dec:
            raise ValueError('The specified number of decimal places (%d) exceeds the number stored (%d)' % (dec, stored_dec))
        out = {}
        for key in losses:  # round values and add to dictionary
            newkey = round(key, dec)
            if dec == 0:
                newkey = int(newkey)
            if newkey in out:  # several losses share this rounded key
                out[newkey] += ', ' + losses[key]
            else:
                out[newkey] = losses[key]
        if custom_losses is not None:  # if supplied with a custom list of losses
            from _classes._Molecule import Molecule
            for item in custom_losses:
                mol = Molecule(item)
                key = round(mol.em, dec)
                if dec == 0:
                    key = int(key)
                if key in out:
                    out[key] += ', ' + item
                else:
                    out[key] = item
        return out

    def tabulate(diffs):
        """tabulates the data in the output"""
        # header row: the m/z value of every picked peak
        print('\t' + ''.join('%.1f\t' % x[ind] for ind in inds))
        for ind, row in enumerate(diffs):
            string = '%.1f\t' % round(x[inds[ind]], 1)
            for col in row:
                string += '%.1f\t' % round(col, 1)
            print(string + '\n')

    def guess(diffs):
        """searches for common integer losses among the differences matrix and prints them"""
        # NOTE(review): specific_components is not defined in this function;
        # presumably a module-level list of custom losses - confirm it exists
        loss = com_loss(0, specific_components)  # dictionary of loss values and their probable representation
        print('possible fragment assignments (from common losses):')
        for ind, row in enumerate(diffs):
            for ind2, otherpeak in enumerate(row):
                val = int(round(otherpeak))
                if val > 0 and val in loss:
                    print('%r -> %r: %s %s' % (x[inds[ind]], x[inds[ind2]], val, loss[val]))

    mzml = mzML(filename)
    x, y = mzml.sum_scans()
    # if not all peaks are being detected, decrease the threshold (third value) handed to indexes
    inds = indexes(x, y, 0.01, 7)

    # matrix of the differences between every pair of picked peaks
    diffs = [[x[i] - x[j] for j in inds] for i in inds]

    tabulate(diffs)  # tabulate differences in console
    guess(diffs)  # guess at what the differences might mean
    annotations = {}
    top = max(y)
    for i in inds:  # label each peak with its m/z, placed at its intensity relative to the tallest point
        annotations[str(x[i])] = [x[i], float(y[i]) / float(top) * 100.]
    from tome_v02 import plotms
    plotms([x, y], annotations=annotations, output='show')
def pyrsir(filename, xlsx, n, **kwargs):
    """
    Pulls species traces from a mass spectrum file using parameters stored in
    an excel workbook, sums and normalizes those traces, and writes the
    results back to the workbook.

    filename:
        name of the file handed to the mzML class
    xlsx:
        name of the excel workbook handed to the XLSX class; processing
        parameters are read from it and the results are written to it
    n:
        integer (or list/tuple of integers) of at least 1; the number(s) of
        scans to sum
    kwargs:
        plot: (default True) plot the data for a quick look
        verbose: (default True) write progress to stdout
        bounds confidence: (default 0.99) confidence interval for
            automatically generated bounds

    Raises KeyError if an unsupported keyword argument is supplied and exits
    (sys.exit) if n is not made up of integers of at least 1.
    """
    import sys

    loaded = {}  # holds the mzML object once it has been created

    def getmzml():
        """returns the mzML object for filename, creating it the first time it is needed"""
        if 'mzml' not in loaded:
            loaded['mzml'] = mzML(filename, verbose=ks['verbose'])
        return loaded['mzml']

    def say(text):
        """writes progress text to stdout if the verbose keyword is True"""
        if ks['verbose'] is True:
            sys.stdout.write(text)
            sys.stdout.flush()

    def checkinteger(val, name):
        """
        This function checks that the supplied values are integers greater than 1
        A integer value that is non-negative is required for the summing function.
        Please check your input value.
        """
        # the docstring above is part of the exit messages below
        if not isinstance(val, (list, tuple)):  # if only one value given for n
            val = [val]
        for num in val:
            if type(num) is not int:
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' % (name, str(num), checkinteger.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' % (name, str(num), checkinteger.__doc__))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        import pylab as pl
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:
                pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                for key in sp:  # plot each species
                    if sp[key]['affin'] == mode:  # compare by value, not identity
                        pl.plot(rtime[modekey], sp[key]['raw'], linewidth=0.75, label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        pl.tick_params(axis='x', labelbottom=False)

        # summed data
        loc = 2
        for num in n:
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = sumkey + mode
                if modekey in rtime:
                    pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode:  # if a MS species of this mode
                            pl.plot(rtime[modekey], sp[key][sumkey], linewidth=0.75, label=key)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
            loc += 1
        pl.tick_params(axis='x', labelbottom=True)  # only the bottom plot gets x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            say('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                delete.append('Raw Data (' + sp[key]['affin'] + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')')
                    delete.append(str(num) + ' Normalized (' + sp[key]['affin'] + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            say(' DONE.\n')

        say('Writing to "%s"' % xlfile.bookname)

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode, tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                modekey = 'raw' + mode
                if modekey in rtime:
                    if max(n) > 1:  # if data were summed
                        sheetname = str(num) + ' Sum (' + mode + ')'
                        xlfile.writersim(sp, rtime[sumkey + mode], sumkey, sheetname, mode, tic[sumkey + mode])  # write summed data
                    sheetname = str(num) + ' Normalized (' + mode + ')'
                    xlfile.writersim(sp, rtime[sumkey + mode], normkey, sheetname, mode)  # write normalized data

        for key in sorted(sp):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(sp[key]['spectrum'][0], sp[key]['spectrum'][1], 'm/z', 'intensity', 'Isotope Patterns', key)

        # rd is only ever True or False, so the former "rd is None" test never
        # ran; chromatograms exist only when the file was processed (rd is False)
        if rd is False:
            for key in sorted(chroms):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'], chroms[key]['xunit'], chroms[key]['yunit'], 'Function Chromatograms', key)

        uvstuff = False
        for key in sp:  # check for UV-Vis spectra
            if sp[key]['affin'] == 'UV':
                uvstuff = True
                break
        if uvstuff is True:
            for ind, val in enumerate(tic['rawUV']):  # normalize the UV intensities
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV', tic['rawUV'])  # write UV-Vis data to sheet

        if sumspec is not None:  # write all summed spectra
            functions = getmzml().functions
            for fn in sumspec:
                specname = '%s %s' % (functions[fn]['mode'], functions[fn]['level'])
                if 'target' in functions[fn]:
                    specname += ' %.3f' % functions[fn]['target']
                specname += ' (%.3f-%.3f)' % (functions[fn]['window'][0], functions[fn]['window'][1])
                xlfile.writemultispectrum(sumspec[fn][0], sumspec[fn][1], 'm/z', 'counts', 'Summed Spectra', specname)
        say(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        res = None  # resolution of the instrument, determined the first time it is needed
        for species in dct:
            if 'affin' not in dct[species]:  # set affinity if not specified
                fn = dct[species]['function']
                functype = getmzml().functions[fn]['type']
                if functype == 'MS':
                    dct[species]['affin'] = getmzml().functions[fn]['mode']
                if functype == 'UV':
                    dct[species]['affin'] = 'UV'
            if 'formula' in dct[species] and dct[species]['formula'] is not None:
                if res is None:
                    res = int(getmzml().auto_resolution())
                dct[species]['mol'].res = res  # sets resolution in Molecule object
                dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1]  # recalculates sigma with new resolution
                # NOTE(review): this replaces the bounds generated with the
                # 'bounds confidence' keyword by 0.95 bounds - confirm intended
                dct[species]['bounds'] = dct[species]['mol'].bounds(0.95)  # calculates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------

    ks = {  # default keyword arguments
        'plot': True,  # plot the data for a quick look
        'verbose': True,  # chatty
        'bounds confidence': 0.99,  # confidence interval for automatically generated bounds
    }
    unsupported = set(kwargs.keys()) - set(ks.keys())
    if unsupported:  # check for invalid keyword arguments
        raise KeyError('Unsupported keyword argument(s): %s' % ', '.join(repr(i) for i in sorted(unsupported)))
    ks.update(kwargs)  # update defaults with provided keyword arguments

    from tome_v02 import bindata
    from _classes._ScriptTime import ScriptTime
    from _classes._mzML import mzML
    from _classes._Spectrum import Spectrum
    from _classes._Molecule import Molecule
    from _classes._XLSX import XLSX

    if ks['verbose'] is True:
        stime = ScriptTime()
        stime.printstart()

    n = checkinteger(n, 'number of scans to sum')  # checks integer input and converts to list

    say('Loading processing parameters from excel file')
    xlfile = XLSX(xlsx)
    sp = xlfile.pullrsimparams()

    mskeys = ['+', '-']
    for key in sp:
        if sp[key]['formula'] is not None:  # if formula is specified
            sp[key]['mol'] = Molecule(sp[key]['formula'])  # create Molecule object
            sp[key]['bounds'] = sp[key]['mol'].bounds(ks['bounds confidence'])  # generate bounds from molecule object with this confidence interval
    say(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys:  # look for existing positive and negative mode raw data
        try:
            modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode + ')')
        except KeyError:  # no raw data sheet for this mode
            continue
        say('Existing (%s) mode raw data were found, grabbing those values.' % mode)
        rd = True  # bool that rd is present
        modekey = 'raw' + mode
        sp.update(modedata)  # update sp dictionary with raw data
        for key in modedata:  # check for affinities
            if 'affin' not in sp[key]:
                sp[key]['affin'] = mode
        rtime[modekey] = list(modetime)  # update time list
        tic[modekey] = list(modetic)  # update tic list
        say(' DONE\n')

    # prepformula needs the mzML object for species without an affinity or
    # with a formula; it used to read it before it had been created
    sp = prepformula(sp)

    newpeaks = False
    if rd is True:
        newsp = {}
        sumspec = None
        for key in sp:  # checks whether there is a MS species that does not have raw data
            if 'raw' not in sp[key]:
                newsp[key] = sp[key]  # create references in the namespace
        if newsp:
            newpeaks = True
            say('Some peaks are not in the raw data, extracting these from raw file.\n')
            ips = xlfile.pullmultispectrum('Isotope Patterns')  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [ips[species]['x'], ips[species]['y']]
            mzml = getmzml()  # load mzML class
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(3, newsp[species]['bounds'][0], newsp[species]['bounds'][1])
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            say('No new peaks were specified. Proceeding directly to summing and normalization.\n')

    if rd is False:  # if no raw data is present, process mzML file
        mzml = getmzml()  # load mzML class
        sp, sumspec = mzml.pull_species_data(sp, True)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp:  # compare predicted isotope patterns to the real spectrum and save standard error of the regression
            func = sp[key]['function']
            if mzml.functions[func]['type'] == 'MS':  # determine mode key
                sp[key]['spectrum'] = sumspec[sp[key]['function']].trim(xbounds=sp[key]['bounds'])  # extract the spectrum object
                mode = 'raw' + mzml.functions[func]['mode']
            if mzml.functions[func]['type'] == 'UV':
                mode = 'rawUV'
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            if sp[key]['formula'] is not None:
                sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        for fn in sumspec:
            sumspec[fn] = sumspec[fn].trim()  # convert Spectrum objects into x,y lists

    # NOTE(review): when every value of n is 1 this step is skipped, but the
    # normalization below still reads the summed traces - confirm n=1 alone works
    if max(n) > 1:  # run combine functions if n > 1
        for num in n:  # for each n to sum
            say('\r%d Summing species traces.' % num)
            sumkey = str(num) + 'sum'
            for key in sp:  # bin each species
                if sp[key]['affin'] in mskeys or getmzml().functions[sp[key]['function']]['type'] == 'MS':  # if species is MS related
                    sp[key][sumkey] = bindata(num, 1, sp[key]['raw'])
            for mode in mskeys:
                modesumkey = sumkey + mode
                modekey = 'raw' + mode
                if modekey in rtime:  # if there is data for that mode
                    rtime[modesumkey] = bindata(num, num, rtime[modekey])
                    tic[modesumkey] = bindata(num, 1, tic[modekey])
        say(' DONE\n')

    for num in n:  # normalize each peak's chromatogram
        say('\r%d Normalizing species traces.' % num)
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                for key in sp:  # for each species
                    if sp[key]['affin'] in mskeys or getmzml().functions[sp[key]['function']]['type'] == 'MS':  # if species has affinity
                        modetic = tic[sumkey + sp[key]['affin']]
                        sp[key][normkey] = []
                        for ind, val in enumerate(sp[key][sumkey]):
                            sp[key][normkey].append(val / (modetic[ind] + 0.01))  # +0.01 to avoid div/0 errors
    say(' DONE\n')

    output()  # write data to excel file

    say('\rSaving "%s" (this may take some time)' % xlfile.bookname)
    xlfile.save()
    say(' DONE\n')

    if ks['plot'] is True:
        say('Plotting traces')
        plots()  # plots for quick review
        say(' DONE\n')
    if ks['verbose'] is True:
        stime.printelapsed()
def pyrsir(filename, xlsx, n, **kwargs):
    """
    Pulls species traces from a mass spectrum file using parameters stored in
    an excel workbook, sums and normalizes those traces, and writes the
    results back to the workbook.

    filename:
        name of the file handed to the mzML class
    xlsx:
        name of the excel workbook handed to the XLSX class; processing
        parameters are read from it and the results are written to it
    n:
        integer (or list/tuple of integers) of at least 1; the number(s) of
        scans to sum
    kwargs:
        plot: (default True) plot the data for a quick look
        verbose: (default True) write progress to stdout
        bounds confidence: (default 0.99) confidence interval for
            automatically generated bounds

    Raises KeyError if an unsupported keyword argument is supplied and exits
    (sys.exit) if n is not made up of integers of at least 1.
    """
    import sys

    loaded = {}  # holds the mzML object once it has been created

    def getmzml():
        """returns the mzML object for filename, creating it the first time it is needed"""
        if 'mzml' not in loaded:
            loaded['mzml'] = mzML(filename, verbose=ks['verbose'])
        return loaded['mzml']

    def say(text):
        """writes progress text to stdout if the verbose keyword is True"""
        if ks['verbose'] is True:
            sys.stdout.write(text)
            sys.stdout.flush()

    def checkinteger(val, name):
        """
        This function checks that the supplied values are integers greater than 1
        A integer value that is non-negative is required for the summing function.
        Please check your input value.
        """
        # the docstring above is part of the exit messages below
        if not isinstance(val, (list, tuple)):  # if only one value given for n
            val = [val]
        for num in val:
            if type(num) is not int:
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' % (name, str(num), checkinteger.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' % (name, str(num), checkinteger.__doc__))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        import pylab as pl
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:
                pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                for key in sp:  # plot each species
                    if sp[key]['affin'] == mode:  # compare by value, not identity
                        pl.plot(rtime[modekey], sp[key]['raw'], linewidth=0.75, label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        pl.tick_params(axis='x', labelbottom=False)

        # summed data
        loc = 2
        for num in n:
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = sumkey + mode
                if modekey in rtime:
                    pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode:  # if a MS species of this mode
                            pl.plot(rtime[modekey], sp[key][sumkey], linewidth=0.75, label=key)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
            loc += 1
        pl.tick_params(axis='x', labelbottom=True)  # only the bottom plot gets x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            say('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                delete.append('Raw Data (' + sp[key]['affin'] + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')')
                    delete.append(str(num) + ' Normalized (' + sp[key]['affin'] + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            say(' DONE.\n')

        say('Writing to "%s"' % xlfile.bookname)

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode, tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                modekey = 'raw' + mode
                if modekey in rtime:
                    if max(n) > 1:  # if data were summed
                        sheetname = str(num) + ' Sum (' + mode + ')'
                        xlfile.writersim(sp, rtime[sumkey + mode], sumkey, sheetname, mode, tic[sumkey + mode])  # write summed data
                    sheetname = str(num) + ' Normalized (' + mode + ')'
                    xlfile.writersim(sp, rtime[sumkey + mode], normkey, sheetname, mode)  # write normalized data

        for key in sorted(sp):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(sp[key]['spectrum'][0], sp[key]['spectrum'][1], 'm/z', 'intensity', 'Isotope Patterns', key)

        # rd is only ever True or False, so the former "rd is None" test never
        # ran; chromatograms exist only when the file was processed (rd is False)
        if rd is False:
            for key in sorted(chroms):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'], chroms[key]['xunit'], chroms[key]['yunit'], 'Function Chromatograms', key)

        uvstuff = False
        for key in sp:  # check for UV-Vis spectra
            if sp[key]['affin'] == 'UV':
                uvstuff = True
                break
        if uvstuff is True:
            for ind, val in enumerate(tic['rawUV']):  # normalize the UV intensities
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV', tic['rawUV'])  # write UV-Vis data to sheet

        if sumspec is not None:  # write all summed spectra
            functions = getmzml().functions
            for fn in sumspec:
                specname = '%s %s' % (functions[fn]['mode'], functions[fn]['level'])
                if 'target' in functions[fn]:
                    specname += ' %.3f' % functions[fn]['target']
                specname += ' (%.3f-%.3f)' % (functions[fn]['window'][0], functions[fn]['window'][1])
                xlfile.writemultispectrum(sumspec[fn][0], sumspec[fn][1], 'm/z', 'counts', 'Summed Spectra', specname)
        say(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        res = None  # resolution of the instrument, determined the first time it is needed
        for species in dct:
            if 'affin' not in dct[species]:  # set affinity if not specified
                fn = dct[species]['function']
                functype = getmzml().functions[fn]['type']
                if functype == 'MS':
                    dct[species]['affin'] = getmzml().functions[fn]['mode']
                if functype == 'UV':
                    dct[species]['affin'] = 'UV'
            if 'formula' in dct[species] and dct[species]['formula'] is not None:
                if res is None:
                    res = int(getmzml().auto_resolution())
                dct[species]['mol'].res = res  # sets resolution in Molecule object
                dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1]  # recalculates sigma with new resolution
                # NOTE(review): this replaces the bounds generated with the
                # 'bounds confidence' keyword by 0.95 bounds - confirm intended
                dct[species]['bounds'] = dct[species]['mol'].bounds(0.95)  # calculates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------

    ks = {  # default keyword arguments
        'plot': True,  # plot the data for a quick look
        'verbose': True,  # chatty
        'bounds confidence': 0.99,  # confidence interval for automatically generated bounds
    }
    unsupported = set(kwargs.keys()) - set(ks.keys())
    if unsupported:  # check for invalid keyword arguments
        raise KeyError('Unsupported keyword argument(s): %s' % ', '.join(repr(i) for i in sorted(unsupported)))
    ks.update(kwargs)  # update defaults with provided keyword arguments

    from tome_v02 import bindata
    from _classes._ScriptTime import ScriptTime
    from _classes._mzML import mzML
    from _classes._Spectrum import Spectrum
    from _classes._Molecule import Molecule
    from _classes._XLSX import XLSX

    if ks['verbose'] is True:
        stime = ScriptTime()
        stime.printstart()

    n = checkinteger(n, 'number of scans to sum')  # checks integer input and converts to list

    say('Loading processing parameters from excel file')
    xlfile = XLSX(xlsx)
    sp = xlfile.pullrsimparams()

    mskeys = ['+', '-']
    for key in sp:
        if sp[key]['formula'] is not None:  # if formula is specified
            sp[key]['mol'] = Molecule(sp[key]['formula'])  # create Molecule object
            sp[key]['bounds'] = sp[key]['mol'].bounds(ks['bounds confidence'])  # generate bounds from molecule object with this confidence interval
    say(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys:  # look for existing positive and negative mode raw data
        try:
            modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode + ')')
        except KeyError:  # no raw data sheet for this mode
            continue
        say('Existing (%s) mode raw data were found, grabbing those values.' % mode)
        rd = True  # bool that rd is present
        modekey = 'raw' + mode
        sp.update(modedata)  # update sp dictionary with raw data
        for key in modedata:  # check for affinities
            if 'affin' not in sp[key]:
                sp[key]['affin'] = mode
        rtime[modekey] = list(modetime)  # update time list
        tic[modekey] = list(modetic)  # update tic list
        say(' DONE\n')

    # prepformula needs the mzML object for species without an affinity or
    # with a formula; it used to read it before it had been created
    sp = prepformula(sp)

    newpeaks = False
    if rd is True:
        newsp = {}
        sumspec = None
        for key in sp:  # checks whether there is a MS species that does not have raw data
            if 'raw' not in sp[key]:
                newsp[key] = sp[key]  # create references in the namespace
        if newsp:
            newpeaks = True
            say('Some peaks are not in the raw data, extracting these from raw file.\n')
            ips = xlfile.pullmultispectrum('Isotope Patterns')  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [ips[species]['x'], ips[species]['y']]
            mzml = getmzml()  # load mzML class
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(3, newsp[species]['bounds'][0], newsp[species]['bounds'][1])
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            say('No new peaks were specified. Proceeding directly to summing and normalization.\n')

    if rd is False:  # if no raw data is present, process mzML file
        mzml = getmzml()  # load mzML class
        sp, sumspec = mzml.pull_species_data(sp, True)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp:  # compare predicted isotope patterns to the real spectrum and save standard error of the regression
            func = sp[key]['function']
            if mzml.functions[func]['type'] == 'MS':  # determine mode key
                sp[key]['spectrum'] = sumspec[sp[key]['function']].trim(xbounds=sp[key]['bounds'])  # extract the spectrum object
                mode = 'raw' + mzml.functions[func]['mode']
            if mzml.functions[func]['type'] == 'UV':
                mode = 'rawUV'
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            if sp[key]['formula'] is not None:
                sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        for fn in sumspec:
            sumspec[fn] = sumspec[fn].trim()  # convert Spectrum objects into x,y lists

    # NOTE(review): when every value of n is 1 this step is skipped, but the
    # normalization below still reads the summed traces - confirm n=1 alone works
    if max(n) > 1:  # run combine functions if n > 1
        for num in n:  # for each n to sum
            say('\r%d Summing species traces.' % num)
            sumkey = str(num) + 'sum'
            for key in sp:  # bin each species
                if sp[key]['affin'] in mskeys or getmzml().functions[sp[key]['function']]['type'] == 'MS':  # if species is MS related
                    sp[key][sumkey] = bindata(num, 1, sp[key]['raw'])
            for mode in mskeys:
                modesumkey = sumkey + mode
                modekey = 'raw' + mode
                if modekey in rtime:  # if there is data for that mode
                    rtime[modesumkey] = bindata(num, num, rtime[modekey])
                    tic[modesumkey] = bindata(num, 1, tic[modekey])
        say(' DONE\n')

    for num in n:  # normalize each peak's chromatogram
        say('\r%d Normalizing species traces.' % num)
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                for key in sp:  # for each species
                    if sp[key]['affin'] in mskeys or getmzml().functions[sp[key]['function']]['type'] == 'MS':  # if species has affinity
                        modetic = tic[sumkey + sp[key]['affin']]
                        sp[key][normkey] = []
                        for ind, val in enumerate(sp[key][sumkey]):
                            sp[key][normkey].append(val / (modetic[ind] + 0.01))  # +0.01 to avoid div/0 errors
    say(' DONE\n')

    output()  # write data to excel file

    say('\rSaving "%s" (this may take some time)' % xlfile.bookname)
    xlfile.save()
    say(' DONE\n')

    if ks['plot'] is True:
        say('Plotting traces')
        plots()  # plots for quick review
        say(' DONE\n')
    if ks['verbose'] is True:
        stime.printelapsed()
def mia(filename, dec=0):
    """MS/MS interpreter assistant.

    Sums every scan of the supplied mzML file, detects peaks in the summed
    spectrum, prints a table of the m/z differences between every pair of
    detected peaks, prints those integer differences that match an entry in
    the common-loss dictionary, and finally shows an annotated spectrum via
    ``plotms``.

    filename: name of the file handed to ``mzML``
    dec: accepted for interface compatibility; it is not referenced anywhere
        in the body (the loss table is always built with 0 decimal places)

    NOTE(review): ``specific_components`` is not defined in this function;
    presumably it is a module-level list of custom loss formulae -- confirm.
    """
    import numpy as np
    from _classes._mzML import mzML

    def indexes(x, y, thres=0.3, min_dist=None):
        """
        !!!!! based on PeakUtils https://bitbucket.org/lucashnegri/peakutils
        Peak detection routine.

        Finds the peaks in *y* by taking its first order difference. By using
        *thres* and *min_dist* parameters, it is possible to reduce the number
        of detected peaks. *y* must be signed.

        Parameters
        ----------
        x : list or ndarray
            x values paired with *y*; searched with ``np.searchsorted``, so
            they are assumed to be sorted in ascending order.
        y : list or ndarray (signed)
            1D amplitude data to search for peaks.
        thres : float between [0., 1.]
            Normalized threshold. Only the peaks with amplitude higher than
            the threshold will be detected.
        min_dist : number or None
            minimum distance (in units of *x*) between each detected peak

        Returns
        -------
        ndarray
            Array containing the indexes of the peaks that were detected
        """
        if isinstance(y, np.ndarray) and np.issubdtype(
                y.dtype, np.unsignedinteger):
            raise ValueError("y must be signed")
        y = np.asarray(y)  # no-op for plain ndarrays, converts anything else

        # rescale the normalized threshold onto the actual y range
        y_min = np.min(y)
        thres = thres * (np.max(y) - y_min) + y_min

        # a peak rises on its left side, falls on its right side and sits
        # above the threshold
        dy = np.diff(y)
        peaks = np.where(
            (np.hstack([dy, 0.]) < 0.)
            & (np.hstack([0., dy]) > 0.)
            & (y > thres)
        )[0]

        if peaks.size > 1 and min_dist is not None:
            # walk the peaks from most to least intense; every kept peak
            # suppresses its neighbours that lie within min_dist in x
            highest = peaks[np.argsort(y[peaks])][::-1]
            rem = np.ones(y.size, dtype=bool)  # True -> index is discarded
            rem[peaks] = False
            for peak in highest:
                if rem[peak]:  # already suppressed by a taller neighbour
                    continue
                centre = x[peak]
                left = max(0, np.searchsorted(x, centre - min_dist))
                right = min(len(y) - 1, np.searchsorted(x, centre + min_dist))
                rem[left:right] = True  # discard everything in the window...
                rem[peak] = False  # ...except the peak itself
            peaks = np.arange(y.size)[~rem]
        return peaks

    def com_loss(dec=0, custom_losses=None):
        """takes a common loss dictionary and reduces the keys to the specified decimal place"""
        from _classes.common_losses import losses, stored_dec
        if dec > stored_dec:
            raise ValueError(
                'The specified number of decimal places (%d) exceeds the number stored (%d)'
                % (dec, stored_dec))

        out = {}

        def add(mass, label):
            """files *label* under the rounded mass, comma-joining labels that collide"""
            key = round(mass, dec)
            if dec == 0:
                key = int(key)
            if key in out:
                out[key] += ', ' + label
            else:
                out[key] = label

        for mass in losses:  # round the stored masses
            add(mass, losses[mass])
        if custom_losses is not None:  # if supplied with a custom list of losses
            from _classes._Molecule import Molecule
            for item in custom_losses:
                add(Molecule(item).em, item)
        return out

    def tabulate(diffs):
        """tabulates the data in the output"""
        print('\t' + ''.join('%.1f\t' % x[ind] for ind in inds))
        for ind, row in enumerate(diffs):
            line = '%.1f\t' % round(x[inds[ind]], 1)
            line += ''.join('%.1f\t' % round(col, 1) for col in row)
            print(line + '\n')

    def guess(diffs):
        """searches for common integer losses among the differences matrix and prints them"""
        # dictionary of integer loss values and their probable representation
        loss = com_loss(0, specific_components)
        print('possible fragment assignments (from common losses):')
        for ind, row in enumerate(diffs):
            for ind2, delta in enumerate(row):
                val = int(round(delta))
                if val > 0 and val in loss:
                    print('%s -> %s: %s %s' % (
                        repr(x[inds[ind]]), repr(x[inds[ind2]]),
                        val, loss[val]))

    mzml = mzML(filename)
    x, y = mzml.sum_scans()
    # if not all peaks are being detected, decrease the threshold (third
    # argument) handed to indexes
    inds = indexes(x, y, 0.01, 7)

    # diffs[a][b] is the m/z of peak a minus the m/z of peak b
    diffs = [[x[i] - x[j] for j in inds] for i in inds]
    tabulate(diffs)  # tabulate differences in console
    guess(diffs)  # guess at what the differences might mean

    # label every detected peak with its m/z at its relative intensity (%)
    annotations = {}
    top = max(y)
    for i in inds:
        annotations[str(x[i])] = [x[i], float(y[i]) / float(top) * 100.]

    from tome_v02 import plotms
    plotms([x, y], annotations=annotations, output='show')
# override settings here
# (every entry is commented out by default; uncomment one to override it)
override = {
    #'fs':16, # font size
    #'lw':1.5, # line width of traces
    #'size':[7.87,4.87], # image size [width,length] in inches
    #'xrange':[500,700], # wavelength bounds (in nm)
    #'yrange':[0,3], # absorbance bounds (in a.u.)
    #'legloc':0, # legend location (see http://matplotlib.org/api/legend_api.html for more location codes)
}

if __name__ == '__main__':
    from _classes._mzML import mzML
    # numpy.arange replaces the deprecated scipy.arange alias (same function)
    from numpy import arange
    from tome_v02 import locateinlist, plotuv

    # NOTE(review): filename, start, end and deltat are not assigned in this
    # section; presumably they are set earlier in the script -- confirm.
    mzml = mzML(filename, ftt=True)  # initiate mzml object
    fn = mzml.associate_to_function('UV')  # determine which function contains UV-Vis data

    uvspecs = mzml.retrieve_scans(start, end, fn)  # pull uv spectra
    wavelengths = list(uvspecs[0][0])  # wavelength list (x values of the first spectrum)
    uvspecs = [y for x, y in uvspecs]  # set uvspecs list to be only the y values

    timepoints = mzml.functions[fn]['timepoints']  # pull time points of the UV function
    # locate indices of the timepoints bracketing the start-end window
    l, r = locateinlist(timepoints, start, 'greater'), locateinlist(timepoints, end, 'lesser')
    timepoints = timepoints[l:r + 1]  # trim time list accordingly

    times = arange(start, end, deltat)  # evenly spaced times between start and end
    specin = []
# NOTE(review): the two statements below read minFilter and outputFile, which
# are only assigned further down (minFilter) or not in this section at all
# (outputFile); presumably they are the tail of a plotting function defined
# above -- confirm the intended indentation.
debugPrint(EDESIkwargs['debug'], "Breakdown End")
# write the figure one directory up, named <outputFile><minFilter>.png
plt.savefig('../{OUTFILE}.png'.format(OUTFILE = outputFile+str(minFilter)), bbox_inches='tight')

###############################################################
# MAIN
###############################################################

# mzML processing variables
filename = 'HZ-140516_HOTKEYMSMS 1376 II.raw' # raw or mzml file name
fillzeros = True # fills spectrum with zeros
decpl = 1 # number of decimal places to track
mzrange = None # mzrange to track
sr = 'all' # scan range to track
mzml = mzML(filename,verbose=True) # load the file into an mzML object

# EDESI plot production variables
minFilter = 20 # minFilter intensity value
threshold = 1156 # threshold of peak height for Breakdown tracing
plotBreakdown = True # Construct Plot with Breakdown?
plotZoom = True # Construct Plot with Zoom in region of interest? (Autozoom)

# collect every function that is of type 'MS' with an MS level above 1
msmsfns = []
for func in mzml.functions: # identify MSMS functions in the provided file
    if mzml.functions[func]['type'] == 'MS' and mzml.functions[func]['level'] > 1:
        msmsfns.append(func)

if len(msmsfns) > 1: # if there is more than one msms function, ask the user which one to process
    sys.stdout.write('More than one MS/MS function is contained in this mzML file. Please indicate which one you wish to process:\nFunction\ttarget\n')
    for func in msmsfns: # one row per candidate: function number and its 'target' value
        sys.stdout.write('%d\t%.3f\n' %(func,mzml.functions[func]['target']))
# override settings here override = { #'fs':16, # font size #'lw':1.5, # line width of traces #'size':[7.87,4.87], # image size [width,length] in inches #'xrange':[500,700], # wavelength bounds (in nm) #'yrange':[0,3], # absorbance bounds(in a.u.) #'legloc':0, # legend location (see ttp://matplotlib.org/api/legend_api.html for more location codes) } if __name__ == '__main__': from _classes._mzML import mzML from scipy import arange from tome_v02 import locateinlist,plotuv mzml = mzML(filename,ftt=True) # initiate mzml object fn = mzml.associate_to_function('UV') # determine which function contains UV-Vis data uvspecs = mzml.retrieve_scans(start,end,fn) # pull uv spectra wavelengths = list(uvspecs[0][0]) # wavelength list uvspecs = [y for x,y in uvspecs] # set uvspecs list to be only the y values timepoints = mzml.functions[fn]['timepoints'] # pull time points of the UV function l,r = locateinlist(timepoints,start,'greater'),locateinlist(timepoints,end,'lesser') # locate indicies of timepoints timepoints = timepoints[l:r+1] # trim time list accordingly times = arange(start,end,deltat) # evenly spaced times between start and end specin = [] for time in times: ind = locateinlist(timepoints,time) # find the closest time to that specin.append(uvspecs[ind]) # append that spectrum to the input list if override.has_key('outname') is False:
def spectrumtrace(filename,sp,scr='all',n=1,mz='all',inj=0.,save=1):
    """
    plots a mass spectrum and an intensity trace for every scan in a raw file based on the parameters supplied

    One PNG per rendered scan is written to an ``imgs`` directory inside the
    current working directory (created if missing). The left panel shows the
    normalized mass spectrum with the tracked species highlighted; the right
    panel shows the normalized species traces up to the current scan.

    filename: the *.raw filename in the working directory to work from
    sp: dictionary of species to track and render; each entry is read for its
        'bounds', 'colour' and 'affin' keys and is updated in place
    scr: scan range to render [start,end]
    n: number of scans to sum
        integer
    mz: mz range to track [m/z start, m/z end]
    inj: injection point (e.g. for catalyst injection); subtracted from the time axis
        float
    save: save every # number of scans (raised to n when smaller; not otherwise used by this function)
        integer

    NOTE(review): the default scr='all' is indexed as scr[0]/scr[1] without
    being expanded; the scan range returned by pullspectra is presumably what
    was meant to replace it -- confirm against the mzML class.
    NOTE(review): ``timepoints`` is read inside plotit but is not defined in
    this function; presumably it is a module-level dictionary of
    {label: time} -- confirm.
    """
    from tome_v02 import bindata,binnspectra
    from _classes._mzML import mzML
    from _classes._ScriptTime import ScriptTime
    from _classes._Spectrum import Spectrum
    from _classes._Colour import Colour
    from bisect import bisect_left,bisect_right
    import pylab as pl
    import os,sys

    st = ScriptTime(profile=True)

    @st.profilefn
    def plotit(x,y,index):
        """
        generates plot
        input: x and y of mass spectrum, index of current time point

        Reads mintime, maxtime, curspec and itr from the enclosing scope;
        they are set by the rendering loop immediately before each call.
        """
        fig = pl.figure(figsize=(9.6,5.4),dpi=100) # 9.6x5.4 inches (1920x1080 px at the 200 dpi used for saving)
        font = {'fontname':'Arial'} # font parameters for axis/text labels
        tickfont = pl.matplotlib.font_manager.FontProperties(family='Arial',size=fs) # font parameters for axis ticks

        def style_ticks(ax):
            """applies the tick font to both axes of ax"""
            for label in ax.get_yticklabels():
                label.set_fontproperties(tickfont)
            for label in ax.get_xticklabels():
                label.set_fontproperties(tickfont)

        def outward_ticks():
            """outward ticks on the left and bottom only, for the current axes"""
            pl.tick_params(axis='y',length=axwidth*3,width=axwidth,direction='out',right=False)
            pl.tick_params(axis='x',length=axwidth*3,width=axwidth,direction='out',top=False)

        # ----- left subplot (mass spectrum) -----
        axl = fig.add_subplot(121)
        for side in ("right","top","bottom"):
            axl.spines[side].set_visible(False)
        axl.plot(x,y,'k-',lw=0.75)
        pl.xlabel('m/z',style='italic',**font)
        pl.ylabel('Relative Intensity',**font)
        for side in ["top","bottom","left","right"]:
            axl.spines[side].set_linewidth(axwidth)
        axl.spines["bottom"].set_position(('axes',-0.01)) # offset x axis
        style_ticks(axl)
        outward_ticks()
        for key in sp:
            bounds = sp[key]['bounds']
            l,r = bisect_left(x,bounds[0]),bisect_right(x,bounds[1]) # index location of selected peak in spectrum
            axl.plot(x[l:r],y[l:r],color=sp[key]['colour'],lw=1) # plot spectrum in colour for selected peaks
            axl.text(bounds[0],1.01,key,fontsize=fs,color=sp[key]['colour'])
        pl.xlim(mz)
        pl.ylim([-0.001,1])

        # ----- right subplot (chromatogram) -----
        axr = fig.add_subplot(122)
        pl.xlim([mintime,maxtime])
        pl.ylim([-0.001,1])
        axr.spines["right"].set_visible(False)
        axr.spines["top"].set_visible(False)
        outward_ticks()
        spkey = str(n)+'norm'
        for mode in mskeys:
            sumkey = str(n)+'sum'+mode
            for key in sp:
                # pair species with appropriate rtime; compared by value,
                # since equal strings are not guaranteed to be the same object
                if sp[key]['affin'] == mode:
                    axr.plot(rtime[sumkey][:index],sp[key][spkey][:index],linewidth=1.0,label=key,color=sp[key]['colour'])
        pl.xlabel('time (min)',fontsize=fs,**font)
        pl.tick_params(axis='y',labelleft=False)
        style_ticks(axr)
        for key in timepoints: # add vertical timepoint lines
            if maxtime+inj >= timepoints[key]: # only once the trace has reached that time
                shifted = timepoints[key]-inj
                pl.axvline(x=shifted,ymin=0,ymax=1,linewidth=0.75,color='b',linestyle=':')
                pl.text(shifted,0.5,key,fontsize=fs,color='b',backgroundcolor='w',rotation='vertical',horizontalalignment='center',verticalalignment='center',alpha=0.75,**font)
        textx = maxtime - (maxtime-mintime)*infop # calculate location for scan number and time text
        pl.text(textx,0.96,'scan %i' %curspec,fontsize=fs,**font) # text for scan number
        pl.text(textx,0.92,'%.1f min' %maxtime,fontsize=fs,**font) # text for time
        pl.subplots_adjust(left=0.07,right=0.99,bottom=0.095,top=0.96,wspace=0.06,hspace=0.05) # hard coded subplot tightening
        #pl.tight_layout(pad=0.75) # automatically tighten subplots

        dpiset = 200 # 100 is 960x540, 150dpi is 1440x810, 200dpi is 1920x1080
        # itr is a zero-padded scan counter; characters 2-5 give a 4-digit suffix.
        # The image size comes from the figure itself (figsize is not a savefig option).
        outname = os.path.join(os.getcwd(),'imgs','scan'+str(itr)[2:6]+'.png')
        pl.savefig(outname,dpi=dpiset)
        pl.clf()
        pl.close()

    @st.profilefn
    def msfignorm(x,y):
        """
        Normalizes the height of a mass spectrum
        The height will be the sum of the heights of the base peaks in the window
        The function will normalize the y-values (assumes intensity) in place and return them
        """
        height = 0 # starting point
        for key in sp:
            l,r = bisect_right(x,sp[key]['bounds'][0]),bisect_left(x,sp[key]['bounds'][1]) # index location of selected peak in spectrum
            try:
                height += max(y[l:r]) # add maximum in selected region to height
            except ValueError: # if no data points fall in the region
                height += 0.01
        if height == 0: # every tracked region is flat zero (or sp is empty)
            height = 0.01 # same fallback as above, avoids division by zero
        for ind,val in enumerate(y): # normalizes all y values
            y[ind] = val/height
        return y

    def timelimits(index):
        """
        finds the appropriate time limits for the traces
        returns (earliest time of any mode, latest time at index of any mode)
        """
        mintime = 10000
        maxtime = -10000
        for mode in mskeys:
            sumkey = str(n)+'sum'+mode
            if sumkey not in rtime:
                continue
            times = rtime[sumkey]
            if times[0] < mintime:
                mintime = times[0]
            if index == 0: # use the second point so the first frame has a non-zero time span
                index += 1
            if times[index] > maxtime:
                maxtime = times[index]
        return mintime,maxtime

    st.printstart()

    axwidth = 1.0 # axis line width
    fs = 12 # fontsize
    infop = 0.2 # left-right scalar for scan info placement

    if save < n: # if the script is told to save more often than it sums
        save = n

    mskeys = ['+','-']
    sumkey = str(n)+'sum'
    normkey = str(n)+'norm'
    for key in sp: # append list places for chrom, summed chrom, and normalized chrom
        sp[key]['raw'] = []
        sp[key]['spectrum'] = Spectrum(3,startmz=sp[key]['bounds'][0],endmz=sp[key]['bounds'][1])
        sp[key][sumkey] = []
        sp[key][normkey] = []

    mzml = mzML(filename) # load mzML class
    sp,TIC,rtime = mzml.pullspeciesdata(sp) # integrate species
    spec,sr,mz = mzml.pullspectra(mzrange=mz) # pull all spectra

    # run combine, regardless if called for (in order for keys to be correct)
    sys.stdout.write('%s summing and normalizing species traces' %str(n))
    sumsp = [] # per-bin sum over all species, used as the normalization denominator
    for key in sp:
        sp[key][sumkey] = bindata(n,1,sp[key]['raw']) # bin each species
        sp[key]['colour'] = Colour(sp[key]['colour']).mpl # convert colour into matplotlib format
        for ind,val in enumerate(sp[key][sumkey]): # for normalization
            try:
                sumsp[ind] += val
            except IndexError: # first species to reach this bin
                sumsp.append(val)

    for mode in mskeys:
        modesum = str(n)+'sum'+mode
        modekey = 'raw'+mode
        if modekey not in rtime: # no data for that mode
            continue
        # bin the time axis and shift it to zero at the injection point
        rtime[modesum] = [val - inj for val in bindata(n,n,rtime[modekey])]
        TIC[modesum] = bindata(n,1,TIC[modekey])
        for key in sp: # for each species
            if sp[key]['affin'] in mskeys: # if species has affinity
                sp[key][normkey] = [
                    val/(sumsp[ind]+0.01) # +0.01 to avoid div/0 errors
                    for ind,val in enumerate(sp[key][sumkey])
                ]
    sys.stdout.write(' DONE\n')
    sys.stdout.flush()

    spec = binnspectra(spec,n,startmz=mz[0],endmz=mz[1]) # bin mass spectra

    if not os.path.isdir('imgs'): # check for /imgs directory and create if missing
        os.makedirs('imgs')

    for ind,val in enumerate(spec):
        curspec = ind*n+1 # number of the first scan in this bin
        if curspec >= scr[0] and curspec <= scr[1]: # if index is within scanrange to output
            span = float(scr[1])-float(scr[0])
            if span:
                percent = (float(curspec)-float(scr[0]))/span*100.
            else: # single-scan range: nothing to divide by
                percent = 100.
            sys.stdout.write('\rRendering scan #%i %.1f%% (scan range: %i to %i)' %(curspec,percent,scr[0],scr[1]))
            val[1] = msfignorm(val[0],val[1]) # normalize spectrum
            itr = str(100000+curspec) # zero-padded counter used for the file name
            mintime,maxtime = timelimits(ind)
            plotit(val[0],val[1],ind)
    sys.stdout.write(' DONE\n')
    st.printend()
    st.printprofiles()