def test_mzml(self):
    """Check function discovery, the iteration decorators, summing and indexing of ``mzML``."""
    source_path = os.path.join(validation_file_path, 'MultiTest')
    reader = mzML(source_path, verbose=False)

    # the correct function keys were pulled
    expected_functions = {1, 3, 4}
    self.assertEqual(reader.functions.keys(), expected_functions)

    # chromatogram decorator: the wrapped callable is invoked without arguments
    @reader.foreachchrom
    def chromatogram_id(chromatogram):
        return branch_attributes(chromatogram)['id']

    expected_ids = [u'TIC', u'SRM SIC Q1=200 Q3=100 function=2 offset=0']
    self.assertEqual(chromatogram_id(), expected_ids)

    # spectrum decorator: collect the value of cvParam MS:1000016 from every scan
    @reader.foreachscan
    def scan_param_value(spectrum):
        params = branch_cvparams(spectrum)
        return params["MS:1000016"].value

    expected_values = [
        0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
        0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
        0.0847499967, 0.20341666, 0.322033346, 0.440683335,
    ]
    self.assertEqual(scan_param_value(), expected_values)

    # intensity summing
    summed = reader.sum_scans()
    self.assertEqual(sum(summed[1]), 162804754.0)

    # scan indexing (integer key)
    by_scan = reader[2]
    self.assertEqual(sum(by_scan[1]), 6742121)

    # time indexing (float key)
    by_time = reader[0.01]
    self.assertEqual(sum(by_time[1]), 56270834)
def bin_spectra(filename, start=None, end=None, save=True, dec=3, function=None):
    """
    Sums spectra from a raw or mzML file and optionally writes the result to an excel file

    :param filename: raw or mzML filename
    :param start: start scan (None defaults to the first entry of the selected
        function's ``'sr'`` pair, plus one)
    :param end: end scan (None defaults to the second entry of the selected
        function's ``'sr'`` pair, plus one)
    :param save: whether to save into an excel document (if a string is provided,
        that filename will be used; any other value except ``False`` saves to
        ``'<filename>.xlsx'``)
    :param dec: decimal places to track when binning the spectrum
    :param function: mzml function number to sum (None lets the mzML object choose
        via ``associate_to_function()``)
    :return: paired x, summed y lists
    """
    st = ScriptTime()
    st.printstart()
    mzml = mzML(filename)  # create mzML object

    # fill in any unspecified selection parameters from the file itself
    if function is None:
        function = mzml.associate_to_function()
    if start is None:
        start = mzml.functions[function]['sr'][0] + 1
    if end is None:
        end = mzml.functions[function]['sr'][1] + 1

    x, y = mzml.sum_scans(
        start=start,
        end=end,
        function=function,
        dec=dec,
    )

    # only an explicit False suppresses saving (kept for backward compatibility)
    if save is not False:
        # isinstance (rather than an exact type comparison) so str subclasses
        # are also honoured as an explicit output filename
        if isinstance(save, str):
            xlfile = XLSX(save, create=True)
        else:  # otherwise derive the workbook name from the input filename
            xlfile = XLSX(f'{filename}.xlsx', create=True)
        xlfile.writespectrum(  # write the spectrum to file
            x,
            y,
            'summed spectra (scans %d-%d)' % (start, end),
        )
        xlfile.save()
    st.printend()
    return x, y
def bin_spectra(filename, start=None, end=None, save=True, dec=3, function=None):
    """
    Sums spectra from a raw or mzML file and optionally writes the result to an excel file

    :param filename: raw or mzML filename
    :param start: start scan (None defaults to the first entry of the selected
        function's ``'sr'`` pair, plus one)
    :param end: end scan (None defaults to the second entry of the selected
        function's ``'sr'`` pair, plus one)
    :param save: whether to save into an excel document (if a string is provided,
        that filename will be used; any other value except ``False`` saves to
        ``'<filename>.xlsx'``)
    :param dec: decimal places to track when binning the spectrum
    :param function: mzml function number to sum (None lets the mzML object choose
        via ``associate_to_function()``)
    :return: paired x, summed y lists
    """
    st = ScriptTime()
    st.printstart()
    mzml = mzML(filename)  # create mzML object

    # fill in any unspecified selection parameters from the file itself
    if function is None:
        function = mzml.associate_to_function()
    if start is None:
        start = mzml.functions[function]['sr'][0] + 1
    if end is None:
        end = mzml.functions[function]['sr'][1] + 1

    x, y = mzml.sum_scans(
        start=start,
        end=end,
        function=function,
        dec=dec,
    )

    # only an explicit False suppresses saving (kept for backward compatibility)
    if save is not False:
        # isinstance (rather than an exact type comparison) so str subclasses
        # are also honoured as an explicit output filename
        if isinstance(save, str):
            xlfile = XLSX(save, create=True)
        else:  # otherwise derive the workbook name from the input filename
            xlfile = XLSX(f'{filename}.xlsx', create=True)
        xlfile.writespectrum(  # write the spectrum to file
            x,
            y,
            'summed spectra (scans %d-%d)' % (start, end),
        )
        xlfile.save()
    st.printend()
    return x, y
def test_mzml(self):
    """Validate ``mzML`` against the 'MultiTest' validation file."""
    data_file = os.path.join(validation_file_path, 'MultiTest')
    loaded = mzML(data_file, verbose=False)

    # function keys discovered in the file
    self.assertEqual(loaded.functions.keys(), {1, 3, 4})

    # per-chromatogram decorator
    @loaded.foreachchrom
    def collect_chrom_ids(chromatogram):
        attributes = branch_attributes(chromatogram)
        return attributes['id']

    chrom_ids = collect_chrom_ids()
    self.assertEqual(
        chrom_ids,
        [u'TIC', u'SRM SIC Q1=200 Q3=100 function=2 offset=0'],
    )

    # per-scan decorator
    @loaded.foreachscan
    def collect_scan_values(spectrum):
        cvparams = branch_cvparams(spectrum)
        return cvparams["MS:1000016"].value

    scan_values = collect_scan_values()
    self.assertEqual(
        scan_values,
        [
            0.0171000008, 0.135733336, 0.254333347, 0.372983336,
            0.491699994, 0.0510833338, 0.169750005, 0.288383335,
            0.407000005, 0.525833309, 0.0847499967, 0.20341666,
            0.322033346, 0.440683335,
        ],
    )

    # totals of the second element returned by summing, scan indexing and time indexing
    total_summed = sum(loaded.sum_scans()[1])
    self.assertEqual(total_summed, 162804754.0)

    total_scan = sum(loaded[2][1])
    self.assertEqual(total_scan, 6742121)

    total_time = sum(loaded[0.01][1])
    self.assertEqual(total_time, 56270834)
sys.stdout.flush() return ipsetting[typ] except KeyError: raise KeyError( '\nThe specified figure setting "%s" is not defined.\nPlease check your spelling' % setting) if __name__ == '__main__': os.chdir(curdir) # change to current working directory keywords = presets(setting) # pull preset kwargs if spectrum.lower().endswith('.mzml.gz') or spectrum.lower().endswith( '.raw'): # if supplied with a mass spec file mzml = mzML(spectrum) exp = mzml.sum_scans() keywords.update({'outname': mzml.filename.split('.')[0] }) # set default output filename else: # otherwise assume that it is an excel file xlfile = XLSX(spectrum, verbose=True) # load excel file if sheetname is None: # otherwise use the first sheet sheetname = xlfile.wb.sheetnames[0] exp = xlfile.pullspectrum(sheetname, skiplines=skiplines)[ 0] # load spectrum from first sheet in workbook keywords.update({ # set default output filename 'outname': f'{xlfile.bookname[:-5]} ({sheetname})', }) keywords.update(override) # apply any user overrides
def mia(filename, show=True, specific_components=None, write=True, save=False):
    """
    MS/MS fragmentation interpreter assistant

    Sums all scans of the file, locates peaks with ``indexes``, tabulates the
    pairwise m/z differences between those peaks, and proposes assignments for
    every positive, integer-rounded difference found in the table returned by
    ``com_loss``.

    :param filename: mass spectrum file to parse
    :param show: whether to show the annotated spectrum
    :param specific_components: specific components in the mixture; the value is
        unpacked positionally into ``com_loss`` (None passes no extra components)
    :param write: whether to write the output to console
    :param save: whether to save the results to an excel file
    :return: None
    """
    if specific_components is None:
        specific_components = []
    mzml = mzML(filename)  # load the mzML
    x, y = mzml.sum_scans()  # sum all the scans

    # if not all peaks are being detected, decrease the last value handed to indexes
    inds = indexes(x, y, 0.01, 7)

    # diffs[a][b] is the m/z of peak a minus the m/z of peak b
    diffs = [[x[i] - x[j] for j in inds] for i in inds]

    loss = com_loss(*specific_components)
    guesses = []
    for row_pos, row in enumerate(diffs):
        for col_pos, delta in enumerate(row):
            val = int(round(delta))
            if val > 0 and val in loss:  # only positive differences with a known loss
                guesses.append([x[inds[row_pos]], x[inds[col_pos]], val, loss[val]])

    # print the results to console if specified
    if write is True:
        header = '\t' + ''.join(f'{x[ind]:.1f}\t' for ind in inds)
        sys.stdout.write(header + '\n')
        for peak_ind, row in zip(inds, diffs):
            line = f'{round(x[peak_ind], 1):.1f}\t'
            line += ''.join(f'{round(col, 1):.1f}\t' for col in row)
            sys.stdout.write(line + '\n')

        sys.stdout.write('\nPossible fragment assignments (from common losses):\n')
        for a, b, val, change in guesses:
            sys.stdout.write(f'{a} -> {b}: {val} {change}\n')

    # annotate each detected peak with its intensity relative to the tallest point (percent)
    top = max(y)
    annotations = {
        str(x[i]): [x[i], float(y[i]) / float(top) * 100.]
        for i in inds
    }

    if show is True:
        from pythoms.tome import plot_mass_spectrum
        plot_mass_spectrum([x, y], annotations=annotations, output='show')

    if save is True:
        from pythoms.xlsx import XLSX
        xlfile = XLSX(filename, create=True)
        xlfile.writespectrum(
            x,
            y,
            'MSMS',
            # norm=False,  # don't normalized data
            # chart=False,  # don't save basic chart to sheet
        )
        # item access replaces the deprecated Workbook.get_sheet_by_name
        cs = xlfile.wb['MSMS']
        cs.cell(row=1, column=6).value = 'differences'

        # write column headers using cell references
        for ind, val in enumerate(inds):
            reference = f'={xlfile.inds_to_cellname(val, 0)}'
            cs[xlfile.inds_to_cellname(0, 6 + ind)] = reference  # across
            cs[xlfile.inds_to_cellname(1 + ind, 5)] = reference  # down

        # write differences based on cell references
        for ind, val in enumerate(inds):
            for ind2, val2 in enumerate(inds):
                cellname = xlfile.inds_to_cellname(1 + ind, 6 + ind2)
                cs[cellname] = f'={xlfile.inds_to_cellname(val, 0)}' \
                               f'-{xlfile.inds_to_cellname(val2, 0)}'  # value
                cs[cellname].number_format = '0'  # number format

        # write guesses below the difference table; the offset is computed
        # explicitly instead of relying on the loop variable left over from the
        # loops above (which is undefined when no peaks were found)
        last = len(inds) - 1
        header_row = 3 + last
        cs[xlfile.inds_to_cellname(header_row, 5)] = 'from'
        cs[xlfile.inds_to_cellname(header_row, 6)] = 'to'
        cs[xlfile.inds_to_cellname(header_row, 7)] = 'difference'
        cs[xlfile.inds_to_cellname(header_row, 8)] = 'guess'
        for i, guess in enumerate(guesses):
            row_index = 4 + last + i
            for offset, entry in enumerate(guess):
                cs[xlfile.inds_to_cellname(row_index, 5 + offset)] = entry

        xlfile.save()
# time spacing between the traces deltat = 10 # override settings here override = { # 'fs':16, # font size # 'lw':1.5, # line width of traces # 'size':[7.87,4.87], # image size [width,length] in inches # 'xrange':[500,700], # wavelength bounds (in nm) # 'yrange':[0,3], # absorbance bounds(in a.u.) # 'legloc':0, # legend location (see ttp://matplotlib.org/api/legend_api.html for more location codes) } if __name__ == '__main__': mzml = mzML(filename, ftt=True) # initiate mzml object fn = mzml.associate_to_function( 'UV') # determine which function contains UV-Vis data uvspecs = mzml.retrieve_scans(start, end, fn) # pull uv spectra wavelengths = list(uvspecs[0][0]) # wavelength list uvspecs = [y for x, y in uvspecs] # set uvspecs list to be only the y values timepoints = mzml.functions[fn][ 'timepoints'] # pull time points of the UV function l, r = locate_in_list(timepoints, start, 'greater'), locate_in_list( timepoints, end, 'lesser') # locate indicies of timepoints timepoints = timepoints[l:r + 1] # trim time list accordingly times = arange(start, end, deltat) # evenly spaced times between start and end specin = []
sys.stdout.flush() return ipsetting[typ] except KeyError: raise KeyError( '\nThe specified figure setting "%s" is not defined.\nPlease check your spelling' % setting) if __name__ == '__main__': os.chdir(curdir) # change to current working directory keywords = presets(setting) # pull preset kwargs if spectrum.lower().endswith('.mzml.gz') or spectrum.lower().endswith( '.raw'): # if supplied with a mass spec file mzml = mzML(spectrum, verbose=False) exp = mzml.sum_scans() keywords.update({'outname': mzml.filename.split('.')[0] }) # set default output filename else: # otherwise assume that it is an excel file xlfile = XLSX(spectrum, verbose=True) # load excel file if sheetname is None: # otherwise use the first sheet sheetname = xlfile.wb.sheetnames[0] exp = xlfile.pullspectrum(sheetname, skiplines=skiplines)[ 0] # load spectrum from first sheet in workbook keywords.update({ # set default output filename 'outname': f'{xlfile.bookname[:-5]} ({sheetname})', }) keywords.update(override) # apply any user overrides
# time spacing between the traces deltat = 10 # override settings here override = { # 'fs':16, # font size # 'lw':1.5, # line width of traces # 'size':[7.87,4.87], # image size [width,length] in inches # 'xrange':[500,700], # wavelength bounds (in nm) # 'yrange':[0,3], # absorbance bounds(in a.u.) # 'legloc':0, # legend location (see ttp://matplotlib.org/api/legend_api.html for more location codes) } if __name__ == '__main__': mzml = mzML(filename, ftt=True) # initiate mzml object fn = mzml.associate_to_function('UV') # determine which function contains UV-Vis data uvspecs = mzml.retrieve_scans(start, end, fn) # pull uv spectra wavelengths = list(uvspecs[0][0]) # wavelength list uvspecs = [y for x, y in uvspecs] # set uvspecs list to be only the y values timepoints = mzml.functions[fn]['timepoints'] # pull time points of the UV function l, r = locate_in_list(timepoints, start, 'greater'), locate_in_list(timepoints, end, 'lesser') # locate indicies of timepoints timepoints = timepoints[l:r + 1] # trim time list accordingly times = arange(start, end, deltat) # evenly spaced times between start and end specin = [] for time in times: ind = locate_in_list(timepoints, time) # find the closest time to that specin.append(uvspecs[ind]) # append that spectrum to the input list
def pyrsir(
        filename,
        xlsx,
        n,
        plot=True,  # plot the data for a quick look
        verbose=True,  # chatty
        bounds_confidence=0.99,  #
        combine_spectra=True,  # whether or not to output a summed spectrum
        return_data=False,  #
):
    """
    A method for generating reconstructed single ion monitoring traces.

    Processing parameters are read from the excel workbook (or taken directly
    from ``xlsx`` when it is a dictionary), species traces are pulled from the
    mass spectrum file (or reused from 'Raw Data' sheets already present in the
    workbook), binned by every value of ``n``, normalized to the total ion
    current, and written back to the workbook.

    :param filename: path to mzML or raw file to process
    :param xlsx: path to excel file with correctly formatted columns, or a
        dictionary of species parameters to use in place of a workbook
    :param n: number of scans to sum together (for binning algorithm); a single
        integer or a list/tuple of integers, each at least 1
    :param plot: whether to plot and show the data for a quick look
    :param verbose: chatty mode
    :param bounds_confidence: confidence interval for automatically generated bounds (only applicable if molecular
        formulas are provided).
    :param combine_spectra: whether to output a summed spectrum
    :param return_data: whether to return data (if the data from the function is required by another function)
    :return: ``(mzml, sp, rtime, tic, chroms)`` when ``return_data`` is True
        (nothing is written to the workbook in that case), otherwise None
    """

    def check_integer(val, name):
        """
        This function checks that the supplied values are integers greater than 1
        A integer value that is non-negative is required for the summing function.
        Please check your input value.
        """
        # NOTE: the docstring above is appended to the exit messages below
        if not isinstance(val, (list, tuple)):  # if only one value given for n
            val = [val]
        for num in val:
            # exact type check on purpose: bool is a subclass of int and must still be rejected
            if type(num) is not int:
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' % (name, str(num), check_integer.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' % (name, str(num), check_integer.__doc__))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:
                pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                for key in sp:  # plot each species
                    if sp[key]['affin'] == mode:  # equality, not identity, for strings
                        pl.plot(rtime[modekey], sp[key]['raw'], linewidth=0.75, label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        # booleans instead of 'off'/'on': a non-empty string is truthy
        pl.tick_params(axis='x', labelbottom=False)

        # summed data
        loc = 2
        for num in n:
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = str(num) + 'sum' + mode
                if modekey in rtime:
                    pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode:  # if a MS species
                            pl.plot(rtime[modekey], sp[key][sumkey], linewidth=0.75, label=key)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
            loc += 1
        pl.tick_params(axis='x', labelbottom=True)  # only the last subplot keeps its x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            if verbose is True:
                sys.stdout.write('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                delete.append('Raw Data (' + sp[key]['affin'] + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')')
                    delete.append(str(num) + ' Normalized (' + sp[key]['affin'] + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            if verbose is True:
                sys.stdout.write(' DONE.\n')
        if verbose is True:
            sys.stdout.write('Writing to "%s"' % xlfile.bookname)
            sys.stdout.flush()

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode, tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                modekey = 'raw' + mode
                if modekey in rtime:
                    if max(n) > 1:  # if data were summed
                        sheetname = str(num) + ' Sum (' + mode + ')'
                        xlfile.writersim(  # write summed data
                            sp, rtime[sumkey + mode], sumkey, sheetname, mode, tic[sumkey + mode])
                    sheetname = str(num) + ' Normalized (' + mode + ')'
                    xlfile.writersim(  # write normalized data
                        sp, rtime[sumkey + mode], normkey, sheetname, mode)

        for key, val in sorted(sp.items()):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(
                    sp[key]['spectrum'][0],  # x values
                    sp[key]['spectrum'][1],  # y values
                    key,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Isotope Patterns',  # sheet name
                    chart=True,  # output excel chart
                )

        # NOTE(review): rd is only ever assigned False or True in this function,
        # so this branch never runs as written; left unchanged because the
        # positional arguments below do not match the keyword usage of
        # writemultispectrum elsewhere in this function - confirm intent
        if rd is None:
            for key, val in sorted(chroms.items()):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'], chroms[key]['xunit'],
                                          chroms[key]['yunit'], 'Function Chromatograms', key)

        # check for UV-Vis spectra (equality, not identity, against the literal)
        uvstuff = any(sp[key]['affin'] == 'UV' for key in sp)
        if uvstuff is True:
            for ind, val in enumerate(tic['rawUV']):  # normalize the UV intensities
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV', tic['rawUV'])  # write UV-Vis data to sheet

        if sum_spectra is not None:  # write all summed spectra
            for fn in sum_spectra:
                specname = '%s %s' % (mzml.functions[fn]['mode'], mzml.functions[fn]['level'])
                if 'target' in mzml.functions[fn]:
                    specname += ' %.3f' % mzml.functions[fn]['target']
                specname += ' (%.3f-%.3f)' % (mzml.functions[fn]['window'][0], mzml.functions[fn]['window'][1])
                xlfile.writemultispectrum(
                    sum_spectra[fn][0],  # x values
                    sum_spectra[fn][1],  # y values
                    specname,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Summed Spectra',  # sheet name
                    chart=True,  # output excel chart
                )
        if verbose is True:
            sys.stdout.write(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        res = None  # resolution is determined lazily, at most once per call
        for species in dct:
            if 'affin' not in dct[species]:  # set affinity if not specified
                fn = dct[species]['function']
                if mzml.functions[fn]['type'] == 'MS':
                    dct[species]['affin'] = mzml.functions[fn]['mode']
                if mzml.functions[fn]['type'] == 'UV':
                    dct[species]['affin'] = 'UV'
            if 'formula' in dct[species] and dct[species]['formula'] is not None:
                if res is None:
                    res = int(mzml.auto_resolution())
                dct[species]['mol'].res = res  # sets resolution in Molecule object
                # dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1]  # recalculates sigma with new resolution
                dct[species]['bounds'] = dct[species]['mol'].bounds  # calculates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------

    if verbose is True:
        stime = ScriptTime()
        stime.printstart()

    n = check_integer(n, 'number of scans to sum')  # checks integer input and converts to list

    if not isinstance(xlsx, dict):
        if verbose is True:
            sys.stdout.write('Loading processing parameters from excel file')
            sys.stdout.flush()
        xlfile = XLSX(xlsx, verbose=verbose)
        sp = xlfile.pullrsimparams()
    else:  # if parameters were provided in place of an excel file
        sp = xlsx

    mskeys = ['+', '-']
    for key in sp:
        if 'formula' in sp[key] and sp[key]['formula'] is not None:  # if formula is specified
            sp[key]['mol'] = IPMolecule(sp[key]['formula'])  # create Molecule object
            # generate bounds from molecule object with this confidence interval
            sp[key]['bounds'] = sp[key]['mol'].calculate_bounds(bounds_confidence)
    if verbose is True:
        sys.stdout.write(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys:  # look for existing positive and negative mode raw data
        try:
            modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode + ')')
        except (KeyError, UnboundLocalError):
            # KeyError: raised by pullrsim (presumably the sheet is absent);
            # UnboundLocalError: pyrsir was not handed an excel file, so xlfile was never bound
            continue
        if verbose is True:
            sys.stdout.write('Existing (%s) mode raw data were found, grabbing those values.' % mode)
            sys.stdout.flush()
        rd = True  # bool that rd is present
        modekey = 'raw' + mode
        sp.update(modedata)  # update sp dictionary with raw data
        for key in modedata:  # check for affinities
            if 'affin' not in sp[key]:
                sp[key]['affin'] = mode
        rtime[modekey] = list(modetime)  # update time list
        tic[modekey] = list(modetic)  # update tic list
        if verbose is True:
            sys.stdout.write(' DONE\n')

    # sp = prepformula(sp)
    newpeaks = False
    if rd is True:
        sum_spectra = None
        # species that do not have raw data yet (values are references into sp)
        newsp = {key: sp[key] for key in sp if 'raw' not in sp[key]}
        if newsp:
            newpeaks = True
            if verbose is True:
                sys.stdout.write('Some peaks are not in the raw data, extracting these from raw file.\n')
            # pull predefined isotope patterns and add them to species
            ips = xlfile.pullmultispectrum('Isotope Patterns')
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [ips[species]['x'], ips[species]['y']]
            mzml = mzML(filename)  # load mzML class
            sp = prepformula(sp)  # prep formula etc for summing
            newsp = prepformula(newsp)  # prep formula species for summing
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(
                        3,
                        newsp[species]['bounds'][0],
                        newsp[species]['bounds'][1],
                    )
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            if verbose is True:
                sys.stdout.write('No new peaks were specified. Proceeding directly to summing and normalization.\n')

    if rd is False:  # if no raw data is present, process mzML file
        mzml = mzML(filename, verbose=verbose)  # load mzML class
        sp = prepformula(sp)
        sp, sum_spectra = mzml.pull_species_data(sp, combine_spectra)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp:  # collect the spectrum, time and tic associated with each species
            func = sp[key]['function']
            if mzml.functions[func]['type'] == 'MS':  # determine mode key
                if combine_spectra is True:
                    # extract the spectrum object
                    sp[key]['spectrum'] = sum_spectra[sp[key]['function']].trim(xbounds=sp[key]['bounds'])
                mode = 'raw' + mzml.functions[func]['mode']
            if mzml.functions[func]['type'] == 'UV':
                mode = 'rawUV'
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            # if 'formula' in sp[key] and sp[key]['formula'] is not None:
            #     sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        if combine_spectra is True:
            for fn in sum_spectra:
                sum_spectra[fn] = sum_spectra[fn].trim()  # convert Spectrum objects into x,y lists

    # if max(n) > 1: # run combine functions if n > 1
    for num in n:  # for each n to sum
        if verbose is True:
            sys.stdout.write('\r%d Summing species traces.' % num)
        sumkey = str(num) + 'sum'
        for key in sp:  # bin each species
            # if species is MS related
            if sp[key]['affin'] in mskeys or mzml.functions[sp[key]['function']]['type'] == 'MS':
                sp[key][sumkey] = bindata(num, sp[key]['raw'])
        for mode in mskeys:
            modesumkey = sumkey + mode
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                rtime[modesumkey] = bindata(num, rtime[modekey], num)
                tic[modesumkey] = bindata(num, tic[modekey])
    if verbose is True:
        sys.stdout.write(' DONE\n')
        sys.stdout.flush()
    # else:
    #     for key in sp: # create key for normalization
    #         sp[key]['1sum'] = sp[key]['raw']

    for num in n:  # normalize each peak's chromatogram
        if verbose is True:
            sys.stdout.write('\r%d Normalizing species traces.' % num)
            sys.stdout.flush()
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                for key in sp:  # for each species
                    # if species has affinity
                    if sp[key]['affin'] in mskeys or mzml.functions[sp[key]['function']]['type'] == 'MS':
                        species_tic = tic[sumkey + sp[key]['affin']]
                        sp[key][normkey] = [
                            val / (species_tic[ind] + 0.01)  # +0.01 to avoid div/0 errors
                            for ind, val in enumerate(sp[key][sumkey])
                        ]
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if return_data is True:  # if data is to be used by another function, return the calculated data
        # NOTE(review): chroms is only assigned when no raw data sheets were found,
        # and mzml only when the mass spectrum file was opened - confirm callers
        # only use return_data on those paths
        return mzml, sp, rtime, tic, chroms

    # import pickle #pickle objects (for troubleshooting)
    # pickle.dump(rtime,open("rtime.p","wb"))
    # pickle.dump(tic,open("tic.p","wb"))
    # pickle.dump(chroms,open("chroms.p","wb"))
    # pickle.dump(sp,open("sp.p","wb"))

    output()  # write data to excel file

    if verbose is True:
        sys.stdout.write('\rUpdating paramters')
        sys.stdout.flush()
    xlfile.updatersimparams(sp)  # update summing parameters
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if verbose is True:
        sys.stdout.write('\rSaving "%s" (this may take some time)' % xlfile.bookname)
        sys.stdout.flush()
    xlfile.save()
    if verbose is True:
        sys.stdout.write(' DONE\n')

    # plotting is governed by ``plot`` alone; ``verbose`` only controls the messages
    if verbose is True:
        sys.stdout.write('Plotting traces')
    if plot is True:
        plots()  # plots for quick review
    if verbose is True:
        sys.stdout.write(' DONE\n')
    if verbose is True:
        stime.printelapsed()
def pyrsir(
        filename,
        xlsx,
        n,
        plot=True,  # plot the data for a quick look
        verbose=True,  # chatty
        bounds_confidence=0.99,  # confidence interval for generated bounds
        combine_spectra=True,  # whether or not to output a summed spectrum
        return_data=False,  # return the processed data instead of writing it
):
    """
    Generate reconstructed single ion monitoring (RSIM) traces.

    Species parameters are loaded from an excel workbook (or taken from a
    dictionary), the intensity traces for each species are pulled from the
    mass spectrometry file, binned by every value of ``n``, normalized to the
    binned total ion current and finally either returned or written back to
    the workbook.

    :param filename: path to mzML or raw file to process
    :param xlsx: path to excel file with correctly formatted columns, or a
        dictionary of species parameters (a dictionary has no workbook to
        write to, so it requires ``return_data=True``)
    :param n: number of scans to sum together (for binning algorithm); a
        single integer or a list/tuple of integers, each at least 1
    :param plot: whether to plot and show the data for a quick look
    :param verbose: chatty mode
    :param bounds_confidence: confidence interval for automatically generated
        bounds (only applicable if molecular formulas are provided).
    :param combine_spectra: whether to output a summed spectrum
    :param return_data: whether to return data (if the data from the function
        is required by another function)
    :return: ``(mzml, sp, rtime, tic, chroms)`` if ``return_data`` is True
        (``mzml`` and ``chroms`` are None when they were not loaded because
        existing raw data were reused), otherwise None
    :raises SystemExit: if a value of ``n`` is not an integer of at least 1
    :raises ValueError: if ``xlsx`` is a dictionary and ``return_data`` is not
        True
    """

    def say(text):
        """Writes progress text to stdout when in verbose mode"""
        if verbose is True:
            sys.stdout.write(text)
            sys.stdout.flush()

    def check_integer(val, name):
        """
        Checks that the supplied value(s) are integers of at least 1 and
        returns them as a sequence (a lone value is wrapped in a list).
        Exits the interpreter with an explanatory message otherwise.
        """
        # the hint is an explicit string rather than __doc__ so that the
        # message survives ``python -OO`` (which strips docstrings)
        hint = (
            'The supplied values must be integers greater than or equal to 1.\n'
            'A positive integer value is required for the summing function.\n'
            'Please check your input value.'
        )
        if not isinstance(val, (list, tuple)):  # if only one value given for n
            val = [val]
        for num in val:
            if type(num) is not int:
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' % (name, str(num), hint))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' % (name, str(num), hint))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:
                pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                for key in sp:  # plot each species
                    if sp[key]['affin'] == mode:
                        pl.plot(rtime[modekey], sp[key]['raw'], linewidth=0.75, label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        # booleans are used because current matplotlib no longer treats 'off'/'on' as False/True
        pl.tick_params(axis='x', labelbottom=False)

        # summed data
        for loc, num in enumerate(n, start=2):
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = sumkey + mode
                if modekey in rtime:
                    pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode:  # if a MS species
                            pl.plot(rtime[modekey], sp[key][sumkey], linewidth=0.75, label=key)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
        pl.tick_params(axis='x', labelbottom=True)  # only the bottom plot is labelled
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            say('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                affin = sp[key]['affin']
                names = ['Raw Data (' + affin + ')']
                for num in n:
                    names.append(str(num) + ' Sum (' + affin + ')')
                    names.append(str(num) + ' Normalized (' + affin + ')')
                for name in names:
                    if name not in delete:  # each sheet only needs to be listed once
                        delete.append(name)
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            say(' DONE.\n')

        say('Writing to "%s"' % xlfile.bookname)

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode, tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                if 'raw' + mode not in rtime:
                    continue
                if max(n) > 1:  # if data were summed
                    sheetname = str(num) + ' Sum (' + mode + ')'
                    xlfile.writersim(
                        sp, rtime[sumkey + mode], sumkey, sheetname, mode, tic[sumkey + mode]
                    )  # write summed data
                sheetname = str(num) + ' Normalized (' + mode + ')'
                xlfile.writersim(sp, rtime[sumkey + mode], normkey, sheetname, mode)  # write normalized data

        for key in sorted(sp):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(
                    sp[key]['spectrum'][0],  # x values
                    sp[key]['spectrum'][1],  # y values
                    key,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Isotope Patterns',  # sheet name
                    chart=True,  # output excel chart
                )

        # NOTE(review): rd is only ever False or True in this function, so this
        # branch never runs and chromatograms are never written. Confirm the
        # argument order of the call below against XLSX.writemultispectrum
        # before changing the condition to "rd is False".
        if rd is None:
            for key in sorted(chroms):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'], chroms[key]['xunit'],
                                          chroms[key]['yunit'], 'Function Chromatograms', key)

        if any(sp[key]['affin'] == 'UV' for key in sp):  # check for UV-Vis spectra
            for ind, val in enumerate(tic['rawUV']):  # normalize the UV intensities (in place)
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV', tic['rawUV'])  # write UV-Vis data to sheet

        if sum_spectra is not None:  # write all summed spectra
            for fn in sum_spectra:
                fninfo = mzml.functions[fn]
                specname = '%s %s' % (fninfo['mode'], fninfo['level'])
                if 'target' in fninfo:
                    specname += ' %.3f' % fninfo['target']
                specname += ' (%.3f-%.3f)' % (fninfo['window'][0], fninfo['window'][1])
                xlfile.writemultispectrum(
                    sum_spectra[fn][0],  # x values
                    sum_spectra[fn][1],  # y values
                    specname,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Summed Spectra',  # sheet name
                    chart=True,  # output excel chart
                )
        say(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        res = None  # instrument resolution, determined once on first use
        for species in dct:
            entry = dct[species]
            if 'affin' not in entry:  # set affinity if not specified
                fntype = mzml.functions[entry['function']]['type']
                if fntype == 'MS':
                    entry['affin'] = mzml.functions[entry['function']]['mode']
                elif fntype == 'UV':
                    entry['affin'] = 'UV'
            if 'formula' in entry and entry['formula'] is not None:
                if res is None:
                    res = int(mzml.auto_resolution())
                entry['mol'].res = res  # sets resolution in Molecule object
                entry['bounds'] = entry['mol'].bounds  # calculates bounds
        return dct

    def is_ms_species(key):
        """Whether the species has a MS mode affinity or belongs to a MS function"""
        return (
            sp[key]['affin'] in mskeys
            or mzml.functions[sp[key]['function']]['type'] == 'MS'
        )

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------
    if verbose is True:
        stime = ScriptTime()
        stime.printstart()

    n = check_integer(n, 'number of scans to sum')  # checks integer input and converts to list

    if isinstance(xlsx, dict):  # if parameters were provided in place of an excel file
        if return_data is not True:
            # without a workbook every output step would fail on the missing
            # XLSX object, so refuse before doing any processing
            raise ValueError(
                'A parameter dictionary was provided in place of an excel file; '
                'there is no workbook to write to, so return_data must be True.'
            )
        xlfile = None
        sp = xlsx
    else:
        say('Loading processing parameters from excel file')
        xlfile = XLSX(xlsx, verbose=verbose)
        sp = xlfile.pullrsimparams()

    mskeys = ['+', '-']
    for key in sp:
        if 'formula' in sp[key] and sp[key]['formula'] is not None:  # if formula is specified
            sp[key]['mol'] = IPMolecule(sp[key]['formula'])  # create Molecule object
            sp[key]['bounds'] = sp[key]['mol'].calculate_bounds(
                bounds_confidence
            )  # generate bounds from molecule object with this confidence interval
    say(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False  # whether existing raw data were found in the workbook
    mzml = None  # stays None if the mass spec file never needs to be loaded
    chroms = None  # stays None if chromatograms are not pulled
    if xlfile is not None:
        for mode in mskeys:  # look for existing positive and negative mode raw data
            try:
                modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode + ')')
            except KeyError:  # no raw data sheet for this mode
                continue
            say('Existing (%s) mode raw data were found, grabbing those values.' % mode)
            rd = True  # bool that rd is present
            modekey = 'raw' + mode
            sp.update(modedata)  # update sp dictionary with raw data
            for key in modedata:  # check for affinities
                if 'affin' not in sp[key]:
                    sp[key]['affin'] = mode
            rtime[modekey] = list(modetime)  # update time list
            tic[modekey] = list(modetic)  # update tic list
            say(' DONE\n')

    newpeaks = False
    if rd is True:
        sum_spectra = None
        # species that do not have raw data yet (references, not copies)
        newsp = {key: sp[key] for key in sp if 'raw' not in sp[key]}
        if newsp:
            newpeaks = True
            say('Some peaks are not in the raw data, extracting these from raw file.\n')
            ips = xlfile.pullmultispectrum(
                'Isotope Patterns')  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [ips[species]['x'], ips[species]['y']]
            mzml = mzML(filename)  # load mzML class
            sp = prepformula(sp)  # prep formula etc for summing
            newsp = prepformula(newsp)  # prep formula species for summing
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(
                        3, newsp[species]['bounds'][0], newsp[species]['bounds'][1]
                    )
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            say('No new peaks were specified. Proceeding directly to summing and normalization.\n')
    else:  # if no raw data is present, process mzML file
        mzml = mzML(filename, verbose=verbose)  # load mzML class
        sp = prepformula(sp)
        sp, sum_spectra = mzml.pull_species_data(sp, combine_spectra)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp:
            func = sp[key]['function']
            fntype = mzml.functions[func]['type']
            if fntype == 'MS':  # determine mode key
                if combine_spectra is True:
                    sp[key]['spectrum'] = sum_spectra[func].trim(
                        xbounds=sp[key]['bounds'])  # extract the spectrum object
                mode = 'raw' + mzml.functions[func]['mode']
            elif fntype == 'UV':
                mode = 'rawUV'
            else:  # no time/tic key is defined for other function types
                continue
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
        if combine_spectra is True:
            for fn in sum_spectra:
                sum_spectra[fn] = sum_spectra[fn].trim()  # convert Spectrum objects into x,y lists

    for num in n:  # for each n to sum
        say('\r%d Summing species traces.' % num)
        sumkey = str(num) + 'sum'
        for key in sp:  # bin each species
            if is_ms_species(key):
                sp[key][sumkey] = bindata(num, sp[key]['raw'])
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                rtime[sumkey + mode] = bindata(num, rtime[modekey], num)
                tic[sumkey + mode] = bindata(num, tic[modekey])
        say(' DONE\n')

    for num in n:  # normalize each peak's chromatogram
        say('\r%d Normalizing species traces.' % num)
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        # the result does not depend on which MS mode is present, so a single
        # pass is made as long as there is data for at least one mode
        if any('raw' + mode in rtime for mode in mskeys):
            for key in sp:  # for each species
                if is_ms_species(key):
                    tickey = sumkey + sp[key]['affin']
                    sp[key][normkey] = [
                        val / (tic[tickey][ind] + 0.01)  # +0.01 to avoid div/0 errors
                        for ind, val in enumerate(sp[key][sumkey])
                    ]
        say(' DONE\n')

    if return_data is True:  # if data is to be used by another function, return the calculated data
        return mzml, sp, rtime, tic, chroms

    output()  # write data to excel file

    say('\rUpdating paramters')
    xlfile.updatersimparams(sp)  # update summing parameters
    say(' DONE\n')

    say('\rSaving "%s" (this may take some time)' % xlfile.bookname)
    xlfile.save()
    say(' DONE\n')

    if plot is True:  # plotting is controlled by plot alone, not by verbose
        say('Plotting traces')
        plots()  # plots for quick review
        say(' DONE\n')
    if verbose is True:
        stime.printelapsed()
plt.savefig('../{OUTFILE}.png'.format(OUTFILE=outputFile + str(minFilter)), bbox_inches='tight') ################################## ############################################################### # MAIN ############################################################### # _mzML processing variables filename = 'HZ-140516_HOTKEYMSMS 1376 II.raw' # raw or mzml file name fillzeros = True # fills spectrum with zeros decpl = 1 # number of decimal places to track mzrange = None # mzrange to track sr = 'all' # scan range to track mzml = mzML(filename, verbose=True) # EDESI Plot Production variable minFilter = 20 # minFilter intensity value threshold = 1156 # threshold of peak height for Breakdown tracing plotBreakdown = True # Construct Plot with Breakdown? plotZoom = True # Construct Plot with Zoom in region of interest? (Autozoom) msmsfns = [] for func in mzml.functions: # identify MSMS functions in the provided file if mzml.functions[func]['type'] == 'MS' and mzml.functions[func]['level'] > 1: msmsfns.append(func) if len(msmsfns) > 1: # if there is more than one msms function, ask the user which one to process sys.stdout.write( 'More than one MS/MS function is contained in this mzML file. Please indicate which one you wish to process:\nFunction\ttarget\n') for func in msmsfns:
} try: sys.stdout.write('Using figure preset "%s"\n' % (setting)) sys.stdout.flush() return ipsetting[typ] except KeyError: raise KeyError('\nThe specified figure setting "%s" is not defined.\nPlease check your spelling' % setting) if __name__ == '__main__': os.chdir(curdir) # change to current working directory keywords = presets(setting) # pull preset kwargs if spectrum.lower().endswith('.mzml.gz') or spectrum.lower().endswith('.raw'): # if supplied with a mass spec file mzml = mzML(spectrum, verbose=False) exp = mzml.sum_scans() keywords.update({'outname': mzml.filename.split('.')[0]}) # set default output filename else: # otherwise assume that it is an excel file xlfile = XLSX(spectrum, verbose=True) # load excel file if sheetname is None: # otherwise use the first sheet sheetname = xlfile.wb.sheetnames[0] exp = xlfile.pullspectrum(sheetname, skiplines=skiplines)[0] # load spectrum from first sheet in workbook keywords.update({ # set default output filename 'outname': f'{xlfile.bookname[:-5]} ({sheetname})', }) keywords.update(override) # apply any user overrides plot_mass_spectrum(exp, simdict, **keywords) import gc