def setUp(self): self.mol = Molecule('L2PdAr+I') self.ipmol = IPMolecule( 'L2PdAr+I', ipmethod='multiplicative', dropmethod='threshold', threshold=0.01, )
def test_ipmolecule_methods(self): for ipmethod in VALID_IPMETHODS: for dropmethod in VALID_DROPMETHODS: mol = IPMolecule( 'Pd2C10H5', ipmethod=ipmethod, dropmethod=dropmethod, ) test = mol.gaussian_isotope_pattern # test gaussian isotope pattern generation
def test_ipmolecule(self): self.assertEqual( self.ipmol.estimated_exact_mass, 1109.1303706381723, ) self.assertEqual( self.ipmol.barip, [[ 1105.130443, 1106.133823749481, 1107.1290292337153, 1108.1305157201678, 1109.1303706381723, 1110.1328590930914, 1111.1301978511672, 1112.1325950611867, 1113.1318575059308, 1114.134086933976, 1115.1370272665604, 1116.140052, 1117.143407 ], [ 2.287794397621507, 1.5228133756325326, 25.476059354316945, 66.8193866193291, 100.0, 52.65050639843156, 74.88108058795096, 42.5730473226288, 39.36707265932168, 20.17253048748261, 5.990476280101723, 1.1848920932846654, 0.16082254122736006 ]]) self.ipmol - 'PPh3' # test subtraction self.ipmol + 'PPh3' # test addition mol2 = IPMolecule('N(Et)2(CH2(13C)H2(2H))2') self.ipmol + mol2 # test class addition
def pyrsir( filename, xlsx, n, plot=True, # plot the data for a quick look verbose=True, # chatty bounds_confidence=0.99, # combine_spectra=True, # whether or not to output a summed spectrum return_data=False, # ): """ A method for generating reconstructed single ion monitoring traces. :param filename: path to mzML or raw file to process :param xlsx: path to excel file with correctly formatted columns :param n: number of scans to sum together (for binning algorithm) :param plot: whether to plot and show the data for a quick look :param verbose: chatty mode :param bounds_confidence: confidence interval for automatically generated bounds (only applicable if molecular formulas are provided). :param combine_spectra: whether to output a summed spectrum :param return_data: whether to return data (if the data from the function is required by another function) :return: """ def check_integer(val, name): """ This function checks that the supplied values are integers greater than 1 A integer value that is non-negative is required for the summing function. Please check your input value. """ if type(val) != list and type( val) != tuple: # if only one value given for n val = [val] for num in val: if type(num) != int: sys.exit('\nThe %s value (%s) is not an integer.\n%s' % (name, str(num), check_integer.__doc__)) if num < 1: sys.exit('\nThe %s value (%s) is less than 1.\n%s' % (name, str(num), check_integer.__doc__)) return val def plots(): """ Function for generating a set of plots for rapid visual assessment of the supplied n-level Outputs all MS species with the same sum level onto the same plot requirements: pylab as pl """ pl.clf() # clears and closes old figure (if still open) pl.close() nplots = len(n) + 1 # raw data pl.subplot(nplots, 1, 1) # top plot for mode in mskeys: modekey = 'raw' + mode if modekey in rtime.keys(): pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC') # plot tic for key in sp: # plot each species if sp[key]['affin'] is mode: pl.plot(rtime[modekey], sp[key]['raw'], linewidth=0.75, label=key) pl.title('Raw Data') pl.ylabel('Intensity') pl.tick_params(axis='x', labelbottom='off') # summed data loc = 2 for num in n: pl.subplot(nplots, 1, loc) sumkey = str(num) + 'sum' for mode in mskeys: modekey = str(num) + 'sum' + mode if modekey in rtime.keys(): pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC') # plot tic for key in sp: if sp[key]['affin'] is mode: # if a MS species pl.plot(rtime[modekey], sp[key][sumkey], linewidth=0.75, label=key) pl.title('Summed Data (n=%i)' % (num)) pl.ylabel('Intensity') pl.tick_params(axis='x', labelbottom='off') loc += 1 pl.tick_params(axis='x', labelbottom='on') pl.show() def output(): """ Writes the retrieved and calculated values to the excel workbook using the XLSX object """ if newpeaks is True: # looks for and deletes any sheets where the data will be changed if verbose is True: sys.stdout.write('Clearing duplicate XLSX sheets.') delete = [] for key in newsp: # generate strings to look for in excel file delete.append('Raw Data (' + sp[key]['affin'] + ')') for num in n: delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')') delete.append( str(num) + ' Normalized (' + sp[key]['affin'] + ')') delete.append('Isotope Patterns') xlfile.removesheets(delete) # remove those sheets if verbose is True: sys.stdout.write(' DONE.\n') if verbose is True: sys.stdout.write('Writing to "%s"' % xlfile.bookname) sys.stdout.flush() for mode in mskeys: # write raw data to sheets modekey = 'raw' + mode if modekey in rtime.keys(): sheetname = 'Raw Data (' + mode + ')' xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode, tic[modekey]) for num in n: # write summed and normalized data to sheets sumkey = str(num) + 'sum' normkey = str(num) + 'norm' for mode in mskeys: modekey = 'raw' + mode if modekey in rtime.keys(): if max(n) > 1: # if data were summed sheetname = str(num) + ' Sum (' + mode + ')' xlfile.writersim( sp, rtime[sumkey + mode], sumkey, sheetname, mode, tic[sumkey + mode]) # write summed data sheetname = str(num) + ' Normalized (' + mode + ')' xlfile.writersim(sp, rtime[sumkey + mode], normkey, sheetname, mode) # write normalized data for key, val in sorted(sp.items()): # write isotope patterns if sp[key]['affin'] in mskeys: xlfile.writemultispectrum( sp[key]['spectrum'][0], # x values sp[key]['spectrum'][1], # y values key, # name of the spectrum xunit='m/z', # x unit yunit='Intensity (counts)', # y unit sheetname='Isotope Patterns', # sheet name chart=True, # output excel chart ) if rd is None: for key, val in sorted(chroms.items()): # write chromatograms xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'], chroms[key]['xunit'], chroms[key]['yunit'], 'Function Chromatograms', key) uvstuff = False for key in sp: # check for UV-Vis spectra if sp[key]['affin'] is 'UV': uvstuff = True break if uvstuff is True: for ind, val in enumerate( tic['rawUV']): # normalize the UV intensities tic['rawUV'][ind] = val / 1000000. xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV', tic['rawUV']) # write UV-Vis data to sheet if sum_spectra is not None: # write all summed spectra for fn in sum_spectra: specname = '%s %s' % (mzml.functions[fn]['mode'], mzml.functions[fn]['level']) if 'target' in mzml.functions[fn]: specname += ' %.3f' % mzml.functions[fn]['target'] specname += ' (%.3f-%.3f)' % (mzml.functions[fn]['window'][0], mzml.functions[fn]['window'][1]) xlfile.writemultispectrum( sum_spectra[fn][0], # x values sum_spectra[fn][1], # y values specname, # name of the spectrum xunit='m/z', # x unit yunit='Intensity (counts)', # y unit sheetname='Summed Spectra', # sheet name chart=True, # output excel chart ) if verbose is True: sys.stdout.write(' DONE\n') def prepformula(dct): """looks for formulas in a dictionary and prepares them for pullspeciesdata""" for species in dct: if 'affin' not in dct[species]: # set affinity if not specified fn = dct[species]['function'] if mzml.functions[fn]['type'] == 'MS': dct[species]['affin'] = mzml.functions[fn]['mode'] if mzml.functions[fn]['type'] == 'UV': dct[species]['affin'] = 'UV' if 'formula' in dct[species] and dct[species][ 'formula'] is not None: try: dct[species][ 'mol'].res = res # sets resolution in Molecule object except NameError: res = int(mzml.auto_resolution()) dct[species]['mol'].res = res # dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1] # recalculates sigma with new resolution dct[species]['bounds'] = dct[species][ 'mol'].bounds # caclulates bounds return dct # ---------------------------------------------------------- # -------------------PROGRAM BEGINS------------------------- # ---------------------------------------------------------- if verbose is True: stime = ScriptTime() stime.printstart() n = check_integer( n, 'number of scans to sum') # checks integer input and converts to list if type(xlsx) != dict: if verbose is True: sys.stdout.write('Loading processing parameters from excel file') sys.stdout.flush() xlfile = XLSX(xlsx, verbose=verbose) sp = xlfile.pullrsimparams() else: # if parameters were provided in place of an excel file sp = xlsx mskeys = ['+', '-'] for key in sp: if 'formula' in sp[key] and sp[key][ 'formula'] is not None: # if formula is specified sp[key]['mol'] = IPMolecule( sp[key]['formula']) # create Molecule object sp[key]['bounds'] = sp[key]['mol'].calculate_bounds( bounds_confidence ) # generate bounds from molecule object with this confidence interval if verbose is True: sys.stdout.write(' DONE\n') rtime = {} # empty dictionaries for time and tic tic = {} rd = False for mode in mskeys: # look for existing positive and negative mode raw data try: modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode + ')') except KeyError: continue except UnboundLocalError: # catch for if pyrsir was not handed an excel file continue if verbose is True: sys.stdout.write( 'Existing (%s) mode raw data were found, grabbing those values.' % mode) sys.stdout.flush() rd = True # bool that rd is present modekey = 'raw' + mode sp.update(modedata) # update sp dictionary with raw data for key in modedata: # check for affinities if 'affin' not in sp[key]: sp[key]['affin'] = mode rtime[modekey] = list(modetime) # update time list tic[modekey] = list(modetic) # update tic list if verbose is True: sys.stdout.write(' DONE\n') # sp = prepformula(sp) newpeaks = False if rd is True: newsp = {} sum_spectra = None for key in sp: # checks whether there is a MS species that does not have raw data if 'raw' not in sp[key]: newsp[key] = sp[key] # create references in the namespace if len(newsp) is not 0: newpeaks = True if verbose is True: sys.stdout.write( 'Some peaks are not in the raw data, extracting these from raw file.\n' ) ips = xlfile.pullmultispectrum( 'Isotope Patterns' ) # pull predefined isotope patterns and add them to species for species in ips: # set spectrum list sp[species]['spectrum'] = [ ips[species]['x'], ips[species]['y'] ] mzml = mzML(filename) # load mzML class sp = prepformula(sp) # prep formula etc for summing newsp = prepformula(newsp) # prep formula species for summing for species in newsp: if 'spectrum' not in newsp[species]: newsp[species]['spectrum'] = Spectrum( 3, newsp[species]['bounds'][0], newsp[species]['bounds'][1]) newsp = mzml.pull_species_data(newsp) # pull data else: if verbose is True: sys.stdout.write( 'No new peaks were specified. Proceeding directly to summing and normalization.\n' ) if rd is False: # if no raw data is present, process mzML file mzml = mzML(filename, verbose=verbose) # load mzML class sp = prepformula(sp) sp, sum_spectra = mzml.pull_species_data( sp, combine_spectra) # pull relevant data from mzML chroms = mzml.pull_chromatograms() # pull chromatograms from mzML rtime = {} tic = {} for key in sp: # compare predicted isotope patterns to the real spectrum and save standard error of the regression func = sp[key]['function'] if mzml.functions[func]['type'] == 'MS': # determine mode key if combine_spectra is True: sp[key]['spectrum'] = sum_spectra[ sp[key]['function']].trim( xbounds=sp[key] ['bounds']) # extract the spectrum object mode = 'raw' + mzml.functions[func]['mode'] if mzml.functions[func]['type'] == 'UV': mode = 'rawUV' if mode not in rtime: # if rtime and tic have not been pulled from that function rtime[mode] = mzml.functions[func]['timepoints'] tic[mode] = mzml.functions[func]['tic'] # if 'formula' in sp[key] and sp[key]['formula'] is not None: # sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum']) if combine_spectra is True: for fn in sum_spectra: sum_spectra[fn] = sum_spectra[fn].trim( ) # convert Spectrum objects into x,y lists # if max(n) > 1: # run combine functions if n > 1 for num in n: # for each n to sum if verbose is True: sys.stdout.write('\r%d Summing species traces.' % num) sumkey = str(num) + 'sum' for key in sp: # bin each species if sp[key]['affin'] in mskeys or mzml.functions[sp[key][ 'function']]['type'] == 'MS': # if species is MS related sp[key][sumkey] = bindata(num, sp[key]['raw']) for mode in mskeys: sumkey = str(num) + 'sum' + mode modekey = 'raw' + mode if modekey in rtime.keys(): # if there is data for that mode rtime[sumkey] = bindata(num, rtime[modekey], num) tic[sumkey] = bindata(num, tic[modekey]) if verbose is True: sys.stdout.write(' DONE\n') sys.stdout.flush() # else: # for key in sp: # create key for normalization # sp[key]['1sum'] = sp[key]['raw'] for num in n: # normalize each peak's chromatogram if verbose is True: sys.stdout.write('\r%d Normalizing species traces.' % num) sys.stdout.flush() sumkey = str(num) + 'sum' normkey = str(num) + 'norm' for mode in mskeys: modekey = 'raw' + mode if modekey in rtime.keys(): # if there is data for that mode for key in sp: # for each species if sp[key]['affin'] in mskeys or mzml.functions[ sp[key]['function']][ 'type'] == 'MS': # if species has affinity sp[key][normkey] = [] for ind, val in enumerate(sp[key][sumkey]): # sp[key][normkey].append(val/(mzml.function[func]['tic'][ind]+0.01)) #+0.01 to avoid div/0 errors sp[key][normkey].append( val / (tic[sumkey + sp[key]['affin']][ind] + 0.01)) # +0.01 to avoid div/0 errors if verbose is True: sys.stdout.write(' DONE\n') if return_data is True: # if data is to be used by another function, return the calculated data return mzml, sp, rtime, tic, chroms # import pickle #pickle objects (for troubleshooting) # pickle.dump(rtime,open("rtime.p","wb")) # pickle.dump(tic,open("tic.p","wb")) # pickle.dump(chroms,open("chroms.p","wb")) # pickle.dump(sp,open("sp.p","wb")) output() # write data to excel file if verbose is True: sys.stdout.write('\rUpdating paramters') sys.stdout.flush() xlfile.updatersimparams(sp) # update summing parameters if verbose is True: sys.stdout.write(' DONE\n') if verbose is True: sys.stdout.write('\rSaving "%s" (this may take some time)' % xlfile.bookname) sys.stdout.flush() xlfile.save() if verbose is True: sys.stdout.write(' DONE\n') if verbose is True: if verbose is True: sys.stdout.write('Plotting traces') if plot is True: plots() # plots for quick review if verbose is True: sys.stdout.write(' DONE\n') if verbose is True: stime.printelapsed()
from pythoms.molecule import IPMolecule kwargs = { 'dropmethod': 'threshold' } Ar = 'Ar+' X = 'I' arunit = 'C6H4' cap = 'Ph' n = 6 print(Ar) print('aromatic units') print('n\tmass') for i in range(1, n): print('%d\t%.2f' % (i, IPMolecule(Ar + (arunit * i) + X, **kwargs).estimated_exact_mass)) print('\nPd units') print('n\tmass') for i in range(1, n): print('%d\t%.2f' % (i, IPMolecule('L2Pd' + Ar + (arunit * i) + X, **kwargs).estimated_exact_mass)) print('\ncapped') print('n\tmass') for i in range(0, n + 3): print('%d\t%.2f' % (i, IPMolecule(cap + Ar + (arunit * i), **kwargs).estimated_exact_mass))