Python mzML Examples

Programming Language: Python

Namespace/Package Name: _classes._mzML

Method/Function: mzML

Examples at hotexamples.com: 12

Python mzML - 12 examples found. These are the top rated real world Python examples of _classes._mzML.mzML extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: validate_distribution.py Project: Yeungdb/mass-spec-python-tools

def test_mzml():
    """Smoke-test the mzML class using the 'MultiTest' data set.

    Loads 'MultiTest' through ``mzML`` and checks the detected functions,
    the per-chromatogram and per-scan decorators, scan summing, and both
    scan-number and time based indexing against stored reference values.

    Raises:
        ValueError: if any of the checks does not reproduce its reference
            value.
    """
    import sys  # local import so the test does not rely on a file-level one
    sys.stdout.write('Testing mzML class...')
    from _classes._mzML import mzML
    mzml = mzML('MultiTest', verbose=False)
    # keys() returns a view on Python 3 (never equal to a list) and has no
    # guaranteed order on Python 2, so compare a sorted list instead.
    if sorted(mzml.functions.keys()) != [1, 3, 4]:
        raise ValueError('Did not pull the correct functions')

    @mzml._foreachchrom
    def testperchrom(chromatogram):
        # collect the 'id' attribute of every chromatogram
        attr = mzml.attributes(chromatogram)
        return attr['id']

    if testperchrom() != [
            u'TIC', u'SRM SIC Q1=200 Q3=100 function=2 offset=0'
    ]:
        raise ValueError('For each chromatogram or attributes function failed')

    @mzml._foreachscan
    def testperspec(spectrum):
        # collect the MS:1000016 cvParam value of every scan
        p = mzml.cvparam(spectrum)
        return p["MS:1000016"]

    if testperspec() != [
            0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
            0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
            0.0847499967, 0.20341666, 0.322033346, 0.440683335
    ]:
        raise ValueError('For each scan or cvparam function failed')
    # element [1] of each returned pair is summed and compared to a reference
    if sum(mzml.sum_scans()[1]) != 162806964:
        raise ValueError('sum_scans function failed')
    if sum((mzml[2])[1]) != 6742121:  # integer index
        raise ValueError('scan indexing failed')
    if sum((mzml[0.01])[1]) != 56270834:  # float index
        raise ValueError('time indexing failed')
    sys.stdout.write(' PASS\n')

Example #2

Show file

File: validate_distribution.py Project: Yeungdb/mass-spec-python-tools

def test_mzml():
    """Validate the mzML class against reference values for 'MultiTest'.

    Each check raises ValueError with a message naming the feature that
    failed; ' PASS' is written to stdout when every check succeeds.
    """
    import sys  # local import so the test does not rely on a file-level one
    from _classes._mzML import mzML

    expected_functions = [1, 3, 4]
    expected_chrom_ids = [u'TIC', u'SRM SIC Q1=200 Q3=100 function=2 offset=0']
    expected_scan_values = [
        0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
        0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
        0.0847499967, 0.20341666, 0.322033346, 0.440683335,
    ]

    sys.stdout.write('Testing mzML class...')
    mzml = mzML('MultiTest',verbose=False)

    # A key view never compares equal to a list on Python 3, and key order
    # is not guaranteed on Python 2: normalise to a sorted list first.
    if sorted(mzml.functions.keys()) != expected_functions:
        raise ValueError('Did not pull the correct functions')

    @mzml._foreachchrom
    def testperchrom(chromatogram):
        # 'id' attribute of each chromatogram
        return mzml.attributes(chromatogram)['id']

    if testperchrom() != expected_chrom_ids:
        raise ValueError('For each chromatogram or attributes function failed')

    @mzml._foreachscan
    def testperspec(spectrum):
        # MS:1000016 cvParam value of each scan
        return mzml.cvparam(spectrum)["MS:1000016"]

    if testperspec() != expected_scan_values:
        raise ValueError('For each scan or cvparam function failed')
    if sum(mzml.sum_scans()[1]) != 162806964:
        raise ValueError('sum_scans function failed')
    if sum((mzml[2])[1]) != 6742121:  # integer index
        raise ValueError('scan indexing failed')
    if sum((mzml[0.01])[1]) != 56270834:  # float index
        raise ValueError('time indexing failed')
    sys.stdout.write(' PASS\n')

Example #3

Show file

File: spectrum binner.py Project: Yeungdb/mass-spec-python-tools

def sumspectra(filename,start=None, end=None,excel=None):
    """
    Sum the scans of a raw/mzML file and write the result to an excel file.

    filename:
        name of the file handed to both mzML and XLSX
    start:
        first scan to sum
        default None: first entry of the 'sr' item of function 1, plus one
    end:
        last scan to sum
        default None: second entry of the 'sr' item of function 1, plus one
    excel:
        accepted but not used by this function
    """
    from _classes._mzML import mzML
    from _classes._XLSX import XLSX
    from _classes._ScriptTime import ScriptTime

    timer = ScriptTime()
    timer.printstart()

    data = mzML(filename) # open the mass spec file
    if start is None:
        start = data.functions[1]['sr'][0] + 1
    if end is None:
        end = data.functions[1]['sr'][1] + 1

    mz, intensity = data.sum_scans(start=start, end=end)

    workbook = XLSX(filename, create=True)
    sheetname = 'summed spectra (scans %d-%d)' % (start, end)
    workbook.writespectrum(mz, intensity, sheetname)
    workbook.save()

    timer.printend()

Example #4

Show file

File: ED-ESI plot.py Project: Yeungdb/mass-spec-python-tools

    plt.savefig('../{OUTFILE}.png'.format(OUTFILE=outputFile + str(minFilter)),
                bbox_inches='tight')


##################################

###############################################################
#MAIN
###############################################################
# mzML processing variables (module-level configuration for this script)
filename = 'HZ-140516_HOTKEYMSMS 1376 II.raw'  # raw or mzml file name handed to mzML below
fillzeros = True  # fills spectrum with zeros
decpl = 1  # number of decimal places to track
mzrange = None  # m/z range to track
sr = 'all'  # scan range to track
mzml = mzML(filename, verbose=True)  # open the file with verbose output enabled

# EDESI plot production variables
minFilter = 20  # minimum-intensity filter value
threshold = 1156  # threshold of peak height for breakdown tracing
plotBreakdown = True  # construct the plot with breakdown?
plotZoom = True  # construct the plot zoomed in on the region of interest? (autozoom)

# Collect the keys of every function whose type is 'MS' and whose level is
# greater than 1, i.e. the MS/MS functions of the file.
msmsfns = []
for func in mzml.functions:  # identify MSMS functions in the provided file
    if mzml.functions[func][
            'type'] == 'MS' and mzml.functions[func]['level'] > 1:
        msmsfns.append(func)
if len(
        msmsfns
) > 1:  # if there is more than one msms function, ask the user which one to process

Example #5

Show file

File: msms interpreter assistant.py Project: Yeungdb/mass-spec-python-tools

def mia(filename,dec=0):
    """MS/MS interpreter assistant

    Sums all scans of *filename*, detects peaks in the summed spectrum,
    prints a table of the pairwise differences between the detected peaks,
    prints possible assignments for those differences based on a table of
    common losses, and finally shows the spectrum with the peaks annotated.

    filename:
        name of the file handed to mzML
    dec:
        accepted but currently unused; losses are always matched as integers

    NOTE(review): ``guess`` reads ``specific_components``, which is not
    defined in this function - presumably a module-level list of custom
    losses; confirm it exists in the enclosing file.
    """
    import numpy as np
    from _classes._mzML import mzML

    def indexes(x,y, thres=0.3, min_dist=None):
        '''
        !!!!! based on PeakUtils https://bitbucket.org/lucashnegri/peakutils
        Peak detection routine.

        Finds the peaks in *y* by taking its first order difference. By using
        *thres* and *min_dist* parameters, it is possible to reduce the number of
        detected peaks. *y* must be signed.

        Parameters
        ----------
        x : list or ndarray
            sorted x values matching *y* (searched with np.searchsorted)
        y : list or ndarray (signed)
            1D amplitude data to search for peaks.
        thres : float between [0., 1.]
            Normalized threshold. Only the peaks with amplitude higher than the
            threshold will be detected.
        min_dist : int
            minimum x distance between each detected peak

        Returns
        -------
        ndarray
            Array containing the indexes of the peaks that were detected

        Raises
        ------
        ValueError
            if *y* is an ndarray with an unsigned integer dtype
        '''
        if isinstance(y, np.ndarray) and np.issubdtype(y.dtype, np.unsignedinteger):
            raise ValueError("y must be signed")
        y = np.asarray(y) # no-op for a plain ndarray, converts anything else
        thres = thres * (np.max(y) - np.min(y)) + np.min(y) # scale threshold to the range of y

        # find the peaks by using the first order difference:
        # rising on the left, falling on the right, and above the threshold
        dy = np.diff(y)
        peaks = np.where((np.hstack([dy, 0.]) < 0.)
                        & (np.hstack([0., dy]) > 0.)
                        & (y > thres))[0]

        if peaks.size > 1 and min_dist is not None: # thin out peaks closer than min_dist
            highest = peaks[np.argsort(y[peaks])][::-1] # peak indexes, tallest first
            rem = np.ones(y.size, dtype=bool) # True marks an index to discard
            rem[peaks] = False

            for peak in highest:
                if not rem[peak]: # skip peaks already removed by a taller neighbour
                    ind = x[peak]
                    # index window covering x values within min_dist of the peak
                    l = max(0, np.searchsorted(x, ind - min_dist))
                    r = min(len(y) - 1, np.searchsorted(x, ind + min_dist))
                    rem[l:r] = True # discard everything in the window...
                    rem[peak] = False # ...except the peak itself

            peaks = np.arange(y.size)[~rem]

        return peaks

    def com_loss(dec=0,custom_losses=None):
        """takes a common loss dictionary and reduces the keys to the specified decimal place

        Losses whose keys collide after rounding are merged into a single
        comma separated string. Items of *custom_losses* are turned into
        Molecule objects and keyed by their rounded ``em`` attribute.
        Raises ValueError if *dec* exceeds the stored number of decimals.
        """
        from _classes.common_losses import losses,stored_dec
        if dec > stored_dec:
            raise ValueError('The specified number of decimal places (%d) exceeds the number stored (%d)' %(dec,stored_dec))

        def add(out, key, text):
            # append to an existing entry, or create a new one
            if key in out:
                out[key] += ', '
                out[key] += text
            else:
                out[key] = text

        out = {}
        for key in losses: # round values and add to dictionary
            newkey = round(key,dec)
            if dec == 0:
                newkey = int(newkey)
            add(out, newkey, losses[key])
        if custom_losses is not None: # if supplied with a custom list of losses
            from _classes._Molecule import Molecule
            for item in custom_losses:
                mol = Molecule(item)
                key = round(mol.em,dec)
                if dec == 0:
                    key = int(key)
                add(out, key, item)
        return out

    def tabulate(diffs):
        """tabulates the data in the output"""
        # header row: one column per detected peak
        print('\t' + ''.join('%.1f\t' % x[ind] for ind in inds))
        for ind,row in enumerate(diffs):
            string = '%.1f\t' %round(x[inds[ind]],1) # row label
            string += ''.join('%.1f\t' %round(col,1) for col in row)
            print(string+'\n')

    def guess(diffs):
        """searches for common integer losses among the differences matrix and prints them"""
        loss = com_loss(0,specific_components) # grab dictionary of loss values and their probable representation
        print('possible fragment assignments (from common losses):')
        for ind,row in enumerate(diffs):
            for ind2,otherpeak in enumerate(row):
                val = int(round(otherpeak))
                if val > 0 and val in loss:
                    # single string so the output is the same with or
                    # without the print function
                    print(repr(x[inds[ind]])+' -> '+repr(x[inds[ind2]])+': %s %s' %(val, loss[val]))

    mzml = mzML(filename)
    x,y = mzml.sum_scans()

    # if not all peaks are being detected, decrease the last value handed to indexes
    inds = indexes(x,y,0.01,7)

    # square matrix of differences between every pair of detected peaks
    diffs = [[x[i]-x[j] for j in inds] for i in inds]

    tabulate(diffs) #tabulate differences in console
    guess(diffs) # guess at what the differences might mean

    # annotate each peak with its x value at its intensity relative to the maximum (in %)
    annotations = {}
    top = max(y)
    for i in inds:
        annotations[str(x[i])] = [x[i],float(y[i])/float(top)*100.]
    from tome_v02 import plotms
    plotms([x,y],annotations=annotations,output='show')

Example #6

Show file

File: PyRSIR.py Project: Yeungdb/mass-spec-python-tools

def pyrsir(filename,xlsx,n,**kwargs):
    """
    Extracts species traces from a mass spec file as directed by an excel
    workbook, sums and normalizes them, and writes the results back to the
    workbook.

    filename:
        name of the raw/mzML file handed to mzML
    xlsx:
        name of the excel workbook handed to XLSX; it supplies the
        processing parameters and receives the output sheets
    n:
        integer, or list/tuple of integers, each at least 1
        number of scans to sum
    **kwargs:
        plot (default True): plot the traces once processing is finished
        verbose (default True): write progress to stdout
        bounds confidence (default 0.99): confidence interval handed to
            Molecule.bounds when the parameters are loaded

    Raises KeyError for unsupported keyword arguments and exits through
    sys.exit if n contains a non-integer or a value below 1.

    NOTE(review): when max(n) is 1 the summing step is skipped but the
    normalization step still reads the '<n>sum' keys - confirm whether
    n=1 is meant to be supported.
    """
    import sys

    def checkinteger(val,name):
        """
        This function checks that the supplied values are integers greater than 1
        
        A integer value that is non-negative is required for the summing function.
        Please check your input value. 
        """
        # the docstring above is shown to the user in the exit messages below
        if type(val) != list and type(val) != tuple: # if only one value given for n
            val = [val]
        for num in val:
            if type(num) != int:
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' %(name,str(num),checkinteger.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' %(name,str(num),checkinteger.__doc__))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        import pylab as pl
        pl.clf() # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n)+1

        # raw data
        pl.subplot(nplots,1,1) # top plot
        for mode in mskeys:
            modekey = 'raw'+mode
            if modekey in rtime:
                pl.plot(rtime[modekey],tic[modekey], linewidth = 0.75, label = 'TIC') #plot tic
                for key in sp: # plot each species
                    if sp[key]['affin'] == mode: # equality, not identity, for strings
                        pl.plot(rtime[modekey],sp[key]['raw'], linewidth=0.75, label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        pl.tick_params(axis='x',labelbottom=False) # booleans instead of 'on'/'off' strings

        # summed data, one subplot per n below the raw data
        for loc,num in enumerate(n,2):
            pl.subplot(nplots,1,loc)
            sumkey = str(num)+'sum'
            for mode in mskeys:
                modekey = sumkey+mode
                if modekey in rtime:
                    pl.plot(rtime[modekey],tic[modekey], linewidth = 0.75, label = 'TIC') #plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode: #if a MS species
                            pl.plot(rtime[modekey],sp[key][sumkey], linewidth=0.75, label=key)
            pl.title('Summed Data (n=%i)' %(num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x',labelbottom=False)
        pl.tick_params(axis='x',labelbottom=True) # only the last subplot keeps its x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True: # looks for and deletes any sheets where the data will be changed
            if verbose:
                sys.stdout.write('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp: # generate strings to look for in excel file
                delete.append('Raw Data ('+sp[key]['affin']+')')
                for num in n:
                    delete.append(str(num)+' Sum ('+sp[key]['affin']+')')
                    delete.append(str(num)+' Normalized ('+sp[key]['affin']+')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete) # remove those sheets
            if verbose:
                sys.stdout.write(' DONE.\n')

        if verbose:
            sys.stdout.write('Writing to "%s"' %xlfile.bookname)
            sys.stdout.flush()

        for mode in mskeys: # write raw data to sheets
            modekey = 'raw'+mode
            if modekey in rtime:
                sheetname = 'Raw Data ('+mode+')'
                xlfile.writersim(sp,rtime[modekey],'raw',sheetname,mode,tic[modekey])

        for num in n: # write summed and normalized data to sheets
            sumkey = str(num)+'sum'
            normkey = str(num)+'norm'
            for mode in mskeys:
                modekey = 'raw'+mode
                if modekey in rtime:
                    if max(n) > 1: # if data were summed
                        sheetname = str(num)+' Sum ('+mode+')'
                        xlfile.writersim(sp,rtime[sumkey+mode],sumkey,sheetname,mode,tic[sumkey+mode]) # write summed data
                    sheetname = str(num)+' Normalized ('+mode+')'
                    xlfile.writersim(sp,rtime[sumkey+mode],normkey,sheetname,mode) # write normalized data

        for key in sorted(sp): # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(sp[key]['spectrum'][0],sp[key]['spectrum'][1],'m/z','intensity','Isotope Patterns',key)

        # chroms only exists when the file was processed from scratch; the
        # former "rd is None" guard could never be true because rd is a bool
        if rd is False:
            for key in sorted(chroms): # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'],chroms[key]['y'],chroms[key]['xunit'],chroms[key]['yunit'],'Function Chromatograms',key)

        # check for UV-Vis species (string equality, not identity)
        uvstuff = any(sp[key]['affin'] == 'UV' for key in sp)
        if uvstuff:
            for ind,val in enumerate(tic['rawUV']): # scale the UV intensities in place
                tic['rawUV'][ind] = val/1000000.
            xlfile.writersim(sp,rtime['rawUV'],'raw','UV-Vis','UV',tic['rawUV']) # write UV-Vis data to sheet

        if sumspec is not None: # write all summed spectra
            for fn in sumspec:
                specname = '%s %s' %(mzml.functions[fn]['mode'],mzml.functions[fn]['level'])
                if 'target' in mzml.functions[fn]:
                    specname += ' %.3f' %mzml.functions[fn]['target']
                specname += ' (%.3f-%.3f)' %(mzml.functions[fn]['window'][0],mzml.functions[fn]['window'][1])
                xlfile.writemultispectrum(sumspec[fn][0],sumspec[fn][1],'m/z','counts','Summed Spectra',specname)

        if verbose:
            sys.stdout.write(' DONE\n')

    def prepformula(dct):
        """
        looks for formulas in a dictionary and prepares them for pullspeciesdata

        Requires the mzML object to be loaded: it is read to set a missing
        'affin' and to determine the resolution for Molecule objects.
        """
        res = None # resolution, determined once on first use
        for species in dct:
            if 'affin' not in dct[species]: # set affinity if not specified
                fn = dct[species]['function']
                if mzml.functions[fn]['type'] == 'MS':
                    dct[species]['affin'] = mzml.functions[fn]['mode']
                if mzml.functions[fn]['type'] == 'UV':
                    dct[species]['affin'] = 'UV'
            if 'formula' in dct[species] and dct[species]['formula'] is not None:
                if res is None:
                    res = int(mzml.auto_resolution())
                dct[species]['mol'].res = res # sets resolution in Molecule object
                dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1] # recalculates sigma with new resolution
                # NOTE(review): this replaces the bounds generated from the
                # 'bounds confidence' keyword - confirm 0.95 is intended
                dct[species]['bounds'] = dct[species]['mol'].bounds(0.95) # calculates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------
    ks = { # default keyword arguments
    'plot': True, # plot the data for a quick look
    'verbose': True, # chatty
    'bounds confidence': 0.99, # confidence interval for automatically generated bounds
    }
    unsupported = set(kwargs.keys()) - set(ks.keys())
    if unsupported: # check for invalid keyword arguments
        string = ''.join(repr(i) for i in unsupported)
        raise KeyError('Unsupported keyword argument(s): %s' %string)
    ks.update(kwargs) # update defaults with provided keyword arguments
    verbose = ks['verbose'] is True # only a literal True enables the chatter

    from tome_v02 import bindata
    from _classes._ScriptTime import ScriptTime
    from _classes._mzML import mzML
    from _classes._Spectrum import Spectrum
    from _classes._Molecule import Molecule
    from _classes._XLSX import XLSX

    if verbose:
        stime = ScriptTime()
        stime.printstart()

    n = checkinteger(n,'number of scans to sum') # checks integer input and converts to list

    if verbose:
        sys.stdout.write('Loading processing parameters from excel file')
        sys.stdout.flush()
    xlfile = XLSX(xlsx)
    sp = xlfile.pullrsimparams()

    mskeys = ['+','-']
    for key in sp:
        if sp[key]['formula'] is not None: # if formula is specified
            sp[key]['mol'] = Molecule(sp[key]['formula']) # create Molecule object
            sp[key]['bounds'] = sp[key]['mol'].bounds(ks['bounds confidence']) # generate bounds from molecule object with this confidence interval
    if verbose:
        sys.stdout.write(' DONE\n')

    rtime = {} # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys: # look for existing positive and negative mode raw data
        try:
            modedata,modetime,modetic = xlfile.pullrsim('Raw Data ('+mode+')')
        except KeyError:
            continue
        if verbose:
            sys.stdout.write('Existing (%s) mode raw data were found, grabbing those values.'%mode)
            sys.stdout.flush()
        rd = True # bool that rd is present
        modekey = 'raw'+mode
        sp.update(modedata) # update sp dictionary with raw data
        for key in modedata: # check for affinities
            if 'affin' not in sp[key]:
                sp[key]['affin'] = mode
        rtime[modekey] = list(modetime) # update time list
        tic[modekey] = list(modetic) # update tic list
        if verbose:
            sys.stdout.write(' DONE\n')

    # prepformula needs the mzML object, so it is called in each branch
    # below right after the file is loaded. Calling it before the file was
    # loaded raised NameError as soon as a species had a formula or lacked
    # an affinity, and did nothing otherwise.
    newpeaks = False
    if rd is True:
        newsp = {}
        sumspec = None
        for key in sp: # checks whether there is a MS species that does not have raw data
            if 'raw' not in sp[key]:
                newsp[key] = sp[key] # create references in the namespace
        if newsp:
            newpeaks = True
            if verbose:
                sys.stdout.write('Some peaks are not in the raw data, extracting these from raw file.\n')
            ips = xlfile.pullmultispectrum('Isotope Patterns') # pull predefined isotope patterns and add them to species
            for species in ips: # set spectrum list
                sp[species]['spectrum'] = [ips[species]['x'],ips[species]['y']]
            mzml = mzML(filename) # load mzML class
            sp = prepformula(sp) # newsp holds references to the same species dictionaries
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(3,newsp[species]['bounds'][0],newsp[species]['bounds'][1])
            newsp = mzml.pull_species_data(newsp) # pull data
        else:
            if verbose:
                sys.stdout.write('No new peaks were specified. Proceeding directly to summing and normalization.\n')
    else: # if no raw data is present, process mzML file
        mzml = mzML(filename,verbose=ks['verbose']) # load mzML class
        sp = prepformula(sp)
        sp,sumspec = mzml.pull_species_data(sp,True) # pull relevant data from mzML
        chroms = mzml.pull_chromatograms() # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp: # compare predicted isotope patterns to the real spectrum and save standard error of the regression
            func = sp[key]['function']
            if mzml.functions[func]['type'] == 'MS': # determine mode key
                sp[key]['spectrum'] = sumspec[sp[key]['function']].trim(xbounds=sp[key]['bounds']) # extract the spectrum object
                mode = 'raw'+mzml.functions[func]['mode']
            if mzml.functions[func]['type'] == 'UV':
                mode = 'rawUV'
            if mode not in rtime: # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            if sp[key]['formula'] is not None:
                sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        for fn in sumspec:
            sumspec[fn] = sumspec[fn].trim() # convert Spectrum objects into x,y lists

    if max(n) > 1: # run combine functions if n > 1
        for num in n: # for each n to sum
            if verbose:
                sys.stdout.write('\r%d Summing species traces.' %num)
            sumkey = str(num)+'sum'
            for key in sp: # bin each species
                if sp[key]['affin'] in mskeys or mzml.functions[sp[key]['function']]['type'] == 'MS': # if species is MS related
                    sp[key][sumkey] = bindata(num,1,sp[key]['raw'])
            for mode in mskeys:
                sumkey = str(num)+'sum'+mode
                modekey = 'raw'+mode
                if modekey in rtime: # if there is data for that mode
                    rtime[sumkey] = bindata(num,num,rtime[modekey])
                    tic[sumkey] = bindata(num,1,tic[modekey])
        if verbose:
            sys.stdout.write(' DONE\n')
            sys.stdout.flush()

    for num in n: # normalize each peak's chromatogram
        if verbose:
            sys.stdout.write('\r%d Normalizing species traces.' %num)
            sys.stdout.flush()
        sumkey = str(num)+'sum'
        normkey = str(num)+'norm'
        for mode in mskeys:
            modekey = 'raw'+mode
            if modekey in rtime: # if there is data for that mode
                for key in sp: # for each species
                    if sp[key]['affin'] in mskeys or mzml.functions[sp[key]['function']]['type'] == 'MS': # if species has affinity
                        sp[key][normkey] = []
                        for ind,val in enumerate(sp[key][sumkey]):
                            sp[key][normkey].append(val/(tic[sumkey+sp[key]['affin']][ind]+0.01)) #+0.01 to avoid div/0 errors
    if verbose:
        sys.stdout.write(' DONE\n')

    output() # write data to excel file

    if verbose:
        sys.stdout.write('\rSaving "%s" (this may take some time)' %xlfile.bookname)
        sys.stdout.flush()
    xlfile.save()
    if verbose:
        sys.stdout.write(' DONE\n')

    if ks['plot'] is True:
        if verbose:
            sys.stdout.write('Plotting traces')
        plots() # plots for quick review
        if verbose:
            sys.stdout.write(' DONE\n')
    if verbose:
        stime.printelapsed()

Example #7

Show file

def pyrsir(filename, xlsx, n, **kwargs):
    def checkinteger(val, name):
        """
        This function checks that the supplied value(s) are integers greater than or equal to 1

        A positive integer (or a list or tuple of positive integers) is required for the summing function.
        Please check your input value.
        """
        # NOTE: the docstring above is appended verbatim to the exit messages
        # below, so it is worded for the end user; developer notes live here.
        #
        # val:  a single value, or a list/tuple of values, to validate
        # name: label for the value, used only in the exit messages
        # Returns val unchanged when it is a list or tuple, otherwise [val].
        # Calls sys.exit (raising SystemExit) on the first invalid entry.
        import sys
        if not isinstance(val, (list, tuple)):  # if only one value given for n
            val = [val]
        for num in val:
            # bool is a subclass of int, but True/False is not a scan count
            if isinstance(num, bool) or not isinstance(num, int):
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' %
                         (name, str(num), checkinteger.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' %
                         (name, str(num), checkinteger.__doc__))
        return val

    def plots():
        """
        Generates a set of plots for rapid visual assessment of the supplied n-level(s)

        The top subplot shows the raw TIC and raw species traces; one further
        subplot is drawn for every value in n showing the summed TIC and the
        summed species traces. All MS species with the same sum level are
        drawn onto the same plot.
        Reads n, mskeys, rtime, tic and sp from the enclosing scope and
        returns nothing.
        requirements: pylab as pl
        """
        import pylab as pl
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1  # one panel for the raw data plus one per sum level

        def plotmodes(prefix, datakey):
            """plots the TIC and the matching species traces of every mode that has data"""
            for mode in mskeys:
                modekey = prefix + mode
                if modekey not in rtime:  # no data for this mode
                    continue
                pl.plot(rtime[modekey],
                        tic[modekey],
                        linewidth=0.75,
                        label='TIC')  # plot tic
                for key in sp:  # plot each species
                    # compare by value: the affinity string is not guaranteed
                    # to be the very same object as the entry in mskeys
                    if sp[key]['affin'] == mode:
                        pl.plot(rtime[modekey],
                                sp[key][datakey],
                                linewidth=0.75,
                                label=key)

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot
        plotmodes('raw', 'raw')
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        # booleans rather than 'off'/'on': a non-empty string is truthy
        pl.tick_params(axis='x', labelbottom=False)

        # summed data
        for loc, num in enumerate(n, 2):  # subplot 1 is the raw data
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            plotmodes(sumkey, sumkey)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
        pl.tick_params(axis='x', labelbottom=True)  # only the last panel keeps its x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object

        Reads ks, newpeaks, newsp, sp, n, mskeys, rtime, tic, rd, chroms,
        sumspec, mzml and xlfile from the enclosing scope. Writes, in order:
        raw data sheets, summed/normalized sheets, isotope patterns, function
        chromatograms (only when the data came from the mzML file), UV-Vis
        data and summed spectra. Returns nothing.
        """
        verbose = ks['verbose'] is True

        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            if verbose:
                sys.stdout.write('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                affin = sp[key]['affin']
                delete.append('Raw Data (' + affin + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + affin + ')')
                    delete.append(str(num) + ' Normalized (' + affin + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            if verbose:
                sys.stdout.write(' DONE.\n')

        if verbose:
            sys.stdout.write('Writing to "%s"' % xlfile.bookname)
            sys.stdout.flush()

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode,
                                 tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                if 'raw' + mode not in rtime:  # no data for that mode
                    continue
                if max(n) > 1:  # if data were summed
                    sheetname = str(num) + ' Sum (' + mode + ')'
                    xlfile.writersim(
                        sp, rtime[sumkey + mode], sumkey, sheetname, mode,
                        tic[sumkey + mode])  # write summed data
                sheetname = str(num) + ' Normalized (' + mode + ')'
                xlfile.writersim(sp, rtime[sumkey + mode], normkey,
                                 sheetname, mode)  # write normalized data

        for key in sorted(sp):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(sp[key]['spectrum'][0],
                                          sp[key]['spectrum'][1], 'm/z',
                                          'intensity', 'Isotope Patterns', key)

        # rd is only ever False or True, so the previous "rd is None" test
        # never fired; chroms exists only when no raw data was found in the
        # workbook (rd is False)
        if not rd:
            for key in sorted(chroms):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'],
                                          chroms[key]['xunit'],
                                          chroms[key]['yunit'],
                                          'Function Chromatograms', key)

        # check for UV-Vis spectra (by value, not by string identity)
        uvstuff = any(sp[key]['affin'] == 'UV' for key in sp)
        if uvstuff:
            for ind, val in enumerate(
                    tic['rawUV']):  # normalize the UV intensities
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV',
                             tic['rawUV'])  # write UV-Vis data to sheet

        if sumspec is not None:  # write all summed spectra
            for fn in sumspec:
                func = mzml.functions[fn]
                specname = '%s %s' % (func['mode'], func['level'])
                if 'target' in func:
                    specname += ' %.3f' % func['target']
                specname += ' (%.3f-%.3f)' % (func['window'][0],
                                              func['window'][1])
                xlfile.writemultispectrum(sumspec[fn][0], sumspec[fn][1],
                                          'm/z', 'counts', 'Summed Spectra',
                                          specname)

        if verbose:
            sys.stdout.write(' DONE\n')

    def prepformula(dct):
        """
        looks for formulas in a dictionary and prepares them for pullspeciesdata

        dct: dictionary of species dictionaries. Species lacking an 'affin'
            key get one from the type/mode of their mzML function; species
            with a non-None 'formula' have the resolution and sigma of their
            'mol' object updated and their 'bounds' recalculated.
        returns: the same dictionary (modified in place)

        Reads mzml from the enclosing scope, but only when an affinity is
        missing or a formula is present.
        """
        res = None  # resolution, determined once on first use and shared by all species
        for species in dct:
            entry = dct[species]
            if 'affin' not in entry:  # set affinity if not specified
                fn = entry['function']
                ftype = mzml.functions[fn]['type']
                if ftype == 'MS':
                    entry['affin'] = mzml.functions[fn]['mode']
                elif ftype == 'UV':
                    entry['affin'] = 'UV'
            if entry.get('formula') is not None:
                if res is None:
                    res = int(mzml.auto_resolution())
                mol = entry['mol']
                mol.res = res  # sets resolution in Molecule object
                mol.sigma = mol.sigmafwhm()[
                    1]  # recalculates sigma with new resolution
                # NOTE(review): 0.95 is hard-coded here while the initial
                # bounds use ks['bounds confidence'] - confirm this is intended
                entry['bounds'] = mol.bounds(0.95)  # calculates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------
    # Overview of the steps below:
    #   1. merge and validate the keyword arguments
    #   2. load the species parameters (and any existing raw data) from the workbook
    #   3. pull whatever is still missing from the mzML file
    #   4. sum (bin) and normalize every species trace for each value of n
    #   5. write everything to the workbook, save it and optionally plot
    ks = {  # default keyword arguments
        'plot': True,  # plot the data for a quick look
        'verbose': True,  # chatty
        'bounds confidence':
        0.99,  # confidence interval for automatically generated bounds
    }
    unsupported = set(kwargs) - set(ks)  # check for invalid keyword arguments
    if unsupported:
        raise KeyError('Unsupported keyword argument(s): %s' %
                       ', '.join(repr(i) for i in sorted(unsupported)))
    ks.update(kwargs)  # update defaults with provided keyword arguments
    verbose = ks['verbose'] is True

    # the imports below bind bindata, ScriptTime, mzML, Spectrum, Molecule and
    # XLSX; none of the names in this global statement is assigned here
    global tome_v02, _ScriptTime, _mzML, _Spectrum, _Molecule, _XLSX
    from tome_v02 import bindata
    from _classes._ScriptTime import ScriptTime
    from _classes._mzML import mzML
    from _classes._Spectrum import Spectrum
    from _classes._Molecule import Molecule
    from _classes._XLSX import XLSX

    if verbose:
        stime = ScriptTime()
        stime.printstart()

    n = checkinteger(
        n,
        'number of scans to sum')  # validates the input; a single value is wrapped in a list

    if verbose:
        sys.stdout.write('Loading processing parameters from excel file')
        sys.stdout.flush()
    xlfile = XLSX(xlsx)
    sp = xlfile.pullrsimparams()

    mskeys = ['+', '-']
    for key in sp:
        if sp[key]['formula'] is not None:  # if formula is specified
            sp[key]['mol'] = Molecule(
                sp[key]['formula'])  # create Molecule object
            # generate bounds from molecule object with this confidence interval
            sp[key]['bounds'] = sp[key]['mol'].bounds(ks['bounds confidence'])
    if verbose:
        sys.stdout.write(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys:  # look for existing positive and negative mode raw data
        try:
            modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode +
                                                          ')')
        except KeyError:
            continue
        if verbose:
            sys.stdout.write(
                'Existing (%s) mode raw data were found, grabbing those values.'
                % mode)
            sys.stdout.flush()
        rd = True  # bool that rd is present
        modekey = 'raw' + mode
        sp.update(modedata)  # update sp dictionary with raw data
        for key in modedata:  # check for affinities
            sp[key].setdefault('affin', mode)
        rtime[modekey] = list(modetime)  # update time list
        tic[modekey] = list(modetic)  # update tic list
        if verbose:
            sys.stdout.write(' DONE\n')

    # prepformula reads mzml (function types and auto_resolution), so it is
    # called only after the mzML object has been loaded in each branch below;
    # calling it before that raised a NameError for any species with a
    # formula or without an affinity
    newpeaks = False
    if rd is True:
        sumspec = None
        # references to the species that do not have raw data yet
        newsp = {}
        for key in sp:
            if 'raw' not in sp[key]:
                newsp[key] = sp[key]  # create references in the namespace
        if newsp:
            newpeaks = True
            if verbose:
                sys.stdout.write(
                    'Some peaks are not in the raw data, extracting these from raw file.\n'
                )
            ips = xlfile.pullmultispectrum(
                'Isotope Patterns'
            )  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [
                    ips[species]['x'], ips[species]['y']
                ]
            mzml = mzML(filename)  # load mzML class
            newsp = prepformula(newsp)  # prep formula species for summing
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(
                        3, newsp[species]['bounds'][0],
                        newsp[species]['bounds'][1])
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            # NOTE(review): mzml is never loaded on this path, yet the summing
            # and normalizing steps below fall back to mzml.functions for any
            # species whose affinity is not in mskeys - confirm that cannot occur
            if verbose:
                sys.stdout.write(
                    'No new peaks were specified. Proceeding directly to summing and normalization.\n'
                )
    else:  # if no raw data is present, process mzML file
        mzml = mzML(filename, verbose=ks['verbose'])  # load mzML class
        sp = prepformula(sp)
        sp, sumspec = mzml.pull_species_data(
            sp, True)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        # rtime and tic are still empty here (they are only filled when raw
        # data was found in the workbook)
        for key in sp:  # compare predicted isotope patterns to the real spectrum and save standard error of the regression
            func = sp[key]['function']
            ftype = mzml.functions[func]['type']
            if ftype == 'MS':  # determine mode key
                sp[key]['spectrum'] = sumspec[func].trim(
                    xbounds=sp[key]['bounds'])  # extract the spectrum object
                mode = 'raw' + mzml.functions[func]['mode']
            elif ftype == 'UV':
                mode = 'rawUV'
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            if sp[key]['formula'] is not None:
                sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        for fn in sumspec:
            sumspec[fn] = sumspec[fn].trim(
            )  # convert Spectrum objects into x,y lists

    if max(n) > 1:  # run combine functions if n > 1
        for num in n:  # for each n to sum
            if verbose:
                sys.stdout.write('\r%d Summing species traces.' % num)
            sumkey = str(num) + 'sum'
            for key in sp:  # bin each species
                if sp[key]['affin'] in mskeys or mzml.functions[
                        sp[key]['function']]['type'] == 'MS':  # if species is MS related
                    sp[key][sumkey] = bindata(num, 1, sp[key]['raw'])
            for mode in mskeys:
                modekey = 'raw' + mode
                if modekey in rtime:  # if there is data for that mode
                    rtime[sumkey + mode] = bindata(num, num, rtime[modekey])
                    tic[sumkey + mode] = bindata(num, 1, tic[modekey])
        if verbose:
            sys.stdout.write(' DONE\n')
            sys.stdout.flush()

    # NOTE(review): when max(n) == 1 the summing step above is skipped, so the
    # '<n>sum' keys read below exist only if something else created them -
    # confirm that n = 1 is a supported input
    for num in n:  # normalize each peak's chromatogram
        if verbose:
            sys.stdout.write('\r%d Normalizing species traces.' % num)
            sys.stdout.flush()
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        for mode in mskeys:
            if 'raw' + mode not in rtime:  # no data for that mode
                continue
            for key in sp:  # for each species
                if sp[key]['affin'] in mskeys or mzml.functions[
                        sp[key]['function']]['type'] == 'MS':  # if species has affinity
                    # +0.01 to avoid div/0 errors
                    sp[key][normkey] = [
                        val / (tic[sumkey + sp[key]['affin']][ind] + 0.01)
                        for ind, val in enumerate(sp[key][sumkey])
                    ]
    if verbose:
        sys.stdout.write(' DONE\n')

    output()  # write data to excel file

    if verbose:
        sys.stdout.write('\rSaving "%s" (this may take some time)' %
                         xlfile.bookname)
        sys.stdout.flush()
    xlfile.save()
    if verbose:
        sys.stdout.write(' DONE\n')

    if ks['plot'] is True:
        if verbose:
            sys.stdout.write('Plotting traces')
        plots()  # plots for quick review
        if verbose:
            sys.stdout.write(' DONE\n')
    if verbose:
        stime.printelapsed()

Example #8

Show file

File: msms interpreter assistant.py Project: Yeungdb/mass-spec-python-tools

def mia(filename, dec=0):
    """MS/MS interpreter assistant"""
    def indexes(x, y, thres=0.3, min_dist=None):
        """
        Peak detection routine
        (based on PeakUtils https://bitbucket.org/lucashnegri/peakutils).

        A point counts as a peak when the first order difference of *y* is
        positive before it and negative after it, and its amplitude exceeds
        the threshold. When *min_dist* is given, peaks are visited from the
        tallest down and every other peak whose x value lies within
        *min_dist* of a kept peak is dropped.

        Parameters
        ----------
        x : list or ndarray
            x values belonging to *y*; searched with np.searchsorted
        y : list or ndarray (signed)
            1D amplitude data to search for peaks
        thres : float
            fraction of the y range (above the y minimum) that a peak has to
            exceed
        min_dist : number or None
            minimum x distance between detected peaks; None keeps every peak

        Returns
        -------
        ndarray
            indexes of the detected peaks

        Raises
        ------
        ValueError
            if *y* is an ndarray with an unsigned integer dtype
        """
        if isinstance(y, np.ndarray) and np.issubdtype(y.dtype,
                                                       np.unsignedinteger):
            raise ValueError("y must be signed")
        y = np.asarray(y)  # no-op for a plain ndarray, converts anything else

        # scale the relative threshold onto the actual range of y
        cutoff = thres * (np.max(y) - np.min(y)) + np.min(y)

        # a peak rises on its left side and falls on its right side
        slope = np.diff(y)
        falls_after = np.hstack([slope, 0.]) < 0.
        rises_before = np.hstack([0., slope]) > 0.
        found = np.where(falls_after & rises_before & (y > cutoff))[0]

        if found.size <= 1 or min_dist is None:  # nothing to thin out
            return found

        dropped = np.ones(y.size, dtype=bool)  # True for everything that is not a kept peak
        dropped[found] = False
        last = len(y) - 1
        for peak in found[np.argsort(y[found])][::-1]:  # tallest peak first
            if dropped[peak]:  # already removed by a taller neighbour
                continue
            centre = x[peak]
            # index window covering centre +/- min_dist on the x axis
            lo = max(0, np.searchsorted(x, centre - min_dist))
            hi = min(last, np.searchsorted(x, centre + min_dist))
            dropped[lo:hi] = True  # drop everything inside the window ...
            dropped[peak] = False  # ... except the peak itself

        return np.arange(y.size)[~dropped]

    def com_loss(dec=0, custom_losses=None):
        """
        takes a common loss dictionary and reduces the keys to the specified decimal place

        dec: number of decimal places to round the keys to (0 gives int keys)
        custom_losses: optional iterable of items handed to Molecule; each is
            added under the rounded value of the molecule's em attribute
        returns: dictionary of rounded value -> label; labels that land on
            the same rounded value are joined with ', '
        raises: ValueError if dec exceeds the stored number of decimal places
        """
        from _classes.common_losses import losses, stored_dec
        if dec > stored_dec:
            raise ValueError(
                'The specified number of decimal places (%d) exceeds the number stored (%d)'
                % (dec, stored_dec))

        def shorten(value):
            """rounds a value to dec places (to an int when dec is 0)"""
            value = round(value, dec)
            return int(value) if dec == 0 else value

        def add(table, value, label):
            """stores label under the rounded value, appending to an existing entry"""
            key = shorten(value)
            if key in table:
                table[key] += ', ' + label
            else:
                table[key] = label

        out = {}
        for key in losses:  # round values and add to dictionary
            add(out, key, losses[key])
        if custom_losses is not None:  # if supplied with a custom list of losses
            from _classes._Molecule import Molecule
            for item in custom_losses:
                add(out, Molecule(item).em, item)
        return out

    def tabulate(diffs):
        """
        prints the difference matrix as a tab separated table

        The header row and the first column hold the x values of the detected
        peaks (x and inds come from the enclosing scope); every data row is
        followed by a blank line.
        """
        # header: an empty corner cell followed by one column per peak
        print('\t' + ''.join('%.1f\t' % x[ind] for ind in inds))
        for ind, row in enumerate(diffs):
            cells = ['%.1f\t' % round(x[inds[ind]], 1)]  # row label
            cells.extend('%.1f\t' % round(col, 1) for col in row)
            print(''.join(cells) + '\n')

    def guess(diffs):
        """
        searches for common integer losses among the differences matrix and prints them

        Every positive difference is rounded to an integer and looked up in
        the dictionary returned by com_loss (built with 0 decimal places and
        specific_components as custom losses); each hit is printed as
        "<from> -> <to>: <difference> <label>". x and inds come from the
        enclosing scope.
        """
        # dictionary of loss values and their probable representation
        loss = com_loss(0, specific_components)
        print('possible fragment assignments (from common losses):')
        for ind, row in enumerate(diffs):
            for ind2, delta in enumerate(row):
                val = int(round(delta))
                if val <= 0 or val not in loss:
                    continue
                print('%s -> %s: %s %s' % (repr(x[inds[ind]]),
                                           repr(x[inds[ind2]]), val,
                                           loss[val]))

    import numpy as np
    from _classes._mzML import mzML

    # sum every scan of the file into a single spectrum
    x, y = mzML(filename).sum_scans()

    # if not all peaks are being detected, decrease the last value handed to indexes
    inds = indexes(x, y, 0.01, 7)

    # square matrix of x differences: diffs[a][b] = x[inds[a]] - x[inds[b]]
    diffs = [[x[i] - x[j] for j in inds] for i in inds]

    tabulate(diffs)  # tabulate differences in console
    guess(diffs)  # guess at what the differences might mean

    # label every detected peak with its x value, placed at its height
    # expressed as a percentage of the tallest point
    top = max(y)
    annotations = {}
    for i in inds:
        height = float(y[i]) / float(top) * 100.
        annotations[str(x[i])] = [x[i], height]
    from tome_v02 import plotms
    plotms([x, y], annotations=annotations, output='show')

Example #9

Show file

File: uv-vis plotter.py Project: Yeungdb/mass-spec-python-tools

# override settings here
# (every entry is commented out, so the dictionary is empty as written;
# uncomment a line to set that key)
override = {
    #'fs':16, # font size
    #'lw':1.5, # line width of traces
    #'size':[7.87,4.87], # image size [width,length] in inches
    #'xrange':[500,700], # wavelength bounds (in nm)
    #'yrange':[0,3], # absorbance bounds(in a.u.)
    #'legloc':0, # legend location (see http://matplotlib.org/api/legend_api.html for more location codes)
}

# Pulls the UV-Vis spectra recorded between start and end from the file.
# NOTE(review): filename, start, end and deltat are not defined in this
# excerpt - presumably they are set elsewhere in the script; confirm.
if __name__ == '__main__':
    from _classes._mzML import mzML
    from numpy import arange  # scipy.arange was only a deprecated alias of numpy.arange
    from tome_v02 import locateinlist, plotuv
    mzml = mzML(filename, ftt=True)  # initiate mzml object
    fn = mzml.associate_to_function(
        'UV')  # determine which function contains UV-Vis data
    uvspecs = mzml.retrieve_scans(start, end, fn)  # pull uv spectra
    wavelengths = list(uvspecs[0][0])  # wavelength list
    uvspecs = [y
               for x, y in uvspecs]  # set uvspecs list to be only the y values
    timepoints = mzml.functions[fn][
        'timepoints']  # pull time points of the UV function
    l, r = locateinlist(timepoints, start, 'greater'), locateinlist(
        timepoints, end, 'lesser')  # locate indices of timepoints
    timepoints = timepoints[l:r + 1]  # trim time list accordingly
    times = arange(start, end,
                   deltat)  # evenly spaced times between start and end

    specin = []

Example #10

Show file

File: ED-ESI plot.py Project: Yeungdb/mass-spec-python-tools

        
        debugPrint(EDESIkwargs['debug'], "Breakdown End")

    plt.savefig('../{OUTFILE}.png'.format(OUTFILE = outputFile+str(minFilter)), bbox_inches='tight')
##################################

###############################################################
#MAIN
###############################################################
# Sets the processing and plotting parameters, loads the file into an mzML
# object and collects the MS/MS functions it contains (functions of type
# 'MS' with a level above 1). When more than one is found, a table of
# function number and target is written to stdout.
#_mzML processing variables
filename = 'HZ-140516_HOTKEYMSMS 1376 II.raw' # raw or mzml file name
fillzeros = True # fills spectrum with zeros
decpl = 1 # number of decimal places to track
mzrange = None # mzrange to track
sr = 'all' # scan range to track
mzml = mzML(filename,verbose=True)

#EDESI Plot Production variable
minFilter = 20 # minFilter intensity value
threshold = 1156 # threshold of peak height for Breakdown tracing
plotBreakdown = True # Construct Plot with Breakdown?
plotZoom = True # Construct Plot with Zoom in region of interest? (Autozoom)

msmsfns = [] # keys of mzml.functions that are MS/MS functions
for func in mzml.functions: # identify MSMS functions in the provided file
    if mzml.functions[func]['type'] == 'MS' and mzml.functions[func]['level'] > 1:
        msmsfns.append(func)
if len(msmsfns) > 1: # if there is more than one msms function, ask the user which one to process
    sys.stdout.write('More than one MS/MS function is contained in this mzML file. Please indicate which one you wish to process:\nFunction\ttarget\n')
    for func in msmsfns: # one table row per MS/MS function
        sys.stdout.write('%d\t%.3f\n' %(func,mzml.functions[func]['target']))

Example #11

Show file

File: uv-vis plotter.py Project: Yeungdb/mass-spec-python-tools

# override settings here
# (every entry is commented out, so the dictionary is empty as written;
# uncomment a line to set that key)
override = {
#'fs':16, # font size
#'lw':1.5, # line width of traces
#'size':[7.87,4.87], # image size [width,length] in inches
#'xrange':[500,700], # wavelength bounds (in nm)
#'yrange':[0,3], # absorbance bounds(in a.u.)
#'legloc':0, # legend location (see http://matplotlib.org/api/legend_api.html for more location codes)
}

if __name__ == '__main__':
    from _classes._mzML import mzML
    from scipy import arange
    from tome_v02 import locateinlist,plotuv
    mzml = mzML(filename,ftt=True) # initiate mzml object
    fn = mzml.associate_to_function('UV') # determine which function contains UV-Vis data
    uvspecs = mzml.retrieve_scans(start,end,fn) # pull uv spectra
    wavelengths = list(uvspecs[0][0]) # wavelength list
    uvspecs = [y for x,y in uvspecs] # set uvspecs list to be only the y values
    timepoints = mzml.functions[fn]['timepoints'] # pull time points of the UV function
    l,r = locateinlist(timepoints,start,'greater'),locateinlist(timepoints,end,'lesser') # locate indicies of timepoints
    timepoints = timepoints[l:r+1] # trim time list accordingly
    times = arange(start,end,deltat) # evenly spaced times between start and end
    
    specin = []
    for time in times:
        ind = locateinlist(timepoints,time) # find the closest time to that
        specin.append(uvspecs[ind]) # append that spectrum to the input list
    
    if override.has_key('outname') is False:

Example #12

Show file

File: video frame renderer.py Project: Yeungdb/mass-spec-python-tools

def spectrumtrace(filename,sp,scr='all',n=1,mz='all',inj=0.,save=1):
    """
    plots a mass spectrum and an intensity trace for every scan in a raw file based on the parameters supplied
    one image per rendered (binned) spectrum is written to the 'imgs' folder of the current working directory
    
    filename: the *.raw filename in the working directory to work from
        (handed unchanged to the mzML class)
    sp: dictionary of species to track and render
        each entry is read for 'bounds' ([m/z start, m/z end]), 'colour' and 'affin' (compared against '+' and '-')
        the dictionary is modified in place (trace lists are added, 'colour' is converted to matplotlib format)
    scr: scan range to render
        [start,end] or 'all' to render every binned spectrum
    n: number of scans to sum
        integer
    mz: mz range to track
        [m/z start, m/z end]
        (passed to mzML.pullspectra as mzrange; mz is then rebound to the third value that call returns)
    inj: injection point (e.g. for catalyst injection)
        float (subtracted from the time axis and from the timepoint marker positions)
    save: save every # number of scans
        integer (raised to n when smaller; NOTE: it is not read anywhere else in this function)
    
    NOTE: plotit iterates over a mapping called timepoints (label -> time) which is not created
    in this function; it is expected to exist at module level.
    """
    from tome_v02 import bindata,binnspectra
    from _classes._mzML import mzML
    from _classes._ScriptTime import ScriptTime
    from _classes._Spectrum import Spectrum
    from _classes._Colour import Colour
    from bisect import bisect_left ,bisect_right
    import pylab as pl
    import os,sys
    
    st = ScriptTime(profile=True)
    
    @st.profilefn
    def plotit(x,y,index):
        """
        generates and saves the image for one time point
        input: x and y of mass spectrum, index of current time point
        mintime, maxtime, curspec and itr are read from the enclosing scope (set by the render loop below)
        """
        fig = pl.figure(figsize = (9.6,5.4),dpi= 100) # 9.6x5.4 inches; saved at 200 dpi below this gives 1920x1080
        font = {'fontname':'Arial'} #font parameters for axis/text labels
        tickfont = pl.matplotlib.font_manager.FontProperties(family='Arial',size=fs) # font parameters for axis ticks
    
        axl = fig.add_subplot(121) # left subplot (mass spectrum)
        axl.spines["right"].set_visible(False)
        axl.spines["top"].set_visible(False)
        axl.spines["bottom"].set_visible(False)
        axl.plot(x,y, 'k-', lw=0.75)
        
        pl.xlabel('m/z', style='italic',**font)
        pl.ylabel('Relative Intensity',**font)
        for axis in ["top","bottom","left","right"]:
            axl.spines[axis].set_linewidth(axwidth)
        axl.spines["bottom"].set_position(('axes',-0.01)) #offset x axis
        for label in axl.get_yticklabels():
            label.set_fontproperties(tickfont)
        for label in axl.get_xticklabels():
            label.set_fontproperties(tickfont)    
        # booleans instead of the string 'off': newer matplotlib no longer converts 'on'/'off' strings
        pl.tick_params(axis='y', length=axwidth*3, width=axwidth, direction='out',right=False)
        pl.tick_params(axis='x', length=axwidth*3, width=axwidth, direction='out',top=False)
        for key in sp: #ind,val in enumerate(sp):
            l,r = bisect_left(x,sp[key]['bounds'][0]),bisect_right(x,sp[key]['bounds'][1]) # index location of selected peak in spectrum
            axl.plot(x[l:r],y[l:r],color = sp[key]['colour'], lw=1) # plot spectrum in colour for selected peaks
            axl.text(sp[key]['bounds'][0],1.01,key,fontsize=fs,color=sp[key]['colour'])
            
        pl.xlim(mz)
        pl.ylim([-0.001,1])
        
        axr = fig.add_subplot(122) # right subplot (chromatogram)
        pl.xlim([mintime,maxtime])
        pl.ylim([-0.001,1])
        axr.spines["right"].set_visible(False)
        axr.spines["top"].set_visible(False)
        pl.tick_params(axis='y', length=axwidth*3, width=axwidth, direction='out',right=False)
        pl.tick_params(axis='x', length=axwidth*3, width=axwidth, direction='out',top=False)
        
        for mode in mskeys: 
            sumkey = str(n)+'sum'+mode
            spkey = str(n)+'norm'
            for key in sp:
                if sp[key]['affin'] == mode: # pair species with appropriate rtime (== rather than is: string identity is not guaranteed)
                    axr.plot(rtime[sumkey][:index],sp[key][spkey][:index], linewidth = 1.0, label = key, color = sp[key]['colour'])
        pl.xlabel('time (min)',fontsize = fs, **font)
        pl.tick_params(axis='y',labelleft=False)
        for label in axr.get_yticklabels():
            label.set_fontproperties(tickfont)
        for label in axr.get_xticklabels():
            label.set_fontproperties(tickfont)
        
        for key in timepoints: # add vertical timepoint lines
            if maxtime+inj >= timepoints[key]: # and rtime[0] <= timepoints[ind][1]
                pl.axvline(x=(timepoints[key]-inj), ymin = 0, ymax = 1, linewidth=0.75, color = 'b', linestyle = ':')
                pl.text(timepoints[key]-inj,0.5,key, fontsize = fs, color = 'b', backgroundcolor = 'w', rotation = 'vertical', horizontalalignment='center',verticalalignment='center',alpha = 0.75,**font)
        textx = maxtime - (maxtime-mintime)*infop # calculate location for scan number and time text
        pl.text(textx,0.96, 'scan %i' %curspec,fontsize=fs,**font) # text for scan number
        pl.text(textx,0.92, '%.1f min' %maxtime,fontsize=fs,**font) # text for time
        pl.subplots_adjust(left = 0.07, right = 0.99, bottom = 0.095, top = 0.96, wspace = 0.06, hspace = 0.05) # hard coded subplot tightening
        #pl.tight_layout(pad=0.75) # automatically tighten subplots
        dpiset = 200 # 100 is 960x540, 150dpi is 1440x810, 200dpi is 1920x1080
        # os.path.join keeps the path valid on every OS (the hard coded backslashes only worked on Windows)
        # figsize is not a savefig argument (the size is fixed when the figure is created), so it is not passed
        # str(itr)[2:6] is the scan number as 4 zero-padded digits
        pl.savefig(os.path.join(os.getcwd(),'imgs','scan'+str(itr)[2:6]+'.png'),dpi=dpiset)
        pl.clf()
        pl.close()
    
    @st.profilefn
    def msfignorm(x,y):
        """
        Normalizes the height of a mass spectrum
        The height will be the sum of the heights of the base peaks in the window
        
        The function will normalize the y-values (assumes intensity) and return them
        (y is modified in place and the same object is returned)
        """
        height = 0. # starting point (float so that the division below is never integer division)
        for key in sp:
            l,r = bisect_right(x,sp[key]['bounds'][0]),bisect_left(x,sp[key]['bounds'][1]) # index location of selected peak in spectrum
            try:
                height += max(y[l:r]) # add maximum in selected region to height
            except ValueError: # if no intensity in region
                height += 0.01    
        
        for ind,val in enumerate(y): #normalizes all y values
            y[ind] = val/height
        
        return y
    
    def timelimits(index):
        """
        finds the appropriate time limits for the traces
        input: index of the current time point
        returns: mintime,maxtime (smallest first time and largest time at index over the '+' and '-' traces)
        """
        mintime = 10000
        maxtime = -10000
        for mode in mskeys:
            sumkey = str(n)+'sum'+mode
            if sumkey in rtime.keys():
                if rtime[sumkey][0] < mintime:
                    mintime = rtime[sumkey][0]
                if index == 0: # use the second time point for the first spectrum (presumably so the time axis is not zero width)
                    index +=1
                if rtime[sumkey][index] > maxtime:
                    maxtime = rtime[sumkey][index]
        return mintime,maxtime
    
    st.printstart()
    
    # axis line width
    axwidth = 1.0
    # fontsize
    fs = 12
    # left-right scalar for scan info placement
    infop = 0.2
    
    if save < n: # if the script is told to save more often than it sums
        save = n
    
    mskeys = ['+','-']
    for key in sp: # append list places for chrom, summed chrom, and normalized chrom
        sp[key]['raw'] = []
        sp[key]['spectrum'] = Spectrum(3,startmz=sp[key]['bounds'][0],endmz=sp[key]['bounds'][1])
        sp[key]['%s' %(str(n)+'sum')] = []
        sp[key]['%s' %(str(n)+'norm')] = []
    
    mzml = mzML(filename) # load mzML class
    
    sp,TIC,rtime = mzml.pullspeciesdata(sp) # integrate species
    spec,sr,mz = mzml.pullspectra(mzrange=mz) # pull all spectra
    
    # run combine, regardless if called for (in order for keys to be correct
    #if n > 1: # run combine functions if n > 1
    sys.stdout.write('%s summing and normalizing species traces' %str(n))
    sumkey = str(n)+'sum'
    normkey = str(n)+'norm'
    sumsp = [] # per time point sum over all species, used as the normalization denominator
    for key in sp:
        sp[key][sumkey] = bindata(n,1,sp[key]['raw']) # bin each species
        sp[key]['colour'] = Colour(sp[key]['colour']).mpl # convert colour into matplotlib format
        for ind,val in enumerate(sp[key][sumkey]): # for normalization
            try:
                sumsp[ind] += val
            except IndexError: # first species to reach this index
                sumsp.append(val)
       
    for mode in mskeys: 
        sumkey = str(n)+'sum'+mode
        modekey = 'raw'+mode
        if modekey in rtime.keys(): # if there is data for that mode
            rtime[sumkey] = bindata(n,n,rtime[modekey])
            for ind,val in enumerate(rtime[sumkey]):
                rtime[sumkey][ind] = val - inj # shift time data to zero at injection point
            TIC[sumkey] = bindata(n,1,TIC[modekey])
            for key in sp: # for each species
                if sp[key]['affin'] in mskeys: # if species has affinity
                    spkey = str(n)+'sum'
                    sp[key][normkey] = []
                    for ind,val in enumerate(sp[key][spkey]):
                        sp[key][normkey].append(val/(sumsp[ind]+0.01)) #+0.01 to avoid div/0 errors
    sys.stdout.write(' DONE\n')
    sys.stdout.flush()

    spec = binnspectra(spec,n,startmz=mz[0],endmz=mz[1]) # bin mass spectra

    if isinstance(scr,str) and scr == 'all': # default: render every binned spectrum
        scr = [1,(len(spec)-1)*n+1] # first and last scan number produced by the loop below
    scanspan = float(scr[1])-float(scr[0]) # width of the scan range, for the progress readout

    if not os.path.isdir('imgs'): # check for /img directory and create if missing
        os.makedirs('imgs')
    for ind,val in enumerate(spec):
        curspec = ind*n+1 # scan number of the first scan in this bin
        if curspec >= scr[0] and curspec <= scr[1]: # if index is within scanrange to output
            if scanspan: # a single scan range has zero width, avoid dividing by it
                progress = (float(curspec)-float(scr[0]))/scanspan*100.
            else:
                progress = 100.
            sys.stdout.write('\rRendering scan #%i %.1f%% (scan range: %i to %i)' %(curspec,progress,scr[0],scr[1]))
            val[1] = msfignorm(val[0],val[1]) # normalize spectrum
            
            itr = str(100000+curspec) # offset so that the slice taken in plotit is zero padded
            mintime,maxtime = timelimits(ind)
            plotit(val[0],val[1],ind)
    sys.stdout.write(' DONE\n')
    st.printend()
    st.printprofiles()