def test_mzml(self):
    """
    Checks the mzML object loaded from the 'MultiTest' validation file: function keys, the
    chromatogram and spectrum decorators, intensity summing, and scan/time indexing.
    """
    source_path = os.path.join(validation_file_path, 'MultiTest')
    mzml = mzML(source_path, verbose=False)

    # the correct function keys were pulled
    self.assertEqual(mzml.functions.keys(), {1, 3, 4})

    @mzml.foreachchrom
    def testperchrom(chromatogram):
        return branch_attributes(chromatogram)['id']

    # chromatogram decorator
    expected_ids = [u'TIC', u'SRM SIC Q1=200 Q3=100 function=2 offset=0']
    self.assertEqual(testperchrom(), expected_ids)

    @mzml.foreachscan
    def testperspec(spectrum):
        return branch_cvparams(spectrum)["MS:1000016"].value

    # spectrum decorator
    expected_values = [
        0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
        0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
        0.0847499967, 0.20341666, 0.322033346, 0.440683335,
    ]
    self.assertEqual(testperspec(), expected_values)

    # intensity summing
    summed = mzml.sum_scans()
    self.assertEqual(sum(summed[1]), 162804754.0)

    # scan indexing
    by_scan = mzml[2]
    self.assertEqual(sum(by_scan[1]), 6742121)

    # time indexing
    by_time = mzml[0.01]
    self.assertEqual(sum(by_time[1]), 56270834)
def bin_spectra(filename, start=None, end=None, save=True, dec=3, function=None):
    """
    Sums spectra from raw file and outputs to excel file

    :param filename: raw or mzML filename
    :param start: start scan (None will default to ``functions[function]['sr'][0] + 1``)
    :param end: end scan (None will default to ``functions[function]['sr'][1] + 1``)
    :param save: whether to save into an excel document (if a string is provided, that filename will be used;
        only ``False`` disables saving)
    :param dec: decimal places to track when binning the spectrum
    :param function: mzml function number to sum (None will use the value returned by
        ``mzml.associate_to_function()``)
    :return: paired x, summed y lists
    """

    st = ScriptTime()
    st.printstart()
    mzml = mzML(filename)  # create mzML object
    if function is None:
        function = mzml.associate_to_function()
    # the 'sr' entry is only consulted when a bound was not supplied by the caller
    if start is None:
        start = mzml.functions[function]['sr'][0] + 1
    if end is None:
        end = mzml.functions[function]['sr'][1] + 1
    x, y = mzml.sum_scans(
        start=start,
        end=end,
        function=function,
        dec=dec,
    )
    if save is not False:
        # isinstance (rather than an exact type comparison) also accepts str subclasses as a filename;
        # any other value falls back to a workbook named after the input file
        xlname = save if isinstance(save, str) else f'{filename}.xlsx'
        xlfile = XLSX(xlname, create=True)
        xlfile.writespectrum(  # write the spectrum to file
            x,
            y,
            'summed spectra (scans %d-%d)' % (start, end)
        )
        xlfile.save()
    st.printend()
    return x, y
Beispiel #3
0
def bin_spectra(filename,
                start=None,
                end=None,
                save=True,
                dec=3,
                function=None):
    """
    Sums spectra from raw file and outputs to excel file

    :param filename: raw or mzML filename
    :param start: start scan (None will default to ``functions[function]['sr'][0] + 1``)
    :param end: end scan (None will default to ``functions[function]['sr'][1] + 1``)
    :param save: whether to save into an excel document (if a string is provided, that filename will be used;
        only ``False`` disables saving)
    :param dec: decimal places to track when binning the spectrum
    :param function: mzml function number to sum (None will use the value returned by
        ``mzml.associate_to_function()``)
    :return: paired x, summed y lists
    """

    st = ScriptTime()
    st.printstart()
    mzml = mzML(filename)  # create mzML object
    if function is None:
        function = mzml.associate_to_function()
    # the 'sr' entry is only consulted when a bound was not supplied by the caller
    if start is None:
        start = mzml.functions[function]['sr'][0] + 1
    if end is None:
        end = mzml.functions[function]['sr'][1] + 1
    x, y = mzml.sum_scans(
        start=start,
        end=end,
        function=function,
        dec=dec,
    )
    if save is not False:
        # isinstance (rather than an exact type comparison) also accepts str subclasses as a filename;
        # any other value falls back to a workbook named after the input file
        xlname = save if isinstance(save, str) else f'{filename}.xlsx'
        xlfile = XLSX(xlname, create=True)
        xlfile.writespectrum(  # write the spectrum to file
            x, y, 'summed spectra (scans %d-%d)' % (start, end))
        xlfile.save()
    st.printend()
    return x, y
Beispiel #4
0
    def test_mzml(self):
        """
        Checks the mzML object loaded from the 'MultiTest' validation file: function keys, the
        chromatogram and spectrum decorators, intensity summing, and scan/time indexing.
        """
        source_path = os.path.join(validation_file_path, 'MultiTest')
        mzml = mzML(source_path, verbose=False)

        # the correct function keys were pulled
        self.assertEqual(mzml.functions.keys(), {1, 3, 4})

        @mzml.foreachchrom
        def testperchrom(chromatogram):
            return branch_attributes(chromatogram)['id']

        # chromatogram decorator
        expected_ids = [u'TIC', u'SRM SIC Q1=200 Q3=100 function=2 offset=0']
        self.assertEqual(testperchrom(), expected_ids)

        @mzml.foreachscan
        def testperspec(spectrum):
            return branch_cvparams(spectrum)["MS:1000016"].value

        # spectrum decorator
        expected_values = [
            0.0171000008, 0.135733336, 0.254333347, 0.372983336, 0.491699994,
            0.0510833338, 0.169750005, 0.288383335, 0.407000005, 0.525833309,
            0.0847499967, 0.20341666, 0.322033346, 0.440683335,
        ]
        self.assertEqual(testperspec(), expected_values)

        # intensity summing
        summed = mzml.sum_scans()
        self.assertEqual(sum(summed[1]), 162804754.0)

        # scan indexing
        by_scan = mzml[2]
        self.assertEqual(sum(by_scan[1]), 6742121)

        # time indexing
        by_time = mzml[0.01]
        self.assertEqual(sum(by_time[1]), 56270834)
        sys.stdout.flush()
        return ipsetting[typ]
    except KeyError:
        raise KeyError(
            '\nThe specified figure setting "%s" is not defined.\nPlease check your spelling'
            % setting)


if __name__ == '__main__':
    os.chdir(curdir)  # switch the working directory to curdir

    keywords = presets(setting)  # preset kwargs for the chosen setting

    # anything that is not a '.mzml.gz' or '.raw' file is assumed to be an excel workbook
    if not spectrum.lower().endswith(('.mzml.gz', '.raw')):
        xlfile = XLSX(spectrum, verbose=True)  # load excel file
        if sheetname is None:  # no sheet was named, so fall back to the first one
            sheetname = xlfile.wb.sheetnames[0]
        # first item of what pullspectrum returns for that sheet
        exp = xlfile.pullspectrum(sheetname, skiplines=skiplines)[0]
        # default output filename: workbook name minus its last five characters, plus the sheet name
        keywords.update({'outname': f'{xlfile.bookname[:-5]} ({sheetname})'})
    else:  # supplied with a mass spec file
        mzml = mzML(spectrum)
        exp = mzml.sum_scans()
        # default output filename: everything before the first '.' in the mzML filename
        keywords.update({'outname': mzml.filename.split('.')[0]})

    keywords.update(override)  # user overrides take precedence over presets and defaults
def mia(filename, show=True, specific_components=None, write=True, save=False):
    """
    MS/MS fragmentation interpreter assistant

    Sums all scans in the file, picks peaks, tabulates the pairwise m/z differences between the picked
    peaks, and proposes an assignment for every positive (rounded) difference found in the common-loss table.

    :param filename: mass spectrum file to parse
    :param show: whether to show the annotated spectrum
    :param specific_components: iterable of additional components, unpacked positionally into ``com_loss``
        (None is treated as no additional components)
    :param write: whether to write the output to console
    :param save: whether to save the results to an excel file
    """
    if specific_components is None:
        specific_components = []

    mzml = mzML(filename)  # load the mzML
    x, y = mzml.sum_scans()  # sum all the scans

    # if not all peaks are being detected, decrease the last value handed to indexes
    inds = indexes(x, y, 0.01, 7)

    # square table of differences: diffs[i][j] = m/z of peak i minus m/z of peak j
    peak_mz = [x[i] for i in inds]
    diffs = [[mz_a - mz_b for mz_b in peak_mz] for mz_a in peak_mz]

    loss = com_loss(*specific_components)
    guesses = []
    for ind, row in enumerate(diffs):
        for ind2, difference in enumerate(row):
            val = int(round(difference))
            if val > 0 and val in loss:  # only positive differences can be losses
                guesses.append([peak_mz[ind], peak_mz[ind2], val, loss[val]])

    # print the results to console if specified
    if write is True:
        header = ''.join(f'{mz:.1f}\t' for mz in peak_mz)
        sys.stdout.write('\t' + header + '\n')
        for mz, row in zip(peak_mz, diffs):
            cells = ''.join(f'{round(col, 1):.1f}\t' for col in row)
            sys.stdout.write(f'{round(mz, 1):.1f}\t' + cells + '\n')

        sys.stdout.write(
            '\nPossible fragment assignments (from common losses):\n')
        for a, b, val, change in guesses:
            sys.stdout.write(f'{a} -> {b}: {val} {change}\n')

    # annotate each picked peak with its intensity as a percentage of the most intense point
    top = max(y)
    annotations = {
        str(x[i]): [x[i], float(y[i]) / float(top) * 100.]
        for i in inds
    }
    if show is True:
        from pythoms.tome import plot_mass_spectrum
        plot_mass_spectrum([x, y], annotations=annotations, output='show')

    if save is True:
        from pythoms.xlsx import XLSX
        xlfile = XLSX(filename, create=True)
        xlfile.writespectrum(
            x,
            y,
            'MSMS',
            # norm=False, # don't normalized data
            # chart=False, # don't save basic chart to sheet
        )
        # index access replaces openpyxl's deprecated Workbook.get_sheet_by_name
        cs = xlfile.wb['MSMS']
        cs.cell(row=1, column=6).value = 'differences'
        for ind, val in enumerate(inds):  # write column headers using cell references
            reference = f'={xlfile.inds_to_cellname(val, 0)}'
            cs[xlfile.inds_to_cellname(0, 6 + ind)] = reference  # across
            cs[xlfile.inds_to_cellname(1 + ind, 5)] = reference  # down
        for ind, val in enumerate(inds):  # write differences based on cell references
            for ind2, val2 in enumerate(inds):
                cellname = xlfile.inds_to_cellname(1 + ind, 6 + ind2)
                cs[cellname] = f'={xlfile.inds_to_cellname(val, 0)}' \
                               f'-{xlfile.inds_to_cellname(val2, 0)}'  # value
                cs[cellname].number_format = '0'  # number format

        # write guesses below the difference table; the row offset is computed explicitly instead of
        # relying on the loop variable left over from the loops above (which is unbound when no peaks
        # were picked)
        header_row = 2 + len(inds)
        cs[xlfile.inds_to_cellname(header_row, 5)] = 'from'
        cs[xlfile.inds_to_cellname(header_row, 6)] = 'to'
        cs[xlfile.inds_to_cellname(header_row, 7)] = 'difference'
        cs[xlfile.inds_to_cellname(header_row, 8)] = 'guess'
        for i, guess in enumerate(guesses):
            for offset, item in enumerate(guess[:4]):
                cs[xlfile.inds_to_cellname(header_row + 1 + i, 5 + offset)] = item

        xlfile.save()
Beispiel #7
0
# spacing between the traces, handed to arange as the step between start and end
deltat = 10

# user overrides go here (uncomment and edit the entries that should differ from the defaults)
override = {
    # 'fs':16, # font size
    # 'lw':1.5, # line width of traces
    # 'size':[7.87,4.87], # image size [width,length] in inches
    # 'xrange':[500,700], # wavelength bounds (in nm)
    # 'yrange':[0,3], # absorbance bounds(in a.u.)
    # 'legloc':0, # legend location (see http://matplotlib.org/api/legend_api.html for more location codes)
}

if __name__ == '__main__':
    mzml = mzML(filename, ftt=True)  # initiate mzml object
    fn = mzml.associate_to_function('UV')  # function that contains the UV-Vis data
    uvspecs = mzml.retrieve_scans(start, end, fn)  # pull uv spectra as (x, y) pairs
    wavelengths = list(uvspecs[0][0])  # x values of the first spectrum serve as the wavelength list
    uvspecs = [intensities for _, intensities in uvspecs]  # keep only the y values
    timepoints = mzml.functions[fn]['timepoints']  # time points of the UV function
    # indices of the time points bracketing the start-end window
    l = locate_in_list(timepoints, start, 'greater')
    r = locate_in_list(timepoints, end, 'lesser')
    timepoints = timepoints[l:r + 1]  # trim time list accordingly
    times = arange(start, end, deltat)  # evenly spaced times between start and end

    specin = list()
        sys.stdout.flush()
        return ipsetting[typ]
    except KeyError:
        raise KeyError(
            '\nThe specified figure setting "%s" is not defined.\nPlease check your spelling'
            % setting)


if __name__ == '__main__':
    os.chdir(curdir)  # switch the working directory to curdir

    keywords = presets(setting)  # preset kwargs for the chosen setting

    # anything that is not a '.mzml.gz' or '.raw' file is assumed to be an excel workbook
    if not spectrum.lower().endswith(('.mzml.gz', '.raw')):
        xlfile = XLSX(spectrum, verbose=True)  # load excel file
        if sheetname is None:  # no sheet was named, so fall back to the first one
            sheetname = xlfile.wb.sheetnames[0]
        # first item of what pullspectrum returns for that sheet
        exp = xlfile.pullspectrum(sheetname, skiplines=skiplines)[0]
        # default output filename: workbook name minus its last five characters, plus the sheet name
        keywords.update({'outname': f'{xlfile.bookname[:-5]} ({sheetname})'})
    else:  # supplied with a mass spec file
        mzml = mzML(spectrum, verbose=False)
        exp = mzml.sum_scans()
        # default output filename: everything before the first '.' in the mzML filename
        keywords.update({'outname': mzml.filename.split('.')[0]})

    keywords.update(override)  # user overrides take precedence over presets and defaults
# spacing between the traces, handed to arange as the step between start and end
deltat = 10

# user overrides go here (uncomment and edit the entries that should differ from the defaults)
override = {
    # 'fs':16, # font size
    # 'lw':1.5, # line width of traces
    # 'size':[7.87,4.87], # image size [width,length] in inches
    # 'xrange':[500,700], # wavelength bounds (in nm)
    # 'yrange':[0,3], # absorbance bounds(in a.u.)
    # 'legloc':0, # legend location (see http://matplotlib.org/api/legend_api.html for more location codes)
}

if __name__ == '__main__':
    mzml = mzML(filename, ftt=True)  # initiate mzml object
    fn = mzml.associate_to_function('UV')  # function that contains the UV-Vis data
    uvspecs = mzml.retrieve_scans(start, end, fn)  # pull uv spectra as (x, y) pairs
    wavelengths = list(uvspecs[0][0])  # x values of the first spectrum serve as the wavelength list
    uvspecs = [intensities for _, intensities in uvspecs]  # keep only the y values
    timepoints = mzml.functions[fn]['timepoints']  # time points of the UV function
    # indices of the time points bracketing the start-end window
    l = locate_in_list(timepoints, start, 'greater')
    r = locate_in_list(timepoints, end, 'lesser')
    timepoints = timepoints[l:r + 1]  # trim time list accordingly
    times = arange(start, end, deltat)  # evenly spaced times between start and end

    # for every evenly spaced time, take the spectrum recorded closest to it
    specin = list()
    for time in times:
        ind = locate_in_list(timepoints, time)
        specin.append(uvspecs[ind])
Beispiel #10
0
def pyrsir(
        filename,
        xlsx,
        n,
        plot=True,  # plot the data for a quick look
        verbose=True,  # chatty
        bounds_confidence=0.99,  #
        combine_spectra=True,  # whether or not to output a summed spectrum
        return_data=False,  #
):
    """
    A method for generating reconstructed single ion monitoring traces.

    Species parameters are read from the excel workbook (or taken directly from a dictionary). Raw traces are
    reused from existing 'Raw Data (+)' / 'Raw Data (-)' sheets when the workbook has them; otherwise they are
    pulled from the mass spec file. Every trace is then binned for each requested n and divided by the
    correspondingly binned tic trace.

    :param filename: path to mzML or raw file to process
    :param xlsx: path to excel file with correctly formatted columns, or a dictionary of species parameters
    :param n: number of scans to sum together (for binning algorithm); one integer or a list/tuple of integers,
        each at least 1
    :param plot: whether to plot and show the data for a quick look (independent of ``verbose``)
    :param verbose: chatty mode
    :param bounds_confidence: confidence interval for automatically generated bounds (only applicable if molecular
        formulas are provided).
    :param combine_spectra: whether to output a summed spectrum
    :param return_data: whether to return data (if the data from the function is required by another function)
    :return: ``(mzml, sp, rtime, tic, chroms)`` when ``return_data`` is True (nothing is written in that case);
        otherwise None, after the results were written to the excel workbook
    """
    # NOTE(review): the docstring of check_integer is interpolated into its exit messages, so it is runtime text
    def check_integer(val, name):
        """
        This function checks that the supplied values are integers greater than 1
        
        A integer value that is non-negative is required for the summing function.
        Please check your input value. 
        """
        if type(val) not in (list, tuple):  # if only one value given for n
            val = [val]
        for num in val:
            if type(num) is not int:  # exact type check on purpose: bool and float are rejected
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' %
                         (name, str(num), check_integer.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' %
                         (name, str(num), check_integer.__doc__))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot

        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:
                pl.plot(rtime[modekey],
                        tic[modekey],
                        linewidth=0.75,
                        label='TIC')  # plot tic
                for key in sp:  # plot each species
                    # compare by value: string identity is an interpreter detail
                    if sp[key]['affin'] == mode:
                        pl.plot(rtime[modekey],
                                sp[key]['raw'],
                                linewidth=0.75,
                                label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        # tick_params expects booleans; a non-empty string such as 'off' is not a reliable "False"
        pl.tick_params(axis='x', labelbottom=False)

        # summed data, one subplot per n below the raw-data plot
        for loc, num in enumerate(n, start=2):
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = str(num) + 'sum' + mode
                if modekey in rtime:
                    pl.plot(rtime[modekey],
                            tic[modekey],
                            linewidth=0.75,
                            label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode:  # if a MS species
                            pl.plot(rtime[modekey],
                                    sp[key][sumkey],
                                    linewidth=0.75,
                                    label=key)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
        pl.tick_params(axis='x', labelbottom=True)  # only the bottom plot keeps its x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            if verbose is True:
                sys.stdout.write('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                delete.append('Raw Data (' + sp[key]['affin'] + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')')
                    delete.append(
                        str(num) + ' Normalized (' + sp[key]['affin'] + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            if verbose is True:
                sys.stdout.write(' DONE.\n')

        if verbose is True:
            sys.stdout.write('Writing to "%s"' % xlfile.bookname)
            sys.stdout.flush()

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode,
                                 tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                modekey = 'raw' + mode
                if modekey in rtime:
                    if max(n) > 1:  # if data were summed
                        sheetname = str(num) + ' Sum (' + mode + ')'
                        xlfile.writersim(
                            sp, rtime[sumkey + mode], sumkey, sheetname, mode,
                            tic[sumkey + mode])  # write summed data
                    sheetname = str(num) + ' Normalized (' + mode + ')'
                    xlfile.writersim(sp, rtime[sumkey + mode], normkey,
                                     sheetname, mode)  # write normalized data

        for key, val in sorted(sp.items()):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(
                    sp[key]['spectrum'][0],  # x values
                    sp[key]['spectrum'][1],  # y values
                    key,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Isotope Patterns',  # sheet name
                    chart=True,  # output excel chart
                )

        # NOTE(review): rd is only ever False or True in this function, so this branch never runs and the
        # chromatograms are never written; the positional argument order below also differs from the other
        # writemultispectrum calls. Left unchanged pending confirmation of the intended behaviour.
        if rd is None:
            for key, val in sorted(chroms.items()):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'],
                                          chroms[key]['xunit'],
                                          chroms[key]['yunit'],
                                          'Function Chromatograms', key)

        # check for UV-Vis species (by value, so affinities loaded from a workbook are recognised too)
        uvstuff = any(sp[key]['affin'] == 'UV' for key in sp)
        if uvstuff is True:
            for ind, val in enumerate(
                    tic['rawUV']):  # normalize the UV intensities
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV',
                             tic['rawUV'])  # write UV-Vis data to sheet

        if sum_spectra is not None:  # write all summed spectra
            for fn in sum_spectra:
                specname = '%s %s' % (mzml.functions[fn]['mode'],
                                      mzml.functions[fn]['level'])
                if 'target' in mzml.functions[fn]:
                    specname += ' %.3f' % mzml.functions[fn]['target']
                specname += ' (%.3f-%.3f)' % (mzml.functions[fn]['window'][0],
                                              mzml.functions[fn]['window'][1])
                xlfile.writemultispectrum(
                    sum_spectra[fn][0],  # x values
                    sum_spectra[fn][1],  # y values
                    specname,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Summed Spectra',  # sheet name
                    chart=True,  # output excel chart
                )

        if verbose is True:
            sys.stdout.write(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        res = None  # resolution is determined lazily, once per call, on the first species with a formula
        for species in dct:
            if 'affin' not in dct[species]:  # set affinity if not specified
                fn = dct[species]['function']
                if mzml.functions[fn]['type'] == 'MS':
                    dct[species]['affin'] = mzml.functions[fn]['mode']
                if mzml.functions[fn]['type'] == 'UV':
                    dct[species]['affin'] = 'UV'
            if 'formula' in dct[species] and dct[species]['formula'] is not None:
                if res is None:
                    res = int(mzml.auto_resolution())
                dct[species]['mol'].res = res  # sets resolution in Molecule object
                # dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1]  # recalculates sigma with new resolution
                dct[species]['bounds'] = dct[species]['mol'].bounds  # caclulates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------

    if verbose is True:
        stime = ScriptTime()
        stime.printstart()

    n = check_integer(
        n,
        'number of scans to sum')  # checks integer input and converts to list

    # NOTE(review): when a dictionary is supplied no workbook is opened, so only the return_data=True
    # path can complete; the write-out steps at the end all need xlfile
    if not isinstance(xlsx, dict):
        if verbose is True:
            sys.stdout.write('Loading processing parameters from excel file')
            sys.stdout.flush()
        xlfile = XLSX(xlsx, verbose=verbose)
        sp = xlfile.pullrsimparams()
    else:  # if parameters were provided in place of an excel file
        sp = xlsx

    mskeys = ['+', '-']
    for key in sp:
        if 'formula' in sp[key] and sp[key]['formula'] is not None:  # if formula is specified
            sp[key]['mol'] = IPMolecule(sp[key]['formula'])  # create Molecule object
            # generate bounds from molecule object with this confidence interval
            sp[key]['bounds'] = sp[key]['mol'].calculate_bounds(bounds_confidence)
    if verbose is True:
        sys.stdout.write(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys:  # look for existing positive and negative mode raw data
        try:
            modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode +
                                                          ')')
        except (KeyError, UnboundLocalError):
            # KeyError: skip this mode; UnboundLocalError: pyrsir was not handed an excel file
            continue
        if verbose is True:
            sys.stdout.write(
                'Existing (%s) mode raw data were found, grabbing those values.'
                % mode)
            sys.stdout.flush()
        rd = True  # bool that rd is present
        modekey = 'raw' + mode
        sp.update(modedata)  # update sp dictionary with raw data
        for key in modedata:  # check for affinities
            if 'affin' not in sp[key]:
                sp[key]['affin'] = mode
        rtime[modekey] = list(modetime)  # update time list
        tic[modekey] = list(modetic)  # update tic list
        if verbose is True:
            sys.stdout.write(' DONE\n')

    # sp = prepformula(sp)
    newpeaks = False
    if rd is True:
        newsp = {}
        sum_spectra = None
        for key in sp:  # checks whether there is a MS species that does not have raw data
            if 'raw' not in sp[key]:
                newsp[key] = sp[key]  # create references in the namespace
        if newsp:
            newpeaks = True
            if verbose is True:
                sys.stdout.write(
                    'Some peaks are not in the raw data, extracting these from raw file.\n'
                )
            ips = xlfile.pullmultispectrum(
                'Isotope Patterns'
            )  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [
                    ips[species]['x'], ips[species]['y']
                ]
            mzml = mzML(filename)  # load mzML class
            sp = prepformula(sp)  # prep formula etc for summing
            newsp = prepformula(newsp)  # prep formula species for summing
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(
                        3, newsp[species]['bounds'][0],
                        newsp[species]['bounds'][1])
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            if verbose is True:
                sys.stdout.write(
                    'No new peaks were specified. Proceeding directly to summing and normalization.\n'
                )

    if rd is False:  # if no raw data is present, process mzML file
        mzml = mzML(filename, verbose=verbose)  # load mzML class
        sp = prepformula(sp)
        sp, sum_spectra = mzml.pull_species_data(
            sp, combine_spectra)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp:  # collect the spectrum of each species and the time/tic traces of its function
            func = sp[key]['function']
            if mzml.functions[func]['type'] == 'MS':  # determine mode key
                if combine_spectra is True:
                    sp[key]['spectrum'] = sum_spectra[
                        sp[key]['function']].trim(
                            xbounds=sp[key]
                            ['bounds'])  # extract the spectrum object
                mode = 'raw' + mzml.functions[func]['mode']
            if mzml.functions[func]['type'] == 'UV':
                mode = 'rawUV'
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            # if 'formula' in sp[key] and sp[key]['formula'] is not None:
            #     sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        if combine_spectra is True:
            for fn in sum_spectra:
                sum_spectra[fn] = sum_spectra[fn].trim(
                )  # convert Spectrum objects into x,y lists

    # if max(n) > 1: # run combine functions if n > 1
    for num in n:  # for each n to sum
        if verbose is True:
            sys.stdout.write('\r%d Summing species traces.' % num)
        sumkey = str(num) + 'sum'
        for key in sp:  # bin each species
            if sp[key]['affin'] in mskeys or mzml.functions[sp[key][
                    'function']]['type'] == 'MS':  # if species is MS related
                sp[key][sumkey] = bindata(num, sp[key]['raw'])
        for mode in mskeys:
            modesumkey = str(num) + 'sum' + mode
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                rtime[modesumkey] = bindata(num, rtime[modekey], num)
                tic[modesumkey] = bindata(num, tic[modekey])
    if verbose is True:
        sys.stdout.write(' DONE\n')
        sys.stdout.flush()
    # else:
    #    for key in sp: # create key for normalization
    #        sp[key]['1sum'] = sp[key]['raw']

    for num in n:  # normalize each peak's chromatogram
        if verbose is True:
            sys.stdout.write('\r%d Normalizing species traces.' % num)
            sys.stdout.flush()
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                for key in sp:  # for each species
                    if sp[key]['affin'] in mskeys or mzml.functions[
                            sp[key]['function']][
                                'type'] == 'MS':  # if species has affinity
                        # +0.01 to avoid div/0 errors
                        sp[key][normkey] = [
                            val / (tic[sumkey + sp[key]['affin']][ind] + 0.01)
                            for ind, val in enumerate(sp[key][sumkey])
                        ]
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if return_data is True:  # if data is to be used by another function, return the calculated data
        # NOTE(review): mzml and chroms are only bound on some of the paths above (chroms only when no
        # existing raw data was found) — confirm callers only use this on a fresh workbook
        return mzml, sp, rtime, tic, chroms

    # import pickle #pickle objects (for troubleshooting)
    # pickle.dump(rtime,open("rtime.p","wb"))
    # pickle.dump(tic,open("tic.p","wb"))
    # pickle.dump(chroms,open("chroms.p","wb"))
    # pickle.dump(sp,open("sp.p","wb"))

    output()  # write data to excel file

    if verbose is True:
        sys.stdout.write('\rUpdating paramters')
        sys.stdout.flush()
    xlfile.updatersimparams(sp)  # update summing parameters
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if verbose is True:
        sys.stdout.write('\rSaving "%s" (this may take some time)' %
                         xlfile.bookname)
        sys.stdout.flush()
    xlfile.save()
    if verbose is True:
        sys.stdout.write(' DONE\n')

    # plotting is controlled by ``plot`` alone; it used to be nested under the verbose check, so
    # plot=True had no effect when verbose was False
    if plot is True:
        if verbose is True:
            sys.stdout.write('Plotting traces')
        plots()  # plots for quick review
        if verbose is True:
            sys.stdout.write(' DONE\n')
    if verbose is True:
        stime.printelapsed()
def pyrsir(
        filename,
        xlsx,
        n,
        plot=True,  # plot the data for a quick look
        verbose=True,  # chatty
        bounds_confidence=0.99,  #
        combine_spectra=True,  # whether or not to output a summed spectrum
        return_data=False,  #
):
    """
    A method for generating reconstructed single ion monitoring traces.

    Species parameters are read from an excel workbook (or taken directly from a dictionary), the raw
    traces are pulled either from existing "Raw Data" sheets in that workbook or from the mass spec file,
    the traces are binned for every requested ``n`` and normalized to the binned TIC, and the results are
    written back to the workbook and optionally plotted.

    :param filename: path to mzML or raw file to process
    :param xlsx: path to excel file with correctly formatted columns, or a dictionary of species parameters
        (when a dictionary is supplied there is no workbook, so nothing is written to excel)
    :param n: number of scans to sum together (for binning algorithm); a single integer or a list/tuple of
        integers, each of which must be at least 1
    :param plot: whether to plot and show the data for a quick look
    :param verbose: chatty mode
    :param bounds_confidence: confidence interval for automatically generated bounds (only applicable if molecular
        formulas are provided).
    :param combine_spectra: whether to output a summed spectrum
    :param return_data: whether to return data (if the data from the function is required by another function)
    :return: ``(mzml, sp, rtime, tic, chroms)`` if ``return_data`` is True (``mzml`` and ``chroms`` are None
        when they were never loaded because raw data were taken from the workbook), otherwise None
    """
    def check_integer(val, name):
        """
        This function checks that the supplied values are integers greater than or equal to 1

        A positive integer value is required for the summing function.
        Please check your input value.
        """
        # NOTE: the docstring above is appended to the exit messages below, so it is user-facing text
        if not isinstance(val, (list, tuple)):  # if only one value given for n
            val = [val]
        for num in val:
            # bool is a subclass of int but is not a meaningful scan count
            if isinstance(num, bool) or not isinstance(num, int):
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' % (name, str(num), check_integer.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' % (name, str(num), check_integer.__doc__))
        return val

    def is_ms_species(key):
        """
        Tells whether a species is a mass spec species: either its affinity is one of the MS modes or
        (when a mass spec file has been loaded) its function is of type 'MS'
        """
        if sp[key].get('affin') in mskeys:
            return True
        # mzml stays None when all data came from the workbook; there is then no function table to consult
        return mzml is not None and mzml.functions[sp[key]['function']]['type'] == 'MS'

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:
                pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                for key in sp:  # plot each species
                    if sp[key]['affin'] == mode:  # compare by value, not identity
                        pl.plot(rtime[modekey], sp[key]['raw'], linewidth=0.75, label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        pl.tick_params(axis='x', labelbottom=False)  # booleans are required here; the string 'off' is truthy

        # summed data (one subplot per n, below the raw data plot)
        for loc, num in enumerate(n, start=2):
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = sumkey + mode
                if modekey in rtime:
                    pl.plot(rtime[modekey], tic[modekey], linewidth=0.75, label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] == mode:  # if a MS species of this mode
                            pl.plot(rtime[modekey], sp[key][sumkey], linewidth=0.75, label=key)
            pl.title('Summed Data (n=%i)' % num)
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom=False)
        pl.tick_params(axis='x', labelbottom=True)  # only the bottom-most plot shows x labels
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            if verbose is True:
                sys.stdout.write('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                delete.append('Raw Data (' + sp[key]['affin'] + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')')
                    delete.append(str(num) + ' Normalized (' + sp[key]['affin'] + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            if verbose is True:
                sys.stdout.write(' DONE.\n')

        if verbose is True:
            sys.stdout.write('Writing to "%s"' % xlfile.bookname)
            sys.stdout.flush()

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime:
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode, tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                if 'raw' + mode not in rtime:  # no data for that mode
                    continue
                if max(n) > 1:  # if data were summed
                    sheetname = str(num) + ' Sum (' + mode + ')'
                    xlfile.writersim(sp, rtime[sumkey + mode], sumkey, sheetname, mode,
                                     tic[sumkey + mode])  # write summed data
                sheetname = str(num) + ' Normalized (' + mode + ')'
                xlfile.writersim(sp, rtime[sumkey + mode], normkey, sheetname, mode)  # write normalized data

        for key in sorted(sp):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(
                    sp[key]['spectrum'][0],  # x values
                    sp[key]['spectrum'][1],  # y values
                    key,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Isotope Patterns',  # sheet name
                    chart=True,  # output excel chart
                )

        # chromatograms are only pulled when the mass spec file itself was processed (rd is False);
        # the call uses the same keyword layout as the other writemultispectrum calls in this function
        if rd is False:
            for key in sorted(chroms):  # write chromatograms
                xlfile.writemultispectrum(
                    chroms[key]['x'],  # x values
                    chroms[key]['y'],  # y values
                    key,  # name of the chromatogram
                    xunit=chroms[key]['xunit'],  # x unit
                    yunit=chroms[key]['yunit'],  # y unit
                    sheetname='Function Chromatograms',  # sheet name
                )

        uvstuff = any(sp[key]['affin'] == 'UV' for key in sp)  # check for UV-Vis spectra
        if uvstuff is True and 'rawUV' in tic:  # UV traces only exist if they were pulled from the file
            for ind, val in enumerate(tic['rawUV']):  # normalize the UV intensities (in place)
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV', tic['rawUV'])  # write UV-Vis data to sheet

        if sum_spectra is not None:  # write all summed spectra
            for fn in sum_spectra:
                specname = '%s %s' % (mzml.functions[fn]['mode'], mzml.functions[fn]['level'])
                if 'target' in mzml.functions[fn]:
                    specname += ' %.3f' % mzml.functions[fn]['target']
                specname += ' (%.3f-%.3f)' % (mzml.functions[fn]['window'][0], mzml.functions[fn]['window'][1])
                xlfile.writemultispectrum(
                    sum_spectra[fn][0],  # x values
                    sum_spectra[fn][1],  # y values
                    specname,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Summed Spectra',  # sheet name
                    chart=True,  # output excel chart
                )

        if verbose is True:
            sys.stdout.write(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        res = None  # resolution is only determined (once per call) if some species has a formula
        for species in dct:
            if 'affin' not in dct[species]:  # set affinity if not specified
                fn = dct[species]['function']
                functype = mzml.functions[fn]['type']
                if functype == 'MS':
                    dct[species]['affin'] = mzml.functions[fn]['mode']
                elif functype == 'UV':
                    dct[species]['affin'] = 'UV'
            if dct[species].get('formula') is not None:
                if res is None:
                    res = int(mzml.auto_resolution())
                dct[species]['mol'].res = res  # sets resolution in Molecule object
                dct[species]['bounds'] = dct[species]['mol'].bounds  # takes bounds from the Molecule object
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------

    if verbose is True:
        stime = ScriptTime()
        stime.printstart()

    n = check_integer(n, 'number of scans to sum')  # checks integer input and converts to list

    xlfile = None  # stays None if parameters were provided in place of an excel file
    if not isinstance(xlsx, dict):
        if verbose is True:
            sys.stdout.write('Loading processing parameters from excel file')
            sys.stdout.flush()
        xlfile = XLSX(xlsx, verbose=verbose)
        sp = xlfile.pullrsimparams()
    else:  # if parameters were provided in place of an excel file
        sp = xlsx

    mskeys = ['+', '-']
    for key in sp:
        if sp[key].get('formula') is not None:  # if formula is specified
            sp[key]['mol'] = IPMolecule(sp[key]['formula'])  # create Molecule object
            sp[key]['bounds'] = sp[key]['mol'].calculate_bounds(
                bounds_confidence
            )  # generate bounds from molecule object with this confidence interval
    if verbose is True:
        sys.stdout.write(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    # names read by the nested helpers and by the return statement; bound up front so that every
    # code path sees a defined value
    mzml = None
    chroms = None
    sum_spectra = None
    newsp = {}
    rd = False
    if xlfile is not None:  # existing raw data can only come from a workbook
        for mode in mskeys:  # look for existing positive and negative mode raw data
            try:
                modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode + ')')
            except KeyError:
                continue
            if verbose is True:
                sys.stdout.write('Existing (%s) mode raw data were found, grabbing those values.' % mode)
                sys.stdout.flush()
            rd = True  # bool that rd is present
            modekey = 'raw' + mode
            sp.update(modedata)  # update sp dictionary with raw data
            for key in modedata:  # check for affinities
                if 'affin' not in sp[key]:
                    sp[key]['affin'] = mode
            rtime[modekey] = list(modetime)  # update time list
            tic[modekey] = list(modetic)  # update tic list
            if verbose is True:
                sys.stdout.write(' DONE\n')

    newpeaks = False
    if rd is True:
        # species that do not have raw data yet (values are references to the dictionaries in sp)
        newsp = {key: sp[key] for key in sp if 'raw' not in sp[key]}
        if newsp:
            newpeaks = True
            if verbose is True:
                sys.stdout.write('Some peaks are not in the raw data, extracting these from raw file.\n')
            ips = xlfile.pullmultispectrum(
                'Isotope Patterns')  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [ips[species]['x'], ips[species]['y']]
            mzml = mzML(filename, verbose=verbose)  # load mzML class
            sp = prepformula(sp)  # prep formula etc for summing
            newsp = prepformula(newsp)  # prep formula species for summing
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(3, newsp[species]['bounds'][0], newsp[species]['bounds'][1])
            # NOTE(review): the other call to pull_species_data in this function unpacks two return values;
            # confirm that this single-argument call returns only the species dictionary
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            if verbose is True:
                sys.stdout.write('No new peaks were specified. Proceeding directly to summing and normalization.\n')

    if rd is False:  # if no raw data is present, process mzML file
        mzml = mzML(filename, verbose=verbose)  # load mzML class
        sp = prepformula(sp)
        sp, sum_spectra = mzml.pull_species_data(sp, combine_spectra)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        for key in sp:  # collect the time and tic lists of every function that a species refers to
            func = sp[key]['function']
            functype = mzml.functions[func]['type']
            if functype == 'MS':  # determine mode key
                if combine_spectra is True:
                    sp[key]['spectrum'] = sum_spectra[func].trim(
                        xbounds=sp[key]['bounds'])  # extract the spectrum object
                modekey = 'raw' + mzml.functions[func]['mode']
            elif functype == 'UV':
                modekey = 'rawUV'
            else:  # unrecognized function type: there is no mode key to file its data under
                continue
            if modekey not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[modekey] = mzml.functions[func]['timepoints']
                tic[modekey] = mzml.functions[func]['tic']
        if combine_spectra is True:
            for fn in sum_spectra:
                sum_spectra[fn] = sum_spectra[fn].trim()  # convert Spectrum objects into x,y lists

    for num in n:  # for each n to sum
        if verbose is True:
            sys.stdout.write('\r%d Summing species traces.' % num)
        sumkey = str(num) + 'sum'
        for key in sp:  # bin each species
            if is_ms_species(key):
                sp[key][sumkey] = bindata(num, sp[key]['raw'])
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime:  # if there is data for that mode
                rtime[sumkey + mode] = bindata(num, rtime[modekey], num)
                tic[sumkey + mode] = bindata(num, tic[modekey])
    if verbose is True:
        sys.stdout.write(' DONE\n')
        sys.stdout.flush()

    for num in n:  # normalize each peak's chromatogram
        if verbose is True:
            sys.stdout.write('\r%d Normalizing species traces.' % num)
            sys.stdout.flush()
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        if not any('raw' + mode in rtime for mode in mskeys):  # no MS data at all, nothing to normalize
            continue
        for key in sp:  # for each species
            if is_ms_species(key):
                modetic = tic[sumkey + sp[key]['affin']]  # binned tic of the species' own mode
                sp[key][normkey] = [
                    val / (modetic[ind] + 0.01)  # +0.01 to avoid div/0 errors
                    for ind, val in enumerate(sp[key][sumkey])
                ]
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if return_data is True:  # if data is to be used by another function, return the calculated data
        return mzml, sp, rtime, tic, chroms

    if xlfile is not None:  # there is only a workbook to write to if one was supplied
        output()  # write data to excel file

        if verbose is True:
            sys.stdout.write('\rUpdating paramters')
            sys.stdout.flush()
        xlfile.updatersimparams(sp)  # update summing parameters
        if verbose is True:
            sys.stdout.write(' DONE\n')

        if verbose is True:
            sys.stdout.write('\rSaving "%s" (this may take some time)' % xlfile.bookname)
            sys.stdout.flush()
        xlfile.save()
        if verbose is True:
            sys.stdout.write(' DONE\n')

    if plot is True:  # plotting is controlled by plot alone, independent of verbose
        if verbose is True:
            sys.stdout.write('Plotting traces')
        plots()  # plots for quick review
        if verbose is True:
            sys.stdout.write(' DONE\n')
    if verbose is True:
        stime.printelapsed()
Beispiel #12
0
    plt.savefig('../{OUTFILE}.png'.format(OUTFILE=outputFile + str(minFilter)), bbox_inches='tight')


##################################

###############################################################
# MAIN
###############################################################
# mzML processing variables (module-level settings for this script)
filename = 'HZ-140516_HOTKEYMSMS 1376 II.raw'  # raw or mzml file name
fillzeros = True  # fills spectrum with zeros
decpl = 1  # number of decimal places to track
mzrange = None  # mzrange to track
sr = 'all'  # scan range to track
mzml = mzML(filename, verbose=True)  # construct the mzML object for the file above (verbose output enabled)

# EDESI Plot Production variable
minFilter = 20  # minFilter intensity value
threshold = 1156  # threshold of peak height for Breakdown tracing
plotBreakdown = True  # Construct Plot with Breakdown?
plotZoom = True  # Construct Plot with Zoom in region of interest? (Autozoom)

# Collect the key of every function whose type is 'MS' and whose level is greater than 1
msmsfns = []
for func in mzml.functions:  # identify MSMS functions in the provided file
    if mzml.functions[func]['type'] == 'MS' and mzml.functions[func]['level'] > 1:
        msmsfns.append(func)
if len(msmsfns) > 1:  # if there is more than one msms function, ask the user which one to process
    sys.stdout.write(
        'More than one MS/MS function is contained in this mzML file. Please indicate which one you wish to process:\nFunction\ttarget\n')
    for func in msmsfns:
    }
    try:
        sys.stdout.write('Using figure preset "%s"\n' % (setting))
        sys.stdout.flush()
        return ipsetting[typ]
    except KeyError:
        raise KeyError('\nThe specified figure setting "%s" is not defined.\nPlease check your spelling' % setting)


if __name__ == '__main__':
    # Script entry point: obtain an experimental spectrum from either a mass spec file or an excel workbook,
    # then hand it to plot_mass_spectrum together with the preset keyword arguments.
    # curdir, setting, spectrum, sheetname, skiplines, override and simdict are defined outside this block.
    os.chdir(curdir)  # change to current working directory

    keywords = presets(setting)  # pull preset kwargs

    # NOTE(review): only the '.mzml.gz' and '.raw' suffixes select the mass spec branch; a path ending in
    # plain '.mzml' falls through to the excel branch -- confirm that this is intended
    if spectrum.lower().endswith('.mzml.gz') or spectrum.lower().endswith('.raw'):  # if supplied with a mass spec file
        mzml = mzML(spectrum, verbose=False)
        exp = mzml.sum_scans()
        keywords.update({'outname': mzml.filename.split('.')[0]})  # set default output filename (text before the first '.')

    else:  # otherwise assume that it is an excel file
        xlfile = XLSX(spectrum, verbose=True)  # load excel file
        if sheetname is None:  # no sheet was specified, so use the first sheet
            sheetname = xlfile.wb.sheetnames[0]
        exp = xlfile.pullspectrum(sheetname, skiplines=skiplines)[0]  # load spectrum from the selected sheet
        keywords.update({  # set default output filename
            # bookname with its last five characters dropped (presumably the '.xlsx' extension -- verify)
            'outname': f'{xlfile.bookname[:-5]} ({sheetname})',
        })

    keywords.update(override)  # apply any user overrides
    plot_mass_spectrum(exp, simdict, **keywords)
    import gc