Beispiel #1
0
 def test_indexing(self):
     """tests calculated indexing for filled Spectrum objects"""
     spec = Spectrum(3, empty=False)
     for i in range(1000):
         num = random()
         mz = num * spec.end
         try:
             index = spec.index(mz)
         except ValueError:
             continue
         self.assertEqual(round(mz, 3), round(spec.x[index], 3))
 def test_indexing(self):
     """tests calculated indexing for filled Spectrum objects"""
     spec = Spectrum(3, empty=False)
     for i in range(1000):
         num = random()
         mz = num * spec.end
         try:
             index = spec.index(mz)
         except ValueError:
             continue
         self.assertEqual(
             round(mz, 3),
             round(spec.x[index], 3)
         )
    def test_element(self):
        mol = Spectrum(
            3,
            start=0.,
            end=100.,
            filler=0.,
        )
        mol.add_spectrum(  # start with a Cl
            *element_intensity_list('Cl')
        )
        mol.add_element(  # add another Cl
            *element_intensity_list('Cl')
        )
        self.assertEqual(
            mol.trim(),
            [[69.938, 71.935, 73.932], [0.5739577600000001, 0.36728448, 0.05875776]]
        )
        mol.add_element(
            *element_intensity_list('Pd')
        )
        self.assertEqual(
            mol.trim(),
            [[171.843, 173.84, 173.841, 174.842, 175.837, 175.838, 175.841, 176.839, 177.835, 177.838, 177.841, 178.836,
              179.835, 179.838, 179.843, 181.835, 181.84, 183.837],
             [0.005854369152000001, 0.0037463016960000003, 0.06393889446400002, 0.128164767808, 0.000599329152,
              0.040915491072, 0.15686265580800002, 0.082014624384, 0.006545614464, 0.100378848384, 0.15186922329600003,
              0.013120607808, 0.016058495808, 0.097183473408, 0.06726784947200001, 0.015547303296, 0.043045741056,
              0.0068864094719999994]]

        )
        mol.charge = 2
        self.assertEqual(
            mol.trim()[0],
            [85.922, 86.92, 86.921, 87.421, 87.919, 87.919, 87.921, 88.42, 88.918, 88.919, 88.921, 89.418, 89.918,
             89.919, 89.922, 90.918, 90.92, 91.919]

        )
        del mol.charge
        self.assertEqual(
            mol.trim()[0],
            [171.843, 173.84, 173.841, 174.842, 175.837, 175.838, 175.841, 176.839, 177.835, 177.838, 177.841, 178.836,
             179.835, 179.838, 179.843, 181.835, 181.84, 183.837]
        )
    def test_spectrum(self):
        spec = Spectrum(3)
        spec.add_value(479.1, 1000)
        self.assertEqual(
            spec.trim(),
            [[479.1], [1000]]
        )

        spec2 = Spectrum(3)
        spec2.add_value(443.1, 1000)
        self.assertEqual(
            spec2.trim(),
            [[443.1], [1000]]
        )
        spec += spec2
        self.assertEqual(
            spec.trim(),
            [[443.1, 479.1], [1000, 1000]]
        )
        spec3 = Spectrum(3, start=50, end=2500)
        spec3.add_value(2150.9544, 1000)
        self.assertEqual(
            spec3.trim(),
            [[2150.954], [1000]]
        )
        spec += spec3

        self.assertEqual(
            spec.trim(True),
            [[50.0, 443.1, 479.1, 2150.954, 2500], [0.0, 1000, 1000, 1000, 0.0]]
        )
        spec.end = 2100.
        self.assertEqual(
            spec.trim(),
            [[443.1, 479.1], [1000, 1000]]
        )
Beispiel #5
0
 def test_element(self):
     mol = Spectrum(
         3,
         start=0.,
         end=100.,
         filler=0.,
     )
     mol.add_spectrum(  # start with a Cl
         *element_intensity_list('Cl'))
     mol.add_element(  # add another Cl
         *element_intensity_list('Cl'))
     self.assertEqual(mol.trim(),
                      [[69.938, 71.935, 73.932],
                       [0.5739577600000001, 0.36728448, 0.05875776]])
     mol.add_element(*element_intensity_list('Pd'))
     self.assertEqual(
         mol.trim(),
         [[
             171.843, 173.84, 173.841, 174.842, 175.837, 175.838, 175.841,
             176.839, 177.835, 177.838, 177.841, 178.836, 179.835, 179.838,
             179.843, 181.835, 181.84, 183.837
         ],
          [
              0.005854369152000001, 0.0037463016960000003,
              0.06393889446400002, 0.128164767808, 0.000599329152,
              0.040915491072, 0.15686265580800002, 0.082014624384,
              0.006545614464, 0.100378848384, 0.15186922329600003,
              0.013120607808, 0.016058495808, 0.097183473408,
              0.06726784947200001, 0.015547303296, 0.043045741056,
              0.0068864094719999994
          ]])
     mol.charge = 2
     self.assertEqual(mol.trim()[0], [
         85.922, 86.92, 86.921, 87.421, 87.919, 87.919, 87.921, 88.42,
         88.918, 88.919, 88.921, 89.418, 89.918, 89.919, 89.922, 90.918,
         90.92, 91.919
     ])
     del mol.charge
     self.assertEqual(mol.trim()[0], [
         171.843, 173.84, 173.841, 174.842, 175.837, 175.838, 175.841,
         176.839, 177.835, 177.838, 177.841, 178.836, 179.835, 179.838,
         179.843, 181.835, 181.84, 183.837
     ])
Beispiel #6
0
    def test_spectrum(self):
        spec = Spectrum(3)
        spec.add_value(479.1, 1000)
        self.assertEqual(spec.trim(), [[479.1], [1000]])

        spec2 = Spectrum(3)
        spec2.add_value(443.1, 1000)
        self.assertEqual(spec2.trim(), [[443.1], [1000]])
        spec += spec2
        self.assertEqual(spec.trim(), [[443.1, 479.1], [1000, 1000]])
        spec3 = Spectrum(3, start=50, end=2500)
        spec3.add_value(2150.9544, 1000)
        self.assertEqual(spec3.trim(), [[2150.954], [1000]])
        spec += spec3

        self.assertEqual(spec.trim(True),
                         [[50.0, 443.1, 479.1, 2150.954, 2500],
                          [0.0, 1000, 1000, 1000, 0.0]])
        spec.end = 2100.
        self.assertEqual(spec.trim(), [[443.1, 479.1], [1000, 1000]])
Beispiel #7
0
def pyrsir(
        filename,
        xlsx,
        n,
        plot=True,  # plot the data for a quick look
        verbose=True,  # chatty
        bounds_confidence=0.99,  #
        combine_spectra=True,  # whether or not to output a summed spectrum
        return_data=False,  #
):
    """
    A method for generating reconstructed single ion monitoring traces.

    :param filename: path to mzML or raw file to process
    :param xlsx: path to excel file with correctly formatted columns
    :param n: number of scans to sum together (for binning algorithm)
    :param plot: whether to plot and show the data for a quick look
    :param verbose: chatty mode
    :param bounds_confidence: confidence interval for automatically generated bounds (only applicable if molecular
        formulas are provided).
    :param combine_spectra: whether to output a summed spectrum
    :param return_data: whether to return data (if the data from the function is required by another function)
    :return:
    """
    def check_integer(val, name):
        """
        This function checks that the supplied values are integers greater than 1
        
        A integer value that is non-negative is required for the summing function.
        Please check your input value. 
        """
        if type(val) != list and type(
                val) != tuple:  # if only one value given for n
            val = [val]
        for num in val:
            if type(num) != int:
                sys.exit('\nThe %s value (%s) is not an integer.\n%s' %
                         (name, str(num), check_integer.__doc__))
            if num < 1:
                sys.exit('\nThe %s value (%s) is less than 1.\n%s' %
                         (name, str(num), check_integer.__doc__))
        return val

    def plots():
        """
        Function for generating a set of plots for rapid visual assessment of the supplied n-level
        Outputs all MS species with the same sum level onto the same plot
        requirements: pylab as pl
        """
        pl.clf()  # clears and closes old figure (if still open)
        pl.close()
        nplots = len(n) + 1

        # raw data
        pl.subplot(nplots, 1, 1)  # top plot

        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime.keys():
                pl.plot(rtime[modekey],
                        tic[modekey],
                        linewidth=0.75,
                        label='TIC')  # plot tic
                for key in sp:  # plot each species
                    if sp[key]['affin'] is mode:
                        pl.plot(rtime[modekey],
                                sp[key]['raw'],
                                linewidth=0.75,
                                label=key)
        pl.title('Raw Data')
        pl.ylabel('Intensity')
        pl.tick_params(axis='x', labelbottom='off')

        # summed data
        loc = 2
        for num in n:
            pl.subplot(nplots, 1, loc)
            sumkey = str(num) + 'sum'
            for mode in mskeys:
                modekey = str(num) + 'sum' + mode
                if modekey in rtime.keys():
                    pl.plot(rtime[modekey],
                            tic[modekey],
                            linewidth=0.75,
                            label='TIC')  # plot tic
                    for key in sp:
                        if sp[key]['affin'] is mode:  # if a MS species
                            pl.plot(rtime[modekey],
                                    sp[key][sumkey],
                                    linewidth=0.75,
                                    label=key)
            pl.title('Summed Data (n=%i)' % (num))
            pl.ylabel('Intensity')
            pl.tick_params(axis='x', labelbottom='off')
            loc += 1
        pl.tick_params(axis='x', labelbottom='on')
        pl.show()

    def output():
        """
        Writes the retrieved and calculated values to the excel workbook using the XLSX object
        """
        if newpeaks is True:  # looks for and deletes any sheets where the data will be changed
            if verbose is True:
                sys.stdout.write('Clearing duplicate XLSX sheets.')
            delete = []
            for key in newsp:  # generate strings to look for in excel file
                delete.append('Raw Data (' + sp[key]['affin'] + ')')
                for num in n:
                    delete.append(str(num) + ' Sum (' + sp[key]['affin'] + ')')
                    delete.append(
                        str(num) + ' Normalized (' + sp[key]['affin'] + ')')
            delete.append('Isotope Patterns')
            xlfile.removesheets(delete)  # remove those sheets
            if verbose is True:
                sys.stdout.write(' DONE.\n')

        if verbose is True:
            sys.stdout.write('Writing to "%s"' % xlfile.bookname)
            sys.stdout.flush()

        for mode in mskeys:  # write raw data to sheets
            modekey = 'raw' + mode
            if modekey in rtime.keys():
                sheetname = 'Raw Data (' + mode + ')'
                xlfile.writersim(sp, rtime[modekey], 'raw', sheetname, mode,
                                 tic[modekey])

        for num in n:  # write summed and normalized data to sheets
            sumkey = str(num) + 'sum'
            normkey = str(num) + 'norm'
            for mode in mskeys:
                modekey = 'raw' + mode
                if modekey in rtime.keys():
                    if max(n) > 1:  # if data were summed
                        sheetname = str(num) + ' Sum (' + mode + ')'
                        xlfile.writersim(
                            sp, rtime[sumkey + mode], sumkey, sheetname, mode,
                            tic[sumkey + mode])  # write summed data
                    sheetname = str(num) + ' Normalized (' + mode + ')'
                    xlfile.writersim(sp, rtime[sumkey + mode], normkey,
                                     sheetname, mode)  # write normalized data

        for key, val in sorted(sp.items()):  # write isotope patterns
            if sp[key]['affin'] in mskeys:
                xlfile.writemultispectrum(
                    sp[key]['spectrum'][0],  # x values
                    sp[key]['spectrum'][1],  # y values
                    key,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Isotope Patterns',  # sheet name
                    chart=True,  # output excel chart
                )

        if rd is None:
            for key, val in sorted(chroms.items()):  # write chromatograms
                xlfile.writemultispectrum(chroms[key]['x'], chroms[key]['y'],
                                          chroms[key]['xunit'],
                                          chroms[key]['yunit'],
                                          'Function Chromatograms', key)

        uvstuff = False
        for key in sp:  # check for UV-Vis spectra
            if sp[key]['affin'] is 'UV':
                uvstuff = True
                break
        if uvstuff is True:
            for ind, val in enumerate(
                    tic['rawUV']):  # normalize the UV intensities
                tic['rawUV'][ind] = val / 1000000.
            xlfile.writersim(sp, rtime['rawUV'], 'raw', 'UV-Vis', 'UV',
                             tic['rawUV'])  # write UV-Vis data to sheet

        if sum_spectra is not None:  # write all summed spectra
            for fn in sum_spectra:
                specname = '%s %s' % (mzml.functions[fn]['mode'],
                                      mzml.functions[fn]['level'])
                if 'target' in mzml.functions[fn]:
                    specname += ' %.3f' % mzml.functions[fn]['target']
                specname += ' (%.3f-%.3f)' % (mzml.functions[fn]['window'][0],
                                              mzml.functions[fn]['window'][1])
                xlfile.writemultispectrum(
                    sum_spectra[fn][0],  # x values
                    sum_spectra[fn][1],  # y values
                    specname,  # name of the spectrum
                    xunit='m/z',  # x unit
                    yunit='Intensity (counts)',  # y unit
                    sheetname='Summed Spectra',  # sheet name
                    chart=True,  # output excel chart
                )

        if verbose is True:
            sys.stdout.write(' DONE\n')

    def prepformula(dct):
        """looks for formulas in a dictionary and prepares them for pullspeciesdata"""
        for species in dct:
            if 'affin' not in dct[species]:  # set affinity if not specified
                fn = dct[species]['function']
                if mzml.functions[fn]['type'] == 'MS':
                    dct[species]['affin'] = mzml.functions[fn]['mode']
                if mzml.functions[fn]['type'] == 'UV':
                    dct[species]['affin'] = 'UV'
            if 'formula' in dct[species] and dct[species][
                    'formula'] is not None:
                try:
                    dct[species][
                        'mol'].res = res  # sets resolution in Molecule object
                except NameError:
                    res = int(mzml.auto_resolution())
                    dct[species]['mol'].res = res
                # dct[species]['mol'].sigma = dct[species]['mol'].sigmafwhm()[1]  # recalculates sigma with new resolution
                dct[species]['bounds'] = dct[species][
                    'mol'].bounds  # caclulates bounds
        return dct

    # ----------------------------------------------------------
    # -------------------PROGRAM BEGINS-------------------------
    # ----------------------------------------------------------

    if verbose is True:
        stime = ScriptTime()
        stime.printstart()

    n = check_integer(
        n,
        'number of scans to sum')  # checks integer input and converts to list

    if type(xlsx) != dict:
        if verbose is True:
            sys.stdout.write('Loading processing parameters from excel file')
            sys.stdout.flush()
        xlfile = XLSX(xlsx, verbose=verbose)
        sp = xlfile.pullrsimparams()
    else:  # if parameters were provided in place of an excel file
        sp = xlsx

    mskeys = ['+', '-']
    for key in sp:
        if 'formula' in sp[key] and sp[key][
                'formula'] is not None:  # if formula is specified
            sp[key]['mol'] = IPMolecule(
                sp[key]['formula'])  # create Molecule object
            sp[key]['bounds'] = sp[key]['mol'].calculate_bounds(
                bounds_confidence
            )  # generate bounds from molecule object with this confidence interval
    if verbose is True:
        sys.stdout.write(' DONE\n')

    rtime = {}  # empty dictionaries for time and tic
    tic = {}
    rd = False
    for mode in mskeys:  # look for existing positive and negative mode raw data
        try:
            modedata, modetime, modetic = xlfile.pullrsim('Raw Data (' + mode +
                                                          ')')
        except KeyError:
            continue
        except UnboundLocalError:  # catch for if pyrsir was not handed an excel file
            continue
        if verbose is True:
            sys.stdout.write(
                'Existing (%s) mode raw data were found, grabbing those values.'
                % mode)
            sys.stdout.flush()
        rd = True  # bool that rd is present
        modekey = 'raw' + mode
        sp.update(modedata)  # update sp dictionary with raw data
        for key in modedata:  # check for affinities
            if 'affin' not in sp[key]:
                sp[key]['affin'] = mode
        rtime[modekey] = list(modetime)  # update time list
        tic[modekey] = list(modetic)  # update tic list
        if verbose is True:
            sys.stdout.write(' DONE\n')

    # sp = prepformula(sp)
    newpeaks = False
    if rd is True:
        newsp = {}
        sum_spectra = None
        for key in sp:  # checks whether there is a MS species that does not have raw data
            if 'raw' not in sp[key]:
                newsp[key] = sp[key]  # create references in the namespace
        if len(newsp) is not 0:
            newpeaks = True
            if verbose is True:
                sys.stdout.write(
                    'Some peaks are not in the raw data, extracting these from raw file.\n'
                )
            ips = xlfile.pullmultispectrum(
                'Isotope Patterns'
            )  # pull predefined isotope patterns and add them to species
            for species in ips:  # set spectrum list
                sp[species]['spectrum'] = [
                    ips[species]['x'], ips[species]['y']
                ]
            mzml = mzML(filename)  # load mzML class
            sp = prepformula(sp)  # prep formula etc for summing
            newsp = prepformula(newsp)  # prep formula species for summing
            for species in newsp:
                if 'spectrum' not in newsp[species]:
                    newsp[species]['spectrum'] = Spectrum(
                        3, newsp[species]['bounds'][0],
                        newsp[species]['bounds'][1])
            newsp = mzml.pull_species_data(newsp)  # pull data
        else:
            if verbose is True:
                sys.stdout.write(
                    'No new peaks were specified. Proceeding directly to summing and normalization.\n'
                )

    if rd is False:  # if no raw data is present, process mzML file
        mzml = mzML(filename, verbose=verbose)  # load mzML class
        sp = prepformula(sp)
        sp, sum_spectra = mzml.pull_species_data(
            sp, combine_spectra)  # pull relevant data from mzML
        chroms = mzml.pull_chromatograms()  # pull chromatograms from mzML
        rtime = {}
        tic = {}
        for key in sp:  # compare predicted isotope patterns to the real spectrum and save standard error of the regression
            func = sp[key]['function']
            if mzml.functions[func]['type'] == 'MS':  # determine mode key
                if combine_spectra is True:
                    sp[key]['spectrum'] = sum_spectra[
                        sp[key]['function']].trim(
                            xbounds=sp[key]
                            ['bounds'])  # extract the spectrum object
                mode = 'raw' + mzml.functions[func]['mode']
            if mzml.functions[func]['type'] == 'UV':
                mode = 'rawUV'
            if mode not in rtime:  # if rtime and tic have not been pulled from that function
                rtime[mode] = mzml.functions[func]['timepoints']
                tic[mode] = mzml.functions[func]['tic']
            # if 'formula' in sp[key] and sp[key]['formula'] is not None:
            #     sp[key]['match'] = sp[key]['mol'].compare(sp[key]['spectrum'])
        if combine_spectra is True:
            for fn in sum_spectra:
                sum_spectra[fn] = sum_spectra[fn].trim(
                )  # convert Spectrum objects into x,y lists

    # if max(n) > 1: # run combine functions if n > 1
    for num in n:  # for each n to sum
        if verbose is True:
            sys.stdout.write('\r%d Summing species traces.' % num)
        sumkey = str(num) + 'sum'
        for key in sp:  # bin each species
            if sp[key]['affin'] in mskeys or mzml.functions[sp[key][
                    'function']]['type'] == 'MS':  # if species is MS related
                sp[key][sumkey] = bindata(num, sp[key]['raw'])
        for mode in mskeys:
            sumkey = str(num) + 'sum' + mode
            modekey = 'raw' + mode
            if modekey in rtime.keys():  # if there is data for that mode
                rtime[sumkey] = bindata(num, rtime[modekey], num)
                tic[sumkey] = bindata(num, tic[modekey])
    if verbose is True:
        sys.stdout.write(' DONE\n')
        sys.stdout.flush()
    # else:
    #    for key in sp: # create key for normalization
    #        sp[key]['1sum'] = sp[key]['raw']

    for num in n:  # normalize each peak's chromatogram
        if verbose is True:
            sys.stdout.write('\r%d Normalizing species traces.' % num)
            sys.stdout.flush()
        sumkey = str(num) + 'sum'
        normkey = str(num) + 'norm'
        for mode in mskeys:
            modekey = 'raw' + mode
            if modekey in rtime.keys():  # if there is data for that mode
                for key in sp:  # for each species
                    if sp[key]['affin'] in mskeys or mzml.functions[
                            sp[key]['function']][
                                'type'] == 'MS':  # if species has affinity
                        sp[key][normkey] = []
                        for ind, val in enumerate(sp[key][sumkey]):
                            # sp[key][normkey].append(val/(mzml.function[func]['tic'][ind]+0.01)) #+0.01 to avoid div/0 errors
                            sp[key][normkey].append(
                                val / (tic[sumkey + sp[key]['affin']][ind] +
                                       0.01))  # +0.01 to avoid div/0 errors
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if return_data is True:  # if data is to be used by another function, return the calculated data
        return mzml, sp, rtime, tic, chroms

    # import pickle #pickle objects (for troubleshooting)
    # pickle.dump(rtime,open("rtime.p","wb"))
    # pickle.dump(tic,open("tic.p","wb"))
    # pickle.dump(chroms,open("chroms.p","wb"))
    # pickle.dump(sp,open("sp.p","wb"))

    output()  # write data to excel file

    if verbose is True:
        sys.stdout.write('\rUpdating paramters')
        sys.stdout.flush()
    xlfile.updatersimparams(sp)  # update summing parameters
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if verbose is True:
        sys.stdout.write('\rSaving "%s" (this may take some time)' %
                         xlfile.bookname)
        sys.stdout.flush()
    xlfile.save()
    if verbose is True:
        sys.stdout.write(' DONE\n')

    if verbose is True:
        if verbose is True:
            sys.stdout.write('Plotting traces')
        if plot is True:
            plots()  # plots for quick review
        if verbose is True:
            sys.stdout.write(' DONE\n')
    if verbose is True:
        stime.printelapsed()