def run_search(config, IMS_dataset, sum_formulae, adducts, mz_list):
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    import time
    ### Runs the main pipeline
    # Get sum formula and predicted m/z peaks for molecules in database
    ppm = config['image_generation']['ppm']  # parts per million - a measure of how accurate the mass spectrometer is
    nlevels = config['image_generation']['nlevels']  # parameter for measure of chaos
    do_preprocessing = config['image_generation']['do_preprocessing']
    interp = config['image_generation']['smooth']  # note: read from the config but not passed to measure_of_chaos below, which is called with interp=None
    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}
    t0 = time.time()
    t_el = 0
    for adduct in adducts:
        print 'searching -> {}'.format(adduct)
        for ii,sum_formula in enumerate(sum_formulae):
            if adduct not in mz_list[sum_formula]:  # the adduct may be absent if it would give an impossible formula (TODO: handle this more cleanly?)
                # print '{} adduct not found for {}'.format(adduct, mz_list[sum_formula])
                continue
            if time.time() - t_el > 10.:
                t_el = time.time()
                print '{:3.2f} done in {:3.0f} seconds'.format(float(ii)/len(sum_formulae),time.time()-t0)
            # Allocate dicts if required
            if sum_formula not in measure_value_score:
                measure_value_score[sum_formula] = {}
            if sum_formula not in iso_correlation_score:
                iso_correlation_score[sum_formula] = {}
            if sum_formula not in iso_ratio_score:
                iso_ratio_score[sum_formula] = {}
            try:
                # 1. Generate ion images
                ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0],
                                                         ppm)  # for each spectrum, sum the intensity of all peaks within tol of mz_list
                if do_preprocessing:
                    apply_image_processing(config, ion_datacube)  # currently just supports hot-spot removal
                # 2. Spatial Chaos
                measure_value_score[sum_formula][adduct] = level_sets_measure.measure_of_chaos(
                    ion_datacube.xic_to_image(0), nlevels, interp=None, clean_im=False)[0]
                if measure_value_score[sum_formula][adduct] == 1:
                    measure_value_score[sum_formula][adduct] = 0
                # 3. Score correlation with monoiso
                if len(mz_list[sum_formula][adduct][1]) > 1:
                    iso_correlation_score[sum_formula][adduct] = isotope_image_correlation.isotope_image_correlation(
                        ion_datacube.xic, weights=mz_list[sum_formula][adduct][1][1:])
                else:  # only one isotope peak, so correlation doesn't make sense
                    iso_correlation_score[sum_formula][adduct] = 1
                # 4. Score isotope ratio
                iso_ratio_score[sum_formula][adduct] = isotope_pattern_match.isotope_pattern_match(
                    ion_datacube.xic, mz_list[sum_formula][adduct][1])
            except KeyError as e:
                print str(e)
                print "bad key in: \"{}\" \"{}\" ".format(sum_formula, adduct)
        output_results(config, measure_value_score, iso_correlation_score, iso_ratio_score, sum_formulae, [adduct], mz_list)
    return measure_value_score, iso_correlation_score, iso_ratio_score
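The three dictionaries returned above are combined multiplicatively into a single metabolite-signal match (MSM) score for one ion in plot_images further down (msm_score = measure_value_score * iso_correlation_score * iso_ratio_score). Below is a minimal sketch of applying the same combination to the returned dictionaries; combine_scores is an illustrative helper, not part of the original code.

def combine_scores(measure_value_score, iso_correlation_score, iso_ratio_score):
    # Illustrative helper (not in the original code): multiply the three
    # per-(sum formula, adduct) scores into one MSM score, mirroring the
    # msm_score product used in plot_images further down.
    msm = {}
    for sum_formula, adduct_scores in measure_value_score.items():
        msm[sum_formula] = {}
        for adduct in adduct_scores:
            msm[sum_formula][adduct] = (measure_value_score[sum_formula][adduct]
                                        * iso_correlation_score[sum_formula][adduct]
                                        * iso_ratio_score[sum_formula][adduct])
    return msm

# usage (illustrative):
# moc, iso_corr, iso_ratio = run_search(config, IMS_dataset, sum_formulae, adducts, mz_list)
# msm = combine_scores(moc, iso_corr, iso_ratio)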
Example 2
    def score_ratio(self, sum_formula, adduct, imgs, intensities):
        if sum_formula not in self.iso_ratio_score:
            self.iso_ratio_score[sum_formula] = {}
            self.monotone_pattern_score[sum_formula] = {}
        self.iso_ratio_score[sum_formula][adduct] = isotope_pattern_match(imgs, intensities)

        # sum each isotope image over the pixels where the monoisotopic image is non-zero
        not_null = imgs[0] > 0
        real_intensities = np.array([imgs[i][not_null].sum() for i in range(len(intensities))])
        normalized = real_intensities / (real_intensities.max() + 1e-16)
        if len(normalized) == 1:
            self.monotone_pattern_score[sum_formula][adduct] = 0.0
        else:
            # largest increase between consecutive isotope peaks; 0.0 if the envelope only decreases
            self.monotone_pattern_score[sum_formula][adduct] = max(np.diff(normalized).max(), 0.0)
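The monotone_pattern_score computed above is the largest increase between consecutive normalized isotope intensities: it stays at 0.0 when the isotope envelope decreases as expected and grows when a later peak is more intense than an earlier one. A small stand-alone illustration of the same computation (the input values are made up):

import numpy as np

def monotone_pattern_score(real_intensities):
    # Same computation as in score_ratio above, on a plain intensity vector.
    normalized = np.asarray(real_intensities, dtype=float)
    normalized = normalized / (normalized.max() + 1e-16)
    if len(normalized) == 1:
        return 0.0
    return max(np.diff(normalized).max(), 0.0)

print(monotone_pattern_score([100.0, 75.0, 50.0]))  # 0.0  -> envelope decreases as expected
print(monotone_pattern_score([100.0, 50.0, 75.0]))  # 0.25 -> third peak is more intense than the second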
def run_search(config):
    ### Runs the main pipeline
    # Get sum formula and predicted m/z peaks for molecules in database
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    # Parse dataset
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    IMS_dataset = inMemoryIMS_hdf5(config['file_inputs']['data_file'])
        
    ppm = config['image_generation']['ppm']  # parts per million - a measure of how accurate the mass spectrometer is
    nlevels = config['image_generation']['nlevels']  # parameter for measure of chaos
    q = config['image_generation']['q']  # parameter for hot-spot removal
    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}
    
    for sum_formula in sum_formulae:
        for adduct in adducts:
            # 1. Generate ion images
            ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0], ppm)  # for each spectrum, sum the intensity of all peaks within tol of mz_list
            ion_datacube.xic = hot_spot_removal(ion_datacube.xic, q)

            # 2. Spatial Chaos 
            if sum_formula not in measure_value_score:
                measure_value_score[sum_formula] = {}
            measure_value_score[sum_formula][adduct] = 1 - level_sets_measure.measure_of_chaos(
                ion_datacube.xic_to_image(0), nlevels, interp=False)[0]
            if measure_value_score[sum_formula][adduct] == 1:
                measure_value_score[sum_formula][adduct] = 0
            # only compare pixels with values in the monoisotopic image (otherwise large empty
            # areas give spuriously high correlation); the mask is computed here but not
            # currently applied to the other isotope images
            notnull_monoiso = ion_datacube.xic[0] > 0
            # 3. Score correlation with monoiso
            if sum_formula not in iso_correlation_score:
                iso_correlation_score[sum_formula] = {}
            if len(mz_list[sum_formula][adduct][1]) > 1:
                iso_correlation_score[sum_formula][adduct] = isotope_image_correlation.isotope_image_correlation(
                    ion_datacube.xic, weights=mz_list[sum_formula][adduct][1][1:])
            else:  # only one isotope peak, so correlation doesn't make sense
                iso_correlation_score[sum_formula][adduct] = 1
        
            # 4. Score isotope ratio
            if sum_formula not in iso_ratio_score:
                iso_ratio_score[sum_formula] = {}
            iso_ratio_score[sum_formula][adduct] = isotope_pattern_match.isotope_pattern_match(
                ion_datacube.xic, mz_list[sum_formula][adduct][1])
    return measure_value_score, iso_correlation_score, iso_ratio_score
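hot_spot_removal is not defined in this listing; judging from the inline hot-spot removal performed per image in plot_images below, it most likely clips each ion image at an upper percentile of its non-zero pixels. A sketch under that assumption (the body below is a guess, not the library implementation):

import numpy as np

def hot_spot_removal(xics, q):
    # Assumed behaviour: clip each extracted-ion image at the q-th percentile
    # of its non-zero pixels so a few very bright pixels do not dominate the
    # image measures; mirrors the inline percentile clipping in plot_images below.
    for xic in xics:
        notnull = xic > 0
        if notnull.any():
            cutoff = np.percentile(xic[notnull], q)
            xic[xic > cutoff] = cutoff
    return xics

Returning xics keeps the call site above (ion_datacube.xic = hot_spot_removal(ion_datacube.xic, q)) valid whether or not the clipping is done in place.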
def plot_images(ion_datacube, iso_spect, iso_max, q_val=99, c_map="hot"):
    import numpy as np
    import matplotlib.pyplot as plt
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match

    measure_value_score = 1 - level_sets_measure.measure_of_chaos(ion_datacube.xic_to_image(0), 30, interp="median")[0]
    # 3. Score correlation with monoiso
    if len(iso_spect[1]) > 1:
        iso_correlation_score = isotope_image_correlation.isotope_image_correlation(
            ion_datacube.xic, weights=iso_spect[1][1:]
        )
    else:  # only one isotope peak, so correlation doesn't make sense
        iso_correlation_score = 1
    iso_ratio_score = isotope_pattern_match.isotope_pattern_match(ion_datacube.xic, iso_spect[1])
    msm_score = measure_value_score * iso_correlation_score * iso_ratio_score

    ax = [
        plt.subplot2grid((2, 4), (0, 0)),
        plt.subplot2grid((2, 4), (0, 1)),
        plt.subplot2grid((2, 4), (0, 2)),
        plt.subplot2grid((2, 4), (0, 3)),
        plt.subplot2grid((2, 4), (1, 0), colspan=4, rowspan=1),
    ]
    for a in ax:
        a.cla()
    # plot images
    for ii in range(0, iso_max):
        im = ion_datacube.xic_to_image(ii)
        # hot-spot removal: clip at the q_val-th percentile of the non-zero pixels
        notnull = im > 0
        if notnull.any():
            im_q = np.percentile(im[notnull], q_val)
            im[im > im_q] = im_q

        ax[ii].imshow(im, cmap=c_map, interpolation="nearest")
        ax[ii].set_title("m/z: {:3.4f}".format(iso_spect[0][ii]))
    # plot spectrum
    notnull = ion_datacube.xic_to_image(0) > 0  # computed but not used below
    data_spect = [np.sum(ion_datacube.xic_to_image(ii)) for ii in range(0, iso_max)]
    data_spect = data_spect / np.linalg.norm(data_spect)
    iso_spect[1] = iso_spect[1] / np.linalg.norm(iso_spect[1])

    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], iso_spect[1][0:iso_max], "g")
    plt.title(
        "moc: {:3.4f} spat: {:3.2f} spec: {:3.2f} msm: {:3.3f}".format(
            measure_value_score, iso_correlation_score, iso_ratio_score, msm_score
        )
    )
    plt.setp(stemlines, linewidth=2, color="g")  # set stems  colors
    plt.setp(markerline, "markerfacecolor", "g", "markeredgecolor", "g")  # make points

    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], data_spect, "r")
    plt.setp(stemlines, linewidth=2, color="r")  # set stems colors
    plt.setp(markerline, "markerfacecolor", "r", "markeredgecolor", "r")  # make points

    # plot proxy artist
    proxies = []
    h, = plt.plot(iso_spect[0][0], [0], "-g")
    proxies.append(h)
    h, = plt.plot(iso_spect[0][0], [0], "-r")
    proxies.append(h)
    ax[4].legend(proxies, ("predicted pattern", "data pattern"), numpoints=1)
    return ax
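A usage sketch for plot_images, wired to the objects used in run_search above. The mz_list layout ([0] = isotope m/z values, [1] = predicted intensities) and the ppm config key follow the code above; the sum formula and adduct are hypothetical placeholders, and iso_max is capped at 4 because the figure has four image panels.

import matplotlib.pyplot as plt

# illustrative wiring; assumes IMS_dataset, mz_list and config exist as in run_search above
sum_formula, adduct = 'C23H45NO4', '+H'   # hypothetical database entry
iso_spect = mz_list[sum_formula][adduct]  # [0] = isotope m/z values, [1] = predicted intensities
ion_datacube = IMS_dataset.get_ion_image(iso_spect[0], config['image_generation']['ppm'])
ax = plot_images(ion_datacube, iso_spect, iso_max=4, q_val=99, c_map='hot')
plt.show()

Note that plot_images normalises iso_spect[1] in place, so pass a copy if the predicted intensities are needed again afterwards.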
Example 6
    def score_ratio(self, sum_formula, adduct, imgs, intensities):
        if sum_formula not in self.iso_ratio_score:
            self.iso_ratio_score[sum_formula] = {}
        self.iso_ratio_score[sum_formula][adduct] = isotope_pattern_match(imgs, intensities)