# Example #1 (score: 0)
 def score_chaos(self, sum_formula, adduct, img):
     """Score the spatial chaos of *img* and cache it per (sum_formula, adduct).

     Stores ``1 - measure_of_chaos(...)`` in ``self.measure_value_score``;
     a raw score of exactly 1 is mapped to 0.
     """
     # measure_of_chaos returns a tuple; element [0] holds the chaos value
     score = 1 - measure_of_chaos(img, self.nlevels, interp=False)[0]
     if score == 1:
         # NOTE(review): a score of exactly 1 appears to flag a degenerate
         # image and is zeroed out -- confirm this sentinel convention
         score = 0
     self.measure_value_score.setdefault(sum_formula, {})[adduct] = score
def run_search(config, IMS_dataset, sum_formulae, adducts, mz_list):
    """Run the main annotation pipeline over every (sum formula, adduct) pair.

    For each adduct: generates ion images, scores them (spatial chaos,
    isotope image correlation, isotope pattern match) and writes per-adduct
    results via ``output_results``.

    Args:
        config: nested settings dict (reads the 'image_generation' section).
        IMS_dataset: dataset object exposing ``get_ion_image(mz_array, ppm)``.
        sum_formulae: iterable of sum-formula strings.
        adducts: iterable of adduct strings.
        mz_list: mapping sum_formula -> adduct -> (mz_array, intensity_array).

    Returns:
        Tuple ``(measure_value_score, iso_correlation_score, iso_ratio_score)``,
        each a dict keyed ``[sum_formula][adduct]``.
    """
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    import time
    ppm = config['image_generation']['ppm']  # mass accuracy tolerance in parts per million
    nlevels = config['image_generation']['nlevels']  # number of levels for measure of chaos
    do_preprocessing = config['image_generation']['do_preprocessing']
    # NOTE(review): 'smooth' is read but never used below (measure_of_chaos is
    # called with interp=None) -- confirm whether it should be passed through.
    interp = config['image_generation']['smooth']
    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}
    t0 = time.time()
    t_el = 0  # timestamp of the last progress report
    for adduct in adducts:
        print('searching -> {}'.format(adduct))
        for ii, sum_formula in enumerate(sum_formulae):
            # Adduct may be absent if it would make an impossible formula
            if adduct not in mz_list[sum_formula]:
                continue
            # Emit a progress line at most every 10 seconds
            if time.time() - t_el > 10.:
                t_el = time.time()
                print('{:3.2f} done in {:3.0f} seconds'.format(float(ii) / len(sum_formulae), time.time() - t0))
            # Allocate per-formula dicts on first use
            measure_value_score.setdefault(sum_formula, {})
            iso_correlation_score.setdefault(sum_formula, {})
            iso_ratio_score.setdefault(sum_formula, {})
            try:
                # 1. Generate ion images: for each spectrum, sum the intensity
                # of all peaks within ppm tolerance of the target m/z values
                ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0], ppm)
                if do_preprocessing:
                    apply_image_processing(config, ion_datacube)  # currently just supports hot-spot removal
                # 2. Spatial chaos of the monoisotopic image
                measure_value_score[sum_formula][adduct] = level_sets_measure.measure_of_chaos(
                    ion_datacube.xic_to_image(0), nlevels, interp=None, clean_im=False)[0]
                if measure_value_score[sum_formula][adduct] == 1:
                    # a score of exactly 1 marks a degenerate image; zero it out
                    measure_value_score[sum_formula][adduct] = 0
                # 3. Correlation of the isotope images with the monoisotopic one
                if len(mz_list[sum_formula][adduct][1]) > 1:
                    iso_correlation_score[sum_formula][adduct] = isotope_image_correlation.isotope_image_correlation(
                        ion_datacube.xic, weights=mz_list[sum_formula][adduct][1][1:])
                else:  # only one isotope peak, so correlation doesn't make sense
                    iso_correlation_score[sum_formula][adduct] = 1
                # 4. Match between measured and predicted isotope ratios
                iso_ratio_score[sum_formula][adduct] = isotope_pattern_match.isotope_pattern_match(
                    ion_datacube.xic, mz_list[sum_formula][adduct][1])
            except KeyError as e:
                print(str(e))
                print("bad key in: \"{}\" \"{}\" ".format(sum_formula, adduct))
        # Write out the results for this adduct as soon as it completes
        output_results(config, measure_value_score, iso_correlation_score, iso_ratio_score, sum_formulae, [adduct], mz_list)
    return measure_value_score, iso_correlation_score, iso_ratio_score
def run_search(config):
    """Run the main pipeline for every sum formula / adduct pair.

    Generates isotope patterns from *config*, loads the dataset, then scores
    each ion image on spatial chaos, isotope image correlation and isotope
    pattern match.

    Args:
        config: nested settings dict (reads 'file_inputs' and
            'image_generation' sections).

    Returns:
        Tuple ``(measure_value_score, iso_correlation_score, iso_ratio_score)``,
        each a dict keyed ``[sum_formula][adduct]``.
    """
    # Get sum formula and predicted m/z peaks for molecules in database
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    # Parse dataset
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    IMS_dataset = inMemoryIMS_hdf5(config['file_inputs']['data_file'])

    ppm = config['image_generation']['ppm']  # mass accuracy tolerance in parts per million
    nlevels = config['image_generation']['nlevels']  # parameter for measure of chaos
    q = config['image_generation']['q']  # percentile used for hot-spot removal
    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}

    for sum_formula in sum_formulae:
        for adduct in adducts:
            # 1. Generate ion images: for each spectrum, sum the intensity of
            # all peaks within ppm tolerance of the target m/z values
            ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0], ppm)
            ion_datacube.xic = hot_spot_removal(ion_datacube.xic, q)

            # 2. Spatial chaos of the monoisotopic image
            measure_value_score.setdefault(sum_formula, {})
            measure_value_score[sum_formula][adduct] = 1 - level_sets_measure.measure_of_chaos(
                ion_datacube.xic_to_image(0), nlevels, interp=False)[0]
            if measure_value_score[sum_formula][adduct] == 1:
                # a score of exactly 1 marks a degenerate image; zero it out
                measure_value_score[sum_formula][adduct] = 0

            # 3. Correlation of the isotope images with the monoisotopic one.
            # NOTE(review): ideally only pixels with signal in the monoisotopic
            # image would be compared (large empty areas correlate spuriously);
            # that masking was sketched here but never applied.
            iso_correlation_score.setdefault(sum_formula, {})
            if len(mz_list[sum_formula][adduct][1]) > 1:
                iso_correlation_score[sum_formula][adduct] = isotope_image_correlation.isotope_image_correlation(
                    ion_datacube.xic, weights=mz_list[sum_formula][adduct][1][1:])
            else:  # only one isotope peak, so correlation doesn't make sense
                iso_correlation_score[sum_formula][adduct] = 1

            # 4. Match between measured and predicted isotope ratios
            iso_ratio_score.setdefault(sum_formula, {})
            iso_ratio_score[sum_formula][adduct] = isotope_pattern_match.isotope_pattern_match(
                ion_datacube.xic, mz_list[sum_formula][adduct][1])
    return measure_value_score, iso_correlation_score, iso_ratio_score
# Example #4 (score: 0)
def mzImages(filename_in, save_dir):
    """Export greyscale PNG ion images for all high-frequency peaks.

    Detects centroids on the dataset's frequency spectrum, computes the
    measure of chaos (MoC) for each sufficiently frequent peak, and writes a
    PNG per image whose MoC exceeds a threshold; finally spot-checks one
    hard-coded m/z value.

    Args:
        filename_in: path to the hdf5-format IMS dataset.
        save_dir: directory to write '<mz>_<moc>.png' files into.
    """
    import sys
    # NOTE(review): hard-coded user-specific path; works only on the original
    # author's machine -- should come from configuration.
    sys.path.append('C:\\Users\\Luca Rappez\\Desktop\\python_codebase\\')
    from pyMS.centroid_detection import gradient
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure
    import matplotlib.pyplot as plt
    import numpy as np
    import bokeh as bk
    from bokeh.plotting import output_notebook
    output_notebook()

    print('step1')
    # Parse data
    IMS_dataset = inMemoryIMS_hdf5(filename_in)
    print('In memory')
    ppm = 1.5  # mass accuracy tolerance in parts per million

    # Generate summary (peak frequency) spectrum
    hist_axis, freq_spec = IMS_dataset.generate_summary_spectrum(summary_type='freq', ppm=ppm / 2)
    print(len(hist_axis))

    # Centroid detection of frequency spectrum
    mz_list, count_list, idx_list = gradient(np.asarray(hist_axis), np.asarray(freq_spec), weighted_bins=2)

    c_thresh = 0.05   # minimum peak frequency to consider
    moc_thresh = 0.99  # minimum measure-of-chaos score to export
    print(np.sum(count_list > c_thresh))

    # Calculate MoC for images of all sufficiently frequent peaks
    nlevels = 30
    im_list = {}
    for ii, c in enumerate(count_list):
        if c > c_thresh:
            ion_image = IMS_dataset.get_ion_image(np.asarray([mz_list[ii], ]), ppm)
            im = ion_image.xic_to_image(0)
            m, im_moc, levels, nobjs = level_sets_measure.measure_of_chaos(im, nlevels, interp='median')
            m = 1 - m  # invert so that higher means more spatially structured
            im_list[mz_list[ii]] = {'image': im, 'moc': m, 'freq': c}

    from pySpatialMetabolomics.tools import colourmaps
    c_map = colourmaps.get_colormap('grey')  # if black images: open->save->rerun
    c_pal = [[int(255 * cc) for cc in c_map(c)] for c in range(0, 254)]

    # Export all images whose MoC passes the threshold
    import png as pypng
    for mz in im_list:
        if im_list[mz]['moc'] > moc_thresh:
            with open('{}/{}_{}.png'.format(save_dir, mz, im_list[mz]['moc']), 'wb') as f_out:
                im_out = im_list[mz]['image']
                im_out = 254 * im_out / np.max(im_out)  # scale into the 0-254 palette range
                n_rows, n_cols = np.shape(im_out)
                # pypng.Writer takes (width, height); use a distinct name so the
                # shape variables are not shadowed by the writer object
                png_writer = pypng.Writer(n_cols, n_rows, palette=c_pal, bitdepth=8)
                png_writer.write(f_out, im_out)

    # Spot-check a single m/z value
    mz = 333.334188269
    ion_image = IMS_dataset.get_ion_image(np.asarray([mz, ]), ppm)
    im_out = ion_image.xic_to_image(0)
    # BUGFIX: previously scored 'im' (the stale last image from the loop above,
    # or undefined if the loop never assigned it) instead of 'im_out'
    m, im_moc, levels, nobjs = level_sets_measure.measure_of_chaos(im_out, nlevels, interp='')
    print(1 - m)
    im_out = 254. * im_out / np.max(im_out)
    print(mz)
# Example #5 (score: 0)
def run_search(config, IMS_dataset, sum_formulae, adducts, mz_list):
    """Run the main annotation pipeline over every (sum formula, adduct) pair.

    For each adduct: generates ion images, scores them (spatial chaos,
    isotope image correlation, isotope pattern match) and writes per-adduct
    results via ``output_results``.

    Args:
        config: nested settings dict (reads the 'image_generation' section).
        IMS_dataset: dataset object exposing ``get_ion_image(mz_array, ppm)``.
        sum_formulae: iterable of sum-formula strings.
        adducts: iterable of adduct strings.
        mz_list: mapping sum_formula -> adduct -> (mz_array, intensity_array).

    Returns:
        Tuple ``(measure_value_score, iso_correlation_score, iso_ratio_score)``,
        each a dict keyed ``[sum_formula][adduct]``.
    """
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    import time
    ppm = config['image_generation']['ppm']  # mass accuracy tolerance in parts per million
    nlevels = config['image_generation']['nlevels']  # number of levels for measure of chaos
    do_preprocessing = config['image_generation']['do_preprocessing']
    # NOTE(review): 'smooth' is read but never used below (measure_of_chaos is
    # called with interp=None) -- confirm whether it should be passed through.
    interp = config['image_generation']['smooth']
    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}
    t0 = time.time()
    t_el = 0  # timestamp of the last progress report
    for adduct in adducts:
        print('searching -> {}'.format(adduct))
        for ii, sum_formula in enumerate(sum_formulae):
            # Adduct may be absent if it would make an impossible formula
            if adduct not in mz_list[sum_formula]:
                continue
            # Emit a progress line at most every 10 seconds
            if time.time() - t_el > 10.:
                t_el = time.time()
                print('{:3.2f} done in {:3.0f} seconds'.format(float(ii) / len(sum_formulae), time.time() - t0))
            # Allocate per-formula dicts on first use
            measure_value_score.setdefault(sum_formula, {})
            iso_correlation_score.setdefault(sum_formula, {})
            iso_ratio_score.setdefault(sum_formula, {})
            try:
                # 1. Generate ion images: for each spectrum, sum the intensity
                # of all peaks within ppm tolerance of the target m/z values
                ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0], ppm)
                if do_preprocessing:
                    apply_image_processing(config, ion_datacube)  # currently just supports hot-spot removal
                # 2. Spatial chaos of the monoisotopic image
                measure_value_score[sum_formula][adduct] = level_sets_measure.measure_of_chaos(
                    ion_datacube.xic_to_image(0), nlevels, interp=None, clean_im=False)[0]
                if measure_value_score[sum_formula][adduct] == 1:
                    # a score of exactly 1 marks a degenerate image; zero it out
                    measure_value_score[sum_formula][adduct] = 0
                # 3. Correlation of the isotope images with the monoisotopic one
                if len(mz_list[sum_formula][adduct][1]) > 1:
                    iso_correlation_score[sum_formula][adduct] = isotope_image_correlation.isotope_image_correlation(
                        ion_datacube.xic, weights=mz_list[sum_formula][adduct][1][1:])
                else:  # only one isotope peak, so correlation doesn't make sense
                    iso_correlation_score[sum_formula][adduct] = 1
                # 4. Match between measured and predicted isotope ratios
                iso_ratio_score[sum_formula][adduct] = isotope_pattern_match.isotope_pattern_match(
                    ion_datacube.xic, mz_list[sum_formula][adduct][1])
            except KeyError as e:
                print(str(e))
                print("bad key in: \"{}\" \"{}\" ".format(sum_formula, adduct))
        # Write out the results for this adduct as soon as it completes
        output_results(config, measure_value_score, iso_correlation_score,
                       iso_ratio_score, sum_formulae, [adduct], mz_list)
    return measure_value_score, iso_correlation_score, iso_ratio_score
def plot_images(ion_datacube, iso_spect, iso_max, q_val=99, c_map="hot"):
    """Plot the first *iso_max* isotope ion images plus the predicted vs.
    measured isotope pattern, annotated with the individual and MSM scores.

    Args:
        ion_datacube: datacube exposing ``xic`` and ``xic_to_image(i)``.
        iso_spect: ``[mz_array, intensity_array]`` predicted isotope pattern.
            NOTE(review): ``iso_spect[1]`` is L2-normalised IN PLACE below --
            confirm callers do not rely on the original values.
        iso_max: number of isotope images/peaks to plot (max 4 image panels).
        q_val: percentile used for hot-spot clipping of each image.
        c_map: matplotlib colormap name for the image panels.

    Returns:
        List of the five matplotlib axes used.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match

    measure_value_score = 1 - level_sets_measure.measure_of_chaos(ion_datacube.xic_to_image(0), 30, interp="median")[0]
    # Score correlation of the isotope images with the monoisotopic one
    if len(iso_spect[1]) > 1:
        iso_correlation_score = isotope_image_correlation.isotope_image_correlation(
            ion_datacube.xic, weights=iso_spect[1][1:]
        )
    else:  # only one isotope peak, so correlation doesn't make sense
        iso_correlation_score = 1
    iso_ratio_score = isotope_pattern_match.isotope_pattern_match(ion_datacube.xic, iso_spect[1])
    # Combined metabolite-signal-match score
    msm_score = measure_value_score * iso_correlation_score * iso_ratio_score

    # Four image panels on top, one full-width spectrum panel below
    ax = [
        plt.subplot2grid((2, 4), (0, 0)),
        plt.subplot2grid((2, 4), (0, 1)),
        plt.subplot2grid((2, 4), (0, 2)),
        plt.subplot2grid((2, 4), (0, 3)),
        plt.subplot2grid((2, 4), (1, 0), colspan=4, rowspan=1),
    ]
    for a in ax:
        a.cla()
    # Plot the ion images with hot-spot clipping at the q_val percentile
    for ii in range(0, iso_max):
        im = ion_datacube.xic_to_image(ii)
        notnull = im > 0
        if notnull.any():  # skip clipping for completely empty images
            im_q = np.percentile(im[notnull], q_val)
            im[im > im_q] = im_q
        ax[ii].imshow(im, cmap=c_map, interpolation="nearest")
        ax[ii].set_title("m/z: {:3.4f}".format(iso_spect[0][ii]))
    # Plot predicted vs. measured isotope patterns, both L2-normalised
    data_spect = [np.sum(ion_datacube.xic_to_image(ii)) for ii in range(0, iso_max)]
    data_spect = data_spect / np.linalg.norm(data_spect)
    iso_spect[1] = iso_spect[1] / np.linalg.norm(iso_spect[1])

    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], iso_spect[1][0:iso_max], "g")
    plt.title(
        "moc: {:3.4f} spat: {:3.2f} spec: {:3.2f} msm: {:3.3f}".format(
            measure_value_score, iso_correlation_score, iso_ratio_score, msm_score
        )
    )
    plt.setp(stemlines, linewidth=2, color="g")  # predicted pattern in green
    plt.setp(markerline, "markerfacecolor", "g", "markeredgecolor", "g")

    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], data_spect, "r")
    plt.setp(stemlines, linewidth=2, color="r")  # measured pattern in red
    plt.setp(markerline, "markerfacecolor", "r", "markeredgecolor", "r")

    # Proxy artists so the legend shows one line per pattern
    proxies = []
    h, = plt.plot(iso_spect[0][0], [0], "-g")
    proxies.append(h)
    h, = plt.plot(iso_spect[0][0], [0], "-r")
    proxies.append(h)
    ax[4].legend(proxies, ("predicted pattern", "data pattern"), numpoints=1)
    return ax
# Example #7 (score: 0)
def plot_images(ion_datacube, iso_spect, iso_max, q_val=99, c_map='hot'):
    """Plot the first *iso_max* isotope ion images plus the predicted vs.
    measured isotope pattern, annotated with the individual and MSM scores.

    Args:
        ion_datacube: datacube exposing ``xic`` and ``xic_to_image(i)``.
        iso_spect: ``[mz_array, intensity_array]`` predicted isotope pattern.
            NOTE(review): ``iso_spect[1]`` is L2-normalised IN PLACE below --
            confirm callers do not rely on the original values.
        iso_max: number of isotope images/peaks to plot (max 4 image panels).
        q_val: percentile used for hot-spot clipping of each image.
        c_map: matplotlib colormap name for the image panels.

    Returns:
        List of the five matplotlib axes used.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    measure_value_score = 1 - level_sets_measure.measure_of_chaos(
        ion_datacube.xic_to_image(0), 30, interp="median")[0]
    # Score correlation of the isotope images with the monoisotopic one
    if len(iso_spect[1]) > 1:
        iso_correlation_score = isotope_image_correlation.isotope_image_correlation(
            ion_datacube.xic, weights=iso_spect[1][1:])
    else:  # only one isotope peak, so correlation doesn't make sense
        iso_correlation_score = 1
    iso_ratio_score = isotope_pattern_match.isotope_pattern_match(
        ion_datacube.xic, iso_spect[1])
    # Combined metabolite-signal-match score
    msm_score = measure_value_score * iso_correlation_score * iso_ratio_score

    # Four image panels on top, one full-width spectrum panel below
    ax = [
        plt.subplot2grid((2, 4), (0, 0)),
        plt.subplot2grid((2, 4), (0, 1)),
        plt.subplot2grid((2, 4), (0, 2)),
        plt.subplot2grid((2, 4), (0, 3)),
        plt.subplot2grid((2, 4), (1, 0), colspan=4, rowspan=1)
    ]
    for a in ax:
        a.cla()
    # Plot the ion images with hot-spot clipping at the q_val percentile
    for ii in range(0, iso_max):
        im = ion_datacube.xic_to_image(ii)
        notnull = im > 0
        if notnull.any():  # skip clipping for completely empty images
            im_q = np.percentile(im[notnull], q_val)
            im[im > im_q] = im_q
        ax[ii].imshow(im, cmap=c_map, interpolation='nearest')
        ax[ii].set_title('m/z: {:3.4f}'.format(iso_spect[0][ii]))
    # Plot predicted vs. measured isotope patterns, both L2-normalised
    data_spect = [
        np.sum(ion_datacube.xic_to_image(ii)) for ii in range(0, iso_max)
    ]
    data_spect = data_spect / np.linalg.norm(data_spect)
    iso_spect[1] = iso_spect[1] / np.linalg.norm(iso_spect[1])

    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max],
                                                 iso_spect[1][0:iso_max], 'g')
    plt.title("moc: {:3.4f} spat: {:3.2f} spec: {:3.2f} msm: {:3.3f}".format(
        measure_value_score, iso_correlation_score, iso_ratio_score,
        msm_score))
    plt.setp(stemlines, linewidth=2, color='g')  # predicted pattern in green
    plt.setp(markerline, 'markerfacecolor', 'g', 'markeredgecolor', 'g')

    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max],
                                                 data_spect, 'r')
    plt.setp(stemlines, linewidth=2, color='r')  # measured pattern in red
    plt.setp(markerline, 'markerfacecolor', 'r', 'markeredgecolor', 'r')

    # Proxy artists so the legend shows one line per pattern
    proxies = []
    h, = plt.plot(iso_spect[0][0], [0], '-g')
    proxies.append(h)
    h, = plt.plot(iso_spect[0][0], [0], '-r')
    proxies.append(h)
    ax[4].legend(proxies, ('predicted pattern', 'data pattern'), numpoints=1)
    return ax