def run_search(config, IMS_dataset, sum_formulae, adducts, mz_list):
    """Score every (sum formula, adduct) pair in an IMS dataset.

    For each adduct, iterates over all sum formulae, generates the ion
    image for the predicted m/z peaks and computes three scores per pair:
    spatial chaos, isotope-image correlation and isotope-pattern match.
    Results for each adduct are flushed via ``output_results`` as soon as
    that adduct's pass completes.

    Returns a 3-tuple of nested dicts keyed ``[sum_formula][adduct]``:
    ``(measure_value_score, iso_correlation_score, iso_ratio_score)``.
    """
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    import time

    gen_cfg = config['image_generation']
    ppm = gen_cfg['ppm']                 # parts per million - mass accuracy of the spectrometer
    nlevels = gen_cfg['nlevels']         # parameter for measure of chaos
    do_preprocessing = gen_cfg['do_preprocessing']
    interp = gen_cfg['smooth']  # NOTE(review): read but never used below; measure_of_chaos is called with interp=None -- confirm intended

    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}

    start = time.time()
    last_report = 0
    for adduct in adducts:
        print('searching -> {}'.format(adduct))
        for idx, sum_formula in enumerate(sum_formulae):
            # An adduct may be absent if it would make an impossible
            # formula; such pairs are simply skipped.
            if adduct not in mz_list[sum_formula]:
                continue
            # Throttled progress report: at most once every 10 seconds.
            if time.time() - last_report > 10.:
                last_report = time.time()
                print('{:3.2f} done in {:3.0f} seconds'.format(float(idx) / len(sum_formulae), time.time() - start))
            # Lazily allocate the per-formula result dicts.
            formula_chaos = measure_value_score.setdefault(sum_formula, {})
            formula_corr = iso_correlation_score.setdefault(sum_formula, {})
            formula_ratio = iso_ratio_score.setdefault(sum_formula, {})
            try:
                peaks = mz_list[sum_formula][adduct]
                # 1. Ion image: for each spectrum, sum the intensity of
                #    all peaks within `ppm` of the predicted m/z values.
                ion_datacube = IMS_dataset.get_ion_image(peaks[0], ppm)
                if do_preprocessing:
                    # Currently just supports hot-spot removal.
                    apply_image_processing(config, ion_datacube)
                # 2. Spatial chaos of the monoisotopic image.
                chaos = level_sets_measure.measure_of_chaos(
                    ion_datacube.xic_to_image(0), nlevels, interp=None, clean_im=False)[0]
                # A score of exactly 1 is treated as degenerate and zeroed.
                formula_chaos[adduct] = 0 if chaos == 1 else chaos
                # 3. Correlation of isotope images with the monoisotopic
                #    one; meaningless with a single peak, so default to 1.
                if len(peaks[1]) > 1:
                    formula_corr[adduct] = isotope_image_correlation.isotope_image_correlation(
                        ion_datacube.xic, weights=peaks[1][1:])
                else:
                    formula_corr[adduct] = 1
                # 4. Agreement between measured and predicted isotope ratios.
                formula_ratio[adduct] = isotope_pattern_match.isotope_pattern_match(
                    ion_datacube.xic, peaks[1])
            except KeyError as e:
                print(str(e))
                print('bad key in: "{}" "{}" '.format(sum_formula, adduct))
        # Persist everything scored so far for this adduct.
        output_results(config, measure_value_score, iso_correlation_score,
                       iso_ratio_score, sum_formulae, [adduct], mz_list)
    return measure_value_score, iso_correlation_score, iso_ratio_score
def score_ratio(self, sum_formula, adduct, imgs, intensities):
    """Score how well the measured isotope images match `intensities`.

    Stores two values under ``[sum_formula][adduct]``:
    - ``iso_ratio_score``: isotope_pattern_match of the images against
      the predicted intensities;
    - ``monotone_pattern_score``: the largest increase between
      consecutive normalised measured intensities (0.0 for a single
      peak or a monotonically decreasing pattern).
    """
    if sum_formula not in self.iso_ratio_score:
        self.iso_ratio_score[sum_formula] = {}
        self.monotone_pattern_score[sum_formula] = {}
    self.iso_ratio_score[sum_formula][adduct] = isotope_pattern_match(imgs, intensities)

    # Only pixels lit in the monoisotopic image contribute to the ratios.
    monoiso_mask = imgs[0] > 0
    measured = np.array([imgs[k][monoiso_mask].sum() for k in range(len(intensities))])
    # The epsilon guards against division by zero for all-empty images.
    scaled = measured / (measured.max() + 1e-16)
    if len(scaled) == 1:
        score = 0.0
    else:
        # Positive when some isotope peak is *stronger* than its predecessor.
        score = max(np.diff(scaled).max(), 0.0)
    self.monotone_pattern_score[sum_formula][adduct] = score
def run_search(config):
    """Run the full annotation pipeline driven entirely by `config`.

    Generates isotope patterns for the target database, loads the IMS
    dataset from HDF5, and for every (sum formula, adduct) pair computes:
    1. an ion image (hot-spot removed at percentile ``q``),
    2. a measure-of-chaos score (``1 - chaos``, degenerate 1 mapped to 0),
    3. the isotope-image correlation with the monoisotopic peak,
    4. the isotope-pattern match score.

    Returns ``(measure_value_score, iso_correlation_score,
    iso_ratio_score)``, each a nested dict keyed ``[sum_formula][adduct]``.
    """
    # Get sum formulae and predicted m/z peaks for molecules in database.
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    # Parse dataset.
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    IMS_dataset = inMemoryIMS_hdf5(config['file_inputs']['data_file'])

    ppm = config['image_generation']['ppm']          # parts per million - mass accuracy of the spectrometer
    nlevels = config['image_generation']['nlevels']  # parameter for measure of chaos
    q = config['image_generation']['q']              # hot-spot removal percentile

    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}
    for sum_formula in sum_formulae:
        for adduct in adducts:
            # 1. Ion image: for each spectrum, sum the intensity of all
            #    peaks within `ppm` of the predicted m/z values.
            ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0], ppm)
            ion_datacube.xic = hot_spot_removal(ion_datacube.xic, q)
            # 2. Spatial chaos (flipped so that higher is better).
            chaos = 1 - level_sets_measure.measure_of_chaos(
                ion_datacube.xic_to_image(0), nlevels, interp=False)[0]
            # A score of exactly 1 is degenerate (e.g. empty image) -> zero it.
            measure_value_score.setdefault(sum_formula, {})[adduct] = 0 if chaos == 1 else chaos
            # 3. Correlation with the monoisotopic image; undefined for a
            #    single-peak pattern, which gets a neutral score of 1.
            if len(mz_list[sum_formula][adduct][1]) > 1:
                corr = isotope_image_correlation.isotope_image_correlation(
                    ion_datacube.xic, weights=mz_list[sum_formula][adduct][1][1:])
            else:
                corr = 1
            iso_correlation_score.setdefault(sum_formula, {})[adduct] = corr
            # 4. Measured vs predicted isotope ratios.
            iso_ratio_score.setdefault(sum_formula, {})[adduct] = \
                isotope_pattern_match.isotope_pattern_match(
                    ion_datacube.xic, mz_list[sum_formula][adduct][1])
    return measure_value_score, iso_correlation_score, iso_ratio_score
def run_search(config, IMS_dataset, sum_formulae, adducts, mz_list):
    """Main pipeline: score each (sum formula, adduct) pair.

    Walks all adducts, and for every sum formula that has the adduct in
    ``mz_list``, computes the measure-of-chaos, isotope-image-correlation
    and isotope-pattern-match scores, writing per-adduct results out with
    ``output_results`` at the end of each adduct pass.

    Returns ``(measure_value_score, iso_correlation_score,
    iso_ratio_score)`` as nested dicts keyed ``[sum_formula][adduct]``.
    """
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match
    import time

    # parts per million - mass accuracy of the spectrometer
    ppm = config['image_generation']['ppm']
    # parameter for measure of chaos
    nlevels = config['image_generation']['nlevels']
    do_preprocessing = config['image_generation']['do_preprocessing']
    # NOTE(review): assigned but never passed on; measure_of_chaos below
    # uses interp=None -- confirm whether 'smooth' should be honoured.
    interp = config['image_generation']['smooth']

    measure_value_score = {}
    iso_correlation_score = {}
    iso_ratio_score = {}

    t_start = time.time()
    t_last = 0
    for adduct in adducts:
        print('searching -> {}'.format(adduct))
        for n, sum_formula in enumerate(sum_formulae):
            # Skip adducts that would make an impossible formula (absent
            # from mz_list for this sum formula).
            if adduct not in mz_list[sum_formula]:
                continue
            # Emit a progress line at most every 10 seconds.
            if time.time() - t_last > 10.:
                t_last = time.time()
                print('{:3.2f} done in {:3.0f} seconds'.format(float(n) / len(sum_formulae), time.time() - t_start))
            # Make sure every result dict has a slot for this formula.
            for scores in (measure_value_score, iso_correlation_score, iso_ratio_score):
                if sum_formula not in scores:
                    scores[sum_formula] = {}
            try:
                # 1. Generate the ion image: per spectrum, sum intensity
                #    of all peaks within `ppm` of the predicted m/z list.
                ion_datacube = IMS_dataset.get_ion_image(mz_list[sum_formula][adduct][0], ppm)
                if do_preprocessing:
                    # Currently just supports hot-spot removal.
                    apply_image_processing(config, ion_datacube)
                # 2. Spatial chaos of the monoisotopic image.
                measure_value_score[sum_formula][adduct] = level_sets_measure.measure_of_chaos(
                    ion_datacube.xic_to_image(0), nlevels, interp=None, clean_im=False)[0]
                if measure_value_score[sum_formula][adduct] == 1:
                    # An exact 1 marks a degenerate image; zero it out.
                    measure_value_score[sum_formula][adduct] = 0
                # 3. Correlate isotope images with the monoisotopic one.
                iso_peaks = mz_list[sum_formula][adduct][1]
                if len(iso_peaks) > 1:
                    iso_correlation_score[sum_formula][adduct] = \
                        isotope_image_correlation.isotope_image_correlation(
                            ion_datacube.xic, weights=iso_peaks[1:])
                else:
                    # Only one isotope peak, so correlation doesn't make sense.
                    iso_correlation_score[sum_formula][adduct] = 1
                # 4. Score the isotope ratio pattern.
                iso_ratio_score[sum_formula][adduct] = \
                    isotope_pattern_match.isotope_pattern_match(ion_datacube.xic, iso_peaks)
            except KeyError as e:
                print(str(e))
                print('bad key in: "{}" "{}" '.format(sum_formula, adduct))
        # Write out this adduct's results before moving to the next one.
        output_results(config, measure_value_score, iso_correlation_score,
                       iso_ratio_score, sum_formulae, [adduct], mz_list)
    return measure_value_score, iso_correlation_score, iso_ratio_score
def plot_images(ion_datacube, iso_spect, iso_max, q_val=99, c_map="hot"):
    """Plot the first `iso_max` isotope ion images and a stem spectrum.

    Builds a 2x4 grid: the top row holds up to four hot-spot-clipped ion
    images, the bottom row a stem plot comparing the predicted isotope
    pattern (green) against the measured one (red), both L2-normalised.
    The title reports the chaos, spatial, spectral and combined MSM
    scores computed from `ion_datacube`.

    Returns the list of five axes used.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match

    # Scores are recomputed here so they can be shown in the title.
    measure_value_score = 1 - level_sets_measure.measure_of_chaos(
        ion_datacube.xic_to_image(0), 30, interp="median")[0]
    if len(iso_spect[1]) > 1:
        iso_correlation_score = isotope_image_correlation.isotope_image_correlation(
            ion_datacube.xic, weights=iso_spect[1][1:])
    else:
        # Only one isotope peak, so correlation doesn't make sense.
        iso_correlation_score = 1
    iso_ratio_score = isotope_pattern_match.isotope_pattern_match(ion_datacube.xic, iso_spect[1])
    msm_score = measure_value_score * iso_correlation_score * iso_ratio_score

    # 2x4 grid: four images on top, the spectrum spanning the bottom row.
    ax = [
        plt.subplot2grid((2, 4), (0, 0)),
        plt.subplot2grid((2, 4), (0, 1)),
        plt.subplot2grid((2, 4), (0, 2)),
        plt.subplot2grid((2, 4), (0, 3)),
        plt.subplot2grid((2, 4), (1, 0), colspan=4, rowspan=1),
    ]
    for a in ax:
        a.cla()

    # Ion images with hot-spot removal: clip at the q_val percentile.
    for ii in range(iso_max):
        im = ion_datacube.xic_to_image(ii)
        notnull = im > 0
        if notnull.any():  # an all-empty image has no percentile to clip at
            im_q = np.percentile(im[notnull], q_val)
            im[im > im_q] = im_q
        ax[ii].imshow(im, cmap=c_map, interpolation="nearest")
        ax[ii].set_title("m/z: {:3.4f}".format(iso_spect[0][ii]))

    # Spectrum panel: total measured intensity per isotope image.
    data_spect = [np.sum(ion_datacube.xic_to_image(ii)) for ii in range(iso_max)]
    data_spect = data_spect / np.linalg.norm(data_spect)
    # NOTE(review): this normalises iso_spect[1] in place, mutating the
    # caller's array -- confirm callers don't rely on the original scale.
    iso_spect[1] = iso_spect[1] / np.linalg.norm(iso_spect[1])
    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], iso_spect[1][0:iso_max], "g")
    plt.title(
        "moc: {:3.4f} spat: {:3.2f} spec: {:3.2f} msm: {:3.3f}".format(
            measure_value_score, iso_correlation_score, iso_ratio_score, msm_score
        )
    )
    plt.setp(stemlines, linewidth=2, color="g")  # set stems colors
    plt.setp(markerline, "markerfacecolor", "g", "markeredgecolor", "g")  # make points
    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], data_spect, "r")
    plt.setp(stemlines, linewidth=2, color="r")  # set stems colors
    plt.setp(markerline, "markerfacecolor", "r", "markeredgecolor", "r")  # make points

    # Proxy artists so the legend shows one entry per pattern.
    proxies = []
    h, = plt.plot(iso_spect[0][0], [0], "-g")
    proxies.append(h)
    h, = plt.plot(iso_spect[0][0], [0], "-r")
    proxies.append(h)
    ax[4].legend(proxies, ("predicted pattern", "data pattern"), numpoints=1)
    return ax
def score_ratio(self, sum_formula, adduct, imgs, intensities):
    """Record the isotope-pattern-match score for (sum_formula, adduct).

    Allocates the per-formula dict on first use and stores the result of
    ``isotope_pattern_match(imgs, intensities)`` under the adduct key.
    """
    per_adduct = self.iso_ratio_score.setdefault(sum_formula, {})
    per_adduct[adduct] = isotope_pattern_match(imgs, intensities)
def plot_images(ion_datacube, iso_spect, iso_max, q_val=99, c_map='hot'):
    """Visualise up to `iso_max` isotope ion images with a stem spectrum.

    Lays out a 2x4 figure (images on top, stem spectrum spanning the
    bottom), clips each image at the `q_val` percentile of its positive
    pixels, and overlays the predicted (green) vs measured (red) isotope
    patterns, both L2-normalised. The title carries the chaos, spatial,
    spectral and combined MSM scores. Returns the list of axes.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from pyIMS.image_measures import level_sets_measure, isotope_image_correlation, isotope_pattern_match

    # Recompute the three component scores for display in the title.
    measure_value_score = 1 - level_sets_measure.measure_of_chaos(
        ion_datacube.xic_to_image(0), 30, interp="median")[0]
    if len(iso_spect[1]) > 1:
        iso_correlation_score = isotope_image_correlation.isotope_image_correlation(
            ion_datacube.xic, weights=iso_spect[1][1:])
    else:
        # A single isotope peak has nothing to correlate against.
        iso_correlation_score = 1
    iso_ratio_score = isotope_pattern_match.isotope_pattern_match(
        ion_datacube.xic, iso_spect[1])
    msm_score = measure_value_score * iso_correlation_score * iso_ratio_score

    # Four image panels plus one wide spectrum panel.
    ax = [plt.subplot2grid((2, 4), (0, col)) for col in range(4)]
    ax.append(plt.subplot2grid((2, 4), (1, 0), colspan=4, rowspan=1))
    for panel in ax:
        panel.cla()

    # Draw each isotope image after percentile hot-spot clipping.
    for k in range(0, iso_max):
        image = ion_datacube.xic_to_image(k)
        positive = image > 0
        if positive.any():
            # Clip everything above the q_val percentile of lit pixels.
            ceiling = np.percentile(image[positive], q_val)
            image[image > ceiling] = ceiling
        ax[k].imshow(image, cmap=c_map, interpolation='nearest')
        ax[k].set_title('m/z: {:3.4f}'.format(iso_spect[0][k]))

    # NOTE(review): computed but unused below -- kept for parity.
    notnull = ion_datacube.xic_to_image(0) > 0
    data_spect = [np.sum(ion_datacube.xic_to_image(k)) for k in range(0, iso_max)]
    data_spect = data_spect / np.linalg.norm(data_spect)
    # NOTE(review): normalises iso_spect[1] in place (mutates the caller's
    # array) -- confirm this is acceptable to callers.
    iso_spect[1] = iso_spect[1] / np.linalg.norm(iso_spect[1])

    # Predicted pattern in green.
    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max],
                                                 iso_spect[1][0:iso_max], 'g')
    plt.title('moc: {:3.4f} spat: {:3.2f} spec: {:3.2f} msm: {:3.3f}'.format(
        measure_value_score, iso_correlation_score, iso_ratio_score, msm_score))
    plt.setp(stemlines, linewidth=2, color='g')
    plt.setp(markerline, 'markerfacecolor', 'g', 'markeredgecolor', 'g')
    # Measured pattern in red.
    markerline, stemlines, baseline = ax[4].stem(iso_spect[0][0:iso_max], data_spect, 'r')
    plt.setp(stemlines, linewidth=2, color='r')
    plt.setp(markerline, 'markerfacecolor', 'r', 'markeredgecolor', 'r')

    # Proxy artists give the legend one clean line per pattern.
    proxies = []
    for style in ('-g', '-r'):
        handle, = plt.plot(iso_spect[0][0], [0], style)
        proxies.append(handle)
    ax[4].legend(proxies, ('predicted pattern', 'data pattern'), numpoints=1)
    return ax