Exemple #1
0
def get_lists_of_mzs(sf):
    """Return the profile isotope pattern of ``sf`` plus its gradient-picked peaks.

    The profile spectrum comes from ``pyisocalc.isodist`` (sigma 0.01,
    charge -2, resolution 100000, no centroiding).  ``gradient`` is run on
    that profile and only its third return value (the indices) is used.

    Returns a dict of lists:
      * ``isodist_mzs`` / ``isodist_int`` -- the profile cropped to the span
        between the first and the last point whose intensity exceeds 0.01;
      * ``grad_mzs`` / ``grad_int`` -- m/z and intensity at every picked index;
      * ``grad_ind`` -- the picked indices shifted by the crop start.

    Any failure along the way (unparsable formula, no point above the
    threshold, an index running off the end, ...) yields the same dict with
    every list empty.
    """
    try:
        isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=0.01, charges=-2,
                                       resolution=100000.0, do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        # The m/z and intensity outputs of gradient() are discarded: they are
        # re-read from the profile once the indices have been adjusted.
        _, _, indices_list = gradient(isotope_ms.get_mzs(),
                                      isotope_ms.get_intensities(),
                                      max_output=-1, weighted_bins=0)
        # Move each index to its right-hand neighbour unless the point itself
        # is strictly more intense.
        indices_list = [i if intenslist[i] > intenslist[i + 1] else i + 1
                        for i in indices_list]
        # Single pass over the profile; an empty result raises IndexError
        # below and falls through to the empty answer.
        above = [i for i, value in enumerate(intenslist) if value > 0.01]
        min_i, max_i = above[0], above[-1]
        # NOTE(review): the slice end is exclusive, so the last point above
        # the threshold is not part of the cropped profile -- confirm intended.
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": [mzlist[i] for i in indices_list],
            "grad_int": [intenslist[i] for i in indices_list],
            # Explicit per-element shift instead of relying on numpy's
            # reflected "list - scalar" arithmetic.
            "grad_ind": [i - min_i for i in indices_list],
        }
    except Exception:
        # Deliberate best-effort: callers always get the same dict shape.
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return {
            "isodist_mzs": [],
            "isodist_int": [],
            "grad_mzs": [],
            "grad_int": [],
            "grad_ind": [],
        }
Exemple #2
0
    def load_queries(self):
        """Read the sum-formula database and compute centroided isotope patterns.

        Settings are taken from ``self.config``:
          * ``file_inputs.database_file`` -- text file, one sum formula per line;
          * ``isotope_generation.isocalc_sig`` / ``isocalc_resolution`` --
            passed to ``pyisocalc.isodist`` as ``sigma`` / ``resolution``;
          * ``isotope_generation.charge`` -- list of ``{polarity, n_charges}``;
            only the first entry is used;
          * ``isotope_generation.adducts`` -- list of ``{adduct: ...}`` dicts.

        Sets ``self.adducts``, ``self.sum_formulae`` (the stripped lines of the
        database file) and ``self.mz_list``, a mapping
        ``{sum_formula: {adduct: (mzs, ints)}}`` whose arrays are ordered by
        decreasing intensity.
        """
        config = self.config
        db_filename = config['file_inputs']['database_file']
        # Not used below; the lookup is kept so a config without this key
        # still fails here rather than somewhere later.
        db_dump_folder = config['file_inputs']['database_load_folder']
        iso_config = config['isotope_generation']
        isocalc_sig = iso_config['isocalc_sig']
        isocalc_resolution = iso_config['isocalc_resolution']
        if len(iso_config['charge']) > 1:
            print('Warning: only first charge state currently accepted')
        # Polarity sign and charge count are glued into one signed integer,
        # e.g. '+' and 1 -> 1.  Currently only supports the first charge.
        first_charge = iso_config['charge'][0]
        charge = int('{}{}'.format(first_charge['polarity'], first_charge['n_charges']))
        self.adducts = [a['adduct'] for a in iso_config['adducts']]

        # Read in molecules; the with-block closes the file handle promptly.
        with open(db_filename) as db_file:
            self.sum_formulae = [line.strip() for line in db_file]

        self.mz_list = {}
        for adduct in self.adducts:
            for sum_formula in self.sum_formulae:
                isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                               plot=False,
                                               sigma=isocalc_sig,
                                               charges=charge,
                                               resolution=isocalc_resolution,
                                               do_centroid=True)
                per_adduct = self.mz_list.setdefault(sum_formula, {})

                mzs, ints = map(np.array, isotope_ms.get_spectrum(source='centroids'))
                order = ints.argsort()[::-1]  # most intense peak first
                per_adduct[adduct] = (mzs[order], ints[order])
Exemple #3
0
def make_sf_adduct_optimusfilter(sum_formulae,adducts,output_filename,sigma=0.001,resolution=10000,charge=1):
    """Append one ``"<formula> [M<adduct>],-1,<m/z>"`` line per formula/adduct pair.

    For every combination of ``sum_formulae`` and ``adducts`` the combined
    formula is simplified with ``pyisocalc.complex_to_simple`` and its isotope
    pattern is generated with ``pyisocalc.isodist`` (using ``sigma``,
    ``charge`` and ``resolution``).  The value written is
    ``get_spectrum(source='centroids')[0][0]`` (presumably the first centroid
    m/z -- confirm against pyisocalc).

    ``output_filename`` is opened in append mode, so existing rows are kept.

    Pairs are skipped (nothing written) when the adduct cannot be formed
    (``complex_to_simple`` returns ``None``), when a ``KeyError`` is raised, or
    when a ``ValueError`` starting with "Element not recognised" is raised.
    Every other exception is re-raised after printing the offending pair.
    """
    from pyMS.pyisocalc import pyisocalc
    with open(output_filename, 'a') as f_out:
        for sum_formula in sum_formulae:
            for adduct in adducts:
                # Reset per pair so the diagnostic print below never sees a
                # value left over from an earlier iteration.
                sf = None
                try:
                    sf = pyisocalc.complex_to_simple(sum_formula + adduct)
                    if sf is None:  # not possible to form adduct
                        continue
                    isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=sigma, charges=charge,
                                                   resolution=resolution)
                except KeyError as e:
                    # str() of a KeyError is the repr of the missing key, so a
                    # check on a "KeyError:" prefix would never match; skip the
                    # pair unconditionally instead of writing a stale pattern.
                    print(str(e))
                    continue
                except ValueError as e:
                    if str(e).startswith("Element not recognised"):
                        print(str(e))
                        continue
                    # Unknown ValueError: report the pair and propagate rather
                    # than falling through to the write with stale data.
                    print('%s %s %s' % (sf == "", sum_formula, adduct))
                    raise
                except Exception:
                    print('%s %s %s' % (sf == "", sum_formula, adduct))
                    raise
                f_out.write("{} [M{}],-1,{}\n".format(sum_formula,adduct,isotope_ms.get_spectrum(source='centroids')[0][0]))
 def calcualte_isotope_patterns(sum_formulae,adducts='',isocalc_sig=0.01,isocalc_resolution = 200000.,isocalc_do_centroid = True, charge='1'):
     """Generate the centroid peak list of every sum formula with the given adduct.

     ``adducts`` is a single adduct string (despite the plural name) that is
     appended to each sum formula before it is passed to ``pyisocalc.isodist``.

     Returns ``{sum_formula: {adduct: spectrum}}`` where ``spectrum`` is
     whatever ``get_spectrum(source='centroids')`` returns.
     """
     # todo - parse sum formula and adduct properly so that subtractions (losses) can be utilised (this code already exists somewhere)
     # The body used to reference an undefined name ``adduct``; bind it to the
     # parameter that actually carries the value.
     adduct = adducts
     mz_list = {}
     for sum_formula in sum_formulae:
         isotope_ms = pyisocalc.isodist(sum_formula + adduct, plot=False, sigma=isocalc_sig, charges=charge,
                                        resolution=isocalc_resolution, do_centroid=isocalc_do_centroid)
         mz_list.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(source='centroids')
     return mz_list
def show_images_get():
    """Render the ``show_images`` template for one formula with H, K and Na adducts.

    Request parameters (all optional):
      * ``dataset`` -- defaults to the first key of ``app.paths``;
      * ``formula`` -- sum formula, default ``''``;
      * ``tolerance`` (5.0), ``resolution`` (1e5), ``pts`` (10),
        ``pyisocalc_cutoff`` (1e-3) -- parsed as floats;
      * ``npeaks`` (4) -- maximum number of peaks kept per adduct;
      * ``adduct`` ('H') -- forwarded to the template as ``selected_adduct``;
      * ``hs_removal`` -- enabled only when the value is exactly ``'on'``.

    For every adduct the isotope pattern is computed, reduced to at most
    ``npeaks`` most intense centroids (re-sorted by m/z), and scored against
    the datacube returned by ``app.get_datacube``.  The template receives
    ``isotope_patterns`` as ``{adduct: (mzs, intensities, stats)}``.
    """
    params = bottle.request.params
    # next(iter(...)) takes the first key on both Python 2 and 3 mappings.
    dataset = params.get('dataset', next(iter(app.paths)))
    formula = params.get('formula', '')
    tolerance = float(params.get('tolerance', 5.0))
    resolution = float(params.get('resolution', 1e5))
    selected_adduct = params.get('adduct', 'H')
    # Strict boolean: previously any other non-empty value (e.g. 'off') was
    # truthy and switched hot-spot removal on as well.
    hs_removal = bottle.request.GET.get('hs_removal', False) == 'on'
    k = int(params.get('npeaks', 4))
    pts = float(params.get('pts', 10))
    cutoff = float(params.get('pyisocalc_cutoff', 1e-3))

    adducts = ['H', 'K', 'Na']
    isotope_patterns = {}
    for adduct in adducts:
        sf = pyisocalc.SumFormulaParser.parse_string(formula + adduct)
        raw_pattern = pyisocalc.isodist(sf, cutoff)
        # Peak width is derived from the first m/z of the raw pattern.
        fwhm = raw_pattern.get_spectrum()[0][0] / resolution
        pattern = pyisocalc.apply_gaussian(raw_pattern, fwhm, pts, exact=True)

        mzs, intensities = map(np.array, pattern.get_spectrum(source='centroids'))
        if len(mzs) > k:
            # Keep the k most intense centroids ...
            order = intensities.argsort()[::-1]
            mzs = mzs[order][:k]
            intensities = intensities[order][:k]
            # ... and put them back into ascending m/z order.
            order = mzs.argsort()
            mzs = mzs[order]
            intensities = intensities[order]

        datacube = app.get_datacube(dataset, mzs, tolerance)
        if hs_removal:
            # Clip every non-empty image at its 99th percentile (in place).
            for img in datacube.xic:
                if len(img) > 0:
                    pc = np.percentile(img, 99)
                    img[img > pc] = pc

        chaos = measure_of_chaos(datacube.xic_to_image(0), 30, overwrite=False)

        iso_corr = isotope_pattern_match(datacube.xic, intensities)

        img_corr = 1.0 # return 1 if there's a single peak
        # NOTE(review): this condition is only true for three or more peaks,
        # so a two-peak pattern also keeps the default of 1.0 -- confirm.
        if len(intensities[1:]) > 1:
            img_corr = isotope_image_correlation(datacube.xic, weights=intensities[1:])

        stats = {'measure of chaos': chaos,
                 'image correlation score': img_corr,
                 'isotope pattern score': iso_corr}

        isotope_patterns[adduct] = (mzs, intensities, stats)
    # NOTE(review): ``formula`` comes straight from the request and is turned
    # into markup here; confirm the template escapes it, or escape it before
    # the <sub> substitution.
    return bottle.template('show_images', hs_removal=hs_removal,
                           isotope_patterns=isotope_patterns, formula=formula, selected_adduct=selected_adduct,
                           pretty_formula=re.sub(r"(\d+)", r"<sub>\1</sub>", formula),
                           resolution=resolution, tol=tolerance, datasets=list(app.paths.keys()),
                           npeaks=k, selected_dataset=dataset)
Exemple #6
0
def calculate_isotope_patterns(sum_formulae,
                               adduct='',
                               isocalc_sig=0.01,
                               isocalc_resolution=200000.,
                               isocalc_do_centroid=True,
                               charge=1,
                               verbose=True):
    """Generate the centroid peak list of every sum formula with the given adduct.

    Each ``sum_formula + adduct`` is simplified with
    ``pyisocalc.complex_to_simple`` and then passed to ``pyisocalc.isodist``.

    A formula is skipped (with a printed message) when
      * either call raises ``KeyError``;
      * ``complex_to_simple`` raises a ``ValueError`` whose message starts
        with "Element not recognised";
      * ``complex_to_simple`` returns ``None`` (negative atom counts).
    Any other ``ValueError`` from ``complex_to_simple`` is re-raised after
    printing the formula and adduct.

    Returns ``{sum_formula: {adduct: spectrum}}`` where ``spectrum`` is the
    result of ``get_spectrum(source='centroids')``.  When ``verbose`` is true
    every formula/adduct pair is printed before it is processed.
    """
    from pyMS.pyisocalc import pyisocalc
    mz_list = {}
    for sum_formula in sum_formulae:
        if verbose:
            print('%s %s' % (sum_formula, adduct))
        try:
            sf = pyisocalc.complex_to_simple(sum_formula + adduct)
        except KeyError as e:
            # str() of a KeyError is the repr of the key and never starts with
            # "KeyError: "; skipping unconditionally keeps a failed formula
            # from reusing ``sf`` of the previous iteration.
            print(str(e))
            continue
        except ValueError as e:
            if str(e).startswith("Element not recognised"):
                print(str(e))
                continue
            print('%s %s' % (sum_formula, adduct))
            raise
        if sf is None:  # negative atoms as a result of simplification
            print('negative adduct for {} : {}'.format(sum_formula, adduct))
            continue
        try:
            isotope_ms = pyisocalc.isodist(sf,
                                           plot=False,
                                           sigma=isocalc_sig,
                                           charges=charge,
                                           resolution=isocalc_resolution,
                                           do_centroid=isocalc_do_centroid)
        except KeyError as e:
            # Same reasoning as above: never store a stale ``isotope_ms``.
            print(str(e))
            continue

        mz_list.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(
            source='centroids')

    return mz_list
 def calculate_isotope_patterns(sum_formulae,adduct,isocalc_resolution,isocalc_do_centroid = True, charge=1):
     """Build ``{sum_formula: {adduct: (mzs, intensities)}}`` for one adduct.

     Every formula is parsed together with ``adduct``, its raw pattern is
     computed with a cutoff of 1e-4 and then blurred with a Gaussian whose
     width is the first m/z of the raw pattern divided by
     ``isocalc_resolution``.  Only the five most intense centroids are kept,
     most intense first.  ``isocalc_do_centroid`` is accepted but not used.
     """
     patterns_by_formula = {}
     for formula in sum_formulae:
         parsed = pyisocalc.SumFormulaParser.parse_string(str(formula + adduct))
         theoretical = pyisocalc.isodist(parsed, cutoff=1e-4, charge=charge)
         first_mz = theoretical.get_spectrum()[0][0]
         # TODO: resolution = resolution(mz)
         peak_width = first_mz / isocalc_resolution
         blurred = pyisocalc.apply_gaussian(theoretical, peak_width, exact=False)
         per_adduct = patterns_by_formula.setdefault(formula, {})
         centroid_mzs, centroid_ints = blurred.get_spectrum(source='centroids')
         # Indices of the (at most) five largest intensities, descending.
         strongest = centroid_ints.argsort()[::-1][:5]
         per_adduct[adduct] = (centroid_mzs[strongest], centroid_ints[strongest])
     return patterns_by_formula
def generate_patterns(formulas_fn, resolution_func, mz_range, adducts=('H', 'K', 'Na')):
    """Compute centroided isotope patterns for every formula/adduct pair in range.

    Args:
        formulas_fn: path of a text file with one sum formula per line; blank
            lines are ignored.
        resolution_func: callable mapping an m/z value to the resolution used
            for the Gaussian width (``fwhm = mz / resolution_func(mz)``).
        mz_range: ``(mz_min, mz_max)``; a pair is skipped when the first m/z
            of its raw pattern lies outside this closed interval.
        adducts: adduct strings appended to each formula; defaults to the
            previously hard-coded H, K and Na.

    Returns:
        ``{(formula, adduct): (mzs, intensities)}`` with both arrays ordered
        by decreasing intensity.
    """
    mz_min, mz_max = mz_range
    # The with-block closes the formula file as soon as it has been read.
    with open(formulas_fn) as formulas_file:
        formulae = [line.strip() for line in formulas_file]

    patterns = {}
    for f in formulae:
        if not f:
            # A blank line would otherwise be parsed as the bare adduct.
            continue
        for a in adducts:
            sf = pyisocalc.SumFormulaParser.parse_string(f + a)
            raw_pattern = pyisocalc.isodist(sf, cutoff=1e-4, charge=1)
            mz = raw_pattern.get_spectrum()[0][0]
            if mz < mz_min or mz > mz_max:
                continue
            fwhm = mz / resolution_func(mz)
            blurred = pyisocalc.apply_gaussian(raw_pattern, fwhm, exact=False)
            mzs, intensities = blurred.get_spectrum(source="centroids")
            mzs = np.array(mzs)
            intensities = np.array(intensities)
            order = np.argsort(intensities)[::-1]  # most intense first
            patterns[(f, a)] = (mzs[order], intensities[order])
    return patterns
Exemple #9
0
def get_lists_of_mzs(sf):
    """Return the profile isotope pattern of ``sf`` plus its gradient-picked peaks.

    The profile spectrum comes from ``pyisocalc.isodist`` (sigma 0.01,
    charge -2, resolution 100000, no centroiding).  ``gradient`` is run on
    that profile and only its third return value (the indices) is used.

    Returns a dict of lists:
      * ``isodist_mzs`` / ``isodist_int`` -- the profile cropped to the span
        between the first and the last point whose intensity exceeds 0.01;
      * ``grad_mzs`` / ``grad_int`` -- m/z and intensity at every picked index;
      * ``grad_ind`` -- the picked indices shifted by the crop start.

    Any failure along the way (unparsable formula, no point above the
    threshold, an index running off the end, ...) yields the same dict with
    every list empty.
    """
    try:
        isotope_ms = pyisocalc.isodist(sf,
                                       plot=False,
                                       sigma=0.01,
                                       charges=-2,
                                       resolution=100000.0,
                                       do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        # The m/z and intensity outputs of gradient() are discarded: they are
        # re-read from the profile once the indices have been adjusted.
        _, _, indices_list = gradient(
            isotope_ms.get_mzs(),
            isotope_ms.get_intensities(),
            max_output=-1,
            weighted_bins=0)
        # Move each index to its right-hand neighbour unless the point itself
        # is strictly more intense.
        indices_list = [
            i if intenslist[i] > intenslist[i + 1] else i + 1
            for i in indices_list
        ]
        # Single pass over the profile; an empty result raises IndexError
        # below and falls through to the empty answer.
        above = [i for i, value in enumerate(intenslist) if value > 0.01]
        min_i, max_i = above[0], above[-1]
        # NOTE(review): the slice end is exclusive, so the last point above
        # the threshold is not part of the cropped profile -- confirm intended.
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": [mzlist[i] for i in indices_list],
            "grad_int": [intenslist[i] for i in indices_list],
            # Explicit per-element shift instead of relying on numpy's
            # reflected "list - scalar" arithmetic.
            "grad_ind": [i - min_i for i in indices_list]
        }
    except Exception:
        # Deliberate best-effort: callers always get the same dict shape.
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return {
            "isodist_mzs": [],
            "isodist_int": [],
            "grad_mzs": [],
            "grad_int": [],
            "grad_ind": []
        }
def calculate_isotope_patterns(sum_formulae, adduct='', isocalc_sig=0.01, isocalc_resolution=200000.,
                                   isocalc_do_centroid=True, charge=1,verbose=True):
    """Generate the centroid peak list of every sum formula with the given adduct.

    Each ``sum_formula + adduct`` is simplified with
    ``pyisocalc.complex_to_simple`` and then passed to ``pyisocalc.isodist``.

    A formula is skipped (with a printed message) when
      * either call raises ``KeyError``;
      * ``complex_to_simple`` raises a ``ValueError`` whose message starts
        with "Element not recognised";
      * ``complex_to_simple`` returns ``None`` (negative atom counts).
    Any other ``ValueError`` from ``complex_to_simple`` is re-raised after
    printing the formula and adduct.

    Returns ``{sum_formula: {adduct: spectrum}}`` where ``spectrum`` is the
    result of ``get_spectrum(source='centroids')``.  When ``verbose`` is true
    every formula/adduct pair is printed before it is processed.
    """
    from pyMS.pyisocalc import pyisocalc
    mz_list = {}
    for sum_formula in sum_formulae:
        if verbose:
            print('%s %s' % (sum_formula, adduct))
        try:
            sf = pyisocalc.complex_to_simple(sum_formula+adduct)
        except KeyError as e:
            # str() of a KeyError is the repr of the key and never starts with
            # "KeyError: "; skipping unconditionally keeps a failed formula
            # from reusing ``sf`` of the previous iteration.
            print(str(e))
            continue
        except ValueError as e:
            if str(e).startswith("Element not recognised"):
                print(str(e))
                continue
            print('%s %s' % (sum_formula, adduct))
            raise
        if sf is None: #negative atoms as a result of simplification
            print('negative adduct for {} : {}'.format(sum_formula,adduct))
            continue
        try:
            isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=isocalc_sig, charges=charge,
                                           resolution=isocalc_resolution, do_centroid=isocalc_do_centroid)
        except KeyError as e:
            # Same reasoning as above: never store a stale ``isotope_ms``.
            print(str(e))
            continue

        mz_list.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(source='centroids')

    return mz_list
Exemple #11
0
    def load_queries(self):
        """Read the sum-formula database and compute centroided isotope patterns.

        Settings are taken from ``self.config``:
          * ``file_inputs.database_file`` -- text file, one sum formula per line;
          * ``isotope_generation.isocalc_sig`` / ``isocalc_resolution`` --
            passed to ``pyisocalc.isodist`` as ``sigma`` / ``resolution``;
          * ``isotope_generation.charge`` -- list of ``{polarity, n_charges}``;
            only the first entry is used;
          * ``isotope_generation.adducts`` -- list of ``{adduct: ...}`` dicts.

        Sets ``self.adducts``, ``self.sum_formulae`` (the stripped lines of the
        database file) and ``self.mz_list``, a mapping
        ``{sum_formula: {adduct: (mzs, ints)}}`` whose arrays are ordered by
        decreasing intensity.
        """
        config = self.config
        db_filename = config['file_inputs']['database_file']
        # Not used below; the lookup is kept so a config without this key
        # still fails here rather than somewhere later.
        db_dump_folder = config['file_inputs']['database_load_folder']
        iso_config = config['isotope_generation']
        isocalc_sig = iso_config['isocalc_sig']
        isocalc_resolution = iso_config['isocalc_resolution']
        if len(iso_config['charge']) > 1:
            print('Warning: only first charge state currently accepted')
        # Polarity sign and charge count are glued into one signed integer,
        # e.g. '+' and 1 -> 1.  Currently only supports the first charge.
        first_charge = iso_config['charge'][0]
        charge = int('{}{}'.format(first_charge['polarity'],
                                   first_charge['n_charges']))
        self.adducts = [a['adduct'] for a in iso_config['adducts']]

        # Read in molecules; the with-block closes the file handle promptly.
        with open(db_filename) as db_file:
            self.sum_formulae = [line.strip() for line in db_file]

        self.mz_list = {}
        for adduct in self.adducts:
            for sum_formula in self.sum_formulae:
                isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                               plot=False,
                                               sigma=isocalc_sig,
                                               charges=charge,
                                               resolution=isocalc_resolution,
                                               do_centroid=True)
                per_adduct = self.mz_list.setdefault(sum_formula, {})

                mzs, ints = map(np.array,
                                isotope_ms.get_spectrum(source='centroids'))
                order = ints.argsort()[::-1]  # most intense peak first
                per_adduct[adduct] = (mzs[order], ints[order])
import os
import cPickle

import numpy as np  # used by the mass bookkeeping at the bottom of this script
from pyMS.pyisocalc import pyisocalc

# One sum formula per line; the with-block closes the file after reading.
with open("formulae.txt") as f:
    sum_formulae = [l.strip() for l in f]

adducts = ['H', 'Na', 'K']
patterns = {}
if os.path.isfile("patterns.pkl"):
    # Reuse the cached patterns.  Pickle files are opened in binary mode.
    # NOTE: unpickling can execute arbitrary code -- only load a cache file
    # that this script wrote itself.
    with open("patterns.pkl", "rb") as f:
        patterns = cPickle.load(f)
else:
    print("generating patterns...")
    for sum_formula in sum_formulae:
        for adduct in adducts:
            isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                           plot=False,
                                           sigma=0.01,
                                           charges=1,
                                           resolution=200000,
                                           do_centroid=True)
            patterns.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(source='centroids')
    with open('patterns.pkl', 'wb') as f:
        cPickle.dump(patterns, f)

# Flatten the nested dict: ``formulas[i]`` labels the molecule whose centroid
# m/z values are ``masses[i]`` (both comprehensions iterate in the same order).
formulas = [k + '+' + a for k in patterns for a in patterns[k]]
masses = [patterns[k][a][0] for k in patterns for a in patterns[k]]
# Sort all masses globally while remembering which molecule each came from.
all_masses = np.concatenate(masses)
order = all_masses.argsort()
all_masses = all_masses[order]
# An explicit list of lengths works as repeat counts on Python 2 and 3 alike.
mol_indices = np.repeat(np.arange(len(masses)), [len(m) for m in masses])[order]
mass_diffs = np.diff(all_masses)