def generate_isotope_patterns(config, verbose=True, max_peaks=4):
    """Load or generate isotope patterns for every sum formula/adduct pair.

    For each adduct named in ``config`` a pickled cache file is looked up in
    the database dump folder. When no cache file exists the patterns are
    computed with ``calculate_isotope_patterns`` and, if a dump folder is
    configured, written back to that file. The cache always holds the full
    patterns; truncation to ``max_peaks`` happens only on the returned data.

    Args:
        config: nested dict providing ``['file_inputs']['database_file']``,
            ``['file_inputs']['database_load_folder']`` and, under
            ``['isotope_generation']``: ``isocalc_sig``,
            ``isocalc_resolution``, ``charge`` (list of dicts with
            ``polarity`` and ``n_charges``; only the first entry is used) and
            ``adducts`` (list of dicts with an ``adduct`` key).
        verbose: when true, print progress messages.
        max_peaks: non-negative upper bound on the number of entries kept per
            pattern (default 4, the previously hard-coded limit). ``None``
            keeps as many entries as the first pattern sequence holds.

    Returns:
        Tuple ``(sum_formulae, adducts, mz_list)`` where
        ``mz_list[sum_formula][adduct]`` is a two-element list holding the
        truncated first and second sequences of the stored pattern
        (presumably m/z values and intensities -- confirm against
        ``calculate_isotope_patterns``).
    """
    from pySpatialMetabolomics.parse_databases import parse_databases

    import pickle

    # Extract variables from config dict
    file_inputs = config['file_inputs']
    iso_cfg = config['isotope_generation']
    db_filename = file_inputs['database_file']
    db_dump_folder = file_inputs['database_load_folder']
    isocalc_sig = float(iso_cfg['isocalc_sig'])
    isocalc_resolution = float(iso_cfg['isocalc_resolution'])
    charge_states = iso_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # currently only supports first charge!!
    charge = int('{}{}'.format(charge_states[0]['polarity'],
                               charge_states[0]['n_charges']))
    adducts = [a['adduct'] for a in iso_cfg['adducts']]

    # Read in molecules
    sum_formulae = parse_databases.read_generic_csv(db_filename)
    if '' in sum_formulae:
        if verbose:
            print('empty sf removed from list')
        del sum_formulae['']

    # Check if already generated and load if possible, otherwise calculate fresh
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    # An empty dump folder means "no caching": neither read nor write, so we
    # never probe a path rooted at '/'.
    use_cache = db_dump_folder != ""
    mz_list = {}
    for adduct in adducts:
        load_file = '{}/{}_{}_{}_{}.dbasedump'.format(
            db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution)
        if use_cache and os.path.isfile(load_file):
            if verbose:
                print("{} -> loading".format(load_file))
            # NOTE(security): unpickling can execute arbitrary code; the dump
            # folder must only contain files written by this function.
            # Binary mode is what pickle expects; dumps written in text mode
            # on Windows by older code may need to be regenerated.
            with open(load_file, 'rb') as cache_file:
                mz_list_tmp = pickle.load(cache_file)
        else:
            if verbose:
                print("{} -> generating".format(load_file))
            mz_list_tmp = calculate_isotope_patterns(
                sum_formulae, adduct=adduct, isocalc_sig=isocalc_sig,
                isocalc_resolution=isocalc_resolution, charge=charge)
            if use_cache:
                with open(load_file, 'wb') as cache_file:
                    pickle.dump(mz_list_tmp, cache_file)

        # add patterns to total list
        for sum_formula in sum_formulae:
            # could be missing if [M-a] would have negative atoms
            if sum_formula not in mz_list_tmp:
                continue
            pattern = mz_list_tmp[sum_formula][adduct]
            # Limit the number of entries to reduce downstream calculations;
            # both sequences are cut to the same length.
            n = len(pattern[0])
            if max_peaks is not None:
                n = min(max_peaks, n)
            mz_list.setdefault(sum_formula, {})[adduct] = [pattern[0][0:n],
                                                           pattern[1][0:n]]
    if verbose:
        print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list
def generate_isotope_patterns(config):
    """Load or generate isotope patterns for every sum formula/adduct pair.

    A mass spectrum is simulated for each sum formula/adduct combination,
    giving a list of peak centroids per molecule (see
    http://www.mi.fu-berlin.de/wiki/pub/ABI/QuantProtP4/isotope-distribution.pdf).
    Results are cached per adduct as pickle files in the database dump
    folder; an existing cache file is loaded instead of recalculating.

    Args:
        config: nested dict providing ``['file_inputs']['database_file']``,
            ``['file_inputs']['database_load_folder']`` and, under
            ``['isotope_generation']``: ``isocalc_sig``,
            ``isocalc_resolution``, ``charge`` (list of dicts with
            ``polarity`` and ``n_charges``; only the first entry is used) and
            ``adducts`` (list of dicts with an ``adduct`` key).

    Returns:
        Tuple ``(sum_formulae, adducts, mz_list)`` where
        ``mz_list[sum_formula][adduct]`` is the centroid spectrum returned by
        ``get_spectrum(source='centroids')`` for that combination.
    """
    from pySpatialMetabolomics.parse_databases import parse_databases
    import pickle

    def _calculate_isotope_patterns(sum_formulae, adduct='', isocalc_sig=0.01,
                                    isocalc_resolution=200000.,
                                    isocalc_do_centroid=True, charge='1'):
        """Return ``{sum_formula: {adduct: centroids}}`` for one adduct."""
        # todo - parse sum formula and adduct properly so that subtractions
        # (losses) can be utilised (this code already exists somewhere)
        patterns = {}
        for sum_formula in sum_formulae:
            # The adduct is now an explicit argument; previously the helper
            # silently read the enclosing loop variable instead.
            isotope_ms = pyisocalc.isodist(sum_formula + adduct, plot=False,
                                           sigma=isocalc_sig, charges=charge,
                                           resolution=isocalc_resolution,
                                           do_centroid=isocalc_do_centroid)
            patterns.setdefault(sum_formula, {})[adduct] = \
                isotope_ms.get_spectrum(source='centroids')
        return patterns

    # Extract variables from config dict
    file_inputs = config['file_inputs']
    iso_cfg = config['isotope_generation']
    db_filename = file_inputs['database_file']
    db_dump_folder = file_inputs['database_load_folder']
    isocalc_sig = iso_cfg['isocalc_sig']
    isocalc_resolution = iso_cfg['isocalc_resolution']
    charge_states = iso_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # currently only supports first charge!!
    charge = int('{}{}'.format(charge_states[0]['polarity'],
                               charge_states[0]['n_charges']))
    adducts = [a['adduct'] for a in iso_cfg['adducts']]

    # Read in molecules
    sum_formulae = parse_databases.read_generic_csv(db_filename)

    # Check if already generated and load if possible, otherwise calculate fresh
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    # An empty dump folder means "no caching": neither read nor write, so we
    # never probe a path rooted at '/'.
    use_cache = db_dump_folder != ""
    mz_list = {}
    for adduct in adducts:
        load_file = '{}/{}_{}_{}_{}.dbasedump'.format(
            db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution)
        if use_cache and os.path.isfile(load_file):
            # NOTE(security): unpickling can execute arbitrary code; the dump
            # folder must only contain files written by this function.
            # Binary mode is what pickle expects; dumps written in text mode
            # on Windows by older code may need to be regenerated.
            with open(load_file, 'rb') as cache_file:
                mz_list_tmp = pickle.load(cache_file)
        else:
            mz_list_tmp = _calculate_isotope_patterns(
                sum_formulae, adduct=adduct, isocalc_sig=isocalc_sig,
                isocalc_resolution=isocalc_resolution, charge=charge)
            if use_cache:
                with open(load_file, 'wb') as cache_file:
                    pickle.dump(mz_list_tmp, cache_file)

        # add patterns to total list
        for sum_formula in mz_list_tmp:
            mz_list.setdefault(sum_formula, {})[adduct] = \
                mz_list_tmp[sum_formula][adduct]
    print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list
# Example #3
# 0
def generate_isotope_patterns(config, verbose=True, max_peaks=4):
    """Return isotope patterns per sum formula and adduct, using a disk cache.

    Each adduct in ``config`` has its own pickle cache file in the database
    dump folder. An existing file is loaded; otherwise the patterns are
    produced by ``calculate_isotope_patterns`` and stored (when a dump folder
    is configured). Cached data is never truncated; ``max_peaks`` only limits
    what is returned.

    Args:
        config: nested dict providing ``['file_inputs']['database_file']``,
            ``['file_inputs']['database_load_folder']`` and, under
            ``['isotope_generation']``: ``isocalc_sig``,
            ``isocalc_resolution``, ``charge`` (list of dicts with
            ``polarity`` and ``n_charges``; only the first entry is used) and
            ``adducts`` (list of dicts with an ``adduct`` key).
        verbose: when true, print progress messages.
        max_peaks: non-negative upper bound on the number of entries kept per
            pattern (default 4, the previously hard-coded limit). ``None``
            keeps as many entries as the first pattern sequence holds.

    Returns:
        Tuple ``(sum_formulae, adducts, mz_list)`` where
        ``mz_list[sum_formula][adduct]`` is a two-element list holding the
        truncated first and second sequences of the stored pattern
        (presumably m/z values and intensities -- confirm against
        ``calculate_isotope_patterns``).
    """
    from pySpatialMetabolomics.parse_databases import parse_databases

    import pickle

    # Extract variables from config dict
    input_cfg = config['file_inputs']
    generation_cfg = config['isotope_generation']
    db_filename = input_cfg['database_file']
    db_dump_folder = input_cfg['database_load_folder']
    isocalc_sig = float(generation_cfg['isocalc_sig'])
    isocalc_resolution = float(generation_cfg['isocalc_resolution'])
    if len(generation_cfg['charge']) > 1:
        print('Warning: only first charge state currently accepted')
    first_charge = generation_cfg['charge'][0]
    charge = int('{}{}'.format(
        first_charge['polarity'],
        first_charge['n_charges']))  # currently only supports first charge!!
    adducts = [a['adduct'] for a in generation_cfg['adducts']]

    # Read in molecules
    sum_formulae = parse_databases.read_generic_csv(db_filename)
    if '' in sum_formulae:
        if verbose:
            print('empty sf removed from list')
        del sum_formulae['']

    # Check if already generated and load if possible, otherwise calculate fresh
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    # With no dump folder configured the cache is skipped entirely, both for
    # reading and writing, so no path rooted at '/' is ever probed.
    caching_enabled = db_dump_folder != ""
    mz_list = {}
    for adduct in adducts:
        load_file = '{}/{}_{}_{}_{}.dbasedump'.format(db_dump_folder, db_name,
                                                      adduct, isocalc_sig,
                                                      isocalc_resolution)
        if caching_enabled and os.path.isfile(load_file):
            if verbose:
                print("{} -> loading".format(load_file))
            # NOTE(security): unpickling can execute arbitrary code; the dump
            # folder must only contain files written by this function.
            # Binary mode is what pickle expects; dumps written in text mode
            # on Windows by older code may need to be regenerated.
            with open(load_file, 'rb') as dump_file:
                mz_list_tmp = pickle.load(dump_file)
        else:
            if verbose:
                print("{} -> generating".format(load_file))
            mz_list_tmp = calculate_isotope_patterns(
                sum_formulae,
                adduct=adduct,
                isocalc_sig=isocalc_sig,
                isocalc_resolution=isocalc_resolution,
                charge=charge)
            if caching_enabled:
                with open(load_file, 'wb') as dump_file:
                    pickle.dump(mz_list_tmp, dump_file)

        # add patterns to total list
        for sum_formula in sum_formulae:
            if sum_formula not in mz_list_tmp:  # could be missing if [M-a] would have negative atoms
                continue
            first_seq, second_seq = mz_list_tmp[sum_formula][adduct][0:2]
            # Cap the pattern length to reduce the number of downstream
            # calculations; both sequences are cut to the same length.
            n = len(first_seq)
            if max_peaks is not None:
                n = min(max_peaks, n)
            if sum_formula not in mz_list:
                mz_list[sum_formula] = {}
            mz_list[sum_formula][adduct] = [
                first_seq[0:n],
                second_seq[0:n],
            ]
    if verbose:
        print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list