def generate_isotope_patterns(config, verbose=True):
    """Load (or compute and cache) isotope patterns for each formula/adduct.

    For every adduct in the config a pickled cache file
    ``<dump_folder>/<db_name>_<adduct>_<sig>_<resolution>.dbasedump`` is
    looked up. If it exists it is loaded; otherwise the patterns are
    generated with ``calculate_isotope_patterns`` and, when a dump folder is
    configured, written back to that file.

    Args:
        config: nested dict. Reads ``config['file_inputs']``
            (``database_file``, ``database_load_folder``) and
            ``config['isotope_generation']`` (``isocalc_sig``,
            ``isocalc_resolution``, ``charge``, ``adducts``).
        verbose: when true, print progress messages.

    Returns:
        Tuple ``(sum_formulae, adducts, mz_list)``.
        ``mz_list[sum_formula][adduct]`` is a two-element list holding the
        first (at most four) entries of each of the two sequences of the
        stored pattern.
        NOTE(review): presumably [m/z values, intensities] -- confirm
        against ``calculate_isotope_patterns``.
    """
    from pySpatialMetabolomics.parse_databases import parse_databases
    import pickle

    # Hard-coded cap on the number of peaks kept per pattern; it exists to
    # reduce the number of downstream calculations.
    max_peaks = 4

    # Extract variables from config dict
    file_inputs = config['file_inputs']
    iso_cfg = config['isotope_generation']
    db_filename = file_inputs['database_file']
    db_dump_folder = file_inputs['database_load_folder']
    isocalc_sig = float(iso_cfg['isocalc_sig'])
    isocalc_resolution = float(iso_cfg['isocalc_resolution'])
    charge_states = iso_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # Only the first charge state is supported. Polarity and count are
    # concatenated and parsed as a signed int (assumes polarity is a sign
    # character such as '+' or '-').
    charge = int('{}{}'.format(charge_states[0]['polarity'],
                               charge_states[0]['n_charges']))
    adducts = [a['adduct'] for a in iso_cfg['adducts']]

    # Read in molecules
    sum_formulae = parse_databases.read_generic_csv(db_filename)
    if '' in sum_formulae:
        if verbose:
            print('empty sf removed from list')
        del sum_formulae['']

    # Check if already generated and load if possible, otherwise calculate
    # fresh
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    mz_list = {}
    for adduct in adducts:
        load_file = '{}/{}_{}_{}_{}.dbasedump'.format(
            db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution)
        if os.path.isfile(load_file):
            if verbose:
                print("{} -> loading".format(load_file))
            # NOTE: unpickling can execute arbitrary code; this is only safe
            # while the dump folder contains trusted files.
            # Binary mode + context manager: pickle data is bytes and the
            # handle must be closed deterministically.
            with open(load_file, 'rb') as cache_file:
                mz_list_tmp = pickle.load(cache_file)
        else:
            if verbose:
                print("{} -> generating".format(load_file))
            mz_list_tmp = calculate_isotope_patterns(
                sum_formulae,
                adduct=adduct,
                isocalc_sig=isocalc_sig,
                isocalc_resolution=isocalc_resolution,
                charge=charge)
            if db_dump_folder != "":
                with open(load_file, 'wb') as cache_file:
                    pickle.dump(mz_list_tmp, cache_file)

        # add patterns to total list
        for sum_formula in sum_formulae:
            if sum_formula not in mz_list_tmp:
                # could be missing if [M-a] would have negative atoms
                continue
            pattern = mz_list_tmp[sum_formula][adduct]
            n = min(max_peaks, len(pattern[0]))
            mz_list.setdefault(sum_formula, {})[adduct] = [
                pattern[0][0:n],
                pattern[1][0:n],
            ]

    if verbose:
        print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list
def generate_isotope_patterns(config):
    """Load (or compute and cache) isotope patterns for each formula/adduct.

    For every adduct in the config a pickled cache file
    ``<dump_folder>/<db_name>_<adduct>_<sig>_<resolution>.dbasedump`` is
    looked up. If it exists it is loaded; otherwise the patterns are
    generated with ``pyisocalc`` and, when a dump folder is configured,
    written back to that file.

    Args:
        config: nested dict. Reads ``config['file_inputs']``
            (``database_file``, ``database_load_folder``) and
            ``config['isotope_generation']`` (``isocalc_sig``,
            ``isocalc_resolution``, ``charge``, ``adducts``).

    Returns:
        Tuple ``(sum_formulae, adducts, mz_list)`` where
        ``mz_list[sum_formula][adduct]`` is the centroid spectrum returned
        by ``get_spectrum(source='centroids')`` (or the cached equivalent).
    """
    from pySpatialMetabolomics.parse_databases import parse_databases
    import pickle

    # We simulate a mass spectrum for each sum formula/adduct combination.
    # This generates a set of isotope patterns (see
    # http://www.mi.fu-berlin.de/wiki/pub/ABI/QuantProtP4/isotope-distribution.pdf)
    # which can provide additional information on the molecule detected.
    # This gives us a list of m/z centres for the molecule.
    def calcualte_isotope_patterns(sum_formulae, adducts='', isocalc_sig=0.01,
                                   isocalc_resolution=200000.,
                                   isocalc_do_centroid=True, charge='1'):
        """Return ``{sum_formula: {adduct: centroid spectrum}}``.

        ``adducts`` is an iterable of adduct strings; a bare string is
        treated as a single adduct.
        """
        # todo - parse sum formula and adduct properly so that subtractions
        # (losses) can be utilised (this code already exists somewhere)
        if isinstance(adducts, str):
            adducts = (adducts,)
        patterns = {}
        for sum_formula in sum_formulae:
            # Iterate the adducts that were actually passed in instead of
            # relying on a variable leaked from the enclosing scope.
            for adduct in adducts:
                isotope_ms = pyisocalc.isodist(
                    sum_formula + adduct,
                    plot=False,
                    sigma=isocalc_sig,
                    charges=charge,
                    resolution=isocalc_resolution,
                    do_centroid=isocalc_do_centroid)
                patterns.setdefault(sum_formula, {})[adduct] = (
                    isotope_ms.get_spectrum(source='centroids'))
        return patterns

    # Extract variables from config dict
    file_inputs = config['file_inputs']
    iso_cfg = config['isotope_generation']
    db_filename = file_inputs['database_file']
    db_dump_folder = file_inputs['database_load_folder']
    isocalc_sig = iso_cfg['isocalc_sig']
    isocalc_resolution = iso_cfg['isocalc_resolution']
    charge_states = iso_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # Only the first charge state is supported. Polarity and count are
    # concatenated and parsed as a signed int (assumes polarity is a sign
    # character such as '+' or '-').
    charge = int('{}{}'.format(charge_states[0]['polarity'],
                               charge_states[0]['n_charges']))
    adducts = [a['adduct'] for a in iso_cfg['adducts']]

    # Read in molecules
    sum_formulae = parse_databases.read_generic_csv(db_filename)

    # Check if already generated and load if possible, otherwise calculate
    # fresh
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    mz_list = {}
    for adduct in adducts:
        load_file = '{}/{}_{}_{}_{}.dbasedump'.format(
            db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution)
        if os.path.isfile(load_file):
            # NOTE: unpickling can execute arbitrary code; this is only safe
            # while the dump folder contains trusted files.
            # Binary mode + context manager: pickle data is bytes and the
            # handle must be closed deterministically.
            with open(load_file, 'rb') as cache_file:
                mz_list_tmp = pickle.load(cache_file)
        else:
            mz_list_tmp = calcualte_isotope_patterns(
                sum_formulae,
                adducts=(adduct,),
                isocalc_sig=isocalc_sig,
                isocalc_resolution=isocalc_resolution,
                charge=charge)
            if db_dump_folder != "":
                with open(load_file, 'wb') as cache_file:
                    pickle.dump(mz_list_tmp, cache_file)

        # add patterns to total list
        for sum_formula in mz_list_tmp:
            mz_list.setdefault(sum_formula, {})[adduct] = (
                mz_list_tmp[sum_formula][adduct])

    print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list
def generate_isotope_patterns(config, verbose=True):
    """Load (or compute and cache) isotope patterns for each formula/adduct.

    For every adduct in the config a pickled cache file
    ``<dump_folder>/<db_name>_<adduct>_<sig>_<resolution>.dbasedump`` is
    looked up. If it exists it is loaded; otherwise the patterns are
    generated with ``calculate_isotope_patterns`` and, when a dump folder is
    configured, written back to that file.

    Args:
        config: nested dict. Reads ``config['file_inputs']``
            (``database_file``, ``database_load_folder``) and
            ``config['isotope_generation']`` (``isocalc_sig``,
            ``isocalc_resolution``, ``charge``, ``adducts``).
        verbose: when true, print progress messages.

    Returns:
        Tuple ``(sum_formulae, adducts, mz_list)``.
        ``mz_list[sum_formula][adduct]`` is a two-element list holding the
        first (at most four) entries of each of the two sequences of the
        stored pattern.
        NOTE(review): presumably [m/z values, intensities] -- confirm
        against ``calculate_isotope_patterns``.
    """
    from pySpatialMetabolomics.parse_databases import parse_databases
    import pickle

    # Hard-coded cap on the number of peaks kept per pattern; it exists to
    # reduce the number of downstream calculations.
    max_peaks = 4

    # Extract variables from config dict
    file_inputs = config['file_inputs']
    iso_cfg = config['isotope_generation']
    db_filename = file_inputs['database_file']
    db_dump_folder = file_inputs['database_load_folder']
    isocalc_sig = float(iso_cfg['isocalc_sig'])
    isocalc_resolution = float(iso_cfg['isocalc_resolution'])
    charge_states = iso_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # Only the first charge state is supported. Polarity and count are
    # concatenated and parsed as a signed int (assumes polarity is a sign
    # character such as '+' or '-').
    first_charge = charge_states[0]
    charge = int('{}{}'.format(first_charge['polarity'],
                               first_charge['n_charges']))
    adducts = [a['adduct'] for a in iso_cfg['adducts']]

    # Read in molecules
    sum_formulae = parse_databases.read_generic_csv(db_filename)
    if '' in sum_formulae:
        if verbose:
            print('empty sf removed from list')
        del sum_formulae['']

    # Check if already generated and load if possible, otherwise calculate
    # fresh
    db_name = os.path.splitext(os.path.basename(db_filename))[0]
    mz_list = {}
    for adduct in adducts:
        load_file = '{}/{}_{}_{}_{}.dbasedump'.format(
            db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution)
        if os.path.isfile(load_file):
            if verbose:
                print("{} -> loading".format(load_file))
            # NOTE: unpickling can execute arbitrary code; this is only safe
            # while the dump folder contains trusted files.
            # Binary mode + context manager: pickle data is bytes and the
            # handle must be closed deterministically.
            with open(load_file, 'rb') as cache_file:
                mz_list_tmp = pickle.load(cache_file)
        else:
            if verbose:
                print("{} -> generating".format(load_file))
            mz_list_tmp = calculate_isotope_patterns(
                sum_formulae,
                adduct=adduct,
                isocalc_sig=isocalc_sig,
                isocalc_resolution=isocalc_resolution,
                charge=charge)
            if db_dump_folder != "":
                with open(load_file, 'wb') as cache_file:
                    pickle.dump(mz_list_tmp, cache_file)

        # add patterns to total list
        for sum_formula in sum_formulae:
            if sum_formula not in mz_list_tmp:
                # could be missing if [M-a] would have negative atoms
                continue
            pattern = mz_list_tmp[sum_formula][adduct]
            n = min(max_peaks, len(pattern[0]))
            mz_list.setdefault(sum_formula, {})[adduct] = [
                pattern[0][0:n],
                pattern[1][0:n],
            ]

    if verbose:
        print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list