def get_save_isotope_patterns(db_filename, db_dump_folder, db_name, adduct, isocalc_sig, isocalc_resolution, charge,
                              verbose, instrument):
    """Return the isotope patterns for one database/adduct pair, cached on disk.

    The patterns are looked up in a pickled dump file inside ``db_dump_folder``
    whose name is built from ``db_name``, the class name of ``instrument``,
    ``adduct``, ``isocalc_sig`` and ``isocalc_resolution``.  If that file exists
    and can be unpickled its content is returned as-is; otherwise the patterns
    are computed with ``calculate_isotope_patterns``, written out with
    ``save_pattern`` and then returned.

    :param db_filename: tab-separated database file read with
        ``parse_databases.read_generic_csv`` (id, name and sum formula in
        columns 0, 1 and 2; header row 0)
    :param db_dump_folder: folder holding the ``.dbasedump`` cache files
    :param db_name: database label used in the cache file name
    :param adduct: adduct passed to ``calculate_isotope_patterns``
    :param isocalc_sig: passed to ``calculate_isotope_patterns``; part of the cache file name
    :param isocalc_resolution: passed to ``calculate_isotope_patterns``; part of the cache file name
    :param charge: charge passed to ``calculate_isotope_patterns``
    :param verbose: if true, print progress messages
    :param instrument: instrument object passed to ``calculate_isotope_patterns``
    :return: the unpickled dump, or the freshly calculated patterns
    """
    import pickle
    from pySM.parse_databases import parse_databases

    sum_formulae = parse_databases.read_generic_csv(db_filename, header=0, idcol=0, namecol=1, sfcol=2, sep='\t')
    load_file = '{}/{}_{}_{}_{}_{}.dbasedump'.format(db_dump_folder, db_name, instrument.__class__.__name__, adduct,
                                                     isocalc_sig, isocalc_resolution)

    # Check if already generated and load if possible, otherwise calculate fresh
    if os.path.isfile(load_file):
        if verbose:
            print("{} -> loading".format(load_file))
        try:
            # SECURITY: unpickling executes arbitrary code; only point db_dump_folder at trusted dumps.
            # NOTE(review): mode 'r' is kept to match however save_pattern writes the file
            # (not visible here) -- binary mode would be the portable choice; confirm and switch both.
            with open(load_file, 'r') as dump_file:
                return pickle.load(dump_file)
        except (ValueError, EOFError, pickle.UnpicklingError) as e:
            # An unreadable/truncated dump is not fatal: fall through and regenerate it.
            if verbose:
                print(str(e))

    if verbose:
        print("{} -> generating".format(load_file))
    mz_list_tmp = calculate_isotope_patterns(sum_formulae, adduct=adduct, instrument=instrument,
                                             isocalc_sig=isocalc_sig, isocalc_resolution=isocalc_resolution,
                                             charge=charge, verbose=False)
    save_pattern(load_file, db_dump_folder, mz_list_tmp)
    return mz_list_tmp
def generate_isotope_patterns(config, verbose=True):
    """Build the sum-formula list and the per-adduct isotope patterns described by ``config``.

    Reads the database file(s) named in ``config['file_inputs']['database_file']``
    (a single filename or a list of filenames), obtains the isotope patterns of
    every database/adduct combination through ``get_save_isotope_patterns`` and
    keeps, for each sum formula and adduct, at most the four entries with the
    largest values in the second array of the pattern (presumably the most
    intense peaks -- confirm against ``calculate_isotope_patterns``).

    Sum formulae for which no pattern was obtained are dropped from the
    returned formula dictionary.

    :param config: nested dict; the keys read here are
        ``file_inputs.database_file``, ``file_inputs.database_load_folder`` and
        ``isotope_generation.{charge, adducts, instrument, resolving_power, at_mz}``
    :param verbose: if true, print progress messages
    :return: tuple ``(sum_formulae, adducts, mz_list)`` where ``adducts`` is a
        set of adduct strings and ``mz_list[sum_formula][adduct]`` is a
        two-element list ``[pattern[0][idx], pattern[1][idx]]``
    """
    # TODO: verbose -> logging.debug
    from pySM.parse_databases import parse_databases
    from pySM.spatial_metabolomics import get_save_isotope_patterns
    from pyMSpec import instrument  # NOTE: this local name is rebound to the instrument object below

    # Extract variables from config dict
    db_filenames = config['file_inputs']['database_file']
    try:
        string_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        string_types = str
    if isinstance(db_filenames, string_types):
        # A bare filename would otherwise be iterated character by character.
        db_filenames = [db_filenames]
    db_dump_folder = config['file_inputs']['database_load_folder']

    isotope_cfg = config['isotope_generation']
    charge_states = isotope_cfg['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # currently only supports first charge!!
    charge = int('{}{}'.format(charge_states[0]['polarity'], charge_states[0]['n_charges']))
    adducts = {a['adduct'] for a in isotope_cfg['adducts']}

    # isocalc_sig / isocalc_resolution are derived from the instrument model
    # rather than read from config['isotope_generation'].
    instrument = get_instrument(config["isotope_generation"]["instrument"],
                                config["isotope_generation"]["resolving_power"],
                                config["isotope_generation"]["at_mz"])
    isocalc_sig = np.round(instrument.sigma_at_mz(200), decimals=4)
    isocalc_resolution = instrument.points_per_mz(isocalc_sig)

    # Get master list of sum formulae
    sum_formulae = {}
    for db_filename in db_filenames:
        sum_formulae = parse_databases.read_generic_csv(db_filename, header=0, idcol=0, namecol=1, sfcol=2,
                                                        sep='\t', sum_formulae=sum_formulae)
    if '' in sum_formulae:
        if verbose:
            print('empty sf removed from list')
        del sum_formulae['']

    # this limit of 4 is hardcoded to reduce the number of calculations
    # TODO: add to config file
    max_peaks = 4

    # Get isotope patterns for all sum_formulae
    mz_list = {}
    for db_filename in db_filenames:
        db_name = os.path.splitext(os.path.basename(db_filename))[0]
        for adduct in adducts:
            _mz_list = get_save_isotope_patterns(db_filename, db_dump_folder, db_name, adduct, isocalc_sig,
                                                 isocalc_resolution, charge, verbose, instrument)
            # add patterns to total list
            for sum_formula in sum_formulae:
                if sum_formula not in _mz_list:  # could be missing if [M-a] would have negative atoms
                    continue
                pattern = _mz_list[sum_formula][adduct]
                n = min(max_peaks, len(pattern[0]))
                # indices of the n largest entries of pattern[1], largest first
                sort_idx = np.argsort(pattern[1])[-n:][-1::-1]
                mz_list.setdefault(sum_formula, {})[adduct] = [pattern[0][sort_idx], pattern[1][sort_idx]]

    # Clean up: poorly formatted formulae may not receive an isotope pattern
    rm_list = [sum_formula for sum_formula in sum_formulae if sum_formula not in mz_list]
    if verbose:
        print("{} formula to remove".format(len(rm_list)))
    for sum_formula in rm_list:
        sum_formulae.pop(sum_formula, None)
    if verbose:
        print('all isotope patterns generated and loaded')
    return sum_formulae, adducts, mz_list
isocalc_resolution = instrument.points_per_mz(isocalc_sig) db_dump_folder = config['file_inputs']['database_load_folder'] if len(config['isotope_generation']['charge']) > 1: print 'Warning: only first charge state currently accepted' charge = int('{}{}'.format(config['isotope_generation']['charge'][0]['polarity'], config['isotope_generation']['charge'][0][ 'n_charges'])) # currently only supports first charge!! db_name = os.path.splitext(os.path.basename(db_filename))[0] load_file = '{}/{}_{}_{}_{}_{}.dbasedump'.format(db_dump_folder, db_name, instrument.__class__.__name__, adduct, isocalc_sig, isocalc_resolution) print "{} -> generating".format(load_file) mz_list_tmp = calculate_isotope_patterns(sum_formulae, adduct=adduct, isocalc_sig=isocalc_sig, isocalc_resolution=isocalc_resolution, charge=charge, verbose=False, instrument=instrument) spatial_metabolomics.save_pattern(load_file, db_dump_folder, mz_list_tmp) return True if __name__== '__main__': json_filename = "/home/palmer/Documents/tmp_data/MB/URenn/16135s1-3_mousebrain_dhbsub.json" config = spatial_metabolomics.get_variables(json_filename) adducts = [a['adduct'] for a in config['isotope_generation']['adducts']] db_filenames = config['file_inputs']['database_file'] if isinstance(db_filenames, basestring): db_filenames = [db_filenames,] for db_filename in db_filenames: sum_formulae = parse_databases.read_generic_csv(db_filename,header=1,idcol=0,namecol=1,sfcol=2, sep='\t') if '' in sum_formulae: del sum_formulae[''] for a in adducts: generate_isotope_pattern(sum_formulae,a,config, db_filename)
adduct=adduct, isocalc_sig=isocalc_sig, isocalc_resolution=isocalc_resolution, charge=charge, verbose=False, instrument=instrument) spatial_metabolomics.save_pattern(load_file, db_dump_folder, mz_list_tmp) return True if __name__ == '__main__': json_filename = "/home/palmer/Documents/tmp_data/MB/URenn/16135s1-3_mousebrain_dhbsub.json" config = spatial_metabolomics.get_variables(json_filename) adducts = [a['adduct'] for a in config['isotope_generation']['adducts']] db_filenames = config['file_inputs']['database_file'] if isinstance(db_filenames, basestring): db_filenames = [ db_filenames, ] for db_filename in db_filenames: sum_formulae = parse_databases.read_generic_csv(db_filename, header=1, idcol=0, namecol=1, sfcol=2, sep='\t') if '' in sum_formulae: del sum_formulae[''] for a in adducts: generate_isotope_pattern(sum_formulae, a, config, db_filename)