def read_xcms_peaks(input_file):
    """Parse a tab-separated XCMS peak table into a list of Peak objects.

    The first two lines of the file are skipped. Blank lines are ignored.
    For every remaining line the tab-separated columns are used as follows:

    * column 0   -- XCMS id (surrounding double quotes removed, read as int)
    * column 1   -- mass (read as float)
    * column 4   -- retention time (read as float)
    * columns -3 and -2 -- tag fragments; the non-empty ones are joined with
      a single space and surrounding double quotes are then stripped
    * column -1  -- compound group (line terminator removed)

    Every peak is created with the name 'unknown'.

    Args:
        input_file: Path of the XCMS peak table.

    Returns:
        list: Peak objects in file order.

    Raises:
        ValueError: If the id, mass or retention time is not numeric.
        IndexError: If a data line has fewer than five columns.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(input_file, 'r') as fp:
        lines = fp.readlines()

    peak_list = []
    for line in lines[2:]:
        if not line.strip():
            # A blank (typically trailing) line carries no peak.
            continue

        # Drop only the line terminator: tabs must survive so that empty
        # trailing columns are still counted, and the last column must not
        # keep a stray newline.
        parts = line.rstrip('\r\n').split('\t')

        xcms_id = parts[0].strip('"')
        rt = parts[4]
        mass = parts[1]

        # Join whichever of the two tag columns are filled in.
        tag = ' '.join(piece for piece in (parts[-3], parts[-2])
                       if piece != '')
        tag = tag.strip('"')

        compound_group = parts[-1]
        name = 'unknown'

        peak = Peak(float(mass), float(rt), compound_group=compound_group,
                    tag=tag, name=name)
        peak.set_xcms_id(int(xcms_id))
        peak_list.append(peak)
    return peak_list
def read_library_peaks(input_file):
    """Read reference (library) peaks from a '$'- or ','-delimited file.

    Each line is tried in two layouts, in this order:

    1. split on '$': field 0 = name, field 1 = rt, field 2 = mass
    2. split on ',': field 0 = name, field 1 = mass, field 2 = rt

    Note that the two layouts carry rt and mass in opposite columns. A line
    that fits neither layout is reported on stdout and skipped.

    Args:
        input_file: Path of the reference file.

    Returns:
        list: Peak objects (each given an empty tag via ``add_tag('')``),
        in file order.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(input_file, 'r') as fp:
        lines = fp.readlines()

    peak_list = []
    for i, line in enumerate(lines):
        parts = [x.strip() for x in line.split('$')]
        try:
            rt = float(parts[1])
            mass = float(parts[2])
        except (ValueError, IndexError):
            # Not the '$' layout (too few fields or non-numeric values):
            # fall back to the comma layout.
            try:
                parts = [x.strip() for x in line.split(',')]
                rt = float(parts[2])
                mass = float(parts[1])
            except (ValueError, IndexError):
                print('Warning: Error reading line %s from reference file' % i)
                print('=====================================================')
                print('Line contents: %s' % line.strip())
                print('')
                continue

        # `parts` comes from whichever layout parsed successfully.
        name = parts[0]
        tag = ''
        peak = Peak(float(mass), float(rt), name=name)
        peak.add_tag(tag)
        peak_list.append(peak)
    return peak_list
def add_isotope(peak, isotope):
    """Create the "M+<isotope>" peak derived from ``peak``.

    The new peak keeps the retention time of ``peak``; its mass is the
    parent mass plus ``isotope * 1.0086710869``. Its tag is "M+<isotope>",
    its name is the parent name followed by a space and that tag, and its
    parent is set to the id number of ``peak``.

    Args:
        peak: Source Peak (must provide get_mass, get_rt, get_name and
            get_id_num).
        isotope: Multiplier for the per-isotope mass increment.

    Returns:
        The newly created Peak.
    """
    parent_mass = peak.get_mass()
    parent_rt = peak.get_rt()
    label = "M+" + str(isotope)
    parent_name = peak.get_name()
    parent_id = peak.get_id_num()

    # The literal '0' is the third positional Peak argument
    # (presumably the compound group -- confirm against Peak).
    isotope_peak = Peak(
        parent_mass + isotope*1.0086710869,
        parent_rt,
        '0',
        label,
        name=parent_name + ' ' + label,
    )
    isotope_peak.set_parent(parent_id)
    return isotope_peak
def get_decoy_peaks(peak, mode):
    """Generate decoy adduct peaks for ``peak``.

    Based on the idea of Palmer, Alexandrov et al.: other elements from the
    periodic table are used to create impossible adducts.

    Args:
        peak: Source Peak (must provide get_mass, get_rt, get_name and
            get_id_num).
        mode: 'pos' adds each element mass to the peak mass (tag
            "+<element>"); 'neg' additionally subtracts one hydrogen mass
            (tag "-H+<element>"). Any other value yields no decoys.

    Returns:
        list: One Peak per decoy element, each with its parent set to the
        id number of ``peak``; empty for an unrecognised mode.
    """
    mass = peak.get_mass()
    rt = peak.get_rt()
    name = peak.get_name()
    decoy_dict = {'He':4.002602, 'Be':9.0121831, 'F':18.998403,
                  'Al':26.9815385, 'Sc':44.955908, 'Fe':55.845,
                  'Ge':72.630, 'Sr':87.62}
    HYDROGEN_MASS = 1.0078250
    id_num = peak.get_id_num()

    # Both modes differ only in the starting mass and the label prefix.
    if mode == 'pos':
        base_mass, prefix = mass, '+'
    elif mode == 'neg':
        base_mass, prefix = mass - HYDROGEN_MASS, '-H+'
    else:
        return []

    decoys = []
    for element, element_mass in decoy_dict.items():
        label = prefix + element
        decoy = Peak(base_mass + element_mass, rt, '0',
                     tag=label, name=name + label)
        decoy.set_parent(id_num)
        decoys.append(decoy)
    return decoys
def read_peaks(infile):
    """Read a comma-separated peak/intensity matrix into Peak objects.

    Layout expected by this reader (the first field of every row is a row
    label and is ignored, except on sample rows where it is kept):

    * row 0      -- one name per peak (``metNames`` in the original code)
    * row 1      -- one m/z value per peak
    * row 2      -- one retention time per peak
    * rows 3, 4  -- ignored
    * rows 5+    -- one row per sample: label (stored in the shared
      ``fnames`` list, presumably the sample file name) followed by one
      intensity per peak

    The intensity rows are transposed so that each peak receives the list
    of its intensities across all samples.

    Args:
        infile: Path of the CSV file.

    Returns:
        list: One Peak per column, built as
        ``Peak(i, mz, rt, fnames, intensities, name)``. An empty or
        header-less file yields an empty list; a file without sample rows
        yields peaks with empty intensity lists.

    Raises:
        ValueError: If an m/z, retention time or intensity is not numeric.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(infile, 'r') as fp:
        lines = fp.readlines()

    # Pre-initialised so that a short or empty file yields no peaks instead
    # of failing on an undefined name.
    met_names, mzs, rts = [], [], []
    fnames = []
    ints = []
    for i, line in enumerate(lines):
        fields = line.strip().split(',')
        if i == 0:
            met_names = [str(x) for x in fields[1:]]
        elif i == 1:
            mzs = [float(x) for x in fields[1:]]
        elif i == 2:
            rts = [float(x) for x in fields[1:]]
        elif i > 4 and fields != ['']:
            # Blank sample rows are skipped: an empty row would make the
            # transpose below collapse to nothing.
            fnames.append(str(fields[0]))
            ints.append([float(x) for x in fields[1:]])

    # Transpose sample-major rows into one intensity list per peak.
    if ints:
        per_peak = [list(column) for column in zip(*ints)]
    else:
        per_peak = [[] for _ in mzs]

    peaks = []
    for i in range(len(mzs)):
        peaks.append(Peak(i, mzs[i], rts[i], fnames, per_peak[i],
                          met_names[i]))
    return peaks
def get_adduct_peaks(peak, mode):
    """Generate every adduct peak of ``peak`` for the given ion mode.

    Adducts are from the CAMERA paper supplemental material.

    For each adduct the mass is ``mass + shift``, or ``2*mass + shift`` when
    the adduct label contains '2M'; labels containing '(2+)' have the result
    halved. The retention time is copied from ``peak``.

    Args:
        peak: Source Peak (must provide get_mass, get_rt and get_name).
        mode: 'pos' or 'neg'. Any other value prints an error message and
            produces no peaks.

    Returns:
        list: One Peak per adduct of the selected mode, tagged with the
        adduct label and named "<name> <label>".
    """
    if mode not in ['pos','neg']:
        print("Error: mode must be 'pos' or 'neg'")

    mass = peak.get_mass()
    rt = peak.get_rt()
    name = peak.get_name()

    positive_dict = {'M+H':1.0078250, 'M+Na':22.989769, 'M+K':39.0983,
                     'M+NH4':18.03773, 'M+2H(2+)':2.01565,
                     'M+Na+HCOOH':69.013079, 'M+H+K(2+)':40.106125,
                     '2M+Na':22.989769, 'M+K+HCOOH':85.12161,
                     '2M+K':39.0983}
    negative_dict = {"M-H":-1.0078250,"M-H+NaCOOH":66.997429,
                     "M-2H+Na":20.974119,"2M-2H+Na":20.974119,
                     "M-H+HCOOH":45.015485, "2M-H":-1.0078250,
                     "2M-2H+K":37.08265, "M-2H+K":37.08265}

    # Pick the table once; an unknown mode iterates over nothing.
    if mode == 'pos':
        adduct_table = positive_dict
    elif mode == 'neg':
        adduct_table = negative_dict
    else:
        adduct_table = {}

    peaks = []
    for label, shift in adduct_table.items():
        if '2M' in label:
            new_mass = 2*mass + shift
        else:
            new_mass = mass + shift
        if '(2+)' in label:
            new_mass = new_mass/2
        peaks.append(Peak(new_mass, rt, '0', tag=label,
                          name=name+' '+label))
    return peaks
def get_adduct_peak(peak, adduct, mode):
    """Build the peak for a single named adduct of ``peak``.

    Adducts are from the CAMERA paper supplemental material.

    The new mass is ``mass + shift``, or ``2*mass + shift`` when the adduct
    label contains '2M'; labels containing '(2+)' have the result halved.
    The retention time is copied from ``peak``.

    Args:
        peak: Source Peak (must provide get_mass and get_rt).
        adduct: Adduct label, e.g. 'M+H' or 'M-H'. It must belong to the
            table of the selected ``mode``.
        mode: 'pos' or 'neg'.

    Returns:
        A new Peak built as ``Peak(new_mass, rt, '0', adduct)``.

    Raises:
        ValueError: If ``mode`` is neither 'pos' nor 'neg'.
        KeyError: If ``adduct`` is not defined for ``mode``.
    """
    positive_dict = {'M+H':1.0078250, 'M+Na':22.989769, 'M+K':39.0983,
                     'M+NH4':18.03773, 'M+2H(2+)':2.01565,
                     'M+Na+HCOOH':69.013079, 'M+H+K(2+)':40.106125,
                     '2M+Na':22.989769, 'M+K+HCOOH':85.12161,
                     '2M+K':39.0983}
    negative_dict = {"M-H":-1.0078250,"M-H+NaCOOH":66.997429,
                     "M-2H+Na":20.974119,"2M-2H+Na":20.974119,
                     "M-H+HCOOH":45.015485, "2M-H":-1.0078250,
                     "2M-2H+K":37.08265, "M-2H+K":37.08265}

    if mode == 'pos':
        adduct_table = positive_dict
    elif mode == 'neg':
        adduct_table = negative_dict
    else:
        # Previously this fell through to an unbound `new_mass`.
        print("Error: mode must be 'pos' or 'neg'")
        raise ValueError("mode must be 'pos' or 'neg', got %r" % (mode,))

    # Membership is tested against the table's keys. The old test compared
    # the label with a list of two key lists, so it reported "not found"
    # on every call.
    if adduct not in adduct_table:
        print("Error, adduct not found")
        raise KeyError(adduct)

    mass = peak.get_mass()
    rt = peak.get_rt()

    if '2M' in adduct:
        new_mass = 2*mass + adduct_table[adduct]
    else:
        new_mass = mass + adduct_table[adduct]
    # Only positive-mode labels carry '(2+)'; the check is a no-op for
    # the negative table.
    if '(2+)' in adduct:
        new_mass = new_mass/2

    new_peak = Peak(new_mass, rt, '0', adduct)
    return new_peak
def read_peaks(infile):
    """Read a tab-separated peak table into Peak objects.

    The first line (header) is skipped and blank lines are ignored. For
    every other line the tab-separated columns are used as follows:

    * column 0       -- peak index (surrounding double quotes removed, int)
    * column 1       -- m/z (float)
    * column 4       -- retention time (float)
    * columns 11-15  -- five area values (float)

    Args:
        infile: Path of the tab-separated file.

    Returns:
        list: One ``Peak(index, mz, rt, areas)`` per data line, in file
        order.

    Raises:
        ValueError: If a used column is not numeric.
        IndexError: If a data line has fewer than 16 columns.
    """
    # The context manager guarantees the handle is closed even on error.
    with open(infile, 'r') as fp:
        data_lines = fp.readlines()[1:]

    peaks = []
    for line in data_lines:
        if not line.strip():
            # A blank (typically trailing) line carries no peak.
            continue
        parts = line.split('\t')
        # NOTE(review): the original wrapped these assignments in a
        # `for part in parts` loop that never used `part`; it is assumed to
        # have produced exactly one Peak per line -- confirm.
        index = int(parts[0].strip('"'))
        mz = float(parts[1])
        rt = float(parts[4])
        areas = [float(parts[i]) for i in range(11, 16)]
        peaks.append(Peak(index, mz, rt, areas))
    return peaks