def readms1peak(bn):
    # read detected MS1 features for one run, sort by m/z and de-isotope them
    ms1peaks = []
    with open('ms1feature_' + bn + '.txt') as ms1peakfile:
        for line in ms1peakfile:
            lsp = line.rstrip().split()
            if len(lsp) == 5:
                ms1peaks.append(Peak(*[float(x) for x in lsp], float(lsp[0])))
    ms1peaks.sort()
    iso_diff = 1.00335  # spacing between isotopic peaks (13C - 12C)
    single = 0
    doub = 0
    peak_double = set()
    for ii in range(len(ms1peaks)):
        peak0 = ms1peaks[ii]
        err_bd = min(.01, bound_ppm(peak0.mz * ms1ppm))
        # look for a co-eluting feature exactly one isotope spacing below this one
        pos0 = bisect_left(ms1peaks, (peak0.mz - iso_diff - err_bd,))
        pos1 = bisect_left(ms1peaks, (peak0.mz - iso_diff + err_bd,), lo=pos0)
        if pos0 != pos1:
            peak1 = min(ms1peaks[pos0:pos1], key=lambda p: abs(peak0.rt - p.rt))
            if abs(peak0.rt - peak1.rt) < 1 and peak0.auc < peak1.auc:
                # peak0 is an isotope of peak1: inherit peak1's monoisotopic m/z
                ms1peaks[ii] = Peak(*peak0[:-1], peak1.mmz)
                if peak1.mmz == peak1.mz:
                    single += 1
    return ms1peaks, peak_double
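
# --- illustrative sketch (not part of the original script) --------------------
# readms1peak() and the functions below rely on a Peak record and a bound_ppm()
# helper defined elsewhere in this file. The shapes below are inferred from how
# they are used here: Peak is constructed as Peak(mz=..., rt=..., sc=..., coef=...,
# auc=..., mmz=...) in scoring(), and bound_ppm() receives mz * ms1ppm, i.e. a
# tolerance already converted to Da. The floor value is a placeholder assumption,
# not the tool's actual setting.
from collections import namedtuple

PeakSketch = namedtuple('PeakSketch', ['mz', 'rt', 'sc', 'coef', 'auc', 'mmz'])


def bound_ppm_sketch(tol_da, floor=0.002):
    # clamp a ppm-derived m/z tolerance (already in Da) to a minimum width
    return max(tol_da, floor)

# Because the peak records are tuples ordered by their first field (m/z), a bare
# one-element tuple works as a bisect key; that is what the (value,) lookups
# above do, e.g. bisect_left(ms1peaks, (target_mz - tol,)).
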
def print_tab(lib_ent):
    # write the cross-run quantification table: one row per library entry,
    # with per-file peak areas and retention times
    with open('quant_rex.txt', 'w') as quant_auc:
        quant_auc.write(
            'name\tISF\tadduct\tfeature_m/z(library)\tRT(library)\tfeature_m/z(experimental median)\t%detected\t'
        )
        quant_auc.write('\t'.join(x[:-5] for x in mzML_files) + '\t')
        quant_auc.write('\t'.join('RT_' + x[:-5] for x in mzML_files) + '\n')
        for ent in lib_ent:
            ent_p = []
            for nn, mzML_file in enumerate(mzML_files):
                ms1peaks = ms1peaks_dict[mzML_file]
                bd = bound_ppm(ent.Mmass * ms1ppm)
                pos0 = bisect_left(ms1peaks, (ent.Mmass - bd,))
                pos1 = bisect_left(ms1peaks, (ent.Mmass + bd,))
                if ent.rt != 'NA':
                    peak = [
                        p for p in ms1peaks[pos0:pos1]
                        if abs(p.rt - ent.rt) < RT_shift
                    ]
                else:
                    peak = ms1peaks[pos0:pos1]
                if peak:
                    if ent.rt != 'NA':
                        # keep only the most abundant feature when the library RT is known
                        peak = [max(peak, key=operator.attrgetter('auc'))]
                    for p in peak:
                        ent_p.append((nn, p))
            line_str = []
            if ent_p:
                for nn in range(len(mzML_files)):
                    pos0 = bisect_left([x[0] for x in ent_p], nn)
                    pos1 = bisect.bisect_right([x[0] for x in ent_p], nn, lo=pos0)
                    line_str.append((
                        ','.join(format(dat_n[1].auc, '.1f') for dat_n in ent_p[pos0:pos1]),
                        ','.join(format(dat_n[1].rt / 60, '.2f') for dat_n in ent_p[pos0:pos1])))
                mzmed = statistics.median(x[1].mz for x in ent_p)
                if ent.rt != 'NA':
                    quant_auc.write('{}\t{}\t{}\t{:.5f}\t{:.2f}\t{:.5f}'.format(
                        ent.name,
                        '*' if ent.name.startswith('ISF of ') else '',
                        ent.adduct, ent.Mmass, ent.rt / 60, mzmed))
                else:
                    # keep the ISF column even without a library RT so the row
                    # stays aligned with the header
                    quant_auc.write('{}\t{}\t{}\t{:.5f}\tNA\t{:.5f}'.format(
                        ent.name,
                        '*' if ent.name.startswith('ISF of ') else '',
                        ent.adduct, ent.Mmass, mzmed))
                quant_auc.write('\t{:.2f}'.format(
                    sum((1 if x else 0) for x, _ in line_str) / len(mzML_files)))
                quant_auc.write('\t' + '\t'.join(x for x, _ in line_str) +
                                '\t' + '\t'.join(x for _, x in line_str) + '\n')
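
# Illustrative usage sketch (an assumption, not the original driver code):
# print_tab() expects the module-level ms1peaks_dict to map each mzML file name
# to its sorted, de-isotoped MS1 feature list, which readms1peak() above returns.
# ms1peaks_dict = {}
# for mzML_file in mzML_files:
#     peaks, _ = readms1peak(os.path.basename(mzML_file))
#     ms1peaks_dict[mzML_file] = peaks
# print_tab(lib_ent)
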
            break
        mz_list = [float(x) for x in mz_list]
        I_list = [x / max(I_list) * 999. for x in I_list]
        all_dat[name_id].append(
            (nn, dotp, premz, rt, mz_list, I_list, auc, feat))
        lib_dict[name_id] = (dotp, lib_dat)

isf_dict = dict()
for name_id in all_dat.keys():
    name_add = name_id.split('\n')
    name, adduct = name_add[:-1], name_add[-1]
    name0 = name[0]
    if name0.startswith('ISF of'):
        pmz, rt, mz = [float(x) for x in re.findall(r"\d+\.\d+", name0)[:3]]
        for pmz0, rt0, mz0, adduct0 in isf_dict.keys():
            if abs(pmz - pmz0) < bound_ppm(pmz * ms1ppm) and \
                    abs(rt - rt0) < RT_shift and \
                    abs(mz - mz0) < bound_ppm(mz * ms1ppm) and adduct == adduct0:
                isf_dict[(pmz0, rt0, mz0, adduct)].append(name_id)
                break
        else:
            isf_dict[(pmz, rt, mz, adduct)] = [name_id]

name_dict0 = dict()
for key, vv in isf_dict.items():
    if len(vv) > 1:
        pmz_rt_mz = [
            tuple(float(x) for x in re.findall(r"\d+\.\d+", v)[:3])
            for v in vv
        ]
        key0 = statistics.median(x for x, _, _ in pmz_rt_mz)
        key1 = statistics.median(x for _, x, _ in pmz_rt_mz)
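
# Illustrative note (not part of the original script): the ISF grouping above
# relies on Python's for/else -- the else branch runs only when the loop finishes
# without hitting break, i.e. when no existing (pmz, rt, mz, adduct) key was close
# enough, so a new group is started. A minimal standalone version of that idiom:
def group_by_tolerance_sketch(groups, value, tol=0.5):
    # groups maps a representative value to the list of members assigned to it
    for key in list(groups):
        if abs(key - value) < tol:
            groups[key].append(value)
            break
    else:
        groups[value] = [value]
    return groups

# e.g. group_by_tolerance_sketch({100.1: [100.1]}, 100.3) -> {100.1: [100.1, 100.3]}
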
def scoring(cpd):
    # score one library entry against the MS1 features and the DIA MS2 data
    ent, name = cpd
    ms1mz, charge, frag_mz, frag_I, adduct, RT, frag_ann = ent.split(' ', 6)
    ms1mz = float(ms1mz)
    if re.fullmatch(r"\d+\.\d+", RT):
        RT = float(RT)
    frag_I = [float(x) for x in frag_I.split(',')]
    frag_mz = [float(x) for x in frag_mz.split(',')]
    frag_ann = [x for x in frag_ann.split(',')]
    # keep only the topNfrag most intense library fragments
    sorted_I = sorted(zip(frag_I, frag_mz), reverse=True)[:topNfrag]
    frag_mz = [x for _, x in sorted_I]
    frag_I = [x for x, _ in sorted_I]
    if len(frag_I) != len(frag_mz):
        print('warning: fragment m/z and intensity lists differ in length')
    adduct = adduct.replace(',', '')
    name = '---'.join(name)
    frag_mz_l = [0] * len(frag_mz)
    frag_mz_r = [0] * len(frag_mz)
    for nn, f_mz in enumerate(frag_mz):
        err_bd = .01
        frag_mz_l[nn] = f_mz - err_bd
        frag_mz_r[nn] = f_mz + err_bd
    err_bd = bound_ppm(ms1mz * ms1ppm)
    pos0 = bisect_left(ms1peaks, (ms1mz - err_bd,))
    pos1 = bisect_left(ms1peaks, (ms1mz + err_bd,))
    pseudo_feat = []  # pseudo feature for entries w/o feature
    ms1peaks_match = [
        x for x in ms1peaks[pos0:pos1]
        if not isinstance(RT, float) or abs(RT - x.rt) < rt_diff
    ]
    if ms2_auc_no_feat and not ms1peaks_match and isinstance(RT, float):
        # if no feature was found but the library provides an RT, quantify there anyway
        pseudo_feat.append(Peak(mz=ms1mz, rt=RT, sc=10, coef=0, auc=0, mmz=ms1mz))
    score_peaks = []
    for ms1peak in ms1peaks_match + pseudo_feat:
        if not minmz < ms1peak.mz < maxmz:
            continue
        pos0 = bisect_left(startpt, ms1peak.mz)
        pos1 = bisect_left(end__pt, ms1peak.mz)
        if pos0 == pos1:  # ms1peak in one window only
            pos = pos0
        else:  # take the window whose boundaries are furthest from ms1peak
            pos = (pos0 if ms1peak.mz - startpt[pos0 - 1] > end__pt[pos1] - ms1peak.mz
                   else pos1)
        iso = ms2scans[sswath[pos]]
        ms2_I = [0] * len(frag_I)
        ms2_auc = [0] * len(frag_I)
        pfc = [0] * len(frag_I)
        rt_l = ms1peak.rt - ms1peak.sc * 1.5
        rt_r = ms1peak.rt + ms1peak.sc * 1.5
        ms1rt = rtset[bisect_left(rtset, rt_l):bisect_left(rtset, rt_r)]
        p_dict = dict()  # highest intensities per scan bounded by m/z
        p_area = [
            x for x in ms1scans[bisect_left(ms1scans, (ms1peak.mz - .01,)):
                                bisect_left(ms1scans, (ms1peak.mz + .01,))]
            if rt_l < x.rt < rt_r
        ]
        for pt in p_area:
            if pt.rt not in p_dict or p_dict[pt.rt] < pt.I:
                p_dict[pt.rt] = pt.I
        p_maxI = [p_dict.get(rt, 0.) for rt in ms1rt]
        for nn, (f_mz_l, f_mz_r, f_I) in enumerate(zip(frag_mz_l, frag_mz_r, frag_I)):
            f_area = [
                x for x in iso[bisect_left(iso, (f_mz_l,)):bisect_left(iso, (f_mz_r,))]
                if rt_l < x.rt < rt_r
            ]
            if f_area:
                f_area_ = [x.I for x in f_area if abs(x.rt - ms1peak.rt) < 2]
                if f_area_:
                    ms2_I[nn] = max(f_area_)
                f_dict = dict()  # highest intensities per scan bounded by m/z
                for pt in f_area:
                    if pt.rt not in f_dict or f_dict[pt.rt] < pt.I:
                        f_dict[pt.rt] = pt.I
                ms2_maxI = [f_dict.get(rt, 0.) for rt in ms1rt]
                # precursor-fragment chromatogram correlation
                pfc[nn] = cos_sim(p_maxI, ms2_maxI)
                ms2_auc[nn] = sum((I0 + I1) * (rt1 - rt0)
                                  for rt0, rt1, I0, I1 in zip(ms1rt, ms1rt[1:],
                                                              ms2_maxI, ms2_maxI[1:])) / 2
        ms1_auc = sum((I0 + I1) * (rt1 - rt0)
                      for rt0, rt1, I0, I1 in zip(ms1rt, ms1rt[1:],
                                                  p_maxI, p_maxI[1:])) / 2
        ssm = cos_sim0(frag_I, ms2_I)
        score_peaks.append((ssm, pfc, ms1peak, ms2_auc, ms1_auc))
    if score_peaks:
        score_peaks.sort(reverse=True)
        max_score_peaks = [score_peaks[0]]
        for x in score_peaks[1:]:
            if max_score_peaks[0][0] - x[0] < .1:
                max_score_peaks.append(x)
            else:
                break
        # pick top scoring ms2 for each entry; if score difference is insignificant use auc
        max_peak = max(max_score_peaks, key=lambda x: x[2].auc)
        return (name, adduct, ms1mz, RT, frag_mz, frag_ann, max_peak)
    return False
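
# Illustrative sketch (not part of the original script): scoring() calls
# cos_sim()/cos_sim0() and computes chromatographic peak areas with the
# trapezoidal rule. Those helpers are defined elsewhere in this file; the
# versions below are assumptions about their behavior, suffixed _sketch so they
# do not shadow the real definitions.
import math


def cos_sim_sketch(a, b):
    # cosine similarity of two equal-length intensity vectors;
    # returns 0.0 if either vector is empty or all-zero
    if len(a) != len(b):
        return 0.
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    if na == 0. or nb == 0.:
        return 0.
    return sum(x * y for x, y in zip(a, b)) / (na * nb)


def trapezoid_auc_sketch(rts, intensities):
    # same trapezoidal integration as the ms1_auc / ms2_auc expressions above
    return sum((i0 + i1) * (t1 - t0)
               for t0, t1, i0, i1 in zip(rts, rts[1:], intensities, intensities[1:])) / 2

# e.g. cos_sim_sketch([1., 0.], [1., 0.]) == 1.0
#      trapezoid_auc_sketch([0., 1., 2.], [0., 10., 0.]) == 10.0
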
def print_score(mzML_file):
    # annotate one mzML run: background-subtract MS2, detect in-source fragments,
    # match spectra to the library and write the annotation/unknown output files
    basename0 = os.path.basename(mzML_file)
    print(basename0)
    ms2scans = read_ms2('ms2spectra_' + basename0 + '.txt')
    bg = param_dict.get("background")
    if bg is not None:
        # background subtraction: drop fragments that also appear in the
        # closest-in-RT background MS2 scan of the same precursor
        ms2scans_bg = read_ms2('ms2spectra_' +
                               os.path.basename(glob.glob(bg)[0]) + '.txt')
        for ii, ms2sc in enumerate(ms2scans):
            err_bd = bound_ppm(ms2sc.ms1mz * ms1ppm)
            pos0 = bisect_left(ms2scans_bg, (ms2sc.ms1mz - err_bd,))
            pos1 = bisect_left(ms2scans_bg, (ms2sc.ms1mz + err_bd,), lo=pos0)
            if pos1 != pos0:
                closest_bg = min(ms2scans_bg[pos0:pos1],
                                 key=lambda x: abs(x.rt - ms2sc.rt))
                if abs(closest_bg.rt - ms2sc.rt) < 60:
                    new_mz = []
                    new_I = []
                    for m, i in zip(ms2sc.mz, ms2sc.I):
                        pos0 = bisect_left(closest_bg.mz, m - err_bd)
                        pos1 = bisect_left(closest_bg.mz, m + err_bd, lo=pos0)
                        if pos0 == pos1:  # fragment absent from the background scan
                            new_mz.append(m)
                            new_I.append(i)
                    ms2scans[ii] = Spec(*ms2sc[:2], new_mz, new_I, *ms2sc[-2:])
    ms1peaks, peak_double = readms1peak(basename0)
    ins_f = open('isf_' + basename0 + '.txt', 'w')

    def print_ms2(ms2, p):
        ins_f.write('{} {}\n'.format(p.mz, p.rt))
        ins_f.write(' '.join(str(x) for x in ms2.mz_all) + '\n')
        ins_f.write(' '.join(str(x) for x in ms2.I_all) + '\n')

    def ms2_spectrum():
        ms2_wp = []  # ms2 with ms1 peak info
        for nn, spec in enumerate(ms2scans):
            err_bd = bound_ppm(spec.ms1mz * ms1ppm)
            pos0 = bisect_left(ms1peaks, (spec.ms1mz - err_bd,))
            pos1 = bisect_left(ms1peaks, (spec.ms1mz + err_bd,), lo=pos0)
            s_peak = None
            if pos0 != pos1:
                peak = min(ms1peaks[pos0:pos1], key=lambda p: abs(spec.rt - p.rt))
                if abs(spec.rt - peak.rt) < peak.sc * 1.5:
                    s_peak = peak
            if s_peak is None or s_peak.mz == s_peak.mmz:  # append monoisotopic peak only
                ms2_wp.append((spec, s_peak))
        # keep one MS2 spectrum per feature: the one acquired closest to the peak apex
        peak_ms2 = dict()
        ms2_wp_ = []
        for s, p in ms2_wp:
            if p is None:
                ms2_wp_.append((s, p))
            elif p not in peak_ms2 or abs(s.rt - p.rt) < abs(peak_ms2[p].rt - p.rt):
                peak_ms2[p] = s
        for p, s in peak_ms2.items():
            ms2_wp_.append((s, p))
        ms2_wp = sorted(ms2_wp_)
        # in-source fragment (ISF) detection: a lower-m/z feature is flagged as an
        # ISF of a higher-m/z precursor when it co-elutes, appears in the
        # precursor's MS2 spectrum, and the two spectra are similar
        isf_sc = [[] for x in range(len(ms2_wp))]
        for ii in range(len(ms2_wp) - 1, -1, -1):
            ms2, peak = ms2_wp[ii]
            if peak:  # and peak.mz==peak.mmz:
                for jj in range(bisect_left(ms2_wp, ((ms2.ms1mz,),))):
                    i_ms2, i_p = ms2_wp[jj]
                    if i_p:
                        err_bd = min(.01, bound_ppm(i_p.mz * ms2ppm))
                        pos0 = bisect_left(ms2.mz, i_p.mz - err_bd)
                        pos1 = bisect_left(ms2.mz, i_p.mz + err_bd, lo=pos0)
                        if i_p and abs(peak.rt - i_p.rt) < ISF_rt_diff and \
                                pos0 != pos1 and \
                                max(ms2.I) * .1 < max(ms2.I[pos0:pos1]):  # require ISF to be n% of base peak
                            ms2_I = []
                            i_ms2_I = []
                            xfrag = set()
                            for f_i, f_mz in ((x, y)
                                              for x, y in zip(i_ms2.I_all[:10], i_ms2.mz_all)
                                              if y < i_ms2.ms1mz + .01):
                                err_bd = .01
                                pos0 = bisect_left(ms2.mz, f_mz - err_bd)
                                pos1 = bisect_left(ms2.mz, f_mz + err_bd, lo=pos0)
                                if pos0 != pos1:
                                    i_ms2_I.append(f_i)
                                    ms2_I.append(max(ms2.I[pos0:pos1]))
                                    for i in range(pos0, pos1):
                                        xfrag.add(i)
                                else:
                                    i_ms2_I.append(f_i)
                                    ms2_I.append(0)
                            if sum(1 for x in ms2_I if x > 0) > 1:
                                for nn, (f_mz, f_I) in enumerate(zip(ms2.mz, ms2.I)):
                                    if nn not in xfrag and f_mz < i_ms2.ms1mz + .01:
                                        ms2_I.append(f_I)
                                        i_ms2_I.append(0)
                                isf_par0 = [(x, y) for x, y in zip(i_ms2_I, ms2_I) if y > 0]
                                isf_par1 = [(x, y) for x, y in zip(i_ms2_I, ms2_I) if x > 0]
                                cs = max(
                                    cos_sim([x for x, y in isf_par0], [y for x, y in isf_par0]),
                                    cos_sim([x for x, y in isf_par1], [y for x, y in isf_par1]))
                                if cs > ISF_score:
                                    isf_sc[jj].append((cs, ii))
        for jj, ic in enumerate(isf_sc):
            for cs, ii in ic:
                print_ms2(*ms2_wp[ii])
                print_ms2(*ms2_wp[jj])
                ins_f.write('{}\n\n'.format(cs))
        return ms2_wp, isf_sc

    ms2_wp, isf_sc = ms2_spectrum()

    def mass_matching(jj):
        spec, peak = ms2_wp[jj]
        adduct_match = []
        if "NoMatch" in lib_types[0]:
            # no spectral library: match the computed neutral mass against dlist
            # for every adduct
            for adduct0, (mass0, charge0, _) in list(adduct_list.items())[:-1]:
                Mmass = ((spec.ms1mz if peak is None else peak.mz) * charge0 - mass0) / \
                        (1 if adduct0[0] == 'M' else int(adduct0[0]))
                err_bd = bound_ppm(Mmass * ms1ppm)
                pos0 = bisect_left(dlist, (Mmass - err_bd,))
                pos1 = bisect_left(dlist, (Mmass + err_bd,), lo=pos0)
                adduct_match.append((adduct0, (pos0, pos1)))
        else:
            score_ent = []
            premz = (spec.ms1mz if peak is None else peak.mz)
            for iiiiii in range(1):
                err_bd = bound_ppm(premz * ms1ppm)
                pos_0 = bisect_left(lib_ent, (premz - err_bd,))
                pos_1 = bisect_left(lib_ent, (premz + err_bd,), lo=pos_0)
                lib_ent_ = (x for x in lib_ent[pos_0:pos_1]
                            if x.rt is None or abs(spec.rt - x.rt) < RT_shift)
                for ent in lib_ent_:
                    adduct0, charge0 = ent.adduct, ent.charge
                    ms2_I = []
                    ent_I = []
                    xfrag = set()
                    for nn, (f_mz, f_I) in enumerate(
                            (x, y) for x, y in zip(ent.mz, ent.I)
                            if (charge0 * premz - x) > 3.3):
                        err_bd = .01
                        pos0 = bisect_left(spec.mz, f_mz - err_bd)
                        pos1 = bisect_left(spec.mz, f_mz + err_bd, lo=pos0)
                        ent_I.append(f_I)
                        if pos0 != pos1:
                            ms2_I.append(max(spec.I[pos0:pos1]))
                            for i in range(pos0, pos1):
                                xfrag.add(i)
                        else:
                            ms2_I.append(0)
                    if sum(1 for x in ms2_I if x > 0) >= min_peaks:  # min number of matching peaks
                        # unmatched experimental fragments lower the cosine score
                        for nn, (f_mz, f_I) in enumerate(zip(spec.mz, spec.I)):
                            if nn not in xfrag and (charge0 * premz - f_mz) > 3.3:
                                ms2_I.append(f_I)
                                ent_I.append(0)
                        cs = cos_sim(ent_I, ms2_I)  # *sum(x for x,y in zip(ent_I,ms2_I) if y>0)/sum(ent_I)
                        if cs > MS2_score:
                            score_ent.append((adduct0, cs, ent))
            if score_ent:
                max_score_ent = max(score_ent, key=operator.itemgetter(1))  # pick top scoring entry
                adduct_match.append(max_score_ent)
        for cs, ii in isf_sc[jj]:  # if isf, attach data
            _, peak1 = ms2_wp[ii]
            adduct_match.append(
                ('?', cs,
                 Ent(0,
                     'ISF of (m/z={:.6f}, rt={:.3f}) {:.6f}'.format(
                         peak1.mz, peak1.rt, peak.mz),
                     [0], [0], None, None, None)))
        return adduct_match, spec, peak

    ms1scans, rtall = read_scans(basename0)

    def print_ann(ann_, adduct, spec, peak, name):
        ann_.write('NAME:\n')
        ann_.write(name + '\n')
        adduct_c = adduct_list.get(adduct[0], ('', '', ''))
        if adduct[0] == '?':  # if isf
            ann_.write('ADDUCT: -\n')
        else:
            ann_.write('ADDUCT: {} {}{}\n'.format(adduct[0], adduct_c[1], adduct_c[2]))
        ann_.write('TARGET_M/Z, FEATURE_M/Z: {:.6f}'.format(spec.ms1mz))
        ann_.write(', no_ms1_feature_detected' if peak is None else
                   ', ' + format(peak.mz, '.6f'))
        ann_.write('\n')
        ann_.write('SCAN_START_TIME, RT: {:.3f}'.format(spec.rt))
        if peak is None:
            rt_l, rt_r = spec.rt - 10, spec.rt + 10
            p_area = [
                x for x in ms1scans[bisect_left(ms1scans, (spec.ms1mz - .01,)):
                                    bisect_left(ms1scans, (spec.ms1mz + .01,))]
                if rt_l < x.rt < rt_r
            ]
        else:
            rt_l, rt_r = peak.rt - peak.sc * 1.5, peak.rt + peak.sc * 1.5
            p_area = [
                x for x in ms1scans[bisect_left(ms1scans, (peak.mz - .01,)):
                                    bisect_left(ms1scans, (peak.mz + .01,))]
                if rt_l < x.rt < rt_r
            ]
        ms1rt = rtall[bisect_left(rtall, rt_l):bisect_left(rtall, rt_r)]
        p_dict = dict()  # highest intensities per scan bounded by m/z
        for pt in p_area:
            if pt.rt not in p_dict or p_dict[pt.rt] < pt.I:
                p_dict[pt.rt] = pt.I
        p_maxI = [p_dict.get(rt, 0.) for rt in ms1rt]
        ms1_auc = sum((I0 + I1) * (rt1 - rt0) for rt0, rt1, I0, I1 in zip(
            ms1rt, ms1rt[1:], p_maxI, p_maxI[1:])) / 2
        ann_.write(', no_ms1_feature_detected' if peak is None else
                   ', ' + format(peak.rt, '.3f'))
        ann_.write('\n')
        if len(adduct) == 3:
            dotp = adduct[1]
        elif len(adduct) == 2:
            dotp = False
        ann_.write('PEAK_AREA: ' + str(ms1_auc) + '\n')
        ann_.write('DOT_PRODUCT: ' + (format(dotp, '.3f') if dotp else '-') + '\n')
        ann_.write('EXPERIMENTAL_SPECTRUM:\n')
        for i, mz in zip(spec.I_all, spec.mz_all):
            ann_.write('{:.6f} {:.6g}\n'.format(mz, i))

    def rec_all():
        uk_count = 1
        if "NoMatch" not in lib_types[0]:
            una_ = open('una_' + '_'.join(lib_types) + '_' + basename0 + '.txt', 'w')
        id_quant_ma = collections.defaultdict(list)
        for adduct_match, spec, peak in map(mass_matching, range(len(ms2_wp))):
            for adduct in adduct_match:
                if len(adduct) == 3:  # if lipidblast
                    name0 = adduct[2].name
                elif len(adduct) == 2:  # if nomatch
                    pos0, pos1 = adduct[1]
                    nameset = {x for _, x in dlist[pos0:pos1]}
                    name0 = '\n'.join(sorted(nameset))
                id_quant_ma[name0, adduct[0]].append((adduct, spec, peak))
            if not adduct_match and "NoMatch" not in lib_types[0]:
                # write spectra with no library match in an MSP-like format
                una_.write("NAME: unknown_{} {} MS1 feature\n".format(
                    uk_count, ('with' if peak else 'no')))
                uk_count += 1
                una_.write("PRECURSORMZ: {:.6f}\n".format(spec.ms1mz))
                if ispos:
                    una_.write("PRECURSORTYPE: [M+H]+ [M+2H]2+ [M+Na]+ [M+NH4]+ [M-H2O+H]+\n")
                else:
                    una_.write("PRECURSORTYPE: [M-H]- [M-2H]2- [M-H2O-H]- [M+HCOO]- [M+Cl]-\n")
                una_.write("RETENTIONTIME: {:.3f}\n".format(spec.rt / 60))
                thres = bpfilter[0] * max(spec.I_all)
                I_mz_list = [(i, mz) for i, mz in zip(spec.I_all, spec.mz_all) if i > thres]
                I_mz_list = I_mz_list[:bpfilter[1]]
                una_.write("Num Peaks: {}\n".format(len(I_mz_list)))
                for i, mz in I_mz_list:
                    una_.write('{:.6f} {:.6g}\n'.format(mz, i))
                una_.write('\n')
        return id_quant_ma

    id_quant_ma = rec_all()

    def print_all(id_quant):
        annotated_ = open('ann_' + '_'.join(lib_types) + '_' + basename0 + '.txt', 'w')
        for (name, adductid), adducts in id_quant.items():
            for adduct, spec, peak in adducts:
                print_ann(annotated_, adduct, spec, peak, name)
                if len(adduct) == 3:
                    ent = adduct[2]
                    annotated_.write('LIBRARY_SPECTRUM:\n')
                    for mz, i in zip(ent.mz, ent.I):
                        annotated_.write('{} {}\n'.format(mz, i))
                annotated_.write('\n')

    print_all(id_quant_ma)
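
# Illustrative driver sketch (an assumption, not the original script's start-up
# code): print_score() handles one run end to end -- background subtraction,
# in-source-fragment detection, library matching and annotation output -- so a
# driver only needs to map it over the input files. Whether the original tool
# runs this serially or in parallel is not shown in this section.
# for mzML_file in mzML_files:
#     print_score(mzML_file)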