예제 #1
0
def readms1peak(bn):
    """Load the MS1 feature table of one run and link isotope peaks.

    Reads ``'ms1feature_' + bn + '.txt'``.  Every line that splits into
    exactly five whitespace-separated fields becomes a ``Peak`` built from
    the five parsed floats plus a sixth positional value that repeats the
    first column (read back below as ``mmz``).  The peaks are sorted and then
    each peak is compared with the peaks lying ``iso_diff`` (1.00335) below
    its m/z: if the candidate closest in retention time is less than 1 RT
    unit away and has a larger ``auc``, the peak takes over that candidate's
    ``mmz``.

    Args:
        bn: Base name used to build the feature file name.

    Returns:
        A ``(ms1peaks, peak_double)`` tuple.  ``peak_double`` is always an
        empty set; it is kept so callers can still unpack two values.
    """
    ms1peaks = []
    with open('ms1feature_' + bn + '.txt') as ms1peakfile:
        for line in ms1peakfile:
            lsp = line.rstrip().split()
            if len(lsp) == 5:
                ms1peaks.append(Peak(*[float(x) for x in lsp], float(lsp[0])))
    ms1peaks.sort()
    iso_diff = 1.00335
    peak_double = set()
    # The list is updated in place while walking it in sorted order, so a
    # candidate found at a lower position has already been processed and its
    # mmz may itself have been inherited from a peak further down.
    for ii, peak0 in enumerate(ms1peaks):
        err_bd = min(.01, bound_ppm(peak0.mz * ms1ppm))

        pos0 = bisect_left(ms1peaks, (peak0.mz - iso_diff - err_bd, ))
        pos1 = bisect_left(ms1peaks, (peak0.mz - iso_diff + err_bd, ), lo=pos0)
        if pos0 == pos1:
            continue
        peak1 = min(ms1peaks[pos0:pos1], key=lambda p: abs(peak0.rt - p.rt))
        if abs(peak0.rt - peak1.rt) < 1 and peak0.auc < peak1.auc:
            # Only the last field changes, so the sort position of the
            # entry (driven by the leading fields) is preserved.
            ms1peaks[ii] = Peak(*peak0[:-1], peak1.mmz)
    return ms1peaks, peak_double
예제 #2
0
def print_tab(lib_ent):
    """Write the per-file quantification table to 'quant_rex.txt'.

    For every library entry the MS1 peaks of each file in ``mzML_files`` are
    searched inside a ppm window around ``ent.Mmass``.  When the entry has a
    retention time (``ent.rt != 'NA'``) only peaks within ``RT_shift`` of it
    are considered and the one with the largest ``auc`` is kept; otherwise
    every peak in the m/z window is kept.  Entries without any matching peak
    in any file are not written.

    Each written row holds: name, ISF flag ('*' when the name starts with
    'ISF of '), adduct, library m/z, library RT in minutes (or 'NA'), the
    median experimental m/z, the fraction of files with at least one peak,
    then one comma-joined AUC cell per file followed by one comma-joined RT
    cell (minutes) per file.

    Args:
        lib_ent: Iterable of library entries exposing ``name``, ``adduct``,
            ``Mmass`` and ``rt``.
    """
    sample_names = [x[:-5] for x in mzML_files]
    with open('quant_rex.txt', 'w') as quant_auc:
        quant_auc.write(
            'name\tISF\tadduct\tfeature_m/z(library)\tRT(library)\tfeature_m/z(experimental median)\t%detected\t'
        )
        quant_auc.write('\t'.join(sample_names) + '\t')
        quant_auc.write('\t'.join('RT_' + x for x in sample_names) + '\n')
        for ent in lib_ent:
            has_rt = ent.rt != 'NA'
            # The m/z tolerance depends on the entry only, not on the file.
            bd = bound_ppm(ent.Mmass * ms1ppm)
            per_file = []  # matched peaks, one list per file, in file order
            for mzML_file in mzML_files:
                ms1peaks = ms1peaks_dict[mzML_file]
                pos0 = bisect_left(ms1peaks, (ent.Mmass - bd, ))
                pos1 = bisect_left(ms1peaks, (ent.Mmass + bd, ))
                peak = ms1peaks[pos0:pos1]
                if has_rt:
                    peak = [p for p in peak if abs(p.rt - ent.rt) < RT_shift]
                    if peak:
                        peak = [max(peak, key=operator.attrgetter('auc'))]
                per_file.append(peak)
            if not any(per_file):
                continue

            auc_cells = [
                ','.join(format(p.auc, '.1f') for p in peaks)
                for peaks in per_file
            ]
            rt_cells = [
                ','.join(format(p.rt / 60, '.2f') for p in peaks)
                for peaks in per_file
            ]
            mzmed = statistics.median(p.mz for peaks in per_file
                                      for p in peaks)
            detected = sum(1 for peaks in per_file if peaks)
            isf_flag = '*' if ent.name.startswith('ISF of ') else ''
            if has_rt:
                quant_auc.write(
                    '{}\t{}\t{}\t{:.5f}\t{:.2f}\t{:.5f}'.format(
                        ent.name, isf_flag, ent.adduct, ent.Mmass,
                        ent.rt / 60, mzmed))
            else:
                # Emit the ISF column here too; without it rows of entries
                # that have no library RT are shifted one column to the left
                # of the header.
                quant_auc.write('{}\t{}\t{}\t{:.5f}\tNA\t{:.5f}'.format(
                    ent.name, isf_flag, ent.adduct, ent.Mmass, mzmed))
            quant_auc.write('\t{:.2f}'.format(detected / len(mzML_files)))
            quant_auc.write('\t' + '\t'.join(auc_cells) + '\t' +
                            '\t'.join(rt_cells) + '\n')
예제 #3
0
                    break
            mz_list = [float(x) for x in mz_list]
            I_list = [x / max(I_list) * 999. for x in I_list]
            all_dat[name_id].append(
                (nn, dotp, premz, rt, mz_list, I_list, auc, feat))
            lib_dict[name_id] = (dotp, lib_dat)

# Group the 'ISF of ...' entries of all_dat.  The first three decimal numbers
# found in the first name line are read as (pmz, rt, mz); an entry joins the
# first existing group whose three values are within tolerance and whose
# adduct matches, otherwise it opens a new group keyed by its own values.
isf_dict = dict()
for name_id in all_dat.keys():
    # name_id is newline-separated: one or more name lines, then the adduct.
    name_add = name_id.split('\n')
    name, adduct = name_add[:-1], name_add[-1]
    name0 = name[0]
    if name0.startswith('ISF of'):
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        pmz, rt, mz = [float(x) for x in re.findall(r"\d+\.\d+", name0)[:3]]
        for pmz0, rt0, mz0, adduct0 in isf_dict:
            if abs(pmz - pmz0) < bound_ppm(pmz * ms1ppm) and abs(
                    rt - rt0) < RT_shift and abs(mz - mz0) < bound_ppm(
                        mz * ms1ppm) and adduct == adduct0:
                isf_dict[(pmz0, rt0, mz0, adduct0)].append(name_id)
                break
        else:
            # No existing group matched.
            isf_dict[(pmz, rt, mz, adduct)] = [name_id]

name_dict0 = dict()
for key, vv in isf_dict.items():
    if len(vv) > 1:
        # Re-parse the first three decimal numbers of every member of the
        # group (raw string: "\d" in a normal literal is an invalid escape).
        pmz_rt_mz = [
            tuple(float(x) for x in re.findall(r"\d+\.\d+", v)[:3]) for v in vv
        ]
        # Medians of the first and of the second parsed value over the group.
        key0 = statistics.median(x for x, _, _ in pmz_rt_mz)
        key1 = statistics.median(x for _, x, _ in pmz_rt_mz)
예제 #4
0
    def scoring(cpd):
        """Score one library entry against the MS1 features and MS2 scans.

        Args:
            cpd: ``(ent, name)`` pair.  ``ent`` is a space-separated record
                ``ms1mz charge frag_mz frag_I adduct RT frag_ann`` whose
                fragment fields are comma-separated; ``name`` is an iterable
                of strings that is joined with ``'---'``.

        Returns:
            ``(name, adduct, ms1mz, RT, frag_mz, frag_ann, max_peak)`` where
            ``max_peak`` is the chosen ``(ssm, pfc, ms1peak, ms2_auc,
            ms1_auc)`` tuple, or ``False`` when no feature could be scored.
            ``RT`` is a float only if the field is a plain decimal number;
            otherwise the original string is returned.
        """
        ent, name = cpd
        ms1mz, charge, frag_mz, frag_I, adduct, RT, frag_ann = ent.split(' ', 6)
        ms1mz = float(ms1mz)
        if re.fullmatch(r"\d+\.\d+", RT):
            RT = float(RT)
        frag_I = [float(x) for x in frag_I.split(',')]
        frag_mz = [float(x) for x in frag_mz.split(',')]
        # NOTE(review): frag_ann keeps its original order and length while
        # frag_mz / frag_I are re-sorted by intensity and truncated below --
        # confirm callers do not index them in parallel.
        frag_ann = frag_ann.split(',')
        # Keep the topNfrag most intense fragments, most intense first.
        sorted_I = sorted(zip(frag_I, frag_mz), reverse=True)[:topNfrag]
        frag_mz = [x for _, x in sorted_I]
        frag_I = [x for x, _ in sorted_I]
        adduct = adduct.replace(',', '')
        name = '---'.join(name)

        # Fixed +/-0.01 m/z window around every library fragment.
        frag_mz_l = [f_mz - .01 for f_mz in frag_mz]
        frag_mz_r = [f_mz + .01 for f_mz in frag_mz]

        err_bd = bound_ppm(ms1mz * ms1ppm)
        pos0 = bisect_left(ms1peaks, (ms1mz - err_bd,))
        pos1 = bisect_left(ms1peaks, (ms1mz + err_bd,))
        pseudo_feat = []  # pseudo feature for entries w/o feature
        ms1peaks_match = [
            x for x in ms1peaks[pos0:pos1]
            if not isinstance(RT, float) or abs(RT - x.rt) < rt_diff
        ]
        # If no feature was found but the library gives an RT, score a
        # zero-area placeholder at the library position instead.
        if ms2_auc_no_feat and not ms1peaks_match and isinstance(RT, float):
            pseudo_feat.append(
                Peak(mz=ms1mz, rt=RT, sc=10, coef=0, auc=0, mmz=ms1mz))

        score_peaks = []
        for ms1peak in ms1peaks_match + pseudo_feat:
            if not minmz < ms1peak.mz < maxmz:
                continue
            pos0 = bisect_left(startpt, ms1peak.mz)
            pos1 = bisect_left(end__pt, ms1peak.mz)
            if pos0 == pos1:  # ms1peak in one window only
                pos = pos0
            else:  # take the window whose boundaries are furthest from ms1peak
                pos = (pos0 if ms1peak.mz - startpt[pos0 - 1] >
                       end__pt[pos1] - ms1peak.mz else pos1)
            iso = ms2scans[sswath[pos]]
            ms2_I = [0] * len(frag_I)
            ms2_auc = [0] * len(frag_I)
            pfc = [0] * len(frag_I)
            rt_l = ms1peak.rt - ms1peak.sc * 1.5
            rt_r = ms1peak.rt + ms1peak.sc * 1.5
            ms1rt = rtset[bisect_left(rtset, rt_l):bisect_left(rtset, rt_r)]

            # Precursor trace: highest intensity per scan inside +/-0.01 m/z.
            p_dict = dict()
            p_area = [
                x for x in ms1scans[bisect_left(ms1scans, (
                    ms1peak.mz - .01,)):bisect_left(ms1scans, (ms1peak.mz +
                                                                .01,))]
                if rt_l < x.rt < rt_r
            ]
            for pt in p_area:
                if pt.rt not in p_dict or p_dict[pt.rt] < pt.I:
                    p_dict[pt.rt] = pt.I
            p_maxI = [p_dict.get(rt, 0.) for rt in ms1rt]

            for nn, (f_mz_l, f_mz_r) in enumerate(zip(frag_mz_l, frag_mz_r)):
                f_area = [
                    x for x in iso[bisect_left(iso, (f_mz_l,)):bisect_left(
                        iso, (f_mz_r,))] if rt_l < x.rt < rt_r
                ]
                if not f_area:
                    continue
                # The fragment only counts if it is seen within 2 RT units
                # of the feature apex.
                f_area_ = [x.I for x in f_area if abs(x.rt - ms1peak.rt) < 2]
                if not f_area_:
                    continue
                ms2_I[nn] = max(f_area_)
                f_dict = dict()  # highest intensity per scan for the fragment
                for pt in f_area:
                    if pt.rt not in f_dict or f_dict[pt.rt] < pt.I:
                        f_dict[pt.rt] = pt.I
                ms2_maxI = [f_dict.get(rt, 0.) for rt in ms1rt]
                pfc[nn] = cos_sim(p_maxI, ms2_maxI)
                # Trapezoidal area of the fragment trace on the ms1rt grid.
                ms2_auc[nn] = sum(
                    (I0 + I1) * (rt1 - rt0) for rt0, rt1, I0, I1 in zip(
                        ms1rt, ms1rt[1:], ms2_maxI, ms2_maxI[1:])) / 2
            ms1_auc = sum((I0 + I1) * (rt1 - rt0) for rt0, rt1, I0, I1 in zip(
                ms1rt, ms1rt[1:], p_maxI, p_maxI[1:])) / 2
            ssm = cos_sim0(frag_I, ms2_I)
            score_peaks.append((ssm, pfc, ms1peak, ms2_auc, ms1_auc))

        if score_peaks:
            score_peaks.sort(reverse=True)
            # Candidates whose score is within 0.1 of the best are treated
            # as ties; among them the feature with the largest auc wins.
            max_score_peaks = [score_peaks[0]]
            for x in score_peaks[1:]:
                if max_score_peaks[0][0] - x[0] < .1:
                    max_score_peaks.append(x)
                else:
                    break
            max_peak = max(max_score_peaks, key=lambda x: x[2].auc)
            return (name, adduct, ms1mz, RT, frag_mz, frag_ann, max_peak)
        return False
예제 #5
0
    def mass_matching(jj):
        """Collect the annotations for the jj-th (spectrum, feature) pair.

        The precursor m/z is the feature m/z when a feature is attached to
        the spectrum, otherwise the spectrum's own ``ms1mz``.

        * When ``"NoMatch"`` is in ``lib_types[0]``, every adduct of
          ``adduct_list`` except the last is converted to a neutral mass and
          ``(adduct, (pos0, pos1))`` is recorded, the pair being the index
          range of ``dlist`` inside the ppm window (possibly empty).
        * Otherwise library entries inside the precursor ppm window (and
          within ``RT_shift`` when the entry has an RT) are scored by cosine
          similarity and the best one above ``MS2_score`` is recorded as
          ``(adduct, score, entry)``.

        In both modes one ``('?', score, Ent(...))`` item is added for every
        in-source-fragment hit stored in ``isf_sc[jj]``.

        Returns:
            ``(adduct_match, spec, peak)``.
        """
        spec, peak = ms2_wp[jj]
        adduct_match = []
        premz = spec.ms1mz if peak is None else peak.mz

        if "NoMatch" in lib_types[0]:
            for adduct0, (mass0, charge0, _) in list(adduct_list.items())[:-1]:
                # A leading digit in the adduct name is the multimer count.
                Mmass = (premz * charge0 -
                         mass0) / (1 if adduct0[0] == 'M' else int(adduct0[0]))
                err_bd = bound_ppm(Mmass * ms1ppm)
                pos0 = bisect_left(dlist, (Mmass - err_bd, ))
                pos1 = bisect_left(dlist, (Mmass + err_bd, ), lo=pos0)
                adduct_match.append((adduct0, (pos0, pos1)))
        else:
            score_ent = []
            err_bd = bound_ppm(premz * ms1ppm)
            pos_0 = bisect_left(lib_ent, (premz - err_bd, ))
            pos_1 = bisect_left(lib_ent, (premz + err_bd, ), lo=pos_0)
            candidates = (x for x in lib_ent[pos_0:pos_1]
                          if x.rt is None or abs(spec.rt - x.rt) < RT_shift)

            for ent in candidates:
                adduct0, charge0 = ent.adduct, ent.charge
                ms2_I = []
                ent_I = []
                xfrag = set()  # indices of spectrum peaks matched so far
                for f_mz, f_I in zip(ent.mz, ent.I):
                    # Skip fragments closer than 3.3 to charge * precursor.
                    if not ((charge0 * premz - f_mz) > 3.3):
                        continue
                    pos0 = bisect_left(spec.mz, f_mz - .01)
                    pos1 = bisect_left(spec.mz, f_mz + .01, lo=pos0)
                    ent_I.append(f_I)
                    if pos0 != pos1:
                        ms2_I.append(max(spec.I[pos0:pos1]))
                        xfrag.update(range(pos0, pos1))
                    else:
                        ms2_I.append(0)
                # min number of matching peaks
                if sum(1 for x in ms2_I if x > 0) >= min_peaks:
                    # Unmatched spectrum peaks count against the score.
                    for nn, (f_mz, f_I) in enumerate(zip(spec.mz, spec.I)):
                        if nn not in xfrag and (charge0 * premz -
                                                f_mz) > 3.3:
                            ms2_I.append(f_I)
                            ent_I.append(0)
                    cs = cos_sim(ent_I, ms2_I)
                    if cs > MS2_score:
                        score_ent.append((adduct0, cs, ent))
            if score_ent:
                # pick top scoring entry
                adduct_match.append(
                    max(score_ent, key=operator.itemgetter(1)))

        # If this pair was flagged as an in-source fragment, attach one
        # pseudo entry per parent.  peak.mz is read here, so isf_sc[jj] is
        # expected to be non-empty only for pairs that carry a feature.
        for cs, ii in isf_sc[jj]:
            _, peak1 = ms2_wp[ii]
            adduct_match.append(
                ('?', cs,
                 Ent(
                     0, 'ISF of (m/z={:.6f}, rt={:.3f}) {:.6f}'.format(
                         peak1.mz, peak1.rt, peak.mz), [0], [0], None, None,
                     None)))
        return adduct_match, spec, peak
예제 #6
0
    def ms2_spectrum():
        """Pair MS2 spectra with MS1 features and score in-source fragments.

        1. Each spectrum of ``ms2scans`` gets the MS1 feature inside the ppm
           window around ``spec.ms1mz`` that is closest in RT, provided the
           RT gap is below ``1.5 * peak.sc``.  Spectra whose accepted feature
           has ``mz != mmz`` are dropped.
        2. Per feature only the spectrum closest in RT is kept; spectra
           without a feature are all kept.  The pairs are then sorted.
        3. For every pair ``ii`` that has a feature, each pair ``jj`` sorted
           before ``((ms2.ms1mz,),)`` is tested as an in-source fragment of
           it; when the similarity exceeds ``ISF_score`` the tuple
           ``(cs, ii)`` is appended to ``isf_sc[jj]``.
        4. Every hit is written out through ``print_ms2`` / ``ins_f``.

        Returns:
            ``(ms2_wp, isf_sc)``: the sorted list of ``(spectrum, feature or
            None)`` pairs and, aligned with it, the list of hit lists.
        """
        ms2_wp = []  #ms2 with ms1 peak info
        for nn, spec in enumerate(ms2scans):
            err_bd = bound_ppm(spec.ms1mz * ms1ppm)
            pos0 = bisect_left(ms1peaks, (spec.ms1mz - err_bd, ))
            pos1 = bisect_left(ms1peaks, (spec.ms1mz + err_bd, ), lo=pos0)
            s_peak = None
            if pos0 != pos1:
                # closest feature in RT among those inside the m/z window
                peak = min(ms1peaks[pos0:pos1],
                           key=lambda p: abs(spec.rt - p.rt))
                if abs(spec.rt - peak.rt) < peak.sc * 1.5:
                    s_peak = peak
            if s_peak is None or s_peak.mz == s_peak.mmz:  #append monoisotopic peak only
                ms2_wp.append((spec, s_peak))

        # Deduplicate: one spectrum per feature (the one closest in RT).
        peak_ms2 = dict()
        ms2_wp_ = []
        for s, p in ms2_wp:
            if p is None:
                ms2_wp_.append((s, p))
            elif p not in peak_ms2 or abs(s.rt - p.rt) < abs(peak_ms2[p].rt -
                                                             p.rt):
                peak_ms2[p] = s
        for p, s in peak_ms2.items():
            ms2_wp_.append((s, p))
        ms2_wp = sorted(ms2_wp_)

        # isf_sc[jj] collects (score, ii) for every pair ii in whose spectrum
        # pair jj looks like an in-source fragment.
        isf_sc = [[] for x in range(len(ms2_wp))]
        for ii in range(len(ms2_wp) - 1, -1, -1):
            ms2, peak = ms2_wp[ii]
            if peak:  # and peak.mz==peak.mmz:
                # candidates: pairs sorted before ((ms2.ms1mz,),)
                for jj in range(bisect_left(ms2_wp, ((ms2.ms1mz, ), ))):
                    i_ms2, i_p = ms2_wp[jj]
                    if i_p:
                        # look for the candidate's feature m/z among the
                        # fragment peaks of spectrum ii
                        err_bd = min(.01, bound_ppm(i_p.mz * ms2ppm))
                        pos0 = bisect_left(ms2.mz, i_p.mz - err_bd)
                        pos1 = bisect_left(ms2.mz, i_p.mz + err_bd, lo=pos0)
                    # pos0/pos1 are only read when i_p is truthy (the `and`
                    # chain short-circuits), i.e. when they were just set.
                    if i_p and abs(peak.rt - i_p.rt
                                   ) < ISF_rt_diff and pos0 != pos1 and max(
                                       ms2.I) * .1 < max(
                                           ms2.I[pos0:pos1]
                                       ):  #require ISF to be n% of base peak
                        ms2_I = []
                        i_ms2_I = []
                        xfrag = set()
                        # first 10 entries of the candidate's full spectrum,
                        # restricted to m/z below its precursor (+0.01)
                        for f_i, f_mz in (
                            (x, y)
                                for x, y in zip(i_ms2.I_all[:10], i_ms2.mz_all)
                                if y < i_ms2.ms1mz + .01):
                            err_bd = .01
                            pos0 = bisect_left(ms2.mz, f_mz - err_bd)
                            pos1 = bisect_left(ms2.mz, f_mz + err_bd, lo=pos0)
                            if pos0 != pos1:
                                i_ms2_I.append(f_i)
                                ms2_I.append(max(ms2.I[pos0:pos1]))
                                for i in range(pos0, pos1):
                                    xfrag.add(i)
                            else:
                                i_ms2_I.append(f_i)
                                ms2_I.append(0)
                        # at least two shared fragments are required
                        if sum(1 for x in ms2_I if x > 0) > 1:
                            # add the unmatched peaks of spectrum ii (below
                            # the candidate's precursor) with intensity 0 on
                            # the candidate side
                            for nn, (f_mz,
                                     f_I) in enumerate(zip(ms2.mz, ms2.I)):
                                if nn not in xfrag and f_mz < i_ms2.ms1mz + .01:
                                    ms2_I.append(f_I)
                                    i_ms2_I.append(0)
                            # similarity restricted to the peaks present in
                            # spectrum ii (par0) or in the candidate (par1);
                            # the better of the two is used
                            isf_par0 = [(x, y) for x, y in zip(i_ms2_I, ms2_I)
                                        if y > 0]
                            isf_par1 = [(x, y) for x, y in zip(i_ms2_I, ms2_I)
                                        if x > 0]
                            cs=max(cos_sim([x for x,y in isf_par0],[y for x,y in isf_par0]), \
                                    cos_sim([x for x,y in isf_par1],[y for x,y in isf_par1]))
                            if cs > ISF_score:
                                isf_sc[jj].append((cs, ii))

        # Dump every hit: parent spectrum, fragment spectrum, then the score.
        for jj, ic in enumerate(isf_sc):
            for cs, ii in ic:
                print_ms2(*ms2_wp[ii])
                print_ms2(*ms2_wp[jj])
                ins_f.write('{}\n\n'.format(cs))
        return ms2_wp, isf_sc
예제 #7
0
def print_score(mzML_file):
    """Annotate the MS2 spectra of one mzML file and write the result files.

    Reads ``'ms2spectra_<basename>.txt'`` and the MS1 features of the same
    base name, optionally strips background fragments, pairs spectra with
    features, matches them (library or "NoMatch" mode) and writes the
    ``isf_``, ``una_`` and ``ann_`` text files for this base name.

    Args:
        mzML_file: Path of the mzML file; only its base name is used.
    """
    basename0 = os.path.basename(mzML_file)
    print(basename0)

    ms2scans = read_ms2('ms2spectra_' + basename0 + '.txt')

    # Optional background subtraction: "background" is a glob pattern and the
    # first file it matches is used.
    bg = param_dict.get("background")
    if bg is not None:
        ms2scans_bg = read_ms2('ms2spectra_' +
                               os.path.basename(glob.glob(bg)[0]) + '.txt')
        for ii, ms2sc in enumerate(ms2scans):
            err_bd = bound_ppm(ms2sc.ms1mz * ms1ppm)
            pos0 = bisect_left(ms2scans_bg, (ms2sc.ms1mz - err_bd, ))
            pos1 = bisect_left(ms2scans_bg, (ms2sc.ms1mz + err_bd, ), lo=pos0)
            if pos1 != pos0:
                # background spectrum with matching precursor, closest in RT
                closest_bg = min(ms2scans_bg[pos0:pos1],
                                 key=lambda x: abs(x.rt - ms2sc.rt))
                if abs(closest_bg.rt - ms2sc.rt) < 60:
                    # keep only the fragments that have no background peak
                    # within err_bd (the precursor-based tolerance)
                    new_mz = []
                    new_I = []
                    for m, i in zip(ms2sc.mz, ms2sc.I):
                        pos0 = bisect_left(closest_bg.mz, m - err_bd)
                        pos1 = bisect_left(closest_bg.mz, m + err_bd, lo=pos0)
                        if pos0 == pos1:
                            new_mz.append(m)
                            new_I.append(i)
                    # rebuild the record: first two and last two fields are
                    # kept, the two fragment lists are replaced
                    ms2scans[ii] = Spec(*ms2sc[:2], new_mz, new_I, *ms2sc[-2:])

    ms1peaks, peak_double = readms1peak(basename0)

    # NOTE(review): ins_f is written by the nested helpers below and is never
    # explicitly closed in this function.
    ins_f = open('isf_' + basename0 + '.txt', 'w')

    def print_ms2(ms2, p):
        """Write a three-line record for one spectrum to ``ins_f``.

        Line 1 holds the feature m/z and RT, line 2 the space-separated
        ``mz_all`` values and line 3 the space-separated ``I_all`` values.
        """
        header = '{} {}\n'.format(p.mz, p.rt)
        ins_f.write(header)
        for values in (ms2.mz_all, ms2.I_all):
            ins_f.write(' '.join(map(str, values)) + '\n')

    def ms2_spectrum():
        """Pair MS2 spectra with MS1 features and score in-source fragments.

        1. Each spectrum of ``ms2scans`` gets the MS1 feature inside the ppm
           window around ``spec.ms1mz`` that is closest in RT, provided the
           RT gap is below ``1.5 * peak.sc``.  Spectra whose accepted feature
           has ``mz != mmz`` are dropped.
        2. Per feature only the spectrum closest in RT is kept; spectra
           without a feature are all kept.  The pairs are then sorted.
        3. For every pair ``ii`` that has a feature, each pair ``jj`` sorted
           before ``((ms2.ms1mz,),)`` is tested as an in-source fragment of
           it; when the similarity exceeds ``ISF_score`` the tuple
           ``(cs, ii)`` is appended to ``isf_sc[jj]``.
        4. Every hit is written out through ``print_ms2`` / ``ins_f``.

        Returns:
            ``(ms2_wp, isf_sc)``: the sorted list of ``(spectrum, feature or
            None)`` pairs and, aligned with it, the list of hit lists.
        """
        ms2_wp = []  #ms2 with ms1 peak info
        for nn, spec in enumerate(ms2scans):
            err_bd = bound_ppm(spec.ms1mz * ms1ppm)
            pos0 = bisect_left(ms1peaks, (spec.ms1mz - err_bd, ))
            pos1 = bisect_left(ms1peaks, (spec.ms1mz + err_bd, ), lo=pos0)
            s_peak = None
            if pos0 != pos1:
                # closest feature in RT among those inside the m/z window
                peak = min(ms1peaks[pos0:pos1],
                           key=lambda p: abs(spec.rt - p.rt))
                if abs(spec.rt - peak.rt) < peak.sc * 1.5:
                    s_peak = peak
            if s_peak is None or s_peak.mz == s_peak.mmz:  #append monoisotopic peak only
                ms2_wp.append((spec, s_peak))

        # Deduplicate: one spectrum per feature (the one closest in RT).
        peak_ms2 = dict()
        ms2_wp_ = []
        for s, p in ms2_wp:
            if p is None:
                ms2_wp_.append((s, p))
            elif p not in peak_ms2 or abs(s.rt - p.rt) < abs(peak_ms2[p].rt -
                                                             p.rt):
                peak_ms2[p] = s
        for p, s in peak_ms2.items():
            ms2_wp_.append((s, p))
        ms2_wp = sorted(ms2_wp_)

        # isf_sc[jj] collects (score, ii) for every pair ii in whose spectrum
        # pair jj looks like an in-source fragment.
        isf_sc = [[] for x in range(len(ms2_wp))]
        for ii in range(len(ms2_wp) - 1, -1, -1):
            ms2, peak = ms2_wp[ii]
            if peak:  # and peak.mz==peak.mmz:
                # candidates: pairs sorted before ((ms2.ms1mz,),)
                for jj in range(bisect_left(ms2_wp, ((ms2.ms1mz, ), ))):
                    i_ms2, i_p = ms2_wp[jj]
                    if i_p:
                        # look for the candidate's feature m/z among the
                        # fragment peaks of spectrum ii
                        err_bd = min(.01, bound_ppm(i_p.mz * ms2ppm))
                        pos0 = bisect_left(ms2.mz, i_p.mz - err_bd)
                        pos1 = bisect_left(ms2.mz, i_p.mz + err_bd, lo=pos0)
                    # pos0/pos1 are only read when i_p is truthy (the `and`
                    # chain short-circuits), i.e. when they were just set.
                    if i_p and abs(peak.rt - i_p.rt
                                   ) < ISF_rt_diff and pos0 != pos1 and max(
                                       ms2.I) * .1 < max(
                                           ms2.I[pos0:pos1]
                                       ):  #require ISF to be n% of base peak
                        ms2_I = []
                        i_ms2_I = []
                        xfrag = set()
                        # first 10 entries of the candidate's full spectrum,
                        # restricted to m/z below its precursor (+0.01)
                        for f_i, f_mz in (
                            (x, y)
                                for x, y in zip(i_ms2.I_all[:10], i_ms2.mz_all)
                                if y < i_ms2.ms1mz + .01):
                            err_bd = .01
                            pos0 = bisect_left(ms2.mz, f_mz - err_bd)
                            pos1 = bisect_left(ms2.mz, f_mz + err_bd, lo=pos0)
                            if pos0 != pos1:
                                i_ms2_I.append(f_i)
                                ms2_I.append(max(ms2.I[pos0:pos1]))
                                for i in range(pos0, pos1):
                                    xfrag.add(i)
                            else:
                                i_ms2_I.append(f_i)
                                ms2_I.append(0)
                        # at least two shared fragments are required
                        if sum(1 for x in ms2_I if x > 0) > 1:
                            # add the unmatched peaks of spectrum ii (below
                            # the candidate's precursor) with intensity 0 on
                            # the candidate side
                            for nn, (f_mz,
                                     f_I) in enumerate(zip(ms2.mz, ms2.I)):
                                if nn not in xfrag and f_mz < i_ms2.ms1mz + .01:
                                    ms2_I.append(f_I)
                                    i_ms2_I.append(0)
                            # similarity restricted to the peaks present in
                            # spectrum ii (par0) or in the candidate (par1);
                            # the better of the two is used
                            isf_par0 = [(x, y) for x, y in zip(i_ms2_I, ms2_I)
                                        if y > 0]
                            isf_par1 = [(x, y) for x, y in zip(i_ms2_I, ms2_I)
                                        if x > 0]
                            cs=max(cos_sim([x for x,y in isf_par0],[y for x,y in isf_par0]), \
                                    cos_sim([x for x,y in isf_par1],[y for x,y in isf_par1]))
                            if cs > ISF_score:
                                isf_sc[jj].append((cs, ii))

        # Dump every hit: parent spectrum, fragment spectrum, then the score.
        for jj, ic in enumerate(isf_sc):
            for cs, ii in ic:
                print_ms2(*ms2_wp[ii])
                print_ms2(*ms2_wp[jj])
                ins_f.write('{}\n\n'.format(cs))
        return ms2_wp, isf_sc

    ms2_wp, isf_sc = ms2_spectrum()

    def mass_matching(jj):
        """Collect the annotations for the jj-th (spectrum, feature) pair.

        The precursor m/z is the feature m/z when a feature is attached to
        the spectrum, otherwise the spectrum's own ``ms1mz``.

        * When ``"NoMatch"`` is in ``lib_types[0]``, every adduct of
          ``adduct_list`` except the last is converted to a neutral mass and
          ``(adduct, (pos0, pos1))`` is recorded, the pair being the index
          range of ``dlist`` inside the ppm window (possibly empty).
        * Otherwise library entries inside the precursor ppm window (and
          within ``RT_shift`` when the entry has an RT) are scored by cosine
          similarity and the best one above ``MS2_score`` is recorded as
          ``(adduct, score, entry)``.

        In both modes one ``('?', score, Ent(...))`` item is added for every
        in-source-fragment hit stored in ``isf_sc[jj]``.

        Returns:
            ``(adduct_match, spec, peak)``.
        """
        spec, peak = ms2_wp[jj]
        adduct_match = []
        premz = spec.ms1mz if peak is None else peak.mz

        if "NoMatch" in lib_types[0]:
            for adduct0, (mass0, charge0, _) in list(adduct_list.items())[:-1]:
                # A leading digit in the adduct name is the multimer count.
                Mmass = (premz * charge0 -
                         mass0) / (1 if adduct0[0] == 'M' else int(adduct0[0]))
                err_bd = bound_ppm(Mmass * ms1ppm)
                pos0 = bisect_left(dlist, (Mmass - err_bd, ))
                pos1 = bisect_left(dlist, (Mmass + err_bd, ), lo=pos0)
                adduct_match.append((adduct0, (pos0, pos1)))
        else:
            score_ent = []
            err_bd = bound_ppm(premz * ms1ppm)
            pos_0 = bisect_left(lib_ent, (premz - err_bd, ))
            pos_1 = bisect_left(lib_ent, (premz + err_bd, ), lo=pos_0)
            candidates = (x for x in lib_ent[pos_0:pos_1]
                          if x.rt is None or abs(spec.rt - x.rt) < RT_shift)

            for ent in candidates:
                adduct0, charge0 = ent.adduct, ent.charge
                ms2_I = []
                ent_I = []
                xfrag = set()  # indices of spectrum peaks matched so far
                for f_mz, f_I in zip(ent.mz, ent.I):
                    # Skip fragments closer than 3.3 to charge * precursor.
                    if not ((charge0 * premz - f_mz) > 3.3):
                        continue
                    pos0 = bisect_left(spec.mz, f_mz - .01)
                    pos1 = bisect_left(spec.mz, f_mz + .01, lo=pos0)
                    ent_I.append(f_I)
                    if pos0 != pos1:
                        ms2_I.append(max(spec.I[pos0:pos1]))
                        xfrag.update(range(pos0, pos1))
                    else:
                        ms2_I.append(0)
                # min number of matching peaks
                if sum(1 for x in ms2_I if x > 0) >= min_peaks:
                    # Unmatched spectrum peaks count against the score.
                    for nn, (f_mz, f_I) in enumerate(zip(spec.mz, spec.I)):
                        if nn not in xfrag and (charge0 * premz -
                                                f_mz) > 3.3:
                            ms2_I.append(f_I)
                            ent_I.append(0)
                    cs = cos_sim(ent_I, ms2_I)
                    if cs > MS2_score:
                        score_ent.append((adduct0, cs, ent))
            if score_ent:
                # pick top scoring entry
                adduct_match.append(
                    max(score_ent, key=operator.itemgetter(1)))

        # If this pair was flagged as an in-source fragment, attach one
        # pseudo entry per parent.  peak.mz is read here, so isf_sc[jj] is
        # expected to be non-empty only for pairs that carry a feature.
        for cs, ii in isf_sc[jj]:
            _, peak1 = ms2_wp[ii]
            adduct_match.append(
                ('?', cs,
                 Ent(
                     0, 'ISF of (m/z={:.6f}, rt={:.3f}) {:.6f}'.format(
                         peak1.mz, peak1.rt, peak.mz), [0], [0], None, None,
                     None)))
        return adduct_match, spec, peak

    ms1scans, rtall = read_scans(basename0)

    def print_ann(ann_, adduct, spec, peak, name):
        """Write one annotation record to the open file ``ann_``.

        Args:
            ann_: Writable text file.
            adduct: Match tuple whose first item is the adduct label ('?'
                for an in-source fragment).  A 3-tuple carries the dot
                product as its second item; a 2-tuple has none.
            spec: The MS2 spectrum record.
            peak: The MS1 feature paired with ``spec``, or ``None``.
            name: Text written under ``NAME:``.

        The peak area is the trapezoidal integral, over the scan times in
        ``rtall`` inside the RT window, of the highest ``ms1scans`` intensity
        per scan within +/-0.01 m/z.  The window is the feature's
        ``rt +/- 1.5 * sc`` around the feature m/z, or ``spec.rt +/- 10``
        around ``spec.ms1mz`` when there is no feature.
        """
        ann_.write('NAME:\n')
        ann_.write(name + '\n')
        adduct_c = adduct_list.get(adduct[0], ('', '', ''))
        if adduct[0] == '?':  # if isf
            ann_.write('ADDUCT: -\n')
        else:
            ann_.write('ADDUCT: {} {}{}\n'.format(adduct[0], adduct_c[1],
                                                  adduct_c[2]))
        ann_.write('TARGET_M/Z, FEATURE_M/Z: {:.6f}'.format(spec.ms1mz))
        ann_.write(', no_ms1_feature_detected' if peak is None else ', ' +
                   format(peak.mz, '.6f'))
        ann_.write('\n')
        ann_.write('SCAN_START_TIME, RT: {:.3f}'.format(spec.rt))

        # Integration window: centred on the feature when there is one,
        # otherwise on the spectrum's own precursor m/z and scan time.
        if peak is None:
            center_mz = spec.ms1mz
            rt_l, rt_r = spec.rt - 10, spec.rt + 10
        else:
            center_mz = peak.mz
            rt_l, rt_r = peak.rt - peak.sc * 1.5, peak.rt + peak.sc * 1.5
        mz_lo = bisect_left(ms1scans, (center_mz - .01, ))
        mz_hi = bisect_left(ms1scans, (center_mz + .01, ))
        p_area = [x for x in ms1scans[mz_lo:mz_hi] if rt_l < x.rt < rt_r]
        ms1rt = rtall[bisect_left(rtall, rt_l):bisect_left(rtall, rt_r)]
        p_dict = dict()  # highest intensities per scan bounded by m/z
        for pt in p_area:
            if pt.rt not in p_dict or p_dict[pt.rt] < pt.I:
                p_dict[pt.rt] = pt.I
        p_maxI = [p_dict.get(rt, 0.) for rt in ms1rt]
        ms1_auc = sum((I0 + I1) * (rt1 - rt0) for rt0, rt1, I0, I1 in zip(
            ms1rt, ms1rt[1:], p_maxI, p_maxI[1:])) / 2

        ann_.write(', no_ms1_feature_detected' if peak is None else ', ' +
                   format(peak.rt, '.3f'))
        ann_.write('\n')
        # Only 3-tuples carry a score; defaulting to False also keeps dotp
        # bound for any other tuple length.
        dotp = adduct[1] if len(adduct) == 3 else False
        ann_.write('PEAK_AREA: ' + str(ms1_auc) + '\n')
        ann_.write('DOT_PRODUCT: ' + (format(dotp, '.3f') if dotp else '-') +
                   '\n')
        ann_.write('EXPERIMENTAL_SPECTRUM:\n')
        for i, mz in zip(spec.I_all, spec.mz_all):
            ann_.write('{:.6f} {:.6g}\n'.format(mz, i))

    def rec_all():
        """Run ``mass_matching`` on every pair and group the matches.

        Matches are grouped under ``(name, adduct label)``.  For a 3-tuple
        match the name is the library entry's name; for a 2-tuple ("NoMatch"
        mode) it is the newline-joined, sorted set of names found in the
        matched ``dlist`` range.

        Unless "NoMatch" is in ``lib_types[0]``, every pair without any match
        is also written as an ``unknown_<n>`` record to
        ``'una_<lib types>_<basename>.txt'``.

        Returns:
            ``defaultdict(list)`` mapping ``(name, adduct label)`` to a list
            of ``(match, spec, peak)`` tuples.
        """
        uk_count = 1
        write_unknowns = "NoMatch" not in lib_types[0]
        una_ = None
        if write_unknowns:
            una_ = open(
                'una_' + '_'.join(lib_types) + '_' + basename0 + '.txt', 'w')
        id_quant_ma = collections.defaultdict(list)
        try:
            for adduct_match, spec, peak in map(mass_matching,
                                                range(len(ms2_wp))):
                for adduct in adduct_match:
                    if len(adduct) == 3:  # if lipidblast
                        name0 = adduct[2].name
                    elif len(adduct) == 2:  # if nomatch
                        pos0, pos1 = adduct[1]
                        nameset = {x for _, x in dlist[pos0:pos1]}
                        name0 = '\n'.join(sorted(nameset))
                    id_quant_ma[name0, adduct[0]].append((adduct, spec, peak))
                if adduct_match or not write_unknowns:
                    continue
                una_.write("NAME: unknown_{} {} MS1 feature\n".format(
                    uk_count, ('with' if peak else 'no')))
                uk_count += 1
                una_.write("PRECURSORMZ: {:.6f}\n".format(spec.ms1mz))
                if ispos:
                    una_.write(
                        "PRECURSORTYPE: [M+H]+ [M+2H]2+ [M+Na]+ [M+NH4]+ [M-H2O+H]+\n"
                    )
                else:
                    una_.write(
                        "PRECURSORTYPE: [M-H]- [M-2H]2- [M-H2O-H]- [M+HCOO]- [M+Cl]-\n"
                    )
                una_.write("RETENTIONTIME: {:.3f}\n".format(spec.rt / 60))
                # Keep peaks above bpfilter[0] * base peak, then at most the
                # first bpfilter[1] of them in their stored order.
                thres = bpfilter[0] * max(spec.I_all)
                I_mz_list = [(i, mz) for i, mz in zip(spec.I_all, spec.mz_all)
                             if i > thres]
                I_mz_list = I_mz_list[:bpfilter[1]]
                una_.write("Num Peaks: {}\n".format(len(I_mz_list)))
                for i, mz in I_mz_list:
                    una_.write('{:.6f} {:.6g}\n'.format(mz, i))
                una_.write('\n')
        finally:
            # Close (and flush) the unknowns file even if matching fails.
            if una_ is not None:
                una_.close()
        return id_quant_ma

    id_quant_ma = rec_all()

    def print_all(id_quant):
        """Write every grouped match to ``'ann_<lib types>_<basename>.txt'``.

        Args:
            id_quant: Mapping of ``(name, adduct label)`` to a list of
                ``(match, spec, peak)`` tuples.

        Each match is written with ``print_ann``; a 3-tuple match is followed
        by a ``LIBRARY_SPECTRUM:`` section listing the entry's ``mz`` / ``I``
        pairs.  Records are separated by a blank line.
        """
        out_name = 'ann_' + '_'.join(lib_types) + '_' + basename0 + '.txt'
        # The context manager guarantees the file is flushed and closed.
        with open(out_name, 'w') as annotated_:
            for (name, _), adducts in id_quant.items():
                for adduct, spec, peak in adducts:
                    print_ann(annotated_, adduct, spec, peak, name)
                    if len(adduct) == 3:
                        ent = adduct[2]
                        annotated_.write('LIBRARY_SPECTRUM:\n')
                        for mz, i in zip(ent.mz, ent.I):
                            annotated_.write('{} {}\n'.format(mz, i))
                    annotated_.write('\n')

    print_all(id_quant_ma)