def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment upward, one ladder size at a time.

    Every ladder size larger than the largest anchored size is appended in
    ascending list order; after each append the fit is re-optimised with
    ``minimize_score`` and the pairs are recomputed with ``f.get_pairs``.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'order'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    ordered_anchors = sorted(anchor_pairs)
    rtime_col, size_col = zip(*ordered_anchors)
    anchor_rtimes = list(rtime_col)
    # This list is shared with ``f`` and grows as ladder sizes are added.
    sizes_in_use = list(size_col)
    top_anchor = sizes_in_use[-1]
    pending = [s for s in ladder['sizes'] if s > top_anchor]
    order = ladder['order']

    z = estimate_z(anchor_rtimes, sizes_in_use, order).z
    f = ZFunc(peaks, sizes_in_use, ordered_anchors, estimate=True)
    pairs, rss = f.get_pairs(z)

    while pending:
        sizes_in_use.append(pending.pop(0))
        f.set_sizes(sizes_in_use)
        _, candidate_z = minimize_score(f, z, order)
        # NOTE(review): pairs/rss are evaluated with the z from *before* this
        # optimisation step, not with candidate_z -- confirm this is intended.
        pairs, rss = f.get_pairs(z)
        if rss < 100:
            z = candidate_z
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
def estimate_de(peaks, sizes):
    """Estimate a first-order fit using up to three differential-evolution runs.

    Each run optimises ``ZFunc`` over fixed two-parameter bounds, pairs the
    peaks with sizes at the optimum and refits with ``estimate_z`` (order 1).
    The loop stops early once a refit's rss drops below the number of paired
    sizes. The candidate with the lowest rss is plotted and returned.

    Args:
        peaks: peaks handed to ``ZFunc``.
        sizes: ladder sizes handed to ``ZFunc``.

    Returns:
        Tuple ``(pairs, z)`` for the best candidate.
    """
    f = ZFunc(peaks, sizes, [], estimate=True)
    search_bounds = [(0.01, 0.5), (-275, 75)]
    candidates = []

    for attempt in range(1, 4):
        de_result = differential_evolution(
            f, search_bounds, tol=1e-5, mutation=(0.3, 1.7), popsize=45,
            recombination=0.5, strategy='rand1bin')
        pairs, _ = f.get_pairs(de_result.x)
        pairs.sort()
        rtimes, bpsizes = zip(*pairs)
        fit = estimate_z(rtimes, bpsizes, 1)
        cerr('I: DE iter: %2d - pairs: %2d - Cur RSS: %6.2f'
             % (attempt, len(pairs), fit.rss))
        candidates.append((fit, pairs))
        if fit.rss < len(bpsizes) * 1.0:
            break

    # Stable ordering: on equal rss the earliest run wins.
    best_fit, best_pairs = sorted(candidates, key=lambda c: c[0].rss)[0]
    plot(f.rtimes, f.sizes, best_fit.z, best_pairs)
    return best_pairs, best_fit.z
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downward, one ladder size at a time.

    Ladder sizes smaller than the smallest anchored size are prepended one by
    one, taken from the end of the filtered list; after each step the
    third-order fit is re-optimised with ``minimize_score`` and the pairs are
    recomputed.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    ordered_anchors = sorted(anchor_pairs)
    rtime_col, size_col = zip(*ordered_anchors)
    anchor_rtimes = list(rtime_col)
    # This list is shared with ``f`` and grows at the front.
    sizes_in_use = list(size_col)
    lowest_anchor = sizes_in_use[0]
    pending = [s for s in ladder['sizes'] if s < lowest_anchor]

    z = estimate_z(anchor_rtimes, sizes_in_use, 3).z
    f = ZFunc(peaks, sizes_in_use, ordered_anchors, estimate=True)
    pairs, rss = f.get_pairs(z)

    while pending:
        sizes_in_use.insert(0, pending.pop())
        f.set_sizes(sizes_in_use)
        _, z = minimize_score(f, z, 3)
        pairs, rss = f.get_pairs(z)
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment upward, then finalise it with ``align_dp``.

    Ladder sizes above the largest anchored size are appended one at a time.
    A second size is appended in the same step when the spacing conditions in
    the loop hold. A step is accepted only when the rss grows by less than 70
    compared with the last accepted state. Finally ``align_dp`` is run; its
    result is used unless its rss exceeds the incremental rss by more than 50.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'order'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    # This list is shared with ``f`` and grows as ladder sizes are added.
    current_sizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > current_sizes[-1]]
    order = ladder['order']

    zres = estimate_z(anchor_rtimes, current_sizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Fix: start from the anchor pairs so `pairs` is always bound, even when
    # there is nothing above the anchors or no extension step is accepted
    # (previously that path raised UnboundLocalError).
    pairs = list(anchor_pairs)

    while remaining_sizes:
        current_sizes.append(remaining_sizes.pop(0))
        # Take the next size in the same step when it is closer than 11 to
        # the one just added and the last remaining size is within 100.
        if (remaining_sizes
                and (remaining_sizes[-1] - current_sizes[-1]) < 100
                and (remaining_sizes[0] - current_sizes[-1]) < 11):
            current_sizes.append(remaining_sizes.pop(0))

        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, order)
        # NOTE(review): pairs are evaluated with the previous z rather than
        # next_z -- kept as in the original; confirm this is intended.
        next_pairs, next_rss = f.get_pairs(z)
        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f

    # sized_peaks entries are swapped to match the pair layout used above
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)
    return dp_pairs, dp_result.z, dp_result.rss, f
def estimate_pm(peaks, bpsizes):
    """Pick the best first-order fit over candidate rtime pairs, then refine it.

    Each increasing rtime pair from ``prepare_rtimes`` is fitted against the
    second and next-to-last entries of ``bpsizes`` and scored with ``ZFunc``.
    The lowest-scoring fit seeds ``align_dp``.

    Args:
        peaks: objects exposing ``rtime``.
        bpsizes: sequence of sizes; needs at least two entries.

    Returns:
        Tuple ``(pairs, z)``: ``pairs`` holds the ``align_dp`` sized peaks
        with their two elements swapped, ``z`` is ``dp_result.z``.
    """
    candidate_pairs = prepare_rtimes([peak.rtime for peak in peaks])
    size_endpoints = [bpsizes[1], bpsizes[-2]]
    f = ZFunc(peaks, bpsizes, [], estimate=True)

    ranked = []
    for rtime_pair in candidate_pairs:
        if rtime_pair[0] >= rtime_pair[1]:
            continue
        # Two points define the line: estimate_z does the order-1 fit.
        fit = estimate_z(rtime_pair, size_endpoints, 1)
        ranked.append((f(fit.z), fit))
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, fit.z, [])

    ranked.sort(key=lambda entry: entry[0])
    best_fit = ranked[0][1]

    dp_result = align_dp(f.rtimes, f.sizes, f.similarity,
                         best_fit.z, best_fit.rss)
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(sp[1], sp[0]) for sp in dp_result.sized_peaks])

    swapped = [(sp[1], sp[0]) for sp in dp_result.sized_peaks]
    return (swapped, dp_result.z)
def estimate_pm(peaks, bpsizes):
    """Estimate initial peak/size pairs and a fit from candidate rtime pairs.

    For every candidate pair whose first rtime is smaller than its second, a
    first-order fit through ``(bpsizes[1], bpsizes[-2])`` is computed with
    ``estimate_z`` and scored by calling the ``ZFunc`` instance. The fit with
    the lowest score is then refined with ``align_dp``.

    Args:
        peaks: objects exposing ``rtime``.
        bpsizes: sequence of sizes; needs at least two entries.

    Returns:
        Tuple ``(pairs, z)`` built from the ``align_dp`` result.
    """
    peak_rtimes = [p.rtime for p in peaks]
    rtime_points = prepare_rtimes(peak_rtimes)
    bpsize_pair = [bpsizes[1], bpsizes[-2]]
    f = ZFunc(peaks, bpsizes, [], estimate=True)
    verbose = None  # placeholder, looked up per use below

    scored_fits = []
    for rtime_pair in rtime_points:
        lower, upper = rtime_pair[0], rtime_pair[1]
        if lower >= upper:
            # only increasing pairs describe a usable line
            continue
        zres = estimate_z(rtime_pair, bpsize_pair, 1)
        fit_score = f(zres.z)
        scored_fits.append((fit_score, zres))
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, zres.z, [])

    scored_fits.sort(key=lambda item: item[0])
    _, zresult = scored_fits[0]

    dp_result = align_dp(
        f.rtimes, f.sizes, f.similarity, zresult.z, zresult.rss)

    def as_pairs():
        # swap the two elements of each sized peak
        return [(x[1], x[0]) for x in dp_result.sized_peaks]

    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, as_pairs())
    return (as_pairs(), dp_result.z)
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the alignment downward by rebuilding the fit one size at a time.

    On each pass the largest ladder size still below the smallest anchored
    size is prepended, a fresh ``ZFunc`` is built, the third-order fit is
    optimised with ``minimize_score`` and the resulting pairs become the
    anchors for the next pass.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the last ``ZFunc`` built.
    """
    # Fix: bind the results up front; previously the very first pass could
    # hit `return pairs, z, rss, f` with none of them assigned.
    pairs = z = rss = f = None

    # NOTE(review): if get_pairs never matches the newly added size, the
    # anchors do not move down and this loop may not terminate -- confirm.
    while True:
        anchor_pairs = sorted(anchor_pairs)
        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        anchor_rtimes = list(anchor_rtimes)
        anchor_bpsizes = list(anchor_bpsizes)

        remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
        if not remaining_sizes:
            if f is None:
                # Nothing lies below the anchors: return the anchor-only
                # third-order fit instead of raising UnboundLocalError.
                zres = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
                f = ZFunc(peaks, anchor_bpsizes, anchor_pairs, estimate=True)
                pairs, z, rss = anchor_pairs, zres.z, zres.rss
            return pairs, z, rss, f

        current_sizes = [remaining_sizes[-1]] + anchor_bpsizes
        print('current_sizes:', current_sizes)
        f = ZFunc(peaks, current_sizes, anchor_pairs, estimate=True)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
        score, z = minimize_score(f, zres.z, 3)
        pairs, rss = f.get_pairs(z)
        plot(f.rtimes, f.sizes, z, pairs)
        anchor_pairs = pairs
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the alignment downward by rebuilding the fit one size at a time.

    On each pass the largest ladder size still below the smallest anchored
    size is prepended, a fresh ``ZFunc`` is built, the third-order fit is
    optimised with ``minimize_score`` and the resulting pairs become the
    anchors for the next pass.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the last ``ZFunc`` built.
    """
    # Fix: bind the results up front; previously the very first pass could
    # hit `return pairs, z, rss, f` with none of them assigned.
    pairs = z = rss = f = None

    # NOTE(review): if get_pairs never matches the newly added size, the
    # anchors do not move down and this loop may not terminate -- confirm.
    while True:
        anchor_pairs = sorted(anchor_pairs)
        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        anchor_rtimes = list(anchor_rtimes)
        anchor_bpsizes = list(anchor_bpsizes)

        remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
        if not remaining_sizes:
            if f is None:
                # Nothing lies below the anchors: return the anchor-only
                # third-order fit instead of raising UnboundLocalError.
                zres = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
                f = ZFunc(peaks, anchor_bpsizes, anchor_pairs, estimate=True)
                pairs, z, rss = anchor_pairs, zres.z, zres.rss
            return pairs, z, rss, f

        current_sizes = [remaining_sizes[-1]] + anchor_bpsizes
        print('current_sizes:', current_sizes)
        f = ZFunc(peaks, current_sizes, anchor_pairs, estimate=True)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
        score, z = minimize_score(f, zres.z, 3)
        pairs, rss = f.get_pairs(z)
        plot(f.rtimes, f.sizes, z, pairs)
        anchor_pairs = pairs
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downward, rejecting sizes that hurt the fit.

    Ladder sizes below the smallest anchored size are prepended one at a
    time, taken from the end of the filtered list. After each step the
    third-order fit is re-optimised; the step is kept only when the rss
    grows by at most 20 over the last accepted state.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    # This list is shared with ``f`` and grows at the front.
    current_sizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x < current_sizes[0]]

    zscore = estimate_z(anchor_rtimes, current_sizes, 3)
    z = zscore.z
    rss = zscore.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Fix: start from the anchor pairs so `pairs` is always bound, even when
    # no ladder size lies below the anchors or every candidate is rejected
    # (previously that path raised UnboundLocalError).
    pairs = list(anchor_pairs)

    while remaining_sizes:
        current_sizes.insert(0, remaining_sizes.pop())
        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, 3)
        next_pairs, next_rss = f.get_pairs(next_z)

        # If the rss increase over the last accepted state exceeds the
        # threshold, treat the newly added size as inappropriate and keep
        # the previous z, rss and pairs.
        if (next_rss - rss) > 20:
            # NOTE(review): f is not told about the removal via set_sizes;
            # fine only if ZFunc keeps a reference to this list -- confirm.
            current_sizes.pop(0)
        else:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downward, rejecting sizes that hurt the fit.

    Ladder sizes below the smallest anchored size are prepended one at a
    time, taken from the end of the filtered list. After each step the
    third-order fit is re-optimised; the step is kept only when the rss
    grows by at most 20 over the last accepted state.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-tuples, unpacked as (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    # This list is shared with ``f`` and grows at the front.
    current_sizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x < current_sizes[0]]

    zscore = estimate_z(anchor_rtimes, current_sizes, 3)
    z = zscore.z
    rss = zscore.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Fix: start from the anchor pairs so `pairs` is always bound, even when
    # no ladder size lies below the anchors or every candidate is rejected
    # (previously that path raised UnboundLocalError).
    pairs = list(anchor_pairs)

    while remaining_sizes:
        current_sizes.insert(0, remaining_sizes.pop())
        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, 3)
        next_pairs, next_rss = f.get_pairs(next_z)

        # If the rss increase over the last accepted state exceeds the
        # threshold, treat the newly added size as inappropriate and keep
        # the previous z, rss and pairs.
        if (next_rss - rss) > 20:
            # NOTE(review): f is not told about the removal via set_sizes;
            # fine only if ZFunc keeps a reference to this list -- confirm.
            current_sizes.pop(0)
        else:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder by pair matching and score the result.

    Without ``anchor_pairs`` the anchors are estimated by ``estimate_pm``
    from peaks inside the anchor rtime window; the window is scaled up when
    the longest rtime exceeds ``PEAK_RTIME_UPPER_BOUND``. The anchors are
    then extended with ``align_upper_pm`` and ``align_lower_pm``, refined by
    ``align_dp`` and graded with the ladder's ``'qcfunc'``.

    Args:
        peaks: objects exposing ``rtime``.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'``; it is
            also passed on to the upper/lower alignment steps.
        anchor_pairs: optional sequence of 2-tuples, unpacked as
            (rtime, bpsize). The caller's sequence is not modified.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score is
        above 0.9, otherwise the relaxed-QC result tagged ``pm_relax``.
    """
    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # Scale the anchor window by the same ratio as the overrun.
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])
    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # Fix: keep the coefficients (.z) so initial_z has the same kind of
        # value in both branches (estimate_pm returns dp_result.z).
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # Fix: sort a copy instead of mutating the caller's list in place.
    anchor_pairs = sorted(anchor_pairs)

    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder by pair matching and score the result.

    Without ``anchor_pairs`` the anchors are estimated by ``estimate_pm``
    from peaks inside the anchor rtime window; the window is scaled up when
    the longest rtime exceeds ``PEAK_RTIME_UPPER_BOUND``. The anchors are
    then extended with ``align_upper_pm`` and ``align_lower_pm``, refined by
    ``align_dp`` and graded with the ladder's ``'qcfunc'``.

    Args:
        peaks: objects exposing ``rtime``.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'``; it is
            also passed on to the upper/lower alignment steps.
        anchor_pairs: optional sequence of 2-tuples, unpacked as
            (rtime, bpsize). The caller's sequence is not modified.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score is
        above 0.9, otherwise the relaxed-QC result tagged ``pm_relax``.
    """
    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # Scale the anchor window by the same ratio as the overrun.
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])
    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # Fix: keep the coefficients (.z) so initial_z has the same kind of
        # value in both branches (estimate_pm returns dp_result.z).
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # Fix: sort a copy instead of mutating the caller's list in place.
    anchor_pairs = sorted(anchor_pairs)

    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder by pair matching and score the result.

    Without ``anchor_pairs`` the anchors are estimated by ``estimate_pm``
    from peaks with ``1500 < rtime < 5000``. When the anchors already cover
    every ladder size, a second-order fit is optimised directly; otherwise
    they are extended with ``align_upper_pm`` and ``align_lower_pm``. The
    result is refined by ``align_dp`` and graded with the ladder's
    ``'qcfunc'``.

    Args:
        peaks: objects exposing ``rtime``.
        ladder: mapping providing ``'signature'``, ``'sizes'`` and
            ``'qcfunc'``.
        anchor_pairs: optional sequence of 2-tuples, unpacked as
            (rtime, bpsize). The caller's sequence is not modified.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score is
        above 0.9, otherwise the relaxed-QC result tagged ``pm_relax``.
    """
    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]
        # this finds the pair of peaks that best match the 2nd and
        # next-to-last ladder steps, and does a linear fit to the rest of
        # the peaks to find the peaks matched to ladder steps
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])
    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # Fix: keep the coefficients (.z) so initial_z has the same kind of
        # value in both branches (estimate_pm returns dp_result.z).
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # Fix: sort a copy instead of mutating the caller's list in place.
    anchor_pairs = sorted(anchor_pairs)

    # if the number of anchor pairs equals the number of ladder steps,
    # there is no need to do pair matching
    if len(anchor_pairs) == len(ladder['sizes']):
        f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)
        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 2)
        score, z = minimize_score(f, zres.z, 2)
        pairs, rss = f.get_pairs(z)
    else:
        pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
        if is_verbosity(4):
            print(pairs)
        pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(4):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder by pair matching and score the result.

    Without ``anchor_pairs`` the anchors are estimated by ``estimate_pm``
    from peaks with ``1500 < rtime < 5000``. The anchors are then extended
    with ``align_upper_pm`` and ``align_lower_pm``, refined by ``align_dp``
    and graded with the ladder's ``'qcfunc'``.

    Args:
        peaks: objects exposing ``rtime``.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'``; it is
            also passed on to the upper/lower alignment steps.
        anchor_pairs: optional sequence of 2-tuples, unpacked as
            (rtime, bpsize). The caller's sequence is not modified.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score is
        above 0.9, otherwise the relaxed-QC result tagged ``pm_relax``.
    """
    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])
    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # Fix: keep the coefficients (.z) so initial_z has the same kind of
        # value in both branches (estimate_pm returns dp_result.z).
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # Fix: sort a copy instead of mutating the caller's list in place.
    anchor_pairs = sorted(anchor_pairs)

    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    # NOTE(review): this dump is unconditional (not gated by is_verbosity);
    # kept as in the original -- confirm it is still wanted.
    import pprint
    pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)