def get_initial_z(self):
    """Return the best initial alignment derived from the anchor pairs.

    A first-order fit of the anchors is always tried.  A second-order fit is
    tried as well when the anchors span more than 20% of the size range and
    there are at least five anchor pairs.  Each fit is refined with
    ``align_dp`` and the refined result with the smallest ``rss`` is returned.
    """
    anchor_span = self.anchor_sizes[-1] - self.anchor_sizes[0]
    full_span = self.sizes[-1] - self.sizes[0]

    # only well-spread, sufficiently numerous anchors justify a 2nd-order fit
    degrees = [1]
    if anchor_span > 0.2 * full_span and len(self.anchor_pairs) >= 5:
        degrees = [1, 2]

    candidates = []
    for degree in degrees:
        seed = estimate_z(self.anchor_rtimes, self.anchor_sizes, degree)
        candidates.append(
            align_dp(self.rtimes, self.sizes, seed.z, seed.rss, degree)
        )

    return min(candidates, key=lambda candidate: candidate.rss)
def estimate_pm(peaks, bpsizes):
    """Return the (rtime, bp size) pairs matched to peaks and the fitted z.

    Every candidate pair of rtimes from ``prepare_rtimes`` is assumed to be
    the 2nd and next-to-last ladder steps; a linear fit through the pair is
    scored against all peaks, and the best-scoring fit seeds ``align_dp``.
    """
    candidate_rtimes = prepare_rtimes([peak.rtime for peak in peaks])
    # reference sizes: the 2nd and the next-to-last ladder steps
    reference_sizes = [bpsizes[1], bpsizes[-2]]
    f = ZFunc(peaks, bpsizes, [], estimate=True)

    ranked = []
    for rtime_pair in candidate_rtimes:
        # the pair must be in strictly ascending rtime order
        if rtime_pair[0] >= rtime_pair[1]:
            continue
        # linear fit through the two points, then score it on all peaks
        fit = estimate_z(rtime_pair, reference_sizes, 1)
        ranked.append((f(fit.z), fit))

    ranked = sorted(ranked, key=lambda entry: entry[0])
    best_fit = ranked[0][1]

    # check this linear fit
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, best_fit.z, best_fit.rss)

    matched = [(sized[1], sized[0]) for sized in dp_result.sized_peaks]
    return (matched, dp_result.z)
def align_de(peaks, ladder, initial_pair=None):
    """Align peaks to the ladder with the differential evolution method.

    Runs up to three differential-evolution searches over a ``ZFunc`` built
    from ``peaks`` and ``ladder['sizes']``.  After each search the resulting
    pairs are refitted with ``estimate_z``; the loop stops early once the
    refit's RSS drops below the number of pairs.  The refit with the lowest
    RSS is refined with ``align_dp`` and scored with the relaxed QC.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'qcfunc'``.
        initial_pair: optional initial pairs for ``ZFunc``; defaults to a
            fresh empty list.

    Returns:
        ``AlignResult(score, msg, dp_result, const.alignmethod.de_relax)``.
    """
    cerr('I: differential evolution method is running!')

    # A literal ``[]`` default would be one list shared by every call.
    if initial_pair is None:
        initial_pair = []

    sizes = ladder['sizes']
    f = ZFunc(peaks, sizes, initial_pair)

    # search box for the four parameters optimised by differential_evolution
    bounds = [(-1e-10, 1e-10), (-1e-5, 1e-5), (0.01, 0.1), (-75, 75)]

    results = []
    # NOTE(review): the refit order runs 0, 1, 2, so the first pass fits a
    # degree-0 polynomial -- confirm this is intended.
    for order in range(3):
        res = differential_evolution(f, bounds, tol=1e-5, mutation=(0.4, 1.5),
                                     popsize=30, recombination=0.8)

        pairs, _ = f.get_pairs(res.x)
        rtimes, bpsizes = zip(*pairs)
        zres = estimate_z(rtimes, bpsizes, order)

        cerr('I: DE iter: %2d - pairs: %2d - Cur RSS: %6.2f' %
             (order + 1, len(pairs), zres.rss))
        results.append(zres)

        # good enough: less than one unit of RSS per matched pair
        if zres.rss < len(bpsizes) * 1.0:
            break

    results.sort(key=lambda x: x.rss)
    zres = results[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, zres.z, zres.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.de_relax)
def align_de(peaks, ladder, initial_pair=None):
    """Align peaks to the ladder with the differential evolution method.

    Up to three differential-evolution searches are run over a ``ZFunc``
    built from ``peaks`` and ``ladder['sizes']``.  The pairs found by each
    search are refitted with ``estimate_z``, and the loop ends early when the
    refit's RSS is below the number of pairs.  The lowest-RSS refit is then
    refined with ``align_dp`` and scored with the relaxed QC.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'qcfunc'``.
        initial_pair: optional initial pairs for ``ZFunc``; a fresh empty
            list is used when omitted.

    Returns:
        ``AlignResult(score, msg, dp_result, const.alignmethod.de_relax)``.
    """
    cerr('I: differential evolution method is running!')

    # Avoid a mutable default: a ``[]`` default is shared between calls.
    if initial_pair is None:
        initial_pair = []

    sizes = ladder['sizes']
    f = ZFunc(peaks, sizes, initial_pair)

    # search box for the four parameters optimised by differential_evolution
    bounds = [(-1e-10, 1e-10), (-1e-5, 1e-5), (0.01, 0.1), (-75, 75)]

    results = []
    # NOTE(review): the refit order is 0 on the first pass (then 1, 2), i.e.
    # a degree-0 polynomial -- confirm this is intended.
    for order in range(3):
        res = differential_evolution(f, bounds, tol=1e-5, mutation=(0.4, 1.5),
                                     popsize=30, recombination=0.8)

        pairs, _ = f.get_pairs(res.x)
        rtimes, bpsizes = zip(*pairs)
        zres = estimate_z(rtimes, bpsizes, order)

        cerr('I: DE iter: %2d - pairs: %2d - Cur RSS: %6.2f' %
             (order + 1, len(pairs), zres.rss))
        results.append(zres)

        # stop once the RSS is below one unit per matched pair
        if zres.rss < len(bpsizes) * 1.0:
            break

    results.sort(key=lambda x: x.rss)
    zres = results[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, zres.z, zres.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.de_relax)
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchored alignment towards the larger ladder sizes.

    Ladder sizes above the largest anchored size are added one at a time
    (two at a time near the top of the ladder when the next size is close).
    After each addition the fit is re-minimized and kept only if the RSS
    grows by less than 70.  A final ``align_dp`` pass is accepted unless it
    worsens the RSS by more than 50.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'order'``.
        anchor_pairs: non-empty iterable of ``(rtime, bpsize)`` pairs.
        anchor_z: accepted for interface compatibility; not used here.

    Returns:
        ``(pairs, z, rss, f)`` where ``pairs`` are ``(rtime, bpsize)`` tuples
        and ``f`` is the ``ZFunc`` used for the alignment.
    """
    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)

    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    # grows in place as ladder sizes are appended below
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss

    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Start from the anchors themselves.  Without this, ``pairs`` was unbound
    # (UnboundLocalError) when no ladder size lies above the anchors or when
    # no extension step was accepted.
    pairs = list(anchor_pairs)

    while remaining_sizes:

        current_sizes.append(remaining_sizes.pop(0))
        # near the top of the ladder, take a closely spaced next size too
        if (remaining_sizes
                and (remaining_sizes[-1] - current_sizes[-1]) < 100
                and (remaining_sizes[0] - current_sizes[-1]) < 11):
            current_sizes.append(remaining_sizes.pop(0))

        f.set_sizes(current_sizes)
        _, next_z = minimize_score(f, z, order)

        # NOTE(review): the pairs/RSS are evaluated with the previous ``z``,
        # not ``next_z`` -- confirm this is intended.
        next_pairs, next_rss = f.get_pairs(z)
        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        # the DP pass made things clearly worse; keep the stepwise result
        return pairs, z, rss, f

    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f
def estimate_pm(peaks, bpsizes):
    """Return the (rtime, bp size) pairs matched to peaks and the fitted z.

    Each candidate rtime pair from ``prepare_rtimes`` is fitted linearly
    against the 2nd and next-to-last entries of ``bpsizes``.  The fit with
    the lowest score over all peaks seeds a final ``align_dp`` pass.
    """
    def as_pairs(sized_peaks):
        # (size, rtime, ...) entries -> (rtime, size) tuples
        return [(entry[1], entry[0]) for entry in sized_peaks]

    rtime_points = prepare_rtimes([peak.rtime for peak in peaks])
    size_pair = [bpsizes[1], bpsizes[-2]]
    f = ZFunc(peaks, bpsizes, [], estimate=True)

    scored_fits = []
    for rtime_pair in rtime_points:
        # skip pairs that are not in strictly ascending order
        if rtime_pair[0] >= rtime_pair[1]:
            continue

        # two-point linear fit, scored against every peak
        fit = estimate_z(rtime_pair, size_pair, 1)
        scored_fits.append((f(fit.z), fit))
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, fit.z, [])

    scored_fits.sort(key=lambda item: item[0])
    best = scored_fits[0][1]

    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, best.z, best.rss)
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, as_pairs(dp_result.sized_peaks))

    return (as_pairs(dp_result.sized_peaks), dp_result.z)
def estimate_pm(peaks, bpsizes):
    """Estimate the peak/ladder matching from the best two-point linear fit.

    Returns a tuple ``(pairs, z)``: ``pairs`` is a list of (rtime, bp size)
    tuples taken from the ``align_dp`` result, and ``z`` is that result's fit.
    """
    peak_rtimes = [peak.rtime for peak in peaks]
    candidates = prepare_rtimes(peak_rtimes)
    # sizes of the 2nd and next-to-last ladder steps
    low_size, high_size = bpsizes[1], bpsizes[-2]
    target_sizes = [low_size, high_size]
    f = ZFunc(peaks, bpsizes, [], estimate=True)

    trials = []
    for rtime_pair in candidates:
        if rtime_pair[0] >= rtime_pair[1]:
            # not strictly ascending; cannot be the two reference steps
            continue
        linear = estimate_z(rtime_pair, target_sizes, 1)
        trial_score = f(linear.z)
        trials.append((trial_score, linear))
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, linear.z, [])

    # lowest score first
    trials.sort(key=lambda trial: trial[0])
    _, winner = trials[0]

    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, winner.z, winner.rss)

    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(sp[1], sp[0]) for sp in dp_result.sized_peaks])

    pairs = [(sp[1], sp[0]) for sp in dp_result.sized_peaks]
    return (pairs, dp_result.z)
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to the ladder by peak matching.

    When no anchor pairs are given they are estimated with ``estimate_pm``
    from the peaks whose rtime lies strictly between 1500 and 5000.  If the
    anchors already cover every ladder size, a second-order fit is minimized
    directly; otherwise the alignment is extended with ``align_upper_pm`` and
    ``align_lower_pm``.  The result is refined with ``align_dp`` and scored
    with the strict QC, falling back to the relaxed QC.

    Args:
        peaks: peaks to align; each must expose ``rtime``.
        ladder: mapping providing ``'signature'``, ``'sizes'`` and
            ``'qcfunc'``.
        anchor_pairs: optional list of ``(rtime, bpsize)`` pairs; it is
            sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise tagged ``pm_relax``.
    """
    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]

        # this finds the pair of peaks that best match the 2nd and
        # next-to-last ladder steps, and does a linear fit to the rest of the
        # peaks to find the peaks matched to ladder steps
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep the coefficients only, matching what estimate_pm returns in
        # the other branch (previously the whole result object was kept)
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    anchor_pairs.sort()

    # if the number of anchor pairs equals the number of ladder steps, no
    # need to do pair matching
    if len(anchor_pairs) == len(ladder['sizes']):
        f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)
        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 2)
        _, z = minimize_score(f, zres.z, 2)
        pairs, rss = f.get_pairs(z)

    else:
        pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
        if is_verbosity(4):
            print(pairs)

        pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(4):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
    # The iterative-refit code that used to follow this return could never
    # run and has been removed.
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to the ladder by peak matching.

    When no anchor pairs are given they are estimated with ``estimate_pm``
    from the peaks inside the anchor rtime window.  The window is
    ``ANCHOR_RTIME_LOWER_BOUND``..``ANCHOR_RTIME_UPPER_BOUND``, scaled up
    proportionally when the longest peak rtime exceeds
    ``PEAK_RTIME_UPPER_BOUND``.  The alignment is then extended with
    ``align_upper_pm`` and ``align_lower_pm``, refined with ``align_dp`` and
    scored with the strict QC, falling back to the relaxed QC.

    Args:
        peaks: peaks to align; each must expose ``rtime``.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'``.
        anchor_pairs: optional list of ``(rtime, bpsize)`` pairs; it is
            sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise tagged ``pm_relax``.
    """
    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # stretch the anchor window by the same ratio as the run length
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep the coefficients only, matching what estimate_pm returns in
        # the other branch (previously the whole result object was kept)
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    anchor_pairs.sort()
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)

    # NOTE(review): ``initial_z`` is forwarded here rather than the ``z``
    # returned by align_upper_pm -- confirm this is intended.
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
    # The iterative-refit code that used to follow this return could never
    # run and has been removed.
def align_gm(peaks, ladder, anchor_pairs, z=None):
    """Align peaks to the ladder by repeated Nelder-Mead minimization.

    Starting from ``z`` (or from ``ZFunc.get_initial_z`` when ``z`` is None),
    the ``ZFunc`` is minimized, the resulting pairs are refitted with
    ``estimate_z`` (order 1, 2, then 3 for every later pass) and the refit
    becomes the next starting point.  Iteration ends when the RSS changes by
    at most 1e-3 or drops below the number of pairs.  The lowest-RSS refit is
    refined with ``align_dp`` and scored with the strict QC, falling back to
    the relaxed QC.
    """
    cerr('I: generalized minimization method is running!')

    f = ZFunc(peaks, ladder['sizes'], anchor_pairs)
    if z is None:
        z = f.get_initial_z().z

    fits = []
    iteration = 1
    current_rss, previous_rss = -1, 0
    while abs(current_rss - previous_rss) > 1e-3:
        previous_rss = current_rss

        solution = minimize(f, z, method='Nelder-Mead', tol=1e-6)
        pairs, _ = f.get_pairs(solution.x)
        rtimes, bpsizes = zip(*pairs)

        # the refit order is capped at 3
        fit = estimate_z(rtimes, bpsizes, min(iteration, 3))
        current_rss, z = fit.rss, fit.z
        cerr('I: GM iter: %2d - pairs: %2d - Cur RSS: %6.2f' %
             (iteration, len(pairs), current_rss))
        iteration += 1
        fits.append(fit)

        # good enough: less than one unit of RSS per pair
        if current_rss < len(pairs) * 1.0:
            break

    # get the best result
    best = sorted(fits, key=lambda candidate: candidate.rss)[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, best.z, best.rss)
    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    quality_check = ladder['qcfunc']
    score, msg = quality_check(dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.gm_strict)

    score, msg = quality_check(dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.gm_relax)
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to the ladder by peak matching.

    When no anchor pairs are given they are estimated with ``estimate_pm``
    from the peaks whose rtime lies strictly between 1500 and 5000.  The
    alignment is then extended with ``align_upper_pm`` and
    ``align_lower_pm``, refined with ``align_dp`` and scored with the strict
    QC, falling back to the relaxed QC.

    Args:
        peaks: peaks to align; each must expose ``rtime``.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'``.
        anchor_pairs: optional list of ``(rtime, bpsize)`` pairs; it is
            sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise tagged ``pm_relax``.
    """
    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep the coefficients only, matching what estimate_pm returns in
        # the other branch (previously the whole result object was kept)
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    anchor_pairs.sort()
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)

    # NOTE(review): ``initial_z`` is forwarded here rather than the ``z``
    # returned by align_upper_pm -- confirm this is intended.
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)

    # NOTE(review): this dump is printed on every call, regardless of the
    # verbosity level -- confirm whether it should be guarded.
    import pprint
    pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
    # The iterative-refit code that used to follow this return could never
    # run and has been removed.
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to the ladder by peak matching.

    Without anchor pairs, anchors are estimated with ``estimate_pm`` from the
    peaks inside the anchor rtime window
    (``ANCHOR_RTIME_LOWER_BOUND``..``ANCHOR_RTIME_UPPER_BOUND``, scaled up
    proportionally when the longest peak rtime exceeds
    ``PEAK_RTIME_UPPER_BOUND``).  The alignment is extended with
    ``align_upper_pm`` and ``align_lower_pm``, refined with ``align_dp`` and
    scored with the strict QC, falling back to the relaxed QC.

    Args:
        peaks: peaks to align; each must expose ``rtime``.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'``.
        anchor_pairs: optional list of ``(rtime, bpsize)`` pairs; it is
            sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise tagged ``pm_relax``.
    """
    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # widen the anchor window in proportion to the longer run
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep the coefficients only, matching what estimate_pm returns in
        # the other branch (previously the whole result object was kept)
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    anchor_pairs.sort()
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)

    # NOTE(review): ``initial_z`` is forwarded here rather than the ``z``
    # returned by align_upper_pm -- confirm this is intended.
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
    # The iterative-refit code that used to follow this return could never
    # run and has been removed.
def align_hc(peaks, ladder):
    """Align peaks to the ladder by clustering both and pairing the clusters.

    peaks: objects exposing ``rtime``
    ladder: mapping with ``'sizes'``, ``'k'`` and ``'qcfunc'``; the ladder
        tree and clusters are cached in it under ``'T'`` and ``'C'``

    returns: an ``AlignResult``.  Its score is -1 (with ``initial_pairs``
    attached when any were found) if no alignment scored above 0.9.
    """
    # generate (and cache) the tree and clusters for the ladder
    if 'C' not in ladder:
        if 'T' not in ladder:
            ladder['T'] = generate_tree([(size, 0) for size in ladder['sizes']])
        ladder['C'] = generate_cluster(ladder['T'], ladder['k'])
    ladder_clusters, ladder_sizes = ladder['C'], ladder['sizes']

    peak_tree = generate_tree([(peak.rtime, 0) for peak in peaks])
    peak_clusters = generate_cluster(peak_tree, ladder['k'])
    # generate cluster should use so-called balance tree
    print(peak_clusters)

    def clustered_total():
        # number of rtimes currently held across all clusters
        return len(reduce(operator.add, peak_clusters))

    # drop a single-member cluster at either end while there are more
    # clustered rtimes than ladder sizes
    if len(peak_clusters[-1]) == 1 and clustered_total() > len(ladder_sizes):
        del peak_clusters[-1]
    if len(peak_clusters[0]) == 1 and clustered_total() > len(ladder_sizes):
        del peak_clusters[0]

    # re-cluster the surviving rtimes when too few clusters remain
    if len(peak_clusters) < ladder['k']:
        surviving = reduce(operator.add, peak_clusters)
        peak_tree = generate_tree([(rtime, 0) for rtime in surviving])
        peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # short cut, in case we have good high quality peaks
    if sum(len(cluster) for cluster in peak_clusters) == len(ladder_sizes):
        hq_peaks = sum(peak_clusters, [])
        zres = estimate_z(hq_peaks, ladder_sizes)
        dp_result = align_dp(hq_peaks, ladder_sizes, [1.0] * len(hq_peaks),
                             zres.z, zres.rss)
        dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        if score > 0.9:
            return AlignResult(score, msg, dp_result,
                               const.alignmethod.hcm_strict)

    cluster_pairings, _ = align_clusters(peak_clusters, ladder_clusters)

    # keep only the pairings that pass the per-cluster check
    initial_pairs = []
    for pairing in cluster_pairings:
        if not is_good_pairing(pairing):
            cverr(3, '>> this pairings is not included:\n%s' % pairing)
            continue
        initial_pairs.extend(pairing)

    cverr(3, '>> initial pairs:\n%s' % initial_pairs)

    if not initial_pairs:
        return AlignResult(-1, 'E: no initial pairs defined!', None, None)

    # try to dp align the initial pairs as a shortcut for good sample or peaks
    rtimes, sizes = zip(*initial_pairs)
    zres = estimate_z(rtimes, sizes)
    dp_result = align_dp([peak.rtime for peak in peaks], ladder_sizes,
                         generate_similarity(peaks), zres.z, zres.rss)
    dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)

    return AlignResult(-1, 'ERR: alignment needs minimization', None, None,
                       initial_pairs=initial_pairs)
def align_hc(peaks, ladder):
    """Align peaks to the ladder by clustering both and pairing the clusters.

    peaks: objects exposing ``rtime``
    ladder: mapping with ``'sizes'``, ``'k'`` and ``'qcfunc'``; the ladder
        tree and clusters are cached in it under ``'T'`` and ``'C'``

    returns: an ``AlignResult``.  Its score is -1 (with ``initial_pairs``
    attached when any were found) if no alignment scored above 0.9.
    """
    # generate (and cache) the tree and clusters for the ladder
    if 'C' not in ladder:
        if 'T' not in ladder:
            ladder['T'] = generate_tree([(size, 0) for size in ladder['sizes']])
        ladder['C'] = generate_cluster(ladder['T'], ladder['k'])
    ladder_clusters, ladder_sizes = ladder['C'], ladder['sizes']

    peak_tree = generate_tree([(peak.rtime, 0) for peak in peaks])
    # generate cluster should use so-called balance tree
    peak_clusters = generate_cluster(peak_tree, ladder['k'])

    def clustered_total():
        # number of rtimes currently held across all clusters
        return len(reduce(operator.add, peak_clusters))

    # drop a single-member cluster at either end while there are more
    # clustered rtimes than ladder sizes
    if len(peak_clusters[-1]) == 1 and clustered_total() > len(ladder_sizes):
        del peak_clusters[-1]
    if len(peak_clusters[0]) == 1 and clustered_total() > len(ladder_sizes):
        del peak_clusters[0]

    # re-cluster the surviving rtimes when too few clusters remain
    if len(peak_clusters) < ladder['k']:
        surviving = reduce(operator.add, peak_clusters)
        peak_tree = generate_tree([(rtime, 0) for rtime in surviving])
        peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # short cut, in case we have good high quality peaks
    if sum(len(cluster) for cluster in peak_clusters) == len(ladder_sizes):
        hq_peaks = sum(peak_clusters, [])
        zres = estimate_z(hq_peaks, ladder_sizes)
        dp_result = align_dp(hq_peaks, ladder_sizes, [1.0] * len(hq_peaks),
                             zres.z, zres.rss)
        dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        if score > 0.9:
            return AlignResult(score, msg, dp_result,
                               const.alignmethod.hcm_strict)

    cluster_pairings, _ = align_clusters(peak_clusters, ladder_clusters)

    # keep only the pairings that pass the per-cluster check
    initial_pairs = []
    for pairing in cluster_pairings:
        if not is_good_pairing(pairing):
            cverr(3, '>> this pairings is not included:\n%s' % pairing)
            continue
        initial_pairs.extend(pairing)

    cverr(3, '>> initial pairs:\n%s' % initial_pairs)

    if not initial_pairs:
        return AlignResult(-1, 'E: no initial pairs defined!', None, None)

    # try to dp align the initial pairs as a shortcut for good sample or peaks
    rtimes, sizes = zip(*initial_pairs)
    zres = estimate_z(rtimes, sizes)
    dp_result = align_dp([peak.rtime for peak in peaks], ladder_sizes,
                         generate_similarity(peaks), zres.z, zres.rss)
    dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)

    return AlignResult(-1, 'ERR: alignment needs minimization', None, None,
                       initial_pairs=initial_pairs)