Python align_dp 예제들, fatools.lib.fautil.alignutils.align_dp Python 예제들

예제 #1

0

파일 보기

파일: gmalign.py 프로젝트: nebiolabs/fatools

    def get_initial_z(self):
        """
        Return the best initial fit derived from the anchor pairs.

        For every candidate polynomial order a fit is estimated from the
        anchor rtimes/sizes and passed, together with all rtimes and sizes,
        to align_dp().  The align_dp() result with the smallest ``rss`` is
        returned.
        """

        # Order 2 is only tried when the anchors span more than 20% of the
        # whole size range and at least 5 anchor pairs are available.
        anchor_span = self.anchor_sizes[-1] - self.anchor_sizes[0]
        full_span = self.sizes[-1] - self.sizes[0]
        if anchor_span > 0.2 * full_span and len(self.anchor_pairs) >= 5:
            candidate_orders = [1, 2]
        else:
            candidate_orders = [1]

        candidates = []
        for fit_order in candidate_orders:
            anchor_fit = estimate_z(self.anchor_rtimes, self.anchor_sizes,
                                    fit_order)
            candidates.append(
                align_dp(self.rtimes, self.sizes, anchor_fit.z,
                         anchor_fit.rss, fit_order))

        return sorted(candidates, key=lambda res: res.rss)[0]

예제 #2

0

파일 보기

파일: gmalign.py 프로젝트: edawine/fatools

    def get_initial_z(self):
        """
        Return the best initial fit derived from the anchor pairs.

        A fit is estimated from the anchor rtimes/sizes for each candidate
        order and refined by align_dp() over all rtimes and sizes; the
        align_dp() result having the lowest ``rss`` wins.
        """

        # A second-order fit is attempted only if the anchors cover more
        # than 20% of the total size range and there are 5+ anchor pairs.
        anchor_range = self.anchor_sizes[-1] - self.anchor_sizes[0]
        total_range = self.sizes[-1] - self.sizes[0]
        if anchor_range > 0.2 * total_range and len(self.anchor_pairs) >= 5:
            orders_to_try = [1, 2]
        else:
            orders_to_try = [1]

        dp_candidates = []
        for poly_order in orders_to_try:
            estimate = estimate_z(self.anchor_rtimes, self.anchor_sizes,
                                  poly_order)
            dp_candidates.append(
                align_dp(self.rtimes, self.sizes, estimate.z, estimate.rss,
                         poly_order))

        return sorted(dp_candidates, key=lambda cand: cand.rss)[0]

예제 #3

0

파일 보기

파일: pmalign.py 프로젝트: nebiolabs/fatools

def estimate_pm(peaks, bpsizes):
    """
    Match ladder bp sizes to peaks, seeded by a two-point linear fit.

    Every candidate rtime pair from prepare_rtimes() is fitted linearly
    against the 2nd and next-to-last ladder sizes; the fit with the lowest
    ZFunc score seeds a final align_dp() pass.

    Returns a tuple ``(pairs, z)`` where ``pairs`` holds ``(x[1], x[0])``
    for each entry of the align_dp() result's ``sized_peaks`` and ``z`` is
    that result's ``z``.
    """

    candidate_pairs = prepare_rtimes([peak.rtime for peak in peaks])
    reference_sizes = [bpsizes[1], bpsizes[-2]]

    zfunc = ZFunc(peaks, bpsizes, [], estimate=True)

    ranked_fits = []
    for rtime_pair in candidate_pairs:

        # only pairs in strictly increasing rtime order are usable
        if rtime_pair[0] >= rtime_pair[1]:
            continue

        # straight line through the two (rtime, bpsize) points
        linear_fit = estimate_z(rtime_pair, reference_sizes, 1)

        # score the line against the peaks and ladder held by zfunc
        ranked_fits.append((zfunc(linear_fit.z), linear_fit))

    best_fit = sorted(ranked_fits, key=lambda entry: entry[0])[0][1]

    # refine the best linear fit with dynamic programming
    dp_result = align_dp(zfunc.rtimes, zfunc.sizes, zfunc.similarity,
                         best_fit.z, best_fit.rss)

    matched = [(sized[1], sized[0]) for sized in dp_result.sized_peaks]
    return (matched, dp_result.z)

예제 #4

0

파일 보기

def align_de(peaks, ladder, initial_pair=None):
    """Align peaks to a ladder with the differential evolution method.

    Up to three rounds of differential evolution are run on the ZFunc
    objective.  After each round the resulting pairs are re-fitted with
    estimate_z(); the loop stops early once the fit's RSS drops below the
    number of paired sizes.  The lowest-RSS fit seeds a final align_dp()
    pass, which is then scored with the ladder's QC function.

    Args:
        peaks: peaks handed to ZFunc.
        ladder: mapping providing 'sizes' and 'qcfunc'.
        initial_pair: optional initial pairs handed to ZFunc.  Defaults to
            a fresh empty list on every call.

    Returns:
        AlignResult carrying the 'relax' QC score and message, the
        align_dp() result and const.alignmethod.de_relax.
    """
    # A literal [] default would be one list shared by every call.
    if initial_pair is None:
        initial_pair = []

    cerr('I: differential evolution method is running!')

    sizes = ladder['sizes']

    f = ZFunc(peaks, sizes, initial_pair)
    bounds = [(-1e-10, 1e-10), (-1e-5, 1e-5), (0.01, 0.1), (-75, 75)]

    results = []

    for niter in range(3):

        res = differential_evolution(f,
                                     bounds,
                                     tol=1e-5,
                                     mutation=(0.4, 1.5),
                                     popsize=30,
                                     recombination=0.8)

        pairs, _ = f.get_pairs(res.x)
        rtimes, bpsizes = zip(*pairs)
        # NOTE(review): the fit order is the 0-based round index (0, 1, 2),
        # exactly as before -- confirm that order 0 on the first round is
        # intended (align_gm starts counting at 1).
        zres = estimate_z(rtimes, bpsizes, niter)

        cerr('I: DE iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' %
             (niter + 1, len(pairs), zres.rss))
        results.append(zres)

        # good enough: stop iterating
        if zres.rss < len(bpsizes) * 1.0:
            break

    results.sort(key=lambda x: x.rss)
    zres = results[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, zres.z, zres.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.de_relax)

예제 #5

0

파일 보기

파일: gmalign.py 프로젝트: edawine/fatools

def align_de(peaks, ladder, initial_pair=None):
    """Align peaks to a ladder with the differential evolution method.

    Runs at most three differential evolution rounds on the ZFunc
    objective, re-fitting the resulting pairs with estimate_z() after each
    round and stopping early when the fit's RSS is below the number of
    paired sizes.  The fit with the lowest RSS seeds a final align_dp()
    pass that is scored with the ladder's QC function.

    Args:
        peaks: peaks handed to ZFunc.
        ladder: mapping providing 'sizes' and 'qcfunc'.
        initial_pair: optional initial pairs handed to ZFunc.  Defaults to
            a fresh empty list on every call.

    Returns:
        AlignResult carrying the 'relax' QC score and message, the
        align_dp() result and const.alignmethod.de_relax.
    """
    # Avoid a mutable default: a literal [] would be shared across calls.
    if initial_pair is None:
        initial_pair = []

    cerr('I: differential evolution method is running!')

    sizes = ladder['sizes']

    f = ZFunc(peaks, sizes, initial_pair)
    bounds = [(-1e-10, 1e-10), (-1e-5, 1e-5), (0.01, 0.1), (-75, 75)]

    results = []

    for niter in range(3):

        res = differential_evolution(f, bounds, tol=1e-5, mutation=(0.4, 1.5),
                                     popsize=30, recombination=0.8)

        pairs, _ = f.get_pairs(res.x)
        rtimes, bpsizes = zip(*pairs)
        # NOTE(review): the fit order is the 0-based round index (0, 1, 2),
        # unchanged from before -- confirm that order 0 on the first round
        # is intended.
        zres = estimate_z(rtimes, bpsizes, niter)

        cerr('I: DE iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (niter + 1, len(pairs), zres.rss))
        results.append(zres)

        # good enough: stop iterating
        if zres.rss < len(bpsizes) * 1.0:
            break

    results.sort(key=lambda x: x.rss)
    zres = results[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, zres.z, zres.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.de_relax)

예제 #6

0

파일 보기

파일: pmalign.py 프로젝트: edawine/fatools

def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment towards the ladder sizes above the anchors.

    Ladder sizes larger than the biggest anchor size are added one step at
    a time (two at once when the following size is close by).  After each
    addition the score is re-minimized, and the step is accepted only when
    the RSS grows by less than 70.  The outcome is finally checked with
    align_dp().

    Args:
        peaks: peaks handed to ZFunc.
        ladder: mapping providing 'sizes' and 'order'.
        anchor_pairs: iterable of 2-item pairs, unzipped here as
            (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ZFunc instance.
        The values come from align_dp() unless its RSS exceeds the
        incremental RSS by more than 50, in which case the incremental
        values are returned.
    """

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    # same list object as anchor_bpsizes; it grows as sizes are added below
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Start from the anchors themselves.  Previously `pairs` was bound only
    # when an extension step was accepted, so the plot()/return below raised
    # UnboundLocalError when there was nothing to extend or the first step
    # was rejected.
    pairs = anchor_pairs

    while remaining_sizes:

        current_sizes.append(remaining_sizes.pop(0))
        # take the following size as well when it lies within 11 of the one
        # just added and the largest remaining size is less than 100 away
        if (remaining_sizes and
                (remaining_sizes[-1] - current_sizes[-1]) < 100 and
                (remaining_sizes[0] - current_sizes[-1]) < 11):
            current_sizes.append(remaining_sizes.pop(0))

        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, order)
        # NOTE(review): pairs are computed with the previous z, not next_z;
        # kept as-is -- confirm this is intended.
        next_pairs, next_rss = f.get_pairs(z)

        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f

예제 #7

0

파일 보기

파일: pmalign.py 프로젝트: kayDaramola/fatools

def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Grow an anchor alignment upwards, one ladder step at a time.

    Ladder sizes above the largest anchor size are appended step by step
    (two together when the next size is close by).  Each addition is
    followed by a score minimization and is accepted only when the RSS
    increases by less than 70.  A final align_dp() pass validates the
    outcome.

    Args:
        peaks: peaks handed to ZFunc.
        ladder: mapping providing 'sizes' and 'order'.
        anchor_pairs: iterable of 2-item pairs, unzipped here as
            (rtime, bpsize).
        anchor_z: not used by this function.

    Returns:
        Tuple ``(pairs, z, rss, f)`` where ``f`` is the ZFunc instance.
        The align_dp() values are returned unless its RSS is more than 50
        above the incremental RSS; then the incremental values are kept.
    """

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    # alias of anchor_bpsizes; extended in place while sizes are added
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Default to the anchor pairs: `pairs` used to be assigned only inside
    # the acceptance branch, which left it unbound (UnboundLocalError at the
    # plot()/return below) when no step existed or the first was rejected.
    pairs = anchor_pairs

    while remaining_sizes:

        current_sizes.append(remaining_sizes.pop(0))
        # also take the following size when it is within 11 of the one just
        # added and the largest remaining size is less than 100 away
        if (remaining_sizes and
                (remaining_sizes[-1] - current_sizes[-1]) < 100 and
                (remaining_sizes[0] - current_sizes[-1]) < 11):
            current_sizes.append(remaining_sizes.pop(0))

        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, order)
        # NOTE(review): get_pairs() receives the previous z rather than
        # next_z; left unchanged -- confirm this is intended.
        next_pairs, next_rss = f.get_pairs(z)

        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f

예제 #8

0

파일 보기

def estimate_pm(peaks, bpsizes):
    """Match ladder bp sizes to peaks, seeded by a two-point linear fit.

    Each candidate rtime pair from prepare_rtimes() is fitted linearly
    against the 2nd and next-to-last entries of ``bpsizes``; the fit with
    the lowest ZFunc score seeds a final align_dp() pass.

    Returns a tuple ``(pairs, z)``: ``pairs`` holds ``(x[1], x[0])`` for
    each entry of the align_dp() result's ``sized_peaks`` and ``z`` is that
    result's ``z``.
    """

    candidate_pairs = prepare_rtimes([peak.rtime for peak in peaks])
    reference_sizes = [bpsizes[1], bpsizes[-2]]

    zfunc = ZFunc(peaks, bpsizes, [], estimate=True)

    ranked_fits = []
    for rtime_pair in candidate_pairs:
        # only pairs in strictly increasing rtime order are usable
        if rtime_pair[0] >= rtime_pair[1]:
            continue

        # straight line through the two (rtime, bpsize) points
        linear_fit = estimate_z(rtime_pair, reference_sizes, 1)
        ranked_fits.append((zfunc(linear_fit.z), linear_fit))
        if is_verbosity(5):
            plot(zfunc.rtimes, zfunc.sizes, linear_fit.z, [])

    best_fit = sorted(ranked_fits, key=lambda entry: entry[0])[0][1]

    dp_result = align_dp(zfunc.rtimes, zfunc.sizes, zfunc.similarity,
                         best_fit.z, best_fit.rss)

    def swapped_pairs():
        # fresh list on every call, as in the plot and return paths below
        return [(sized[1], sized[0]) for sized in dp_result.sized_peaks]

    if is_verbosity(5):
        plot(zfunc.rtimes, zfunc.sizes, dp_result.z, swapped_pairs())

    return (swapped_pairs(), dp_result.z)

예제 #9

0

파일 보기

파일: pmalign.py 프로젝트: edawine/fatools

def estimate_pm(peaks, bpsizes):
    """Pair ladder bp sizes with peaks, starting from a two-point line.

    For every candidate rtime pair produced by prepare_rtimes(), a linear
    fit to the 2nd and next-to-last entries of ``bpsizes`` is scored with
    ZFunc.  The best-scoring fit is refined by align_dp().

    Returns a tuple ``(pairs, z)``: ``pairs`` holds ``(x[1], x[0])`` for
    each entry of the align_dp() result's ``sized_peaks`` and ``z`` is that
    result's ``z``.
    """

    rtime_candidates = prepare_rtimes([pk.rtime for pk in peaks])
    size_pair = [bpsizes[1], bpsizes[-2]]

    objective = ZFunc(peaks, bpsizes, [], estimate=True)

    scored = []
    for rtime_pair in rtime_candidates:
        # skip pairs that are not in strictly increasing rtime order
        if rtime_pair[0] >= rtime_pair[1]:
            continue

        # line through the two (rtime, bpsize) points
        line_fit = estimate_z(rtime_pair, size_pair, 1)
        scored.append((objective(line_fit.z), line_fit))
        if is_verbosity(5):
            plot(objective.rtimes, objective.sizes, line_fit.z, [])

    winner = sorted(scored, key=lambda item: item[0])[0][1]

    dp_result = align_dp(objective.rtimes, objective.sizes,
                         objective.similarity, winner.z, winner.rss)

    def as_pairs():
        # rebuilt on each call, matching the separate plot and return lists
        return [(entry[1], entry[0]) for entry in dp_result.sized_peaks]

    if is_verbosity(5):
        plot(objective.rtimes, objective.sizes, dp_result.z, as_pairs())

    return (as_pairs(), dp_result.z)

예제 #10

0

파일 보기

파일: pmalign.py 프로젝트: nebiolabs/fatools

def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching method.

    Args:
        peaks: peak objects exposing ``rtime``.
        ladder: mapping providing 'signature', 'sizes' and 'qcfunc'
            (align_upper_pm() additionally reads 'order').
        anchor_pairs: optional iterable of 2-item pairs, unzipped here as
            (rtime, bpsize).  When empty or None, anchors are estimated by
            estimate_pm() from the peaks with 1500 < rtime < 5000.

    Returns:
        AlignResult flagged const.alignmethod.pm_strict when the strict QC
        score exceeds 0.9, otherwise the relaxed QC result flagged
        const.alignmethod.pm_relax.
    """

    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]

        # this finds the pair of peaks that best match to the 2nd and
        # next-to-last ladder steps, and does a linear fit to the rest of
        # peaks to find the peaks matched to ladder steps
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep only the coefficients so that initial_z has the same kind of
        # value as the z returned by estimate_pm() in the branch above
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # sorted copy: the caller's sequence is left untouched
    anchor_pairs = sorted(anchor_pairs)

    # if the number of anchor pairs equals the number of ladder steps, no
    # need to do pair matching
    if len(anchor_pairs) == len(ladder['sizes']):

        f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)

        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 2)
        score, z = minimize_score(f, zres.z, 2)
        pairs, rss = f.get_pairs(z)

    else:
        pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs,
                                          initial_z)
        if is_verbosity(4):
            print(pairs)
        pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)

    if is_verbosity(4):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    # The statements that used to follow this return were unreachable and
    # have been removed.
    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)

예제 #11

0

파일 보기

파일: pmalign.py 프로젝트: edawine/fatools

def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching method.

    Args:
        peaks: peak objects exposing ``rtime``.
        ladder: mapping providing 'signature' and 'qcfunc'
            (align_upper_pm() additionally reads 'sizes' and 'order').
        anchor_pairs: optional iterable of 2-item pairs, unzipped here as
            (rtime, bpsize).  When empty or None, anchors are estimated by
            estimate_pm() from the peaks inside the anchor rtime window.

    Returns:
        AlignResult flagged const.alignmethod.pm_strict when the strict QC
        score exceeds 0.9, otherwise the relaxed QC result flagged
        const.alignmethod.pm_relax.
    """

    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        # stretch the anchor window proportionally when the longest rtime
        # exceeds PEAK_RTIME_UPPER_BOUND
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep only the coefficients so that initial_z has the same kind of
        # value as the z returned by estimate_pm() in the branch above
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # sorted copy: the caller's sequence is left untouched
    anchor_pairs = sorted(anchor_pairs)
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint; pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z, [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    # The statements that used to follow this return were unreachable and
    # have been removed.
    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)

예제 #12

0

파일 보기

파일: gmalign.py 프로젝트: edawine/fatools

def align_gm(peaks, ladder, anchor_pairs, z=None):
    """Align peaks to a ladder with the generalized minimization method.

    The ZFunc objective is minimized with Nelder-Mead, the resulting pairs
    are re-fitted with estimate_z() (order 1, 2, then 3 for every later
    round), and the process repeats until the RSS changes by at most 1e-3
    or falls below the number of pairs.  The lowest-RSS fit seeds a final
    align_dp() pass.

    When ``z`` is None the starting point is taken from
    ZFunc.get_initial_z().

    Returns an AlignResult flagged const.alignmethod.gm_strict when the
    strict QC score exceeds 0.9, otherwise the relaxed QC result flagged
    const.alignmethod.gm_relax.
    """

    cerr('I: generalized minimization method is running!')

    zfunc = ZFunc(peaks, ladder['sizes'], anchor_pairs)
    if z is None:
        z = zfunc.get_initial_z().z

    # seeded so that the first convergence check always passes
    current_rss, previous_rss = -1, 0

    round_no = 1
    fits = []
    while abs(current_rss - previous_rss) > 1e-3:

        previous_rss = current_rss

        optimum = minimize(zfunc, z, method='Nelder-Mead', tol=1e-6)

        pairs, _ = zfunc.get_pairs(optimum.x)

        rtimes, bpsizes = zip(*pairs)
        fit = estimate_z(rtimes, bpsizes, min(round_no, 3))
        current_rss, z = fit.rss, fit.z
        cerr('I: GM iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (round_no, len(pairs), current_rss))
        round_no += 1
        fits.append(fit)

        # good enough: stop iterating
        if current_rss < len(pairs) * 1.0:
            break

    # pick the fit with the smallest RSS
    best_fit = sorted(fits, key=lambda candidate: candidate.rss)[0]

    # last dp
    dp_result = align_dp(zfunc.rtimes, zfunc.sizes, best_fit.z, best_fit.rss)
    dp_result.sized_peaks = zfunc.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        method = const.alignmethod.gm_strict
    else:
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        method = const.alignmethod.gm_relax
    return AlignResult(score, msg, dp_result, method)

예제 #13

0

파일 보기

파일: pmalign.py 프로젝트: bwlang/fatools

def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching method.

    Args:
        peaks: peak objects exposing ``rtime``.
        ladder: mapping providing 'signature' and 'qcfunc'
            (align_upper_pm() additionally reads 'sizes' and 'order').
        anchor_pairs: optional iterable of 2-item pairs, unzipped here as
            (rtime, bpsize).  When empty or None, anchors are estimated by
            estimate_pm() from the peaks with 1500 < rtime < 5000.

    Returns:
        AlignResult flagged const.alignmethod.pm_strict when the strict QC
        score exceeds 0.9, otherwise the relaxed QC result flagged
        const.alignmethod.pm_relax.
    """

    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep only the coefficients so that initial_z has the same kind of
        # value as the z returned by estimate_pm() in the branch above
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # sorted copy: the caller's sequence is left untouched
    anchor_pairs = sorted(anchor_pairs)
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    # NOTE(review): this dump is unconditional (not gated by is_verbosity);
    # kept as-is.
    import pprint
    pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    # The statements that used to follow this return were unreachable and
    # have been removed.
    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)

예제 #14

0

파일 보기

def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching method.

    Args:
        peaks: peak objects exposing ``rtime``.
        ladder: mapping providing 'signature' and 'qcfunc'
            (align_upper_pm() additionally reads 'sizes' and 'order').
        anchor_pairs: optional iterable of 2-item pairs, unzipped here as
            (rtime, bpsize).  When empty or None, anchors are estimated by
            estimate_pm() from the peaks inside the anchor rtime window.

    Returns:
        AlignResult flagged const.alignmethod.pm_strict when the strict QC
        score exceeds 0.9, otherwise the relaxed QC result flagged
        const.alignmethod.pm_relax.
    """

    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        # stretch the anchor window proportionally when the longest rtime
        # exceeds PEAK_RTIME_UPPER_BOUND
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [
            p for p in peaks if anchor_start < p.rtime < anchor_end
        ]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # keep only the coefficients so that initial_z has the same kind of
        # value as the z returned by estimate_pm() in the branch above
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # sorted copy: the caller's sequence is left untouched
    anchor_pairs = sorted(anchor_pairs)
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    # The statements that used to follow this return were unreachable and
    # have been removed.
    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)

예제 #15

0

파일 보기

파일: hcalign.py 프로젝트: edawine/fatools

def align_hc(peaks, ladder):
    """Align peaks to a ladder by clustering both and pairing the clusters.

    peaks: peak objects exposing ``rtime``
    ladder: mapping providing 'sizes', 'k' and 'qcfunc'; the tree ('T')
        and clusters ('C') derived from the sizes are cached in it

    returns: an AlignResult; score -1 signals that no initial pairs were
        found or that the alignment still needs minimization (the latter
        carries ``initial_pairs``)
    """

    # build (and cache) the ladder tree and clusters on first use
    if 'C' not in ladder:
        if 'T' not in ladder:
            ladder['T'] = generate_tree([(size, 0) for size in ladder['sizes']])
        ladder['C'] = generate_cluster(ladder['T'], ladder['k'])
    ladder_clusters = ladder['C']
    ladder_sizes = ladder['sizes']

    peak_tree = generate_tree([(peak.rtime, 0) for peak in peaks])
    peak_clusters = generate_cluster(peak_tree, ladder['k'])

    print(peak_clusters)

    # drop a single-member cluster at either end while there are more
    # clustered items than ladder sizes
    if (len(peak_clusters[-1]) == 1 and
            len(reduce(operator.add, peak_clusters)) > len(ladder_sizes)):
        del peak_clusters[-1]
    if (len(peak_clusters[0]) == 1 and
            len(reduce(operator.add, peak_clusters)) > len(ladder_sizes)):
        del peak_clusters[0]

    # re-cluster the remaining items when clusters were dropped
    if len(peak_clusters) < ladder['k']:
        peak_tree = generate_tree(
            [(item, 0) for item in reduce(operator.add, peak_clusters)])
        peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # short cut: as many clustered items as ladder sizes
    if sum(len(cluster) for cluster in peak_clusters) == len(ladder_sizes):
        hq_peaks = sum(peak_clusters, [])
        hq_fit = estimate_z(hq_peaks, ladder_sizes)
        dp_result = align_dp(hq_peaks, ladder_sizes, [1.0] * len(hq_peaks),
                             hq_fit.z, hq_fit.rss)
        dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        if score > 0.9:
            return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)

    cluster_pairings, expected_missing = align_clusters(peak_clusters,
                                                        ladder_clusters)

    # keep only the pairings that pass is_good_pairing()
    initial_pairs = []
    for pairing in cluster_pairings:
        if not is_good_pairing(pairing):
            cverr(3, '>> this pairings is not included:\n%s' % pairing)
            continue
        initial_pairs.extend(pairing)

    cverr(3, '>> initial pairs:\n%s' % initial_pairs)

    if not initial_pairs:
        return AlignResult(-1, 'E: no initial pairs defined!', None, None)

    # try to dp align the initial pairs as a shortcut for good sample or peaks
    rtimes, sizes = zip(*initial_pairs)
    pair_fit = estimate_z(rtimes, sizes)

    dp_result = align_dp([peak.rtime for peak in peaks], ladder_sizes,
                         generate_similarity(peaks), pair_fit.z, pair_fit.rss)
    dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)

    return AlignResult(-1, 'ERR: alignment needs minimization', None, None,
                       initial_pairs=initial_pairs)

예제 #16

0

파일 보기

파일: gmalign.py 프로젝트: nebiolabs/fatools

def align_gm(peaks, ladder, anchor_pairs, z=None):
    """Align peaks to a ladder with the generalized minimization method.

    Each round minimizes the ZFunc objective with Nelder-Mead and re-fits
    the resulting pairs with estimate_z() (order 1, then 2, then 3 for all
    later rounds).  Rounds continue until the RSS changes by at most 1e-3
    or drops below the number of pairs.  The fit with the lowest RSS seeds
    a final align_dp() pass.

    When ``z`` is None the starting point is taken from
    ZFunc.get_initial_z().

    Returns an AlignResult flagged const.alignmethod.gm_strict when the
    strict QC score exceeds 0.9, otherwise the relaxed QC result flagged
    const.alignmethod.gm_relax.
    """

    cerr('I: generalized minimization method is running!')

    objective = ZFunc(peaks, ladder['sizes'], anchor_pairs)
    if z is None:
        z = objective.get_initial_z().z

    # seeded so that the first convergence check always passes
    rss_now, rss_before = -1, 0

    iteration = 1
    round_fits = []
    while abs(rss_now - rss_before) > 1e-3:

        rss_before = rss_now

        solution = minimize(objective, z, method='Nelder-Mead', tol=1e-6)

        pairs, _ = objective.get_pairs(solution.x)

        rtimes, bpsizes = zip(*pairs)
        round_fit = estimate_z(rtimes, bpsizes, min(iteration, 3))
        rss_now, z = round_fit.rss, round_fit.z
        cerr('I: GM iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (iteration, len(pairs), rss_now))
        iteration += 1
        round_fits.append(round_fit)

        # good enough: stop iterating
        if rss_now < len(pairs) * 1.0:
            break

    # pick the fit with the smallest RSS
    best = sorted(round_fits, key=lambda fit: fit.rss)[0]

    # last dp
    dp_result = align_dp(objective.rtimes, objective.sizes, best.z, best.rss)
    dp_result.sized_peaks = objective.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        method = const.alignmethod.gm_strict
    else:
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        method = const.alignmethod.gm_relax
    return AlignResult(score, msg, dp_result, method)

예제 #17

0

파일 보기

파일: hcalign.py 프로젝트: nebiolabs/fatools

def align_hc(peaks, ladder):
    """Align peaks to a ladder by clustering both and pairing the clusters.

    peaks: peak objects exposing ``rtime``
    ladder: mapping providing 'sizes', 'k' and 'qcfunc'; the tree ('T')
        and clusters ('C') derived from the sizes are cached in it

    returns: an AlignResult; score -1 signals that no initial pairs were
        found or that the alignment still needs minimization (the latter
        carries ``initial_pairs``)
    """

    # build (and cache) the ladder tree and clusters on first use
    if 'C' not in ladder:
        if 'T' not in ladder:
            ladder['T'] = generate_tree([(size, 0) for size in ladder['sizes']])
        ladder['C'] = generate_cluster(ladder['T'], ladder['k'])
    ladder_clusters = ladder['C']
    ladder_sizes = ladder['sizes']

    peak_tree = generate_tree([(peak.rtime, 0) for peak in peaks])
    peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # drop a single-member cluster at either end while there are more
    # clustered items than ladder sizes
    if (len(peak_clusters[-1]) == 1 and
            len(reduce(operator.add, peak_clusters)) > len(ladder_sizes)):
        del peak_clusters[-1]
    if (len(peak_clusters[0]) == 1 and
            len(reduce(operator.add, peak_clusters)) > len(ladder_sizes)):
        del peak_clusters[0]

    # re-cluster the remaining items when clusters were dropped
    if len(peak_clusters) < ladder['k']:
        peak_tree = generate_tree(
            [(item, 0) for item in reduce(operator.add, peak_clusters)])
        peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # short cut: as many clustered items as ladder sizes
    if sum(len(cluster) for cluster in peak_clusters) == len(ladder_sizes):
        hq_peaks = sum(peak_clusters, [])
        hq_fit = estimate_z(hq_peaks, ladder_sizes)
        dp_result = align_dp(hq_peaks, ladder_sizes, [1.0] * len(hq_peaks),
                             hq_fit.z, hq_fit.rss)
        dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        if score > 0.9:
            return AlignResult(score, msg, dp_result,
                               const.alignmethod.hcm_strict)

    cluster_pairings, expected_missing = align_clusters(
        peak_clusters, ladder_clusters)

    # keep only the pairings that pass is_good_pairing()
    initial_pairs = []
    for pairing in cluster_pairings:
        if not is_good_pairing(pairing):
            cverr(3, '>> this pairings is not included:\n%s' % pairing)
            continue
        initial_pairs.extend(pairing)

    cverr(3, '>> initial pairs:\n%s' % initial_pairs)

    if not initial_pairs:
        return AlignResult(-1, 'E: no initial pairs defined!', None, None)

    # try to dp align the initial pairs as a shortcut for good sample or peaks
    rtimes, sizes = zip(*initial_pairs)
    pair_fit = estimate_z(rtimes, sizes)

    dp_result = align_dp([peak.rtime for peak in peaks], ladder_sizes,
                         generate_similarity(peaks), pair_fit.z, pair_fit.rss)
    dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)

    return AlignResult(-1,
                       'ERR: alignment needs minimization',
                       None,
                       None,
                       initial_pairs=initial_pairs)