Ejemplo n.º 1
0
def estimate_de( peaks, sizes ):
    """Estimate an order-1 fit of the ladder with differential evolution.

    Runs at most three differential-evolution searches over the two bounded
    coefficients.  After each search the resulting pairs are refitted with
    ``estimate_z`` (order 1); the loop stops early once that refit's RSS is
    below the number of paired sizes.  The attempt with the smallest refit
    RSS is plotted and returned as ``(pairs, z)``.
    """

    zfunc = ZFunc( peaks, sizes, [], estimate=True )
    # search box for the two coefficients handed to the objective
    search_bounds = [ (0.01, 0.5), (-275, 75) ]
    max_rounds = 3

    attempts = []
    for round_no in range(1, max_rounds + 1):

        de_result = differential_evolution(
            zfunc, search_bounds, tol=1e-5, mutation=(0.3, 1.7),
            popsize=45, recombination=0.5, strategy='rand1bin')

        matched, _ = zfunc.get_pairs(de_result.x)
        matched.sort()
        rtimes, bpsizes = zip( *matched )
        fit = estimate_z(rtimes, bpsizes, 1)

        cerr('I: DE iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (round_no, len(matched), fit.rss))
        attempts.append( (fit, matched) )

        # good enough: RSS below one unit per paired size
        if fit.rss < len(bpsizes) * 1.0:
            break

    # stable sort keeps the earliest attempt on ties
    best_fit, best_pairs = sorted( attempts, key = lambda item: item[0].rss )[0]

    plot(zfunc.rtimes, zfunc.sizes, best_fit.z, best_pairs)

    return best_pairs, best_fit.z
Ejemplo n.º 2
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downwards, one ladder size at a time.

    Starting from an order-3 fit of the sorted anchors, each ladder size
    below the smallest anchor size is prepended (largest first), the fit is
    refined with ``minimize_score`` and the pairing is recomputed.
    ``anchor_z`` is not used.  Returns ``(pairs, z, rss, f)``.
    """

    ordered_anchors = sorted(anchor_pairs)
    rtime_column, size_column = zip(*ordered_anchors)
    anchor_rtimes = list(rtime_column)
    # this list is grown in place and is the same object handed to ZFunc
    active_sizes = list(size_column)
    smallest_anchor_size = active_sizes[0]
    pending_sizes = [s for s in ladder['sizes'] if s < smallest_anchor_size]

    z = estimate_z(anchor_rtimes, active_sizes, 3).z
    f = ZFunc(peaks, active_sizes, ordered_anchors, estimate=True)
    pairs, rss = f.get_pairs(z)

    while pending_sizes:
        # take the largest size still pending and put it in front
        active_sizes.insert(0, pending_sizes.pop())
        f.set_sizes(active_sizes)
        _, z = minimize_score(f, z, 3)
        pairs, rss = f.get_pairs(z)
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Ejemplo n.º 3
0
    def get_initial_z(self):
        """
        Return the best initial alignment derived from the anchor pairs.

        Every candidate polynomial order is fitted to the anchors with
        ``estimate_z`` and the fit is handed to ``align_dp``; the
        ``align_dp`` result with the smallest ``rss`` is returned.
        """

        # order 2 is only tried when the anchors span more than 20% of the
        # full size range and there are at least five anchor pairs
        anchor_span = self.anchor_sizes[-1] - self.anchor_sizes[0]
        total_span = self.sizes[-1] - self.sizes[0]
        if anchor_span > 0.2 * total_span and len(self.anchor_pairs) >= 5:
            candidate_orders = [1, 2]
        else:
            candidate_orders = [1]

        def _align_with(order):
            fit = estimate_z( self.anchor_rtimes, self.anchor_sizes, order )
            return align_dp(
                    self.rtimes, self.sizes, fit.z, fit.rss, order)

        aligned = [ _align_with(order) for order in candidate_orders ]
        return sorted( aligned, key = lambda res: res.rss )[0]
Ejemplo n.º 4
0
def estimate_de( peaks, sizes ):
    """Estimate an order-1 fit of the ladder with differential evolution.

    Up to three differential-evolution searches are run over the two
    bounded coefficients.  The pairs from each search are refitted with
    ``estimate_z`` (order 1), and the search loop ends early as soon as the
    refit RSS is below the number of paired sizes.  The attempt with the
    lowest refit RSS is plotted and returned as ``(pairs, z)``.
    """

    objective = ZFunc( peaks, sizes, [], estimate=True )
    # search box for the two coefficients handed to the objective
    coefficient_bounds = [ (0.01, 0.5), (-275, 75) ]

    candidates = []
    for iteration in (1, 2, 3):

        outcome = differential_evolution(
            objective, coefficient_bounds, tol=1e-5, mutation=(0.3, 1.7),
            popsize=45, recombination=0.5, strategy='rand1bin')

        paired, _ = objective.get_pairs(outcome.x)
        paired.sort()
        rtimes, bpsizes = zip( *paired )
        refit = estimate_z(rtimes, bpsizes, 1)

        cerr('I: DE iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (iteration, len(paired), refit.rss))
        candidates.append( (refit, paired) )

        # good enough: RSS below one unit per paired size
        if refit.rss < len(bpsizes) * 1.0:
            break

    # stable sort keeps the earliest attempt on ties
    ranked = sorted( candidates, key = lambda entry: entry[0].rss )
    winner, winner_pairs = ranked[0]

    plot(objective.rtimes, objective.sizes, winner.z, winner_pairs)

    return winner_pairs, winner.z
Ejemplo n.º 5
0
    def get_initial_z(self):
        """
        Choose the starting alignment from fits of the anchor pairs.

        The anchors are fitted with ``estimate_z`` for each candidate order,
        each fit is passed through ``align_dp``, and the ``align_dp`` result
        having the lowest ``rss`` is returned.
        """

        # the second-order candidate requires anchors covering more than 20%
        # of the whole size range and at least five anchor pairs
        covered = self.anchor_sizes[-1] - self.anchor_sizes[0]
        whole = self.sizes[-1] - self.sizes[0]
        orders = [1]
        if covered > 0.2 * whole and len(self.anchor_pairs) >= 5:
            orders = [1, 2]

        outcomes = []
        for degree in orders:
            seed = estimate_z( self.anchor_rtimes, self.anchor_sizes, degree )
            outcomes.append(
                align_dp( self.rtimes, self.sizes, seed.z, seed.rss, degree ) )

        ranked = sorted( outcomes, key = lambda outcome: outcome.rss )
        return ranked[0]
Ejemplo n.º 6
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment upwards, one ladder size at a time.

    Starting from a fit of the sorted anchors (order ``ladder['order']``),
    each ladder size above the largest anchor size is appended in turn and
    a refined fit is computed with ``minimize_score``.  ``anchor_z`` is not
    used.  Returns ``(pairs, z, rss, f)``.
    """

    ordered_anchors = sorted(anchor_pairs)
    rtime_column, size_column = zip(*ordered_anchors)
    anchor_rtimes = list(rtime_column)
    # this list is grown in place and is the same object handed to ZFunc
    active_sizes = list(size_column)
    largest_anchor_size = active_sizes[-1]
    pending_sizes = [s for s in ladder['sizes'] if s > largest_anchor_size]
    order = ladder['order']

    z = estimate_z(anchor_rtimes, active_sizes, order).z
    f = ZFunc(peaks, active_sizes, ordered_anchors, estimate=True)
    pairs, rss = f.get_pairs(z)

    while pending_sizes:
        active_sizes.append(pending_sizes.pop(0))
        f.set_sizes(active_sizes)
        _, refined_z = minimize_score(f, z, order)
        # NOTE(review): pairs and rss are computed with the z from BEFORE
        # the refinement; the refined z is only adopted when that rss is
        # below 100 -- confirm this gating is intentional
        pairs, rss = f.get_pairs(z)
        if rss < 100:
            z = refined_z
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Ejemplo n.º 7
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Lower-range pair-minimisation alignment (currently disabled).

    NOTE: the function prints ``anchor_pairs`` and then unconditionally
    raises ``RuntimeError``; every statement after that raise is
    unreachable.  The remaining body is kept as written: it scores
    candidate fits built from peaks below the first anchor and returns
    ``(pairs, z, rss, f)``, falling back to the anchors when no candidate
    was scored.
    """

    # anchor pairs must be in ascending order
    print(anchor_pairs)
    # hard stop: nothing below this line ever executes
    raise RuntimeError

    last_rtime = anchor_pairs[-1][0]
    last_size = anchor_pairs[-1][1]
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)

    # preparing all "lower peaks", including anchor peaks
    lower_peaks = [p for p in peaks if p.rtime <= last_rtime]
    lower_sizes = [s for s in ladder['sizes'] if s <= last_size]

    # we try to pair-minimize lower_peaks and lower_sizes

    f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)

    scores = []

    # check the first
    est_first_bpsize = np.poly1d(anchor_z)(lower_peaks[0].rtime)
    remaining_sizes = [s for s in lower_sizes if s >= est_first_bpsize]

    if remaining_sizes:
        # this value is overwritten by the inner loop variable below
        first_bpsize = remaining_sizes[0]

        for first_peak in lower_peaks[:-2]:
            # only peaks before the first anchor are candidates
            if first_peak.rtime >= anchor_pairs[0][0]:
                break

            # try each of the two smallest ladder sizes for this peak
            for first_bpsize in ladder['sizes'][:2]:
                zres = estimate_z([first_peak.rtime] + anchor_rtimes,
                                  [first_bpsize] + anchor_bpsizes, 3)
                #print('rss:', zres.rss)
                #plot(f.rtimes, f.sizes, zres.z, [ (first_peak.rtime, first_bpsize), ] )
                score, z = minimize_score(f, zres.z, 3)

                scores.append((score, z))
                #plot(f.rtimes, f.sizes, z, [ (first_peak.rtime, first_bpsize), ] )

        # lowest score first
        scores.sort(key=lambda x: x[0])
        #import pprint; pprint.pprint( scores[:10] )

    if scores:
        z = scores[0][1]
        pairs, rss = f.get_pairs(z)
    else:
        # no candidate was scored: hand back the anchors unchanged
        z = anchor_z
        pairs = anchor_pairs
        rss = None

    #plot(f.rtimes, f.sizes, z, pairs )

    return pairs, z, rss, f

    # unreachable: follows an unconditional return
    raise RuntimeError
Ejemplo n.º 8
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Search for a fit that covers the ladder sizes above the anchors.

    Each of the last (up to 14) peaks after the final anchor is tentatively
    paired with one seed ladder size, an order-2 fit is built from the
    anchors plus that pair and refined with ``minimize_score``.  The
    lowest-scoring refined fit gives the result ``(pairs, z, rss, f)``.
    When no ladder size lies above the anchors, or no candidate was scored,
    the anchors are returned unchanged with ``rss`` set to ``None``.
    """

    # anchor pairs must be in ascending order
    rtime_column, size_column = zip(*anchor_pairs)
    anchor_rtimes = list(rtime_column)
    anchor_bpsizes = list(size_column)

    # restrict the problem to peaks and sizes from the first anchor upwards
    lowest_rtime = anchor_rtimes[0]
    lowest_bpsize = anchor_bpsizes[0]
    peaks = [p for p in peaks if p.rtime >= lowest_rtime]
    sizes = [s for s in ladder['sizes'] if s >= lowest_bpsize]
    sizes_above_anchors = [s for s in ladder['sizes'] if s > anchor_bpsizes[-1]]

    f = ZFunc(peaks, sizes, anchor_pairs, estimate=True)

    if not sizes_above_anchors:
        return anchor_pairs, anchor_z, None, f

    # size the anchor fit predicts for the very last peak
    predicted_last = np.poly1d(anchor_z)(peaks[-1].rtime)

    # NOTE(review): both lookups below raise IndexError when there is only
    # one size above the anchors, or fewer than three sizes under the
    # prediction
    second_above = sizes_above_anchors[1]
    third_from_top = [s for s in sizes if s < predicted_last][-3]
    seed_bpsize = max(second_above, third_from_top)

    last_anchor_rtime = anchor_pairs[-1][0]
    candidates = []
    for tail_peak in reversed(peaks[-14:]):
        # stop once we walk back into the anchored region
        if tail_peak.rtime <= last_anchor_rtime:
            break

        seed_fit = estimate_z(anchor_rtimes + [tail_peak.rtime],
                              anchor_bpsizes + [seed_bpsize], 2)
        candidates.append(minimize_score(f, seed_fit.z, 2))

    if not candidates:
        return anchor_pairs, anchor_z, None, f

    # each candidate is (score, z); the lowest score wins
    z = sorted(candidates, key=lambda entry: entry[0])[0][1]
    pairs, rss = f.get_pairs(z)

    return pairs, z, rss, f
Ejemplo n.º 9
0
def is_good_pairing(pairs):
    """Tell whether an order-1 fit of *pairs* is tight enough.

    *pairs* is unzipped into retention times and bp sizes, fitted with
    ``estimate_z`` (order 1), and accepted when the fit's total RSS is
    below 0.5 per paired size.
    """

    rtimes, bpsizes = zip(*pairs)
    linear_fit = estimate_z(rtimes, bpsizes, 1)

    # the threshold scales with the number of pairs
    rss_limit = 0.5 * len(bpsizes)
    return linear_fit.rss < rss_limit
Ejemplo n.º 10
0
def is_good_pairing( pairs ):
    """Return whether the order-1 fit of *pairs* stays under the RSS limit.

    The limit is 0.5 times the number of pairs; the fit comes from
    ``estimate_z`` on the unzipped retention times and bp sizes.
    """

    columns = zip( *pairs )
    rtimes, bpsizes = columns
    fit_rss = estimate_z(rtimes, bpsizes, 1).rss

    # total RSS must be less than half a unit per pair
    allowed_rss = 0.5 * len(bpsizes)
    return fit_rss < allowed_rss
Ejemplo n.º 11
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Lower-range pair-minimisation alignment (currently disabled).

    NOTE: the function prints ``anchor_pairs`` and then unconditionally
    raises ``RuntimeError``; every statement after that raise is
    unreachable.  The remaining body is kept as written: it scores
    candidate fits built from peaks below the first anchor and returns
    ``(pairs, z, rss, f)`` for the lowest-scoring one.
    """

    # anchor pairs must be in ascending order
    print(anchor_pairs)
    # hard stop: nothing below this line ever executes
    raise RuntimeError

    last_rtime = anchor_pairs[-1][0]
    last_size = anchor_pairs[-1][1]
    anchor_rtimes, anchor_bpsizes = zip( *anchor_pairs )
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)

    # preparing all "lower peaks", including anchor peaks
    lower_peaks= [ p for p in peaks if p.rtime <= last_rtime ]
    lower_sizes = [ s for s in ladder['sizes'] if s <= last_size]

    # we try to pair-minimize lower_peaks and lower_sizes

    f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)

    # check the first
    print(anchor_z)
    # uses only the last two coefficients of anchor_z, so it needs at least
    # four of them
    est_first_bpsize = anchor_z[2] * lower_peaks[0].rtime + anchor_z[3]
    print('est_first_bpsize:', est_first_bpsize)
    # raises IndexError when no lower size reaches the estimate; the value
    # itself is overwritten by the inner loop variable below
    first_bpsize = [ s for s in lower_sizes if s >= est_first_bpsize ][0]

    scores = []
    for first_peak in lower_peaks[:-2]:
        # only peaks before the first anchor are candidates
        if first_peak.rtime >= anchor_pairs[0][0]:
            break

        # try each of the two smallest ladder sizes for this peak
        for first_bpsize in ladder['sizes'][:2]:
            zres = estimate_z( [ first_peak.rtime ] + anchor_rtimes, [ first_bpsize ] + anchor_bpsizes, 3 )
            #print('rss:', zres.rss)
            #plot(f.rtimes, f.sizes, zres.z, [ (first_peak.rtime, first_bpsize), ] )
            score, z = minimize_score(f, zres.z, 3)

            scores.append( (score, z) )
            #plot(f.rtimes, f.sizes, z, [ (first_peak.rtime, first_bpsize), ] )

    # lowest score first
    scores.sort( key = lambda x: x[0] )
    #import pprint; pprint.pprint( scores[:10] )

    # raises IndexError when no candidate was scored
    z = scores[0][1]
    pairs, rss = f.get_pairs(z)

    print(rss)
    #plot(f.rtimes, f.sizes, z, pairs )

    return pairs, z, rss, f


    # unreachable: follows an unconditional return
    raise RuntimeError
Ejemplo n.º 12
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Search for a fit that covers the ladder sizes above the anchors.

    Each of the last (up to 14) peaks after the final anchor is tentatively
    paired with one seed ladder size; an order-2 fit is built from the
    anchors plus that pair and refined with ``minimize_score``.  The
    lowest-scoring refined fit gives the result.

    Returns ``(pairs, z, rss, f)``.  When no candidate fit can be built
    (fewer than two ladder sizes above the anchors, fewer than three sizes
    below the predicted size of the last peak, or no peak after the last
    anchor) the anchors are returned unchanged as
    ``(anchor_pairs, anchor_z, None, f)`` instead of raising ``IndexError``.
    """

    # anchor pairs must be in ascending order
    anchor_rtimes, anchor_bpsizes = zip( *anchor_pairs )
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)

    # we try to pair-minimize higher peaks and sizes
    first_rtime = anchor_rtimes[0]
    first_bpsize = anchor_bpsizes[0]
    peaks = [ p for p in peaks if p.rtime >= first_rtime ]
    sizes = [ s for s in ladder['sizes'] if s >= first_bpsize ]
    remaining_sizes = [ s for s in ladder['sizes'] if s > anchor_bpsizes[-1] ]

    f = ZFunc(peaks, sizes, anchor_pairs, estimate=True)

    scores = []

    # the seed size needs the second remaining size, so at least two of
    # them must exist
    if peaks and len(remaining_sizes) >= 2:

        # size the anchor fit predicts for the very last peak
        est_last_bpsize = np.poly1d(anchor_z)(peaks[-1].rtime)
        smaller_sizes = [ s for s in sizes if s < est_last_bpsize ]

        # ... and the third-largest size below the prediction
        if len(smaller_sizes) >= 3:
            last_bpsize = max( remaining_sizes[1], smaller_sizes[-3] )
            last_anchor_rtime = anchor_pairs[-1][0]

            for last_peak in reversed(peaks[-14:]):
                # stop once we walk back into the anchored region
                if last_peak.rtime <= last_anchor_rtime:
                    break

                zres = estimate_z(anchor_rtimes + [last_peak.rtime],
                                  anchor_bpsizes + [last_bpsize], 2)
                score, z = minimize_score(f, zres.z, 2)
                scores.append( (score, z) )

    if not scores:
        # nothing could be scored: keep the anchor alignment
        return anchor_pairs, anchor_z, None, f

    scores.sort( key = lambda x: x[0] )
    z = scores[0][1]
    pairs, rss = f.get_pairs(z)

    return pairs, z, rss, f
Ejemplo n.º 13
0
def estimate_pm(peaks, bpsizes):
    """
    Build an initial peak/ladder pairing from a two-point linear seed.

    Each candidate pair of retention times from ``prepare_rtimes`` is fitted
    (order 1) against the 2nd and next-to-last entries of *bpsizes*, and the
    fit is scored with the ``ZFunc`` objective.  The lowest-scoring fit
    seeds ``align_dp``.  Returns ``(pairs, z)`` where each pair is the
    swapped ``(x[1], x[0])`` of an ``align_dp`` sized peak.
    """

    peak_rtimes = [p.rtime for p in peaks]

    candidate_rtime_pairs = prepare_rtimes(peak_rtimes)
    ladder_pair = [bpsizes[1], bpsizes[-2]]

    f = ZFunc(peaks, bpsizes, [], estimate=True)

    # score the straight line through every usable pair of peaks
    scored_fits = []
    for candidate in candidate_rtime_pairs:

        # skip pairs whose first rtime is not before the second
        if not (candidate[0] >= candidate[1]):
            # linear fit through exactly these two points
            fit = estimate_z(candidate, ladder_pair, 1)
            # how well the whole ladder agrees with this line
            scored_fits.append((f(fit.z), fit))

    best_fit = sorted(scored_fits, key=lambda entry: entry[0])[0][1]

    # refine the winning line
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, best_fit.z,
                         best_fit.rss)

    swapped = [(sized[1], sized[0]) for sized in dp_result.sized_peaks]
    return (swapped, dp_result.z)
Ejemplo n.º 14
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Search for a fit that covers the ladder sizes below the anchors.

    Each peak before the first anchor (excluding the last two lower peaks)
    is tentatively paired with each of the two smallest ladder sizes; an
    order-3 fit is built from that pair plus the anchors and refined with
    ``minimize_score``.  The lowest-scoring refined fit gives the result.

    Returns ``(pairs, z, rss, f)``.  When no ladder size reaches the size
    estimated for the first lower peak, or no candidate was scored, the
    anchors are returned unchanged as ``(anchor_pairs, anchor_z, None, f)``
    instead of raising ``IndexError``.
    """

    # anchor pairs must be in ascending order

    last_rtime = anchor_pairs[-1][0]
    last_size = anchor_pairs[-1][1]
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)

    # all "lower peaks" and sizes, including the anchors themselves
    lower_peaks = [p for p in peaks if p.rtime <= last_rtime]
    lower_sizes = [s for s in ladder['sizes'] if s <= last_size]

    # we try to pair-minimize lower_peaks and lower_sizes

    f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)

    scores = []

    if lower_peaks:
        # NOTE(review): uses only the last two coefficients of anchor_z and
        # therefore needs at least four of them -- confirm against callers
        est_first_bpsize = anchor_z[2] * lower_peaks[0].rtime + anchor_z[3]

        # only search when some lower ladder size reaches the estimate
        if any(s >= est_first_bpsize for s in lower_sizes):
            first_anchor_rtime = anchor_pairs[0][0]

            for first_peak in lower_peaks[:-2]:
                # only peaks before the first anchor are candidates
                if first_peak.rtime >= first_anchor_rtime:
                    break

                # try each of the two smallest ladder sizes for this peak
                for first_bpsize in ladder['sizes'][:2]:
                    zres = estimate_z([first_peak.rtime] + anchor_rtimes,
                                      [first_bpsize] + anchor_bpsizes, 3)
                    score, z = minimize_score(f, zres.z, 3)
                    scores.append((score, z))

    if not scores:
        # nothing could be scored: keep the anchor alignment
        return anchor_pairs, anchor_z, None, f

    scores.sort(key=lambda x: x[0])
    z = scores[0][1]
    pairs, rss = f.get_pairs(z)

    return pairs, z, rss, f
Ejemplo n.º 15
0
def align_de(peaks, ladder, initial_pair=None):
    """ differential evolution method

    Runs at most three differential-evolution searches over four bounded
    coefficients, refits the resulting pairs with ``estimate_z`` after each
    search, and stops early once the refit RSS is below the number of
    paired sizes.  The refit with the lowest RSS seeds a final ``align_dp``
    pass, which is scored by ``ladder['qcfunc']`` in ``'relax'`` mode.

    *initial_pair* defaults to a fresh empty list on every call (a shared
    mutable default is avoided).  Returns an ``AlignResult``.
    """

    if initial_pair is None:
        initial_pair = []

    cerr('I: differential evolution method is running!')

    sizes = ladder['sizes']

    f = ZFunc(peaks, sizes, initial_pair)
    bounds = [(-1e-10, 1e-10), (-1e-5, 1e-5), (0.01, 0.1), (-75, 75)]

    results = []

    for niter in range(3):

        res = differential_evolution(f,
                                     bounds,
                                     tol=1e-5,
                                     mutation=(0.4, 1.5),
                                     popsize=30,
                                     recombination=0.8)

        pairs, final_rss = f.get_pairs(res.x)
        rtimes, bpsizes = zip(*pairs)
        # NOTE(review): the refit order is the zero-based iteration index
        # (0, 1, 2), exactly as before -- confirm order 0 on the first pass
        # is intended
        zres = estimate_z(rtimes, bpsizes, niter)

        cerr('I: DE iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' %
             (niter + 1, len(pairs), zres.rss))
        results.append(zres)

        # good enough: RSS below one unit per paired size
        if zres.rss < len(bpsizes) * 1.0:
            break

    results.sort(key=lambda x: x.rss)
    zres = results[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, zres.z, zres.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.de_relax)
Ejemplo n.º 16
0
def align_de( peaks, ladder, initial_pair=None ):
    """ differential evolution method

    Runs at most three differential-evolution searches over four bounded
    coefficients, refits the resulting pairs with ``estimate_z`` after each
    search, and stops early once the refit RSS is below the number of
    paired sizes.  The refit with the lowest RSS seeds a final ``align_dp``
    pass, which is scored by ``ladder['qcfunc']`` in ``'relax'`` mode.

    *initial_pair* defaults to a fresh empty list on every call (a shared
    mutable default is avoided).  Returns an ``AlignResult``.
    """

    if initial_pair is None:
        initial_pair = []

    cerr('I: differential evolution method is running!')

    sizes = ladder['sizes']

    f = ZFunc( peaks, sizes, initial_pair )
    bounds = [ (-1e-10, 1e-10), (-1e-5, 1e-5), (0.01, 0.1), (-75, 75) ]

    results = []

    for niter in range(3):

        res = differential_evolution(f, bounds, tol=1e-5, mutation=(0.4, 1.5),
                popsize=30, recombination=0.8)

        pairs, final_rss = f.get_pairs(res.x)
        rtimes, bpsizes = zip( *pairs )
        # NOTE(review): the refit order is the zero-based iteration index
        # (0, 1, 2), exactly as before -- confirm order 0 on the first pass
        # is intended
        zres = estimate_z(rtimes, bpsizes, niter)

        cerr('I: DE iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (niter + 1, len(pairs), zres.rss))
        results.append( zres )

        # good enough: RSS below one unit per paired size
        if zres.rss < len(bpsizes) * 1.0:
            break

    results.sort( key = lambda x: x.rss )
    zres = results[0]

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, zres.z, zres.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.de_relax)
Ejemplo n.º 17
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment upwards, then finalize it with align_dp.

    Starting from a fit of the sorted anchors (order ``ladder['order']``),
    ladder sizes above the largest anchor size are appended one (sometimes
    two) at a time and a refined fit is computed with ``minimize_score``.
    A step is adopted only while the RSS grows by less than 70.  The result
    is then passed through ``align_dp``; if that raises the RSS by more than
    50 the pre-``align_dp`` state is returned instead.  ``anchor_z`` is not
    used.  Returns ``(pairs, z, rss, f)``.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip( *anchor_pairs )
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    # grown in place; same list object that is handed to ZFunc
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # start from the anchors so that ``pairs`` is always bound, even when
    # there is nothing to extend or no extension step is ever adopted
    pairs = anchor_pairs

    while remaining_sizes:

        current_sizes.append( remaining_sizes.pop(0) )
        # also take the next size when it lies within 11 of the one just
        # added and the largest remaining size is within 100 of it
        if ( remaining_sizes and
             (remaining_sizes[-1] - current_sizes[-1]) < 100 and
             (remaining_sizes[0] - current_sizes[-1]) < 11 ):
            current_sizes.append( remaining_sizes.pop(0) )

        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, order)
        # NOTE(review): the pairing is evaluated with the z from BEFORE the
        # refinement, as in the original -- confirm this is intentional
        next_pairs, next_rss = f.get_pairs(z)

        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs )

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f
Ejemplo n.º 18
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment upwards, then finalize it with align_dp.

    Starting from a fit of the sorted anchors (order ``ladder['order']``),
    ladder sizes above the largest anchor size are appended one (sometimes
    two) at a time and a refined fit is computed with ``minimize_score``.
    A step is adopted only while the RSS grows by less than 70.  The result
    is then passed through ``align_dp``; if that raises the RSS by more than
    50 the pre-``align_dp`` state is returned instead.  ``anchor_z`` is not
    used.  Returns ``(pairs, z, rss, f)``.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip( *anchor_pairs )
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    # grown in place; same list object that is handed to ZFunc
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # start from the anchors so that ``pairs`` is always bound, even when
    # there is nothing to extend or no extension step is ever adopted
    pairs = anchor_pairs

    while remaining_sizes:

        current_sizes.append( remaining_sizes.pop(0) )
        # also take the next size when it lies within 11 of the one just
        # added and the largest remaining size is within 100 of it
        if ( remaining_sizes and
             (remaining_sizes[-1] - current_sizes[-1]) < 100 and
             (remaining_sizes[0] - current_sizes[-1]) < 11 ):
            current_sizes.append( remaining_sizes.pop(0) )

        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, order)
        # NOTE(review): the pairing is evaluated with the z from BEFORE the
        # refinement, as in the original -- confirm this is intentional
        next_pairs, next_rss = f.get_pairs(z)

        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs )

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f
Ejemplo n.º 19
0
def estimate_pm(peaks, bpsizes):
    """Build an initial peak/ladder pairing from a two-point linear seed.

    Each candidate pair of retention times from ``prepare_rtimes`` is fitted
    (order 1) against the 2nd and next-to-last entries of *bpsizes* and
    scored with the ``ZFunc`` objective.  The lowest-scoring fit seeds
    ``align_dp``.  Returns ``(pairs, z)`` where each pair is the swapped
    ``(x[1], x[0])`` of an ``align_dp`` sized peak.  Intermediate fits are
    plotted at verbosity 5.
    """

    def _swapped(sized_peaks):
        # (a, b) -> (b, a) for every sized peak
        return [(item[1], item[0]) for item in sized_peaks]

    peak_rtimes = [p.rtime for p in peaks]

    candidate_rtime_pairs = prepare_rtimes(peak_rtimes)
    ladder_pair = [bpsizes[1], bpsizes[-2]]

    f = ZFunc(peaks, bpsizes, [], estimate=True)

    scored_fits = []
    for candidate in candidate_rtime_pairs:
        # skip pairs whose first rtime is not before the second
        if not (candidate[0] >= candidate[1]):
            # straight line through exactly these two points
            fit = estimate_z(candidate, ladder_pair, 1)
            scored_fits.append((f(fit.z), fit))
            if is_verbosity(5):
                plot(f.rtimes, f.sizes, fit.z, [])

    best_fit = sorted(scored_fits, key=lambda entry: entry[0])[0][1]

    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, best_fit.z,
                         best_fit.rss)
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, _swapped(dp_result.sized_peaks))

    return (_swapped(dp_result.sized_peaks), dp_result.z)
Ejemplo n.º 20
0
def estimate_pm(peaks, bpsizes):
    """Seed the ladder pairing with the best two-point linear fit.

    For every candidate pair of retention times from ``prepare_rtimes`` an
    order-1 fit against ``bpsizes[1]`` and ``bpsizes[-2]`` is computed and
    scored with the ``ZFunc`` objective; the fit with the lowest score is
    handed to ``align_dp``.  Returns ``(pairs, z)`` with each pair being the
    swapped ``(x[1], x[0])`` of an ``align_dp`` sized peak.  Intermediate
    fits are plotted at verbosity 5.
    """

    def _flip(sized_peaks):
        # (a, b) -> (b, a) for every sized peak
        return [ (entry[1], entry[0]) for entry in sized_peaks ]

    rtime_list = [ peak.rtime for peak in peaks ]

    rtime_candidates = prepare_rtimes( rtime_list )
    size_endpoints = [ bpsizes[1], bpsizes[-2] ]

    f = ZFunc(peaks, bpsizes, [], estimate = True)

    ranked = []
    for rtime_candidate in rtime_candidates:
        # skip pairs whose first rtime is not before the second
        if not (rtime_candidate[0] >= rtime_candidate[1]):
            # straight line through exactly these two points
            line_fit = estimate_z(rtime_candidate, size_endpoints, 1)
            line_score = f(line_fit.z)
            ranked.append( (line_score, line_fit) )
            if is_verbosity(5):
                plot(f.rtimes, f.sizes, line_fit.z, [] )

    chosen = sorted( ranked, key = lambda entry: entry[0] )[0][1]

    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, chosen.z, chosen.rss)
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, _flip(dp_result.sized_peaks))

    return ( _flip(dp_result.sized_peaks), dp_result.z )
Ejemplo n.º 21
0
def minimize_score(f, z, order):
    """Alternate scoring and refitting until the score stops changing.

    On each of at most 49 rounds the current *z* is scored with ``f``; the
    pairs it produces are then refitted with ``estimate_z`` at the given
    *order* to obtain the next *z*.  The loop ends early when a score
    differs from the previous (non-zero) score by less than 1e-6.

    Returns ``(last_score, z)``: the most recently recorded score and the
    latest *z*.
    """

    previous_score = 0

    for _ in range(49):

        current_score = f(z)

        # converged: no refit after this evaluation
        if previous_score and abs(previous_score - current_score) < 1e-6:
            break

        matched, _rss = f.get_pairs(z)
        rtimes, bpsizes = zip(*matched)
        z = estimate_z(rtimes, bpsizes, order).z

        previous_score = current_score

    return previous_score, z
Ejemplo n.º 22
0
def minimize_score( f, z, order ):
    """Iteratively refit *z* from its own pairing until the score settles.

    Up to 49 rounds are run.  Each round scores the current *z* with ``f``,
    stops if the score is within 1e-6 of the previous (non-zero) score, and
    otherwise refits *z* with ``estimate_z`` (at *order*) on the pairs that
    ``f.get_pairs`` yields.

    Returns ``(last_score, z)``: the most recently recorded score and the
    latest *z*.
    """

    recorded = 0
    rounds_left = 49

    while rounds_left > 0:
        rounds_left -= 1

        latest = f(z)

        # converged: no refit after this evaluation
        if recorded and abs(recorded - latest) < 1e-6:
            break

        paired, _unused_rss = f.get_pairs(z)
        rtimes, bpsizes = zip( *paired )
        refit = estimate_z(rtimes, bpsizes, order)

        z = refit.z
        recorded = latest

    return recorded, z
Ejemplo n.º 23
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downwards, one ladder size per pass.

    On every pass the current pairs are sorted, the largest ladder size
    still below the smallest paired size is added, an order-3 fit of the
    current pairs is refined with ``minimize_score`` and the resulting
    pairing becomes the anchors of the next pass.  ``anchor_z`` is not used.

    Returns ``(pairs, z, rss, f)``.  If no ladder size lies below the
    anchors on the first pass, the result is computed from the anchors
    alone.  The loop also stops when a pass reproduces its own input
    pairing, since further passes could not change anything.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    pairs = z = rss = f = None

    while True:

        anchor_pairs = sorted(anchor_pairs)
        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        anchor_rtimes = list(anchor_rtimes)
        anchor_bpsizes = list(anchor_bpsizes)
        remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
        if not remaining_sizes:
            if f is None:
                # nothing to extend on the very first pass: evaluate the
                # anchors themselves so every returned value is defined
                f = ZFunc(peaks, anchor_bpsizes, anchor_pairs, estimate=True)
                z = estimate_z(anchor_rtimes, anchor_bpsizes, 3).z
                pairs, rss = f.get_pairs(z)
            return pairs, z, rss, f

        current_sizes = [remaining_sizes[-1]] + anchor_bpsizes
        print('current_sizes:', current_sizes)
        f = ZFunc(peaks, current_sizes, anchor_pairs, estimate=True)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
        score, z = minimize_score(f, zres.z, 3)
        pairs, rss = f.get_pairs(z)
        plot(f.rtimes, f.sizes, z, pairs)

        # fixed point: the next pass would start from identical anchors and
        # repeat this one forever, so stop here
        if sorted(pairs) == anchor_pairs:
            return pairs, z, rss, f

        anchor_pairs = pairs
Ejemplo n.º 24
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downwards, one ladder size per pass.

    On every pass the current pairs are sorted, the largest ladder size
    still below the smallest paired size is added, an order-3 fit of the
    current pairs is refined with ``minimize_score`` and the resulting
    pairing becomes the anchors of the next pass.  ``anchor_z`` is not used.

    Returns ``(pairs, z, rss, f)``.  If no ladder size lies below the
    anchors on the first pass, the result is computed from the anchors
    alone.  The loop also stops when a pass reproduces its own input
    pairing, since further passes could not change anything.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    pairs = z = rss = f = None

    while True:

        anchor_pairs = sorted(anchor_pairs)
        anchor_rtimes, anchor_bpsizes = zip( *anchor_pairs )
        anchor_rtimes = list(anchor_rtimes)
        anchor_bpsizes = list(anchor_bpsizes)
        remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
        if not remaining_sizes:
            if f is None:
                # nothing to extend on the very first pass: evaluate the
                # anchors themselves so every returned value is defined
                f = ZFunc(peaks, anchor_bpsizes, anchor_pairs, estimate=True)
                z = estimate_z(anchor_rtimes, anchor_bpsizes, 3).z
                pairs, rss = f.get_pairs(z)
            return pairs, z, rss, f

        current_sizes = [ remaining_sizes[-1] ] + anchor_bpsizes
        print('current_sizes:', current_sizes)
        f = ZFunc(peaks, current_sizes, anchor_pairs, estimate=True)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
        score, z = minimize_score(f, zres.z, 3)
        pairs, rss = f.get_pairs(z)
        plot(f.rtimes, f.sizes, z, pairs )

        # fixed point: the next pass would start from identical anchors and
        # repeat this one forever, so stop here
        if sorted(pairs) == anchor_pairs:
            return pairs, z, rss, f

        anchor_pairs = pairs
Ejemplo n.º 25
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downwards, one ladder size at a time.

    Starting from an order-3 fit of the sorted anchors, each ladder size
    below the smallest anchor size is prepended (largest first) and the fit
    is refined with ``minimize_score``.  A step is adopted unless it raises
    the RSS by more than 20, in which case the size is dropped again and
    the previous state is kept.  ``anchor_z`` is not used.

    Returns ``(pairs, z, rss, f)``; ``pairs`` is the sorted anchor pairs
    when no step was adopted.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip( *anchor_pairs )
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
    # grown in place; same list object that is handed to ZFunc
    current_sizes = anchor_bpsizes
    zscore = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
    z = zscore.z
    rss = zscore.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # start from the anchors so that ``pairs`` is always bound, even when
    # there is nothing to extend or the first candidate is rejected
    pairs = anchor_pairs

    while remaining_sizes:

        current_sizes.insert(0, remaining_sizes.pop(-1))
        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, 3)
        next_pairs, next_rss = f.get_pairs(next_z)

        # if delta rss (current rss - prev rss) is above certain threshold,
        # then assume the latest peak standard is not appropriate, and
        # use previous z and rss
        if (next_rss - rss) > 20:
            current_sizes.pop(0)
        else:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs )

    return pairs, z, rss, f
Ejemplo n.º 26
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend the anchor alignment downwards, one ladder size at a time.

    Starting from an order-3 fit of the sorted anchors, each ladder size
    below the smallest anchor size is prepended (largest first) and the fit
    is refined with ``minimize_score``.  A step is adopted unless it raises
    the RSS by more than 20, in which case the size is dropped again and
    the previous state is kept.  ``anchor_z`` is not used.

    Returns ``(pairs, z, rss, f)``; ``pairs`` is the sorted anchor pairs
    when no step was adopted.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
    # grown in place; same list object that is handed to ZFunc
    current_sizes = anchor_bpsizes
    zscore = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
    z = zscore.z
    rss = zscore.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # start from the anchors so that ``pairs`` is always bound, even when
    # there is nothing to extend or the first candidate is rejected
    pairs = anchor_pairs

    while remaining_sizes:

        current_sizes.insert(0, remaining_sizes.pop(-1))
        f.set_sizes(current_sizes)
        score, next_z = minimize_score(f, z, 3)
        next_pairs, next_rss = f.get_pairs(next_z)

        # if delta rss (current rss - prev rss) is above certain threshold,
        # then assume the latest peak standard is not appropriate, and
        # use previous z and rss
        if (next_rss - rss) > 20:
            current_sizes.pop(0)
        else:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Ejemplo n.º 27
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching (pm) method.

    Args:
        peaks: peak objects exposing an ``rtime`` attribute.
        ladder: mapping that provides ``'sizes'``, ``'signature'`` and
            ``'qcfunc'``.
        anchor_pairs: optional list of (rtime, bpsize) pairs.  When empty or
            ``None`` the anchors are estimated with ``estimate_pm`` from the
            peaks whose rtime lies strictly between 1500 and 5000.  A list
            passed in is sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise the relaxed QC result tagged ``pm_relax``.
    """
    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]

        # this finds the pair of peaks that best match to the 2nd and
        # next-to-last ladder steps, and does a linear fit to the rest of
        # peaks to find the peaks matched to ladder steps
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # NOTE(review): other call sites take ``.z`` from the estimate_z()
        # result; here the whole result object is passed on - confirm what
        # align_upper_pm() expects.
        initial_z = estimate_z(rtimes, bpsizes, 1)

    anchor_pairs.sort()

    if len(anchor_pairs) == len(ladder['sizes']):
        # every ladder step already has an anchor, no pair matching needed
        f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)

        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 2)
        _, z = minimize_score(f, zres.z, 2)
        pairs, rss = f.get_pairs(z)

    else:
        pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs,
                                          initial_z)
        if is_verbosity(4):
            print(pairs)
        pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)

    if is_verbosity(4):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
Ejemplo n.º 28
0
def align_hc(peaks, ladder):
    """Align peaks to a ladder by clustering both and pairing the clusters.

    Args:
        peaks: peak objects exposing ``rtime`` (expected in ascending order).
        ladder: mapping providing ``'sizes'`` (expected in ascending order),
            ``'k'`` and ``'qcfunc'``.  The ladder tree and clusters are
            cached in it under ``'T'`` and ``'C'`` when missing.

    Returns:
        ``AlignResult(score, msg, result, method)``.  A score of -1 with no
        result is returned when no initial pairs were found, or when the
        quick alignment is not good enough; in the latter case the
        ``initial_pairs`` are attached for a later minimization step.
    """
    # generate (and cache) the cluster description of the ladder
    if 'C' not in ladder:
        if 'T' not in ladder:
            ladder['T'] = generate_tree([(n, 0) for n in ladder['sizes']])
        ladder['C'] = generate_cluster(ladder['T'], ladder['k'])
    ladder_clusters = ladder['C']
    ladder_sizes = ladder['sizes']

    P = generate_tree([(n.rtime, 0) for n in peaks])
    peak_clusters = generate_cluster(P, ladder['k'])

    # generate cluster should use so-called balance tree

    # diagnostics go through cverr like the other messages in this function
    cverr(5, '>> peak clusters:\n%s' % (peak_clusters,))

    # drop a singleton cluster at either end while there are more clustered
    # peaks than ladder sizes
    if len(peak_clusters[-1]) == 1:
        if sum(len(c) for c in peak_clusters) > len(ladder_sizes):
            del peak_clusters[-1]
    if len(peak_clusters[0]) == 1:
        if sum(len(c) for c in peak_clusters) > len(ladder_sizes):
            del peak_clusters[0]

    # re-cluster the surviving members when a cluster was dropped
    if len(peak_clusters) < ladder['k']:
        P = generate_tree([(n, 0) for c in peak_clusters for n in c])
        peak_clusters = generate_cluster(P, ladder['k'])

    # short cut, in case we have good high quality peaks
    if sum(len(c) for c in peak_clusters) == len(ladder_sizes):
        hq_peaks = sum(peak_clusters, [])
        zres = estimate_z(hq_peaks, ladder_sizes)
        dp_result = align_dp(hq_peaks, ladder_sizes, [1.0] * len(hq_peaks),
                             zres.z, zres.rss)
        dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
        # NOTE(review): QC runs in 'relax' mode but the result is tagged
        # hcm_strict - confirm this is intended.
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        if score > 0.9:
            return AlignResult(score, msg, dp_result,
                               const.alignmethod.hcm_strict)

    cluster_pairings, expected_missing = align_clusters(peak_clusters,
                                                        ladder_clusters)

    # check each cluster pairing
    initial_pairs = []
    for pairs in cluster_pairings:
        if is_good_pairing(pairs):
            initial_pairs.extend(pairs)
        else:
            # 1-tuple keeps the formatting valid even when pairs is a tuple
            cverr(3, '>> this pairings is not included:\n%s' % (pairs,))

    cverr(3, '>> initial pairs:\n%s' % initial_pairs)

    if not initial_pairs:
        return AlignResult(-1, 'E: no initial pairs defined!', None, None)

    # try to dp align the initial pairs as a shortcut for good sample or peaks
    rtimes, sizes = zip(*initial_pairs)
    zres = estimate_z(rtimes, sizes)

    dp_result = align_dp([p.rtime for p in peaks], ladder_sizes,
                         generate_similarity(peaks), zres.z, zres.rss)
    dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)

    return AlignResult(-1, 'ERR: alignment needs minimization', None, None,
                       initial_pairs=initial_pairs)
Ejemplo n.º 29
0
def align_hc(peaks, ladder):
    """Hierarchical-clustering alignment of peaks against a ladder.

    peaks: peak objects exposing ``rtime`` (expected in ascending order)
    ladder: mapping with ``'sizes'`` (expected in ascending order), ``'k'``
        and ``'qcfunc'``; the ladder tree/clusters are cached under ``'T'``
        and ``'C'`` when they are not there yet

    returns: AlignResult(score, msg, result, method); score is -1 and result
        is None when no usable alignment was produced
    """
    # build the ladder-side clusters once and keep them on the ladder
    if 'C' not in ladder:
        if 'T' not in ladder:
            ladder_nodes = [(size, 0) for size in ladder['sizes']]
            ladder['T'] = generate_tree(ladder_nodes)
        ladder['C'] = generate_cluster(ladder['T'], ladder['k'])
    ladder_clusters = ladder['C']
    ladder_sizes = ladder['sizes']
    n_ladder = len(ladder_sizes)

    peak_tree = generate_tree([(peak.rtime, 0) for peak in peaks])
    peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # generate cluster should use so-called balance tree

    # trim a lone cluster at the tail, then at the head, as long as the
    # clusters hold more members than the ladder has sizes
    if (len(peak_clusters[-1]) == 1
            and len(reduce(operator.add, peak_clusters)) > n_ladder):
        del peak_clusters[-1]
    if (len(peak_clusters[0]) == 1
            and len(reduce(operator.add, peak_clusters)) > n_ladder):
        del peak_clusters[0]

    # fewer clusters than requested: cluster the remaining members again
    if len(peak_clusters) < ladder['k']:
        members = reduce(operator.add, peak_clusters)
        peak_tree = generate_tree([(member, 0) for member in members])
        peak_clusters = generate_cluster(peak_tree, ladder['k'])

    # short cut, in case we have good high quality peaks
    if sum(map(len, peak_clusters)) == n_ladder:
        hq_peaks = sum(peak_clusters, [])
        zres = estimate_z(hq_peaks, ladder_sizes)
        uniform_similarity = [1.0] * len(hq_peaks)
        dp_result = align_dp(hq_peaks, ladder_sizes, uniform_similarity,
                             zres.z, zres.rss)
        dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
        score, msg = ladder['qcfunc'](dp_result, method='relax')
        if score > 0.9:
            return AlignResult(score, msg, dp_result,
                               const.alignmethod.hcm_strict)

    cluster_pairings, expected_missing = align_clusters(
        peak_clusters, ladder_clusters)

    # keep only the pairings that pass the quality check
    initial_pairs = []
    for pairing in cluster_pairings:
        if not is_good_pairing(pairing):
            cverr(3, '>> this pairings is not included:\n%s' % pairing)
            continue
        initial_pairs.extend(pairing)

    cverr(3, '>> initial pairs:\n%s' % initial_pairs)

    if not initial_pairs:
        return AlignResult(-1, 'E: no initial pairs defined!', None, None)

    # try to dp align the initial pairs as a shortcut for good sample or peaks
    rtimes, sizes = zip(*initial_pairs)
    zres = estimate_z(rtimes, sizes)

    peak_rtimes = [peak.rtime for peak in peaks]
    dp_result = align_dp(peak_rtimes, ladder_sizes,
                         generate_similarity(peaks), zres.z, zres.rss)
    dp_result.sized_peaks = pair_sized_peaks(peaks, dp_result.sized_peaks)
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score <= 0.9:
        return AlignResult(-1,
                           'ERR: alignment needs minimization',
                           None,
                           None,
                           initial_pairs=initial_pairs)

    return AlignResult(score, msg, dp_result, const.alignmethod.hcm_strict)
Ejemplo n.º 30
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching (pm) method.

    Args:
        peaks: peak objects exposing an ``rtime`` attribute.
        ladder: mapping that provides ``'signature'`` and ``'qcfunc'`` (and
            whatever ``align_upper_pm`` / ``align_lower_pm`` read from it).
        anchor_pairs: optional list of (rtime, bpsize) pairs.  When empty or
            ``None`` the anchors are estimated with ``estimate_pm`` from the
            peaks inside the anchor rtime window.  A list passed in is
            sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise the relaxed QC result tagged ``pm_relax``.

    Raises:
        ValueError: if ``peaks`` is empty and no anchor pairs were given.
    """
    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # the run extends past the expected upper bound: scale the
            # anchor window by the same ratio
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # NOTE(review): the whole estimate_z() result is passed on, not its
        # ``.z`` - confirm what align_upper_pm() expects.
        initial_z = estimate_z(rtimes, bpsizes, 1)

    anchor_pairs.sort()
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
Ejemplo n.º 31
0
def align_gm(peaks, ladder, anchor_pairs, z=None):
    """Align peaks to a ladder with the generalized minimization (gm) method.

    The ``ZFunc`` score is minimized with Nelder-Mead starting from ``z``,
    the resulting pairs are refitted with ``estimate_z`` and the refit seeds
    the next round.  Rounds stop once the RSS changes by at most 1e-3 or
    drops below the number of pairs.  The round with the lowest RSS feeds
    the final ``align_dp`` step.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'qcfunc'``.
        anchor_pairs: anchor pairs handed to ``ZFunc``.
        z: optional starting parameters; ``f.get_initial_z().z`` is used
            when ``None``.

    Returns:
        ``AlignResult`` tagged ``gm_strict`` when the strict QC score exceeds
        0.9, otherwise the relaxed QC result tagged ``gm_relax``.
    """
    cerr('I: generalized minimization method is running!')

    f = ZFunc(peaks, ladder['sizes'], anchor_pairs)
    if z is None:
        z = f.get_initial_z().z

    # sentinels chosen so the first convergence check always passes
    rss = -1
    prev_rss = 0

    niter = 1
    results = []
    while abs(rss - prev_rss) > 1e-3:

        prev_rss = rss

        res = minimize(f, z, method='Nelder-Mead', tol=1e-6)

        pairs, _ = f.get_pairs(res.x)

        rtimes, bpsizes = zip(*pairs)
        # third argument grows 1, 2, 3 and then stays at 3
        # (presumably the polynomial order - verify against estimate_z)
        zresult = estimate_z(rtimes, bpsizes, min(niter, 3))
        rss = zresult.rss
        z = zresult.z
        cerr('I: GM iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (niter, len(pairs), rss))
        niter += 1
        results.append(zresult)

        if rss < len(pairs) * 1.0:
            break

    # get the best result (the loop always runs at least once)
    zresult = min(results, key=lambda x: x.rss)

    # last dp
    # NOTE(review): align_dp is called with four arguments here - confirm
    # this matches the align_dp signature in this file.
    dp_result = align_dp(f.rtimes, f.sizes, zresult.z, zresult.rss)

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.gm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.gm_relax)
Ejemplo n.º 32
0
def align_gm(peaks, ladder, anchor_pairs, z=None):
    """Run the generalized minimization (gm) ladder alignment.

    Each round minimizes the ``ZFunc`` score with Nelder-Mead from the
    current ``z``, pairs peaks with sizes at the optimum and refits the
    pairs with ``estimate_z``.  The loop ends when two consecutive RSS
    values differ by no more than 1e-3, or as soon as the RSS is below the
    number of pairs.  The lowest-RSS refit is passed to ``align_dp``.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'qcfunc'``.
        anchor_pairs: anchor pairs handed to ``ZFunc``.
        z: optional starting parameters; taken from ``f.get_initial_z()``
            when ``None``.

    Returns:
        ``AlignResult`` with method ``gm_strict`` if the strict QC score is
        above 0.9, else the relaxed QC outcome with method ``gm_relax``.
    """
    cerr('I: generalized minimization method is running!')

    f = ZFunc(peaks, ladder['sizes'], anchor_pairs)
    if z is None:
        initial = f.get_initial_z()
        z = initial.z

    # start values differ by 1, so the loop body runs at least once
    rss, prev_rss = -1, 0
    niter = 1
    results = []

    while abs(rss - prev_rss) > 1e-3:
        prev_rss = rss

        res = minimize(f, z, method='Nelder-Mead', tol=1e-6)

        # only the pairs are needed; the rss reported by get_pairs is unused
        pairs, _ = f.get_pairs(res.x)
        rtimes, bpsizes = zip(*pairs)

        # refit with estimate_z; its third argument is capped at 3
        zresult = estimate_z(rtimes, bpsizes, min(niter, 3))
        rss, z = zresult.rss, zresult.z
        cerr('I: GM iter: %2d  - pairs: %2d  - Cur RSS: %6.2f' % (niter, len(pairs), rss))
        niter += 1
        results.append(zresult)

        if rss < len(pairs) * 1.0:
            break

    # pick the round with the smallest rss
    best = min(results, key=lambda r: r.rss)

    # last dp
    # NOTE(review): four-argument align_dp call - confirm against the
    # align_dp signature used by this file.
    dp_result = align_dp(f.rtimes, f.sizes, best.z, best.rss)
    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.gm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.gm_relax)
Ejemplo n.º 33
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Peak-matching (pm) alignment of peaks against a ladder.

    Without anchor pairs, anchors are estimated by ``estimate_pm`` from the
    peaks whose rtime falls strictly inside the anchor window.  That window
    is ``ANCHOR_RTIME_LOWER_BOUND .. ANCHOR_RTIME_UPPER_BOUND``, scaled up
    when the longest rtime exceeds ``PEAK_RTIME_UPPER_BOUND``.  The anchors
    are then extended with ``align_upper_pm`` and ``align_lower_pm`` and
    finished with ``align_dp``.

    Args:
        peaks: peak objects exposing an ``rtime`` attribute.
        ladder: mapping that provides ``'signature'`` and ``'qcfunc'``.
        anchor_pairs: optional list of (rtime, bpsize) pairs; sorted in
            place when given.

    Returns:
        ``AlignResult`` with method ``pm_strict`` if the strict QC score is
        above 0.9, else the relaxed QC outcome with method ``pm_relax``.

    Raises:
        ValueError: if ``peaks`` is empty and no anchor pairs were given.
    """
    if anchor_pairs:
        rtimes, bpsizes = zip(*anchor_pairs)
        # NOTE(review): this is the full estimate_z() result rather than its
        # ``.z`` attribute - confirm what align_upper_pm() expects.
        initial_z = estimate_z(rtimes, bpsizes, 1)
    else:
        longest_rtime_peak = max(p.rtime for p in peaks)
        anchor_start = ANCHOR_RTIME_LOWER_BOUND
        anchor_end = ANCHOR_RTIME_UPPER_BOUND
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # stretch the anchor window for runs longer than expected
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        anchor_peaks = [
            p for p in peaks if anchor_start < p.rtime < anchor_end
        ]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    anchor_pairs.sort()
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
Ejemplo n.º 34
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to a ladder with the peak-matching (pm) method.

    Args:
        peaks: peak objects exposing an ``rtime`` attribute.
        ladder: mapping that provides ``'signature'`` and ``'qcfunc'``.
        anchor_pairs: optional list of (rtime, bpsize) pairs.  When empty or
            ``None`` the anchors are estimated with ``estimate_pm`` from the
            peaks whose rtime lies strictly between 1500 and 5000.  A list
            passed in is sorted in place.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict QC score exceeds
        0.9, otherwise the relaxed QC result tagged ``pm_relax``.
    """
    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # NOTE(review): the whole estimate_z() result is passed on, not its
        # ``.z`` - confirm what align_upper_pm() expects.
        initial_z = estimate_z(rtimes, bpsizes, 1)

    anchor_pairs.sort()
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)

    # debugging aids only: dump and plot the dp result at high verbosity
    # instead of printing to stdout on every call
    if is_verbosity(4):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)