Example #1
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment upwards, one ladder size at a time.

    Starting from the sorted anchor pairs, every ladder size larger than the
    largest anchor size is appended in turn; after each addition the score
    function is re-minimised and the peak/size pairing is recomputed.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'order'``.
        anchor_pairs: iterable of 2-item pairs; after sorting, the first items
            are passed to ``estimate_z`` as rtimes and the second as bp sizes.
        anchor_z: accepted for interface compatibility; not used in the body.

    Returns:
        ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, current_sizes = map(list, zip(*anchor_pairs))

    # ladder sizes above the largest anchor size, in ladder order
    upper_sizes = [
        size for size in ladder['sizes'] if size > current_sizes[-1]
    ]
    order = ladder['order']

    z = estimate_z(anchor_rtimes, current_sizes, order).z
    f = ZFunc(peaks, current_sizes, anchor_pairs, estimate=True)
    pairs, rss = f.get_pairs(z)

    for size in upper_sizes:
        current_sizes.append(size)
        f.set_sizes(current_sizes)
        _, candidate_z = minimize_score(f, z, order)

        # pairs are evaluated with the z from before the minimisation; the
        # minimised z is adopted only when that rss stays below 100
        pairs, rss = f.get_pairs(z)
        if rss < 100:
            z = candidate_z

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Example #2
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment downwards, one ladder size at a time.

    Ladder sizes smaller than the smallest anchor size are prepended to the
    working size list, taking them from the end of the filtered list first.
    After each addition the score function is re-minimised with a polynomial
    order of 3 and the pairing is recomputed.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-item pairs; after sorting, the first items
            are passed to ``estimate_z`` as rtimes and the second as bp sizes.
        anchor_z: accepted for interface compatibility; not used in the body.

    Returns:
        ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """
    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, current_sizes = map(list, zip(*anchor_pairs))

    # ladder sizes below the smallest anchor size, in ladder order
    lower_sizes = [
        size for size in ladder['sizes'] if size < current_sizes[0]
    ]

    z = estimate_z(anchor_rtimes, current_sizes, 3).z
    f = ZFunc(peaks, current_sizes, anchor_pairs, estimate=True)
    pairs, rss = f.get_pairs(z)

    # walk the filtered list from its last element to its first
    for size in reversed(lower_sizes):
        current_sizes.insert(0, size)
        f.set_sizes(current_sizes)
        _, z = minimize_score(f, z, 3)
        pairs, rss = f.get_pairs(z)

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Example #3
0
    def align(self, parameters, ladder=None, anchor_pairs=None):
        """Align this ladder channel and store the result on its FSA.

        Runs ``algo.align_peaks`` and copies z, rss, score, the number of
        sized ladder peaks and the elapsed CPU time onto ``self.fsa``. It then
        assigns ladder sizes to this channel's alleles (both sorted, paired in
        order) and builds the allele fit function selected by
        ``parameters.allelemethod``.

        Args:
            parameters: object providing ``allelemethod`` and
                ``ladder.min_rtime`` / ``ladder.max_rtime``.
            ladder: ignored -- the ladder is always taken from
                ``self.fsa.panel.get_ladder()``.
            anchor_pairs: passed through to ``algo.align_peaks``.

        Raises:
            RuntimeError: if this channel's marker is not the ladder, or if
                ``parameters.allelemethod`` is not a supported method.
        """

        # sanity checks
        if self.marker.code != 'ladder':
            raise RuntimeError(
                'E: align() must be performed on ladder channel!')

        # the panel's ladder always wins over the `ladder` argument
        ladder = self.fsa.panel.get_ladder()

        # prepare ladder qcfunc (cached on the ladder mapping itself)
        if 'qcfunc' not in ladder:
            ladder['qcfunc'] = algo.generate_scoring_function(
                ladder['strict'], ladder['relax'])

        start_time = time.process_time()
        result = algo.align_peaks(self, parameters, ladder, anchor_pairs)
        dpresult = result.dpresult
        fsa = self.fsa
        fsa.z = dpresult.z
        fsa.rss = dpresult.rss
        fsa.nladder = len(dpresult.sized_peaks)
        fsa.score = result.score
        fsa.duration = time.process_time() - start_time

        # set allele sizes from ladder steps: alleles ordered by rtime are
        # paired with ladder sizes in ascending order (zip stops at the
        # shorter of the two)
        alleles = self.get_alleles()
        alleles.sort(key=lambda x: x.rtime)

        # NOTE: sorts the ladder's own size list in place
        ladder_sizes = ladder['sizes']
        ladder_sizes.sort()

        for allele, ladder_size in zip(alleles, ladder_sizes):
            allele.size = ladder_size

        # check the allele method
        method = parameters.allelemethod

        if method == const.allelemethod.leastsquare:
            fsa.allele_fit_func = algo.least_square(alleles, fsa.z)
        elif method == const.allelemethod.cubicspline:
            fsa.allele_fit_func = algo.cubic_spline(alleles)
        elif method == const.allelemethod.localsouthern:
            fsa.allele_fit_func = algo.local_southern(alleles)
        else:
            # same exception type as before, now with a diagnosable message
            raise RuntimeError('E: unknown allele method: %r' % (method,))

        fsa.min_rtime = parameters.ladder.min_rtime
        fsa.max_rtime = parameters.ladder.max_rtime

        if is_verbosity(4):
            cout('O: Score %3.2f | %5.2f | %d/%d | %s | %5.1f | %s' %
                 (fsa.score, fsa.rss, fsa.nladder, len(ladder['sizes']),
                  result.method, fsa.duration, fsa.filename))
Example #4
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment upwards, then refine it with align_dp.

    Ladder sizes above the largest anchor size are appended one (sometimes
    two) at a time. After each step the score function is re-minimised and
    the new fit is accepted only if the rss grows by less than 70. The result
    is finally passed through ``align_dp``; the DP result is used unless its
    rss exceeds the incremental one by more than 50.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'order'``.
        anchor_pairs: iterable of 2-item pairs; after sorting, the first items
            are passed to ``estimate_z`` as rtimes and the second as bp sizes.
        anchor_z: accepted for interface compatibility; not used in the body.

    Returns:
        ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Start from the anchor pairs so that `pairs` is always bound: previously
    # it was undefined (UnboundLocalError) when there was nothing to extend or
    # when the first extension step was rejected.
    pairs = list(anchor_pairs)

    while remaining_sizes:

        current_sizes.append(remaining_sizes.pop(0))
        # also take the next size in the same step when it is less than 11
        # above the one just added and the largest remaining size is less
        # than 100 above it
        if (remaining_sizes and
                (remaining_sizes[-1] - current_sizes[-1]) < 100 and
                (remaining_sizes[0] - current_sizes[-1]) < 11):
            current_sizes.append(remaining_sizes.pop(0))

        f.set_sizes(current_sizes)
        _, next_z = minimize_score(f, z, order)
        # pairs are evaluated with the z from before the minimisation
        next_pairs, next_rss = f.get_pairs(z)

        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f
    # sized_peaks entries are swapped to match the order used for `pairs`
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f
Example #5
0
def align_upper_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment upwards, then refine it with align_dp.

    Ladder sizes above the largest anchor size are appended one (sometimes
    two) at a time. After each step the score function is re-minimised and
    the new fit is accepted only if the rss grows by less than 70. The result
    is finally passed through ``align_dp``; the DP result is used unless its
    rss exceeds the incremental one by more than 50.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'`` and ``'order'``.
        anchor_pairs: iterable of 2-item pairs; after sorting, the first items
            are passed to ``estimate_z`` as rtimes and the second as bp sizes.
        anchor_z: accepted for interface compatibility; not used in the body.

    Returns:
        ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x > anchor_bpsizes[-1]]
    current_sizes = anchor_bpsizes
    order = ladder['order']
    zres = estimate_z(anchor_rtimes, anchor_bpsizes, order)
    z, rss = zres.z, zres.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Start from the anchor pairs so that `pairs` is always bound: previously
    # it was undefined (UnboundLocalError) when there was nothing to extend or
    # when the first extension step was rejected.
    pairs = list(anchor_pairs)

    while remaining_sizes:

        current_sizes.append(remaining_sizes.pop(0))
        # also take the next size in the same step when it is less than 11
        # above the one just added and the largest remaining size is less
        # than 100 above it
        if (remaining_sizes and
                (remaining_sizes[-1] - current_sizes[-1]) < 100 and
                (remaining_sizes[0] - current_sizes[-1]) < 11):
            current_sizes.append(remaining_sizes.pop(0))

        f.set_sizes(current_sizes)
        _, next_z = minimize_score(f, z, order)
        # pairs are evaluated with the z from before the minimisation
        next_pairs, next_rss = f.get_pairs(z)

        if (next_rss - rss) < 70:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    # finalize the alignment with stringent criteria
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if dp_result.rss - rss > 50:
        return pairs, z, rss, f
    # sized_peaks entries are swapped to match the order used for `pairs`
    dp_pairs = [(x[1], x[0]) for x in dp_result.sized_peaks]
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z, dp_pairs)

    return dp_pairs, dp_result.z, dp_result.rss, f
Example #6
0
def estimate_pm(peaks, bpsizes):
    """Find an initial linear fit by trying candidate rtime pairs.

    Each candidate pair from ``prepare_rtimes`` is matched against the second
    and second-to-last entries of ``bpsizes`` through a first-order
    ``estimate_z``. The candidate with the lowest ``ZFunc`` score seeds
    ``align_dp``.

    Returns:
        A tuple ``(pairs, z)``: ``pairs`` holds the DP-sized peaks with their
        first two items swapped, ``z`` is the DP result's ``z``.
    """
    peak_rtimes = [peak.rtime for peak in peaks]
    candidate_pairs = prepare_rtimes(peak_rtimes)
    size_pair = [bpsizes[1], bpsizes[-2]]

    f = ZFunc(peaks, bpsizes, [], estimate=True)

    candidates = []
    for rtime_pair in candidate_pairs:
        # only pairs in strictly increasing rtime order are tried
        if rtime_pair[0] >= rtime_pair[1]:
            continue

        # two points define the line y = ax + b, so a first-order fit is
        # exact here
        fit = estimate_z(rtime_pair, size_pair, 1)
        candidates.append((f(fit.z), fit))
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, fit.z, [])

    # lowest score first; the stable sort keeps insertion order among ties
    ranked = sorted(candidates, key=lambda entry: entry[0])
    best_fit = ranked[0][1]

    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, best_fit.z,
                         best_fit.rss)
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(sized[1], sized[0]) for sized in dp_result.sized_peaks])

    swapped = [(sized[1], sized[0]) for sized in dp_result.sized_peaks]
    return (swapped, dp_result.z)
Example #7
0
def estimate_pm(peaks, bpsizes):
    """Estimate anchor pairs and an initial z from candidate rtime pairs.

    A linear fit is computed for every candidate rtime pair produced by
    ``prepare_rtimes`` (against ``bpsizes[1]`` and ``bpsizes[-2]``), scored
    with a ``ZFunc``, and the best-scoring fit is refined by ``align_dp``.

    Returns:
        ``(pairs, z)`` where ``pairs`` is built from the DP result's
        ``sized_peaks`` with the first two items of each entry swapped.
    """

    def swapped_pairs(sized_peaks):
        # (a, b, ...) entries become (b, a)
        return [(entry[1], entry[0]) for entry in sized_peaks]

    rtime_points = prepare_rtimes([peak.rtime for peak in peaks])
    bpsize_pair = [bpsizes[1], bpsizes[-2]]

    f = ZFunc(peaks, bpsizes, [], estimate=True)

    scored_fits = []
    for rtime_pair in rtime_points:
        if rtime_pair[0] >= rtime_pair[1]:
            # skip pairs that are not in strictly increasing order
            continue

        # a line through two (rtime, bpsize) points: order-1 fit
        zres = estimate_z(rtime_pair, bpsize_pair, 1)
        score = f(zres.z)
        scored_fits.append((score, zres))
        if is_verbosity(5):
            plot(f.rtimes, f.sizes, zres.z, [])

    # ascending by score; ties keep their insertion order
    scored_fits = sorted(scored_fits, key=lambda item: item[0])
    _, zresult = scored_fits[0]

    dp_result = align_dp(
        f.rtimes, f.sizes, f.similarity, zresult.z, zresult.rss)
    if is_verbosity(5):
        plot(f.rtimes, f.sizes, dp_result.z,
             swapped_pairs(dp_result.sized_peaks))

    return (swapped_pairs(dp_result.sized_peaks), dp_result.z)
Example #8
0
 def create_channels(self, params):
     """Build and register one channel per separated trace channel.

     The trace from ``self.get_trace()`` is split by
     ``algo.separate_channels``; each resulting item becomes a
     ``self.Channel`` (status ``reseted``) added via ``self.add_channel``.
     """
     if is_verbosity(4):
         cerr('I: Generating channels for %s' % self.filename)

     raw_trace = self.get_trace()
     separated = algo.separate_channels(raw_trace, params)

     for item in separated:
         new_channel = self.Channel(
             data=item.smooth_channel,
             dye=item.dye_name,
             wavelen=item.dye_wavelength,
             status=const.channelstatus.reseted,
             fsa=self,
         )
         self.add_channel(new_channel)
Example #9
0
def generate_similarity(peaks):
    """Return one similarity weight per peak, derived from its rfu.

    The reference rfu is the 3rd highest rfu among the peaks (or the lowest
    one when fewer than 3 peaks are given). Peaks at or above the reference
    get 1.0; peaks below it get ``(log10(rfu / reference) + N) / N`` where
    ``N`` is the number of peaks.

    Args:
        peaks: iterable of objects with an ``rfu`` attribute.

    Returns:
        A list of weights in the same order as ``peaks``; an empty list when
        ``peaks`` is empty.
    """

    rfus = [p.rfu for p in peaks]
    if not rfus:
        # nothing to weight; previously this raised IndexError
        return []

    # use the 3rd highest peak as reference since the 1st or 2nd may be
    # noises; with fewer than 3 peaks fall back to the lowest one instead of
    # raising IndexError
    ranked_rfus = sorted(rfus, reverse=True)
    highest_rfu = ranked_rfus[min(2, len(ranked_rfus) - 1)]

    N = len(rfus)
    similarity = [
        (np.log10(rfu / highest_rfu) + N) / N if rfu < highest_rfu else 1.0
        for rfu in rfus
    ]
    if is_verbosity(4):
        print(N, ' => ')
        print(rfus)
        print(highest_rfu)
        print(similarity)

    return similarity
Example #10
0
def align_dp(rtimes, sizes, similarity, z, rss, order=3):
    """Align ladder sizes with peak rtimes by iterated dynamic programming.

    Each pass scores all size/rtime combinations with the current polynomial
    ``z``, runs ``dp`` (global alignment, gap penalty -5e-3) and refits ``z``
    from the matched pairs. Iteration stops when the DP score stops improving.

    Args:
        rtimes: peak retention times.
        sizes: ladder sizes.
        similarity: passed through unchanged to ``generate_scores``.
        z: initial polynomial coefficients (used via ``np.poly1d``).
        rss: initial rss, returned unchanged if no pass is accepted.
        order: polynomial order used for refitting.

    Returns:
        ``DPResult(dpscore, rss, z, sized_peaks)`` for the last accepted pass;
        ``sized_peaks`` is a list of ``(size, rtime)`` tuples, and is empty if
        no pass was accepted.
    """

    # NOTE(review): rtimes are re-sorted here but `similarity` is not; confirm
    # that generate_scores expects it in this order.
    sizes = sorted(sizes, reverse=True)
    rtimes = sorted(rtimes, reverse=True)

    dpscore = -1
    # bound up front: the loop may stop on its very first pass, which used to
    # leave `sized_peaks` undefined at the return statement
    sized_peaks = []

    while True:

        S = generate_scores(sizes, rtimes, similarity, np.poly1d(z))

        result = dp(S, -5e-3)

        cur_dpscore = result['D'][-1][-1]

        # decide whether to stop before refitting, so that no fit is computed
        # only to be thrown away
        if cur_dpscore < dpscore:
            if is_verbosity(4):
                cerr('W: dynamic programming did not converge!!')
            break

        if cur_dpscore == dpscore:
            break

        aligned_peaks = [(sizes[i], rtimes[j]) for i, j in result['matches']]

        # realign: refit z from the matched (rtime -> size) pairs
        std_sizes, peak_rtimes = zip(*aligned_peaks)
        cur_zres = estimate_z(peak_rtimes, std_sizes, order)

        z = cur_zres.z
        rss = cur_zres.rss
        dpscore = cur_dpscore
        sized_peaks = aligned_peaks

    return DPResult(dpscore, rss, z, sized_peaks)
Example #11
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment downwards, one ladder size at a time.

    Ladder sizes below the smallest anchor size are tried from the end of the
    filtered list to its start. A size is kept only if adding it does not
    raise the rss by more than 20; otherwise it is dropped again and the
    previous fit is retained.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-item pairs; after sorting, the first items
            are passed to ``estimate_z`` as rtimes and the second as bp sizes.
        anchor_z: accepted for interface compatibility; not used in the body.

    Returns:
        ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance. When no
        size is accepted, ``pairs`` is the sorted anchor pairs.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
    current_sizes = anchor_bpsizes
    zscore = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
    z = zscore.z
    rss = zscore.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Start from the anchor pairs so that `pairs` is always bound: previously
    # the function raised UnboundLocalError when no ladder size lay below the
    # lowest anchor, or when the first candidate was rejected.
    pairs = list(anchor_pairs)

    while remaining_sizes:

        current_sizes.insert(0, remaining_sizes.pop())
        f.set_sizes(current_sizes)
        _, next_z = minimize_score(f, z, 3)
        next_pairs, next_rss = f.get_pairs(next_z)

        # if delta rss (current rss - prev rss) is above a certain threshold,
        # then assume the latest ladder size is not appropriate, and
        # use previous z and rss
        if (next_rss - rss) > 20:
            current_sizes.pop(0)
        else:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Example #12
0
def align_lower_pm(peaks, ladder, anchor_pairs, anchor_z):
    """Extend an anchor alignment downwards, one ladder size at a time.

    Ladder sizes below the smallest anchor size are tried from the end of the
    filtered list to its start. A size is kept only if adding it does not
    raise the rss by more than 20; otherwise it is dropped again and the
    previous fit is retained.

    Args:
        peaks: peaks handed to ``ZFunc``.
        ladder: mapping providing ``'sizes'``.
        anchor_pairs: iterable of 2-item pairs; after sorting, the first items
            are passed to ``estimate_z`` as rtimes and the second as bp sizes.
        anchor_z: accepted for interface compatibility; not used in the body.

    Returns:
        ``(pairs, z, rss, f)`` where ``f`` is the ``ZFunc`` instance. When no
        size is accepted, ``pairs`` is the sorted anchor pairs.
    """

    # this is another attempt to perform ladder - size standard alignment one peak by one

    anchor_pairs = sorted(anchor_pairs)
    anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
    anchor_rtimes = list(anchor_rtimes)
    anchor_bpsizes = list(anchor_bpsizes)
    remaining_sizes = [x for x in ladder['sizes'] if x < anchor_bpsizes[0]]
    current_sizes = anchor_bpsizes
    zscore = estimate_z(anchor_rtimes, anchor_bpsizes, 3)
    z = zscore.z
    rss = zscore.rss
    f = ZFunc(peaks, current_sizes, anchor_pairs)

    # Start from the anchor pairs so that `pairs` is always bound: previously
    # the function raised UnboundLocalError when no ladder size lay below the
    # lowest anchor, or when the first candidate was rejected.
    pairs = list(anchor_pairs)

    while remaining_sizes:

        current_sizes.insert(0, remaining_sizes.pop())
        f.set_sizes(current_sizes)
        _, next_z = minimize_score(f, z, 3)
        next_pairs, next_rss = f.get_pairs(next_z)

        # if delta rss (current rss - prev rss) is above a certain threshold,
        # then assume the latest ladder size is not appropriate, and
        # use previous z and rss
        if (next_rss - rss) > 20:
            current_sizes.pop(0)
        else:
            z = next_z
            rss = next_rss
            pairs = next_pairs

        if is_verbosity(5):
            plot(f.rtimes, f.sizes, z, pairs)

    return pairs, z, rss, f
Example #13
0
def do_listpeaks(args, fsa_list, dbh):
    """Write a peak table for every FSA to a file or to stdout.

    Args:
        args: object providing ``outfile`` (``'-'`` means stdout),
            ``peaks_format`` (``'standard'`` or ``'peakscanner'``) and
            ``merge``.
        fsa_list: iterable of ``(fsa, fsa_index)`` tuples.
        dbh: not used in the body.

    Raises:
        RuntimeError: if ``args.peaks_format`` is not a known format.

    With ``args.merge`` set, non-ladder channels list their smeared alleles
    (always in the peakscanner row layout, with the group column); otherwise
    the regular alleles are listed in the selected format.
    """

    # The stream is opened once and closed only if this function opened it.
    # The previous version closed sys.stdout when outfile was '-', so any
    # later write to stdout failed; it also leaked the file handle on the
    # unknown-format error and re-opened the file for every FSA.
    to_stdout = args.outfile == '-'
    out_stream = sys.stdout if to_stdout else open(args.outfile, 'w')

    try:
        # header
        if args.peaks_format == 'standard':
            out_stream.write(
                'SAMPLE\tFILENAME   \tDYE\tRTIME\tSIZE\tHEIGHT\tAREA\tSCORE\n')
        elif args.peaks_format == 'peakscanner':
            out_stream.write(
                "Dye/Sample Peak,Sample File Name,Type,Size,Height,Area in Point,Area in BP,Corrected Area in BP,Data Point,Begin Point,"
            )
            if args.merge:
                out_stream.write(
                    "Begin BP,End Point,End BP,Width in Point,Width in BP,Score,Peak Group,User Comments,User Edit\n"
                )
            else:
                out_stream.write(
                    "Begin BP,End Point,End BP,Width in Point,Width in BP,Score,User Comments,User Edit\n"
                )
        else:
            raise RuntimeError("Unknown value for args.peaks_format")

        for (fsa, fsa_index) in fsa_list:
            cverr(3, 'D: calling FSA %s' % fsa.filename)

            markers = fsa.panel.data['markers']

            for channel in fsa.channels:
                if channel.is_ladder():
                    color = markers['x/ladder']['filter']
                else:
                    color = markers['x/' + channel.dye]['filter']

                alleles = channel.get_alleles(broad_peaks_only=False)

                if is_verbosity(4):
                    cout('Marker => %s | %s [%d]' %
                         (channel.marker.code, channel.dye, len(alleles)))
                    cout("channel has alleles :", len(alleles))

                smeared_alleles = channel.smeared_alleles
                if (not args.merge) or channel.is_ladder():
                    # i is the 1-based peak number within the channel
                    for i, p in enumerate(alleles, start=1):
                        if args.peaks_format == 'standard':
                            out_stream.write(
                                '%6s\t%10s\t%3s\t%d\t%d\t%5i\t%3.2f\t%3.2f\n' %
                                (fsa_index, fsa.filename[:-4], color, p.rtime,
                                 p.size, p.height, p.area, p.qscore))
                        else:
                            out_stream.write(
                                '"%s, %i",%s, %s, %f, %i, %i, %i, %i, %i, %i, %f, %i, %f, %i, %f, %f,,\n'
                                % (color, i, fsa.filename, p.type, p.size,
                                   p.height, p.area, p.area_bp, p.area_bp_corr,
                                   p.rtime, p.brtime, p.begin_bp, p.ertime,
                                   p.end_bp, p.wrtime, p.width_bp, p.qscore))

                else:
                    if is_verbosity(4):
                        cout('Marker => %s | %s [%d]' %
                             (channel.marker.code, channel.dye,
                              len(smeared_alleles)))
                        cout("channel has smeared alleles :",
                             len(smeared_alleles))
                    for i, p in enumerate(smeared_alleles, start=1):
                        out_stream.write(
                            '"%s, %i", %s, %s, %f, %i, %i, %i, %i, %i, %i, %f, %i, %f, %i, %f, %f, %i,,\n'
                            % (color, i, fsa.filename, p.type, p.size,
                               p.height, p.area, p.area_bp, p.area_bp_corr,
                               p.rtime, p.brtime, p.begin_bp, p.ertime,
                               p.end_bp, p.wrtime, p.width_bp, p.qscore,
                               p.group))
    finally:
        if not to_stdout:
            out_stream.close()
Example #14
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to the ladder by extending an anchor alignment.

    Anchor pairs are either estimated from peaks with rtime between 1500 and
    5000 (using ``ladder['signature']``) or taken from the caller. The anchor
    is extended upwards and downwards, refined once more with ``align_dp``,
    and scored with ``ladder['qcfunc']``.

    Args:
        peaks: peaks to align.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'`` (plus what
            the upper/lower extension helpers read).
        anchor_pairs: optional iterable of 2-item pairs; the first items are
            passed to ``estimate_z`` as rtimes and the second as bp sizes.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict score exceeds
        0.9, otherwise the relaxed-score result tagged ``pm_relax``.
    """

    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # take the coefficients, matching what estimate_pm returns in the
        # other branch (estimate_z returns a result object carrying .z)
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # sorted copy: do not reorder the caller's list in place
    anchor_pairs = sorted(anchor_pairs)
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        # debugging dump; was printed unconditionally before
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
    # (the unreachable experimental code that followed this return was removed)
Example #15
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks to the ladder by extending an anchor alignment.

    When no anchor pairs are supplied they are estimated from the peaks whose
    rtime falls inside the anchor window. The window is
    ``ANCHOR_RTIME_LOWER_BOUND``..``ANCHOR_RTIME_UPPER_BOUND``, scaled up
    proportionally when the longest peak rtime exceeds
    ``PEAK_RTIME_UPPER_BOUND``. The anchor is then extended upwards and
    downwards, refined with ``align_dp`` and scored with ``ladder['qcfunc']``.

    Args:
        peaks: peaks to align.
        ladder: mapping providing ``'signature'`` and ``'qcfunc'`` (plus what
            the upper/lower extension helpers read).
        anchor_pairs: optional iterable of 2-item pairs; the first items are
            passed to ``estimate_z`` as rtimes and the second as bp sizes.

    Returns:
        ``AlignResult`` tagged ``pm_strict`` when the strict score exceeds
        0.9, otherwise the relaxed-score result tagged ``pm_relax``.
    """

    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # stretch the anchor window by the same ratio as the run length
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [p for p in peaks if anchor_start < p.rtime < anchor_end]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks, ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        # take the coefficients, matching what estimate_pm returns in the
        # other branch (estimate_z returns a result object carrying .z)
        initial_z = estimate_z(rtimes, bpsizes, 1).z

    # sorted copy: do not reorder the caller's list in place
    anchor_pairs = sorted(anchor_pairs)
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
    # (the unreachable experimental code that followed this return was removed)
Example #16
0
def filter_for_artifact(peaks, params, expected_peak_number=0):
    """Drop peaks that look like noise or artifacts of a neighbouring peak.

    Args:
        peaks: peak objects; the body reads ``theta``, ``omega``, ``rfu``,
            ``rtime``, ``area``, ``beta``, ``wrtime``, ``brtime`` and
            ``ertime``.
        params: object providing ``artifact_ratio``, ``artifact_dist`` and
            ``min_theta``.
        expected_peak_number: number of peaks expected in the channel. When
            positive, the omega and rfu thresholds are adapted to the
            channel's own peaks; when 0, only the fixed thresholds apply.

    Returns:
        ``peaks`` unchanged when its length already equals
        ``expected_peak_number``; otherwise a new list of the retained peaks
        sorted by ``rtime``.
    """

    # the following code in this function performs the necessary acrobatic act
    # to select the most likely peaks that can be considered as true signals,
    # which is especially necessary for ladder - size assignment

    if len(peaks) == expected_peak_number:
        return peaks

    # we need to adapt to the noise level of current channel
    if expected_peak_number > 0:
        epn = expected_peak_number
        theta_peaks = sorted(peaks, key=lambda x: x.theta,
                             reverse=True)[round(epn / 2) + 3:epn - 1]
        omega_peaks = sorted(peaks, key=lambda x: x.omega, reverse=True)
        omega_peaks = omega_peaks[2:4] + omega_peaks[round(epn / 2):epn - 1]
        rfu_peaks = sorted(peaks, key=lambda x: x.rfu, reverse=True)[:epn - 1]

        # NOTE: q_theta is built but not applied in the filter loop below (the
        # check there is disabled). The fit is kept because it drives the
        # verbosity-4 diagnostic plot.
        if theta_peaks[-1].theta < 8:
            theta_peaks.sort()
            thetas = np.array([p.theta for p in theta_peaks])
            rtimes = [p.rtime for p in theta_peaks]

            # each fit gets its own name so a closure can never pick up the
            # parameters of a later fit
            theta_popt, _ = curve_fit(math_func, rtimes, 0.5 * thetas,
                                      p0=[-1, 1])

            if is_verbosity(4):
                xx = np.linspace(rtimes[0], rtimes[-1] + 2000, 100)
                yy = math_func(xx, *theta_popt)
                plt.plot(xx, yy)
                plt.scatter([p.rtime for p in peaks], [p.theta for p in peaks])
                plt.show()

            q_theta = lambda x: (x.theta >= math_func(x.rtime, *theta_popt)
                                 or x.theta > 100)

        else:
            q_theta = lambda x: x.theta >= min(theta_peaks[-1].theta,
                                               params.min_theta)

        if omega_peaks[-1].omega < 200:
            omega_peaks.sort()
            omegas = np.array([p.omega for p in omega_peaks])
            rtimes = np.array([p.rtime for p in omega_peaks])

            # generate a quadratic threshold for omega

            # generate a quadratic ratio series first: 0.05 at both ends of
            # the rtime range and 0.25 in the middle
            ratio_popt, _ = curve_fit(
                quadratic_math_func,
                [rtimes[0],
                 (rtimes[0] + rtimes[-1]) / 2, rtimes[-1]], [0.05, 0.25, 0.05])
            ratios = quadratic_math_func(rtimes, *ratio_popt)
            if is_verbosity(4):
                plt.plot(rtimes, ratios)
                plt.show()

            # use the ratios to enforce quadratic threshold
            omega_popt, _ = curve_fit(quadratic_math_func,
                                      rtimes,
                                      ratios * omegas,
                                      p0=[-1, 1, 0])
            if omega_popt[0] > 0:
                # positive leading coefficient: fall back to a two-parameter
                # fit on a small flat ratio
                omega_popt, _ = curve_fit(math_func,
                                          rtimes,
                                          0.25 * omegas,
                                          p0=[1, 0])
                omega_popt = np.insert(omega_popt, 0, 0.0)  # convert to 3 params
            if is_verbosity(4):
                plt.scatter(rtimes, omegas)
                xx = np.linspace(rtimes[0], rtimes[-1] + 2000, 100)
                yy = quadratic_math_func(xx, *omega_popt)
                plt.plot(xx, yy)
                plt.scatter([p.rtime for p in peaks], [p.omega for p in peaks])
                plt.show()

            q_omega = lambda x: (x.omega >= 100 or x.omega >=
                                 quadratic_math_func(x.rtime, *omega_popt))

        else:

            q_omega = lambda x: x.omega >= min(omega_peaks[-1].omega, 50)

        min_rfu = rfu_peaks[-1].rfu * 0.125

    else:
        # No expected peak count: there is no adaptive omega threshold, so
        # accept every peak at that step. Previously q_omega was left
        # undefined here and the filter loop raised NameError.
        q_omega = lambda x: True
        min_rfu = 2

    # filter for too sharp/thin peaks
    filtered_peaks = []
    for p in peaks:
        cverr(5, p)

        if len(filtered_peaks) < 2 and p.area > 50:
            # first two real peaks might be a bit lower
            filtered_peaks.append(p)
            continue

        if not q_omega(p):
            cverr(5, '! q_omega')
            continue
        if p.theta < 1.0 and p.area < 25 and p.omega < 5:
            cverr(5, '! extreme theta & area & omega')
            continue
        if p.rfu < min_rfu:
            cverr(5, '! extreme min_rfu')
            continue
        if p.beta > 25 and p.theta < 0.5:
            cverr(5, '! extreme beta')
            continue
        if p.wrtime < 3:
            continue
        if p.rfu >= 25 and p.beta * p.theta < 6:
            continue
        if p.rfu < 25 and p.beta * p.theta < 3:
            continue
        filtered_peaks.append(p)

    # filter for distance between peaks and their rfu ratio
    peaks = sorted(filtered_peaks, key=lambda x: x.rtime)
    non_artifact_peaks = []
    for idx, p in enumerate(peaks):

        if idx > 0:
            prev_p = peaks[idx - 1]
            if (p.brtime - prev_p.ertime < params.artifact_dist
                    and p.rfu < params.artifact_ratio * prev_p.rfu):
                # we are artifact, just skip
                print('artifact1:', p)
                continue

        if idx < len(peaks) - 1:
            next_p = peaks[idx + 1]
            if (next_p.brtime - p.ertime < params.artifact_dist
                    and p.rfu < params.artifact_ratio * next_p.rfu):
                # we are artifact, just skip
                print('artefact2:', p)
                continue

        non_artifact_peaks.append(p)

    peaks = non_artifact_peaks

    cverr(3, '## non artifact peaks: %d' % len(peaks))

    return peaks
Example #17
0
def filter_for_artifact(peaks, params, expected_peak_number=0):
    """Drop peaks that look like noise or artifacts and return the rest.

    The function runs two passes:

    1. a per-peak shape/intensity filter using the peak attributes
       ``omega``, ``theta``, ``area``, ``rfu``, ``beta`` and ``wrtime``;
    2. a neighbour filter that drops a peak when it lies closer than
       ``params.artifact_dist`` to an adjacent peak and its ``rfu`` is below
       ``params.artifact_ratio`` times that neighbour's ``rfu``.

    Args:
        peaks: sequence of peak objects exposing the attributes named above
            plus ``rtime``, ``brtime`` and ``ertime``.
        params: object providing ``artifact_ratio`` and ``artifact_dist``
            (the original notes suggest ``artifact_dist ~ 5``).
            ``params.min_theta`` is only referenced by the theta predicate,
            which is currently not applied.
        expected_peak_number: when > 0, the omega and rfu thresholds are
            derived from the peaks themselves; when 0, no omega threshold is
            applied and the minimum rfu is fixed at 2.

    Returns:
        ``peaks`` unchanged when its length already equals
        ``expected_peak_number``; otherwise a new list of the surviving
        peaks sorted by ``rtime``.
    """

    # the following code in this function performs the necessary acrobatic act
    # to select the most likely peaks that can be considered as true signals,
    # which is especially necessary for ladder - size assignment

    if len(peaks) == expected_peak_number:
        return peaks

    # we need to adapt to the noise level of current channel
    if expected_peak_number > 0:
        epn = expected_peak_number
        half = round(epn / 2)

        theta_peaks = sorted(
            peaks, key=lambda x: x.theta, reverse=True)[half + 3:epn - 1]
        by_omega = sorted(peaks, key=lambda x: x.omega, reverse=True)
        omega_peaks = by_omega[2:4] + by_omega[half:epn - 1]
        rfu_peaks = sorted(peaks, key=lambda x: x.rfu, reverse=True)[:epn - 1]

        # NOTE(review): q_theta is built (and plotted at verbosity 4) but the
        # filter loop below does not apply it; kept so the diagnostics and the
        # predicate remain available.
        if theta_peaks[-1].theta < 8:
            # relies on the peak objects being orderable
            theta_peaks.sort()
            thetas = np.array([p.theta for p in theta_peaks])
            theta_rtimes = [p.rtime for p in theta_peaks]

            # dedicated name: the lambda below binds late, so it must not
            # share a variable with the omega fit further down
            theta_popt, _ = curve_fit(
                math_func, theta_rtimes, 0.5 * thetas, p0=[-1, 1])

            if is_verbosity(4):
                xx = np.linspace(theta_rtimes[0], theta_rtimes[-1] + 2000, 100)
                yy = math_func(xx, *theta_popt)
                plt.plot(xx, yy)
                plt.scatter([p.rtime for p in peaks], [p.theta for p in peaks])
                plt.show()

            q_theta = lambda x: (
                x.theta >= math_func(x.rtime, *theta_popt) or x.theta > 100)

        else:
            q_theta = lambda x: x.theta >= min(
                theta_peaks[-1].theta, params.min_theta)

        if omega_peaks[-1].omega < 200:
            omega_peaks.sort()
            omegas = np.array([p.omega for p in omega_peaks])
            rtimes = np.array([p.rtime for p in omega_peaks])

            # generate a quadratic threshold for omega

            # generate a quadratic ratio series first: 0.05 at both ends of
            # the rtime range and 0.25 at its midpoint
            ratio_popt, _ = curve_fit(
                quadratic_math_func,
                [rtimes[0], (rtimes[0] + rtimes[-1]) / 2, rtimes[-1]],
                [0.05, 0.25, 0.05])
            ratios = quadratic_math_func(rtimes, *ratio_popt)
            if is_verbosity(4):
                plt.plot(rtimes, ratios)
                plt.show()

            # use the ratios to enforce quadratic threshold
            omega_popt, _ = curve_fit(
                quadratic_math_func, rtimes, ratios * omegas, p0=[-1, 1, 0])
            if omega_popt[0] > 0:
                # enforce small flat ratio
                omega_popt, _ = curve_fit(
                    math_func, rtimes, 0.25 * omegas, p0=[1, 0])
                # convert to 3 params
                omega_popt = np.insert(omega_popt, 0, 0.0)
            if is_verbosity(4):
                plt.scatter(rtimes, omegas)
                xx = np.linspace(rtimes[0], rtimes[-1] + 2000, 100)
                yy = quadratic_math_func(xx, *omega_popt)
                plt.plot(xx, yy)
                plt.scatter([p.rtime for p in peaks], [p.omega for p in peaks])
                plt.show()

            q_omega = lambda x: (
                x.omega >= 100
                or x.omega >= quadratic_math_func(x.rtime, *omega_popt))

        else:
            q_omega = lambda x: x.omega >= min(omega_peaks[-1].omega, 50)

        min_rfu = rfu_peaks[-1].rfu * 0.125

    else:
        # No expected peak count: there is nothing to calibrate against, so
        # accept every omega.  Without this binding the loop below raised
        # NameError on the first peak that missed the area shortcut.
        q_omega = lambda x: True
        min_rfu = 2

    # filter for too sharp/thin peaks
    filtered_peaks = []
    for p in peaks:
        cverr(5, str(p))

        if len(filtered_peaks) < 2 and p.area > 50:
            # first two real peaks might be a bit lower
            filtered_peaks.append(p)
            continue

        if not q_omega(p):
            cverr(5, '! q_omega')
            continue
        if p.theta < 1.0 and p.area < 25 and p.omega < 5:
            cverr(5, '! extreme theta & area & omega')
            continue
        if p.rfu < min_rfu:
            cverr(5, '! extreme min_rfu')
            continue
        if p.beta > 25 and p.theta < 0.5:
            cverr(5, '! extreme beta')
            continue
        if p.wrtime < 3:
            continue
        # the beta * theta floor is stricter for peaks with rfu >= 25
        if p.rfu >= 25 and p.beta * p.theta < 6:
            continue
        if p.rfu < 25 and p.beta * p.theta < 3:
            continue
        filtered_peaks.append(p)

    # filter for distance between peaks and their rfu ratio
    peaks = sorted(filtered_peaks, key=lambda x: x.rtime)
    last_idx = len(peaks) - 1
    non_artifact_peaks = []
    for idx, p in enumerate(peaks):

        # neighbours are taken from the sorted list itself, so a neighbour
        # that was dropped as an artifact still takes part in the comparison
        if idx > 0:
            prev_p = peaks[idx - 1]
            if (p.brtime - prev_p.ertime < params.artifact_dist
                    and p.rfu < params.artifact_ratio * prev_p.rfu):
                # we are artifact, just skip
                print('artifact1:', p)
                continue

        if idx < last_idx:
            next_p = peaks[idx + 1]
            if (next_p.brtime - p.ertime < params.artifact_dist
                    and p.rfu < params.artifact_ratio * next_p.rfu):
                # we are artifact, just skip
                print('artefact2:', p)
                continue

        non_artifact_peaks.append(p)

    peaks = non_artifact_peaks

    cverr(3, '## non artifact peaks: %d' % len(peaks))

    return peaks
Example #18
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks against a ladder, growing the match outwards from anchors.

    When no anchor pairs are supplied, they are estimated with
    ``estimate_pm`` from the peaks whose ``rtime`` falls inside the anchor
    window.  The window is ``ANCHOR_RTIME_LOWER_BOUND`` ..
    ``ANCHOR_RTIME_UPPER_BOUND``, scaled up proportionally when the longest
    peak ``rtime`` exceeds ``PEAK_RTIME_UPPER_BOUND``.

    Args:
        peaks: peak objects exposing ``rtime``.
        ladder: mapping providing at least ``'signature'`` and ``'qcfunc'``
            (other keys are consumed by the helpers called here).
        anchor_pairs: optional iterable of pairs, unpacked here as
            ``(rtime, bpsize)``.  It is not modified.

    Returns:
        An ``AlignResult`` tagged ``pm_strict`` when the strict QC score is
        above 0.9, otherwise one tagged ``pm_relax``.
    """

    if not anchor_pairs:
        longest_rtime_peak = max(p.rtime for p in peaks)
        if longest_rtime_peak > PEAK_RTIME_UPPER_BOUND:
            # stretch the anchor window by the same factor the run overshoots
            bound_adjust_ratio = longest_rtime_peak / PEAK_RTIME_UPPER_BOUND
            anchor_start = ANCHOR_RTIME_LOWER_BOUND * bound_adjust_ratio
            anchor_end = ANCHOR_RTIME_UPPER_BOUND * bound_adjust_ratio
        else:
            anchor_start = ANCHOR_RTIME_LOWER_BOUND
            anchor_end = ANCHOR_RTIME_UPPER_BOUND
        anchor_peaks = [
            p for p in peaks if anchor_start < p.rtime < anchor_end
        ]
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        initial_z = estimate_z(rtimes, bpsizes, 1)

    # sorted() instead of .sort(): leaves the caller's sequence untouched and
    # also accepts tuples
    anchor_pairs = sorted(anchor_pairs)

    # extend the alignment above the anchors first, then below them
    pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs, initial_z)
    pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, initial_z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)
    if is_verbosity(1):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
    if is_verbosity(4):
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    # try the strict QC first and fall back to the relaxed one
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)
Example #19
0
def align_pm(peaks, ladder, anchor_pairs=None):
    """Align peaks against a ladder, growing the match outwards from anchors.

    When no anchor pairs are supplied, they are estimated with
    ``estimate_pm`` from the peaks whose ``rtime`` lies strictly between
    1500 and 5000.  If the anchors already cover every ladder size, the
    polynomial is fitted directly; otherwise the alignment is extended
    upwards and then downwards from the anchors.

    Args:
        peaks: peak objects exposing ``rtime``.
        ladder: mapping providing at least ``'signature'``, ``'sizes'`` and
            ``'qcfunc'`` (other keys are consumed by the helpers called
            here).
        anchor_pairs: optional iterable of pairs, unpacked here as
            ``(rtime, bpsize)``.  It is not modified.

    Returns:
        An ``AlignResult`` tagged ``pm_strict`` when the strict QC score is
        above 0.9, otherwise one tagged ``pm_relax``.
    """

    if not anchor_pairs:
        anchor_peaks = [p for p in peaks if 1500 < p.rtime < 5000]

        # this finds the pair of peaks that best match to the 2nd and next-to-last ladder steps, and
        # does a linear fit to the rest of peaks to find the peaks matched to ladder steps
        anchor_pairs, initial_z = estimate_pm(anchor_peaks,
                                              ladder['signature'])

    else:
        rtimes, bpsizes = zip(*anchor_pairs)
        initial_z = estimate_z(rtimes, bpsizes, 1)

    # sorted() instead of .sort(): leaves the caller's sequence untouched and
    # also accepts tuples
    anchor_pairs = sorted(anchor_pairs)

    # if the number of anchor pairs equals the number of ladder steps, no need to do pair matching
    if len(anchor_pairs) == len(ladder['sizes']):

        f = ZFunc(peaks, ladder['sizes'], anchor_pairs, estimate=True)

        anchor_rtimes, anchor_bpsizes = zip(*anchor_pairs)
        zres = estimate_z(anchor_rtimes, anchor_bpsizes, 2)
        score, z = minimize_score(f, zres.z, 2)
        pairs, rss = f.get_pairs(z)

    else:
        # extend the alignment above the anchors first, then below them
        pairs, z, rss, f = align_upper_pm(peaks, ladder, anchor_pairs,
                                          initial_z)
        if is_verbosity(4):
            print(pairs)
        pairs, z, rss, f = align_lower_pm(peaks, ladder, pairs, z)

    # last dp
    dp_result = align_dp(f.rtimes, f.sizes, f.similarity, z, rss)

    if is_verbosity(4):
        import pprint
        pprint.pprint(dp_result.sized_peaks)
        plot(f.rtimes, f.sizes, dp_result.z,
             [(x[1], x[0]) for x in dp_result.sized_peaks])

    dp_result.sized_peaks = f.get_sized_peaks(dp_result.sized_peaks)

    # try the strict QC first and fall back to the relaxed one
    score, msg = ladder['qcfunc'](dp_result, method='strict')
    if score > 0.9:
        return AlignResult(score, msg, dp_result, const.alignmethod.pm_strict)

    score, msg = ladder['qcfunc'](dp_result, method='relax')
    return AlignResult(score, msg, dp_result, const.alignmethod.pm_relax)