예제 #1
0
 def attemptJump(self, rec):
     # type: (Record) -> bool
     """Try to move the resolved part of ``rec`` further to the right.

     Target segments of alignments that start far enough from the beginning
     of both their own source and their reverse complement's source are
     collected as "bad" segments. The candidate ("good") segments are then
     derived from that collection via ``reverse(...).reduce(...)`` over the
     window from ``rec.resolved.right - params.k`` to the bound computed by
     ``findAndFilterResolvedBound``.

     Returns True as soon as ``rec.setResolved`` has been called with a
     segment of length at least ``params.k`` whose right end lies beyond
     ``rec.resolved.right``; returns False if no such segment is found.
     """
     def endThreshold():
         # Evaluated on demand so that params is only read when an
         # alignment is actually being checked.
         return min(params.bad_end_length, params.k / 2)

     def startsAwayFromEnds(alignment):
         # Both the alignment and its reverse complement must begin past
         # the threshold on their source sequence.
         return alignment.seg_from.left > endThreshold() and \
             alignment.rc.seg_from.left > endThreshold()

     bound = self.findAndFilterResolvedBound(rec, params.l)
     bad_segments = SegmentStorage()

     # Alignments stored in the record itself; stop at the first one that
     # starts beyond the bound.
     for rec_al in rec:
         if rec_al.seg_to.left > bound:
             break
         if startsAwayFromEnds(rec_al):
             bad_segments.add(rec_al.seg_to)

     # Alignments from the dot plot that intersect the window of interest.
     scan_window = rec.line.segment(rec.resolved.right - params.k, bound)
     for plot_al in self.dot_plot.allInter(scan_window):
         if startsAwayFromEnds(plot_al):
             bad_segments.add(plot_al.seg_to)

     bad_segments.mergeSegments(params.k - 200)
     sys.stdout.trace("Bad segments:", bad_segments)

     complement = bad_segments.reverse(rec.line, params.k - 100)
     jump_window = rec.line.segment(rec.resolved.right - params.k, bound)
     good_segments = complement.reduce(jump_window)

     for good in good_segments:
         # Clip the left end at 0 before looking for good copies.
         clipped = Segment(good.contig, max(0, good.left), good.right)
         candidates = self.segmentsWithGoodCopies(rec.resolved, clipped,
                                                  params.k)
         for candidate in candidates:
             long_enough = len(candidate) >= params.k
             if long_enough and candidate.right > rec.resolved.right:
                 rec.setResolved(candidate)
                 return True
     return False
예제 #2
0
 def segmentsWithGoodCopies(self, resolved, seg, inter_size):
     # type: (Segment, Segment, int) -> List[Segment]
     """Return the list produced by ``reverse(...).reduce(seg)`` over the collected segments.

     Alignments from the dot plot that intersect ``seg`` are inspected.
     For each one whose target is at least ``inter_size`` long and whose
     source reaches past the start of the line's first initial alignment,
     two kinds of segments are collected:

     * the images (via the alignment's matching sequence) of the parts of
       the source line obtained from
       ``line.correct_segments.reverse(...).reduce(cap)``;
     * an expanded suffix position at the alignment's target end, when the
       reverse complement starts within 50 of its source start and the
       target ends no more than 100 before ``resolved.right``.

     NOTE(review): the result is presumably the portion of ``seg`` not
     covered by the collected segments -- confirm against
     ``SegmentStorage.reverse`` / ``reduce``.
     """
     # Select the alignments first; the whole list is built before any of
     # them is processed.
     relevant = []
     for candidate in self.dot_plot.allInter(seg):
         if (candidate.seg_from.left > 20 or candidate.rc.seg_to.left > 20
                 or candidate.isIdentical()):
             relevant.append(candidate)

     collected = SegmentStorage()
     for al in relevant:
         line = al.seg_from.contig  # type: NewLine
         if len(al.seg_to) < inter_size:
             continue
         line_start = line.initial[0].seg_to.left
         if al.seg_from.right <= line_start:
             continue

         cap = al.seg_from.cap(line.suffix(pos=line_start))
         uncorrected = line.correct_segments.reverse(
             line, inter_size - 1).reduce(cap)
         matching = al.matchingSequence()
         sys.stdout.trace("Incorrect: ", line, cap, uncorrected)
         for source_part in uncorrected:
             mapped = matching.mapSegDown(seg.contig, source_part,
                                          mapIn=False)
             sys.stdout.trace(
                 "Relevant unpolished k-mer segment alignment:",
                 source_part, mapped)
             collected.add(mapped)

         starts_near_end = al.rc.seg_from.left < 50
         if starts_near_end and al.seg_to.right >= resolved.right - 100:
             tail = al.seg_to.contig.suffix(pos=al.seg_to.right)
             collected.add(tail.expand(inter_size + 100))
             sys.stdout.trace("Incoming line:", resolved, seg, al)

     collected.mergeSegments(inter_size - 1)
     min_length = inter_size - 1 - max(100, inter_size / 10)
     remaining = collected.reverse(seg.contig, min_length)
     return list(remaining.reduce(seg))
예제 #3
0
def evaluatePI(dir, contigs_file, initial_file, ref_file):
    basic.ensure_dir_existance(dir)
    CreateLog(dir)
    dd = DirDistributor(os.path.join(dir, "alignments"))
    aligner = Aligner(dd)
    contigs = ContigStorage().loadFromFasta(open(contigs_file, "r"), False)
    initial = ContigStorage().loadFromFasta(open(initial_file, "r"), False)
    ref = ContigStorage().loadFromFasta(open(ref_file, "r"), False)
    segs = []
    for al in aligner.overlapAlign(initial.unique(), contigs):
        if basic.isCanonocal(al.seg_to.contig.id):
            segs.append(al.seg_to)
        else:
            segs.append(al.rc.seg_to)
    segs = sorted(segs, key=lambda seg: basic.Normalize(seg.contig.id))
    interesting = dict()
    print "Interesting segments:"
    for contig in contigs:
        interesting[contig.id] = [contig.asSegment()]
    for contig, segit in itertools.groupby(segs, lambda seg: seg.contig):
        csegs = SegmentStorage().addAll(segit)
        csegs.mergeSegments()
        csegs = csegs.reverse(contig)
        interesting[contig.id] = list(csegs)
        print list(csegs)
    print "Analysis of contigs"
    scorer = Scorer()
    for al in aligner.localAlign(contigs.unique(), ref):
        print al
        for seg in interesting[al.seg_from.contig.id]:
            if al.seg_from.expand(500).contains(
                    seg) or al.seg_from.interSize(seg) > 40000:
                tmp_al = al.reduce(query=al.seg_from.cap(seg))
                scorer.polyshMatching(tmp_al.matchingSequence(),
                                      params.score_counting_radius)
                print tmp_al.seg_from, tmp_al.seg_to, str(events)
    print ""
    print "Analysis of initial"
    for al in aligner.overlapAlign(initial, ref):
        scorer.polyshMatching(al.matchingSequence(),
                              params.score_counting_radius)
        print al.seg_from, al.seg_to, str(events)