예제 #1
0
 def splitBad(self, line, lines):
     # type: (NewLine, NewLineStorage) -> None
     """Keep only the read-supported parts of ``line``, splitting it if needed.

     Alignments from ``line.read_alignments`` that do not report
     ``contradictingRTC()`` are collected into a fresh ``AlignmentStorage``.
     The segments returned by its ``filterByCoverage`` (minimum coverage
     ``params.reliable_coverage``) that are at least ``params.k`` long are
     treated as the supported parts of the line.

     * No such segment: a warning is logged and the line is removed from
       ``lines``.
     * Exactly one segment longer than ``len(line) - 10``: the line is left
       untouched.
     * Otherwise the line is cut at the right end of the last segment and
       ``lines.splitLine`` is applied while walking the remaining segments
       from right to left.

     :param line: line to examine; it is cut in place.
     :param lines: storage that owns ``line``; used to remove or split it.
     """
     s = AlignmentStorage()
     s.addAll(al for al in line.read_alignments if not al.contradictingRTC())
     covered = s.filterByCoverage(mi=params.reliable_coverage, k=params.k)
     # A real list is required below (len(), indexing, slicing); the builtin
     # filter() only returns a list on Python 2, on Python 3 it is lazy.
     segs = [seg for seg in covered if len(seg) >= params.k]  # type: List[Segment]
     if not segs:
         sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
         lines.removeLine(line)
         return
     if len(segs) == 1 and len(segs[0]) > len(line) - 10:
         sys.stdout.info("Whole line", line.id, "is covered by reads")
         return
     sys.stdout.info("Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
     sys.stdout.trace(segs)
     # Work from the rightmost segment towards the left; ``line`` is rebound
     # to the first element returned by splitLine on every step.
     next_left = segs[-1].left
     line.cutRight(segs[-1].right)
     for seg in segs[-2::-1]:
         if next_left < seg.right:
             # This segment reaches past the start of the previous one, so
             # the split is requested over the shared stretch.
             line, _ = lines.splitLine(line.segment(next_left, seg.right))
         else:
             # No shared stretch: split at a zero-length segment and then cut
             # the remaining line at the end of this segment.
             line, _ = lines.splitLine(line.segment(next_left, next_left))
             line.cutRight(seg.right)
         next_left = seg.left
     # NOTE(review): presumably this trims the uncovered left end through the
     # reverse-complement line - confirm against cutRight's contract.
     line.rc.cutRight(len(segs[0]))
예제 #2
0
 def splitBad(self, lines):
     # type: (NewLineStorage) -> None
     """Remove or split every unique line that is not fully read-supported.

     The coverage records of all lines are gathered first and passed to
     ``self.medianCoverage``. Each line yielded by ``lines.unique()`` is then
     reduced to the segments whose coverage lies between
     ``params.reliable_coverage`` and ``median * 7 / 4``, after the segments
     have been merged with ``mergeSegments``.

     * No segment left: a warning is logged and the line is removed.
     * A single segment longer than ``len(line) - 10``: the line is kept.
     * Otherwise the line is cut and split while walking the segments from
       right to left.

     :param lines: storage whose unique lines are checked and modified.
     """
     all_covs = [rec
                 for any_line in lines
                 for rec in any_line.read_alignments.calculateCoverage(params.k)]
     median = self.medianCoverage(all_covs)
     sys.stdout.info("Median coverage determined as", median)
     # Ids are snapshotted up front because lines are removed and split
     # inside the loop below.
     unique_ids = [unique_line.id for unique_line in lines.unique()]
     for line_id in unique_ids:
         line = lines[line_id]
         trusted = AlignmentStorage()
         trusted.addAll(al for al in line.read_alignments if not al.contradictingRTC())
         segment_storage = SegmentStorage()
         # addAll's return value is what the rest of the loop works with.
         segs = segment_storage.addAll(
             trusted.filterByCoverage(mi=params.reliable_coverage, ma=median * 7 / 4, k=params.k))
         # NOTE(review): params.k / 2 is integer division only on Python 2 -
         # confirm the interpreter version this is meant to run on.
         merge_distance = max(params.k - params.bad_end_length * 2, params.k / 2)
         segs.mergeSegments(merge_distance)
         if len(segs) == 0:
             sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
             lines.removeLine(line)
             continue
         if len(segs) == 1 and len(segs[0]) > len(line) - 10:
             sys.stdout.info("Whole line", line.id, "is covered by reads")
             continue
         sys.stdout.info("Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
         sys.stdout.trace(segs)
         rightmost = segs[-1]
         next_left = rightmost.left
         line.cutRight(rightmost.right)
         # Every segment except the rightmost one, visited right to left.
         remaining = list(segs)[:-1]
         for seg in reversed(remaining):
             if next_left >= seg.right:
                 # No shared stretch with the previously handled segment.
                 line, new_line = lines.splitLine(line.segment(next_left, next_left))
                 line.cutRight(seg.right)
             else:
                 line, new_line = lines.splitLine(line.segment(next_left, seg.right))
             next_left = seg.left
         line.rc.cutRight(len(segs[0]))
예제 #3
0
class Disjointig(Contig):
    """Contig that carries a storage of read alignments.

    Every instance is linked through ``rc`` to a twin built from
    ``basic.RC(seq)`` and ``basic.Reverse(id)``. The twin's
    ``read_alignments`` is the ``rc`` attribute of the first instance's
    storage rather than an independent one.
    """

    def __init__(self, seq, id, rc=None):
        # type: (str, str, Optional[Disjointig]) -> None
        """Create the disjointig and, when ``rc`` is omitted, its twin.

        :param seq: sequence of this disjointig.
        :param id: identifier of this disjointig.
        :param rc: already existing twin; passed only by the twin-creating
            call inside this constructor.
        """
        self.seq = seq
        self.id = id
        if rc is not None:
            # Twin path: reuse the counterpart's storage through its ``rc``.
            self.rc = rc
            self.read_alignments = self.rc.read_alignments.rc  # type: AlignmentStorage
        else:
            # Primary path: the storage must exist before the twin is built,
            # because the twin's constructor reads it.
            self.read_alignments = AlignmentStorage()  # type: AlignmentStorage
            rc = Disjointig(basic.RC(seq), basic.Reverse(id), self)  # type: Disjointig
            self.rc = rc
        Contig.__init__(self, seq, id, rc)
        self.rc = rc  # type:Disjointig

    def addAlignments(self, als):
        # type: (Iterable[AlignmentPiece]) -> None
        """Add every alignment from ``als`` to ``read_alignments``."""
        storage = self.read_alignments
        storage.addAll(als)

    def addAlignment(self, al):
        # type: (AlignmentPiece) -> None
        """Add the single alignment ``al`` to ``read_alignments``."""
        storage = self.read_alignments
        storage.add(al)

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Return ``read_alignments.getAlignmentsTo(seg)``."""
        storage = self.read_alignments
        return storage.getAlignmentsTo(seg)

    def allInter(self, seg, min_inter=1):
        # type: (Segment, int) -> Generator[AlignmentPiece]
        """Return ``read_alignments.allInter(seg, min_inter)``."""
        storage = self.read_alignments
        return storage.allInter(seg, min_inter)

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the id, the sequence and then the alignments to ``handler``."""
        for token in (self.id, self.seq):
            handler.writeTokenLine(token)
        self.read_alignments.save(handler)

    def loadDisjointig(self, handler, reads):
        # type: (TokenReader, ReadCollection) -> None
        """Read the id and the alignments back from ``handler``.

        Tokens are consumed in the order ``save`` writes them. The id of the
        twin is updated to ``basic.Reverse`` of the id that was read.
        """
        self.id = handler.readToken()
        self.rc.id = basic.Reverse(self.id)
        # The sequence token is consumed to stay in step with save(), but its
        # value is not stored.
        # NOTE(review): confirm that discarding the stored sequence is intended.
        handler.readToken()
        self.read_alignments.load(handler, reads, self)
예제 #4
0
 def testManual(self):
     """Check AlignmentStorage against hand-written expected renderings.

     Covers plain iteration, the ``rc`` view, ``calculateCoverage``,
     ``filterByCoverage``, ``addAndMergeRight`` and ``getAlignmentsTo`` by
     comparing ``str(list(...))`` of each result with a literal string.
     """
     source = Contig("ACGTACGTACGT", "from")
     target = Contig("ACGTACGTACGT", "to")

     def identical(src_left, src_right, dst_left, dst_right):
         # Identical alignment between the given ranges of the two contigs.
         return AlignmentPiece.Identical(source.segment(src_left, src_right),
                                         target.segment(dst_left, dst_right))

     def shown(items):
         # Rendering that all expectations below are written against.
         return str(list(items))

     first = identical(0, 4, 0, 4)
     second = identical(0, 4, 4, 8)
     third = identical(4, 8, 8, 12)
     storage = AlignmentStorage()
     storage.addAll([first, second, third])

     initial_listing = "[(from[0:4]->to[0:4]:1.000), (from[0:4]->to[4:12-4]:1.000), (from[4:12-4]->to[8:12-0]:1.000)]"
     assert shown(storage) == initial_listing
     assert shown(storage.rc) == "[(-from[4:12-4]->-to[0:4]:1.000), (-from[8:12-0]->-to[4:12-4]:1.000), (-from[8:12-0]->-to[8:12-0]:1.000)]"
     assert shown(storage.calculateCoverage()) == "[(to[0:12-0], 1)]"
     assert shown(storage.filterByCoverage(0, 1)) == "[]"
     assert shown(storage.filterByCoverage(1, 2)) == "[to[0:12-0]]"
     assert shown(storage.filterByCoverage(2)) == "[]"

     # Passing an alignment that is already stored to addAndMergeRight is
     # expected to leave the listing unchanged.
     storage.addAndMergeRight(third)
     assert shown(storage) == initial_listing

     fourth = identical(2, 8, 2, 8)
     fifth = identical(4, 10, 4, 10)
     storage.addAll([fourth, fifth])
     assert shown(storage.calculateCoverage()) == "[(to[0:2], 1), (to[2:4], 2), (to[4:12-4], 3), (to[8:12-2], 2), (to[10:12-0], 1)]"
     assert shown(storage.filterByCoverage(2, 3)) == "[to[2:4], to[8:12-2]]"
     assert shown(storage.filterByCoverage(2)) == "[to[2:12-2]]"
     assert shown(storage.getAlignmentsTo(target.segment(2, 3))) == "[(from[0:4]->to[0:4]:1.000), (from[2:12-4]->to[2:12-4]:1.000)]"
     assert shown(storage.getAlignmentsTo(target.segment(2, 6))) == "[(from[2:12-4]->to[2:12-4]:1.000)]"