예제 #1
0
 def __init__(self, seq, id, extension_handler, rc = None):
     # type: (str, str, ExtensionHandler, Optional[NewLine]) -> None
     """Build a line and wire it to its reverse-complement twin.

     With ``rc`` left as None this constructor creates fresh storages and
     then constructs the twin itself, passing ``self`` as the twin's ``rc``.
     With ``rc`` given (the path taken while the twin is being built) every
     storage and listener is taken from the ``.rc`` attribute of the
     corresponding object on ``rc``.

     :param seq: sequence of this line
     :param id: identifier of this line
     :param extension_handler: listener placed in the listener list; its
         ``.rc`` is handed to the twin
     :param rc: existing twin, or None to create one here
     """
     self.extensionHandler = extension_handler
     self.seq = seq
     self.id = id # type: str
     self.circular = False
     self.name_printer = None
     self.max_extension = False
     if rc is None:
         # Primary line: owns the freshly created storages.
         self.initial = AlignmentStorage()
         self.correct_segments = SegmentStorage()
         self.completely_resolved = SegmentStorage()
         self.disjointig_alignments = AlignmentStorage()
         self.read_alignments = ReadAlignmentStorage()
         self.listeners = [self.initial, self.correct_segments, self.completely_resolved, self.disjointig_alignments, self.read_alignments, extension_handler] # type: List[LineListener]
         # The twin reads self.listeners and the storages above, so they must exist before this call.
         rc = NewLine(basic.RC(seq), basic.Reverse(self.id), extension_handler.rc, self) #type: NewLine
         # self.rc has to be set before addListener is called on this line.
         self.rc = rc
         self.addListener(ReadAlignmentListener(self))
         # self.initial.add(AlignmentPiece.Identical(self.asSegment().asContig().asSegment(), self.asSegment()))
     else:
         # Twin line: every storage and listener is the .rc of the primary line's object.
         self.initial = rc.initial.rc # type: AlignmentStorage
         self.correct_segments = rc.correct_segments.rc # type: SegmentStorage
         self.completely_resolved = rc.completely_resolved.rc # type: SegmentStorage
         self.disjointig_alignments = rc.disjointig_alignments.rc # type: AlignmentStorage
         self.read_alignments = rc.read_alignments.rc # type: ReadAlignmentStorage
         self.listeners = [listener.rc for listener in rc.listeners] # type: List[LineListener]
     Contig.__init__(self, seq, id, rc)
     # Assigned again after the base constructor has run; the type comment narrows it to NewLine.
     self.rc = rc #type: NewLine
     self.knot = None # type: Knot
예제 #2
0
 def splitBad(self, line, lines):
     # type: (NewLine, NewLineStorage) -> None
     """Keep only the reliably covered parts of ``line``.

     Read alignments for which ``contradictingRTC()`` is false are collected,
     filtered by coverage and reduced to segments of at least ``params.k``
     bases. With no such segment the line is removed from ``lines``; with a
     single segment spanning (almost) the whole line nothing is changed;
     otherwise the line is cut and split, going from the rightmost segment
     to the leftmost one.
     """
     reliable = AlignmentStorage()
     reliable.addAll(al for al in line.read_alignments if not al.contradictingRTC())
     candidates = list(reliable.filterByCoverage(mi=params.reliable_coverage, k=params.k)) # type: List[Segment]
     covered = [candidate for candidate in candidates if len(candidate) >= params.k]
     if not covered:
         sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
         lines.removeLine(line)
         return
     if len(covered) == 1 and len(covered[0]) > len(line) - 10:
         sys.stdout.info("Whole line", line.id, "is covered by reads")
         return
     sys.stdout.info( "Line", line.id, "has poorly covered regions. Splitting into", len(covered), "parts")
     sys.stdout.trace(covered)
     # Start from the rightmost segment: drop everything to the right of it.
     rightmost = covered[-1]
     next_left = rightmost.left
     line.cutRight(rightmost.right)
     # Walk over the remaining segments from right to left.
     for seg in reversed(covered[:-1]):
         if next_left < seg.right:
             boundary = line.segment(next_left, seg.right)
             line, _ = lines.splitLine(boundary)
         else:
             boundary = line.segment(next_left, next_left)
             line, _ = lines.splitLine(boundary)
             line.cutRight(seg.right)
         next_left = seg.left
     # Finally cut the reverse-complement down to the length of the leftmost segment.
     line.rc.cutRight(len(covered[0]))
예제 #3
0
 def splitBad(self, lines):
     # type: (NewLineStorage) -> None
     """Split every unique line of ``lines`` at its poorly covered regions.

     The median coverage over all lines is computed first and used as the
     basis of the upper coverage bound. For each unique line the reliably
     covered segments are determined and merged; a line without any such
     segment is removed, a line covered (almost) entirely is left alone, and
     any other line is cut and split from right to left.
     """
     coverage_records = []
     for line in lines:
         coverage_records.extend(line.read_alignments.calculateCoverage(params.k))
     median = self.medianCoverage(coverage_records)
     sys.stdout.info("Median coverage determined as", median)
     # Ids are collected up front because lines are removed and split inside the loop.
     unique_ids = [unique_line.id for unique_line in lines.unique()]
     for line_id in unique_ids:
         line = lines[line_id]
         reliable = AlignmentStorage()
         reliable.addAll(al for al in line.read_alignments if not al.contradictingRTC())
         covered = reliable.filterByCoverage(mi=params.reliable_coverage, ma=median * 7 /4, k=params.k)
         segs = SegmentStorage().addAll(covered)
         merge_distance = max(params.k - params.bad_end_length * 2, params.k / 2)
         segs.mergeSegments(merge_distance)
         if len(segs) == 0:
             sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
             lines.removeLine(line)
             continue
         if len(segs) == 1 and len(segs[0]) > len(line) - 10:
             sys.stdout.info("Whole line", line.id, "is covered by reads")
             continue
         sys.stdout.info( "Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
         sys.stdout.trace(segs)
         # Start from the rightmost segment: drop everything to the right of it.
         rightmost = segs[-1]
         next_left = rightmost.left
         line.cutRight(rightmost.right)
         # Walk over the remaining segments from right to left.
         for seg in reversed(list(segs)[:-1]):
             if next_left < seg.right:
                 boundary = line.segment(next_left, seg.right)
                 line, _ = lines.splitLine(boundary)
             else:
                 boundary = line.segment(next_left, next_left)
                 line, _ = lines.splitLine(boundary)
                 line.cutRight(seg.right)
             next_left = seg.left
         # Finally cut the reverse-complement down to the length of the leftmost segment.
         line.rc.cutRight(len(segs[0]))
예제 #4
0
 def FireMergedLines(self, al1, al2):
     # type: (AlignmentPiece, AlignmentPiece) -> None
     """Transfer the stored alignments of two merged lines onto the new line.

     The old lines are the ``seg_from`` contigs of ``al1`` and ``al2``; the
     new line is the ``seg_to`` contig of ``al1``. The new line is added,
     alignments stored for the old lines are composed with ``al1``/``al2``
     and stored for the new line, and finally both old lines are removed.

     :param al1: alignment from the first old line to the new line
     :param al2: alignment from the second old line to the new line
     """
     sys.stdout.trace("Fire merged lines", al1, al2)
     new_line = al1.seg_to.contig
     line1 = al1.seg_from.contig
     line2 = al2.seg_from.contig
     sys.stdout.trace(list(self.allInter(line1.asSegment())))
     sys.stdout.trace(list(self.allInter(line2.asSegment())))
     self.addLine(new_line)
     # Both old self-alignment storages are put into state -1 before their content is read.
     self.auto_alignments[line1.id].setState(-1)
     self.auto_alignments[line2.id].setState(-1)
     # Self-alignments of the new line, built from three sources:
     # line1's own, line2's own, and whatever getAlignmentsToFrom(line1, line2) yields.
     auto1 = AutoAlignmentStorage(new_line).addAll([
         al1.composeTargetDifference(al.compose(al1))
         for al in self.auto_alignments[line1.id].content
     ])
     auto2 = AutoAlignmentStorage(new_line).addAll([
         al2.composeTargetDifference(al.compose(al2))
         for al in self.auto_alignments[line2.id].content
     ])
     auto3 = AutoAlignmentStorage(new_line).addAll([
         al2.composeTargetDifference(al.compose(al1))
         for al in self.getAlignmentsToFrom(line1, line2)
     ])
     self.auto_alignments[new_line.id].addAll(
         auto1.merge(auto3).merge(auto2).content)
     # Same three-source scheme for the rc alignment storage, using the rc of al1/al2.
     rc1 = RCAlignmentStorage(new_line).addAll([
         al1.rc.composeTargetDifference(al.compose(al1))
         for al in self.rc_alignments[line1.id]
     ])
     rc2 = RCAlignmentStorage(new_line).addAll([
         al2.rc.composeTargetDifference(al.compose(al2))
         for al in self.rc_alignments[line2.id]
     ])
     rc3 = RCAlignmentStorage(new_line).addAll([
         al2.rc.composeTargetDifference(al.compose(al1))
         for al in self.getAlignmentsToFrom(line1, line2.rc)
     ])
     self.rc_alignments[new_line.id].addAll(rc1.merge(rc3).merge(rc2))
     # Keys present in the alignmentsToFrom tables of both old lines.
     common = set(self.alignmentsToFrom[line1.id].keys()).intersection(
         set(self.alignmentsToFrom[line2.id].keys()))
     # Storages whose source line is not shared (and is not the other old line or its rc)
     # are composed with the matching alignment and stored for the new line.
     for storage in self.alignmentsToFrom[line1.id].values():
         if storage.line_from.id not in common and storage.line_from != line2 and storage.line_from != line2.rc:
             self.addTwoLineStorage(new_line, storage.line_from).addAll(
                 [al.compose(al1) for al in storage])
     for storage in self.alignmentsToFrom[line2.id].values():
         if storage.line_from.id not in common and storage.line_from != line1 and storage.line_from != line1.rc:
             self.addTwoLineStorage(new_line, storage.line_from).addAll(
                 [al.compose(al2) for al in storage])
     # Shared source lines: compose both storages and merge them into one.
     for c in common:
         storage1 = self.alignmentsToFrom[line1.id][c]
         storage2 = self.alignmentsToFrom[line2.id][c]
         als1 = AlignmentStorage().addAll(
             [al.compose(al1) for al in storage1])
         als2 = AlignmentStorage().addAll(
             [al.compose(al2) for al in storage2])
         self.addTwoLineStorage(new_line,
                                storage1.line_from).addAll(als1.merge(als2))
     # The old lines (same objects as line1 and line2) are removed last.
     self.removeLine(al1.seg_from.contig)
     self.removeLine(al2.seg_from.contig)
     sys.stdout.trace(list(self.allInter(new_line.asSegment())))
예제 #5
0
 def __init__(self, line, rc = None):
     # type: (Contig, Optional[RCAlignmentStorage]) -> None
     """Create the storage for ``line`` and, if needed, its twin.

     With ``rc`` left as None a fresh content storage is created, the twin
     storage for ``line.rc`` is constructed with ``self`` as its ``rc``, and
     the twin's content is set to ``self.content.rc``. With ``rc`` given,
     this constructor does not assign ``self.content`` itself; on the
     twin-creation path above it is assigned by the creator right after
     construction.

     :param line: contig this storage belongs to
     :param rc: existing twin storage, or None to create one here
     """
     self.line = line
     if rc is None:
         self.content = AlignmentStorage()
         rc = RCAlignmentStorage(line.rc, self)
         rc.content = self.content.rc
     LineListener.__init__(self, rc)
     self.rc = rc # type: RCAlignmentStorage
예제 #6
0
 def __init__(self, line, rc = None):
     # type: (Contig, Optional[AutoAlignmentStorage]) -> None
     """Create the self-alignment storage for ``line`` and, if needed, its twin.

     With ``rc`` left as None a fresh content storage is created and the twin
     for ``line.rc`` is constructed with ``self`` as its ``rc``; the twin then
     receives ``self.content.rc`` as content and state -1. With ``rc`` given,
     this constructor does not assign ``self.content`` itself.

     :param line: contig this storage belongs to
     :param rc: existing twin storage, or None to create one here
     """
     self.line = line
     if rc is None:
         self.content = AlignmentStorage()
         rc = AutoAlignmentStorage(line.rc, self)
         rc.content = self.content.rc
         # The twin's constructor has already set its state to 1; override it here.
         rc.state = -1
     LineListener.__init__(self, rc)
     self.rc = rc # type: AutoAlignmentStorage
     self.state = 1 # from precedes to
예제 #7
0
 def __init__(self, seq, id, rc=None):
     # type: (str, str, Optional[Disjointig]) -> None
     """Create a disjointig and, if needed, its reverse-complement twin.

     With ``rc`` left as None a fresh read-alignment storage is created and
     the twin is constructed from ``basic.RC(seq)`` and ``basic.Reverse(id)``
     with ``self`` as its ``rc``. With ``rc`` given, the read-alignment
     storage is taken from ``rc.read_alignments.rc``.

     :param seq: sequence of the disjointig
     :param id: identifier of the disjointig
     :param rc: existing twin, or None to create one here
     """
     self.seq = seq
     self.id = id
     if rc is None:
         # read_alignments must exist before the twin is built, because the twin reads it.
         self.read_alignments = AlignmentStorage()  # type: AlignmentStorage
         rc = Disjointig(basic.RC(seq), basic.Reverse(id),
                         self)  # type: Disjointig
         self.rc = rc
     else:
         self.rc = rc
         self.read_alignments = self.rc.read_alignments.rc  # type: AlignmentStorage
     Contig.__init__(self, seq, id, rc)
     # Assigned again after the base constructor has run; the type comment narrows it to Disjointig.
     self.rc = rc  # type:Disjointig
예제 #8
0
 def polyshSegments(self, line, to_polysh):
     # type: (NewLine, Iterable[Segment]) -> List[Segment]
     """Polish the given segments of ``line`` and return them afterwards.

     The segments are put into a storage that is registered as a listener of
     ``line`` for the duration of the call, then merged and sorted. Each
     segment is polished with the read alignments intersecting it, all
     results are applied to the line in a single correction, and the
     contents of the segment storage are returned as a list.
     """
     targets = SegmentStorage()
     polished = AlignmentStorage()
     # Registered as a listener before being filled; unregistered again before returning.
     line.addListener(targets)
     targets.addAll(to_polysh)
     targets.mergeSegments()
     targets.sort()
     for target in targets:
         supporting = list(line.read_alignments.allInter(target))
         polished.add(self.polisher.polishSegment(target, supporting))
     line.correctSequence(list(polished))
     line.removeListener(targets)
     return list(targets)
예제 #9
0
class Disjointig(Contig):
    """Contig that carries a storage of read alignments.

    Every instance is paired with a reverse-complement twin reachable through
    ``rc``; the twin's storage is the ``.rc`` of this instance's storage.
    """

    def __init__(self, seq, id, rc=None):
        # type: (str, str, Optional[Disjointig]) -> None
        """Create a disjointig and, if needed, its reverse-complement twin.

        With ``rc`` left as None a fresh read-alignment storage is created
        and the twin is constructed from ``basic.RC(seq)`` and
        ``basic.Reverse(id)`` with ``self`` as its ``rc``. With ``rc`` given,
        the read-alignment storage is taken from ``rc.read_alignments.rc``.
        """
        self.seq = seq
        self.id = id
        if rc is None:
            # read_alignments must exist before the twin is built, because the twin reads it.
            self.read_alignments = AlignmentStorage()  # type: AlignmentStorage
            rc = Disjointig(basic.RC(seq), basic.Reverse(id),
                            self)  # type: Disjointig
            self.rc = rc
        else:
            self.rc = rc
            self.read_alignments = self.rc.read_alignments.rc  # type: AlignmentStorage
        Contig.__init__(self, seq, id, rc)
        # Assigned again after the base constructor has run; the type comment narrows it to Disjointig.
        self.rc = rc  # type:Disjointig

    def addAlignments(self, als):
        # type: (Iterable[AlignmentPiece]) -> None
        """Add every alignment from ``als`` to the read-alignment storage."""
        self.read_alignments.addAll(als)

    def addAlignment(self, al):
        # type: (AlignmentPiece) -> None
        """Add a single alignment to the read-alignment storage."""
        self.read_alignments.add(al)

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Delegate to ``read_alignments.getAlignmentsTo(seg)``."""
        return self.read_alignments.getAlignmentsTo(seg)

    def allInter(self, seg, min_inter=1):
        # type: (Segment, int) -> Generator[AlignmentPiece]
        """Delegate to ``read_alignments.allInter(seg, min_inter)``."""
        return self.read_alignments.allInter(seg, min_inter)

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the id, the sequence and then the read alignments to ``handler``."""
        handler.writeTokenLine(self.id)
        handler.writeTokenLine(self.seq)
        self.read_alignments.save(handler)

    def loadDisjointig(self, handler, reads):
        # type: (TokenReader, ReadCollection) -> None
        """Read the id, a sequence token and the read alignments from ``handler``.

        The tokens are consumed in the same order in which ``save`` writes
        them. The id of the twin is updated to match the loaded id.
        """
        self.id = handler.readToken()
        self.rc.id = basic.Reverse(self.id)
        # NOTE(review): the sequence token is consumed but never stored on self or self.rc;
        # presumably the sequence is already set when this is called - confirm against the caller.
        seq = handler.readToken()
        self.read_alignments.load(handler, reads, self)
예제 #10
0
 def __init__(self, line_from, line_to, rc = None, reverse = None):
     # type: (Contig, Contig, Optional[TwoLineAlignmentStorage], Optional[TwoLineAlignmentStorage]) -> None
     """Create the storage of alignments from ``line_from`` to ``line_to``.

     A call with both ``rc`` and ``reverse`` left as None builds a group of
     four linked storages: this one, its rc twin (between the rc lines), the
     reverse storage (lines swapped) and the reverse storage's rc twin.
     Calls with ``rc`` or ``reverse`` given are the internal paths used
     while that group is being built.

     :param line_from: contig the alignments start from
     :param line_to: contig the alignments lead to; must differ from
         ``line_from`` and from ``line_from.rc`` by id
     :param rc: existing rc twin, or None to create one here
     :param reverse: existing reverse storage, or None
     """
     assert line_from.id != line_to.id and line_from.rc.id != line_to.id
     self.line_from = line_from
     self.line_to = line_to
     self.reverse = reverse
     if rc is None:
         self.content = AlignmentStorage()
         self.rc = TwoLineAlignmentStorage(line_from.rc, line_to.rc, self, None)
     else:
         self.rc = rc
         self.content = rc.content.rc # type: AlignmentStorage
     LineListener.__init__(self, self.rc)
     # Self-assignment kept only to carry the type comment.
     self.rc = self.rc # type: TwoLineAlignmentStorage
     if reverse is None and rc is None:
         # Only the outermost call reaches this branch; it creates the reverse
         # storage and links the four storages to one another.
         reverse = TwoLineAlignmentStorage(line_to, line_from, None, self)
         self.reverse = reverse
         self.reverse.reverse = self
         self.rc.reverse = self.reverse.rc
         self.rc.reverse.reverse = self.rc
예제 #11
0
 def fireAfterExtendRight(self, line, seq, relevant_als = None):
     # type: (NewLine, str, Optional[List[AlignmentPiece]]) -> None
     """Refresh the alignments of ``line`` after ``seq`` was appended to it.

     If ``relevant_als`` is given, it is retargeted to the whole line and
     merged into the line's read alignments. The suffix of the line that
     covers the appended sequence plus ``params.k + 100`` bases (capped at
     the line length) is then aligned against the disjointigs, and every
     alignment of length at least ``params.k`` is added to the line's
     disjointig alignments.
     """
     if relevant_als is not None:
         retargeted = AlignmentStorage().addAll(relevant_als).targetAsSegment(line.asSegment())
         # The merged result is computed before the storage is emptied and refilled.
         merged = line.read_alignments.merge(retargeted)
         line.read_alignments.clean()
         line.read_alignments.addAll(merged)
     suffix_length = min(len(line), len(seq) + params.k + 100)
     new_seg = line.asSegment().suffix(length = suffix_length)
     for al in self.aligner.dotplotAlign([new_seg.asContig()], self.disjointigs):
         if len(al) >= params.k:
             retargeted_al = al.reverse().targetAsSegment(new_seg)
             line.disjointig_alignments.addAndMergeRight(retargeted_al)
예제 #12
0
class TwoLineAlignmentStorage(LineListener):
    """Storage of alignments between two different lines.

    Each storage is linked to its rc twin (``rc``) and to the storage for the
    opposite direction (``reverse``). Operations that modify ``content``
    rebuild the reverse storage's content afterwards through
    ``normalizeReverse``.
    """

    def __init__(self, line_from, line_to, rc = None, reverse = None):
        # type: (Contig, Contig, Optional[TwoLineAlignmentStorage], Optional[TwoLineAlignmentStorage]) -> None
        """Create the storage of alignments from ``line_from`` to ``line_to``.

        A call with both ``rc`` and ``reverse`` left as None builds a group
        of four linked storages: this one, its rc twin, the reverse storage
        and the reverse storage's rc twin. Calls with ``rc`` or ``reverse``
        given are the internal paths used while that group is being built.

        :param line_from: contig the alignments start from
        :param line_to: contig the alignments lead to; must differ from
            ``line_from`` and from ``line_from.rc`` by id
        :param rc: existing rc twin, or None to create one here
        :param reverse: existing reverse storage, or None
        """
        assert line_from.id != line_to.id and line_from.rc.id != line_to.id
        self.line_from = line_from
        self.line_to = line_to
        self.reverse = reverse
        if rc is None:
            self.content = AlignmentStorage()
            self.rc = TwoLineAlignmentStorage(line_from.rc, line_to.rc, self, None)
        else:
            self.rc = rc
            self.content = rc.content.rc # type: AlignmentStorage
        LineListener.__init__(self, self.rc)
        # Self-assignment kept only to carry the type comment.
        self.rc = self.rc # type: TwoLineAlignmentStorage
        if reverse is None and rc is None:
            # Only the outermost call reaches this branch; it creates the
            # reverse storage and links the four storages to one another.
            reverse = TwoLineAlignmentStorage(line_to, line_from, None, self)
            self.reverse = reverse
            self.reverse.reverse = self
            self.rc.reverse = self.reverse.rc
            self.rc.reverse.reverse = self.rc

    def add(self, al):
        # type: (AlignmentPiece) -> None
        """Store ``al`` here and its reversal in the reverse storage.

        ``al`` must lead from ``line_from`` to ``line_to``.
        """
        assert al.seg_from.contig == self.line_from
        assert al.seg_to.contig == self.line_to
        self.content.add(al)
        reverse = al.reverse()
        self.reverse.content.add(reverse)

    def addAll(self, als):
        """Add every alignment from ``als`` and return this storage."""
        for al in als:
            self.add(al)
        return self

    def __iter__(self):
        # type: () -> Generator[AlignmentPiece]
        """Iterate over the stored alignments."""
        return self.content.__iter__()

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Delegate to ``content.getAlignmentsTo(seg)``."""
        return self.content.getAlignmentsTo(seg)

    def allInter(self, seg):
        """Delegate to ``content.allInter(seg)``."""
        return self.content.allInter(seg)

    def normalizeReverse(self):
        """Rebuild the reverse storage's content (and its rc) from ``content``."""
        self.reverse.content = self.content.reverse()
        self.reverse.rc.content = self.reverse.content.rc

    def fireBeforeExtendRight(self, line, new_seq, seq):
        # type: (Any, Contig, str) -> None
        """Forward the event to ``content`` and resynchronize the reverse storage."""
        self.content.fireBeforeExtendRight(line, new_seq, seq)
        self.normalizeReverse()

    def fireBeforeCutRight(self, line, new_seq, pos):
        # type: (Any, Contig, int) -> None
        """Forward the event to ``content`` and resynchronize the reverse storage."""
        self.content.fireBeforeCutRight(line, new_seq, pos)
        self.normalizeReverse()

    # alignments from new sequence to new sequence

    def fireBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        """Forward the event to ``content`` and resynchronize the reverse storage."""
        self.content.fireBeforeCorrect(alignments)
        self.normalizeReverse()

    def fireAfterExtendRight(self, line, seq, relevant_als = None):
        # type: (Any, str, Optional[List[AlignmentPiece]]) -> None
        """Forward the event to ``content`` and resynchronize the reverse storage.

        ``relevant_als`` is accepted for interface compatibility and is not
        passed on to ``content``.
        """
        self.content.fireAfterExtendRight(line, seq)
        self.normalizeReverse()

    def fireAfterCutRight(self, line, pos):
        # type: (Any, int) -> None
        """Forward the event to ``content`` and resynchronize the reverse storage."""
        self.content.fireAfterCutRight(line, pos)
        self.normalizeReverse()

    def fireAfterCorrect(self, line, alignments):
        # type: (Any, Correction) -> None
        """Forward the event to ``content`` and resynchronize the reverse storage."""
        self.content.fireAfterCorrect(line, alignments)
        self.normalizeReverse()

    def addAndMergeRight(self, al):
        """Delegate to ``content.addAndMergeRight`` and resynchronize the reverse storage."""
        self.content.addAndMergeRight(al)
        self.normalizeReverse()

    def merge(self, other):
        # type: (TwoLineAlignmentStorage) -> TwoLineAlignmentStorage
        """Return a new storage holding the merge of this content with ``other``'s.

        Neither this storage nor ``other`` is modified.
        """
        res = TwoLineAlignmentStorage(self.line_from, self.line_to)
        res.content.addAll(self.content.merge(other.content))
        res.normalizeReverse()
        # The result used to be built and then dropped, so callers received None.
        return res

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the stored alignments to ``handler``."""
        self.content.save(handler)

    def load(self, handler, lines):
        # type: (TokenReader, Any) -> None
        """Load the alignments from ``handler`` and resynchronize the reverse storage.

        ``lines`` is passed to ``content.load`` as both of its collection
        arguments.
        """
        self.content.load(handler, lines, lines)
        self.normalizeReverse()
예제 #13
0
class NewLine(Contig):
    """Contig whose sequence can be edited while listeners are kept informed.

    Every line is paired with a reverse-complement twin reachable through
    ``rc``. Extending, cutting and correcting the sequence notify all
    registered listeners before and after the change.
    """

    def __init__(self, seq, id, extension_handler, rc = None):
        # type: (str, str, ExtensionHandler, Optional[NewLine]) -> None
        """Build a line and wire it to its reverse-complement twin.

        With ``rc`` left as None this constructor creates fresh storages and
        then constructs the twin itself, passing ``self`` as the twin's
        ``rc``. With ``rc`` given (the path taken while the twin is being
        built) every storage and listener is taken from the ``.rc``
        attribute of the corresponding object on ``rc``.
        """
        self.extensionHandler = extension_handler
        self.seq = seq
        self.id = id # type: str
        self.circular = False
        self.name_printer = None
        self.max_extension = False
        if rc is None:
            self.initial = AlignmentStorage()
            self.correct_segments = SegmentStorage()
            self.completely_resolved = SegmentStorage()
            self.disjointig_alignments = AlignmentStorage()
            self.read_alignments = ReadAlignmentStorage()
            self.listeners = [self.initial, self.correct_segments, self.completely_resolved, self.disjointig_alignments, self.read_alignments, extension_handler] # type: List[LineListener]
            # The twin reads self.listeners and the storages above, so they must exist before this call.
            rc = NewLine(basic.RC(seq), basic.Reverse(self.id), extension_handler.rc, self) #type: NewLine
            # self.rc has to be set before addListener, which also touches self.rc.listeners.
            self.rc = rc
            self.addListener(ReadAlignmentListener(self))
            # self.initial.add(AlignmentPiece.Identical(self.asSegment().asContig().asSegment(), self.asSegment()))
        else:
            self.initial = rc.initial.rc # type: AlignmentStorage
            self.correct_segments = rc.correct_segments.rc # type: SegmentStorage
            self.completely_resolved = rc.completely_resolved.rc # type: SegmentStorage
            self.disjointig_alignments = rc.disjointig_alignments.rc # type: AlignmentStorage
            self.read_alignments = rc.read_alignments.rc # type: ReadAlignmentStorage
            self.listeners = [listener.rc for listener in rc.listeners] # type: List[LineListener]
        Contig.__init__(self, seq, id, rc)
        self.rc = rc #type: NewLine
        self.knot = None # type: Knot

    def updateCorrectSegments(self, seg, threshold = params.reliable_coverage):
        # type: (Segment, int) -> None
        """Add the parts of ``seg`` covered by at least ``threshold`` reads to ``correct_segments``.

        The default threshold is read from ``params`` once, when the class
        body is executed.
        """
        segs = AlignmentStorage().addAll(self.read_alignments.allInter(seg)).filterByCoverage(mi=threshold)
        self.correct_segments.addAll(segs)
        self.correct_segments.mergeSegments()

    def addReads(self, alignments):
        # type: (Iterable[AlignmentPiece]) -> None
        """Add read alignments and reset the ``max_extension`` flag."""
        self.read_alignments.addAll(alignments)
        self.max_extension = False

    def getReadAlignmentsTo(self, seg):
        # type: (Segment) -> Iterable[AlignmentPiece]
        """Delegate to ``read_alignments.getAlignmentsTo(seg)``."""
        return self.read_alignments.getAlignmentsTo(seg)

    def getPotentialAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Yield read alignments to ``seg`` obtained through the disjointig alignments.

        Read-to-disjointig alignments are composed with the
        disjointig-to-line alignments covering ``seg``. For each read the
        candidates are visited from the longest source segment to the
        shortest, and a candidate is yielded only if its matching sequence
        shares no match with one already yielded for that read.
        """
        result = []
        for alDL in self.disjointig_alignments.getAlignmentsTo(seg):
            reduced = alDL.reduce(target=seg)
            dt = alDL.seg_from.contig # type: Disjointig
            for alRD in dt.getAlignmentsTo(reduced.seg_from):
                result.append(alRD.compose(alDL))
        # groupby only groups consecutive items, so sort by read id first.
        result = sorted(result, key = lambda al: (al.seg_from.contig.id, -len(al.seg_from)))
        for read, group in itertools.groupby(result, key = lambda al: al.seg_from.contig):
            readRes = []
            for al in group:
                found = False
                for al1 in readRes:
                    inter = al.matchingSequence(True).inter(al1.matchingSequence(True))
                    if len(inter.matches) != 0:
                        found = True
                        # One conflict is enough; same early exit as in getRelevantAlignmentsFor.
                        break
                if not found:
                    yield al
                    readRes.append(al)

    def getRelevantAlignmentsFor(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Yield read alignments that intersect ``seg``, found through the disjointig alignments.

        Disjointig alignments shorter than ``params.k`` on the line are
        skipped. Read alignments that overlap the disjointig part by more
        than ``8 * params.k / 10`` are composed back onto the line, and
        compositions covering at least ``params.k`` of the line are kept.
        The per-read filtering is the same as in ``getPotentialAlignmentsTo``.
        """
        sys.stdout.trace("Requesting read alignments for", seg)
        result = []
        if params.debug:
            # Single-argument parenthesised form prints the same on Python 2 and is valid on Python 3.
            print(self.disjointig_alignments)
            print(list(self.disjointig_alignments.allInter(seg)))
        for alDL in self.disjointig_alignments.allInter(seg):
            if len(alDL.seg_to) < params.k:
                continue
            reduced = alDL.reduce(target=seg)
            dt = alDL.seg_from.contig # type: Disjointig
            als = filter(lambda al: al.seg_to.interSize(alDL.seg_from) > 8 * params.k / 10, dt.allInter(reduced.seg_from))
            compositions = alDL.massComposeBack(als)
            for al in compositions:
                if len(al.seg_to) >= params.k:
                    result.append(al)
        sys.stdout.trace("Request for read alignments for", seg, " collecting finished. Started filtering")
        # groupby only groups consecutive items, so sort by read id first.
        result = sorted(result, key = lambda al: (al.seg_from.contig.id, -len(al.seg_from)))
        for read, group in itertools.groupby(result, key = lambda al: al.seg_from.contig): # type: AlignedRead, Generator[AlignmentPiece]
            readRes = []
            for al in group:
                found = False
                for al1 in readRes:
                    inter = al.matchingSequence(True).inter(al1.matchingSequence(True))
                    if len(inter.matches) != 0:
                        found = True
                        break
                if not found:
                    if params.debug:
                        print(al)
                    yield al
                    readRes.append(al)
        sys.stdout.trace("Request for read alignments for", seg, "finished")

    def position(self, pos):
        # type: (int) -> LinePosition
        """Return a ``LinePosition`` for ``pos`` on this line."""
        return LinePosition(self, pos)

    def extendRight(self, seq, relevant_als = None):
        # type: (str, List[AlignmentPiece]) -> None
        """Append ``seq`` to the right end of the line.

        Listeners are notified before and after the change, the twin's
        sequence is extended on its left end, and the correct segments are
        recomputed for the whole line. The line must not be tied by a knot.

        :param seq: sequence to append
        :param relevant_als: alignments handed to the listeners after the
            extension; None is treated as an empty list
        """
        sys.stdout.trace("Line operation Extend:", self, len(seq), relevant_als)
        assert self.knot is None
        if relevant_als is None:
            relevant_als = []
        new_seq = Contig(self.seq + seq, "TMP2_" + self.id)
        self.notifyBeforeExtendRight(new_seq, seq)
        self.seq = self.seq + seq
        self.rc.seq = basic.RC(seq) + self.rc.seq
        self.notifyAfterExtendRight(seq, relevant_als)
        self.updateCorrectSegments(self.asSegment())
        self.max_extension = True

    def notifyBeforeExtendRight(self, new_seq, seq):
        # type: (Contig, str) -> None
        """Call ``fireBeforeExtendRight`` on every listener."""
        for listener in self.listeners:
            listener.fireBeforeExtendRight(self, new_seq, seq)

    def notifyAfterExtendRight(self, seq, relevant_als):
        # type: (str, Optional[List[AlignmentPiece]]) -> None
        """Call ``fireAfterExtendRight`` on every listener."""
        for listener in self.listeners:
            listener.fireAfterExtendRight(self, seq, relevant_als)

    def cutRight(self, pos):
        """Cut the line so that only its first ``pos`` bases remain.

        ``pos`` must satisfy ``0 < pos <= len(self)``. Nothing happens,
        listeners included, when ``pos`` equals the current length.
        """
        sys.stdout.trace("Line operation Cut:", self, pos)
        assert pos > 0 and pos <= len(self)
        cut_length = len(self) - pos
        if cut_length == 0:
            return
        new_seq = Contig(self.seq[:pos], "TMP3_" + self.id)
        self.notifyBeforeCutRight(new_seq, pos)
        self.seq = self.seq[:-cut_length]
        self.rc.seq = self.rc.seq[cut_length:]
        self.notifyAfterCutRight(pos)

    def notifyBeforeCutRight(self, new_seq, pos):
        # type: (Contig, int) -> None
        """Call ``fireBeforeCutRight`` on every listener."""
        for listener in self.listeners:
            listener.fireBeforeCutRight(self, new_seq, pos)

    def notifyAfterCutRight(self, pos):
        # type: (int) -> None
        """Call ``fireAfterCutRight`` on every listener."""
        for listener in self.listeners:
            listener.fireAfterCutRight(self, pos)

    def correctSequence(self, alignments):
        # type: (Iterable[AlignmentPiece]) -> None
        """Replace parts of the sequence according to ``alignments``.

        Alignments whose two sides have equal sequences are dropped and the
        rest are trimmed with ``cutIdenticalEnds``. If nothing remains the
        call returns without notifying any listener.
        """
        sys.stdout.trace("Line operation Correct:", alignments)
        alignments = [al.cutIdenticalEnds() for al in alignments if al.seg_from.Seq() != al.seg_to.Seq()]
        if len(alignments) == 0:
            sys.stdout.trace("Skipping trivial correction operation")
            return
        assert len(alignments) > 0
        correction = Correction.constructCorrection(alignments)
        self.notifyBeforeCorrect(correction)
        old = Contig(self.seq, "old")
        self.seq = correction.seq_from.seq
        self.rc.seq = basic.RC(self.seq)
        correction.changeQT(self, old)
        self.notifyAfterCorrect(correction)

    def notifyBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        """Call ``fireBeforeCorrect`` on every listener."""
        for listener in self.listeners:
            listener.fireBeforeCorrect(alignments)

    def notifyAfterCorrect(self, alignments):
        # type: (Correction) -> None
        """Call ``fireAfterCorrect`` on every listener."""
        for listener in self.listeners:
            listener.fireAfterCorrect(self, alignments)

    def addReadAlignment(self, al):
        # type: (AlignmentPiece) -> AlignmentPiece
        """Add one read alignment, reset ``max_extension`` and return ``al``."""
        self.read_alignments.add(al)
        self.max_extension = False
        return al

    def addListener(self, listener):
        """Register ``listener`` here and ``listener.rc`` on the twin."""
        self.listeners.append(listener)
        self.rc.listeners.append(listener.rc)

    def removeListener(self, listener):
        """Unregister ``listener`` here and ``listener.rc`` on the twin."""
        self.listeners.remove(listener)
        self.rc.listeners.remove(listener.rc)

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the id, the sequence and the five storages to ``handler``."""
        handler.writeTokenLine(self.id)
        handler.writeTokenLine(self.seq)
        self.initial.save(handler)
        self.correct_segments.save(handler)
        self.completely_resolved.save(handler)
        self.disjointig_alignments.save(handler)
        self.read_alignments.save(handler)

    def loadLine(self, handler, disjointigs, reads, contigs):
        # type: (TokenReader, DisjointigCollection, ReadCollection, ContigCollection) -> None
        """Load the line from ``handler`` in the order written by ``save``.

        Each saved initial alignment is replaced by an identical alignment
        onto its loaded target segment. Every loaded read alignment is also
        registered on its read. ``contigs`` is not used.
        """
        self.id = handler.readToken()
        # NOTE(review): self.rc.seq is not updated here, unlike in the editing operations - confirm the caller handles it.
        self.seq = handler.readToken()
        self.rc.id = basic.Reverse(self.id)
        n = handler.readInt()
        for i in range(n):
            # Three tokens before the target segment and one after it are read and discarded.
            handler.readToken()
            handler.readToken()
            handler.readToken()
            seg = Segment.load(handler, self)
            handler.readToken()
            self.initial.add(AlignmentPiece.Identical(seg.asContig().asSegment(), seg))
            # self.add(AlignmentPiece.load(handler, collection_from, collection_to))
        self.correct_segments.load(handler, self)
        self.completely_resolved.load(handler, self)
        self.disjointig_alignments.load(handler, disjointigs, self)
        self.read_alignments.load(handler, reads, self)
        for al in self.read_alignments:
            read = al.seg_from.contig #type: AlignedRead
            read.addAlignment(al)
        self.max_extension = False

    def __str__(self):
        """Return ``name_printer(self)`` if set, else a summary with the line's end points and initial range."""
        if self.name_printer is not None:
            return self.name_printer(self)
        points = [self.left()]
        if len(self.initial) == 0:
            points.append("NA")
        else:
            points.append(self.initial[0].seg_to.left)
            points.append(self.initial[-1].seg_to.right)
        points.append(self.right())
        points = map(str, points)
        return "Line:" + str(self.id) + ":" + "[" + ":".join(points) +"]"

    def __repr__(self):
        """Return a description listing the end points and every initial alignment."""
        points = [self.left()]
        points.extend(self.initial)
        points.append(self.right())
        points = map(str, points)
        return "Line:" + str(self.id) + ":" + "[" + ":".join(points) +"]"

    def setCircular(self):
        """Mark this line and its twin as circular."""
        self.circular = True
        self.rc.circular = True

    def cleanReadAlignments(self):
        """Detach this line from every aligned read and empty the read alignments."""
        for al in self.read_alignments:
            al.seg_from.contig.removeContig(self)
        self.read_alignments.clean()
        self.max_extension = False

    def tie(self, other, gap, gap_seq):
        """Tie this line to ``other`` with a knot; tying a line to itself marks it circular."""
        self.knot = Knot(self, other, gap, gap_seq)
        other.rc.knot = self.knot.rc
        if self == other:
            self.setCircular()

    def unTie(self):
        """Remove the knot of this line, if any, from both lines it links."""
        if self.knot is not None:
            self.knot.line_right.rc.knot = None
            # The line above may already have cleared self.knot; assigning None again is harmless.
            self.knot = None
예제 #14
0
class RCAlignmentStorage(LineListener):
    """Storage of alignments between a line and its reverse-complement.

    The storage is paired with a twin for ``line.rc``; the twin's content is
    the ``.rc`` of this storage's content.
    """

    def __init__(self, line, rc = None):
        # type: (Contig, Optional[RCAlignmentStorage]) -> None
        """Create the storage for ``line`` and, if needed, its twin.

        With ``rc`` left as None a fresh content storage is created, the twin
        for ``line.rc`` is constructed with ``self`` as its ``rc``, and the
        twin's content is set to ``self.content.rc``. With ``rc`` given, this
        constructor does not assign ``self.content`` itself.
        """
        self.line = line
        if rc is None:
            self.content = AlignmentStorage()
            rc = RCAlignmentStorage(line.rc, self)
            rc.content = self.content.rc
        LineListener.__init__(self, rc)
        self.rc = rc # type: RCAlignmentStorage

    def __iter__(self):
        # type: () -> Generator[AlignmentPiece]
        """Iterate over the stored alignments."""
        return self.content.__iter__()

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Delegate to ``content.getAlignmentsTo(seg)``."""
        return self.content.getAlignmentsTo(seg)

    def allInter(self, seg):
        """Delegate to ``content.allInter(seg)``."""
        return self.content.allInter(seg)

    def add(self, alignment):
        """Store ``alignment`` together with the rc of its reversal."""
        self.content.add(alignment)
        self.content.add(alignment.reverse().rc)

    def addAndMergeRight(self, al):
        # type: (AlignmentPiece) -> None
        """Merge ``al`` in on the right and the rc of its reversal on the left."""
        self.content.addAndMergeRight(al)
        self.content.addAndMergeLeft(al.reverse().rc)

    def addAll(self, als):
        """Add every alignment from ``als`` and return this storage."""
        for al in als:
            self.add(al)
        return self

    # Every fire* handler below follows the same pattern: forward the event
    # to the content, swap the content through reverse(), forward it again.

    def fireBeforeExtendRight(self, line, new_seq, seq):
        # type: (Any, Contig, str) -> None
        """Forward the event to the content, call ``reverse()``, and forward it again."""
        self.content.fireBeforeExtendRight(line, new_seq, seq)
        self.reverse()
        self.content.fireBeforeExtendRight(line, new_seq, seq)

    def fireBeforeCutRight(self, line, new_seq, pos):
        # type: (Any, Contig, int) -> None
        """Forward the event to the content, call ``reverse()``, and forward it again."""
        self.content.fireBeforeCutRight(line, new_seq, pos)
        self.reverse()
        self.content.fireBeforeCutRight(line, new_seq, pos)
    # alignments from new sequence to new sequence

    def fireBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        """Forward the event to the content, call ``reverse()``, and forward it again."""
        self.content.fireBeforeCorrect(alignments)
        self.reverse()
        self.content.fireBeforeCorrect(alignments)

    def fireAfterExtendRight(self, line, seq, relevant_als = None):
        # type: (Any, str, Optional[List[AlignmentPiece]]) -> None
        """Forward the event to the content, call ``reverse()``, and forward it again.

        ``relevant_als`` is accepted for interface compatibility and is not
        passed on to the content.
        """
        self.content.fireAfterExtendRight(line, seq)
        self.reverse()
        self.content.fireAfterExtendRight(line, seq)

    def fireAfterCutRight(self, line, pos):
        # type: (Any, int) -> None
        """Forward the event to the content, call ``reverse()``, and forward it again."""
        self.content.fireAfterCutRight(line, pos)
        self.reverse()
        self.content.fireAfterCutRight(line, pos)

    def fireAfterCorrect(self, line, alignments):
        # type: (Any, Correction) -> None
        """Forward the event to the content, call ``reverse()``, and forward it again."""
        self.content.fireAfterCorrect(line, alignments)
        self.reverse()
        self.content.fireAfterCorrect(line, alignments)
    # This is CRAAAZY!!! But correct.

    def reverse(self):
        """Replace the twin's content by the reversed content and this content by its rc."""
        self.rc.content = self.content.reverse()
        self.content = self.rc.content.rc

    def merge(self, other):
        # type: (RCAlignmentStorage) -> RCAlignmentStorage
        """Return a new storage for the same line holding the merge of both contents."""
        res = RCAlignmentStorage(self.line)
        res.content.addAll(self.content.merge(other.content))
        return res

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the stored alignments to ``handler``."""
        self.content.save(handler)

    def load(self, handler):
        # type: (TokenReader) -> None
        """Load the alignments from ``handler``, passing ``line.rc`` and ``line`` to ``content.load``."""
        self.content.load(handler, self.line.rc, self.line)
예제 #15
0
 def updateCorrectSegments(self, seg, threshold = params.reliable_coverage):
     # type: (Segment, int) -> None
     """Extend ``correct_segments`` with the well covered parts of ``seg``.

     Read alignments intersecting ``seg`` are collected, the regions covered
     by at least ``threshold`` of them are added to ``correct_segments``,
     and the stored segments are merged afterwards.
     """
     overlapping = AlignmentStorage().addAll(self.read_alignments.allInter(seg))
     well_covered = overlapping.filterByCoverage(mi=threshold)
     self.correct_segments.addAll(well_covered)
     self.correct_segments.mergeSegments()
예제 #16
0
class AutoAlignmentStorage(LineListener):
    """Storage for alignments of a line to itself.

    Every non-identical alignment is kept only once, in the orientation
    selected by ``state`` (see isCanonical).  Iteration additionally yields
    the reversed copy of every stored alignment and the identity alignment
    of the whole line.  The storage for the reverse-complement line is
    created together with this one, uses ``content.rc`` as its content and
    carries the opposite ``state``.
    """

    def __init__(self, line, rc = None):
        # type: (Contig, Optional[AutoAlignmentStorage]) -> None
        """Create the storage for line.

        When rc is None the reverse-complement twin is created here as well;
        the twin is constructed with this object as its rc and receives its
        content and state right after construction.
        """
        self.line = line
        if rc is None:
            self.content = AlignmentStorage()
            rc = AutoAlignmentStorage(line.rc, self)
            rc.content = self.content.rc
            rc.state = -1
        LineListener.__init__(self, rc)
        self.rc = rc # type: AutoAlignmentStorage
        self.state = 1 # from precedes to

    def makeCanonical(self, al):
        # type: (AlignmentPiece) -> AlignmentPiece
        """Return al itself if it is canonical, otherwise its reverse."""
        return al if self.isCanonical(al) else al.reverse()

    def isCanonical(self, al):
        # type: (AlignmentPiece) -> bool
        """Tell whether al has the orientation currently stored.

        With state == 1 an alignment is canonical when seg_from starts to
        the left of seg_to; with state == -1 it is canonical otherwise.
        """
        return (self.state == 1) == (al.seg_from.left < al.seg_to.left)

    def add(self, al):
        # type: (AlignmentPiece) -> None
        """Store al in canonical orientation; identical alignments are skipped."""
        if al.isIdentical():
            return
        self.content.add(self.makeCanonical(al))

    def addAll(self, als):
        """Add every alignment from als and return this storage."""
        for al in als:
            self.add(al)
        return self

    def addAndMergeRight(self, al):
        """Pass al in canonical orientation to content.addAndMergeRight().

        Identical alignments are skipped.
        """
        if al.isIdentical():
            return
        self.content.addAndMergeRight(self.makeCanonical(al))

    def __iter__(self):
        # type: () -> Generator[AlignmentPiece]
        """Yield stored alignments, then their reverses, then the identity."""
        for al in self.content:
            yield al
        for al in self.content:
            yield al.reverse()
        # The identity alignment is never stored (see add), it is generated.
        yield AlignmentPiece.Identical(self.line.asSegment())

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Yield the alignments whose target segment contains seg."""
        for al in self:
            if al.seg_to.contains(seg):
                yield al

    def allInter(self, seg):
        """Yield the alignments for which al.seg_to.inter(seg) is truthy."""
        for al in self:
            if al.seg_to.inter(seg):
                yield al

    # Every fire* handler below follows the same pattern: forward the event
    # to the content, call reverse() (which rebinds self.content to the
    # opposite orientation) and forward the event to the new content too.

    def fireBeforeExtendRight(self, line, new_seq, seq):
        # type: (Any, Contig, str) -> None
        """Relay the before-extend-right event in both orientations."""
        self.content.fireBeforeExtendRight(line, new_seq, seq)
        self.reverse()
        self.content.fireBeforeExtendRight(line, new_seq, seq)
    # alignments from new sequence to new sequence

    def fireBeforeCutRight(self, line, new_seq, pos):
        # type: (Any, Contig, int) -> None
        """Relay the before-cut-right event in both orientations."""
        self.content.fireBeforeCutRight(line, new_seq, pos)
        self.reverse()
        self.content.fireBeforeCutRight(line, new_seq, pos)

    def fireBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        """Relay the before-correct event in both orientations."""
        self.content.fireBeforeCorrect(alignments)
        self.reverse()
        self.content.fireBeforeCorrect(alignments)

    def fireAfterExtendRight(self, line, seq, relevant_als = None):
        # type: (Any, str, Optional[List[AlignmentPiece]]) -> None
        """Relay the after-extend-right event in both orientations.

        NOTE(review): relevant_als is accepted but not passed on to the
        content - confirm that this is intentional.
        """
        self.content.fireAfterExtendRight(line, seq)
        self.reverse()
        self.content.fireAfterExtendRight(line, seq)

    def fireAfterCutRight(self, line, pos):
        # type: (Any, int) -> None
        """Relay the after-cut-right event in both orientations."""
        self.content.fireAfterCutRight(line, pos)
        self.reverse()
        self.content.fireAfterCutRight(line, pos)

    def fireAfterCorrect(self, line, alignments):
        # type: (Any, Correction) -> None
        """Relay the after-correct event in both orientations."""
        self.content.fireAfterCorrect(line, alignments)
        self.reverse()
        self.content.fireAfterCorrect(line, alignments)

    def reverse(self):
        """Switch the stored orientation of this storage and of its rc twin.

        Both states change sign, the content is replaced by its reverse and
        the twin's content is re-derived from the new content.
        """
        self.state = -self.state
        self.rc.state = -self.rc.state
        self.content = self.content.reverse()
        self.rc.content = self.content.rc

    def merge(self, other):
        # type: (AutoAlignmentStorage) -> AutoAlignmentStorage
        """Return a new storage for the same line holding the merged content.

        If the two storages are in different orientations, this storage is
        reversed in place first so that both contents are comparable.
        """
        if self.state != other.state:
            self.reverse()
        res = AutoAlignmentStorage(self.line)
        # A fresh pair is created with states (1, -1).  Copy the orientation
        # onto both twins: setting only res.state would leave res.rc.state
        # at -1 even when res.state is -1 as well, breaking the opposite
        # states that __init__ and reverse() maintain.
        res.state = self.state
        res.rc.state = -self.state
        res.content.addAll(self.content.merge(other.content))
        return res

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the stored alignments to handler via content.save().

        NOTE(review): state is not written, so the orientation of the saved
        content is not recorded - confirm that load() does not depend on it.
        """
        self.content.save(handler)

    def load(self, handler):
        # type: (TokenReader) -> None
        """Read the stored alignments from handler via content.load().

        The line is passed twice to content.load() - presumably as both the
        source and the target of the loaded alignments (TODO confirm).
        """
        self.content.load(handler, self.line, self.line)

    def setState(self, state):
        """Bring the storage to the requested state (1 or -1), reversing it if needed."""
        assert state in [-1, 1]
        if self.state != state:
            self.reverse()
예제 #17
0
    def mergeLines(self, alignment, k):
        # type: (AlignmentPiece, int) -> NewLine
        """Merge the two lines joined by alignment into one new line.

        line1 is the contig of alignment.seg_from and line2 the contig of
        alignment.seg_to; they must differ.  The method
          1. extends line1 with sequence of line2 when the alignment is
             shorter than k + 100,
          2. cuts line1 after the end of the alignment and line2 before its
             start,
          3. corrects line1 so that it matches line2 on the overlap,
          4. registers a new line made of the part of line1 preceding the
             alignment followed by the whole of line2,
          5. transfers the stored segments and alignments of both lines to
             the new line, removes the old lines and re-creates their knots.

        Returns the newly created line.
        """
        sys.stdout.trace("Line operation Merge", alignment.seg_from.contig,
                         alignment.seg_to.contig, alignment)
        line1 = alignment.seg_from.contig  #type: NewLine
        line2 = alignment.seg_to.contig  #type: NewLine
        assert line1 != line2
        if len(alignment) < k + 100:
            sys.stdout.trace(
                "Prolonging line to ensure alignment of at least k")
            # Take the part of line2 that directly follows the alignment and
            # is long enough to bring the alignment length up to k + 100.
            seg = line2.segment(
                alignment.seg_to.right,
                alignment.seg_to.right + k + 100 - len(alignment))
            line1.extendRight(seg.Seq())
            # The appended suffix of line1 is an exact copy of seg, so the
            # alignment is extended with an identical piece.
            alignment = alignment.mergeDistant(
                AlignmentPiece.Identical(
                    line1.asSegment().suffix(length=len(seg)), seg))
        # Cutting hanging tips of both lines
        # NOTE(review): al_storage is filled here but never read again in
        # this method.
        al_storage = AlignmentStorage()
        al_storage.add(alignment)
        # The temporary storage listens to both lines while they are cut;
        # the alignment is read back from it after the cuts - presumably the
        # storage keeps it up to date through the listener events (confirm
        # against TwoLineAlignmentStorage).
        storage = TwoLineAlignmentStorage(line1, line2)
        line2.addListener(storage)
        line1.addListener(storage.reverse)
        storage.add(alignment)
        if alignment.seg_from.right < len(line1):
            # line1 continues past the alignment: drop that tail.
            line1.cutRight(alignment.seg_from.right)
            sys.stdout.trace("Cut right")
            sys.stdout.trace(list(storage.content)[0])
            sys.stdout.trace("\n".join(
                list(storage.content)[0].asMatchingStrings()))
            sys.stdout.trace(list(storage.content)[0].cigar)
        if alignment.seg_to.left > 0:
            # line2 starts before the alignment: the part in front of it is
            # removed by cutting the reverse-complement line on the right.
            line2.rc.cutRight(len(line2) - alignment.seg_to.left)
            sys.stdout.trace("Cut left")
            sys.stdout.trace(list(storage.content)[0])
            sys.stdout.trace("\n".join(
                list(storage.content)[0].asMatchingStrings()))
            sys.stdout.trace(list(storage.content)[0].cigar)
        alignment = list(storage.content)[0]  # type: AlignmentPiece
        line2.removeListener(storage)
        line1.removeListener(storage.reverse)

        # Making sure line sequences match on the overlap
        # new_seq is the part of line1 before the alignment (if any)
        # followed by the complete sequence of line2.
        if alignment.seg_from.left > 0:
            new_seq = Contig(
                line1.asSegment().prefix(pos=alignment.seg_from.left).Seq() +
                line2.seq, "new_seq")
        else:
            new_seq = Contig(line2.seq, "new_seq")
        # al2 places line2 onto the suffix of new_seq.
        al2 = AlignmentPiece.Identical(
            line2.asSegment(),
            new_seq.asSegment().suffix(length=len(line2)))
        sys.stdout.trace("Al2:", al2)
        # Compose line1 -> line2 with line2 -> new_seq and reverse the
        # result before handing it to line1.correctSequence().
        alignment = alignment.compose(al2).reverse()
        sys.stdout.trace("Composed alignment", alignment)
        sys.stdout.trace("\n".join(alignment.asMatchingStrings()))
        sys.stdout.trace(alignment.cigar)
        assert alignment.seg_to.right == len(line1)
        assert alignment.seg_from.left == al2.seg_to.left
        line1.correctSequence([alignment])

        # Now lines have exact match
        name = "(" + ",".join(
            basic.parseLineName(line1.id) +
            basic.parseLineName(line2.id)) + ")"
        line = self.addNew(new_seq.seq, name)
        assert line.seq.startswith(line1.seq)
        assert line.seq.endswith(line2.seq)
        # al1 / al2 place the old lines onto the prefix / suffix of the new
        # line; their targets are used to re-anchor the stored data.
        al1 = AlignmentPiece.Identical(
            line1.asSegment(),
            line.asSegment().prefix(length=len(line1)))
        al2 = AlignmentPiece.Identical(
            line2.asSegment(),
            line.asSegment().suffix(length=len(line2)))

        # Transfer the data of both lines, re-targeted to the new line and
        # merged pairwise.
        line.initial.addAll(
            line1.initial.targetAsSegment(al1.seg_to).merge(
                line2.initial.targetAsSegment(al2.seg_to)))
        line.correct_segments.addAll(
            line1.correct_segments.contigAsSegment(al1.seg_to).merge(
                line2.correct_segments.contigAsSegment(al2.seg_to)))
        line.completely_resolved.addAll(
            line1.completely_resolved.contigAsSegment(al1.seg_to).merge(
                line2.completely_resolved.contigAsSegment(al2.seg_to), k))
        line.disjointig_alignments.addAll(
            line1.disjointig_alignments.targetAsSegment(al1.seg_to).merge(
                line2.disjointig_alignments.targetAsSegment(al2.seg_to)))
        for al in line1.read_alignments.targetAsSegment(al1.seg_to).merge(
                line2.read_alignments.targetAsSegment(al2.seg_to)):
            line.addReadAlignment(al)
        line1.cleanReadAlignments()
        line2.cleanReadAlignments()

        self.notifyMergedLines(al1, al2)
        # Remember the knots of the outer ends before the old lines are
        # removed, then attach them to the new line.
        knot_right = line2.knot
        knot_left = line1.rc.knot
        self.remove(line1)
        self.remove(line2)
        if knot_right is not None:
            if knot_right.line_right == line1:
                # line2 was tied back to line1: the new line is tied to
                # itself.
                line.tie(line, knot_right.gap, knot_right.gap_seq)
            else:
                line.tie(knot_right.line_right, knot_right.gap,
                         knot_right.gap_seq)
        # A left knot that points at line2.rc is not tied again here.
        if knot_left is not None and knot_left.line_right != line2.rc:
            line.rc.tie(knot_left.line_right, knot_left.gap, knot_left.gap_seq)
        return line
예제 #18
0
 def testManual(self):
     contig1 = Contig("ACGTACGTACGT", "from")
     contig2 = Contig("ACGTACGTACGT", "to")
     al1 = AlignmentPiece.Identical(contig1.segment(0, 4),
                                    contig2.segment(0, 4))
     al2 = AlignmentPiece.Identical(contig1.segment(0, 4),
                                    contig2.segment(4, 8))
     al3 = AlignmentPiece.Identical(contig1.segment(4, 8),
                                    contig2.segment(8, 12))
     storage = AlignmentStorage()
     storage.addAll([al1, al2, al3])
     assert str(
         list(storage)
     ) == "[(from[0:4]->to[0:4]:1.000), (from[0:4]->to[4:12-4]:1.000), (from[4:12-4]->to[8:12-0]:1.000)]"
     assert str(
         list(storage.rc)
     ) == "[(-from[4:12-4]->-to[0:4]:1.000), (-from[8:12-0]->-to[4:12-4]:1.000), (-from[8:12-0]->-to[8:12-0]:1.000)]"
     assert str(list(storage.calculateCoverage())) == "[(to[0:12-0], 1)]"
     assert str(list(storage.filterByCoverage(0, 1))) == "[]"
     assert str(list(storage.filterByCoverage(1, 2))) == "[to[0:12-0]]"
     assert str(list(storage.filterByCoverage(2))) == "[]"
     storage.addAndMergeRight(al3)
     assert str(
         list(storage)
     ) == "[(from[0:4]->to[0:4]:1.000), (from[0:4]->to[4:12-4]:1.000), (from[4:12-4]->to[8:12-0]:1.000)]"
     al4 = AlignmentPiece.Identical(contig1.segment(2, 8),
                                    contig2.segment(2, 8))
     al5 = AlignmentPiece.Identical(contig1.segment(4, 10),
                                    contig2.segment(4, 10))
     storage.addAll([al4, al5])
     assert str(
         list(storage.calculateCoverage())
     ) == "[(to[0:2], 1), (to[2:4], 2), (to[4:12-4], 3), (to[8:12-2], 2), (to[10:12-0], 1)]"
     assert str(list(storage.filterByCoverage(
         2, 3))) == "[to[2:4], to[8:12-2]]"
     assert str(list(storage.filterByCoverage(2))) == "[to[2:12-2]]"
     assert str(
         list(storage.getAlignmentsTo(contig2.segment(2, 3)))
     ) == "[(from[0:4]->to[0:4]:1.000), (from[2:12-4]->to[2:12-4]:1.000)]"
     assert str(list(storage.getAlignmentsTo(contig2.segment(
         2, 6)))) == "[(from[2:12-4]->to[2:12-4]:1.000)]"