def __init__(self, seq, id, extension_handler, rc = None):
    # type: (str, str, ExtensionHandler, Optional[NewLine]) -> None
    """Create a line together with its reverse-complement twin.

    :param seq: nucleotide sequence of this strand.
    :param id: identifier of this strand.
    :param extension_handler: listener stored as ``extensionHandler`` and
        registered among the line listeners.
    :param rc: already constructed twin strand. When omitted, this object
        creates fresh storages and builds the twin itself; when given, all
        storages and listeners are taken from the ``.rc`` views of the
        twin's ones.
    """
    self.seq = seq
    self.id = id  # type: str
    self.extensionHandler = extension_handler
    self.max_extension = False
    self.name_printer = None
    self.circular = False
    if rc is not None:
        # Twin branch: mirror everything the primary strand already owns.
        self.initial = rc.initial.rc  # type: AlignmentStorage
        self.correct_segments = rc.correct_segments.rc  # type: SegmentStorage
        self.completely_resolved = rc.completely_resolved.rc  # type: SegmentStorage
        self.disjointig_alignments = rc.disjointig_alignments.rc  # type: AlignmentStorage
        self.read_alignments = rc.read_alignments.rc  # type: ReadAlignmentStorage
        self.listeners = [twin_listener.rc for twin_listener in rc.listeners]  # type: List[LineListener]
    else:
        # Primary branch: the storages must exist before the twin is
        # constructed, because the twin's constructor reads their ``.rc``.
        self.initial = AlignmentStorage()
        self.correct_segments = SegmentStorage()
        self.completely_resolved = SegmentStorage()
        self.disjointig_alignments = AlignmentStorage()
        self.read_alignments = ReadAlignmentStorage()
        self.listeners = [
            self.initial,
            self.correct_segments,
            self.completely_resolved,
            self.disjointig_alignments,
            self.read_alignments,
            extension_handler,
        ]  # type: List[LineListener]
        rc = NewLine(basic.RC(seq), basic.Reverse(self.id), extension_handler.rc, self)  # type: NewLine
        # addListener touches self.rc.listeners, so the twin link is needed first.
        self.rc = rc
        self.addListener(ReadAlignmentListener(self))
        # self.initial.add(AlignmentPiece.Identical(self.asSegment().asContig().asSegment(), self.asSegment()))
    Contig.__init__(self, seq, id, rc)
    self.rc = rc  # type: NewLine
    self.knot = None  # type: Knot
def splitBad(self, line, lines):
    # type: (NewLine, NewLineStorage) -> None
    """Keep only the reliably covered parts of ``line``.

    Read alignments for which ``contradictingRTC()`` is false are used to
    find segments with coverage of at least ``params.reliable_coverage``;
    segments shorter than ``params.k`` are ignored. With no such segment
    the line is removed from ``lines``; with a single segment spanning
    (almost) the whole line nothing is changed; otherwise the line is cut
    and split so that one piece remains per covered segment.
    """
    consistent = AlignmentStorage()
    consistent.addAll(al for al in line.read_alignments if not al.contradictingRTC())
    segs = list(consistent.filterByCoverage(mi=params.reliable_coverage, k=params.k))  # type: List[Segment]
    segs = [seg for seg in segs if len(seg) >= params.k]
    if len(segs) == 0:
        sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
        lines.removeLine(line)
        return
    if len(segs) == 1 and len(segs[0]) > len(line) - 10:
        sys.stdout.info("Whole line", line.id, "is covered by reads")
        return
    sys.stdout.info("Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
    sys.stdout.trace(segs)
    # Work from the right end towards the left so that the coordinates of
    # the segments still to be processed are not shifted by earlier cuts.
    last = segs[-1]
    next_left = last.left
    line.cutRight(last.right)
    for seg in segs[-2::-1]:
        # When the two covered segments overlap, split on the overlap;
        # otherwise split on the empty segment at next_left.
        split_end = max(next_left, seg.right)
        line, new_line = lines.splitLine(line.segment(next_left, split_end))
        line.cutRight(seg.right)
        next_left = seg.left
    # Trim the left side by cutting the reverse-complement on its right.
    line.rc.cutRight(len(segs[0]))
def splitBad(self, lines):
    # type: (NewLineStorage) -> None
    """Split every unique line of ``lines`` at unreliably covered regions.

    The median of the coverage records of all lines is computed first and
    ``median * 7 / 4`` is used as the upper coverage bound, with
    ``params.reliable_coverage`` as the lower one. Lines without any
    acceptable segment are removed; lines covered (almost) completely are
    left untouched; the rest are cut and split, one piece per segment.
    """
    all_covs = []
    for line in lines:
        all_covs.extend(line.read_alignments.calculateCoverage(params.k))
    median = self.medianCoverage(all_covs)
    sys.stdout.info("Median coverage determined as", median)
    # Ids are collected up front because the loop below adds and removes lines.
    line_ids = [line.id for line in lines.unique()]
    merge_distance = max(params.k - params.bad_end_length * 2, params.k / 2)
    for line_id in line_ids:
        line = lines[line_id]
        consistent = AlignmentStorage()
        consistent.addAll(al for al in line.read_alignments if not al.contradictingRTC())
        good = consistent.filterByCoverage(mi=params.reliable_coverage, ma=median * 7 / 4, k=params.k)
        segs = SegmentStorage().addAll(good)
        segs.mergeSegments(merge_distance)
        if len(segs) == 0:
            sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
            lines.removeLine(line)
            continue
        if len(segs) == 1 and len(segs[0]) > len(line) - 10:
            sys.stdout.info("Whole line", line.id, "is covered by reads")
            continue
        sys.stdout.info("Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
        sys.stdout.trace(segs)
        # Process segments right-to-left so pending coordinates stay valid.
        next_left = segs[-1].left
        line.cutRight(segs[-1].right)
        for seg in list(segs)[-2::-1]:
            split_end = max(next_left, seg.right)
            line, new_line = lines.splitLine(line.segment(next_left, split_end))
            line.cutRight(seg.right)
            next_left = seg.left
        # Trim the left side by cutting the reverse-complement on its right.
        line.rc.cutRight(len(segs[0]))
def FireMergedLines(self, al1, al2):
    # type: (AlignmentPiece, AlignmentPiece) -> None
    # Transfers the stored alignments of two merged lines onto the merged line.
    #   al1 -- alignment from the first source line (al1.seg_from.contig) to
    #          the merged line (al1.seg_to.contig).
    #   al2 -- alignment from the second source line (al2.seg_from.contig);
    #          NOTE(review): presumably targets the same merged line as al1 --
    #          confirm against the caller.
    # The merged line is registered, self/rc/two-line alignments of both
    # source lines are recomposed through al1/al2, and finally both source
    # lines are removed from this storage.
    sys.stdout.trace("Fire merged lines", al1, al2)
    new_line = al1.seg_to.contig
    line1 = al1.seg_from.contig
    line2 = al2.seg_from.contig
    sys.stdout.trace(list(self.allInter(line1.asSegment())))
    sys.stdout.trace(list(self.allInter(line2.asSegment())))
    self.addLine(new_line)
    # Bring both self-alignment storages to the same state (-1) before
    # their contents are composed below.
    self.auto_alignments[line1.id].setState(-1)
    self.auto_alignments[line2.id].setState(-1)
    # Self-alignments of the merged line: line1 onto itself, line2 onto
    # itself, and alignments between line1 and line2.
    auto1 = AutoAlignmentStorage(new_line).addAll([
        al1.composeTargetDifference(al.compose(al1))
        for al in self.auto_alignments[line1.id].content
    ])
    auto2 = AutoAlignmentStorage(new_line).addAll([
        al2.composeTargetDifference(al.compose(al2))
        for al in self.auto_alignments[line2.id].content
    ])
    auto3 = AutoAlignmentStorage(new_line).addAll([
        al2.composeTargetDifference(al.compose(al1))
        for al in self.getAlignmentsToFrom(line1, line2)
    ])
    self.auto_alignments[new_line.id].addAll(
        auto1.merge(auto3).merge(auto2).content)
    # Same three groups for alignments to the reverse-complement; note the
    # use of al1.rc / al2.rc and of line2.rc as the source line.
    rc1 = RCAlignmentStorage(new_line).addAll([
        al1.rc.composeTargetDifference(al.compose(al1))
        for al in self.rc_alignments[line1.id]
    ])
    rc2 = RCAlignmentStorage(new_line).addAll([
        al2.rc.composeTargetDifference(al.compose(al2))
        for al in self.rc_alignments[line2.id]
    ])
    rc3 = RCAlignmentStorage(new_line).addAll([
        al2.rc.composeTargetDifference(al.compose(al1))
        for al in self.getAlignmentsToFrom(line1, line2.rc)
    ])
    self.rc_alignments[new_line.id].addAll(rc1.merge(rc3).merge(rc2))
    # Keys present in the alignmentsToFrom tables of both source lines.
    common = set(self.alignmentsToFrom[line1.id].keys()).intersection(
        set(self.alignmentsToFrom[line2.id].keys()))
    # Storages known to only one of the source lines are recomposed through
    # that line's alignment; storages between the two source lines
    # themselves (either strand) are skipped here.
    for storage in self.alignmentsToFrom[line1.id].values():
        if storage.line_from.id not in common and storage.line_from != line2 and storage.line_from != line2.rc:
            self.addTwoLineStorage(new_line, storage.line_from).addAll(
                [al.compose(al1) for al in storage])
    for storage in self.alignmentsToFrom[line2.id].values():
        if storage.line_from.id not in common and storage.line_from != line1 and storage.line_from != line1.rc:
            self.addTwoLineStorage(new_line, storage.line_from).addAll(
                [al.compose(al2) for al in storage])
    # Storages known to both source lines are recomposed separately and
    # merged before being attached to the merged line.
    for c in common:
        storage1 = self.alignmentsToFrom[line1.id][c]
        storage2 = self.alignmentsToFrom[line2.id][c]
        als1 = AlignmentStorage().addAll(
            [al.compose(al1) for al in storage1])
        als2 = AlignmentStorage().addAll(
            [al.compose(al2) for al in storage2])
        self.addTwoLineStorage(new_line, storage1.line_from).addAll(als1.merge(als2))
    self.removeLine(al1.seg_from.contig)
    self.removeLine(al2.seg_from.contig)
    sys.stdout.trace(list(self.allInter(new_line.asSegment())))
def __init__(self, line, rc = None):
    # type: (Contig, Optional[RCAlignmentStorage]) -> None
    """Create the storage for ``line``; build its twin unless one is given.

    When ``rc`` is omitted this instance owns a fresh ``AlignmentStorage``
    and creates the twin for ``line.rc``, whose content is the ``.rc`` view
    of the owned storage. When ``rc`` is given, ``content`` is not set here;
    the creating twin assigns it afterwards.
    """
    self.line = line
    twin = rc
    if twin is None:
        self.content = AlignmentStorage()
        twin = RCAlignmentStorage(line.rc, self)
        twin.content = self.content.rc
    LineListener.__init__(self, twin)
    self.rc = twin  # type: RCAlignmentStorage
def __init__(self, line, rc = None):
    # type: (Contig, Optional[AutoAlignmentStorage]) -> None
    """Create the storage for ``line``; build its twin unless one is given.

    When ``rc`` is omitted this instance owns a fresh ``AlignmentStorage``
    and creates the twin for ``line.rc``; the twin receives the ``.rc``
    view of the owned storage and state ``-1``. When ``rc`` is given,
    ``content`` is not set here; the creating twin assigns it afterwards.
    """
    self.line = line
    twin = rc
    if twin is None:
        self.content = AlignmentStorage()
        twin = AutoAlignmentStorage(line.rc, self)
        twin.content = self.content.rc
        # The twin's own constructor set its state to 1; override it.
        twin.state = -1
    LineListener.__init__(self, twin)
    self.rc = twin  # type: AutoAlignmentStorage
    self.state = 1  # from precedes to
def __init__(self, seq, id, rc=None):
    # type: (str, str, Optional[Disjointig]) -> None
    """Create a disjointig and, unless ``rc`` is given, its twin strand.

    The primary strand owns a fresh ``AlignmentStorage``; the twin uses the
    ``.rc`` view of the primary strand's storage.
    """
    self.seq = seq
    self.id = id
    if rc is not None:
        # Twin strand: share the primary strand's read alignments.
        self.rc = rc
        self.read_alignments = self.rc.read_alignments.rc  # type: AlignmentStorage
    else:
        # Primary strand: the storage must exist before the twin is built,
        # since the twin's constructor reads it.
        self.read_alignments = AlignmentStorage()  # type: AlignmentStorage
        rc = Disjointig(basic.RC(seq), basic.Reverse(id), self)  # type: Disjointig
        self.rc = rc
    Contig.__init__(self, seq, id, rc)
    self.rc = rc  # type: Disjointig
def polyshSegments(self, line, to_polysh):
    # type: (NewLine, Iterable[Segment]) -> List[Segment]
    """Polish the given segments of ``line`` and apply the corrections.

    The segments are collected in a ``SegmentStorage`` that is registered
    as a listener of ``line`` for the duration of the call; they are merged
    and sorted, each one is passed to ``self.polisher.polishSegment``
    together with the read alignments intersecting it, and all resulting
    corrections are applied with a single ``line.correctSequence`` call.

    :param line: line whose sequence is corrected.
    :param to_polysh: segments of ``line`` to polish.
    :return: the merged segments as held by the storage after correction.
    """
    segs = SegmentStorage()
    corrections = AlignmentStorage()
    line.addListener(segs)
    try:
        segs.addAll(to_polysh)
        segs.mergeSegments()
        segs.sort()
        # All corrections are computed before any is applied.
        for seg in segs:
            overlapping_reads = list(line.read_alignments.allInter(seg))
            corrections.add(self.polisher.polishSegment(seg, overlapping_reads))
        line.correctSequence(list(corrections))
    finally:
        # Always detach the temporary listener, even when polishing or
        # correction raises, so the line is not left with a stale listener.
        line.removeListener(segs)
    return list(segs)
class Disjointig(Contig):
    """Contig that stores read alignments, paired with a twin strand."""

    def __init__(self, seq, id, rc=None):
        # type: (str, str, Optional[Disjointig]) -> None
        """Create a disjointig and, unless ``rc`` is given, its twin strand.

        The primary strand owns a fresh ``AlignmentStorage``; the twin uses
        the ``.rc`` view of the primary strand's storage.
        """
        self.seq = seq
        self.id = id
        if rc is None:
            self.read_alignments = AlignmentStorage()  # type: AlignmentStorage
            rc = Disjointig(basic.RC(seq), basic.Reverse(id), self)  # type: Disjointig
            self.rc = rc
        else:
            self.rc = rc
            self.read_alignments = self.rc.read_alignments.rc  # type: AlignmentStorage
        Contig.__init__(self, seq, id, rc)
        self.rc = rc  # type: Disjointig

    def addAlignments(self, als):
        # type: (Iterable[AlignmentPiece]) -> None
        """Add every alignment of ``als`` to the read alignments."""
        self.read_alignments.addAll(als)

    def addAlignment(self, al):
        # type: (AlignmentPiece) -> None
        """Add a single alignment to the read alignments."""
        self.read_alignments.add(al)

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Delegate to ``read_alignments.getAlignmentsTo(seg)``."""
        return self.read_alignments.getAlignmentsTo(seg)

    def allInter(self, seg, min_inter=1):
        # type: (Segment, int) -> Generator[AlignmentPiece]
        """Delegate to ``read_alignments.allInter(seg, min_inter)``."""
        return self.read_alignments.allInter(seg, min_inter)

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write the id, the sequence and the read alignments to ``handler``."""
        handler.writeTokenLine(self.id)
        handler.writeTokenLine(self.seq)
        self.read_alignments.save(handler)

    def loadDisjointig(self, handler, reads):
        # type: (TokenReader, ReadCollection) -> None
        """Restore the state written by ``save`` from ``handler``.

        The id and the sequence are restored on both strands before the
        read alignments are loaded.
        """
        self.id = handler.readToken()
        self.rc.id = basic.Reverse(self.id)
        seq = handler.readToken()
        # Previously the saved sequence was read and silently dropped, so a
        # loaded disjointig kept whatever sequence it was constructed with.
        # NOTE(review): the guard covers a reader that yields None for an
        # empty token -- TokenReader's behaviour is not visible here.
        if seq is not None:
            self.seq = seq
            self.rc.seq = basic.RC(seq)
        self.read_alignments.load(handler, reads, self)
def __init__(self, line_from, line_to, rc = None, reverse = None):
    # type: (Contig, Contig, Optional[TwoLineAlignmentStorage], Optional[TwoLineAlignmentStorage]) -> None
    """Create one of the four linked storages for a pair of lines.

    A call without ``rc`` and ``reverse`` creates the primary storage,
    which builds its twin (for the ``.rc`` lines), its reverse (lines
    swapped) and wires the ``reverse`` links of all four objects. Calls
    with ``rc`` or ``reverse`` set are the nested constructions made by the
    primary storage.
    """
    assert line_from.id != line_to.id and line_from.rc.id != line_to.id
    self.line_from = line_from
    self.line_to = line_to
    self.reverse = reverse
    if rc is not None:
        # Twin: content is the rc view of the creator's content.
        self.rc = rc
        self.content = rc.content.rc  # type: AlignmentStorage
    else:
        self.content = AlignmentStorage()
        self.rc = TwoLineAlignmentStorage(line_from.rc, line_to.rc, self, None)
    LineListener.__init__(self, self.rc)
    if rc is not None or reverse is not None:
        # Nested construction: the primary storage finishes the wiring.
        return
    opposite = TwoLineAlignmentStorage(line_to, line_from, None, self)
    self.reverse = opposite
    self.reverse.reverse = self
    self.rc.reverse = self.reverse.rc
    self.rc.reverse.reverse = self.rc
def fireAfterExtendRight(self, line, seq, relevant_als = None):
    # type: (NewLine, str, Optional[List[AlignmentPiece]]) -> None
    """Update alignments of ``line`` after ``seq`` was appended to it.

    When ``relevant_als`` is given, those alignments are retargeted to the
    whole line, merged with the existing read alignments and the merged
    set replaces the line's read alignments. Then the suffix of the line of
    length ``len(seq) + params.k + 100`` (capped by the line length) is
    aligned against ``self.disjointigs`` and every alignment of length at
    least ``params.k`` is added to the line's disjointig alignments.
    """
    if relevant_als is not None:
        extra = AlignmentStorage().addAll(relevant_als)
        extra_on_line = extra.targetAsSegment(line.asSegment())
        merged = line.read_alignments.merge(extra_on_line)
        line.read_alignments.clean()
        line.read_alignments.addAll(merged)
    suffix_length = min(len(line), len(seq) + params.k + 100)
    new_seg = line.asSegment().suffix(length = suffix_length)
    for found in self.aligner.dotplotAlign([new_seg.asContig()], self.disjointigs):
        if len(found) >= params.k:
            # The aligner output is reversed and retargeted onto the line
            # suffix before it is stored.
            on_line = found.reverse().targetAsSegment(new_seg)
            line.disjointig_alignments.addAndMergeRight(on_line)
class TwoLineAlignmentStorage(LineListener):
    """Alignments from ``line_from`` to ``line_to`` kept in four linked views.

    Every storage is linked to its twin (``rc``, for the ``.rc`` lines) and
    to its ``reverse`` (lines swapped); ``normalizeReverse`` rebuilds the
    reverse views from ``content`` after each listener event.
    """

    def __init__(self, line_from, line_to, rc = None, reverse = None):
        # type: (Contig, Contig, Optional[TwoLineAlignmentStorage], Optional[TwoLineAlignmentStorage]) -> None
        """Create a storage; without ``rc`` and ``reverse`` also build and
        wire the twin and reverse storages."""
        assert line_from.id != line_to.id and line_from.rc.id != line_to.id
        self.line_from = line_from
        self.line_to = line_to
        self.reverse = reverse
        if rc is None:
            self.content = AlignmentStorage()
            self.rc = TwoLineAlignmentStorage(line_from.rc, line_to.rc, self, None)
        else:
            self.rc = rc
            self.content = rc.content.rc  # type: AlignmentStorage
        LineListener.__init__(self, self.rc)
        self.rc = self.rc  # type: TwoLineAlignmentStorage
        if reverse is None and rc is None:
            # Only the primary storage creates the reverse and links all four.
            reverse = TwoLineAlignmentStorage(line_to, line_from, None, self)
            self.reverse = reverse
            self.reverse.reverse = self
            self.rc.reverse = self.reverse.rc
            self.rc.reverse.reverse = self.rc

    def add(self, al):
        # type: (AlignmentPiece) -> None
        """Add ``al`` here and its reversal to the reverse storage."""
        assert al.seg_from.contig == self.line_from
        assert al.seg_to.contig == self.line_to
        self.content.add(al)
        reverse = al.reverse()
        self.reverse.content.add(reverse)

    def addAll(self, als):
        """Add every alignment of ``als``; returns ``self`` for chaining."""
        for al in als:
            self.add(al)
        return self

    def __iter__(self):
        # type: () -> Generator[AlignmentPiece]
        return self.content.__iter__()

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Delegate to ``content.getAlignmentsTo(seg)``."""
        return self.content.getAlignmentsTo(seg)

    def allInter(self, seg):
        """Delegate to ``content.allInter(seg)``."""
        return self.content.allInter(seg)

    def normalizeReverse(self):
        """Rebuild the reverse storage (and its twin) from ``content``."""
        self.reverse.content = self.content.reverse()
        self.reverse.rc.content = self.reverse.content.rc

    def fireBeforeExtendRight(self, line, new_seq, seq):
        # type: (Any, Contig, str) -> None
        self.content.fireBeforeExtendRight(line, new_seq, seq)
        self.normalizeReverse()

    def fireBeforeCutRight(self, line, new_seq, pos):
        # type: (Any, Contig, int) -> None
        self.content.fireBeforeCutRight(line, new_seq, pos)
        self.normalizeReverse()

    # alignments from new sequence to new sequence
    def fireBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        self.content.fireBeforeCorrect(alignments)
        self.normalizeReverse()

    def fireAfterExtendRight(self, line, seq, relevant_als = None):
        # type: (Any, str, Optional[List[AlignmentPiece]]) -> None
        # relevant_als is accepted for interface compatibility and not
        # forwarded to the content storage.
        self.content.fireAfterExtendRight(line, seq)
        self.normalizeReverse()

    def fireAfterCutRight(self, line, pos):
        # type: (Any, int) -> None
        self.content.fireAfterCutRight(line, pos)
        self.normalizeReverse()

    def fireAfterCorrect(self, line, alignments):
        # type: (Any, Correction) -> None
        self.content.fireAfterCorrect(line, alignments)
        self.normalizeReverse()

    def addAndMergeRight(self, al):
        """Delegate to ``content.addAndMergeRight`` and resync the reverse."""
        self.content.addAndMergeRight(al)
        self.normalizeReverse()

    def merge(self, other):
        # type: (TwoLineAlignmentStorage) -> TwoLineAlignmentStorage
        """Return a new storage holding the merge of both contents.

        Neither ``self`` nor ``other`` is modified by this method itself.
        """
        res = TwoLineAlignmentStorage(self.line_from, self.line_to)
        res.content.addAll(self.content.merge(other.content))
        res.normalizeReverse()
        # The result was previously built and then dropped (implicit None).
        return res

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write ``content`` to ``handler``."""
        self.content.save(handler)

    def load(self, handler, lines):
        # type: (TokenReader, Any) -> None
        """Load ``content`` from ``handler`` and rebuild the reverse views."""
        self.content.load(handler, lines, lines)
        self.normalizeReverse()
class NewLine(Contig):
    """Contig with attached alignment/segment storages and change listeners.

    Every line is paired with its twin strand (``rc``). Sequence edits
    (``extendRight``, ``cutRight``, ``correctSequence``) update both strands
    and notify all registered listeners before and after the change.
    """

    def __init__(self, seq, id, extension_handler, rc = None):
        # type: (str, str, ExtensionHandler, Optional[NewLine]) -> None
        """Create a line; unless ``rc`` is given also create its twin.

        The primary strand creates fresh storages; the twin takes the
        ``.rc`` views of the primary strand's storages and listeners.
        """
        self.extensionHandler = extension_handler
        self.seq = seq
        self.id = id  # type: str
        self.circular = False
        self.name_printer = None
        self.max_extension = False
        if rc is None:
            self.initial = AlignmentStorage()
            self.correct_segments = SegmentStorage()
            self.completely_resolved = SegmentStorage()
            self.disjointig_alignments = AlignmentStorage()
            self.read_alignments = ReadAlignmentStorage()
            self.listeners = [self.initial, self.correct_segments, self.completely_resolved,
                              self.disjointig_alignments, self.read_alignments,
                              extension_handler]  # type: List[LineListener]
            rc = NewLine(basic.RC(seq), basic.Reverse(self.id), extension_handler.rc, self)  # type: NewLine
            # addListener needs self.rc, so link the twin before using it.
            self.rc = rc
            self.addListener(ReadAlignmentListener(self))
            # self.initial.add(AlignmentPiece.Identical(self.asSegment().asContig().asSegment(), self.asSegment()))
        else:
            self.initial = rc.initial.rc  # type: AlignmentStorage
            self.correct_segments = rc.correct_segments.rc  # type: SegmentStorage
            self.completely_resolved = rc.completely_resolved.rc  # type: SegmentStorage
            self.disjointig_alignments = rc.disjointig_alignments.rc  # type: AlignmentStorage
            self.read_alignments = rc.read_alignments.rc  # type: ReadAlignmentStorage
            self.listeners = [listener.rc for listener in rc.listeners]  # type: List[LineListener]
        Contig.__init__(self, seq, id, rc)
        self.rc = rc  # type: NewLine
        self.knot = None  # type: Knot

    def updateCorrectSegments(self, seg, threshold = params.reliable_coverage):
        # type: (Segment, int) -> None
        """Add the parts covered by at least ``threshold`` of the read
        alignments intersecting ``seg`` to ``correct_segments`` and merge."""
        segs = AlignmentStorage().addAll(self.read_alignments.allInter(seg)).filterByCoverage(mi=threshold)
        self.correct_segments.addAll(segs)
        self.correct_segments.mergeSegments()

    def addReads(self, alignments):
        # type: (Iterable[AlignmentPiece]) -> None
        """Add read alignments and reset the ``max_extension`` flag."""
        self.read_alignments.addAll(alignments)
        self.max_extension = False

    def getReadAlignmentsTo(self, seg):
        # type: (Segment) -> Iterable[AlignmentPiece]
        """Delegate to ``read_alignments.getAlignmentsTo(seg)``."""
        return self.read_alignments.getAlignmentsTo(seg)

    def _overlapsAccepted(self, al, accepted):
        # type: (AlignmentPiece, List[AlignmentPiece]) -> bool
        """Tell whether ``al`` shares matching positions with any alignment
        in ``accepted`` (stops at the first hit)."""
        return any(
            len(al.matchingSequence(True).inter(prev.matchingSequence(True)).matches) != 0
            for prev in accepted)

    def getPotentialAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Yield read alignments to ``seg`` obtained through disjointigs.

        Read-to-disjointig alignments are composed with the
        disjointig-to-line alignments returned for ``seg``. Per read,
        candidates are visited longest first and one is yielded only if it
        shares no matching positions with those already yielded.
        """
        result = []
        for alDL in self.disjointig_alignments.getAlignmentsTo(seg):
            reduced = alDL.reduce(target=seg)
            dt = alDL.seg_from.contig  # type: Disjointig
            for alRD in dt.getAlignmentsTo(reduced.seg_from):
                result.append(alRD.compose(alDL))
        # groupby needs the candidates of one read to be adjacent; within a
        # read the longest candidate comes first.
        result = sorted(result, key = lambda al: (al.seg_from.contig.id, -len(al.seg_from)))
        for read, read_als in itertools.groupby(result, key = lambda al: al.seg_from.contig):
            readRes = []
            for al in read_als:
                if not self._overlapsAccepted(al, readRes):
                    yield al
                    readRes.append(al)

    def getRelevantAlignmentsFor(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Yield read alignments relevant for ``seg``, found via disjointigs.

        Disjointig alignments intersecting ``seg`` with a target of at
        least ``params.k`` are used. Reads of each disjointig are kept when
        their overlap with the disjointig alignment exceeds
        ``8 * params.k / 10``, composed back onto the line, and kept when
        the result is at least ``params.k`` long on the line. Per read,
        overlapping candidates are filtered exactly as in
        ``getPotentialAlignmentsTo``.
        """
        sys.stdout.trace("Requesting read alignments for", seg)
        result = []
        if params.debug:
            # Single-argument print(...) behaves identically under Python 2.
            print(self.disjointig_alignments)
            print(list(self.disjointig_alignments.allInter(seg)))
        for alDL in self.disjointig_alignments.allInter(seg):
            if len(alDL.seg_to) < params.k:
                continue
            reduced = alDL.reduce(target=seg)
            dt = alDL.seg_from.contig  # type: Disjointig
            als = [al for al in dt.allInter(reduced.seg_from)
                   if al.seg_to.interSize(alDL.seg_from) > 8 * params.k / 10]
            compositions = alDL.massComposeBack(als)
            for al in compositions:
                if len(al.seg_to) >= params.k:
                    result.append(al)
        sys.stdout.trace("Request for read alignments for", seg, " collecting finished. Started filtering")
        result = sorted(result, key = lambda al: (al.seg_from.contig.id, -len(al.seg_from)))
        for read, read_als in itertools.groupby(result, key = lambda al: al.seg_from.contig):  # type: AlignedRead, Generator[AlignmentPiece]
            readRes = []
            for al in read_als:
                if not self._overlapsAccepted(al, readRes):
                    if params.debug:
                        print(al)
                    yield al
                    readRes.append(al)
        sys.stdout.trace("Request for read alignments for", seg, "finished")

    def position(self, pos):
        # type: (int) -> LinePosition
        """Return a ``LinePosition`` for ``pos`` on this line."""
        return LinePosition(self, pos)

    def extendRight(self, seq, relevant_als = None):
        # type: (str, List[AlignmentPiece]) -> None
        """Append ``seq`` to the line (and prepend its RC to the twin).

        Listeners are notified before and after the change; afterwards the
        correct segments are updated for the whole line and
        ``max_extension`` is set. The line must not be tied (``knot``).
        """
        sys.stdout.trace("Line operation Extend:", self, len(seq), relevant_als)
        assert self.knot is None
        if relevant_als is None:
            relevant_als = []
        new_seq = Contig(self.seq + seq, "TMP2_" + self.id)
        self.notifyBeforeExtendRight(new_seq, seq)
        self.seq = self.seq + seq
        self.rc.seq = basic.RC(seq) + self.rc.seq
        self.notifyAfterExtendRight(seq, relevant_als)
        self.updateCorrectSegments(self.asSegment())
        self.max_extension = True

    def notifyBeforeExtendRight(self, new_seq, seq):
        # type: (Contig, str) -> None
        for listener in self.listeners:
            listener.fireBeforeExtendRight(self, new_seq, seq)

    def notifyAfterExtendRight(self, seq, relevant_als):
        # type: (str, Optional[List[AlignmentPiece]]) -> None
        for listener in self.listeners:
            listener.fireAfterExtendRight(self, seq, relevant_als)

    def cutRight(self, pos):
        """Truncate the line to its first ``pos`` bases.

        ``pos`` must satisfy ``0 < pos <= len(self)``; cutting at the
        current length is a no-op. Listeners are notified before and after.
        """
        sys.stdout.trace("Line operation Cut:", self, pos)
        assert pos > 0 and pos <= len(self)
        cut_length = len(self) - pos
        if cut_length == 0:
            return
        new_seq = Contig(self.seq[:pos], "TMP3_" + self.id)
        self.notifyBeforeCutRight(new_seq, pos)
        self.seq = self.seq[:-cut_length]
        self.rc.seq = self.rc.seq[cut_length:]
        self.notifyAfterCutRight(pos)

    def notifyBeforeCutRight(self, new_seq, pos):
        # type: (Contig, int) -> None
        for listener in self.listeners:
            listener.fireBeforeCutRight(self, new_seq, pos)

    def notifyAfterCutRight(self, pos):
        # type: (int) -> None
        for listener in self.listeners:
            listener.fireAfterCutRight(self, pos)

    def correctSequence(self, alignments):
        # type: (Iterable[AlignmentPiece]) -> None
        """Replace the line sequence according to ``alignments``.

        Alignments whose two sides already have equal sequences are
        dropped; if nothing remains the call is a no-op. Otherwise a
        ``Correction`` is built, both strands get the corrected sequence
        and listeners are notified before and after the change.
        """
        sys.stdout.trace("Line operation Correct:", alignments)
        alignments = [al.cutIdenticalEnds() for al in alignments if al.seg_from.Seq() != al.seg_to.Seq()]
        if len(alignments) == 0:
            sys.stdout.trace("Skipping trivial correction operation")
            return
        correction = Correction.constructCorrection(alignments)
        self.notifyBeforeCorrect(correction)
        old = Contig(self.seq, "old")
        self.seq = correction.seq_from.seq
        self.rc.seq = basic.RC(self.seq)
        correction.changeQT(self, old)
        self.notifyAfterCorrect(correction)

    def notifyBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        for listener in self.listeners:
            listener.fireBeforeCorrect(alignments)

    def notifyAfterCorrect(self, alignments):
        # type: (Correction) -> None
        for listener in self.listeners:
            listener.fireAfterCorrect(self, alignments)

    def addReadAlignment(self, al):
        # type: (AlignmentPiece) -> AlignmentPiece
        """Add one read alignment, reset ``max_extension`` and return ``al``."""
        self.read_alignments.add(al)
        self.max_extension = False
        return al

    def addListener(self, listener):
        """Register ``listener`` here and ``listener.rc`` on the twin."""
        self.listeners.append(listener)
        self.rc.listeners.append(listener.rc)

    def removeListener(self, listener):
        """Unregister ``listener`` here and ``listener.rc`` on the twin."""
        self.listeners.remove(listener)
        self.rc.listeners.remove(listener.rc)

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write id, sequence and all storages to ``handler``."""
        handler.writeTokenLine(self.id)
        handler.writeTokenLine(self.seq)
        self.initial.save(handler)
        self.correct_segments.save(handler)
        self.completely_resolved.save(handler)
        self.disjointig_alignments.save(handler)
        self.read_alignments.save(handler)

    def loadLine(self, handler, disjointigs, reads, contigs):
        # type: (TokenReader, DisjointigCollection, ReadCollection, ContigCollection) -> None
        """Restore the state written by ``save`` from ``handler``.

        Every loaded read alignment is also registered on its read, and
        ``max_extension`` is reset.
        """
        self.id = handler.readToken()
        self.seq = handler.readToken()
        self.rc.id = basic.Reverse(self.id)
        # Keep the twin's sequence in sync, as every other mutator does;
        # previously it kept the value it was constructed with.
        self.rc.seq = basic.RC(self.seq)
        n = handler.readInt()
        for i in range(n):
            # Of each saved initial alignment only the target segment is
            # used; the surrounding tokens are consumed and discarded.
            handler.readToken()
            handler.readToken()
            handler.readToken()
            seg = Segment.load(handler, self)
            handler.readToken()
            self.initial.add(AlignmentPiece.Identical(seg.asContig().asSegment(), seg))
            # self.add(AlignmentPiece.load(handler, collection_from, collection_to))
        self.correct_segments.load(handler, self)
        self.completely_resolved.load(handler, self)
        self.disjointig_alignments.load(handler, disjointigs, self)
        self.read_alignments.load(handler, reads, self)
        for al in self.read_alignments:
            read = al.seg_from.contig  # type: AlignedRead
            read.addAlignment(al)
        self.max_extension = False

    def __str__(self):
        if self.name_printer is not None:
            return self.name_printer(self)
        points = [self.left()]
        if len(self.initial) == 0:
            points.append("NA")
        else:
            points.append(self.initial[0].seg_to.left)
            points.append(self.initial[-1].seg_to.right)
        points.append(self.right())
        return "Line:" + str(self.id) + ":" + "[" + ":".join(map(str, points)) + "]"

    def __repr__(self):
        points = [self.left()]
        points.extend(self.initial)
        points.append(self.right())
        return "Line:" + str(self.id) + ":" + "[" + ":".join(map(str, points)) + "]"

    def setCircular(self):
        """Mark both strands as circular."""
        self.circular = True
        self.rc.circular = True

    def cleanReadAlignments(self):
        """Detach this line from every aligned read and clear the read
        alignments; resets ``max_extension``."""
        for read in self.read_alignments:
            read.seg_from.contig.removeContig(self)
        self.read_alignments.clean()
        self.max_extension = False

    def tie(self, other, gap, gap_seq):
        """Create a ``Knot`` from this line to ``other`` and store its twin
        on ``other.rc``; tying a line to itself marks it circular."""
        self.knot = Knot(self, other, gap, gap_seq)
        other.rc.knot = self.knot.rc
        if self == other:
            self.setCircular()

    def unTie(self):
        """Remove the knot of this line, if any, from both tied lines."""
        if self.knot is not None:
            self.knot.line_right.rc.knot = None
            self.knot = None
class RCAlignmentStorage(LineListener):
    """Listener storing alignments between a line and its twin strand.

    ``content`` is loaded with ``line.rc`` as the source and ``line`` as
    the target collection; every added alignment is stored together with
    the reverse-complement of its reversal.
    """

    def __init__(self, line, rc = None):
        # type: (Contig, Optional[RCAlignmentStorage]) -> None
        """Create the storage for ``line``; build its twin unless given."""
        self.line = line
        twin = rc
        if twin is None:
            self.content = AlignmentStorage()
            twin = RCAlignmentStorage(line.rc, self)
            twin.content = self.content.rc
        LineListener.__init__(self, twin)
        self.rc = twin  # type: RCAlignmentStorage

    def __iter__(self):
        # type: () -> Generator[AlignmentPiece]
        return self.content.__iter__()

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Delegate to ``content.getAlignmentsTo(seg)``."""
        return self.content.getAlignmentsTo(seg)

    def allInter(self, seg):
        """Delegate to ``content.allInter(seg)``."""
        return self.content.allInter(seg)

    def add(self, alignment):
        """Store ``alignment`` and the rc of its reversal."""
        mirrored = alignment.reverse().rc
        self.content.add(alignment)
        self.content.add(mirrored)

    def addAndMergeRight(self, al):
        # type: (AlignmentPiece) -> None
        """Merge ``al`` on the right and the rc of its reversal on the left."""
        self.content.addAndMergeRight(al)
        self.content.addAndMergeLeft(al.reverse().rc)

    def addAll(self, als):
        """Add every alignment of ``als``; returns ``self`` for chaining."""
        for item in als:
            self.add(item)
        return self

    def _fireOnBothViews(self, event, *args):
        """Forward a listener event to ``content``, flip the storage with
        ``reverse()`` and forward the same event to the new ``content``."""
        getattr(self.content, event)(*args)
        self.reverse()
        # self.content was rebound by reverse(), so it is looked up again.
        getattr(self.content, event)(*args)

    def fireBeforeExtendRight(self, line, new_seq, seq):
        # type: (Any, Contig, str) -> None
        self._fireOnBothViews("fireBeforeExtendRight", line, new_seq, seq)

    def fireBeforeCutRight(self, line, new_seq, pos):
        # type: (Any, Contig, int) -> None
        self._fireOnBothViews("fireBeforeCutRight", line, new_seq, pos)

    # alignments from new sequence to new sequence
    def fireBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        self._fireOnBothViews("fireBeforeCorrect", alignments)

    def fireAfterExtendRight(self, line, seq, relevant_als = None):
        # type: (Any, str, Optional[List[AlignmentPiece]]) -> None
        # relevant_als is not forwarded to the content storage.
        self._fireOnBothViews("fireAfterExtendRight", line, seq)

    def fireAfterCutRight(self, line, pos):
        # type: (Any, int) -> None
        self._fireOnBothViews("fireAfterCutRight", line, pos)

    def fireAfterCorrect(self, line, alignments):
        # type: (Any, Correction) -> None
        self._fireOnBothViews("fireAfterCorrect", line, alignments)

    # This is CRAAAZY!!! But correct.
    def reverse(self):
        """Give the twin the reversed content and take the rc view of
        that as the new own content."""
        flipped = self.content.reverse()
        self.rc.content = flipped
        self.content = self.rc.content.rc

    def merge(self, other):
        # type: (RCAlignmentStorage) -> RCAlignmentStorage
        """Return a new storage for the same line holding the merged
        contents of ``self`` and ``other``."""
        merged = RCAlignmentStorage(self.line)
        merged.content.addAll(self.content.merge(other.content))
        return merged

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write ``content`` to ``handler``."""
        self.content.save(handler)

    def load(self, handler):
        # type: (TokenReader) -> None
        """Load ``content`` with ``line.rc`` as source and ``line`` as target."""
        self.content.load(handler, self.line.rc, self.line)
def updateCorrectSegments(self, seg, threshold = params.reliable_coverage):
    # type: (Segment, int) -> None
    """Extend ``correct_segments`` with the well covered parts around ``seg``.

    The read alignments intersecting ``seg`` are filtered by a minimal
    coverage of ``threshold``; the resulting segments are added to
    ``correct_segments``, which is then merged.
    """
    intersecting = AlignmentStorage().addAll(self.read_alignments.allInter(seg))
    well_covered = intersecting.filterByCoverage(mi=threshold)
    correct = self.correct_segments
    correct.addAll(well_covered)
    correct.mergeSegments()
class AutoAlignmentStorage(LineListener):
    """Listener storing alignments of a line to itself.

    Only one direction of every alignment is stored; ``state`` selects
    which one (with ``state == 1`` the source starts to the left of the
    target). Identical alignments are never stored; iteration yields the
    stored alignments, their reversals and the identity alignment.
    """

    def __init__(self, line, rc = None):
        # type: (Contig, Optional[AutoAlignmentStorage]) -> None
        """Create the storage for ``line``; build its twin unless given."""
        self.line = line
        twin = rc
        if twin is None:
            self.content = AlignmentStorage()
            twin = AutoAlignmentStorage(line.rc, self)
            twin.content = self.content.rc
            twin.state = -1
        LineListener.__init__(self, twin)
        self.rc = twin  # type: AutoAlignmentStorage
        self.state = 1  # from precedes to

    def makeCanonical(self, al):
        """Return ``al`` in the direction matching ``state``."""
        return al if self.isCanonical(al) else al.reverse()

    def isCanonical(self, al):
        """Tell whether the direction of ``al`` matches ``state``."""
        forward_expected = self.state == 1
        return forward_expected == (al.seg_from.left < al.seg_to.left)

    def add(self, al):
        # type: (AlignmentPiece) -> None
        """Store ``al`` in canonical direction; identical ones are skipped."""
        if not al.isIdentical():
            self.content.add(self.makeCanonical(al))

    def addAll(self, als):
        """Add every alignment of ``als``; returns ``self`` for chaining."""
        for item in als:
            self.add(item)
        return self

    def addAndMergeRight(self, al):
        """Like ``add`` but via ``content.addAndMergeRight``."""
        if not al.isIdentical():
            self.content.addAndMergeRight(self.makeCanonical(al))

    def __iter__(self):
        # type: () -> Generator[AlignmentPiece]
        for stored in self.content:
            yield stored
        for stored in self.content:
            yield stored.reverse()
        yield AlignmentPiece.Identical(self.line.asSegment())

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Yield the alignments whose target contains ``seg``."""
        for candidate in self:
            if candidate.seg_to.contains(seg):
                yield candidate

    def allInter(self, seg):
        """Yield the alignments whose target intersects ``seg``."""
        for candidate in self:
            if candidate.seg_to.inter(seg):
                yield candidate

    def _fireOnBothDirections(self, event, *args):
        """Forward a listener event to ``content``, flip the storage with
        ``reverse()`` and forward the same event to the new ``content``."""
        getattr(self.content, event)(*args)
        self.reverse()
        # self.content was rebound by reverse(), so it is looked up again.
        getattr(self.content, event)(*args)

    def fireBeforeExtendRight(self, line, new_seq, seq):
        # type: (Any, Contig, str) -> None
        self._fireOnBothDirections("fireBeforeExtendRight", line, new_seq, seq)

    # alignments from new sequence to new sequence
    def fireBeforeCutRight(self, line, new_seq, pos):
        # type: (Any, Contig, int) -> None
        self._fireOnBothDirections("fireBeforeCutRight", line, new_seq, pos)

    def fireBeforeCorrect(self, alignments):
        # type: (Correction) -> None
        self._fireOnBothDirections("fireBeforeCorrect", alignments)

    def fireAfterExtendRight(self, line, seq, relevant_als = None):
        # type: (Any, str, Optional[List[AlignmentPiece]]) -> None
        # relevant_als is not forwarded to the content storage.
        self._fireOnBothDirections("fireAfterExtendRight", line, seq)

    def fireAfterCutRight(self, line, pos):
        # type: (Any, int) -> None
        self._fireOnBothDirections("fireAfterCutRight", line, pos)

    def fireAfterCorrect(self, line, alignments):
        # type: (Any, Correction) -> None
        self._fireOnBothDirections("fireAfterCorrect", line, alignments)

    def reverse(self):
        """Flip the stored direction: negate ``state`` on both twins and
        replace the contents by their reversals."""
        self.state = -self.state
        self.rc.state = -self.rc.state
        self.content = self.content.reverse()
        self.rc.content = self.content.rc

    def merge(self, other):
        # type: (AutoAlignmentStorage) -> AutoAlignmentStorage
        """Return a new storage with the merged contents.

        ``self`` is flipped first when its state differs from ``other``'s.
        NOTE(review): only ``state`` of the result is set, its twin keeps
        the state assigned by the constructor -- confirm this is intended.
        """
        if self.state != other.state:
            self.reverse()
        merged = AutoAlignmentStorage(self.line)
        merged.state = self.state
        merged.content.addAll(self.content.merge(other.content))
        return merged

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write ``content`` to ``handler``."""
        self.content.save(handler)

    def load(self, handler):
        # type: (TokenReader) -> None
        """Load ``content`` with ``line`` as both source and target."""
        self.content.load(handler, self.line, self.line)

    def setState(self, state):
        """Flip the storage if needed so that ``self.state == state``."""
        assert state in [-1, 1]
        if self.state != state:
            self.reverse()
def mergeLines(self, alignment, k):
    # type: (AlignmentPiece, int) -> NewLine
    # Merges the two lines joined by `alignment` into a single new line.
    #   alignment -- overlap from line1 (alignment.seg_from.contig) to
    #                line2 (alignment.seg_to.contig); the lines must differ.
    #   k         -- the overlap is prolonged to at least k + 100 and k is
    #                passed to the merge of completely_resolved segments.
    # Returns the newly added line; line1 and line2 are removed from this
    # storage and their knots, if any, are re-tied to the new line.
    sys.stdout.trace("Line operation Merge", alignment.seg_from.contig, alignment.seg_to.contig, alignment)
    line1 = alignment.seg_from.contig #type: NewLine
    line2 = alignment.seg_to.contig #type: NewLine
    assert line1 != line2
    if len(alignment) < k + 100:
        # Extend line1 with the sequence of line2 that follows the overlap
        # and append the corresponding identical alignment.
        sys.stdout.trace( "Prolonging line to ensure alignment of at least k")
        seg = line2.segment( alignment.seg_to.right, alignment.seg_to.right + k + 100 - len(alignment))
        line1.extendRight(seg.Seq())
        alignment = alignment.mergeDistant( AlignmentPiece.Identical( line1.asSegment().suffix(length=len(seg)), seg))
    # Cutting hanging tips of both lines
    al_storage = AlignmentStorage()
    al_storage.add(alignment)
    # The temporary storage listens to both lines so that the alignment it
    # holds is updated by the cuts below.
    storage = TwoLineAlignmentStorage(line1, line2)
    line2.addListener(storage)
    line1.addListener(storage.reverse)
    storage.add(alignment)
    if alignment.seg_from.right < len(line1):
        line1.cutRight(alignment.seg_from.right)
        sys.stdout.trace("Cut right")
        sys.stdout.trace(list(storage.content)[0])
        sys.stdout.trace("\n".join( list(storage.content)[0].asMatchingStrings()))
        sys.stdout.trace(list(storage.content)[0].cigar)
    if alignment.seg_to.left > 0:
        # The left tip of line2 is removed by cutting its twin on the right.
        line2.rc.cutRight(len(line2) - alignment.seg_to.left)
        sys.stdout.trace("Cut left")
        sys.stdout.trace(list(storage.content)[0])
        sys.stdout.trace("\n".join( list(storage.content)[0].asMatchingStrings()))
        sys.stdout.trace(list(storage.content)[0].cigar)
    # Take the alignment as updated by the listener, then detach it.
    alignment = list(storage.content)[0] # type: AlignmentPiece
    line2.removeListener(storage)
    line1.removeListener(storage.reverse)
    # Making sure line sequences match on the overlap
    if alignment.seg_from.left > 0:
        new_seq = Contig( line1.asSegment().prefix(pos=alignment.seg_from.left).Seq() + line2.seq, "new_seq")
    else:
        new_seq = Contig(line2.seq, "new_seq")
    al2 = AlignmentPiece.Identical( line2.asSegment(), new_seq.asSegment().suffix(length=len(line2)))
    sys.stdout.trace("Al2:", al2)
    alignment = alignment.compose(al2).reverse()
    sys.stdout.trace("Composed alignment", alignment)
    sys.stdout.trace("\n".join(alignment.asMatchingStrings()))
    sys.stdout.trace(alignment.cigar)
    assert alignment.seg_to.right == len(line1)
    assert alignment.seg_from.left == al2.seg_to.left
    line1.correctSequence([alignment])
    # Now lines have exact match
    name = "(" + ",".join( basic.parseLineName(line1.id) + basic.parseLineName(line2.id)) + ")"
    line = self.addNew(new_seq.seq, name)
    assert line.seq.startswith(line1.seq)
    assert line.seq.endswith(line2.seq)
    # al1 / al2 place line1 on the prefix and line2 on the suffix of the
    # new line; every storage of the old lines is moved through them.
    al1 = AlignmentPiece.Identical( line1.asSegment(), line.asSegment().prefix(length=len(line1)))
    al2 = AlignmentPiece.Identical( line2.asSegment(), line.asSegment().suffix(length=len(line2)))
    line.initial.addAll( line1.initial.targetAsSegment(al1.seg_to).merge( line2.initial.targetAsSegment(al2.seg_to)))
    line.correct_segments.addAll( line1.correct_segments.contigAsSegment(al1.seg_to).merge( line2.correct_segments.contigAsSegment(al2.seg_to)))
    line.completely_resolved.addAll( line1.completely_resolved.contigAsSegment(al1.seg_to).merge( line2.completely_resolved.contigAsSegment(al2.seg_to), k))
    line.disjointig_alignments.addAll( line1.disjointig_alignments.targetAsSegment(al1.seg_to).merge( line2.disjointig_alignments.targetAsSegment(al2.seg_to)))
    for al in line1.read_alignments.targetAsSegment(al1.seg_to).merge( line2.read_alignments.targetAsSegment(al2.seg_to)):
        line.addReadAlignment(al)
    line1.cleanReadAlignments()
    line2.cleanReadAlignments()
    self.notifyMergedLines(al1, al2)
    # Knots are remembered before the old lines are removed.
    knot_right = line2.knot
    knot_left = line1.rc.knot
    self.remove(line1)
    self.remove(line2)
    if knot_right is not None:
        if knot_right.line_right == line1:
            # line2 was tied back to line1: the merged line ties to itself.
            line.tie(line, knot_right.gap, knot_right.gap_seq)
        else:
            line.tie(knot_right.line_right, knot_right.gap, knot_right.gap_seq)
    if knot_left is not None and knot_left.line_right != line2.rc:
        line.rc.tie(knot_left.line_right, knot_left.gap, knot_left.gap_seq)
    return line
def testManual(self):
    """Check listing, rc view, coverage and lookup of ``AlignmentStorage``
    on hand-made identical alignments between two 12 bp contigs."""

    def shown(items):
        # All expectations compare the string form of the listed items.
        return str(list(items))

    source = Contig("ACGTACGTACGT", "from")
    target = Contig("ACGTACGTACGT", "to")
    al1 = AlignmentPiece.Identical(source.segment(0, 4), target.segment(0, 4))
    al2 = AlignmentPiece.Identical(source.segment(0, 4), target.segment(4, 8))
    al3 = AlignmentPiece.Identical(source.segment(4, 8), target.segment(8, 12))
    storage = AlignmentStorage()
    storage.addAll([al1, al2, al3])
    three_pieces = "[(from[0:4]->to[0:4]:1.000), (from[0:4]->to[4:12-4]:1.000), (from[4:12-4]->to[8:12-0]:1.000)]"
    assert shown(storage) == three_pieces
    assert shown(storage.rc) == "[(-from[4:12-4]->-to[0:4]:1.000), (-from[8:12-0]->-to[4:12-4]:1.000), (-from[8:12-0]->-to[8:12-0]:1.000)]"
    assert shown(storage.calculateCoverage()) == "[(to[0:12-0], 1)]"
    assert shown(storage.filterByCoverage(0, 1)) == "[]"
    assert shown(storage.filterByCoverage(1, 2)) == "[to[0:12-0]]"
    assert shown(storage.filterByCoverage(2)) == "[]"
    # Adding al3 again via addAndMergeRight must leave the listing unchanged.
    storage.addAndMergeRight(al3)
    assert shown(storage) == three_pieces
    al4 = AlignmentPiece.Identical(source.segment(2, 8), target.segment(2, 8))
    al5 = AlignmentPiece.Identical(source.segment(4, 10), target.segment(4, 10))
    storage.addAll([al4, al5])
    assert shown(storage.calculateCoverage()) == "[(to[0:2], 1), (to[2:4], 2), (to[4:12-4], 3), (to[8:12-2], 2), (to[10:12-0], 1)]"
    assert shown(storage.filterByCoverage(2, 3)) == "[to[2:4], to[8:12-2]]"
    assert shown(storage.filterByCoverage(2)) == "[to[2:12-2]]"
    assert shown(storage.getAlignmentsTo(target.segment(2, 3))) == "[(from[0:4]->to[0:4]:1.000), (from[2:12-4]->to[2:12-4]:1.000)]"
    assert shown(storage.getAlignmentsTo(target.segment(2, 6))) == "[(from[2:12-4]->to[2:12-4]:1.000)]"