Esempio n. 1
0
 def polishSegment(self, seg, als):
     # type: (Segment, List[AlignmentPiece]) -> AlignmentPiece
     """Polish ``seg`` window by window and glue the partial results.

     The contig of ``seg`` is cut into windows of width
     ``max(900, params.k)``, each padded by 50 positions on both sides and
     clipped to the contig. Every alignment from ``als`` is assigned to the
     windows its target segment intersects. Each window that intersects
     ``seg`` is polished with ``polishSmallSegment``; the results are glued
     with ``AlignmentPiece.GlueOverlappingAlignments`` and reduced to ``seg``.

     :param seg: segment to polish.
     :param als: alignments used for polishing.
     :return: the glued alignment reduced to ``seg``.
     """
     sys.stdout.trace("Polishing segment", seg)
     window = max(900, params.k)
     margin = 50
     # Floor division keeps the window indices integral even when true
     # division is in effect.
     first = seg.left // window
     last = min(seg.right + window - 1, len(seg.contig)) // window
     window_count = last - first + 1
     windows = [
         Segment(seg.contig, max(0, i * window - margin),
                 min(len(seg.contig), (i + 1) * window + margin))
         for i in range(first, last + 1)
     ]
     als_by_window = [[] for _ in range(window_count)]
     for al in als:
         lo = al.seg_to.left // window
         hi = al.seg_to.right // window + 1
         # One extra window is scanned on each side, presumably because the
         # padding makes neighbouring windows overlap; the explicit
         # intersection test below decides the actual membership.
         for i in range(max(0, lo - first - 1),
                        min(window_count, hi - first + 2)):
             if al.seg_to.inter(windows[i]):
                 als_by_window[i].append(al)
     res_als = [
         self.polishSmallSegment(win, win_als)
         for win, win_als in zip(windows, als_by_window)
         if win.inter(seg)
     ]
     res = AlignmentPiece.GlueOverlappingAlignments(res_als)
     return res.reduce(target=seg)
Esempio n. 2
0
 def attemptJump(self, rec):
     # type: (Record) -> bool
     """Try to move the resolved segment of ``rec`` further to the right.

     Target segments of "bad" alignments are collected from ``rec`` (up to
     the bound returned by ``findAndFilterResolvedBound``) and from
     ``self.dot_plot`` (those intersecting the window from
     ``rec.resolved.right - params.k`` to the bound). They are merged, and
     ``bad_segments.reverse(...)`` (presumably the complement on the line;
     verify against SegmentStorage) is reduced to the same window. The first
     candidate from ``segmentsWithGoodCopies`` that is at least ``params.k``
     long and ends to the right of the current resolved segment becomes the
     new resolved segment.

     :param rec: record whose resolved segment should be advanced.
     :return: True if ``rec.setResolved`` was called, False otherwise.
     """
     def starts_inside(al):
         # An alignment counts as bad when its query start lies beyond the
         # threshold both for the alignment and for its reverse complement.
         # Floor division keeps the threshold integral under true division.
         threshold = min(params.bad_end_length, params.k // 2)
         return (al.seg_from.left > threshold and
                 al.rc.seg_from.left > threshold)

     bound = self.findAndFilterResolvedBound(rec, params.l)
     bad_segments = SegmentStorage()
     for al in rec:
         # Stops at the first alignment starting past the bound; assumes rec
         # yields alignments ordered by target position - TODO confirm.
         if al.seg_to.left > bound:
             break
         if starts_inside(al):
             bad_segments.add(al.seg_to)
     for al in self.dot_plot.allInter(
             rec.line.segment(rec.resolved.right - params.k, bound)):
         if starts_inside(al):
             bad_segments.add(al.seg_to)
     bad_segments.mergeSegments(params.k - 200)
     sys.stdout.trace("Bad segments:", bad_segments)
     good_segments = bad_segments.reverse(rec.line, params.k - 100).reduce(
         rec.line.segment(rec.resolved.right - params.k, bound))
     for good in good_segments:
         # Clip the left end so the candidate never starts before position 0.
         clipped = Segment(good.contig, max(0, good.left), good.right)
         for candidate in self.segmentsWithGoodCopies(rec.resolved, clipped,
                                                      params.k):
             if len(candidate) >= params.k and \
                     candidate.right > rec.resolved.right:
                 rec.setResolved(candidate)
                 return True
     return False
Esempio n. 3
0
 def constructCorrection(alignments):
     # type: (List[AlignmentPiece]) -> Correction
     """Build a Correction that substitutes aligned query pieces into a contig.

     The target contig is taken from the first alignment. The new sequence
     is the original contig in which every target segment of an alignment
     is replaced by the sequence of the corresponding query segment. Each
     alignment is then re-expressed with its query segment located on the
     new sequence.

     :param alignments: alignments to apply; must be non-empty, otherwise
         indexing the first element raises IndexError.
     :return: Correction built from the new sequence, the original contig
         and the re-targeted alignments.
     """
     initial = alignments[0].seg_to.contig
     alignments = sorted(alignments, key=lambda al: al.seg_to.left)

     # Pass 1: assemble the corrected sequence from untouched stretches of
     # the original contig interleaved with the query sequences.
     pieces = []
     pos = initial.left()
     for al in alignments:
         pieces.append(initial.subSequence(pos, al.seg_to.left).seq)
         pieces.append(al.seg_from.Seq())
         pos = al.seg_to.right
     pieces.append(
         initial.segment(alignments[-1].seg_to.right,
                         initial.right()).Seq())
     new_seq = Contig("".join(pieces), "TMP1_" + initial.id)

     # Pass 2: locate every query piece on the new sequence. new_pos tracks
     # the coordinate on new_seq, pos the coordinate on the original contig.
     new_als = []
     pos = initial.left()
     new_pos = 0
     for al in alignments:
         new_pos += al.seg_to.left - pos
         query_len = al.seg_from.__len__()
         new_seg_from = Segment(new_seq, new_pos, new_pos + query_len)
         new_als.append(al.changeQuerySegment(new_seg_from))
         pos = al.seg_to.right
         new_pos += query_len
     return Correction(new_seq, initial, new_als)
Esempio n. 4
0
 def fireBeforeExtendRight(self, line, new_seq, seq):
     # type: (accurate_line.NewLine, Contig, str) -> None
     """Re-target every stored alignment before ``line`` is extended.

     After switching to canonical form, each item is replaced by the result
     of ``targetAsSegment`` applied to the segment of ``new_seq`` spanning
     positions 0 to ``len(line)``. The ``seq`` argument is not used here.
     """
     self.makeCanonical()
     retargeted = []
     for al in self.items:
         prefix = Segment(new_seq, 0, len(line))
         retargeted.append(al.targetAsSegment(prefix))
     self.items = retargeted
Esempio n. 5
0
 def fireBeforeCutRight(self, line, new_seq, pos):
     # type: (accurate_line.NewLine, Contig, int) -> None
     """Adapt stored alignments before ``line`` is cut at ``pos``.

     After switching to canonical form, each alignment is handled by where
     its target segment lies:

     * ending at or before ``pos``: moved to ``new_seq`` unchanged;
     * otherwise, starting at or before ``pos - params.k``: reduced to the
       part of ``line`` from ``line.left()`` to ``pos``, then moved;
     * anything else: dropped.
     """
     self.makeCanonical()
     kept = []  # type: List[AlignmentPiece]
     for al in self.items:  # type: AlignmentPiece
         if al.seg_to.right > pos:
             if al.seg_to.left > pos - params.k:
                 # Too little of the alignment survives the cut.
                 continue
             al = al.reduce(target=Segment(line, line.left(), pos))
         kept.append(al.changeTargetContig(new_seq))
     self.items = kept
Esempio n. 6
0
 def mergeSegments(self, inter_size = 0):
     # type: (int) -> None
     """Merge stored segments in place.

     A non-canonical storage delegates to ``self.rc``. Otherwise the items
     are sorted and scanned once: a segment is fused into the previous
     result when it starts no later than ``inter_size`` before that
     result's right end, or when it ends within it. Finally, results
     shorter than ``inter_size`` are discarded.
     """
     if not self.isCanonical():
         self.rc.mergeSegments(inter_size)
         return
     self.sort()
     if not self.items:
         return
     merged = [self.items[0]]  # type: List[Segment]
     for current in self.items[1:]:  # type: Segment
         previous = merged[-1]
         if current.left <= previous.right - inter_size or \
                 current.right <= previous.right:
             merged[-1] = Segment(previous.contig, previous.left,
                                  max(previous.right, current.right))
         else:
             merged.append(current)
     self.items = [piece for piece in merged if len(piece) >= inter_size]
def main(ref_file, segment, dir):
    """Mask a region of chr1 with N and report how its pieces align back.

    The region ``segment`` of contig "chr1" is cut into consecutive reads
    starting every 500 positions, the region itself is overwritten with
    ``N`` characters, and the reads are aligned against the modified
    reference. For every read one line is written to stdout: the number of
    alignments and the best percent identity (0 when there are none).

    :param ref_file: path to the reference FASTA file.
    :param segment: pair (start, end); when the start is negative both
        coordinates are negated and the reverse complement of chr1 is used.
    :param dir: working directory handed to the aligner; created if missing.
    """
    ref = ContigCollection().loadFromFasta(open(ref_file, "r"), False)
    chr1 = ref["chr1"]
    if segment[0] < 0:
        segment = (-segment[0], -segment[1])
        chr1 = chr1.rc
    start, end = segment[0], segment[1]
    reads = ReadCollection()
    # Reads are cut before the region is masked, so they carry the original
    # sequence.
    reads_list = [
        reads.addNewRead(Segment(chr1, i, i + 500).asNamedSequence())
        for i in range(start, end, 500)
    ]
    chr1.seq = chr1.seq[:start] + "N" * (end - start) + chr1.seq[end:]
    chr1.rc.seq = basic.RC(chr1.seq)
    basic.ensure_dir_existance(dir)
    aligner = Aligner(DirDistributor(dir))
    aligner.alignReadCollection(reads, ref)
    out = sys.stdout
    for read in reads_list:
        # A list comprehension (rather than map) keeps the concatenation
        # valid where map returns an iterator instead of a list.
        identities = [al.percentIdentity() for al in read.alignments]
        best = max([0] + identities)
        out.write(str(len(read.alignments)) + " " + str(best) + "\n")
    # NOTE(review): this closes sys.stdout itself; nothing can be printed
    # after main returns. Kept as in the original - confirm it is intended.
    out.close()
Esempio n. 8
0
 def loadLine(self, handler, disjointigs, reads, contigs):
     # type: (TokenReader, DisjointigCollection, ReadCollection, ContigCollection) -> None
     """Restore this line from the token stream ``handler``.

     Reads, in order: the id, the sequence, a count followed by that many
     initial-segment records, then the correct segments, the completely
     resolved segments, the disjointig alignments and the read alignments.
     Every loaded read alignment is also registered on its read. The
     ``contigs`` argument is not used here.
     """
     self.id = handler.readToken()
     self.seq = handler.readToken()
     self.rc.id = basic.Reverse(self.id)
     record_count = handler.readInt()
     for _ in range(record_count):
         # Each record has three leading tokens and one trailing token that
         # are consumed and ignored; only the segment in between is used.
         for _skipped in range(3):
             handler.readToken()
         seg = Segment.load(handler, self)
         handler.readToken()
         source = seg.asContig().asSegment()
         self.initial.add(AlignmentPiece.Identical(source, seg))
     self.correct_segments.load(handler, self)
     self.completely_resolved.load(handler, self)
     self.disjointig_alignments.load(handler, disjointigs, self)
     self.read_alignments.load(handler, reads, self)
     for al in self.read_alignments:
         aligned_read = al.seg_from.contig  # type: AlignedRead
         aligned_read.addAlignment(al)
     self.max_extension = False
Esempio n. 9
0
 def load(self, handler, contig):
     # type: (TokenReader, NamedSequence) -> None
     """Read a count from ``handler``, then load and add that many segments.

     Each segment is parsed by ``Segment.load`` against ``contig`` and
     passed to ``self.add`` in the order it is read.
     """
     remaining = handler.readInt()
     while remaining > 0:
         self.add(Segment.load(handler, contig))
         remaining -= 1
Esempio n. 10
0
def main(argv):
    sys.stdout.write("Started\n")
    dot_file = argv[1]
    edge_sequences = argv[2]
    reference_file = argv[3]
    alignment_file = argv[4]
    edges = ParseVertices(argv[5])
    output_file = argv[6]
    sys.stdout.write("Loading dot\n")
    dot = DotParser(open(dot_file, "r")).parse()
    edge_collection = ContigCollection().loadFromFasta(
        open(edge_sequences, "r"), True)
    graph = Graph().loadFromDot(edge_collection, dot)
    vertices = [graph.E[id].start.id for id in edges]
    graph.printToFile(sys.stdout)
    print vertices
    ref = ContigCollection().loadFromFasta(open(reference_file, "r"), False)

    print "Looking for relevant"
    pq = PriorityQueue()
    for v in graph.V.values():
        if v.id in vertices:
            pq.push((0, v))
    visited = []
    while not pq.empty():
        d, v = pq.pop()
        if v in visited:
            continue
        visited.append(v)
        for e in v.inc:
            print e.id, e.start.id, e.end.id
            if d + len(e) < dist:
                pq.push((d + len(e), e.start))
        for e in v.out:
            print e.id, e.start.id, e.end.id
            if d + len(e) < dist:
                pq.push((d + len(e), e.end))
    print "Visited", len(visited)
    print map(str, list(visited))
    relevant = []
    edge_alignments = ReadCollection().loadFromFasta(open(edge_sequences,
                                                          "r")).addAllRC()
    for edge in graph.E.values():
        if edge.start in visited or edge.start.rc in visited:
            relevant.append(edge_alignments[edge.id])
    print "Loading sam"
    edge_alignments.fillFromSam(Samfile(open(alignment_file, "r")), ref)
    for rel in relevant:
        print rel.__str__()
    print "Collecting segments"
    segments = []
    chr1 = ref["chr1"]
    for edge in relevant:
        for al in edge.alignments:
            print al
            if al.seg_from.inter(edge.prefix(dist)):
                l = dist - al.seg_from.left
                contig = al.seg_to.contig
                start = al.seg_to.left
                segments.append(
                    Segment(contig, start, min(start + l, len(contig))))
                print segments[-1]
    tmp = []
    print "Rotating"
    for seg in segments:
        if seg.contig != chr1:
            seg = seg.RC()
        if seg.contig != chr1:
            print "WARNING", seg
        tmp.append(seg)
    segments = sorted(tmp, key=lambda seg: seg.left)
    print "All relevant segments"
    print "\n".join(map(str, segments))
    cur_seg = None
    interesting_segments = []
    print "Gluing"
    for seg in segments:
        if cur_seg is None:
            cur_seg = seg.copy()
            continue
        if cur_seg.right + 20000 < seg.left:
            interesting_segments.append(cur_seg.copy())
            cur_seg = seg.copy()
        else:
            cur_seg.right = max(cur_seg.right, seg.right)
    if cur_seg is not None:
        interesting_segments.append(cur_seg.copy())

    alignments = []
    for edge in edge_alignments:
        for al in edge.alignments:
            ok = False
            for seg in interesting_segments:
                if al.seg_to.inter(seg):
                    alignments.append(al)
    alignments = sorted(alignments, key=lambda al: al.seg_to.left)
    print "All relevant alignments"
    print "\n".join(map(str, alignments))

    print "Interesting segments:", len(interesting_segments), sum(
        map(len, interesting_segments))
    for seg in interesting_segments:
        print seg
    f = open(output_file, "w")
    tmp = []
    for seg in interesting_segments:
        SeqIO.write(SeqIO.SeqRecord(seg.Seq(), seg.__str__()), f, "fasta")
        tmp.append(seg.Seq())
    f.close()
    f1 = open(output_file + "1", "w")
    SeqIO.write(SeqIO.SeqRecord(("N" * 20000).join(tmp), "concat"), f1,
                "fasta")
Esempio n. 11
0
 def mapSegmentsDown(self, segments):
     # type: (Iterable[Segment]) -> List[Segment]
     """Map segments onto ``self.seq_to`` via ``mapPositionsDown``.

     The left end and the last included position (``right - 1``) of every
     segment are mapped separately; each result segment spans from the
     mapped left end to one past the mapped last position.
     """
     materialized = list(segments)
     starts = self.mapPositionsDown([s.left for s in materialized])
     last_positions = self.mapPositionsDown(
         [s.right - 1 for s in materialized])
     mapped = []
     for start, last_pos in zip(starts, last_positions):
         mapped.append(Segment(self.seq_to, start, last_pos + 1))
     return mapped
Esempio n. 12
0
 def mapSegmentsUp(self, segments):
     # type: (Iterable[Segment]) -> List[Segment]
     """Map segments onto ``self.seq_from`` via ``mapPositionsUp``.

     The left end and the last included position (``right - 1``) of every
     segment are mapped separately; each result segment spans from the
     mapped left end to one past the mapped last position.

     :param segments: segments to map; any iterable is accepted.
     :return: list of mapped segments, in input order.
     """
     # The input is traversed twice, so it is materialized first; a
     # generator would otherwise be exhausted after the first pass and
     # silently yield an empty result.
     segments = list(segments)
     left = self.mapPositionsUp([seg.left for seg in segments])
     right = self.mapPositionsUp([seg.right - 1 for seg in segments])
     return [Segment(self.seq_from, l, r + 1) for l, r in zip(left, right)]