def PrintResults(recs, reference, references_file, coordinates_file): aln = open(coordinates_file, "w") fasta = open(references_file, "w") for rec in recs: aln.write(str(rec) + "\n") sequence = reference[rec.rname][rec.left:rec.right] rec_id = str(rec.rname) + "_(" + str(rec.left) + "-" + str(rec.right)+")" SeqIO.write(SeqIO.SeqRecord(sequence, rec_id), fasta, "fasta") aln.close() fasta.close()
def main(argv): sys.stdout.write("Started\n") dot_file = argv[1] edge_sequences = argv[2] reference_file = argv[3] alignment_file = argv[4] edges = ParseVertices(argv[5]) output_file = argv[6] sys.stdout.write("Loading dot\n") dot = DotParser(open(dot_file, "r")).parse() edge_collection = ContigCollection().loadFromFasta( open(edge_sequences, "r"), True) graph = Graph().loadFromDot(edge_collection, dot) vertices = [graph.E[id].start.id for id in edges] graph.printToFile(sys.stdout) print vertices ref = ContigCollection().loadFromFasta(open(reference_file, "r"), False) print "Looking for relevant" pq = PriorityQueue() for v in graph.V.values(): if v.id in vertices: pq.push((0, v)) visited = [] while not pq.empty(): d, v = pq.pop() if v in visited: continue visited.append(v) for e in v.inc: print e.id, e.start.id, e.end.id if d + len(e) < dist: pq.push((d + len(e), e.start)) for e in v.out: print e.id, e.start.id, e.end.id if d + len(e) < dist: pq.push((d + len(e), e.end)) print "Visited", len(visited) print map(str, list(visited)) relevant = [] edge_alignments = ReadCollection().loadFromFasta(open(edge_sequences, "r")).addAllRC() for edge in graph.E.values(): if edge.start in visited or edge.start.rc in visited: relevant.append(edge_alignments[edge.id]) print "Loading sam" edge_alignments.fillFromSam(Samfile(open(alignment_file, "r")), ref) for rel in relevant: print rel.__str__() print "Collecting segments" segments = [] chr1 = ref["chr1"] for edge in relevant: for al in edge.alignments: print al if al.seg_from.inter(edge.prefix(dist)): l = dist - al.seg_from.left contig = al.seg_to.contig start = al.seg_to.left segments.append( Segment(contig, start, min(start + l, len(contig)))) print segments[-1] tmp = [] print "Rotating" for seg in segments: if seg.contig != chr1: seg = seg.RC() if seg.contig != chr1: print "WARNING", seg tmp.append(seg) segments = sorted(tmp, key=lambda seg: seg.left) print "All relevant segments" print "\n".join(map(str, segments)) cur_seg = None interesting_segments = [] print "Gluing" for seg in segments: if cur_seg is None: cur_seg = seg.copy() continue if cur_seg.right + 20000 < seg.left: interesting_segments.append(cur_seg.copy()) cur_seg = seg.copy() else: cur_seg.right = max(cur_seg.right, seg.right) if cur_seg is not None: interesting_segments.append(cur_seg.copy()) alignments = [] for edge in edge_alignments: for al in edge.alignments: ok = False for seg in interesting_segments: if al.seg_to.inter(seg): alignments.append(al) alignments = sorted(alignments, key=lambda al: al.seg_to.left) print "All relevant alignments" print "\n".join(map(str, alignments)) print "Interesting segments:", len(interesting_segments), sum( map(len, interesting_segments)) for seg in interesting_segments: print seg f = open(output_file, "w") tmp = [] for seg in interesting_segments: SeqIO.write(SeqIO.SeqRecord(seg.Seq(), seg.__str__()), f, "fasta") tmp.append(seg.Seq()) f.close() f1 = open(output_file + "1", "w") SeqIO.write(SeqIO.SeqRecord(("N" * 20000).join(tmp), "concat"), f1, "fasta")