def test_draw_template(self):
    """Draw two templates and check the second drawing's height and element count.

    The first template is built by hand with two bands; the second is taken
    from ``TEMPLATE_METADATA['1']``. Only the second drawing is asserted on.
    """
    # Signature noted by the original author (may be stale relative to the call below):
    # def draw_template(self, canvas, template, target_width, height, labels=None, colors=None):
    settings = DiagramSettings()

    # First pass: hand-built template; this only checks that drawing completes.
    canvas = Drawing(size=(1000, 50))
    banded_template = genomic.Template(
        '1',
        1,
        100000,
        bands=[
            BioInterval(None, 1, 8000, 'p1'),
            BioInterval(None, 10000, 15000, 'p2'),
        ],
    )
    group = draw_template(settings, canvas, banded_template, 1000)
    canvas.add(group)
    canvas.attribs['height'] = group.height

    # Second pass: fresh canvas, template looked up from the shared metadata.
    canvas = Drawing(size=(1000, 50))
    group = draw_template(settings, canvas, TEMPLATE_METADATA['1'], 1000)
    self.assertEqual(
        settings.breakpoint_top_margin
        + settings.breakpoint_bottom_margin
        + settings.template_track_height,
        group.height,
    )
    canvas.add(group)
    canvas.attribs['height'] = group.height
    self.assertEqual(2, len(canvas.elements))
def test_draw_overlay(self):
    """Draw a multi-transcript overlay for a KRAS gene with one marker and two plots.

    Builds four transcripts against the same gene object, renames the gene's
    transcripts sequentially, generates a random scatter plot across the gene
    and asserts the resulting canvas holds exactly two elements.
    """
    kras = genomic.Gene('12', 25357723, 25403870, strand=STRAND.NEG, name='KRAS')
    splice_marker = BioInterval('12', 25403865, name='splice site mutation')

    # The return values are not needed afterwards; the transcripts are read
    # back through ``kras.transcripts`` below (presumably build_transcript
    # registers each one on the gene -- verify against its definition).
    transcript = build_transcript(
        cds_start=193,
        cds_end=759,
        exons=[
            (25403685, 25403865),
            (25398208, 25398329),
            (25380168, 25380346),
            (25378548, 25378707),
            (25357723, 25362845),
        ],
        gene=kras,
        domains=[],
    )
    build_transcript(
        cds_start=198,
        cds_end=425,
        exons=[
            (25403685, 25403870),
            (25398208, 25398329),
            (25362102, 25362845),
        ],
        gene=kras,
        domains=[],
    )
    build_transcript(
        cds_start=65,
        cds_end=634,
        exons=[
            (25403685, 25403737),
            (25398208, 25398329),
            (25380168, 25380346),
            (25378548, 25378707),
            (25368371, 25368494),
            (25362365, 25362845),
        ],
        gene=kras,
        domains=[
            protein.Domain('domain1', [(1, 10)]),
            protein.Domain('domain1', [(4, 10)]),
        ],
        is_best_transcript=True,
    )
    build_transcript(
        cds_start=65,
        cds_end=634,
        exons=[
            (25403698, 25403863),
            (25398208, 25398329),
            (25386753, 25388160),
        ],
        gene=kras,
        domains=[],
    )

    settings = DiagramSettings()
    for number, transcript in enumerate(kras.transcripts, start=1):
        transcript.name = 'transcript {}'.format(number)

    # One scatter point every 400 positions across the gene, shifted by 100,
    # each with a random y value in [-0.2, 0.2].
    x_values = [position + 100 for position in range(kras.start, kras.end + 1, 400)]
    y_values = [random.uniform(-0.2, 0.2) for _ in x_values]
    points = list(zip(x_values, y_values))
    scatter = ScatterPlot(points, 'cna', ymin=-1, ymax=1, yticks=[-1, 0, 1])

    settings.gene_min_buffer = 0
    canvas = draw_multi_transcript_overlay(
        settings, kras, vmarkers=[splice_marker], plots=[scatter, scatter]
    )
    self.assertEqual(2, len(canvas.elements))  # defs counts as element
    if OUTPUT_SVG:
        canvas.saveas('test_draw_overlay.svg')
def _iter_repeat_runs(seq, repseq):
    """Yield ``(start, end)`` for each run of back-to-back copies of ``repseq`` in ``seq``.

    The scan moves left to right and runs never overlap. ``start`` is the
    0-based index of the first copy and ``end`` is the 0-based index just past
    the last consecutive copy, so ``end - start`` is a multiple of
    ``len(repseq)``. A single isolated copy is still reported as a run.

    An empty ``repseq`` yields nothing: it would match everywhere without ever
    advancing the scan position.
    """
    unit = len(repseq)
    if unit == 0:
        return
    index = 0
    while True:
        run_start = seq.find(repseq, index)
        if run_start < 0:
            return
        # find() guarantees one copy at run_start; extend over any further copies
        index = run_start + unit
        while seq.startswith(repseq, index):
            index += unit
        yield run_start, index


def main():
    """Scan a reference genome for tandem repeats and write them to a tab-delimited file.

    Reads the command-line arguments via ``parse_arguments`` (uses ``input``,
    ``output``, ``min_length`` and ``repeat_seq``), loads the reference genome,
    and for every distinct chromosome sequence records each run of a requested
    repeat unit that is at least ``min_length`` long and at least two units long.

    The output starts with ``##``-prefixed comment lines describing the run,
    followed by a ``chr/start/end/name`` header and one row per repeat span.
    """
    args = parse_arguments()
    # Case-insensitive de-duplication. Empty strings are dropped because an
    # empty repeat unit can never advance the scan (it used to loop forever).
    repeat_sequences = sorted({s.lower() for s in args.repeat_seq if s})
    log('loading:', args.input)
    reference_genome = load_reference_genome(args.input)
    comments = [
        os.path.basename(__file__),
        'input: {}'.format(args.input),
        'min_length: {}'.format(args.min_length),
        'repeat_seq: {}'.format(', '.join(args.repeat_seq)),
    ]
    log('writing:', args.output)
    with open(args.output, 'w') as fh:
        for comment in comments:
            fh.write('## {}\n'.format(comment))
        fh.write('chr\tstart\tend\tname\n')
        # Sequences already scanned; identical sequences stored under
        # different names are processed only once.
        visited = set()
        for chrom, record in sorted(reference_genome.items()):
            if chrom.startswith('chr'):
                chrom = chrom[3:]
            seq = str(record.seq).lower()
            if seq in visited:
                continue
            visited.add(seq)
            spans = []
            for repseq in repeat_sequences:
                log(
                    'finding {}_repeat (min_length: {}), for chr{} (length: {})'.format(
                        repseq, args.min_length, chrom, len(seq)
                    )
                )
                for run_start, run_end in _iter_repeat_runs(seq, repseq):
                    # convert the 0-based half-open run to 1-based inclusive coordinates
                    span = BioInterval(
                        chrom, run_start + 1, run_end, name='repeat_{}'.format(repseq)
                    )
                    if len(span) >= args.min_length and len(span) >= 2 * len(repseq):
                        spans.append(span)
            log('found', len(spans), 'spans', time_stamp=False)
            for span in spans:
                fh.write(
                    '{}\t{}\t{}\t{}\n'.format(
                        span.reference_object, span.start, span.end, span.name
                    )
                )