예제 #1
0
    def test_draw_template(self):
        """Draw a template track twice and check the second result's size.

        The first draw uses a hand-built template with two bands and makes no
        assertions; it only has to complete without raising. The second draw
        uses ``TEMPLATE_METADATA['1']`` and checks the returned group's height
        and the number of elements left on the canvas.
        """
        # call shape being exercised:
        # draw_template(self, canvas, template, target_width, height, labels=None, colors=None)
        settings = DiagramSettings()
        target_width = 1000

        # --- smoke draw: custom template with two named bands ---
        canvas = Drawing(size=(1000, 50))
        band_p1 = BioInterval(None, 1, 8000, 'p1')
        band_p2 = BioInterval(None, 10000, 15000, 'p2')
        custom_template = genomic.Template('1', 1, 100000, bands=[band_p1, band_p2])
        group = draw_template(settings, canvas, custom_template, target_width)
        canvas.add(group)
        canvas.attribs['height'] = group.height

        # --- checked draw: start again from an empty canvas ---
        canvas = Drawing(size=(1000, 50))
        group = draw_template(settings, canvas, TEMPLATE_METADATA['1'], target_width)

        # the group height should be exactly the two margins plus the track itself
        expected_height = (
            settings.breakpoint_top_margin
            + settings.breakpoint_bottom_margin
            + settings.template_track_height
        )
        self.assertEqual(expected_height, group.height)

        canvas.add(group)
        canvas.attribs['height'] = group.height
        # NOTE(review): presumably one pre-existing canvas element plus the group added above - confirm
        self.assertEqual(2, len(canvas.elements))
예제 #2
0
    def test_draw_overlay(self):
        """Draw a multi-transcript overlay for a KRAS gene with four transcripts.

        The transcripts are built against one negative-strand gene, renamed
        sequentially, and drawn together with a single vertical marker and the
        same scatter plot supplied twice. The only assertion is on the number
        of elements in the returned canvas; the SVG is written out when
        ``OUTPUT_SVG`` is truthy.
        """
        kras = genomic.Gene('12', 25357723, 25403870, strand=STRAND.NEG, name='KRAS')
        splice_marker = BioInterval('12', 25403865, name='splice site mutation')

        # The return values are not kept: the transcripts are read back through
        # kras.transcripts below (presumably registered via the gene= argument).
        build_transcript(
            cds_start=193,
            cds_end=759,
            exons=[
                (25403685, 25403865),
                (25398208, 25398329),
                (25380168, 25380346),
                (25378548, 25378707),
                (25357723, 25362845),
            ],
            gene=kras,
            domains=[],
        )
        build_transcript(
            cds_start=198,
            cds_end=425,
            exons=[
                (25403685, 25403870),
                (25398208, 25398329),
                (25362102, 25362845),
            ],
            gene=kras,
            domains=[],
        )
        build_transcript(
            cds_start=65,
            cds_end=634,
            exons=[
                (25403685, 25403737),
                (25398208, 25398329),
                (25380168, 25380346),
                (25378548, 25378707),
                (25368371, 25368494),
                (25362365, 25362845),
            ],
            gene=kras,
            domains=[
                protein.Domain('domain1', [(1, 10)]),
                protein.Domain('domain1', [(4, 10)]),
            ],
            is_best_transcript=True,
        )
        build_transcript(
            cds_start=65,
            cds_end=634,
            exons=[
                (25403698, 25403863),
                (25398208, 25398329),
                (25386753, 25388160),
            ],
            gene=kras,
            domains=[],
        )

        settings = DiagramSettings()
        for number, transcript in enumerate(kras.transcripts, start=1):
            transcript.name = 'transcript {}'.format(number)

        # one point every 400 positions along the gene, shifted by 100, with a
        # random y value in [-0.2, 0.2]
        points = [
            (position + 100, random.uniform(-0.2, 0.2))
            for position in range(kras.start, kras.end + 1, 400)
        ]
        cna_plot = ScatterPlot(points, 'cna', ymin=-1, ymax=1, yticks=[-1, 0, 1])

        settings.gene_min_buffer = 0
        canvas = draw_multi_transcript_overlay(
            settings,
            kras,
            vmarkers=[splice_marker],
            plots=[cna_plot, cna_plot],
        )
        self.assertEqual(2, len(canvas.elements))  # defs counts as element
        if OUTPUT_SVG:
            canvas.saveas('test_draw_overlay.svg')
예제 #3
0
def _iter_repeat_runs(seq, repseq):
    """Yield ``(start, end)`` for each maximal run of back-to-back copies of ``repseq`` in ``seq``.

    Coordinates are 1-based and inclusive. Runs are reported left to right and
    the scan resumes immediately after the end of each run. A run may consist
    of a single copy; filtering by length is left to the caller.

    An empty ``repseq`` yields nothing: ``str.find`` matches an empty string at
    every offset and the run would never advance, so the scan could not finish.
    """
    unit = len(repseq)
    if unit == 0:
        return
    index = 0
    while True:
        run_start = seq.find(repseq, index)
        if run_start < 0:
            return
        # find() has already matched the first copy; keep extending while further copies follow directly
        index = run_start + unit
        while seq.startswith(repseq, index):
            index += unit
        # index is the 0-based exclusive end, which equals the 1-based inclusive end
        yield run_start + 1, index


def main():
    """Scan a reference genome for tandem repeats and write them to a tab-delimited file.

    Reads the input path, output path, minimum length and repeat sequences
    from ``parse_arguments()``. For every chromosome (a leading ``chr`` is
    stripped from the name) each repeat sequence is searched
    case-insensitively. A run is kept when its length is at least
    ``min_length`` and at least twice the length of the repeat sequence.

    The output starts with ``## `` comment lines describing the run, followed
    by a ``chr``/``start``/``end``/``name`` header and one row per kept span.
    Chromosomes whose sequence is identical to one already processed are
    skipped. Empty repeat sequences are ignored.
    """
    args = parse_arguments()
    # lower-case, de-duplicate and drop empty strings (an empty repeat unit can never advance the scan)
    repeat_sequences = sorted({s.lower() for s in args.repeat_seq if s})
    log('loading:', args.input)
    reference_genome = load_reference_genome(args.input)
    comments = [
        os.path.basename(__file__),
        'input: {}'.format(args.input),
        'min_length: {}'.format(args.min_length),
        'repeat_seq: {}'.format(', '.join(args.repeat_seq)),
    ]
    log('writing:', args.output)
    with open(args.output, 'w') as fh:
        for comment in comments:
            fh.write('## {}\n'.format(comment))
        fh.write('chr\tstart\tend\tname\n')
        visited = set()  # lower-cased sequences already scanned
        for chrom, seq in sorted(reference_genome.items()):
            if chrom.startswith('chr'):
                chrom = chrom[3:]
            seq = str(seq.seq).lower()
            if seq in visited:
                continue
            visited.add(seq)
            spans = []
            for repseq in repeat_sequences:
                log(
                    'finding {}_repeat (min_length: {}), for chr{} (length: {})'.format(
                        repseq, args.min_length, chrom, len(seq)
                    )
                )
                for start, end in _iter_repeat_runs(seq, repseq):
                    span = BioInterval(chrom, start, end, name='repeat_{}'.format(repseq))
                    # the second condition requires at least two copies of the repeat unit
                    if len(span) >= args.min_length and len(span) >= 2 * len(repseq):
                        spans.append(span)
            log('found', len(spans), 'spans', time_stamp=False)
            for span in spans:
                fh.write(
                    '{}\t{}\t{}\t{}\n'.format(
                        span.reference_object, span.start, span.end, span.name
                    )
                )