def test_cds_regions(self):
        """Check cds_regions() for a plus-strand and a minus-strand transcript.

        Each case loads a Transcript from a database-style record (all values
        are strings) and compares the first three reported regions, by index,
        against literal ``(start, end)`` tuples.
        """
        cases = [
            # Plus strand: coding range 12000..53000 spread over three exons.
            # The expected last region ends at 53001, one past coding_end.
            (
                {
                    'strand': '+',
                    'exons': '10000-20000,30000-40000,50000-60000',
                    'coding_start': '12000',
                    'coding_end': '53000'
                },
                [(12000, 20000), (30000, 40000), (50000, 53001)],
            ),
            # Minus strand: exons are listed from highest to lowest coordinate
            # and the exon string contains spaces after the commas.
            (
                {
                    'strand': '-',
                    'exons': '50-60, 30-40, 10-20',
                    'coding_start': '55',
                    'coding_end': '15'
                },
                [(50, 56), (30, 40), (15, 20)],
            ),
        ]

        for db_record, expected_regions in cases:
            loaded = Transcript()
            loaded.read_from_database_record(db_record)

            observed = loaded.cds_regions()
            for position, region in enumerate(expected_regions):
                assert observed[position] == region
    def test_utr5_regions(self):
        """Exercise utr5_regions() on plus- and minus-strand transcripts.

        Four records are loaded in turn: for each strand, one where the test
        expects two 5' UTR regions and one where it expects an empty list.
        """

        def load(strand, exons, coding_start, coding_end):
            # Build a Transcript from a database-style record of strings.
            loaded = Transcript()
            loaded.read_from_database_record({
                'strand': strand,
                'exons': exons,
                'coding_start': coding_start,
                'coding_end': coding_end
            })
            return loaded

        # Plus strand, coding start (25500) inside the second listed exon.
        plus_regions = load(
            '+', '10000-20000,25000-26000,30000-40000,50000-60000',
            '25500', '53000').utr5_regions()
        assert len(plus_regions) == 2
        for index, expected in enumerate([(10000, 20000), (25000, 25500)]):
            assert plus_regions[index] == expected

        # Plus strand, coding start equal to the start of the first exon.
        plus_without_utr = load(
            '+', '10000-20000,30000-40000,50000-60000', '10000', '35000')
        assert plus_without_utr.utr5_regions() == []

        # Minus strand, coding start (55) inside the second listed exon.
        minus_regions = load(
            '-', '70-80, 50-60, 30-40, 10-20', '55', '15').utr5_regions()
        assert len(minus_regions) == 2
        for index, expected in enumerate([(70, 80), (56, 60)]):
            assert minus_regions[index] == expected

        # Minus strand, coding start at 59 in the first listed exon (50-60):
        # the test expects no 5' UTR region here.
        minus_without_utr = load('-', '50-60, 30-40, 10-20', '59', '15')
        assert minus_without_utr.utr5_regions() == []
    def test_set_info(self):
        """set_info() fills ``info`` with the expected slash-separated summary.

        NOTE(review): the expected string looks like
        strand / genomic span in kb / exon count / cDNA length in kb /
        protein length -- confirm against Transcript.set_info().
        """
        subject = Transcript(strand='-', start=10000, end=19600)
        subject.exons = [Exon(span) for span in ('1-2', '3-4', '5-6')]
        subject.cdna_length = 8100
        subject.prot_length = 1200

        subject.set_info()

        expected_info = '-/9.6kb/3/8.1kb/1200'
        assert subject.info == expected_info
    def test_finalize(self):
        """finalize() sets cdna_length, prot_length, start and end.

        The same three exons are used on both strands; only the coding
        boundaries and the exon order differ between the two cases.
        """
        cases = [
            # (constructor kwargs, exon spans in listed order, expected attributes)
            (
                dict(strand='+', coding_start=16, coding_end=53),
                ('10-20', '30-40', '50-60'),
                [('cdna_length', 30), ('prot_length', 5),
                 ('start', 10), ('end', 60)],
            ),
            (
                dict(strand='-', coding_start=55, coding_end=15),
                ('50-60', '30-40', '10-20'),
                [('cdna_length', 30), ('prot_length', 6),
                 ('start', 10), ('end', 60)],
            ),
        ]

        for init_kwargs, exon_spans, expected_attrs in cases:
            subject = Transcript(**init_kwargs)
            subject.exons = [Exon(span) for span in exon_spans]
            subject.finalize()
            for attr_name, expected_value in expected_attrs:
                assert getattr(subject, attr_name) == expected_value
    def test_finalize(self):
        """finalize() writes the output files with records in sorted order.

        Four transcripts on chromosome 8 are added out of order. After
        ``finalize()`` the test checks that ``<fn>.gz`` and ``<fn>.gz.tbi``
        exist, then reads the last column of every non-header line of the
        ``.gz`` file and expects the values 40, 580, 880, 900 in that order.
        """
        # (id, start, end) for each transcript, in insertion order.
        specs = [('1', 130, 580), ('2', 800, 900), ('3', 800, 880), ('4', 30, 40)]
        transcripts = [
            Transcript(id=ident,
                       chrom='8',
                       strand='+',
                       start=first,
                       end=last,
                       exons=[])
            for ident, first, last in specs
        ]
        for transcript in transcripts:
            self.tdb_writer.add(transcript)

        self.tdb_writer.finalize()

        compressed_path = self.fn + '.gz'
        assert os.path.isfile(compressed_path)
        assert os.path.isfile(compressed_path + '.tbi')

        order = []
        # gzip.open() yields bytes in binary mode; decode each line explicitly
        # so the str-based startswith/split below work on Python 3 as well as
        # Python 2. The ``with`` block closes the handle even if an assertion
        # or parse error is raised.
        with gzip.open(compressed_path, 'rb') as handle:
            for raw_line in handle:
                line = raw_line.decode('utf-8').strip()
                if line.startswith('#'):
                    # Header/comment lines carry no record data.
                    continue
                cols = line.split('\t')
                order.append(int(cols[-1]))

        assert order == [40, 580, 880, 900]
    def setUp(self):
        """Write a small transcript database and open it as ``self.tdb``.

        A random UUID is used as the output file name (``self.fn``). Four
        transcripts on chromosome 8 are written through a TranscriptDBWriter,
        and the resulting ``<fn>.gz`` file is opened with TranscriptDB.
        """
        self.fn = str(uuid.uuid4())

        writer = TranscriptDBWriter(
            self.fn,
            source='xyz',
            build='GRCh37',
            columns=['id', 'chrom', 'strand', 'start', 'end'])

        # (id, start, end) of each fixture transcript, in insertion order.
        fixtures = (
            ('t1', 130, 580),
            ('t2', 800, 900),
            ('t3', 800, 880),
            ('t4', 30, 40),
        )
        for ident, first, last in fixtures:
            writer.add(
                Transcript(id=ident, chrom='8', strand='+', start=first, end=last))
        writer.finalize()

        self.tdb = TranscriptDB(self.fn + '.gz')
    def test_get_protein_length(self):
        """get_protein_length() returns 5 and 6 for the two sample records.

        Both records use the same three exons; the minus-strand record lists
        them in reverse order and has different coding boundaries.
        """
        cases = [
            (
                {
                    'strand': '+',
                    'exons': '10-20,30-40,50-60',
                    'coding_start': '16',
                    'coding_end': '53'
                },
                5,
            ),
            (
                {
                    'strand': '-',
                    'exons': '50-60,30-40,10-20',
                    'coding_start': '55',
                    'coding_end': '15'
                },
                6,
            ),
        ]

        for db_record, expected_length in cases:
            loaded = Transcript()
            loaded.read_from_database_record(db_record)
            assert loaded.get_protein_length() == expected_length
    def test_add(self):
        """add() stores the transcript as a row under its chromosome key.

        The expected row holds id, chrom, the exons joined as a
        comma-separated string, start and end.
        NOTE(review): the row layout presumably follows the columns the
        writer fixture was configured with -- confirm in this class's setUp.
        """
        subject = Transcript(id='xyz',
                             chrom='11',
                             strand='+',
                             start=130,
                             end=580)
        subject.exons = [
            Exon(span) for span in ('100-200', '300-400', '500-600')
        ]
        self.tdb_writer.add(subject)

        stored_row = self.tdb_writer._records['11'][0]
        expected_row = ['xyz', '11', '100-200,300-400,500-600', 130, 580]
        assert stored_row == expected_row
    def test_same_cds(self):
        """helper.same_cds() compares coding sequence, not outer exon bounds.

        t1, t2 and t3 share coding start/end and inner exon boundaries but
        differ in the outermost exon coordinates; t4 has the same exons as t1
        but a different coding start.
        """

        def load(exons, coding_start, coding_end):
            # Build a plus-strand Transcript from a database-style record.
            loaded = Transcript()
            loaded.read_from_database_record({
                'strand': '+',
                'exons': exons,
                'coding_start': coding_start,
                'coding_end': coding_end
            })
            return loaded

        t1 = load('10000-20000,30000-40000,50000-60000', '12000', '53000')
        t2 = load('9000-20000,30000-40000,50000-68000', '12000', '53000')
        t3 = load('3000-20000,30000-40000,50000-68000', '12000', '53000')
        t4 = load('10000-20000,30000-40000,50000-60000', '12500', '53000')

        # Same coding range, different outer exon bounds: expected to match.
        assert helper.same_cds(t1, t2)
        assert helper.same_cds(t1, t3)
        # Different coding start: expected not to match.
        assert not helper.same_cds(t2, t4)
    def test_sort_records(self):
        """_sort_records() orders the chromosome-8 records as expected.

        Four transcripts are added out of order. After sorting, the last
        field of each stored record (the ``end`` value supplied here) must
        read 40, 580, 880, 900. Comparing the whole list also verifies the
        fourth record and that no record was dropped or duplicated.
        """
        # (id, start, end) for each transcript, in insertion order.
        specs = [('1', 130, 580), ('2', 800, 900), ('3', 800, 880), ('4', 30, 40)]
        transcripts = [
            Transcript(id=ident,
                       chrom='8',
                       strand='+',
                       start=first,
                       end=last,
                       exons=[])
            for ident, first, last in specs
        ]
        for transcript in transcripts:
            self.tdb_writer.add(transcript)

        self.tdb_writer._sort_records()

        sorted_ends = [record[-1] for record in self.tdb_writer._records['8']]
        assert sorted_ends == [40, 580, 880, 900]
    def test_read_from_database_record(self):
        """read_from_database_record() copies record fields onto the object.

        Every record key must become an instance attribute; ``start`` is
        expected as an int, and ``exons`` as Exon objects with integer
        ``start``/``end`` values.
        """
        db_record = {
            'one': 'x',
            'two': 'y',
            'start': '12345',
            'exons': '10000-20000,30000-40000'
        }
        subject = Transcript()
        subject.read_from_database_record(db_record)

        # Each key of the record should now be present in the instance dict.
        instance_attrs = vars(subject)
        for field_name in db_record:
            assert field_name in instance_attrs

        # Unrecognised fields are expected to be kept as plain strings.
        for field_name, expected_value in (('one', 'x'), ('two', 'y')):
            assert getattr(subject, field_name) == expected_value
        assert subject.start == 12345

        assert all(type(exon) == Exon for exon in subject.exons)

        first_exon = subject.exons[0]
        second_exon = subject.exons[1]
        assert first_exon.start == 10000
        assert second_exon.end == 40000
 def test_any_unset(self):
     """_any_unset() is truthy until both 'start' and 'end' are assigned."""
     checked_fields = ('start', 'end')
     subject = Transcript()

     # Freshly constructed: neither attribute has been given a value here.
     assert subject._any_unset(list(checked_fields))

     subject.start, subject.end = 1, 2
     assert not subject._any_unset(list(checked_fields))
 def test_get_cdna_length(self):
     """get_cdna_length() returns 1110 for the three sample exons.

     1110 equals the sum of (end - start) over the exons: 10 + 100 + 1000.
     """
     subject = Transcript()
     subject.read_from_database_record(
         {'strand': '+', 'exons': '10-20,100-200,1000-2000'})

     expected_length = 1110
     assert subject.get_cdna_length() == expected_length