Example #1
0
    def test_00_mysql_con(self):
        args = deepcopy(self.default_args)

        # Connect with MYSQL config
        ac = AnnCoordinates(**args)
        self.assertIsNotNone(ac.mysql_con)

        # Connect with MYSQL connection
        args['mysql_config'] = None
        args['mysql_con'] = self.MYSQL_CON
        ac = AnnCoordinates(**args)
        self.assertIsNotNone(ac.mysql_con)
Example #2
0
    def test_07_ann_special_cases(self):
        args = deepcopy(self.default_args)

        # intergenic at left most side
        ac = AnnCoordinates(**args)
        ac.query('chr2', 100)
        gene_info = ac.gene_info
        self.assertIsNone(gene_info['id'])
        self.assertEqual(gene_info['name'], 'intergenic(-,FAM110C)')
        self.assertEqual(gene_info['type'], 'intergenic')
        self.assertEqual(gene_info['start'], 0)
        self.assertEqual(gene_info['end'], 38813)
        self.assertIsNone(gene_info['source'])
        self.assertIsNone(gene_info['strand'])
        self.assertEqual(gene_info['intergenic_nearest']['name'], 'FAM110C')
        self.assertEqual(gene_info['intergenic_nearest']['distance'], -38713)

        # intergenic at right most side
        ac = AnnCoordinates(**args)
        ac.query('chr2', 243199373 - 100)
        gene_info = ac.gene_info
        self.assertIsNone(gene_info['id'])
        self.assertEqual(gene_info['name'], 'intergenic(LINC01881,-)')
        self.assertEqual(gene_info['type'], 'intergenic')
        self.assertEqual(gene_info['start'], 243176659)
        self.assertEqual(gene_info['end'], 0)
        self.assertIsNone(gene_info['source'])
        self.assertIsNone(gene_info['strand'])
        self.assertEqual(gene_info['intergenic_nearest']['name'], 'LINC01881')
        self.assertEqual(gene_info['intergenic_nearest']['distance'], 22614)
Example #3
0
    def test_05_ann_intergenic_igh_gene(self):
        args = deepcopy(self.default_args)

        ac = AnnCoordinates(**args)
        # intergenic(IGHJ6,IGHJ5)
        ac.query('chr14:106329469')
        self.assertEqual(len(ac.gene_hits), 1)
        gene_hit = ac.gene_hits[0]
        self.assertIsNone(gene_hit['id'])
        self.assertEqual(gene_hit['name'], 'IGHJ6')
        self.assertEqual(gene_hit['type'], 'intergenic_igh')
        self.assertIsNone(gene_hit['source'])
        self.assertEqual(gene_hit['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_strand'], '-')
Example #4
0
    def test_04_ann_intergenic_gene(self):
        args = deepcopy(self.default_args)

        ac = AnnCoordinates(**args)

        # intergenic(TMEM99,KRT12)
        ac.query('chr17:39013407')
        self.assertEqual(len(ac.gene_hits), 1)
        gene_hit = ac.gene_hits[0]
        self.assertIsNone(gene_hit['id'])
        self.assertEqual(gene_hit['name'], 'intergenic(TMEM99,KRT12)')
        self.assertEqual(gene_hit['type'], 'intergenic')
        self.assertIsNone(gene_hit['source'])
        self.assertIsNone(gene_hit['strand'])
        self.assertEqual(gene_hit['intergenic_nearest']['name'], 'KRT12')
        self.assertEqual(gene_hit['intergenic_nearest']['distance'], -4147)
Example #5
0
    def test_05_ann_intergenic_igh_gene(self):
        args = deepcopy(self.default_args)

        ac = AnnCoordinates(**args)
        # IGHJ6 chr14:106329405-106329470
        ac.query('chr14:106329469')
        self.assertEqual(len(ac.gene_hits), 1)
        gene_hit = ac.gene_hits[0]
        # self.assertIsNone(gene_hit['id'])
        self.assertEqual(gene_hit['name'], 'IGHJ6')
        self.assertEqual(gene_hit['type'], 'protein_coding')
        # self.assertIsNone(gene_hit['source'])
        self.assertEqual(gene_hit['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_strand'], '-')
        # IGHJ3P chr14:106329624-106329675
        # intergenic(IGHJ6,IGHJ3P)
        ac.query('chr14:106329500')
        self.assertEqual(len(ac.gene_hits), 1)
        gene_hit = ac.gene_hits[0]
        self.assertIsNone(gene_hit['id'])
        self.assertEqual(gene_hit['name'], 'IGHJ6')
        self.assertEqual(gene_hit['type'], 'intergenic_igh')
        self.assertIsNone(gene_hit['source'])
        self.assertEqual(gene_hit['strand'], '-')
        self.assertEqual(gene_hit['intergenic_nearest']['name'], 'IGHJ6')
        self.assertEqual(gene_hit['intergenic_nearest']['distance'], 30)
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_strand'], '-')
Example #6
0
    def test_06_ann_beyond_transcript_range(self):
        args = deepcopy(self.default_args)

        # BCL6 without specifying transcript map
        ac = AnnCoordinates(**args)
        ac.query('chr3', 187462599)
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'BCL6')
        self.assertEqual(gene_info['name'], 'BCL6')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['source'], 'RefSeq')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'NM_001706.5')
        self.assertEqual(rank_info['trans_start'], 187439168)
        self.assertEqual(rank_info['trans_end'], 187463256)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 10)
        self.assertEqual(rank_info['cds_min'], 3)
        self.assertEqual(rank_info['cds_max'], 10)
        self.assertEqual(rank_info['rank'], "Intron1")
        self.assertEqual(rank_info['exon_num'], 1)

        # BCL6 with specifying transcript map
        args['transcript_map'] = {
            'NM_001130845'
        }
        ac = AnnCoordinates(**args)
        ac.query('chr3', 187462599)
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'BCL6')
        self.assertEqual(gene_info['name'], 'BCL6')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['source'], 'RefSeq')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'NM_001130845.2')
        self.assertEqual(rank_info['trans_start'], 187439168)
        self.assertEqual(rank_info['trans_end'], 187454289)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 10)
        self.assertEqual(rank_info['cds_min'], 3)
        self.assertEqual(rank_info['cds_max'], 10)
        self.assertEqual(rank_info['rank'], "intragenic")
        self.assertEqual(rank_info['exon_num'], -5)
Example #7
0
    def test_06_ann_beyond_transcript_range(self):
        args = deepcopy(self.default_args)

        # BCL6 without specifying transcript map
        ac = AnnCoordinates(**args)
        ac.query('chr3', 187462599)
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'ENSG00000113916.17')
        self.assertEqual(gene_info['name'], 'BCL6')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['source'], 'HAVANA')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'ENST00000406870.6')
        self.assertEqual(rank_info['trans_start'], 187439165)
        self.assertEqual(rank_info['trans_end'], 187463515)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 10)
        self.assertEqual(rank_info['cds_min'], 3)
        self.assertEqual(rank_info['cds_max'], 10)
        self.assertEqual(rank_info['rank'], "Intron1")
        self.assertEqual(rank_info['exon_num'], 1)

        # BCL6 with specifying transcript map
        args['transcript_map'] = {
            'ENST00000232014': {'gene': 'BCL6', 'refseq': 'NM_001130845'}
        }
        ac = AnnCoordinates(**args)
        ac.query('chr3', 187462599)
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'ENSG00000113916.17')
        self.assertEqual(gene_info['name'], 'BCL6')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['source'], 'HAVANA')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'ENST00000232014.8')
        self.assertEqual(rank_info['trans_start'], 187440186)
        self.assertEqual(rank_info['trans_end'], 187454357)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 10)
        self.assertEqual(rank_info['cds_min'], 3)
        self.assertEqual(rank_info['cds_max'], 10)
        self.assertEqual(rank_info['rank'], "intragenic")
        self.assertEqual(rank_info['exon_num'], -5)
Example #8
0
    def _test_ann_alk_(self, args):
        """
        ALK Gene Structure
        Chromosome: chr2
        Genbank ID: NM_004304 Orientation: -
        Length coding sequence : 4860 nucleotides.
        Region  start       end         region length   phase at end
        UTR     30,143,526  30,144,477
        Exon    30,142,859  30,143,525  667             1
        Exon    29,940,444  29,940,563  120             1
        Exon    29,917,716  29,917,880  165             1
        Exon    29,754,781  29,754,982  202             2
        Exon    29,606,598  29,606,725  128             1
        Exon    29,551,216  29,551,347  132             1
        Exon    29,543,617  29,543,748  132             1
        Exon    29,541,170  29,541,270  101             0
        Exon    29,519,754  29,519,923  170             2
        Exon    29,498,268  29,498,362  95              1
        Exon    29,497,965  29,498,093  129             1
        Exon    29,473,971  29,474,133  163             2
        Exon    29,462,546  29,462,696  151             0
        Exon    29,456,431  29,456,562  132             0
        Exon    29,455,170  29,455,314  145             1
        Exon    29,451,750  29,451,932  183             1
        Exon    29,450,440  29,450,538  99              1
        Exon    29,449,788  29,449,940  153             1
        Exon    29,448,327  29,448,431  105             1
        Exon    29,446,208  29,446,394  187             2
        Exon    29,445,383  29,445,473  91              0
        Exon    29,445,210  29,445,274  65              2
        Exon    29,443,572  29,443,701  130             0
        Exon    29,436,850  29,436,947  98              2
        Exon    29,432,652  29,432,744  93              2
        Exon    29,430,037  29,430,138  102             2
        Exon    29,420,408  29,420,542  135             2
        Exon    29,419,636  29,419,726  91              0
        Exon    29,416,090  29,416,788  699             0
        UTR     29,415,640  29,416,089
        """
        # ALK chr2:29,415,640-30,144,477(GRCh37/hg19) from GeneCards
        # ALK chr2:29,415,640-30,144,432(GRCh37/hg19) from GENCODE/HAVANA
        # ENST00000389048 -> NM_004304

        # Case01: Normal
        ac = AnnCoordinates(**args)
        # 5'UTR
        ac.query('2:30,143,526')
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'ENSG00000171094.17')
        self.assertEqual(gene_info['name'], 'ALK')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'ENST00000389048.7')
        self.assertEqual(rank_info['trans_start'], 29415640)
        self.assertEqual(rank_info['trans_end'], 30144432)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 29)
        self.assertEqual(rank_info['cds_min'], 1)
        self.assertEqual(rank_info['cds_max'], 29)
        self.assertEqual(rank_info['rank'], "5'UTR")
        self.assertEqual(rank_info['exon_num'], 1)
        # Exon20
        bp = np.random.choice(range(29446208, 29446394 + 1))
        ac.query("2:{}".format(bp))
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'ENSG00000171094.17')
        self.assertEqual(gene_info['name'], 'ALK')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'ENST00000389048.7')
        self.assertEqual(rank_info['trans_start'], 29415640)
        self.assertEqual(rank_info['trans_end'], 30144432)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 29)
        self.assertEqual(rank_info['cds_min'], 1)
        self.assertEqual(rank_info['cds_max'], 29)
        self.assertEqual(rank_info['rank'], "Exon20")
        self.assertEqual(rank_info['exon_num'], 20)
        # Intron10
        bp = np.random.choice(range(29498093 + 1, 29498268))
        ac.query("2:{}".format(bp))
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'ENSG00000171094.17')
        self.assertEqual(gene_info['name'], 'ALK')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'ENST00000389048.7')
        self.assertEqual(rank_info['trans_start'], 29415640)
        self.assertEqual(rank_info['trans_end'], 30144432)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 29)
        self.assertEqual(rank_info['cds_min'], 1)
        self.assertEqual(rank_info['cds_max'], 29)
        self.assertEqual(rank_info['rank'], "Intron10")
        self.assertEqual(rank_info['exon_num'], 10)
        # 3'UTR
        ac.query('2:29,415,640')
        gene_info = ac.gene_info
        self.assertEqual(gene_info['id'], 'ENSG00000171094.17')
        self.assertEqual(gene_info['name'], 'ALK')
        self.assertEqual(gene_info['type'], 'protein_coding')
        self.assertEqual(gene_info['strand'], '-')
        rank_info = ac.rank_info
        self.assertEqual(rank_info['trans_id'], 'ENST00000389048.7')
        self.assertEqual(rank_info['trans_start'], 29415640)
        self.assertEqual(rank_info['trans_end'], 30144432)
        self.assertEqual(rank_info['trans_strand'], '-')
        self.assertEqual(rank_info['exon_total'], 29)
        self.assertEqual(rank_info['cds_min'], 1)
        self.assertEqual(rank_info['cds_max'], 29)
        self.assertEqual(rank_info['rank'], "3'UTR")
        self.assertEqual(rank_info['exon_num'], 29)
Example #9
0
    def test_03_ann_bp_handling(self):
        args = deepcopy(self.default_args)

        ac = AnnCoordinates(**args)

        ac.query('2:30,143,526')
        self.assertEqual(ac.chr_raw, '2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('2:30143526')
        self.assertEqual(ac.chr_raw, '2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('chr2:30,143,526')
        self.assertEqual(ac.chr_raw, 'chr2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('chr2:30143526')
        self.assertEqual(ac.chr_raw, 'chr2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('2', '30,143,526')
        self.assertEqual(ac.chr_raw, '2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('2', '30143526')
        self.assertEqual(ac.chr_raw, '2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('2', 30143526)
        self.assertEqual(ac.chr_raw, '2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('chr2', '30,143,526')
        self.assertEqual(ac.chr_raw, 'chr2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('chr2', '30143526')
        self.assertEqual(ac.chr_raw, 'chr2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)

        ac.query('chr2', 30143526)
        self.assertEqual(ac.chr_raw, 'chr2')
        self.assertEqual(ac.chr, 'chr2')
        self.assertEqual(ac.pos, 30143526)