Example #1
0
    def test_writeGeneDataFile(self):
        """Test writeGeneDataFile.

        Writes two genes to ``gene.txt`` and compares the file designated by
        the return value of ``writeGeneDataFile`` with the reference file
        ``test-data/gene.txt``.
        """

        lGenes = [Gene('GENE1','seq1',12,600,1),Gene('GENE2','seq2',100,1000,-1)]
        try:
            GeneDataFile = self.plot.writeGeneDataFile(lGenes,'gene.txt')
            self.assertTrue(filecmp.cmp(GeneDataFile,'test-data/gene.txt'))
        finally:
            # Clean up even when the comparison fails, so a stale gene.txt
            # is never left behind in the working directory.
            if os.path.exists('gene.txt'):
                os.remove('gene.txt')
Example #2
0
 def test_writeCircosConf(self):
     """Test writeCircosConf.

     ``circos.conf`` is generated three times and compared with a
     different reference file each time:

     * after the sequence and segmental duplication data files are
       written (``test-data/circos.conf``);
     * after the gene and TE data files are written as well
       (``test-data/circos2.conf``);
     * after the similarity data file is written as well
       (``test-data/circos3.conf``).
     """

     lSeqs = [('seq1',20000),('seq2',30000)]
     lRegions = [(Region('seq1',100,220,1),Region('seq2',100,220,1)),
                 (Region('seq1',1200,1300,-1),Region('seq2',1300,1400,1))]
     lAlgmts = [('ATGCATGCATGCATGCATGCATGCATGCATGCATGCAGGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATATGTGTAGTGAGTCGTCCC',
                 'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGATGTACGATATAGCCCAC'),
                ('ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA',
                 'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA')]
     lDuplications = [Duplication('seq1',1,5000,'seq2',1,6000,lRegions,lAlgmts)]
     lGenes = [Gene('GENE1','seq1',12,600,1),Gene('GENE2','seq2',100,1000,-1)]
     lTEs = [Feature('TE1','seq1',1000,2000,1,'TE'),Feature('TE2','seq2',4000,4500,-1,'TE')]

     try:
         self.plot.writeSeqDataFile(lSeqs, 'genome.txt')
         self.plot.writeSegDupDataFile(lDuplications,'segdup.txt')
         self.plot.writeCircosConf()
         self.assertTrue(filecmp.cmp('circos.conf','test-data/circos.conf'))

         self.plot.writeGeneDataFile(lGenes,'gene.txt')
         self.plot.writeTEDataFile(lTEs,'TE.txt')
         self.plot.writeCircosConf()
         self.assertTrue(filecmp.cmp('circos.conf','test-data/circos2.conf'))

         self.plot.writeSimilarityDataFile(lDuplications,'similarity.txt')
         self.plot.writeCircosConf()
         self.assertTrue(filecmp.cmp('circos.conf','test-data/circos3.conf'))
     finally:
         # Remove the generated configuration even when one of the
         # comparisons fails; the data files are left in place exactly as
         # before.
         if os.path.exists('circos.conf'):
             os.remove('circos.conf')
Example #3
0
    def test_writeGeneLinkDataFile(self):
        """Test writeGeneLinkDataFile.

        Writes a single gene link (GENE1 on seq1, GENE2 on seq2) to
        ``gene-link.txt`` and compares the file designated by the return
        value with the reference file ``test-data/gene-link.txt``.
        """

        iDup = Duplication('seq1',5,5000,'seq2',10,5000)
        iGene1 = Gene('GENE1','seq1',10,100,1)
        iGene2 = Gene('GENE2','seq2',100,190,-1)
        lGeneLinks = [GeneLink(dup=iDup,gene1=iGene1,gene2=iGene2)]
        try:
            GeneLinkDataFile = self.plot.writeGeneLinkDataFile(lGeneLinks,'gene-link.txt')
            self.assertTrue(filecmp.cmp(GeneLinkDataFile,'test-data/gene-link.txt'))
        finally:
            # Clean up even when the comparison fails, so a stale
            # gene-link.txt is never left behind in the working directory.
            if os.path.exists('gene-link.txt'):
                os.remove('gene-link.txt')
Example #4
0
    def test_getCDSAlignment(self):
        """Test getCDSAlignment.

        Two scenarios are checked against the expected
        ``(alignment1, alignment2, region1, region2)`` tuple:

        1. both genes and both duplicated regions on the forward strand;
        2. the second gene on the reverse strand and the Chr1 region of the
           duplication on the reverse strand.
        """

        # Scenario 1: everything on the forward strand.
        gene1 = Gene('G00001', 'Chr1', 1, 27, 1, [
            Transcript('G00001.1', 'Chr1', 1, 27, 1, 'G00001', [
                CDS('G00001.1_cds_1', 'Chr1', 1, 6, 1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 13, 21, 1, 'G00001.1')
            ])
        ])
        gene2 = Gene('G00002', 'Chr5', 1, 27, 1, [
            Transcript('G00002.1', 'Chr5', 1, 27, 1, 'G00002', [
                CDS('G00002.1_cds_1', 'Chr5', 1, 6, 1, 'G00002.1'),
                CDS('G00002.1_cds_1', 'Chr5', 13, 27, 1, 'G00002.1')
            ])
        ])

        gl = GeneLink(
            Duplication('Chr1', 1, 58, 'Chr5', 1, 60, [
                (Region('Chr1', 1, 58, 1), Region('Chr5', 1, 60, 1))
            ], [(
                'ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACGATGATGAT--TA',
                'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACGATGATGATCATA')
                ]), gene1, gene2)

        # assertEqual: the assertEquals alias is deprecated and no longer
        # exists in recent Python versions.
        self.assertEqual(
            ('ATGTATtctatcTCATGTTAAtgctaa', 'ATGTATtctatcTCATGTTACTGCTAA',
             Region('Chr1', 1, 27, 1), Region('Chr5', 1, 27, 1)),
            gl.getCDSAlignment())

        # Scenario 2: gene2 and the Chr1 duplicated region on the reverse
        # strand.
        gene1 = Gene('G00001', 'Chr1', 1, 27, 1, [
            Transcript('G00001.1', 'Chr1', 1, 27, 1, 'G00001', [
                CDS('G00001.1_cds_1', 'Chr1', 1, 6, 1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 13, 21, 1, 'G00001.1')
            ])
        ])
        gene2 = Gene('G00002', 'Chr5', 27, 60, -1, [
            Transcript('G00002.1', 'Chr5', 27, 60, -1, 'G00002', [
                CDS('G00002.1_cds_1', 'Chr5', 27, 39, -1, 'G00002.1'),
                CDS('G00002.1_cds_1', 'Chr5', 48, 60, -1, 'G00002.1')
            ])
        ])

        gl = GeneLink(
            Duplication('Chr1', 1, 58, 'Chr5', 1, 60, [
                (Region('Chr1', 1, 58, -1), Region('Chr5', 1, 60, 1))
            ], [(
                'ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACGATGATGAG--TA',
                'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACGATGATGATCATA')
                ]), gene1, gene2)

        self.assertEqual(
            ('atactagtcatGATCAGATAcgatgaTGAG--TA',
             'ATACTAGTCATGAtcagatacGATGATGATCATA', Region(
                 'Chr1', 1, 32, -1), Region('Chr5', 27, 60, 1)),
            gl.getCDSAlignment())
Example #5
0
    def test_getlGenesFromCoordinates(self):
        """Test getlGenesFromCoordinates.

        Inserts two Chr1 genes and checks that querying the window
        230000-250000 returns only the second one.
        """

        gene1 = Gene('G00001', 'Chr1', 23988, 24919, -1, [
            Transcript('G00001.1', 'Chr1', 23988, 24919, -1, 'G00001', [
                CDS('G00001.1_cds_1', 'Chr1', 23988, 24083, -1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 24274, 24427, -1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 24489, 24919, -1, 'G00001.1')
            ])
        ])
        gene2 = Gene('G00002', 'Chr1', 239880, 249190, -1, [
            Transcript('G00002.1', 'Chr1', 239880, 249190, -1, 'G00002', [
                CDS('G00002.1_cds_1', 'Chr1', 239880, 240830, -1, 'G00002.1'),
                CDS('G00002.1_cds_1', 'Chr1', 242740, 244270, -1, 'G00002.1'),
                CDS('G00002.1_cds_1', 'Chr1', 244890, 249190, -1, 'G00002.1')
            ])
        ])

        lGenes = [gene1, gene2]
        # self.db.deleteAllGenes()  TODO
        self.db.insertlGenes(lGenes)

        # list(...) keeps the comparison valid when the accessor returns a
        # dict view (never equal to a list on Python 3); assertEqual
        # replaces the deprecated assertEquals alias.
        self.assertEqual(
            [gene2],
            list(self.db.getlGenesFromCoordinates('Chr1', 230000, 250000)))
Example #6
0
    def test_selectAllGenes(self):
        """Test selectAllGenes.

        Inserts a single gene (one transcript, three CDS) and checks that
        selectAllGenes returns exactly that gene.
        """

        gene1 = Gene('G00001', 'Chr1', 23988, 24919, -1, [
            Transcript('G00001.1', 'Chr1', 23988, 24919, -1, 'G00001', [
                CDS('G00001.1_cds_1', 'Chr1', 23988, 24083, -1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 24274, 24427, -1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 24489, 24919, -1, 'G00001.1')
            ])
        ])

        lGenes = [gene1]
        # self.db.deleteAllGenes()  TODO
        self.db.insertlGenes(lGenes)

        # list(...) keeps the comparison valid when the accessor returns a
        # dict view (never equal to a list on Python 3); assertEqual
        # replaces the deprecated assertEquals alias.
        self.assertEqual([gene1], list(self.db.selectAllGenes()))
Example #7
0
    def getlGenesFromCoordinates(self, seqid, start, end):
        """Get genes included in a defined region.

        Selects the rows of the ``gene`` table whose ``seqid`` matches and
        whose ``start`` / ``end`` columns lie strictly inside the window
        (``start > start`` and ``end < end``). Transcripts and CDS are read
        with the same window filter and attached to their parent gene /
        transcript when that parent was selected.

        Args:
            seqid: value matched against the ``seqid`` column.
            start: exclusive lower bound for the ``start`` column.
            end: exclusive upper bound for the ``end`` column.

        Returns:
            A list of Gene objects (empty when no gene matches).
        """

        # Values are bound as query parameters rather than formatted into
        # the SQL text, so a quote in seqid can neither break nor alter the
        # statement.
        window = (seqid, start, end)
        dGenes = {}
        dTranscripts = {}

        cursor = self.conn.execute(
            'select id, seqid, start, end, strand from gene '
            'where seqid = ? and start > ? and end < ? order by start',
            window)
        for row in cursor:
            dGenes[row[0]] = Gene(row[0], row[1], row[2], row[3], row[4])

        if dGenes:
            cursor = self.conn.execute(
                'select id, seqid, start,end,strand,gene_id from transcript '
                'where seqid = ? and start > ? and end < ? order by start',
                window)
            for row in cursor:
                transcript = Transcript(row[0], row[1], row[2], row[3], row[4],
                                        row[5])

                # Transcripts whose gene is outside the window are ignored.
                if transcript.gene_id not in dGenes:
                    continue

                dTranscripts[row[0]] = transcript
                gene = dGenes[transcript.gene_id]
                # NOTE(review): an empty list is replaced instead of
                # appended to - presumably so that a default list shared
                # between instances is never mutated; confirm against the
                # Gene constructor before simplifying.
                if len(gene.lTranscripts) > 0:
                    gene.lTranscripts.append(transcript)
                else:
                    gene.lTranscripts = [transcript]

        if dTranscripts:
            cursor = self.conn.execute(
                'select cds_id, seqid, start,end,strand,transcript_id from cds '
                'where seqid = ? and start > ? and end < ? order by start',
                window)
            for row in cursor:
                cds = CDS(row[0], row[1], row[2], row[3], row[4], row[5])

                # CDS whose transcript was not selected are ignored.
                if cds.transcript_id not in dTranscripts:
                    continue

                parent = dTranscripts[cds.transcript_id]
                # Same replace-when-empty pattern as for transcripts above.
                if len(parent.lCDS) > 0:
                    parent.lCDS.append(cds)
                else:
                    parent.lCDS = [cds]

        # Return a real list: on Python 3 dict.values() is a view that
        # cannot be indexed and never compares equal to a list.
        return list(dGenes.values())
Example #8
0
    def _parse(self):
        """Parse the gff file.

        Reads ``self.inputGffFile`` line by line:

        * ``gene`` rows create a Gene that is appended to ``self.lGenes``;
        * ``mRNA`` rows create a Transcript attached to the gene named by
          their ``Parent`` tag;
        * ``CDS`` rows create a CDS attached to the transcript named by
          their ``Parent`` tag.

        Lines starting with ``#`` and lines with fewer than nine
        tab-separated columns (blank lines, for instance) are skipped.
        Rows of any other feature type are ignored.

        Raises:
            KeyError: if an ``mRNA`` / ``CDS`` row names a parent that has
                not been read earlier in the file.
        """

        dGenes = {}
        dTranscripts = {}

        with open(self.inputGffFile, 'r') as gff:
            for line in gff:
                if line.startswith('#'):
                    continue

                values = line.rstrip('\n').split('\t')
                if len(values) < 9:
                    # Not a complete feature row; indexing the columns
                    # below would raise IndexError.
                    continue

                feature_type = values[2]
                if feature_type not in ('gene', 'mRNA', 'CDS'):
                    continue

                feature_id = self._getFeatureTagValue('ID', values[8])

                if feature_type == 'gene':
                    currentGene = Gene(feature_id, values[0], int(values[3]), int(values[4]), self._getStrand(values[6]))
                    dGenes[feature_id] = currentGene
                    self.lGenes.append(currentGene)

                elif feature_type == 'mRNA':
                    gene_id = self._getFeatureTagValue('Parent', values[8])
                    currentTranscript = Transcript(feature_id, values[0], int(values[3]), int(values[4]), self._getStrand(values[6]), gene_id)
                    dTranscripts[feature_id] = currentTranscript

                    # NOTE(review): an empty list is replaced instead of
                    # appended to - presumably so that a default list
                    # shared between instances is never mutated; confirm
                    # against the Gene constructor before simplifying.
                    if len(dGenes[gene_id].lTranscripts) > 0:
                        dGenes[gene_id].lTranscripts.append(currentTranscript)
                    else:
                        dGenes[gene_id].lTranscripts = [currentTranscript]

                else:  # 'CDS'
                    transcript_id = self._getFeatureTagValue('Parent', values[8])
                    currentCDS = CDS(feature_id, values[0], int(values[3]), int(values[4]), self._getStrand(values[6]), transcript_id)
                    # Same replace-when-empty pattern as for transcripts.
                    if len(dTranscripts[transcript_id].lCDS) > 0:
                        dTranscripts[transcript_id].lCDS.append(currentCDS)
                    else:
                        dTranscripts[transcript_id].lCDS = [currentCDS]
Example #9
0
    def selectAllGenes(self):
        """Select all genes.

        Reads every row of the ``gene``, ``transcript`` and ``cds`` tables
        and rebuilds the hierarchy: each transcript is attached to the gene
        named by its ``gene_id`` and each CDS (read in ``start`` order) to
        the transcript named by its ``transcript_id``.

        Returns:
            A list of Gene objects (empty when the ``gene`` table is empty).

        Raises:
            KeyError: if a transcript or CDS row references a parent id
                that is absent from the parent table.
        """

        dGenes = {}
        dTranscripts = {}

        cursor = self.conn.execute(
            '''select id, seqid, start, end, strand from gene''')
        for row in cursor:
            dGenes[row[0]] = Gene(row[0], row[1], row[2], row[3], row[4])

        cursor = self.conn.execute(
            '''select id, seqid, start,end,strand,gene_id from transcript''')
        for row in cursor:
            transcript = Transcript(row[0], row[1], row[2], row[3], row[4],
                                    row[5])
            dTranscripts[row[0]] = transcript

            gene = dGenes[transcript.gene_id]
            # NOTE(review): an empty list is replaced instead of appended
            # to - presumably so that a default list shared between
            # instances is never mutated; confirm against the Gene
            # constructor before simplifying.
            if len(gene.lTranscripts) > 0:
                gene.lTranscripts.append(transcript)
            else:
                gene.lTranscripts = [transcript]

        cursor = self.conn.execute(
            '''select cds_id, seqid, start,end,strand,transcript_id from cds order by start'''
        )
        for row in cursor:
            cds = CDS(row[0], row[1], row[2], row[3], row[4], row[5])

            parent = dTranscripts[cds.transcript_id]
            # Same replace-when-empty pattern as for transcripts above.
            if len(parent.lCDS) > 0:
                parent.lCDS.append(cds)
            else:
                parent.lCDS = [cds]

        # Return a real list: on Python 3 dict.values() is a view that
        # cannot be indexed and never compares equal to a list.
        return list(dGenes.values())
Example #10
0
    def test_getAllGenes(self):
        """Check the first gene returned by getAllGenes for test-data/gene.gff3."""
        parser = GffGeneParser("test-data/gene.gff3")

        # Expected hierarchy: one gene, one transcript, three CDS.
        expected_cds = [
            CDS('G00001.1_cds_1', 'Chr1', 23988, 24083, -1, 'G00001.1'),
            CDS('G00001.1_cds_1', 'Chr1', 24274, 24427, -1, 'G00001.1'),
            CDS('G00001.1_cds_1', 'Chr1', 24489, 24919, -1, 'G00001.1'),
        ]
        expected_transcript = Transcript(
            'G00001.1', 'Chr1', 23988, 24919, -1, 'G00001', expected_cds)
        expected_gene = Gene(
            'G00001', 'Chr1', 23988, 24919, -1, [expected_transcript])

        first_parsed = parser.getAllGenes()[0]
        self.assertEqual(first_parsed, expected_gene)