Example #1
0
    def test_getNonRedondantDuplications(self):
        """Compare getNonRedondantDuplications() on test-data/sdd.gff3
        with the expected list of Duplication objects."""

        parser = GffDuplicationParser("test-data/sdd.gff3")

        # seq1 <-> seq8: a single aligned region pair
        dupSeq1Seq8 = Duplication(
            'seq1', 6010863, 6029759, 'seq8', 4391356, 4410272,
            [(Region('seq1', 6010863, 6029759, 1),
              Region('seq8', 4391356, 4410272, 1))])

        # seq2 <-> seq11: two aligned region pairs
        dupSeq2Seq11 = Duplication(
            'seq2', 26727, 32020, 'seq11', 521201, 524615,
            [(Region('seq2', 26727, 29266, -1),
              Region('seq11', 522092, 524615, 1)),
             (Region('seq2', 31119, 32020, -1),
              Region('seq11', 521201, 522101, 1))])

        # seq11 <-> seq11: a single aligned region pair
        dupSeq11Single = Duplication(
            'seq11', 26582, 33594, 'seq11', 584193, 591205,
            [(Region('seq11', 26582, 33594, -1),
              Region('seq11', 584193, 591205, 1))])

        # seq11 <-> seq11: two aligned region pairs
        dupSeq11Double = Duplication(
            'seq11', 38277, 40563, 'seq11', 554466, 556516,
            [(Region('seq11', 38277, 38402, 1),
              Region('seq11', 554466, 554591, 1)),
             (Region('seq11', 38511, 40563, -1),
              Region('seq11', 554467, 556516, 1))])

        expected = [dupSeq1Seq8, dupSeq2Seq11, dupSeq11Single, dupSeq11Double]

        self.assertEqual(parser.getNonRedondantDuplications(), expected)
Example #2
0
    def test_getCDSAlignment(self):
        """Test GeneLink.getCDSAlignment for two strand configurations.

        Each case links two genes (one transcript with two CDS each)
        through a Duplication carrying one aligned region pair, and
        expects a tuple of (aligned sequence 1, aligned sequence 2,
        Region on Chr1, Region on Chr5).

        NOTE(review): lowercase stretches in the expected strings seem to
        mark bases outside the CDS features -- confirm against GeneLink.
        """

        # Case 1: both aligned regions and both genes are on strand 1.
        gene1 = Gene('G00001', 'Chr1', 1, 27, 1, [
            Transcript('G00001.1', 'Chr1', 1, 27, 1, 'G00001', [
                CDS('G00001.1_cds_1', 'Chr1', 1, 6, 1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 13, 21, 1, 'G00001.1')
            ])
        ])
        gene2 = Gene('G00002', 'Chr5', 1, 27, 1, [
            Transcript('G00002.1', 'Chr5', 1, 27, 1, 'G00002', [
                CDS('G00002.1_cds_1', 'Chr5', 1, 6, 1, 'G00002.1'),
                CDS('G00002.1_cds_1', 'Chr5', 13, 27, 1, 'G00002.1')
            ])
        ])

        gl = GeneLink(
            Duplication('Chr1', 1, 58, 'Chr5', 1, 60, [
                (Region('Chr1', 1, 58, 1), Region('Chr5', 1, 60, 1))
            ], [(
                'ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACGATGATGAT--TA',
                'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACGATGATGATCATA')
                ]), gene1, gene2)

        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed in Python 3.12.
        self.assertEqual(
            ('ATGTATtctatcTCATGTTAAtgctaa', 'ATGTATtctatcTCATGTTACTGCTAA',
             Region('Chr1', 1, 27, 1), Region('Chr5', 1, 27, 1)),
            gl.getCDSAlignment())

        # Case 2: the Chr1 aligned region is on strand -1 and gene2 lies
        # on strand -1 of Chr5.
        gene1 = Gene('G00001', 'Chr1', 1, 27, 1, [
            Transcript('G00001.1', 'Chr1', 1, 27, 1, 'G00001', [
                CDS('G00001.1_cds_1', 'Chr1', 1, 6, 1, 'G00001.1'),
                CDS('G00001.1_cds_1', 'Chr1', 13, 21, 1, 'G00001.1')
            ])
        ])
        gene2 = Gene('G00002', 'Chr5', 27, 60, -1, [
            Transcript('G00002.1', 'Chr5', 27, 60, -1, 'G00002', [
                CDS('G00002.1_cds_1', 'Chr5', 27, 39, -1, 'G00002.1'),
                CDS('G00002.1_cds_1', 'Chr5', 48, 60, -1, 'G00002.1')
            ])
        ])

        gl = GeneLink(
            Duplication('Chr1', 1, 58, 'Chr5', 1, 60, [
                (Region('Chr1', 1, 58, -1), Region('Chr5', 1, 60, 1))
            ], [(
                'ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACGATGATGAG--TA',
                'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACGATGATGATCATA')
                ]), gene1, gene2)

        self.assertEqual(
            ('atactagtcatGATCAGATAcgatgaTGAG--TA',
             'ATACTAGTCATGAtcagatacGATGATGATCATA', Region(
                 'Chr1', 1, 32, -1), Region('Chr5', 27, 60, 1)),
            gl.getCDSAlignment())
Example #3
0
    def test_getSeqAlignment(self):
        """Test Duplication.getSeqAlignment.

        Covers a single region pair with the seq1 region on strand 1,
        the same pair with the seq1 region on strand -1, and a
        duplication made of two gapped region pairs. Only the first
        element of the returned value is checked.
        """

        # 164-base sequence, aligned against an identical copy of itself.
        seq = 'TCCTAGCCGATCTAATCGGAGACTATCTAGGCGACATATAAAATTTTGACGAGCTATAATTATCCTCTAGGCTAATCGAGACCACCGCTATCCTACAGCCAAAACAGCGACTTCATCATCGAGCCGCACTATCCTCTACGGCGAGCGCTCTCTACGAGCATCAT'

        # seq1 region on strand 1
        dup = Duplication('seq1', 1, 164, 'seq2', 237, 400,
                          [(Region('seq1', 1, 164, 1),
                            Region('seq2', 237, 400, 1))],
                          [(seq, seq)])

        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed in Python 3.12.
        self.assertEqual('TCCT', dup.getSeqAlignment('seq1', 1, 4)[0])

        # seq1 region on strand -1
        dup = Duplication('seq1', 1, 164, 'seq2', 237, 400,
                          [(Region('seq1', 1, 164, -1),
                            Region('seq2', 237, 400, 1))],
                          [(seq, seq)])

        self.assertEqual('TCAT', dup.getSeqAlignment('seq1', 1, 4)[0])

        # two region pairs whose alignments contain gaps
        dup = Duplication('seq4', 1, 40, 'seq5', 20, 30,
                          [(Region('seq4', 1, 10, -1),
                            Region('seq5', 20, 25, 1)),
                           (Region('seq4', 35, 40, -1),
                            Region('seq5', 25, 30, 1))],
                          [('ATATATATAT', 'AT----ATAT'),
                           ('ATGT-TT', 'AT-TTTG')])

        self.assertEqual('ATGT', dup.getSeqAlignment('seq4', 37, 40)[0])
        self.assertEqual('T----ATAT', dup.getSeqAlignment('seq5', 21, 25)[0])
Example #4
0
    def test_writeSegDupDataFile(self):
        """Test writeSegDupDataFile against test-data/segdup.txt."""

        def _removeIfExists(path):
            # Guarded so the cleanup never raises when the writer failed
            # before creating the file.
            if os.path.exists(path):
                os.remove(path)

        # Registered before writing so the output file is removed even
        # when the comparison below fails.
        self.addCleanup(_removeIfExists, 'segdup.txt')

        lDuplications = [Duplication('seq1', 5, 5000, 'seq2', 10, 5000)]
        SDDataFile = self.plot.writeSegDupDataFile(lDuplications, 'segdup.txt')
        self.assertTrue(filecmp.cmp(SDDataFile, 'test-data/segdup.txt'))
Example #5
0
 def test_writeCircosConf(self):
     """Test writeCircosConf after successive data files are written.

     circos.conf is regenerated and compared with a reference file
     three times: after the sequence and segmental-duplication files
     (test-data/circos.conf), after the gene and TE files
     (test-data/circos2.conf), and after the similarity file
     (test-data/circos3.conf).
     """

     def _removeIfExists(path):
         # Guarded so the cleanup never raises for a file that was not
         # created (e.g. when an earlier step failed).
         if os.path.exists(path):
             os.remove(path)

     # Registered up front so every file this test writes is removed,
     # even when one of the assertions fails.
     for path in ('circos.conf', 'genome.txt', 'segdup.txt', 'gene.txt',
                  'TE.txt', 'similarity.txt'):
         self.addCleanup(_removeIfExists, path)

     lSeqs = [('seq1', 20000), ('seq2', 30000)]
     self.plot.writeSeqDataFile(lSeqs, 'genome.txt')
     lRegions = [(Region('seq1', 100, 220, 1), Region('seq2', 100, 220, 1)),
                 (Region('seq1', 1200, 1300, -1), Region('seq2', 1300, 1400, 1))]
     lAlgmts = [('ATGCATGCATGCATGCATGCATGCATGCATGCATGCAGGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATATGTGTAGTGAGTCGTCCC',
                 'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGATGTACGATATAGCCCAC'),
                ('ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA',
                 'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA')]
     lDuplications = [Duplication('seq1', 1, 5000, 'seq2', 1, 6000, lRegions, lAlgmts)]
     self.plot.writeSegDupDataFile(lDuplications, 'segdup.txt')

     # Step 1: sequence and segmental-duplication files written
     self.plot.writeCircosConf()
     self.assertTrue(filecmp.cmp('circos.conf', 'test-data/circos.conf'))

     # Step 2: gene and TE files written as well
     lGenes = [Gene('GENE1', 'seq1', 12, 600, 1), Gene('GENE2', 'seq2', 100, 1000, -1)]
     self.plot.writeGeneDataFile(lGenes, 'gene.txt')
     lTEs = [Feature('TE1', 'seq1', 1000, 2000, 1, 'TE'), Feature('TE2', 'seq2', 4000, 4500, -1, 'TE')]
     self.plot.writeTEDataFile(lTEs, 'TE.txt')
     self.plot.writeCircosConf()
     self.assertTrue(filecmp.cmp('circos.conf', 'test-data/circos2.conf'))

     # Step 3: similarity file written as well
     self.plot.writeSimilarityDataFile(lDuplications, 'similarity.txt')
     self.plot.writeCircosConf()
     self.assertTrue(filecmp.cmp('circos.conf', 'test-data/circos3.conf'))
Example #6
0
    def test_writeGeneLinkDataFile(self):
        """Test writeGeneLinkDataFile against test-data/gene-link.txt."""

        def _removeIfExists(path):
            # Guarded so the cleanup never raises when the writer failed
            # before creating the file.
            if os.path.exists(path):
                os.remove(path)

        # Registered before writing so the output file is removed even
        # when the comparison below fails.
        self.addCleanup(_removeIfExists, 'gene-link.txt')

        iDup = Duplication('seq1', 5, 5000, 'seq2', 10, 5000)
        iGene1 = Gene('GENE1', 'seq1', 10, 100, 1)
        iGene2 = Gene('GENE2', 'seq2', 100, 190, -1)
        lGeneLinks = [GeneLink(dup=iDup, gene1=iGene1, gene2=iGene2)]
        GeneLinkDataFile = self.plot.writeGeneLinkDataFile(lGeneLinks, 'gene-link.txt')
        self.assertTrue(filecmp.cmp(GeneLinkDataFile, 'test-data/gene-link.txt'))
Example #7
0
    def test_writeSimilarityDataFile(self):
        """Test writeSimilarityDataFile against test-data/similarity.txt."""

        def _removeIfExists(path):
            # Guarded so the cleanup never raises when the writer failed
            # before creating the file.
            if os.path.exists(path):
                os.remove(path)

        # Registered before writing so the output file is removed even
        # when the comparison below fails.
        self.addCleanup(_removeIfExists, 'similarity.txt')

        lRegions = [(Region('seq1', 100, 220, 1), Region('seq2', 100, 220, 1)),
                    (Region('seq1', 1200, 1300, -1), Region('seq2', 1300, 1400, 1))]
        lAlgmts = [('ATGCATGCATGCATGCATGCATGCATGCATGCATGCAGGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATATGTGTAGTGAGTCGTCCC',
                    'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGATGTACGATATAGCCCAC'),
                   ('ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA',
                    'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA')]
        lDuplications = [Duplication('seq1', 1, 5000, 'seq2', 1, 6000, lRegions, lAlgmts)]
        SimilarityDataFile = self.plot.writeSimilarityDataFile(lDuplications, 'similarity.txt')
        self.assertTrue(filecmp.cmp(SimilarityDataFile, 'test-data/similarity.txt'))
    def _buildDuplication(self,seq1Dup,lRegions):
        """Build a Duplication from the seq1 bounds and the region pairs.

        seq1Dup is read by index as (sequence, start, end). The seq2
        bounds are derived from the second region of every pair in
        lRegions: the smallest start, the largest end, and the sequence
        of the region that supplied that largest end. lRegions is passed
        on to the Duplication unchanged.
        """

        # Scan the second member of each pair; the sentinels are the
        # values reported when lRegions is empty.
        targetSeq = None
        lowestStart = 999999999999
        highestEnd = 0
        for _, target in lRegions:
            lowestStart = min(lowestStart, target.start)
            if target.end > highestEnd:
                highestEnd = target.end
                targetSeq = target.seq

        return Duplication(
            seq1Dup[0], seq1Dup[1], seq1Dup[2],
            targetSeq, int(lowestStart), int(highestEnd),
            lRegions)
Example #9
0
    def test_dSeqToSeq(self):
        """Check Duplication.dSeqToSeq for four aligned duplications.

        The expected value maps a sequence name to a dict of
        position -> (other sequence name, other position or None).
        """

        # Show complete dict diffs on failure.
        self.maxDiff = None

        # Case 1: seq1 against itself, both regions on strand 1, no gap.
        expected = {
            'seq1': {
                1: ('seq1', 1),
                2: ('seq1', 2),
                3: ('seq1', 3),
                4: ('seq1', 4),
                5: ('seq1', 5),
            },
        }
        regions = [(Region('seq1', 1, 5, 1), Region('seq1', 1, 5, 1))]
        duplication = Duplication('seq1', 1, 5, 'seq1', 1, 5,
                                  regions, [('ATGAT', 'ATGAT')])
        self.assertEqual(expected, duplication.dSeqToSeq)

        # Case 2: two regions of seq2, first one on strand -1, gapped
        # alignment.
        expected = {
            'seq2': {
                54: ('seq2', 302), 55: ('seq2', 301),
                56: ('seq2', 300), 57: ('seq2', None),
                58: ('seq2', 299), 59: ('seq2', 298),
                60: ('seq2', 297), 61: ('seq2', 295),
                62: ('seq2', 294), 63: ('seq2', 293),
                64: ('seq2', None), 65: ('seq2', 292),
                66: ('seq2', 291), 67: ('seq2', 290),
                68: ('seq2', 289), 69: ('seq2', 288),
                288: ('seq2', 69), 289: ('seq2', 68),
                290: ('seq2', 67), 291: ('seq2', 66),
                292: ('seq2', 65), 293: ('seq2', 63),
                294: ('seq2', 62), 295: ('seq2', 61),
                296: ('seq2', None), 297: ('seq2', 60),
                298: ('seq2', 59), 299: ('seq2', 58),
                300: ('seq2', 56), 301: ('seq2', 55),
                302: ('seq2', 54),
            },
        }
        regions = [(Region('seq2', 288, 302, -1), Region('seq2', 54, 69, 1))]
        duplication = Duplication(
            'seq2', 288, 302, 'seq2', 54, 69, regions,
            [('TGA-AGTCGCT-GTTTT', 'TGATAGT-GCTGGTTTT')])
        self.assertEqual(expected, duplication.dSeqToSeq)

        # Case 3: seq1 (strand -1) against seq2, no gap.
        expected = {
            'seq1': {
                1: ('seq2', 241),
                2: ('seq2', 240),
                3: ('seq2', 239),
                4: ('seq2', 238),
                5: ('seq2', 237),
            },
            'seq2': {
                237: ('seq1', 5),
                238: ('seq1', 4),
                239: ('seq1', 3),
                240: ('seq1', 2),
                241: ('seq1', 1),
            },
        }
        regions = [(Region('seq1', 1, 5, -1), Region('seq2', 237, 241, 1))]
        duplication = Duplication('seq1', 1, 5, 'seq2', 237, 241,
                                  regions, [('TCCTA', 'TCCTA')])
        self.assertEqual(expected, duplication.dSeqToSeq)

        # Case 4: Chr1 (strand -1) against Chr5, with a two-column gap
        # in the Chr1 aligned sequence.
        expected = {
            'Chr1': {
                1: ('Chr5', 60), 2: ('Chr5', 59),
                3: ('Chr5', 56), 4: ('Chr5', 55),
                5: ('Chr5', 54), 6: ('Chr5', 53),
                7: ('Chr5', 52), 8: ('Chr5', 51),
                9: ('Chr5', 50), 10: ('Chr5', 49),
                11: ('Chr5', 48), 12: ('Chr5', 47),
                13: ('Chr5', 46), 14: ('Chr5', 45),
                15: ('Chr5', 44), 16: ('Chr5', 43),
                17: ('Chr5', 42), 18: ('Chr5', 41),
                19: ('Chr5', 40), 20: ('Chr5', 39),
                21: ('Chr5', 38), 22: ('Chr5', 37),
                23: ('Chr5', 36), 24: ('Chr5', 35),
                25: ('Chr5', 34), 26: ('Chr5', 33),
                27: ('Chr5', 32), 28: ('Chr5', 31),
                29: ('Chr5', 30), 30: ('Chr5', 29),
                31: ('Chr5', 28), 32: ('Chr5', 27),
                33: ('Chr5', 26), 34: ('Chr5', 25),
                35: ('Chr5', 24), 36: ('Chr5', 23),
                37: ('Chr5', 22), 38: ('Chr5', 21),
                39: ('Chr5', 20), 40: ('Chr5', 19),
                41: ('Chr5', 18), 42: ('Chr5', 17),
                43: ('Chr5', 16), 44: ('Chr5', 15),
                45: ('Chr5', 14), 46: ('Chr5', 13),
                47: ('Chr5', 12), 48: ('Chr5', 11),
                49: ('Chr5', 10), 50: ('Chr5', 9),
                51: ('Chr5', 8), 52: ('Chr5', 7),
                53: ('Chr5', 6), 54: ('Chr5', 5),
                55: ('Chr5', 4), 56: ('Chr5', 3),
                57: ('Chr5', 2), 58: ('Chr5', 1),
            },
            'Chr5': {
                1: ('Chr1', 58), 2: ('Chr1', 57),
                3: ('Chr1', 56), 4: ('Chr1', 55),
                5: ('Chr1', 54), 6: ('Chr1', 53),
                7: ('Chr1', 52), 8: ('Chr1', 51),
                9: ('Chr1', 50), 10: ('Chr1', 49),
                11: ('Chr1', 48), 12: ('Chr1', 47),
                13: ('Chr1', 46), 14: ('Chr1', 45),
                15: ('Chr1', 44), 16: ('Chr1', 43),
                17: ('Chr1', 42), 18: ('Chr1', 41),
                19: ('Chr1', 40), 20: ('Chr1', 39),
                21: ('Chr1', 38), 22: ('Chr1', 37),
                23: ('Chr1', 36), 24: ('Chr1', 35),
                25: ('Chr1', 34), 26: ('Chr1', 33),
                27: ('Chr1', 32), 28: ('Chr1', 31),
                29: ('Chr1', 30), 30: ('Chr1', 29),
                31: ('Chr1', 28), 32: ('Chr1', 27),
                33: ('Chr1', 26), 34: ('Chr1', 25),
                35: ('Chr1', 24), 36: ('Chr1', 23),
                37: ('Chr1', 22), 38: ('Chr1', 21),
                39: ('Chr1', 20), 40: ('Chr1', 19),
                41: ('Chr1', 18), 42: ('Chr1', 17),
                43: ('Chr1', 16), 44: ('Chr1', 15),
                45: ('Chr1', 14), 46: ('Chr1', 13),
                47: ('Chr1', 12), 48: ('Chr1', 11),
                49: ('Chr1', 10), 50: ('Chr1', 9),
                51: ('Chr1', 8), 52: ('Chr1', 7),
                53: ('Chr1', 6), 54: ('Chr1', 5),
                55: ('Chr1', 4), 56: ('Chr1', 3),
                57: ('Chr1', None), 58: ('Chr1', None),
                59: ('Chr1', 2), 60: ('Chr1', 1),
            },
        }
        regions = [(Region('Chr1', 1, 58, -1), Region('Chr5', 1, 60, 1))]
        alignments = [('ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACG'
                       'ATGATGAG--TA',
                       'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACG'
                       'ATGATGATCATA')]
        duplication = Duplication('Chr1', 1, 58, 'Chr5', 1, 60,
                                  regions, alignments)
        self.assertEqual(expected, duplication.dSeqToSeq)