def test_getNonRedondantDuplications(self):
    """Check getNonRedondantDuplications on the sdd.gff3 fixture.

    Parses ``test-data/sdd.gff3`` and compares the parser output with
    four hand-written duplications (two of them made of two region pairs).
    """
    parser = GffDuplicationParser("test-data/sdd.gff3")

    expected = [
        Duplication(
            'seq1', 6010863, 6029759, 'seq8', 4391356, 4410272,
            [
                (Region('seq1', 6010863, 6029759, 1),
                 Region('seq8', 4391356, 4410272, 1)),
            ]),
        Duplication(
            'seq2', 26727, 32020, 'seq11', 521201, 524615,
            [
                (Region('seq2', 26727, 29266, -1),
                 Region('seq11', 522092, 524615, 1)),
                (Region('seq2', 31119, 32020, -1),
                 Region('seq11', 521201, 522101, 1)),
            ]),
        Duplication(
            'seq11', 26582, 33594, 'seq11', 584193, 591205,
            [
                (Region('seq11', 26582, 33594, -1),
                 Region('seq11', 584193, 591205, 1)),
            ]),
        Duplication(
            'seq11', 38277, 40563, 'seq11', 554466, 556516,
            [
                (Region('seq11', 38277, 38402, 1),
                 Region('seq11', 554466, 554591, 1)),
                (Region('seq11', 38511, 40563, -1),
                 Region('seq11', 554467, 556516, 1)),
            ]),
    ]

    self.assertEqual(parser.getNonRedondantDuplications(), expected)
def test_getCDSAlignment(self):
    """Test GeneLink.getCDSAlignment on two hand-built gene pairs.

    Case 1: both genes and both regions of the duplication are on strand 1.
    Case 2: the Chr1 region of the duplication is on strand -1 and gene2
    (with its transcript and CDS) is on strand -1.

    In both cases the expected value is a 4-tuple: two alignment strings
    followed by two Region objects.
    """
    # --- case 1: everything on the forward strand -------------------------
    gene1 = Gene('G00001', 'Chr1', 1, 27, 1, [
        Transcript('G00001.1', 'Chr1', 1, 27, 1, 'G00001', [
            CDS('G00001.1_cds_1', 'Chr1', 1, 6, 1, 'G00001.1'),
            CDS('G00001.1_cds_1', 'Chr1', 13, 21, 1, 'G00001.1')
        ])
    ])
    gene2 = Gene('G00002', 'Chr5', 1, 27, 1, [
        Transcript('G00002.1', 'Chr5', 1, 27, 1, 'G00002', [
            CDS('G00002.1_cds_1', 'Chr5', 1, 6, 1, 'G00002.1'),
            CDS('G00002.1_cds_1', 'Chr5', 13, 27, 1, 'G00002.1')
        ])
    ])
    gl = GeneLink(
        Duplication('Chr1', 1, 58, 'Chr5', 1, 60, [
            (Region('Chr1', 1, 58, 1), Region('Chr5', 1, 60, 1))
        ], [(
            'ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACGATGATGAT--TA',
            'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACGATGATGATCATA')
        ]),
        gene1, gene2)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        ('ATGTATtctatcTCATGTTAAtgctaa',
         'ATGTATtctatcTCATGTTACTGCTAA',
         Region('Chr1', 1, 27, 1),
         Region('Chr5', 1, 27, 1)),
        gl.getCDSAlignment())

    # --- case 2: Chr1 region and gene2 on the reverse strand --------------
    gene1 = Gene('G00001', 'Chr1', 1, 27, 1, [
        Transcript('G00001.1', 'Chr1', 1, 27, 1, 'G00001', [
            CDS('G00001.1_cds_1', 'Chr1', 1, 6, 1, 'G00001.1'),
            CDS('G00001.1_cds_1', 'Chr1', 13, 21, 1, 'G00001.1')
        ])
    ])
    gene2 = Gene('G00002', 'Chr5', 27, 60, -1, [
        Transcript('G00002.1', 'Chr5', 27, 60, -1, 'G00002', [
            CDS('G00002.1_cds_1', 'Chr5', 27, 39, -1, 'G00002.1'),
            CDS('G00002.1_cds_1', 'Chr5', 48, 60, -1, 'G00002.1')
        ])
    ])
    gl = GeneLink(
        Duplication('Chr1', 1, 58, 'Chr5', 1, 60, [
            (Region('Chr1', 1, 58, -1), Region('Chr5', 1, 60, 1))
        ], [(
            'ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACGATGATGAG--TA',
            'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACGATGATGATCATA')
        ]),
        gene1, gene2)
    self.assertEqual(
        ('atactagtcatGATCAGATAcgatgaTGAG--TA',
         'ATACTAGTCATGAtcagatacGATGATGATCATA',
         Region('Chr1', 1, 32, -1),
         Region('Chr5', 27, 60, 1)),
        gl.getCDSAlignment())
def test_getSeqAlignment(self):
    """Test Duplication.getSeqAlignment on three duplications.

    Only element [0] of the value returned by getSeqAlignment is checked.

    * a single region pair with both regions on strand 1;
    * the same pair with the seq1 region on strand -1;
    * two region pairs whose alignment strings contain '-' characters.
    """
    # The same 164-character string is used for both members of the
    # alignment pair in the first two duplications.
    alignedSeq = ('TCCTAGCCGATCTAATCGGAGACTATCTAGGCGACATATAAAATTTTGAC'
                  'GAGCTATAATTATCCTCTAGGCTAATCGAGACCACCGCTATCCTACAGCC'
                  'AAAACAGCGACTTCATCATCGAGCCGCACTATCCTCTACGGCGAGCGCTC'
                  'TCTACGAGCATCAT')

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    dup = Duplication(
        'seq1', 1, 164, 'seq2', 237, 400,
        [(Region('seq1', 1, 164, 1), Region('seq2', 237, 400, 1))],
        [(alignedSeq, alignedSeq)])
    self.assertEqual('TCCT', dup.getSeqAlignment('seq1', 1, 4)[0])

    dup = Duplication(
        'seq1', 1, 164, 'seq2', 237, 400,
        [(Region('seq1', 1, 164, -1), Region('seq2', 237, 400, 1))],
        [(alignedSeq, alignedSeq)])
    self.assertEqual('TCAT', dup.getSeqAlignment('seq1', 1, 4)[0])

    dup = Duplication(
        'seq4', 1, 40, 'seq5', 20, 30,
        [(Region('seq4', 1, 10, -1), Region('seq5', 20, 25, 1)),
         (Region('seq4', 35, 40, -1), Region('seq5', 25, 30, 1))],
        [('ATATATATAT', 'AT----ATAT'), ('ATGT-TT', 'AT-TTTG')])
    self.assertEqual('ATGT', dup.getSeqAlignment('seq4', 37, 40)[0])
    self.assertEqual('T----ATAT', dup.getSeqAlignment('seq5', 21, 25)[0])
def test_writeSegDupDataFile(self):
    """Test writeSegDupDataFile against the test-data/segdup.txt reference.

    Writes one duplication to ``segdup.txt`` through ``self.plot`` and
    compares the file returned by the writer with the reference file.
    """
    lDuplications = [Duplication('seq1', 5, 5000, 'seq2', 10, 5000)]
    try:
        SDDataFile = self.plot.writeSegDupDataFile(lDuplications, 'segdup.txt')
        self.assertTrue(filecmp.cmp(SDDataFile, 'test-data/segdup.txt'))
    finally:
        # Clean up even when the assertion fails, so a failing run does not
        # leave the generated file behind. The existence check keeps a
        # failure inside the writer from being masked by a missing file.
        if os.path.exists('segdup.txt'):
            os.remove('segdup.txt')
def test_writeCircosConf(self):
    """Test writeCircosConf against three reference configurations.

    ``writeCircosConf`` is called three times on ``self.plot``, each time
    after more data files have been written, and ``circos.conf`` is
    compared with a different reference file under ``test-data``:

    1. sequence + segmental-duplication data  -> circos.conf
    2. ... plus gene and TE data               -> circos2.conf
    3. ... plus similarity data                -> circos3.conf
    """
    lSeqs = [('seq1', 20000), ('seq2', 30000)]
    lRegions = [(Region('seq1', 100, 220, 1), Region('seq2', 100, 220, 1)),
                (Region('seq1', 1200, 1300, -1), Region('seq2', 1300, 1400, 1))]
    lAlgmts = [('ATGCATGCATGCATGCATGCATGCATGCATGCATGCAGGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATATGTGTAGTGAGTCGTCCC',
                'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGATGTACGATATAGCCCAC'),
               ('ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA',
                'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA')]
    lDuplications = [Duplication('seq1', 1, 5000, 'seq2', 1, 6000, lRegions, lAlgmts)]

    try:
        # Step 1: sequences and segmental duplications only.
        self.plot.writeSeqDataFile(lSeqs, 'genome.txt')
        self.plot.writeSegDupDataFile(lDuplications, 'segdup.txt')
        self.plot.writeCircosConf()
        self.assertTrue(filecmp.cmp('circos.conf', 'test-data/circos.conf'))

        # Step 2: add genes and TEs.
        lGenes = [Gene('GENE1', 'seq1', 12, 600, 1), Gene('GENE2', 'seq2', 100, 1000, -1)]
        self.plot.writeGeneDataFile(lGenes, 'gene.txt')
        lTEs = [Feature('TE1', 'seq1', 1000, 2000, 1, 'TE'), Feature('TE2', 'seq2', 4000, 4500, -1, 'TE')]
        self.plot.writeTEDataFile(lTEs, 'TE.txt')
        self.plot.writeCircosConf()
        self.assertTrue(filecmp.cmp('circos.conf', 'test-data/circos2.conf'))

        # Step 3: add similarity data.
        self.plot.writeSimilarityDataFile(lDuplications, 'similarity.txt')
        self.plot.writeCircosConf()
        self.assertTrue(filecmp.cmp('circos.conf', 'test-data/circos3.conf'))
    finally:
        # Remove the generated configuration even when an assertion fails.
        if os.path.exists('circos.conf'):
            os.remove('circos.conf')
        # NOTE(review): the data files written above (genome.txt, segdup.txt,
        # gene.txt, TE.txt, similarity.txt) are not removed here, exactly as
        # before - confirm whether they should be cleaned up too.
def test_writeGeneLinkDataFile(self):
    """Test writeGeneLinkDataFile against test-data/gene-link.txt.

    Builds a single GeneLink from one duplication and two genes, writes it
    to ``gene-link.txt`` through ``self.plot`` and compares the file
    returned by the writer with the reference file.
    """
    iDup = Duplication('seq1', 5, 5000, 'seq2', 10, 5000)
    iGene1 = Gene('GENE1', 'seq1', 10, 100, 1)
    iGene2 = Gene('GENE2', 'seq2', 100, 190, -1)
    lGeneLinks = [GeneLink(dup=iDup, gene1=iGene1, gene2=iGene2)]
    try:
        GeneLinkDataFile = self.plot.writeGeneLinkDataFile(lGeneLinks, 'gene-link.txt')
        self.assertTrue(filecmp.cmp(GeneLinkDataFile, 'test-data/gene-link.txt'))
    finally:
        # Clean up even when the assertion fails, so a failing run does not
        # leave the generated file behind. The existence check keeps a
        # failure inside the writer from being masked by a missing file.
        if os.path.exists('gene-link.txt'):
            os.remove('gene-link.txt')
def test_writeSimilarityDataFile(self):
    """Test writeSimilarityDataFile against test-data/similarity.txt.

    Builds one duplication made of two aligned region pairs, writes its
    similarity data to ``similarity.txt`` through ``self.plot`` and
    compares the file returned by the writer with the reference file.
    """
    lRegions = [(Region('seq1', 100, 220, 1), Region('seq2', 100, 220, 1)),
                (Region('seq1', 1200, 1300, -1), Region('seq2', 1300, 1400, 1))]
    lAlgmts = [('ATGCATGCATGCATGCATGCATGCATGCATGCATGCAGGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATATGTGTAGTGAGTCGTCCC',
                'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGATGTACGATATAGCCCAC'),
               ('ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA',
                'ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAATGCATGCATGCATGCATGCATGCATGCATGCATGCATGAA')]
    lDuplications = [Duplication('seq1', 1, 5000, 'seq2', 1, 6000, lRegions, lAlgmts)]
    try:
        SimilarityDataFile = self.plot.writeSimilarityDataFile(lDuplications, 'similarity.txt')
        self.assertTrue(filecmp.cmp(SimilarityDataFile, 'test-data/similarity.txt'))
    finally:
        # Clean up even when the assertion fails, so a failing run does not
        # leave the generated file behind. The existence check keeps a
        # failure inside the writer from being masked by a missing file.
        if os.path.exists('similarity.txt'):
            os.remove('similarity.txt')
def _buildDuplication(self, seq1Dup, lRegions):
    """Assemble a Duplication from a first-sequence span and region pairs.

    ``seq1Dup`` is indexed as (name, start, end) for the first sequence.
    ``lRegions`` holds (region1, region2) pairs; only the second member of
    each pair is inspected here, through its ``seq``, ``start`` and ``end``
    attributes.

    The second-sequence span runs from the smallest ``start`` to the
    largest ``end`` found among the second regions, and its name is the
    ``seq`` of the last pair. With no pairs at all, the placeholder values
    (None, 999999999999, 0) are passed on.
    """
    secondRegions = [second for _first, second in lRegions]

    # The seeds come first so that ties resolve exactly like a strict
    # '<' / '>' scan starting from those values.
    lowestStart = min([999999999999] + [region.start for region in secondRegions])
    highestEnd = max([0] + [region.end for region in secondRegions])
    seq2Name = secondRegions[-1].seq if secondRegions else None

    seq2Start = int(lowestStart)
    seq2End = int(highestEnd)

    return Duplication(
        seq1Dup[0], seq1Dup[1], seq1Dup[2],
        seq2Name, seq2Start, seq2End,
        lRegions)
def test_dSeqToSeq(self):
    """Check the dSeqToSeq attribute of Duplications built with alignments.

    Four duplications are built and their ``dSeqToSeq`` attribute is
    compared with a hand-written nested dict of the form
    ``{sequence: {position: (other sequence, other position or None)}}``:

    1. one sequence against itself, both regions on strand 1;
    2. one sequence against itself, first region on strand -1, alignment
       strings containing '-';
    3. two sequences, first region on strand -1;
    4. two sequences, first region on strand -1, alignment strings
       containing '-'.
    """
    # Show full diffs on failure; the expected dicts are large.
    self.maxDiff = None

    # --- case 1 ------------------------------------------------------------
    expectedSelfForward = {
        'seq1': {
            1: ('seq1', 1), 2: ('seq1', 2), 3: ('seq1', 3),
            4: ('seq1', 4), 5: ('seq1', 5),
        },
    }
    dupSelfForward = Duplication(
        'seq1', 1, 5, 'seq1', 1, 5,
        [(Region('seq1', 1, 5, 1), Region('seq1', 1, 5, 1))],
        [('ATGAT', 'ATGAT')])
    self.assertEqual(expectedSelfForward, dupSelfForward.dSeqToSeq)

    # --- case 2 ------------------------------------------------------------
    expectedSelfReverse = {
        'seq2': {
            54: ('seq2', 302), 55: ('seq2', 301), 56: ('seq2', 300),
            57: ('seq2', None), 58: ('seq2', 299), 59: ('seq2', 298),
            60: ('seq2', 297), 61: ('seq2', 295), 62: ('seq2', 294),
            63: ('seq2', 293), 64: ('seq2', None), 65: ('seq2', 292),
            66: ('seq2', 291), 67: ('seq2', 290), 68: ('seq2', 289),
            69: ('seq2', 288),
            288: ('seq2', 69), 289: ('seq2', 68), 290: ('seq2', 67),
            291: ('seq2', 66), 292: ('seq2', 65), 293: ('seq2', 63),
            294: ('seq2', 62), 295: ('seq2', 61), 296: ('seq2', None),
            297: ('seq2', 60), 298: ('seq2', 59), 299: ('seq2', 58),
            300: ('seq2', 56), 301: ('seq2', 55), 302: ('seq2', 54),
        },
    }
    dupSelfReverse = Duplication(
        'seq2', 288, 302, 'seq2', 54, 69,
        [(Region('seq2', 288, 302, -1), Region('seq2', 54, 69, 1))],
        [('TGA-AGTCGCT-GTTTT', 'TGATAGT-GCTGGTTTT')])
    self.assertEqual(expectedSelfReverse, dupSelfReverse.dSeqToSeq)

    # --- case 3 ------------------------------------------------------------
    expectedPairReverse = {
        'seq1': {
            1: ('seq2', 241), 2: ('seq2', 240), 3: ('seq2', 239),
            4: ('seq2', 238), 5: ('seq2', 237),
        },
        'seq2': {
            237: ('seq1', 5), 238: ('seq1', 4), 239: ('seq1', 3),
            240: ('seq1', 2), 241: ('seq1', 1),
        },
    }
    dupPairReverse = Duplication(
        'seq1', 1, 5, 'seq2', 237, 241,
        [(Region('seq1', 1, 5, -1), Region('seq2', 237, 241, 1))],
        [('TCCTA', 'TCCTA')])
    self.assertEqual(expectedPairReverse, dupPairReverse.dSeqToSeq)

    # --- case 4 ------------------------------------------------------------
    expectedChrReverse = {
        'Chr1': {
            1: ('Chr5', 60), 2: ('Chr5', 59), 3: ('Chr5', 56),
            4: ('Chr5', 55), 5: ('Chr5', 54), 6: ('Chr5', 53),
            7: ('Chr5', 52), 8: ('Chr5', 51), 9: ('Chr5', 50),
            10: ('Chr5', 49), 11: ('Chr5', 48), 12: ('Chr5', 47),
            13: ('Chr5', 46), 14: ('Chr5', 45), 15: ('Chr5', 44),
            16: ('Chr5', 43), 17: ('Chr5', 42), 18: ('Chr5', 41),
            19: ('Chr5', 40), 20: ('Chr5', 39), 21: ('Chr5', 38),
            22: ('Chr5', 37), 23: ('Chr5', 36), 24: ('Chr5', 35),
            25: ('Chr5', 34), 26: ('Chr5', 33), 27: ('Chr5', 32),
            28: ('Chr5', 31), 29: ('Chr5', 30), 30: ('Chr5', 29),
            31: ('Chr5', 28), 32: ('Chr5', 27), 33: ('Chr5', 26),
            34: ('Chr5', 25), 35: ('Chr5', 24), 36: ('Chr5', 23),
            37: ('Chr5', 22), 38: ('Chr5', 21), 39: ('Chr5', 20),
            40: ('Chr5', 19), 41: ('Chr5', 18), 42: ('Chr5', 17),
            43: ('Chr5', 16), 44: ('Chr5', 15), 45: ('Chr5', 14),
            46: ('Chr5', 13), 47: ('Chr5', 12), 48: ('Chr5', 11),
            49: ('Chr5', 10), 50: ('Chr5', 9), 51: ('Chr5', 8),
            52: ('Chr5', 7), 53: ('Chr5', 6), 54: ('Chr5', 5),
            55: ('Chr5', 4), 56: ('Chr5', 3), 57: ('Chr5', 2),
            58: ('Chr5', 1),
        },
        'Chr5': {
            1: ('Chr1', 58), 2: ('Chr1', 57), 3: ('Chr1', 56),
            4: ('Chr1', 55), 5: ('Chr1', 54), 6: ('Chr1', 53),
            7: ('Chr1', 52), 8: ('Chr1', 51), 9: ('Chr1', 50),
            10: ('Chr1', 49), 11: ('Chr1', 48), 12: ('Chr1', 47),
            13: ('Chr1', 46), 14: ('Chr1', 45), 15: ('Chr1', 44),
            16: ('Chr1', 43), 17: ('Chr1', 42), 18: ('Chr1', 41),
            19: ('Chr1', 40), 20: ('Chr1', 39), 21: ('Chr1', 38),
            22: ('Chr1', 37), 23: ('Chr1', 36), 24: ('Chr1', 35),
            25: ('Chr1', 34), 26: ('Chr1', 33), 27: ('Chr1', 32),
            28: ('Chr1', 31), 29: ('Chr1', 30), 30: ('Chr1', 29),
            31: ('Chr1', 28), 32: ('Chr1', 27), 33: ('Chr1', 26),
            34: ('Chr1', 25), 35: ('Chr1', 24), 36: ('Chr1', 23),
            37: ('Chr1', 22), 38: ('Chr1', 21), 39: ('Chr1', 20),
            40: ('Chr1', 19), 41: ('Chr1', 18), 42: ('Chr1', 17),
            43: ('Chr1', 16), 44: ('Chr1', 15), 45: ('Chr1', 14),
            46: ('Chr1', 13), 47: ('Chr1', 12), 48: ('Chr1', 11),
            49: ('Chr1', 10), 50: ('Chr1', 9), 51: ('Chr1', 8),
            52: ('Chr1', 7), 53: ('Chr1', 6), 54: ('Chr1', 5),
            55: ('Chr1', 4), 56: ('Chr1', 3), 57: ('Chr1', None),
            58: ('Chr1', None), 59: ('Chr1', 2), 60: ('Chr1', 1),
        },
    }
    dupChrReverse = Duplication(
        'Chr1', 1, 58, 'Chr5', 1, 60,
        [(Region('Chr1', 1, 58, -1), Region('Chr5', 1, 60, 1))],
        [('ATGTATTCTATCTCATGTTAATGCTAATACTAGTCATGATCAGATACG'
          'ATGATGAG--TA',
          'ATGTATTCTATCTCATGTTACTGCTAATACTAGTCATGATCAGATACG'
          'ATGATGATCATA')])
    self.assertEqual(expectedChrReverse, dupChrReverse.dSeqToSeq)