class TestGenome(GenomeTestBase):
    """Exercise Genome queries: features, regions, sequences and metadata.

    The genome fixtures used below (``self.human``, ``self.mouse``,
    ``self.rat``, ``self.macaq``, ``self.gorilla``, ``self.brca2``) are not
    defined in this class; presumably GenomeTestBase provides them --
    confirm against the base class.
    """

    def test_other_features(self):
        """should correctly return record for ENSESTG00000035043"""
        est = self.human.getEstMatching(StableId='ENSESTG00000035043')
        direct = list(est)[0]
        ests = self.human.getFeatures(feature_types='est', CoordName=8,
                                      Start=121470000, End=121600000)
        # Use a distinct loop name so the comprehension cannot rebind `est`
        # (list-comprehension variables leak into the scope on Python 2).
        stable_ids = [feature.StableId for feature in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 249250621)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.getRegion(CoordName=chrom, Start=start, End=end,
                                      ensembl_coord=True)
        # With ensembl_coord=True the resulting Location starts one position
        # before the requested Start, while End is unchanged.
        self.assertEqual(region.Location.Start, start - 1)
        self.assertEqual(region.Location.End, end)
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        self.assertEqual(region.Seq, 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        # old: chrY:57767412-57767433; new: chrY:59358024-59358045
        region = self.human.getRegion(CoordName="Y", Start=59358024,
                                      End=59358045, Strand=1,
                                      ensembl_coord=True)
        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        krat = Genome('Kangaroo rat', Release=58)
        start = 24385
        end = start + 100
        region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                                End=end)
        self.assertEqual(str(region.Seq), 'N' * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        # (input, expected strand) pairs, checked in the original order.
        cases = [
            (None, 1),
            (-1, -1),
            (1, 1),
            ('-', -1),
            ('+', 1),
            (-1.0, -1),
            (1.0, 1),
        ]
        for given, expected in cases:
            self.assertEqual(convert_strand(given), expected)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # Smoke test: construction with pool_recycle must simply not raise.
        Genome(Species="dog", Release=Release, account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        # Kept local: each test method runs on its own TestCase instance, so
        # an attribute assigned here would not be visible to other tests.
        gorilla = Genome(Species="gorilla", Release=Release, account=account)
        gene = gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        self.assertEqual(
            str(cds),
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all
        genomes"""
        # Smoke test: the call must not raise; the result is not inspected.
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all
        genomes"""
        # Smoke test: the call must not raise; the result is not inspected.
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')
class TestGenome(GenomeTestBase):
    """Exercise Genome queries: features, regions, sequences and metadata.

    The genome fixtures used below (``self.human``, ``self.mouse``,
    ``self.rat``, ``self.macaq``, ``self.gorilla``, ``self.brca2``) are not
    defined in this class; presumably GenomeTestBase provides them --
    confirm against the base class.
    """

    def test_other_features(self):
        """should correctly return record for ENSESTG00000000010"""
        est = self.human.getEstMatching(StableId='ENSESTG00000000010')
        direct = list(est)[0]
        ests = self.human.getFeatures(feature_types='est', CoordName=6,
                                      Start=99994000, End=100076519)
        # Use a distinct loop name so the comprehension cannot rebind `est`
        # (list-comprehension variables leak into the scope on Python 2).
        stable_ids = [feature.StableId for feature in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 248956422)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.getRegion(CoordName=chrom, Start=start, End=end,
                                      ensembl_coord=True)
        # With ensembl_coord=True the resulting Location starts one position
        # before the requested Start, while End is unchanged.
        self.assertEqual(region.Location.Start, start - 1)
        self.assertEqual(region.Location.End, end)
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        self.assertEqual(region.Seq, 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        region = self.human.getRegion(CoordName="Y", Start=57211873,
                                      End=57211894, Strand=1,
                                      ensembl_coord=True)
        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        krat = Genome('Kangaroo rat', Release=58)
        start = 24385
        end = start + 100
        region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                                End=end)
        self.assertEqual(str(region.Seq), 'N' * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        # (input, expected strand) pairs, checked in the original order.
        cases = [
            (None, 1),
            (-1, -1),
            (1, 1),
            ('-', -1),
            ('+', 1),
            (-1.0, -1),
            (1.0, 1),
        ]
        for given, expected in cases:
            self.assertEqual(convert_strand(given), expected)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # Smoke test: construction with pool_recycle must simply not raise.
        Genome(Species="dog", Release=Release, account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        # Kept local: each test method runs on its own TestCase instance, so
        # an attribute assigned here would not be visible to other tests.
        gorilla = Genome(Species="gorilla", Release=Release, account=account)
        gene = gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        self.assertEqual(
            str(cds),
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all
        genomes"""
        # Smoke test: the call must not raise; the result is not inspected.
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all
        genomes"""
        # Smoke test: the call must not raise; the result is not inspected.
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')