Esempio n. 1
0
class TestGenome(GenomeTestBase):
    """Exercise Genome queries: features, regions, sequences and metadata.

    The genome fixtures used below (``self.human``, ``self.mouse``,
    ``self.rat``, ``self.macaq``, ``self.gorilla``, ``self.brca2``) are not
    defined in this class -- presumably GenomeTestBase provides them; verify
    against the base class.
    """

    def test_other_features(self):
        """should correctly return record for ENSESTG00000035043"""
        matches = self.human.getEstMatching(StableId='ENSESTG00000035043')
        direct = list(matches)[0]
        ests = self.human.getFeatures(feature_types='est', CoordName=8,
                                      Start=121470000, End=121600000)
        # The EST fetched directly by stable id must also be among the EST
        # features returned for the queried region. A distinct loop name is
        # used so the comprehension cannot rebind an outer variable (list
        # comprehension variables leak into the enclosing scope on Python 2).
        stable_ids = [feature.StableId for feature in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        # Only CoordName is given, so the location is expected to span the
        # whole of chromosome 1.
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 249250621)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        Start = 11137
        End = Start + 20
        region = self.human.getRegion(CoordName=chrom, Start=Start, End=End,
                                      ensembl_coord=True)
        # With ensembl_coord=True the resulting Location starts one position
        # before the requested Start while End is unchanged (presumably a
        # 1-based inclusive to 0-based half-open conversion -- confirm).
        self.assertEqual(region.Location.Start, Start - 1)
        self.assertEqual(region.Location.End, End)
        # CoordName comes back as a string even though an int was passed.
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        # Compare as str, consistent with the other sequence checks here.
        self.assertEqual(str(region.Seq), 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        # old: chrY:57767412-57767433; new: chrY:59358024-59358045
        region = self.human.getRegion(CoordName="Y", Start=59358024,
                                      End=59358045, Strand=1,
                                      ensembl_coord=True)

        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # This genome is pinned to release 58 rather than the shared Release.
        krat = Genome('Kangaroo rat', Release=58)
        Start = 24385
        End = Start + 100
        region = krat.getRegion(CoordName='scaffold_13754', Start=Start,
                                End=End)
        # Without ensembl_coord the expected length is simply End - Start.
        self.assertEqual(str(region.Seq), 'N' * (End - Start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        # The first gene annotation on the resized region should be BRCA2.
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        # None maps to the plus strand; ints, floats and '+'/'-' are accepted.
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand('-'), -1)
        self.assertEqual(convert_strand('+'), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # Smoke test: passes as long as construction with pool_recycle does
        # not raise; the instance itself is not needed.
        Genome(Species="dog", Release=Release, account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        # NOTE(review): this binds gorilla on the current test instance only;
        # other tests reading self.gorilla must get it from elsewhere.
        self.gorilla = Genome(Species="gorilla", Release=Release,
                              account=account)
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        # NOTE(review): self.gorilla is not set in this method. If the base
        # class derives from unittest.TestCase, each test runs on a fresh
        # instance, so the assignment in test_gorilla is not visible here --
        # confirm GenomeTestBase supplies gorilla.
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        expected = (
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')
        self.assertEqual(str(cds), expected)

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all genomes"""
        # No assertion on the values: the test passes if the query does not
        # raise for any of the genomes.
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all genomes"""
        # No assertion on the values: the test passes if the query does not
        # raise for any of the genomes.
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')
Esempio n. 2
0
class TestGenome(GenomeTestBase):
    """Exercise Genome queries: features, regions, sequences and metadata.

    The genome fixtures used below (``self.human``, ``self.mouse``,
    ``self.rat``, ``self.macaq``, ``self.gorilla``, ``self.brca2``) are not
    defined in this class -- presumably GenomeTestBase provides them; verify
    against the base class.
    """

    def test_other_features(self):
        """should correctly return record for ENSESTG00000000010"""
        matches = self.human.getEstMatching(StableId='ENSESTG00000000010')
        direct = list(matches)[0]
        ests = self.human.getFeatures(feature_types='est',
                                      CoordName=6,
                                      Start=99994000,
                                      End=100076519)
        # The EST fetched directly by stable id must also be among the EST
        # features returned for the queried region. A distinct loop name is
        # used so the comprehension cannot rebind an outer variable (list
        # comprehension variables leak into the enclosing scope on Python 2).
        stable_ids = [feature.StableId for feature in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        # Only CoordName is given, so the location is expected to span the
        # whole of chromosome 1.
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 248956422)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        Start = 11137
        End = Start + 20
        region = self.human.getRegion(CoordName=chrom,
                                      Start=Start,
                                      End=End,
                                      ensembl_coord=True)
        # With ensembl_coord=True the resulting Location starts one position
        # before the requested Start while End is unchanged (presumably a
        # 1-based inclusive to 0-based half-open conversion -- confirm).
        self.assertEqual(region.Location.Start, Start - 1)
        self.assertEqual(region.Location.End, End)
        # CoordName comes back as a string even though an int was passed.
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        # Compare as str, consistent with the other sequence checks here.
        self.assertEqual(str(region.Seq), 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        region = self.human.getRegion(CoordName="Y",
                                      Start=57211873,
                                      End=57211894,
                                      Strand=1,
                                      ensembl_coord=True)

        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # This genome is pinned to release 58 rather than the shared Release.
        krat = Genome('Kangaroo rat', Release=58)
        Start = 24385
        End = Start + 100
        region = krat.getRegion(CoordName='scaffold_13754',
                                Start=Start,
                                End=End)
        # Without ensembl_coord the expected length is simply End - Start.
        self.assertEqual(str(region.Seq), 'N' * (End - Start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        # The first gene annotation on the resized region should be BRCA2.
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        # None maps to the plus strand; ints, floats and '+'/'-' are accepted.
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand('-'), -1)
        self.assertEqual(convert_strand('+'), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # Smoke test: passes as long as construction with pool_recycle does
        # not raise; the instance itself is not needed.
        Genome(Species="dog",
               Release=Release,
               account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        # NOTE(review): this binds gorilla on the current test instance only;
        # other tests reading self.gorilla must get it from elsewhere.
        self.gorilla = Genome(Species="gorilla",
                              Release=Release,
                              account=account)
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        # NOTE(review): self.gorilla is not set in this method. If the base
        # class derives from unittest.TestCase, each test runs on a fresh
        # instance, so the assignment in test_gorilla is not visible here --
        # confirm GenomeTestBase supplies gorilla.
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        expected = (
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')
        self.assertEqual(str(cds), expected)

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all genomes"""
        # No assertion on the values: the test passes if the query does not
        # raise for any of the genomes.
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all genomes"""
        # No assertion on the values: the test passes if the query does not
        # raise for any of the genomes.
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')