Esempio n. 1
0
 def test_gorilla(self):
     """should correctly return a gorilla gene"""
     # The genome is kept on the instance, exactly as before.
     self.gorilla = Genome(Species="gorilla", Release=Release,
                           account=account)
     gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
     # Only the first ten bases of the gene sequence are checked.
     # assertEqual replaces the deprecated assertEquals alias.
     leading_bases = str(gene.Seq[:10])
     self.assertEqual(leading_bases, 'TGGGAGTCCA')
Esempio n. 2
0
class GenomeTestBase(TestCase):
    """Test case base class holding Genome instances as class attributes."""
    # One Genome per species, created in this order while the class body
    # executes.
    human, mouse, rat, macaq, gorilla = tuple(
        Genome(Species=common_name, Release=Release, account=account)
        for common_name in ("human", "mouse", "rat", "macaque", "gorilla"))
    # Human gene looked up by its stable identifier.
    brca2 = human.getGeneByStableId(StableId="ENSG00000139618")
Esempio n. 3
0
 def test_no_assembly(self):
     """return N's for coordinates with no assembly"""
     # Release is hard-coded to 58 for this genome.
     krat = Genome('Kangaroo rat', Release=58)
     start = 24385
     end = start + 100
     region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                             End=end)
     # Every position of the requested span is expected to be an N.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(str(region.Seq), 'N' * (end - start))
Esempio n. 4
0
 def test_no_assembly(self):
     """return N's for coordinates with no assembly"""
     # Release is hard-coded to 58 for this genome.
     krat = Genome('Kangaroo rat', Release=58)
     start = 24385
     end = start + 100
     region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                             End=end)
     # Every position of the requested span is expected to be an N.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(str(region.Seq), 'N' * (end - start))
Esempio n. 5
0
class TestFeatureCoordLevels(TestCase):
    """Checks on the coordinate levels of chicken genome features."""

    def setUp(self):
        """Create the chicken Genome used by each test."""
        self.chicken = Genome(Species='chicken',
                              Release=Release,
                              account=account)

    def _observed_locations(self, features):
        """Return a (CoordName, Start, End) tuple per feature Location.

        CoordName is converted to str so it compares with the string names
        used in the expected values.
        """
        observed = []
        for feature in features:
            loc = feature.Location
            observed.append((str(loc.CoordName), loc.Start, loc.End))
        return observed

    def test_feature_levels(self):
        """repeat and cpg features should report the expected levels"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # NOTE(review): 'repeat' is not among the requested feature_types
        # yet is looked up here -- confirm it is populated regardless.
        self.assertEqual(chicken_feature_levels['repeat'].levels,
                         ['chromosome', 'scaffold'])
        # order of the cpg levels is not checked
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['chromosome', 'scaffold']))

    def test_repeat(self):
        """repeat features of a region should have the expected locations"""
        # use the chicken genome as it needs to do conversion;
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=21727352, End=21729141)
        region = self.chicken.getRegion(**coord)
        # NOTE(review): this comment used to say repeats are recorded at
        # contig level, but test_feature_levels expects
        # ['chromosome', 'scaffold'] -- confirm which is current.
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 21727499, 21727527), ("9", 21728009, 21728018),
                  ("9", 21728169, 21728178)]
        obs = self._observed_locations(repeats)
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """cpg features should be found on a chromosome and a scaffold"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=105184, End=184346)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 112153, 113139), ("26", 134125, 135050),
               ("26", 178899, 180227)]
        obs = self._observed_locations(cpgs1)
        self.assertEqual(set(obs), set(exp))

        # test cpg features recorded at scaffold level:
        coord2 = dict(CoordName='JH376196.1', Start=1, End=14640)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 3)
Esempio n. 6
0
class TestFeatureCoordLevels(TestCase):
    """Checks on the coordinate levels of chicken genome features."""

    def setUp(self):
        """Create the chicken Genome used by each test."""
        self.chicken = Genome(Species='chicken',
                              Release=Release,
                              account=account)

    def _observed_locations(self, features):
        """Return a (CoordName, Start, End) tuple per feature Location."""
        observed = []
        for feature in features:
            loc = feature.Location
            observed.append((loc.CoordName, loc.Start, loc.End))
        return observed

    def test_feature_levels(self):
        """repeat and cpg features should report the expected levels"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # NOTE(review): 'repeat' is not among the requested feature_types
        # yet is looked up here -- confirm it is populated regardless.
        self.assertEqual(chicken_feature_levels['repeat'].levels, ['contig'])
        # order of the cpg levels is not checked
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['contig', 'supercontig', 'chromosome']))

    def test_repeat(self):
        """repeat features of a region should have the expected locations"""
        # use the chicken genome as it needs to do conversion;
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=23817146, End=23818935)
        region = self.chicken.getRegion(**coord)
        # repeat is recorded at contig level, strand is 0
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 23817293, 23817321), ("9", 23817803, 23817812),
                  ("9", 23817963, 23817972)]
        obs = self._observed_locations(repeats)
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """cpg features should be found on a chromosome and a supercontig"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=110000, End=190000)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 116969, 117955), ("26", 139769, 140694),
               ("26", 184546, 185881)]
        obs = self._observed_locations(cpgs1)
        self.assertEqual(set(exp), set(obs))

        # test cpg features recorded at supercontig level:
        coord2 = dict(CoordName='Un_random', Start=29434117, End=29439117)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 1)
class TestFeatureCoordLevels(TestCase):
    """Checks on the coordinate levels of chicken genome features."""

    def setUp(self):
        """Create the chicken Genome used by each test."""
        self.chicken = Genome(Species='chicken', Release=Release,
                              account=account)

    def _observed_locations(self, features):
        """Return a (CoordName, Start, End) tuple per feature Location.

        CoordName is converted to str so it compares with the string names
        used in the expected values.
        """
        observed = []
        for feature in features:
            loc = feature.Location
            observed.append((str(loc.CoordName), loc.Start, loc.End))
        return observed

    def test_feature_levels(self):
        """repeat and cpg features should report the expected levels"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # NOTE(review): 'repeat' is not among the requested feature_types
        # yet is looked up here -- confirm it is populated regardless.
        self.assertEqual(chicken_feature_levels['repeat'].levels,
                         ['chromosome', 'scaffold'])
        # order of the cpg levels is not checked
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['chromosome', 'scaffold']))

    def test_repeat(self):
        """repeat features of a region should have the expected locations"""
        # use the chicken genome as it needs to do conversion;
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=21727352, End=21729141)
        region = self.chicken.getRegion(**coord)
        # NOTE(review): this comment used to say repeats are recorded at
        # contig level, but test_feature_levels expects
        # ['chromosome', 'scaffold'] -- confirm which is current.
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 21727499, 21727527), ("9", 21728009, 21728018),
                  ("9", 21728169, 21728178)]
        obs = self._observed_locations(repeats)
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """cpg features should be found on a chromosome and a scaffold"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=105184, End=184346)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 112153, 113139), ("26", 134125, 135050),
               ("26", 178899, 180227)]
        obs = self._observed_locations(cpgs1)
        self.assertEqual(set(obs), set(exp))

        # test cpg features recorded at scaffold level:
        coord2 = dict(CoordName='JH376196.1', Start=1, End=14640)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 3)
Esempio n. 8
0
class TestFeatureCoordLevels(TestCase):
    """Checks on the coordinate levels of chicken genome features."""

    def setUp(self):
        """Create the chicken Genome used by each test."""
        self.chicken = Genome(Species='chicken', Release=Release,
                              account=account)

    def _observed_locations(self, features):
        """Return a (CoordName, Start, End) tuple per feature Location."""
        observed = []
        for feature in features:
            loc = feature.Location
            observed.append((loc.CoordName, loc.Start, loc.End))
        return observed

    def test_feature_levels(self):
        """repeat and cpg features should report the expected levels"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # NOTE(review): 'repeat' is not among the requested feature_types
        # yet is looked up here -- confirm it is populated regardless.
        self.assertEqual(chicken_feature_levels['repeat'].levels, ['contig'])
        # order of the cpg levels is not checked
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['contig', 'supercontig', 'chromosome']))

    def test_repeat(self):
        """repeat features of a region should have the expected locations"""
        # use the chicken genome as it needs to do conversion;
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=23817146, End=23818935)
        region = self.chicken.getRegion(**coord)
        # repeat is recorded at contig level, strand is 0
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 23817293, 23817321), ("9", 23817803, 23817812),
                  ("9", 23817963, 23817972)]
        obs = self._observed_locations(repeats)
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """cpg features should be found on a chromosome and a supercontig"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=110000, End=190000)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 116969, 117955), ("26", 139769, 140694),
               ("26", 184546, 185881)]
        obs = self._observed_locations(cpgs1)
        self.assertEqual(set(exp), set(obs))

        # test cpg features recorded at supercontig level:
        coord2 = dict(CoordName='Un_random', Start=29434117, End=29439117)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 1)
Esempio n. 9
0
 def _attach_genomes(self):
     """Create a Genome for every entry of self.Species.

     Each genome is stored in self._genomes under its species and is also
     set as an attribute named by _Species.getComparaName(species).
     """
     for species in self.Species:
         # resolve the attribute name before the genome is created
         compara_attr = _Species.getComparaName(species)
         species_genome = Genome(
             Species=species, Release=self.Release, account=self._account)
         self._genomes[species] = species_genome
         setattr(self, compara_attr, species_genome)
Esempio n. 10
0
 def setUp(self):
     """Create the chicken Genome and keep it on the instance."""
     genome_args = dict(Species='chicken', Release=Release, account=account)
     self.chicken = Genome(**genome_args)
Esempio n. 11
0
class TestGenome(GenomeTestBase):
    """Genome queries run against the genomes inherited from GenomeTestBase."""

    def test_other_features(self):
        """should correctly return record for ENSESTG00000035043"""
        est = self.human.getEstMatching(StableId='ENSESTG00000035043')
        direct = list(est)[0]
        ests = self.human.getFeatures(feature_types='est', CoordName=8,
                                      Start=121470000, End=121600000)
        # a distinct loop name, so the query result `est` is not shadowed
        stable_ids = [record.StableId for record in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 249250621)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.getRegion(CoordName=chrom, Start=start, End=end,
                                      ensembl_coord=True)
        # with ensembl_coord=True the Location is expected to start one
        # position before the Start that was passed in
        self.assertEqual(region.Location.Start, start - 1)
        self.assertEqual(region.Location.End, end)
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        self.assertEqual(region.Seq, 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        # old: chrY:57767412-57767433; new: chrY:59358024-59358045
        region = self.human.getRegion(CoordName="Y", Start=59358024,
                                      End=59358045, Strand=1,
                                      ensembl_coord=True)

        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # Release is hard-coded to 58 for this genome.
        krat = Genome('Kangaroo rat', Release=58)
        start = 24385
        end = start + 100
        region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                                End=end)
        self.assertEqual(str(region.Seq), 'N' * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand('-'), -1)
        self.assertEqual(convert_strand('+'), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # passes when construction with pool_recycle does not raise; the
        # Genome itself is not needed, so it is not bound to a name
        Genome(Species="dog", Release=Release, account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        self.gorilla = Genome(Species="gorilla", Release=Release,
                              account=account)
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        # NOTE(review): self.gorilla is not assigned in this method --
        # presumably GenomeTestBase provides it; confirm.
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        expected = (
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')
        self.assertEqual(str(cds), expected)

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all genomes"""
        # only checks that each call completes; the result is not inspected
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all genomes"""
        # only checks that each call completes; the result is not inspected
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')
Esempio n. 12
0
 def test_gorilla(self):
     """should correctly return a gorilla gene"""
     # The genome is kept on the instance, exactly as before.
     self.gorilla = Genome(Species="gorilla", Release=Release,
                           account=account)
     gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
     # Only the first ten bases of the gene sequence are checked.
     # assertEqual replaces the deprecated assertEquals alias.
     leading_bases = str(gene.Seq[:10])
     self.assertEqual(leading_bases, 'TGGGAGTCCA')
Esempio n. 13
0
 def setUp(self):
     """Create the chicken Genome and keep it on the instance."""
     genome_args = dict(Species='chicken', Release=Release, account=account)
     self.chicken = Genome(**genome_args)
Esempio n. 14
0
__email__ = "*****@*****.**"
__status__ = "alpha"

Release = 76

# When ENSEMBL_ACCOUNT is set it is split on whitespace into
# host, username, password and an optional port; otherwise the account
# comes from get_ensembl_account for this release.
if 'ENSEMBL_ACCOUNT' not in os.environ:
    account = get_ensembl_account(release=Release)
else:
    args = os.environ['ENSEMBL_ACCOUNT'].split()
    host, username, password = args[:3]
    # a fourth field, when present, is the port number
    kwargs = {'port': int(args[3])} if len(args) > 3 else {}
    account = HostAccount(host, username, password, **kwargs)

# Genomes created once at import time.
human = Genome(Species='human', Release=Release, account=account)
platypus = Genome(Species='platypus', Release=Release, account=account)


class TestLocation(TestCase):
    """Tests of Coordinate construction."""

    def test_init(self):
        """a Coordinate should report the expected type, name and bounds"""
        coord_args = dict(CoordName='x', Start=1000, End=10000, Strand=-1,
                          genome=human)
        human_loc = Coordinate(**coord_args)
        # TODO: complete test for platypus
        # checked in this order: attribute name, expected value
        expected_attrs = [('CoordType', 'chromosome'),
                          ('CoordName', 'x'),
                          ('Start', 1000),
                          ('End', 10000)]
        for attr_name, expected in expected_attrs:
            self.assertEqual(getattr(human_loc, attr_name), expected)
Esempio n. 15
0
class GenomeTestBase(TestCase):
    """Test case base class holding Genome instances as class attributes."""
    # One Genome per species, created in this order while the class body
    # executes.
    human, mouse, rat, macaq = tuple(
        Genome(Species=common_name, Release=Release, account=account)
        for common_name in ("human", "mouse", "rat", "macaque"))
    # First gene returned by the human stable-id query.
    brca2 = list(human.getGenesMatching(StableId="ENSG00000139618"))[0]
Esempio n. 16
0
 def test_pool_connection(self):
     """exercising ability to specify pool connection"""
     # Passes when construction with pool_recycle does not raise; the
     # Genome itself is not needed, so it is not bound to a name.
     Genome(Species="dog",
            Release=Release,
            account=account,
            pool_recycle=1000)
Esempio n. 17
0
 def test_genome_comparison(self):
     """different genome instances with same CoreDb connection are equal"""
     # A second human genome built with the same release and account.
     h2 = Genome(Species='human', Release=Release, account=account)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(self.human, h2)
Esempio n. 18
0
class TestGenome(GenomeTestBase):
    """Genome queries run against the genomes inherited from GenomeTestBase."""

    def test_other_features(self):
        """should correctly return record for ENSESTG00000000010"""
        est = self.human.getEstMatching(StableId='ENSESTG00000000010')
        direct = list(est)[0]
        ests = self.human.getFeatures(feature_types='est',
                                      CoordName=6,
                                      Start=99994000,
                                      End=100076519)
        # a distinct loop name, so the query result `est` is not shadowed
        stable_ids = [record.StableId for record in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 248956422)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.getRegion(CoordName=chrom,
                                      Start=start,
                                      End=end,
                                      ensembl_coord=True)
        # with ensembl_coord=True the Location is expected to start one
        # position before the Start that was passed in
        self.assertEqual(region.Location.Start, start - 1)
        self.assertEqual(region.Location.End, end)
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        self.assertEqual(region.Seq, 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        region = self.human.getRegion(CoordName="Y",
                                      Start=57211873,
                                      End=57211894,
                                      Strand=1,
                                      ensembl_coord=True)

        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # Release is hard-coded to 58 for this genome.
        krat = Genome('Kangaroo rat', Release=58)
        start = 24385
        end = start + 100
        region = krat.getRegion(CoordName='scaffold_13754',
                                Start=start,
                                End=end)
        self.assertEqual(str(region.Seq), 'N' * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand('-'), -1)
        self.assertEqual(convert_strand('+'), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # passes when construction with pool_recycle does not raise; the
        # Genome itself is not needed, so it is not bound to a name
        Genome(Species="dog",
               Release=Release,
               account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        self.gorilla = Genome(Species="gorilla",
                              Release=Release,
                              account=account)
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        # NOTE(review): self.gorilla is not assigned in this method --
        # presumably GenomeTestBase provides it; confirm.
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        expected = (
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')
        self.assertEqual(str(cds), expected)

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all genomes"""
        # only checks that each call completes; the result is not inspected
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all genomes"""
        # only checks that each call completes; the result is not inspected
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')