Ejemplo n.º 1
0
class TestEnsemblCachePy(unittest.TestCase):
    """ unit test the EnsemblCache class

    All tests share a single EnsemblCache instance backed by a temporary
    directory, which is created once for the class and removed afterwards.
    Because the cache is shared, each test uses its own URL.
    """

    @classmethod
    def setUpClass(cls):
        """ create the temporary directory and the shared cache
        """
        cls.temp_dir = tempfile.mkdtemp()
        try:
            cls.cache = EnsemblCache(cls.temp_dir)
        except Exception:
            # tearDownClass is not run when setUpClass raises, so remove the
            # directory here rather than leaking it
            shutil.rmtree(cls.temp_dir, ignore_errors=True)
            raise

    @classmethod
    def tearDownClass(cls):
        """ remove the temporary directory and everything inside it
        """
        shutil.rmtree(cls.temp_dir)

    def test_get_key_from_url(self):
        """ test that get_key_from_url() works correctly
        """

        # (url, expected) pairs. In these cases the second element of the
        # expected tuple is 'grch37' for the grch37 subdomain, otherwise
        # 'grch38'.
        cases = [
            ("http://rest.ensembl.org/info/rest",
                ("info.rest", 'grch38')),
            ("http://rest.ensembl.org/xrefs/symbol/homo_sapiens/ABO",
                ("xrefs.symbol.homo_sapiens.ABO", 'grch38')),
            ("http://rest.ensembl.org/sequence/id/ENST00000378520?type=protein",
                ("sequence.id.ENST00000378520.protein", 'grch38')),
            ("http://rest.ensembl.org/feature/id/ENSG00000175164?feature=transcript",
                ("feature.id.ENSG00000175164.transcript", 'grch38')),
            ("http://rest.ensembl.org/sequence/id/ENST00000538324?type=genomic;expand_3prime=10;expand_5prime=10",
                ("sequence.id.ENST00000538324.genomic", 'grch38')),
            ("http://rest.ensembl.org/sequence/id/ENST00000538324?type=cds",
                ("sequence.id.ENST00000538324.cds", 'grch38')),
            ("http://rest.ensembl.org/feature/id/ENST00000538324?feature=exon",
                ("feature.id.ENST00000538324.exon", 'grch38')),
            ("http://rest.ensembl.org/vep/human/id/rs3887873/consequences?",
                ("vep.human.id.rs3887873.consequences", 'grch38')),
            ("http://rest.ensembl.org/vep/human/9:22125503-22125502:1/C/consequences?",
                ("vep.human.9_22125503-22125502_1.C.consequences", 'grch38')),
            ("http://grch37.rest.ensembl.org/vep/human/9:22125503-22125502:1/C/consequences?",
                ("vep.human.9_22125503-22125502_1.C.consequences", 'grch37')),
        ]

        for url, expected in cases:
            self.assertEqual(self.cache.get_key_from_url(url), expected)

    def test_get_cached_data(self):
        """ test that get_cached_data() works correctly
        """

        # set up the data to go in the database
        url = "http://rest.ensembl.org/feature/id/temp1?feature=exon"
        string = b"temp_data"

        # check that the data is not in the database to start
        self.assertIsNone(self.cache.get_cached_data(url))

        # insert the data in the database
        self.cache.cache_url_data(url, string)

        # check that some data is now in the database
        data = self.cache.get_cached_data(url)
        self.assertIsNotNone(data)

        # check that the data is correct if the row is in the database
        self.assertEqual(data, string)

    def test_get_cached_data_old_date(self):
        """ check that the cache ignores outdated data
        """
        # use a URL no other test touches, so this test does not depend on
        # what the other tests have written to the shared cache
        url = "http://rest.ensembl.org/feature/id/temp3?feature=exon"
        string = b"temp_data"
        today = datetime.today()
        long_ago = today - timedelta(days=181)

        # the cache is shared by every test in the class, so make sure its
        # date is put back even if an assertion below fails
        self.addCleanup(setattr, self.cache, 'today', today)

        # data written and read on the same (old) date is still returned
        self.cache.today = long_ago
        self.cache.cache_url_data(url, string)
        self.assertIsNotNone(self.cache.get_cached_data(url))

        # check that obsolete data returns None once the date has moved on
        self.cache.today = today
        self.assertIsNone(self.cache.get_cached_data(url))

    def test_cache_url_data(self):
        """ test that cache_url_data works correctly
        """

        # set up the data to go in the database
        url = "http://rest.ensembl.org/feature/id/temp2?feature=exon"
        temp_data = b"temp_data"

        # check that the data is not in before we insert it
        self.assertIsNone(self.cache.get_cached_data(url))

        # insert the data, then check that it has gone in
        self.cache.cache_url_data(url, temp_data)
        self.assertIsNotNone(self.cache.get_cached_data(url))

    def test_cache_load(self):
        """ make sure the cache can handle a reasonable load

        This test uses multiple threads writing to the cache simultaneously to
        show the cache can handle the load. Each thread opens its own
        EnsemblCache on a shared directory and records any exception it hits;
        the test fails if any thread recorded one.
        """

        cache_dir = os.path.join(self.temp_dir, 'loading')
        os.mkdir(cache_dir)

        def random_hex(length):
            # hex text built from (length * 4) random bits, so it has at most
            # `length` digits
            return '{:x}'.format(random.getrandbits(length * 4))

        def write(cache):
            # the URL is built from text rather than bytes, so it does not
            # end up containing a b'...' repr under Python 3; only the payload
            # is encoded to bytes
            url = 'example.com/base/sub/{}'.format(random_hex(10))
            cache.cache_url_data(url, random_hex(100).encode('utf8'))

        class Runner(Thread):
            def __init__(self, counter=100):
                super(Runner, self).__init__()
                self.counter = counter
                self.error = None

            def run(self):
                # an exception raised inside a thread never reaches the code
                # that called start()/join(), so keep it for the test to check
                try:
                    cache = EnsemblCache(cache_dir)
                    while self.counter > 0:
                        write(cache)
                        self.counter -= 1
                except Exception as error:
                    self.error = error

        threads = [Runner() for _ in range(50)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        errors = [x.error for x in threads if x.error is not None]
        if errors:
            self.fail("EnsemblCache failed under heavy load: {} of {} threads "
                "raised, first error: {!r}".format(
                    len(errors), len(threads), errors[0]))