def test_check_bulk(self):
    ''' test de-dupe with 100k URLs

    Expectations checked against DeDupeCache:
      * the first is_url_dup() call for a URL reports False and
        url_count goes up by one;
      * further is_url_dup() calls for the same URL report True and
        leave url_count unchanged;
      * each del_url() call lowers url_count by one, and after every
        URL has been removed the cache passes check_empty_cache().
    '''
    total = 100000
    url_fmt = 'http://www.nyu.edu/engineering/access.aspx?magicnum=%d'
    cc = DeDupeCache()

    # Phase 1: insert `total` distinct URLs, probing each one three times.
    for idx in range(total):
        url = url_fmt % idx

        # First sighting: not a duplicate, and it is counted afterwards.
        self.assertFalse(cc.is_url_dup(url))
        self.assertEqual(cc.url_count, idx + 1)

        # Two repeat probes: both must be reported as duplicates and
        # neither may change the count.
        for _ in range(2):
            self.assertTrue(cc.is_url_dup(url))
            self.assertEqual(cc.url_count, idx + 1)

    # Phase 2: remove the URLs in insertion order; the count must drop
    # by exactly one per removal.
    for idx in range(total):
        cc.del_url(url_fmt % idx)
        self.assertEqual(cc.url_count, total - idx - 1)

    self.check_empty_cache(cc)