def testFlushBatches(self):
    """
    With a batch size of one, L{WikipediaPageHandler} writes a new file to
    the output directory each time a page is handled.
    """
    firstFile = 'wikipedia-titles-00001.json'
    secondFile = 'wikipedia-titles-00002.json'
    pageHandler = WikipediaPageHandler(self.outputPath, batchSize=1)

    # Handling a single page fills the batch, so one file must exist.
    pageHandler.handle(WikipediaPage('Sample page 1'))
    afterFirstPage = os.listdir(self.outputPath)
    self.assertEqual([firstFile], afterFirstPage)

    # A second page goes into a second file; the listing is sorted because
    # os.listdir returns entries in arbitrary order.
    pageHandler.handle(WikipediaPage('Sample page 2'))
    afterSecondPage = sorted(os.listdir(self.outputPath))
    self.assertEqual([firstFile, secondFile], afterSecondPage)
class WikipediaPageHandlerTest(TestCase):
    """
    Tests for L{WikipediaPageHandler}, run against a temporary output
    directory that is created before and removed after each test.
    """

    def setUp(self):
        """
        Create the temporary output directory and a default handler
        constructed with that directory.
        """
        super(WikipediaPageHandlerTest, self).setUp()
        outputPath = mkdtemp()
        self.outputPath = outputPath
        self.handler = WikipediaPageHandler(outputPath)

    def tearDown(self):
        """Remove the temporary output directory and everything in it."""
        rmtree(self.outputPath)
        super(WikipediaPageHandlerTest, self).tearDown()

    def testCloseWithoutPages(self):
        """
        Calling L{WikipediaPageHandler.close} when no pages have been
        handled leaves the output directory empty.
        """
        self.handler.close()
        writtenFiles = os.listdir(self.outputPath)
        self.assertEqual([], writtenFiles)

    def testCloseFlushesPages(self):
        """
        L{WikipediaPageHandler.close} writes pages that are still pending
        to a JSON file in the output directory.
        """
        page = WikipediaPage('Sample page')
        self.handler.handle(page)
        self.handler.close()

        # Build the expected document piece by piece.
        url = 'http://en.wikipedia.org/wiki/Sample_page'
        expectedObject = {'about': 'sample page',
                          'values': {'en.wikipedia.org/url': url}}
        expected = {'objects': [expectedObject]}

        path = os.path.join(self.outputPath, 'wikipedia-titles-00001.json')
        with open(path, 'r') as jsonFile:
            written = load(jsonFile)
        self.assertEqual(expected, written)

    def testFlushBatches(self):
        """
        With a batch size of one, L{WikipediaPageHandler} writes a new file
        to the output directory each time a page is handled.
        """
        firstFile = 'wikipedia-titles-00001.json'
        secondFile = 'wikipedia-titles-00002.json'
        pageHandler = WikipediaPageHandler(self.outputPath, batchSize=1)

        pageHandler.handle(WikipediaPage('Sample page 1'))
        afterFirstPage = os.listdir(self.outputPath)
        self.assertEqual([firstFile], afterFirstPage)

        # Sorted because os.listdir returns entries in arbitrary order.
        pageHandler.handle(WikipediaPage('Sample page 2'))
        afterSecondPage = sorted(os.listdir(self.outputPath))
        self.assertEqual([firstFile, secondFile], afterSecondPage)
def setUp(self):
    """
    Create a fresh temporary directory, stored as C{self.outputPath}, and a
    L{WikipediaPageHandler} constructed with that path, stored as
    C{self.handler}.
    """
    super(WikipediaPageHandlerTest, self).setUp()
    outputPath = mkdtemp()
    self.outputPath = outputPath
    self.handler = WikipediaPageHandler(outputPath)