def test_bulkdata(self):
    j = self.project.push_job(self.spidername, state='running')
    for i in xrange(20):
        j.logs.info("log line %d" % i)
        j.items.write(dict(field1="item%d" % i))
        j.requests.add("http://test.com/%d" % i, 200, 'GET', 10, None, 10, 120)
    for resourcename in ('logs', 'items', 'requests'):
        resource = getattr(j, resourcename)
        resource.flush()
        # download the resource, with simulated failures
        with failing_downloader(resource):
            downloaded = list(resource.iter_values())
            self.assertEqual(len(downloaded), 20)
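
# Both tests exercise retry behaviour via a failing_downloader helper defined
# elsewhere in this test module. A minimal sketch of what such a helper might
# look like, assuming the wrapped object exposes a low-level fetch method that
# can be monkeypatched -- the `_get` attribute name and the `fail_times`
# parameter are illustrative assumptions, not the actual client API:
from contextlib import contextmanager

@contextmanager
def failing_downloader(resource, fail_times=2):
    original = resource._get
    calls = {'n': 0}

    def flaky(*args, **kwargs):
        # fail the first few calls to simulate transient network errors,
        # then delegate to the real method so the download can resume
        calls['n'] += 1
        if calls['n'] <= fail_times:
            raise IOError("simulated network failure")
        return original(*args, **kwargs)

    resource._get = flaky
    try:
        yield resource
    finally:
        # always restore the real method, even if the test body raises
        resource._get = original
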
def test_data_download(self):
    col = self.project.collections.new_store(self.test_collection_name)
    items = []
    # write 20 items with predictable keys; closing the writer flushes them
    with closing(col.create_writer()) as writer:
        for i in xrange(20):
            test_item = _mkitem()
            test_item['_key'] = "test_data_download%d" % i
            test_item['counter'] = i
            writer.write(test_item)
            items.append(test_item)

    # check parameters are passed correctly: the prefix matches keys
    # test_data_download1 and test_data_download10..19, i.e. 11 items
    downloaded = list(col.iter_values(prefix='test_data_download1'))
    self.assertEqual(len(downloaded), 11)

    # simulate network timeouts and download data; keys sort
    # lexicographically, so every key except test_data_download0
    # falls at or after the start key, i.e. 19 items
    with failing_downloader(self.project.collections):
        downloaded = list(col.iter_values(start='test_data_download1'))
        self.assertEqual(len(downloaded), 19)
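
# _mkitem is another helper defined elsewhere in the module; the test above
# only needs it to return a fresh dict that the collection writer can
# serialize. A plausible sketch, with field names and values that are
# assumptions for illustration:
def _mkitem():
    return dict(field1='value1', field2=['value2a', 'value2b'],
                field3=3, field4={'v4k': 'v4v'})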