def run_robot(request_body, job_url):
    """Rebuild a robot from a serialized request body and enqueue one crawl
    task per job.

    Args:
        request_body: ndb_serialize payload containing the robot definition.
        job_url: task handler URL each enqueued task will be posted to.

    Side effects: adds one taskqueue task per job returned by
    ``crawler.get_jobs()``; returns nothing.
    """
    robot = ndb_serialize.loads(request_body)
    crawler = Crawler(robot)
    # The bound crawl callable is shipped inside each payload so the worker
    # can resume without reconstructing crawler state.
    crawl_fn = crawler.crawl
    # TODO batch add
    for pending_job in crawler.get_jobs():
        payload = ndb_serialize.dumps((robot, crawl_fn, pending_job))
        taskqueue.add(url=job_url, payload=payload)
class CrawlerTests(unittest.TestCase):
    """Integration tests for Crawler against live URLs.

    NOTE(review): these tests hit the network; they will be slow/flaky
    offline. The final URL is intentionally unresolvable to exercise the
    error path.
    """

    TestUrls = [
        "http://www.google.com",
        "http://www.markvelez.com",
        "https://www.cia.gov/library/publications/the-world-factbook/geos/ag.html",
        "http://k2_7.asdf1234.net",
    ]

    def setUp(self):
        self.crawler = Crawler(CrawlerTests.TestUrls)

    def test_crawl(self):
        rs = self.crawler.crawl()
        last_index = len(CrawlerTests.TestUrls) - 1
        # Verify every good URL (all but the last). The original iterated
        # range(0, len-2), which skipped the final good URL — off-by-one fix.
        for url_index in range(last_index):
            self.assertEqual(CrawlerTests.TestUrls[url_index], rs[url_index]['url'])
            self.assertTrue(len(rs[url_index]['response']) > 0)
        # The unresolvable last URL must yield no response and a populated error.
        bad_result = rs[last_index]
        self.assertEqual(bad_result['url'], CrawlerTests.TestUrls[last_index])
        self.assertIsNone(bad_result['response'])
        self.assertIsNotNone(bad_result['error'])
def setUp(self):
    """Build a fresh Crawler over the shared test URL list before each test."""
    urls = CrawlerTests.TestUrls
    self.crawler = Crawler(urls)
def run_job(request_body):
    """Deserialize a (robot, crawl, job) task payload and execute the job.

    Args:
        request_body: ndb_serialize payload produced by run_robot, holding
            the robot, its crawl callable, and the job to run.

    Side effects: runs the job via a freshly constructed Crawler; returns
    nothing.
    """
    robot, crawl, job = ndb_serialize.loads(request_body)
    worker = Crawler(robot=robot, crawl=crawl)
    worker.run_job(job)