Example #1
from google.appengine.api import taskqueue  # App Engine push queues

import ndb_serialize  # project-specific serialization helpers (loads/dumps)


def run_robot(request_body, job_url):
    # Deserialize the robot and fan out one task per crawl job.
    robot = ndb_serialize.loads(request_body)
    crawler = Crawler(robot)
    crawl = crawler.crawl
    for job in crawler.get_jobs():
        # TODO: batch-add tasks instead of calling taskqueue.add() per job
        taskqueue.add(url=job_url, payload=ndb_serialize.dumps((robot, crawl, job)))
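The ndb_serialize module itself is not shown on this page; the snippets only rely on a loads/dumps pair, so a pickle-based stand-in like the sketch below is enough to follow the fan-out. The module body here is an assumption, not the project's actual implementation.

import pickle


def dumps(obj):
    # Serialize the (robot, crawl, job) payload into bytes for the task queue.
    return pickle.dumps(obj)


def loads(payload):
    # Restore the payload from a task's request body.
    return pickle.loads(payload)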
import unittest


class CrawlerTests(unittest.TestCase):
	TestUrls = [
		"http://www.google.com",
		"http://www.markvelez.com",
		"https://www.cia.gov/library/publications/the-world-factbook/geos/ag.html",
		"http://k2_7.asdf1234.net",  # deliberately unreachable host
	]

	def setUp(self):
		self.crawler = Crawler(CrawlerTests.TestUrls)

	def test_crawl(self):
		rs = self.crawler.crawl()
		# Every URL except the last should return its URL and a non-empty response.
		for url_index in range(len(CrawlerTests.TestUrls) - 1):
			self.assertEqual(CrawlerTests.TestUrls[url_index], rs[url_index]['url'])
			self.assertTrue(len(rs[url_index]['response']) > 0)
		# The unreachable last URL should report an error and no response.
		lastIndex = len(CrawlerTests.TestUrls) - 1
		badResult = rs[lastIndex]
		self.assertEqual(badResult['url'], CrawlerTests.TestUrls[lastIndex])
		self.assertIsNone(badResult['response'])
		self.assertIsNotNone(badResult['error'])
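The Crawler class itself is not included in these examples. Below is a minimal sketch of a crawl() that would satisfy CrawlerTests, assuming the Python 2 / App Engine era runtime these snippets target; the constructor only covers the URL-list form used by the tests, and the get_jobs()/run_job() methods from the task-queue examples are omitted.

import urllib2


class Crawler(object):
    def __init__(self, urls):
        self.urls = urls

    def crawl(self):
        results = []
        for url in self.urls:
            try:
                body = urllib2.urlopen(url, timeout=10).read()
                results.append({'url': url, 'response': body, 'error': None})
            except Exception as e:
                # Unreachable hosts (like the deliberately bad last test URL)
                # yield a None response and a recorded error.
                results.append({'url': url, 'response': None, 'error': e})
        return results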
Example #4
def run_job(request_body):
    # Rebuild the (robot, crawl, job) tuple enqueued by run_robot, then execute the job.
    robot, crawl, job = ndb_serialize.loads(request_body)
    crawler = Crawler(robot=robot, crawl=crawl)
    crawler.run_job(job)
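How run_robot and run_job are routed is not shown in these examples. One plausible wiring, sketched below with assumed route paths, is a pair of webapp2 push-queue handlers: the robot handler fans tasks out to /tasks/job, and each task's payload arrives as the request body that run_job deserializes.

import webapp2


class RobotHandler(webapp2.RequestHandler):
    def post(self):
        # Fan out: one task per crawl job, all pointed at the job handler below.
        run_robot(self.request.body, job_url='/tasks/job')


class JobHandler(webapp2.RequestHandler):
    def post(self):
        # The task payload set in run_robot arrives here as the request body.
        run_job(self.request.body)


app = webapp2.WSGIApplication([
    ('/tasks/robot', RobotHandler),
    ('/tasks/job', JobHandler),
])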