def test_double(self):
    """A pre-existing event with the same URL must not be scraped twice."""
    runtime = SchedulerRuntime()
    runtime.save()
    existing = Event(
        title=u'Event 1',
        url=u'http://localhost:8010/static/site_generic/event1.html',
        checker_runtime=runtime,
    )
    existing.save()
    self.run_event_spider(1)
    # Four events are scraped overall, but 'Event 1' must remain unique.
    self.assertEqual(len(Event.objects.all()), 4)
    self.assertEqual(len(Event.objects.filter(title='Event 1')), 1)
def test_double_standard_id_field(self):
    """With title+description as compound ID, only exact pairs count as doubles."""
    runtime = SchedulerRuntime()
    runtime.save()
    fixtures = (
        (u'Event 1', u'Event 1 description', u'http://localhost:8010/static/site_generic/event5.html'),
        (u'Event 2', u'Event 1 description', u'http://localhost:8010/static/site_generic/event6.html'),
        (u'Event 1', u'Event 2 description', u'http://localhost:8010/static/site_generic/event7.html'),
    )
    for title, desc, url in fixtures:
        Event(title=title, event_website=self.event_website,
              description=desc, url=url, checker_runtime=runtime).save()
    # Move the ID role from the URL attribute to title + description.
    self.soa_url.id_field = False
    self.soa_url.save()
    self.soa_title.id_field = True
    self.soa_title.save()
    self.soa_desc.id_field = True
    self.soa_desc.save()
    self.run_event_spider(1)
    self.assertEqual(len(Event.objects.all()), 6)
    self.assertEqual(Event.objects.filter(description='Event 1 description').count(), 2)
def test_double(self):
    """Re-scraping an already-stored event (matched by URL) creates no duplicate."""
    runtime = SchedulerRuntime()
    runtime.save()
    seeded = Event(
        title=u'Event 1',
        event_website=self.event_website,
        url=u'http://localhost:8010/static/site_generic/event1.html',
        checker_runtime=runtime,
    )
    seeded.save()
    self.run_event_spider(1)
    # Total of four events; the seeded one is recognised and not re-created.
    self.assertEqual(len(Event.objects.all()), 4)
    self.assertEqual(len(Event.objects.filter(title='Event 1')), 1)
def test_detail_page_url_id_field(self):
    """An event seeded under a different detail URL is scraped again as new."""
    runtime = SchedulerRuntime()
    runtime.save()
    seeded = Event(
        title=u'Event 1',
        event_website=self.event_website,
        url=u'http://localhost:8010/static/site_generic/event5.html',
        checker_runtime=runtime,
    )
    seeded.save()
    self.run_event_spider(1)
    # URL is the ID field, so the same title under another URL is a new object.
    self.assertEqual(len(Event.objects.all()), 5)
    self.assertEqual(Event.objects.filter(title='Event 1').count(), 2)
def test_standard_update_field_update(self):
    """A 'T' (standard/update) title attribute is overwritten on re-scrape."""
    runtime = SchedulerRuntime()
    runtime.save()
    stale = Event(
        title=u'Event 1 - Old Title',
        event_website=self.event_website,
        url=u'http://localhost:8010/static/site_generic/event1.html',
        checker_runtime=runtime,
    )
    stale.save()
    self.soa_title.attr_type = 'T'
    self.soa_title.save()
    self.run_event_spider(1)
    refreshed = Event.objects.get(pk=stale.id)
    # Title was refreshed in place; the stale value is gone.
    self.assertEqual(refreshed.title, 'Event 1')
    self.assertEqual(len(Event.objects.filter(title='Event 1 - Old Title')), 0)
def test_single_standard_id_field(self):
    """With the title as the single ID field, a same-title event is no double."""
    runtime = SchedulerRuntime()
    runtime.save()
    seeded = Event(
        title='Event 1',
        event_website=self.event_website,
        url='http://localhost:8010/static/site_generic/event5.html',
        checker_runtime=runtime,
    )
    seeded.save()
    # Swap the ID role from URL to title.
    self.soa_url.id_field = False
    self.soa_url.save()
    self.soa_title.id_field = True
    self.soa_title.save()
    self.run_event_spider(1)
    self.assertEqual(len(Event.objects.all()), 4)
    self.assertEqual(Event.objects.filter(title='Event 1').count(), 1)
def test_single_standard_id_field(self):
    """Title as the sole ID field: a pre-seeded 'Event 1' suppresses its double."""
    runtime = SchedulerRuntime()
    runtime.save()
    seeded = Event(
        title=u'Event 1',
        event_website=self.event_website,
        url=u'http://localhost:8010/static/site_generic/event5.html',
        checker_runtime=runtime,
    )
    seeded.save()
    # ID matching switches from URL to title for this run.
    self.soa_url.id_field = False
    self.soa_url.save()
    self.soa_title.id_field = True
    self.soa_title.save()
    self.run_event_spider(1)
    self.assertEqual(len(Event.objects.all()), 4)
    self.assertEqual(Event.objects.filter(title='Event 1').count(), 1)
def setUpScraperJSChecker(self, path):
    """Configure an x_path checker (relative to *path*) plus its target event.

    path: base URL prefix of the static test server (trailing slash included).
    """
    super(ScraperJSRunTest, self).setUp()
    self.scraper.checker_type = 'X'
    self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
    self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='%ssite_with_js/event_not_found.html' % path,
        checker_runtime=runtime,
    )
    self.event.save()
def extraSetUpHTMLChecker(self):
    """Point the scraper's x_path checker at the HTML 404 page and seed an event."""
    self.scraper.checker_type = 'X'
    self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
    self.scraper.checker_x_path_result = u'Event not found!'
    self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='http://localhost:8010/static/site_with_json_content_type/event_not_found.html',
        checker_runtime=runtime,
    )
    self.event.save()
def extraSetUpJSONChecker(self):
    """Configure a JSON detail-page checker (JSONPath-style x_path) and seed an event."""
    self.scraper.detail_page_content_type = 'J'
    self.scraper.checker_type = 'X'
    self.scraper.checker_x_path = u'event_not_found'
    self.scraper.checker_x_path_result = u'Event not found!'
    self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='http://localhost:8010/static/site_with_json_content_type/event_not_found.json',
        checker_runtime=runtime,
    )
    self.event.save()
def setUp(self):
    """Common fixture: x_path checker on the scraper plus one live event."""
    super(CheckerRunTest, self).setUp()
    self.scraper.checker_type = 'X'
    self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
    self.scraper.checker_x_path_result = u'Event was deleted!'
    self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='http://localhost:8010/static/site_for_checker/event1.html',
        checker_runtime=runtime,
    )
    self.event.save()
def extraSetUpHTMLChecker(self):
    """Attach an HTML x_path checker to the scraper and create its target event."""
    self.scraper.checker_type = 'X'
    self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
    self.scraper.checker_x_path_result = u'Event not found!'
    self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='http://localhost:8010/static/site_with_json_content_type/event_not_found.html',
        checker_runtime=runtime,
    )
    self.event.save()
def setUp(self):
    """Minimal checker fixture: x_path + expected result only, plus one event.

    NOTE(review): checker_type / checker_ref_url are deliberately left to
    whatever the base setUp provides — verify that is intended.
    """
    super(CheckerRunTest, self).setUp()
    self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
    self.scraper.checker_x_path_result = u'Event was deleted!'
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='http://localhost:8010/static/site_for_checker/event1.html',
        checker_runtime=runtime,
    )
    self.event.save()
def extraSetUpJSONChecker(self):
    """Attach a JSON checker (detail page content type 'J') and its target event."""
    self.scraper.detail_page_content_type = 'J'
    self.scraper.checker_type = 'X'
    self.scraper.checker_x_path = u'event_not_found'
    self.scraper.checker_x_path_result = u'Event not found!'
    self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title='Event 1',
        event_website=self.event_website,
        description='Event 1 description',
        url='http://localhost:8010/static/site_with_json_content_type/event_not_found.json',
        checker_runtime=runtime,
    )
    self.event.save()
def setUp(self):
    """Shared checker fixture: scraper-level x_path checker and one event."""
    super(CheckerRunTest, self).setUp()
    self.scraper.checker_type = "X"
    self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
    self.scraper.checker_x_path_result = u"Event was deleted!"
    self.scraper.checker_ref_url = u"http://localhost:8010/static/site_for_checker/event_not_found.html"
    self.scraper.save()
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title="Event 1",
        event_website=self.event_website,
        description="Event 1 description",
        url="http://localhost:8010/static/site_for_checker/event1.html",
        checker_runtime=runtime,
    )
    self.event.save()
def extraSetUpHTMLChecker(self):
    """Create a URL-bound Checker model (x_path type) and its target event."""
    checker = Checker()
    checker.scraped_obj_attr = self.soa_url
    checker.scraper = self.scraper
    checker.checker_type = "X"
    checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
    checker.checker_x_path_result = "Event not found!"
    checker.checker_ref_url = "http://localhost:8010/static/site_with_json_content_type/event_not_found.html"
    checker.save()
    self.checker = checker
    runtime = SchedulerRuntime()
    runtime.save()
    self.event = Event(
        title="Event 1",
        event_website=self.event_website,
        description="Event 1 description",
        url="http://localhost:8010/static/site_with_json_content_type/event_not_found.html",
        checker_runtime=runtime,
    )
    self.event.save()
class CheckerRunTest(ScraperTest):
    """Tests for the scraper-level checker ('X' x_path and '4' 404 types).

    setUp installs an x_path checker on the scraper and seeds one Event
    pointing at an existing detail page; each test then moves the event's
    URL (or the checker config) and asserts whether the checker deletes it.
    """

    def setUp(self):
        super(CheckerRunTest, self).setUp()
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt,
        )
        self.event.save()

    def test_checker_test_wrong_checker_config(self):
        # An empty ref URL is an invalid checker config: the test run aborts.
        self.scraper.checker_ref_url = ''
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_checker_test, 1)

    def test_none_type(self):
        # Checker type 'N' (none) is not runnable: the spider must close.
        self.scraper.checker_type = 'N'
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        # Page exists and the deletion marker is absent: event is kept.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        # Blank expected result: still no match on a healthy page, so keep.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        # Detail page is gone (404): the event is deleted.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        # NOTE(review): with num_zero_actions pre-set to 3 the event survives
        # the 404 run — presumably checker scheduling counts zero-actions
        # before deleting; confirm against the checker implementation.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()
        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        # event2.html carries the 'Event was deleted!' marker: delete.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank expected result: any x_path hit deletes the event.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    @unittest.skip(
        "Skipped due to unresolved problem with IMAGES_STORE setting not used from scraper_test.py."
    )
    def test_x_path_type_404_delete_with_img(self):
        # Deleting an event with an 'I' (image) attribute must remove the file.
        path = os.path.join(self.PROJECT_ROOT, 'imgs/event_image.jpg')
        if not os.path.exists(path):
            # FIX: original wrote 'Let\s ...' (invalid escape sequence) and
            # left the handle unmanaged; use a context manager instead.
            with open(path, 'w') as img_file:
                img_file.write("Let's assume this is an image!")
        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()
        self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        self.assertFalse(os.path.exists(path))

    def test_404_type_404_delete(self):
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # The plain 404 checker ignores the x_path deletion marker: keep.
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
def test_double_standard_id_field(self):
    """Compound title+description ID: only the exact pair is treated as a double."""
    runtime = SchedulerRuntime()
    runtime.save()
    fixtures = (
        ('Event 1', 'Event 1 description', 'http://localhost:8010/static/site_generic/event5.html'),
        ('Event 2', 'Event 1 description', 'http://localhost:8010/static/site_generic/event6.html'),
        ('Event 1', 'Event 2 description', 'http://localhost:8010/static/site_generic/event7.html'),
    )
    for title, desc, url in fixtures:
        Event(title=title, event_website=self.event_website,
              description=desc, url=url, checker_runtime=runtime).save()
    # Switch ID matching from URL to the title/description pair.
    self.soa_url.id_field = False
    self.soa_url.save()
    self.soa_title.id_field = True
    self.soa_title.save()
    self.soa_desc.id_field = True
    self.soa_desc.save()
    self.run_event_spider(1)
    self.assertEqual(len(Event.objects.all()), 6)
    self.assertEqual(Event.objects.filter(description='Event 1 description').count(), 2)
# NOTE(review): this block was corrupted by credential/secret masking —
# multiple statements were fused into literals containing 'http://*****:*****@...'
# and the original URLs/intervening lines cannot be recovered from this view.
# Code is left byte-identical (it is not valid Python as-is); restore the
# block from VCS history before use.
class CheckerRunTest(ScraperTest): def setUp(self): super(CheckerRunTest, self).setUp() self.checker = Checker() self.checker.scraped_obj_attr = self.soa_url self.checker.scraper = self.scraper self.checker.checker_type = 'X' self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()' self.checker.checker_x_path_result = 'Event was deleted!' self.checker.checker_ref_url = 'http://*****:*****@class="event_not_found"]/div/text()' self.checker2.checker_x_path_result = 'Event was deleted!' self.checker2.checker_ref_url = 'http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()' self.checker.save() self.event.url = 'http://*****:*****@unittest.skip("Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings.") def test_checker_test_wrong_checker_config(self): self.checker.checker_ref_url = '' self.checker.save() self.assertRaises(CloseSpider, self.run_checker_test(1))
# NOTE(review): this block was corrupted by credential/secret masking — the
# extraSetUpHTMLChecker/extraSetUpJSONChecker bodies were fused into literals
# containing 'http://*****:*****@...' and several original statements are
# unrecoverable from this view. Code left byte-identical (not valid Python
# as-is); restore from VCS history before use.
class ScraperJSONRunTest(ScraperTest): def setUpScraperJSONDefaultScraper(self): self.se_base.x_path = u'response.events' self.se_base.save() self.se_title.x_path = u'title' self.se_title.save() self.se_url.x_path = u'url' self.se_url.save() self.se_desc.x_path = u'description' self.se_desc.from_detail_page = False self.se_desc.save() self.scraper.content_type = 'J' self.scraper.save() self.event_website.url = os.path.join( self.SERVER_URL, 'site_with_json_content_type/event_main.json') self.event_website.save() def extraSetUpHTMLChecker(self): self.scraper.checker_type = 'X' self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()' self.scraper.checker_x_path_result = u'Event not found!' self.scraper.checker_ref_url = u'http://*****:*****@class="description"]/text()' self.se_desc.from_detail_page = True self.se_desc.save() self.run_event_spider(1) #log.msg(unicode(Event.objects.all()), level=log.INFO) self.assertEqual( len( Event.objects.filter( description='Event Detail Page 1 Description')), 1) def test_detail_page_json(self): self.setUpScraperJSONDefaultScraper() self.scraper.detail_page_content_type = 'J' self.scraper.save() self.se_url.x_path = u'json_url' self.se_url.save() self.se_desc.x_path = u'event_details.description' self.se_desc.from_detail_page = True self.se_desc.save() self.run_event_spider(1) #log.msg(unicode(Event.objects.all()), level=log.INFO) self.assertEqual( len( Event.objects.filter( description='Event Detail Page 1 Description')), 1) def test_checker_x_path_type_x_path_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpHTMLChecker() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_checker_x_path_type_x_path_no_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpHTMLChecker() self.scraper.checker_x_path = u'//div[@class="this_is_the_wrong_xpath"]/div/text()' self.scraper.save() 
self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) def test_json_checker_x_path_type_x_path_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_json_checker_x_path_type_x_path_no_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.scraper.checker_x_path = u'this_is_the_wrong_xpath' self.scraper.save() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1)
class CheckerRunTest(ScraperTest):
    """Scraper-level checker tests ('X' and '4' types) including image cleanup.

    setUp installs an x_path checker and seeds one Event; the image tests
    verify that deleting an event also removes its stored image files
    (flat layout, full/ layout, and thumbnail directories).
    """

    def setUp(self):
        super(CheckerRunTest, self).setUp()
        self.scraper.checker_type = "X"
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u"Event was deleted!"
        self.scraper.checker_ref_url = u"http://localhost:8010/static/site_for_checker/event_not_found.html"
        self.scraper.save()
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        self.event = Event(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url="http://localhost:8010/static/site_for_checker/event1.html",
            checker_runtime=scheduler_rt,
        )
        self.event.save()

    def test_none_type(self):
        # Checker type 'N' (none) is not runnable: the spider must close.
        self.scraper.checker_type = "N"
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        # Page exists and the deletion marker is absent: event is kept.
        self.event.url = "http://localhost:8010/static/site_for_checker/event1.html"
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        self.scraper.checker_x_path_result = ""
        self.event.url = "http://localhost:8010/static/site_for_checker/event1.html"
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        # Detail page is gone (404): the event is deleted.
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        # NOTE(review): with num_zero_actions pre-set to 3 the event survives
        # the 404 run — presumably the checker counts zero-actions before
        # deleting; confirm against the checker scheduling logic.
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.save()
        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()
        kwargs = {"id": 1, "do_action": "yes", "run_type": "TASK"}
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        # event2.html carries the deletion marker text: delete.
        self.event.url = "http://localhost:8010/static/site_for_checker/event2.html"
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank expected result: any x_path hit deletes the event.
        self.scraper.checker_x_path_result = ""
        self.event.url = "http://localhost:8010/static/site_for_checker/event2.html"
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        """Create a dummy 'event_image.jpg' in each dir; return created paths."""
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, "event_image.jpg")
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                # FIX: original wrote "Let\s ..." (invalid escape sequence) and
                # left the handle unmanaged; use a context manager instead.
                with open(path, "w") as img_file:
                    img_file.write("Let's assume this is an image!")
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        """Run the checker against a 404 URL; event AND its images must vanish."""
        img_paths = self._create_imgs_in_dirs(img_dirs)
        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = "I"
        self.soa_desc.save()
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.description = "event_image.jpg"
        self.event.save()
        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))

    def test_delete_with_img_flat_no_thumbs(self):
        self._run_img_test_with_dirs(["imgs/"])

    def test_delete_with_img_flat_with_thumbs(self):
        self._run_img_test_with_dirs(["imgs/"])

    def test_delete_with_img_all_no_thumbs(self):
        self._run_img_test_with_dirs(["imgs/full/"])

    def test_delete_with_img_all_with_thumbs(self):
        self._run_img_test_with_dirs(
            ["imgs/full/", "imgs/thumbs/medium/", "imgs/thumbs/small/"])

    def test_delete_with_img_thumbs_with_thumbs(self):
        self._run_img_test_with_dirs(["imgs/thumbs/medium/", "imgs/thumbs/small/"])

    def test_404_type_404_delete(self):
        self.scraper.checker_type = "4"
        self.scraper.save()
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # The plain 404 checker ignores the x_path deletion marker: keep.
        self.scraper.checker_type = "4"
        self.scraper.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    @unittest.skip(
        "Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings."
    )
    def test_checker_test_wrong_checker_config(self):
        self.scraper.checker_ref_url = ""
        self.scraper.save()
        # FIX: original passed run_checker_test(1)'s RESULT to assertRaises;
        # the callable and its argument must be passed separately.
        self.assertRaises(CloseSpider, self.run_checker_test, 1)
class CheckerRunTest(ScraperTest):
    """Scraper-level checker tests ('X' and '4' types) with image-file cleanup.

    setUp installs an x_path checker on the scraper and seeds one Event;
    the _run_img_test_with_dirs helper verifies that deleting an event also
    removes its image files from the given storage directories.
    """

    def setUp(self):
        super(CheckerRunTest, self).setUp()
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt,
        )
        self.event.save()

    def test_checker_test_wrong_checker_config(self):
        # An empty ref URL is an invalid checker config: the test run aborts.
        self.scraper.checker_ref_url = ''
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_checker_test, 1)

    def test_none_type(self):
        # Checker type 'N' (none) is not runnable: the spider must close.
        self.scraper.checker_type = 'N'
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        # Page exists and the deletion marker is absent: event is kept.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        # Detail page is gone (404): the event is deleted.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        # NOTE(review): with num_zero_actions pre-set to 3 the event survives
        # the 404 run — presumably the checker counts zero-actions before
        # deleting; confirm against the checker scheduling logic.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()
        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        # event2.html carries the deletion marker text: delete.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank expected result: any x_path hit deletes the event.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        """Create a dummy 'event_image.jpg' in each dir; return created paths."""
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, 'event_image.jpg')
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                # FIX: original wrote 'Let\s ...' (invalid escape sequence),
                # shadowed the builtin 'file' and left the handle unmanaged;
                # write via a context manager instead.
                with open(path, 'w') as img_file:
                    img_file.write("Let's assume this is an image!")
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        """Run the checker against a 404 URL; event AND its images must vanish."""
        img_paths = self._create_imgs_in_dirs(img_dirs)
        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()
        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))

    def test_delete_with_img_flat_no_thumbs(self):
        self._run_img_test_with_dirs(['imgs/'])

    def test_delete_with_img_flat_with_thumbs(self):
        self._run_img_test_with_dirs(['imgs/'])

    def test_delete_with_img_all_no_thumbs(self):
        self._run_img_test_with_dirs(['imgs/full/'])

    def test_delete_with_img_all_with_thumbs(self):
        self._run_img_test_with_dirs(
            ['imgs/full/', 'imgs/thumbs/medium/', 'imgs/thumbs/small/'])

    def test_delete_with_img_thumbs_with_thumbs(self):
        self._run_img_test_with_dirs(['imgs/thumbs/medium/', 'imgs/thumbs/small/'])

    def test_404_type_404_delete(self):
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # The plain 404 checker ignores the x_path deletion marker: keep.
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
# NOTE(review): this block was corrupted by credential/secret masking — the
# setUpScraperJSDefaultScraper/setUpScraperJSChecker section was fused into
# literals containing 'http://*****:*****@...' and several original
# statements are unrecoverable from this view. Code left byte-identical
# (not valid Python as-is); restore from VCS history before use.
class ScraperJSRunTest(ScraperTest): def setUpScraperJSDefaultScraper(self): self.event_website.url = os.path.join('http://*****:*****@class="event_not_found"]/div/text()' self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path self.scraper.save() scheduler_rt = SchedulerRuntime() scheduler_rt.save() self.event = Event(title='Event 1', event_website=self.event_website, description='Event 1 description', url='%ssite_with_js/event_not_found.html' % path, checker_runtime=scheduler_rt) self.event.save() def setUpScraperJSDefaultChecker(self): self.setUpScraperJSChecker('http://localhost:8010/static/') def setUpScraperJSDockerChecker(self): self.setUpScraperJSChecker('http://10.0.2.2:8010/static/') self.rpt_dp1.render_javascript = True self.rpt_dp1.save() def test_default_no_scrapyjs_main_page(self): self.setUpScraperJSDefaultScraper() self.run_event_spider(1) self.assertEqual(len(Event.objects.all()), 1) def test_default_no_scrapyjs_detail_page(self): self.setUpScraperJSDefaultScraper() self.run_event_spider(1) self.assertEqual(len(Event.objects.filter(description='Event 1 description')), 1) def test_activated_scrapyjs_main_page(self): self.setUpScraperJSDockerScraper() self.run_event_spider(1) self.assertEqual(len(Event.objects.all()), 2) def test_activated_scrapyjs_detail_page(self): self.setUpScraperJSDockerScraper() self.run_event_spider(1) self.assertEqual(len(Event.objects.filter(description='Event 1 JS description')), 1) def test_only_main_page_scrapyjs_main_page(self): self.setUpScraperJSDockerScraper() self.event_website.url = os.path.join('http://10.0.2.2:8010/static/', 'site_with_js/event_main.html') self.event_website.save() self.rpt_dp1.render_javascript = False self.rpt_dp1.save() self.run_event_spider(1) self.assertEqual(len(Event.objects.all()), 2) self.assertEqual(len(Event.objects.filter(description='Event 1 description')), 1) self.assertEqual(len(Event.objects.filter(description='Event 1 JS description')), 0) def 
test_default_no_scrapyjs_checker_delete(self): self.setUpScraperJSDefaultChecker() self.scraper.checker_x_path_result = u'Event not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_default_no_scrapyjs_checker_no_delete(self): self.setUpScraperJSDefaultChecker() self.scraper.checker_x_path_result = u'Event JS not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) def test_activated_scrapyjs_checker_delete(self): self.setUpScraperJSDockerChecker() self.scraper.checker_x_path_result = u'Event JS not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_activated_scrapyjs_checker_no_delete(self): self.setUpScraperJSDockerChecker() self.scraper.checker_x_path_result = u'Event not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1)
# NOTE(review): this block was corrupted by credential/secret masking — the
# checker setup methods and a filter kwarg were fused into literals
# containing 'http://*****:*****@...'; several original statements are
# unrecoverable from this view. Code left byte-identical (not valid Python
# as-is); restore from VCS history before use.
class ScraperJSONRunTest(ScraperTest): def setUpScraperJSONDefaultScraper(self): self.se_base.x_path = 'response.events' self.se_base.save() self.se_title.x_path = 'title' self.se_title.save() self.se_url.x_path = 'url' self.se_url.save() self.se_desc.x_path = 'description' self.se_desc.request_page_type = 'MP' self.se_desc.save() self.se_es_1.x_path = 'title' self.se_es_1.save() self.rpt_mp.content_type = 'J' self.rpt_mp.save() self.event_website.url = os.path.join( self.SERVER_URL, 'site_with_json_content_type/event_main.json') self.event_website.save() def extraSetUpHTMLChecker(self): self.checker = Checker() self.checker.scraped_obj_attr = self.soa_url self.checker.scraper = self.scraper self.checker.checker_type = 'X' self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()' self.checker.checker_x_path_result = 'Event not found!' self.checker.checker_ref_url = 'http://*****:*****@class="description"]/text()' self.se_desc.request_page_type = 'DP1' self.se_desc.save() self.run_event_spider(1) #logging.info(unicode(Event.objects.all())) self.assertEqual( len( Event.objects.filter( description='Event Detail Page 1 Description')), 1) def test_detail_page_json(self): self.setUpScraperJSONDefaultScraper() self.rpt_dp1.content_type = 'J' self.rpt_dp1.save() self.se_url.x_path = 'json_url' self.se_url.save() self.se_desc.x_path = 'event_details.description' self.se_desc.request_page_type = 'DP1' self.se_desc.save() self.run_event_spider(1) #logging.info(unicode(Event.objects.all())) self.assertEqual( len( Event.objects.filter( description='Event Detail Page 1 Description')), 1) def test_multiple_detail_pages(self): self.setUpScraperJSONDefaultScraper() self.se_desc.x_path = '//div/div[@class="description2"]/text()' self.se_desc.request_page_type = 'DP1' self.se_desc.save() self.soa_url.id_field = False self.soa_url.save_to_db = False self.soa_url.save() self.soa_url2.save_to_db = False self.soa_url2.save() self.rpt_dp2 = 
RequestPageType(page_type='DP2', scraper=self.scraper, scraped_obj_attr=self.soa_url2, content_type='J') self.rpt_dp2.save() self.se_url2 = ScraperElem(scraped_obj_attr=self.soa_url2, scraper=self.scraper, x_path='json_url', request_page_type='MP') self.se_url2.save() self.se_desc2 = ScraperElem(scraped_obj_attr=self.soa_desc2, scraper=self.scraper, x_path='event_details.description2', request_page_type='DP2', mandatory=False) self.se_desc2.save() self.run_event_spider(1) #logging.info(unicode(Event.objects.all())) events = Event.objects.filter( title='Event 1', #url='http://*****:*****@class="this_is_the_wrong_xpath"]/div/text()' self.checker.save() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) def test_json_checker_x_path_type_x_path_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_json_checker_x_path_type_x_path_no_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.checker.checker_x_path = 'this_is_the_wrong_xpath' self.checker.save() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1)
# NOTE(review): checker integration tests (Checker-model era: the checker
# config lives on a Checker object, not on the Scraper). This chunk arrived
# whitespace-collapsed and contains redacted spans ('http://*****:*****@...')
# that swallowed the checker_ref_url value plus the creation of
# `self.checker2`, and the body of test_x_path_type_keep up to an unrelated
# checker_x_path assignment. Statement boundaries cannot be recovered from
# this view, so the code is left byte-identical — restore from version
# control before editing.
#
# NOTE(review): real defect visible at the very end of this chunk:
# `self.assertRaises(CloseSpider, self.run_checker_test(1))` CALLS
# run_checker_test immediately instead of passing the callable and its
# argument separately (`self.assertRaises(CloseSpider, self.run_checker_test,
# 1)`), so assertRaises never supervises the call. Currently masked because
# the test is @unittest.skip-decorated; fix when un-skipping.
#
# NOTE(review): `file = open(path, "w")` shadows the (Python 2) builtin
# `file`, and 'Let\s ...' is an invalid escape sequence (SyntaxWarning on
# modern CPython) — left untouched here per the redaction caveat above.
class CheckerRunTest(ScraperTest): def setUp(self): super(CheckerRunTest, self).setUp() self.checker = Checker() self.checker.scraped_obj_attr = self.soa_url self.checker.scraper = self.scraper self.checker.checker_type = 'X' self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()' self.checker.checker_x_path_result = 'Event was deleted!' self.checker.checker_ref_url = 'http://*****:*****@class="event_not_found"]/div/text()' self.checker2.checker_x_path_result = 'Event was deleted!' self.checker2.checker_ref_url = 'http://localhost:8010/static/site_for_checker/event_not_found.html' self.checker2.save() @unittest.skip( "Skipped, CloseSpider not visible in test anymore after having reworked settings initialization" ) def test_no_checker(self): self.checker.delete() self.assertRaises(CloseSpider, self.run_event_checker, 1) def test_x_path_type_keep(self): self.event.url = 'http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()' self.checker.save() self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html' self.event.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_x_path_type_blank_result_field_x_path_delete(self): self.scraper.checker_x_path_result = '' self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html' self.event.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def _create_imgs_in_dirs(self, img_dirs): img_paths = [] file_name = 'event_image_{rnd}.jpg'.format( rnd=str(random.randint(0, 1000000))) self.event.description = file_name self.event.save() for img_dir in img_dirs: path = os.path.join(self.PROJECT_ROOT, img_dir, file_name) if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) if not os.path.exists(path): file = open(path, "w") file.write('Let\s assume this is an image!') file.close() img_paths.append(path) return img_paths def _run_img_test_with_dirs(self, img_dirs): img_paths = 
self._create_imgs_in_dirs(img_dirs) self.se_desc.mandatory = True self.se_desc.save() self.soa_desc.attr_type = 'I' self.soa_desc.save() self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html' self.event.save() for path in img_paths: self.assertTrue(os.path.exists(path)) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) for path in img_paths: self.assertFalse(os.path.exists(path)) def test_delete_with_img_flat_no_thumbs(self): img_dirs = [ 'imgs/', ] self._run_img_test_with_dirs(img_dirs) def test_delete_with_img_flat_with_thumbs(self): img_dirs = [ 'imgs/', ] self._run_img_test_with_dirs(img_dirs) def test_delete_with_img_all_no_thumbs(self): img_dirs = [ 'imgs/full/', ] self._run_img_test_with_dirs(img_dirs) def test_delete_with_img_all_with_thumbs(self): img_dirs = [ 'imgs/full/', 'imgs/thumbs/medium/', 'imgs/thumbs/small/', ] self._run_img_test_with_dirs(img_dirs) def test_delete_with_img_thumbs_with_thumbs(self): img_dirs = [ 'imgs/thumbs/medium/', 'imgs/thumbs/small/', ] self._run_img_test_with_dirs(img_dirs) def test_404_type_404_delete(self): self.checker.checker_type = '4' self.checker.save() self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html' self.event.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_404_type_x_path_delete(self): self.checker.checker_type = '4' self.checker.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) @unittest.skip( "Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings." ) def test_checker_test_wrong_checker_config(self): self.checker.checker_ref_url = '' self.checker.save() self.assertRaises(CloseSpider, self.run_checker_test(1))
# NOTE(review): ScrapyJS/Splash rendering tests — compare scraping/checking
# with and without `scraper.render_javascript` (the Docker variants point at
# 10.0.2.2, presumably the host as seen from a Docker/VirtualBox guest —
# confirm against the test harness docs). This chunk arrived
# whitespace-collapsed and a redacted span ('http://*****:*****@...')
# swallowed the bulk of setUpScraperJSDefaultScraper, the (presumed)
# setUpScraperJSDockerScraper definition, and the start of
# setUpScraperJSChecker, whose surviving tail assigns checker fields from a
# `path` variable whose binding was eaten. Statement boundaries cannot be
# recovered from this view, so the code is left byte-identical — restore
# from version control before editing.
class ScraperJSRunTest(ScraperTest): def setUpScraperJSDefaultScraper(self): self.event_website.url = os.path.join('http://*****:*****@class="event_not_found"]/div/text()' self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path self.scraper.save() scheduler_rt = SchedulerRuntime() scheduler_rt.save() self.event = Event(title='Event 1', event_website=self.event_website, description='Event 1 description', url='%ssite_with_js/event_not_found.html' % path, checker_runtime=scheduler_rt) self.event.save() def setUpScraperJSDefaultChecker(self): self.setUpScraperJSChecker('http://localhost:8010/static/') def setUpScraperJSDockerChecker(self): self.setUpScraperJSChecker('http://10.0.2.2:8010/static/') self.scraper.render_javascript = True self.scraper.save() def test_default_no_scrapyjs_main_page(self): self.setUpScraperJSDefaultScraper() self.run_event_spider(1) self.assertEqual(len(Event.objects.all()), 1) def test_default_no_scrapyjs_detail_page(self): self.setUpScraperJSDefaultScraper() self.run_event_spider(1) self.assertEqual( len(Event.objects.filter(description='Event 1 description')), 1) def test_activated_scrapyjs_main_page(self): self.setUpScraperJSDockerScraper() self.run_event_spider(1) self.assertEqual(len(Event.objects.all()), 2) def test_activated_scrapyjs_detail_page(self): self.setUpScraperJSDockerScraper() self.run_event_spider(1) self.assertEqual( len(Event.objects.filter(description='Event 1 JS description')), 1) def test_default_no_scrapyjs_checker_delete(self): self.setUpScraperJSDefaultChecker() self.scraper.checker_x_path_result = u'Event not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_default_no_scrapyjs_checker_no_delete(self): self.setUpScraperJSDefaultChecker() self.scraper.checker_x_path_result = u'Event JS not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) def test_activated_scrapyjs_checker_delete(self): 
self.setUpScraperJSDockerChecker() self.scraper.checker_x_path_result = u'Event JS not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_activated_scrapyjs_checker_no_delete(self): self.setUpScraperJSDockerChecker() self.scraper.checker_x_path_result = u'Event not found' self.scraper.save() self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1)
# NOTE(review): near-duplicate of the ScraperJSONRunTest at the top of this
# chunk (same RequestPageType-era API, different line wrapping) — this file
# appears to contain several historical versions of the same test classes
# concatenated; deduplicate when restoring. The same redaction damage applies
# here: 'http://*****:*****@...' spans fused the extraSetUpHTMLChecker
# checker_ref_url with later statements, and test_multiple_detail_pages has
# an unterminated `Event.objects.filter(` whose remaining arguments were
# swallowed inside a redacted `#url=...` comment. Statement boundaries cannot
# be recovered from this view, so the code is left byte-identical — restore
# from version control before editing. The `extraSetUpJSONChecker` helper
# called below is presumably defined in the redacted region — verify.
class ScraperJSONRunTest(ScraperTest): def setUpScraperJSONDefaultScraper(self): self.se_base.x_path = 'response.events' self.se_base.save() self.se_title.x_path = 'title' self.se_title.save() self.se_url.x_path = 'url' self.se_url.save() self.se_desc.x_path = 'description' self.se_desc.request_page_type = 'MP' self.se_desc.save() self.se_es_1.x_path = 'title' self.se_es_1.save() self.rpt_mp.content_type = 'J' self.rpt_mp.save() self.event_website.url = os.path.join(self.SERVER_URL, 'site_with_json_content_type/event_main.json') self.event_website.save() def extraSetUpHTMLChecker(self): self.checker = Checker() self.checker.scraped_obj_attr = self.soa_url self.checker.scraper = self.scraper self.checker.checker_type = 'X' self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()' self.checker.checker_x_path_result = 'Event not found!' self.checker.checker_ref_url = 'http://*****:*****@class="description"]/text()' self.se_desc.request_page_type = 'DP1' self.se_desc.save() self.run_event_spider(1) #logging.info(unicode(Event.objects.all())) self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1) def test_detail_page_json(self): self.setUpScraperJSONDefaultScraper() self.rpt_dp1.content_type = 'J' self.rpt_dp1.save() self.se_url.x_path = 'json_url' self.se_url.save() self.se_desc.x_path = 'event_details.description' self.se_desc.request_page_type = 'DP1' self.se_desc.save() self.run_event_spider(1) #logging.info(unicode(Event.objects.all())) self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1) def test_multiple_detail_pages(self): self.setUpScraperJSONDefaultScraper() self.se_desc.x_path = '//div/div[@class="description2"]/text()' self.se_desc.request_page_type = 'DP1' self.se_desc.save() self.soa_url.id_field = False self.soa_url.save_to_db = False self.soa_url.save() self.soa_url2.save_to_db = False self.soa_url2.save() self.rpt_dp2 = RequestPageType(page_type='DP2', 
scraper=self.scraper, scraped_obj_attr=self.soa_url2, content_type='J') self.rpt_dp2.save() self.se_url2 = ScraperElem(scraped_obj_attr=self.soa_url2, scraper=self.scraper, x_path='json_url', request_page_type='MP') self.se_url2.save() self.se_desc2 = ScraperElem(scraped_obj_attr=self.soa_desc2, scraper=self.scraper, x_path='event_details.description2', request_page_type='DP2', mandatory=False) self.se_desc2.save() self.run_event_spider(1) #logging.info(unicode(Event.objects.all())) events = Event.objects.filter( title='Event 1', #url='http://*****:*****@class="this_is_the_wrong_xpath"]/div/text()' self.checker.save() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) def test_json_checker_x_path_type_x_path_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_json_checker_x_path_type_x_path_no_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.checker.checker_x_path = 'this_is_the_wrong_xpath' self.checker.save() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1)
# NOTE(review): a third, older copy of ScraperJSONRunTest, predating the
# RequestPageType refactor (checker config and content_type sit directly on
# `self.scraper`, detail-page membership is the boolean
# `se_desc.from_detail_page`). Whitespace-collapsed like its siblings, with a
# redacted span ('http://*****:*****@...') that swallowed the
# extraSetUpHTMLChecker checker_ref_url, the (presumed) extraSetUpJSONChecker
# definition called by the json_checker tests below, and the start of
# test_detail_page_html. Statement boundaries cannot be recovered from this
# view, so the code is left byte-identical — restore from version control
# before editing, and deduplicate against the newer ScraperJSONRunTest
# versions in this file.
class ScraperJSONRunTest(ScraperTest): def setUpScraperJSONDefaultScraper(self): self.se_base.x_path = u'response.events' self.se_base.save() self.se_title.x_path = u'title' self.se_title.save() self.se_url.x_path = u'url' self.se_url.save() self.se_desc.x_path = u'description' self.se_desc.from_detail_page = False self.se_desc.save() self.scraper.content_type = 'J' self.scraper.save() self.event_website.url = os.path.join(self.SERVER_URL, 'site_with_json_content_type/event_main.json') self.event_website.save() def extraSetUpHTMLChecker(self): self.scraper.checker_type = 'X' self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()' self.scraper.checker_x_path_result = u'Event not found!' self.scraper.checker_ref_url = u'http://*****:*****@class="description"]/text()' self.se_desc.from_detail_page = True self.se_desc.save() self.run_event_spider(1) #log.msg(unicode(Event.objects.all()), level=log.INFO) self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1) def test_detail_page_json(self): self.setUpScraperJSONDefaultScraper() self.scraper.detail_page_content_type = 'J' self.scraper.save() self.se_url.x_path = u'json_url' self.se_url.save() self.se_desc.x_path = u'event_details.description' self.se_desc.from_detail_page = True self.se_desc.save() self.run_event_spider(1) #log.msg(unicode(Event.objects.all()), level=log.INFO) self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1) def test_checker_x_path_type_x_path_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpHTMLChecker() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_checker_x_path_type_x_path_no_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpHTMLChecker() self.scraper.checker_x_path = u'//div[@class="this_is_the_wrong_xpath"]/div/text()' self.scraper.save() self.assertEqual(len(Event.objects.all()), 
1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1) def test_json_checker_x_path_type_x_path_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 0) def test_json_checker_x_path_type_x_path_no_delete(self): self.setUpScraperJSONDefaultScraper() self.extraSetUpJSONChecker() self.scraper.checker_x_path = u'this_is_the_wrong_xpath' self.scraper.save() self.assertEqual(len(Event.objects.all()), 1) self.run_event_checker(1) self.assertEqual(len(Event.objects.all()), 1)
class CheckerRunTest(ScraperTest):
    """Checker integration tests (older scraper-level checker API).

    setUp stores a single ``Event`` pointing at a live test-server page;
    each test then points ``event.url`` at an existing page, a 404 page, or
    a "deleted" page matching ``checker_x_path``/``checker_x_path_result``
    and runs the checker, asserting whether the Event survives.

    Fixes applied in review: ``test_404_delete_with_img`` no longer shadows
    the builtin ``file``, writes the fixture via a ``with`` block (so the
    handle is closed even on error), and the placeholder text uses a proper
    apostrophe instead of the invalid ``\\s`` escape sequence.
    """

    def setUp(self):
        super(CheckerRunTest, self).setUp()
        # Checker matches the "Event was deleted!" marker on the test page.
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.save()
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        self.event = Event(
            title='Event 1', event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def test_keep_video(self):
        # Page exists and does not match the delete marker -> Event is kept.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_404_delete(self):
        # Missing page (HTTP 404) -> Event is deleted.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_delete_with_zero_actions(self):
        # With num_zero_actions already at 3 the checker must NOT delete on
        # a 404 (back-off behavior), so the Event survives.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()
        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_delete(self):
        # Page exists but matches the "deleted" x_path result -> delete.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_delete_with_img(self):
        # Deleting an Event whose description attr is an image ('I') must
        # also remove the scraped image file from disk.
        path = os.path.join(self.PROJECT_ROOT, 'imgs/event_image.jpg')
        if not os.path.exists(path):
            # Fixed: use a context manager and don't shadow builtin `file`;
            # the original 'Let\s ...' literal was an invalid escape.
            with open(path, "w") as img_file:
                img_file.write("Let's assume this is an image!")
        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()
        self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        self.assertFalse(os.path.exists(path))
class CheckerRunTest(ScraperTest):
    """Checker integration tests (scraper-level checker API with explicit
    ``checker_type``: 'X' = x_path checker, '4' = 404-only, 'N' = none).

    setUp stores one ``Event`` pointing at a live test-server page; each
    test redirects ``event.url`` to an existing / missing / "deleted" page,
    runs the checker, and asserts whether the Event (and any scraped image
    files) survive.

    Fixes applied in review: ``_create_imgs_in_dirs`` no longer shadows the
    builtin ``file``, writes fixtures via a ``with`` block (handle closed
    even on error), and the placeholder text uses a proper apostrophe
    instead of the invalid ``\\s`` escape sequence.
    """

    def setUp(self):
        super(CheckerRunTest, self).setUp()
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        self.event = Event(
            title='Event 1', event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def test_checker_test_wrong_checker_config(self):
        # Missing checker_ref_url makes the checker-test spider abort.
        self.scraper.checker_ref_url = ''
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_checker_test, 1)

    def test_none_type(self):
        # checker_type 'N' (none) -> running the checker aborts.
        self.scraper.checker_type = 'N'
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        # Page exists and does not match the delete marker -> Event kept.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        # Blank x_path result: page exists, x_path yields nothing -> kept.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        # x_path checker still deletes on a plain HTTP 404.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        # With num_zero_actions already at 3 the checker must NOT delete on
        # a 404 (back-off behavior), so the Event survives.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()
        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        # Page exists but matches the "deleted" x_path result -> delete.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank expected result: any non-empty x_path extraction deletes.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        """Create a dummy 'event_image.jpg' in each given dir (relative to
        PROJECT_ROOT), creating missing directories; return the paths."""
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, 'event_image.jpg')
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                # Fixed: context manager, no builtin `file` shadowing; the
                # original 'Let\s ...' literal was an invalid escape.
                with open(path, "w") as img_file:
                    img_file.write("Let's assume this is an image!")
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        """Delete-with-image scenario: checker deletes the Event and every
        scraped image file/thumbnail in img_dirs along with it."""
        img_paths = self._create_imgs_in_dirs(img_dirs)
        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'  # description attr holds an image name
        self.soa_desc.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()
        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))

    def test_delete_with_img_flat_no_thumbs(self):
        img_dirs = [
            'imgs/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_flat_with_thumbs(self):
        img_dirs = [
            'imgs/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_all_no_thumbs(self):
        img_dirs = [
            'imgs/full/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_all_with_thumbs(self):
        img_dirs = [
            'imgs/full/',
            'imgs/thumbs/medium/',
            'imgs/thumbs/small/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_thumbs_with_thumbs(self):
        img_dirs = [
            'imgs/thumbs/medium/',
            'imgs/thumbs/small/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_404_type_404_delete(self):
        # 404-only checker: missing page -> delete.
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # 404-only checker ignores the x_path marker page -> Event kept.
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)