def setUpWithSecondChecker(self):
     """Create, configure and save a second checker as ``self.checker2``.

     The checker is bound to ``self.soa_url`` and ``self.scraper`` and gets
     checker type ``'X'`` together with an XPath, an expected result text
     and a reference URL.
     """
     second_checker = Checker()
     self.checker2 = second_checker
     checker_settings = (
         ('scraped_obj_attr', self.soa_url),
         ('scraper', self.scraper),
         ('checker_type', 'X'),
         ('checker_x_path', '//div[@class="event_not_found"]/div/text()'),
         ('checker_x_path_result', 'Event was deleted!'),
         ('checker_ref_url', 'http://localhost:8010/static/site_for_checker/event_not_found.html'),
     )
     for attr_name, attr_value in checker_settings:
         setattr(second_checker, attr_name, attr_value)
     second_checker.save()
 def setUpWithSecondChecker(self):
     """Create, configure and save a second checker as ``self.checker2``.

     The checker is bound to ``self.soa_url`` and ``self.scraper`` and gets
     checker type ``'X'`` together with an XPath, an expected result text
     and a reference URL.
     """
     second_checker = Checker()
     self.checker2 = second_checker
     checker_settings = (
         ('scraped_obj_attr', self.soa_url),
         ('scraper', self.scraper),
         ('checker_type', 'X'),
         ('checker_x_path', '//div[@class="event_not_found"]/div/text()'),
         ('checker_x_path_result', 'Event was deleted!'),
         ('checker_ref_url', 'http://localhost:8010/static/site_for_checker/event_not_found.html'),
     )
     for attr_name, attr_value in checker_settings:
         setattr(second_checker, attr_name, attr_value)
     second_checker.save()
예제 #3
0
 def setUpScraperJSChecker(self, path):
     """Run the parent ``setUp`` and create a checker plus one event under ``path``.

     ``path`` is a URL prefix; ``'site_with_js/event_not_found.html'`` is
     appended to it and the result is used both as the checker's reference
     URL and as the URL of the saved ``Event``.  No
     ``checker_x_path_result`` is set here.
     """
     super(ScraperJSRunTest, self).setUp()

     js_checker = Checker()
     self.checker = js_checker
     js_checker.scraped_obj_attr = self.soa_url
     js_checker.scraper = self.scraper
     js_checker.checker_type = 'X'
     js_checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
     not_found_url = path + 'site_with_js/event_not_found.html'
     js_checker.checker_ref_url = not_found_url
     js_checker.save()

     runtime = SchedulerRuntime()
     runtime.save()

     event_fields = dict(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url=not_found_url,
         checker_runtime=runtime,
     )
     self.event = Event(**event_fields)
     self.event.save()
예제 #4
0
    def extraSetUpHTMLChecker(self):
        """Create and save an HTML checker and an event that share one URL.

        The checker (type ``'X'``) expects the text ``'Event not found!'``
        for its XPath; the saved ``Event`` uses the checker's reference URL
        as its own URL and gets a fresh ``SchedulerRuntime``.
        """
        not_found_url = 'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'

        html_checker = Checker()
        self.checker = html_checker
        html_checker.scraped_obj_attr = self.soa_url
        html_checker.scraper = self.scraper
        html_checker.checker_type = 'X'
        html_checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        html_checker.checker_x_path_result = 'Event not found!'
        html_checker.checker_ref_url = not_found_url
        html_checker.save()

        runtime = SchedulerRuntime()
        runtime.save()

        event_fields = dict(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url=not_found_url,
            checker_runtime=runtime,
        )
        self.event = Event(**event_fields)
        self.event.save()
    def setUp(self):
        """Extend the parent ``setUp`` with a saved checker and a saved event.

        The checker (type ``'X'``) expects ``'Event was deleted!'`` at its
        XPath and references ``event_not_found.html``; the event points at
        ``event1.html`` and gets a fresh ``SchedulerRuntime``.
        """
        super(CheckerRunTest, self).setUp()

        checker = Checker()
        self.checker = checker
        checker_settings = (
            ('scraped_obj_attr', self.soa_url),
            ('scraper', self.scraper),
            ('checker_type', 'X'),
            ('checker_x_path', '//div[@class="event_not_found"]/div/text()'),
            ('checker_x_path_result', 'Event was deleted!'),
            ('checker_ref_url', 'http://localhost:8010/static/site_for_checker/event_not_found.html'),
        )
        for attr_name, attr_value in checker_settings:
            setattr(checker, attr_name, attr_value)
        checker.save()

        runtime = SchedulerRuntime()
        runtime.save()

        event = Event(title='Event 1',
                      event_website=self.event_website,
                      description='Event 1 description',
                      url='http://localhost:8010/static/site_for_checker/event1.html',
                      checker_runtime=runtime)
        self.event = event
        event.save()
예제 #6
0
    def extraSetUpJSONChecker(self):
        """Set ``self.rpt_dp1`` to content type ``'J'`` and create checker/event fixtures.

        The checker (type ``'X'``) uses the path ``'event_not_found'`` with
        the expected result ``'Event not found!'``; checker reference URL and
        event URL are the same ``event_not_found.json`` address.
        """
        detail_page_type = self.rpt_dp1
        detail_page_type.content_type = 'J'
        detail_page_type.save()

        not_found_url = 'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'

        json_checker = Checker()
        self.checker = json_checker
        json_checker.scraped_obj_attr = self.soa_url
        json_checker.scraper = self.scraper
        json_checker.checker_type = 'X'
        json_checker.checker_x_path = 'event_not_found'
        json_checker.checker_x_path_result = 'Event not found!'
        json_checker.checker_ref_url = not_found_url
        json_checker.save()

        runtime = SchedulerRuntime()
        runtime.save()

        event_fields = dict(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url=not_found_url,
            checker_runtime=runtime,
        )
        self.event = Event(**event_fields)
        self.event.save()
 def extraSetUpHTMLChecker(self):
     """Create and save an HTML checker and an event that share one URL.

     The checker (type ``'X'``) expects the text ``'Event not found!'`` for
     its XPath; the saved ``Event`` uses the checker's reference URL as its
     own URL and gets a fresh ``SchedulerRuntime``.
     """
     not_found_url = 'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'

     html_checker = Checker()
     self.checker = html_checker
     checker_settings = (
         ('scraped_obj_attr', self.soa_url),
         ('scraper', self.scraper),
         ('checker_type', 'X'),
         ('checker_x_path', '//div[@class="event_not_found"]/div/text()'),
         ('checker_x_path_result', 'Event not found!'),
         ('checker_ref_url', not_found_url),
     )
     for attr_name, attr_value in checker_settings:
         setattr(html_checker, attr_name, attr_value)
     html_checker.save()

     runtime = SchedulerRuntime()
     runtime.save()

     event = Event(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url=not_found_url,
         checker_runtime=runtime,
     )
     self.event = event
     event.save()
 def setUpScraperJSChecker(self, path):
     """Run the parent ``setUp`` and create a checker plus one event under ``path``.

     ``path`` is a URL prefix; ``'site_with_js/event_not_found.html'`` is
     appended to it and the result is used both as the checker's reference
     URL and as the URL of the saved ``Event``.  No
     ``checker_x_path_result`` is set here.
     """
     super(ScraperJSRunTest, self).setUp()

     js_checker = Checker()
     self.checker = js_checker
     js_checker.scraped_obj_attr = self.soa_url
     js_checker.scraper = self.scraper
     js_checker.checker_type = 'X'
     js_checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
     not_found_url = path + 'site_with_js/event_not_found.html'
     js_checker.checker_ref_url = not_found_url
     js_checker.save()

     runtime = SchedulerRuntime()
     runtime.save()

     event = Event(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url=not_found_url,
         checker_runtime=runtime,
     )
     self.event = event
     event.save()
 def setUp(self):
     """Extend the parent ``setUp`` with a saved checker and a saved event.

     The checker (type ``'X'``) expects ``'Event was deleted!'`` at its
     XPath and references ``event_not_found.html``; the event points at
     ``event1.html`` and gets a fresh ``SchedulerRuntime``.
     """
     super(CheckerRunTest, self).setUp()

     checker = Checker()
     self.checker = checker
     checker.scraped_obj_attr = self.soa_url
     checker.scraper = self.scraper
     checker.checker_type = 'X'
     checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
     checker.checker_x_path_result = 'Event was deleted!'
     checker.checker_ref_url = 'http://localhost:8010/static/site_for_checker/event_not_found.html'
     checker.save()

     runtime = SchedulerRuntime()
     runtime.save()

     event_fields = dict(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url='http://localhost:8010/static/site_for_checker/event1.html',
         checker_runtime=runtime,
     )
     self.event = Event(**event_fields)
     self.event.save()
 def extraSetUpJSONChecker(self):
     """Set ``self.rpt_dp1`` to content type ``'J'`` and create checker/event fixtures.

     The checker (type ``'X'``) uses the path ``'event_not_found'`` with the
     expected result ``'Event not found!'``; checker reference URL and event
     URL are the same ``event_not_found.json`` address.
     """
     detail_page_type = self.rpt_dp1
     detail_page_type.content_type = 'J'
     detail_page_type.save()

     not_found_url = 'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'

     json_checker = Checker()
     self.checker = json_checker
     checker_settings = (
         ('scraped_obj_attr', self.soa_url),
         ('scraper', self.scraper),
         ('checker_type', 'X'),
         ('checker_x_path', 'event_not_found'),
         ('checker_x_path_result', 'Event not found!'),
         ('checker_ref_url', not_found_url),
     )
     for attr_name, attr_value in checker_settings:
         setattr(json_checker, attr_name, attr_value)
     json_checker.save()

     runtime = SchedulerRuntime()
     runtime.save()

     event = Event(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url=not_found_url,
         checker_runtime=runtime,
     )
     self.event = event
     event.save()
    def extraSetUpJSONChecker(self):
        """Set ``self.rpt_dp1`` to content type ``"J"`` and create checker/event fixtures.

        The checker (type ``"X"``) uses the path ``"event_not_found"`` with
        the expected result ``"Event not found!"``; checker reference URL and
        event URL are the same ``event_not_found.json`` address.
        """
        detail_page_type = self.rpt_dp1
        detail_page_type.content_type = "J"
        detail_page_type.save()

        not_found_url = "http://localhost:8010/static/site_with_json_content_type/event_not_found.json"

        json_checker = Checker()
        self.checker = json_checker
        json_checker.scraped_obj_attr = self.soa_url
        json_checker.scraper = self.scraper
        json_checker.checker_type = "X"
        json_checker.checker_x_path = "event_not_found"
        json_checker.checker_x_path_result = "Event not found!"
        json_checker.checker_ref_url = not_found_url
        json_checker.save()

        runtime = SchedulerRuntime()
        runtime.save()

        event_fields = dict(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url=not_found_url,
            checker_runtime=runtime,
        )
        self.event = Event(**event_fields)
        self.event.save()
class ScraperJSRunTest(ScraperTest):
    
    def setUpScraperJSDefaultScraper(self):
        self.event_website.url = os.path.join('http://*****:*****@class="event_not_found"]/div/text()'
        self.checker.checker_ref_url = path + 'site_with_js/event_not_found.html'
        self.checker.save()
        
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        
        self.event = Event(title='Event 1', event_website=self.event_website,
            description='Event 1 description', 
            url=path + 'site_with_js/event_not_found.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def setUpScraperJSDefaultChecker(self):
        """Build the JS checker fixtures under the localhost static URL prefix."""
        static_root = 'http://localhost:8010/static/'
        self.setUpScraperJSChecker(static_root)
    
    def setUpScraperJSDockerChecker(self):
        """Build the JS checker fixtures under ``WITH_JS_URL`` and enable
        ``render_javascript`` on ``self.rpt_dp1``."""
        self.setUpScraperJSChecker(WITH_JS_URL)
        detail_page_type = self.rpt_dp1
        detail_page_type.render_javascript = True
        detail_page_type.save()
    


    def test_default_no_scrapyjs_main_page(self):
        # Default scraper setup: after run_event_spider(1) exactly one Event
        # must be stored.
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        stored_events = Event.objects.all()
        self.assertEqual(len(stored_events), 1)

    def test_default_no_scrapyjs_detail_page(self):
        # Default scraper setup: exactly one stored Event must carry the
        # description 'Event 1 description'.
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        matching = Event.objects.filter(description='Event 1 description')
        self.assertEqual(len(matching), 1)

    def test_activated_scrapyjs_main_page(self):
        # Docker scraper setup: after run_event_spider(1) two Events must be
        # stored.
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        stored_events = Event.objects.all()
        self.assertEqual(len(stored_events), 2)

    def test_activated_scrapyjs_detail_page(self):
        # Docker scraper setup: exactly one stored Event must carry the
        # description 'Event 1 JS description'.
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        matching = Event.objects.filter(description='Event 1 JS description')
        self.assertEqual(len(matching), 1)

    def test_only_main_page_scrapyjs_main_page(self):
        # Docker scraper setup with render_javascript switched off on
        # rpt_dp1: two Events are expected, one with the plain description
        # and none with the JS description.
        self.setUpScraperJSDockerScraper()
        website = self.event_website
        website.url = os.path.join(WITH_JS_URL, 'site_with_js/event_main.html')
        website.save()
        detail_page_type = self.rpt_dp1
        detail_page_type.render_javascript = False
        detail_page_type.save()

        self.run_event_spider(1)

        # Querysets are lazy; they are evaluated by len() in the asserts.
        plain = Event.objects.filter(description='Event 1 description')
        rendered = Event.objects.filter(description='Event 1 JS description')
        self.assertEqual(len(Event.objects.all()), 2)
        self.assertEqual(len(plain), 1)
        self.assertEqual(len(rendered), 0)

    def test_default_no_scrapyjs_checker_delete(self):
        # Default checker setup with expected result 'Event not found':
        # after run_event_checker(1) no Event may remain.
        self.setUpScraperJSDefaultChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

    def test_default_no_scrapyjs_checker_no_delete(self):
        # Default checker setup with expected result 'Event JS not found':
        # after run_event_checker(1) the Event must still exist.
        self.setUpScraperJSDefaultChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event JS not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 1)

    def test_activated_scrapyjs_checker_delete(self):
        # Docker checker setup with expected result 'Event JS not found':
        # after run_event_checker(1) no Event may remain.
        self.setUpScraperJSDockerChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event JS not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

    def test_activated_scrapyjs_checker_no_delete(self):
        # Docker checker setup with expected result 'Event not found':
        # after run_event_checker(1) the Event must still exist.
        self.setUpScraperJSDockerChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 1)
예제 #13
0
class ScraperJSONRunTest(ScraperTest):
    def setUpScraperJSONDefaultScraper(self):
        """Reconfigure the scraper fixtures for the JSON main page.

        Rewrites the ``x_path`` of the base, title, url, description and
        ``se_es_1`` elements, sets the description element's request page
        type to ``'MP'``, switches ``self.rpt_mp`` to content type ``'J'``
        and points ``self.event_website`` at ``event_main.json`` below
        ``self.SERVER_URL``.  Every touched object is saved.
        """
        simple_paths = (
            (self.se_base, 'response.events'),
            (self.se_title, 'title'),
            (self.se_url, 'url'),
        )
        for elem, json_path in simple_paths:
            elem.x_path = json_path
            elem.save()

        desc_elem = self.se_desc
        desc_elem.x_path = 'description'
        desc_elem.request_page_type = 'MP'
        desc_elem.save()

        standard_elem = self.se_es_1
        standard_elem.x_path = 'title'
        standard_elem.save()

        main_page_type = self.rpt_mp
        main_page_type.content_type = 'J'
        main_page_type.save()

        website = self.event_website
        website.url = os.path.join(
            self.SERVER_URL, 'site_with_json_content_type/event_main.json')
        website.save()

    def extraSetUpHTMLChecker(self):
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event not found!'
        self.checker.checker_ref_url = 'http://*****:*****@class="description"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()
        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        self.assertEqual(
            len(
                Event.objects.filter(
                    description='Event Detail Page 1 Description')), 1)

    def test_detail_page_json(self):
        # JSON main page plus JSON detail page (rpt_dp1 content type 'J'):
        # the description is read via 'event_details.description' on page
        # type 'DP1' and exactly one Event must carry the expected text.
        self.setUpScraperJSONDefaultScraper()

        detail_page_type = self.rpt_dp1
        detail_page_type.content_type = 'J'
        detail_page_type.save()

        url_elem = self.se_url
        url_elem.x_path = 'json_url'
        url_elem.save()

        desc_elem = self.se_desc
        desc_elem.x_path = 'event_details.description'
        desc_elem.request_page_type = 'DP1'
        desc_elem.save()

        self.run_event_spider(1)
        matching = Event.objects.filter(
            description='Event Detail Page 1 Description')
        self.assertEqual(len(matching), 1)

    def test_multiple_detail_pages(self):
        self.setUpScraperJSONDefaultScraper()
        self.se_desc.x_path = '//div/div[@class="description2"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()

        self.soa_url.id_field = False
        self.soa_url.save_to_db = False
        self.soa_url.save()

        self.soa_url2.save_to_db = False
        self.soa_url2.save()

        self.rpt_dp2 = RequestPageType(page_type='DP2',
                                       scraper=self.scraper,
                                       scraped_obj_attr=self.soa_url2,
                                       content_type='J')
        self.rpt_dp2.save()

        self.se_url2 = ScraperElem(scraped_obj_attr=self.soa_url2,
                                   scraper=self.scraper,
                                   x_path='json_url',
                                   request_page_type='MP')
        self.se_url2.save()

        self.se_desc2 = ScraperElem(scraped_obj_attr=self.soa_desc2,
                                    scraper=self.scraper,
                                    x_path='event_details.description2',
                                    request_page_type='DP2',
                                    mandatory=False)
        self.se_desc2.save()

        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        events = Event.objects.filter(
            title='Event 1',
            #url='http://*****:*****@class="this_is_the_wrong_xpath"]/div/text()'
        self.checker.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_json_checker_x_path_type_x_path_delete(self):
        # JSON checker fixtures: one Event exists before
        # run_event_checker(1) and none afterwards.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()

        before = len(Event.objects.all())
        self.assertEqual(before, 1)
        self.run_event_checker(1)
        after = len(Event.objects.all())
        self.assertEqual(after, 0)

    def test_json_checker_x_path_type_x_path_no_delete(self):
        # JSON checker fixtures with the checker path replaced by
        # 'this_is_the_wrong_xpath': the Event must survive
        # run_event_checker(1).
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        checker = self.checker
        checker.checker_x_path = 'this_is_the_wrong_xpath'
        checker.save()

        before = len(Event.objects.all())
        self.assertEqual(before, 1)
        self.run_event_checker(1)
        after = len(Event.objects.all())
        self.assertEqual(after, 1)
예제 #14
0
class ScraperJSRunTest(ScraperTest):
    
    def setUpScraperJSDefaultScraper(self):
        self.event_website.url = os.path.join('http://*****:*****@class="event_not_found"]/div/text()'
        self.checker.checker_ref_url = path + 'site_with_js/event_not_found.html'
        self.checker.save()
        
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        
        self.event = Event(title='Event 1', event_website=self.event_website,
            description='Event 1 description', 
            url=path + 'site_with_js/event_not_found.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def setUpScraperJSDefaultChecker(self):
        """Build the JS checker fixtures under the localhost static URL prefix."""
        static_root = 'http://localhost:8010/static/'
        self.setUpScraperJSChecker(static_root)
    
    def setUpScraperJSDockerChecker(self):
        """Build the JS checker fixtures under the ``10.0.2.2`` static URL
        prefix and enable ``render_javascript`` on ``self.rpt_dp1``."""
        static_root = 'http://10.0.2.2:8010/static/'
        self.setUpScraperJSChecker(static_root)
        detail_page_type = self.rpt_dp1
        detail_page_type.render_javascript = True
        detail_page_type.save()
    


    def test_default_no_scrapyjs_main_page(self):
        # Default scraper setup: after run_event_spider(1) exactly one Event
        # must be stored.
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        stored_events = Event.objects.all()
        self.assertEqual(len(stored_events), 1)

    def test_default_no_scrapyjs_detail_page(self):
        # Default scraper setup: exactly one stored Event must carry the
        # description 'Event 1 description'.
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        matching = Event.objects.filter(description='Event 1 description')
        self.assertEqual(len(matching), 1)

    def test_activated_scrapyjs_main_page(self):
        # Docker scraper setup: after run_event_spider(1) two Events must be
        # stored.
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        stored_events = Event.objects.all()
        self.assertEqual(len(stored_events), 2)

    def test_activated_scrapyjs_detail_page(self):
        # Docker scraper setup: exactly one stored Event must carry the
        # description 'Event 1 JS description'.
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        matching = Event.objects.filter(description='Event 1 JS description')
        self.assertEqual(len(matching), 1)

    def test_only_main_page_scrapyjs_main_page(self):
        # Docker scraper setup with render_javascript switched off on
        # rpt_dp1: two Events are expected, one with the plain description
        # and none with the JS description.
        self.setUpScraperJSDockerScraper()
        website = self.event_website
        website.url = os.path.join('http://10.0.2.2:8010/static/', 'site_with_js/event_main.html')
        website.save()
        detail_page_type = self.rpt_dp1
        detail_page_type.render_javascript = False
        detail_page_type.save()

        self.run_event_spider(1)

        # Querysets are lazy; they are evaluated by len() in the asserts.
        plain = Event.objects.filter(description='Event 1 description')
        rendered = Event.objects.filter(description='Event 1 JS description')
        self.assertEqual(len(Event.objects.all()), 2)
        self.assertEqual(len(plain), 1)
        self.assertEqual(len(rendered), 0)

    def test_default_no_scrapyjs_checker_delete(self):
        # Default checker setup with expected result 'Event not found':
        # after run_event_checker(1) no Event may remain.
        self.setUpScraperJSDefaultChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

    def test_default_no_scrapyjs_checker_no_delete(self):
        # Default checker setup with expected result 'Event JS not found':
        # after run_event_checker(1) the Event must still exist.
        self.setUpScraperJSDefaultChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event JS not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 1)

    def test_activated_scrapyjs_checker_delete(self):
        # Docker checker setup with expected result 'Event JS not found':
        # after run_event_checker(1) no Event may remain.
        self.setUpScraperJSDockerChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event JS not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

    def test_activated_scrapyjs_checker_no_delete(self):
        # Docker checker setup with expected result 'Event not found':
        # after run_event_checker(1) the Event must still exist.
        self.setUpScraperJSDockerChecker()
        checker = self.checker
        checker.checker_x_path_result = 'Event not found'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 1)
예제 #15
0
class CheckerRunTest(ScraperTest):
    
    def setUp(self):
        super(CheckerRunTest, self).setUp()
        
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event was deleted!'
        self.checker.checker_ref_url = 'http://*****:*****@class="event_not_found"]/div/text()'
        self.checker2.checker_x_path_result = 'Event was deleted!'
        self.checker2.checker_ref_url = 'http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()'
        self.checker.save()
        self.event.url = 'http://*****:*****@unittest.skip("Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings.")
    def test_checker_test_wrong_checker_config(self):
        # A checker with an empty reference URL is expected to make the
        # checker test raise CloseSpider.
        self.checker.checker_ref_url = ''
        self.checker.save()

        # Pass the callable and its argument separately: calling
        # run_checker_test(1) inline would run it before assertRaises could
        # catch anything.
        self.assertRaises(CloseSpider, self.run_checker_test, 1)
class CheckerRunTest(ScraperTest):
    def setUp(self):
        super(CheckerRunTest, self).setUp()

        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = "X"
        self.checker.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = u"Event was deleted!"
        self.checker.checker_ref_url = u"http://*****:*****@class="event_not_found"]/div/text()'
        self.checker2.checker_x_path_result = u"Event was deleted!"
        self.checker2.checker_ref_url = u"http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()'
        self.checker.save()
        self.event.url = "http://localhost:8010/static/site_for_checker/event2.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank out the expected result on the checker itself and persist
        # it.  The earlier version assigned the value to self.scraper and
        # never saved it, so the stored checker kept its non-blank result.
        # NOTE(review): assumes the checker deletes on an XPath match when
        # the result field is blank - confirm against the checker spider.
        self.checker.checker_x_path_result = ""
        self.checker.save()
        self.event.url = "http://localhost:8010/static/site_for_checker/event2.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        """Ensure a placeholder ``event_image.jpg`` exists in each directory.

        Every entry of ``img_dirs`` is joined onto ``self.PROJECT_ROOT``.
        Missing directories are created, and a small text file standing in
        for an image is written unless a file is already present.

        Returns the list of image paths in the order of ``img_dirs``.
        """
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, "event_image.jpg")
            parent_dir = os.path.dirname(path)
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)
            if not os.path.exists(path):
                # The context manager closes the handle even if write() fails.
                with open(path, "w") as img_file:
                    # Escaped backslash: writes the same characters as before
                    # without relying on the invalid "\s" escape sequence.
                    img_file.write("Let\\s assume this is an image!")
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        """Check that running the event checker removes the event and its image files.

        Placeholder images are created in ``img_dirs``, the description
        element is made mandatory, ``self.soa_desc`` gets attribute type
        ``"I"`` and the event is pointed at ``event_which_is_not_there.html``
        with description ``"event_image.jpg"``.  The files must exist before
        ``run_event_checker(1)`` and be gone afterwards, together with the
        event.
        """
        created_files = self._create_imgs_in_dirs(img_dirs)

        desc_elem = self.se_desc
        desc_elem.mandatory = True
        desc_elem.save()

        desc_attr = self.soa_desc
        desc_attr.attr_type = "I"
        desc_attr.save()

        event = self.event
        event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        event.description = "event_image.jpg"
        event.save()

        for file_path in created_files:
            self.assertTrue(os.path.exists(file_path))

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

        for file_path in created_files:
            self.assertFalse(os.path.exists(file_path))

    def test_delete_with_img_flat_no_thumbs(self):
        # Image deletion check with a single flat image directory.
        self._run_img_test_with_dirs(["imgs/"])

    def test_delete_with_img_flat_with_thumbs(self):
        # Image deletion check with a single flat image directory.
        # NOTE(review): uses the same directory list as
        # test_delete_with_img_flat_no_thumbs.
        self._run_img_test_with_dirs(["imgs/"])

    def test_delete_with_img_all_no_thumbs(self):
        # Image deletion check with only the 'full' image directory.
        self._run_img_test_with_dirs(["imgs/full/"])

    def test_delete_with_img_all_with_thumbs(self):
        # Image deletion check with the 'full' directory and both thumbnail
        # directories.
        directories = [
            "imgs/full/",
            "imgs/thumbs/medium/",
            "imgs/thumbs/small/",
        ]
        self._run_img_test_with_dirs(directories)

    def test_delete_with_img_thumbs_with_thumbs(self):
        # Image deletion check with the two thumbnail directories only.
        directories = [
            "imgs/thumbs/medium/",
            "imgs/thumbs/small/",
        ]
        self._run_img_test_with_dirs(directories)

    def test_404_type_404_delete(self):
        # Checker type "4" with the event pointed at
        # event_which_is_not_there.html: no Event may remain after
        # run_event_checker(1).
        checker = self.checker
        checker.checker_type = "4"
        checker.save()

        event = self.event
        event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        event.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

    def test_404_type_x_path_delete(self):
        # Checker type "4" with the event URL left unchanged: the Event must
        # still exist after run_event_checker(1).
        checker = self.checker
        checker.checker_type = "4"
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 1)

    @unittest.skip(
        "Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings."
    )
    def test_checker_test_wrong_checker_config(self):
        # A checker with an empty reference URL is expected to make the
        # checker test raise CloseSpider.
        self.checker.checker_ref_url = ""
        self.checker.save()

        # Pass the callable and its argument separately: calling
        # run_checker_test(1) inline would run it before assertRaises could
        # catch anything.
        self.assertRaises(CloseSpider, self.run_checker_test, 1)
class ScraperJSONRunTest(ScraperTest):

    def setUpScraperJSONDefaultScraper(self):
        """Reconfigure the scraper fixtures for the JSON main page.

        Rewrites the ``x_path`` of the base, title, url, description and
        ``se_es_1`` elements, sets the description element's request page
        type to ``'MP'``, switches ``self.rpt_mp`` to content type ``'J'``
        and points ``self.event_website`` at ``event_main.json`` below
        ``self.SERVER_URL``.  Every touched object is saved.
        """
        simple_paths = (
            (self.se_base, 'response.events'),
            (self.se_title, 'title'),
            (self.se_url, 'url'),
        )
        for elem, json_path in simple_paths:
            elem.x_path = json_path
            elem.save()

        desc_elem = self.se_desc
        desc_elem.x_path = 'description'
        desc_elem.request_page_type = 'MP'
        desc_elem.save()

        standard_elem = self.se_es_1
        standard_elem.x_path = 'title'
        standard_elem.save()

        main_page_type = self.rpt_mp
        main_page_type.content_type = 'J'
        main_page_type.save()

        website = self.event_website
        website.url = os.path.join(self.SERVER_URL, 'site_with_json_content_type/event_main.json')
        website.save()

    def extraSetUpHTMLChecker(self):
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event not found!'
        self.checker.checker_ref_url = 'http://*****:*****@class="description"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()
        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1)


    def test_detail_page_json(self):
        # JSON main page plus JSON detail page (rpt_dp1 content type 'J'):
        # the description is read via 'event_details.description' on page
        # type 'DP1' and exactly one Event must carry the expected text.
        self.setUpScraperJSONDefaultScraper()

        detail_page_type = self.rpt_dp1
        detail_page_type.content_type = 'J'
        detail_page_type.save()

        url_elem = self.se_url
        url_elem.x_path = 'json_url'
        url_elem.save()

        desc_elem = self.se_desc
        desc_elem.x_path = 'event_details.description'
        desc_elem.request_page_type = 'DP1'
        desc_elem.save()

        self.run_event_spider(1)
        matching = Event.objects.filter(description='Event Detail Page 1 Description')
        self.assertEqual(len(matching), 1)


    def test_multiple_detail_pages(self):
        self.setUpScraperJSONDefaultScraper()
        self.se_desc.x_path = '//div/div[@class="description2"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()

        self.soa_url.id_field = False
        self.soa_url.save_to_db = False
        self.soa_url.save()

        self.soa_url2.save_to_db = False
        self.soa_url2.save()

        self.rpt_dp2 = RequestPageType(page_type='DP2', scraper=self.scraper, scraped_obj_attr=self.soa_url2, content_type='J')
        self.rpt_dp2.save()
        
        self.se_url2 = ScraperElem(scraped_obj_attr=self.soa_url2, scraper=self.scraper, 
            x_path='json_url', request_page_type='MP')
        self.se_url2.save()
        
        self.se_desc2 = ScraperElem(scraped_obj_attr=self.soa_desc2, scraper=self.scraper, 
            x_path='event_details.description2', request_page_type='DP2', mandatory=False)
        self.se_desc2.save()
        

        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        events = Event.objects.filter(
            title='Event 1',
            #url='http://*****:*****@class="this_is_the_wrong_xpath"]/div/text()'
        self.checker.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)


    def test_json_checker_x_path_type_x_path_delete(self):
        # JSON checker fixtures: one Event exists before
        # run_event_checker(1) and none afterwards.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()

        before = len(Event.objects.all())
        self.assertEqual(before, 1)
        self.run_event_checker(1)
        after = len(Event.objects.all())
        self.assertEqual(after, 0)


    def test_json_checker_x_path_type_x_path_no_delete(self):
        # JSON checker fixtures with the checker path replaced by
        # 'this_is_the_wrong_xpath': the Event must survive
        # run_event_checker(1).
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        checker = self.checker
        checker.checker_x_path = 'this_is_the_wrong_xpath'
        checker.save()

        before = len(Event.objects.all())
        self.assertEqual(before, 1)
        self.run_event_checker(1)
        after = len(Event.objects.all())
        self.assertEqual(after, 1)
class CheckerRunTest(ScraperTest):
    def setUp(self):
        super(CheckerRunTest, self).setUp()

        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event was deleted!'
        self.checker.checker_ref_url = 'http://*****:*****@class="event_not_found"]/div/text()'
        self.checker2.checker_x_path_result = 'Event was deleted!'
        self.checker2.checker_ref_url = 'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.checker2.save()

    @unittest.skip(
        "Skipped, CloseSpider not visible in test anymore after having reworked settings initialization"
    )
    def test_no_checker(self):
        # With the checker deleted, run_event_checker(1) is expected to
        # raise CloseSpider.
        self.checker.delete()
        with self.assertRaises(CloseSpider):
            self.run_event_checker(1)

    def test_x_path_type_keep(self):
        self.event.url = 'http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()'
        self.checker.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank out the expected result on the checker itself and persist
        # it.  The earlier version assigned the value to self.scraper and
        # never saved it, so the stored checker kept its non-blank result.
        # NOTE(review): assumes the checker deletes on an XPath match when
        # the result field is blank - confirm against the checker spider.
        self.checker.checker_x_path_result = ''
        self.checker.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        """Create a randomly named placeholder image in each directory.

        A file name of the form ``event_image_<number>.jpg`` is generated
        once, stored as ``self.event.description`` (the event is saved) and
        then used in every directory of ``img_dirs`` below
        ``self.PROJECT_ROOT``.  Missing directories are created, and a small
        text file standing in for an image is written unless a file is
        already present.

        Returns the list of image paths in the order of ``img_dirs``.
        """
        img_paths = []
        file_name = 'event_image_{rnd}.jpg'.format(
            rnd=str(random.randint(0, 1000000)))
        self.event.description = file_name
        self.event.save()
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, file_name)
            parent_dir = os.path.dirname(path)
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)
            if not os.path.exists(path):
                # The context manager closes the handle even if write() fails.
                with open(path, "w") as img_file:
                    # Escaped backslash: writes the same characters as before
                    # without relying on the invalid "\s" escape sequence.
                    img_file.write('Let\\s assume this is an image!')
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        """Check that running the event checker removes the event and its image files.

        Placeholder images are created in ``img_dirs``, the description
        element is made mandatory, ``self.soa_desc`` gets attribute type
        ``'I'`` and the event is pointed at ``event_which_is_not_there.html``.
        The files must exist before ``run_event_checker(1)`` and be gone
        afterwards, together with the event.
        """
        created_files = self._create_imgs_in_dirs(img_dirs)

        desc_elem = self.se_desc
        desc_elem.mandatory = True
        desc_elem.save()

        desc_attr = self.soa_desc
        desc_attr.attr_type = 'I'
        desc_attr.save()

        event = self.event
        event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        event.save()

        for file_path in created_files:
            self.assertTrue(os.path.exists(file_path))

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

        for file_path in created_files:
            self.assertFalse(os.path.exists(file_path))

    def test_delete_with_img_flat_no_thumbs(self):
        # Image deletion check with a single flat image directory.
        self._run_img_test_with_dirs(['imgs/'])

    def test_delete_with_img_flat_with_thumbs(self):
        # Image deletion check with a single flat image directory.
        # NOTE(review): uses the same directory list as
        # test_delete_with_img_flat_no_thumbs.
        self._run_img_test_with_dirs(['imgs/'])

    def test_delete_with_img_all_no_thumbs(self):
        # Image deletion check with only the 'full' image directory.
        self._run_img_test_with_dirs(['imgs/full/'])

    def test_delete_with_img_all_with_thumbs(self):
        # Image deletion check with the 'full' directory and both thumbnail
        # directories.
        directories = ['imgs/full/', 'imgs/thumbs/medium/', 'imgs/thumbs/small/']
        self._run_img_test_with_dirs(directories)

    def test_delete_with_img_thumbs_with_thumbs(self):
        # Image deletion check with the two thumbnail directories only.
        directories = ['imgs/thumbs/medium/', 'imgs/thumbs/small/']
        self._run_img_test_with_dirs(directories)

    def test_404_type_404_delete(self):
        # Checker type '4' with the event pointed at
        # event_which_is_not_there.html: no Event may remain after
        # run_event_checker(1).
        checker = self.checker
        checker.checker_type = '4'
        checker.save()

        event = self.event
        event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        event.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 0)

    def test_404_type_x_path_delete(self):
        # Checker type '4' with the event URL left unchanged: the Event must
        # still exist after run_event_checker(1).
        checker = self.checker
        checker.checker_type = '4'
        checker.save()

        self.run_event_checker(1)
        remaining = Event.objects.all()
        self.assertEqual(len(remaining), 1)

    @unittest.skip(
        "Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings."
    )
    def test_checker_test_wrong_checker_config(self):
        # A checker with an empty reference URL is expected to make the
        # checker test raise CloseSpider.
        self.checker.checker_ref_url = ''
        self.checker.save()

        # Pass the callable and its argument separately: calling
        # run_checker_test(1) inline would run it before assertRaises could
        # catch anything.
        self.assertRaises(CloseSpider, self.run_checker_test, 1)