def test_double(self):
     """A pre-existing event with the scraped URL must not be duplicated:
     the spider run leaves exactly one 'Event 1' row."""
     runtime = SchedulerRuntime()
     runtime.save()
     existing = Event(
         title=u'Event 1',
         url=u'http://localhost:8010/static/site_generic/event1.html',
         checker_runtime=runtime)
     existing.save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(len(Event.objects.filter(title='Event 1')), 1)
 def test_double_standard_id_field(self):
     """With title+description as ID fields (URL disabled), only the event
     matching both fields is treated as already existing."""
     runtime = SchedulerRuntime()
     runtime.save()
     fixtures = (
         (u'Event 1', u'Event 1 description',
          u'http://localhost:8010/static/site_generic/event5.html'),
         (u'Event 2', u'Event 1 description',
          u'http://localhost:8010/static/site_generic/event6.html'),
         (u'Event 1', u'Event 2 description',
          u'http://localhost:8010/static/site_generic/event7.html'),
     )
     for title, description, url in fixtures:
         Event(title=title, event_website=self.event_website,
             description=description, url=url,
             checker_runtime=runtime).save()

     # Switch the ID-field configuration: URL off, title+description on.
     self.soa_url.id_field = False
     self.soa_url.save()
     self.soa_title.id_field = True
     self.soa_title.save()
     self.soa_desc.id_field = True
     self.soa_desc.save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 6)
     self.assertEqual(Event.objects.filter(description='Event 1 description').count(), 2)
 def test_double(self):
     """Duplicate-avoidance via URL match, with the event bound to a website.

     NOTE(review): a method of the same name appears earlier in this file;
     inside one class body the later definition would shadow the earlier.
     """
     runtime = SchedulerRuntime()
     runtime.save()
     existing = Event(
         title=u'Event 1',
         event_website=self.event_website,
         url=u'http://localhost:8010/static/site_generic/event1.html',
         checker_runtime=runtime)
     existing.save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(len(Event.objects.filter(title='Event 1')), 1)
 def test_detail_page_url_id_field(self):
     """With the detail-page URL acting as ID field, an event stored under a
     different URL is not merged — a second 'Event 1' row appears."""
     runtime = SchedulerRuntime()
     runtime.save()
     Event(
         title=u'Event 1',
         event_website=self.event_website,
         url=u'http://localhost:8010/static/site_generic/event5.html',
         checker_runtime=runtime).save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 5)
     self.assertEqual(Event.objects.filter(title='Event 1').count(), 2)
 def test_standard_update_field_update(self):
     """An attribute switched to type 'T' gets its stored value overwritten
     by the freshly scraped one (old title disappears)."""
     runtime = SchedulerRuntime()
     runtime.save()
     stale = Event(
         title=u'Event 1 - Old Title',
         event_website=self.event_website,
         url=u'http://localhost:8010/static/site_generic/event1.html',
         checker_runtime=runtime)
     stale.save()

     self.soa_title.attr_type = 'T'
     self.soa_title.save()

     self.run_event_spider(1)

     refreshed = Event.objects.get(pk=stale.id)
     self.assertEqual(refreshed.title, 'Event 1')
     self.assertEqual(len(Event.objects.filter(title='Event 1 - Old Title')), 0)
 def test_single_standard_id_field(self):
     """With title as the sole ID field (URL disabled), the pre-existing
     'Event 1' row is recognized and not duplicated."""
     runtime = SchedulerRuntime()
     runtime.save()
     Event(
         title='Event 1',
         event_website=self.event_website,
         url='http://localhost:8010/static/site_generic/event5.html',
         checker_runtime=runtime).save()

     # ID field moves from URL to title.
     self.soa_url.id_field = False
     self.soa_url.save()
     self.soa_title.id_field = True
     self.soa_title.save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(Event.objects.filter(title='Event 1').count(), 1)
 def test_single_standard_id_field(self):
     """Title-as-ID-field de-duplication (unicode-literal variant).

     NOTE(review): a method of the same name appears just above in this
     file; in one class body the later definition would shadow the earlier.
     """
     runtime = SchedulerRuntime()
     runtime.save()
     Event(
         title=u'Event 1',
         event_website=self.event_website,
         url=u'http://localhost:8010/static/site_generic/event5.html',
         checker_runtime=runtime).save()

     self.soa_url.id_field = False
     self.soa_url.save()
     self.soa_title.id_field = True
     self.soa_title.save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(Event.objects.filter(title='Event 1').count(), 1)
Example #8
0
    def setUpScraperJSChecker(self, path):
        """Configure an XPath checker against the JS 'event not found' page
        served under *path* and persist one matching Event fixture."""
        super(ScraperJSRunTest, self).setUp()

        scraper = self.scraper
        scraper.checker_type = 'X'
        scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path
        scraper.save()

        runtime = SchedulerRuntime()
        runtime.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='%ssite_with_js/event_not_found.html' % path,
            checker_runtime=runtime)
        self.event.save()
Example #9
0
    def extraSetUpHTMLChecker(self):
        """Configure the scraper as an XPath checker against the HTML
        'event not found' page and store a matching Event fixture."""
        scraper = self.scraper
        scraper.checker_type = 'X'
        scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        scraper.checker_x_path_result = u'Event not found!'
        scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'
        scraper.save()

        runtime = SchedulerRuntime()
        runtime.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_with_json_content_type/event_not_found.html',
            checker_runtime=runtime)
        self.event.save()
Example #10
0
    def extraSetUpJSONChecker(self):
        """Configure the scraper as an XPath checker for a JSON detail page
        ('J' content type) and store a matching Event fixture."""
        scraper = self.scraper
        scraper.detail_page_content_type = 'J'
        scraper.checker_type = 'X'
        scraper.checker_x_path = u'event_not_found'
        scraper.checker_x_path_result = u'Event not found!'
        scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'
        scraper.save()

        runtime = SchedulerRuntime()
        runtime.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_with_json_content_type/event_not_found.json',
            checker_runtime=runtime)
        self.event.save()
Example #11
0
    def setUp(self):
        """Shared checker fixtures: an XPath checker on the scraper plus one
        Event pointing at the checker test site."""
        super(CheckerRunTest, self).setUp()

        scraper = self.scraper
        scraper.checker_type = 'X'
        scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        scraper.checker_x_path_result = u'Event was deleted!'
        scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        scraper.save()

        runtime = SchedulerRuntime()
        runtime.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=runtime)
        self.event.save()
 def extraSetUpHTMLChecker(self):
     """Configure an XPath checker against the HTML 'event not found' page
     and persist one matching Event fixture."""
     scraper = self.scraper
     scraper.checker_type = 'X'
     scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
     scraper.checker_x_path_result = u'Event not found!'
     scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'
     scraper.save()

     runtime = SchedulerRuntime()
     runtime.save()

     self.event = Event(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url='http://localhost:8010/static/site_with_json_content_type/event_not_found.html',
         checker_runtime=runtime)
     self.event.save()
 def setUp(self):
     """Checker fixtures: XPath expression + expected result on the scraper
     and one Event row.

     NOTE(review): unlike the sibling setUp variants in this file, this one
     does not assign ``checker_type`` — confirm it is configured elsewhere.
     """
     super(CheckerRunTest, self).setUp()

     scraper = self.scraper
     scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
     scraper.checker_x_path_result = u'Event was deleted!'
     scraper.save()

     runtime = SchedulerRuntime()
     runtime.save()

     self.event = Event(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url='http://localhost:8010/static/site_for_checker/event1.html',
         checker_runtime=runtime)
     self.event.save()
 def extraSetUpJSONChecker(self):
     """Configure an XPath checker for a JSON detail page ('J' content type)
     and persist one matching Event fixture."""
     scraper = self.scraper
     scraper.detail_page_content_type = 'J'
     scraper.checker_type = 'X'
     scraper.checker_x_path = u'event_not_found'
     scraper.checker_x_path_result = u'Event not found!'
     scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'
     scraper.save()

     runtime = SchedulerRuntime()
     runtime.save()

     self.event = Event(
         title='Event 1',
         event_website=self.event_website,
         description='Event 1 description',
         url='http://localhost:8010/static/site_with_json_content_type/event_not_found.json',
         checker_runtime=runtime)
     self.event.save()
    def setUpScraperJSChecker(self, path):
        """Configure an XPath checker for the JS test site rooted at *path*
        and persist a single Event fixture for it."""
        super(ScraperJSRunTest, self).setUp()

        # Apply the checker configuration in one pass.
        for attr, value in (
                ('checker_type', 'X'),
                ('checker_x_path', u'//div[@class="event_not_found"]/div/text()'),
                ('checker_ref_url', u'%ssite_with_js/event_not_found.html' % path)):
            setattr(self.scraper, attr, value)
        self.scraper.save()

        runtime = SchedulerRuntime()
        runtime.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='%ssite_with_js/event_not_found.html' % path,
            checker_runtime=runtime)
        self.event.save()
    def setUp(self):
        """Shared checker fixtures: XPath checker config on the scraper plus
        one Event row pointing at the checker test site."""
        super(CheckerRunTest, self).setUp()

        scraper = self.scraper
        scraper.checker_type = "X"
        scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        scraper.checker_x_path_result = u"Event was deleted!"
        scraper.checker_ref_url = u"http://localhost:8010/static/site_for_checker/event_not_found.html"
        scraper.save()

        runtime = SchedulerRuntime()
        runtime.save()

        event_kwargs = dict(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url="http://localhost:8010/static/site_for_checker/event1.html",
            checker_runtime=runtime,
        )
        self.event = Event(**event_kwargs)
        self.event.save()
    def extraSetUpHTMLChecker(self):
        """Create a dedicated Checker model bound to the URL attribute and an
        Event fixture pointing at the HTML 'event not found' page."""
        checker = Checker()
        checker.scraped_obj_attr = self.soa_url
        checker.scraper = self.scraper
        checker.checker_type = "X"
        checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        checker.checker_x_path_result = "Event not found!"
        checker.checker_ref_url = "http://localhost:8010/static/site_with_json_content_type/event_not_found.html"
        checker.save()
        self.checker = checker

        runtime = SchedulerRuntime()
        runtime.save()

        self.event = Event(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url="http://localhost:8010/static/site_with_json_content_type/event_not_found.html",
            checker_runtime=runtime,
        )
        self.event.save()
Example #18
0
class CheckerRunTest(ScraperTest):
    """Tests for the scraper-level checker.

    Exercises both checker types visible below: 'X' (XPath match against a
    reference page decides deletion) and '4' (HTTP 404 decides deletion),
    asserting whether the fixture Event survives a checker run.
    """

    def setUp(self):
        # Common fixtures: XPath checker config on the scraper and one
        # Event pointing at the checker test site.
        super(CheckerRunTest, self).setUp()

        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def test_checker_test_wrong_checker_config(self):
        # An empty checker_ref_url is an invalid config: the test run must
        # abort with CloseSpider.
        self.scraper.checker_ref_url = ''
        self.scraper.save()

        self.assertRaises(CloseSpider, self.run_checker_test, 1)

    def test_none_type(self):
        # Checker type 'N' (none): running the checker must raise CloseSpider.
        self.scraper.checker_type = 'N'
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        # Page exists and XPath does not match: event is kept.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        # Same as above but with a blank expected-result string.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        # Detail page 404s: event is deleted even for an XPath-type checker.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        # With num_zero_actions already at 3, a checker run without
        # do_action-style deletion leaves the event in place.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()

        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()

        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        # event2.html exists but matches the "not found" XPath: delete.

        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # XPath match with blank expected-result string still deletes.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    @unittest.skip(
        "Skipped due to unresolved problem with IMAGES_STORE setting not used from scraper_test.py."
    )
    def test_x_path_type_404_delete_with_img(self):
        # Deleting an event must also remove its associated image file.
        path = os.path.join(self.PROJECT_ROOT, 'imgs/event_image.jpg')
        if not os.path.exists(path):
            file = open(path, "w")
            # NOTE(review): '\s' is an invalid escape sequence; the literal
            # was presumably meant to read "Let's".
            file.write('Let\s assume this is an image!')
            file.close()

        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()

        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()

        self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        self.assertFalse(os.path.exists(path))

    def test_404_type_404_delete(self):
        # Checker type '4': an HTTP 404 deletes the event.
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # Checker type '4' ignores XPath matches: event survives.
        self.scraper.checker_type = '4'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
Example #19
0
 def test_double_standard_id_field(self):
     """With title+description as ID fields (URL disabled), only the event
     matching both fields counts as already existing."""
     runtime = SchedulerRuntime()
     runtime.save()
     fixtures = (
         ('Event 1', 'Event 1 description',
          'http://localhost:8010/static/site_generic/event5.html'),
         ('Event 2', 'Event 1 description',
          'http://localhost:8010/static/site_generic/event6.html'),
         ('Event 1', 'Event 2 description',
          'http://localhost:8010/static/site_generic/event7.html'),
     )
     for title, description, url in fixtures:
         Event(title=title, event_website=self.event_website,
             description=description, url=url,
             checker_runtime=runtime).save()

     # ID fields: URL off, title and description on.
     self.soa_url.id_field = False
     self.soa_url.save()
     self.soa_title.id_field = True
     self.soa_title.save()
     self.soa_desc.id_field = True
     self.soa_desc.save()

     self.run_event_spider(1)

     self.assertEqual(len(Event.objects.all()), 6)
     self.assertEqual(Event.objects.filter(description='Event 1 description').count(), 2)
class CheckerRunTest(ScraperTest):
    """Checker tests driven by a dedicated ``Checker`` model.

    NOTE(review): this block is corrupted — URLs were redacted to
    ``http://*****:*****`` and several source lines were merged together
    during extraction (see the ``checker_ref_url`` assignments and the line
    where ``self.event.url`` runs into an ``@unittest.skip`` decorator), so
    it is not syntactically valid as-is. Restore from upstream before use.
    """
    
    def setUp(self):
        super(CheckerRunTest, self).setUp()
        
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event was deleted!'
        self.checker.checker_ref_url = 'http://*****:*****@class="event_not_found"]/div/text()'
        self.checker2.checker_x_path_result = 'Event was deleted!'
        self.checker2.checker_ref_url = 'http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()'
        self.checker.save()
        self.event.url = 'http://*****:*****@unittest.skip("Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings.")
    def test_checker_test_wrong_checker_config(self):
        self.checker.checker_ref_url = ''
        self.checker.save()
        
        # NOTE(review): run_checker_test(1) is invoked eagerly here, so
        # assertRaises receives its return value rather than a callable;
        # the conventional form is assertRaises(CloseSpider, self.run_checker_test, 1).
        self.assertRaises(CloseSpider, self.run_checker_test(1))
Example #21
0
class ScraperJSONRunTest(ScraperTest):
    """Tests for scraping and checking a site served with JSON content type.

    NOTE(review): this block is partially corrupted — the ``checker_ref_url``
    line inside ``extraSetUpHTMLChecker`` was redacted/merged with lines from
    a swallowed test method during extraction, so that method is incomplete
    and not syntactically valid as-is. Restore from upstream before use.
    """

    def setUpScraperJSONDefaultScraper(self):
        # Point the scrape paths at JSON keys and switch the scraper to
        # JSON ('J') content type against the JSON main page.
        self.se_base.x_path = u'response.events'
        self.se_base.save()
        self.se_title.x_path = u'title'
        self.se_title.save()
        self.se_url.x_path = u'url'
        self.se_url.save()
        self.se_desc.x_path = u'description'
        self.se_desc.from_detail_page = False
        self.se_desc.save()

        self.scraper.content_type = 'J'
        self.scraper.save()

        self.event_website.url = os.path.join(
            self.SERVER_URL, 'site_with_json_content_type/event_main.json')
        self.event_website.save()

    def extraSetUpHTMLChecker(self):
        # NOTE(review): corrupted below — checker_ref_url was redacted and
        # merged with statements belonging to another method.
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event not found!'
        self.scraper.checker_ref_url = u'http://*****:*****@class="description"]/text()'
        self.se_desc.from_detail_page = True
        self.se_desc.save()
        self.run_event_spider(1)
        #log.msg(unicode(Event.objects.all()), level=log.INFO)
        self.assertEqual(
            len(
                Event.objects.filter(
                    description='Event Detail Page 1 Description')), 1)

    def test_detail_page_json(self):
        # Detail page also served as JSON: description scraped from the
        # nested event_details.description key.
        self.setUpScraperJSONDefaultScraper()
        self.scraper.detail_page_content_type = 'J'
        self.scraper.save()
        self.se_url.x_path = u'json_url'
        self.se_url.save()
        self.se_desc.x_path = u'event_details.description'
        self.se_desc.from_detail_page = True
        self.se_desc.save()
        self.run_event_spider(1)
        #log.msg(unicode(Event.objects.all()), level=log.INFO)
        self.assertEqual(
            len(
                Event.objects.filter(
                    description='Event Detail Page 1 Description')), 1)

    def test_checker_x_path_type_x_path_delete(self):
        # HTML checker XPath matches: the single event is deleted.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpHTMLChecker()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_checker_x_path_type_x_path_no_delete(self):
        # Wrong checker XPath: no match, event is kept.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpHTMLChecker()
        self.scraper.checker_x_path = u'//div[@class="this_is_the_wrong_xpath"]/div/text()'
        self.scraper.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_json_checker_x_path_type_x_path_delete(self):
        # JSON checker path matches: the single event is deleted.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_json_checker_x_path_type_x_path_no_delete(self):
        # Wrong JSON checker path: no match, event is kept.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.scraper.checker_x_path = u'this_is_the_wrong_xpath'
        self.scraper.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
class CheckerRunTest(ScraperTest):
    """Tests for the scraper-level checker (double-quoted-string variant).

    Covers checker type 'X' (XPath match decides deletion), type '4'
    (HTTP 404 decides deletion), and cleanup of associated image files
    in flat and thumbnail directory layouts.
    """

    def setUp(self):
        # Common fixtures: XPath checker config on the scraper and one
        # Event pointing at the checker test site.
        super(CheckerRunTest, self).setUp()

        self.scraper.checker_type = "X"
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u"Event was deleted!"
        self.scraper.checker_ref_url = u"http://localhost:8010/static/site_for_checker/event_not_found.html"
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url="http://localhost:8010/static/site_for_checker/event1.html",
            checker_runtime=scheduler_rt,
        )
        self.event.save()

    def test_none_type(self):
        # Checker type 'N' (none): running the checker must raise CloseSpider.
        self.scraper.checker_type = "N"
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        # Page exists and XPath does not match: event is kept.
        self.event.url = "http://localhost:8010/static/site_for_checker/event1.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        # Same as above but with a blank expected-result string.
        self.scraper.checker_x_path_result = ""
        self.event.url = "http://localhost:8010/static/site_for_checker/event1.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        # Detail page 404s: event is deleted even for an XPath-type checker.
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        # With num_zero_actions already at 3, this checker run leaves the
        # event in place.
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.save()

        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()

        kwargs = {"id": 1, "do_action": "yes", "run_type": "TASK"}
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()

        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        # event2.html exists but matches the "not found" XPath: delete.

        self.event.url = "http://localhost:8010/static/site_for_checker/event2.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # XPath match with blank expected-result string still deletes.
        self.scraper.checker_x_path_result = ""
        self.event.url = "http://localhost:8010/static/site_for_checker/event2.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        # Create a dummy event_image.jpg in each directory (relative to
        # PROJECT_ROOT), creating the directories as needed; returns the
        # list of created file paths.
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, "event_image.jpg")
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                file = open(path, "w")
                # NOTE(review): '\s' is an invalid escape sequence; the
                # literal was presumably meant to read "Let's".
                file.write("Let\s assume this is an image!")
                file.close()
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        # Shared scenario: event with an image attribute whose page 404s;
        # the checker must delete the event AND its image files.
        img_paths = self._create_imgs_in_dirs(img_dirs)

        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = "I"
        self.soa_desc.save()

        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.description = "event_image.jpg"
        self.event.save()

        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))

    def test_delete_with_img_flat_no_thumbs(self):
        img_dirs = ["imgs/"]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_flat_with_thumbs(self):
        img_dirs = ["imgs/"]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_all_no_thumbs(self):
        img_dirs = ["imgs/full/"]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_all_with_thumbs(self):
        img_dirs = ["imgs/full/", "imgs/thumbs/medium/", "imgs/thumbs/small/"]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_thumbs_with_thumbs(self):
        img_dirs = ["imgs/thumbs/medium/", "imgs/thumbs/small/"]
        self._run_img_test_with_dirs(img_dirs)

    def test_404_type_404_delete(self):
        # Checker type '4': an HTTP 404 deletes the event.
        self.scraper.checker_type = "4"
        self.scraper.save()
        self.event.url = "http://localhost:8010/static/site_for_checker/event_which_is_not_there.html"
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # Checker type '4' ignores XPath matches: event survives.
        self.scraper.checker_type = "4"
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    @unittest.skip(
        "Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings."
    )
    def test_checker_test_wrong_checker_config(self):
        self.scraper.checker_ref_url = ""
        self.scraper.save()

        # NOTE(review): run_checker_test(1) is invoked eagerly, so
        # assertRaises receives its return value rather than a callable;
        # the conventional form is assertRaises(CloseSpider, self.run_checker_test, 1).
        self.assertRaises(CloseSpider, self.run_checker_test(1))
class CheckerRunTest(ScraperTest):
    """Tests for the scraper-level checker (single-quoted-string variant).

    Covers checker type 'X' (XPath match decides deletion), type '4'
    (HTTP 404 decides deletion), and cleanup of associated image files
    in flat and thumbnail directory layouts.

    NOTE(review): a near-identical class of the same name appears elsewhere
    in this file; in one module the later definition would shadow this one.
    """
    
    def setUp(self):
        # Common fixtures: XPath checker config on the scraper and one
        # Event pointing at the checker test site.
        super(CheckerRunTest, self).setUp()
        
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()
        
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        
        self.event = Event(title='Event 1', event_website=self.event_website,
            description='Event 1 description', 
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()
    
    
    def test_checker_test_wrong_checker_config(self):
        # Empty checker_ref_url is invalid config: run aborts with CloseSpider.
        self.scraper.checker_ref_url = ''
        self.scraper.save()
        
        self.assertRaises(CloseSpider, self.run_checker_test, 1)
    
    
    def test_none_type(self):
        # Checker type 'N' (none): running the checker must raise CloseSpider.
        self.scraper.checker_type = 'N'
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)
    
    
    def test_x_path_type_keep_video(self):
        # Page exists and XPath does not match: event is kept.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
    
    
    def test_x_path_type_blank_result_field_keep_video(self):
        # Same as above but with a blank expected-result string.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
    
    
    def test_x_path_type_404_delete(self):
        # Detail page 404s: event is deleted even for an XPath-type checker.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
    
    
    def test_x_path_type_404_delete_with_zero_actions(self):
        # With num_zero_actions already at 3, this checker run leaves the
        # event in place.
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        
        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()
        
        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()
        
        self.assertEqual(len(Event.objects.all()), 1)
        
    
    def test_x_path_type_x_path_delete(self):
        # event2.html exists but matches the "not found" XPath: delete.
        
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
    
    
    def test_x_path_type_blank_result_field_x_path_delete(self):
        # XPath match with blank expected-result string still deletes.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
    

    def _create_imgs_in_dirs(self, img_dirs):
        # Create a dummy event_image.jpg in each directory (relative to
        # PROJECT_ROOT), creating the directories as needed; returns the
        # list of created file paths.
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, 'event_image.jpg')
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                file = open(path,"w")
                # NOTE(review): '\s' is an invalid escape sequence; the
                # literal was presumably meant to read "Let's".
                file.write('Let\s assume this is an image!')
                file.close()
            img_paths.append(path)
        return img_paths

    
    def _run_img_test_with_dirs(self, img_dirs):
        # Shared scenario: event with an image attribute whose page 404s;
        # the checker must delete the event AND its image files.
        img_paths = self._create_imgs_in_dirs(img_dirs)

        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()
        
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()

        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))


    def test_delete_with_img_flat_no_thumbs(self):
        img_dirs = ['imgs/',]
        self._run_img_test_with_dirs(img_dirs)


    def test_delete_with_img_flat_with_thumbs(self):
        img_dirs = ['imgs/',]
        self._run_img_test_with_dirs(img_dirs)


    def test_delete_with_img_all_no_thumbs(self):
        img_dirs = ['imgs/full/',]
        self._run_img_test_with_dirs(img_dirs)
    

    def test_delete_with_img_all_with_thumbs(self):
        img_dirs = ['imgs/full/', 'imgs/thumbs/medium/', 'imgs/thumbs/small/',]
        self._run_img_test_with_dirs(img_dirs)


    def test_delete_with_img_thumbs_with_thumbs(self):
        img_dirs = ['imgs/thumbs/medium/', 'imgs/thumbs/small/',]
        self._run_img_test_with_dirs(img_dirs)

    
    def test_404_type_404_delete(self):
        # Checker type '4': an HTTP 404 deletes the event.
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
    
    
    def test_404_type_x_path_delete(self):
        # Checker type '4' ignores XPath matches: event survives.
        self.scraper.checker_type = '4'
        self.scraper.save()
        
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
class ScraperJSRunTest(ScraperTest):
    """Spider and checker runs with and without JavaScript rendering.

    NOTE(review): this class was damaged in extraction — a URL literal was
    redacted to '*****:*****' and adjacent source lines were merged, so
    setUpScraperJSDefaultScraper below is not syntactically valid, `path` is
    undefined, and helpers referenced by the tests (setUpScraperJSChecker,
    setUpScraperJSDockerScraper) are missing from the visible text.
    Restore this class from upstream before running.
    """
    
    def setUpScraperJSDefaultScraper(self):
        # NOTE(review): corrupted line — a redacted base URL was fused with an
        # XPath fragment from a later statement; `path` is never assigned here.
        self.event_website.url = os.path.join('http://*****:*****@class="event_not_found"]/div/text()'
        self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path
        self.scraper.save()
        
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        
        # Reference event pointing at the JS test site's "not found" page.
        self.event = Event(title='Event 1', event_website=self.event_website,
            description='Event 1 description', 
            url='%ssite_with_js/event_not_found.html' % path,
            checker_runtime=scheduler_rt)
        self.event.save()

    def setUpScraperJSDefaultChecker(self):
        # Checker against the local static server (no JS rendering).
        self.setUpScraperJSChecker('http://localhost:8010/static/')
    
    def setUpScraperJSDockerChecker(self):
        # Checker via the Docker host address with JS rendering enabled on
        # detail page type 1.
        self.setUpScraperJSChecker('http://10.0.2.2:8010/static/')
        self.rpt_dp1.render_javascript = True
        self.rpt_dp1.save()
    


    def test_default_no_scrapyjs_main_page(self):
        # Without JS rendering only one (static) event is scraped.
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_default_no_scrapyjs_detail_page(self):
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.filter(description='Event 1 description')), 1)

    def test_activated_scrapyjs_main_page(self):
        # With JS rendering the JS-generated event is scraped as well (2 total).
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.all()), 2)

    def test_activated_scrapyjs_detail_page(self):
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.filter(description='Event 1 JS description')), 1)

    def test_only_main_page_scrapyjs_main_page(self):
        # JS rendering on the main page only: detail pages stay un-rendered,
        # so only the static description is found.
        self.setUpScraperJSDockerScraper()
        self.event_website.url = os.path.join('http://10.0.2.2:8010/static/', 'site_with_js/event_main.html')
        self.event_website.save()
        self.rpt_dp1.render_javascript = False
        self.rpt_dp1.save()

        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.all()), 2)
        self.assertEqual(len(Event.objects.filter(description='Event 1 description')), 1)
        self.assertEqual(len(Event.objects.filter(description='Event 1 JS description')), 0)

    def test_default_no_scrapyjs_checker_delete(self):
        # Static marker text matches without JS rendering -> event deleted.
        self.setUpScraperJSDefaultChecker()
        self.scraper.checker_x_path_result = u'Event not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_default_no_scrapyjs_checker_no_delete(self):
        # JS-only marker text cannot match without rendering -> event kept.
        self.setUpScraperJSDefaultChecker()
        self.scraper.checker_x_path_result = u'Event JS not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_activated_scrapyjs_checker_delete(self):
        # With rendering the JS marker is present -> event deleted.
        self.setUpScraperJSDockerChecker()
        self.scraper.checker_x_path_result = u'Event JS not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_activated_scrapyjs_checker_no_delete(self):
        # NOTE(review): asserts the event is kept although the static marker
        # should exist in the rendered page too — presumably JS rewrites the
        # marker text; confirm against the fixture site.
        self.setUpScraperJSDockerChecker()
        self.scraper.checker_x_path_result = u'Event not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
# Example #25 (score: 0)
class ScraperJSONRunTest(ScraperTest):
    """Scraping JSON responses: JSONPath-style selectors with content_type 'J'.

    NOTE(review): extraction damage in this class — extraSetUpHTMLChecker's
    checker_ref_url was redacted and merged with the tail of an unrelated
    test method, and test_multiple_detail_pages' Event.objects.filter(...)
    call was merged with checker statements from another test. The helper
    extraSetUpJSONChecker used below is also missing from the visible text.
    Restore from upstream before running.
    """
    def setUpScraperJSONDefaultScraper(self):
        # Switch all scraper elements to JSON-path selectors and mark the
        # main page as JSON content.
        self.se_base.x_path = 'response.events'
        self.se_base.save()
        self.se_title.x_path = 'title'
        self.se_title.save()
        self.se_url.x_path = 'url'
        self.se_url.save()
        self.se_desc.x_path = 'description'
        self.se_desc.request_page_type = 'MP'
        self.se_desc.save()
        self.se_es_1.x_path = 'title'
        self.se_es_1.save()

        self.rpt_mp.content_type = 'J'
        self.rpt_mp.save()

        self.event_website.url = os.path.join(
            self.SERVER_URL, 'site_with_json_content_type/event_main.json')
        self.event_website.save()

    def extraSetUpHTMLChecker(self):
        # X-path checker on an HTML "not found" reference page.
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event not found!'
        # NOTE(review): corrupted line — the redacted ref URL was fused with
        # an HTML XPath; the statements after it belong to a different method.
        self.checker.checker_ref_url = 'http://*****:*****@class="description"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()
        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        self.assertEqual(
            len(
                Event.objects.filter(
                    description='Event Detail Page 1 Description')), 1)

    def test_detail_page_json(self):
        # JSON detail page: description extracted via JSON-path from DP1.
        self.setUpScraperJSONDefaultScraper()
        self.rpt_dp1.content_type = 'J'
        self.rpt_dp1.save()
        self.se_url.x_path = 'json_url'
        self.se_url.save()
        self.se_desc.x_path = 'event_details.description'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()
        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        self.assertEqual(
            len(
                Event.objects.filter(
                    description='Event Detail Page 1 Description')), 1)

    def test_multiple_detail_pages(self):
        # Two detail page types: DP1 (HTML) and DP2 (JSON) for the same event.
        self.setUpScraperJSONDefaultScraper()
        self.se_desc.x_path = '//div/div[@class="description2"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()

        self.soa_url.id_field = False
        self.soa_url.save_to_db = False
        self.soa_url.save()

        self.soa_url2.save_to_db = False
        self.soa_url2.save()

        self.rpt_dp2 = RequestPageType(page_type='DP2',
                                       scraper=self.scraper,
                                       scraped_obj_attr=self.soa_url2,
                                       content_type='J')
        self.rpt_dp2.save()

        self.se_url2 = ScraperElem(scraped_obj_attr=self.soa_url2,
                                   scraper=self.scraper,
                                   x_path='json_url',
                                   request_page_type='MP')
        self.se_url2.save()

        self.se_desc2 = ScraperElem(scraped_obj_attr=self.soa_desc2,
                                    scraper=self.scraper,
                                    x_path='event_details.description2',
                                    request_page_type='DP2',
                                    mandatory=False)
        self.se_desc2.save()

        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        # NOTE(review): corrupted span — the filter(...) call is unterminated
        # (a redacted URL kwarg swallowed the closing parens) and the
        # checker statements below belong to a different test.
        events = Event.objects.filter(
            title='Event 1',
            #url='http://*****:*****@class="this_is_the_wrong_xpath"]/div/text()'
        self.checker.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_json_checker_x_path_type_x_path_delete(self):
        # JSON checker path matches -> event deleted.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_json_checker_x_path_type_x_path_no_delete(self):
        # Wrong JSON checker path -> nothing matches, event kept.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.checker.checker_x_path = 'this_is_the_wrong_xpath'
        self.checker.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
class CheckerRunTest(ScraperTest):
    """Checker model based tests (Checker objects, not scraper-level config).

    NOTE(review): setUp is corrupted — the first checker's ref URL was
    redacted and merged with the construction of a second checker
    (`self.checker2`), and the creation of `self.event` / its
    SchedulerRuntime (used throughout the tests) is missing from the
    visible text. test_x_path_type_keep is similarly merged with lines of
    another test. Restore from upstream before running.
    """
    def setUp(self):
        super(CheckerRunTest, self).setUp()

        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event was deleted!'
        # NOTE(review): corrupted line — redacted URL fused with a second
        # checker's XPath; checker2's own setup lines follow.
        self.checker.checker_ref_url = 'http://*****:*****@class="event_not_found"]/div/text()'
        self.checker2.checker_x_path_result = 'Event was deleted!'
        self.checker2.checker_ref_url = 'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.checker2.save()

    @unittest.skip(
        "Skipped, CloseSpider not visible in test anymore after having reworked settings initialization"
    )
    def test_no_checker(self):
        self.checker.delete()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep(self):
        # NOTE(review): corrupted method — the first assignment merges a
        # redacted URL with a checker_x_path from another test, and the
        # trailing statements appear to belong to an x-path-delete test.
        self.event.url = 'http://*****:*****@class="oh_my_wrong_xpath_for_delete"]/div/text()'
        self.checker.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        # Blank expected result: any x-path match deletes the event.
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        # Create a randomly named dummy image file in each directory and
        # point the event's description attribute at it.
        img_paths = []
        file_name = 'event_image_{rnd}.jpg'.format(
            rnd=str(random.randint(0, 1000000)))
        self.event.description = file_name
        self.event.save()
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, file_name)
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                # NOTE(review): '\s' is an invalid escape sequence (kept
                # byte-identical here); also prefer a `with` block upstream.
                file = open(path, "w")
                file.write('Let\s assume this is an image!')
                file.close()
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        # Shared scenario: delete event with image attribute, expect the
        # image files in all given directories to be removed as well.
        img_paths = self._create_imgs_in_dirs(img_dirs)

        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()

        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))

    def test_delete_with_img_flat_no_thumbs(self):
        img_dirs = [
            'imgs/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_flat_with_thumbs(self):
        img_dirs = [
            'imgs/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_all_no_thumbs(self):
        img_dirs = [
            'imgs/full/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_all_with_thumbs(self):
        img_dirs = [
            'imgs/full/',
            'imgs/thumbs/medium/',
            'imgs/thumbs/small/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_delete_with_img_thumbs_with_thumbs(self):
        img_dirs = [
            'imgs/thumbs/medium/',
            'imgs/thumbs/small/',
        ]
        self._run_img_test_with_dirs(img_dirs)

    def test_404_type_404_delete(self):
        # 404 checker: missing page -> event deleted.
        self.checker.checker_type = '4'
        self.checker.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        # 404 checker ignores x-path matches: page responds 200 -> kept.
        self.checker.checker_type = '4'
        self.checker.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    @unittest.skip(
        "Skipped, CloseSpider can't be catched from within test env, other option: direct access to Scrapy log strings."
    )
    def test_checker_test_wrong_checker_config(self):
        self.checker.checker_ref_url = ''
        self.checker.save()

        self.assertRaises(CloseSpider, self.run_checker_test(1))
# Example #27 (score: 0)
class ScraperJSRunTest(ScraperTest):
    """JS-rendering tests, legacy variant (scraper-level render_javascript).

    NOTE(review): setUpScraperJSDefaultScraper is corrupted — the base URL
    was redacted and fused with an XPath, leaving `path` undefined; the
    helpers setUpScraperJSChecker and setUpScraperJSDockerScraper used by
    the tests are missing from the visible text. Restore from upstream.
    """
    def setUpScraperJSDefaultScraper(self):
        # NOTE(review): corrupted line — redacted URL merged with an XPath
        # fragment; `path` is never assigned in the visible code.
        self.event_website.url = os.path.join('http://*****:*****@class="event_not_found"]/div/text()'
        self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(title='Event 1',
                           event_website=self.event_website,
                           description='Event 1 description',
                           url='%ssite_with_js/event_not_found.html' % path,
                           checker_runtime=scheduler_rt)
        self.event.save()

    def setUpScraperJSDefaultChecker(self):
        # Checker against the local static server (no JS rendering).
        self.setUpScraperJSChecker('http://localhost:8010/static/')

    def setUpScraperJSDockerChecker(self):
        # Checker via Docker host address with JS rendering on the scraper.
        self.setUpScraperJSChecker('http://10.0.2.2:8010/static/')
        self.scraper.render_javascript = True
        self.scraper.save()

    def test_default_no_scrapyjs_main_page(self):
        # No JS rendering: only the static event is scraped.
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_default_no_scrapyjs_detail_page(self):
        self.setUpScraperJSDefaultScraper()
        self.run_event_spider(1)
        self.assertEqual(
            len(Event.objects.filter(description='Event 1 description')), 1)

    def test_activated_scrapyjs_main_page(self):
        # JS rendering active: the JS-generated event is found too.
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        self.assertEqual(len(Event.objects.all()), 2)

    def test_activated_scrapyjs_detail_page(self):
        self.setUpScraperJSDockerScraper()
        self.run_event_spider(1)
        self.assertEqual(
            len(Event.objects.filter(description='Event 1 JS description')), 1)

    def test_default_no_scrapyjs_checker_delete(self):
        # Static marker matches without rendering -> event deleted.
        self.setUpScraperJSDefaultChecker()
        self.scraper.checker_x_path_result = u'Event not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_default_no_scrapyjs_checker_no_delete(self):
        # JS-only marker cannot match without rendering -> event kept.
        self.setUpScraperJSDefaultChecker()
        self.scraper.checker_x_path_result = u'Event JS not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_activated_scrapyjs_checker_delete(self):
        # JS marker matches with rendering -> event deleted.
        self.setUpScraperJSDockerChecker()
        self.scraper.checker_x_path_result = u'Event JS not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_activated_scrapyjs_checker_no_delete(self):
        # NOTE(review): asserts the event is kept — presumably the rendered
        # page replaces the static marker text; confirm against the fixture.
        self.setUpScraperJSDockerChecker()
        self.scraper.checker_x_path_result = u'Event not found'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
class ScraperJSONRunTest(ScraperTest):
    """JSON scraping tests (duplicate variant with compact formatting).

    NOTE(review): extraction damage — extraSetUpHTMLChecker's ref URL was
    redacted and merged with another method's tail, and the
    Event.objects.filter(...) call in test_multiple_detail_pages is
    unterminated and merged with checker lines. extraSetUpJSONChecker is
    referenced but missing from the visible text. Restore from upstream.
    """

    def setUpScraperJSONDefaultScraper(self):
        # JSON-path selectors plus JSON content type on the main page.
        self.se_base.x_path = 'response.events'
        self.se_base.save()
        self.se_title.x_path = 'title'
        self.se_title.save()
        self.se_url.x_path = 'url'
        self.se_url.save()
        self.se_desc.x_path = 'description'
        self.se_desc.request_page_type = 'MP'
        self.se_desc.save()
        self.se_es_1.x_path = 'title'
        self.se_es_1.save()

        self.rpt_mp.content_type = 'J'
        self.rpt_mp.save()

        self.event_website.url = os.path.join(self.SERVER_URL, 'site_with_json_content_type/event_main.json')
        self.event_website.save()

    def extraSetUpHTMLChecker(self):
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = 'X'
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = 'Event not found!'
        # NOTE(review): corrupted line — redacted ref URL fused with an HTML
        # XPath; the statements below belong to a different method.
        self.checker.checker_ref_url = 'http://*****:*****@class="description"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()
        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1)


    def test_detail_page_json(self):
        # JSON detail page (DP1) supplies the description via JSON-path.
        self.setUpScraperJSONDefaultScraper()
        self.rpt_dp1.content_type = 'J'
        self.rpt_dp1.save()
        self.se_url.x_path = 'json_url'
        self.se_url.save()
        self.se_desc.x_path = 'event_details.description'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()
        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1)


    def test_multiple_detail_pages(self):
        # DP1 (HTML) and DP2 (JSON) detail pages for the same scraped event.
        self.setUpScraperJSONDefaultScraper()
        self.se_desc.x_path = '//div/div[@class="description2"]/text()'
        self.se_desc.request_page_type = 'DP1'
        self.se_desc.save()

        self.soa_url.id_field = False
        self.soa_url.save_to_db = False
        self.soa_url.save()

        self.soa_url2.save_to_db = False
        self.soa_url2.save()

        self.rpt_dp2 = RequestPageType(page_type='DP2', scraper=self.scraper, scraped_obj_attr=self.soa_url2, content_type='J')
        self.rpt_dp2.save()
        
        self.se_url2 = ScraperElem(scraped_obj_attr=self.soa_url2, scraper=self.scraper, 
            x_path='json_url', request_page_type='MP')
        self.se_url2.save()
        
        self.se_desc2 = ScraperElem(scraped_obj_attr=self.soa_desc2, scraper=self.scraper, 
            x_path='event_details.description2', request_page_type='DP2', mandatory=False)
        self.se_desc2.save()
        

        self.run_event_spider(1)
        #logging.info(unicode(Event.objects.all()))
        # NOTE(review): corrupted span — filter(...) is unterminated (a
        # redacted URL kwarg swallowed the closing parens) and the checker
        # statements below belong to a different test.
        events = Event.objects.filter(
            title='Event 1',
            #url='http://*****:*****@class="this_is_the_wrong_xpath"]/div/text()'
        self.checker.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)


    def test_json_checker_x_path_type_x_path_delete(self):
        # JSON checker path matches -> event deleted.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)


    def test_json_checker_x_path_type_x_path_no_delete(self):
        # Wrong JSON checker path -> no match, event kept.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.checker.checker_x_path = 'this_is_the_wrong_xpath'
        self.checker.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
class ScraperJSONRunTest(ScraperTest):
    """JSON scraping tests, legacy API variant (scraper-level checker fields,
    from_detail_page flag instead of request_page_type).

    NOTE(review): extraSetUpHTMLChecker is corrupted — the ref URL was
    redacted and merged with the tail of a detail-page test; the helper
    extraSetUpJSONChecker used below is missing from the visible text.
    Restore from upstream before running.
    """

    def setUpScraperJSONDefaultScraper(self):
        # JSON-path selectors; whole scraper marked as JSON content.
        self.se_base.x_path = u'response.events'
        self.se_base.save()
        self.se_title.x_path = u'title'
        self.se_title.save()
        self.se_url.x_path = u'url'
        self.se_url.save()
        self.se_desc.x_path = u'description'
        self.se_desc.from_detail_page = False
        self.se_desc.save()

        self.scraper.content_type = 'J'
        self.scraper.save()

        self.event_website.url = os.path.join(self.SERVER_URL, 'site_with_json_content_type/event_main.json')
        self.event_website.save()

    def extraSetUpHTMLChecker(self):
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event not found!'
        # NOTE(review): corrupted line — redacted ref URL fused with an HTML
        # XPath; the statements after it belong to a different method.
        self.scraper.checker_ref_url = u'http://*****:*****@class="description"]/text()'
        self.se_desc.from_detail_page = True
        self.se_desc.save()
        self.run_event_spider(1)
        #log.msg(unicode(Event.objects.all()), level=log.INFO)
        self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1)


    def test_detail_page_json(self):
        # JSON detail page supplies the description via JSON-path.
        self.setUpScraperJSONDefaultScraper()
        self.scraper.detail_page_content_type = 'J'
        self.scraper.save()
        self.se_url.x_path = u'json_url'
        self.se_url.save()
        self.se_desc.x_path = u'event_details.description'
        self.se_desc.from_detail_page = True
        self.se_desc.save()
        self.run_event_spider(1)
        #log.msg(unicode(Event.objects.all()), level=log.INFO)
        self.assertEqual(len(Event.objects.filter(description='Event Detail Page 1 Description')), 1)


    def test_checker_x_path_type_x_path_delete(self):
        # HTML checker x-path matches -> event deleted.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpHTMLChecker()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)


    def test_checker_x_path_type_x_path_no_delete(self):
        # Wrong HTML checker x-path -> no match, event kept.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpHTMLChecker()
        self.scraper.checker_x_path = u'//div[@class="this_is_the_wrong_xpath"]/div/text()'
        self.scraper.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)


    def test_json_checker_x_path_type_x_path_delete(self):
        # JSON checker path matches -> event deleted.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)


    def test_json_checker_x_path_type_x_path_no_delete(self):
        # Wrong JSON checker path -> no match, event kept.
        self.setUpScraperJSONDefaultScraper()
        self.extraSetUpJSONChecker()
        self.scraper.checker_x_path = u'this_is_the_wrong_xpath'
        self.scraper.save()
        self.assertEqual(len(Event.objects.all()), 1)
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)
class CheckerRunTest(ScraperTest):
    """Legacy-API checker tests: keep live events, delete on 404 or x-path
    match, and clean up image files of deleted events."""

    def setUp(self):
        super(CheckerRunTest, self).setUp()

        # X-path checker: the "not found" page carries the deletion marker.
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def test_keep_video(self):
        """Event page still exists and has no marker: the event is kept."""
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_404_delete(self):
        """A 404 on the event page deletes the event."""
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_delete_with_zero_actions(self):
        """Event survives this run despite the 404.

        NOTE(review): presumably the pre-set num_zero_actions makes the
        scheduler defer deletion — confirm against scheduler semantics.
        """
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()

        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()

        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_delete(self):
        """A page whose marker matches the checker x-path result is deleted."""
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_delete_with_img(self):
        """Deleting an event with an image attribute removes the file too."""
        path = os.path.join(self.PROJECT_ROOT, 'imgs/event_image.jpg')
        if not os.path.exists(path):
            # Context manager guarantees the handle is closed even if the
            # write fails; '\\s' keeps the original backslash-s content
            # without relying on an invalid escape sequence.
            with open(path, "w") as img_file:
                img_file.write('Let\\s assume this is an image!')

        # Description becomes a mandatory image attribute so deletion
        # cascades to the stored image file.
        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()

        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()

        self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        self.assertFalse(os.path.exists(path))
class CheckerRunTest(ScraperTest):
    """Scraper-level checker tests: x-path type ('X'), 404 type ('4'),
    none type ('N'), zero-action deferral, and image cleanup on delete."""

    def setUp(self):
        super(CheckerRunTest, self).setUp()

        # X-path checker against the dedicated "not found" reference page.
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()

    def test_checker_test_wrong_checker_config(self):
        """A checker without a ref URL aborts the checker test run."""
        self.scraper.checker_ref_url = ''
        self.scraper.save()

        self.assertRaises(CloseSpider, self.run_checker_test, 1)

    def test_none_type(self):
        """Checker type 'N' (none) refuses to run."""
        self.scraper.checker_type = 'N'
        self.scraper.save()
        self.assertRaises(CloseSpider, self.run_event_checker, 1)

    def test_x_path_type_keep_video(self):
        """Live event page without the deletion marker: event is kept."""
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_blank_result_field_keep_video(self):
        """Blank expected result but no x-path match on a live page: kept."""
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event1.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_404_delete(self):
        """X-path checker also deletes when the event page 404s."""
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_404_delete_with_zero_actions(self):
        """Event survives this run despite the 404.

        NOTE(review): presumably the pre-set num_zero_actions makes the
        scheduler defer deletion — confirm against scheduler semantics.
        """
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.event.checker_runtime.num_zero_actions = 3
        self.event.checker_runtime.save()

        kwargs = {
            'id': 1,
            'do_action': 'yes',
            'run_type': 'TASK',
        }
        checker = EventChecker(**kwargs)
        self.crawler.crawl(checker)
        self.crawler.start()

        self.assertEqual(len(Event.objects.all()), 1)

    def test_x_path_type_x_path_delete(self):
        """Page carrying the matching marker text is deleted."""
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_x_path_type_blank_result_field_x_path_delete(self):
        """Blank expected result: any x-path match deletes the event."""
        self.scraper.checker_x_path_result = ''
        self.event.url = 'http://localhost:8010/static/site_for_checker/event2.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def _create_imgs_in_dirs(self, img_dirs):
        """Create a dummy 'event_image.jpg' in every directory of ``img_dirs``
        (relative to PROJECT_ROOT) and return the list of created paths."""
        img_paths = []
        for img_dir in img_dirs:
            path = os.path.join(self.PROJECT_ROOT, img_dir, 'event_image.jpg')
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if not os.path.exists(path):
                # Context manager guarantees the handle is closed even if the
                # write fails; '\\s' keeps the original backslash-s content
                # without relying on an invalid escape sequence.
                with open(path, "w") as img_file:
                    img_file.write('Let\\s assume this is an image!')
            img_paths.append(path)
        return img_paths

    def _run_img_test_with_dirs(self, img_dirs):
        """Shared scenario: delete an event whose description is a mandatory
        image attribute; the image files in all dirs must be removed too."""
        img_paths = self._create_imgs_in_dirs(img_dirs)

        self.se_desc.mandatory = True
        self.se_desc.save()
        self.soa_desc.attr_type = 'I'
        self.soa_desc.save()

        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.description = 'event_image.jpg'
        self.event.save()

        for path in img_paths:
            self.assertTrue(os.path.exists(path))
        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)
        for path in img_paths:
            self.assertFalse(os.path.exists(path))

    def test_delete_with_img_flat_no_thumbs(self):
        self._run_img_test_with_dirs(['imgs/'])

    def test_delete_with_img_flat_with_thumbs(self):
        self._run_img_test_with_dirs(['imgs/'])

    def test_delete_with_img_all_no_thumbs(self):
        self._run_img_test_with_dirs(['imgs/full/'])

    def test_delete_with_img_all_with_thumbs(self):
        self._run_img_test_with_dirs([
            'imgs/full/',
            'imgs/thumbs/medium/',
            'imgs/thumbs/small/',
        ])

    def test_delete_with_img_thumbs_with_thumbs(self):
        self._run_img_test_with_dirs([
            'imgs/thumbs/medium/',
            'imgs/thumbs/small/',
        ])

    def test_404_type_404_delete(self):
        """404 checker: missing page -> event deleted."""
        self.scraper.checker_type = '4'
        self.scraper.save()
        self.event.url = 'http://localhost:8010/static/site_for_checker/event_which_is_not_there.html'
        self.event.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 0)

    def test_404_type_x_path_delete(self):
        """404 checker ignores x-path markers: page responds 200 -> kept."""
        self.scraper.checker_type = '4'
        self.scraper.save()

        self.run_event_checker(1)
        self.assertEqual(len(Event.objects.all()), 1)