# ScraperTest fixture, flattened onto a single line (line breaks and indentation lost).
# NOTE(review): the text between the SERVER_URL value and the "@href" XPath is masked
# ("*****:*****"); the enclosing method header (presumably setUp -- confirm) and the
# earlier fixture objects are not visible, and the line does not parse as written.
# Visible steps, in order:
#   - save self.se_url and a non-mandatory ScraperElem for the description
#     (from_detail_page=True);
#   - save a SchedulerRuntime and an EventWebsite (pk=1) whose url joins SERVER_URL with
#     'site_generic/event_main.html';
#   - set ITEM_PIPELINES, IMAGES_STORE (PROJECT_ROOT/imgs) and IMAGES_THUMBS through
#     settings.overrides, then create, install and configure a CrawlerProcess;
#   - connect self.record_signal to every attribute of `signals` whose name does not
#     start with '_'.
# tearDown is a no-op here.
# NOTE(review): the URL is built with os.path.join, which uses the OS path separator --
# confirm this only runs where that separator is '/'.
class ScraperTest(TestCase): SERVER_URL = 'http://*****:*****@href', from_detail_page=False) self.se_url.save() self.se_desc = ScraperElem( scraped_obj_attr=self.soa_desc, scraper=self.scraper, x_path=u'//div/div[@class="description"]/text()', from_detail_page=True, mandatory=False) self.se_desc.save() self.sched_rt = SchedulerRuntime() self.sched_rt.save() self.event_website = EventWebsite( pk=1, name=u'Event Website', scraper=self.scraper, url=os.path.join(self.SERVER_URL, 'site_generic/event_main.html'), scraper_runtime=self.sched_rt, ) self.event_website.save() settings.overrides['ITEM_PIPELINES'] = [ 'dynamic_scraper.pipelines.DjangoImagesPipeline', 'dynamic_scraper.pipelines.ValidationPipeline', 'scraper.scraper_test.DjangoWriterPipeline', ] settings.overrides['IMAGES_STORE'] = os.path.join( self.PROJECT_ROOT, 'imgs') settings.overrides['IMAGES_THUMBS'] = { 'small': (170, 170), } self.crawler = CrawlerProcess(settings) self.crawler.install() self.crawler.configure() for name, signal in vars(signals).items(): if not name.startswith('_'): dispatcher.connect(self.record_signal, signal) def tearDown(self): pass
# ScraperTest fixture, flattened onto a single line (line breaks and indentation lost).
# NOTE(review): the text between the SERVER_URL value and the "@href" XPath is masked
# ("*****:*****"); the enclosing method header (presumably setUp -- confirm) and the
# earlier fixture objects are not visible, and the line does not parse as written.
# Visible steps, in order:
#   - save self.se_url and a non-mandatory ScraperElem for the description
#     (from_detail_page=True);
#   - save a SchedulerRuntime and an EventWebsite (pk=1) whose url joins SERVER_URL with
#     'site_generic/event_main.html';
#   - connect self.record_signal to every attribute of `signals` whose name does not
#     start with '_'.
# No settings overrides or crawler construction appear in this variant.
# tearDown is a no-op here.
# NOTE(review): the URL is built with os.path.join, which uses the OS path separator --
# confirm this only runs where that separator is '/'.
class ScraperTest(TestCase): SERVER_URL = 'http://*****:*****@href', from_detail_page=False) self.se_url.save() self.se_desc = ScraperElem( scraped_obj_attr=self.soa_desc, scraper=self.scraper, x_path=u'//div/div[@class="description"]/text()', from_detail_page=True, mandatory=False) self.se_desc.save() self.sched_rt = SchedulerRuntime() self.sched_rt.save() self.event_website = EventWebsite( pk=1, name=u'Event Website', scraper=self.scraper, url=os.path.join(self.SERVER_URL, 'site_generic/event_main.html'), scraper_runtime=self.sched_rt, ) self.event_website.save() for name, signal in vars(signals).items(): if not name.startswith('_'): dispatcher.connect(self.record_signal, signal) def tearDown(self): pass
# ScraperTest fixture, flattened onto a single line (line breaks and indentation lost).
# NOTE(review): the text between the SERVER_URL value and the "@href" XPath is masked
# ("*****:*****"); the enclosing method header (presumably setUp -- confirm) and the
# earlier fixture objects are not visible, and the line does not parse as written.
# Visible steps, in order:
#   - save self.se_url (request_page_type="MP"), a non-mandatory description ScraperElem
#     (request_page_type="DP1") and self.se_es_1 (x_path "a/text()", "MP");
#   - save RequestPageType rows for "MP" and for "DP1" (the latter with
#     scraped_obj_attr=self.soa_url);
#   - save a SchedulerRuntime and an EventWebsite (pk=1) whose url joins SERVER_URL with
#     "site_generic/event_main.html";
#   - connect self.record_signal to every attribute of `signals` whose name does not
#     start with "_" (iterating over a list copy of the items).
# tearDown deletes self.event_website and then every Event row.
# NOTE(review): the URL is built with os.path.join, which uses the OS path separator --
# confirm this only runs where that separator is '/'.
class ScraperTest(TestCase): SERVER_URL = "http://*****:*****@href", request_page_type="MP" ) self.se_url.save() self.se_desc = ScraperElem( scraped_obj_attr=self.soa_desc, scraper=self.scraper, x_path='//div/div[@class="description"]/text()', request_page_type="DP1", mandatory=False, ) self.se_desc.save() self.se_es_1 = ScraperElem( scraped_obj_attr=self.soa_es_1, scraper=self.scraper, x_path="a/text()", request_page_type="MP" ) self.se_es_1.save() self.rpt_mp = RequestPageType(page_type="MP", scraper=self.scraper) self.rpt_mp.save() self.rpt_dp1 = RequestPageType(page_type="DP1", scraper=self.scraper, scraped_obj_attr=self.soa_url) self.rpt_dp1.save() self.sched_rt = SchedulerRuntime() self.sched_rt.save() self.event_website = EventWebsite( pk=1, name="Event Website", scraper=self.scraper, url=os.path.join(self.SERVER_URL, "site_generic/event_main.html"), scraper_runtime=self.sched_rt, ) self.event_website.save() for name, signal in list(vars(signals).items()): if not name.startswith("_"): dispatcher.connect(self.record_signal, signal) def tearDown(self): self.event_website.delete() Event.objects.all().delete()
# ScraperTest fixture, flattened onto a single line (line breaks and indentation lost).
# NOTE(review): the text between the SERVER_URL value and the "@href" XPath is masked
# ("*****:*****"); the enclosing method header (presumably setUp -- confirm) and the
# earlier fixture objects are not visible, and the line does not parse as written.
# Visible steps, in order:
#   - save self.se_url and a non-mandatory ScraperElem for the description
#     (from_detail_page=True);
#   - save a SchedulerRuntime and an EventWebsite (pk=1) whose url joins SERVER_URL with
#     'site_generic/event_main.html';
#   - set ITEM_PIPELINES, IMAGES_STORE (PROJECT_ROOT/imgs) and IMAGES_THUMBS through
#     settings.overrides, then create, install and configure a CrawlerProcess;
#   - connect self.record_signal to every attribute of `signals` whose name does not
#     start with '_'.
# tearDown is a no-op here.
# NOTE(review): the URL is built with os.path.join, which uses the OS path separator --
# confirm this only runs where that separator is '/'.
class ScraperTest(TestCase): SERVER_URL = 'http://*****:*****@href', from_detail_page=False) self.se_url.save() self.se_desc = ScraperElem(scraped_obj_attr=self.soa_desc, scraper=self.scraper, x_path=u'//div/div[@class="description"]/text()', from_detail_page=True, mandatory=False) self.se_desc.save() self.sched_rt = SchedulerRuntime() self.sched_rt.save() self.event_website = EventWebsite(pk=1, name=u'Event Website', scraper=self.scraper, url=os.path.join(self.SERVER_URL, 'site_generic/event_main.html'), scraper_runtime=self.sched_rt,) self.event_website.save() settings.overrides['ITEM_PIPELINES'] = [ 'dynamic_scraper.pipelines.DjangoImagesPipeline', 'dynamic_scraper.pipelines.ValidationPipeline', 'scraper.scraper_test.DjangoWriterPipeline', ] settings.overrides['IMAGES_STORE'] = os.path.join(self.PROJECT_ROOT, 'imgs') settings.overrides['IMAGES_THUMBS'] = { 'small': (170, 170), } self.crawler = CrawlerProcess(settings) self.crawler.install() self.crawler.configure() for name, signal in vars(signals).items(): if not name.startswith('_'): dispatcher.connect(self.record_signal, signal) def tearDown(self): pass
# ScraperTest fixture, flattened onto a single line (line breaks and indentation lost).
# NOTE(review): the text between the SERVER_URL value and the "@href" XPath is masked
# ("*****:*****"); the enclosing method header (presumably setUp -- confirm) and the
# earlier fixture objects are not visible, and the line does not parse as written.
# Visible steps, in order:
#   - save self.se_url (request_page_type='MP') and a non-mandatory description
#     ScraperElem (request_page_type='DP1');
#   - save RequestPageType rows for 'MP' and for 'DP1' (the latter with
#     scraped_obj_attr=self.soa_url);
#   - save a SchedulerRuntime and an EventWebsite (pk=1) whose url joins SERVER_URL with
#     'site_generic/event_main.html';
#   - connect self.record_signal to every attribute of `signals` whose name does not
#     start with '_'.
# tearDown is a no-op here.
# NOTE(review): the URL is built with os.path.join, which uses the OS path separator --
# confirm this only runs where that separator is '/'.
class ScraperTest(TestCase): SERVER_URL = 'http://*****:*****@href', request_page_type='MP') self.se_url.save() self.se_desc = ScraperElem(scraped_obj_attr=self.soa_desc, scraper=self.scraper, x_path=u'//div/div[@class="description"]/text()', request_page_type='DP1', mandatory=False) self.se_desc.save() self.rpt_mp = RequestPageType(page_type='MP', scraper=self.scraper) self.rpt_mp.save() self.rpt_dp1 = RequestPageType(page_type='DP1', scraper=self.scraper, scraped_obj_attr=self.soa_url) self.rpt_dp1.save() self.sched_rt = SchedulerRuntime() self.sched_rt.save() self.event_website = EventWebsite(pk=1, name=u'Event Website', scraper=self.scraper, url=os.path.join(self.SERVER_URL, 'site_generic/event_main.html'), scraper_runtime=self.sched_rt,) self.event_website.save() for name, signal in vars(signals).items(): if not name.startswith('_'): dispatcher.connect(self.record_signal, signal) def tearDown(self): pass