Example #1
def __init__(self, seed_urls, save_html=1, use_splash=1, screenshot_dir=None, **kwargs):
    # Spider arguments arrive as strings (e.g. via -a on the command line),
    # so the flags are coerced through int() before becoming booleans.
    self.save_html = bool(int(save_html))
    self.use_splash = bool(int(use_splash))
    # self.random_seed is assumed to be defined elsewhere on the class.
    self.random = random.Random(self.random_seed)
    # seed_urls is a comma-separated list; ensure each entry has a URL scheme.
    self.start_urls = [add_scheme_if_missing(url) for url in seed_urls.split(',')]
    self.req_count = defaultdict(int)
    super(WebsiteFinderSpider, self).__init__(name=None, screenshot_dir=screenshot_dir, **kwargs)
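
For context, here is a minimal sketch of how a spider with this constructor could be launched programmatically. The CrawlerProcess usage is standard Scrapy, but the module path, settings, and argument values are illustrative assumptions, not taken from the original project.

# Minimal usage sketch (assumed module path and settings).
from scrapy.crawler import CrawlerProcess

from website_finder import WebsiteFinderSpider  # hypothetical module

process = CrawlerProcess(settings={"LOG_LEVEL": "INFO"})
process.crawl(
    WebsiteFinderSpider,
    seed_urls="example.com,https://example.org",  # comma-separated; scheme optional
    save_html=0,               # coerced to bool(int(...)) in __init__
    use_splash=1,
    screenshot_dir="screenshots",
)
process.start()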
Example #2
def parse_row(self, response, row):
    # Called once per row of the CSV feed; row maps column names to values.
    url = add_scheme_if_missing(row["url"])
    # Carry the original URL in meta so the downstream callback can recover it.
    return scrapy.Request(url, self.parse_website, meta={"url": url})
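
parse_row(self, response, row) is the per-row callback of Scrapy's CSVFeedSpider, which suggests this method belongs to such a spider. Below is a minimal, self-contained sketch of that surrounding context; the class name, feed URL, headers, and the add_scheme_if_missing and parse_website definitions are assumptions for illustration.

# Minimal sketch of a CSVFeedSpider hosting parse_row (names and URLs assumed).
import scrapy
from scrapy.spiders import CSVFeedSpider


def add_scheme_if_missing(url):
    # Hypothetical helper: prepend http:// when the URL has no scheme.
    return url if "://" in url else "http://" + url


class UrlListSpider(CSVFeedSpider):
    name = "url_list"                               # hypothetical spider name
    start_urls = ["https://example.com/sites.csv"]  # hypothetical CSV feed
    headers = ["url"]                               # column names expected in the feed
    delimiter = ","

    def parse_row(self, response, row):
        # Called once per CSV row; row is a dict keyed by the headers above.
        url = add_scheme_if_missing(row["url"])
        return scrapy.Request(url, self.parse_website, meta={"url": url})

    def parse_website(self, response):
        # Hypothetical downstream callback: record which URL produced the page.
        yield {"url": response.meta["url"], "status": response.status}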