def __init__(self, seed_urls, save_html=1, use_splash=1, screenshot_dir=None, **kwargs):
    """Set up the spider from command-line style arguments.

    :param seed_urls: comma-separated list of starting URLs; a scheme is
        prepended to each entry when missing.
    :param save_html: truthy int flag ("0"/"1" from the CLI) — store page HTML.
    :param use_splash: truthy int flag — render pages through Splash.
    :param screenshot_dir: forwarded to the base spider.
    """
    # CLI arguments arrive as strings; int() then bool() normalizes "0"/"1".
    self.save_html = bool(int(save_html))
    self.use_splash = bool(int(use_splash))
    # Dedicated RNG seeded from the class-level random_seed for reproducibility.
    self.random = random.Random(self.random_seed)
    self.start_urls = [
        add_scheme_if_missing(seed)
        for seed in seed_urls.split(',')
    ]
    # Per-key request counter, starts at zero for unseen keys.
    self.req_count = defaultdict(int)
    super(WebsiteFinderSpider, self).__init__(
        name=None, screenshot_dir=screenshot_dir, **kwargs)
def parse_row(self, response, row):
    """Turn one input row into a request for its website.

    The row's "url" field is normalized (scheme added when missing) and a
    request is returned whose callback is ``self.parse_website``; the
    normalized URL is also carried along in ``meta`` for the callback.
    """
    target = add_scheme_if_missing(row["url"])
    return scrapy.Request(
        target,
        callback=self.parse_website,
        meta={"url": target},
    )