コード例 #1
0
    def parse(self, response):
        """Yield one request per category listed on the overview page.

        Selects the class-less rows of the first table inside the
        ``col3_content`` div, takes the anchor found in each row's first
        cell and schedules a request for the linked page.

        Args:
            response: The downloaded overview page.

        Yields:
            scrapy.Request: A request for each category page, handled by
            ``self.parse_category_contents``, with the stripped anchor text
            stored under ``meta["category"]``.
        """
        log.info("Start parsing ...")

        rows = response.xpath('//div[@id="col3_content"]/table[1]/tbody/tr[not(@class)]')

        for row in rows:
            anchor = row.xpath("td[1]//a")

            href = anchor.xpath("@href").extract_first()
            name = anchor.xpath("text()").extract_first()

            # extract_first() returns None when nothing matches; a row without
            # a usable link or label cannot be turned into a category request,
            # so skip it instead of failing on None.strip() and losing the
            # remaining rows.
            if href is None or name is None:
                log.warning("Skipping category row without link or text")
                continue

            # request the category page
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_category_contents,
                meta={"category": name.strip()},
            )