Example #1
0
    def parse_item(self, response):
        """Build a ``HeelsItem`` from a single scraped page.

        The item's ``comment`` holds whatever is extracted from the text of
        the element with id ``thread_subject``; ``image_urls`` holds the
        ``zoomfile`` attribute of every ``<img>`` nested under an
        ``ignore_js_op`` element; ``source_url`` records ``response.url``.

        :param response: response object for the page being scraped.
        :return: the populated ``HeelsItem``.
        """
        subject_xpath = '//*[@id="thread_subject"]/text()'
        zoomfile_xpath = '//ignore_js_op//img/@zoomfile'

        subject_nodes = response.xpath(subject_xpath)
        image_nodes = response.xpath(zoomfile_xpath)

        scraped = HeelsItem()
        scraped['comment'] = subject_nodes.extract()
        scraped['image_urls'] = image_nodes.extract()
        scraped['source_url'] = response.url
        return scraped
Example #2
0
    def parse_post_detail(self, response):
        """Build a ``HeelsItem`` from a blog post page.

        ``comment`` is taken from the page ``<title>`` text, ``image_urls``
        from the ``src`` of every ``<img>`` found under a fixed path inside
        the element with id ``Blog1``, and ``source_url`` from
        ``response.url``.

        :param response: response object for the post page.
        :return: the populated ``HeelsItem``.
        """
        # NOTE(review): HtmlXPathSelector/.select() appear to be the legacy
        # Scrapy selector API -- confirm against the Scrapy version in use
        # before migrating to response.xpath().
        selector = HtmlXPathSelector(response)

        title_xpath = '//title/text()'
        images_xpath = '//*[@id="Blog1"]/div[1]/div/div/div/div[1]//img/@src'

        post = HeelsItem()
        post['comment'] = selector.select(title_xpath).extract()
        post['image_urls'] = selector.select(images_xpath).extract()
        post['source_url'] = response.url
        return post
Example #3
0
    def parse_post_detail(self, response):
        """Build a ``HeelsItem`` from a blog post page.

        ``comment`` is taken from the page ``<title>`` text. ``image_urls``
        collects the ``src`` of every ``<img>`` located inside a ``div``
        whose class contains ``post-body``, itself inside a ``div`` whose
        class contains ``post``, under the element with id ``Blog1``.
        ``source_url`` records ``response.url``.

        :param response: response object for the post page.
        :return: the populated ``HeelsItem``.
        """
        # NOTE(review): HtmlXPathSelector/.select() appear to be the legacy
        # Scrapy selector API -- confirm against the Scrapy version in use
        # before migrating to response.xpath().
        selector = HtmlXPathSelector(response)

        title_xpath = '//title/text()'
        images_xpath = '//*[@id="Blog1"]//div[contains(@class, "post")]//div[contains(@class, "post-body")]//img/@src'

        post = HeelsItem()
        post['comment'] = selector.select(title_xpath).extract()
        post['image_urls'] = selector.select(images_xpath).extract()
        post['source_url'] = response.url
        return post
Example #4
0
    def parse_post_detail(self, response):
        """Build a ``HeelsItem`` from a blog post page.

        The response is wrapped in a ``Selector`` forced to HTML mode.
        ``comment`` is taken from the page ``<title>`` text, ``image_urls``
        from the ``src`` of every ``<img>`` found under a fixed path inside
        the element with id ``Blog1``, and ``source_url`` from
        ``response.url``.

        :param response: response object for the post page.
        :return: the populated ``HeelsItem``.
        """
        page = Selector(response, type='html')

        title_xpath = '//title/text()'
        images_xpath = '//*[@id="Blog1"]/div[1]/div/div/div/div[1]//img/@src'

        post = HeelsItem()
        post['comment'] = page.xpath(title_xpath).extract()
        post['image_urls'] = page.xpath(images_xpath).extract()
        post['source_url'] = response.url
        return post
Example #5
0
    def parse_item_detail(self, response):
        """Build a ``HeelsItem`` from an item detail page.

        ``comment`` is taken from the text of every ``<figcaption>`` inside
        a ``<figure>`` under the element with id ``content``. ``image_urls``
        collects the ``src`` of every ``<img>`` inside a ``<span>`` whose
        class contains ``wrapper-fig-image`` under that same element.
        ``source_url`` records ``response.url``.

        :param response: response object for the detail page.
        :return: the populated ``HeelsItem``.
        """
        # NOTE(review): HtmlXPathSelector/.select() appear to be the legacy
        # Scrapy selector API -- confirm against the Scrapy version in use
        # before migrating to response.xpath().
        selector = HtmlXPathSelector(response)

        caption_xpath = '//*[@id="content"]//figure//figcaption/text()'
        images_xpath = '//*[@id="content"]//span[contains(@class, "wrapper-fig-image")]//img/@src'

        detail = HeelsItem()
        detail['comment'] = selector.select(caption_xpath).extract()
        detail['image_urls'] = selector.select(images_xpath).extract()
        detail['source_url'] = response.url
        return detail
Example #6
0
    def parse_pin_detail(self, response):
        """Build a ``HeelsItem`` from a pin detail page.

        ``comment`` is taken from the page ``<title>`` text. ``image_urls``
        is the ``src`` of every ``<img>`` followed by the ``href`` of every
        ``<a>``, both looked up inside a ``div`` whose class contains
        ``pinImageSourceWrapper`` nested in a ``div`` whose class contains
        ``pinWrapper``. ``source_url`` records ``response.url``.

        :param response: response object for the pin page.
        :return: the populated ``HeelsItem``.
        """
        # NOTE(review): HtmlXPathSelector/.select() appear to be the legacy
        # Scrapy selector API -- confirm against the Scrapy version in use
        # before migrating to response.xpath().
        selector = HtmlXPathSelector(response)

        # Both image queries share the same wrapper path and differ only in
        # the trailing node/attribute step.
        wrapper_xpath = '//div[contains(@class, "pinWrapper")]//div[contains(@class, "pinImageSourceWrapper")]'

        pin = HeelsItem()
        pin['comment'] = selector.select('//title/text()').extract()

        img_sources = selector.select(wrapper_xpath + '//img/@src').extract()
        link_targets = selector.select(wrapper_xpath + '//a/@href').extract()
        pin['image_urls'] = img_sources + link_targets

        pin['source_url'] = response.url
        return pin