Esempio n. 1
0
    def start_requests(self):
        """Build the initial requests, tagging each one with a product brand.

        Two optional sources contribute requests:

        * If ``BRAND_GET_PARAM`` is truthy, one request is created per entry
          in ``start_urls``. The brand is the result of calling
          ``BRAND_GET_PARAM`` when it is callable; otherwise it is the first
          value of the query-string parameter of that name (``None`` when
          the parameter is absent from the URL).
        * If ``SPECIFIC_PRODUCTS`` is truthy, each entry is read through
          ``.get('brand', '')`` and ``.get('urls', '')`` and one request is
          created per URL, carrying that entry's brand.

        The brand travels in the request ``meta`` under ``'product_brand'``.

        Returns the list of built requests, or the result of
        ``BaseSpider.start_requests`` when no request was built.
        """
        pending = []

        if self.BRAND_GET_PARAM:
            for start_url in self.start_urls:
                # The query string is parsed for every URL, even when the
                # brand ends up coming from the callable.
                query = urlparse.parse_qs(urlparse.urlparse(start_url).query)
                if callable(self.BRAND_GET_PARAM):
                    values = [self.BRAND_GET_PARAM()]
                else:
                    values = query.get(self.BRAND_GET_PARAM)
                brand_name = values[0] if values else None
                pending.append(
                    Request(start_url, meta={'product_brand': brand_name})
                )

        if self.SPECIFIC_PRODUCTS:
            for entry in self.SPECIFIC_PRODUCTS:
                entry_brand = entry.get('brand', '')
                pending.extend(
                    Request(product_url, meta={'product_brand': entry_brand})
                    for product_url in entry.get('urls', '')
                )

        # Nothing configured (or nothing produced): use the default behaviour.
        if not pending:
            return BaseSpider.start_requests(self)
        return pending
Esempio n. 2
0
 def start_requests(self):
     """Return the scrape requests followed by the base start requests.

     The result is a single ``itertools.chain`` iterator: everything from
     ``CallbackMixin.scrape_requests`` first, then everything from
     ``_BaseSpider.start_requests``.
     """
     scrape_reqs = CallbackMixin.scrape_requests(self)
     base_reqs = _BaseSpider.start_requests(self)
     return itertools.chain(scrape_reqs, base_reqs)
 def start_requests(self):
     """Return the scrape requests followed by the base start requests.

     The result is a single ``itertools.chain`` iterator: everything from
     ``CallbackMixin.scrape_requests`` first, then everything from
     ``_BaseSpider.start_requests``.
     """
     request_sources = (
         CallbackMixin.scrape_requests(self),
         _BaseSpider.start_requests(self),
     )
     return itertools.chain(*request_sources)
    def start_requests(self):
        """Run ``parse_date_args`` and then delegate to ``BaseSpider``.

        ``self.parse_date_args()`` is called first, so it runs before any
        request is created (presumably it validates the spider's date
        arguments -- confirm against its definition). The return value is
        whatever ``BaseSpider.start_requests`` returns.
        """
        self.parse_date_args()
        base_requests = BaseSpider.start_requests(self)
        return base_requests