def __init__(self, search_sort='default', *args, **kwargs):
    """Initialise the Ozon spider with a URL formatter for the sort mode.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``,
        so a key missing from that mapping is not tolerated.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    sort_value = self.SEARCH_SORT[search_sort]
    formatter = FormatterWithDefaults(search_sort=sort_value)
    super(OzonProductsSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, search_sort='default', *args, **kwargs):
    """Initialise the eBuyer spider and remember the requested ordering.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``;
        the resulting value is stored on ``self.order``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    # The sort value is kept on the instance rather than handed to the
    # formatter, which is created without any defaults.
    self.order = self.SEARCH_SORT[search_sort]
    formatter = FormatterWithDefaults()
    primary_domain = self.allowed_domains[0]
    super(EBuyerProductSpider, self).__init__(
        url_formatter=formatter,
        site_name=primary_domain,
        *args, **kwargs)
def __init__(self, order='default', *args, **kwargs):
    """Initialise the generic products spider.

    Performs four steps before delegating to the parent constructor:
    picks a ``site_name`` when several domains are allowed, wraps the
    per-option ``_parse_*`` / ``_request_*`` methods, builds the list of
    optional fields to scrape, and resolves the sort mode.

    :param order: key looked up in ``self.SORT_MODES``; an unknown key is
        logged (when ``SORT_MODES`` is non-empty) and results in a
        ``None`` sort mode.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor;
        also consulted for per-option on/off switches keyed by the names
        in ``self.OPTIONAL_REQUESTS``.
    """
    # Handle multiple allowed domains.  The first domain is only a
    # fallback: an explicitly supplied ``site_name`` must win, so use
    # setdefault instead of overwriting it (the original guard looked at
    # 'site' only and clobbered a caller-provided 'site_name').
    allowed = getattr(self, 'allowed_domains', None)
    if allowed and len(allowed) > 1 and 'site' not in kwargs:
        kwargs.setdefault('site_name', allowed[0])

    # Decorate optional fields request and parse methods
    # with option_requester and option_parser.
    # NOTE(review): the wrappers are installed on the class (not on the
    # instance) while being bound to this instance -- confirm intended.
    for key in self.OPTIONAL_REQUESTS.keys():
        parse_name = '_parse_%s' % key
        wrapped_parse = option_parser(getattr(self, parse_name))
        setattr(self.__class__, parse_name,
                types.MethodType(wrapped_parse, self))

        request_name = '_request_%s' % key
        wrapped_request = option_requester(key)(getattr(self, request_name))
        setattr(self.__class__, request_name,
                types.MethodType(wrapped_request, self))

    # Creating the list of optional fields to be scraped: an option is
    # kept when it is enabled by default or switched on through kwargs,
    # and it is not switched off (falsy value) through kwargs.
    self.options = [
        k for k, v in self.OPTIONAL_REQUESTS.iteritems()
        if (v or kwargs.get(k, False)) and kwargs.get(k, True)
    ]

    # Handle sort modes
    sort_mode = self.SORT_MODES.get(order, None)
    if self.SORT_MODES:
        if sort_mode is None:
            self.log('Sort mode "%s" is not defined' % order)
    self.sort_mode = sort_mode

    formatter = FormatterWithDefaults(sort_mode=sort_mode)
    super(ProductsSpider, self).__init__(formatter, *args, **kwargs)
def __init__(self, search_sort='POPULAR', *args, **kwargs):
    """Initialise the Next.co.uk spider.

    :param search_sort: key looked up in ``self._SORT_MODES`` with ``[]``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    # Start position counter, reset for every new spider instance.
    self.start_pos = 0

    primary_domain = self.allowed_domains[0]
    formatter = FormatterWithDefaults(
        search_sort=self._SORT_MODES[search_sort])
    super(NextCoUkProductSpider, self).__init__(
        site_name=primary_domain,
        url_formatter=formatter,
        *args, **kwargs)
def __init__(self, sort_mode=None, *args, **kwargs):
    """Initialise the Pepperfry spider.

    :param sort_mode: raw sort specification handed to
        ``self._parse_sort``; the result is stored on ``self.SORT`` and
        its first two items become the formatter's ``sort_mode`` and
        ``dir`` defaults.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    self.SORT = self._parse_sort(sort_mode)
    self.pages = {}

    formatter = FormatterWithDefaults(
        sort_mode=self.SORT[0],
        dir=self.SORT[1])
    primary_domain = self.allowed_domains[0]
    super(PepperfryProductsSpider, self).__init__(
        url_formatter=formatter,
        site_name=primary_domain,
        *args, **kwargs)
def __init__(self, search_sort='recommended', *args, **kwargs):
    """Initialise the Halfords spider.

    :param search_sort: key looked up in ``self._SORT_MODES`` with ``[]``;
        the value becomes the formatter's ``sort`` default.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    # Buyer-reviews helper, created with this spider as ``called_class``.
    self.br = BuyerReviewsBazaarApi(called_class=self)

    primary_domain = self.allowed_domains[0]
    formatter = FormatterWithDefaults(sort=self._SORT_MODES[search_sort])
    super(HalfordsProductSpider, self).__init__(
        site_name=primary_domain,
        url_formatter=formatter,
        *args, **kwargs)
def __init__(self, search_sort='best_match', *args, **kwargs):
    """Initialise the Autozone spider.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``.
        A ``search_modes`` entry in ``kwargs`` takes precedence over it.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor
        (``search_modes`` is left in place and forwarded as well).
    """
    # "search_modes" acts as an alternative spelling that overrides
    # search_sort when present.
    search_sort = kwargs.get("search_modes", search_sort)

    formatter = FormatterWithDefaults(
        search_sort=self.SEARCH_SORT[search_sort])
    super(AutozoneProductsSpider, self).__init__(
        url_formatter=formatter,
        site_name="autozone.com",
        *args, **kwargs)
def __init__(self, search_sort='NEWEST', *args, **kwargs):
    """Initialise the House of Fraser spider.

    :param search_sort: key looked up in ``self._SORT_MODES`` with ``[]``;
        the value becomes the formatter's ``sort_mode`` default.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    # Buyer-reviews helper, created with this spider as ``called_class``.
    self.br = BuyerReviewsBazaarApi(called_class=self)

    primary_domain = self.allowed_domains[0]
    formatter = FormatterWithDefaults(
        sort_mode=self._SORT_MODES[search_sort])
    super(HouseoffraserProductSpider, self).__init__(
        site_name=primary_domain,
        url_formatter=formatter,
        *args, **kwargs)
def __init__(self, sort_mode="default", *args, **kwargs):
    """Initialise the Souq spider.

    :param sort_mode: key into ``self.SORT_MODES``.  An unknown key is
        logged and replaced with ``'default'``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    if sort_mode not in self.SORT_MODES:
        # Interpolate the offending value; the message used to be logged
        # with a literal, unformatted "%s".
        self.log('"%s" not in SORT_MODES' % (sort_mode,))
        sort_mode = 'default'

    formatter = FormatterWithDefaults(sort_by=self.SORT_MODES[sort_mode])
    super(SouqProductsSpider, self).__init__(
        formatter,
        site_name=self.allowed_domains[0],
        *args, **kwargs)
def __init__(self, order='default', *args, **kwargs):
    """Initialise the Maplin spider.

    :param order: key into ``self.SORT_MODES``.  An unknown key is logged
        at ``WARNING`` level and replaced with ``'default'``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    known_orders = self.SORT_MODES.keys()
    if order not in known_orders:
        message = (
            "'%s' not in SORT_MODES. Used default for this session" % order)
        self.log(message, WARNING)
        order = 'default'

    formatter = FormatterWithDefaults(search_sort=self.SORT_MODES[order])
    super(MaplinProductsSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, search_sort='best_match', zip_code='M3C',
             search_order='default', *args, **kwargs):
    """Initialise the Walmart Canada spider.

    :param search_sort: key looked up in ``self._SEARCH_SORT`` with ``[]``.
    :param zip_code: stored on ``self.zip_code`` when truthy; a falsy
        value leaves the attribute untouched.
    :param search_order: key looked up in ``self._SEARCH_ORDER`` with
        ``[]``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    if zip_code:
        self.zip_code = zip_code

    primary_domain = self.allowed_domains[0]
    formatter = FormatterWithDefaults(
        search_sort=self._SEARCH_SORT[search_sort],
        search_order=self._SEARCH_ORDER[search_order])
    super(WalmartCaProductsSpider, self).__init__(
        site_name=primary_domain,
        url_formatter=formatter,
        *args, **kwargs)
def __init__(self, search_sort='default', *args, **kwargs):
    """Initialise the PGShop spider.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    formatter = FormatterWithDefaults(
        search_sort=self.SEARCH_SORT[search_sort],
        start=0)
    # The spider has several allowed_domains, so the site name is set
    # explicitly from the first one.
    primary_domain = self.allowed_domains[0]
    super(PGShopProductSpider, self).__init__(
        url_formatter=formatter,
        site_name=primary_domain,
        *args, **kwargs)
def __init__(self, search_sort='default', *args, **kwargs):
    """Initialise the Coach spider and its per-run bookkeeping.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``;
        the value is stored on ``self.search_sort`` and used as the
        formatter's ``search_sort`` default.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    self.search_sort = self.SEARCH_SORT[search_sort]
    self.new_stile = False
    # Unique links collected so far.
    self.links = []
    # Responses received from the "new_stile" version of the site, kept
    # so that no additional requests have to be made for them.
    self.initial_responses = []

    formatter = FormatterWithDefaults(search_sort=self.search_sort)
    super(CoachSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, sort_mode=None, *args, **kwargs):
    """Initialise the River Island spider.

    :param sort_mode: optional, case-insensitive key into
        ``self.SORT_MODES``.  A known key updates ``self.SORTING``; an
        unknown key is logged and otherwise ignored.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    if sort_mode:
        requested = sort_mode.lower()
        if requested in self.SORT_MODES:
            self.SORTING = self.SORT_MODES[requested]
        else:
            # Interpolate the offending value; the message used to be
            # logged with a literal, unformatted "%s".
            self.log('"%s" not in SORT_MODES' % (sort_mode,))

    # Fall back to the 'default' mode while self.SORTING is falsy.
    formatter = FormatterWithDefaults(
        sort_mode=self.SORTING or self.SORT_MODES['default'])
    super(RiverislandProductsSpider, self).__init__(
        url_formatter=formatter,
        site_name=self.allowed_domains[0],
        *args, **kwargs)
def __init__(self, sort_mode=None, *args, **kwargs):
    """Initialise the Walgreens spider.

    :param sort_mode: optional, case-insensitive key into
        ``self.SORT_MODES``.  A known key updates ``self.SORTING``; an
        unknown key is logged and otherwise ignored.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    if sort_mode:
        requested = sort_mode.lower()
        if requested in self.SORT_MODES:
            self.SORTING = self.SORT_MODES[requested]
        else:
            # Interpolate the offending value; the message used to be
            # logged with a literal, unformatted "%s".
            self.log('"%s" not in SORT_MODES' % (sort_mode,))

    site = self.allowed_domains[0]
    # Fall back to the 'relevance' mode while self.SORTING is falsy.
    formatter = FormatterWithDefaults(
        page=self.page,
        sort_mode=self.SORTING or self.SORT_MODES['relevance'])
    super(WalGreensProductsSpider, self).__init__(
        site_name=site,
        url_formatter=formatter,
        *args, **kwargs)
def __init__(self, sort_mode=None, fetch_related_products=True,
             store_id=10151, catalog_id=24551, lang_id=110,
             *args, **kwargs):
    """Initialise the Argos UK spider.

    :param sort_mode: key into ``self.SORT_MODES``.  A known key switches
        ``self.SEARCH_URL`` to ``self.SORT_SEARCH_URL``; anything else
        (including the default ``None``) is logged and the
        ``'relevance'`` mode is stored on ``self.SORTING`` instead.
    :param fetch_related_products: stored on
        ``self.fetch_related_products``.
    :param store_id: formatter default, used only with a known sort mode.
    :param catalog_id: formatter default, used only with a known sort
        mode.
    :param lang_id: formatter default, used only with a known sort mode.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    self.fetch_related_products = fetch_related_products

    if sort_mode in self.SORT_MODES:
        sort_mode = self.SORT_MODES[sort_mode]
        self.SEARCH_URL = self.SORT_SEARCH_URL
        self.SORTING = sort_mode
        formatter = FormatterWithDefaults(
            sort_mode=sort_mode,
            store_id=store_id,
            catalog_id=catalog_id,
            lang_id=lang_id)
    else:
        # Interpolate the offending value; the message used to be logged
        # with a literal, unformatted "%s".
        self.log('"%s" not in SORT_MODES' % (sort_mode,))
        self.SORTING = self.SORT_MODES['relevance']
        formatter = FormatterWithDefaults()

    # presumably only sets site_name when the caller did not -- verify
    # against cond_set_value's definition.
    cond_set_value(kwargs, 'site_name', 'argos.co.uk')
    super(ArgosUKProductsSpider, self).__init__(formatter, *args, **kwargs)
def __init__(self, sort_mode=None, *args, **kwargs):
    """Initialise the Boots spider.

    :param sort_mode: optional key into ``self.SORT_MODES``.  A known key
        supplies the formatter's ``CM`` and ``T1`` defaults and switches
        ``self.SEARCH_URL`` to ``self.SEARCH_URL2``.  Otherwise ``None``
        is passed to the parent as the formatter.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    formatter = None
    if sort_mode and sort_mode in self.SORT_MODES:
        formatter = FormatterWithDefaults(
            CM=self.SORT_MODES[sort_mode]['CM'],
            T1=self.SORT_MODES[sort_mode]['T1'])
        self.SEARCH_URL = self.SEARCH_URL2

    primary_domain = self.allowed_domains[0]
    super(BootsProductsSpider, self).__init__(
        formatter, site_name=primary_domain, *args, **kwargs)
def __init__(self, sort_mode=None, *args, **kwargs):
    """Initialise the JCPenney spider.

    :param sort_mode: optional, case-insensitive key into
        ``self.SORT_MODES``.  A known key updates ``self.SORTING``; an
        unknown key is logged and otherwise ignored.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    # Buyer-reviews helper, created with this spider as ``called_class``.
    self.buyer_reviews = BuyerReviewsBazaarApi(called_class=self)

    if sort_mode:
        requested = sort_mode.lower()
        if requested in self.SORT_MODES:
            self.SORTING = self.SORT_MODES[requested]
        else:
            # Interpolate the offending value; the message used to be
            # logged with a literal, unformatted "%s".
            self.log('"%s" not in SORT_MODES' % (sort_mode,))

    # Fall back to the 'default' mode while self.SORTING is falsy.
    formatter = FormatterWithDefaults(
        sort_mode=self.SORTING or self.SORT_MODES['default'])
    super(JcpenneyProductsSpider, self).__init__(
        url_formatter=formatter,
        site_name=self.allowed_domains[0],
        *args, **kwargs)

    # Applied after the parent constructor has run: limit concurrent
    # requests to one and set a fixed desktop Chrome user agent.
    settings.overrides['CONCURRENT_REQUESTS'] = 1
    self.user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/54.0.2840.98 Safari/537.36')
def __init__(self, sort_mode=None, *args, **kwargs):
    """Initialise the Ulta spider.

    :param sort_mode: optional, case-insensitive key into
        ``self.SORT_MODES``.  A known key updates ``self.SORTING``; an
        unknown key is logged and otherwise ignored.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    if sort_mode:
        requested = sort_mode.lower()
        if requested in self.SORT_MODES:
            self.SORTING = self.SORT_MODES[requested]
        else:
            # Interpolate the offending value; the message used to be
            # logged with a literal, unformatted "%s".
            self.log('"%s" not in SORT_MODES' % (sort_mode,))

    # Disabled in the original code, kept for reference:
    # do not hit 404 multiple times
    # settings.overrides['RETRY_HTTP_CODES'] \
    #     = [c for c in settings['RETRY_HTTP_CODES'] if c != 404]

    # Fall back to the 'default' mode while self.SORTING is falsy.
    formatter = FormatterWithDefaults(
        sort_mode=self.SORTING or self.SORT_MODES['default'])
    super(UltaProductSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, order="default", *args, **kwargs):
    """Initialise the Proswimwear spider.

    :param order: key into ``self.SORT_MODES``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    :raises Exception: when ``self.SORT_MODES.get(order)`` yields ``None``.
    """
    resolved_mode = self.SORT_MODES.get(order)
    if resolved_mode is None:
        raise Exception('%s sorting mode is not defined' % order)

    super(ProswimwearCoUkSpider, self).__init__(
        FormatterWithDefaults(sort_mode=resolved_mode), *args, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialise the AH spider.

    The sort key is read from ``kwargs['order']`` (default
    ``'relevance'``) and translated through ``self.SORT_BY``; an unknown
    key yields ``'relevance'``.  The result is stored on ``self.sort_by``.

    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor
        (``order`` is read but not removed).
    """
    requested_order = kwargs.get('order', 'relevance')
    self.sort_by = self.SORT_BY.get(requested_order, 'relevance')

    super(AhProductsSpider, self).__init__(
        FormatterWithDefaults(sort=self.sort_by), *args, **kwargs)
def __init__(self, search_sort='best_match', *args, **kwargs):
    """Initialise the Flipkart spider.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    sort_value = self.SEARCH_SORT[search_sort]
    formatter = FormatterWithDefaults(search_sort=sort_value)
    super(FlipkartProductsSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, search_sort='best_sellers', *args, **kwargs):
    """Initialise the Morrisons spider.

    :param search_sort: key looked up in ``self.SEARCH_SORT`` with ``[]``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    sort_value = self.SEARCH_SORT[search_sort]
    formatter = FormatterWithDefaults(search_sort=sort_value)
    super(MorrisonsProductsSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialise the Asda spider with fixed paging defaults.

    The formatter is created with ``pagenum=1`` and ``prods_per_page=32``.

    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    formatter = FormatterWithDefaults(pagenum=1, prods_per_page=32)
    super(AsdaProductsSpider, self).__init__(
        url_formatter=formatter, *args, **kwargs)
def __init__(self, order='relevance', *args, **kwargs):
    """Initialise the Babymonitorsdirect spider.

    :param order: key into ``self.SEARCH_ORDER``; an unknown key yields
        ``'relevance'``.  The formatter's ``sort`` default is always
        ``'asc'``.
    :param args: positional arguments forwarded to the parent constructor.
    :param kwargs: keyword arguments forwarded to the parent constructor.
    """
    resolved_order = self.SEARCH_ORDER.get(order, 'relevance')
    super(BabymonitorsdirectProductsSpider, self).__init__(
        FormatterWithDefaults(order=resolved_order, sort='asc'),
        *args, **kwargs)