def start_requests(self):
    """Load the crawl configuration, then yield the seed list-page requests.

    The item-page regular expressions are read from ``configure/smzdm.ini``
    and stored as attributes on the spider. The per-shop extraction rules
    are read from ``configure/shopping_page.ini`` and stored in the
    URL-pattern dictionary, keyed by the compiled URL regex. Afterwards one
    ``WebdriverRequest`` per category listing is yielded, each carrying its
    category in ``meta`` and using ``parse_smzdm_list_page`` as callback.

    Because this is a generator, none of the setup runs until the first
    request is pulled from it.
    """
    log.start(logfile=self.__smzdm_log_file, loglevel='INFO', logstdout=False)

    item_cfg = ConfigParser.RawConfigParser()
    item_cfg.read("configure/smzdm.ini")

    def item_option(option):
        # Every option taken from the "item_page" section is decoded as UTF-8.
        return item_cfg.get("item_page", option).decode("utf-8")

    self.price_pattern = re.compile(item_option("price_pattern"))
    self.usd_price_pattern = re.compile(item_option("usd_price_pattern"))
    self.jpy_price_pattern = re.compile(item_option("jpy_price_pattern"))
    self.eur_price_pattern = re.compile(item_option("eur_price_pattern"))
    # The head separator is kept as text; unlike its neighbours it is not compiled.
    self.head_separator = item_option("head_separator_pattern")
    self.attachment_pattern = re.compile(item_option("attachment_pattern"))

    shop_cfg = ConfigParser.RawConfigParser()
    shop_cfg.read("configure/shopping_page.ini")
    for shop in shop_cfg.sections():
        url_regex_text = shop_cfg.get(shop, "url_pattern").decode('utf8')
        log.msg("Supported url pattern:\t%s" % url_regex_text,
                level=log.DEBUG, spider=SmzdmSpider)
        compiled_url = re.compile(url_regex_text)
        # Tuple layout: title xpath, price xpath, price clean-up regex,
        # description xpath, description image xpath, currency,
        # list of title image xpaths (comma separated in the file).
        self.__url_pattern_xpath_dict[compiled_url] = (
            shop_cfg.get(shop, "title_xpath"),
            shop_cfg.get(shop, "price_xpath"),
            re.compile(shop_cfg.get(shop, "price_redudant_pattern").decode('utf8')),
            shop_cfg.get(shop, "description_xpath"),
            shop_cfg.get(shop, "description_img_xpath"),
            shop_cfg.get(shop, "currency"),
            shop_cfg.get(shop, "title_img_xpath").split(","),
        )

    # NOTE(review): the value returned by this call is not used here;
    # confirm whether its requests were meant to be yielded as well.
    CrawlSpider.start_requests(self)

    seed_pages = (
        ('http://www.smzdm.com/fenlei/yingertuiche/youhui/p1', 'stroller'),
        ('http://www.smzdm.com/fenlei/anquanzuoyi/youhui/p1', 'car_seat'),
        ('http://www.smzdm.com/fenlei/lego/youhui/p1', 'lego'),
        ('http://www.smzdm.com/fenlei/huwaibeibao/youhui/p1', 'backpack'),
        ('http://www.smzdm.com/fenlei/yingertuiche/haitao/p1', 'stroller'),
        ('http://www.smzdm.com/fenlei/anquanzuoyi/haitao/p1', 'car_seat'),
        ('http://www.smzdm.com/fenlei/lego/haitao/p1', 'lego'),
        ('http://www.smzdm.com/fenlei/huwaibeibao/haitao/p1', 'backpack'),
    )
    for page_url, category in seed_pages:
        yield WebdriverRequest(page_url,
                               meta={'category': category},
                               callback=self.parse_smzdm_list_page)
def start_requests(self):
    """Prepare parsing rules from the ini files and emit the start requests.

    Steps, in order:

    1. Start logging to the spider's log file.
    2. Read ``configure/smzdm.ini`` and store the "item_page" patterns on
       the spider (all compiled except ``head_separator``).
    3. Read ``configure/shopping_page.ini`` and register, for every
       section, a tuple of extraction settings under the compiled
       ``url_pattern`` of that section.
    4. Yield a ``WebdriverRequest`` for each category listing page, with
       ``parse_smzdm_list_page`` as the callback.
    """
    log.start(logfile=self.__smzdm_log_file, loglevel='INFO', logstdout=False)

    smzdm_ini = ConfigParser.RawConfigParser()
    smzdm_ini.read("configure/smzdm.ini")
    # These options share their name with the attribute they populate.
    for pattern_name in ("price_pattern", "usd_price_pattern",
                         "jpy_price_pattern", "eur_price_pattern"):
        pattern_text = smzdm_ini.get("item_page", pattern_name).decode("utf-8")
        setattr(self, pattern_name, re.compile(pattern_text))
    # Stored as decoded text, not as a compiled regex.
    self.head_separator = smzdm_ini.get(
        "item_page", "head_separator_pattern").decode("utf-8")
    self.attachment_pattern = re.compile(
        smzdm_ini.get("item_page", "attachment_pattern").decode("utf-8"))

    shops_ini = ConfigParser.RawConfigParser()
    shops_ini.read("configure/shopping_page.ini")
    for section in shops_ini.sections():
        url_text = shops_ini.get(section, "url_pattern").decode('utf8')
        log.msg("Supported url pattern:\t%s" % url_text,
                level=log.DEBUG, spider=SmzdmSpider)
        url_regex = re.compile(url_text)

        # Options are read in the same order as the resulting tuple fields.
        title_xpath, price_xpath = [
            shops_ini.get(section, key)
            for key in ("title_xpath", "price_xpath")]
        price_noise_regex = re.compile(
            shops_ini.get(section, "price_redudant_pattern").decode('utf8'))
        description_xpath, description_img_xpath, currency = [
            shops_ini.get(section, key)
            for key in ("description_xpath", "description_img_xpath",
                        "currency")]
        # Several image xpaths may be given, separated by commas.
        title_img_xpaths = shops_ini.get(section, "title_img_xpath").split(",")

        rule = (title_xpath, price_xpath, price_noise_regex,
                description_xpath, description_img_xpath, currency,
                title_img_xpaths)
        self.__url_pattern_xpath_dict[url_regex] = rule

    # NOTE(review): the result of the parent call is discarded as written;
    # verify whether that is intentional.
    CrawlSpider.start_requests(self)

    # (category, listing URL) pairs, emitted in exactly this order.
    listings = [
        ('stroller', 'http://www.smzdm.com/fenlei/yingertuiche/youhui/p1'),
        ('car_seat', 'http://www.smzdm.com/fenlei/anquanzuoyi/youhui/p1'),
        ('lego', 'http://www.smzdm.com/fenlei/lego/youhui/p1'),
        ('backpack', 'http://www.smzdm.com/fenlei/huwaibeibao/youhui/p1'),
        ('stroller', 'http://www.smzdm.com/fenlei/yingertuiche/haitao/p1'),
        ('car_seat', 'http://www.smzdm.com/fenlei/anquanzuoyi/haitao/p1'),
        ('lego', 'http://www.smzdm.com/fenlei/lego/haitao/p1'),
        ('backpack', 'http://www.smzdm.com/fenlei/huwaibeibao/haitao/p1'),
    ]
    for category_name, listing_url in listings:
        request_meta = {'category': category_name}
        yield WebdriverRequest(listing_url, meta=request_meta,
                               callback=self.parse_smzdm_list_page)
def start_requests(self):
    """Return the scrape requests followed by the regular start requests.

    Both sources are obtained up front; the returned iterator yields
    everything from ``CallbackMixin.scrape_requests`` first and then
    everything from ``_CrawlSpider.start_requests``.
    """
    scrape_part = CallbackMixin.scrape_requests(self)
    start_part = _CrawlSpider.start_requests(self)
    return itertools.chain(scrape_part, start_part)
def start_requests(self):
    """Chain the mixin's scrape requests with the crawl spider's start requests.

    The two request sources are collected in order (scrape requests first)
    and exposed to the caller as a single iterator.
    """
    request_sources = (
        CallbackMixin.scrape_requests(self),
        _CrawlSpider.start_requests(self),
    )
    return itertools.chain.from_iterable(request_sources)
def start_requests(self):
    """Use CrawlSpider's start_requests() rather than SitemapSpider's.

    CrawlSpider is named explicitly so that its implementation is the one
    that produces the start requests.
    """
    crawl_requests = CrawlSpider.start_requests(self)
    return crawl_requests