예제 #1
0
 def start_requests(self):
     """Seed the crawl from cached cities, or fetch the city list first.

     Reads the 'dzdp_citys' Redis list through ``rdb``. When it holds
     entries, the requests produced by ``getCityChannel`` are yielded;
     otherwise a single request for the city-list page is yielded with
     ``parseCitys`` as its callback.
     """
     citys = rdb.lrange('dzdp_citys', 0, -1)
     if citys:
         # getCityChannel is a generator function: calling it without
         # iterating only creates a generator object and discards it, so
         # no request would ever be produced. Re-yield each request.
         for request in self.getCityChannel(citys):
             yield request
     else:
         url = 'http://www.dianping.com/citylist'
         yield Request(url, callback=self.parseCitys, dont_filter=True)
예제 #2
0
 def getCityChannel(self, citys=None):
     """Yield a listing request per city for the first classify entry.

     Args:
         citys: Pickled city records. When empty or None, they are read
             from the 'dzdp_citys' Redis list instead.

     For every record the unpickled object's 'en_name' is stored under
     the 'dzdp_cur_city' Redis key, and its 'href' is combined with each
     entry of ``self.classify[:1]`` as ``<href>/g<entry>``. Each resulting
     URL is requested with ``parseItems`` as the callback.
     """
     if not citys:
         citys = rdb.lrange('dzdp_citys', 0, -1)
     for data in citys:
         self.log('请求获取城市频道: {}'.format(data))
         # NOTE(review): pickle.loads can execute arbitrary code on
         # crafted input; this is only safe while the Redis list is
         # written exclusively by trusted code -- confirm.
         data = pickle.loads(data)
         rdb.set('dzdp_cur_city', data.get('en_name'))
         href = data.get('href')
         for i in self.classify[:1]:
             # Build the URL in its own variable: reassigning `href` here
             # would make later iterations stack suffixes (".../g1/g2")
             # as soon as the slice covers more than one entry.
             url = '{}/g{}'.format(href, i)
             yield Request(url, callback=self.parseItems, dont_filter=True)
예제 #3
0
 def parse(self, response):
     """Queue company contact URLs found on a search-result page.

     Every matched item's link, suffixed with 'contact.html', is pushed
     onto the 'yhby_urls' Redis list, and ``self.count`` is saved under
     'yhby_page'. When ``self.count`` equals 99, every URL stored in
     'yhby_urls' is requested with ``parseCompany`` as the callback.

     Args:
         response: The page response; must provide ``css`` selection.
     """
     for query in response.css(
             'div.searchPdListCon div.searchPdListConL div.gysItemsWrap'):
         href = query.xpath(
             './div[@class="gysItems"]/div[@class="gysItemsL"]/a/@href'
         ).extract_first()
         if not href:
             # No link was extracted for this item; concatenating None
             # with the suffix below would raise TypeError and abort the
             # whole page, so skip the item instead.
             continue
         rdb.rpush('yhby_urls', href + 'contact.html')
     rdb.set('yhby_page', self.count)
     if self.count == 99:
         # Entries read back from Redis are decoded before use as URLs.
         hrefs = [href.decode() for href in rdb.lrange('yhby_urls', 0, -1)]
         for href in hrefs:
             yield Request(href, self.parseCompany, dont_filter=True)
예제 #4
0
 def start_requests(self):
     """Resume the crawl from the page number held in ``self.count``.

     When ``self.count`` is 99, every URL stored in the 'yhby_urls' Redis
     list is requested with ``parseCompany`` as the callback. Otherwise
     the search pages from ``self.count`` through 99 are requested with
     ``parse`` as the callback, and ``self.count`` is updated to each
     page number as its request is generated.
     """
     # The counter is formatted into the message itself. Passing it as a
     # second positional argument would hand it to the logger as the log
     # level (assuming this is Scrapy's Spider.log -- confirm).
     self.log(
         'start request start:********************** {}'.format(self.count))
     start = self.count
     if start == 99:
         hrefs = [href.decode() for href in rdb.lrange('yhby_urls', 0, -1)]
         for href in hrefs:
             yield Request(href, self.parseCompany, dont_filter=True)
     else:
         for i in range(start, 100):
             self.count = i
             url = 'http://www.youboy.com/scom?kw=&p={}'.format(i)
             yield Request(url, self.parse, dont_filter=True)
예제 #5
0
 def getChannelClassify(self, city, channels=None, specialName=''):
     """Yield requests for the classification pages of a city's channels.

     Args:
         city: City identifier used to build the Redis key names.
         channels: Pickled channel records. When empty or None and no
             ``specialName`` is given, they are read from the
             'dzdp_<city>_channels' Redis list.
         specialName: When non-empty, only that channel is requested; its
             href is looked up in the 'dzdp_<city>_channels_hash' hash.

     All requests use ``parseClassify`` as the callback.
     """
     if not specialName:
         if not channels:
             key = 'dzdp_{}_channels'.format(city)
             channels = rdb.lrange(key, 0, -1)
         # The "current channel" key does not change inside the loop.
         cur_key = 'dzdp_{}_cur_channel'.format(city)
         for channel in channels:
             self.log('获取频道的分类信息:{}'.format(channel))
             # NOTE(review): pickle.loads can execute arbitrary code on
             # crafted input; safe only if the Redis data is trusted.
             channel = pickle.loads(channel)
             # `rset` is not a Redis command; the plain `set` used by the
             # other Redis writes in this file is what is meant here.
             rdb.set(cur_key, channel.get('name'))
             yield Request(channel.get('href'),
                           self.parseClassify,
                           dont_filter=True)
     else:
         key = 'dzdp_{}_channels_hash'.format(city)
         channel_href = rdb.hget(key, specialName)
         self.log('获取{}频道分类信息:{}'.format(specialName, channel_href))
         if not channel_href:
             # Nothing stored for this channel name: there is no URL to
             # request, so stop without yielding.
             return
         if isinstance(channel_href, bytes):
             # The hash value may come back as raw bytes (depends on how
             # the client is configured); a request URL must be text.
             channel_href = channel_href.decode()
         yield Request(channel_href, self.parseClassify, dont_filter=True)