예제 #1
0
 def oninit(self, url):
     """Download a user's home page and extract the basic account info.

     The page is fetched from ``self.url_suf + url`` through
     ``self.downloader``. Account fields are read from the inline
     ``CONFIG['...']='...'`` assignments found in the page content.

     Side effect: stores the first value returned by
     ``self.validater.validate_nums`` on ``self.crawl_info``.

     Returns:
         A dict with the keys ``uid``, ``page_id``, ``nick``, ``domain``,
         ``location``, ``follow_num``, ``fans_num`` and ``wb_num`` (the
         three counters converted with ``int``), or ``None`` when the
         download fails, when any expected ``CONFIG`` entry is missing
         from the page, or when ``Parser.parse_index`` yields nothing.
     """
     url = self.url_suf + url
     res = self.downloader.download(url)
     if not res:
         return
     cont = res.content

     def config_value(key):
         # A page without the expected entry (e.g. an error or login page)
         # yields None here instead of an AttributeError on ``.group``.
         match = re.search(r"CONFIG\['%s'\]='(.*?)'" % key, cont)
         return match.group(1) if match else None

     uid = config_value('oid')
     nick = config_value('onick')
     # page_id is needed later for the ajax requests that fetch weibo posts.
     page_id = config_value('page_id')
     domain = config_value('domain')
     location = config_value('location')
     if any(value is None
            for value in (uid, nick, page_id, domain, location)):
         # Same failure signal as the other early exits of this method.
         return

     # Only decode byte strings; a text match has nothing to decode.
     if isinstance(nick, bytes):
         nick = nick.decode('utf-8')

     nums = Parser.parse_index(cont)
     if not nums:
         return
     # The third element (post count) is superseded by the validator below.
     follow_num, fans_num, _ = nums
     # Per the original author's note: check whether the user's follow /
     # fans / weibo counts changed, update the stored info if so, and get
     # back the number of weibo posts that still need to be crawled.
     crawl_info, wb_num = self.validater.validate_nums(page_id, nums)
     self.crawl_info = crawl_info
     return {
         'uid': uid,
         'page_id': page_id,
         'nick': nick,
         'domain': domain,
         'location': location,
         'follow_num': int(follow_num),
         'fans_num': int(fans_num),
         'wb_num': int(wb_num)
     }