def parse_based_followernum(self, response):
    """Yield follower-list requests sized by the page count found in ``response``.

    The follower page count is extracted from ``response.body`` with
    ``Analyzer.get_followerhtml`` and ``Analyzer.get_relation_pagenum``:

    * If it is the empty string, a single request for page 1 of
      ``http://weibo.com/<uid>/fans`` is yielded.
    * Otherwise requests are yielded for pages ``min(count, 5)`` down to 1.

    Every request targets ``response.meta['uid']`` and is handled by
    ``self.parse_follower``.

    Raises:
        ValueError: propagated from ``int()`` if a non-empty page count
            cannot be converted to an integer.
    """
    # NOTE(review): this method appears twice in the file with the same
    # logic; presumably only one definition is needed -- confirm and drop
    # the other.
    max_pages = 5  # upper bound on follower pages requested per user

    analyzer = Analyzer()
    total_follower_pq = analyzer.get_followerhtml(response.body)
    follower_page_num = analyzer.get_relation_pagenum(total_follower_pq)

    if follower_page_num == "":
        # No page count available: fall back to the plain first page.
        # NOTE(review): 'cookiejar' is pinned to 1 here, while the paged
        # branch forwards response.meta['cookiejar'] -- confirm intended.
        follower_url = 'http://weibo.com/%s/fans?page=1' % response.meta['uid']
        yield Request(
            url=follower_url,
            meta={'cookiejar': 1, 'uid': response.meta['uid']},
            callback=self.parse_follower,
        )
    else:
        last_page = min(int(follower_page_num), max_pages)
        for page in range(last_page, 0, -1):
            # The page number travels through the class-level relation_data
            # dict; presumably get_relation_paramurl() reads it -- confirm.
            GetWeibopage.relation_data['page'] = page
            follower_url = (
                getinfo.get_follower_mainurl(response.meta['uid'])
                + WeiboSpider.getweibopage.get_relation_paramurl()
            )
            yield Request(
                url=follower_url,
                meta={
                    'cookiejar': response.meta['cookiejar'],
                    'uid': response.meta['uid'],
                },
                callback=self.parse_follower,
            )
def parse_based_followernum(self, response):
    """Yield follower-list requests sized by the page count found in ``response``.

    The follower page count is extracted from ``response.body`` with
    ``Analyzer.get_followerhtml`` and ``Analyzer.get_relation_pagenum``:

    * If it is the empty string, a single request for page 1 of
      ``http://weibo.com/<uid>/fans`` is yielded.
    * Otherwise requests are yielded for pages ``min(count, 5)`` down to 1.

    Every request targets ``response.meta['uid']`` and is handled by
    ``self.parse_follower``.

    Raises:
        ValueError: propagated from ``int()`` if a non-empty page count
            cannot be converted to an integer.
    """
    # NOTE(review): this method appears twice in the file with the same
    # logic; presumably only one definition is needed -- confirm and drop
    # the other.
    max_pages = 5  # upper bound on follower pages requested per user

    analyzer = Analyzer()
    total_follower_pq = analyzer.get_followerhtml(response.body)
    follower_page_num = analyzer.get_relation_pagenum(total_follower_pq)

    if follower_page_num == "":
        # No page count available: fall back to the plain first page.
        # NOTE(review): 'cookiejar' is pinned to 1 here, while the paged
        # branch forwards response.meta['cookiejar'] -- confirm intended.
        follower_url = 'http://weibo.com/%s/fans?page=1' % response.meta['uid']
        yield Request(
            url=follower_url,
            meta={'cookiejar': 1, 'uid': response.meta['uid']},
            callback=self.parse_follower,
        )
    else:
        last_page = min(int(follower_page_num), max_pages)
        for page in range(last_page, 0, -1):
            # The page number travels through the class-level relation_data
            # dict; presumably get_relation_paramurl() reads it -- confirm.
            GetWeibopage.relation_data['page'] = page
            follower_url = (
                getinfo.get_follower_mainurl(response.meta['uid'])
                + WeiboSpider.getweibopage.get_relation_paramurl()
            )
            yield Request(
                url=follower_url,
                meta={
                    'cookiejar': response.meta['cookiejar'],
                    'uid': response.meta['uid'],
                },
                callback=self.parse_follower,
            )
def get_relation(self, response):
    """Build the follow-list and follower-list requests for ``self.uid``.

    Yields one request per follow page (handled by ``self.parse_follow``),
    then one per follower page (handled by ``self.parse_follower``). Each
    group counts down from ``WeiboSpider.follow_page_num`` /
    ``WeiboSpider.follower_page_num`` to page 1.
    """
    param_builder = GetWeibopage()

    def make_request(page_no, main_url_for, handler):
        # Publish the page number on the class-level dict before the
        # parameter part of the URL is generated.
        GetWeibopage.relation_data['page'] = page_no
        target_url = main_url_for(self.uid) + param_builder.get_relation_paramurl()
        request_meta = {'cookiejar': response.meta['cookiejar'], 'uid': self.uid}
        return Request(url=target_url, meta=request_meta, callback=handler)

    # Accounts this user follows, highest page first.
    for page_no in range(WeiboSpider.follow_page_num, 0, -1):
        yield make_request(page_no, getinfo.get_follow_mainurl, self.parse_follow)

    # Accounts following this user, highest page first.
    for page_no in range(WeiboSpider.follower_page_num, 0, -1):
        yield make_request(page_no, getinfo.get_follower_mainurl, self.parse_follower)
def parse_follower(self, response):
    """Emit the followers found on one page, then crawl their relations.

    First yields a ``WeibospiderItem`` holding the page owner's ``uid``, the
    follower uids extracted from ``response.body`` and an empty
    ``follow_uid_list``.

    If the page belongs to ``self.uid`` and at least one follower was found,
    each follower is looked up in ``t_user_follow`` and ``t_user_follower``.
    For every table that has no row for that follower, requests for the
    matching relation pages are yielded (callbacks ``self.parse_follow`` and
    ``self.parse_follower`` respectively). Pages owned by any other uid are
    not expanded further.

    The database connection and every cursor are released even when a query
    fails or the generator is closed before it is exhausted.
    """
    item = WeibospiderItem()
    analyzer = Analyzer()
    getweibopage = GetWeibopage()
    total_follower_pq = analyzer.get_followerhtml(response.body)
    item['uid'] = response.meta['uid']
    item['follower_uid_list'] = analyzer.get_follower(total_follower_pq)
    item['follow_uid_list'] = []
    yield item

    # Only the spider's own target is expanded, and only if it has followers.
    if not (self.uid == response.meta['uid'] and item['follower_uid_list']):
        return

    db = OracleStore()
    conn = db.get_connection()

    def count_rows(sql):
        """Run a ``select count(*)`` query and return its single value."""
        cursor = db.select_operation(conn, sql)
        try:
            return cursor.fetchone()[0]
        finally:
            cursor.close()

    try:
        for follower_uid in item['follower_uid_list']:
            # NOTE(review): both queries interpolate a scraped uid straight
            # into the SQL text; switch to bind variables if
            # db.select_operation supports them.

            # Accounts followed by this follower.
            sql1 = """select count(*) from t_user_follow where userID=%s""" % str(follower_uid)
            if not count_rows(sql1):
                # A count of 0 means this account has not been fetched yet.
                for page in range(WeiboSpider.follow_page_num, 0, -1):
                    GetWeibopage.relation_data['page'] = page
                    follow_url = (
                        getinfo.get_follow_mainurl(follower_uid)
                        + getweibopage.get_relation_paramurl()
                    )
                    yield Request(
                        url=follow_url,
                        meta={'cookiejar': response.meta['cookiejar'], 'uid': follower_uid},
                        callback=self.parse_follow,
                    )
            else:
                print('follow_uid existed! %s' % (follower_uid,))
                # Placeholder output carried over unchanged.
                # NOTE(review): presumably ignored downstream -- confirm
                # before removing.
                yield None

            # Followers of this follower.
            sql2 = """select count(*) from t_user_follower where userID=%s""" % str(follower_uid)
            if not count_rows(sql2):
                # A count of 0 means this account has not been fetched yet.
                for page in range(WeiboSpider.follower_page_num, 0, -1):
                    GetWeibopage.relation_data['page'] = page
                    follower_url = (
                        getinfo.get_follower_mainurl(follower_uid)
                        + getweibopage.get_relation_paramurl()
                    )
                    yield Request(
                        url=follower_url,
                        meta={'cookiejar': response.meta['cookiejar'], 'uid': follower_uid},
                        callback=self.parse_follower,
                    )
            else:
                print('follower_uid existed! %s' % (follower_uid,))
                # Placeholder output carried over unchanged (see above).
                yield None
    finally:
        # Runs on normal completion, on errors, and when the generator is
        # closed early, so the connection is never left open.
        conn.close()