コード例 #1
0
    def parse_based_followernum(self, response):
        """Schedule follower-list requests according to the reported page count.

        The follower page count is extracted from ``response.body`` through
        ``Analyzer``. Every yielded ``Request`` uses ``self.parse_follower``
        as its callback and carries ``uid`` and ``cookiejar`` in its meta.

        * Page count is ``""``: a single request for ``fans?page=1`` is
          yielded.
        * Otherwise: one request per page is yielded in descending order,
          starting at ``min(page count, 5)`` and ending at page 1. A page
          count of zero or less yields nothing.

        ``response.meta`` must contain ``uid``; the paged branch also reads
        ``cookiejar`` from it.
        """
        analyzer = Analyzer()
        total_follower_pq = analyzer.get_followerhtml(response.body)
        follower_page_num = analyzer.get_relation_pagenum(total_follower_pq)

        if follower_page_num == "":
            # No page count could be read: fall back to the first fans page.
            uid = response.meta['uid']
            follower_url = 'http://weibo.com/%s/fans?page=1' % uid
            # NOTE(review): this branch pins cookiejar to 1 instead of
            # forwarding response.meta['cookiejar'] like the paged branch;
            # kept as-is, confirm whether that is intentional.
            yield Request(url=follower_url,
                          meta={'cookiejar': 1, 'uid': uid},
                          callback=self.parse_follower)
            return

        # At most the first five pages are requested, walking from the
        # highest page down to page 1.
        first_page = min(int(follower_page_num), 5)
        for page in range(first_page, 0, -1):
            # The parameter URL is built from this shared class-level dict,
            # so the page number has to be set right before building it.
            GetWeibopage.relation_data['page'] = page
            follower_url = getinfo.get_follower_mainurl(
                response.meta['uid']
            ) + WeiboSpider.getweibopage.get_relation_paramurl()
            yield Request(url=follower_url,
                          meta={
                              'cookiejar': response.meta['cookiejar'],
                              'uid': response.meta['uid']
                          },
                          callback=self.parse_follower)
コード例 #2
0
    def parse_based_followernum(self, response):
        """Yield follower-page requests sized by the page count found in the response.

        ``Analyzer`` extracts the follower page count from ``response.body``.
        When the count is the empty string, one request for ``fans?page=1``
        is yielded. Otherwise requests are yielded for pages
        ``min(count, 5)`` down to 1 (none at all when the count is not
        positive). All requests call back into ``self.parse_follower`` and
        carry ``cookiejar`` and ``uid`` in ``meta``.
        """
        analyzer = Analyzer()
        follower_pq = analyzer.get_followerhtml(response.body)
        follower_page_num = analyzer.get_relation_pagenum(follower_pq)

        if follower_page_num == "":
            # Page count unavailable: request only the first fans page.
            # NOTE(review): cookiejar is hard-coded to 1 here, unlike the
            # paged branch below which forwards response.meta['cookiejar'];
            # behavior kept, confirm intent.
            follower_url = 'http://weibo.com/%s/fans?page=1' % response.meta['uid']
            yield Request(
                url=follower_url,
                meta={'cookiejar': 1, 'uid': response.meta['uid']},
                callback=self.parse_follower,
            )
        else:
            # Never request more than five pages; iterate highest page first.
            page_limit = min(int(follower_page_num), 5)
            for page in range(page_limit, 0, -1):
                # relation_data is shared class state read when the
                # parameter URL is built, so set the page immediately before.
                GetWeibopage.relation_data['page'] = page
                main_url = getinfo.get_follower_mainurl(response.meta['uid'])
                follower_url = main_url + WeiboSpider.getweibopage.get_relation_paramurl()
                yield Request(
                    url=follower_url,
                    meta={
                        'cookiejar': response.meta['cookiejar'],
                        'uid': response.meta['uid'],
                    },
                    callback=self.parse_follower,
                )
コード例 #3
0
    def get_relation(self, response):
        """Yield the requests for this user's follow pages and follower pages.

        First one request per follow page (``WeiboSpider.follow_page_num``
        down to 1, callback ``self.parse_follow``), then one request per
        follower page (``WeiboSpider.follower_page_num`` down to 1, callback
        ``self.parse_follower``). Each request carries the response's
        ``cookiejar`` and ``self.uid`` in its meta.
        """
        param_builder = GetWeibopage()

        def make_request(target_url, handler):
            # A fresh meta dict is created for every request.
            request_meta = {
                'cookiejar': response.meta['cookiejar'],
                'uid': self.uid,
            }
            return Request(url=target_url, meta=request_meta, callback=handler)

        # Accounts this user follows.
        for page_no in range(WeiboSpider.follow_page_num, 0, -1):
            # The page number is passed through the shared class-level dict.
            GetWeibopage.relation_data['page'] = page_no
            base_url = getinfo.get_follow_mainurl(self.uid)
            query_part = param_builder.get_relation_paramurl()
            yield make_request(base_url + query_part, self.parse_follow)

        # Accounts following this user.
        for page_no in range(WeiboSpider.follower_page_num, 0, -1):
            GetWeibopage.relation_data['page'] = page_no
            base_url = getinfo.get_follower_mainurl(self.uid)
            query_part = param_builder.get_relation_paramurl()
            yield make_request(base_url + query_part, self.parse_follower)
コード例 #4
0
    def parse_follower(self, response):
        """Emit the follower item for this page, then expand each follower.

        An item is yielded first, holding ``uid`` (from ``response.meta``),
        ``follower_uid_list`` (parsed from ``response.body``) and an empty
        ``follow_uid_list``.

        If the response belongs to ``self.uid`` and the follower list is not
        empty, each follower uid is then checked against two tables:

        * no rows in ``t_user_follow``: requests for the follower's follow
          pages are yielded (callback ``self.parse_follow``);
        * no rows in ``t_user_follower``: requests for the follower's
          follower pages are yielded (callback ``self.parse_follower``).

        When rows already exist, a message is printed and ``None`` is yielded
        instead. The database connection is closed when the generator
        finishes, fails, or is closed before being exhausted.
        """
        item = WeibospiderItem()
        analyzer = Analyzer()
        getweibopage = GetWeibopage()
        total_follower_pq = analyzer.get_followerhtml(response.body)
        item['uid'] = response.meta['uid']
        item['follower_uid_list'] = analyzer.get_follower(total_follower_pq)
        item['follow_uid_list'] = []
        yield item

        follower_uids = item['follower_uid_list']
        # Only the followers of the spider's own uid are expanded; a missing
        # or empty list means there is nothing to do.
        if self.uid != response.meta['uid'] or not follower_uids:
            return

        db = OracleStore()
        conn = db.get_connection()

        def count_rows(table, uid):
            # Number of rows stored in `table` for `uid`.
            # NOTE(review): the statement is built by string interpolation
            # from a uid parsed out of the page (SQL injection risk). Switch
            # to bind parameters if select_operation supports them.
            sql = """select count(*) from %s where userID=%s""" % (table, str(uid))
            cursor = db.select_operation(conn, sql)
            try:
                return cursor.fetchone()[0]
            finally:
                cursor.close()

        try:
            for follower_uid in follower_uids:
                # Accounts followed by this follower.
                # A count of 0 means this account has not been fetched yet.
                if not count_rows('t_user_follow', follower_uid):
                    for page in range(WeiboSpider.follow_page_num, 0, -1):
                        # The page number is passed via the shared class dict.
                        GetWeibopage.relation_data['page'] = page
                        follow_url = getinfo.get_follow_mainurl(follower_uid) + getweibopage.get_relation_paramurl()
                        yield Request(url=follow_url,
                                      meta={'cookiejar': response.meta['cookiejar'], 'uid': follower_uid},
                                      callback=self.parse_follow)
                else:
                    # Prints the same text as the former print statement and
                    # also parses under Python 3.
                    print('follow_uid existed! %s' % (follower_uid,))
                    # NOTE(review): placeholder kept from the original;
                    # presumably ignored downstream - confirm before removing.
                    yield None

                # Followers of this follower.
                # A count of 0 means this account has not been fetched yet.
                if not count_rows('t_user_follower', follower_uid):
                    for page in range(WeiboSpider.follower_page_num, 0, -1):
                        GetWeibopage.relation_data['page'] = page
                        follower_url = getinfo.get_follower_mainurl(follower_uid) + getweibopage.get_relation_paramurl()
                        yield Request(url=follower_url,
                                      meta={'cookiejar': response.meta['cookiejar'], 'uid': follower_uid},
                                      callback=self.parse_follower)
                else:
                    print('follower_uid existed! %s' % (follower_uid,))
                    yield None
        finally:
            # Runs on normal completion, on error, and when the generator is
            # closed early, so the connection is always released.
            conn.close()