コード例 #1
0
 def atuser_uid_parser(self, response):
     """Record the uid of the @-mentioned user carried by this response.

     The response body and the selector ``script:contains("W_face_radius")``
     are handed to ``Analyzer.get_html``; its result is passed to
     ``FriendCircle.get_user_uid`` and the returned uid is stored in
     ``self.atuser_dict`` under ``response.meta['atuser_nickname']``.

     Args:
         response: the downloaded response; its ``meta`` must contain
             ``'atuser_nickname'``.
     """
     analyzer = Analyzer()
     friendcircle = FriendCircle()
     total_pq = analyzer.get_html(response.body,
                                  'script:contains("W_face_radius")')
     uid = friendcircle.get_user_uid(total_pq)
     self.atuser_dict[response.meta['atuser_nickname']] = uid
コード例 #2
0
 def atuser_uid_parser(self, response):
     """Store the uid resolved from ``response`` in ``self.atuser_dict``.

     ``Analyzer.get_html`` is called with the response body and the
     selector ``script:contains("W_face_radius")``; the result is given to
     ``FriendCircle.get_user_uid``.  The uid it returns is saved under the
     nickname found in ``response.meta['atuser_nickname']``.

     Args:
         response: the downloaded response; its ``meta`` must contain
             ``'atuser_nickname'``.
     """
     analyzer = Analyzer()
     friendcircle = FriendCircle()
     total_pq = analyzer.get_html(response.body,
                                  'script:contains("W_face_radius")')
     uid = friendcircle.get_user_uid(total_pq)
     self.atuser_dict[response.meta['atuser_nickname']] = uid
コード例 #3
0
    def parse_load(self, response):
        """Parse one loaded chunk of a user's weibo page.

        Yields, in order:

        1. On the first load of the first page (``pre_page=0`` and
           ``page=1`` in the request URL), and only when ``t_user_info``
           has no row for ``self.uid``: a ``Request`` for the user's
           profile page, handled by ``parse_userinfo``.  Accounts whose
           property is ``icon_verify_co_v`` (public accounts) are skipped.
        2. One ``WeibospiderItem`` holding uid, content, time, timestamp,
           repost users and the @-mentioned nickname lists.
        3. One ``Request`` per nickname in the nested list returned by
           ``FriendCircle.atuser_parser``, handled by ``parse_atuser_uid``.

        Args:
            response: the downloaded response; its ``meta`` must contain
                ``'cookiejar'`` and ``'uid'``.
        """
        # Raw string, and \d+ so that multi-digit values such as page=10 are
        # read whole instead of being truncated to their first digit.
        first_load_re = re.compile(r'&pre_page=(\d+).*&page=(\d+)')
        match = first_load_re.search(response.request.url)
        # A URL without these parameters is simply not a first load.
        if match and int(match.group(1)) == 0 and int(match.group(2)) == 1:
            db = OracleStore()
            conn = db.get_connection()
            # NOTE(review): the query is built by string interpolation; use
            # bind variables if select_operation supports them - TODO confirm.
            sql = "select count(*) from t_user_info where userID='%s'" % self.uid
            cursor = db.select_operation(conn, sql)
            try:
                count = cursor.fetchone()
            finally:
                # Release the connection before yielding anything, so it is
                # neither held while the generator is suspended nor leaked
                # when fetching fails.
                db.close_connection(conn, cursor)
            if not count[0]:  # this uid has not been crawled yet
                analyzer = Analyzer()
                total_pq = analyzer.get_html(
                    response.body, 'script:contains("PCD_person_info")')
                user_property = analyzer.get_userproperty(total_pq)
                # Public accounts are not handled for now: the database
                # needs a foreign key set up first.
                if user_property != 'icon_verify_co_v':
                    userinfo_url = analyzer.get_userinfohref(total_pq)
                    yield Request(url=userinfo_url,
                                  meta={
                                      'cookiejar': response.meta['cookiejar'],
                                      'uid': response.meta['uid'],
                                      'user_property': user_property
                                  },
                                  callback=self.parse_userinfo)

        # Weibo content plus @-user and repost-user information.
        item = WeibospiderItem()
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        total_pq = analyzer.get_mainhtml(response.body)
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)
        atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(
            total_pq)
        atuser_list = friendcircle.atuser_parser(atuser_info)
        item['atuser_nickname_list'] = atuser_list
        yield item

        # Look up the uid of every @-mentioned nickname; empty inner lists
        # contribute nothing.
        for atuser_inlist in atuser_list:
            for atuser in atuser_inlist:
                uid_url = "http://s.weibo.com/user/" + quote(
                    quote(str(atuser))) + "&Refer=SUer_box"
                yield Request(url=uid_url,
                              meta={
                                  'cookiejar': response.meta['cookiejar'],
                                  'uid': self.uid,
                                  'atuser_nickname': atuser
                              },
                              callback=self.parse_atuser_uid)
コード例 #4
0
    def parse_load(self, response):
        """Parse one loaded chunk of a user's weibo page.

        Yields, in order:

        1. On the first load of the first page (``pre_page=0`` and
           ``page=1`` in the request URL), when ``response.meta['is_search']``
           is falsy and the account property is not ``icon_verify_co_v``
           (public account): a ``Request`` for the main user's profile page
           with ``is_friend=0``, handled by ``parse_userinfo``.
        2. One ``WeibospiderItem`` holding uid, content, time, timestamp,
           repost users and the @-mentioned nickname lists.
        3. One ``Request`` per non-empty entry of ``item['repost_user']``
           with ``is_friend=2``, handled by ``parse_friend_uid``.

        Crawling of @-mentioned users (``is_friend=1``) is disabled in this
        version.

        Args:
            response: the downloaded response; its ``meta`` must contain
                ``'cookiejar'`` and ``'uid'``, plus ``'is_search'`` for
                first-load URLs.
        """
        # Raw string, and \d+ so that multi-digit values such as page=10 are
        # read whole instead of being truncated to their first digit.
        first_load_re = re.compile(r'&pre_page=(\d+).*&page=(\d+)')
        match = first_load_re.search(response.request.url)
        is_first_load = (bool(match) and int(match.group(1)) == 0
                         and int(match.group(2)) == 1)
        # Only fetch the main user's info (not @-users or repost users), and
        # only when that user has not been searched before.
        if is_first_load and not response.meta['is_search']:
            analyzer = Analyzer()
            total_pq = analyzer.get_html(
                response.body, 'script:contains("PCD_person_info")')
            user_property = analyzer.get_userproperty(total_pq)
            if user_property != 'icon_verify_co_v':  # not a public account
                userinfo_url = analyzer.get_userinfohref(total_pq)
                yield Request(url=userinfo_url,
                              meta={
                                  'cookiejar': response.meta['cookiejar'],
                                  'uid': response.meta['uid'],
                                  'is_friend': 0
                              },
                              callback=self.parse_userinfo)

        # Weibo content plus @-user and repost-user information.
        item = WeibospiderItem()
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        total_pq = analyzer.get_html(response.body,
                                     'script:contains("WB_feed WB_feed_v3")')
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)
        atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(
            total_pq)
        item['atuser_nickname_list'] = friendcircle.atuser_parser(atuser_info)
        yield item

        # Fetch uid and basic info of repost users; is_friend=2 marks the
        # request as being for a repost user.
        frc_analyzer = friendcircle_analyzer()
        for repostuser_alias in item['repost_user']:
            if repostuser_alias:  # skip empty entries (no repost user)
                friend_url = frc_analyzer.get_frienduid_url(repostuser_alias)
                yield Request(
                    url=friend_url,
                    meta={
                        'cookiejar': response.meta['cookiejar'],
                        'uid': response.meta['uid'],
                        'is_friend': 2
                    },
                    callback=self.parse_friend_uid)
コード例 #5
0
 def parse_atuser_uid(self, response):
    """Yield an item pairing an @-mentioned nickname with its uid.

    The nickname comes from ``response.meta['atuser_nickname']``; the uid
    is whatever ``FriendCircle.get_user_uid2`` returns for that nickname
    and the ``Analyzer.get_html`` result.  The item also carries
    ``response.meta['uid']`` under ``'uid'``.
    """
    record = WeibospiderItem()
    html_parser = Analyzer()
    circle = FriendCircle()
    nickname = response.meta['atuser_nickname']
    record['atuser_nickname'] = nickname
    script_pq = html_parser.get_html(
        response.body, 'script:contains("W_face_radius")')
    record['atuser_uid'] = circle.get_user_uid2(nickname, script_pq)
    record['uid'] = response.meta['uid']
    yield record
 def parse_atuser_uid(self, response):
     """Resolve the uid of an @-mentioned user and cache it.

     The nickname is read from ``response.meta['atuser_nickname']``.
     ``FriendCircle.get_user_uid2`` is called with that nickname and the
     result of ``Analyzer.get_html`` for the selector
     ``script:contains("W_face_radius")``; the uid it returns is stored in
     ``self.atuser_dict`` under the nickname.

     Args:
         response: the downloaded response; its ``meta`` must contain
             ``'atuser_nickname'``.
     """
     analyzer = Analyzer()
     friendcircle = FriendCircle()
     atuser_nickname = response.meta['atuser_nickname']
     total_pq = analyzer.get_html(response.body,
                                  'script:contains("W_face_radius")')
     # Look the uid up by nickname.
     atuser_uid = friendcircle.get_user_uid2(atuser_nickname, total_pq)
     self.atuser_dict[atuser_nickname] = atuser_uid
コード例 #7
0
 def parse_atuser_uid(self, response):
     """Resolve the uid of an @-mentioned user and cache it.

     Reads the nickname from ``response.meta['atuser_nickname']``, passes
     it together with the ``Analyzer.get_html`` result (selector
     ``script:contains("W_face_radius")``) to
     ``FriendCircle.get_user_uid2``, and stores the returned uid in
     ``self.atuser_dict`` keyed by the nickname.

     Args:
         response: the downloaded response; its ``meta`` must contain
             ``'atuser_nickname'``.
     """
     analyzer = Analyzer()
     friendcircle = FriendCircle()
     atuser_nickname = response.meta['atuser_nickname']
     total_pq = analyzer.get_html(response.body,
                                  'script:contains("W_face_radius")')
     atuser_uid = friendcircle.get_user_uid2(atuser_nickname, total_pq)
     self.atuser_dict[atuser_nickname] = atuser_uid
コード例 #8
0
 def parse_atuser_uid(self, response):
     """Yield an item pairing an @-mentioned nickname with its uid.

     The nickname is taken from ``response.meta['atuser_nickname']`` and
     the uid from ``FriendCircle.get_user_uid2``; the item additionally
     carries ``response.meta['uid']`` under ``'uid'``.
     """
     record = WeibospiderItem()
     html_parser = Analyzer()
     circle = FriendCircle()
     nickname = response.meta['atuser_nickname']
     record['atuser_nickname'] = nickname
     script_pq = html_parser.get_html(
         response.body, 'script:contains("W_face_radius")')
     record['atuser_uid'] = circle.get_user_uid2(nickname, script_pq)
     record['uid'] = response.meta['uid']
     yield record
コード例 #9
0
 def parse_load(self, response):
     """Yield one item with the weibo data extracted from ``response``.

     The item receives ``uid`` (from ``response.meta``), ``content``,
     ``time``, ``timestamp``, ``repost_user`` and
     ``atuser_nickname_list``; all but the first are produced by
     ``Analyzer`` / ``FriendCircle`` from the ``get_mainhtml`` result.
     """
     weibo = WeibospiderItem()
     parser = Analyzer()
     circle = FriendCircle()
     main_pq = parser.get_mainhtml(response.body)

     weibo['uid'] = response.meta['uid']
     weibo['content'] = parser.get_content(main_pq)

     posted_at, posted_ts = parser.get_time(main_pq)
     weibo['time'] = posted_at
     weibo['timestamp'] = posted_ts

     mentions_raw, reposters = parser.get_atuser_repostuser(main_pq)
     weibo['repost_user'] = reposters
     weibo['atuser_nickname_list'] = circle.atuser_parser(mentions_raw)
     yield weibo
コード例 #10
0
 def parse_load(self, response):
     """Yield one item with the weibo data extracted from ``response``.

     Fields set on the item: ``uid`` (from ``response.meta``), ``content``,
     ``time``, ``timestamp``, ``repost_user`` and ``atuser_nickname_list``.
     Everything except ``uid`` is derived from the ``Analyzer.get_mainhtml``
     result via ``Analyzer`` and ``FriendCircle`` helpers.
     """
     weibo = WeibospiderItem()
     parser = Analyzer()
     circle = FriendCircle()
     main_pq = parser.get_mainhtml(response.body)

     weibo['uid'] = response.meta['uid']
     weibo['content'] = parser.get_content(main_pq)

     posted_at, posted_ts = parser.get_time(main_pq)
     weibo['time'] = posted_at
     weibo['timestamp'] = posted_ts

     mentions_raw, reposters = parser.get_atuser_repostuser(main_pq)
     weibo['repost_user'] = reposters
     weibo['atuser_nickname_list'] = circle.atuser_parser(mentions_raw)
     yield weibo
    def parse_load(self, response):
        """Parse one loaded chunk of a user's weibo page.

        Yields, in order:

        1. On the first load of the first page (``pre_page=0`` and
           ``page=1`` in the request URL), when ``response.meta['is_search']``
           is falsy and the account property is not ``icon_verify_co_v``
           (public account): a ``Request`` for the main user's profile page
           with ``is_friend=0``, handled by ``parse_userinfo``.
        2. One ``WeibospiderItem`` holding uid, content, time, timestamp,
           repost/comment/like counts, repost users and the @-mentioned
           nickname lists.
        3. One ``Request`` per alias in ``self.get_atuser_set(...)`` with
           ``is_friend=1``, then one per non-empty entry of
           ``item['repost_user']`` with ``is_friend=2``; both are handled
           by ``parse_friend_uid``.

        Every request is sent with a cookie set picked at random from
        ``COOKIES``.

        Args:
            response: the downloaded response; its ``meta`` must contain
                ``'uid'``, plus ``'is_search'`` for first-load URLs.
        """
        # Raw string, and \d+ so that multi-digit values such as page=10 are
        # read whole instead of being truncated to their first digit.
        first_load_re = re.compile(r'&pre_page=(\d+).*&page=(\d+)')
        match = first_load_re.search(response.request.url)
        is_first_load = (bool(match) and int(match.group(1)) == 0
                         and int(match.group(2)) == 1)
        # Only fetch the main user's info (not @-users or repost users), and
        # only when that user has not been searched before (is_search == 0).
        if is_first_load and not response.meta['is_search']:
            analyzer = Analyzer()
            total_pq = analyzer.get_html(
                response.body, 'script:contains("PCD_person_info")')
            user_property = analyzer.get_userproperty(total_pq)
            if user_property != 'icon_verify_co_v':  # not a public account
                userinfo_url = analyzer.get_userinfohref(total_pq)
                yield Request(url=userinfo_url,
                              cookies=random.choice(COOKIES),
                              meta={'uid': response.meta['uid'],
                                    'is_friend': 0},
                              callback=self.parse_userinfo)

        # Weibo content plus @-user and repost information.
        item = WeibospiderItem()
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        total_pq = analyzer.get_html(
            response.body, 'script:contains("WB_feed WB_feed_v3")')
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)

        weibo_analyzer = weibocontent_analyzer()
        item['repost_nums'], item['comment_nums'], item['like_nums'] = (
            weibo_analyzer.get_weibo_relative_args(total_pq))

        atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(
            total_pq)
        atuser_list = friendcircle.atuser_parser(atuser_info)
        item['atuser_nickname_list'] = atuser_list
        yield item

        frc_analyzer = friendcircle_analyzer()
        # Fetch uid and basic info of @-mentioned users; is_friend=1 marks
        # the request as being for an @-user.
        for atuser_alias in self.get_atuser_set(atuser_list):
            friend_url = frc_analyzer.get_frienduid_url(atuser_alias)
            yield Request(url=friend_url,
                          cookies=random.choice(COOKIES),
                          meta={'uid': response.meta['uid'], 'is_friend': 1},
                          callback=self.parse_friend_uid)

        # Fetch uid and basic info of repost users; is_friend=2 marks the
        # request as being for a repost user.
        for repostuser_alias in item['repost_user']:
            if repostuser_alias:  # skip empty entries (no repost user)
                friend_url = frc_analyzer.get_frienduid_url(repostuser_alias)
                yield Request(url=friend_url,
                              cookies=random.choice(COOKIES),
                              meta={'uid': response.meta['uid'],
                                    'is_friend': 2},
                              callback=self.parse_friend_uid)
コード例 #12
0
    def parse_load(self, response):
        """Yield one item describing the weibo content found in ``response``.

        The item carries ``uid`` (from ``response.meta``), ``content``,
        ``time`` and ``timestamp`` (from ``Analyzer``), and ``repost_nums``,
        ``comment_nums`` and ``like_nums`` (from
        ``weibocontent_analyzer.get_weibo_relative_args``).

        Args:
            response: the downloaded response; its ``meta`` must contain
                ``'uid'``.
        """
        item = WeibospiderItem()
        analyzer = Analyzer()
        total_pq = analyzer.get_html(
            response.body, 'script:contains("WB_feed WB_feed_v3")')
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)

        weibo_analyzer = weibocontent_analyzer()
        item['repost_nums'], item['comment_nums'], item['like_nums'] = (
            weibo_analyzer.get_weibo_relative_args(total_pq))
        yield item
コード例 #13
0
    def parse_load(self, response):
        """Parse one loaded chunk of a user's weibo page.

        Yields, in order:

        1. On the first load of the first page (``pre_page=0`` and
           ``page=1`` in the request URL), and only when ``t_user_info``
           has no row for ``self.uid``: a ``Request`` for the user's
           profile page, handled by ``parse_userinfo``.  Accounts whose
           property is ``icon_verify_co_v`` (public accounts) are skipped.
        2. One ``WeibospiderItem`` holding uid, content, time, timestamp,
           repost users and the @-mentioned nickname lists.
        3. One ``Request`` per nickname in the nested list returned by
           ``FriendCircle.atuser_parser``, handled by ``parse_atuser_uid``.

        Args:
            response: the downloaded response; its ``meta`` must contain
                ``'cookiejar'`` and ``'uid'``.
        """
        # Raw string, and \d+ so that multi-digit values such as page=10 are
        # read whole instead of being truncated to their first digit.
        first_load_re = re.compile(r'&pre_page=(\d+).*&page=(\d+)')
        match = first_load_re.search(response.request.url)
        # A URL without these parameters is simply not a first load.
        if match and int(match.group(1)) == 0 and int(match.group(2)) == 1:
            db = OracleStore()
            conn = db.get_connection()
            # NOTE(review): the query is built by string interpolation; use
            # bind variables if select_operation supports them - TODO confirm.
            sql = "select count(*) from t_user_info where userID='%s'" % self.uid
            cursor = db.select_operation(conn, sql)
            try:
                count = cursor.fetchone()
            finally:
                # Release the connection before yielding anything, so it is
                # neither held while the generator is suspended nor leaked
                # when fetching fails.
                db.close_connection(conn, cursor)
            if not count[0]:  # this uid has not been crawled yet
                analyzer = Analyzer()
                total_pq = analyzer.get_html(
                    response.body, 'script:contains("PCD_person_info")')
                user_property = analyzer.get_userproperty(total_pq)
                # Public accounts are not handled for now: the database
                # needs a foreign key set up first.
                if user_property != 'icon_verify_co_v':
                    userinfo_url = analyzer.get_userinfohref(total_pq)
                    yield Request(url=userinfo_url,
                                  meta={'cookiejar': response.meta['cookiejar'],
                                        'uid': response.meta['uid'],
                                        'user_property': user_property},
                                  callback=self.parse_userinfo)

        # Weibo content plus @-user and repost-user information.
        item = WeibospiderItem()
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        total_pq = analyzer.get_mainhtml(response.body)
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)
        atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(
            total_pq)
        atuser_list = friendcircle.atuser_parser(atuser_info)
        item['atuser_nickname_list'] = atuser_list
        yield item

        # Look up the uid of every @-mentioned nickname; empty inner lists
        # contribute nothing.
        for atuser_inlist in atuser_list:
            for atuser in atuser_inlist:
                uid_url = ("http://s.weibo.com/user/"
                           + quote(quote(str(atuser))) + "&Refer=SUer_box")
                yield Request(url=uid_url,
                              meta={'cookiejar': response.meta['cookiejar'],
                                    'uid': self.uid,
                                    'atuser_nickname': atuser},
                              callback=self.parse_atuser_uid)