def parse_follows(self, response):
    """
    Parse one page of a user's follow list.

    Yields, in order: one user-profile ``Request`` per followed entry that
    carries a ``user`` object, one ``UserRelationItem`` listing those users
    under ``follows`` (``fans`` is left empty), and a ``Request`` for the
    next page. Nothing is yielded, which ends pagination, when the payload
    is not ``ok`` or the last card has no non-empty ``card_group``.

    :param response: response whose ``text`` is a JSON document and whose
        ``meta`` carries ``uid`` and ``page``
    """
    result = json.loads(response.text)
    if not result.get('ok'):
        return
    # 'data' can be absent or null even when 'ok' is set; treat it as empty
    # instead of calling .get() on None.
    cards = (result.get('data') or {}).get('cards')
    if not cards:
        return
    follows = cards[-1].get('card_group')
    if not follows:
        return

    # Some card_group entries carry no 'user'; skip them everywhere so the
    # relation list below cannot fail on a missing user.
    users = [follow.get('user') for follow in follows if follow.get('user')]

    # Schedule a profile crawl for every followed user.
    for user in users:
        yield Request(self.user_url.format(uid=user.get('id')),
                      callback=self.parse_user)

    # Follow list of the user this page belongs to.
    uid = response.meta.get('uid')
    user_relation_item = UserRelationItem()
    user_relation_item['id'] = uid
    user_relation_item['follows'] = [
        {'id': user.get('id'), 'name': user.get('screen_name')}
        for user in users
    ]
    user_relation_item['fans'] = []
    yield user_relation_item

    # Next page of follows.
    page = response.meta.get('page') + 1
    yield Request(self.follow_url.format(uid=uid, page=page),
                  callback=self.parse_follows,
                  meta={'page': page, 'uid': uid})
def parse_fans(self, response):
    """
    Parse one page of a user's fan list.

    Yields, in order: one user-profile ``scrapy.Request`` per fan entry that
    carries a ``user`` object, one ``UserRelationItem`` listing those users
    under ``fans`` (``follows`` is left empty), and a ``scrapy.Request`` for
    the next page (``since_id`` incremented by one). Nothing is yielded,
    which ends pagination, when the payload has no cards or the last card
    has no non-empty ``card_group``.

    :param response: response whose ``text`` is a JSON document and whose
        ``meta`` carries ``uid`` and ``since_id``
    """
    result = json.loads(response.text)
    # 'data' can be absent or null (e.g. an error payload); treat it as
    # empty instead of calling .get() on None.
    cards = (result.get('data') or {}).get('cards')
    if not cards:
        return
    fans = cards[-1].get('card_group')
    # A missing card_group would otherwise fail when iterated; an empty one
    # carries no relations, so stop paginating here.
    if not fans:
        return

    # Some card_group entries carry no 'user'; skip them everywhere so the
    # relation list below cannot fail on a missing user.
    users = [fan.get('user') for fan in fans if fan.get('user')]

    # Schedule a profile crawl for every fan.
    for user in users:
        yield scrapy.Request(
            url=self.user_url.format(uid=user.get('id')),
            callback=self.parse_user,
        )

    # Fan list of the user this page belongs to.
    item = UserRelationItem()
    uid = response.meta.get('uid')
    item['id'] = uid
    item['fans'] = [
        {'id': user.get('id'), 'name': user.get('screen_name')}
        for user in users
    ]
    item['follows'] = []
    yield item

    # Next page of fans.
    since_id = response.meta.get('since_id') + 1
    yield scrapy.Request(
        url=self.fans_url.format(uid=uid, since_id=since_id),
        callback=self.parse_fans,
        meta={'since_id': since_id, 'uid': uid},
    )
def parse_fans(self, response):
    """
    Parse one page of a user's fan list.

    Yields, in order: one user-profile ``Request`` per fan entry that
    carries a ``user`` object, one ``UserRelationItem`` listing those users
    under ``fans`` (``follows`` is left empty), and a ``Request`` for the
    next page. Nothing is yielded, which ends pagination, when the payload
    is not ``ok`` or the last card has no non-empty ``card_group``.

    :param response: response whose ``text`` is a JSON document and whose
        ``meta`` carries ``uid`` and ``page``
    """
    result = json.loads(response.text)
    if not result.get('ok'):
        return
    # 'data' can be absent or null even when 'ok' is set; treat it as empty
    # instead of calling .get() on None.
    cards = (result.get('data') or {}).get('cards')
    if not cards:
        return
    fans = cards[-1].get('card_group')
    if not fans:
        return

    # Some card_group entries carry no 'user'; skip them everywhere so the
    # relation list below cannot fail on a missing user.
    users = [fan.get('user') for fan in fans if fan.get('user')]

    # Schedule a profile crawl for every fan.
    for user in users:
        yield Request(self.user_url.format(uid=user.get('id')),
                      callback=self.parse_user)

    # Fan list of the user this page belongs to.
    uid = response.meta.get('uid')
    user_relation_item = UserRelationItem()
    user_relation_item['id'] = uid
    user_relation_item['fans'] = [
        {'id': user.get('id'), 'name': user.get('screen_name')}
        for user in users
    ]
    user_relation_item['follows'] = []
    yield user_relation_item

    # Next page of fans.
    page = response.meta.get('page') + 1
    yield Request(self.fan_url.format(uid=uid, page=page),
                  callback=self.parse_fans,
                  meta={'page': page, 'uid': uid})
def parse_follow(self, response):
    """
    Parse one page of a user's follow list.

    Yields, in order: one user-profile ``scrapy.Request`` per followed entry
    that carries a ``user`` object, one ``UserRelationItem`` listing those
    users under ``follows`` (``fans`` is left empty), and a
    ``scrapy.Request`` for the next page. Nothing is yielded, which ends
    pagination, when the payload has no cards or the last card has no
    non-empty ``card_group``.

    :param response: response whose ``text`` is a JSON document and whose
        ``meta`` carries ``uid`` and ``page``
    """
    result = json.loads(response.text)
    # 'data' can be absent or null (e.g. an error payload); treat it as
    # empty instead of calling .get() on None.
    cards = (result.get('data') or {}).get('cards')
    if not cards:
        return
    follows = cards[-1].get('card_group')
    # A missing card_group would otherwise fail when iterated; an empty one
    # carries no relations, so stop paginating here.
    if not follows:
        return

    # Some card_group entries carry no 'user'; skip them everywhere so the
    # relation list below cannot fail on a missing user.
    users = [follow.get('user') for follow in follows if follow.get('user')]

    # Schedule a profile crawl for every followed user.
    for user in users:
        yield scrapy.Request(self.user_url.format(uid=user.get('id')),
                             callback=self.parse_user)

    # Follow list of the user this page belongs to.
    item = UserRelationItem()
    uid = response.meta.get('uid')
    item['id'] = uid
    item['follows'] = [
        {'id': user.get('id'), 'name': user.get('screen_name')}
        for user in users
    ]
    item['fans'] = []
    yield item

    # Next page of the follow list.
    page = response.meta.get('page') + 1
    yield scrapy.Request(url=self.follower_url.format(uid=uid, page=page),
                         callback=self.parse_follow,
                         meta={'page': page, 'uid': uid})