Esempio n. 1
0
class GetRooms():
    """Collect room ids and owner ids from the channel feed and store them.

    ``get_raw_data`` supplies the raw payloads and ``redis_client`` receives
    the results; both are project helpers created in ``__init__``.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Buffers filled by parse_channel() and emptied again by run().
        self.sec_user_id_list = []
        self.room_id_list = []

    @staticmethod
    def _error_text(exc):
        """Return a description of *exc* that is always safe to concatenate.

        Uses the first exception argument when there is one (passed through
        ``str`` so a non-string argument cannot break the log call) and falls
        back to ``repr(exc)`` for exceptions raised without arguments.
        """
        if exc.args:
            return str(exc.args[0])
        return repr(exc)

    def get_channel(self):
        """Fetch the channel feed, parse it and persist what was collected.

        Always returns None. When fetching or parsing raises, the error is
        logged and nothing is written to ``redis_client``.
        """
        try:
            channel_raw_data = self.get_raw_data.get_channel()
        except Exception as e:
            logger.error('get_channel出错-' + self._error_text(e))
            return None

        try:
            self.parse_channel(channel_raw_data)
        except Exception as e:
            logger.error('parse_channel出错-' + self._error_text(e))
            return None

        for room_id in self.room_id_list:
            self.redis_client.add_rooms(room_id)
        for sec_user_id in self.sec_user_id_list:
            self.redis_client.add_users(sec_user_id, 1)
        return None

    def parse_channel(self, channel_raw_data):
        """Append qualifying rooms from *channel_raw_data* to the buffers.

        A room qualifies when its owner has at least 10000 followers and the
        item list fetched for it has a non-empty ``'promotions'`` value. A
        missing ``'promotions'`` key is treated the same as an empty one.

        Returns None. If fetching an item list raises, the error is logged
        and parsing stops at that entry; entries buffered so far are kept.
        Malformed entries raise and are left for the caller to handle.
        """
        for entry in channel_raw_data.get('data'):
            room = entry.get('data')
            room_id = room.get('id_str')
            owner = room.get('owner')
            sec_user_id = owner.get('sec_uid')
            follower = owner.get('follow_info').get('follower_count')

            if follower < 10000:
                continue

            try:
                item_list = self.get_raw_data.get_item_list(
                    sec_user_id, room_id)
            except Exception as e:
                logger.error('get_item_list出错-' + self._error_text(e))
                return None

            if item_list.get('promotions'):
                self.room_id_list.append(room_id)
                self.sec_user_id_list.append(sec_user_id)
        return None

    def run(self):
        """Run one collection pass in a greenlet, log totals, reset buffers."""
        tasks = [gevent.spawn(self.get_channel)]
        gevent.joinall(tasks)
        logger.info('本批次共获得room_id和sec_user_id-' +
                    str(len(self.sec_user_id_list)) + '-' +
                    str(len(self.room_id_list)))
        self.sec_user_id_list.clear()
        self.room_id_list.clear()
Esempio n. 2
0
class GetCurrentRoom():
    """Look up, per stored user, the room the user currently owns.

    Room ids found by ``get_current_room`` accumulate in ``room_id_list``.
    ``run`` does not call ``save_rooms``; a caller that wants the collected
    ids stored has to call it explicitly.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        self.room_id_list = []

    @staticmethod
    def _error_text(exc):
        """Return a description of *exc* that is always safe to concatenate.

        Uses the first exception argument when there is one (passed through
        ``str`` so a non-string argument cannot break the log call) and falls
        back to ``repr(exc)`` for exceptions raised without arguments.
        """
        if exc.args:
            return str(exc.args[0])
        return repr(exc)

    def get_users(self):
        """Return whatever ``redis_client.get_users()`` returns."""
        return self.redis_client.get_users()

    def save_rooms(self):
        """Store every buffered room id via ``add_rooms(room_id, 0)``."""
        for room_id in self.room_id_list:
            self.redis_client.add_rooms(room_id, 0)

    def get_current_room(self, sec_user_id):
        """Fetch the current-room payload for one user and buffer its room.

        Returns None in every case. Fetch and parse failures are logged and
        leave ``room_id_list`` untouched.
        """
        try:
            raw_data = self.get_raw_data.get_current_room(sec_user_id)
        except Exception as e:
            logger.error('get_current_room出错-' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        try:
            data = raw_data.get('data')
            # Probe only: the value is unused, but a payload lacking
            # data/pay_grade raises here and is reported as a parse error.
            data.get('pay_grade').get('grade_describe')
            own_room = data.get('own_room')
            # Extracted inside the try so a malformed own_room is logged
            # instead of escaping as an unhandled exception.
            room_id = own_room.get('room_ids_str')[0] if own_room else None
        except Exception as e:
            logger.error('parse_current_room出错' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        if own_room:  # an own_room entry means the stream has started
            self.room_id_list.append(room_id)
            logger.info(sec_user_id + '-正在直播,room_id-' + str(room_id))
        else:
            logger.info(sec_user_id + '-未在直播')
        return None

    def run(self):
        """Query all users in batches of 20 concurrent greenlets."""
        users = self.get_users()
        total = len(users)
        logger.info('共有users-' + str(total))
        batch_size = 20
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            logger.info('当前爬取用户序号-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.get_current_room, sec_user_id)
                for sec_user_id in users[start:stop]
            ]
            gevent.joinall(tasks)
Esempio n. 3
0
class GetUserDongtai():
    """Detect live rooms from the first entry of each user's dongtai list.

    Room ids found by ``parse_user_dongtai`` accumulate in ``room_id_list``.
    ``run`` does not call ``save_rooms``; a caller that wants the collected
    ids stored has to call it explicitly.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        self.room_id_list = []

    @staticmethod
    def _error_text(exc):
        """Return a description of *exc* that is always safe to concatenate.

        Uses the first exception argument when there is one (passed through
        ``str`` so a non-string argument cannot break the log call) and falls
        back to ``repr(exc)`` for exceptions raised without arguments.
        """
        if exc.args:
            return str(exc.args[0])
        return repr(exc)

    def get_users(self):
        """Return whatever ``redis_client.get_users()`` returns."""
        return self.redis_client.get_users()

    def save_rooms(self):
        """Store every buffered room id via ``add_rooms(room_id, 0)``."""
        for room_id in self.room_id_list:
            self.redis_client.add_rooms(room_id, 0)

    def get_user_dongtai(self, sec_user_id):
        """Fetch and parse the dongtai payload for one user.

        Returns None in every case; fetch and parse failures are logged.
        """
        try:
            raw_data = self.get_raw_data.get_user_dongtai(sec_user_id)
        except Exception as e:
            logger.error('get_user_dongtai出错-' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        try:
            self.parse_user_dongtai(raw_data)
        except Exception as e:
            logger.error('parse_user_dongtai出错-' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
        return None

    def parse_user_dongtai(self, raw_data):
        """Buffer the author's room id from the first dongtai entry.

        A ``room_id`` of 0, or a missing/empty one, is reported as "not
        live"; any other value is buffered as a string. Raises if the
        payload lacks the expected structure.
        """
        first_entry = raw_data.get('dongtai_list')[0]
        room_id = first_entry.get('aweme').get('author').get('room_id')
        # Truthiness instead of `!= 0`: a missing room_id (None) must not be
        # recorded as the literal room id 'None'.
        if room_id:
            self.room_id_list.append(str(room_id))
            logger.info('该主播已开始直播,room_id-' + str(room_id))
        else:
            logger.info('该主播尚未开始直播')

    def run(self):
        """Query all users in batches of 20 concurrent greenlets."""
        users = self.get_users()
        total = len(users)
        logger.info('共有users-' + str(total))
        # Original author's note: even batches of 20 return no data.
        batch_size = 20
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            logger.info('当前爬取用户序号-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.get_user_dongtai, sec_user_id)
                for sec_user_id in users[start:stop]
            ]
            gevent.joinall(tasks)
Esempio n. 4
0
class CheckQualificationByRankList():
    """Screen stored users through the rank-list payload.

    Users whose nickname contains one of ``STUPID_KEY_WORDS`` are deleted;
    the others get their score increased and, when they own a room, are
    buffered as live users together with that room id.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Per-batch buffers, flushed and cleared by run().
        self.live_user_list = []
        self.room_id_list = []
        self.stupid_key_words = STUPID_KEY_WORDS

    @staticmethod
    def _error_text(exc):
        """Return a description of *exc* that is always safe to concatenate.

        Uses the first exception argument when there is one (passed through
        ``str`` so a non-string argument cannot break the log call) and falls
        back to ``repr(exc)`` for exceptions raised without arguments.
        """
        if exc.args:
            return str(exc.args[0])
        return repr(exc)

    def get_users(self):
        """Return whatever ``redis_client.get_users()`` returns.

        Original author's note: each call yields the 10000 lowest-scored
        users (not verifiable from this class).
        """
        return self.redis_client.get_users()

    def is_live_user(self, sec_user_id):
        """Delegate to ``redis_client.is_live_user``."""
        return self.redis_client.is_live_user(sec_user_id)

    def save_rooms(self):
        """Store every buffered room id via ``add_rooms``."""
        for room_id in self.room_id_list:
            self.redis_client.add_rooms(room_id)

    def add_to_live_users(self):
        """Store every buffered user via ``add_live_users(user, 1)``."""
        for sec_user_id in self.live_user_list:
            self.redis_client.add_live_users(sec_user_id, 1)

    def increase_user_score(self, sec_user_id):
        """Delegate to ``redis_client.increase_user_score``."""
        self.redis_client.increase_user_score(sec_user_id)

    def is_qualified_user(self, user):
        """Return False when the user's nickname contains a blocked word.

        A missing or ``None`` nickname is treated as an empty string, so
        such a user counts as qualified.
        """
        nickname = user.get('nickname') or ''
        return not any(word in nickname for word in self.stupid_key_words)

    def get_rank_list(self, sec_user_id):
        """Process one user; always returns None.

        Users already known as live only get their score increased. For the
        rest the rank-list payload is fetched: fetch/parse failures are
        logged and leave the user untouched, unqualified users are deleted,
        and qualified users get a score increase plus, if they own a room,
        an entry in both buffers.
        """
        if self.is_live_user(sec_user_id):
            self.increase_user_score(sec_user_id)
            return None

        try:
            raw_data = self.get_raw_data.get_rank_list(sec_user_id)
        except Exception as e:
            logger.error('get_rank_list出错-' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        try:
            user = raw_data.get('data').get('anchor_info').get('user')
            # Everything that dereferences the payload stays inside the try
            # so a None user or malformed own_room is logged, not raised.
            qualified = self.is_qualified_user(user)
            own_room = user.get('own_room') if qualified else None
            room_id = own_room.get('room_ids_str')[0] if own_room else None
        except Exception as e:
            logger.error('parse_current_room出错' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        if not qualified:
            self.redis_client.delete_users(sec_user_id)
            logger.info('删除user-sec_user_id-' + sec_user_id)
            return None

        if own_room:  # an own_room entry means the stream has started
            self.live_user_list.append(sec_user_id)
            self.room_id_list.append(room_id)
        self.increase_user_score(sec_user_id)
        return None

    def run(self):
        """Process all users in batches of 50, flushing after each batch."""
        users = self.get_users()
        total = len(users)

        # Original author's note: batches of 80 worked against this endpoint
        # (about 4000 users in total); larger batches were never tried.
        batch_size = 50
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            logger.info('当前获取用户序号-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.get_rank_list, sec_user_id)
                for sec_user_id in users[start:stop]
            ]
            gevent.joinall(tasks)

            self.save_rooms()
            self.add_to_live_users()
            logger.info('新增room_id-' + str(len(self.room_id_list)))
            self.room_id_list.clear()
            self.live_user_list.clear()
Esempio n. 5
0
class CheckRooms():
    """Re-check stored rooms and flag the ones that are now live and selling.

    Rooms come from ``redis_client.get_rooms(0, 0)``; rooms found to qualify
    are buffered in ``lives_on_list`` and re-added with value 1.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Per-batch buffer, flushed and cleared by run().
        self.lives_on_list = []

    @staticmethod
    def _error_text(exc):
        """Return a description of *exc* that is always safe to concatenate.

        Uses the first exception argument when there is one (passed through
        ``str`` so a non-string argument cannot break the log call) and falls
        back to ``repr(exc)`` for exceptions raised without arguments.
        """
        if exc.args:
            return str(exc.args[0])
        return repr(exc)

    def get_rooms(self):
        """Return whatever ``redis_client.get_rooms(0, 0)`` returns."""
        return self.redis_client.get_rooms(0, 0)

    def change_room_status(self):
        """Re-add every buffered room id via ``add_rooms(room_id, 1)``."""
        for room_id in self.lives_on_list:
            self.redis_client.add_rooms(room_id, 1)

    def check_room(self, room_id):
        """Inspect one room; always returns None.

        Owners with fewer than 10000 followers have both their user and room
        entries deleted. Otherwise, when the room's status is 2 and its item
        list has a non-empty ``'promotions'`` value, the room id is buffered.
        Fetch and parse failures are logged and change nothing.
        """
        try:
            room_raw_data = self.get_raw_data.get_live(room_id)
        except Exception as e:
            logger.error('get_live出错-' + self._error_text(e) + '-room_id-' +
                         room_id)
            return None

        try:
            data = room_raw_data.get('data')
            owner = data.get('owner')
            follower_count = owner.get('follow_info').get('follower_count')
            sec_user_id = owner.get('sec_uid')
            # Compared inside the try so a missing follower_count is logged
            # as a parse error instead of raising TypeError afterwards.
            below_threshold = follower_count < 10000
            status = data.get('status')
        except Exception as e:
            logger.error('解析room_raw_data出错-' + self._error_text(e) +
                         '-room_id-' + room_id)
            return None

        if below_threshold:
            self.redis_client.delete_users(sec_user_id)
            self.redis_client.delete_rooms(room_id)
            return None

        # NOTE(review): status 2 appears to mean "currently live" — confirm
        # against the API.
        if status != 2:
            return None

        try:  # check whether this stream is promoting items
            item_list_raw_data = self.get_raw_data.get_item_list(
                sec_user_id, room_id)
        except Exception as e:
            logger.error('get_item_list出错' + self._error_text(e) +
                         '-sec_user_id和room_id-' + str(sec_user_id) + '-' +
                         room_id)
            return None

        # Truthiness: a missing 'promotions' key counts as no promotions.
        if item_list_raw_data.get('promotions'):
            self.lives_on_list.append(room_id)
        return None

    def run(self):
        """Check all rooms in batches of 200, flushing after each batch."""
        all_room_ids = self.get_rooms()
        total = len(all_room_ids)
        logger.info('此前未在直播的直播间数量:' + str(total))
        batch_size = 200
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            logger.info('待查看的此前未在直播的直播间-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.check_room, room_id)
                for room_id in all_room_ids[start:stop]
            ]
            gevent.joinall(tasks)

            logger.info('新发现开始的直播数量-' + str(len(self.lives_on_list)))
            self.change_room_status()
            self.lives_on_list.clear()
Esempio n. 6
0
class GetRankList():
    """Find which stored live-users currently own a room.

    Users come from ``redis_client.get_live_users(0, 0)``. Users found with
    a room are buffered together with the room id and flushed per batch.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        # Per-batch buffers, flushed and cleared by run().
        self.user_list = []
        self.room_id_list = []

    @staticmethod
    def _error_text(exc):
        """Return a description of *exc* that is always safe to concatenate.

        Uses the first exception argument when there is one (passed through
        ``str`` so a non-string argument cannot break the log call) and falls
        back to ``repr(exc)`` for exceptions raised without arguments.
        """
        if exc.args:
            return str(exc.args[0])
        return repr(exc)

    def get_users(self):
        """Return whatever ``redis_client.get_live_users(0, 0)`` returns."""
        return self.redis_client.get_live_users(0, 0)

    def save_rooms(self):
        """Store every buffered room id via ``add_rooms``."""
        for room_id in self.room_id_list:
            self.redis_client.add_rooms(room_id)

    def change_user_status(self):
        """Re-add every buffered user via ``add_live_users(user, 1)``."""
        for sec_user_id in self.user_list:
            self.redis_client.add_live_users(sec_user_id, 1)

    def get_rank_list(self, sec_user_id):
        """Fetch the rank-list payload for one user and buffer its room.

        Returns None in every case. Fetch and parse failures are logged and
        leave both buffers untouched.
        """
        try:
            raw_data = self.get_raw_data.get_rank_list(sec_user_id)
        except Exception as e:
            logger.error('get_rank_list出错-' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        try:
            user = raw_data.get('data').get('anchor_info').get('user')
            own_room = user.get('own_room')
            # Extracted inside the try so a malformed own_room is logged
            # instead of escaping as an unhandled exception.
            room_id = own_room.get('room_ids_str')[0] if own_room else None
        except Exception as e:
            logger.error('parse_current_room出错' + self._error_text(e) +
                         '-sec_user_id-' + sec_user_id)
            return None

        if own_room:  # an own_room entry means the stream has started
            self.user_list.append(sec_user_id)
            self.room_id_list.append(room_id)
        return None

    def run(self):
        """Process all users in batches of 50, flushing after each batch."""
        users = self.get_users()
        total = len(users)
        logger.info('共有未在直播的users-' + str(total))

        # Original author's note: batches of 80 worked against this endpoint
        # (about 4000 users in total); larger batches were never tried.
        batch_size = 50
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            logger.info('当前获取用户序号-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.get_rank_list, sec_user_id)
                for sec_user_id in users[start:stop]
            ]
            gevent.joinall(tasks)

            self.save_rooms()
            self.change_user_status()
            logger.info('新增room_id-' + str(len(self.room_id_list)))
            self.room_id_list.clear()
            self.user_list.clear()