예제 #1
0
class GetSecUserIds():
    """Resolve queued aweme ids to the ``sec_uid`` of their authors.

    Aweme ids come from ``redis_client.get_feigua_awemes()``, are looked up
    in groups through ``get_raw_data.get_clips`` and, for every item whose
    author has a ``sec_uid``, the aweme id is deleted from the queue and the
    ``sec_uid`` is stored with ``redis_client.add_pre_users(sec_uid, -1)``.
    """

    # Number of aweme ids sent in a single get_clips request.
    GROUP_SIZE = 20
    # Number of groups spawned as greenlets before joining them.
    BATCH_SIZE = 1

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()

    def get_aweme_lists(self):
        """Return the queued aweme ids split into groups of at most GROUP_SIZE."""
        awemes = self.redis_client.get_feigua_awemes()
        size = self.GROUP_SIZE
        return [awemes[i:i + size] for i in range(0, len(awemes), size)]

    def get_clips(self, aweme_list):
        """Fetch one group of aweme details and record their authors.

        Args:
            aweme_list: aweme ids, each convertible with ``int``.

        Returns:
            None in every case; the results are written through
            ``redis_client``. Items without an author or without a
            ``sec_uid`` are skipped and stay in the queue.
        """
        aweme_int_list = [int(aweme) for aweme in aweme_list]
        try:
            raw_data = self.get_raw_data.get_clips(aweme_int_list)
        except Exception as e:
            # str(e) rather than e.args[0]: args can be empty or hold a
            # non-string, which would raise inside this handler.
            logger.error('get_clips出错-' + str(e))
            return None

        if raw_data.get('status_code') == 2053:
            # Logged as "this group has no videos".
            logger.info('这组没有视频')
            return None

        resolved = []
        # A missing 'aweme_details' or 'author' is treated as "nothing here"
        # instead of crashing the greenlet.
        for each in raw_data.get('aweme_details') or []:
            author = each.get('author') or {}
            sec_user_id = author.get('sec_uid')
            if sec_user_id:
                resolved.append((each.get('aweme_id'), sec_user_id))

        # Keep the original order of side effects: all deletions first,
        # then all additions.
        for aweme_id, _ in resolved:
            self.redis_client.delete_feigua_awemes(aweme_id)
        for _, sec_user_id in resolved:
            self.redis_client.add_pre_users(sec_user_id, -1)
        return None

    def run(self):
        """Process every aweme group, BATCH_SIZE greenlets at a time."""
        aweme_lists = self.get_aweme_lists()
        total = len(aweme_lists)
        logger.info('共有feigua_aweme组数:' + str(total))
        for start in range(0, total, self.BATCH_SIZE):
            stop = min(start + self.BATCH_SIZE, total)
            logger.info('get_clips爬取当前feigua_aweme组序号-' + str(start + 1) + '-' + str(stop))
            tasks = [
                gevent.spawn(self.get_clips, aweme_list)
                for aweme_list in aweme_lists[start:stop]
            ]
            gevent.joinall(tasks)
예제 #2
0
class CheckQualificationByPromotion():
    """Check pending pre-users against their promotions page.

    Users returned by ``redis_client.get_pre_users(-1, -1)`` are looked up
    with ``get_raw_data.get_promotions``. A user whose page was fetched but
    lists no promotions is deleted; a user with promotions is re-added with
    status 0.
    """

    # Even though the requests are asynchronous they are still slow: 200 at
    # a time was reported to be as slow as running synchronously.
    BATCH_SIZE = 50

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()

    def get_users(self):
        """Return the users from ``redis_client.get_pre_users(-1, -1)``."""
        return self.redis_client.get_pre_users(-1, -1)

    def check_commercial(self, sec_user_id):
        """Delete or promote one user depending on its promotions page.

        Args:
            sec_user_id: user id; concatenated into log messages, so it is
                expected to be a ``str``.

        Returns:
            None in every case. Fetch and parse failures are logged and
            leave the user untouched.
        """
        try:
            raw_data = self.get_raw_data.get_promotions(sec_user_id)
        except Exception as e:
            # str(e) rather than e.args[0]: args can be empty or hold a
            # non-string, which would raise inside this handler.
            logger.error('get_promotions出错-' + str(e) + '-sec_user_id-' +
                         sec_user_id)
            return None

        try:
            # Probe only: reaching columns[0].name shows the page was
            # really fetched.
            raw_data.get('columns')[0].get('name')
            data = raw_data.get('promotions')
            # len() on purpose: a missing 'promotions' must fail here and
            # be logged, not be mistaken for "no promotions".
            if len(data) == 0:
                # Page fetched and still no promotions: the user really
                # does not sell goods.
                logger.info('该用户不带货,将删除,sec_user_id-' + sec_user_id)
                self.redis_client.delete_pre_users(sec_user_id)
            else:
                # Promotions exist, so the user sells goods: set status 0.
                self.redis_client.add_pre_users(sec_user_id, 0)
        except Exception as e:
            logger.error('解析promotions页面失败-sec_user_id-' + sec_user_id + '-' +
                         str(e))
        return None

    def run(self):
        """Check every pending user, BATCH_SIZE greenlets at a time."""
        users = self.get_users()
        total = len(users)
        logger.info('共有待确认是否带货用户数量:' + str(total))
        for start in range(0, total, self.BATCH_SIZE):
            stop = min(start + self.BATCH_SIZE, total)
            logger.info('check_commercial爬取当前用户序号-' + str(start + 1) + '-' +
                        str(stop))
            tasks = [
                gevent.spawn(self.check_commercial, sec_user_id)
                for sec_user_id in users[start:stop]
            ]
            gevent.joinall(tasks)
예제 #3
0
class GetPreUsers():
    """Collect candidate users from the followings of queued users.

    Each run takes ``batch_size`` ids from ``redis_client.get_following()``,
    fetches their followings concurrently, filters them with
    ``is_qualified_user`` and stores the survivors back into Redis while the
    size limits below are not reached.
    """

    # Upper bounds checked against redis_client.count_following() and
    # redis_client.count_pre_users() before adding more entries.
    MAX_FOLLOWING = 100000
    MAX_PRE_USERS = 500000
    # A non-empty custom_verify is accepted only when it contains one of
    # these substrings.
    ALLOWED_CUSTOM_VERIFY = ('自媒体', '主播', '视频')

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        self.batch_size = 10
        self.pre_user_list = []
        self.stupid_key_words = STUPID_KEY_WORDS

    def count_following(self):
        """Return ``redis_client.count_following()``."""
        return self.redis_client.count_following()

    def count_pre_users(self):
        """Return ``redis_client.count_pre_users()``."""
        return self.redis_client.count_pre_users()

    def get_following(self):
        """Return ``batch_size`` results of ``redis_client.get_following()``."""
        return [
            self.redis_client.get_following() for _ in range(self.batch_size)
        ]

    def add_following_and_pre_users(self):
        """Store the collected users while the size limits allow it.

        The counts are re-read for every user because each addition may
        change them.
        """
        for pre_user in self.pre_user_list:
            if self.count_following() < self.MAX_FOLLOWING:
                self.redis_client.add_following(pre_user)
            if self.count_pre_users() < self.MAX_PRE_USERS:
                self.redis_client.add_pre_users(pre_user)

    def is_qualified_user(self, user):
        """Return True when ``user`` passes every filter.

        A user is rejected when ``is_gov_media_vip`` or
        ``enterprise_verify_reason`` is truthy, when a non-empty
        ``custom_verify`` contains none of ALLOWED_CUSTOM_VERIFY, or when the
        nickname contains any of ``self.stupid_key_words``.

        Args:
            user: mapping with the fields named above; missing fields are
                treated as empty.
        """
        if user.get('is_gov_media_vip'):
            return False
        if user.get('enterprise_verify_reason'):
            return False
        custom_verify = user.get('custom_verify')
        if custom_verify and not any(
                label in custom_verify for label in self.ALLOWED_CUSTOM_VERIFY):
            return False
        # A missing nickname is treated as empty instead of raising
        # TypeError on the substring test.
        nickname = user.get('nickname') or ''
        return not any(word in nickname for word in self.stupid_key_words)

    def get_pre_users(self, sec_user_id):
        """Append the qualified followings of ``sec_user_id`` to ``pre_user_list``.

        Args:
            sec_user_id: user id; concatenated into log messages, so it is
                expected to be a ``str``.

        Returns:
            None in every case; fetch failures are logged.
        """
        try:
            raw_data = self.get_raw_data.get_following(sec_user_id)
        except Exception as e:
            # str(e) rather than e.args[0]: args can be empty or hold a
            # non-string, which would raise inside this handler.
            logger.error('get_pre_user出错-' + str(e) + '-sec_user_id-' +
                         sec_user_id)
            return None

        if raw_data.get('status_code') == 2096:
            # Logged as "followings not visible".
            logger.info('关注不可见-sec_user_id-' + sec_user_id)
            return None

        # A missing 'followings' field is handled like an empty list.
        following_list = raw_data.get('followings') or []
        if not following_list:
            # The message says the program exits, but the exit call is
            # disabled, so the run continues.
            logger.error('获取不到数据了,程序退出')
        for user in following_list:
            if self.is_qualified_user(user):
                self.pre_user_list.append(user.get('sec_uid'))
        return None

    def run(self):
        """Run one collection round unless both limits are already reached."""
        if (self.count_following() >= self.MAX_FOLLOWING
                and self.count_pre_users() >= self.MAX_PRE_USERS):
            logger.info('已经有太多following或pre_users了')
            return

        batch = self.get_following()
        tasks = [
            gevent.spawn(self.get_pre_users, sec_user_id)
            for sec_user_id in batch
        ]
        gevent.joinall(tasks)
        logger.info('获取到pre_user-' + str(len(self.pre_user_list)))
        self.add_following_and_pre_users()
        self.pre_user_list.clear()
예제 #4
0
class GetPreUsers():
    """Collect candidate users from the followings of queued users.

    Each run takes ``batch_size`` ids from ``redis_client.get_following()``,
    fetches their followings concurrently, filters them with
    ``is_qualified_user`` and stores the survivors through
    ``redis_client.add_following`` and ``redis_client.add_pre_users(user, -1)``.
    """

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        self.batch_size = 10
        self.pre_user_list = []
        # A nickname containing any of these substrings disqualifies a user.
        self.stupid_key_words = [
            '公司', '店', '铺', '厂', '行', '鞋', '装', '市', '服', '饰', '商', '贸', '牌',
            '汇', '馆', '裤', '业', '专', '卖'
        ]

    def get_following(self):
        """Return ``batch_size`` results of ``redis_client.get_following()``."""
        return [
            self.redis_client.get_following() for _ in range(self.batch_size)
        ]

    def add_following_and_pre_users(self):
        """Store every collected user as both a following and a pre-user."""
        for pre_user in self.pre_user_list:
            self.redis_client.add_following(pre_user)
            self.redis_client.add_pre_users(pre_user, -1)

    def is_qualified_user(self, user):
        """Return True when ``user`` passes every filter.

        A user is rejected when ``is_gov_media_vip``,
        ``enterprise_verify_reason`` or ``custom_verify`` is truthy, or when
        the nickname contains any of ``self.stupid_key_words``.

        Args:
            user: mapping with the fields named above; missing or None
                fields are treated as empty.
        """
        # Truthiness instead of `!= ''`: a missing or None field must not
        # count as a verification and disqualify the user.
        rejecting_fields = (
            'is_gov_media_vip',
            'enterprise_verify_reason',
            'custom_verify',
        )
        if any(user.get(field) for field in rejecting_fields):
            return False
        # A missing nickname is treated as empty instead of raising
        # TypeError on the substring test.
        nickname = user.get('nickname') or ''
        return not any(word in nickname for word in self.stupid_key_words)

    def get_pre_users(self, sec_user_id):
        """Append the qualified followings of ``sec_user_id`` to ``pre_user_list``.

        Args:
            sec_user_id: user id; concatenated into log messages, so it is
                expected to be a ``str``.

        Returns:
            None in every case; fetch failures are logged.
        """
        try:
            raw_data = self.get_raw_data.get_following(sec_user_id)
        except Exception as e:
            # str(e) rather than e.args[0]: args can be empty or hold a
            # non-string, which would raise inside this handler.
            logger.error('get_pre_user出错-' + str(e) + '-sec_user_id-' +
                         sec_user_id)
            return None

        if raw_data.get('status_code') == 2096:
            # Logged as "followings not visible".
            logger.info('关注不可见-sec_user_id-' + sec_user_id)
            return None

        # A missing 'followings' field is handled like an empty list.
        for user in raw_data.get('followings') or []:
            if self.is_qualified_user(user):
                self.pre_user_list.append(user.get('sec_uid'))
        return None

    def run(self):
        """Run one collection round: fetch, filter, store, then reset the buffer."""
        batch = self.get_following()
        tasks = [
            gevent.spawn(self.get_pre_users, sec_user_id)
            for sec_user_id in batch
        ]
        gevent.joinall(tasks)
        logger.info('获取到pre_user-' + str(len(self.pre_user_list)))
        self.add_following_and_pre_users()
        self.pre_user_list.clear()