Esempio n. 1
0
def get_userfollowing_list_repeat(followings):
    """
    Crawl every user of a following list that has not been crawled yet.

    :param followings: list whose first element is the total following
        count and whose remaining elements are ``(mid, uname)`` pairs.
        The count may be larger than the number of pairs actually present.
    :return: None
    """
    global complete
    if not followings:
        # Nothing to crawl (the previous code raised IndexError here).
        return

    totals = followings[0]
    # Slicing clamps to the entries that really exist, so a total larger
    # than the number of stored pairs no longer needs an IndexError to
    # terminate the loop.
    for entry in followings[1:totals + 1]:
        try:
            # Test against the live module-level list on every iteration
            # instead of a snapshot: presumably run() ends up appending to
            # `complete` while it executes -- TODO confirm.
            if entry[0] in complete:
                continue
            run(entry[0], entry[1])
        except Exception as e:
            # Best effort, as before: any failure stops the processing of
            # this list. Unlike the former bare `except:`, the failure is
            # logged and KeyboardInterrupt / SystemExit propagate, so the
            # crawler can actually be interrupted.
            logger.error('遍历关注列表出错,e = {}'.format(e))
            break
Esempio n. 2
0
def get_userfollowing_list(user_id, user_name):
    """
    Fetch a user's following list, store it, then crawl the listed users.

    On success a list ``[total, (mid, uname), ...]`` holding at most 50
    pairs is built, passed to ``save_userinfo_mysql``, ``user_id`` is
    appended to the module-level ``complete`` list and the list is handed
    to ``get_userfollowing_list_repeat``. Failures are only logged.

    :param user_id: mid of the user whose followings are requested
    :param user_name: name of that user (used for storage and logging)
    :return: None
    """
    global complete
    # Only the first 50 followings can be retrieved (system limit, per the
    # original author's note).
    max_entries = 50
    try:
        url = 'https://api.bilibili.com/x/relation/followings?vmid=' + str(
            user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户关注列表失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return

        content = response.json()
        if not content.get('data'):
            logger.info('获取用户关注列表失败, use_id = {}, user_name = {}'.format(
                user_id, user_name))
            return

        data = content['data']
        totals = data['total']
        # Element 0 keeps the uncapped total; the pairs follow it.
        followings = [totals]

        # max(..., 0) keeps a nonsensical negative total from turning into
        # a negative slice bound.
        limit = max(min(totals, max_entries), 0)
        for item in (data.get('list') or [])[:limit]:
            try:
                followings.append((item['mid'], item['uname']))
            except (KeyError, TypeError, IndexError):
                # Malformed entry: stop collecting, keep what we have.
                break

        # Persist the relations before recursing into the followed users.
        save_userinfo_mysql(followings, user_id, user_name)

        complete.append(user_id)
        logger.info(complete)

        get_userfollowing_list_repeat(followings)
    except ConnectionError as e:
        # NOTE(review): requests raises its own exception classes; confirm
        # that ConnectionError here is imported from requests.exceptions,
        # otherwise request failures are not caught by this handler.
        # Fixed: the message was passed as ('…{}', format(e)) and was
        # therefore never formatted.
        logger.error('网络连接异常,e = {}'.format(e))
Esempio n. 3
0
def enter_space(user_id):
    """
    Visit a user's space page and, if reachable, fetch the basic profile.

    :param user_id: mid of the user
    :return: None
    """
    try:
        url = 'https://space.bilibili.com/' + str(user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code == 200:
            get_basic_userinfo(user_id)
        else:
            logger.info('进入主页失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
    except ConnectionError as e:
        # NOTE(review): confirm ConnectionError is the requests exception
        # class; the builtin one is not raised by requests.
        # Fixed: '…{}', format(e) passed the text as a separate argument
        # instead of formatting the message.
        logger.error('网络连接异常,e = {}'.format(e))
Esempio n. 4
0
def get_add_userview(user_id):
    """
    Fetch a user's video play count and article read count.

    :param user_id: mid of the user
    :return: tuple ``(archive_view, article_view)`` on success, ``None``
        when the request fails or the response carries no data (the
        failure is logged)
    """
    try:
        url = 'https://api.bilibili.com/x/space/upstat?mid=' + str(user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户播放数和阅读数失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return None

        content = response.json()
        if not content.get('data'):
            logger.info('获取用户播放数和阅读数失败, use_id = {}'.format(user_id))
            return None

        data = content['data']
        return (data['archive']['view'], data['article']['view'])
    except ConnectionError as e:
        # NOTE(review): confirm ConnectionError is the requests exception
        # class; the builtin one is not raised by requests.
        # Fixed: '…{}', format(e) never formatted the message.
        logger.error('网络连接异常,e = {}'.format(e))
        return None
Esempio n. 5
0
def get_add_userfollow(user_id):
    """
    Fetch a user's following count and follower count.

    :param user_id: mid of the user
    :return: tuple ``(following, follower)`` on success, ``None`` when the
        request fails or the response carries no data (the failure is
        logged)
    """
    try:
        url = 'https://api.bilibili.com/x/relation/stat?vmid=' + str(user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户粉丝数和关注数失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return None

        content = response.json()
        if not content.get('data'):
            logger.info('获取用户粉丝数和关注数失败, use_id = {}'.format(user_id))
            return None

        data = content['data']
        return (data['following'], data['follower'])
    except ConnectionError as e:
        # NOTE(review): confirm ConnectionError is the requests exception
        # class; the builtin one is not raised by requests.
        # Fixed: '…{}', format(e) never formatted the message.
        logger.error('网络连接异常,e = {}'.format(e))
        return None
Esempio n. 6
0
def save_userinfo_mysql(followings, user_id, user_name):
    """
    Store a user's following relations in MySQL.

    Related objects (per the original docstring): database ``bilibili``,
    table ``bilibili_userrelation``.

    Each pair of users is kept as one row with the smaller mid in the
    ``user1_*`` columns. ``status`` is 0 when user1 follows user2, 1 when
    user2 follows user1 and 2 when they follow each other.

    :param followings: list whose first element is the total following
        count, followed by ``(mid, uname)`` tuples; at most the first 50
        tuples are processed
    :param user_id: mid of the user who follows the listed users
    :param user_name: name of that user
    :return: None
    """
    if not followings:
        # Nothing to store (the previous code raised IndexError here).
        return
    try:
        totals = min(followings[0], 50)

        # One-way: A follows B.
        sql_A2B = ('insert into bilibili_userrelation(user1_mid, user1_name, user2_mid, user2_name, status) '
                   'values(%s, %s, %s, %s, 0);')
        # One-way: B follows A.
        sql_B2A = ('insert into bilibili_userrelation(user1_mid, user1_name, user2_mid, user2_name, status) '
                   'values(%s, %s, %s, %s, 1);')
        # Two-way: A and B follow each other.
        sql_AB = 'update bilibili_userrelation set status=2 where user1_mid = %s AND user2_mid = %s;'
        # Existence / current-state checks.
        sql_selectAB = 'select count(*) from bilibili_userrelation where user1_mid = %s AND user2_mid = %s;'
        sql_selectABstatus = 'select status from bilibili_userrelation where user1_mid = %s AND user2_mid = %s;'

        for row in followings[1:totals + 1]:
            other_id = row[0]
            if user_id < other_id:
                # The crawled user is user1, so "A follows B" (status 0).
                cur.execute(sql_selectAB, (user_id, other_id))
                count = cur.fetchall()[0][0]
                if 0 == count:
                    params = (user_id, user_name) + row
                    try:
                        cur.execute(sql_A2B, params)
                    except Exception:
                        conn.rollback()
                        logger.info('用户关系信息保存到数据库中失败,A2BB,mid分别是{}和{}'.format(
                            user_id, other_id))
                elif 1 == count:
                    try:
                        cur.execute(sql_selectABstatus, (user_id, other_id))
                        status = cur.fetchone()[0]
                    except Exception:
                        conn.rollback()
                        logger.info('用户关系信息保存到数据库中失败,A2B,mid分别是{}和{}'.format(
                            user_id, other_id))
                        continue
                    if 1 == status:
                        # The reverse direction is already stored: mutual.
                        try:
                            cur.execute(sql_AB, (user_id, other_id))
                        except Exception:
                            conn.rollback()
                            logger.info(
                                '用户关系信息保存到数据库中失败,A2B,mid分别是{}和{}'.format(
                                    user_id, other_id))
                    else:
                        logger.info(
                            '用户关系信息在数据库中,A2B已经执行过,mid分别是{}和{}'.format(
                                user_id, other_id))
                else:
                    logger.info('用户关系信息在数据库中重复有多条,A2B,mid分别是{}和{}'.format(
                        user_id, other_id))
            elif user_id > other_id:
                # The followed user is user1, so "B follows A" (status 1).
                params = row + (user_id, user_name)
                cur.execute(sql_selectAB, (other_id, user_id))
                count = cur.fetchall()[0][0]
                if 0 == count:
                    try:
                        cur.execute(sql_B2A, params)
                    except Exception:
                        conn.rollback()
                        logger.info('用户关系信息保存到数据库中失败,B2AA,mid分别是{}和{}'.format(
                            other_id, user_id))
                elif 1 == count:
                    try:
                        cur.execute(sql_selectABstatus, (other_id, user_id))
                        status = cur.fetchone()[0]
                    except Exception:
                        logger.info('用户关系信息查询状态出错,B2A,mid分别是{}和{}'.format(
                            other_id, user_id))
                        # Fixed: without this the code went on to read
                        # `status`, which was unbound or left over from a
                        # previous row.
                        continue
                    if 0 == status:
                        # The reverse direction is already stored: mutual.
                        try:
                            cur.execute(sql_AB, (other_id, user_id))
                        except Exception:
                            conn.rollback()
                            logger.info(
                                '用户关系信息保存到数据库中失败,B2A,mid分别是{}和{}'.format(
                                    other_id, user_id))
                    else:
                        logger.info('用户关系信息在数据库中,B2A已经执行过,mid分别是{}和{}'.format(
                            other_id, user_id))
                else:
                    logger.info('用户关系信息在数据库中重复有多条,B2A,mid分别是{}和{}'.format(
                        other_id, user_id))
            else:
                # A user listed as following themselves.
                # Fixed: '…{}', format(user_id) never formatted the message.
                logger.info('用户关系信息出错,自关,mid是{}'.format(user_id))
    except ConnectionError as e:
        # Fixed: '…{}', format(e) never formatted the message.
        logger.error('网络连接异常,e = {}'.format(e))
Esempio n. 7
0
def get_basic_userinfo(user_id):
    """
    Fetch a user's basic profile, enrich it with statistics and store it.

    The profile fields are combined with the tuples returned by
    ``get_add_userfollow``, ``get_add_usercount`` and ``get_add_userview``
    and passed to ``save_userinfo_mysql``. The module-level counter
    ``total`` is incremented for every stored user and logged every 100
    users. Failures are logged and nothing is stored.

    :param user_id: mid of the user
    :return: None
    """
    global total

    url = 'https://space.bilibili.com/ajax/member/GetInfo'
    payload = {'mid': user_id}
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
        '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Referer': 'https://space.bilibili.com/' + str(user_id)
    }
    try:
        response = requests.post(url, headers=head, data=payload, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户个人信息失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return

        content = response.json()
        if not content.get('data'):
            logger.info('获取用户个人信息失败, use_id = {}'.format(user_id))
            return
        data = content['data']

        # regtime and birthday are optional; fall back to 0 when absent or
        # unusable.
        try:
            regtime = time.strftime("%Y-%m-%d %H:%M:%S",
                                    time.localtime(data['regtime']))
        except Exception:
            logger.info(
                '用户没有regtime这个标签, user_id = {}'.format(user_id))
            regtime = 0
        try:
            birthday = data['birthday']
        except Exception:
            logger.info(
                '用户没有birthday这个标签, user_id = {}'.format(user_id))
            # Fixed: this handler also reset regtime to 0, discarding a
            # valid registration time whenever birthday was missing.
            birthday = 0

        result = (
            data['mid'],
            data['name'],
            data['sex'],
            data['rank'],
            data['face'],
            regtime,
            data['spacesta'],
            birthday,
            data['sign'],
            data['level_info']['current_level'],
            data['official_verify']['desc'],
            data['vip']['vipType'],
            data['vip']['vipStatus'],
            data['toutu'],
            data['toutuId'],
            data['theme'],
            data['theme_preview'],
            data['coins'],
            data['im9_sign'],
            data['fans_badge'])

        # The stat helpers return None after logging a failure; appending
        # None to the tuple raised TypeError, so skip the user instead.
        for fetch in (get_add_userfollow, get_add_usercount,
                      get_add_userview):
            extra = fetch(user_id)
            if extra is None:
                logger.info(
                    '获取用户附加统计信息失败,跳过保存, user_id = {}'.format(user_id))
                return
            result += extra

        # Persist the combined profile record.
        save_userinfo_mysql(result)
        total += 1
        if total % 100 == 0:
            logger.info('目前共计爬取到{}条数据'.format(total))
    except ConnectionError as e:
        # NOTE(review): confirm ConnectionError is the requests exception
        # class; the builtin one is not raised by requests.
        # Fixed: '…{}', format(e) never formatted the message.
        logger.error('网络连接异常,e = {}'.format(e))