예제 #1
0
    def test_get_user_from_db(self):
        from db_operation.user_dao import get_user
        # 数据库中存在的数据
        user = get_user('3858873234')
        self.assertEqual(user.get('name'), '景区宝')

        # 数据库中不存在的数据
        user2 = get_user('2674334272')
        self.assertEqual(isinstance(user2, dict), True)
예제 #2
0
파일: user.py 프로젝트: joyivan/WeiboSpider
def get_profile(user_id, session, headers):
    """
    默认为个人用户,如果为作家,则需要再做一次抓取,而为企业用户,它会重定向到企业主页,直接解析即可
    登陆后可以根据http://weibo.com/u/userId来进行确定用户主页,不知道稳定不,todo 测试这个路径
    好像'http://weibo.com/p/100505' + user_id + '/info?mod=pedit_more' 这个路径可以解决大部分路径问题,只是非普通用户
    会被重定向到主页,有的并不行,比如domain=100106
    """
    if user_id == '':
        return User()

    user = User()
    info = get_user(user_id)

    if info:
        user.id = user_id
        user.screen_name = info.get('name')
        user.province = info.get('province')
        user.city = info.get('city')
        user.location = info.get('location')
        user.description = info.get('description')
        user.headimg_url = info.get('headimg_url')
        user.blog_url = info.get('blog_url')
        user.domain_name = info.get('domain_name')
        user.gender = info.get('gender')
        user.followers_count = info.get('followers_count')
        user.friends_count = info.get('friends_count')
        user.status_count = info.get('status_count')
        user.birthday = info.get('birthday')
        user.verify_type = info.get('verify_type')
        user.verify_info = info.get('verify_info')
        user.register_time = info.get('register_time')

        # 防止在插入数据库的时候encode()出问题
        for key in user.__dict__:
            if user.__dict__[key] is None:
                setattr(user, key, '')

        storage.info('ID为{id}的用户信息已经存在于数据库中'.format(id=user_id))

    else:
        url = 'http://weibo.com/p/100505' + user_id + '/info?mod=pedit_more'
        html = get_page(url, session, headers)

        if not is_404(html):
            domain = get_publicinfo.get_userdomain(html)

            if domain == '100505' or domain == '103505' or domain == '100306':
                user = get_personalinfo.get_detail(html)
                if user is not None:
                    user.followers_count = get_personalinfo.get_fans(html)
                    user.friends_count = get_personalinfo.get_friends(html)
                    user.status_count = get_personalinfo.get_status(html)
                else:
                    user = User()
            else:
                # 为了尽可能少抓取url,所以这里不适配所有服务号
                if domain == '100106':
                    url = 'http://weibo.com/p/'+domain+user_id+'/home'
                    html = get_page(url, session, headers)
                    if html == '':
                        return user

                user.followers_count = get_enterpriseinfo.get_fans(html)
                user.friends_count = get_enterpriseinfo.get_friends(html)
                user.status_count = get_enterpriseinfo.get_status(html)
                user.description = get_enterpriseinfo.get_description(html).encode('gbk', 'ignore').decode('gbk')

            user.id = user_id
            user.screen_name = get_publicinfo.get_username(html)
            user.headimg_url = get_publicinfo.get_headimg(html)
            user.verify_type = get_publicinfo.get_verifytype(html)
            user.verify_info = get_publicinfo.get_verifyreason(html, user.verify_type)

            save_user(user)
            storage.info('已经成功保存ID为{id}的用户信息'.format(id=user_id))

    return user
예제 #3
0
    默认为个人用户,如果为作家,则需要再做一次抓取,而为企业用户,它会重定向到企业主页,直接解析即可
    登陆后可以根据http://weibo.com/u/userId来进行确定用户主页,不知道稳定不,todo 测试这个路径
    好像'http://weibo.com/p/100505' + user_id + '/info?mod=pedit_more' 这个路径可以解决大部分路径问题,只是非普通用户
    会被重定向到主页,有的并不行,比如domain=100106
    :param headers:
    :param session:
    :param user_id:
    :return:
    """
<<<<<<< Updated upstream
    if user_id == '':
        return User()
=======
>>>>>>> Stashed changes
    user = User()
    r = get_user(user_id)

    if r:
        user.id = user_id
        user.screen_name = r[0]
        user.province = r[1]
        user.city = r[2]
        user.location = '{province} {city}'.format(province=r[1], city=r[2])
        try:
            user.description = r[3].read()
        except AttributeError:
            user.description = ''
        user.headimg_url = r[4]
        user.blog_url = r[5]
        user.domain_name = r[6]
        user.gender = r[7]
예제 #4
0
def get_profile(user_id, session, headers):
    """
    默认为个人用户,如果为作家,则需要再做一次抓取,而为企业用户,它会重定向到企业主页,直接解析即可
    登陆后可以根据http://weibo.com/u/userId来进行确定用户主页,不知道稳定不,todo 测试这个路径
    好像'http://weibo.com/p/100505' + user_id + '/info?mod=pedit_more' 这个路径可以解决大部分路径问题,只是非普通用户
    会被重定向到主页,有的并不行,比如domain=100106
    :param headers:
    :param session:
    :param user_id:
    :return:
    """
    user = User()
    r = get_user(user_id)

    if r:
        user.id = user_id
        user.screen_name = r[0]
        user.province = r[1]
        user.city = r[2]
        user.location = '{province} {city}'.format(province=r[1], city=r[2])
        try:
            user.description = r[3].read()
        except AttributeError:
            user.description = ''
        user.headimg_url = r[4]
        user.blog_url = r[5]
        user.domain_name = r[6]
        user.gender = r[7]
        user.followers_count = r[8]
        user.friends_count = r[9]
        user.status_count = r[10]
        user.birthday = r[11]
        user.verify_type = r[12]
        user.verify_info = r[13]
        user.register_time = r[14]

        # 防止在插入数据库的时候encode()出问题
        for key in user.__dict__:
            if user.__dict__[key] is None:
                setattr(user, key, '')

        print('该用户信息已经存在于数据库中')
    else:
        url = 'http://weibo.com/p/100505' + user_id + '/info?mod=pedit_more'
        html = get_page(url, session, headers)

        if not is_404(html):
            domain = get_publicinfo.get_userdomain(html)

            if domain == '100505' or domain == '103505' or domain == '100306':
                user = get_personalinfo.get_detail(html)
                if user is not None:
                    user.followers_count = get_personalinfo.get_fans(html)
                    user.friends_count = get_personalinfo.get_friends(html)
                    user.status_count = get_personalinfo.get_status(html)
                else:
                    user = User()
            else:
                # 为了尽可能少抓取url,所以这里不适配所有服务号
                if domain == '100106':
                    url = 'http://weibo.com/p/' + domain + user_id + '/home'
                    html = get_page(url, session, headers)
                    if html == '':
                        return user

                user.followers_count = get_enterpriseinfo.get_fans(html)
                user.friends_count = get_enterpriseinfo.get_friends(html)
                user.status_count = get_enterpriseinfo.get_status(html)
                user.description = get_enterpriseinfo.get_description(
                    html).encode('gbk', 'ignore').decode('gbk')

            user.id = user_id
            user.screen_name = get_publicinfo.get_username(html)
            user.headimg_url = get_publicinfo.get_headimg(html)
            user.verify_type = get_publicinfo.get_verifytype(html)
            user.verify_info = get_publicinfo.get_verifyreason(
                html, user.verify_type)

            save_user(user)

    return user