Esempio n. 1
0
def get_user_from_page_comment():
    """Collect user IDs from the comment listings of queued posts.

    Runs forever. Each round pops one post ID from the Redis set ``page``
    (sleeping 5-10 seconds when the set is empty), requests that post's
    comment listing page after page until the API returns an empty
    payload, and stores every user ID not already in ``used_user`` into
    both ``used_user`` and ``users``.
    """
    client = redis.Redis('127.0.0.1', '6379')
    url_template = 'http://api.budejie.com/api/api_open.php?a=datalist&per=5&c=comment&hot=0&appname=www&client=www&device=pc&data_id={}&page={}'

    # NOTE(review): the counter starts at 0 for the very first post but is
    # reset to 1 for every later one -- confirm which base the API expects.
    page_no = 0
    while True:
        raw_page_id = client.spop('page')

        if not raw_page_id:
            # Queue is empty: report where we are idling and back off.
            print('in get_user_from_page_comment')
            time.sleep(random.randint(5, 10))
            continue

        while True:
            request_url = url_template.format(raw_page_id.decode('utf-8'),
                                              page_no)
            response = get_response(request_url)
            payload = json.loads(response.content)
            if not payload:
                # Empty payload marks the end of this post's comments.
                page_no = 1
                break
            page_no += 1
            for entry in payload['data']:
                commenter_id = entry['user']['id']

                if client.sismember('used_user', commenter_id):
                    continue
                client.sadd('used_user', commenter_id)
                client.sadd('users', commenter_id)
Esempio n. 2
0
def get_comment_article():
    """Queue the posts a user has commented on for later crawling.

    Runs forever. Each round pops one user ID from the Redis set
    ``user_comment`` (sleeping 4-8 seconds when it is empty) and follows
    the ``np`` cursor returned by the user-comment endpoint until the
    cursor comes back falsy. Every listed topic that reports at least one
    comment and is not yet in ``used_page`` is added to both ``used_page``
    and ``page``.
    """
    client = redis.Redis('127.0.0.1', '6379')
    url_template = 'http://d.api.budejie.com/comment/user-comment/{}/baisishequ-iphone-8.0/{}-20.json'

    cursor = 0
    while True:
        raw_user_id = client.spop('user_comment')

        if not raw_user_id:
            time.sleep(random.randint(4, 8))
            print('in get_comment_article')
            continue

        while True:
            request_url = url_template.format(raw_user_id.decode('utf-8'),
                                              cursor)
            response = get_response(request_url)
            payload = json.loads(response.content)

            cursor = payload['info']['np']

            if not cursor:
                # No further cursor: reset it for the next user and stop.
                # The entries of this final response are not processed.
                cursor = 0
                break
            cursor = int(cursor)

            for entry in payload['list']:
                topic = entry['topic']
                topic_id = topic['id']
                comment_total = topic['comment']

                already_seen = client.sismember('used_page', topic_id)
                if int(comment_total) > 0 and not already_seen:
                    client.sadd('used_page', topic_id)
                    client.sadd('page', topic_id)
Esempio n. 3
0
def get_user_follows():
    """Discover new users through the follow lists of queued users.

    Runs forever. Each round pops one user ID from the Redis set
    ``user_follow`` (sleeping 4-8 seconds when it is empty) and pages
    through the follow-list endpoint using the ``follow_id`` cursor from
    each response, stopping once the cursor equals the string ``'0'``.
    Listed user IDs not yet in ``used_user`` are added to both
    ``used_user`` and ``users``.
    """
    client = redis.Redis('127.0.0.1', '6379')
    url_template = 'http://api.budejie.com/api/api_open.php?a=follow_list&c=user&follow_id={}&userid={}'

    # Starts as the integer 0; afterwards it holds whatever the API returns
    # (the loop below only ends once that is the string '0').
    cursor = 0
    while True:
        raw_user_id = client.spop('user_follow')
        if not raw_user_id:
            time.sleep(random.randint(4, 8))
            print('in get user follows')
            continue

        while True:
            request_url = url_template.format(cursor,
                                              raw_user_id.decode('utf-8'))
            response = get_response(request_url)
            payload = json.loads(response.content.decode('utf-8'))
            data = payload['data']
            cursor = data['info']['follow_id']
            if cursor == '0':
                # End of the list; this response's entries are not processed.
                break

            for followed in data['list']:
                followed_id = followed['id']

                if client.sismember('used_user', followed_id):
                    continue
                client.sadd('used_user', followed_id)
                client.sadd('users', followed_id)
Esempio n. 4
0
def get_share_article():
    """Queue the posts a user has shared for later crawling.

    Runs forever. Each round pops one user ID from the Redis set
    ``user_share`` and follows the ``np`` cursor returned by the
    share-topic endpoint until the cursor comes back falsy. Every listed
    post ID not yet in ``used_page`` is added to both ``used_page`` and
    ``page``.

    When the queue is empty the worker sleeps 4-8 seconds before polling
    again, like the other workers in this file, instead of spinning and
    flooding Redis and stdout.
    """
    # Imported locally so this function does not depend on module-level
    # imports that are not visible from here.
    import random
    import time

    r = redis.Redis('127.0.0.1', '6379')
    url = 'http://d.api.budejie.com/topic/share-topic/{}/baisishequ-iphone-8.0/{}-20.json'

    np = 0
    while True:
        user_id = r.spop('user_share')

        if not user_id:
            print('in get_share_article')
            # Back off while the queue is empty rather than busy-looping.
            time.sleep(random.randint(4, 8))
            continue

        while True:
            response = get_response(url.format(user_id.decode('utf-8'), np))
            result = json.loads(response.content)

            np = result['info']['np']
            if not np:
                # No further cursor: reset it for the next user and stop.
                np = 0
                break
            np = int(np)
            for page in result['list']:
                page_id = page['id']

                flag = r.sismember('used_page', page_id)
                if not flag:
                    r.sadd('used_page', page_id)
                    r.sadd('page', page_id)
Esempio n. 5
0
def get_user_articles(redis_set='user_article',
                      not_climb_depth=False,
                      check_date=False,
                      redis_increament_set=None):
    """Crawl the posts of queued users and store them in MongoDB.

    Runs forever. Each round pops one user ID from ``redis_set`` (sleeping
    4-8 seconds when it is empty) and follows the ``np`` cursor of the
    user-topic endpoint until the cursor comes back falsy. For every
    listed post:

    * the post ID is added to the Redis sets ``used_page`` and ``page``
      if it is not already in ``used_page``;
    * posts with no up-votes or whose text contains the "deleted" marker
      are skipped;
    * remaining posts are inserted into ``budejie.budejie_article``.

    Args:
        redis_set: Name of the Redis set to pop user IDs from.
        not_climb_depth: When truthy, only the first response per user is
            processed.
        check_date: When truthy, posts whose ``passtime`` date is more
            than 30 days old are skipped and paging for that user stops
            after the current response.
        redis_increament_set: Optional Redis set name; the user ID is
            added to it for each post that passes the filters.

    A response that cannot be decoded as JSON is printed and the user is
    skipped; the worker no longer blocks on ``input()`` and no longer
    continues with a missing or stale ``result``.
    """
    r = redis.Redis('127.0.0.1', '6379')
    mongodb_client = pymongo.MongoClient(host='127.0.0.1', port=27017)
    mongodb_db = mongodb_client.budejie
    mongodb_table = mongodb_db.budejie_article
    print(
        'redis_set is: {},not_climb_depth is: {},bool is: {},check_date is: {},bool is: {}'
        .format(redis_set, not_climb_depth, bool(not_climb_depth), check_date,
                bool(check_date)))
    list_url = 'http://d.api.budejie.com/topic/user-topic/{}/1/desc/baisishequ-win-1.0/{}-20.json'
    while True:
        user_id = r.spop(redis_set)
        np = 0
        outdate_flag = False
        if not user_id:
            time.sleep(random.randint(4, 8))
            continue
        user_key = user_id.decode('utf-8')
        while True:
            response = get_response(list_url.format(user_key, np))
            try:
                result = json.loads(response.content)
            except ValueError:
                # Covers json.JSONDecodeError and UnicodeDecodeError. Dump
                # the raw body for diagnosis and move on to the next user.
                print(response.content)
                break

            np = result['info']['np']
            if not np:
                break
            for page in result['list']:
                value = {'user_id': user_key}
                page_id = page['id']
                # Comment count of the post.
                page_comment = page['comment']
                if int(page_comment) >= 0 and not r.sismember(
                        'used_page', page_id):
                    r.sadd('used_page', page_id)
                    r.sadd('page', page_id)
                up_count = page['up']
                article_text = page['text'].replace(' ', '').replace('\n', '')
                if int(up_count) <= 0 or '该内容已被删除' in article_text:
                    continue
                # Date part of 'passtime'; compared as an ISO date string.
                data = page['passtime'].split(' ')[0]
                if check_date and data < str(datetime.date.today() -
                                             datetime.timedelta(days=30)):
                    outdate_flag = True
                    continue
                if redis_increament_set:
                    r.sadd(redis_increament_set, user_id)
                print('up_count is: {}'.format(up_count))
                article_type = page['type']
                value.update({
                    'page': page_id,
                    'up_count': up_count,
                    'article_type': article_type,
                    'article_text': article_text
                })
                try:
                    hot_comment_list = [
                        comment['content'].replace(' ', '').replace('\n', '')
                        for comment in page['top_comments']
                        if comment['content'] != ''
                    ]
                except (KeyError, TypeError, AttributeError):
                    # Hot comments are optional; store the post without
                    # them when the field is missing or malformed.
                    pass
                else:
                    value.update({'hot_comment': hot_comment_list})
                # Named media_url so it cannot be confused with the listing
                # URL template used for the next request.
                media_url = None
                if article_type == 'image':
                    media_url = page['image']['big'][0]
                elif article_type == 'gif':
                    media_url = page['gif']['images'][0]
                elif article_type == 'video':
                    media_url = page['video']['download'][0]
                if article_type in ('image', 'gif', 'video'):
                    value.update({'url': media_url})
                value.update({'is_download': 'false'})
                try:
                    mongodb_table.insert_many([value])
                except Exception as e:
                    # Best-effort insert: report the failure and carry on.
                    print(e)
                    continue

            if not_climb_depth or outdate_flag:
                break
Esempio n. 6
0
def get_user_detail():
    """Fetch profiles of queued users and fan them out to work queues.

    Runs forever. Each round pops one user ID from the Redis set ``users``
    (sleeping 4-8 seconds when it is empty), requests the profile
    endpoint, and then:

    * adds the profile's user ID to ``user_fans``, ``user_follow``,
      ``user_article`` and ``user_comment`` when the matching counter is
      greater than zero;
    * inserts a row into ``budejie_user`` and commits, but only when the
      profile has a non-empty ``phone`` value.

    A profile that cannot be fetched or parsed is reported and skipped.
    A failed insert is rolled back so that the connection stays usable
    for later users. The cursor and connection are closed when the loop
    is left through an exception (including ``KeyboardInterrupt``).
    """
    r = redis.Redis('127.0.0.1', '6379')
    conn = psycopg2.connect(database='budejie',
                            user='******',
                            password='******',
                            host='127.0.0.1',
                            port=5432)
    cur = conn.cursor()
    url = 'http://api.budejie.com/api/api_open.php?a=profile&c=user&userid={}'
    try:
        while True:
            raw_user_id = r.spop('users')
            if not raw_user_id:
                print('in get_user_information')
                time.sleep(random.randint(4, 8))
                continue
            try:
                response = get_response(
                    url.format(raw_user_id.decode('utf-8')))
                user_information = json.loads(
                    response.content.decode('utf-8'))['data']
                user_article_num = user_information['tiezi_count']
                user_sex = user_information['sex']
                user_follow = user_information['follow_count']
                user_fans = user_information['fans_count']
                user_phone = user_information['phone']
                user_name = user_information['username'].strip()
                user_id = user_information['id']
                user_comment = user_information['comment_count']
                # Convert the counters here so that a malformed value skips
                # this user instead of crashing the worker further down.
                fans_count = int(user_fans)
                follow_count = int(user_follow)
                article_count = int(user_article_num)
                comment_count = int(user_comment)
            except Exception as e:
                # Best-effort crawl: report the bad profile and move on.
                print('get user detail error,user_id is: {},error is: {}'
                      .format(raw_user_id, e))
                continue

            if 0 < fans_count:
                r.sadd('user_fans', user_id)

            if 0 < follow_count:
                r.sadd('user_follow', user_id)

            if 0 < article_count:
                r.sadd('user_article', user_id)

            if 0 < comment_count:
                r.sadd('user_comment', user_id)

            # Only profiles that expose a phone value are persisted.
            if user_phone == '':
                continue

            try:
                cur.execute(
                    "insert into budejie_user (user_id,user_sex,user_phone,user_name,user_fans,user_article,is_download) values (%s,%s,%s,%s,%s,%s,%s);",
                    (user_id, '女' if user_sex == 'f' else '男', user_phone,
                     user_name, user_fans, user_article_num, '0'))
                print(
                    'user_id is: {},user_name is: {},user_sex is: {},user_phone is: {},user_comment is: {}'
                    .format(user_id, user_name, user_sex, user_phone,
                            user_comment))
            except Exception:
                print('insert error,user_id is: {}'.format(user_id))
                # A failed statement leaves the transaction aborted; without
                # a rollback every later execute on this connection fails.
                conn.rollback()
                continue

            # Commit after every successful insert.
            conn.commit()
    finally:
        cur.close()
        conn.close()