コード例 #1
0
ファイル: favorite.py プロジェクト: bingwin/pythonThreeSpider
def favorite(user_id, user_name=None):
    """Fetch all videos a user has marked as favorite and convert them.

    Pages through the favorite endpoint, following ``max_cursor`` for as
    long as the response reports ``has_more == 1``.

    :param user_id: id passed to ``getParams`` to build the request params
    :param user_name: optional display name used in progress messages;
        ``user_id`` is shown instead when this is empty
    :return: list of ``data_to_video`` results, one per fetched entry
    """
    label = user_name or user_id
    print("正在拉取", label, '喜欢的抖音视频...')
    # Build the request parameters.
    request_params = getParams(user_id)
    raw_items = []
    cursor = None
    more_pages = True
    while more_pages:
        if cursor:
            request_params['max_cursor'] = str(cursor)
        # Request one page of data.
        page = fetch(URL.favorite_url(),
                     headers=common_headers,
                     params=request_params,
                     verify=False)
        page_items = page.get('aweme_list')
        if page_items is not None and len(page_items) > 0:
            raw_items.extend(page_items)
        more_pages = page.get('has_more') == 1
        if more_pages:
            cursor = page.get('max_cursor')
    print(label, '喜欢的抖音视频拉取完成')
    return [data_to_video(entry) for entry in raw_items]
コード例 #2
0
def billboard():
    """Fetch the billboard feed and convert every entry to a video.

    :return: list of ``data_to_video`` results, one per entry of the
        response's ``aweme_list``; empty when that list is missing or null
    """
    result = fetch(URL.billboard(), headers=common_headers, verify=False)
    # ``aweme_list`` may be absent or null in the response; fall back to an
    # empty list instead of raising TypeError when iterating.
    aweme_list = result.get('aweme_list') or []
    # Each video carries 4 video addresses: the first two can be opened on
    # the web, the last two only on a phone or an emulator.
    return [data_to_video(item.get('aweme_info', {})) for item in aweme_list]
コード例 #3
0
def hotSearchWord():
    """Fetch the hot search words and convert each one.

    :return: list of ``data_to_word`` results, one per entry of
        ``data.word_list`` in the response; empty when ``data`` or
        ``word_list`` is missing or null
    """
    result = fetch(URL.hot_search_word(), headers=common_headers, verify=False)
    # Guard both levels: a missing/null ``data`` would otherwise raise
    # AttributeError, and a missing/null ``word_list`` TypeError.
    data = result.get('data') or {}
    word_list = data.get('word_list') or []
    # One converted hot word per entry.
    return [data_to_word(item) for item in word_list]
コード例 #4
0
def get_music_urls(mc_id):
    """Fetch the music list of one music collection and convert each entry.

    :param mc_id: collection id passed to ``URL.music_collection_list``
    :return: list of ``data_collection_to_music`` results; empty when the
        response has no ``music_list`` key
    """
    response = fetch(URL.music_collection_list(mc_id),
                     headers=common_headers,
                     verify=False)
    return [
        data_collection_to_music(entry)
        for entry in response.get('music_list', [])
    ]
コード例 #5
0
def music_collection():
    """Fetch every music collection together with its music list.

    For each entry of the response's ``mc_list`` the collection's music is
    fetched through ``get_music_urls`` and wrapped in a ``MusicCollection``.
    Progress is printed before and after each collection.

    :return: list of ``MusicCollection`` objects, one per ``mc_list`` entry
    :raises KeyError: if an entry lacks ``id`` or ``mc_name``
    """
    result = fetch(URL.music_collection_url(),
                   headers=common_headers,
                   verify=False)
    collection_list = []
    for mc in result.get('mc_list', []):
        mc_id = mc['id']
        mc_name = mc['mc_name']
        print('正在拉取', mc_name, '...')
        mc_musics = get_music_urls(mc_id)
        # A collection object is always built. An earlier ``if id`` guard
        # tested the builtin ``id`` function, which is always truthy, so its
        # ``None`` branch could never run and has been dropped.
        collection_list.append(
            MusicCollection(mc_id=mc_id, mc_name=mc_name, mc_musics=mc_musics))
        print(mc_name, '拉取完毕')
    return collection_list
コード例 #6
0
ファイル: category.py プロジェクト: bingwin/pythonThreeSpider
def category(maxCursor=50):
    """
    Get trend results (hot topics and hot music) across all category pages.

    Pages through the category list starting at cursor 0 and accumulates the
    converted entries of every page fetched.

    :param maxCursor: stop paging once the current cursor offset exceeds
        this value, to limit how much is pulled
    :return: list of ``data_to_topic`` / ``data_to_music`` results collected
        from all fetched pages
    """
    offset = 0
    # Accumulate across pages; creating this list inside the loop would
    # discard everything except the last page.
    final = []
    while True:
        # NOTE(review): ``query`` is defined outside this function and its
        # 'cursor' entry is overwritten here on every page.
        query['cursor'] = str(offset)
        result = fetch(URL.category_list(),
                       headers=common_headers,
                       params=query,
                       verify=False)
        # Tolerate a missing or null ``category_list``.
        for item in result.get('category_list') or []:
            # Process per category; only these two kinds are kept.
            desc = item.get('desc')
            if desc == '热门话题':
                final.append(data_to_topic(item))
            elif desc == '热门音乐':
                final.append(data_to_music(item.get('music_info', {})))
        if result.get('has_more') != 1:
            break
        if offset > maxCursor:  # set maxCursor to avoid pulling too much
            break
        offset = result.get('cursor')
    return final
コード例 #7
0
ファイル: post.py プロジェクト: bingwin/pythonThreeSpider
def get_aweme_list(queryParams):
    """Collect the raw ``aweme_list`` entries from every page of a user's posts.

    Follows ``max_cursor`` for as long as the response reports
    ``has_more == 1``. ``queryParams`` is updated in place with the cursor
    of each following page.

    :param queryParams: request parameters passed to ``fetch``
    :return: list of raw entries gathered from all pages
    """
    collected = []
    cursor = 0
    more_pages = True
    while more_pages:
        if cursor:
            queryParams['max_cursor'] = str(cursor)
        page = fetch(URL.person_post_url(),
                     headers=person_post_headers,
                     params=queryParams,
                     verify=False)
        page_items = page.get('aweme_list', [])
        if page_items is not None and len(page_items) > 0:
            collected.extend(page_items)
        more_pages = page.get('has_more') == 1
        if more_pages:
            cursor = page.get('max_cursor')
    return collected
コード例 #8
0
ファイル: topic.py プロジェクト: bingwin/pythonThreeSpider
    def videos(self, max=None):
        """
        Get videos of this topic.

        :param max: optional upper bound on the number of videos to collect.
            It is checked after each page, so the result can exceed it by up
            to one page. ``None`` (the default) means no limit.
        :return: list of ``data_to_video`` results
        :raises RuntimeError: if ``max`` is truthy but not an int
        """

        from douyinspider.utils.tranform import data_to_video
        if max and not isinstance(max, int):
            raise RuntimeError('`max` param must be int')
        import subprocess
        query = {'ch_id': self.id, 'count': '9', 'cursor': '0', 'aid': '1128'}
        offset, count = 0, 0
        videos = []
        # NOTE(review): sign_code is built once with offset 0 and is not
        # rebuilt when the cursor advances, so the re-signing below uses the
        # same input -- confirm whether it should include the new cursor.
        sign_code = "" + query['ch_id'] + query['count'] + str(offset)
        signature = subprocess.getoutput('node signature.js %s' % sign_code)

        while True:
            # Define the cursor. Even a correctly generated signature does
            # not always return data, so the request is repeated in a loop.
            # NOTE(review): there is no retry limit when pages stay empty.
            if query['cursor'] != str(offset) and offset != 0:
                signature = subprocess.getoutput('node signature.js %s' %
                                                 sign_code)
            query['cursor'] = str(offset)
            query['_signature'] = signature
            result = fetch(URL.category_list_videos(),
                           params=query,
                           headers=common_headers,
                           verify=False)
            aweme_list = result.get('aweme_list', [])
            if aweme_list is not None and len(aweme_list) >= 1:
                for item in aweme_list:
                    video = data_to_video(item)
                    count += 1
                    videos.append(video)
                # Only enforce the limit when one was given; comparing an
                # int with None raises TypeError in Python 3.
                if max is not None and count >= max:
                    break
                # next page
                if result.get('has_more') != 1:
                    break
                else:
                    offset = result.get('cursor')
        return videos
コード例 #9
0
ファイル: person.py プロジェクト: bingwin/pythonThreeSpider
def person():
    """Fetch the Douyin user list page and load or build the cached user list.

    The page at ``URL.person_list_url()`` is always requested and parsed.
    If ``<cwd>/doc/抖音用户.txt`` already exists, the user list is read from
    that file as JSON; otherwise it is built from the parsed page and written
    with ``save_txt``. The list is printed in both cases. No return statement
    is visible in this excerpt.
    """
    print("正在拉取抖音用户列表信息...")
    # NOTE(review): the request is made even when the cache file below
    # already exists and the parsed page is then unused.
    reponse = requests.get(URL.person_list_url(),
                           headers=common_headers,
                           verify=False)
    print("抖音用户列表信息拉取完毕")
    soup = BeautifulSoup(reponse.text, 'html.parser')
    # Get the nicknames: each user is a <div class="card-body"> element.
    persons = soup.find_all('div', class_="card-body")
    person_path = os.getcwd() + "/doc/抖音用户.txt"
    if os.path.exists(person_path):
        # Cached list present: load it instead of rebuilding from the page.
        with open(person_path, 'r') as f:
            person_list = json.loads(f.read())
        print(person_list)
    else:
        person_list = []
        for person in persons:
            # The id comes from the first link's href with its first four
            # characters dropped (presumably a fixed path prefix -- confirm).
            person_id = person.find('a').get('href')
            person_id = person_id[4:]
            person_name = person.find('a').string
            person_list.append({"id": person_id, "name": person_name})
        print(person_list)
        save_txt(person_path, person_list)