def favorite(user_id, user_name=None):
    """Fetch every video that the given user has liked.

    Pages through the favorite endpoint, passing the previous response's
    ``max_cursor`` back for as long as the response reports
    ``has_more == 1``, then converts each raw entry with ``data_to_video``.

    :param user_id: user identifier handed to ``getParams``.
    :param user_name: optional display name; shown instead of ``user_id``
        in the progress messages when it is truthy.
    :return: list of converted videos, in the order they were received.
    """
    label = user_name or user_id
    print("正在拉取", label, '喜欢的抖音视频...')

    # Build the request parameters once; only the cursor changes per page.
    params = getParams(user_id)
    raw_items = []
    cursor = None
    while True:
        if cursor:
            params['max_cursor'] = str(cursor)
        # Request one page of data.
        result = fetch(URL.favorite_url(), headers=common_headers, params=params, verify=False)
        page = result.get('aweme_list')
        if page:
            raw_items.extend(page)
        if result.get('has_more') != 1:
            break
        cursor = result.get('max_cursor')

    print(label, '喜欢的抖音视频拉取完成')
    return [data_to_video(item) for item in raw_items]
def billboard():
    """Fetch the billboard and convert its entries to videos.

    :return: list with one ``data_to_video`` result per billboard entry;
        empty when the response carries no ``aweme_list``.
    """
    result = fetch(URL.billboard(), headers=common_headers, verify=False)
    # A missing or null 'aweme_list' yields an empty result instead of a
    # TypeError when iterating None.
    entries = result.get('aweme_list') or []
    # Author's note: each video carries four addresses; the first two open
    # in a web browser, the last two only on a phone or an emulator.
    return [data_to_video(entry.get('aweme_info', {})) for entry in entries]
def hotSearchWord():
    """Fetch the hot search words.

    Reads ``data.word_list`` from the response and converts every entry
    with ``data_to_word``.

    :return: list of converted words; empty when the response has no
        ``data`` object or no ``word_list`` inside it.
    """
    result = fetch(URL.hot_search_word(), headers=common_headers, verify=False)
    # Guard both levels: chaining .get() on a missing 'data' would raise
    # AttributeError, and iterating a null 'word_list' would raise TypeError.
    data = result.get('data') or {}
    word_list = data.get('word_list') or []
    return [data_to_word(item) for item in word_list]
def get_music_urls(mc_id):
    """Fetch the music entries of one music collection.

    :param mc_id: collection identifier passed to
        ``URL.music_collection_list``.
    :return: list with one ``data_collection_to_music`` result per entry of
        the response's ``music_list`` (empty when that key is absent).
    """
    result = fetch(URL.music_collection_list(mc_id), headers=common_headers, verify=False)
    return [data_collection_to_music(entry) for entry in result.get('music_list', [])]
def music_collection():
    """Fetch all music collections together with their music.

    For every entry of the response's ``mc_list`` the collection's music is
    fetched through ``get_music_urls`` and wrapped in a ``MusicCollection``.
    Progress is printed per collection.

    :return: list of ``MusicCollection`` objects, one per listed collection.
    :raises KeyError: if a collection entry lacks ``id`` or ``mc_name``.
    """
    result = fetch(URL.music_collection_url(), headers=common_headers, verify=False)
    collection_list = []
    for mc in result.get('mc_list') or []:
        mc_id = mc['id']
        mc_name = mc['mc_name']
        print('正在拉取', mc_name, '...')
        mc_musics = get_music_urls(mc_id)
        # Always build the collection. A guard on the builtin `id` function
        # is always truthy, so it could never select a `None` fallback.
        collection_list.append(
            MusicCollection(mc_id=mc_id, mc_name=mc_name, mc_musics=mc_musics)
        )
        print(mc_name, '拉取完毕')
    return collection_list
def category(maxCursor=50):
    """Collect hot topics and hot music from the category feed.

    Requests the category list page by page. Entries whose ``desc`` is
    '热门话题' are converted with ``data_to_topic``; entries whose ``desc`` is
    '热门音乐' have their ``music_info`` converted with ``data_to_music``.
    Paging follows the ``cursor`` returned by each response while
    ``has_more == 1``.

    :param maxCursor: paging stops once the cursor used for the current
        request exceeds this value, even if more pages are reported. Lower
        it to pull fewer pages.
    :return: list of converted topic / music objects gathered from every
        fetched page.
    """
    offset = 0
    # Accumulate across pages: creating this list inside the loop would
    # throw away everything except the final page.
    final = []
    while True:
        # `query` is not defined in this function; it is expected to be a
        # dict available at module level.
        query['cursor'] = str(offset)
        result = fetch(URL.category_list(), headers=common_headers, params=query, verify=False)
        # Tolerate a missing or null 'category_list'.
        for item in result.get('category_list') or []:
            desc = item.get('desc')
            if desc == '热门话题':
                final.append(data_to_topic(item))
            elif desc == '热门音乐':
                final.append(data_to_music(item.get('music_info', {})))
        if result.get('has_more') != 1:
            break
        if offset > maxCursor:
            # Caller-imposed cap on how far to page.
            break
        offset = result.get('cursor')
    return final
def get_aweme_list(queryParams):
    """Fetch every raw video entry posted by a user.

    Pages through the person-post endpoint, writing the previous response's
    ``max_cursor`` into ``queryParams`` (the dict is modified in place)
    while the response reports ``has_more == 1``.

    :param queryParams: request parameter dict sent with every page request.
    :return: list of raw ``aweme_list`` entries from all fetched pages.
    """
    collected = []
    cursor = 0
    while True:
        if cursor:
            queryParams['max_cursor'] = str(cursor)
        result = fetch(URL.person_post_url(), headers=person_post_headers, params=queryParams, verify=False)
        page = result.get('aweme_list', [])
        if page:
            collected.extend(page)
        if result.get('has_more') != 1:
            return collected
        cursor = result.get('max_cursor')
def videos(self, max=None):
    """Fetch the videos listed under this topic.

    Requests the topic's video list page by page, signing requests with
    the output of ``node signature.js``, and converts every entry with
    ``data_to_video``. A page may come back empty while ``has_more`` is
    still 1; the loop then simply requests again.

    :param max: optional upper bound on the number of videos returned.
        ``None`` (or 0) means no limit.
    :return: list of converted videos, at most ``max`` long when a limit is
        given.
    :raises RuntimeError: if ``max`` is truthy but not an ``int``.
    """
    from douyinspider.utils.tranform import data_to_video
    if max and not isinstance(max, int):
        raise RuntimeError('`max` param must be int')
    import subprocess
    query = {'ch_id': self.id, 'count': '9', 'cursor': '0', 'aid': '1128'}
    offset, count = 0, 0
    videos = []
    # str() keeps the concatenation valid when the topic id is not a string.
    sign_code = str(query['ch_id']) + query['count'] + str(offset)
    signature = subprocess.getoutput('node signature.js %s' % sign_code)
    while True:
        # Re-run the signer once the cursor has moved past the first page.
        # NOTE(review): sign_code is still the value built for offset 0; if
        # the signature is meant to cover the current cursor it should be
        # rebuilt here - confirm against signature.js.
        if query['cursor'] != str(offset) and offset != 0:
            signature = subprocess.getoutput('node signature.js %s' % sign_code)
        query['cursor'] = str(offset)
        query['_signature'] = signature
        result = fetch(URL.category_list_videos(), params=query, headers=common_headers, verify=False)
        for item in result.get('aweme_list') or []:
            video = data_to_video(item)
            count += 1
            videos.append(video)
            # Stop the whole fetch, not just this page, once the limit is
            # reached; skipping the check when no limit is set avoids
            # comparing an int with None.
            if max and count >= max:
                return videos
        # next page
        if result.get('has_more') != 1:
            break
        offset = result.get('cursor')
    return videos
def person() -> None:
    """Download the Douyin user list page and cache the users as id/name pairs.

    The page is always requested and parsed for ``div`` elements with class
    ``card-body``. If ``doc/抖音用户.txt`` already exists under the current
    working directory, its JSON content is loaded and printed; otherwise
    the id/name pairs are extracted from the page, printed, and written
    with ``save_txt``.

    NOTE(review): the source layout did not preserve indentation. The
    scrape / print / save steps are assumed to belong to the ``else``
    branch - confirm against the original file.
    """
    print("正在拉取抖音用户列表信息...")
    reponse = requests.get(URL.person_list_url(), headers=common_headers, verify=False)
    print("抖音用户列表信息拉取完毕")
    soup = BeautifulSoup(reponse.text, 'html.parser')
    # Collect the card elements that hold each user's link and nickname.
    persons = soup.find_all('div', class_="card-body")
    person_path = os.getcwd() + "/doc/抖音用户.txt"
    if os.path.exists(person_path):
        # A previously saved list exists: load it instead of rebuilding it.
        with open(person_path, 'r') as f:
            person_list = json.loads(f.read())
            print(person_list)
    else:
        person_list = []
        for person in persons:
            person_id = person.find('a').get('href')
            # Drop the first four characters of the href - presumably a
            # fixed path prefix in front of the id; confirm against the page.
            person_id = person_id[4:]
            person_name = person.find('a').string
            person_list.append({"id": person_id, "name": person_name})
        print(person_list)
        save_txt(person_path, person_list)