def download_video(self, aweme_id):
    """Download the content of the video identified by ``aweme_id``.

    Requests the video-detail endpoint, takes the first entry of
    ``aweme_detail.video.play_addr.url_list`` from the JSON reply and
    fetches that address.

    Args:
        aweme_id: Identifier of the video; sent both as a query parameter
            and in the request body.

    Returns:
        The ``content`` of the response for the play address, or ``None``
        when the play address cannot be extracted or fetched.
    """
    # Build a fresh dict: the per-call ``aweme_id`` must not be written into
    # the shared ``self.common_params`` and leak into later requests.
    params = {**self.common_params, "aweme_id": aweme_id}
    url = URL.video_detail_url()
    real_url = gen_url(self.token, url, params)

    # NOTE: this aliases the cookie mapping rather than copying it, so
    # ``install_id`` is stored on the shared object (unchanged behavior).
    cookies = self.__COOKIES
    cookies['install_id'] = str(self.__device["install_id"])

    resp = fetch(
        real_url,
        data={"aweme_id": aweme_id},
        cookies=cookies,
        headers=self.__HEADERS,
        timeout=3,
    ).json()

    try:
        play_addr = resp['aweme_detail']['video']['play_addr']['url_list'][0]
        print(play_addr)
        content = fetch(play_addr).content
    except Exception:
        # Best-effort: a missing key, an empty url_list or a failed fetch
        # all result in ``None``. KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        self.logger.warning("提取视频信息失败...")
        content = None
    return content
def gen_real_url(token, raw_url, query):
    """Return the ``url`` field obtained from the ``URL.api_sign`` endpoint.

    Args:
        token: Passed to ``URL.api_sign`` to build the endpoint address.
        raw_url: Base URL, placed before the ``?``.
        query: Query string, or a dict that is first converted with
            ``params2str``.

    Returns:
        The value stored under ``"url"`` in the endpoint's JSON reply.

    Raises:
        BaseException: When the response object is falsy; the printed
            message attributes this to the daily token quota being used up.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string

    sign_response = fetch(
        URL.api_sign(token),
        json={"url": unsigned_url},
        method="post",
    )
    if sign_response:
        return sign_response.json()['url']

    print("你的当日 token 次数已经用完,请明天再来尝试吧...")
    raise BaseException("you have run out of token.Please try tomorrow")
def post(user_id):
    """Yield, page by page, the videos posted by ``user_id``.

    Each iteration requests one page of the post list, resolves a play URL
    for every entry via ``aweme_id_video_url`` and yields the list of
    ``data_to_video`` results for that page. Entries without a play URL are
    reported on stdout and skipped. The first request asks for 21 items and
    later ones for 12. Iteration stops once ``has_more`` in the reply is
    not ``1``.

    Args:
        user_id: Identifier sent as the ``user_id`` query parameter.

    Yields:
        A list with one ``data_to_video`` result per usable entry of the
        page (possibly empty).
    """
    device = gen_device(TOKEN)
    common_params = gen_common_params(device)
    count = 0
    max_cursor = 0
    while True:
        query_params = {
            "count": count or 21,
            "user_id": user_id,
            "max_cursor": max_cursor,
        }
        search_params = {**common_params, **query_params}
        real_url = gen_real_url(TOKEN, URL.post_url(), search_params)

        # NOTE: aliases the module-level cookie mapping, so ``install_id``
        # is written onto the shared object (unchanged behavior).
        cookies = COMMON_COOKIES
        cookies['install_id'] = str(device["install_id"])

        resp_json = fetch(
            real_url,
            verify=False,
            cookies=cookies,
            headers=COMMON_HEADERS,
            timeout=3,
        ).json()

        results = []
        # A reply without "aweme_list" is treated as an empty page instead
        # of raising TypeError when iterating ``None``.
        for video_info in resp_json.get("aweme_list") or []:
            aweme_id = video_info.get("aweme_id")
            play_url = aweme_id_video_url(aweme_id)
            if not play_url:
                print("Failed grab <{}> video play url".format(aweme_id))
                continue
            video_info["play_url"] = play_url
            results.append(data_to_video(video_info))
        yield results

        max_cursor = resp_json.get("max_cursor")
        count = 12
        if resp_json.get("has_more") != 1:
            print("%s post video spider done!" % user_id)
            break
def gen_device(token):
    """Return the JSON-decoded reply of the ``URL.api_device(token)`` endpoint.

    The request is made through ``fetch`` with ``timeout=10``.
    """
    device_endpoint = URL.api_device(token)
    return fetch(device_endpoint, timeout=10).json()