def download_video(self, aweme_id):
    """Download the video identified by ``aweme_id``.

    Signs the video-detail URL with ``gen_url``, requests it through
    ``fetch`` and downloads the first address listed under
    ``aweme_detail -> video -> play_addr -> url_list``.

    Args:
        aweme_id: Identifier of the video; sent both as a query parameter
            and in the request body.

    Returns:
        The ``.content`` of the response for the play address, or ``None``
        when the address cannot be extracted or downloaded (a warning is
        logged in that case).
    """
    # Build a fresh dict instead of writing into self.common_params, so the
    # per-call 'aweme_id' does not leak into every later request.
    params = {**self.common_params, 'aweme_id': aweme_id}
    real_url = gen_url(self.token, URL.video_detail_url(), params)
    post_data = {"aweme_id": aweme_id}

    # NOTE(review): this stores install_id in the cookie mapping held on the
    # instance rather than in a copy - presumably intentional; confirm.
    cookies = self.__COOKIES
    cookies['install_id'] = str(self.__device["install_id"])

    resp = fetch(real_url, data=post_data, cookies=cookies,
                 headers=self.__HEADERS, timeout=3).json()
    try:
        play_addr = resp['aweme_detail']['video']['play_addr']['url_list'][0]
        content = fetch(play_addr).content
    except Exception:
        # Best effort: any failure while extracting or downloading yields
        # None, but KeyboardInterrupt/SystemExit are no longer swallowed.
        self.logger.warning("提取视频信息失败...")
        content = None
    return content
def gen_url(token, raw_url, query):
    """Return the signed version of ``raw_url`` combined with ``query``.

    Args:
        token: Passed to ``URL.api_sign`` to build the signing endpoint.
        raw_url: URL without a query string.
        query: Query string, or a dict that is serialised with
            ``params2str`` first.

    Returns:
        The ``url`` field of the signing endpoint's JSON reply.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string
    sign_reply = requests.post(URL.api_sign(token), json={"url": unsigned_url})
    return sign_reply.json()['url']
def download_video(self, aweme_id):
    """Download the video identified by ``aweme_id``.

    Signs the query parameters with ``getSign``, requests the video-detail
    URL and downloads the first address listed under
    ``aweme_detail -> video -> play_addr -> url_list``.

    Args:
        aweme_id: Identifier of the video; sent both as a query parameter
            and in the request body.

    Returns:
        The raw bytes of the video, or ``None`` when the play address
        cannot be extracted or downloaded (a warning is logged).
    """
    # Build a fresh dict instead of writing into self.common_params, so the
    # per-call 'aweme_id' does not leak into every later request.
    query_params = {**self.common_params, 'aweme_id': aweme_id}
    sign = getSign(self.__get_token(), query_params)
    params = {**query_params, **sign}
    post_data = {"aweme_id": aweme_id}

    # verify=False: certificate verification is switched off for this call.
    resp = requests.get(URL.video_detail_url(),
                        params=params,
                        data=post_data,
                        verify=False,
                        headers=self.__HEADERS)
    resp_result = resp.json()
    try:
        play_addr = resp_result['aweme_detail']['video']['play_addr']['url_list'][0]
        content = requests.get(play_addr).content
    except Exception:
        # Best effort: any failure while extracting or downloading yields
        # None, but KeyboardInterrupt/SystemExit are no longer swallowed.
        self.logger.warning("提取视频信息失败...")
        content = None
    return content
def get_device(token):
    """Request the device endpoint for ``token`` and return its decoded JSON.

    The request uses a 10 second timeout.
    """
    return requests.get(URL.api_device(token), timeout=10).json()
def gen_url(token, raw_url, query):
    """Return the signed version of ``raw_url`` combined with ``query``.

    Args:
        token: Passed to ``URL.api_sign`` to build the signing endpoint.
        raw_url: URL without a query string.
        query: Query string, or a dict that is serialised with
            ``params2str`` first.

    Returns:
        The ``url`` field of the signing endpoint's JSON reply, or ``None``
        (after printing a notice) when the endpoint does not answer with
        HTTP 200.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string
    sign_reply = requests.post(URL.api_sign(token), json={"url": unsigned_url})
    if sign_reply.status_code == 200:
        return sign_reply.json()['url']
    # Any non-200 status is reported as "today's API quota is used up".
    print("您当日的 api 次数已经使用完毕, 请明日再来爬取吧...")
    return None
def gen_real_url(token, raw_url, query):
    """Return the signed version of ``raw_url`` combined with ``query``.

    Args:
        token: Passed to ``URL.api_sign`` to build the signing endpoint.
        raw_url: URL without a query string.
        query: Query string, or a dict that is serialised with
            ``params2str`` first.

    Returns:
        The ``url`` field of the signing endpoint's JSON reply.

    Raises:
        BaseException: When ``fetch`` returns a falsy result; a notice is
            printed first.
    """
    query_string = params2str(query) if isinstance(query, dict) else query
    unsigned_url = raw_url + "?" + query_string
    sign_reply = fetch(URL.api_sign(token), json={"url": unsigned_url}, method="post")
    if sign_reply:
        return sign_reply.json()['url']
    # The printed notice says today's token quota is exhausted.
    print("你的当日 token 次数已经用完,请明天再来尝试吧...")
    raise BaseException("you have run out of token.Please try tomorrow")
def grab_video(self, user_id, action, content, max_cursor=0):
    """Fetch one page of a user's videos and download every entry.

    Args:
        user_id: User whose videos are listed.
        action: ``"USER_LIKE"`` selects the favourite-list URL; any other
            value selects the post-list URL.
        content: When equal to ``"-m"`` the music URI of each video is
            passed along with the download item; otherwise it is ``None``.
        max_cursor: Paging cursor sent with the request.

    Returns:
        A ``(has_more, max_cursor)`` tuple taken from the reply, for the
        caller to continue paging.
    """
    # Two crawl types are currently supported: videos the user has liked,
    # and all videos the user has published.
    url = URL.favorite_url() if action == "USER_LIKE" else URL.post_url()
    favorite_params = copy.deepcopy(self.__USER_VIDEO_PARAMS)
    favorite_params['user_id'] = user_id
    favorite_params['max_cursor'] = max_cursor
    query_params = {**favorite_params, **self.common_params}
    real_url = gen_url(self.token, url, query_params)

    cookies = self.__COOKIES
    cookies['install_id'] = str(self.__device["install_id"])
    resp = requests.get(real_url,
                        verify=False,
                        cookies=cookies,
                        headers={"User-Agent": "okhttp/3.10.0.1"},
                        timeout=3)
    favorite_info = json.loads(resp.content.decode("utf-8"))
    hasmore = favorite_info.get('has_more')
    max_cursor = favorite_info.get('max_cursor')

    # A reply whose 'aweme_list' is missing or null used to make the loop
    # raise TypeError; treat it as an empty page instead.
    video_infos = favorite_info.get('aweme_list') or []
    for per_video in video_infos:
        author = per_video['author']
        download_item = {
            "author_nick_name": author.get("nickname"),
            "video_desc": per_video.get('desc'),
            "author_uid": author.get('uid'),
            "music_id": (per_video['music']['play_url'].get('uri')
                         if content == "-m" else None),
        }
        self.download_user_video(per_video.get("aweme_id"), **download_item)
        # Pause five seconds after every download.
        time.sleep(5)
    return hasmore, max_cursor
def download_music(self, music_id):
    """Download the audio behind ``music_id``.

    Args:
        music_id: Identifier handed to ``URL.music_url``.

    Returns:
        The raw response bytes, or ``None`` when ``music_id`` is falsy
        (no request is made in that case).
    """
    if music_id:
        response = requests.get(URL.music_url(music_id),
                                headers=self.__HEADERS,
                                verify=False)
        return response.content
    return None
def grab_video(self, user_id, action, content, max_cursor=0):
    """Fetch one page of a user's videos and download every entry.

    Args:
        user_id: User whose videos are listed.
        action: ``"USER_LIKE"`` selects the favourite-list URL; any other
            value selects the post-list URL.
        content: When equal to ``"-m"`` the music URI of each video is
            passed along with the download item; otherwise it is ``None``.
        max_cursor: Paging cursor sent with the request.

    Returns:
        A ``(has_more, max_cursor)`` tuple taken from the reply, for the
        caller to continue paging.
    """
    favorite_params = copy.deepcopy(self.__USER_VIDEO_PARAMS)
    favorite_params['user_id'] = user_id
    favorite_params['max_cursor'] = max_cursor
    query_params = {**favorite_params, **self.common_params}
    sign = getSign(self.__get_token(), query_params)
    params = {**query_params, **sign}

    # Two crawl types are currently supported: videos the user has liked,
    # and all videos the user has published.
    url = URL.favorite_url() if action == "USER_LIKE" else URL.post_url()
    resp = requests.get(url,
                        params=params,
                        verify=False,
                        headers=self.__HEADERS)
    favorite_info = resp.json()
    hasmore = favorite_info.get('has_more')
    max_cursor = favorite_info.get('max_cursor')

    # A reply whose 'aweme_list' is missing or null used to make the loop
    # raise TypeError; treat it as an empty page instead.
    video_infos = favorite_info.get('aweme_list') or []
    for per_video in video_infos:
        author = per_video['author']
        download_item = {
            "author_nick_name": author.get("nickname"),
            "video_desc": per_video.get('desc'),
            "author_uid": author.get('uid'),
            "music_id": (per_video['music']['play_url'].get('uri')
                         if content == "-m" else None),
        }
        self.download_user_video(per_video.get("aweme_id"), **download_item)
        # Pause five seconds after every download.
        time.sleep(5)
    return hasmore, max_cursor
def __grab_comment(self, aweme_id, cursor, upvote_bound=10):
    """Fetch one page of comments for a video and store each of them.

    Args:
        aweme_id: Video whose comments are requested.
        cursor: Paging cursor sent with the request.
        upvote_bound: Minimum ``digg_count``; the first record below it
            stops processing.

    Returns:
        ``-2`` as soon as a record falls below ``upvote_bound``; otherwise
        the reply's ``hasmore`` field converted to ``int``.
    """
    endpoint = URL.comment_url()
    request_params = copy.deepcopy(self.__COMMENT_LIST_PARAMS)
    request_params['aweme_id'] = aweme_id
    request_params['cursor'] = cursor
    merged_params = {**request_params, **self.common_params}
    signed_url = gen_url(self.token, endpoint, merged_params)

    cookies = self.__COOKIES
    cookies['install_id'] = str(self.__device["install_id"])
    response = requests.get(signed_url,
                            verify=False,
                            cookies=cookies,
                            headers=self.__HEADERS)
    payload = json.loads(response.content.decode("utf-8"))

    for entry in payload.get("comments"):
        replies = entry.get("reply_comment")
        # When replies are attached, the first reply is recorded in place
        # of the comment itself.
        record = replies[0] if replies else entry
        upvote_count = record.get("digg_count")
        comment_info = {
            "text": record.get("text"),
            "upvote_count": upvote_count,
            "nick_name": record['user'].get("nickname"),
            "user_id": record['user'].get("uid"),
        }
        if int(upvote_count) < upvote_bound:
            return -2
        self.download_comment(aweme_id, **comment_info)

    return int(payload.get("hasmore"))
def __grab_comment(self, aweme_id, cursor, upvote_bound=10):
    """Fetch one page of comments for a video and store each of them.

    Args:
        aweme_id: Video whose comments are requested.
        cursor: Paging cursor sent with the request.
        upvote_bound: Minimum ``digg_count``; the first record below it
            stops processing.

    Returns:
        ``-2`` as soon as a record falls below ``upvote_bound``; otherwise
        the reply's ``hasmore`` field converted to ``int``.
    """
    request_params = copy.deepcopy(self.__COMMENT_LIST_PARAMS)
    request_params['aweme_id'] = aweme_id
    request_params['cursor'] = cursor
    query_params = {**request_params, **self.common_params}
    signature = getSign(self.__get_token(), query_params)
    signed_params = {**query_params, **signature}

    response = requests.get(URL.comment_url(),
                            params=signed_params,
                            verify=False,
                            headers=self.__HEADERS)
    payload = response.json()

    for entry in payload.get("comments"):
        replies = entry.get("reply_comment")
        # When replies are attached, the first reply is recorded in place
        # of the comment itself.
        record = replies[0] if replies else entry
        upvote_count = record.get("digg_count")
        comment_info = {
            "text": record.get("text"),
            "upvote_count": upvote_count,
            "nick_name": record['user'].get("nickname"),
            "user_id": record['user'].get("uid"),
        }
        print(upvote_count)
        if int(upvote_count) < upvote_bound:
            return -2
        self.download_comment(aweme_id, **comment_info)

    return int(payload.get("hasmore"))
def like_video(self, aweme_id):
    """Send a like request for the video ``aweme_id``.

    Posts the form ``{"aweme_id": aweme_id, "type": 1}`` to the like URL
    with signed query parameters, printing the session cookies beforehand
    and the decoded JSON reply afterwards. Returns nothing.
    """
    query_params = {"pass-region": "1", **self.common_params}
    signature = getSign(self.__get_token(), query_params)
    signed_params = {**query_params, **signature}
    form_body = {"aweme_id": aweme_id, "type": 1}

    # Work on a copy so the extra headers stay local to this request.
    request_headers = copy.deepcopy(self.__HEADERS)
    request_headers["sdk-version"] = '1'
    request_headers["Accept-Encoding"] = 'br, gzip, deflate'

    print(self.__request.cookies)
    reply = self.__request.post(URL.like_video_url(),
                                params=signed_params,
                                data=form_body,
                                verify=False,
                                headers=request_headers)
    print(reply.json())
def gen_device(token):
    """Request the device endpoint for ``token`` via ``fetch`` and return its decoded JSON.

    The request uses a 10 second timeout.
    """
    device_reply = fetch(URL.api_device(token), timeout=10)
    return device_reply.json()