def get_thread_author_post(thread_url):
    """Download a thread page and return the decoded post-message markup.

    The page is requested through ``tool.http_request``.  When the request
    does not report return code 1, ``None`` is returned.  Otherwise the
    charset is read from the ``Content-Type`` response header and used to
    decode the extracted fragment.

    :param thread_url: URL of the thread page to fetch.
    :return: the decoded post message, or ``None`` if the request failed.
    """
    return_code, page, response = tool.http_request(thread_url)
    if return_code != 1:
        return None

    # The encoding is whatever follows "charset=" in the Content-Type header.
    content_type = tool.get_response_info(response.info(), "Content-Type")
    encoding = tool.find_sub_string(content_type, "charset=")

    # Narrow the page to the region between the post-message cell marker and
    # the comment block marker (presumably the markers themselves are
    # excluded by find_sub_string -- confirm against tool).
    fragment = tool.find_sub_string(page, '<td class="t_f" id="postmessage_', '<div id="comment_')

    # Skip the tail of the opening tag (up to and including '">') and cut
    # everything from the last closing </td> onwards.
    body_start = fragment.find('">') + 2
    body_end = fragment.rfind("</td>")
    return fragment[body_start:body_end].decode(encoding)
def set_csrf_token():
    """Fetch the Instagram index page and store its CSRF token globally.

    The token is read from the ``csrftoken=`` entry of the ``Set-Cookie``
    response header and written to the module-level ``CSRF_TOKEN``.

    :return: ``True`` when a non-empty token was found and stored,
        ``False`` when the request failed, the header is missing, or no
        token could be extracted.
    """
    global CSRF_TOKEN

    return_code, _, response = tool.http_request("https://www.instagram.com/instagram")
    if return_code != 1:
        return False

    cookie_header = tool.get_response_info(response.info(), "Set-Cookie")
    if cookie_header is None:
        return False

    # The token is the text between "csrftoken=" and the next ";".
    token = tool.find_sub_string(cookie_header, "csrftoken=", ";")
    if not token:
        return False

    CSRF_TOKEN = token
    return True
def run(self):
    """Download every new image and video of one account, then archive progress.

    Fields of ``self.account_info`` used here:

    * ``[0]`` account id
    * ``[1]`` number of videos saved so far
    * ``[2]`` creation time of the newest video already saved
    * ``[3]`` number of images saved so far
    * ``[4]`` upload time of the newest image already saved
    * ``[5]`` optional display name (falls back to the account id)

    Images and videos are fetched until an item that is not newer than the
    archived time is met, or until the configured ``GET_IMAGE_COUNT`` /
    ``GET_VIDEO_COUNT`` limit is exceeded.  When ``IS_SORT`` is set, files
    are downloaded into the temp directories and afterwards moved to the
    final directories by ``robot.sort_file``.  Finally the updated account
    record is written to ``NEW_SAVE_DATA_PATH`` and the global totals and
    ``ACCOUNTS`` list are updated under ``self.thread_lock``.

    ``SystemExit`` (presumably raised by ``tool.process_exit`` -- confirm)
    is caught and only logged, so it does not propagate to the caller.
    """
    global TOTAL_IMAGE_COUNT
    global TOTAL_VIDEO_COUNT

    account_id = self.account_info[0]
    if len(self.account_info) >= 6 and self.account_info[5]:
        account_name = self.account_info[5]
    else:
        account_name = self.account_info[0]

    try:
        log.step(account_name + " 开始")

        # Download into the temp folders when the files have to be re-sorted
        # afterwards, otherwise straight into the target folders.
        if IS_SORT:
            image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
            video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
        else:
            image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
            video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

        # Images
        image_count = 1
        first_image_time = "0"
        if IS_DOWNLOAD_IMAGE:
            # Fetch the full list of image URLs.
            image_url_list = get_image_url_list(account_id)
            if image_url_list is None:
                log.error(account_name + " 图片列表获取失败")
            else:
                need_make_image_dir = True
                for image_url in image_url_list:
                    # Strip the "@..." suffix so the full image is fetched
                    # instead of the thumbnail.
                    image_url = image_url.split("@")[0]
                    image_return_code, image_byte, image_response = tool.http_request(image_url)
                    if image_return_code != 1:
                        log.step(account_name + " 第%s张图片下载失败" % image_count)
                        continue

                    # Upload time of the image: Last-Modified header string,
                    # converted to a timestamp.
                    response_last_modified_time = tool.get_response_info(image_response.info(), "Last-Modified")
                    image_created_time = tool.response_time_to_timestamp(response_last_modified_time)

                    # Stop once we reach an image saved by a previous run.
                    if int(image_created_time) <= int(self.account_info[4]):
                        break

                    # The first image's upload time becomes the new archive mark.
                    if first_image_time == "0":
                        first_image_time = str(image_created_time)

                    log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                    # Create the directory lazily, right before the first save.
                    if need_make_image_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                            tool.process_exit()
                        need_make_image_dir = False

                    # Extension: text after the last "." up to an optional ":".
                    file_type = image_url.split(".")[-1].split(":")[0]
                    image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                    save_image(image_byte, image_file_path)
                    log.step(account_name + " 第%s张图片下载成功" % image_count)
                    image_count += 1

                    # Stop when the configured download limit is reached.
                    if 0 < GET_IMAGE_COUNT < image_count:
                        break

        # Videos
        video_count = 1
        first_video_time = "0"
        if IS_DOWNLOAD_VIDEO:
            # Fetch the full list of video ids.
            video_id_list = get_video_id_list(account_id)
            if video_id_list is None:
                log.error(account_name + " 视频列表获取失败")
            else:
                need_make_video_dir = True
                for video_id in video_id_list:
                    # Creation time and download address of the video.
                    video_info = get_video_info(video_id)
                    if video_info is None:
                        log.error(account_name + " 第%s个视频 %s 信息获取失败" % (video_count, video_id))
                        continue

                    # Stop once we reach a video saved by a previous run.
                    if int(video_info["data"]["createtime"]) <= int(self.account_info[2]):
                        break

                    # The first video's creation time becomes the new archive mark.
                    if first_video_time == "0":
                        first_video_time = str(video_info["data"]["createtime"])

                    # Address of the m3u8 playlist.
                    link_url = str(video_info["data"]["linkurl"])
                    # Real download addresses of the video segments.
                    ts_url_list = get_ts_url_list(link_url)
                    if ts_url_list is None:
                        log.error(account_name + " 第%s个视频下载地址列表 %s 获取失败" % (video_count, link_url))
                        continue

                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, ts_url_list))

                    # Create the directory lazily, right before the first save.
                    if need_make_video_dir:
                        if not tool.make_dir(video_path, 0):
                            # Fixed: this message used to say "image" directory.
                            log.error(account_name + " 创建视频下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_video_dir = False

                    video_file_path = os.path.join(video_path, "%04d.ts" % video_count)
                    if save_video(ts_url_list, video_file_path):
                        log.step(account_name + " 第%s个视频下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, ts_url_list))

                    # Stop when the configured download limit is reached.
                    if 0 < GET_VIDEO_COUNT < video_count:
                        break

        log.step(account_name + " 下载完毕,总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

        # Move the files from the temp folders to the final folders,
        # continuing the numbering from the archived counts.
        if IS_SORT:
            if image_count > 1:
                destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                if robot.sort_file(image_path, destination_path, int(self.account_info[3]), 4):
                    log.step(account_name + " 图片从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                    tool.process_exit()
            if video_count > 1:
                destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                if robot.sort_file(video_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_name + " 视频从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                    tool.process_exit()

        # Update the archive record only for media types that yielded new items.
        if first_image_time != "0":
            self.account_info[3] = str(int(self.account_info[3]) + image_count - 1)
            self.account_info[4] = first_image_time
        if first_video_time != "0":
            self.account_info[1] = str(int(self.account_info[1]) + video_count - 1)
            self.account_info[2] = first_video_time

        # Persist the final account record.
        tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)

        # Update the shared state; try/finally guarantees the lock is
        # released even if ACCOUNTS.remove raises.
        self.thread_lock.acquire()
        try:
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
        finally:
            self.thread_lock.release()

        log.step(account_name + " 完成")
    except SystemExit as se:
        # Exit code 0 is treated as a deliberate early exit, anything else
        # as an abnormal one.
        if se.code == 0:
            log.step(account_name + " 提前退出")
        else:
            log.error(account_name + " 异常退出")