Ejemplo n.º 1
0
def get_thread_author_post(thread_url):
    thread_return_code, thread_page, thread_response = tool.http_request(thread_url)
    if thread_return_code == 1:
        content_type = tool.get_response_info(thread_response.info(), "Content-Type")
        charset = tool.find_sub_string(content_type, "charset=")
        post_message = tool.find_sub_string(thread_page, '<td class="t_f" id="postmessage_', '<div id="comment_')
        post_message = post_message[post_message.find('">') + 2: post_message.rfind("</td>")]
        return post_message.decode(charset)
    return None
Ejemplo n.º 2
0
def set_csrf_token():
    global CSRF_TOKEN
    index_url = "https://www.instagram.com/instagram"
    index_page_response = tool.http_request(index_url)
    if index_page_response[0] == 1:
        set_cookie_info = tool.get_response_info(index_page_response[2].info(), "Set-Cookie")
        if set_cookie_info is not None:
            csrf_token = tool.find_sub_string(set_cookie_info, "csrftoken=", ";")
            if csrf_token:
                CSRF_TOKEN = csrf_token
                return True
    return False
Ejemplo n.º 3
0
    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 6 and self.account_info[5]:
            account_name = self.account_info[5]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            image_count = 1
            first_image_time = "0"
            need_make_image_dir = True
            while IS_DOWNLOAD_IMAGE:
                # 获取全部图片地址列表
                image_url_list = get_image_url_list(account_id)
                if image_url_list is None:
                    log.error(account_name + " 图片列表获取失败")
                    break

                for image_url in list(image_url_list):
                    # 不使用缩略图
                    image_url = image_url.split("@")[0]
                    image_return_code, image_byte, image_response = tool.http_request(image_url)
                    if image_return_code != 1:
                        log.step(account_name + " 第%s张图片下载失败" % image_count)
                        continue

                    # 获取图片的上传时间(字符串)
                    response_last_modified_time = tool.get_response_info(image_response.info(), "Last-Modified")
                    # 字符串转换为时间戳
                    image_created_time = tool.response_time_to_timestamp(response_last_modified_time)

                    # 检查是否已下载到前一次的图片
                    if int(image_created_time) <= int(self.account_info[4]):
                        break

                    # 将第一张图片的上传时间做为新的存档记录
                    if first_image_time == "0":
                        first_image_time = str(image_created_time)

                    log.step(account_name + " 开始下载第%s张图片 %s" % (image_count, image_url))

                    # 第一张图片,创建目录
                    if need_make_image_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % image_path)
                            tool.process_exit()
                        need_make_image_dir = False

                    file_type = image_url.split(".")[-1].split(":")[0]
                    image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                    save_image(image_byte, image_file_path)
                    log.step(account_name + " 第%s张图片下载成功" % image_count)
                    image_count += 1

                    # 达到配置文件中的下载数量,结束
                    if 0 < GET_IMAGE_COUNT < image_count:
                        break
                break

            # 视频
            video_count = 1
            first_video_time = "0"
            need_make_video_dir = True
            while IS_DOWNLOAD_VIDEO:
                # 获取全部视频ID列表
                video_id_list = get_video_id_list(account_id)
                if video_id_list is None:
                    log.error(account_name + " 视频列表获取失败")
                    break

                for video_id in list(video_id_list):
                    # 获取视频的时间和下载地址
                    video_info = get_video_info(video_id)
                    if video_info is None:
                        log.error(account_name + " 第%s个视频 %s 信息获取失败" % (video_count, video_id))
                        continue

                    # 检查是否已下载到前一次的视频
                    if int(video_info["data"]["createtime"]) <= int(self.account_info[2]):
                        break

                    # 将第一个视频的上传时间做为新的存档记录
                    if first_video_time == "0":
                        first_video_time = str(video_info["data"]["createtime"])

                    # m3u8文件的地址
                    link_url = str(video_info["data"]["linkurl"])
                    # 视频的真实下载地址列表
                    ts_url_list = get_ts_url_list(link_url)
                    if ts_url_list is None:
                        log.error(account_name + " 第%s个视频下载地址列表 %s 获取失败" % (video_count, link_url))
                        continue

                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, ts_url_list))

                    # 第一个视频,创建目录
                    if need_make_video_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_video_dir = False

                    video_file_path = os.path.join(video_path, "%04d.ts" % video_count)
                    if save_video(ts_url_list, video_file_path):
                        log.step(account_name + " 第%s个视频下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, ts_url_list))

                    # 达到配置文件中的下载数量,结束
                    if 0 < GET_VIDEO_COUNT < video_count:
                        break
                break

            log.step(account_name + " 下载完毕,总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                if image_count > 1:
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                if video_count > 1:
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            if first_image_time != "0":
                self.account_info[3] = str(int(self.account_info[3]) + image_count - 1)
                self.account_info[4] = first_image_time

            if first_video_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + video_count - 1)
                self.account_info[2] = first_video_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")