Esempio n. 1
0
    def down_album_source(self):
        """Fetch the album catalogue and download every album in it.

        Progress is reported through ``self.show_status_signal``: a summary
        built from ``albumCount``/``photoCount``/``cacheFileUrl``/``userId``,
        one listing line per album, and a progress line before each album is
        handed to ``down_single_album``. An error raised while downloading
        one album is reported and the loop continues with the next album.
        """
        self.show_status_signal.emit("开始获取相册目录")

        # NOTE(review): presumably this call fills in the counters and
        # cacheFileUrl read just below -- confirm against its definition.
        self.get_userspace_url()
        catelog_outline_info = "相册总数:{}\n图片总数:{}\n相册目录地址:{}\n用户id:{}".format(
            self.albumCount, self.photoCount, self.cacheFileUrl, self.userId
        )
        self.show_status_signal.emit(catelog_outline_info)

        catelog_items = self.get_catelog_info()
        # Build the listing in one pass instead of repeated string +=.
        catelog_detail_info = "".join(
            "相册:{},图片数量:{},描述:{},id:{}\n".format(
                item["name"], item["count"], item["desc"], item["idd"]
            )
            for item in catelog_items
        )
        self.show_status_signal.emit(catelog_detail_info)

        fileutil.check_and_create_dir(self.backup_dir + "source")
        total = len(catelog_items)
        for i, item in enumerate(catelog_items):
            try:
                self.show_status_signal.emit(
                    "相册进度:{}/{},{}".format(i + 1, total, item["name"])
                )
                self.down_single_album(item)
            except Exception as exc:
                # Exception (not a bare except) so Ctrl-C / SystemExit still
                # stop the run instead of being reported as a failed album.
                print(sys.exc_info())
                self.show_status_signal.emit(
                    "系统异常:" + str(exc) + "\n单个相册下载失败,相册:" + item["name"]
                )
Esempio n. 2
0
 def on_pb_init_project_clicked(self):
     """Initialise a git repository in the directory typed into ``le_path``.

     Stores the text of ``self.le_path`` in ``self.path``. When that text or
     ``self.config.conf`` is empty nothing else happens. Otherwise the
     directory is created if needed, the process changes into it, and
     ``git init`` / ``git remote add origin`` are run through
     ``self.exec_cmd``.
     """
     import shlex

     self.path = self.le_path.text()
     if not (self.path and self.config.conf):
         return

     check_and_create_dir(self.path)
     self.init_project_name()
     os.chdir(self.path)
     # Every interpolated value is shell-quoted so that spaces or shell
     # metacharacters in the path or names cannot split or alter the command.
     # NOTE(review): the remote address contains "[email protected]", which looks
     # like an e-mail-obfuscation artifact rather than a real host -- confirm
     # the intended remote before relying on this command.
     cmd_init_project = "cd {};git init;git remote add origin [email protected]:{}/{}.git".format(
         shlex.quote(self.path),
         shlex.quote(str(self.config.conf["username"])),
         shlex.quote(str(self.project_name)))
     self.exec_cmd(cmd_init_project)
     self.show_result("$ 初始化项目成功")
Esempio n. 3
0
 def on_buttonBox_accepted(self):
     """Save the account settings and apply them to the global git config.

     Reads the username and e-mail from the line edits, writes them together
     with ``self.path`` as JSON to ``self.config_path``, changes into
     ``self.path``, sets ``user.name`` / ``user.email`` through
     ``self.exec_cmd`` and finally stores the settings in ``self.conf``.
     """
     import shlex

     conf = {
         "username": self.le_username.text(),
         "email": self.le_email.text(),
         "path": self.path,
     }
     check_and_create(self.config_path)
     check_and_create_dir(self.path)
     # Close the config file before running any shell command.
     with open(self.config_path, "w") as f:
         f.write(json.dumps(conf))

     os.chdir(self.path)
     # Quote every value: an unquoted name such as "John Doe" would be split
     # into two shell words and git would receive the wrong arguments.
     self.exec_cmd(
         "cd {};git config --global user.name {};git config --global user.email {}"
         .format(
             shlex.quote(str(self.path)),
             shlex.quote(conf["username"]),
             shlex.quote(conf["email"]),
         ))
     self.show_result("$ 设置帐号信息成功")
     self.conf = conf
Esempio n. 4
0
    def get_catelog_info(self):
        """Download the album catalogue file and parse it into album records.

        The file at ``http://<self.cacheFileUrl>`` is fetched with ``wget``
        into ``<backup_dir>source/catelog_info.txt``, decoded as gb2312 and
        handed to ``self.analyze_response`` together with the field patterns
        below.

        Returns:
            Whatever ``self.analyze_response`` returns for the fields
            ``name``, ``desc``, ``count``, ``purl`` and ``idd``.
        """
        import subprocess

        source_dir = self.backup_dir + "source"
        fileutil.check_and_create_dir(source_dir)
        print(self.cacheFileUrl)

        # An argument list with cwd= keeps the URL away from the shell, so
        # characters such as '&' or ';' in it cannot change the command.
        wget_cmd = [
            "wget",
            "http://{}".format(self.cacheFileUrl).strip(),
            "-O",
            "catelog_info.txt",
        ]
        print("cd {};{}".format(source_dir, " ".join(wget_cmd)))
        # The exit status is deliberately not checked, as before; a failed
        # download surfaces when the file is read or parsed below.
        subprocess.run(wget_cmd, cwd=source_dir)

        with open(
            source_dir + "/catelog_info.txt", "r", encoding="gb2312"
        ) as f:
            content = f.read().strip()
        self.show_status_signal.emit("获取相册目录成功")

        # Each entry pairs an output key with the regex that extracts it.
        fields = [
            {"name": "name", "pattern": r"(?<=name:\').+?(?=\',)"},
            {"name": "desc", "pattern": r"(?<=desc:\').*?(?=\',)"},
            {"name": "count", "pattern": r"(?<=count:).+?(?=,)"},
            {"name": "purl", "pattern": r"(?<=purl:\').*?(?=\'})"},
            {"name": "idd", "pattern": r"(?<={id:).+?(?=,)"},
        ]
        catelog_items = self.analyze_response(content, fields)
        print("相册目录信息:", catelog_items)
        return catelog_items
Esempio n. 5
0
    def get_single_blog(self, item, item_index):
        """Back up one blog post: the raw page, a local HTML copy and images.

        Args:
            item: mapping describing the post; the keys ``link``, ``title``,
                ``accessCount`` and ``commentCount`` are read here.
            item_index: not used by this method.

        Returns:
            None. Returns early, after printing a notice, when the page has
            no ``span.blogsep`` element.
        """
        link = item["link"]
        blog_title = item["title"]
        url = "http://{}.blog.163.com/{}".format(self.blog_name, link)
        print("url:", url)
        r = requests.get(url)
        html_doc = r.text

        # Keep the untouched page; utf-8 so any character can be written
        # regardless of the platform's default encoding.
        source_dir = self.backup_dir + "日志/source/"
        fileutil.check_and_create_dir(source_dir)
        with open(source_dir + blog_title + ".txt", "w", encoding="utf-8") as f:
            f.write(html_doc)

        soup = BeautifulSoup(html_doc, "lxml")
        blog_sep = soup.find("span", class_="blogsep")
        blog_catelog = soup.find("a", class_="fc03 m2a")
        if blog_sep is None:
            print("无权限访问:" + item["title"])
            return
        print("发表时间:{},分类:{}".format(blog_sep.string, blog_catelog.string))

        blog_body = soup.find_all("div", class_="bct fc05 fc11 nbw-blog ztag")
        # Always bound, so a post without a body div no longer raises
        # NameError in the image step below.
        blog_imgs = []
        if blog_body:
            blog_imgs = BeautifulSoup(str(blog_body[0]),
                                      "lxml").find_all("img")
            print("正文:", blog_body[0])

        catelog_dir = self.backup_dir + "日志/" + blog_catelog.string + "/"
        single_blog_dir = catelog_dir + blog_title + "/"
        fileutil.check_and_create_dir(single_blog_dir)

        content = "<html><meta charset='utf-8'><body><h1>{}</h1><div class='tag'>{} | 分类:  {}</div><hr>".format(
            blog_title, blog_sep.string, blog_catelog.string)
        if blog_body:
            str_body = str(blog_body[0])
            pattern = re.compile(r'(?<=src=").+?(?=/\d{5})')
            print(pattern.findall(str_body))
            # Point image sources at the local "<title>/" folder. A function
            # replacement inserts the title literally, so backslashes in a
            # title are not interpreted as regex escapes.
            content += pattern.sub(lambda _match: blog_title, str_body)
        content += "<br>阅读({}) |  评论({})".format(item["accessCount"],
                                                 item["commentCount"])
        content += "</body></html>"
        # The page declares charset utf-8, so write it as utf-8.
        with open(catelog_dir + blog_title + ".html", "w",
                  encoding="utf-8") as f:
            f.write(content)

        # Download the images referenced by the post body.
        if not blog_imgs:
            return
        self.show_status_signal.emit("正在获取日志:{}的{}张图片".format(
            item["title"], len(blog_imgs)))
        for img in blog_imgs:
            print(img)
            src = img.get("src")  # tolerate <img> tags without a src
            if not src:
                continue
            img_name = src.rsplit("/", 1)[-1]
            print("下载图片:", src, img_name)
            self.show_status_signal.emit('下载图片:{}'.format(src))
            img_response = requests.get(src)
            with open(single_blog_dir + img_name, "wb") as img_file:
                img_file.write(img_response.content)
Esempio n. 6
0
    def down_single_album(self, item):
        """Download all photos of one album into ``<backup_dir><album name>/``.

        Args:
            item: album record; the keys ``purl`` (address of the album's
                photo listing) and ``name`` are read here.

        An album whose ``purl`` is blank is reported as empty and skipped.
        Otherwise the listing is fetched with ``wget`` into
        ``<backup_dir>source/<name>.txt``, parsed via
        ``self.analyze_response`` and each photo is downloaded. A failure on
        one photo is reported and does not stop the remaining photos.
        """
        import subprocess

        if item["purl"].strip() == "":
            self.show_status_signal.emit("空相册:{}".format(item["name"]))
            return

        source_dir = self.backup_dir + "source"
        list_name = "{}.txt".format(item["name"].strip())
        # An argument list with cwd= keeps the album name and URL away from
        # the shell, so spaces or metacharacters cannot break the command.
        wget_cmd = [
            "wget",
            "http://{}".format(item["purl"].strip()),
            "-O",
            list_name,
        ]
        print("cd {};{}".format(source_dir, " ".join(wget_cmd)))
        # Exit status is deliberately not checked, as before.
        subprocess.run(wget_cmd, cwd=source_dir)

        with open(source_dir + "/" + list_name, "r", encoding="gb2312") as f:
            content = f.read().strip()

        fields = [
            {"name": "desc", "pattern": r"(?<=desc:\').*?(?=\',)"},
            {"name": "murl", "pattern": r"(?<=murl:\').*?(?=\',)"},
        ]
        photos = self.analyze_response(str(content), fields)
        print(item["name"], "相册目录信息:", photos)

        album_dir = self.backup_dir + item["name"]
        if len(photos) > 0:
            fileutil.check_and_create_dir(album_dir)
        total = len(photos)
        for i, photo in enumerate(photos):
            try:
                r = requests.get(self.img_url_prex + photo["murl"][1:])
                if photo["desc"] == "":
                    # No description: use the last path segment of the URL
                    # (leading "/" included) as the file name.
                    slash_index = photo["murl"].rindex("/")
                    photo["desc"] = photo["murl"][slash_index:]
                else:
                    # Description present: use it as the file name, with the
                    # extension taken from the URL.
                    dot_index = photo["murl"].rindex(".")
                    photo["desc"] = (
                        photo["desc"].replace(".JPG", "")
                        + photo["murl"][dot_index:]
                    )
                print("下载图片:{},{}".format(photo["desc"], photo["murl"]))
                # 1-based counter, matching the album-level progress line.
                self.show_status_signal.emit(
                    "相册:{},{}/{},图片:{}".format(
                        item["name"], i + 1, total, photo["desc"]
                    )
                )
                with open(album_dir + "/" + photo["desc"], "wb") as photo_file:
                    photo_file.write(r.content)
            except Exception as exc:
                # Exception (not a bare except) so Ctrl-C / SystemExit still
                # stop the run instead of being reported as a failed photo.
                print(sys.exc_info())
                self.show_status_signal.emit(
                    "系统异常:"
                    + str(exc)
                    + "\n单张图片下载失败,图片:"
                    + photo["desc"]
                )
Esempio n. 7
0
 def create_dir(self):
     """Set ``self.backup_dir`` from the blog-name field and create it.

     The directory is ``<current working dir>/<blog name>的博客备份/``; it is
     passed to ``fileutil.check_and_create_dir``.
     """
     working_dir = os.getcwd()
     blog_name = self.le_blog_name.text()
     self.backup_dir = working_dir + "/" + blog_name + "的博客备份/"
     fileutil.check_and_create_dir(self.backup_dir)