def down_album_source(self):
    """Fetch the album catalog and download every album listed in it.

    Emits progress text through ``self.show_status_signal``: first a summary
    built from ``self.albumCount``, ``self.photoCount``, ``self.cacheFileUrl``
    and ``self.userId`` (read after ``self.get_userspace_url()`` has been
    called), then one line per catalog entry, then a progress line per album
    while ``self.down_single_album`` runs.

    A failure inside one album is reported through the status signal and does
    not stop the remaining albums.
    """
    emit = self.show_status_signal.emit

    emit("开始获取相册目录")
    self.get_userspace_url()
    catelog_outline_info = "相册总数:{}\n图片总数:{}\n相册目录地址:{}\n用户id:{}".format(
        self.albumCount, self.photoCount, self.cacheFileUrl, self.userId
    )
    emit(catelog_outline_info)

    catelog_items = self.get_catelog_info()
    # One line per album; join avoids repeated string concatenation.
    catelog_detail_info = "".join(
        "相册:{},图片数量:{},描述:{},id:{}\n".format(
            item["name"], item["count"], item["desc"], item["idd"]
        )
        for item in catelog_items
    )
    emit(catelog_detail_info)

    fileutil.check_and_create_dir(self.backup_dir + "source")
    total = len(catelog_items)
    for position, item in enumerate(catelog_items, start=1):
        try:
            emit("相册进度:{}/{},{}".format(position, total, item["name"]))
            self.down_single_album(item)
        except Exception as exc:
            # Best effort per album: report and continue with the next one.
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate.
            print(sys.exc_info())
            emit(
                "系统异常:"
                + str(exc)
                + "\n单个相册下载失败,相册:"
                + item["name"]
            )
def on_pb_init_project_clicked(self):
    """Initialise a git repository in the directory entered in ``le_path``.

    The entered text is stored on ``self.path``. When both the path and
    ``self.config.conf`` are truthy, the directory is prepared with
    ``check_and_create_dir``, ``self.init_project_name()`` is called, the
    process changes into the directory, and a ``git init`` /
    ``git remote add origin`` command line is passed to ``self.exec_cmd``,
    followed by a success message via ``self.show_result``. Otherwise nothing
    further happens.
    """
    import shlex  # local import: keeps this method independent of the file header

    self.path = self.le_path.text()
    if not (self.path and self.config.conf):
        return

    check_and_create_dir(self.path)
    self.init_project_name()
    os.chdir(self.path)

    # NOTE(review): "[email protected]" looks like a scrubbed/obfuscated SSH
    # address (e.g. user@host); kept verbatim here -- confirm the intended remote.
    remote_url = "[email protected]:{}/{}.git".format(
        self.config.conf["username"], self.project_name
    )
    # The command line chains steps with ";", so it is presumably run by a
    # POSIX shell (exec_cmd is not visible here -- TODO confirm). Quote the
    # interpolated values so paths/names containing spaces or shell
    # metacharacters stay single arguments.
    cmd_init_project = "cd {};git init;git remote add origin {}".format(
        shlex.quote(self.path), shlex.quote(remote_url)
    )
    self.exec_cmd(cmd_init_project)
    self.show_result("$ 初始化项目成功")
def on_buttonBox_accepted(self):
    """Persist the account settings from the dialog and apply them to git.

    Collects the username and e-mail from ``le_username`` / ``le_email`` plus
    ``self.path`` into a dict, writes it as JSON to ``self.config_path``,
    changes into ``self.path`` and passes a ``git config --global`` command
    line for ``user.name`` and ``user.email`` to ``self.exec_cmd``. Finally
    reports success via ``self.show_result`` and stores the dict on
    ``self.conf``.
    """
    import shlex  # local import: keeps this method independent of the file header

    conf = {
        "username": self.le_username.text(),
        "email": self.le_email.text(),
        "path": self.path,
    }

    check_and_create(self.config_path)
    check_and_create_dir(self.path)
    with open(self.config_path, "w+") as f:
        f.write(json.dumps(conf))

    os.chdir(self.path)
    # The command line chains steps with ";", so it is presumably run by a
    # POSIX shell (exec_cmd is not visible here -- TODO confirm). Without
    # quoting, a user name such as "John Doe" is split into two arguments.
    self.exec_cmd(
        "cd {};git config --global user.name {};git config --global user.email {}".format(
            shlex.quote(self.path),
            shlex.quote(conf["username"]),
            shlex.quote(conf["email"]),
        )
    )
    self.show_result("$ 设置帐号信息成功")
    self.conf = conf
def get_catelog_info(self):
    """Download the album catalog file and parse it into catalog entries.

    Runs ``wget`` on ``http://<self.cacheFileUrl>`` and saves the response as
    ``catelog_info.txt`` inside ``<self.backup_dir>source``. The file is read
    as gb2312 text and handed, together with five field specifications
    (``name``, ``desc``, ``count``, ``purl``, ``idd``), to
    ``self.analyze_response``.

    Returns:
        Whatever ``self.analyze_response`` returns for the catalog text.
    """
    import subprocess  # local import: keeps this method independent of the file header

    source_dir = self.backup_dir + "source"
    fileutil.check_and_create_dir(source_dir)

    # Build the command once so the logged command is exactly what runs.
    # An argument list with cwd= (instead of a "cd ...;wget ..." shell string)
    # keeps "&" in the URL and spaces in the backup path from being
    # interpreted by a shell.
    wget_cmd = [
        "wget",
        "http://{}".format(self.cacheFileUrl),
        "-O",
        "catelog_info.txt",
    ]
    print(self.cacheFileUrl)
    print("cd {};{}".format(source_dir, " ".join(wget_cmd)))
    subprocess.call(wget_cmd, cwd=source_dir)

    with open(source_dir + "/catelog_info.txt", "r", encoding="gb2312") as f:
        content = f.read().strip()
    self.show_status_signal.emit("获取相册目录成功")

    # Each spec names a field and the regex that extracts its value from the
    # catalog text.
    fields = [
        {"name": "name", "pattern": r"(?<=name:\').+?(?=\',)"},
        {"name": "desc", "pattern": r"(?<=desc:\').*?(?=\',)"},
        {"name": "count", "pattern": r"(?<=count:).+?(?=,)"},
        {"name": "purl", "pattern": r"(?<=purl:\').*?(?=\'})"},
        {"name": "idd", "pattern": r"(?<={id:).+?(?=,)"},
    ]
    catelog_items = self.analyze_response(content, fields)
    print("相册目录信息:", catelog_items)
    return catelog_items
def get_single_blog(self, item, item_index):
    """Download one blog post, save it as HTML and fetch its images.

    Args:
        item: Mapping describing the post; the keys ``link``, ``title``,
            ``accessCount`` and ``commentCount`` are read.
        item_index: Not used by this method.

    The raw page is saved under ``<backup_dir>日志/source/<title>.txt``. If the
    page has no ``span.blogsep`` element the post is treated as inaccessible
    and the method returns early. Otherwise an HTML file is written to
    ``<backup_dir>日志/<category>/<title>.html`` and every ``<img>`` with a
    ``src`` in the post body is downloaded into
    ``<backup_dir>日志/<category>/<title>/``.
    """
    link = item["link"]
    blog_title = item["title"]
    url = "http://{}.blog.163.com/{}".format(self.blog_name, link)
    print("url:", url)
    r = requests.get(url)
    html_doc = r.text

    source_dir = self.backup_dir + "日志/source/"
    fileutil.check_and_create_dir(source_dir)
    with open(source_dir + blog_title + ".txt", "w+") as f:
        f.write(html_doc)

    soup = BeautifulSoup(html_doc, "lxml")
    blog_sep = soup.find("span", class_="blogsep")
    blog_catelog = soup.find("a", class_="fc03 m2a")
    if blog_sep is None:
        print("无权限访问:" + item["title"])
        return
    category = blog_catelog.string
    print("发表时间:{},分类:{}".format(blog_sep.string, category))

    category_dir = self.backup_dir + "日志/" + category + "/"
    single_blog_dir = category_dir + blog_title + "/"

    blog_body = soup.find_all("div", class_="bct fc05 fc11 nbw-blog ztag")
    # Default to "no images" so the download step below is well defined even
    # when the body container is missing.
    blog_imgs = []
    if blog_body:
        blog_imgs = BeautifulSoup(str(blog_body[0]), "lxml").find_all("img")
        print("正文:", blog_body[0])
        fileutil.check_and_create_dir(single_blog_dir)
    else:
        # Without a body the per-post directory is not needed, but the HTML
        # file still needs its category directory.
        fileutil.check_and_create_dir(category_dir)

    content = "<html><meta charset='utf-8'><body><h1>{}</h1><div class='tag'>{} | 分类: {}</div><hr>".format(
        blog_title, blog_sep.string, category)
    if blog_body:
        str_body = str(blog_body[0])
        # Replace the remote prefix of each image URL with the post title.
        pattern = re.compile(r'(?<=src=").+?(?=/\d{5})')
        print(pattern.findall(str_body))
        # A callable replacement inserts the title literally; a plain string
        # would be parsed as a template and break on backslashes.
        content += pattern.sub(lambda _match: blog_title, str_body)
    content += "<br>阅读({}) | 评论({})".format(item["accessCount"],
                                              item["commentCount"])
    content += "</body></html>"

    # The document declares charset utf-8, so write it as utf-8 regardless of
    # the platform's default encoding.
    with open(category_dir + blog_title + ".html", "w+", encoding="utf-8") as f:
        f.write(content)

    # Download images
    if blog_imgs:
        self.show_status_signal.emit("正在获取日志:{}的{}张图片".format(
            item["title"], len(blog_imgs)))
    for img in blog_imgs:
        print(img)
        src = img.get("src")
        if not src:
            # <img> without a usable src: nothing to download.
            continue
        img_name = src[src.rindex("/") + 1:]
        print("下载图片:", src, img_name)
        self.show_status_signal.emit('下载图片:{}'.format(src))
        r = requests.get(src)
        with open(single_blog_dir + img_name, "wb+") as f:
            f.write(r.content)
def down_single_album(self, item):
    """Download every photo of one album into ``<backup_dir><album name>/``.

    Args:
        item: Catalog entry; the keys ``purl`` (address of the album's photo
            listing) and ``name`` (album name) are read.

    The listing is fetched with ``wget`` into
    ``<backup_dir>source/<stripped name>.txt``, read as gb2312 text and parsed
    by ``self.analyze_response`` into photo entries with ``desc`` and ``murl``.
    Each photo is downloaded with ``requests`` from
    ``self.img_url_prex + murl[1:]``. A failure on one photo is reported via
    ``self.show_status_signal`` and the loop continues. An album with an empty
    ``purl`` or without photo entries is reported as empty.
    """
    import subprocess  # local import: keeps this method independent of the file header

    emit = self.show_status_signal.emit
    album_name = item["name"]

    listing_url = item["purl"].strip()
    if listing_url == "":
        emit("空相册:{}".format(album_name))
        return

    source_dir = self.backup_dir + "source"
    listing_file = "{}.txt".format(album_name.strip())
    # Argument list + cwd instead of a "cd ...;wget ..." shell string, so album
    # names or paths containing spaces/metacharacters stay single arguments.
    # The logged command is built from the same values that are executed.
    wget_cmd = ["wget", "http://{}".format(listing_url), "-O", listing_file]
    print("cd {};{}".format(source_dir, " ".join(wget_cmd)))
    subprocess.call(wget_cmd, cwd=source_dir)

    with open(source_dir + "/" + listing_file, "r", encoding="gb2312") as f:
        content = f.read().strip()

    fields = [
        {"name": "desc", "pattern": r"(?<=desc:\').*?(?=\',)"},
        {"name": "murl", "pattern": r"(?<=murl:\').*?(?=\',)"},
    ]
    photos = self.analyze_response(str(content), fields)
    print(album_name, "相册目录信息:", photos)

    if not photos:
        emit("空相册:{}".format(album_name))
        return

    album_dir = self.backup_dir + album_name
    fileutil.check_and_create_dir(album_dir)
    total = len(photos)
    # 1-based counter, matching the album progress shown by down_album_source.
    for position, photo in enumerate(photos, start=1):
        try:
            r = requests.get(self.img_url_prex + photo["murl"][1:])
            murl = photo["murl"]
            if photo["desc"] == "":
                # No description: fall back to the last path segment of the
                # URL (including its leading "/").
                photo["desc"] = murl[murl.rindex("/"):]
            else:
                # Use the description as file name with the URL's extension.
                photo["desc"] = (
                    photo["desc"].replace(".JPG", "") + murl[murl.rindex("."):]
                )
            print("下载图片:{},{}".format(photo["desc"], photo["murl"]))
            emit(
                "相册:{},{}/{},图片:{}".format(
                    album_name, position, total, photo["desc"]
                )
            )
            with open(album_dir + "/" + photo["desc"], "wb+") as f:
                f.write(r.content)
        except Exception as exc:
            # Best effort per photo: report and continue. Exception (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            print(sys.exc_info())
            emit(
                "系统异常:"
                + str(exc)
                + "\n单张图片下载失败,图片:"
                + photo["desc"]
            )
def create_dir(self):
    """Set ``self.backup_dir`` and hand it to ``fileutil.check_and_create_dir``.

    The backup directory is the current working directory, a "/", the text of
    ``le_blog_name`` and the suffix "的博客备份/".
    """
    working_dir = os.getcwd()
    blog_name = self.le_blog_name.text()
    backup_root = working_dir + "/" + blog_name + "的博客备份/"
    self.backup_dir = backup_root
    fileutil.check_and_create_dir(backup_root)