def downBtnClicked(self):
    """Download every file of the currently selected title into its own directory.

    The target directory is created next to the running script
    (``sys.path[0]``) and named after the selected entry of ``self.titList``.
    If that directory already exists, nothing is downloaded and an
    information dialog is shown instead.

    Side effects: creates the directory, changes the process working
    directory to it (kept from the original behaviour), prints each file
    name, and writes one file per URL returned by ``getFileURL``.
    """
    whichChoice = self.titleList.currentRow()
    # The [2:-2] slices drop the first/last two characters of the str() form
    # (presumably the "['" / "']" of a one-element list repr -- confirm
    # against how titList / URLList are populated).
    tmpAdd = str(self.titList[whichChoice])
    # NOTE(review): URLList is indexed one past the selected row, as in the original.
    tmpurl = URL + str(self.URLList[whichChoice + 1])[2:-2]
    path = os.path.join(sys.path[0], tmpAdd[2:-2])

    # Target directory already present: report it and stop.
    if os.path.exists(path):
        # QMessageBox.information is the static dialog function;
        # QMessageBox.Information is only the icon enum and is not callable.
        QMessageBox.information(self, '提示', '已存在目标目录,请检查是否已下载过!!!')
        return

    os.makedirs(path)
    os.chdir(path)
    res = getHtml(tmpurl)
    # Fetch each file URL listed on the index page.
    for file_ref in getFileURL(res):
        page = getHtml(URL + str(file_ref))
        # Keep only the part of the file name before the first underscore.
        name_chars = []
        for ch in getFileName(file_ref):
            if ch == '_':
                break
            name_chars.append(ch)
        filename = ''.join(name_chars)
        print(filename)
        # "with" guarantees the handle is closed even if the write fails;
        # explicit UTF-8 avoids UnicodeEncodeError under non-UTF-8 locales.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(getContent(page))
def getData(self, url, path):
    """Download a slice of a book's chapters and save each as a text file.

    Args:
        url: Address of the page containing the chapter list
            (the element matched by ``id="list" ... </dl>``).
        path: Base directory; chapters are stored in a sub-directory named
            after ``self.book_number``.

    Raises:
        IndexError: If the chapter list, a chapter's content block or its
            ``<h1>`` title cannot be found in the fetched HTML.

    Only entries 8..19 of the extracted link list are processed. An
    information dialog is shown once all of them have been written.
    """
    html = self.urlTotext(url)
    dl = re.findall(r'id="list".*?</dl>', html, re.S)[0]
    links = re.findall(r'<a href="(.*?)">', dl)

    # Storage directory for this book; makedirs also creates missing parents
    # and tolerates an already existing directory.
    book_dir = os.path.join(path, self.book_number)
    os.makedirs(book_dir, exist_ok=True)

    for item in links[8:20]:
        # Drop the last five characters of the link
        # (presumably a ".html" suffix -- confirm against the site).
        serial_number = item[0:-5]
        print(serial_number)
        articleUrl = self.baseurl + item
        articleHtml = self.urlTotext(articleUrl)

        # Extract the chapter body and strip separator markup and spaces.
        article_content = re.findall(r'id="content">(.*?)</div>', articleHtml, re.S)[0]
        article_content = article_content.replace('<br /><br />', '')
        article_content = article_content.replace('</br>', '')
        article_content = article_content.replace(' ', '')

        title = re.findall(r'<h1>(.*?)</h1>', articleHtml, re.S)[0]
        fileName = os.path.join(book_dir, serial_number + title + '.txt')
        # "with" closes the file even if a write fails; explicit UTF-8 avoids
        # UnicodeEncodeError under non-UTF-8 locale defaults.
        with open(fileName, "w", encoding="utf-8") as newFile:
            newFile.write("<<" + title + ">>\n\n")
            newFile.write(article_content)

    # QMessageBox.information is the static dialog function;
    # QMessageBox.Information is only the icon enum and is not callable.
    QMessageBox.information(None, "提示", self.book_number + "的小说保存完成", QMessageBox.Ok)
def getData(self, url, path):
    """Download every article linked from a listing page and save it as text.

    Args:
        url: Address of the listing page; links are taken from the
            ``.booklist a`` selector.
        path: Base directory; articles are stored in a sub-directory named
            after ``self.date``.

    Each article file contains the title, the string form of the
    ``id="pub_date"`` element, and the text of every
    ``.blkContainerSblkCon p`` element. An information dialog is shown once
    all articles have been written.
    """
    soup = self.urlTosoup(url)
    link = soup.select('.booklist a')

    # Storage directory for this issue; makedirs also creates missing parents
    # and tolerates an already existing directory.
    issue_dir = os.path.join(path, self.date)
    os.makedirs(issue_dir, exist_ok=True)

    for item in link:
        articleUrl = self.baseurl + item['href']
        articleSoup = self.urlTosoup(articleUrl)

        heading = articleSoup.find("h1")
        # The original used str(tag).lstrip("<h1>").rstrip("</h1>"), but
        # lstrip/rstrip remove *sets of characters*, so titles starting or
        # ending with 'h', '1', '<', '>' or '/' were corrupted. get_text()
        # returns the heading text itself. A missing heading keeps the old
        # fallback name "None".
        title = heading.get_text() if heading is not None else str(heading)
        # Written exactly as the original did: the element's string form.
        author = str(articleSoup.find(id="pub_date")).strip()

        fileName = os.path.join(issue_dir, title + '.txt')
        # "with" closes the file even if a write fails; explicit UTF-8 avoids
        # UnicodeEncodeError under non-UTF-8 locale defaults.
        with open(fileName, "w", encoding="utf-8") as newFile:
            newFile.write("<<" + title + ">>\n\n")
            newFile.write(author + "\n\n")
            for paragraph in articleSoup.select(".blkContainerSblkCon p"):
                newFile.write(paragraph.text)

    # QMessageBox.information is the static dialog function;
    # QMessageBox.Information is only the icon enum and is not callable.
    QMessageBox.information(None, "提示", self.date + "的读者文章保存完成", QMessageBox.Ok)
def warning(self, message, parent=None, dialog=False):
    """Log a warning and optionally show it in a message box.

    Args:
        message: Text passed to the console logger and, if requested, shown
            in the dialog.
        parent: Parent widget for the dialog (may be ``None``).
        dialog: When true, additionally pop up a message box with a Close
            button.
    """
    self.__console('warning', message)
    if dialog:
        # QMessageBox.information is the static dialog function;
        # QMessageBox.Information is only the icon enum and is not callable.
        QMessageBox.information(parent, "警告", message, QMessageBox.Close)
def LOG_Information(self, Content):
    """Show ``Content`` (converted with ``str``) in an information dialog.

    Args:
        Content: Any object; its string form is used as the dialog text.
    """
    # QMessageBox.information is the static dialog function;
    # QMessageBox.Information is only the icon enum and is not callable.
    QMessageBox.information(self, "Information", str(Content), QMessageBox.Ok)