def button1():
    """GUI callback: download the novel named in entry ``e1`` chapter by
    chapter into ``<save_path>/<book_name>.txt``, reporting progress in
    entry ``e4``.

    Skips the download entirely when a previous copy larger than 100 KB
    already exists (treated as complete); smaller copies are deleted and
    re-downloaded.
    """
    self.target = []
    self.names = []  # chapter titles
    self.urls = []   # chapter URLs
    self.nums = 0    # chapter count
    self.book_name = str(e1.get())
    self.target = self.url_data[self.book_name]
    self.save_path = str(e3.get())
    myfunc.mkdir(model.save_path)
    model.get_download_url()
    book_save_path = model.save_path + f'{self.book_name}.txt'
    if os.path.exists(book_save_path):
        # Files over 100 KB are assumed complete: keep them and abort.
        if os.path.getsize(book_save_path) / 1024 > 100:
            e4.delete(0, "end")
            e4.insert(0, f'已存在{self.book_name},中止爬虫')
            return
        else:
            # Partial download — remove it and start over.
            os.remove(book_save_path)
    e4.delete(0, "end")
    e4.insert(0, f'《{self.book_name}》开始下载')
    for i in range(model.nums):
        # Reuse book_save_path instead of rebuilding the same string
        # on every iteration (it is identical each time).
        model.writer(model.names[i], book_save_path,
                     model.get_contents(model.urls[i]))
        # Show percentage progress in the status entry.
        e4.delete(0, "end")
        e4.insert(0, "%.2f%%" % float((i + 1) / model.nums * 100))
    print(f"{self.book_name}下载完成")
def __init__(self, name="火影忍者"):
    """Initialize the comic downloader.

    Args:
        name: Comic title; also used as the name of the output folder
            under ``./漫画/``.
    """
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    self.server = 'https://manhua.fzdm.com/'    # chapter-list host
    self.web = 'http://p1.manhuapan.com/'       # image host
    self.save_path = './漫画/' + name + "/"     # output dir for this comic
    self.save_path_word = []  # per-chapter dir, filled in by download()
    self.index = 0            # page counter within a chapter
    self.name = name
    # Ensure the output directory exists (dead commented-out removal
    # code deleted here).
    myfunc.mkdir(self.save_path)
def download(self):
    """Scrape the chapter list from ``self.traget_url`` and download every
    page image of every chapter into its own sub-directory.

    NOTE(review): the URL-joining rules below may be wrong for some
    series — confirm against the site's current markup.
    """
    urls = []
    url_names = []
    req = requests.get(url=self.traget_url, headers=self.headers)
    bf = BeautifulSoup(req.text)
    for each in bf.find_all('a'):
        url = re.findall(r'''.*href="(.*)" title=.*''', str(each))
        url_name = re.findall(r'''.*title="(.*)">.*''', str(each))
        self.traget_chapter_url = self.traget_url + str(url)[2:-2]
        self.url_name = str(url_name)[2:-2]
        # Anchors with no href/title match yield an empty join; keep only
        # real chapter links (the joined URL grew past the base URL).
        if len(self.traget_chapter_url) > len(self.traget_url):
            url_names.append(self.url_name)
            urls.append(self.traget_chapter_url)
    # The site lists newest chapters first; reverse into story order.
    urls = urls[::-1]
    url_names = url_names[::-1]
    for i in range(len(urls)):
        self.save_path_word = self.save_path + url_names[i] + '/'
        myfunc.mkdir(self.save_path_word)
        self.chapter_name = url_names[i]
        # Pages are probed as index_0.html .. index_199.html; the first
        # failing fetch ends the chapter.
        urls_chapter = [
            urls[i] + 'index_{}.html'.format(str(j)) for j in range(0, 200)
        ]
        self.index = 0
        for url in urls_chapter:
            try:
                self.index = self.index + 1
                jpg_path = self.save_path_word + str(self.index) + '.jpg'
                if os.path.exists(jpg_path):
                    print(f"已存在{jpg_path} 跳过")
                    continue
                self.get_info(url)
            except Exception:
                # Was a bare `except:` — narrowed so Ctrl-C and
                # SystemExit are no longer swallowed. A failed page
                # fetch marks the end of the chapter.
                break
# NOTE(review): the three statements below appear to be the tail of the
# GUI-builder method — they reference its locals `window` and `button2`,
# whose enclosing `def` is outside this view; restore that method's
# indentation when merging.
tkinter.Button(window, text="搜索", width=10, command=button2).grid(row=4, column=0, sticky="w", padx=10, pady=5)
tkinter.Button(window, text="退出", width=10, command=window.quit).grid(row=4, column=1, sticky="e", padx=10, pady=5)
window.mainloop()

if __name__ == "__main__":
    #book_name = input()
    model = downloader()
    #myfunc.rmdir(model.save_path)
    # Make sure the save directory exists before launching the GUI.
    myfunc.mkdir(model.save_path)
    # Build and run the Tk window (blocks until the user quits).
    model.tkinter()
    '''
    model.get_download_url()
    print(f'《{model.book_name}》开始下载:')
    for i in range(model.nums):
        model.writer(model.names[i], model.save_path + f'{model.book_name}.txt',
                     model.get_contents(model.urls[i]))
        sys.stdout.write("  已下载:%.3f%%" % float(i/model.nums*100) + '\r')
        sys.stdout.flush()
    print(f'《{model.book_name}》下载完成')
    '''
# Stdlib imports. `os` was missing (os.walk below would raise NameError),
# and `time` was imported twice.
import os
import copy
import gc
import pickle
import random
import re
import time
import warnings

# Third-party. `pandas` was missing (pandas.read_csv below would raise
# NameError).
import numpy
import pandas
import torch

from qqd_model import myfunc

# Reset the dataset output directory.
data_path = "../Data/dataset"
myfunc.rmdir(data_path)
myfunc.mkdir(data_path)

# Settings (semantics inferred from the names — confirm against the
# model code that consumes them).
data_interval = 120        # length of the price window per sample
predict_interval = 3       # horizon to predict
profit_chg = 5             # profit-change threshold
dev_date = '2020-11-30'    # train/dev split date
train_split_num = 200000   # number of training samples

# Load every per-stock CSV under ../Data/stockdata/ into a list of frames.
stockprice = []
for root, dirs, files in os.walk('../Data/stockdata/'):
    for fname in files:
        # Stock code is the second dot-separated token of the file name.
        code = fname.split('.')[1]
        tmp = pandas.read_csv('../Data/stockdata/' + fname)
        stockprice.append(tmp)