def __init__(self, logMessage, errMessage):
    """Initialise crawl state, storage helpers and the two message channels.

    Args:
        logMessage: Channel object stored on the instance for result log lines.
        errMessage: Channel object stored on the instance for system log lines.
    """
    # Message channels handed in by the caller.
    self.logMessage, self.errMessage = logMessage, errMessage

    # Paging / progress state.
    self.page = 1
    self.finsh = False
    self.paginator_next = False
    self.data_list = []

    # Persistence and notification helpers.
    self.dbconf = MongoConfig()
    self.dbprod = MongoProduct()
    self.dbkey = MongoKeyword()
    self.dmes = DingMsg()
def __init__(self, master, userListBox):
    """Open the always-on-top keyword dialog and build its widgets.

    Args:
        master: Parent widget the Toplevel dialog is attached to.
        userListBox: List box stored on the instance for later use.
    """
    self.master = master
    self.userListBox = userListBox

    dialog = tk.Toplevel(master)
    self.window = dialog
    dialog.wm_attributes('-topmost', 1)

    # Fixed dialog size, offset a quarter across and a third down the
    # free screen space.
    screen_w = dialog.winfo_screenwidth()
    screen_h = dialog.winfo_screenheight()
    win_w, win_h = 400, 300
    left = (screen_w - win_w) / 4
    top = (screen_h - win_h) / 3
    dialog.geometry('%dx%d+%d+%d' % (win_w, win_h, left, top))
    dialog.title('关键字')

    # Variables backing the three entry fields.
    self.username = tk.StringVar()
    self.password = tk.StringVar()
    self.maxPrice = tk.StringVar()

    self.dbkey = MongoKeyword()
    self.create_page()
def __init__(self, master):
    """Size and centre the main window, then build the page and start workers.

    Args:
        master: Root window that becomes ``self.window``.
    """
    root = master
    self.window = root

    # Centre a fixed-size window on the screen.
    screen_w = root.winfo_screenwidth()
    screen_h = root.winfo_screenheight()
    win_w, win_h = 1400, 650
    left = (screen_w - win_w) / 2
    top = (screen_h - win_h) / 2
    root.geometry('%dx%d+%d+%d' % (win_w, win_h, left, top))

    # Variables backing the configuration entries.
    self.threadnumVar = tk.IntVar()
    self.salenumVar = tk.IntVar()

    # Message queues for the two log panes.
    self.logMessage = JoinableQueue()
    self.errMessage = JoinableQueue()

    # Storage helpers.
    self.dbconf = MongoConfig()
    self.dbprod = MongoProduct()
    self.dbkey = MongoKeyword()
    self.dbtime = MongoTime()

    # Build the widgets, start the log consumers, then launch the crawler.
    self.create_page()
    self.show_logs()
    self.asyCraler()
class XianYu():
    """Crawler that searches Xianyu listings for every enabled keyword.

    Each call to :meth:`run` fetches one result page per enabled keyword,
    stores listings that are not yet in the product collection and pushes the
    new ones to a randomly chosen DingTalk robot.
    """

    # Upper bound on DingTalk delivery attempts per crawl round.
    max_send_attempts = 5
    # Listings published more than this many minutes ago are skipped.
    max_age_minutes = 10

    def __init__(self, logMessage, errMessage):
        """Create storage helpers and remember the two message channels.

        Args:
            logMessage: Channel whose ``put`` receives one line per new listing.
            errMessage: Channel whose ``put`` receives delivery/system messages.
        """
        self.page = 1
        self.dbconf = MongoConfig()
        self.dbprod = MongoProduct()
        self.dbkey = MongoKeyword()
        self.dmes = DingMsg()
        self.finsh = False
        self.paginator_next = False
        self.data_list = []
        # Filled by parse_html(); created here so parse_html() can be used
        # even before the first run().
        self.markdown_list = []
        self.logMessage = logMessage
        self.errMessage = errMessage

    def range_webhook(self):
        """Return the send URL of one randomly picked stored robot.

        Raises:
            ValueError: If no robot is stored (``count()`` is 0).
        """
        webhook = 'https://oapi.dingtalk.com/robot/send?access_token=' + self.dbconf.select_all(
        )[random.randint(0, self.dbconf.count() - 1)].get('webhook')
        return webhook

    async def parse_html(self, html, keyword):
        """Extract recent listings from a result page and store the new ones.

        Args:
            html: Markup of one search result page.
            keyword: Search keyword the page was fetched for.
        """
        print('开始解析')
        doc = pq(html)
        # Text of the "next page" control, kept for later paging support.
        self.paginator_next = doc.find('.paginator-next').text()
        for item in doc('#J_ItemListsContainer .ks-waterfall').items():
            data = {}
            data['keyword'] = keyword
            data['seller_nick'] = item.find('.seller-nick a').text()
            data['pic_href'] = 'https:' + str(
                item.find('.item-pic a').attr('href'))  # detail page URL
            data['title'] = item.find('.item-pic a').attr('title')
            data['img_src'] = 'https:' + str(
                item.find('.item-pic a img').attr('data-ks-lazyload-custom'))
            data['price'] = item.find(
                '.item-attributes .item-price span em').text()
            data['location'] = item.find(
                '.item-attributes .item-location').text()
            data['desc'] = item.find('.item-brief-desc').text()
            data['pub_time'] = item.find(
                '.item-pub-info .item-pub-time').text()
            # NOTE(review): '%H:%M-%S' looks like a typo for '%H:%M:%S'; left
            # unchanged because stored records may rely on this format.
            data['add_time'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M-%S')

            # Only "<n>分钟前" timestamps parse as an integer; any other text
            # is treated as too old and dropped.
            try:
                minutes = int(data['pub_time'].replace('分钟前', ''))
            except (AttributeError, ValueError):
                continue
            if minutes > self.max_age_minutes:
                continue

            # Insert and push only listings not yet stored for this keyword.
            if self.dbprod.select({
                    "keyword": keyword,
                    "pic_href": data.get('pic_href')
            }):
                print('数据已经存在mpass')
                print(data)
                continue
            self.dbprod.insert(data)
            # format() tolerates a missing (None) title, which string
            # concatenation did not.
            self.logMessage.put('[{}][{}][{}]'.format(
                keyword, data['pub_time'], data['title']))
            self.markdown_list.append(data)
        print('#' * 50)

    async def get(self, url, payload):
        """GET ``url`` with ``payload`` as query parameters; return the body text."""
        async with aiohttp.ClientSession() as session:
            async with session.get(url, params=payload) as resp:
                print(resp.status)
                print(resp.url)
                result = await resp.text()
                return result

    async def request(self, keyword_obj):
        """Fetch and parse the newest-first result page for one keyword record.

        Args:
            keyword_obj: Mapping with ``keyword``, ``minPrice`` and ``maxPrice``.
        """
        keyword = keyword_obj.get('keyword')
        minPrice = keyword_obj.get('minPrice')
        maxPrice = keyword_obj.get('maxPrice')
        payload = {
            "st_edtime": 1,  # newest first
            "_input_charset": "utf8",
            "search_type": "item",
            "q": keyword,
            "page": self.page,
            "start": minPrice,  # price range
            "end": maxPrice,
        }
        url = 'https://s.2.taobao.com/list/'
        print('Waiting for', url)
        result = await self.get(url, payload)
        await self.parse_html(result, keyword)

    def run(self, type):
        """Crawl every enabled keyword once and push the new listings.

        Args:
            type: Message type forwarded to ``DingMsg.send_msg``.
        """
        self.markdown_list = []
        key_objs = list(self.dbkey.select_all({'start': 1}))
        print(len(key_objs))
        if key_objs:
            self._event_loop().run_until_complete(self._crawl_all(key_objs))
        # One batched message per round instead of one per listing.
        if self.markdown_list:
            self._send_markdown(type)

    def _event_loop(self):
        """Return an open event loop owned by this instance, creating it if needed."""
        loop = getattr(self, '_loop', None)
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            self._loop = loop
        return loop

    async def _crawl_all(self, key_objs):
        """Run one request per keyword concurrently and report failed ones."""
        tasks = [asyncio.ensure_future(self.request(key_obj))
                 for key_obj in key_objs]
        done, _pending = await asyncio.wait(tasks)
        for task in done:
            # Retrieve the exception so a failed keyword is visible in the
            # system log instead of being silently dropped.
            if not task.cancelled() and task.exception() is not None:
                self.errMessage.put(
                    '关键字采集失败:{!r}'.format(task.exception()))

    def _send_markdown(self, msg_type):
        """Deliver the collected listings, retrying a bounded number of times.

        Returns:
            True when a robot accepted the message, False when every attempt
            failed.
        """
        for _attempt in range(self.max_send_attempts):
            # A different random robot may be picked on every attempt.
            if self.dmes.send_msg(webhook_url=self.range_webhook(),
                                  data=self.markdown_list,
                                  type=msg_type):
                self.errMessage.put('钉钉消息发送,使用类型{}'.format(msg_type))
                return True
            self.errMessage.put('钉钉消息发送失败,发送数据太过于频繁')
        return False
class optionUser(object):
    """Dialog for adding a keyword task (keyword plus optional price range).

    Attribute names still say "user"/"password"; the fields actually hold the
    keyword, the minimum price and the maximum price.
    """

    def __init__(self, master, userListBox):
        """Open the dialog on top of ``master``.

        Args:
            master: Parent widget of the Toplevel dialog.
            userListBox: List box that receives a row for each added keyword.
        """
        self.master = master
        self.userListBox = userListBox
        self.window = tk.Toplevel(master)
        self.window.wm_attributes('-topmost', 1)
        sw = self.window.winfo_screenwidth()
        sh = self.window.winfo_screenheight()
        ww = 400
        wh = 300
        x = (sw - ww) / 4
        y = (sh - wh) / 3
        self.window.geometry('%dx%d+%d+%d' % (ww, wh, x, y))  # dialog size and position
        self.window.title('关键字')
        self.username = tk.StringVar()  # keyword
        self.password = tk.StringVar()  # minimum price
        self.maxPrice = tk.StringVar()  # maximum price
        self.dbkey = MongoKeyword()
        self.create_page()

    def create_page(self):
        """Lay out the three labelled entries and the confirm button."""
        User = tk.LabelFrame(self.window, text="关键字配置", padx=10, pady=5)
        User.place(x=50, y=80)
        tk.Label(User, text="关键字:").grid(column=0, row=0, sticky='w', pady=5, padx=5)
        tk.Label(User, text="最低价格:").grid(column=0, row=1, sticky='w', pady=5, padx=5)
        tk.Label(User, text="最高价格:").grid(column=0, row=2, sticky='w', pady=5, padx=5)
        self.userEntry = tk.Entry(User, textvariable=self.username, width=23)
        self.userEntry.grid(column=1, row=0, pady=5)
        self.pwdEntry = tk.Entry(User, textvariable=self.password, width=23)
        self.pwdEntry.grid(column=1, row=1, pady=5)
        self.maxPEntry = tk.Entry(User, textvariable=self.maxPrice, width=23)
        self.maxPEntry.grid(column=1, row=2, pady=5)
        tk.Button(User, text="确认添加", command=self.add_user).grid(columnspan=2, row=3, pady=5, ipadx=10)

    def _clear_fields(self):
        """Empty all three entry fields."""
        self.username.set('')
        self.password.set('')
        self.maxPrice.set('')

    def add_user(self):
        """Validate the form, store the keyword and append it to the list box."""
        username = self.userEntry.get()
        pwd = self.pwdEntry.get()
        maxP = self.maxPEntry.get()

        # Compare by value: ``is ''`` tests object identity, not emptiness.
        if username == '':
            tk.messagebox.showerror(title='错误', message='关键字不为空!')
            return
        if not tk.messagebox.askyesno(title='检查', message='请核对{}信息无误后确认添加'.format(username)):
            return
        if self.dbkey.select_one({"keyword": username}):
            self._clear_fields()
            tk.messagebox.showerror('错误', '此关键字已经存在')
            return

        # Blank price bounds are stored as 0 / the string 'None'.
        if pwd == '':
            pwd = 0
        if maxP == '':
            maxP = 'None'
        self.dbkey.insert({"start": 1, "keyword": username, "minPrice": pwd, "maxPrice": maxP})
        self._clear_fields()
        # Mirror the new record in the caller's list box.
        self.userListBox.insert(END, '关键字:{} 价格:{}-{} 状态:{};'.format(username, pwd, maxP, '开启'))
        tk.messagebox.showinfo(title='恭喜', message='账户添加成功!')
        # Refresh this dialog rather than a module-level ``window`` global.
        self.window.update()

    def delete_user(self, user, pwd):
        """Placeholder; not implemented."""
        pass
class MainPage(object):
    """Main window of the collector GUI.

    Builds the keyword-task list, the DingTalk robot list, the configuration
    panel and two log panes on the given root window, starts the log consumer
    threads and launches the crawler process.
    """

    # Value returned by _get_active_keyList() when no keyword row is selected.
    _NO_SELECTION = '请选定指定关键字'

    def __init__(self, master):
        """Size and centre the window, then build the page and start workers.

        Args:
            master: Root window that becomes ``self.window``.
        """
        self.window = master
        sw = self.window.winfo_screenwidth()
        sh = self.window.winfo_screenheight()
        ww = 1400
        wh = 650
        x = (sw - ww) / 2
        y = (sh - wh) / 2
        self.window.geometry('%dx%d+%d+%d' % (ww, wh, x, y))  # window size and position
        self.threadnumVar = tk.IntVar()
        self.salenumVar = tk.IntVar()
        self.logMessage = JoinableQueue()
        self.errMessage = JoinableQueue()
        self.dbconf = MongoConfig()
        self.dbprod = MongoProduct()
        self.dbkey = MongoKeyword()
        self.dbtime = MongoTime()
        self.create_page()
        self.show_logs()
        self.asyCraler()

    def asyCraler(self):
        """Start the crawler in a daemon child process and return immediately.

        The process is deliberately not joined: join() would block the
        constructor until the crawler exits, so the caller could not reach
        the Tk main loop while the crawler is running.
        """
        self._crawler_process = Process(target=_run, args=(self.logMessage, self.errMessage))
        self._crawler_process.daemon = True
        self._crawler_process.start()

    def _temp_t(self):
        """Start the crawler launcher from a helper thread."""
        t = threading.Thread(target=self.asyCraler, args=())
        t.start()
        print('启动线程')

    def create_page(self):
        """Build every panel of the main window."""
        self.meun()  # menu bar
        self.keyword()  # DingTalk robots
        self.config()  # configuration
        self.log()  # result log
        self.error_log()  # system log
        self.user()  # keyword tasks
        self.img()  # logo image

    def img(self):
        """Show the logo image in the top grid cells."""
        photo = PhotoImage(file='xianyu.png')
        label = Label(image=photo)
        label.image = photo  # keep a reference on the widget to the image object
        label.grid(row=0, column=2, columnspan=2, rowspan=2, sticky=W + E + N + S, padx=5, pady=5)

    def keyword(self):
        """Build the DingTalk robot list and its add/delete/test buttons."""
        Keyword = tk.LabelFrame(self.window, text="钉钉机器人", padx=10, pady=10)
        Keyword.place(x=1070, y=100)
        self.keywordListBox = Listbox(Keyword, width=35, height=8, )
        self.keywordListBox.pack(side=LEFT)
        keywordScroBar = Scrollbar(Keyword)
        keywordScroBar.pack(side=RIGHT, fill=Y)
        self.keywordListBox['yscrollcommand'] = keywordScroBar.set
        for key in self.dbconf.select_all():
            self.keywordListBox.insert(END, '机器人:{};'.format(key.get('webhook')))
        keywordScroBar['command'] = self.keywordListBox.yview
        keywordoption = tk.LabelFrame(self.window, text="", padx=10, pady=5)
        keywordoption.place(x=1070, y=290)
        tk.Button(keywordoption, text="添加机器人", command=self.add_keyword).grid(column=0, row=1, padx=9, pady=5)
        tk.Button(keywordoption, text="删除机器人", command=self.delete_keyword).grid(column=1, row=1, padx=9, pady=5)
        tk.Button(keywordoption, text="测试机器人", command=self.testLogin).grid(column=2, row=1, padx=9, pady=5)

    def insert_userListbox(self):
        """Append one row per stored keyword task to the task list box."""
        for user in self.dbkey.select_all({}):
            username = user.get('keyword')
            pwd = user.get('minPrice')
            maxP = user.get('maxPrice')
            now_status = '开启' if user.get('start') == 1 else '关闭'
            self.userListBox.insert(END, '关键字:{} 价格:{}-{} 状态:{};'.format(username, pwd, maxP, now_status))

    def _reload_userListbox(self):
        """Clear the task list box and refill it from the database."""
        self.userListBox.delete(0, END)
        self.insert_userListbox()

    def user(self):
        """Build the keyword-task list and its control buttons."""
        User = tk.LabelFrame(self.window, text="关键字任务", padx=10, pady=5)
        User.place(x=30, y=100)
        self.userListBox = Listbox(User, width=50, height=9, )
        self.userListBox.pack(side=LEFT)
        userScroBar = Scrollbar(User)
        userScroBar.pack(side=RIGHT, fill=Y)
        self.userListBox['yscrollcommand'] = userScroBar.set
        self.insert_userListbox()
        userScroBar['command'] = self.userListBox.yview
        Useroption = tk.LabelFrame(self.window, text="", padx=10, pady=5)
        Useroption.place(x=30, y=300)
        tk.Button(Useroption, text="添加关键字", command=self.add_user).grid(column=0, row=1, padx=57, pady=5)
        tk.Button(Useroption, text="删除关键字", command=self.delete_use).grid(column=1, row=1, padx=57, pady=5)
        tk.Button(Useroption, text="一键开启", command=self.all_start).grid(column=0, row=3, padx=55, pady=5)
        tk.Button(Useroption, text="一键关闭", command=self.all_stop).grid(column=1, row=3, padx=55, pady=5)
        self.startBtn = tk.Button(Useroption, text="单项开启", command=self.start_spider)
        self.startBtn.grid(column=0, row=2, padx=55, pady=5)
        self.stopBtn = tk.Button(Useroption, text="单项关闭", command=self.stop_spider)
        self.stopBtn.grid(column=1, row=2, padx=55, pady=5)

    def config(self):
        """Build the configuration panel, seeding defaults when none are stored."""
        Config = tk.LabelFrame(self.window, text="配置", padx=25, pady=5)
        Config.place(x=30, y=430)
        tk.Label(Config, text="爬取频率/s:").grid(column=0, row=0, sticky='w', pady=5)
        try:
            configs = self.dbtime.select_one({})
            self.threadnum = configs.get('time')
            self.salenum = configs.get('type')
        except Exception:
            # Reading the stored configuration failed: write defaults and use them.
            self.dbtime.insert({"flag": 1, "time": 10, "type": 3})
            self.threadnum = 10
            self.salenum = 3
        self.threadnumVar.set(self.threadnum)
        self.salenumVar.set(self.salenum)
        self.threadEntry = tk.Entry(Config, textvariable=self.threadnumVar, width=38)
        self.threadEntry.grid(column=1, row=0, pady=5)
        self.saleEntry = tk.Entry(Config, textvariable=self.salenumVar, width=38)
        self.saleEntry.grid(column=1, row=1, pady=5)
        Config_start = tk.LabelFrame(self.window, text="", padx=10, pady=5)
        Config_start.place(x=30, y=550)
        tk.Button(Config_start, text="更新配置", command=self.updata_config).grid(column=0, row=0, pady=5, ipadx=20, padx=15)
        self.clearDbBtn = tk.Button(Config_start, text="清空配置", command=self.clearDB)
        self.clearDbBtn.config(bg='red')
        self.clearDbBtn.grid(column=2, row=0, pady=5, ipadx=15, padx=15)
        self.logoutBtn = tk.Button(Config_start, text="清除缓存", command=self.clear_product)
        self.logoutBtn.grid(column=1, row=0, pady=5, ipadx=15, padx=15)

    def loading(self):
        """Build the (currently unused) progress-bar canvas."""
        Loading = tk.LabelFrame(self.window, text="进度条", padx=10, pady=5)
        Loading.place(x=350, y=20)
        canvas = tk.Canvas(Loading, width=665, height=22, bg="white")
        canvas.grid()

    def log(self):
        """Build the result-log pane and queue the welcome line."""
        self.logMessage.put('欢迎使用【闲鱼信息采集器】')
        logInformation = tk.LabelFrame(self.window, text="日志", padx=10, pady=10)
        logInformation.place(x=450, y=100)
        self.logInformation_Window = scrolledtext.ScrolledText(logInformation, width=77, height=22, padx=10, pady=10,
                                                               wrap=tk.WORD)
        self.logInformation_Window.grid()

    def error_log(self):
        """Build the system-log pane."""
        error_logInformation = tk.LabelFrame(self.window, text="系统日志", padx=10, pady=10)
        error_logInformation.place(x=450, y=460)
        self.errorInformation_Window = scrolledtext.ScrolledText(error_logInformation, width=77, height=5, padx=10,
                                                                 pady=10, wrap=tk.WORD)
        self.errorInformation_Window.grid()

    def meun(self):
        """Attach the "about" menu bar to this window."""
        menubar = tk.Menu(self.window)
        aboutmemu = tk.Menu(menubar, tearoff=0)
        menubar.add_cascade(label='关于', menu=aboutmemu)
        aboutmemu.add_command(label='软件说明', command=self.show_Description)
        aboutmemu.add_command(label='版本', command=self.show_Version)
        aboutmemu.add_command(label='开发者', command=self.show_Developer)
        # Configure the window this page owns, not a module-level global.
        self.window.config(menu=menubar)

    def add_keyword(self):
        """Open the dialog that adds a DingTalk robot."""
        optionKeyword(self.window, self.keywordListBox)

    def delete_keyword(self):
        """Delete the selected DingTalk robot after confirmation."""
        if not tk.messagebox.askyesno('警告', message='是否删除机器人'):
            return
        try:
            # IndexError here means nothing is selected; handled below.
            index = self.keywordListBox.curselection()[0]
            value = self.keywordListBox.get(index)
            keyword = re.findall('机器人:(.*?);', value.replace('\n', '').replace(' ', ''), re.S)
            print(keyword[0])
            self.dbconf.delete({"webhook": keyword[0]})
            # Remove the row that was read, which is not necessarily ACTIVE.
            self.keywordListBox.delete(index)
            self.errMessage.put('成功删除机器人:{}'.format(keyword[0]))
            tk.messagebox.showinfo('成功', message='成功删除机器人:{}'.format(keyword[0]))
        except Exception:
            tk.messagebox.showerror('错误', message='请选定指定关键字删除')

    def testLogin(self):
        """Send a test message through the selected DingTalk robot."""
        self.errMessage.put('进行机器人测试')
        selection = self.keywordListBox.curselection()
        keyword = []
        if selection:
            value = self.keywordListBox.get(selection[0])
            keyword = re.findall('机器人:(.*?);', value.replace('\n', '').replace(' ', ''), re.S)
        if not keyword:
            # Nothing selected (or an unparsable row): tell the user
            # instead of raising inside the Tk callback.
            tk.messagebox.showerror('错误', message='请选定指定机器人')
            return
        dmsg = DingMsg()
        if dmsg.send_msg(webhook_url='https://oapi.dingtalk.com/robot/send?access_token=' + keyword[0],
                         data='欢迎测试闲鱼信息及时推送器-机器人验证', type=4):
            tk.messagebox.showinfo(title='恭喜', message='信息已经发送到钉钉')
            self.errMessage.put('信息已经发送到钉钉')
        else:
            tk.messagebox.showerror(title='警告', message='此链接可能失效,请重试')
            self.errMessage.put('此链接可能失效,请重试')

    def add_user(self):
        """Open the dialog that adds a keyword task."""
        optionUser(self.window, self.userListBox)

    def _get_active_keyList(self):
        """Return ``[keyword]`` for the selected task row.

        When nothing usable is selected an error dialog is shown and
        ``[_NO_SELECTION]`` is returned.
        """
        try:
            value = self.userListBox.get(self.userListBox.curselection())
            print(value)
            user_pwd = re.findall('关键字:(.*?)价格', value.replace('\n', '').replace(' ', ''), re.S)
            if not user_pwd:
                raise ValueError(value)
            return user_pwd
        except Exception:
            tk.messagebox.showerror('错误', message='请选定指定关键字')
            return [self._NO_SELECTION]

    def delete_use(self):
        """Delete the selected keyword task after confirmation."""
        user_pwd = self._get_active_keyList()
        if user_pwd[0] == self._NO_SELECTION:
            return False
        # Remember the row before the confirmation dialog opens.
        selection = self.userListBox.curselection()
        if tk.messagebox.askyesno('警告', message='是否删除关键字'):
            try:
                self.dbkey.delete({"keyword": user_pwd[0]})
                self.userListBox.delete(selection[0])
                self.errMessage.put('成功删除关键字任务{}'.format(user_pwd[0]))
                tk.messagebox.showinfo('成功', message='成功删除用户{}'.format(user_pwd[0]))
            except Exception:
                tk.messagebox.showerror('错误', message='请选定指定账户删除')

    def updata_config(self):
        """Persist the crawl interval and send type typed into the entries."""
        self.logMessage.put('更新配置')
        threadnum = self.threadEntry.get()
        salenum = self.saleEntry.get()
        print(threadnum)
        print(salenum)
        try:
            interval = int(threadnum)
            send_type = int(salenum)
        except ValueError:
            # Non-numeric input: report it instead of raising in the callback.
            tk.messagebox.showerror(title='配置', message='爬取频率和发送方式必须为整数!')
            return
        self.dbtime.update_time(interval)
        self.dbtime.update_type(send_type)
        tk.messagebox.showinfo(title='配置', message='配置信息更新成功!')

    def all_start(self):
        """Enable every keyword task and refresh the list."""
        self.dbkey.collection.update_many({"start": 0}, {'$set': {"start": 1}})
        self._reload_userListbox()
        self.errMessage.put('已开启全部任务')
        tk.messagebox.showinfo(title='任务', message='已开启全部任务!')

    def all_stop(self):
        """Disable every keyword task and refresh the list."""
        self.dbkey.collection.update_many({"start": 1}, {'$set': {"start": 0}})
        self._reload_userListbox()
        self.errMessage.put('已关闭全部任务')
        tk.messagebox.showinfo(title='任务', message='已关闭全部任务!')

    def start_spider(self):
        """Enable the selected keyword task."""
        user_pwd = self._get_active_keyList()
        if user_pwd[0] == self._NO_SELECTION:
            return  # the error dialog has already been shown
        self.errMessage.put('开始闲鱼数据:{}采集'.format(user_pwd))
        self.dbkey.update_start(user_pwd[0])
        self._reload_userListbox()

    def stop_spider(self):
        """Disable the selected keyword task."""
        user_pwd = self._get_active_keyList()
        if user_pwd[0] == self._NO_SELECTION:
            return  # the error dialog has already been shown
        self.errMessage.put('关闭闲鱼数据:{}采集'.format(user_pwd))
        self.dbkey.update_stop(user_pwd[0])
        self._reload_userListbox()

    def clear_product(self):
        """Delete every cached product record after confirmation."""
        if tk.messagebox.askyesno(title='删除', message='这将清空缓存数据,是否确定删除?'):
            self.logMessage.put('开始清除数据库缓存')
            self.dbprod.delete_all({})
            self.logMessage.put('清除数据库缓存结束')
            tk.messagebox.showinfo(title='恭喜', message='清除数据库缓存结束')

    def clearDB(self):
        """Delete all stored data after two confirmations."""
        if not tk.messagebox.askyesno(title='删除', message='这将清空所有的数据,是否确定删除?'):
            return
        if not tk.messagebox.askyesno(title='再次确认', message='清空数据后请重启软件,是否确定删除?'):
            return
        self.dbkey.delete_all({})
        self.dbtime.delete_all({})
        self.dbconf.delete_all({})
        self.dbprod.delete_all({})
        self.logMessage.put('清除数据库所有数据')
        self.logMessage.put('请重新启动软件,加载配置')
        self.window.update()
        tk.messagebox.showinfo(title='恭喜', message='所有数据清除完成!请重新启动软件,加载配置')

    def _pump_queue(self, source, widget):
        """Forever move messages from ``source`` into the text ``widget``.

        NOTE(review): this runs on a worker thread and touches a Tk widget
        directly; confirm the Tk build in use tolerates that, or switch to
        polling the queue with ``after``.
        """
        while True:
            log = source.get()
            date = datetime.now().strftime("%m-%d %H:%M:%S")
            widget.insert(END, '[{date}][{log}]'.format(date=date, log=log) + '\n')
            widget.see(END)

    def log_queue(self):
        """Consume the result-log queue into the result-log pane."""
        self._pump_queue(self.logMessage, self.logInformation_Window)

    def errlog_queue(self):
        """Consume the system-log queue into the system-log pane."""
        self._pump_queue(self.errMessage, self.errorInformation_Window)

    def show_logs(self):
        """Start one daemon thread per log queue."""
        Tlog_queue = threading.Thread(target=self.log_queue, args=())
        Terrlog_queue = threading.Thread(target=self.errlog_queue, args=())
        Tlog_queue.daemon = True
        Tlog_queue.start()
        Terrlog_queue.daemon = True
        Terrlog_queue.start()

    def show_Description(self):
        """Open the software description window."""
        Description(self.window)

    def show_Version(self):
        """Open the version window."""
        Version(self.window)

    def show_Developer(self):
        """Open the developer window."""
        Developer(self.window)