def on_cb_waiting(self, tab, page, loops, remain):
    """Default wait-event callback; shows wait progress in the page title.

    While waiting (``loops`` is non-zero) the tab's real title is first
    captured into ``tab.last_title``; once a title has been captured, each
    further call overwrites ``document.title`` with a progress message.
    When ``loops`` is zero and a title was captured, the saved title is
    written back to the page.

    tab    - current tab object (uses tab.last_title, tab.last_url, tab.exec)
    page   - page content fetched in this round
    loops  - number of wait loops completed so far
    remain - remaining wait time in seconds (float)

    Returns whether waiting should stop; this default always returns False.
    """
    import json  # local import: only needed to build a safe JS string literal

    if loops:
        if not tab.last_title:
            # No usable title recorded yet: try to extract it from the HTML.
            tab.last_title = spd_base.query_re_str(page, '<title>(.*?)</title>', '')
            if not tab.last_title:
                # Nothing in the HTML: ask the live page for its title.
                title, msg = tab.exec('document.title')
                if title != tab.last_url:
                    # A title equal to the URL is treated as "no real title yet".
                    tab.last_title = title
        else:
            # Still waiting: show the progress in the page title.
            js = """document.title='等待<%d>次 剩余<%.02f>秒'""" % (loops, remain)
            tab.exec(js)
    elif tab.last_title:
        # Waiting finished: restore the original title.  The title is emitted
        # as a JSON string literal (a valid JavaScript string literal) so that
        # quotes, backslashes or newlines inside it cannot break the script.
        js = 'document.title=%s' % json.dumps(tab.last_title)
        tab.exec(js)

    return False
def _on_requestWillBeSent(self, requestId, loaderId, documentURL, request, timestamp, wallTime, initiator, **param):
    """Record the information of an outgoing request.

    The request is stored twice: appended as ``(request, requestId)`` to the
    per-URL history in ``self._data_requestWillBeSent``, and as a fresh
    one-element list ``[request]`` under ``self._data_requestIDs[requestId]``.
    Only ``requestId`` and ``request`` are used; the other parameters are
    accepted and ignored.
    """
    url = request['url']

    # When a filter pattern is configured, URLs that do not match are ignored.
    filter_re = self.req_event_filter_re
    if filter_re and not spd_base.query_re_str(url, filter_re):
        return

    # Fetch (creating on first use) the history list for this URL.
    history = self._data_requestWillBeSent.setdefault(url, [])
    if len(history) > 100:
        # History too long: discard the oldest entry before appending.
        del history[0]
    history.append((request, requestId))

    # Index the request by its id (the original comment calls this
    # "response stage 1").
    self._data_requestIDs[requestId] = [request]
def _on_Page_javascriptDialogOpening(self, url, message, type, hasBrowserHandler, *args, **kwargs):
    """Intercept a JavaScript dialog opened by the page.

    Only ``alert`` dialogs are handled, and only when
    ``self.disable_alert_url_re`` is set and matches ``url``; in that case
    the dialog is answered with ``accept=False``.  Every other dialog is
    left untouched.  Always returns None.
    """

    def _answer_dialog(accept=False, timeout=10):
        """Answer the open dialog via Page.handleJavaScriptDialog.

        Returns (True, '') on success, or (False, error text) on failure.
        """
        try:
            self.call_method('Page.handleJavaScriptDialog', accept=accept, _timeout=timeout)
        except Exception as e:
            return False, spd_base.es(e)
        return True, ''

    if type != 'alert':
        return
    alert_url_re = self.disable_alert_url_re
    if not alert_url_re:
        return
    if spd_base.query_re_str(url, alert_url_re):
        _answer_dialog(False)  # result is deliberately ignored (best effort)
def _get():
    """Return the recorded request URLs selected by ``hold_url``.

    Uses ``self``, ``hold_url`` and ``url_is_re`` from the enclosing scope:
    - ``hold_url`` is None: every recorded request URL is returned;
    - ``url_is_re`` is true: the recorded URLs matching ``hold_url`` as a
      pattern are returned;
    - otherwise: ``[hold_url]`` when ``self.get_request_info(hold_url)``
      is truthy, else an empty list.
    """
    if hold_url is None:
        # No URL selector given: return all recorded request URLs.
        return list(self._data_requestWillBeSent.keys())

    if url_is_re:
        # Keep only the recorded URLs that match the pattern.
        return [
            recorded
            for recorded in self._data_requestWillBeSent.keys()
            if spd_base.query_re_str(recorded, hold_url)
        ]

    return [hold_url] if self.get_request_info(hold_url) else []
def clear_storage(self, tab, url=None, types='all'):
    """Delete the browser storage data of the origin that ``url`` belongs to.

    tab   - tab selector, resolved through ``self._tab``
    url   - URL whose origin is cleared; defaults to the tab's ``last_url``
    types - comma-separated list of storage types, any of:
            appcache, cookies, file_systems, indexeddb, local_storage,
            shader_cache, websql, service_workers, cache_storage,
            interest_groups, all, other

    Returns (bool, msg): msg is '' on success, otherwise error information
    (the text produced by ``py_util.get_trace_stack()``).
    """
    try:
        target = self._tab(tab)
        page_url = target.last_url if url is None else url
        # The origin is the leading "scheme://host/" part of the URL.
        origin = spd_base.query_re_str(page_url, '^.*?://.*?/')
        target.call_method(
            'Storage.clearDataForOrigin',
            origin=origin,
            storageTypes=types,
            _timeout=self.proto_timeout,
        )
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def get_downtmp(rrinfo, downpath):
    """Try to locate a downloaded file when fetching the response body failed.

    rrinfo   - 3-element sequence whose second element is a dict that may
               carry the response headers under the 'headers' key
    downpath - download directory prefix; the file name is appended to it
               directly, so it must already end with a path separator

    Returns the file content as read by ``spd_base.load_from_file`` (the
    file is deleted after a successful read), or None when the response
    names no attachment or nothing could be read.
    """
    if len(rrinfo) != 3:
        return None

    rsp_heads = rrinfo[1].get('headers', None)
    if rsp_heads is None:
        return None

    # HTTP header names are case-insensitive (and always lower-case over
    # HTTP/2), so do not rely on one exact spelling of Content-Disposition.
    content_disposition = None
    for head_name, head_value in rsp_heads.items():
        if str(head_name).lower() == 'content-disposition':
            content_disposition = head_value
            break
    if content_disposition is None:
        return None

    # Example header value: attachment; filename="report.pdf"
    filename = spd_base.query_re_str(content_disposition, r'filename\s*=\s*"?([^"]*)[";]?', None)
    if filename is None:
        return None

    # The name comes from the remote server: keep only its last path
    # component so it cannot point outside of the download directory.
    filename = filename.replace('\\', '/').rsplit('/', 1)[-1]
    if not filename or filename in ('.', '..'):
        return None

    filepath = downpath + filename
    filedata = spd_base.load_from_file(filepath, None, 'rb')
    if filedata:
        os.remove(filepath)  # downloaded and read successfully: remove the file
    return filedata
def __init__(self, url="http://127.0.0.1:9222"):
    """Remember the DevTools endpoint and derive the temporary download path.

    url - address of the browser's remote-debugging endpoint

    ``self.downpath`` is the current working directory followed by
    '\\tmpdown', the port digits extracted from ``url`` and a trailing
    backslash.  NOTE(review): the third argument of ``query_re_str`` is
    presumably the fallback used when no port is found - confirm.
    """
    self.dev_url = url
    self._tabs = {}  # tabs managed by this object
    port_tag = spd_base.query_re_str(url, r'://.*:(\d+)', 'tmpdown')
    self.downpath = os.getcwd() + '\\tmpdown' + port_tag + '\\'