def exec(self, tab, js):
    """Run JavaScript code in the given tab.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        js: JavaScript source text to evaluate.

    Returns:
        Whatever the tab object's ``exec`` returns (described by the original
        notes as ``(content, error_message)``), or ``('', trace)`` when any
        exception is raised along the way.
    """
    try:
        target = self._tab(tab)
        outcome = target.exec(js, self.proto_timeout)
    except Exception:
        return '', py_util.get_trace_stack()
    return outcome
def query_cookies(self, tab, urls=None):
    """Query the cookies that apply to the given URLs.

    When ``urls`` is not given, ``Network.getCookies`` is called without a
    URL list, which per the original notes yields all cookies of the current
    tab. A URL may carry a domain/path restriction, e.g.
    'http://xysy.sanya.gov.cn/CreditHnExtranetWeb'.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        urls: A single URL string, a list of URLs, or ``None``.

    Returns:
        ``(cookies, '')`` on success, where each cookie dict has the fields
        listed in ``unwanted`` removed; ``(None, trace)`` on any exception.
    """
    # Fields of each cookie record that callers are not interested in.
    unwanted = ('size', 'httpOnly', 'session', 'priority', 'sameParty', 'sourceScheme')
    try:
        target = self._tab(tab)
        if isinstance(urls, str):
            urls = [urls]

        params = {}
        if isinstance(urls, list):
            params['urls'] = urls
        params['_timeout'] = self.proto_timeout
        reply = target.call_method('Network.getCookies', **params)

        cookies = reply['cookies']
        for cookie in cookies:
            for field in unwanted:
                cookie.pop(field, None)
        return cookies, ''
    except Exception:
        return None, py_util.get_trace_stack()
def new(self, url='', req_event_filter=None):
    """Open a new tab and browse to ``url``.

    Args:
        url: Address passed to ``browser.new_tab``.
        req_event_filter: Forwarded unchanged to ``browser.new_tab``.

    Returns:
        ``(tab_object, '')`` on success, ``(None, trace)`` on any exception.
    """
    try:
        created = self.browser.new_tab(
            url, self.proto_timeout, req_event_filter=req_event_filter)
    except Exception:
        return None, py_util.get_trace_stack()
    return created, ''
def close(self, tab):
    """Close the given tab.

    Args:
        tab: Tab id or ordinal index, resolved through ``self._tab``.

    Returns:
        ``(tab_id, '')`` on success, ``('', trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        self.browser.close_tab(target.id, self.proto_timeout)
        closed_id = target.id
    except Exception:
        return '', py_util.get_trace_stack()
    return closed_id, ''
def tab(self, tab):
    """Look up a tab object by its identifier or ordinal index.

    Args:
        tab: Tab identifier or index; ``None`` is rejected up front.

    Returns:
        ``(tab_object, '')`` on success, ``(None, 'tab not exists.')`` when
        ``tab`` is ``None``, or ``(None, trace)`` when the lookup raises.
    """
    if tab is None:
        return None, 'tab not exists.'
    try:
        found = self._tab(tab)
    except Exception:
        return None, py_util.get_trace_stack()
    return found, ''
def modify_cookies(self, tab, url, name, value):
    """Set the value of the cookie identified by ``url`` and ``name``.

    Issues ``Network.setCookie`` on the resolved tab.

    Returns:
        ``(True, '')`` on success, ``(False, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        target.call_method(
            'Network.setCookie',
            name=name,
            value=value,
            url=url,
            _timeout=self.proto_timeout,
        )
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def active(self, tab):
    """Activate (bring to front) the given tab.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.

    Returns:
        ``(tab_id, '')`` on success, ``('', trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        self.browser.activate_tab(target.id, self.proto_timeout)
        active_id = target.id
    except Exception:
        return '', py_util.get_trace_stack()
    return active_id, ''
def clear_request(self, tab, url=None):
    """Empty the recorded request content of a tab.

    The container returned by the tab's ``get_request_info(url)`` is cleared
    in place.

    Returns:
        ``''`` on success, otherwise the stack trace text.
    """
    try:
        self._tab(tab).get_request_info(url).clear()
    except Exception:
        return py_util.get_trace_stack()
    return ''
def miss_cache(self, tab, is_disable=True):
    """Switch use of cached content off or on for a tab.

    Issues ``Network.setCacheDisabled`` with ``cacheDisabled=is_disable``.

    Returns:
        ``(True, '')`` on success, ``(False, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        target.call_method(
            'Network.setCacheDisabled',
            cacheDisabled=is_disable,
            _timeout=self.proto_timeout,
        )
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def clear_cache(self, tab):
    """Delete the whole browser cache.

    Issues ``Network.clearBrowserCache`` through the resolved tab.

    Returns:
        ``(True, '')`` on success, ``(False, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        target.call_method('Network.clearBrowserCache', _timeout=self.proto_timeout)
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def list(self, backinit=True, excludes=None):
    """List the tabs that are currently open.

    Per the original notes the result is ordered by last activity, with
    element 0 always being the currently active tab.

    Args:
        backinit: Forwarded to ``browser.list_tab``; tells whether tabs that
            were opened externally should be adopted so they can be
            controlled.
        excludes: Forwarded to ``browser.list_tab`` as ``excludes``. ``None``
            (the default) is replaced by a fresh empty dict on every call.

    Returns:
        ``(tabs, '')`` on success; ``('', 'connect fail: <dev_url>')`` when
        the browser cannot be reached; ``('', trace)`` on any other
        exception.
    """
    if excludes is None:
        # A literal ``{}`` default would be a single dict shared by every
        # call, so anything stored in it downstream would leak across calls.
        excludes = {}
    try:
        tabs = self.browser.list_tab(self.proto_timeout, backinit, excludes=excludes)
        return tabs, ''
    except requests.exceptions.ConnectionError:
        return '', 'connect fail: %s' % self.browser.dev_url
    except Exception:
        return '', py_util.get_trace_stack()
def sendkey(self, tab, keyCode=0x0D, eventType='keyDown'):
    """Send a keyboard event to the given tab.

    The same ``keyCode`` is used for both ``windowsVirtualKeyCode`` and
    ``nativeVirtualKeyCode`` of ``Input.dispatchKeyEvent``. Key codes are
    listed at
        https://msdn.microsoft.com/en-us/library/dd375731(VS.85).aspx
        https://docs.microsoft.com/zh-cn/windows/win32/inputdev/virtual-key-codes?redirectedfrom=MSDN

    Returns:
        ``(True, '')`` on success, ``(False, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        target.call_method(
            'Input.dispatchKeyEvent',
            type=eventType,
            windowsVirtualKeyCode=keyCode,
            nativeVirtualKeyCode=keyCode,
            _timeout=self.proto_timeout,
        )
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def set_cookie(self, tab, name, val, domain, expires=None, path='/', secure=False):
    """Create or overwrite a cookie via ``Network.setCookie``.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        name: Cookie name.
        val: Cookie value.
        domain: Cookie domain.
        expires: Expiry timestamp; when ``None`` it becomes the current
            ``time.time()`` (truncated to int) plus 365 days in seconds.
        path: Cookie path.
        secure: Secure flag.

    Returns:
        ``(True, '')`` on success, ``(False, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        if expires is None:
            one_year = 3600 * 24 * 365
            expires = int(time.time()) + one_year
        target.call_method(
            'Network.setCookie',
            name=name,
            value=val,
            domain=domain,
            expires=expires,
            path=path,
            secure=secure,
            _timeout=self.proto_timeout,
        )
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def dom_node(self, tab, sel, parentNodeId=1):
    """Find the id of a node below a parent node using a selector.

    Issues ``DOM.querySelector`` on the resolved tab.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        sel: Selector expression passed as ``selector``. The original notes
            mention both CSS and XPath expressions; NOTE(review): confirm
            XPath is really accepted by ``DOM.querySelector``.
        parentNodeId: Id of the node to search under.

    Returns:
        ``(node_id, '')`` when the reply carries ``nodeId``; ``('', '')``
        when the call yields ``None``; ``('', reply)`` when the reply lacks
        ``nodeId``; ``('', trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        rst = target.call_method(
            'DOM.querySelector',
            nodeId=parentNodeId,
            selector=sel,
            _timeout=self.proto_timeout,
        )
        if rst is None:
            return '', ''
        if 'nodeId' in rst:
            return rst['nodeId'], ''
        # The original returned an undefined name here, which raised
        # NameError and hid the actual reply behind a stack trace.
        return '', rst
    except Exception:
        return '', py_util.get_trace_stack()
def dom_document(self, tab):
    """Fetch the DOM root node of the given tab.

    Issues ``DOM.getDocument`` on the resolved tab.

    Returns:
        ``(root, '')`` when the reply carries ``root``; ``('', '')`` when the
        call yields ``None``; ``('', reply)`` when the reply lacks ``root``;
        ``('', trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        rst = target.call_method('DOM.getDocument', _timeout=self.proto_timeout)
        if rst is None:
            return '', ''
        if 'root' in rst:
            return rst['root'], ''
        # The original returned an undefined name here, which raised
        # NameError and hid the actual reply behind a stack trace.
        return '', rst
    except Exception:
        return '', py_util.get_trace_stack()
def wait_request_infos(self, tab, url, timeout=60, url_is_re=True):
    """Wait until request records matching ``url`` show up.

    Polls the tab's ``get_request_infos(url, url_is_re)`` until it yields a
    non-empty result or the ``spd_base.waited_t(timeout)`` timer reports a
    timeout.

    Returns:
        ``(request_infos, '')`` once something matched; ``([], '')`` when the
        wait timed out; ``(None, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        timer = spd_base.waited_t(timeout)
        while True:
            found, _msg = target.get_request_infos(url, url_is_re)
            if found and len(found):
                return found, ''
            if timer.timeout():
                return [], ''
    except Exception:
        return None, py_util.get_trace_stack()
def clear_storage(self, tab, url=None, types='all'):
    """Delete storage data belonging to the origin of ``url``.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        url: Address whose origin is cleared; ``None`` falls back to the
            tab's ``last_url``.
        types: Comma separated storage types, any of: appcache, cookies,
            file_systems, indexeddb, local_storage, shader_cache, websql,
            service_workers, cache_storage, interest_groups, all, other.

    Returns:
        ``(True, '')`` on success, ``(False, trace)`` on any exception.
    """
    try:
        target = self._tab(tab)
        source_url = target.last_url if url is None else url
        # Extract the leading "scheme://host/" part via the project helper.
        origin = spd_base.query_re_str(source_url, '^.*?://.*?/')
        target.call_method(
            'Storage.clearDataForOrigin',
            origin=origin,
            storageTypes=types,
            _timeout=self.proto_timeout,
        )
    except Exception:
        return False, py_util.get_trace_stack()
    return True, ''
def reopen(self):
    """Force a reconnect of this tab's websocket connection.

    After reopening the socket, ``Page.enable`` and ``Network.enable`` are
    issued again, and the download behaviour is re-applied when
    ``self.downpath`` is set.

    Returns:
        ``True`` when every step succeeded; ``False`` after logging a warning
        when any step raised.
    """
    try:
        self._close_websock()
        self._open_websock()
        self.call_method('Page.enable', _timeout=1)
        self.call_method(
            'Network.enable',
            maxResourceBufferSize=_maxResourceBufferSize,
            maxTotalBufferSize=_maxTotalBufferSize,
            _timeout=1,
        )
        if self.downpath:
            self.call_method(
                'Browser.setDownloadBehavior',
                behavior='allow',
                downloadPath=self.downpath,
                _timeout=1,
            )
    except websocket.WebSocketBadStatusException as e:
        logger.warning('reopen error: %s :: %d' % (self._websocket_url, e.status_code))
        return False
    except Exception:
        logger.warning('reopen error: %s :: %s' % (self._websocket_url, py_util.get_trace_stack()))
        return False
    return True
def dom_dhtml(self, tab, sel, parentNodeId=1):
    """Fetch the outer HTML text of a DOM node.

    The node is located with ``self.dom_query_node`` and then read with
    ``DOM.getOuterHTML``. Per the original notes this does not work for
    iframes.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        sel: Selector expression handed to ``self.dom_query_node``.
        parentNodeId: Id of the node to search under.

    Returns:
        ``(html, '')`` when the reply carries ``outerHTML``; ``('', err)``
        when the node lookup reported an error; ``('', '')`` when the call
        yields ``None``; ``('', reply)`` when the reply lacks ``outerHTML``;
        ``('', trace)`` on any exception.
    """
    nid, err = self.dom_query_node(tab, sel, parentNodeId)
    if err:
        return '', err

    try:
        target = self._tab(tab)
        rst = target.call_method('DOM.getOuterHTML', nodeId=nid, _timeout=self.proto_timeout)
        if rst is None:
            return '', ''
        if 'outerHTML' in rst:
            return rst['outerHTML'], ''
        # The original returned an undefined name here, which raised
        # NameError and hid the actual reply behind a stack trace.
        return '', rst
    except Exception:
        return '', py_util.get_trace_stack()
def exec(self, js, proto_timeout=10):
    """Evaluate JavaScript in this tab via ``Runtime.evaluate``.

    Args:
        js: Expression text to evaluate (``returnByValue`` is requested).
        proto_timeout: Passed to ``call_method`` as ``_timeout``.

    Returns:
        ``(value, '')`` when the result carries ``value``;
        ``('', description)`` when it only carries ``description``;
        ``('', '')`` when the call yields ``None`` or the result type is
        ``'undefined'``; ``('', result)`` for any other result shape;
        ``('', trace)`` on any exception.
    """
    try:
        reply = self.call_method(
            'Runtime.evaluate',
            expression=js,
            returnByValue=True,
            _timeout=proto_timeout,
        )
        if reply is None:
            return '', ''

        remote = reply['result']
        if 'value' in remote:
            return remote['value'], ''
        if 'description' in remote:
            return '', remote['description']
        if 'type' in remote and remote['type'] == 'undefined':
            return '', ''
        return '', remote
    except Exception:
        return '', py_util.get_trace_stack()
def _recv(self, timeout=0.01):
    """Try to receive and process one websocket message.

    Args:
        timeout: Socket timeout applied with ``settimeout`` before reading.

    Returns:
        A ``(result_count, event_count, error_message)`` tuple:
        ``(0, 0, '')`` when the read timed out;
        ``(None, None, err)`` on a communication error, a missing websocket,
        or a message that is neither an event nor a result;
        ``(0, 1, '')`` for an event message;
        ``(1, 0, '')`` for a result message.

    NOTE(review): the nesting of the ``return`` statements inside the
    event/result branches is assumed to be at branch level — confirm against
    the intended behaviour for events without a handler and results with an
    unknown id.
    """
    if not self._websocket:
        return (None, None, 'not websocket connection.')

    try:
        self._websocket.settimeout(timeout)
        message_json = self._websocket.recv()
        message = json.loads(message_json)  # decode the JSON text into an object right after receiving it
    except websocket.WebSocketTimeoutException:
        return (0, 0, '')  # timed out, nothing was received
    except websocket.WebSocketException as e:
        return (None, None, spd_base.es(e))  # websocket error
    except Exception as e:
        return (None, None, spd_base.es(e))  # any other error

    if self.debug:  # debug output is enabled, so print the raw message
        print('< RECV %s' % message_json)

    if "method" in message:
        # An event message arrived; try to run its callback.
        method = message['method']
        if method in self.event_handlers:
            try:
                self.event_handlers[method](**message['params'])
            except Exception as e:
                # A failing callback is only logged; it does not abort receiving.
                logger.warning("callback %s exception %s" % (method, py_util.get_trace_stack()))
        return (0, 1, '')
    elif "id" in message:
        # A result message arrived.
        msg_id = message["id"]
        if msg_id in self.method_results:
            self.method_results[msg_id] = message  # this result was being waited for, so record it
        return (1, 0, '')
    else:
        logger.warning("unknown CDP message: %s" % (message))
        return (None, None, 'unknown CDP message.')
    # NOTE(review): with the returns above placed at branch level this
    # statement cannot be reached.
    return (0, 0, '')
def remove_cookies(self, tab, url, names=None):
    """Delete the cookies that match ``url`` and ``names``.

    Args:
        tab: Tab identifier, resolved through ``self._tab``.
        url: URL handed to ``self.query_cookies`` to find candidate cookies.
        names: A single cookie name, a container of names, or ``None`` to
            delete every cookie found for ``url``.

    Returns:
        ``(True, '')`` on success; ``(False, msg)`` when the cookie query
        reported an error; ``(False, trace)`` on any exception.
    """
    try:
        # Look up the cookies matching the url first.
        cookies, msg = self.query_cookies(tab, url)
        if msg:
            return False, msg

        if isinstance(names, str):
            # A single name is turned into a one-element set.
            wanted = {names}
        elif names is None:
            # No name given: every cookie that was found is selected.
            wanted = {cookie['name'] for cookie in cookies}
        else:
            wanted = names

        target = self._tab(tab)
        for cookie in cookies:
            cookie_name = cookie['name']
            if cookie_name not in wanted:
                continue
            # Besides the name, the delete call is qualified by the cookie's
            # own domain and path.
            target.call_method(
                'Network.deleteCookies',
                name=cookie_name,
                domain=cookie['domain'],
                path=cookie['path'],
                _timeout=self.proto_timeout,
            )
        return True, ''
    except Exception:
        return False, py_util.get_trace_stack()