def _crawl(detail_link: str, driver: WebDriver) -> Info:
    """Load a detail page and convert the fields found on it.

    Args:
        detail_link: URL of the detail page to open.
        driver: Selenium driver used to load and inspect the page.

    Returns:
        The result of ``_convert`` applied to the fields collected by
        ``_get_all_present_fields``.
    """
    driver.get(detail_link)
    # Fixed pause after navigation; presumably gives the page time to
    # render before elements are looked up -- TODO confirm.
    time.sleep(5)
    accordeon = driver.find_elements_by_class_name('accordeon')
    try:
        # Get rid of the cookie banner blocking all clicks on this website.
        driver.find_element_by_class_name(
            'moove-gdpr-infobar-allow-all').click()
    except Exception:
        # Best effort: the banner may be missing or not clickable. Only
        # ordinary errors are ignored, so KeyboardInterrupt and SystemExit
        # still propagate (a bare ``except:`` would have swallowed them).
        pass
    info = _get_all_present_fields(accordeon, driver)
    return _convert(info)
def batchAccept(browser: WebDriver):
    """Tick every pending '[数据网]' ticket and trigger the page's batch accept.

    Loads the pending-ticket list (page size 100), reads the ticket total
    from the page counter, ticks the checkbox of each ticket whose title row
    contains ``[数据网]`` and then calls the page's ``batchAccept()``
    JavaScript function.

    Args:
        browser: Selenium driver used for navigation and element lookups.

    Returns:
        None. Returns early, without touching the table, when the counter
        reports no tickets.

    Raises:
        AttributeError: If the counter text does not contain the expected
            total (``re.search`` returns ``None``).
    """
    # Batch accept
    u = 'http://10.204.14.35/eoms4/sheetBpp/myWaitingDealSheetQueryty.action?baseSchema=WF4_EL_TTM_TTH_EQU&var_pagesize=100'
    browser.get(u)
    total_element = browser.find_element(
        By.CSS_SELECTOR, 'form#form1 span.pagenumber')
    # The captured group is a string; convert once so the comparison below
    # is numeric (comparing str with int raises TypeError on Python 3).
    total = int(
        re.search(r'共([0-9]+)条数据', total_element.text).group(1))
    if total < 1:
        return

    # The first <tr> is skipped (presumably a header row -- TODO confirm).
    # The remaining rows are read in pairs: row 2*x carries the checkbox,
    # row 2*x + 1 carries the title.
    todo_list = browser.find_elements(By.CSS_SELECTOR, 'table#tab tr')[1:]
    # The counter reports every pending ticket, but only the rows present on
    # this page can be indexed; bound the loop so a larger total cannot
    # raise IndexError.
    shown = min(total, len(todo_list) // 2)
    for x in range(shown):
        title = todo_list[2 * x + 1].text
        if '[数据网]' in title:
            todo_list[2 * x].find_element(By.NAME, 'checkid').click()

    browser.execute_script('batchAccept();')
def _crawl_detail_page(driver: WebDriver) -> List[DetailPageInfo]:
    """Visit the detail page of entries 1..314 and gather their info.

    For each index the overview page at ``URL`` is reloaded, the table row
    whose first cell yields that index (via ``extract_idx``) is located, its
    link is clicked and the detail info is extracted. Indices without a
    matching row are skipped. The collected list is passed through
    ``_clean_detail_pages`` before it is returned.

    Args:
        driver: Selenium driver used for navigation and element lookups.

    Returns:
        The list of extracted detail-page infos, in index order.
    """
    collected = []
    for wanted in range(1, 315):
        # Reload the overview for every index: clicking a link navigates
        # away from the table.
        driver.get(URL)
        for table_row in _get_table_rows(driver):
            first_cell = table_row.find_elements_by_tag_name('td')[0]
            if extract_idx(first_cell) != wanted:
                continue
            first_cell.find_element_by_tag_name('a').click()
            collected.append(_extract_detail_info(driver, wanted))
            break
    _clean_detail_pages(collected)
    return collected
def jumpToEOMS(browser: WebDriver, keyword='') -> tuple:
    """Collect pending EOMS fault tickets, push a summary and return them.

    Opens ``eoms_url``, loads the pending-ticket list (page size 100), keeps
    the tickets whose title row contains ``keyword``, opens each kept ticket
    to read its alarm clear time, sends a markdown summary through
    ``send_msg`` and finally navigates back to ``eoms_url``.

    Args:
        browser: Selenium driver used for navigation and element lookups.
        keyword: Substring a ticket title must contain. The default ``''``
            matches every ticket.

    Returns:
        A tuple ``(data, data2, data3)`` of lists of ticket dicts: all
        matching tickets, those with a non-empty clear time, and the rest.
        Each dict has the keys ``title``, ``end_time``, ``find_time``,
        ``status``, ``url`` and ``clear_time``, plus ``device`` and
        ``event`` when the title matches ``<device> 上报 <event>``.

    Raises:
        AttributeError: If the counter text does not contain the expected
            total (``re.search`` returns ``None``).
    """
    eoms_url = 'http://uip.ln.cmcc/_layouts/Document/BridgeToSPControl.aspx?skipcode=emoss'
    browser.get(eoms_url)
    # Fault-handling tickets (equipment):
    todo_list_url = 'http://10.204.14.35/eoms4/sheetBpp/myWaitingDealSheetQueryGlobalTemplate.action?baseSchema=WF4_EL_TTM_TTH_EQU&var_pagesize=100'
    browser.get(todo_list_url)
    total_element = browser.find_element(
        By.CSS_SELECTOR, 'form#form1 span.pagenumber')
    total = int(
        re.search(r'共([0-9]+)条数据', total_element.text).group(1))
    # The first <tr> is skipped (presumably a header row -- TODO confirm).
    # The remaining rows are read in pairs: row 2*x holds the data cells,
    # row 2*x + 1 holds the title.
    todo_list = browser.find_elements(By.CSS_SELECTOR, 'table#tab tr')[1:]

    def get_url(title_ele):
        """Build the ticket URL from the quoted args of the link's onclick."""
        onclick = title_ele.find_element(
            By.TAG_NAME, 'a').get_attribute('onclick')
        args = re.findall(r'\'(.*?)\'', onclick)
        url = 'http://10.204.14.35/eoms4/sheet/openWaittingSheet.action?baseSchema={}&taskid={}&baseId={}&entryId=&version=&processType={}'
        # The URL wants taskid before baseId, hence args[2] before args[1].
        return url.format(args[0], args[2], args[1], args[3])

    # The counter reports every pending ticket, but only the rows present on
    # this page can be indexed; bound the loop so a larger total cannot
    # raise IndexError.
    shown = min(total, len(todo_list) // 2)

    # Pending tickets matching the keyword.
    data = []
    for x in range(shown):
        title_row = todo_list[2 * x + 1]
        title_text = title_row.text
        if keyword not in title_text:
            continue
        content = todo_list[2 * x].find_elements(By.TAG_NAME, 'td')
        data.append({
            # Drops the first 21 characters of the title row text
            # (presumably a fixed-width prefix -- TODO confirm).
            'title': title_text[21:],
            'end_time': datep(content[4].text),
            'find_time': datep(content[5].text),
            'status': content[6].text,
            'url': get_url(title_row),
        })

    # Tickets that already carry a clear time.
    data2 = []
    # Tickets without a clear time.
    data3 = []
    for ticket in data:
        browser.get(ticket['url'])
        ticket['clear_time'] = browser.find_element(
            By.ID, 'INC_Alarm_ClearTime').get_attribute('value')
        title_match = re.search(r'([A-Z\-0-9]+) 上报 (.+)', ticket['title'])
        # Titles that do not follow the pattern simply get no device/event.
        if title_match is not None:
            ticket['device'] = title_match.group(1)
            ticket['event'] = title_match.group(2)
        if ticket['clear_time'] != '':
            data2.append(ticket)
        else:
            data3.append(ticket)

    # TODO: detailed push
    msg_title = '{}故障工单 {} 个,已上清除 {} 个。'
    msg_title = msg_title.format(keyword, len(data), len(data2))
    find_time2 = [datef(ticket['find_time']) for ticket in data2]
    find_time3 = [datef(ticket['find_time']) for ticket in data3]
    msg_text = '### {}\n\n**已上清除建单时间分别为**:\n\n{}\n\n**未上清除建单时间分别为**:\n\n{}\n\n> 推送时间:{}'
    t2, t3 = '\n\n'.join(find_time2), '\n\n'.join(find_time3)
    msg_text = msg_text.format(msg_title, t2, t3, datef())
    send_msg(msg_markdown(msg_title, msg_text, True))
    browser.get(eoms_url)
    return data, data2, data3
def cleanUpCookiesAndLaunchIE(self):
    """Create a driver, delete its cookies, open Yahoo and return the driver.

    Returns:
        The newly created ``WebDriver`` after it has loaded
        ``http://www.yahoo.com``.
    """
    # NOTE(review): the Internet Explorer capabilities are looked up but
    # never passed to WebDriver() -- confirm whether that is intended.
    _ie_caps = DesiredCapabilities.INTERNETEXPLORER
    browser = WebDriver()
    browser.delete_all_cookies()
    browser.get("http://www.yahoo.com")
    return browser