def doSomething(keyword):
    """Search PubChem Compound for *keyword* and log it when nothing is found.

    Loads the pccompound search URL for the keyword in the module-level
    ``driver`` and waits (via ``retry``) for the ``msgportlet`` element.
    When that element's text contains "No items found." the keyword is
    passed to ``f3.write``. Any exception raised while inspecting the
    message element is deliberately ignored.
    """
    search_url = "https://www.ncbi.nlm.nih.gov/pccompound/?term=" + keyword
    driver.get(search_url)
    retry(lambda: driver.find_element_by_id("msgportlet"))
    try:
        message_text = driver.find_element_by_id("msgportlet").text
        if "No items found." in message_text:
            f3.write(keyword)
            return
    except Exception:
        # Best effort: a missing or unreadable message box is not an error.
        pass
def doSomething(driver, keyword, tableId, pfi):
    """Search PubChem Compound for *keyword* and report a "no hits" result.

    Loads the pccompound search URL in *driver* and waits (via ``retry``)
    for the ``msgportlet`` element. When its text contains
    "No items found." a row of "None" placeholders is handed to
    ``addResult`` and ``updateStatus`` is called with ``STATUS_SUCCESS``
    for *tableId*. Exceptions raised while handling the message element
    are deliberately ignored. *pfi* is not used by this block.
    """
    search_url = "https://www.ncbi.nlm.nih.gov/pccompound/?term=" + keyword
    driver.get(search_url)
    retry(lambda: driver.find_element_by_id("msgportlet"))
    try:
        message_text = driver.find_element_by_id("msgportlet").text
        if "No items found." in message_text:
            addResult([[keyword, "None", "None", "None", "None"]])
            updateStatus(STATUS_SUCCESS, tableId)
            return
    except Exception:
        # Best effort: a missing or unreadable message box is not an error.
        pass
def processInNewDriver(href): ''' 处理文章链接 ''' print "处理链接", href driverContent = webdriver.Chrome(executable_path=executable_path) driverContent.get(href) retry(lambda: driver.find_element_by_class_name("rprt_all")) contentEle = driverContent.find_element_by_class_name("rprt_all") content = contentEle.text for key in keys: if content.find(key) != -1: print "find" reusltData.append([href, key]) write(reusltData, FILENAME_RESULT) break recoreDate.append([href]) driverContent.quit()
def getPro(key, myid):
    """Search the current page for *key* and print the matching entry header.

    Types *key* into the ``local-searchbox`` element of the module-level
    ``driver`` and clicks the ``submit`` element. If the text of the first
    result link equals *key* (case-insensitively), the link target is
    opened and the text of the second ``chebiDataHeader`` element is
    printed. Any exception during result handling is printed and
    swallowed. *myid* is not used by this block.

    The shared ``driver`` is left open: the previous code tried to call
    ``quit()`` on the return value of ``driver.get`` (always ``None``),
    which only produced a spurious AttributeError message.
    """
    # urljoin lives in different modules on Python 2 and Python 3.
    try:
        from urlparse import urljoin
    except ImportError:
        from urllib.parse import urljoin

    retry(lambda: driver.find_element_by_id("local-searchbox"))
    driver.find_element_by_id("local-searchbox").send_keys(key)
    retry(lambda: driver.find_element_by_class_name("submit"))
    driver.find_element_by_class_name("submit").click()
    try:
        chiTbls = driver.find_elements_by_xpath(
            "//td[@class='gridLayoutCellTitle']/a")
        a_text = chiTbls[0].text
        print(a_text)
        if a_text.lower() == key.lower():
            Url = chiTbls[0].get_attribute("href")
            # ``href`` may already be an absolute URL; urljoin handles both
            # absolute and relative targets, whereas plain concatenation
            # produced a malformed address for absolute ones.
            driver.get(urljoin("http://www.ebi.ac.uk/chebi/", Url))
            chikey = driver.find_elements_by_class_name("chebiDataHeader")
            header_text = chikey[1].text
            print(header_text)
    except Exception as e:
        # Best effort: report the problem (e.g. no results) and carry on.
        print(e)
def initDriver():
    """Start Chrome, run the fixed PubMed journal query and return the driver.

    Every browser interaction is wrapped in ``retry``.
    """
    browser = webdriver.Chrome(executable_path=executable_path)
    journal = "Worldviews On Evidence-Based Nursing"
    steps = (
        # Navigate to the site.
        lambda: browser.get("http://www.ncbi.nlm.nih.gov/pubmed/"),
        # Fill in the query field.
        lambda: browser.find_element_by_id("term").clear(),
        lambda: browser.find_element_by_id("term").send_keys(journal),
        # Click the search button.
        lambda: browser.find_element_by_id("search").click(),
    )
    for step in steps:
        retry(step)
    return browser
def getchemID(key): while True: if driver.current_url == "http://www.ncbi.nlm.nih.gov/pccompound": chat = driver.find_element_by_id("term") chat.clear() chat.send_keys(key) driver.find_element_by_id("search").click() try: retry(lambda: driver.find_element_by_xpath( "//table[@class='top-summary-items']/tbody/tr/td")) ID1 = driver.find_element_by_xpath( "//table[@class='top-summary-items']/tbody/tr/td").text Moleculars = driver.find_elements_by_xpath( "//table[@class='top-summary-items']/tbody/tr/td/a") Molecular1 = Moleculars[1].text print Molecular1 result.append([key, ID1, Molecular1]) pubChem.recorddata.append([key]) driver.back() break except Exception, e: print "1", e import traceback print traceback.format_exc() try: retry(lambda: driver.find_elements_by_xpath( "//div[@class='content']/div/div/div/div/p/a")) myTagas = driver.find_elements_by_xpath( "//div[@class='content']/div/div/div/div/p/a") for myTaga in myTagas: if key.upper() in myTaga.text.upper(): href = myTaga.get_attribute("href") drivercontent = webdriver.Chrome(driverPathname) drivercontent.get(href) retry(lambda: drivercontent.find_element_by_xpath( "//table[@class='top-summary-items']/tbody/tr/td")) ID2 = drivercontent.find_element_by_xpath( "//table[@class='top-summary-items']/tbody/tr/td" ).text Moleculars1 = drivercontent.find_elements_by_xpath( "//table[@class='top-summary-items']/tbody/tr/td/a" ) Molecular2 = Moleculars1[1].text result.append([key, ID2, Molecular2]) pubChem.recorddata.append([key]) drivercontent.quit() return except Exception, e: result.append([key, "NA", "NA"]) pubChem.recorddata.append([key]) break
def getchemID(key):
    """Look up *key* on PubChem Compound and record its compound ID.

    Works on the module-level ``driver``. When the browser is on the
    pccompound start page the key is typed into the ``term`` box and the
    search is submitted. Then:

    * If the summary table is present, ``[key, id]`` is appended to
      ``result`` and the browser navigates back.
    * Otherwise the error is printed and the result links are inspected.
      With no links at all, ``[key, "NA"]`` is recorded. Otherwise the
      first link whose text contains *key* (case-insensitively) is opened
      in a separate Chrome instance and its summary cell is recorded.
    * If that fallback raises, ``[key, "NA"]`` is recorded and the error
      is printed.

    Every row appended to ``result`` has exactly two elements.
    """
    summary_cell = "//table[@class='top-summary-items']/tbody/tr/td"
    result_links = "//div[@class='content']/div/div/div/div/p/a"

    while True:
        if driver.current_url == "https://www.ncbi.nlm.nih.gov/pccompound":
            chat = driver.find_element_by_id("term")
            chat.clear()
            chat.send_keys(key)
            driver.find_element_by_id("search").click()
        try:
            retry(lambda: driver.find_element_by_xpath(summary_cell))
            ID1 = driver.find_element_by_xpath(summary_cell).text
            result.append([key, ID1])
            driver.back()
            break
        except Exception as e:
            print(e)
            try:
                retry(lambda: driver.find_elements_by_xpath(result_links))
                myTagas = driver.find_elements_by_xpath(result_links)
                if not myTagas:
                    # Keep the row two elements wide like every other row
                    # (this branch used to append a three-element row).
                    result.append([key, "NA"])
                    break
                wanted = key.upper()
                for myTaga in myTagas:
                    if wanted in myTaga.text.upper():
                        href = myTaga.get_attribute("href")
                        drivercontent = webdriver.Chrome(
                            "/Users/yangjie/Documents/env/python/chromedriver_2.21/chromedriver_mac32/chromedriver"
                        )
                        try:
                            drivercontent.get(href)
                            retry(lambda: drivercontent.find_element_by_xpath(
                                summary_cell))
                            ID2 = drivercontent.find_element_by_xpath(
                                summary_cell).text
                            result.append([key, ID2])
                        finally:
                            # Always close the helper browser; previously a
                            # failure above left the Chrome process running.
                            drivercontent.quit()
                        return
            except Exception as e:
                result.append([key, "NA"])
                print(e)
                break
def doSomething(driver, keyword, tableId, pfi):
    """Search for *keyword*, open the first hit and record its summary text.

    Steps, all performed on *driver*:

    1. Type *keyword* into ``txt_1_value1`` and click ``btnSearch``.
    2. Poll until the ``Search__Waiting`` element's style contains
       ``display: none;``.
    3. Inside the ``iframeResult`` frame, press ENTER on the
       ``brief_downloadIcon`` element; if that opened a second window,
       close it.
    4. If the text of the ``fz14`` link contains *keyword*, open it, press
       ENTER on the "CAJ下载" link, and pass the text of ``ChDivSummary``
       to ``addResult`` (or a failure marker when it cannot be read).
       Otherwise an empty row is recorded.
    5. Call ``updateStatus`` with ``STATUS_SUCCESS`` for *tableId*.

    *pfi* is not used by this block.
    """
    driver.switch_to_default_content()
    retry(lambda: driver.find_element_by_id("txt_1_value1").clear())
    retry(lambda: driver.find_element_by_id("txt_1_value1").send_keys(keyword))
    retry(lambda: driver.find_element_by_id("btnSearch").click())
    time.sleep(0.1)

    # Wait for the "searching" indicator to be hidden.
    while True:
        style = driver.find_element_by_id("Search__Waiting").get_attribute("style")
        if "display: none;" in str(style):
            break
        time.sleep(0.1)

    driver.switch_to_frame("iframeResult")
    driver.find_element_by_class_name("brief_downloadIcon").send_keys(Keys.ENTER)
    time.sleep(0.1)
    if len(driver.window_handles) == 2:
        # A second window appeared; close it.
        driver.switch_to_window(driver.window_handles[1])
        driver.close()
    # Re-select the first window in every case so the frame context is reset
    # before entering the result frame again.
    driver.switch_to_window(driver.window_handles[0])
    time.sleep(0.1)
    driver.switch_to_frame("iframeResult")

    aEle = driver.find_element_by_xpath("//a[@class='fz14']")
    if keyword in aEle.text:
        aEle.click()
        driver.switch_to_window(driver.window_handles[1])
        aLinkEles = driver.find_elements_by_xpath("//div[@id='QK_nav']/ul/li/a")
        for aLinkEle in aLinkEles:
            label = aLinkEle.text
            print(label)
            if label == u"CAJ下载":
                aLinkEle.send_keys(Keys.ENTER)
                break
        driver.find_element_by_link_text(u"CAJ下载").send_keys(Keys.ENTER)
        try:
            result = driver.find_element_by_id("ChDivSummary").text
            addResult([[keyword, result, '']])
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit are
            # no longer swallowed.
            addResult([[keyword, u'摘要获取失败', '']])
        driver.close()
        driver.switch_to_window(driver.window_handles[0])
    else:
        addResult([[keyword, '', '']])
    updateStatus(STATUS_SUCCESS, tableId)
driver = pfi.driver while True: try: raiseOnError = True try: driver.get(URL % quote(keyword)) except KeyError, ke: addResult([[keyword, "KEYERR", "KEYERR"]]) updateStatus(STATUS_SUCCESS, tableId) break time.sleep(0.1) isFind = True if driver.title.endswith("NCBI"): #result_count if retry(lambda: driver.find_element_by_id("result_sel"), times=10, allowError=True, raiseOnError=raiseOnError): pass #success else: #error updateStatus(STATUS_NON, tableId) pfi.needInitDriver = True break isFind = False retry(lambda: driver.find_elements_by_xpath( "//div[@class='content']/div/div/div/div/p/a"), times=10, allowError=True, raiseOnError=raiseOnError) myTagas = driver.find_elements_by_xpath(
@author: yangjie ''' from com.office.util.codeUtil import forbidCodeErr import time from com.office.util.pubUtil import retry forbidCodeErr() # #使用第三方库,模拟浏览器登录 from selenium import webdriver driver = webdriver.Chrome( executable_path="/Users/jyang/work/env/py/selenium/chromedriver") driver.get("https://www.mi.com/index.html") while True: try: if r"item.mi.com/product" in driver.current_url: driver.find_element_by_link_text(u"加入购物车").click() elif r"static.mi.com/buySuccess" in driver.current_url: driver.find_element_by_link_text(u"去购物车结算").click() elif r"static.mi.com/cart/" in driver.current_url: driver.find_element_by_link_text(u"去结算").click() elif r"order.mi.com/buy/checkout" in driver.current_url: retry(lambda: driver.find_element_by_class_name("J_addressItem"). click(), raiseOnError=True) retry(lambda: driver.find_element_by_link_text(u"立即下单").click(), raiseOnError=True) break except: time.sleep(0.01) continue
def doSomething(driver, keyword, tableId, pfi):
    """Search PubChem Compound for *keyword* and report what the page shows.

    Loads the pccompound search URL in *driver*, then handles the page in
    order:

    1. If the ``msgportlet`` element says "No items found.", a row of
       "None" placeholders goes to ``addResult``, ``updateStatus`` is
       called with ``STATUS_SUCCESS`` and the function returns.
    2. If the summary table is present, its first cell text, the current
       URL and the Canonical-SMILES text (empty when unavailable) go to
       ``addResult``, the status is updated and the function returns.
    3. Otherwise the result links are waited for and collected; this block
       does nothing further with them.

    Exceptions raised in steps 1 and 2 are deliberately ignored so the
    next step is tried. *pfi* is not used by this block.
    """
    summary_xpath = "//table[@class='top-summary-items']/tbody/tr/td"
    smiles_xpath = '//*[@id="Canonical-SMILES"]/div/div[@class="section-content-item"]'
    links_xpath = "//div[@class='content']/div/div/div/div/p/a"

    driver.get("https://www.ncbi.nlm.nih.gov/pccompound/?term=" + keyword)
    retry(lambda: driver.find_element_by_id("msgportlet"))

    # Step 1: the search reported no hits.
    try:
        if "No items found." in driver.find_element_by_id("msgportlet").text:
            addResult([[keyword, "None", "None", "None", "None"]])
            updateStatus(STATUS_SUCCESS, tableId)
            return
    except Exception:
        pass

    # Step 2: the page shows a summary table.
    try:
        retry(lambda: driver.find_element_by_xpath(summary_xpath))
        compound_id = driver.find_element_by_xpath(summary_xpath).text
        smiles = ""
        try:
            retry(lambda: driver.find_element_by_xpath(smiles_xpath))
            smiles = driver.find_element_by_xpath(smiles_xpath).text
        except Exception:
            # The SMILES section is optional; keep the empty default.
            pass
        addResult([[keyword, driver.current_url, compound_id, smiles, ""]])
        updateStatus(STATUS_SUCCESS, tableId)
        return
    except Exception:
        pass

    # Step 3: collect the result links.
    retry(lambda: driver.find_elements_by_xpath(links_xpath))
    myTagas = driver.find_elements_by_xpath(links_xpath)
forbidCodeErr()
from selenium import webdriver


def getPro(key):
    """Fetch the OMIM entry *key* and append its texts to the output file.

    Opens ``http://omim.org/entry/<key>`` in the module-level ``driver``,
    tries to click the ``donationClose`` element (failures are ignored),
    then scans the cells of the ``floatingEntryContainer`` table. For a
    cell whose ``id`` is ``description`` or ``clinicalFeatures`` the text
    of the *following* cell is captured. One line is always written to
    ``f2``; fields that were not found stay empty. Errors during the scan
    are printed and swallowed.
    """
    url = "http://omim.org/entry/" + key
    print(url)
    driver.get(url)
    try:
        # presumably a dismissible donation pop-up; absence is not an error
        driver.find_element_by_id("donationClose").click()
    except Exception:
        pass

    description = ""
    clinicalFeatures = ""
    cells_xpath = "//td[@id='floatingEntryContainer']/table/tbody/tr/td"
    try:
        retry(lambda: driver.find_element_by_xpath(cells_xpath))
        tdEles = driver.find_elements_by_xpath(cells_xpath)
        # Walk (heading cell, following cell) pairs. Pairing avoids the
        # IndexError the old ``tdEles[i + 1]`` raised when a heading was the
        # last cell, and the id is fetched only once per cell.
        for heading, following in zip(tdEles, tdEles[1:]):
            cell_id = heading.get_attribute("id")
            if cell_id == "description":
                description = following.text
            elif cell_id == "clinicalFeatures":
                clinicalFeatures = following.text
    except Exception as e:
        print(e)
    # NOTE(review): "\r" separates the two fields while "\t" follows the key;
    # confirm this is the intended delimiter.
    f2.write(key + "\t" + description + "\r" + clinicalFeatures + "\n")


# Read all ids up front and close the input file straight away.
with open("omim-id", "r") as _omim_id_file:
    f1 = _omim_id_file.readlines()
f2 = open("omim-description", "a")
driverPathname = "/Users/yangjie/Documents/env/python/chromedriver_2.21/chromedriver_mac32/chromedriver"
profile.update_preferences() driver = webdriver.Chrome(executable_path="/Users/yangjie/mywork/icode/cloudnms/sky-auto/autouitest/driver/chrome/chromedriver_mac32/chromedriver") time.sleep(0.1) driver.get("http://apply.hzcb.gov.cn/apply/app/status/norm/person") time.sleep(0.1) qihaoEle = driver.find_element_by_id("issueNumber") qihaoSelect = Select(qihaoEle) all_options = qihaoSelect.options; optStrs = [] for all_option in all_options: optStrs.append(all_option.text) print optStrs for optStr in optStrs[1:]: print optStr retry(lambda:Select(driver.find_element_by_id("issueNumber")).select_by_value(optStr)) retry(lambda:driver.find_element_by_xpath('//*[@id="search"]').click()) retry(lambda:driver.find_element_by_class_name("totalcount").text) total = int(driver.find_element_by_class_name("totalcount").text) d = {} lastText = "" while True: try: # 找出当前页的所有名字 trEles = driver.find_elements_by_class_name("content_data") + driver.find_elements_by_class_name("content_data1"); for trEle in trEles: tdEles = trEle.find_elements_by_tag_name("td"); d[tdEles[0].text] = tdEles[1].text # 点击下一页 try: nextEles = driver.find_elements_by_class_name("taiji_pager_item");
from selenium import webdriver pfi.driver = webdriver.Chrome(executable_path=DIRVER_PATHNAME) pfi.driver.get(URL) except Exception, e: print 'initDriver error', e updateStatus(STATUS_NON, tableId) return pfi.needInitDriver = False updateStatus(STATUS_IN_PROCESS, tableId) driver = pfi.driver while True: try: if driver.current_url == URL: raiseOnError = True retry(lambda: driver.find_element_by_id("term").clear(), times=10, allowError=True, raiseOnError=raiseOnError) retry(lambda: driver.find_element_by_id("term").send_keys( keyword), times=10, allowError=True, raiseOnError=raiseOnError) retry(lambda: driver.find_element_by_id("search").click(), times=10, allowError=True, raiseOnError=raiseOnError) time.sleep(0.1) isFind = True if driver.title.endswith("NCBI"): #result_count if retry(lambda: driver.find_element_by_id("result_sel"),
retry(lambda: driver.find_element_by_class_name("rprt_all")) contentEle = driverContent.find_element_by_class_name("rprt_all") content = contentEle.text for key in keys: if content.find(key) != -1: print "find" reusltData.append([href, key]) write(reusltData, FILENAME_RESULT) break recoreDate.append([href]) driverContent.quit() while True: retry(lambda: driver.find_elements_by_xpath("//p[@class='title']/a")) aEles = driver.find_elements_by_xpath("//p[@class='title']/a") hrefs = [] for aEle in aEles: href = aEle.get_attribute("href") if href in matchRecordData: print "processd ", href else: hrefs.append(href) if 0 == len(hrefs): print "no new href" else: doInThread(processInNewDriver, hrefs, poolNum=4) write(recoreDate, FILENAME_RECORD) try: