def modelWalker():
    """Search every model in ``models`` and walk the result tabs of each hit.

    Starts at the index returned by ``getStartIdx()``. For each model the
    root page is reloaded, the model name is typed into the search box, and
    ``tabWalker()`` is called unless the result heading starts with
    'No Matches Found'. The model's index is pushed on ``prevTrail`` for the
    duration of the ``tabWalker()`` call.

    Any exception drops into ipdb and then prints the traceback.
    """
    global driver, prevTrail, models
    act = ActionChains(driver)
    try:
        startIdx = getStartIdx()
        # Number the items from startIdx so that idx is the model's real
        # position in `models`; the original passed len(models) as the
        # enumerate start, which shifted every logged/recorded index.
        for idx, model in enumerate(models[startIdx:], startIdx):
            ulog('idx=%s, model="%s"' % (idx, model))
            goToUrl(rootUrl)
            btn = waitClickable('.search-select button')
            act.move_to_element(btn).click(btn).perform()
            inp = waitClickable('.input-block-level')
            act.move_to_element(inp).click(inp).perform()
            act.send_keys(model + Keys.DOWN + Keys.ENTER).perform()
            time.sleep(0.1)
            waitUntil(isReadyState)
            ulog('url=' + driver.current_url)
            title = waitText('.lightGrayBg > div > div > div > h2')
            ulog('title=' + title)
            # 'Search by Model Number' or 'No Matches Found'
            if title.startswith('No Matches Found'):
                continue
            prevTrail += [idx]
            tabWalker()
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def modelWalker():
    """Look up each entry of ``models`` on the site and walk its tabs.

    Iteration begins at ``getStartIdx()``. Per model: reload ``rootUrl``,
    open the search selector, type the model followed by DOWN and ENTER,
    wait for the page to be ready, and call ``tabWalker()`` unless the
    heading starts with 'No Matches Found'. While ``tabWalker()`` runs the
    model's index sits on top of ``prevTrail``.

    Any exception drops into ipdb and then prints the traceback.
    """
    global driver, prevTrail, models
    act = ActionChains(driver)
    try:
        startIdx = getStartIdx()
        # enumerate's second argument is the first index to hand out; it must
        # be startIdx (not len(models)) for idx to match the position of
        # `model` inside `models`.
        for idx, model in enumerate(models[startIdx:], startIdx):
            ulog('idx=%s, model="%s"' % (idx, model))
            goToUrl(rootUrl)
            btn = waitClickable('.search-select button')
            act.move_to_element(btn).click(btn).perform()
            inp = waitClickable('.input-block-level')
            act.move_to_element(inp).click(inp).perform()
            act.send_keys(model + Keys.DOWN + Keys.ENTER).perform()
            time.sleep(0.1)
            waitUntil(isReadyState)
            ulog('url=' + driver.current_url)
            title = waitText('.lightGrayBg > div > div > div > h2')
            ulog('title=' + title)
            # 'Search by Model Number' or 'No Matches Found'
            if title.startswith('No Matches Found'):
                continue
            prevTrail += [idx]
            tabWalker()
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def walkProdCat():
    """Open the drill-down search and visit every product category.

    Categories are taken from the product-category <select>, starting at the
    index returned by ``getStartIdx()``. Each category is selected, its index
    is pushed on ``prevTrail``, ``walkProdFam()`` is called, and the index is
    popped again.

    On any exception the traceback is printed, ipdb is entered and a
    screenshot is saved to 'netgear_exc.png'.
    """
    global driver, prevTrail
    try:
        # click "Drilldown"
        drilldownBtn = waitClickable(
            '#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_BasicSearchPanel_btnAdvancedSearch')
        drilldownBtn.click()

        categoryCss = ('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_'
                       'adsPanel_lbProductCategory')
        numProdCats = len(Select(css(categoryCss)).options)
        ulog('numProdCats=%d' % numProdCats)

        for idx in range(getStartIdx(), numProdCats):
            # Look the <select> up again on every pass instead of reusing
            # the previous handle.
            categorySel = Select(css(categoryCss))
            ulog("idx=%s" % idx)
            ulog('select "%s"' % categorySel.options[idx].text)
            categorySel.select_by_index(idx)
            prevTrail.append(idx)
            walkProdFam()
            prevTrail.pop()
    except Exception as ex:
        traceback.print_exc()
        ipdb.set_trace()
        driver.save_screenshot('netgear_exc.png')
def main():
    """Crawl tsd.dlink.com.tw by model prefix/suffix and harvest each model.

    Command line: ``argv[1]`` is the prefix index to start from and
    ``argv[2]`` the suffix index to start from (both default to 1).

    Creates the ``dlink`` table in 'dlink_tsd.sqlite3' if needed, then for
    every prefix/suffix combination selects the model, clicks the go button,
    calls ``harvestPage2()`` and navigates back. Finally waits until no
    '.part' file is left in ``dlDir`` and quits the browser.
    """
    global driver, conn
    startPfxIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    startSfxIdx = int(sys.argv[2]) if len(sys.argv) > 2 else 1

    harvest_utils.driver = getFirefox(dlDir)
    driver = harvest_utils.driver
    conn = sqlite3.connect('dlink_tsd.sqlite3')
    csr = conn.cursor()
    csr.execute("CREATE TABLE IF NOT EXISTS dlink("
                "model TEXT,"
                "file_name TEXT PRIMARY KEY,"
                "desc TEXT,"
                "href TEXT,"
                "file_sha1 TEXT)")
    conn.commit()

    pfxCss = 'select.quickFindAndSearchForm:nth-child(4)'
    sfxCss = 'select.quickFindAndSearchForm:nth-child(6)'

    driver.get('http://tsd.dlink.com.tw/')
    modelPfxSel = Select(waitClickable(pfxCss))
    numModelPfx = len(modelPfxSel.options)
    for pfxIdx in range(startPfxIdx, numModelPfx):
        modelPfxSel.select_by_index(pfxIdx)
        modelSfxSel = Select(waitClickable(sfxCss))
        numModelSfx = len(modelSfxSel.options)
        for sfxIdx in range(startSfxIdx, numModelSfx):
            print("pfxIdx=%d, sfxIdx=%d" % (pfxIdx, sfxIdx))
            modelSfxSel.select_by_index(sfxIdx)
            pfxTxt = modelPfxSel.options[pfxIdx].text
            sfxTxt = modelSfxSel.options[sfxIdx].text
            modelName = pfxTxt + '-' + sfxTxt
            print("Page1: modelName=", modelName)
            goBtn = waitClickable('.prodtd > p:nth-child(3) > a:nth-child(7)')
            goBtn.click()
            harvestPage2()
            driver.back()
            # Re-acquire both selects after navigating back and restore the
            # prefix selection.
            modelPfxSel = Select(waitClickable(pfxCss))
            modelPfxSel.select_by_index(pfxIdx)
            modelSfxSel = Select(waitClickable(sfxCss))
        # The resume offset applies to the first prefix only.  Resetting it
        # here (rather than inside the loop body) also covers the case where
        # the first prefix has no suffix at or beyond the offset.
        startSfxIdx = 1

    # wait until all '.part' vanished
    while True:
        downloading = [name for name in os.listdir(dlDir)
                       if name.endswith('.part')]
        if not downloading:
            break
        print('-- Downloading : %s wait 3 seconds' % downloading)
        time.sleep(3)
    print('-- terminate firefox')
    driver.quit()
def getAllModels():
    """Fill the global ``allModels`` with the model names offered by the site.

    'zyxel_models.txt' is used as a cache when it exists, is larger than
    2 bytes and was modified less than 12 hours ago. Otherwise the model
    dropdown is opened, Keys.UP is sent repeatedly until the number of
    entries stops changing, the entries are read (blank ones and those
    starting with 'enter model ' are dropped), and the cache file is
    rewritten.

    On any exception ipdb is entered, the traceback printed and a
    screenshot saved.
    """
    global driver, allModels
    try:
        cacheFile = 'zyxel_models.txt'
        cacheUsable = (path.exists(cacheFile)
                       and path.getsize(cacheFile) > 2
                       and time.time() - path.getmtime(cacheFile) < 3600 * 12)
        if cacheUsable:
            with open(cacheFile, 'r', encoding='utf-8') as fin:
                cached = fin.read()
            allModels = [ln.strip() for ln in cached.splitlines() if ln.strip()]
            return

        itemCss = 'div.dropdown-menu.open ul li a'
        # click 'Enter model number here'
        btn = waitClickable('button[data-id=modelName]')
        btn.click()
        time.sleep(0.1)
        inp = waitClickable('.form-control')
        inp.click()
        inp.send_keys(Keys.UP)
        time.sleep(0.1)
        inp.send_keys(Keys.UP)

        # Keep sending UP until the dropdown stops growing.
        prevCount = getNumElem(itemCss)
        while True:
            inp.send_keys(Keys.UP)
            time.sleep(0.1)
            inp.send_keys(Keys.UP)
            curCount = getNumElem(itemCss)
            ulog('numModels=%d' % curCount)
            if curCount == prevCount:
                break
            prevCount = curCount

        rawTexts = [elem.text for elem in getElems(itemCss)]
        names = [txt.strip() for txt in rawTexts if txt.strip()]
        allModels = [name for name in names
                     if not name.lower().startswith('enter model ')]
        ulog('len(allModels)=%d' % len(allModels))
        with open(cacheFile, 'w', encoding='utf-8') as fout:
            fout.write('\n'.join(allModels))
        btn.click()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def pageWalker():
    """Jump to the resume page of the result pager, then walk every page.

    ``getStartIdx()`` gives the zero-based page index to resume from. The
    first loop repeatedly clicks the pager link whose number is closest to
    the wanted page until 'a.cur' reports that page. The second loop calls
    ``rowWalker()`` for the current page (with the page index pushed on
    ``prevTrail``) and clicks '.x-next-on' until that button can no longer
    be found.

    Any exception drops into ipdb and then prints the traceback.
    """
    global prevTrail, driver
    CSS = driver.find_elements_by_css_selector

    def pageNum(p):
        """Return the page number pager link *p* leads to.

        Uses the link text when it is an integer, otherwise the argument of
        ``void(...)`` in its href. Links without a usable number sort last
        (``sys.maxsize``).
        """
        try:
            return int(p.text.strip())
        except ValueError:
            pass
        href = p.get_attribute('href')
        if not href:
            return sys.maxsize
        try:
            return int(re.search(r'void\((.+)\)', href).group(1))
        except Exception as ex:
            ipdb.set_trace()
            traceback.print_exc()
            # Previously fell through and returned None, which made the
            # distance computation below raise TypeError.
            return sys.maxsize

    def waitPageLoaded():
        """Wait for the '.x-waite' busy indicator to appear and go away."""
        time.sleep(0.5)
        retryUntilTrue(lambda: len(CSS('.x-waite')) == 1, 16, 0.4)
        uprint('waitCursor shows')
        retryUntilTrue(lambda: len(CSS('.x-waite')) == 0 or
                       CSS('.x-waite')[0].is_displayed() == False, 60, 1)
        uprint('waitCursor disappears')

    try:
        startIdx = getStartIdx()
        startPage = startIdx + 1
        # The code assumes the pager starts on page 1 (index 0).
        idx = 0
        while idx != startIdx:
            ulog('idx=%d,page=%d' % (idx, (idx + 1)))
            pages = getElems('.x-page-com a')
            tarPage = min(pages, key=lambda p: abs(startPage - pageNum(p)))
            ulog('tarPage=%d' % pageNum(tarPage))
            tarPage.click()
            ulog('tarPage.click()')
            waitPageLoaded()
            curPage = int(waitText('a.cur'))
            ulog('curPage=%d' % curPage)
            idx = curPage - 1

        for idx in itertools.count(startIdx):
            ulog('idx=%d,page=%d' % (idx, (idx + 1)))
            prevTrail += [idx]
            rowWalker()
            prevTrail.pop()
            try:
                nextPage = waitClickable('.x-next-on')
            except (NoSuchElementException, TimeoutException):
                ulog('last page')
                break
            nextPage.click()
            ulog('nextPage.click()')
            waitPageLoaded()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def modelWalker(category):
    """Record every model listed in the second support combo box.

    Opens the '#Combo_support-select-2' dropdown, reads the text of each
    entry and upserts a (category, model) row into ``TFiles`` through
    ``sql()``.

    Args:
        category: category text stored alongside each model.

    On any exception ipdb is entered, the traceback printed and a
    screenshot saved to 'huawei_excep.png'.
    """
    global driver
    # The unused `CSS = driver.find_elements_by_css_selector` binding was
    # dropped: it did nothing, and being outside the try it could raise
    # before the handler below was armed.
    try:
        waitClickable('#Combo_support-select-2 div input').click()
        models = getElems('#Combo_support-select-2 ul a')
        ulog('numModels=%d' % len(models))
        for idx, elem in enumerate(models):
            # `category` and `model` must keep these names: they are looked
            # up by name through locals() below.
            model = elem.text
            ulog('idx=%d, model=%s' % (idx, model))
            sql("INSERT OR REPLACE INTO TFiles(category,model)"
                "VALUES(:category,:model)", locals())
            uprint('UPSERT "%(category)s," "%(model)s"' % locals())
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot('huawei_excep.png')
def getAllModels():
    """Load the list of model names into the global ``allModels``.

    When 'zyxel_models.txt' exists, holds more than 2 bytes and is younger
    than 12 hours, its non-blank lines are used. Otherwise the names are
    scraped from the model dropdown (Keys.UP is sent until the entry count
    stops changing), filtered, and written back to that file.

    On any exception ipdb is entered, the traceback printed and a
    screenshot saved.
    """
    global driver, allModels
    try:
        modelsFile = 'zyxel_models.txt'
        if path.exists(modelsFile):
            if path.getsize(modelsFile) > 2:
                if time.time() - path.getmtime(modelsFile) < 3600 * 12:
                    with open(modelsFile, 'r', encoding='utf-8') as fin:
                        text = fin.read()
                    allModels = [row.strip() for row in text.splitlines()
                                 if row.strip()]
                    return

        dropdownCss = 'div.dropdown-menu.open ul li a'

        def pressUpTwice():
            # Two UP key presses, 0.1 s apart.
            inp.send_keys(Keys.UP)
            time.sleep(0.1)
            inp.send_keys(Keys.UP)

        # click 'Enter model number here'
        btn = waitClickable('button[data-id=modelName]')
        btn.click()
        time.sleep(0.1)
        inp = waitClickable('.form-control')
        inp.click()
        pressUpTwice()
        lastCount = getNumElem(dropdownCss)
        stable = False
        while not stable:
            pressUpTwice()
            count = getNumElem(dropdownCss)
            ulog('numModels=%d' % count)
            stable = (count == lastCount)
            lastCount = count

        labels = [anchor.text for anchor in getElems(dropdownCss)]
        labels = [label.strip() for label in labels if label.strip()]
        allModels = [label for label in labels
                     if not label.lower().startswith('enter model ')]
        ulog('len(allModels)=%d' % len(allModels))
        with open(modelsFile, 'w', encoding='utf-8') as fout:
            fout.write('\n'.join(allModels))
        btn.click()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def fileEnumer():
    """Upsert one TFiles row per firmware entry of the current product page.

    Opens the support link and the download tab, chooses the "Others" OS,
    expands '#btn_type_20' and reads the tables and version paragraphs
    inside '#div_type_20'. Processing starts at the index returned by
    getStartIdx().

    Returns early (after saving a screenshot and logging) when the support
    link or '#btn_type_20' does not become clickable in time.

    On any other exception ipdb is entered, the traceback printed and a
    screenshot saved.
    """
    global driver,prevTrail,modelName
    # NOTE: CSS is not used anywhere in this function.
    CSS=driver.find_element_by_css_selector
    CSSs=driver.find_elements_by_css_selector
    try:
        try:
            waitClickable('#lisupport a',15,1.6).click()
        except TimeoutException:
            driver.save_screenshot('asus_no_firmware_download.png')
            ulog('No firmware download for "%s" !'%modelName)
            return
        enterFrame('ifame_auto_size')
        # click 'Driver & Tools'
        waitClickable('#a_support_tab_Download',40,2).click()
        # switch to frame
        enterFrame('ifame_auto_size')
        # open dropdown list to select "Others" OS
        waitClickable('#mainzone_Download2_btn_select_os',10,1).click()
        retryA(lambda:elemWithText('ul.dropdown-menu.os a', "Others").click())
        try:
            # expand firmware dropdown
            waitClickable('#btn_type_20',20,1).click()
        except TimeoutException:
            driver.save_screenshot('asus_no_firmware_download_2.png')
            ulog('No firmware download for" %s"!'%modelName)
            return
        # retryA(lambda:elemWithText('#download a','Firmware').click(), 20,1)
        waitUntilStable('#div_type_20',3,0.4)
        # Keep only the tables whose text starts with 'Description'.
        tables = [_ for _ in CSSs('#div_type_20 table') if getElemText(_).startswith('Description')]
        numTables = len(tables)
        ulog('numTables=%s'%numTables)
        versions = [getElemText(_) for _ in CSSs('#div_type_20 p')]
        # tables[i] and versions[i] are paired by position below.
        assert len(versions)==numTables
        pageUrl=driver.current_url
        startIdx = getStartIdx()
        for idx in range(startIdx, numTables):
            desc = tables[idx].text
            relDate = guessDate(desc)
            fileSize = guessFileSize(desc)
            fwVer = guessVersion(versions[idx])
            fileUrl = tables[idx].find_element_by_css_selector('a').get_attribute('href')
            trailStr=str(prevTrail+[idx])
            # NOTE(review): glocals() is defined elsewhere; it presumably
            # exposes this frame's globals and locals so the :name / %(name)s
            # placeholders resolve -- do not rename these variables without
            # checking it.
            sql("INSERT OR REPLACE INTO TFiles("
                " model, fw_ver, rel_date, file_size, fw_desc, "
                " page_url, file_url, tree_trail) VALUES"
                "(:modelName,:fwVer,:relDate, :fileSize, :desc,"
                ":pageUrl, :fileUrl, :trailStr)", glocals())
            ulog('UPSERT "%(modelName)s", "%(fwVer)s", "%(relDate)s", '
                 '%(fileSize)s, "%(fileUrl)s", %(trailStr)s '%glocals())
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def pageWalker():
    """Walk the rows of the current page, click '.arrowNext', walk again.

    Exactly two pages are visited. Any exception drops into ipdb and then
    prints the traceback.
    """
    global driver, prevTrail
    try:
        rowWalker()
        waitClickable('.arrowNext').click()
        rowWalker()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def categoryWalker():
    """Select each category of the first support combo box in turn.

    For every entry of '#Combo_support-select-1' the entry is clicked and
    ``modelWalker(category)`` is called with its text; afterwards the
    dropdown is reopened and its entries are fetched again.

    On any exception ipdb is entered, the traceback printed and a
    screenshot saved to 'huawei_excep.png'.
    """
    global driver
    try:
        comboInputCss = '#Combo_support-select-1 > div > input'
        comboItemsCss = '#Combo_support-select-1 ul a'

        # Select a Type (Category)
        waitClickable(comboInputCss).click()
        cats = getElems(comboItemsCss)
        numCats = len(cats)
        ulog('numCats=%d' % numCats)
        for idx in range(numCats):
            entry = cats[idx]
            category = entry.text
            ulog('idx=%d, select category=%s' % (idx, category))
            entry.click()
            modelWalker(category)
            # Reopen the dropdown and fetch the entries again for the next
            # pass.
            waitClickable(comboInputCss).click()
            cats = getElems(comboItemsCss)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot('huawei_excep.png')
def pageWalker():
    """Process the current page and the one after it.

    Calls ``rowWalker()``, clicks the '.arrowNext' button once it is
    clickable, then calls ``rowWalker()`` again. Any exception drops into
    ipdb and then prints the traceback.
    """
    global driver, prevTrail
    try:
        rowWalker()
        waitClickable('.arrowNext').click()
        rowWalker()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def main():
    """Search the site for a keyword and walk all result pages.

    Command line: ``argv[1]`` is the search keyword; every further argument
    contributes its first run of digits to ``startTrail``.

    Creates the ``TFiles`` table in
    'huawei_consumer_search_by_keyword.sqlite3' if needed, starts Firefox,
    submits the keyword, waits for the '.x-waite' busy indicator to appear
    and disappear, and calls ``pageWalker()``. The browser and the database
    connection are closed on success.

    On any exception ipdb is entered, the traceback printed and - if a
    browser was started - a screenshot saved to 'main_excep.png'.
    """
    global startTrail, prevTrail, driver, conn, keyword
    try:
        keyword = sys.argv[1]
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[2:]]
        ulog('startTrail=%s' % startTrail)
        conn = sqlite3.connect('huawei_consumer_search_by_keyword.sqlite3')
        sql("CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "keyword TEXT,"
            "file_name TEXT,"  # 'Ascend Mate (MT1-U06,Android 4.1,Emotion UI,V100R001C00B221,General Version)'
            "file_desc TEXT,"  # NBG5715
            "rel_date DATE,"  # 2015-05-30
            "file_size INTEGER,"  # '1.26 GB' '352.32 MB'
            "file_url TEXT,"  # "http://download-c.huawei.com/download/downloadCenter?downloadId=44602&version=92646&siteCode=worldwide"
            "tree_trail TEXT,"  # [1, 2]
            "file_sha1 TEXT,"
            "PRIMARY KEY (id),"
            "UNIQUE(file_name)"
            ")")
        driver = harvest_utils.getFirefox()
        harvest_utils.driver = driver
        prevTrail = []
        goToUrl(rootUrl)
        inp = waitClickable('#savekeyword')
        inp.click()
        inp.send_keys(keyword)
        waitClickable('#search_by_kw > img').click()
        CSS = driver.find_elements_by_css_selector
        retryUntilTrue(lambda: len(CSS('.x-waite')) == 1, 4, 0.4)
        uprint('waitCursor shows')
        retryUntilTrue(lambda: len(CSS('.x-waite')) == 0 or
                       CSS('.x-waite')[0].is_displayed() == False, 30, 1)
        uprint('waitCursor disappears')
        pageWalker()
        driver.quit()
        conn.close()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        # The failure may have happened before the browser was created
        # (e.g. no keyword on the command line); touching `driver` then
        # would raise NameError from inside this handler.
        if globals().get('driver') is not None:
            driver.save_screenshot('main_excep.png')
def harvestPage2():
    """Harvest the firmware rows of the model page currently shown.

    Reads the model name from 'big > strong', then goes through the
    'tr#rsq' rows from the second one on. Rows whose text does not contain
    'firmware' are skipped. For the others the row is clicked, every '.fn9'
    file entry on the detail page is upserted into the ``dlink`` table and
    its download is triggered, and the browser navigates back.

    Updates the global ``modelName``.
    """
    global modelName, driver, conn
    modelName = getText('big > strong')
    print("Page2 modelName=", modelName)
    numRows = getNumElem('tr#rsq')
    if numRows == 0:
        return
    for iRow in range(2, numRows + 1):
        row = waitClickable('tr#rsq:nth-child(%d)' % iRow)
        rowText = getElemText(row)
        uprint('Row%d %s' % (iRow, rowText))
        if 'firmware' not in rowText.lower():
            print(' -- bypass')
            continue
        uprint('Click ' + rowText)
        row.click()
        modelName = getText('big > strong')
        print('Page3 modelName=%s' % modelName)
        desc = getText('.prodtd > table:nth-child(4) > tbody:nth-child(1) '
                       '> tr:nth-child(2) > td:nth-child(2)')
        uprint("Description=" + desc)
        for fn9 in getElems('.fn9'):
            fileName = getElemText(fn9)
            fileExt = path.splitext(fileName)[1].lower()
            uprint('filaName="%s"' % fileName)
            if fileExt in ['.doc', '.docx', '.txt', '.pdf', '.htm', '.html', '.xls']:
                # NOTE(review): the file is only reported here; it is still
                # stored and downloaded below.  Confirm whether it should be
                # skipped instead.
                uprint(' -- fileName "%s" doesn\'t look like a firmware file' % fileName)
            record = {'model': modelName, 'fileName': fileName, 'desc': desc}
            csr = conn.cursor()
            csr.execute(
                "INSERT OR REPLACE INTO dlink(model,file_name,desc)"
                "VALUES(:model,:fileName,:desc)", record)
            # Without an explicit commit the row stays in an open
            # transaction and is discarded when the process exits.
            conn.commit()
            uprint('INSERT OR REPLACE INTO "%(model)s","%(fileName)s","%(desc)s"'
                   % record)
            # waitDownloading()
            clickDownloadableElem(fn9)
        driver.back()
def main1(catIdx, famIdx, prdIdx, executor):
    """Crawl the Netgear download center and queue firmware downloads.

    Walks category -> family -> product with a PhantomJS browser, starting
    at the given indices, and submits ``download_file(prdTxt, desc, fw_url)``
    to *executor* for every displayed result whose text contains 'firmware'
    and whose URL starts with 'http'.

    Args:
        catIdx: category index to start from.
        famIdx: family index to start from (first category only).
        prdIdx: product index to start from (first family only).
        executor: object with a ``submit(fn, *args)`` method.

    Returns:
        ``(None, None, None)`` when the whole crawl finished; otherwise the
        (category, family, product) indices that were current when an
        exception was caught.  NOTE(review): presumably the caller passes
        them back in to resume - confirm against the caller.
    """
    startCatIdx, startFamIdx, startPrdIdx = catIdx, famIdx, prdIdx
    driver = webdriver.PhantomJS()
    harvest_utils.driver = driver
    driver.get('http://downloadcenter.netgear.com/')
    # click DrillDown
    waitClickable('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter'
                  '_BasicSearchPanel_btnAdvancedSearch').click()
    # wait Page2
    try:
        catSel = Select(waitClickable(catSelCss))
        numCat = len(catSel.options)
        for catIdx in range(startCatIdx, numCat):
            catSel = Select(waitClickable(catSelCss))
            print('catIdx=', catIdx)
            catTxt = catSel.options[catIdx].text
            uprint('catTxt= ' + catTxt)
            oldText = getText(famSelCss)
            catSel.select_by_index(catIdx)
            waitTextChanged(famSelCss, oldText)
            famSel = Select(waitClickable(famSelCss))
            numFam = len(famSel.options)
            for famIdx in range(startFamIdx, numFam):
                famSel = Select(waitClickable(famSelCss))
                print('famIdx=', famIdx)
                famTxt = famSel.options[famIdx].text
                uprint('famTxt= ' + famTxt)
                oldText = getText(prdSelCss)
                famSel.select_by_index(famIdx)
                waitTextChanged(prdSelCss, oldText)
                prdSel = Select(waitClickable(prdSelCss))
                numPrd = len(prdSel.options)
                for prdIdx in range(startPrdIdx, numPrd):
                    prdSel = Select(waitClickable(prdSelCss))
                    print("catIdx=%d, famIdx=%d, prdIdx=%d" %
                          (catIdx, famIdx, prdIdx))
                    prdTxt = prdSel.options[prdIdx].text
                    uprint('cat,fam,prd=("%s","%s","%s")' %
                           (catTxt, famTxt, prdTxt))
                    prdWaiting = waitElem(prdWaitingCss)
                    prdSel.select_by_index(prdIdx)
                    # Best effort: give the busy indicator a moment to show
                    # up and then to vanish; a timeout on either is ignored.
                    try:
                        WebDriverWait(driver, 1, 0.5).\
                            until(lambda x: prdWaiting.is_displayed() is True)
                    except TimeoutException:
                        pass
                    try:
                        WebDriverWait(driver, 5, 0.5).\
                            until(lambda x: prdWaiting.is_displayed() is False)
                    except TimeoutException:
                        pass
                    numResults = waitText(numResultsCss, 3, 0.5)
                    if numResults is None:
                        continue
                    numResults = int(re.search(r"\d+", numResults).group(0))
                    print('numResults=', numResults)
                    if numResults > 10:
                        waitClickable("#lnkAllDownloadMore", 3).click()
                    try:
                        erItems = getElems('a.register-product.navlistsearch',
                                           3, 0.5)
                    except TimeoutException:
                        erItems = getElems(
                            'div#LargeFirmware > ul > li > div > p > a.navlistsearch',
                            3)
                    if len(erItems) != numResults:
                        print('Error, numResults=%d, but len(erItems)=%d' %
                              (numResults, len(erItems)))
                    for itemIdx, erItem in enumerate(erItems):
                        if not erItem.is_displayed():
                            print('itemIdx=%d is not displayed()' % itemIdx)
                            continue
                        desc = getElemText(erItem)
                        uprint('desc="%s"' % desc)
                        if 'firmware' not in desc.lower():
                            continue
                        fw_url = erItem.get_attribute('data-durl')
                        if not fw_url:
                            # Fall back to the link target.  The attribute
                            # name was 'fw_url' here, which is not an anchor
                            # attribute; the sibling crawler reads 'href'.
                            fw_url = erItem.get_attribute('href')
                        print('fw_url=', fw_url)
                        if not fw_url:
                            continue
                        if not fw_url.startswith('http'):
                            print('Error: fw_url=', fw_url)
                            continue
                        executor.submit(download_file, prdTxt, desc, fw_url)
                        # download_file(prdTxt, desc, fw_url)
                # The product offset applies to the first family only; reset
                # after the loop so an empty first range cannot leak it.
                startPrdIdx = 0
            # Likewise the family offset applies to the first category only.
            startFamIdx = 0
        catIdx, famIdx, prdIdx = None, None, None
        return catIdx, famIdx, prdIdx
    except BaseException as ex:
        traceback.print_exc()
        dumpSnapshot('netgear_crawler.py.png')
    finally:
        driver.quit()
    return catIdx, famIdx, prdIdx
def versionWalker():
    """Visit the version nodes of the '.tree' element on the current page.

    Clicks the first '.treeLinks' link, then iterates over the '.tree a'
    nodes from the index taken from startTrail (1 when startTrail is empty).
    For each node treated as a leaf, the node is clicked unless its class
    already contains 'nodeSel', and tableRowWalker(fwVer) is called with the
    node index pushed on prevTrail. Finally the last breadcrumb link is
    clicked to go back.

    On any exception ipdb is entered, the exception and traceback are
    printed and a screenshot is saved to 'cisco_versionWalker.png'.
    """
    global startTrail,prevTrail,driver
    try:
        waitClickable('.treeLinks > a:nth-child(1)')
        ulog('current_url=%s'%driver.current_url)
        crumbs=waitText('.csProductSelectorBreadcrumb').replace('\n', ' > ')
        ulog('crumbs=%s'%crumbs)
        # click Expand All
        # NOTE: the node count is taken before the click below.
        numNodes = len(driver.find_elements_by_css_selector('.tree a'))
        ulog('number of versions=%d'%numNodes)
        try:
            with UntilTextChanged('.tree'):
                clickElem(waitClickable('.treeLinks > a:nth-child(1)'))
        except TimeoutException:
            # The tree text did not change in time; carry on regardless.
            pass
        treeText=waitText('.tree')
        ulog('treeText="%s"'%treeText)
        # Consume one level of the resume trail, if any is left.
        if startTrail:
            startIdx=startTrail.pop(0)
        else:
            startIdx=1
        ulog('startTrail=%s'%startTrail)
        ulog('prevTrail=%s'%prevTrail)
        ulog('startIdx=%d'%startIdx)
        assert startIdx >= 1
        try:
            prevFwVer=waitText('.tree a.nodeSel', 5)
        except TimeoutException:
            uprint("css='.tree a.nodeSel' not found")
            prevFwVer=None
        ulog('prevFwVer="%s"'%prevFwVer)
        for idx in range(startIdx, numNodes):
            # Look the nodes up again on every pass.
            nodes = driver.find_elements_by_css_selector('.tree a')
            # NOTE(review): leaf-ness is derived from the text of the
            # *previous* node (idx-1) -- confirm this is intended.
            isLeaf = (nodes[idx-1].text != '')
            ulog('goto Trail=%s'%(prevTrail+[idx]))
            if isLeaf:
                if not nodes[idx].text.strip():
                    continue
                fwVer=nodes[idx].text.strip()
                nodeClass=nodes[idx].get_attribute('class')
                ulog('fwVer="%s", nodeClass="%s"'%(fwVer,nodeClass))
                if 'nodeSel' not in nodeClass:
                    noWait= (prevFwVer==fwVer) if prevFwVer else False
                    try:
                        with UntilTextChanged('table#imageTableContainer',10,1,noWait):
                            ulog('Click "%s"'%fwVer)
                            clickElem(nodes[idx])
                    except TimeoutException:
                        # One more attempt; a second timeout propagates to
                        # the outer handler.
                        with UntilTextChanged('table#imageTableContainer',10,1,noWait):
                            ulog('Click "%s" twice'%fwVer)
                            clickElem(nodes[idx])
                prevTrail+=[idx]
                tableRowWalker(fwVer)
                prevTrail.pop()
                prevFwVer=fwVer
        # go back page
        crumbs=getElems('.csProductSelectorBreadcrumb a')
        ulog('backto "%s"'%getElemText(crumbs[-1]))
        ulog('prevTail=%s'%prevTrail)
        # NOTE(review): hard-coded debugger breakpoint for one specific
        # trail -- looks like a leftover from a debugging session.
        if prevTrail==[2, 1, 0, 1, 2, 0]:
            ipdb.set_trace()
        clickElem(crumbs[-1])
    except Exception as ex:
        ipdb.set_trace()
        print(ex); traceback.print_exc()
        driver.save_screenshot('cisco_versionWalker.png')
def walkFile():
    """Store the firmware entries listed for the current search result.

    Reads the model name from 'h2#searchResults' and the result count from
    '#LargeFirmware>p'. When more than 10 files are announced the
    '#lnkAllDownloadMore' link is clicked (up to 10 attempts) and the last
    entry is given up to 10 half-second waits to become displayed. Every
    listed file from ``getStartIdx()`` on whose text contains 'firmware' is
    passed to ``storeFile(modelName, element)`` with its index pushed on
    ``prevTrail``.

    Returns:
        PROC_OK after the files were processed; PROC_GIVE_UP when the
        heading never appeared; TRY_AGAIN after a stale-element error;
        None when there is no matching result, the result links are
        missing, or an unexpected exception was handled here.

    Raises:
        TimeoutException: re-raised unchanged when not handled internally.
    """
    global driver, prevTrail
    try:
        try:
            modelName = waitTextChanged('h2#searchResults', None, 5, 1)
        except TimeoutException:
            try:
                modelName = waitText('h2#searchResults', 5, 1)
            except TimeoutException:
                return PROC_GIVE_UP
        ulog('modelName="%s"' % modelName)

        countText = waitText('#LargeFirmware>p')
        ulog('resutlsCount=%s' % countText)
        if countText.startswith('No matching'):
            return
        numFiles = int(re.search(r'\d+', countText).group(0))
        ulog('numFiles=%d' % numFiles)

        linkCss = '#LargeFirmware a.navlistsearch'
        try:
            waitTextChanged(linkCss, None, 1, 0.5)
            ulog('waitTextChanged #LargeFirmware a.navlistsearch')
        except TimeoutException:
            ulog('TimeoutException: #LargeFirmware a.navlistsearch')
        except NoSuchElementException:
            ulog('NoSuchElementException #LargeFirmware a.navlistsearch')
            return

        if numFiles > 10:
            ulog('click moreResults because numFiles=%d>10' % numFiles)
            for attempt in range(10):
                moreResults = waitClickable('#lnkAllDownloadMore')
                try:
                    moreResults.click()
                except WebDriverException:
                    time.sleep(0.5)
                    continue
                ulog('moreResults.click()')
                break
            else:
                # Never managed to click: handled like a stale element so
                # the caller is told to try again.
                raise StaleElementReferenceException()
            lastFile = driver.find_element_by_css_selector(
                '#LargeFirmware li:nth-child(%d) a.navlistsearch' % numFiles)
            for attempt in range(10):
                if lastFile.is_displayed():
                    break
                time.sleep(0.5)

        files = getElems(linkCss)
        # get firmware download URL
        for idx in range(getStartIdx(), numFiles):
            entry = files[idx]
            assert entry.is_displayed()
            fileName = entry.text
            ulog('idx=%d, fileName="%s"' % (idx, fileName))
            if 'firmware' not in fileName.lower():
                continue
            prevTrail.append(idx)
            storeFile(modelName, entry)
            prevTrail.pop()
        return PROC_OK
    except StaleElementReferenceException:
        # Click the close button if there is one, then ask for a retry.
        try:
            driver.find_element_by_css_selector("a.btn.close.fl-left").click()
        except NoSuchElementException:
            pass
        return TRY_AGAIN
    except TimeoutException:
        raise
    except Exception as ex:
        traceback.print_exc()
        ipdb.set_trace()
        driver.save_screenshot('netgear_exc.png')
def main():
    """Crawl the Linksys support sitemap and record firmware downloads.

    Command line: ``argv[1]`` is the model index to start from and
    ``argv[2]`` the revision (accordion) index to start from for that first
    model; both default to 0.

    For every model link on the sitemap that is not yet in ``TFiles``, the
    'Download Software' page is opened, each '.article-accordian' section
    is expanded, and one row per 'Download' link is upserted into
    'Linksys.sqlite3' (date, version, title and size are guessed from the
    text preceding the link). Models without a download page, and revisions
    without downloads, get a placeholder row.

    The browser is quit at the end, also after a handled exception.
    """
    import traceback

    global driver, conn
    startModelIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    startRevisionIdx = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    brand = 'Linksys'
    harvest_utils.driver = getFirefox()
    driver = harvest_utils.driver
    conn = sqlite3.connect('Linksys.sqlite3')
    csr = conn.cursor()
    csr.execute(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "brand TEXT,"
        "model TEXT,"
        "revision TEXT,"  # hardware version
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_size INTEGER,"
        "href TEXT,"
        "file_sha1 TEXT,"
        "PRIMARY KEY (brand,model,revision,file_title)"
        ");")
    conn.commit()
    driver.get('http://www.linksys.com/us/support/sitemap/')
    try:
        numModels = getNumElem('.item ul li a')
        print('numModels=', numModels)
        for modelIdx in range(startModelIdx, numModels):
            # The revision offset belongs to the first model only.  Deriving
            # it per model keeps it from leaking to the next model when the
            # first one is bypassed or has too few revisions.
            firstRevisionIdx = startRevisionIdx if modelIdx == startModelIdx else 0
            modelElm = getElems('.item ul li a')[modelIdx]
            modelText = getElemText(modelElm, 5)
            print('modelIdx=', modelIdx)
            uprint('modelText="%s"' % modelText)
            # guess Possible Model
            model = guessModel(modelText)
            print('model=', model)
            # Named SQL parameters below are resolved from locals(); keep
            # the variable names in sync with the placeholders.
            rows = csr.execute(
                "SELECT model from TFiles WHERE model=:model", locals()
            ).fetchall()
            if rows:
                print('model "%s" already in TFiles, bypass!!' % model)
                continue
            modelElm.click()
            # click 'Download Software'
            try:
                waitClickable('a[title="Download Software"]', 40).click()
            except TimeoutException:
                print('No "Download Software" link found, bypass!!')
                csr.execute(
                    "INSERT INTO TFiles(brand,model,revision)VALUES"
                    "(:brand,:model,'')", locals())
                conn.commit()
                print('INSERT model="%s"' % model)
                driver.back()
                continue
            # enumerate all accordians
            accordians = getElems('.article-accordian', 10)
            numAccordians = len(accordians)
            print('numAccordians=', numAccordians)
            print('driver.current_url=', driver.current_url)
            for revisionIdx in range(firstRevisionIdx, numAccordians):
                accordians = getElems('.article-accordian')
                accordian = accordians[revisionIdx]
                revisionTxt = getElemText(accordian)
                print('revisionIdx=', revisionIdx)
                uprint('revisionTxt="%s"' % revisionTxt)
                revision = guessRevision(revisionTxt)
                print('revision=', revision)
                divId = accordian.get_attribute('data-collapse-target')
                # expand accordian 'revision'='Hardware Version'
                driver.execute_script(
                    "document.querySelectorAll('.article-accordian')[%d].click()"
                    % (revisionIdx))
                divElm = waitVisible('#' + divId)
                divTxt = getElemTextUntilStabled(divElm, 10, 2.5)
                assert divTxt
                uprint('divTxt="%s"' % divTxt)
                numDowns = getCount(divTxt, 'Download')
                if numDowns == 0:
                    csr.execute(
                        "INSERT INTO TFiles(brand,model,revision)VALUES"
                        "(:brand,:model,:revision)", locals())
                    conn.commit()
                    print('INSERT "%(model)s","%(revision)s"' % locals())
                    continue
                downElms = iter(divElm.find_elements_by_css_selector('a'))
                lastSpanEnd = 0
                for downIdx in range(numDowns):
                    # The text between the previous 'Download' line and this
                    # one describes this download; it is handed to the
                    # guessers with its lines in reverse order.
                    spanBegin = getNthIndex(divTxt, downIdx, 'Download')
                    spanEnd = divTxt.find('\n', spanBegin + len('Download'))
                    if spanEnd == -1:
                        spanEnd = len(divTxt)
                    foreword = '\n'.join(
                        reversed(divTxt[lastSpanEnd:spanEnd].splitlines()))
                    fwDate = guessDate(foreword)
                    fileSize = guessFileSize(foreword)
                    fwVer = guessVersion(foreword)
                    if fwVer:
                        fileTitle = guessFileTitle(foreword, fwVer)
                    else:
                        fileTitle = guessFileTitle2(foreword)
                    # Advance to the next anchor whose text starts with
                    # 'Download'.
                    while True:
                        downElm = next(downElms)
                        if downElm.text.strip().startswith('Download'):
                            break
                    href = downElm.get_attribute('href')
                    lastSpanEnd = spanEnd
                    csr.execute(
                        "INSERT OR REPLACE INTO TFiles(brand,model,revision,"
                        "fw_date, fw_ver, file_title, file_size, "
                        "href) VALUES (:brand,:model,:revision,"
                        ":fwDate, :fwVer, :fileTitle,"
                        ":fileSize, :href)", locals())
                    conn.commit()
                    # fileSize is formatted with %s (was %d) so that a size
                    # that could not be guessed does not abort the crawl
                    # with a TypeError from this log line.
                    uprint("INSERT '%(model)s', '%(revision)s', '%(fwDate)s'"
                           ", '%(fwVer)s', '%(fileTitle)s', '%(fileSize)s'"
                           ", '%(href)s'" % locals())
            driver.back()
            driver.back()
    except http.client.IncompleteRead as ex:
        print(ex)
        traceback.print_exc()
        print('-- Selenium exhausted')
        # The browser is quit once, below (it used to be quit here as well).
    except Exception as ex:
        import ipdb
        ipdb.set_trace()
        print(ex)
        print('driver.current_url=', driver.current_url)
        traceback.print_exc()
    print('-- terminate firefox')
    driver.quit()
def fileWalker():
    """Upsert the firmware files listed on the current product page.

    Reads the product and model names, opens the "Support" tab and the
    "Downloads" accordion, and scans the rows of 'table.supp'. A row is
    stored when its first cell yields a date and its link text contains
    'firmware'. When there is no table, or no row was stored, a row with
    only model, product name, page URL and trail is upserted instead.
    The browser navigates back before returning normally.

    On any exception ipdb is entered, the traceback printed and a
    screenshot saved.
    """
    global driver,prevTrail
    try:
        waitUntil(isReadyState)
        prodName=waitText('#prodname')
        ulog('prodName="%s"'%prodName)
        modelName = waitText('#prodmodel')
        ulog('modelName="%s"'%modelName)
        # click "Support"
        retryA(lambda: elemWithText('li.tab-link', 'Support').click())
        # expand "Downloads"
        waitClickable('div.accordion-section:nth-child(2) a').click()
        pageUrl=driver.current_url
        # select tables
        try:
            tables = getElems('table.supp',9,1)
        except TimeoutException:
            tables=None
        if not tables:
            ulog('no firmware download for "%s"'%modelName)
            trailStr=str(prevTrail)
            # NOTE(review): glocals() is defined elsewhere; it presumably
            # exposes this frame's globals and locals so the :name / %(name)s
            # placeholders resolve -- do not rename these variables without
            # checking it.
            sql("INSERT OR REPLACE INTO TFiles (model,product_name,"
                "page_url,tree_trail) VALUES"
                "(:modelName, :prodName,"
                ":pageUrl,:trailStr)",glocals())
            ulog('UPSERT "%(modelName)s", "%(prodName)s", '%glocals())
            driver.back()
            return
        files = getElems('table.supp tr')
        startIdx = getStartIdx()
        numFiles=len(files)
        ulog('numFiles=%s'%numFiles)
        # Becomes True once at least one firmware row has been stored.
        bUpserted=False
        for idx in range(startIdx, numFiles):
            try:
                col=files[idx].find_element_by_css_selector('td:nth-child(1)')
            except NoSuchElementException:
                # Row without a first <td>.
                ulog('bypass idx=%s'%idx)
                continue
            fwDate=guessDate(col.text)
            if not fwDate:
                ulog('bypass idx=%s'%idx)
                continue
            desc=files[idx].find_element_by_css_selector('td:nth-child(2)')
            fwDesc=desc.text
            fileName=desc.find_element_by_css_selector('a')
            ulog('fileName.text="%s"'%fileName.text)
            if 'firmware' not in fileName.text.lower():
                ulog('bypass idx=%s'%idx)
                continue
            fwVer = guessVersion(fileName.text)
            fileUrl=fileName.get_attribute('href')
            # Overwrites the value assigned above with the stripped text.
            fwDesc=desc.text.strip()
            trailStr=str(prevTrail+[idx])
            ulog('trail=%s'%trailStr)
            sql("INSERT OR REPLACE INTO TFiles (model,product_name,"
                "fw_date, fw_ver, fw_desc, "
                "page_url,file_url,tree_trail) VALUES"
                "(:modelName, :prodName,"
                ":fwDate,:fwVer,:fwDesc,"
                ":pageUrl,:fileUrl,:trailStr)",glocals())
            ulog('UPSERT "%(modelName)s", "%(prodName)s", "%(fwDate)s", '
                 ' "%(fwVer)s", %(fileUrl)s'%glocals())
            bUpserted=True
        if not bUpserted:
            # Nothing qualified: still record that the model was visited.
            trailStr=str(prevTrail)
            sql("INSERT OR REPLACE INTO TFiles (model,product_name,"
                "page_url,tree_trail) VALUES"
                "(:modelName, :prodName,"
                ":pageUrl,:trailStr)",glocals())
            ulog('UPSERT "%(modelName)s", "%(prodName)s", '%glocals())
        driver.back()
        return
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_exc.png')
def main():
    """Crawl the Netgear download center and record every listed download.

    Command line: ``argv[1]``, ``argv[2]`` and ``argv[3]`` are the category,
    family and product indices to start from (default 0). The family offset
    applies to the first category only and the product offset to the first
    family only.

    Creates the ``TFiles`` table in 'netgear.sqlite3' if needed, walks
    category -> family -> product through the drill-down selects and upserts
    one row per displayed result link via ``sql()``.

    On any exception ipdb is entered and the traceback printed; the browser
    is quit afterwards in either case.
    """
    global driver, conn
    startCatIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    startFamIdx = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    startPrdIdx = int(sys.argv[3]) if len(sys.argv) > 3 else 0
    harvest_utils.driver = getFirefox(dlDir)
    driver = harvest_utils.driver
    conn = sqlite3.connect('netgear.sqlite3')
    csr = conn.cursor()
    csr.execute("CREATE TABLE IF NOT EXISTS TFiles("
                "brand TEXT,"
                "category TEXT,"
                "family TEXT,"
                "product TEXT,"  # -- is model
                "desc TEXT,"  # -- is fileName
                "href TEXT,"
                "file_sha1 TEXT,"
                "PRIMARY KEY (product,desc)"
                ")")
    conn.commit()
    driver.get('http://downloadcenter.netgear.com/')
    # click DrillDown
    waitClickable('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_BasicSearchPanel_btnAdvancedSearch').click()
    # wait Page2
    try:
        catSel = Select(waitClickable(catSelCss))
        numCat = len(catSel.options)
        for catIdx in range(startCatIdx, numCat):
            catSel = Select(waitClickable(catSelCss))
            print('catIdx=', catIdx)
            catTxt = catSel.options[catIdx].text
            uprint('catTxt=' + catTxt)
            catSel.select_by_index(catIdx)
            waitTextChanged(famSelCss)
            famSel = Select(waitClickable(famSelCss))
            numFam = len(famSel.options)
            for famIdx in range(startFamIdx, numFam):
                famSel = Select(waitClickable(famSelCss))
                print('famIdx=', famIdx)
                famTxt = famSel.options[famIdx].text
                uprint('famTxt=' + famTxt)
                famSel.select_by_index(famIdx)
                waitTextChanged(prdSelCss)
                prdSel = Select(waitClickable(prdSelCss))
                numPrd = len(prdSel.options)
                for prdIdx in range(startPrdIdx, numPrd):
                    prdSel = Select(waitClickable(prdSelCss))
                    print("catIdx=%d, famIdx=%d, prdIdx=%d" %
                          (catIdx, famIdx, prdIdx))
                    prdTxt = prdSel.options[prdIdx].text
                    uprint('cat,fam,prd=("%s","%s","%s")' %
                           (catTxt, famTxt, prdTxt))
                    prdWaiting = waitElem(prdWaitingCss)
                    prdSel.select_by_index(prdIdx)
                    # Wait for the busy indicator to show up, then to vanish.
                    WebDriverWait(driver, 5, poll_frequency=0.5).\
                        until(lambda x: prdWaiting.is_displayed() == True)
                    WebDriverWait(driver, 60, poll_frequency=0.5).\
                        until(lambda x: prdWaiting.is_displayed() == False)
                    numResults = waitText(numResultsCss, 3)
                    print('numResults=', numResults)
                    if numResults is None:
                        continue
                    numResults = int(re.search(r"\d+", numResults).group(0))
                    if numResults > 10:
                        showMore = waitClickable("#lnkAllDownloadMore", 3)
                        showMore.click()
                    try:
                        erItems = getElems('a.register-product.navlistsearch', 3)
                    except TimeoutException:
                        erItems = getElems('div#LargeFirmware > ul > li > div > p > a.navlistsearch', 3)
                    if len(erItems) != numResults:
                        print('Error, numResults=%d, but len(erItems)=%d'
                              % (numResults, len(erItems)))
                    for erItem in erItems:
                        if not erItem.is_displayed():
                            continue
                        # catTxt/famTxt/prdTxt/desc/href are looked up by
                        # name through locals() below - keep the names.
                        desc = getElemText(erItem)
                        uprint('desc="%s"' % desc)
                        href = erItem.get_attribute('data-durl')
                        if not href:
                            href = erItem.get_attribute('href')
                        print('href=', href)
                        if not href:
                            # Neither attribute is set; previously the
                            # startswith() call below raised AttributeError
                            # on None and ended the whole crawl.
                            print('Error: href=', href)
                            continue
                        if not href.startswith('http'):
                            print('Error: href=', href)
                        sql("INSERT OR REPLACE INTO TFiles"
                            "(brand,category,family,product,desc,href)VALUES"
                            "('Netgear',:catTxt,:famTxt,:prdTxt,:desc,:href)",
                            locals())
                        uprint('INSERT '
                               '("%(catTxt)s","%(famTxt)s","%(prdTxt)s","%(desc)s","%(href)s")'
                               % locals())
                # Reset after the loop (not inside it) so that an empty
                # first range cannot carry the offset over.
                startPrdIdx = 0
            startFamIdx = 0
    except Exception as ex:
        import ipdb
        ipdb.set_trace()
        print(ex)
        import traceback
        traceback.print_exc()
    print('-- terminate firefox')
    driver.quit()