def walkProd():
    """Walk every option of the Product select box and harvest its files.

    The advertisement overlay is dismissed first when its close button is
    present.  Starting at the index returned by ``getStartIdx()``, each
    option is selected, its index is pushed onto ``prevTrail`` while
    ``walkFile()`` runs (repeated for as long as it answers ``TRY_AGAIN``),
    and popped afterwards.

    Returns:
        ``PROC_OK`` once all options were visited.  When an exception is
        handled here (traceback, ``ipdb`` session, screenshot saved to
        ``netgear_exc.png``) the function falls through and returns ``None``.
    """
    global driver, prevTrail
    productCss = ('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter'
                  '_adsPanel_lbProduct')
    try:
        # Click the left button ("No Thanks") of the advertisement popup;
        # a missing popup is not an error.
        try:
            closeBtn = driver.find_element_by_css_selector(
                "a.btn.close.fl-left")
            closeBtn.click()
        except NoSuchElementException:
            pass

        waitTextChanged(productCss)
        optionCount = len(Select(css(productCss)).options)
        ulog("numProds=%d" % optionCount)

        firstIdx = getStartIdx()
        for optIdx in range(firstIdx, optionCount):
            # The control is looked up again on every pass (presumably the
            # page re-renders it after a selection -- TODO confirm).
            selectBox = Select(css(productCss))
            ulog("idx=%s" % optIdx)
            ulog('select "%s"' % selectBox.options[optIdx].text)
            selectBox.select_by_index(optIdx)

            prevTrail.append(optIdx)
            outcome = walkFile()
            while outcome == TRY_AGAIN:
                outcome = walkFile()
            if outcome == PROC_GIVE_UP:
                ulog('"%s" is GIVE UP' % selectBox.options[optIdx].text)
            prevTrail.pop()
        return PROC_OK
    except Exception as ex:
        traceback.print_exc()
        ipdb.set_trace()
        driver.save_screenshot('netgear_exc.png')
def selectProduct(prev_url):
    """Enter every product link (``.items a``) of the current result page.

    Waits for the ``.search-results-notification`` text to change, logs the
    product links, then for each index from ``getStartIdx()`` onwards pushes
    the index onto ``prevTrail``, hands the link to ``enterElem`` together
    with ``selectSupport``, pops the index and re-reads the link list.
    Finally the browser is sent back to ``prev_url`` and the notification
    text is awaited again.

    Args:
        prev_url: URL loaded again once all products were visited.

    Any exception opens an ``ipdb`` session, prints the traceback and saves
    a screenshot named after the script and function; nothing is re-raised.
    """
    global category, prevTrail, searchResultsNotification, driver
    noticeCss = '.search-results-notification'
    try:
        changedText = waitTextChanged(noticeCss, searchResultsNotification)
        searchResultsNotification = changedText.strip()

        links = getElems('.items a')
        retryUntilTrue(
            lambda: ulog('products=%s' % [(pos, link.text)
                                          for pos, link in enumerate(links)])
            >= 0)

        linkCount = len(links)
        firstIdx = getStartIdx()
        for linkIdx in range(firstIdx, linkCount):
            target = links[linkIdx]
            ulog('click %s,"%s"' % (linkIdx, target.text))
            prevTrail.append(linkIdx)
            enterElem(target, selectSupport)
            prevTrail.pop()
            # Look the links up again before the next pass.
            links = getElems('.items a')

        driver.get(prev_url)
        changedText = waitTextChanged(noticeCss, searchResultsNotification)
        searchResultsNotification = changedText.strip()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def selectCategory(prev_url):
    """Enter every filter link (``.filter-list a``) of the current page.

    The depth of ``prevTrail`` decides how the page is awaited: at depth 1
    the filter list must become visible (otherwise the browser returns to
    ``prev_url`` and the function ends), at depth 2 the notification text
    must change.  Each filter link from ``getStartIdx()`` onwards is then
    entered through ``enterElem`` -- with ``selectCategory`` itself when the
    trail has two entries after the push, with ``selectProduct`` otherwise.

    Args:
        prev_url: URL to return to afterwards; skipped when falsy.

    Any exception opens an ``ipdb`` session, prints the traceback and saves
    a screenshot named after the script and function; nothing is re-raised.
    """
    global category, prevTrail, searchResultsNotification, driver
    noticeCss = '.search-results-notification'
    try:
        depth = len(prevTrail)
        if depth == 1:
            try:
                waitVisible('.filter-list', 30, 0.4)
            except TimeoutException:
                ulog('No search results, url=%s' % driver.current_url)
                driver.get(prev_url)
                return
            # e.g. "Your search for f returned 4196 results"
            searchResultsNotification = waitText(noticeCss).strip()
        elif depth == 2:
            # e.g. "Your search for f returned 67 results"
            changedText = waitTextChanged(noticeCss, searchResultsNotification)
            searchResultsNotification = changedText.strip()
        ulog('%s' % searchResultsNotification)

        category = waitText('.accordion-activate a')
        ulog('category="%s"' % category)

        links = getElems('.filter-list a')
        retryUntilTrue(
            lambda: ulog('cats=%s' % [(pos, link.text)
                                      for pos, link in enumerate(links)]))

        linkCount = len(links)
        firstIdx = getStartIdx()
        for linkIdx in range(firstIdx, linkCount):
            ulog('click %s,"%s"' % (linkIdx, links[linkIdx].text))
            prevTrail.append(linkIdx)
            # Two trail entries mean one more filter level follows;
            # any other depth leads to the product list.
            descend = selectCategory if len(prevTrail) == 2 else selectProduct
            enterElem(links[linkIdx], descend)
            prevTrail.pop()
            # Look the links up again before the next pass.
            links = getElems('.filter-list a')

        if prev_url:
            driver.get(prev_url)
            changedText = waitTextChanged(noticeCss, searchResultsNotification)
            searchResultsNotification = changedText.strip()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def walkProdFam():
    """Walk every option of the ProductFamily (middle) select control.

    Starting at the index returned by ``getStartIdx()``, each option is
    selected, its index is pushed onto ``prevTrail`` while ``walkProd()``
    runs, and popped afterwards.

    Any exception is printed, opens an ``ipdb`` session and is followed by a
    screenshot saved to ``netgear_exc.png``; nothing is re-raised and the
    function returns ``None`` in every case.
    """
    global driver, prevTrail
    familyCss = ('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter'
                 '_adsPanel_lbProductFamily')
    try:
        waitTextChanged(familyCss)
        familyCount = len(Select(css(familyCss)).options)
        ulog("numProdFams=%d" % familyCount)

        firstIdx = getStartIdx()
        for famIdx in range(firstIdx, familyCount):
            # The control is looked up again on every pass (presumably the
            # page re-renders it after a selection -- TODO confirm).
            selectBox = Select(css(familyCss))
            ulog("idx=%s" % famIdx)
            ulog('select "%s"' % selectBox.options[famIdx].text)
            selectBox.select_by_index(famIdx)

            prevTrail.append(famIdx)
            walkProd()
            prevTrail.pop()
    except Exception as ex:
        traceback.print_exc()
        ipdb.set_trace()
        driver.save_screenshot('netgear_exc.png')
def walkFile():
    """Harvest the firmware links listed for the currently selected product.

    Reads the model name from ``h2#searchResults`` and the result count from
    ``#LargeFirmware>p``, expands the list when more than 10 files are
    reported, and passes every link whose text contains "firmware"
    (case-insensitive) to ``storeFile``.  The index of the link being stored
    is kept on ``prevTrail`` for the duration of the ``storeFile`` call.

    Returns:
        PROC_OK       the link list was walked to its end.
        PROC_GIVE_UP  ``h2#searchResults`` produced no text within both
                      waits.
        TRY_AGAIN     a StaleElementReferenceException interrupted the walk
                      (``walkProd`` repeats the call on this value).
        None          the page reports 'No matching' results, waiting for the
                      links raised NoSuchElementException, or an unexpected
                      exception was handled here (traceback, ``ipdb``
                      session, screenshot ``netgear_exc.png``).

    Raises:
        TimeoutException: passed on to the caller unchanged.
    """
    global driver, prevTrail
    try:
        try:
            modelName = waitTextChanged('h2#searchResults', None, 5, 1)
        except TimeoutException:
            # The heading may already hold its final text; read it as is.
            try:
                modelName = waitText('h2#searchResults', 5, 1)
            except TimeoutException:
                return PROC_GIVE_UP
        ulog('modelName="%s"'%modelName)

        resultsCount = waitText('#LargeFirmware>p')
        ulog('resutlsCount=%s'%resultsCount)
        if resultsCount.startswith('No matching'):
            return
        numFiles = int(re.search(r'\d+', resultsCount).group(0))
        ulog('numFiles=%d'%numFiles)

        try:
            waitTextChanged('#LargeFirmware a.navlistsearch', None, 1, 0.5)
            ulog('waitTextChanged #LargeFirmware a.navlistsearch')
        except TimeoutException:
            # Unchanged link text is tolerated; carry on with what is there.
            ulog('TimeoutException: #LargeFirmware a.navlistsearch')
        except NoSuchElementException:
            ulog('NoSuchElementException #LargeFirmware a.navlistsearch')
            return

        if numFiles > 10:
            ulog('click moreResults because numFiles=%d>10'%numFiles)
            bMoreResultsClicked = False
            for _i in range(10):
                moreResults = waitClickable('#lnkAllDownloadMore')
                try:
                    moreResults.click()
                    ulog('moreResults.click()')
                    bMoreResultsClicked = True
                    break
                except WebDriverException:
                    time.sleep(0.5)
            if not bMoreResultsClicked:
                # Reuse the stale-element path below so the caller retries.
                raise StaleElementReferenceException()
            lastFile = driver.find_element_by_css_selector(
                '#LargeFirmware li:nth-child(%d) a.navlistsearch'%numFiles)
            # Give the expanded list up to ~5 s to show its last entry.
            for _i in range(10):
                if lastFile.is_displayed():
                    break
                time.sleep(0.5)

        files = getElems('#LargeFirmware a.navlistsearch')
        startIdx = getStartIdx()
        # The count shown on the page and the links actually found can
        # disagree; never index past the links that exist.
        endIdx = numFiles
        if len(files) < numFiles:
            ulog('Error, numFiles=%d, but len(files)=%d'
                 %(numFiles, len(files)))
            endIdx = len(files)
        # get firmware download URL
        for idx in range(startIdx, endIdx):
            assert files[idx].is_displayed()
            fileName = files[idx].text
            ulog('idx=%d, fileName="%s"'%(idx, fileName))
            if 'firmware' not in fileName.lower():
                continue
            prevTrail.append(idx)
            storeFile(modelName, files[idx])
            prevTrail.pop()
        return PROC_OK
    except StaleElementReferenceException:
        # Close the advertisement overlay if it is there, then ask the
        # caller to retry either way.
        try:
            driver.find_element_by_css_selector("a.btn.close.fl-left").\
                click()
        except NoSuchElementException:
            pass
        return TRY_AGAIN
    except TimeoutException:
        # Bare raise keeps the original traceback intact.
        raise
    except Exception as ex:
        traceback.print_exc()
        ipdb.set_trace()
        driver.save_screenshot('netgear_exc.png')
def _harvestProductFiles(catTxt, famTxt, prdTxt):
    """Record every displayed download link of the selected product.

    Reads the result count text, expands the list when more than 10 results
    are reported, and writes one ``TFiles`` row per displayed link through
    ``sql``.  Products whose count text is missing or contains no number are
    skipped.

    Args:
        catTxt: text of the selected category option.
        famTxt: text of the selected family option.
        prdTxt: text of the selected product option.
    """
    numResults=waitText(numResultsCss,3)
    print('numResults=',numResults)
    if numResults is None:
        return
    countMatch=re.search(r"\d+", numResults)
    if countMatch is None:
        # Text without a number: nothing to count, so nothing to harvest.
        print('Error: no count in numResults=',numResults)
        return
    numResults=int(countMatch.group(0))
    if numResults >10:
        showMore=waitClickable("#lnkAllDownloadMore",3)
        showMore.click()
    try:
        erItems=getElems('a.register-product.navlistsearch',3)
    except TimeoutException:
        erItems=getElems('div#LargeFirmware > ul > li > div > p > a.navlistsearch',3)

    if len(erItems) != numResults:
        print('Error, numResults=%d, but len(erItems)=%d'
              %(numResults,len(erItems)))
    for erItem in erItems:
        if not erItem.is_displayed():
            continue
        desc=getElemText(erItem)
        uprint('desc="%s"'%desc)
        # Prefer the 'data-durl' attribute, fall back to the plain 'href'.
        href=erItem.get_attribute('data-durl') or erItem.get_attribute('href')
        print('href=',href)
        # Both attributes may be absent, so guard before calling startswith.
        if not href or not href.startswith('http'):
            print('Error: href=',href)
        row={'catTxt':catTxt, 'famTxt':famTxt, 'prdTxt':prdTxt,
             'desc':desc, 'href':href}
        sql("INSERT OR REPLACE INTO TFiles"
            "(brand,category,family,product,desc,href)VALUES"
            "('Netgear',:catTxt,:famTxt,:prdTxt,:desc,:href)",
            row)
        uprint('INSERT '
               '("%(catTxt)s","%(famTxt)s","%(prdTxt)s","%(desc)s","%(href)s")'
               %row)


def main():
    """Crawl the Netgear download center and record every download link.

    Optional command-line arguments give the category, family and product
    indices to resume from (each defaults to 0).  The family and product
    offsets apply only to the first category / family visited; afterwards
    iteration restarts at 0.

    Unexpected exceptions during the crawl open an ``ipdb`` session and are
    printed; the browser is quit in every case.
    """
    startCatIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
    startFamIdx = int(sys.argv[2]) if len(sys.argv)>2 else 0
    startPrdIdx = int(sys.argv[3]) if len(sys.argv)>3 else 0
    global driver,conn
    harvest_utils.driver=getFirefox(dlDir)
    driver = harvest_utils.driver
    try:
        conn=sqlite3.connect('netgear.sqlite3')
        csr=conn.cursor()
        csr.execute("CREATE TABLE IF NOT EXISTS TFiles("
            "brand TEXT,"
            "category TEXT,"
            "family TEXT,"
            "product TEXT,"# -- is model
            "desc TEXT,"# -- is fileName
            "href TEXT,"
            "file_sha1 TEXT,"
            "PRIMARY KEY (product,desc)"
            ")")
        conn.commit()
        driver.get('http://downloadcenter.netgear.com/')
        # click DrillDown
        waitClickable('#ctl00_ctl00_ctl00_mainContent_localizedContent_bodyCenter_BasicSearchPanel_btnAdvancedSearch').click()
        # wait Page2
        try:
            catSel=Select(waitClickable(catSelCss))
            numCat=len(catSel.options)
            for catIdx in range(startCatIdx,numCat):
                catSel=Select(waitClickable(catSelCss))
                print('catIdx=',catIdx)
                catTxt=catSel.options[catIdx].text
                uprint('catTxt='+catTxt)
                catSel.select_by_index(catIdx)
                waitTextChanged(famSelCss)
                famSel=Select(waitClickable(famSelCss))
                numFam=len(famSel.options)
                for famIdx in range(startFamIdx,numFam):
                    famSel=Select(waitClickable(famSelCss))
                    print('famIdx=',famIdx)
                    famTxt =famSel.options[famIdx].text
                    uprint('famTxt='+famTxt)
                    famSel.select_by_index(famIdx)
                    waitTextChanged(prdSelCss)
                    prdSel=Select(waitClickable(prdSelCss))
                    numPrd=len(prdSel.options)
                    for prdIdx in range(startPrdIdx,numPrd):
                        prdSel=Select(waitClickable(prdSelCss))
                        print("catIdx=%d, famIdx=%d, prdIdx=%d"%(catIdx,famIdx,prdIdx))
                        prdTxt=prdSel.options[prdIdx].text
                        uprint('cat,fam,prd=("%s","%s","%s")'%(catTxt,famTxt,prdTxt))
                        prdWaiting = waitElem(prdWaitingCss)
                        prdSel.select_by_index(prdIdx)
                        # The waiting indicator must appear and then vanish
                        # before the result list is read.
                        WebDriverWait(driver, 5, poll_frequency=0.5).\
                            until(lambda x:prdWaiting.is_displayed()==True)
                        WebDriverWait(driver, 60, poll_frequency=0.5).\
                            until(lambda x:prdWaiting.is_displayed()==False)
                        _harvestProductFiles(catTxt, famTxt, prdTxt)
                    # Reset after the loop so the offset is dropped even when
                    # the first family had too few products to iterate.
                    startPrdIdx=0
                # Same for the family offset of the first category.
                startFamIdx=0
        except Exception as ex:
            import ipdb; ipdb.set_trace()
            print(ex)
            import traceback; traceback.print_exc()
    finally:
        # Quit the browser on every exit path, including setup failures
        # and KeyboardInterrupt.
        print('-- terminate firefox')
        driver.quit()