def marketWalker():
    """Walk every option of the market drop-down and descend into its models.

    Starting at the index returned by ``getStartIdx()``, each option of the
    first ``<select>`` inside ``.product-cat-box`` is selected, the "go"
    button is clicked, and ``modelWalker()`` is called with the option index
    pushed on the global ``prevTrail``.  The ``#showEndLife`` checkbox is
    ticked before the walk and again after every ``modelWalker()`` call.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global driver, prevTrail
    eolCss = '#showEndLife'
    marketCss = '.product-cat-box select:nth-child(1)'
    try:
        eolBox = waitVisible(eolCss)
        if not eolBox.is_selected():
            eolBox.click()
        markets = Select(waitVisible(marketCss))
        firstIdx = getStartIdx()
        marketCount = len(markets.options)
        selectedText = markets.all_selected_options[0].text
        ulog('current Selected="%s"' % selectedText)
        for idx in range(firstIdx, marketCount):
            markets.select_by_index(idx)
            ulog('gonna select "%s"' % markets.options[idx].text)
            goBtn = waitVisible('button.round-button.go')
            # presumably blocks until the text of .content-box changes
            # -- TODO confirm against UntilTextChanged
            with UntilTextChanged('.content-box', 9, 0.4):
                goBtn.click()
            prevTrail.append(idx)
            modelWalker()
            prevTrail.pop()
            # Re-acquire the checkbox and the <select> after modelWalker()
            # returns, rather than reusing the earlier element handles.
            eolBox = waitVisible(eolCss)
            if not eolBox.is_selected():
                eolBox.click()
            markets = Select(waitVisible(marketCss))
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_exc.png')
def selectSupport(prev_url):
    """Scrape one product page and follow each of its download links.

    Reads the product name and part number from ``.product-name-price`` into
    the globals ``productName`` and ``model``, then looks for the
    ``.icon-list-header-container`` element whose text starts with
    ``DOWNLOAD``.  If there is none, a row without file details is upserted
    into ``TFiles``.  Otherwise every link whose text contains ``model`` is
    entered via ``enterElem(..., selectDownload)`` with its index pushed on
    ``prevTrail``.  On every normal exit the browser is sent back to
    ``prev_url``.

    :param prev_url: URL to navigate back to when this page is finished.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global prevTrail, category, productName, model, driver
    CSS = driver.find_element_by_css_selector
    try:
        waitVisible('.product-name-price')
        productName = CSS('.product-name-price h2').text.strip()
        ulog('productName="%s"' % productName)
        # e.g. 'Wireless G Travel Router'
        model = CSS('.product-name-price p').text.strip()
        # e.g. 'Part # F5D7233'
        model = model.split('#')[1].strip()
        ulog('model="%s"' % model)
        # e.g. 'F5D7233'
        if not productName:
            ulog('productName is empty, bypass!')
            driver.get(prev_url)
            return
        try:
            support = next(
                _ for _ in getElems('.icon-list-header-container')
                if getElemText(_).startswith('DOWNLOAD'))
        except StopIteration:
            ulog('No download in ' + driver.current_url)
            # trailStr must keep this name: it is bound by name through
            # glocals() in the SQL statement below.
            trailStr = str(prevTrail)
            # FIX: values are now listed in the same order as the columns.
            # Previously :model was written to product_name and :productName
            # to model.
            sql("INSERT OR REPLACE INTO TFiles(category, product_name, model, tree_trail) VALUES (:category, :productName, :model, :trailStr)", glocals())
            ulog('UPSERT "%(category)s", "%(model)s", "%(productName)s" %(prevTrail)s' % glocals())
            driver.get(prev_url)
            return
        downloads = support.find_elements_by_css_selector('a')
        numDownloads = len(downloads)
        startIdx = getStartIdx()
        for idx in range(startIdx, numDownloads):
            txt = downloads[idx].text
            if model not in txt:
                ulog('bypass %s,"%s" because it\'s Portal' % (idx, txt))
                continue
            ulog('click %s,"%s"' % (idx, txt))
            prevTrail += [idx]
            enterElem(downloads[idx], selectDownload)
            prevTrail.pop()
            # Look the download section up again after returning from the
            # sub-page instead of reusing the earlier element handles.
            support = next(
                _ for _ in getElems('.icon-list-header-container')
                if getElemText(_).startswith('DOWNLOAD'))
            downloads = support.find_elements_by_css_selector('a')
        driver.get(prev_url)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def upsertOneModel():
    """Upsert a ``TFiles`` row for the model page currently shown.

    Collects the product name (the single non-blank
    ``div.sectionTitle p.hidden-xs`` text, or ``None``), the current page
    URL, the product image URL (``None`` if the image does not become
    visible in time) and ``str(prevTrail)``, then writes them together with
    the global ``modelName``.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global driver, prevTrail, modelName
    try:
        titles = []
        for elem in getElems('div.sectionTitle p.hidden-xs'):
            if elem.text.strip():
                titles.append(elem.text)
        # NOTE: prodName, pageUrl, imgUrl and trailStr are bound by name via
        # glocals() in the SQL/log strings below; do not rename them.
        if titles:
            assert len(titles) == 1
            prodName = titles[0]
        else:
            prodName = None
        pageUrl = driver.current_url
        try:
            picture = waitVisible('.productPic img.img-responsive', 4, 1)
            imgUrl = picture.get_attribute('src')
        except TimeoutException:
            imgUrl = None
        if imgUrl is not None:
            assert imgUrl.startswith('http')
        trailStr = str(prevTrail)
        sql(
            "INSERT OR REPLACE INTO TFiles(model,prod_name,page_url,"
            "image_url,tree_trail) VALUES(:modelName, :prodName, :pageUrl,"
            ":imgUrl,:trailStr)", glocals())
        ulog('UPSERT "%(modelName)s" "%(prodName)s",%(trailStr)s,'
             '%(pageUrl)s' % glocals())
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def upsertOneModel():
    """Record the currently displayed model page in ``TFiles``.

    The row holds the global ``modelName``, the page's product name (or
    ``None`` when no non-blank title element exists), the current URL, the
    product picture URL (or ``None`` on timeout) and the stringified
    ``prevTrail``.

    Errors are not propagated: the handler opens ``ipdb``, prints the
    traceback and saves a screenshot.
    """
    global driver, prevTrail, modelName
    try:
        nonBlank = [el.text
                    for el in getElems('div.sectionTitle p.hidden-xs')
                    if el.text.strip()]
        # The names prodName/pageUrl/imgUrl/trailStr are looked up by
        # glocals() for the SQL and log placeholders -- keep them as is.
        prodName = None
        if nonBlank:
            assert len(nonBlank) == 1
            prodName = nonBlank[0]
        pageUrl = driver.current_url
        imgUrl = None
        try:
            imgUrl = waitVisible(
                '.productPic img.img-responsive', 4, 1).get_attribute('src')
        except TimeoutException:
            # no picture became visible in time; imgUrl stays None
            pass
        assert imgUrl is None or imgUrl.startswith('http')
        trailStr = str(prevTrail)
        sql("INSERT OR REPLACE INTO TFiles(model,prod_name,page_url,"
            "image_url,tree_trail) VALUES(:modelName, :prodName, :pageUrl,"
            ":imgUrl,:trailStr)",
            glocals())
        ulog('UPSERT "%(modelName)s" "%(prodName)s",%(trailStr)s,'
             '%(pageUrl)s' % glocals())
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def getAllModels():
    """Populate the global ``models`` list, using a one-week file cache.

    If ``zyxel_models.txt`` exists, is non-empty and was modified less than
    seven days ago, ``models`` is loaded from it (one model per line).
    Otherwise the search drop-down on ``rootUrl`` is opened and
    Ctrl+End is sent repeatedly until the number of ``#searchDropUl li``
    items stops growing; the ``data`` attribute of every item becomes a
    model, and the list is written back to the cache file.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global driver, models
    act = ActionChains(driver)

    def numElm(c):
        # Count elements matching the selector through the page's own jQuery.
        return driver.execute_script("return $('%s').length" % c)

    try:
        if path.exists('zyxel_models.txt') and \
                path.getsize('zyxel_models.txt') > 0 and \
                time.time() - path.getmtime('zyxel_models.txt') < 3600 * 24 * 7:
            with open('zyxel_models.txt', 'r', encoding='utf-8') as fin:
                # FIX: drop the line terminator so cached entries are the
                # same strings the scrape path produces (it writes
                # m + '\n'); also skip empty lines, mirroring the falsy
                # filter applied to freshly scraped models.
                cached = [line.rstrip('\n') for line in fin]
            models = [m for m in cached if m]
            return
        goToUrl(rootUrl)
        btn = waitVisible('.search-select button')
        act.move_to_element(btn).click(btn).perform()
        inp = waitVisible('.input-block-level')
        act.move_to_element(inp).click(inp).perform()
        act.send_keys(Keys.DOWN).perform()
        time.sleep(0.1)
        act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
        time.sleep(0.1)
        numModels = numElm('#searchDropUl li')
        uprint('numModels=%s' % numModels)
        # Keep jumping to the end of the list until its length stops changing.
        while True:
            act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
            time.sleep(0.1)
            numModels2 = numElm('#searchDropUl li')
            if numModels == numModels2:
                break
            numModels = numModels2
            uprint('numModels=%s' % numModels)
        uprint('numModels=%s' % numModels)
        models = [_.get_attribute('data') for _ in getElems('#searchDropUl li')]
        models = [_ for _ in models if _]
        uprint('len(models)=%s' % len(models))
        with open('zyxel_models.txt', 'w', encoding='utf-8') as fout:
            for m in models:
                fout.write(m + '\n')
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def versionWalker():
    """Upsert every firmware version offered on the current model page.

    Looks for the single ``#Firmware tr`` row whose text starts with
    ``Firmware``.  With no such row, ``upsertOneModel()`` is called instead.
    If the row has no ``button``, it is treated as a single version with
    trail index 0.  Otherwise the button is clicked and each ``ul li a``
    entry from ``getStartIdx()`` onward is clicked and passed to
    ``upsertOneVersion(row, imgUrl, prodName)`` with its index pushed on
    ``prevTrail``.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global driver, prevTrail
    try:
        fwRows = [tr for tr in getElems('#Firmware tr')
                  if tr.text.startswith('Firmware')]
        if not fwRows:
            upsertOneModel()
            return
        assert len(fwRows) == 1
        row = fwRows[0]

        try:
            picture = waitVisible('.productPic img.img-responsive', 4, 1)
            imgUrl = picture.get_attribute('src')
        except TimeoutException:
            ulog('no Picture!')
            imgUrl = None

        titles = []
        for elem in getElems('div.sectionTitle p.hidden-xs'):
            if elem.text.strip():
                titles.append(elem.text)
        if titles:
            assert len(titles) == 1
            prodName = titles[0]
        else:
            prodName = None

        try:
            verBtn = row.find_element_by_css_selector('button')
        except NoSuchElementException:
            # No drop-down button: the row itself is the only version.
            idx = 0
            ulog('only one version')
            ulog('idx=%s' % idx)
            prevTrail.append(idx)
            upsertOneVersion(row, imgUrl, prodName)
            prevTrail.pop()
            return

        verBtn.click()
        versions = row.find_elements_by_css_selector('ul li a')
        firstIdx = getStartIdx()
        numVersions = len(versions)
        ulog('numVersions=%s' % numVersions)
        lastIdx = numVersions - 1
        for idx in range(firstIdx, numVersions):
            ulog('idx=%s' % idx)
            entry = versions[idx]
            ulog('click "%s"' % entry.text.strip())
            entry.click()
            time.sleep(0.1)
            prevTrail.append(idx)
            upsertOneVersion(row, imgUrl, prodName)
            prevTrail.pop()
            if idx < lastIdx:
                # Re-open the drop-down and refetch its entries for the
                # next iteration.
                verBtn.click()
                versions = row.find_elements_by_css_selector('ul li a')
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def getAllModels():
    """Fill the global ``models`` list from cache or by scraping the site.

    Cache path: when ``zyxel_models.txt`` exists, has content and is younger
    than ``3600*24*7`` seconds, its lines become ``models``.

    Scrape path: opens ``rootUrl``, expands the search drop-down, sends
    Ctrl+End until the count of ``#searchDropUl li`` elements is stable,
    takes each item's ``data`` attribute (dropping falsy ones) and rewrites
    the cache file with one model per line.

    Errors are not propagated: the handler opens ``ipdb``, prints the
    traceback and saves a screenshot.
    """
    global driver, models
    act = ActionChains(driver)

    def numElm(c):
        # Number of elements matching CSS selector ``c``, evaluated with the
        # page's jQuery.
        return driver.execute_script("return $('%s').length" % c)

    try:
        cacheFresh = (
            path.exists('zyxel_models.txt')
            and path.getsize('zyxel_models.txt') > 0
            and time.time() - path.getmtime('zyxel_models.txt') < 3600 * 24 * 7
        )
        if cacheFresh:
            with open('zyxel_models.txt', 'r', encoding='utf-8') as fin:
                # FIX: the file is written as m + '\n', so strip that
                # terminator on the way back in; previously every cached
                # model kept a trailing newline that scraped models lack.
                # Blank lines are dropped like falsy scraped entries are.
                models = [ln for ln in (raw.rstrip('\n') for raw in fin) if ln]
            return
        goToUrl(rootUrl)
        btn = waitVisible('.search-select button')
        act.move_to_element(btn).click(btn).perform()
        inp = waitVisible('.input-block-level')
        act.move_to_element(inp).click(inp).perform()
        act.send_keys(Keys.DOWN).perform()
        time.sleep(0.1)
        act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
        time.sleep(0.1)
        numModels = numElm('#searchDropUl li')
        uprint('numModels=%s' % numModels)
        # Repeat Ctrl+End until two consecutive counts agree.
        while True:
            act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
            time.sleep(0.1)
            numModels2 = numElm('#searchDropUl li')
            if numModels == numModels2:
                break
            numModels = numModels2
            uprint('numModels=%s' % numModels)
        uprint('numModels=%s' % numModels)
        models = [_.get_attribute('data') for _ in getElems('#searchDropUl li')]
        models = [_ for _ in models if _]
        uprint('len(models)=%s' % len(models))
        with open('zyxel_models.txt', 'w', encoding='utf-8') as fout:
            for m in models:
                fout.write(m + '\n')
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def selectDownload(prev_url):
    """Parse a support article and upsert one ``TFiles`` row per download.

    Loads the ``src`` of the ``inlineFrame`` iframe directly, converts the
    ``.sfdc_richtext`` HTML to Markdown text with ``html2text`` and calls
    ``getSizeDateVersion(artTxt, idx)`` for increasing ``idx`` (from
    ``getStartIdx()``) until it raises ``StopIteration``.  Each result is
    upserted with ``idx`` pushed on ``prevTrail``.  The browser is sent back
    to ``prev_url`` afterwards, and also when the iframe never appears.

    :param prev_url: URL to navigate back to when this page is finished.

    Any other exception drops into ``ipdb``, prints the traceback and saves
    a screenshot; nothing is re-raised.
    """
    global driver, category, productName, model, prevTrail
    try:
        # Read the iframe's src so the article can be loaded as a page of
        # its own.
        try:
            pageUrl = waitVisible('iframe[name~=inlineFrame]', 30, 0.4).get_attribute('src')
        except TimeoutException:
            ulog('url= ' + driver.current_url)
            driver.get(prev_url)
            # FIX: stop here.  Without this return the code fell through to
            # driver.get(pageUrl) with pageUrl never assigned, raising
            # UnboundLocalError right after navigating back.
            return
        # e.g. http://www.belkin.com/us/support-article?articleNum=4879
        driver.get(pageUrl)
        # convert html to Markdown Text
        page_src = waitVisible('.sfdc_richtext').get_attribute('innerHTML')
        h = html2text.HTML2Text()
        h.ignore_emphasis = True
        h.body_width = 0
        artTxt = h.handle(page_src)
        startIdx = getStartIdx()
        for idx in range(startIdx, sys.maxsize):
            # NOTE: fileSize, relDate, fwVer, downUrl, pageUrl and trailStr
            # are bound by name through glocals() below; keep these names.
            try:
                fileSize, relDate, fwVer, downUrl = getSizeDateVersion(artTxt, idx)
            except StopIteration:
                # raised when there is no idx-th entry -- end of the walk
                break
            prevTrail += [idx]
            trailStr = str(prevTrail)
            sql("INSERT OR REPLACE INTO TFiles("
                " category, product_name, model"
                ",rel_date,fw_ver,file_size,page_url,download_url,tree_trail)"
                " VALUES"
                "(:category, :productName, :model,"
                ":relDate,:fwVer,:fileSize,:pageUrl,:downUrl,:trailStr)",
                glocals())
            ulog('UPSERT "%(category)s", "%(productName)s", "%(model)s",'
                 ' "%(relDate)s", "%(fwVer)s", %(fileSize)s,'
                 ' "%(downUrl)s", %(prevTrail)s ' % glocals())
            prevTrail.pop()
        driver.get(prev_url)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def enterFrame(iframeId:str):
    """Navigate the browser to the ``src`` URL of the iframe with this id.

    Logs ``"<current url> => <iframe src>"``, loads the iframe's URL as the
    top-level page, then calls ``retryUntilTrue(isReadyState, 10, 2)``.  A
    ``TimeoutException`` from that call is printed and otherwise ignored.

    :param iframeId: value of the iframe's ``id`` attribute; it is inserted
        unquoted into a CSS attribute selector.
    """
    global driver
    origin = driver.current_url
    frame = waitVisible('iframe[id=%s]' % iframeId)
    target = frame.get_attribute('src')
    ulog('%s => %s' % (origin, target))
    driver.get(target)
    try:
        retryUntilTrue(isReadyState, 10, 2)
    except TimeoutException as ex:
        # best effort: report the timeout and carry on
        print(ex)
def selectCategory(prev_url):
    """Walk the category filter list, recursing one level before products.

    Behaviour depends on the depth of the global ``prevTrail``:

    * depth 1 -- wait for ``.filter-list``; if it never appears, log, go
      back to ``prev_url`` and return.  Otherwise read the search-results
      notification text.
    * depth 2 -- wait for the notification text to differ from the stored
      value.

    Then the active category name is stored in the global ``category`` and
    every ``.filter-list a`` link from ``getStartIdx()`` onward is entered
    with its index pushed on ``prevTrail``: via ``selectCategory`` again
    when the trail then has length 2, via ``selectProduct`` otherwise.

    :param prev_url: URL to return to when done; skipped when falsy.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global category, prevTrail, searchResultsNotification,driver
    try:
        if len(prevTrail)==1:
            try:
                waitVisible('.filter-list', 30, 0.4)
            except TimeoutException:
                ulog('No search results, url=%s'%driver.current_url)
                driver.get(prev_url)
                return
            searchResultsNotification=waitText('.search-results-notification').strip()
            # e.g. "Your search for f returned 4196 results"
        elif len(prevTrail)==2:
            # The previous notification text is passed in so the wait can
            # detect that the text has changed.
            searchResultsNotification=waitTextChanged('.search-results-notification', searchResultsNotification).strip()
            # e.g. "Your search for f returned 67 results"
        ulog('%s'%searchResultsNotification)
        category = waitText('.accordion-activate a')
        ulog('category="%s"'%category)
        cats=getElems('.filter-list a')
        # Logging the link texts is wrapped in retryUntilTrue; presumably
        # this retries when reading .text fails -- TODO confirm against
        # retryUntilTrue and ulog's return value.
        retryUntilTrue(lambda:ulog('cats=%s'%[(i,_.text)for i,_ in enumerate(cats)]))
        numCats=len(cats)
        startIdx = getStartIdx()
        for idx in range(startIdx, numCats):
            ulog('click %s,"%s"'%(idx,cats[idx].text))
            prevTrail+=[idx]
            # After the push, a trail of length 2 means one more category
            # level is walked; any other length goes to products.
            if len(prevTrail)==2:
                enterElem(cats[idx], selectCategory)
            else:
                enterElem(cats[idx], selectProduct)
            prevTrail.pop()
            # Refetch the links after returning from the sub-page instead of
            # reusing the earlier element handles.
            cats = getElems('.filter-list a')
        # NOTE(review): the source layout was collapsed; this block is
        # reconstructed as running once after the loop -- confirm.
        if prev_url:
            driver.get(prev_url)
            searchResultsNotification=waitTextChanged('.search-results-notification', searchResultsNotification).strip()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def versionWalker():
    """Visit each firmware version listed on the current model page.

    * No ``#Firmware tr`` row starting with ``Firmware``: delegate to
      ``upsertOneModel()``.
    * Row without a ``button``: one version only, upserted with trail
      index 0.
    * Otherwise: open the version drop-down and, for every ``ul li a``
      entry from ``getStartIdx()`` on, click it and call
      ``upsertOneVersion(row, imgUrl, prodName)`` with the index pushed on
      ``prevTrail``.

    Errors are not propagated: the handler opens ``ipdb``, prints the
    traceback and saves a screenshot.
    """
    global driver, prevTrail
    try:
        candidates = getElems('#Firmware tr')
        firmwareRows = []
        for tr in candidates:
            if tr.text.startswith('Firmware'):
                firmwareRows.append(tr)
        if not firmwareRows:
            upsertOneModel()
            return
        assert len(firmwareRows) == 1
        row = firmwareRows[0]

        imgUrl = None
        try:
            imgUrl = waitVisible(
                '.productPic img.img-responsive', 4, 1).get_attribute('src')
        except TimeoutException:
            ulog('no Picture!')

        names = [el.text
                 for el in getElems('div.sectionTitle p.hidden-xs')
                 if el.text.strip()]
        prodName = None
        if names:
            assert len(names) == 1
            prodName = names[0]

        try:
            verBtn = row.find_element_by_css_selector('button')
        except NoSuchElementException:
            # no version drop-down on this row
            idx = 0
            ulog('only one version')
            ulog('idx=%s' % idx)
            prevTrail += [idx]
            upsertOneVersion(row, imgUrl, prodName)
            prevTrail.pop()
            return

        verBtn.click()
        versions = row.find_elements_by_css_selector('ul li a')
        startIdx = getStartIdx()
        numVersions = len(versions)
        ulog('numVersions=%s' % numVersions)
        for idx in range(startIdx, numVersions):
            ulog('idx=%s' % idx)
            choice = versions[idx]
            ulog('click "%s"' % choice.text.strip())
            choice.click()
            time.sleep(0.1)
            prevTrail += [idx]
            upsertOneVersion(row, imgUrl, prodName)
            prevTrail.pop()
            if idx >= numVersions - 1:
                # last entry: nothing left to re-open the drop-down for
                continue
            verBtn.click()
            versions = row.find_elements_by_css_selector('ul li a')
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_excep.png')
def revisionWalker():
    """Walk every hardware revision in the download drop-down, then close
    the current window.

    If the revision drop-down does not become visible, ``fileWalker()`` is
    called once with trail index 0.  Otherwise the drop-down is opened and
    each ``#dlDropDownBox dd ul li a`` entry from ``getStartIdx()`` onward
    is clicked and handed to ``fileWalker()`` with its index pushed on
    ``prevTrail``.  On both paths the current window is closed and the
    driver switches to the last remaining window handle.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; nothing is re-raised.
    """
    global driver, prevTrail
    dropdownCss = '#dlDropDownBox dd:nth-child(2) p span'
    entriesCss = '#dlDropDownBox dd ul li a'
    try:
        try:
            dropdown = waitVisible(dropdownCss, 9, 0.4)
        except (TimeoutException, NoSuchElementException):
            prevTrail.append(0)
            ulog('no revision dropdown, trail=%s' % prevTrail)
            fileWalker()
            prevTrail.pop()
            driver.close()
            driver.switch_to.window(driver.window_handles[-1])
            return

        dropdown.click()
        revs = getElems(entriesCss)
        # The lambdas read ``revs`` at call time, so they always see the
        # most recently fetched list.
        waitUntil(lambda: all(_.is_displayed() for _ in revs))
        waitUntil(lambda: ulog('revs=%s' % [_.text for _ in revs]) >= 0)
        revCount = len(revs)
        firstIdx = getStartIdx()
        for idx in range(firstIdx, revCount):
            entry = revs[idx]
            prevTrail.append(idx)
            ulog('click "%s",trail=%s' % (entry.text, prevTrail))
            entry.click()
            fileWalker()
            prevTrail.pop()
            # Re-open the drop-down and refetch its entries for the next
            # iteration.
            dropdown = waitVisible(dropdownCss, 3, 0.4)
            dropdown.click()
            revs = getElems(entriesCss)
            waitUntil(lambda: all(_.is_displayed() for _ in revs))
        driver.close()
        driver.switch_to.window(driver.window_handles[-1])
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() + '_exc.png')
def main():
    """Harvest Linksys firmware download metadata into ``Linksys.sqlite3``.

    Command line: ``argv[1]`` is the model index to start from and
    ``argv[2]`` the revision (accordion) index to start from; both default
    to 0.

    For every model link on the support sitemap that is not yet present in
    ``TFiles``, the product's "Download Software" page is opened, each
    ``.article-accordian`` section (one per hardware revision) is expanded,
    and one row is written per ``Download`` link found in the section text.
    Models without a download page and revisions without downloads get a
    placeholder row.

    SQL parameters and log placeholders are bound through ``locals()``, so
    the local variable names in this function are part of its behaviour.
    """
    startModelIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
    startRevisionIdx = int(sys.argv[2]) if len(sys.argv)>2 else 0
    brand='Linksys'
    global driver,conn
    # The same driver object is stored on harvest_utils and in this
    # module's global.
    harvest_utils.driver=getFirefox()
    driver = harvest_utils.driver
    conn=sqlite3.connect('Linksys.sqlite3')
    csr=conn.cursor()
    csr.execute(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "brand TEXT,"
        "model TEXT,"
        "revision TEXT," # hardware version
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_size INTEGER,"
        "href TEXT,"
        "file_sha1 TEXT,"
        "PRIMARY KEY (brand,model,revision,file_title)"
        ");")
    conn.commit()
    driver.get('http://www.linksys.com/us/support/sitemap/')
    try:
        numModels = getNumElem('.item ul li a')
        print('numModels=',numModels)
        for modelIdx in range(startModelIdx, numModels):
            # Has no effect on this loop: the range above is already built.
            startModelIdx=0
            # Refetch the link list on every iteration and index into it.
            modelElm = getElems('.item ul li a')[modelIdx]
            modelText = getElemText(modelElm, 5)
            print('modelIdx=',modelIdx)
            uprint('modelText="%s"'%modelText)
            # guess the model name from the link text
            model = guessModel(modelText)
            print('model=',model)
            # Skip models that already have at least one row.
            rows = csr.execute(
                "SELECT model from TFiles WHERE model=:model",locals()
                ).fetchall()
            if rows:
                print('model "%s" already in TFiles, bypass!!'%model)
                continue
            modelElm.click()
            # click 'Download Software'
            try:
                waitClickable('a[title="Download Software"]', 40).click()
            except TimeoutException:
                # No download page: store a placeholder row with an empty
                # revision and go back one page.
                print('No "Download Software" link found, bypass!!')
                csr.execute(
                    "INSERT INTO TFiles(brand,model,revision)VALUES"
                    "(:brand,:model,'')", locals())
                conn.commit()
                print('INSERT model="%s"'%model)
                driver.back()
                continue
            # enumerate all accordions
            accordians = getElems('.article-accordian', 10)
            numAccordians=len(accordians)
            print('numAccordians=',numAccordians)
            print('driver.current_url=', driver.current_url)
            for revisionIdx in range(startRevisionIdx, numAccordians):
                # Once this body has run, later models enumerate their
                # revisions from 0.
                startRevisionIdx=0
                accordians = getElems('.article-accordian')
                # Pick this revision's accordion.  The list index used here
                # is zero-based; the original note said "one-based" -- TODO
                # confirm what that referred to.
                accordian = accordians[revisionIdx]
                revisionTxt = getElemText(accordian)
                print('revisionIdx=',revisionIdx)
                uprint('revisionTxt="%s"'%revisionTxt)

                revision = guessRevision(revisionTxt)
                print('revision=',revision)
                divId = accordian.get_attribute('data-collapse-target')
                # expand accordion 'revision'='Hardware Version'
                # (clicked through JavaScript rather than WebElement.click)
                driver.execute_script(
                    "document.querySelectorAll('.article-accordian')[%d].click()"
                    %(revisionIdx))
                divElm = waitVisible('#'+divId)
                divTxt = getElemTextUntilStabled(divElm,10,2.5)
                assert divTxt
                uprint('divTxt="%s"'%divTxt)
                numDowns = getCount(divTxt, 'Download')
                if numDowns ==0:
                    # Revision without downloads: placeholder row only.
                    csr.execute(
                        "INSERT INTO TFiles(brand,model,revision)VALUES"
                        "(:brand,:model,:revision)",locals())
                    conn.commit()
                    print('INSERT "%(model)s","%(revision)s"'%locals())
                    continue
                # One shared iterator over the section's links; it is
                # advanced across iterations of the loop below.
                downElms =iter(divElm.find_elements_by_css_selector('a'))
                lastSpanEnd=0
                for downIdx in range(numDowns):
                    spanBegin = getNthIndex(divTxt, downIdx, 'Download')
                    # End of the line containing this 'Download' occurrence.
                    spanEnd = divTxt.find('\n', spanBegin+len('Download'))
                    if spanEnd==-1:
                        spanEnd=len(divTxt)
                    # Text between the previous span end and this one, with
                    # its lines in reverse order.
                    foreword='\n'.join(reversed(divTxt[lastSpanEnd:spanEnd].splitlines()))
                    fwDate=guessDate(foreword)
                    fileSize = guessFileSize(foreword)
                    fwVer = guessVersion(foreword)
                    if fwVer:
                        fileTitle = guessFileTitle(foreword, fwVer)
                    else:
                        fileTitle = guessFileTitle2(foreword)
                    # Advance to the next link whose text starts with
                    # 'Download'.
                    while True:
                        downElm = next(downElms)
                        if downElm.text.strip().startswith('Download'):
                            break
                    href=downElm.get_attribute('href')
                    lastSpanEnd=spanEnd
                    csr.execute(
                        "INSERT OR REPLACE INTO TFiles(brand,model,revision,"
                        "fw_date, fw_ver, file_title, file_size, "
                        "href) VALUES (:brand,:model,:revision,"
                        ":fwDate, :fwVer, :fileTitle,"
                        ":fileSize, :href)", locals())
                    conn.commit()
                    # NOTE(review): '%(fileSize)d' needs a number -- confirm
                    # guessFileSize never returns None.
                    uprint("INSERT '%(model)s', '%(revision)s', '%(fwDate)s'"
                           ", '%(fwVer)s', '%(fileTitle)s', '%(fileSize)d'"
                           ", '%(href)s'" %locals())
            # NOTE(review): the source layout was collapsed; both back()
            # calls are reconstructed as running once per model, after all
            # of its revisions -- confirm.
            driver.back()
            driver.back()
    except http.client.IncompleteRead as ex:
        print(ex)
        import traceback; traceback.print_exc()
        print('-- Selenium exhausted')
        driver.quit()
    except Exception as ex:
        import ipdb; ipdb.set_trace()
        print(ex)
        print('driver.current_url=',driver.current_url)
        import traceback; traceback.print_exc()
    # NOTE(review): reconstructed at function level, so this also runs after
    # the IncompleteRead handler has already called quit() -- confirm the
    # intended nesting.
    print('-- terminate firefox')

    driver.quit()