Example #1
0
def _dlinkQuickFindSelect(nthChild):
    """Return a ``Select`` wrapping the quick-find drop-down at *nthChild*."""
    return Select(waitClickable(
        'select.quickFindAndSearchForm:nth-child(%d)'%nthChild))


def _dlinkHarvestModels(startPfxIdx, startSfxIdx):
    """Visit every (prefix, suffix) model combination and harvest its page.

    ``startPfxIdx`` is the first prefix option index to visit.
    ``startSfxIdx`` is the first suffix option index, applied only to the
    first prefix visited; every later prefix starts at suffix index 1.
    """
    modelPfxSel = _dlinkQuickFindSelect(4)
    numModelPfx=len(modelPfxSel.options)
    for pfxIdx in range(startPfxIdx,numModelPfx):
        modelPfxSel.select_by_index(pfxIdx)
        modelSfxSel = _dlinkQuickFindSelect(6)
        numModelSfx=len(modelSfxSel.options)
        for sfxIdx in range(startSfxIdx,numModelSfx):
            print("pfxIdx=%d, sfxIdx=%d"%(pfxIdx,sfxIdx))
            modelSfxSel.select_by_index(sfxIdx)
            pfxTxt =modelPfxSel.options[pfxIdx].text
            sfxTxt =modelSfxSel.options[sfxIdx].text
            modelName=pfxTxt+'-'+sfxTxt
            print("Page1: modelName=",modelName)
            goBtn=waitClickable('.prodtd > p:nth-child(3) > a:nth-child(7)')
            goBtn.click()
            harvestPage2()
            driver.back()
            # The previous element handles are re-acquired after navigating
            # back, and the prefix is selected again.
            modelPfxSel = _dlinkQuickFindSelect(4)
            modelPfxSel.select_by_index(pfxIdx)
            modelSfxSel = _dlinkQuickFindSelect(6)
        # Reset AFTER the inner loop so the resume index is dropped even when
        # this prefix had no suffix option at or beyond it.
        startSfxIdx=1


def _dlinkWaitDownloads():
    """Block until no ``.part`` file remains in ``dlDir``."""
    while True:
        downloading = [f for f in os.listdir(dlDir) if f.endswith('.part')]
        if not downloading:
            break
        print('-- Downloading : %s  wait 3 seconds'%downloading)
        time.sleep(3)


def main():
    """Harvest the D-Link TSD site into ``dlink_tsd.sqlite3``.

    Command-line arguments (both optional, both default to 1) allow a run to
    be resumed: ``sys.argv[1]`` is the model-prefix option index to start at
    and ``sys.argv[2]`` is the model-suffix option index to start at within
    that first prefix.

    Sets the module globals ``driver`` and ``conn``.  Pending database
    changes are committed and the browser is closed even when harvesting
    fails; the exception then propagates to the caller.
    """
    startPfxIdx = int(sys.argv[1]) if len(sys.argv)>1 else 1
    startSfxIdx = int(sys.argv[2]) if len(sys.argv)>2 else 1
    global driver,conn
    harvest_utils.driver=getFirefox(dlDir)
    driver = harvest_utils.driver
    try:
        conn=sqlite3.connect('dlink_tsd.sqlite3')
        try:
            csr=conn.cursor()
            csr.execute("CREATE TABLE IF NOT EXISTS dlink("
                "model TEXT,"
                "file_name TEXT PRIMARY KEY,"
                "desc TEXT,"
                "href TEXT,"
                "file_sha1 TEXT)"
                )
            conn.commit()
            driver.get('http://tsd.dlink.com.tw/')
            _dlinkHarvestModels(startPfxIdx, startSfxIdx)
            # wait until all '.part' vanished
            _dlinkWaitDownloads()
        finally:
            # sqlite3 does not auto-commit INSERTs; without this, rows
            # written during harvesting are rolled back on exit.
            conn.commit()
            conn.close()
    finally:
        print('-- terminate firefox')
        driver.quit()
Example #2
0
def harvestPage2():
    """Record and download the files listed for the model currently shown.

    Walks the ``tr#rsq`` rows of the current page starting at the second
    child.  Each row whose text contains "firmware" is clicked; on the page
    that opens, every ``.fn9`` element is upserted into the ``dlink`` table
    (model, file name, description), committed, and then clicked to start
    its download.  The browser is navigated back after each opened row.

    Updates the module global ``modelName`` and uses the globals ``driver``
    and ``conn``.
    """
    global modelName, driver, conn
    modelName=getText('big > strong')
    print("Page2 modelName=",modelName)
    numRows = getNumElem('tr#rsq')
    # An empty page yields an empty range, so no separate zero check is
    # needed.  nth-child is one-based; iteration starts at the 2nd child.
    for iRow in range(2, numRows+1):
        row = waitClickable('tr#rsq:nth-child(%d)'%iRow)
        rowText = getElemText(row)
        uprint('Row%d %s'%(iRow, rowText))
        if 'firmware' not in rowText.lower():
            print(' -- bypass')
            continue
        uprint('Click '+rowText)
        row.click()
        modelName=getText('big > strong')
        print('Page3 modelName=%s'%modelName)
        desc=getText('.prodtd > table:nth-child(4) > tbody:nth-child(1) '
                '> tr:nth-child(2) > td:nth-child(2)')
        uprint("Description="+desc)
        for fn9 in getElems('.fn9'):
            fileName = getElemText(fn9)
            fileExt = path.splitext(fileName)[1].lower()
            uprint('filaName="%s"'%fileName)
            if fileExt in ('.doc', '.docx', '.txt','.pdf','.htm','.html','.xls'):
                # NOTE(review): this only logs; the file is still recorded
                # and downloaded below -- confirm that is intended.
                uprint(' -- fileName "%s" doesn\'t look like a firmware file'%fileName)
            # Bind only the values the statement needs instead of locals().
            record = {'model': modelName, 'fileName': fileName, 'desc': desc}
            csr=conn.cursor()
            csr.execute(
                "INSERT OR REPLACE INTO dlink(model,file_name,desc)"
                "VALUES(:model,:fileName,:desc)",record
                )
            # sqlite3 opens an implicit transaction for INSERT; commit so the
            # row survives even if a later step fails.
            conn.commit()
            uprint('INSERT OR REPLACE INTO "%(model)s","%(fileName)s","%(desc)s"'%
                record)
            # waitDownloading()
            clickDownloadableElem(fn9)
        driver.back()
def fileWalker():
    """Record the firmware files listed on the current product page.

    Reads the product and model names, clicks the "Support" tab and the link
    in the second accordion section, then scans the rows of ``table.supp``
    starting at ``getStartIdx()``.  A row is upserted into ``TFiles`` when
    ``guessDate`` finds a date in its first cell and the text of the link in
    its second cell contains "firmware".  When nothing was upserted (no
    table found, or no row qualified) one placeholder row carrying only the
    model, product name, page URL and trail is upserted instead.  The
    browser is navigated back before returning.

    Any exception drops into ``ipdb``, prints the traceback and saves a
    screenshot; it is not re-raised.
    """
    global driver,prevTrail
    try:
        waitUntil(isReadyState)
        prodName=waitText('#prodname')
        ulog('prodName="%s"'%prodName)
        modelName = waitText('#prodmodel')
        ulog('modelName="%s"'%modelName)
        # click "Support"
        retryA(lambda: elemWithText('li.tab-link', 'Support').click())
        # expand "Downloads"
        waitClickable('div.accordion-section:nth-child(2) a').click()
        pageUrl=driver.current_url

        # select  tables
        try:
            tables = getElems('table.supp',9,1)
        except TimeoutException:
            tables=None

        # NOTE: the names bound by the SQL/log templates (modelName,
        # prodName, pageUrl, trailStr, fwDate, fwVer, fwDesc, fileUrl) must
        # stay locals of this function because glocals() is called from here.
        bUpserted=False
        if tables:
            files = getElems('table.supp tr')
            startIdx = getStartIdx()
            numFiles=len(files)
            ulog('numFiles=%s'%numFiles)
            for idx in range(startIdx, numFiles):
                row = files[idx]
                try:
                    col=row.find_element_by_css_selector('td:nth-child(1)')
                except NoSuchElementException:
                    ulog('bypass idx=%s'%idx)
                    continue
                fwDate=guessDate(col.text)
                if not fwDate:
                    ulog('bypass idx=%s'%idx)
                    continue
                desc=row.find_element_by_css_selector('td:nth-child(2)')
                link=desc.find_element_by_css_selector('a')
                # Read the link text once; every .text access is a WebDriver
                # round-trip.
                linkText=link.text
                ulog('fileName.text="%s"'%linkText)
                if 'firmware' not in linkText.lower():
                    ulog('bypass idx=%s'%idx)
                    continue
                fwVer = guessVersion(linkText)
                fileUrl=link.get_attribute('href')

                fwDesc=desc.text.strip()
                trailStr=str(prevTrail+[idx])
                ulog('trail=%s'%trailStr)
                sql("INSERT OR REPLACE INTO TFiles (model,product_name,"
                    "fw_date, fw_ver, fw_desc, "
                    "page_url,file_url,tree_trail) VALUES"
                    "(:modelName, :prodName,"
                    ":fwDate,:fwVer,:fwDesc,"
                    ":pageUrl,:fileUrl,:trailStr)",glocals())
                ulog('UPSERT "%(modelName)s", "%(prodName)s", "%(fwDate)s", '
                    ' "%(fwVer)s", %(fileUrl)s'%glocals())
                bUpserted=True
        else:
            ulog('no firmware download for "%s"'%modelName)

        if not bUpserted:
            # Placeholder row: the model was visited but yielded no firmware.
            trailStr=str(prevTrail)
            sql("INSERT OR REPLACE INTO TFiles (model,product_name,"
                "page_url,tree_trail) VALUES"
                "(:modelName, :prodName,"
                ":pageUrl,:trailStr)",glocals())
            ulog('UPSERT "%(modelName)s", "%(prodName)s", '%glocals())

        driver.back()
        return
    except Exception:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_exc.png')
Example #4
0
def main():
    """Harvest the Linksys support sitemap into ``Linksys.sqlite3``.

    Command-line arguments (both optional, both default to 0) allow a run to
    be resumed: ``sys.argv[1]`` is the index of the first model link to visit
    and ``sys.argv[2]`` is the index of the first accordion (hardware
    revision) to visit for the first model whose accordions are enumerated.

    Models already present in ``TFiles`` are bypassed.  For every other
    model, each accordion is expanded and one row per "Download" link is
    upserted; models or revisions without downloads get a placeholder row.

    Sets the module globals ``driver`` and ``conn``.  The browser is closed
    exactly once when the harvesting loop ends, whether it finished or
    raised.
    """
    startModelIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
    startRevisionIdx = int(sys.argv[2]) if len(sys.argv)>2 else 0
    brand='Linksys'
    global driver,conn
    harvest_utils.driver=getFirefox()
    driver = harvest_utils.driver
    conn=sqlite3.connect('Linksys.sqlite3')
    csr=conn.cursor()
    csr.execute(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "brand TEXT,"
        "model TEXT,"
        "revision TEXT," # hardware version
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_size INTEGER,"
        "href TEXT,"
        "file_sha1 TEXT,"
        "PRIMARY KEY (brand,model,revision,file_title)"
        ");")
    conn.commit()
    driver.get('http://www.linksys.com/us/support/sitemap/')
    try:
        numModels = getNumElem('.item ul li a')
        print('numModels=',numModels)
        for modelIdx in range(startModelIdx, numModels):
            modelElm = getElems('.item ul li a')[modelIdx]
            modelText = getElemText(modelElm, 5)
            print('modelIdx=',modelIdx)
            uprint('modelText="%s"'%modelText)
            # guess Possible Model
            model = guessModel(modelText)
            print('model=',model)
            rows = csr.execute(
                "SELECT model from TFiles WHERE model=:model",
                {'model': model}).fetchall()
            if rows:
                print('model "%s" already in TFiles, bypass!!'%model)
                continue
            modelElm.click()
            # click 'Download Software'
            try:
                waitClickable('a[title="Download Software"]', 40).click()
            except TimeoutException:
                print('No "Download Software" link found, bypass!!')
                csr.execute(
                    "INSERT INTO TFiles(brand,model,revision)VALUES"
                    "(:brand,:model,'')", {'brand': brand, 'model': model})
                conn.commit()
                print('INSERT model="%s"'%model)
                driver.back()
                continue
            # enumerate all accordians
            accordians = getElems('.article-accordian', 10)
            numAccordians=len(accordians)
            print('numAccordians=',numAccordians)
            print('driver.current_url=', driver.current_url)
            for revisionIdx in range(startRevisionIdx, numAccordians):
                # Re-query each time; the list is indexed zero-based, the
                # same way as the querySelectorAll() call below.
                accordians = getElems('.article-accordian')
                accordian = accordians[revisionIdx]
                revisionTxt = getElemText(accordian)
                print('revisionIdx=',revisionIdx)
                uprint('revisionTxt="%s"'%revisionTxt)
                revision = guessRevision(revisionTxt)
                print('revision=',revision)
                divId = accordian.get_attribute('data-collapse-target')
                # expand accordian 'revision'='Hardware Version'
                driver.execute_script(
                    "document.querySelectorAll('.article-accordian')[%d].click()"
                    %(revisionIdx))
                divElm = waitVisible('#'+divId)
                divTxt = getElemTextUntilStabled(divElm,10,2.5)
                assert divTxt
                uprint('divTxt="%s"'%divTxt)
                numDowns = getCount(divTxt, 'Download')
                if numDowns ==0:
                    rec = {'brand': brand, 'model': model,
                           'revision': revision}
                    csr.execute(
                        "INSERT INTO TFiles(brand,model,revision)VALUES"
                        "(:brand,:model,:revision)",rec)
                    conn.commit()
                    print('INSERT "%(model)s","%(revision)s"'%rec)
                    continue
                downElms =iter(divElm.find_elements_by_css_selector('a'))
                lastSpanEnd=0
                for downIdx in range(numDowns):
                    spanBegin = getNthIndex(divTxt, downIdx, 'Download')
                    spanEnd = divTxt.find('\n', spanBegin+len('Download'))
                    if spanEnd==-1:
                        spanEnd=len(divTxt)
                    # Text between the previous download and this one, with
                    # the line order reversed, is what the guess* helpers see.
                    foreword='\n'.join(reversed(divTxt[lastSpanEnd:spanEnd].splitlines()))
                    fwDate=guessDate(foreword)
                    fileSize = guessFileSize(foreword)
                    fwVer = guessVersion(foreword)
                    if fwVer:
                        fileTitle = guessFileTitle(foreword, fwVer)
                    else:
                        fileTitle = guessFileTitle2(foreword)
                    # Advance to the next anchor whose text starts with
                    # 'Download'.
                    while True:
                        downElm = next(downElms)
                        if downElm.text.strip().startswith('Download'):
                            break
                    href=downElm.get_attribute('href')
                    lastSpanEnd=spanEnd
                    rec = {'brand': brand, 'model': model,
                           'revision': revision, 'fwDate': fwDate,
                           'fwVer': fwVer, 'fileTitle': fileTitle,
                           'fileSize': fileSize, 'href': href}
                    csr.execute(
                        "INSERT OR REPLACE INTO TFiles(brand,model,revision,"
                        "fw_date, fw_ver, file_title, file_size, "
                        "href) VALUES (:brand,:model,:revision,"
                        ":fwDate, :fwVer, :fileTitle,"
                        ":fileSize, :href)", rec)
                    conn.commit()
                    # %s rather than %d: a guessed size that is None must not
                    # abort the run with a TypeError in a log line.
                    uprint("INSERT '%(model)s', '%(revision)s', '%(fwDate)s'"
                        ", '%(fwVer)s', '%(fileTitle)s', '%(fileSize)s'"
                        ", '%(href)s'" %rec)
            # Reset AFTER the loop so the resume index is dropped even when
            # this model had no accordion at or beyond it.
            startRevisionIdx=0
            driver.back()
            driver.back()
    except http.client.IncompleteRead as ex:
        print(ex)
        import traceback; traceback.print_exc()
        print('-- Selenium exhausted')
    except Exception as ex:
        import ipdb; ipdb.set_trace()
        print(ex)
        print('driver.current_url=',driver.current_url)
        import traceback; traceback.print_exc()
    finally:
        # Single exit point: the browser is closed exactly once.
        print('-- terminate firefox')
        driver.quit()