Esempio n. 1
0
def faqScraper(baseUrl, model, image_url, dev_desc, dev_hstore):
    """Upsert one TFiles row per "Firmware" link found on a support page.

    The page at ``baseUrl`` is loaded with ``pq`` and every ``<a>`` whose text
    matches 'Firmware' through the ``cicontains`` infix helper (presumably a
    case-insensitive "contains" -- confirm against its definition) is kept.
    Links are processed from the index returned by ``getStartIdx()`` onwards;
    the firmware version is extracted from the link text and the row is
    written with ``INSERT OR REPLACE``.

    Args:
        baseUrl: URL of the page to scrape; stored as ``page_url``.
        model: stored in the ``model`` column.
        image_url: stored in the ``image_url`` column.
        dev_desc: stored in the ``dev_desc`` column.
        dev_hstore: stored in the ``dev_hstore`` column.

    Returns:
        None. Every exception is caught here: the debugger is opened and the
        traceback printed, nothing propagates to the caller.
    """
    # Example page:
    # http://arris.force.com/consumers/articles/Drivers_and_Firmware/2247-N8-10NA-9-1-1h0d34-Firmware-Upgrade
    global prevTrail
    try:
        ulog('baseUrl= '+baseUrl)
        d = pq(url=baseUrl)
        files = [_ for _ in d('a') if _.text_content()/cicontains/'Firmware']
        numFiles = len(files)
        ulog('numFiles=%s'%numFiles)
        startIdx = getStartIdx()
        for idx in range(startIdx, numFiles):
            ulog('idx=%s'%idx)
            f = files[idx]
            file_name = f.text_content().strip()
            ulog('file_name="%s"'%file_name)
            verMatch = re.search(r"\d+\.([\w\.\-]+)", file_name, re.I)
            if verMatch is None:
                # re.search() returns None when nothing matches, so the failure
                # mode is "no match object", not IndexError. Break into the
                # debugger as before, then skip the link so that no unbound or
                # stale fw_ver from a previous iteration gets stored.
                ipdb.set_trace()
                continue
            fw_ver = verMatch.group(0)
            file_url = f.attrib['href']
            tree_trail = str(prevTrail+[idx])
            # The named placeholders are resolved from the local variables.
            sql("INSERT OR REPLACE INTO TFiles (model, image_url, dev_desc, dev_hstore, fw_ver, page_url, file_url, tree_trail) VALUES (:model, :image_url, :dev_desc, :dev_hstore, :fw_ver, :baseUrl, :file_url, :tree_trail)", locals())
            uprint('UPSERT "%(model)s", "%(fw_ver)s", %(tree_trail)s, %(file_url)s'%locals())

    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def main():
    """Entry point of the Netgear crawler.

    Parses the resume trail from the command line (the first run of digits of
    every argument), creates the TFiles table in 'netgear.sqlite3' if needed,
    starts Firefox on the Netgear download centre and hands over to
    ``walkProdCat``.

    On any exception the traceback is printed, the debugger is opened and a
    screenshot is saved when a browser is available. The browser and the
    database connection are always released on exit.
    """
    global startTrail,prevTrail,driver,conn
    # Bind both handles before the try block: the except/finally clauses below
    # must not raise NameError when setup fails before they are created.
    driver = None
    conn = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        uprint('startTrail=%s'%startTrail)
        conn = sqlite3.connect('netgear.sqlite3')
        sql("CREATE TABLE IF NOT EXISTS TFiles("
                "id INTEGER NOT NULL,"
                "vendor TEXT,"
                "model TEXT,"
                "revision TEXT,"
                "fw_date TEXT,"
                "fw_ver TEXT,"
                "file_name TEXT,"
                "file_size TEXT,"
                "page_url TEXT,"
                "file_url TEXT,"
                "tree_trail TEXT,"
                "file_sha1 TEXT,"
                "PRIMARY KEY (id),"
                "UNIQUE(vendor,model,revision,file_name)"
                ");")
        driver = harvest_utils.getFirefox()
        # harvest_utils keeps its own reference to the active browser.
        harvest_utils.driver = driver
        driver.get("http://downloadcenter.netgear.com/")
        prevTrail = []
        # tmr = ClickOutOverlayTimer()
        # tmr.start()
        walkProdCat()
    except Exception as ex:
        traceback.print_exc(); ipdb.set_trace()
        if driver is not None:
            driver.save_screenshot('netgear_exc.png')
    finally:
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
Esempio n. 3
0
def main():
    """Create the Arris TFiles table if needed and crawl the consumer site.

    Each command-line argument contributes its first run of digits to the
    global ``startTrail``. ``prevTrail`` is reset to an empty list and the
    crawl is started by calling ``seriesWalker`` on the site root.

    The SQLite connection is closed only when the crawl returns normally. On
    any exception the debugger is opened and the traceback printed.
    """
    global startTrail,prevTrail, conn
    try:
        # Build the list locally so the global is only replaced once every
        # argument has been parsed successfully.
        resume = []
        for arg in sys.argv[1:]:
            resume.append(int(re.search(r'\d+', arg).group(0)))
        startTrail = resume
        uprint('startTrail=%s'%startTrail)

        conn = sqlite3.connect('arris.sqlite3')
        columns = (
            "id INTEGER NOT NULL",
            "model TEXT",
            "image_url TEXT",
            "dev_desc TEXT",
            "dev_hstore TEXT",
            "fw_ver TEXT",
            "page_url TEXT",
            "file_url TEXT",
            "tree_trail TEXT",
            "file_size INTEGER",
            "file_sha1 TEXT",
            "PRIMARY KEY (id)",
            "UNIQUE(model,fw_ver)",
        )
        sql("CREATE TABLE IF NOT EXISTS TFiles(" + ",".join(columns) + ");")

        prevTrail = []
        seriesWalker("http://arris.force.com/consumers")
        conn.close()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
Esempio n. 4
0
def main():
    """Download, hash and upload every Arris firmware that has no SHA-1 yet.

    Rows are read from TFiles in 'arris.sqlite3' ordered by id, starting at
    the offset given as the optional first command-line argument (default 0).
    For each row that has a file_url and no file_sha1, the file is downloaded,
    its SHA-1 and size are written back to the row, the file is uploaded to
    the FTP host and the local copy is removed.

    A TypeError from the download skips that row. Any other exception stops
    the run: the debugger is opened and the traceback printed. The database
    connection is closed on exit.
    """
    global startTrail,prevTrail,conn
    conn = None
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
        conn = sqlite3.connect('arris.sqlite3')
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,file_url,file_sha1 FROM TFiles ORDER BY id "
            "LIMIT -1 OFFSET ?", (startIdx,)
            ).fetchall()
        for idx, (devId, file_url, file_sha1) in enumerate(rows, startIdx):
            # Nothing to fetch, or already hashed on an earlier run.
            if not file_url or file_sha1:
                continue
            uprint('idx=%d'%idx)
            try:
                local_file = downloadFile(file_url, "Content-Disposition")
            except TypeError:
                # NOTE(review): presumably raised when the response has no
                # Content-Disposition header -- confirm against downloadFile.
                continue
            file_sha1 = getFileSha1(local_file)
            file_size = path.getsize(local_file)
            csr.execute(
                "UPDATE TFiles SET file_sha1=:file_sha1,file_size=:file_size"
                " WHERE id = :devId",
                {'file_sha1': file_sha1, 'file_size': file_size,
                 'devId': devId})
            conn.commit()
            # The context manager closes the FTP session even when the upload
            # raises, instead of leaking it.
            with ftputil.FTPHost(ftpHostName,ftpUserName,ftpPassword) as ftp:
                uprint('upload to GRID')
                ftp.upload(local_file, path.basename(local_file))
            os.remove(local_file)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
    finally:
        if conn is not None:
            conn.close()
def main():
    """Entry point of the Actiontec crawler.

    Parses the resume trail from the command line (the first run of digits of
    every argument), creates the TFiles table in 'actiontec.sqlite3' if
    needed, starts Firefox, opens the support root page and hands over to
    ``marketWalker``.

    On any exception the debugger is opened, the traceback printed and, when a
    browser is available, a screenshot saved. The browser and the database
    connection are always released on exit.
    """
    global startTrail,prevTrail, driver,conn
    rootUrl='http://www.actiontec.com/support/'
    # Bind both handles up front so the except/finally clauses cannot raise
    # NameError when setup fails before they are created.
    driver = None
    conn = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        uprint('startTrail=%s'%startTrail)
        conn=sqlite3.connect('actiontec.sqlite3')
        sql(
            "CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "model TEXT,"
            "product_name TEXT,"
            "fw_date DATE,"
            "fw_ver TEXT,"
            "fw_desc TEXT,"
            "file_size INTEGER,"
            "page_url TEXT,"
            "file_url TEXT,"
            "tree_trail TEXT,"
            "file_sha1 TEXT,"
            # The comma separating the two table constraints was missing.
            "PRIMARY KEY (id),"
            "UNIQUE(model,fw_date)"
            ");")
        driver=harvest_utils.getFirefox()
        harvest_utils.driver=driver
        prevTrail=[]
        goToUrl(rootUrl)
        marketWalker()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        if driver is not None:
            driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_exc.png')
    finally:
        # Release the browser and the database on failure as well as success.
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
Esempio n. 6
0
def main():
    """Entry point of the TP-Link crawler.

    Parses the resume trail from the command line (the first run of digits of
    every argument), creates the TFiles table in 'tplink.sqlite3' if needed,
    opens the TP-Link download centre in Firefox and hands over to
    ``marketWalker``.

    On any exception the debugger is opened, the traceback printed and, when a
    browser is available, a screenshot saved. The browser and the database
    connection are always released on exit.
    """
    global startTrail,prevTrail, driver,conn
    # Bind both handles up front so the except/finally clauses cannot raise
    # NameError when setup fails before they are created.
    driver = None
    conn = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        uprint('startTrail=%s'%startTrail)
        conn=sqlite3.connect('tplink.sqlite3')
        sql(
            "CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "model TEXT,"
            "revision TEXT,"
            "fw_date DATE,"
            "fw_ver TEXT,"
            "fw_desc TEXT,"
            "file_name TEXT,"
            "file_size INTEGER,"
            "page_url TEXT,"
            "file_url TEXT,"
            "tree_trail TEXT,"
            "file_sha1 TEXT,"
            # The comma separating the two table constraints was missing.
            "PRIMARY KEY (id),"
            "UNIQUE(model,revision,file_name)"
            ");")
        driver=harvest_utils.getFirefox()
        harvest_utils.driver=driver
        driver.get('http://www.tp-link.com/en/download-center.html')
        prevTrail=[]
        marketWalker()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        if driver is not None:
            driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_exc.png')
    finally:
        # Release the browser and the database on failure as well as success.
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
def main():
    """Copy Huawei models from the local SQLite file into the TDevice table.

    Reads (category, model) rows from 'huawei_consumer_model.sqlite3', starting
    at the row offset given as the optional first command-line argument
    (default 0). For every model an UPDATE ... RETURNING id is tried first;
    when it returns nothing the device is inserted. The resulting id is
    logged.

    The names ``brand``, ``source``, ``model``, ``rev`` and ``devId`` must stay
    local variables: the SQL placeholders are filled from ``locals()``.
    """
    global ouconn
    brand = 'Huawei'
    source = 'consumer.huawei.com/en'
    rev = ""
    if len(sys.argv) > 1:
        firstRow = int(sys.argv[1])
    else:
        firstRow = 0

    with sqlite3.connect('huawei_consumer_model.sqlite3') as srcDb:
        reader = srcDb.cursor()
        ouconn = psycopg2.connect(GridIotConnStr)
        pending = reader.execute(
            "SELECT category, model FROM TFiles "
            " ORDER BY id LIMIT -1 OFFSET %d" % firstRow)
        for rowNo, (category, model) in enumerate(pending, firstRow):
            uprint('inRowIdx=%s, model="%s"' % (rowNo, model))

            # Update-then-insert upsert of the device row.
            found = ousql(
                "UPDATE TDevice SET source=%(source)s "
                "WHERE brand=%(brand)s AND model=%(model)s AND"
                " revision=%(rev)s RETURNING id", locals())
            if not found:
                found = ousql(
                    "INSERT INTO TDevice (brand,model,revision,source"
                    ")VALUES(%(brand)s,%(model)s,%(rev)s,%(source)s)"
                    " RETURNING id", locals())
            devId = found[0][0]
            uprint("UPSERT brand='%(brand)s', model=%(model)s"
                ",source=%(source)s RETURNING devId=%(devId)s"
                % {'brand': brand, 'model': model,
                   'source': source, 'devId': devId})
Esempio n. 8
0
def enterElem(e:WebElement, func):
    """Follow the link of element ``e`` and call ``func`` with the previous URL.

    Args:
        e: element whose ``href`` attribute is the page to open.
        func: callable invoked, after navigation, with the URL the browser
            was on before.
    """
    origin = driver.current_url
    uprint('prev_url=' + origin)
    target = e.get_attribute('href')
    uprint('next_url=' + target)
    driver.get(target)
    func(origin)
def main():
    """Entry point of the Cisco crawler.

    Parses the resume trail from the command line (the first run of digits of
    every argument), creates the TFiles table in 'cisco.sqlite3' if needed,
    opens the Cisco software download page in Firefox and hands over to
    ``treeWalker``.

    No exception is handled here and neither the browser nor the database is
    closed by this function.
    """
    global startTrail,prevTrail
    startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
    uprint('startTrail=%s'%startTrail)
    global driver,conn
    conn=sqlite3.connect('cisco.sqlite3')
    sql(
        "CREATE TABLE IF NOT EXISTS TFiles("
        "id INTEGER NOT NULL,"
        "model TEXT,"
        "fw_date DATE,"
        "fw_ver TEXT,"
        "file_title TEXT,"
        "file_name TEXT,"
        "file_size INTEGER,"
        "need_contract INTEGER," # 1=needContract, -1=Deferral
        "page_url TEXT,"
        "tree_trail TEXT," # pssub_7_1_1_0_0_0 => 7_1_1_0_0_0
        "file_sha1 TEXT,"
        # The comma separating the two table constraints was missing.
        "PRIMARY KEY (id),"
        "UNIQUE(model,fw_ver,file_name,fw_date)"
        ");")
    driver=harvest_utils.getFirefox(path.abspath('cisco_files'), 2, False)
    driver.implicitly_wait(2.0)
    harvest_utils.driver=driver
    driver.get('https://software.cisco.com/download/')
    prevTrail=[]
    treeWalker()
    # NOTE(review): prevTrail starts empty here, so if treeWalker leaves it
    # balanced an unconditional pop() raises IndexError after the crawl has
    # finished. Pop only when there is something to pop.
    if prevTrail:
        prevTrail.pop()
def pageWalker():
    """Jump to the resume page of the result list, then walk every page.

    Phase 1: while the current page is not ``getStartIdx() + 1``, click the
    pager link whose number is closest to it and wait for the '.x-waite'
    cursor to show up and go away again.
    Phase 2: from that page on, push the page index on ``prevTrail``, call
    ``rowWalker()``, pop the index and click '.x-next-on' until that button
    can no longer be found, which is treated as the last page.

    Any exception opens the debugger and prints the traceback.
    """
    global prevTrail, driver
    CSS=driver.find_elements_by_css_selector

    def pageNum(p):
        """Return the page number of pager link ``p``.

        Uses the link text when it is an integer, otherwise the argument of
        the 'void(...)' call in its href; a link without href sorts last.
        """
        try:
            return int(p.text.strip())
        except ValueError:
            pass
        href = p.get_attribute('href')
        if not href:
            return sys.maxsize
        try:
            return int(re.search(r'void\((.+)\)', href).group(1))
        except Exception as ex:
            ipdb.set_trace()
            traceback.print_exc()

    def spinnerShown():
        return len(CSS('.x-waite'))==1

    def spinnerGone():
        return (len(CSS('.x-waite'))==0 or
                CSS('.x-waite')[0].is_displayed()==False)

    try:
        startIdx = getStartIdx()
        startPage = startIdx+1

        # Phase 1: navigate to the page the crawl has to resume from.
        curPage = 1
        while curPage != startPage:
            ulog('idx=%d,page=%d'%(curPage-1, curPage))
            pages = getElems('.x-page-com a')
            tarPage = min(pages, key=lambda p: abs(startPage - pageNum(p)))
            ulog('tarPage=%d'%pageNum(tarPage))
            tarPage.click()
            ulog('tarPage.click()')
            time.sleep(0.5)
            retryUntilTrue(spinnerShown, 16, 0.4 )
            uprint('waitCursor shows')
            retryUntilTrue(spinnerGone, 60, 1 )
            uprint('waitCursor disappears')
            curPage = int(waitText('a.cur'))
            ulog('curPage=%d'%curPage)

        # Phase 2: process each page and advance with the "next" button.
        idx = startIdx
        while True:
            ulog('idx=%d,page=%d'%(idx, (idx+1)))
            prevTrail.append(idx)
            rowWalker()
            prevTrail.pop()
            try:
                nextPage = waitClickable('.x-next-on')
            except (NoSuchElementException, TimeoutException):
                ulog('last page')
                break
            nextPage.click()
            ulog('nextPage.click()')
            time.sleep(0.5)
            retryUntilTrue(spinnerShown, 16, 0.4 )
            uprint('waitCursor shows')
            retryUntilTrue(spinnerGone, 60, 1 )
            uprint('waitCursor disappears')
            idx += 1

    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def downloadDir(ftp, model, rdir):
    """Walk remote directory ``rdir`` recursively and download its files.

    Entries are taken from ``ftp.listdir(rdir)`` starting at the index
    returned by ``getStartIdx()``. While an entry is processed its index sits
    on top of the global ``prevTrail``. Sub-directories recurse into this
    function, everything else goes to ``downloadFile``.

    Args:
        ftp: FTP host object providing ``listdir`` and ``path``.
        model: passed through unchanged to the per-file download.
        rdir: remote directory to walk.

    Any exception opens the debugger and prints the traceback.
    """
    global prevTrail
    try:
        entries = ftp.listdir(rdir)
        first = getStartIdx()
        uprint('numFiles=%s'%len(entries))
        for idx, fname in enumerate(entries[first:], first):
            rpath = ftp.path.join(rdir, fname)
            prevTrail.append(idx)
            if ftp.path.isdir(rpath):
                downloadDir(ftp, model, rpath)
            else:
                downloadFile(ftp, model, rpath)
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def main():
    """Download, identify, hash and upload Huawei firmware found by keyword search.

    Rows are read from TFiles in 'huawei_consumer_search_by_keyword.sqlite3'
    ordered by id, starting at the offset given as the optional first
    command-line argument (default 0). A row is skipped when it has no
    file_url, already has a file_sha1, its file_name contains one of the
    excluded keywords, or no model / version can be guessed from file_name.
    Otherwise the file is downloaded, the row is updated with SHA-1, size,
    model and version, the file is uploaded to the FTP host and the local copy
    is removed.

    A TypeError from the download skips that row. Any other exception stops
    the run: the debugger is opened and the traceback printed. The database
    connection is closed on exit.
    """
    global startTrail,prevTrail,conn
    # Substrings of file_name that mark entries which are not wanted.
    excluded = ('Android', 'Ascend', 'Honor', 'Open Source')
    conn = None
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
        conn = sqlite3.connect('huawei_consumer_search_by_keyword.sqlite3')
        csr = conn.cursor()
        rows = csr.execute(
            "SELECT id,file_name,file_url,file_sha1,file_size FROM TFiles"
            " ORDER BY id LIMIT -1 OFFSET ?", (startIdx,)
            ).fetchall()
        for idx, row in enumerate(rows, startIdx):
            devId,file_name,file_url,file_sha1,file_size = row
            # Nothing to fetch, or already hashed on an earlier run.
            if not file_url or file_sha1:
                continue
            if any(word in file_name for word in excluded):
                continue
            if 'opensource' in file_name.lower():
                continue

            # %s instead of %d: a NULL file_size must not abort the whole run.
            uprint('idx=%d, file_name="%s",file_size=%s'%(idx,file_name,file_size))
            model = guessModel(file_name)
            uprint('model="%s"'%model)
            if not model:
                continue
            fw_ver = guessVersion(file_name)
            uprint('fw_ver="%s"'%fw_ver)
            if not fw_ver:
                continue
            try:
                local_file = downloadFile(file_url, "Content-Disposition")
            except TypeError:
                # NOTE(review): presumably raised when the response has no
                # Content-Disposition header -- confirm against downloadFile.
                continue
            file_sha1 = getFileSha1(local_file)
            file_size = path.getsize(local_file)
            csr.execute(
                "UPDATE TFiles SET file_sha1=:file_sha1"
                ",file_size=:file_size"
                ",model=:model "
                ",fw_ver=:fw_ver "
                " WHERE id = :devId",
                {'file_sha1': file_sha1, 'file_size': file_size,
                 'model': model, 'fw_ver': fw_ver, 'devId': devId})
            conn.commit()
            # The context manager closes the FTP session even when the upload
            # raises, instead of leaking it.
            with ftputil.FTPHost(ftpHostName,ftpUserName,ftpPassword) as ftp:
                uprint('upload to GRID')
                ftp.upload(local_file, path.basename(local_file))
            os.remove(local_file)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
    finally:
        if conn is not None:
            conn.close()
def modelWalker(category):
    """Store every model listed in the second support combo box.

    Opens the '#Combo_support-select-2' drop-down, reads the text of each of
    its links and upserts a (category, model) row into TFiles.

    Args:
        category: value stored in the ``category`` column of every row.

    Any exception opens the debugger, prints the traceback and saves a
    screenshot to 'huawei_excep.png'.
    """
    global driver
    CSS = driver.find_elements_by_css_selector
    try:
        combo = waitClickable('#Combo_support-select-2 div input')
        combo.click()
        models = getElems('#Combo_support-select-2 ul a')
        ulog('numModels=%d'%len(models))
        for idx, entry in enumerate(models):
            model = entry.text
            ulog('idx=%d, model=%s'%(idx, model))
            # :category and :model are taken from the local variables.
            sql("INSERT OR REPLACE INTO TFiles(category,model)"
                "VALUES(:category,:model)",locals())
            uprint('UPSERT "%(category)s," "%(model)s"'
                   % {'category': category, 'model': model})
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot('huawei_excep.png')
Esempio n. 14
0
def main():
    """Download, hash and upload every Belkin file that has no SHA-1 yet.

    Rows are read from TFiles in 'belkin.sqlite3', starting at the offset
    given as the optional first command-line argument (default 0). For each
    row with a download_url and no file_sha1 the file is downloaded into
    ``dlDir``, its SHA-1 and size are written back to the row, the file is
    uploaded to the FTP host and the local copy is removed.

    A failed download skips the row, except "no space left on device"
    (errno 28), which stops the loop.
    """
    startIdx = int(sys.argv[1]) if len(sys.argv)>1 else 0
    global conn
    with sqlite3.connect('belkin.sqlite3') as conn:
        csr=conn.cursor()
        rows=csr.execute("SELECT id,download_url,page_url,file_sha1"
                " FROM TFiles LIMIT -1 OFFSET %d"%startIdx).fetchall()
        for idx, row in enumerate(rows,startIdx):
            devId,url,page_url,fileSha1=row
            if fileSha1 or not url:
                continue
            uprint('idx=%s, url=%s'%(idx,url))
            url = safeUrl(url)
            uprint('url='+url)
            fname=urlFileName(url)
            uprint('download "%s"'%(fname))
            fname = path.join(dlDir, fname)
            try:
                downloadFile(url, fname)
            except urllib.error.HTTPError as ex:
                # 'ex' was not bound here before, so this print itself raised.
                print(ex)
                continue
            except OSError as ex:
                print(ex)
                if ex.errno == 28:
                    print('[Errno 28] No space left on device')
                    break
                # Any other I/O failure: there is no complete file to hash,
                # so skip the row instead of falling through.
                continue
            except Exception as ex:
                ipdb.set_trace()
                traceback.print_exc()
                continue

            fileSha1=getFileSha1(fname)
            fileSize=path.getsize(fname)
            print('sha1="%s" for "%s"'%(fileSha1,fname))
            csr.execute("UPDATE TFiles SET file_sha1=:fileSha1,"
                    " file_size=:fileSize WHERE id=:devId", locals())
            conn.commit()
            uprint('UPDATE fileSha1=%(fileSha1)s, fileSize=%(fileSize)s'
                    ' WHERE id="%(devId)s"' %locals())
            with ftputil.FTPHost(ftpHostName,ftpUserName,ftpPassword) as ftp:
                ftp.upload(fname, path.basename(fname))
                uprint('uploaded "%s" to ftp://%s/'
                    %(path.basename(fname), ftpHostName))
            os.remove(fname)
def guessFileSize(txt:str)->int:
    """Convert a human-readable size such as '0.52 MB' into a byte count.

    The first number found in ``txt`` is the magnitude. The text after it
    selects the unit, matched case-insensitively: MB, KB or GB (1024-based).
    When no unit is recognised, e.g. '256 / 128', megabytes are assumed.

    Args:
        txt: text containing the size.

    Returns:
        The size in bytes, truncated to an integer, or 0 (after logging) when
        ``txt`` contains no number.
    """
    m = re.search(r'(\d*[.])?\d+', txt)
    if not m:
        uprint('[guessFileSize] error txt="%s"'%txt)
        return 0
    magnitude = float(m.group(0))
    unitTxt = txt[m.end():].upper()
    # MB and KB are tested first so that every input that matched one of them
    # before still resolves to the same unit.
    if 'MB' in unitTxt:
        factor = 1024*1024
    elif 'KB' in unitTxt:
        factor = 1024
    elif 'GB' in unitTxt:
        factor = 1024*1024*1024
    else:
        factor = 1024*1024
    return int(magnitude*factor)
Esempio n. 16
0
def downloadFile(url:str, fname:str, timeOut:int=10, chunkSize:int=2*1024*1024,
        timeOutInterval:int=3):
    """ download file from url to fname (abspath)
        Keyword arguments:
        url -- source url from where to download
        fname -- target file path; the literal value 'Content-Disposition'
                 means "take the name from the response's Content-Disposition
                 header"
        timeOut -- timeOut for downloading each chunk
        chunkSize -- chunk size in bytes
        timeOutInterval -- if timeOut happens, sleep N seconds and then try again

        Returns the path the file was saved to.

        The data is written to fname+".part" and moved to fname once complete.
        socket.timeout restarts the download from the beginning after
        timeOutInterval seconds, without limit; every other exception
        propagates to the caller.
    """
    import os
    import socket
    import time
    opener = urllib.request.build_opener(MyHTTPRedirectHandler)
    urllib.request.install_opener(opener)
    while True:
        try:
            with request.urlopen(firefox_url_req(url),
                timeout=timeOut) as resp:
                uprint("resp_headers=%s"%(resp.info().items()))
                if fname == 'Content-Disposition':
                    # e.g.
                    # Content-Disposition: attachment; filename="SBRAC1750-1.0.9.img"
                    cdval = resp.info()['Content-Disposition']
                    fname = re.search(r'filename="(.+)(?<!\\)"', cdval).group(1)
                    uprint('fname="%s"'%fname)

                partName = fname+".part"
                with open(partName, mode='wb') as fout:
                    while True:
                        data=resp.read(chunkSize)
                        print('.',end='',flush=True)
                        if not data:
                            break
                        fout.write(data)
                        fout.flush()
                print('',flush=True)
                # Move the file only after it has been closed: renaming a file
                # that is still open fails on Windows. os.replace also
                # overwrites an existing target on every platform.
                os.replace(partName, fname)
                return fname
        except socket.timeout as ex:
            print('socket.timeout, sleep %d seconds'%timeOutInterval)
            time.sleep(timeOutInterval)
Esempio n. 17
0
 def http_error_302(self, req, fp, code, msg, headers):
     """Store the "Location" response header and follow the redirect.

     The header value is kept in ``self.location``. When it contains
     non-ASCII characters, its path, params and query are percent-encoded and
     the header is rewritten before the redirect is delegated to
     ``urllib.request.HTTPRedirectHandler.http_error_302``.

     :return: whatever the base class handler returns
     """
     self.location = headers.get('Location', '')
     uprint("headers['Location']=" + self.location)
     def squote(s):
         return urllib.parse.quote(s, ';/?:&=+,$[]%^')
     try:
         self.location.encode('ascii')
     except UnicodeEncodeError:
         scheme, netloc, path, params, query, fragment = \
             urllib.parse.urlparse(self.location)
         # '%' is kept safe in the path as it already is for params and query:
         # escapes present in the header must not be encoded a second time
         # ('%20' -> '%2520').
         self.location = urllib.parse.urlunparse((
             scheme, netloc, urllib.parse.quote(path, '/%'), squote(params),
             squote(query), fragment))
         headers.replace_header('Location', self.location)
         uprint("pquoted headers['Location']=" + self.location)
     return urllib.request.HTTPRedirectHandler.http_error_302(
         self, req, fp, code, msg, headers)
def main():
    """Entry point of the Huawei "search by keyword" crawler.

    The first command-line argument is the search keyword; every further
    argument contributes its first run of digits to the resume trail. The
    TFiles table is created in 'huawei_consumer_search_by_keyword.sqlite3' if
    needed, the keyword is typed into the search box at ``rootUrl``, and once
    the '.x-waite' cursor has appeared and gone the results are handed to
    ``pageWalker``.

    On any exception the debugger is opened, the traceback printed and, when a
    browser is available, a screenshot saved. The browser and the database
    connection are always released on exit.
    """
    global startTrail, prevTrail,driver,conn,keyword
    # Bind both handles up front so the except/finally clauses cannot raise
    # NameError when setup fails (e.g. missing keyword argument).
    driver = None
    conn = None
    try:
        keyword = sys.argv[1]
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[2:]]
        ulog('startTrail=%s'%startTrail)
        conn=sqlite3.connect('huawei_consumer_search_by_keyword.sqlite3')
        sql("CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "keyword TEXT,"
            "file_name TEXT," # 'Ascend Mate (MT1-U06,Android 4.1,Emotion UI,V100R001C00B221,General Version)'
            "file_desc TEXT," # NBG5715
            "rel_date DATE," # 2015-05-30
            "file_size INTEGER," # '1.26 GB' '352.32 MB'
            "file_url TEXT," # "http://download-c.huawei.com/download/downloadCenter?downloadId=44602&version=92646&siteCode=worldwide"
            "tree_trail TEXT," # [1, 2]
            "file_sha1 TEXT," # 
            "PRIMARY KEY (id),"
            "UNIQUE(file_name)"
            ")")
        driver=harvest_utils.getFirefox()
        harvest_utils.driver=driver
        prevTrail=[]
        goToUrl(rootUrl)
        inp = waitClickable('#savekeyword')
        inp.click()
        inp.send_keys(keyword)
        waitClickable('#search_by_kw > img').click()
        CSS=driver.find_elements_by_css_selector
        # Wait for the wait cursor to appear, then to go away again.
        retryUntilTrue(lambda:len(CSS('.x-waite'))==1, 4, 0.4 )
        uprint('waitCursor shows')
        retryUntilTrue(lambda:len(CSS('.x-waite'))==0 
                or CSS('.x-waite')[0].is_displayed()==False, 30, 1 )
        uprint('waitCursor disappears')
        pageWalker()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        if driver is not None:
            driver.save_screenshot('main_excep.png')
    finally:
        # Release the browser and the database on failure as well as success.
        if driver is not None:
            driver.quit()
        if conn is not None:
            conn.close()
def downloadFile(ftp, model, rfile):
    """Fetch one remote file, record it in TFiles and forward it to the FTP host.

    The file is downloaded into ``dlDir``. Its SHA-1, size, modification time,
    source URL and the current ``prevTrail`` are upserted into TFiles, the
    file is uploaded to ``ftpHostName`` and the local copy is removed.

    Args:
        ftp: source FTP host object (provides ``path`` and ``download``).
        model: model name; underscores are replaced by spaces before storing.
        rfile: remote path of the file to fetch.

    Any exception opens the debugger and prints the traceback.
    """
    global prevTrail,conn
    csr = conn.cursor()
    try:
        fname = ftp.path.basename(rfile)
        mtime = ftp.path.getmtime(rfile)
        # Values bound to the named placeholders of the INSERT below.
        record = {
            'fwDate': datetime.fromtimestamp(mtime),
            'fileSize': ftp.path.getsize(rfile),
        }
        lfile = path.join(dlDir,fname)
        uprint('download "%s"'%fname)
        ftp.download(rfile, lfile)

        record['fileSha1'] = getFileSha1(lfile)
        record['fileUrl'] = "ftp://"+zyxel_ftp+"/"+rfile
        record['modelName'] = model.replace('_',' ')
        record['trailStr'] = str(prevTrail)

        csr.execute("INSERT OR REPLACE INTO TFiles (model,"
            "fw_date,file_size,file_sha1,file_url,tree_trail) VALUES "
            "(:modelName,:fwDate,:fileSize,:fileSha1,:fileUrl,:trailStr)",
            record)
        conn.commit()
        uprint('UPSERT fileSha1=%(fileSha1)s, fileSize=%(fileSize)s'
                ' model="%(modelName)s", trail=%(trailStr)s' % record)

        with ftputil.FTPHost(ftpHostName,ftpUserName,ftpPassword) as grid:
            grid.upload(lfile, path.basename(lfile))
            uprint('uploaded "%s" to ftp://%s/'
                %(path.basename(lfile), ftpHostName))
        os.remove(lfile)
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def rowWalker():
    """Upsert one TFiles row per download entry on the current result page.

    Entries are the '#product-support-downloads-ul > li' elements, processed
    from the index returned by ``getStartIdx()``. Name, description, release
    date, size and download URL are read from each entry and stored together
    with the global ``keyword`` and the trail ``prevTrail + [idx]``.

    NOTE(review): the column values are handed to ``sql`` through
    ``glocals()``, which presumably collects them by variable name -- keep the
    local names in sync with the placeholders.

    Any exception opens the debugger and prints the traceback.
    """
    global prevTrail, driver,keyword
    try:
        rows = getElems('#product-support-downloads-ul > li')
        numRows = len(rows)
        ulog('numRows=%s'%numRows)
        startIdx = getStartIdx()
        for idx in range(startIdx, numRows):
            ulog('idx=%s'%idx)
            pick = rows[idx].find_element_by_css_selector
            file_name = pick('h2').text
            file_desc = pick('p.p-1').text
            # One paragraph carries both the release date and the size.
            date_size = pick('p.p-2').text
            rel_date = guessDate(date_size)
            file_size = guessFileSize(date_size)
            file_url = pick('a.download-bnt').get_attribute('href')
            tree_trail = str(prevTrail+[idx])
            sql("INSERT OR REPLACE INTO TFiles "
                "(keyword,file_name, file_desc, rel_date, file_size, file_url,tree_trail) "
                "VALUES (:keyword,:file_name,:file_desc,:rel_date,:file_size,:file_url,:tree_trail) ",
                glocals())
            uprint('UPSERT "%(file_name)s", %(file_url)s, %(tree_trail)s'
                   % {'file_name': file_name, 'file_url': file_url,
                      'tree_trail': tree_trail})
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
Esempio n. 21
0
def getAllModels():
    """Fill the global ``models`` list with every model offered by the site.

    When 'zyxel_models.txt' exists, is not empty and is less than 7 days old,
    the list is read from it (one model per line). Otherwise the search
    drop-down at ``rootUrl`` is opened and scrolled to the end with Ctrl+End
    until the number of '#searchDropUl li' items stops growing; the 'data'
    attribute of every item is collected and the cache file is rewritten.

    Any exception opens the debugger, prints the traceback and saves a
    screenshot.
    """
    global driver, models
    act = ActionChains(driver)
    numElm = lambda c: driver.execute_script("return $('%s').length" % c)
    try:
        if path.exists('zyxel_models.txt') and \
                path.getsize('zyxel_models.txt')>0 and \
                time.time()-path.getmtime('zyxel_models.txt') < 3600*24*7:
            with open('zyxel_models.txt', 'r', encoding='utf-8') as fin:
                # Drop the line terminator written by the cache writer below,
                # and blank lines, so that cached entries are identical to
                # freshly scraped ones.
                models = [
                    line.rstrip('\r\n') for line in fin if line.strip()
                ]
            return
        goToUrl(rootUrl)
        btn = waitVisible('.search-select button')
        act.move_to_element(btn).click(btn).perform()
        inp = waitVisible('.input-block-level')
        act.move_to_element(inp).click(inp).perform()
        act.send_keys(Keys.DOWN).perform()
        time.sleep(0.1)
        act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
        time.sleep(0.1)
        numModels = numElm('#searchDropUl li')
        uprint('numModels=%s' % numModels)
        # Keep jumping to the end of the list until no new items are loaded.
        while True:
            act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
            time.sleep(0.1)
            numModels2 = numElm('#searchDropUl li')
            if numModels == numModels2:
                break
            numModels = numModels2
            uprint('numModels=%s' % numModels)
        uprint('numModels=%s' % numModels)
        models = [
            _.get_attribute('data') for _ in getElems('#searchDropUl li')
        ]
        models = [_ for _ in models if _]
        uprint('len(models)=%s' % len(models))
        with open('zyxel_models.txt', 'w', encoding='utf-8') as fout:
            for m in models:
                fout.write(m + '\n')
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName() + '_' + getFuncName() +
                               '_excep.png')
Esempio n. 22
0
def getAllModels():
    """Fill the global ``models`` list with every model offered by the site.

    When 'zyxel_models.txt' exists, is not empty and is less than 7 days old,
    the list is read from it (one model per line). Otherwise the search
    drop-down at ``rootUrl`` is opened and scrolled to the end with Ctrl+End
    until the number of '#searchDropUl li' items stops growing; the 'data'
    attribute of every item is collected and the cache file is rewritten.

    Any exception opens the debugger, prints the traceback and saves a
    screenshot.
    """
    global driver, models
    act=ActionChains(driver)
    numElm=lambda c:driver.execute_script("return $('%s').length"%c)
    try:
        if path.exists('zyxel_models.txt') and \
                path.getsize('zyxel_models.txt')>0 and \
                time.time()-path.getmtime('zyxel_models.txt') < 3600*24*7:
            with open('zyxel_models.txt','r',encoding='utf-8') as fin:
                # Drop the line terminator written by the cache writer below,
                # and blank lines, so that cached entries are identical to
                # freshly scraped ones.
                models=[line.rstrip('\r\n') for line in fin if line.strip()]
            return
        goToUrl(rootUrl)
        btn=waitVisible('.search-select button')
        act.move_to_element(btn).click(btn).perform()
        inp=waitVisible('.input-block-level')
        act.move_to_element(inp).click(inp).perform()
        act.send_keys(Keys.DOWN).perform()
        time.sleep(0.1)
        act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
        time.sleep(0.1)
        numModels = numElm('#searchDropUl li')
        uprint('numModels=%s'%numModels)
        # Keep jumping to the end of the list until no new items are loaded.
        while True:
            act.send_keys(Keys.LEFT_CONTROL + Keys.END).perform()
            time.sleep(0.1)
            numModels2 = numElm('#searchDropUl li')
            if numModels == numModels2:
                break
            numModels = numModels2
            uprint('numModels=%s'%numModels)
        uprint('numModels=%s'%numModels)
        models = [_.get_attribute('data') for _ in getElems('#searchDropUl li')]
        models = [_ for _ in models if _]
        uprint('len(models)=%s'%len(models))
        with open('zyxel_models.txt', 'w', encoding='utf-8') as fout:
            for m in models:
                fout.write(m + '\n')
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
        driver.save_screenshot(getScriptName()+'_'+getFuncName()+'_excep.png')
def main():
    """Derive fw_ver from the file name of every TFiles row and store it.

    Rows are read from 'zyxel_ftp.sqlite3' ordered by id, starting at the
    offset given as the optional first command-line argument (default 0).
    Files ending in .pdf / .txt or in '_info' are skipped. The version is the
    part of the file title after the first underscore (the whole title when
    there is none); when that part still contains an underscore, everything up
    to and including the model name is cut off, and the debugger is opened if
    the model name is not in it.

    Any exception opens the debugger and prints the traceback.
    """
    global conn
    try:
        startIdx = 0
        if len(sys.argv)>1:
            startIdx = int(sys.argv[1])
        conn = sqlite3.connect('zyxel_ftp.sqlite3')
        csr = conn.cursor()
        query = ("SELECT id,model, file_url FROM TFiles ORDER BY id "
                 "LIMIT -1 OFFSET %d"%startIdx)
        for devId, model, file_url in csr.execute(query).fetchall():
            fileName = path.basename(file_url)
            uprint('devId=%d, model="%s",fileName="%s"'%(devId,model,fileName))
            ftitle, fext = path.splitext(fileName)
            if fext.lower() in ('.pdf', '.txt') or fileName.endswith('_info'):
                uprint('bypass txt/pdf')
                continue
            # "<model>_<version>" -> "<version>"; no underscore -> whole title.
            _head, sep, tail = ftitle.partition('_')
            fwVer = tail if sep else ftitle
            uprint('fwVer="%s"'%fwVer)
            if '_' in fwVer:
                pos = fwVer.find(model)
                if pos == -1:
                    ipdb.set_trace()
                else:
                    fwVer = fwVer[pos+len(model):].strip()
                    uprint('fwVer="%s"'%fwVer)
            csr.execute(
                "UPDATE TFiles SET fw_ver=:fwVer WHERE id=:devId",
                {'fwVer': fwVer, 'devId': devId})
            conn.commit()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
Esempio n. 24
0
def main():
    """Fill in the fw_ver column of TFiles from each row's file name.

    Reads (id, model, file_url) from 'zyxel_ftp.sqlite3' ordered by id; the
    optional first command-line argument is the row offset to start from
    (default 0). Documents (.pdf, .txt, names ending in '_info') are skipped.
    The version is what follows the first underscore of the file title, or the
    whole title when it has no underscore. If the result still contains an
    underscore, the text up to and including the model name is removed; when
    the model name is absent the debugger is opened.

    Any exception opens the debugger and prints the traceback.
    """
    global conn
    try:
        startIdx = int(sys.argv[1]) if len(sys.argv) > 1 else 0
        conn = sqlite3.connect('zyxel_ftp.sqlite3')
        csr = conn.cursor()
        csr.execute("SELECT id,model, file_url FROM TFiles ORDER BY id "
                    "LIMIT -1 OFFSET %d" % startIdx)
        rows = csr.fetchall()
        for devId, model, file_url in rows:
            fileName = path.basename(file_url)
            uprint('devId=%d, model="%s",fileName="%s"' %
                   (devId, model, fileName))
            ftitle, fext = path.splitext(fileName)
            isDocument = (fext.lower() in ('.pdf', '.txt')
                          or fileName.endswith('_info'))
            if isDocument:
                uprint('bypass txt/pdf')
                continue

            # Keep what follows the first underscore, if there is one.
            if '_' in ftitle:
                fwVer = ftitle.split('_', 1)[1]
            else:
                fwVer = ftitle
            uprint('fwVer="%s"' % fwVer)

            if '_' in fwVer:
                cut = fwVer.find(model)
                if cut != -1:
                    fwVer = fwVer[cut + len(model):].strip()
                    uprint('fwVer="%s"' % fwVer)
                else:
                    ipdb.set_trace()

            params = {'fwVer': fwVer, 'devId': devId}
            csr.execute("UPDATE TFiles SET fw_ver=:fwVer WHERE id=:devId",
                        params)
            conn.commit()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
Esempio n. 25
0
def upsertModel(model,image_url,dev_desc,dev_hstore,baseUrl,tree_trail):
    """Insert or replace the TFiles row describing one device model.

    Each argument is stored in the column of the same name, except
    ``baseUrl``, which goes into ``page_url``. The placeholders are filled
    from this function's local variables.
    """
    sql(
        "INSERT OR REPLACE INTO TFiles "
        "(model, image_url, dev_desc, dev_hstore, page_url, tree_trail) "
        "VALUES (:model, :image_url, :dev_desc, :dev_hstore, :baseUrl, :tree_trail)",
        locals())
    logged = {'model': model, 'tree_trail': tree_trail, 'image_url': image_url}
    uprint('UPSERT "%(model)s", %(tree_trail)s, %(image_url)s' % logged)
Esempio n. 26
0
def parse_download_page(page_url):
    """Download each firmware listed on a product download page and record it.

    The page is fetched with ``pq``.  Every ``li.download-item`` whose
    ``h3.thin`` heading mentions "firmware" is parsed for release date,
    version, description and download link; the file is fetched with
    ``cookie_friendly_download`` into ``store_dir``, hashed, and inserted
    into the ``image`` table through ``psql``.

    Args:
        page_url: URL of the product download page.

    Returns:
        None.  Returns early (after printing a message) when the page has no
        ``h1.product-name`` element.  Stops processing further items when the
        download fails with errno 28 (no space left on device).
    """
    global prevTrail
    d = pq(url=page_url)
    try:
        # Existence probe only: the value itself is not used.
        d('h1.product-name')[0].text_content().strip()
    except IndexError:
        print('%s does NOT exist' % page_url)
        return
    model = d('h2.product-number')[0].text_content().strip()

    # Resume support: skip the items before the start index while keeping
    # each item's real position as its index (the same pattern the other
    # walkers in this file use).  Passing the start index only as
    # enumerate()'s ``start`` would renumber the items without skipping any.
    items = list(d('li.download-item'))
    startIdx = getStartIdx()
    for idx, item in enumerate(items[startIdx:], startIdx):
        try:
            title = item.cssselect('h3.thin')[0].text_content()
        except IndexError:
            continue
        if 'firmware' not in title.lower():
            continue

        rel_date = item.cssselect('small')[0].text_content()
        # 'Publication date \r: 18 January 2016'
        rel_date = rel_date.split('\r')[1].strip(': ')
        # '18 January 2016'
        rel_date = datetime.strptime(rel_date, '%d %B %Y')

        fw_ver = item.cssselect('.download-text-title')[0].text_content()
        # 'Version number\r: 2.11'
        fw_ver = fw_ver.split('\r')[-1].strip(': ')
        # '2.11'
        fw_ver = re.search(r'\d+(\.\d+)*', fw_ver).group(0)

        # Take the description from THIS item.  Querying the whole document
        # would give every firmware the first item's text.
        # 'Changed:\n\n\n\tAdd timeout to check DNS alive\n\tAdd procedure to
        # verify ipv4 and ipv6 on ppp session"
        desc_nodes = item.cssselect('div')
        fw_desc = desc_nodes[0].text_content().strip() if desc_nodes else ''

        fw_url = item.cssselect('a')[0].attrib['href']
        try:
            uprint('start to download %s' % fw_url)
            local_file_path = cookie_friendly_download(page_url,
                                                       fw_url,
                                                       store_dir,
                                                       timeout=1000)
        except urllib.error.HTTPError as ex:
            # Bind the exception so it can actually be printed.
            print(ex)
            continue
        except OSError as ex:
            if ex.errno == 28:
                print(ex)
                print('[Errno 28] No space left on device')
                break
            # Any other OS-level failure: there is no downloaded file, so do
            # not fall through to the hashing code below.  Handle it the
            # same way as an unexpected exception.
            ipdb.set_trace()
            traceback.print_exc()
            continue
        except Exception as ex:
            ipdb.set_trace()
            traceback.print_exc()
            continue
        file_sha1 = getFileSha1(local_file_path)
        file_md5 = getFileMd5(local_file_path)
        file_size = path.getsize(local_file_path)
        uprint('file_path="%s", file_size=%s, file_sha1=%s' %
               (local_file_path, file_size, file_sha1))

        trailStr = str(prevTrail + [idx])
        psql(
            "INSERT INTO image"
            "(brand, model,"
            " rel_date, version, description,"
            " filename, file_sha1, hash, file_size,"
            " page_url, file_url, tree_trail) VALUES"
            "(    %s,   %s, "
            "       %s,      %s,          %s,"
            "       %s,        %s,   %s,        %s,"
            "       %s,       %s,         %s)",
            ('Sitecom', model, rel_date, fw_ver, fw_desc, local_file_path,
             file_sha1, file_md5, file_size, page_url, fw_url, trailStr))
Esempio n. 27
0
def main():
    """Load the Arris scrape (``arris.sqlite3``) into TDevice / TFirmware.

    Rows of the local ``TFiles`` table are read in ``id`` order; the optional
    first command-line argument is the number of leading rows to skip.  For
    each row the TDevice matching (brand, model, revision) is updated, or
    inserted when no row was updated.  When the row carries a firmware
    version, the same update-or-insert is done for TFirmware, matched on
    (device_id, version, exclude_self=false).

    NOTE: every statement is rendered from ``locals()``, so the local
    variable names below are part of the queries and must not be renamed.
    """
    global ouconn
    brand='Arris'
    source='arris.force.com/consumers'
    rev=""
    startInRowIdx = 0
    if len(sys.argv) > 1:
        startInRowIdx = int(sys.argv[1])

    with sqlite3.connect('arris.sqlite3') as inconn:
        incsr = inconn.cursor()
        ouconn= psycopg2.connect(GridIotConnStr)
        inRows = incsr.execute(
            "SELECT model, image_url, dev_desc, dev_hstore, fw_ver, page_url"
            ", file_url, file_size, file_sha1 "
            " FROM TFiles "
            " ORDER BY id LIMIT -1 OFFSET %d"%startInRowIdx)
        for inRowIdx, (model, image_url, dev_desc, dev_hstore, fwVer,
                       page_url, file_url, file_size,
                       file_sha1) in enumerate(inRows, startInRowIdx):
            uprint('inRowIdx=%s, model="%s","%s" '%(
                inRowIdx, model,fwVer))

            # Device: try the UPDATE first, INSERT only if nothing came back.
            devId=ousql(
                "UPDATE TDevice SET source=%(source)s,"
                " image_url=%(image_url)s, "
                " description=%(dev_desc)s, "
                " support_page=%(page_url)s, "
                " speedguide_props_hstore=%(dev_hstore)s::hstore "
                "WHERE brand=%(brand)s AND model=%(model)s AND"
                " revision=%(rev)s RETURNING id" ,locals())
            if not devId:
                devId=ousql(
                    "INSERT INTO TDevice (brand,model,revision,source,"
                    "image_url,description,support_page,speedguide_props_hstore"
                    ")VALUES(%(brand)s,%(model)s,%(rev)s,%(source)s,%(image_url)s"
                    ",%(dev_desc)s,%(page_url)s,%(dev_hstore)s::hstore)"
                    " RETURNING id",
                    locals())
            # First column of the first returned row is the device id.
            devId=devId[0][0]
            uprint("UPSERT brand='%(brand)s', model=%(model)s"
                ",source=%(source)s RETURNING devId=%(devId)s"%locals())

            # Firmware: rows without a version only contribute the device.
            if not fwVer:
                continue
            fwId=ousql(
                "UPDATE TFirmware SET file_sha1=%(file_sha1)s,"
                " include_prev=false,file_size=%(file_size)s,"
                " file_url=%(file_url)s,"
                " desc_url=%(page_url)s "
                "WHERE"
                "  device_id=%(devId)s AND version=%(fwVer)s AND"
                "  exclude_self=false RETURNING id",locals())
            if not fwId:
                fwId=ousql(
                    "INSERT INTO TFirmware("
                    "  device_id, version, exclude_self, "
                    "  file_sha1, file_size, "
                    "  file_url, desc_url ) "
                    "VALUES ( %(devId)s, %(fwVer)s, false, "
                    " %(file_sha1)s, %(file_size)s, "
                    " %(file_url)s, %(page_url)s)"
                    " RETURNING id", locals())
            fwId=fwId[0][0]
            uprint("UPSERT TFirmware devId='%(devId)d', fwVer='%(fwVer)s',"
                " sha1='%(file_sha1)s', fwId=%(fwId)d"%locals())
def main():
    """Load the Huawei scrape into TDevice / TFirmware.

    Reads ``TFiles`` from ``huawei_consumer_search_by_keyword.sqlite3`` in
    ``id`` order, skipping as many leading rows as the optional first
    command-line argument says.  Rows without a model are ignored.  For the
    others the TDevice matching (brand, model, revision) is updated or
    inserted, and - when a firmware version is present - the TFirmware row
    matching (device_id, version, exclude_self=false) is updated or inserted.

    NOTE: every statement is rendered from ``locals()``, so the local
    variable names below are part of the queries and must not be renamed.
    """
    global ouconn
    brand='Huawei'
    source='consumer.huawei.com/en'
    rev=""
    startInRowIdx = 0
    if len(sys.argv) > 1:
        startInRowIdx = int(sys.argv[1])

    with sqlite3.connect('huawei_consumer_search_by_keyword.sqlite3') as inconn:
        incsr = inconn.cursor()
        ouconn= psycopg2.connect(GridIotConnStr)
        inRows = incsr.execute(
            "SELECT model, fw_ver,rel_date, file_desc,file_name"
            ", file_url, file_size, file_sha1 "
            " FROM TFiles "
            " ORDER BY id LIMIT -1 OFFSET %d"%startInRowIdx)
        for inRowIdx, (model, fw_ver, rel_date, file_desc, file_name,
                       file_url, file_size,
                       file_sha1) in enumerate(inRows, startInRowIdx):
            if not model:
                continue
            uprint('inRowIdx=%s, model="%s","%s" '%(
                inRowIdx, model, fw_ver))

            # Device: try the UPDATE first, INSERT only if nothing came back.
            devId=ousql(
                "UPDATE TDevice SET source=%(source)s "
                "WHERE brand=%(brand)s AND model=%(model)s AND"
                " revision=%(rev)s RETURNING id" ,locals())
            if not devId:
                devId=ousql(
                    "INSERT INTO TDevice (brand,model,revision,source"
                    ")VALUES(%(brand)s,%(model)s,%(rev)s,%(source)s)"
                    " RETURNING id", locals())
            # First column of the first returned row is the device id.
            devId=devId[0][0]
            uprint("UPSERT brand='%(brand)s', model=%(model)s"
                ",source=%(source)s RETURNING devId=%(devId)s"%locals())

            # Firmware: rows without a version only contribute the device.
            if not fw_ver:
                continue
            fwId=ousql(
                "UPDATE TFirmware SET file_sha1=%(file_sha1)s,"
                " include_prev=false,file_size=%(file_size)s,"
                " file_url=%(file_url)s,"
                " description=%(file_desc)s,"
                " release_date=%(rel_date)s,"
                " file_path=%(file_name)s "
                " WHERE"
                "  device_id=%(devId)s AND version=%(fw_ver)s AND"
                "  exclude_self=false RETURNING id",locals())
            if not fwId:
                fwId=ousql(
                    "INSERT INTO TFirmware("
                    "  device_id, version, exclude_self, "
                    "  file_sha1, file_size, "
                    "  file_url, description, release_date, file_path ) "
                    "VALUES ( %(devId)s, %(fw_ver)s, false, "
                    " %(file_sha1)s, %(file_size)s, "
                    " %(file_url)s, %(file_desc)s, %(rel_date)s, %(file_name)s)"
                    " RETURNING id", locals())
            fwId=fwId[0][0]
            uprint("UPSERT TFirmware devId='%(devId)d', fw_ver='%(fw_ver)s',"
                " sha1='%(file_sha1)s', fwId=%(fwId)d"%locals())
def main():
    """Walk the Zyxel FTP tree and hand every firmware entry to a downloader.

    Command-line arguments are parsed into ``startTrail`` (the first run of
    digits of each argument), from which ``getStartIdx()`` is expected to
    supply the indices to resume at - TODO confirm against its definition.
    ``TFiles`` is created in ``zyxel_ftp.sqlite3`` if missing.

    For each top-level directory (a model) the first sub-directory whose
    name starts with "firmware" is listed; each entry is passed to
    ``downloadDir`` (directories) or ``downloadFile`` (everything else).
    ``prevTrail`` holds the [model index, file index] currently in progress.

    Any exception drops into ipdb and is then printed.  The FTP connection
    is closed on every exit path.
    """
    global startTrail,prevTrail,conn
    ftp = None
    try:
        startTrail = [int(re.search(r'\d+', _).group(0)) for _ in sys.argv[1:]]
        conn= sqlite3.connect('zyxel_ftp.sqlite3')
        csr=conn.cursor()
        csr.execute(
            "CREATE TABLE IF NOT EXISTS TFiles("
            "id INTEGER NOT NULL,"
            "model TEXT,"
            "product_name TEXT,"
            "fw_date DATE,"
            "fw_ver TEXT,"
            "fw_desc TEXT,"
            "file_size INTEGER,"
            "page_url TEXT,"
            "file_url TEXT,"
            "tree_trail TEXT,"
            "file_sha1 TEXT,"
            "PRIMARY KEY (id)"
            "UNIQUE(model,fw_date)"
            ");")

        ftp = ftputil.FTPHost(zyxel_ftp, 'anonymous', '')
        ftp.keep_alive()
        prevTrail=[]
        models = ftp.listdir('.')
        startDIdx = getStartIdx()
        for didx,model in enumerate(models[startDIdx:],startDIdx):
            uprint('didx=%d'%didx)
            prevTrail+=[didx]
            if not ftp.path.isdir(model):
                uprint('"%s" is not directory '%model)
                prevTrail.pop()
                continue
            # Retry the listing on a fresh connection for as long as the
            # server answers with a temporary error.
            while True:
                try:
                    dirs = ftp.listdir(model)
                    break
                except ftputil.error.TemporaryError as ex:
                    print(ex)
                    ftp = ftputil.FTPHost(zyxel_ftp, 'anonymous','')
            fw = next((_ for _ in dirs if _.lower().startswith('firmware')),None)
            if not fw:
                uprint('model "%s" has no firmware'%model)
                prevTrail.pop()
                continue
            uprint('model="%s"'%model)
            # Remote paths are joined with the host's own path module so the
            # separator is correct regardless of the local operating system.
            remoteDir = ftp.path.join(model,fw)
            files = ftp.listdir(remoteDir)
            uprint('numFiles=%d'%len(files))
            startFIdx = getStartIdx()
            for fidx,fname in enumerate(files[startFIdx:],startFIdx):
                uprint('fidx = %s,"%s"'%(fidx,fname))
                prevTrail+=[fidx]
                rfile = ftp.path.join(remoteDir,fname)
                if ftp.path.isdir(rfile):
                    uprint('"%s" is a directory!'%rfile)
                    downloadDir(ftp, model, rfile)
                else:
                    downloadFile(ftp, model, rfile)
                prevTrail.pop()
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
    finally:
        # Close the connection even when the walk was aborted by an error.
        if ftp is not None:
            ftp.close()
def selectSoftwareType():
    """Walk one "select ..." page of the download tree, recursing per link.

    After the breadcrumb has loaded, the page depth from ``getDepth()`` is
    compared with ``len(prevTrail)``; a positive difference is logged as
    ``jumpedLevels`` (the original author's note: going forward the site may
    auto-jump levels, going backward it does not).

    Two outcomes:
      * ``table#imageTableContainer`` is absent: every ``div.csWrapper li a``
        link from the start index on is clicked and this function recurses;
        afterwards the last breadcrumb link is clicked to go back.
      * it is present: control is handed to ``versionWalker()``.

    ``startTrail`` supplies the indices to resume from and ``prevTrail``
    records the path currently being visited; both are mutated in place.
    Any exception drops into ipdb, is printed, and a screenshot is saved.
    """
    global startTrail,prevTrail,driver
    try:
        waitText('.csProductSelectorBreadcrumb', 5, 1)
        waitUntilStable('.csProductSelectorBreadcrumb', 1, 0.3)
        depth = getDepth()
        # Levels present on the page that prevTrail does not account for.
        jumpedLevels =depth - len(prevTrail)
        ulog('jumpedLevels=%d'%jumpedLevels)
        assert jumpedLevels>=0
        ulog('depth=%d, but prevTrail=%s'%(depth, prevTrail))

        # Set once an index has been consumed from startTrail, so that it
        # can be pushed back before handing over to versionWalker().
        startIdxFromStartTrail=False
        def getStartIdx()->int:
            # Next resume index from startTrail, or 0 once it is exhausted.
            if startTrail:
                nonlocal startIdxFromStartTrail
                startIdxFromStartTrail=True
                return startTrail.pop(0)
            else:
                return 0

        if jumpedLevels>0:
            # Pad prevTrail with one start index per jumped level.
            while depth>len(prevTrail):
                startIdx=getStartIdx()
                prevTrail+=[startIdx]
        else:
            startIdx=getStartIdx()

        assert depth==len(prevTrail)
        ulog('startTrail=%s'%startTrail)
        ulog('prevTrail=%s'%prevTrail)
        ulog('startIdx=%d'%startIdx)

        ulog('url=%s'%driver.current_url)
        crumbs = waitText('.csProductSelectorBreadcrumb')
        uprint('crumbs=%s'%(crumbs.replace('\n',' > ')))

        if not hasElem('table#imageTableContainer', 1.5,0.4):
            # No image table: this is a selection page.
            if jumpedLevels>0:
                startIdx=getStartIdx()
                # NOTE(review): depth == len(prevTrail) was asserted above,
                # so this condition looks like it can never hold - confirm.
                if depth > len(prevTrail):
                    prevTrail+=[startIdx]
            sdpBannerTitle=waitText('td.SDPBannerTitle').strip()
            ulog('SDBBannerTitle="%s"'%sdpBannerTitle)
            assert sdpBannerTitle.lower().startswith('select ')
            waitUntil(lambda: getNumElem('div.csWrapper li a') > 0)
            swtypes = getElems('div.csWrapper li a')
            ulog('%s'%[(i,getElemText(_)) for i,_ in enumerate(swtypes)])
            numSwTypes=len(swtypes)
            assert numSwTypes > 0
            for idx in range(startIdx, numSwTypes):
                ulog('goto Trail=%s'%(prevTrail+[idx]))
                # Fetch the links again on every iteration before clicking.
                swtypes = getElems('div.csWrapper li a')
                ulog('Click "%s"'% getElemText(swtypes[idx]))
                clickElem(swtypes[idx])
                prevTrail+=[idx]
                selectSoftwareType()
                prevTrail.pop()
            # Select a Product -> Select a Software type -> Select a Platform
            # https://software.cisco.com/download/type.html?mdfid=277873153&flowid=170&softwareid=283724313
            # Downloads Home >Products >Cisco Interfaces and Modules >WAN Interface Cards >1700/2600/3600/3700 Series 2-Port Analog Modem WAN Interface Card >Analog Firmware Loader >Windows 2000-v6780
            # The site does not return to treeWalker automatically here,
            # so go back manually through the last breadcrumb link.
            crumbs = getElems('.csProductSelectorBreadcrumb a')
            ulog('manually backto "%s"'%getElemText(crumbs[-1]))
            ulog('prevTail=%s'%prevTrail)
            clickElem(crumbs[-1])
            # Open question from the original author: should prevTrail be
            # popped here?
            # prevTrail.pop()
        else:
            ulog('auto forward to versionWalker')
            # Give back the index consumed above, then drop one startTrail
            # entry per jumped level before versionWalker() runs.
            if startIdxFromStartTrail:
                startTrail.insert(0, startIdx)
            for i in range(jumpedLevels):
                if not startTrail:
                    break
                startTrail.pop(0)
            versionWalker()
        # Undo the jumped levels: one breadcrumb click and one prevTrail
        # entry per level.
        for i in range(jumpedLevels):
            crumbs = getElems('.csProductSelectorBreadcrumb a')
            ulog('manually backto "%s"'%getElemText(crumbs[-1]))
            ulog('prevTail=%s'%prevTrail)
            clickElem(crumbs[-1])
            prevTrail.pop()
    except Exception as ex:
        ipdb.set_trace()
        print(ex); traceback.print_exc()
        driver.save_screenshot('cisco_selectSoftwareType.png')
Esempio n. 31
0
def detailScraper(baseUrl):
    """Harvest one Arris product-detail page into ``TFiles``.

    URLs that do not match the arris ``*.com`` pattern and pages without a
    second ``div.row`` are skipped.  Otherwise the model name, description,
    spec table (as hstore) and product image URL are extracted, and one row
    per entry of the ``#panel4`` file list is upserted.  Entries without a
    usable download produce a model-only row through ``upsertModel``; links
    that are not CDN URLs are additionally passed to ``faqScraper``.

    Any exception drops into ipdb and is then printed.
    """
    global prevTrail
    try:
        ulog('baseUrl= '+baseUrl)
        # OK: http://arris.force.com/consumers/ConsumerProductDetail_Ja?p=a0ha000000Rx4I4AAJ&c=Touchstone%20Modems%20and%20Gateways
        # Not: http://shop.surfboard.com/
        if re.match(r'(http|https)://.*arris\..+\.com/.+', baseUrl) is None:
            ulog('Not arris.force.com')
            return
        page = pq(url=baseUrl)
        try:
            dev_desc = elmToMd(page('div.row')[1])
        except IndexError:
            ulog('no model to harvest')
            return

        # Drop one leading '+' from each line and trim it; the first line
        # of the result is the model name.
        dev_desc = '\n'.join(
            re.sub(r'^\+', '', line, count=1).strip()
            for line in dev_desc.splitlines())
        model = dev_desc.splitlines()[0].strip()
        assert model
        ulog('model= '+model)

        # Each spec row's text: first line is the key, second the value.
        spec_rows = [row.text_content().strip()
                     for row in page('.specTbl tr')]
        dev_hstore = dict2hstore(OrderedDict(
            (text.splitlines()[0].strip(), text.splitlines()[1].strip())
            for text in spec_rows))

        # "background: url(https://arris--c.na13.content.force.com/servlet/servlet.ImageServer?id=015a0000003NYHt&oid=00D30000000kUAL&lastMod=1442430676000);"
        image_url = page('.box.boxProduct')[0].attrib['style']
        image_url = re.search(r'url\((.+)(?<!\\)\)', image_url).group(1)
        assert fileUrlIsCdn(image_url)

        files = page('#panel4 .small-12.columns:not(.text-center)')
        numFiles = len(files)
        ulog('numFiles=%s'%numFiles)
        if numFiles == 0:
            upsertModel(model, image_url, dev_desc, dev_hstore, baseUrl, str(prevTrail))
            return

        for idx in range(getStartIdx(), numFiles):
            entry = files[idx]
            # First non-blank line of the entry is its title.
            nonblank = [line.strip()
                        for line in entry.text_content().splitlines()
                        if line.strip()]
            file_name = nonblank[0]
            ulog('file_name="%s"'%file_name)
            if re.match(r'No .+ Available', file_name, re.I):
                upsertModel(model, image_url, dev_desc, dev_hstore, baseUrl, str(prevTrail))
                continue

            # Fall back to the whole title when no version is recognisable.
            ver_match = re.search(r"\d\.[\w\.\-]+", file_name)
            fw_ver = ver_match.group(0) if ver_match else file_name

            links = entry.cssselect('a')
            if not links:
                ulog('No files')
                upsertModel(model, image_url, dev_desc, dev_hstore, baseUrl, str(prevTrail))
                continue
            file_url = next(a.attrib['href'] for a in links
                            if a.text_content().strip().startswith('Download'))
            if not fileUrlIsCdn(file_url):
                faqScraper(file_url, model, image_url, dev_desc, dev_hstore)
            tree_trail = str(prevTrail+[idx])
            # Bound through locals(): the names used in the statement must
            # stay as they are.
            sql("INSERT OR REPLACE INTO TFiles (model, image_url, dev_desc, dev_hstore, fw_ver, page_url, file_url, tree_trail) VALUES (:model, :image_url, :dev_desc, :dev_hstore, :fw_ver, :baseUrl, :file_url, :tree_trail)", locals())
            uprint('UPSERT "%(model)s", "%(fw_ver)s", %(tree_trail)s, %(file_url)s '%locals())
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def scrapeDetails(curUrl):
    """Scrape one device-details page and upsert it into ``TFiles``.

    The ``#content`` element is converted to markdown with ``elmToMd``; the
    breadcrumb line (starting with "Home") yields brand and model, the
    "details:" and "hardware type:" lines yield product name and category,
    and the text between the following blank line and the
    "All ... products" line becomes the description.  Every ``.tblight``
    table row of the form "name | value" is normalised and collected into
    an hstore; a few well-known properties are also stored in their own
    columns.

    Args:
        curUrl: URL of the details page.

    Any exception (including the assertions used as sanity checks) drops
    into ipdb and is then printed.
    """
    global prevTrail
    ulog('curUrl= '+curUrl)
    try:
        d= pq(curUrl)
        md = elmToMd(d('#content')[0],True,True)
        mdl = md.splitlines()
        # Breadcrumb line: "Home » ... » <brand> » <model> Details"
        i = next(i for i,_ in enumerate(mdl) if _.strip().startswith('Home'))
        brmd = [_.strip() for _ in mdl[i].split('» ')]
        brand = brmd[3]
        model = brmd[4].replace('Details','').strip()

        # Product name
        i =findLineIdxWith(mdl,i+1,lambda _:_.strip().startswith('details:'))
        prodName = mdl[i].split(' |' )[1].strip()
        # "Dual-band wireless-AC3100 gigabit router"
        i = findLineIdxWith(mdl,i+1,
                lambda _:_.strip().startswith('hardware type:'))
        category = mdl[i].split(' | ')[1].strip()
        # "Wireless Router"

        # Description: from the first non-empty line after the blank line
        # that follows the details, up to the "All ... products" line.
        i = findLineIdxWith(mdl,i+1,lambda _:not _.strip())
        i = findLineIdxWith(mdl,i+1,lambda _:_.strip())
        j = findLineIdxWith(mdl,i+1,
                lambda _:re.match(r'All .+ products$',_.strip()))
        description='\n'.join(_.strip() for _ in mdl[i:j] if _.strip())
        default_user_name,default_password,wifi_proto,availability,\
                product_page,hw_fla1_amount,hw_ram1_amount = \
                None,None,None,None,None,None,None

        trs = d('.tblight tr')
        pr=OrderedDict()
        for tr in trs:
            l = elmToMd(tr,False,False)
            if ' | ' not in l:
                continue
            n,v = [_.strip() for _ in l.split(' | ',1)]
            v = '\n'.join(_.strip() for _ in v.splitlines())
            # Strip the trailing colon BEFORE the duplicate check: the keys
            # of ``pr`` are stored without it, so checking the raw name
            # would never detect a repeated property.
            n = n.rstrip(':')
            assert n not in pr
            if v:
                v0 = v[0]
                if v0=='!':
                    # Markdown image whose alt text is "yes" or "no".
                    yn = re.search(r'!\[(.+?)\]', v).group(1)
                    if yn=='yes': v= "true"
                    elif yn=='no': v = "false"
                    else: ipdb.set_trace(); uprint(yn)
                elif v0 =='<':
                    v = re.search(r'<(.+?)>',v).group(1).strip()
                    # "<http://router.asus.com>"
                elif v0 == '[':
                    # Markdown link: keep the first token inside (...) and
                    # remove backslash escapes.
                    hreftitle = re.search(r'\((.+?)(?<!\\)\)', v).group(1)
                    v = hreftitle.split()[0]
                    v = v.replace('\\', '')
                else:
                    # Plain values seen so far:
                    # 'Transmit Power: |  +30 dBm'
                    # 'Receiver Sensitivity: |  -76 dBm'
                    # 'Street price: |  $52'
                    # 'Default admin password: |  (blank)'
                    # Anything else stops for manual inspection.
                    if not (v0.isalnum() or v0 in "+-$"
                            or (v0=='(' and v[-1]==')')):
                        ipdb.set_trace()
            pr[n] = v
            if n== 'Default admin username':
                default_user_name = convertUserPassword(v)
            elif n== 'Default admin password':
                default_password = convertUserPassword(v)
            elif n=='WiFi standards supported':
                wifi_proto = abgnac_format(v)
            elif n== 'Availability':
                availability = v
            elif n == 'Product page':
                product_page = v
            elif n=='Flash Memory':
                assert re.match(r'[\d]*\.?\d+\s*(Mb|Kb)',v,re.I)
                hw_fla1_amount = v
            elif n== 'RAM':
                assert re.match(r'[\d]*\.?\d+\s*(Mb|Kb)',v,re.I)
                hw_ram1_amount = v

        props_hstore = dict2hstore(pr)
        img= d('a.piframe img')[0]
        image_url=urlChangePath(d.base_url,img.attrib['src'])
        trailStr = str(prevTrail)

        # Bound through locals(): the names used in the statement must stay
        # as they are.
        sql("INSERT OR REPLACE INTO TFiles(brand,model,prod_name,category"
            ", default_user_name, default_password, wifi_proto, availability"
            ", description, product_page, hw_fla1_amount, hw_ram1_amount"
            ", image_url, props_hstore, tree_trail) VALUES "
            "(:brand,:model,:prodName,:category"
            ",:default_user_name,:default_password,:wifi_proto,:availability"
            ",:description,:product_page,:hw_fla1_amount,:hw_ram1_amount"
            ",:image_url,:props_hstore,:trailStr)", locals())
        uprint('UPSERT "%(brand)s", "%(model)s", \'%(props_hstore)s\''
                ', %(trailStr)s '% locals())
    except Exception as ex:
        ipdb.set_trace()
        traceback.print_exc()
def main():
    """Load the TP-Link scrape (``tplink.sqlite3``) into TDevice / TFirmware.

    Rows of the local ``TFiles`` table are read in ``id`` order; the optional
    first command-line argument is the number of leading rows to skip.  For
    each row the TDevice matching (brand, model, revision) is updated or
    inserted, then the TFirmware row matching (device_id, version,
    exclude_self=false) is updated or inserted.

    NOTE: every statement is rendered from ``locals()``, so the local
    variable names below are part of the queries and must not be renamed.
    """
    global ouconn
    brand='TP-Link'
    source='www.tp-link.com/en'
    startInRowIdx = 0
    if len(sys.argv) > 1:
        startInRowIdx = int(sys.argv[1])

    with sqlite3.connect('tplink.sqlite3') as inconn:
        incsr = inconn.cursor()
        ouconn= psycopg2.connect(GridIotConnStr)
        inRows = incsr.execute(
            "SELECT model, revision, fw_desc, file_size,fw_ver,fw_date,"
            "file_sha1, file_url, file_name, page_url FROM TFiles "
            " ORDER BY id LIMIT -1 OFFSET %d"%startInRowIdx)
        for inRowIdx, (model, rev, fwDesc, fileSize, fwVer, fwDate,
                       fileSha1, fileUrl, fileName,
                       pageUrl) in enumerate(inRows, startInRowIdx):
            uprint('inRowIdx=%s, model="%s","%s", "%s","%s"'%(
                inRowIdx, model,rev,fileName,fwVer))

            # Device: try the UPDATE first, INSERT only if nothing came back.
            devId=ousql(
                "UPDATE TDevice SET source=%(source)s "
                "WHERE brand=%(brand)s AND model=%(model)s AND"
                " revision=%(rev)s RETURNING id" ,locals())
            if not devId:
                devId=ousql(
                    "INSERT INTO TDevice (brand,model,revision,source"
                    ") VALUES (%(brand)s,%(model)s,%(rev)s, "
                    "%(source)s) RETURNING id",
                    locals())
            # First column of the first returned row is the device id.
            devId=devId[0][0]
            uprint("UPSERT brand='%(brand)s', model=%(model)s"
                ",source=%(source)s RETURNING devId=%(devId)s"%locals())

            # Firmware: same update-or-insert, with no check for a missing
            # version.
            fwId=ousql(
                "UPDATE TFirmware SET file_sha1=%(fileSha1)s,"
                "include_prev=false,file_size=%(fileSize)s,"
                "release_date=%(fwDate)s, file_url=%(fileUrl)s,"
                "desc_url=%(pageUrl)s, description=%(fwDesc)s,"
                "file_path=%(fileName)s WHERE"
                "  device_id=%(devId)s AND version=%(fwVer)s AND"
                "  exclude_self=false RETURNING id",locals())
            if not fwId:
                fwId=ousql(
                    "INSERT INTO TFirmware("
                    "device_id, version, exclude_self, "
                    "  file_sha1, file_size, release_date, "
                    "  file_url, description, desc_url, file_path) "
                    "VALUES ( %(devId)s, %(fwVer)s, false, "
                    " %(fileSha1)s, %(fileSize)s, %(fwDate)s,"
                    " %(fileUrl)s, %(fwDesc)s, %(pageUrl)s, %(fileName)s"
                    ") RETURNING id", locals())
            fwId=fwId[0][0]
            uprint("UPSERT TFirmware devId='%(devId)d', fwVer='%(fwVer)s',"
                " sha1='%(fileSha1)s', fwId=%(fwId)d"%locals())